diff options
author | Matthieu Herrb <matthieu@cvs.openbsd.org> | 2013-12-01 20:34:21 +0000 |
---|---|---|
committer | Matthieu Herrb <matthieu@cvs.openbsd.org> | 2013-12-01 20:34:21 +0000 |
commit | a5bbbb8d49b940f16b8fca367fa3e2cc5489f862 (patch) | |
tree | 79a94b85f71cf67460335388866fdcdf78942987 /lib | |
parent | 8a0bfc9a32ee1e84d7274040f2902b8e1b459d0f (diff) |
Update to pixman 0.32.4. Tested by naddy@ and ajacoutot@
Diffstat (limited to 'lib')
47 files changed, 3883 insertions, 1926 deletions
diff --git a/lib/pixman/Makefile.bsd-wrapper b/lib/pixman/Makefile.bsd-wrapper index 50edd691d..186518b08 100644 --- a/lib/pixman/Makefile.bsd-wrapper +++ b/lib/pixman/Makefile.bsd-wrapper @@ -1,8 +1,8 @@ -# $OpenBSD: Makefile.bsd-wrapper,v 1.21 2013/08/13 07:07:28 guenther Exp $ +# $OpenBSD: Makefile.bsd-wrapper,v 1.22 2013/12/01 20:34:20 matthieu Exp $ .include <bsd.own.mk> -SHARED_LIBS= pixman-1 31.0 +SHARED_LIBS= pixman-1 32.4 .if ${MACHINE_ARCH} == arm CONFIGURE_ARGS += --disable-arm-simd --disable-arm-neon diff --git a/lib/pixman/Makefile.in b/lib/pixman/Makefile.in index 6cfa1d14d..d9c9d1294 100644 --- a/lib/pixman/Makefile.in +++ b/lib/pixman/Makefile.in @@ -205,7 +205,7 @@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ HAVE_LIBPNG = @HAVE_LIBPNG@ -HAVE_PTHREAD_SETSPECIFIC = @HAVE_PTHREAD_SETSPECIFIC@ +HAVE_PTHREADS = @HAVE_PTHREADS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ @@ -251,6 +251,7 @@ PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ PNG_CFLAGS = @PNG_CFLAGS@ PNG_LIBS = @PNG_LIBS@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ PTHREAD_LDFLAGS = @PTHREAD_LDFLAGS@ PTHREAD_LIBS = @PTHREAD_LIBS@ RANLIB = @RANLIB@ @@ -259,6 +260,7 @@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SSE2_CFLAGS = @SSE2_CFLAGS@ SSE2_LDFLAGS = @SSE2_LDFLAGS@ +SSSE3_CFLAGS = @SSSE3_CFLAGS@ STRIP = @STRIP@ TESTPROGS_EXTRA_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR = @TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR@ diff --git a/lib/pixman/config.h.in b/lib/pixman/config.h.in index d26107ff8..fbca39084 100644 --- a/lib/pixman/config.h.in +++ b/lib/pixman/config.h.in @@ -6,6 +6,9 @@ /* Whether we have alarm() */ #undef HAVE_ALARM +/* Whether the compiler supports __builtin_clz */ +#undef HAVE_BUILTIN_CLZ + /* Define to 1 if you have the <dlfcn.h> header file. 
*/ #undef HAVE_DLFCN_H @@ -48,8 +51,8 @@ /* Whether we have posix_memalign() */ #undef HAVE_POSIX_MEMALIGN -/* Whether pthread_setspecific() is supported */ -#undef HAVE_PTHREAD_SETSPECIFIC +/* Whether pthreads is supported */ +#undef HAVE_PTHREADS /* Whether we have sigaction() */ #undef HAVE_SIGACTION @@ -142,6 +145,9 @@ /* use SSE2 compiler intrinsics */ #undef USE_SSE2 +/* use SSSE3 compiler intrinsics */ +#undef USE_SSSE3 + /* use VMX compiler intrinsics */ #undef USE_VMX diff --git a/lib/pixman/configure b/lib/pixman/configure index f68eac867..78dadb34f 100644 --- a/lib/pixman/configure +++ b/lib/pixman/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for pixman 0.30.2. +# Generated by GNU Autoconf 2.69 for pixman 0.32.4. # # Report bugs to <pixman@lists.freedesktop.org>. # @@ -590,8 +590,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='pixman' PACKAGE_TARNAME='pixman' -PACKAGE_VERSION='0.30.2' -PACKAGE_STRING='pixman 0.30.2' +PACKAGE_VERSION='0.32.4' +PACKAGE_STRING='pixman 0.32.4' PACKAGE_BUGREPORT='pixman@lists.freedesktop.org' PACKAGE_URL='' @@ -639,9 +639,10 @@ HAVE_LIBPNG PNG_LIBS PNG_CFLAGS TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR +PTHREAD_CFLAGS PTHREAD_LIBS PTHREAD_LDFLAGS -HAVE_PTHREAD_SETSPECIFIC +HAVE_PTHREADS TOOLCHAIN_SUPPORTS__THREAD HAVE_GTK_FALSE HAVE_GTK_TRUE @@ -665,12 +666,15 @@ USE_ARM_SIMD_TRUE USE_VMX_FALSE USE_VMX_TRUE VMX_CFLAGS +SSSE3_CFLAGS SSE2_LDFLAGS SSE2_CFLAGS MMX_LDFLAGS MMX_CFLAGS IWMMXT_CFLAGS LS_CFLAGS +USE_SSSE3_FALSE +USE_SSSE3_TRUE USE_SSE2_FALSE USE_SSE2_TRUE USE_X86_MMX_FALSE @@ -815,6 +819,7 @@ enable_openmp enable_loongson_mmi enable_mmx enable_sse2 +enable_ssse3 enable_vmx enable_arm_simd enable_arm_neon @@ -1385,7 +1390,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. 
cat <<_ACEOF -\`configure' configures pixman 0.30.2 to adapt to many kinds of systems. +\`configure' configures pixman 0.32.4 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1455,7 +1460,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of pixman 0.30.2:";; + short | recursive ) echo "Configuration of pixman 0.32.4:";; esac cat <<\_ACEOF @@ -1478,6 +1483,7 @@ Optional Features: --disable-loongson-mmi disable Loongson MMI fast paths --disable-mmx disable x86 MMX fast paths --disable-sse2 disable SSE2 fast paths + --disable-ssse3 disable SSSE3 fast paths --disable-vmx disable VMX fast paths --disable-arm-simd disable ARM SIMD fast paths --disable-arm-neon disable ARM NEON fast paths @@ -1589,7 +1595,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -pixman configure 0.30.2 +pixman configure 0.32.4 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2187,7 +2193,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by pixman $as_me 0.30.2, which was +It was created by pixman $as_me 0.32.4, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -3011,7 +3017,7 @@ fi # Define the identity of the package. 
PACKAGE='pixman' - VERSION='0.30.2' + VERSION='0.32.4' cat >>confdefs.h <<_ACEOF @@ -12195,13 +12201,13 @@ fi -LT_VERSION_INFO="30:2:30" +LT_VERSION_INFO="32:4:32" PIXMAN_VERSION_MAJOR=0 -PIXMAN_VERSION_MINOR=30 +PIXMAN_VERSION_MINOR=32 -PIXMAN_VERSION_MICRO=2 +PIXMAN_VERSION_MICRO=4 @@ -12255,6 +12261,53 @@ rm -f core conftest.err conftest.$ac_objext \ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $_yesno" >&5 $as_echo "$_yesno" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports -Wdeclaration-after-statement" >&5 +$as_echo_n "checking whether the compiler supports -Wdeclaration-after-statement... " >&6; } + save_CFLAGS="$CFLAGS" + save_LDFLAGS="$LDFLAGS" + save_LIBS="$LIBS" + CFLAGS="" + LDFLAGS="" + LIBS="" + CFLAGS="$WERROR -Wdeclaration-after-statement" + CFLAGS="$save_CFLAGS $CFLAGS" + LDFLAGS="$save_LDFLAGS $LDFLAGS" + LIBS="$save_LIBS $LIBS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + int main(int c, char **v) { (void)c; (void)v; return 0; } + +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + pixman_cc_stderr=`test -f conftest.err && cat conftest.err` + pixman_cc_flag=yes +else + pixman_cc_stderr=`test -f conftest.err && cat conftest.err` + pixman_cc_flag=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + + if test "x$pixman_cc_stderr" != "x"; then + pixman_cc_flag=no + fi + + if test "x$pixman_cc_flag" = "xyes"; then + _yesno=yes + else + _yesno=no + fi + CFLAGS="$save_CFLAGS" + LDFLAGS="$save_LDFLAGS" + LIBS="$save_LIBS" + + if test "x$_yesno" = xyes; then + CFLAGS="$CFLAGS -Wdeclaration-after-statement" + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $_yesno" >&5 +$as_echo "$_yesno" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports -fno-strict-aliasing" >&5 $as_echo_n "checking whether the compiler supports -fno-strict-aliasing... 
" >&6; } save_CFLAGS="$CFLAGS" @@ -12675,6 +12728,12 @@ int main () { : "y" (v), "K" (5) ); + /* Some versions of clang will choke on this */ + asm ("pmulhuw %1, %0\n\t" + : "+y" (w) + : "y" (v) + ); + return _mm_cvtsi64_si32 (v); } _ACEOF @@ -12793,6 +12852,69 @@ else fi + +if test "x$SSSE3_CFLAGS" = "x" ; then + SSSE3_CFLAGS="-mssse3 -Winline" +fi + +have_ssse3_intrinsics=no +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to use SSSE3 intrinsics" >&5 +$as_echo_n "checking whether to use SSSE3 intrinsics... " >&6; } +xserver_save_CFLAGS=$CFLAGS +CFLAGS="$SSSE3_CFLAGS $CFLAGS" + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include <mmintrin.h> +#include <xmmintrin.h> +#include <emmintrin.h> +#include <tmmintrin.h> +int main () { + __m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c; + c = _mm_maddubs_epi16 (a, b); + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + have_ssse3_intrinsics=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +CFLAGS=$xserver_save_CFLAGS + +# Check whether --enable-ssse3 was given. +if test "${enable_ssse3+set}" = set; then : + enableval=$enable_ssse3; enable_ssse3=$enableval +else + enable_ssse3=auto +fi + + +if test $enable_ssse3 = no ; then + have_ssse3_intrinsics=disabled +fi + +if test $have_ssse3_intrinsics = yes ; then + +$as_echo "#define USE_SSSE3 1" >>confdefs.h + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $have_ssse3_intrinsics" >&5 +$as_echo "$have_ssse3_intrinsics" >&6; } +if test $enable_ssse3 = yes && test $have_ssse3_intrinsics = no ; then + as_fn_error $? 
"SSSE3 intrinsics not detected" "$LINENO" 5 +fi + + if test $have_ssse3_intrinsics = yes; then + USE_SSSE3_TRUE= + USE_SSSE3_FALSE='#' +else + USE_SSSE3_TRUE='#' + USE_SSSE3_FALSE= +fi + + case $host_os in solaris*) # When building 32-bit binaries, apply a mapfile to ensure that the @@ -12837,6 +12959,7 @@ esac + if test -n "`$CC -v 2>&1 | grep version | grep Apple`"; then VMX_CFLAGS="-faltivec" else @@ -13060,8 +13183,8 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext #ifndef __IWMMXT__ #error "IWMMXT not enabled (with -march=iwmmxt)" #endif -#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 5)) -#error "Need GCC >= 4.5 for IWMMXT intrinsics" +#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)) +#error "Need GCC >= 4.8 for IWMMXT intrinsics" #endif #include <mmintrin.h> int main () { @@ -13933,13 +14056,12 @@ fi -if test $ac_cv_tls = none ; then - support_for_pthread_setspecific=no +support_for_pthreads=no - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_setspecific" >&5 -$as_echo_n "checking for pthread_setspecific... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthreads" >&5 +$as_echo_n "checking for pthreads... 
" >&6; } - if test "z$support_for_pthread_setspecific" != "zyes"; then + if test "z$support_for_pthreads" != "zyes"; then save_CFLAGS="$CFLAGS" save_LDFLAGS="$LDFLAGS" save_LIBS="$LIBS" @@ -14004,7 +14126,7 @@ rm -f core conftest.err conftest.$ac_objext \ PTHREAD_CFLAGS="$CFLAGS" PTHREAD_LIBS="$LIBS" PTHREAD_LDFLAGS="$LDFLAGS" - support_for_pthread_setspecific=yes + support_for_pthreads=yes else : fi @@ -14014,7 +14136,7 @@ rm -f core conftest.err conftest.$ac_objext \ fi - if test "z$support_for_pthread_setspecific" != "zyes"; then + if test "z$support_for_pthreads" != "zyes"; then save_CFLAGS="$CFLAGS" save_LDFLAGS="$LDFLAGS" save_LIBS="$LIBS" @@ -14079,7 +14201,7 @@ rm -f core conftest.err conftest.$ac_objext \ PTHREAD_CFLAGS="$CFLAGS" PTHREAD_LIBS="$LIBS" PTHREAD_LDFLAGS="$LDFLAGS" - support_for_pthread_setspecific=yes + support_for_pthreads=yes else : fi @@ -14089,7 +14211,7 @@ rm -f core conftest.err conftest.$ac_objext \ fi - if test "z$support_for_pthread_setspecific" != "zyes"; then + if test "z$support_for_pthreads" != "zyes"; then save_CFLAGS="$CFLAGS" save_LDFLAGS="$LDFLAGS" save_LIBS="$LIBS" @@ -14154,7 +14276,7 @@ rm -f core conftest.err conftest.$ac_objext \ PTHREAD_CFLAGS="$CFLAGS" PTHREAD_LIBS="$LIBS" PTHREAD_LDFLAGS="$LDFLAGS" - support_for_pthread_setspecific=yes + support_for_pthreads=yes else : fi @@ -14164,7 +14286,7 @@ rm -f core conftest.err conftest.$ac_objext \ fi - if test "z$support_for_pthread_setspecific" != "zyes"; then + if test "z$support_for_pthreads" != "zyes"; then save_CFLAGS="$CFLAGS" save_LDFLAGS="$LDFLAGS" save_LIBS="$LIBS" @@ -14229,7 +14351,7 @@ rm -f core conftest.err conftest.$ac_objext \ PTHREAD_CFLAGS="$CFLAGS" PTHREAD_LIBS="$LIBS" PTHREAD_LDFLAGS="$LDFLAGS" - support_for_pthread_setspecific=yes + support_for_pthreads=yes else : fi @@ -14240,17 +14362,19 @@ rm -f core conftest.err conftest.$ac_objext \ fi - if test $support_for_pthread_setspecific = yes; then - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" +if test 
$support_for_pthreads = yes; then -$as_echo "#define HAVE_PTHREAD_SETSPECIFIC /**/" >>confdefs.h +$as_echo "#define HAVE_PTHREADS /**/" >>confdefs.h + if test $ac_cv_tls = none ; then + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" fi - - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $support_for_pthread_setspecific" >&5 -$as_echo "$support_for_pthread_setspecific" >&6; }; fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $support_for_pthreads" >&5 +$as_echo "$support_for_pthreads" >&6; } + + @@ -14319,6 +14443,32 @@ fi $as_echo "$support_for_float128" >&6; } +support_for_builtin_clz=no + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clz" >&5 +$as_echo_n "checking for __builtin_clz... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +unsigned int x = 11; int main (void) { return __builtin_clz(x); } + +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + support_for_builtin_clz=yes +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + +if test x$support_for_builtin_clz = xyes; then + +$as_echo "#define HAVE_BUILTIN_CLZ /**/" >>confdefs.h + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $support_for_builtin_clz" >&5 +$as_echo "$support_for_builtin_clz" >&6; } + + # Check whether --enable-libpng was given. if test "${enable_libpng+set}" = set; then : enableval=$enable_libpng; have_libpng=$enableval @@ -14653,6 +14803,10 @@ if test -z "${USE_SSE2_TRUE}" && test -z "${USE_SSE2_FALSE}"; then as_fn_error $? "conditional \"USE_SSE2\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${USE_SSSE3_TRUE}" && test -z "${USE_SSSE3_FALSE}"; then + as_fn_error $? "conditional \"USE_SSSE3\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${USE_VMX_TRUE}" && test -z "${USE_VMX_FALSE}"; then as_fn_error $? "conditional \"USE_VMX\" was never defined. 
Usually this means the macro was only invoked conditionally." "$LINENO" 5 @@ -15078,7 +15232,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by pixman $as_me 0.30.2, which was +This file was extended by pixman $as_me 0.32.4, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -15144,7 +15298,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -pixman config.status 0.30.2 +pixman config.status 0.32.4 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/lib/pixman/configure.ac b/lib/pixman/configure.ac index 25c6c5b7d..67d082cca 100644 --- a/lib/pixman/configure.ac +++ b/lib/pixman/configure.ac @@ -53,8 +53,8 @@ AC_PREREQ([2.57]) # m4_define([pixman_major], 0) -m4_define([pixman_minor], 30) -m4_define([pixman_micro], 2) +m4_define([pixman_minor], 32) +m4_define([pixman_micro], 4) m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro]) @@ -183,6 +183,7 @@ AC_SUBST(LT_VERSION_INFO) # Check for dependencies PIXMAN_CHECK_CFLAG([-Wall]) +PIXMAN_CHECK_CFLAG([-Wdeclaration-after-statement]) PIXMAN_CHECK_CFLAG([-fno-strict-aliasing]) dnl ========================================================================= @@ -355,6 +356,12 @@ int main () { : "y" (v), "K" (5) ); + /* Some versions of clang will choke on this */ + asm ("pmulhuw %1, %0\n\t" + : "+y" (w) + : "y" (v) + ); + return _mm_cvtsi64_si32 (v); }]])], have_mmx_intrinsics=yes) CFLAGS=$xserver_save_CFLAGS @@ -437,6 +444,50 @@ fi AM_CONDITIONAL(USE_SSE2, test $have_sse2_intrinsics = yes) dnl =========================================================================== +dnl Check for SSSE3 + +if test "x$SSSE3_CFLAGS" = "x" ; then + SSSE3_CFLAGS="-mssse3 -Winline" +fi + 
+have_ssse3_intrinsics=no +AC_MSG_CHECKING(whether to use SSSE3 intrinsics) +xserver_save_CFLAGS=$CFLAGS +CFLAGS="$SSSE3_CFLAGS $CFLAGS" + +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +#include <mmintrin.h> +#include <xmmintrin.h> +#include <emmintrin.h> +#include <tmmintrin.h> +int main () { + __m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c; + c = _mm_maddubs_epi16 (a, b); + return 0; +}]])], have_ssse3_intrinsics=yes) +CFLAGS=$xserver_save_CFLAGS + +AC_ARG_ENABLE(ssse3, + [AC_HELP_STRING([--disable-ssse3], + [disable SSSE3 fast paths])], + [enable_ssse3=$enableval], [enable_ssse3=auto]) + +if test $enable_ssse3 = no ; then + have_ssse3_intrinsics=disabled +fi + +if test $have_ssse3_intrinsics = yes ; then + AC_DEFINE(USE_SSSE3, 1, [use SSSE3 compiler intrinsics]) +fi + +AC_MSG_RESULT($have_ssse3_intrinsics) +if test $enable_ssse3 = yes && test $have_ssse3_intrinsics = no ; then + AC_MSG_ERROR([SSSE3 intrinsics not detected]) +fi + +AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_intrinsics = yes) + +dnl =========================================================================== dnl Other special flags needed when building code using MMX or SSE instructions case $host_os in solaris*) @@ -471,6 +522,7 @@ AC_SUBST(MMX_CFLAGS) AC_SUBST(MMX_LDFLAGS) AC_SUBST(SSE2_CFLAGS) AC_SUBST(SSE2_LDFLAGS) +AC_SUBST(SSSE3_CFLAGS) dnl =========================================================================== dnl Check for VMX/Altivec @@ -631,8 +683,8 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ #ifndef __IWMMXT__ #error "IWMMXT not enabled (with -march=iwmmxt)" #endif -#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 5)) -#error "Need GCC >= 4.5 for IWMMXT intrinsics" +#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)) +#error "Need GCC >= 4.8 for IWMMXT intrinsics" #endif #include <mmintrin.h> int main () { @@ -916,38 +968,39 @@ main () ]])) AC_DEFUN([PIXMAN_CHECK_PTHREAD],[dnl - if test "z$support_for_pthread_setspecific" != 
"zyes"; then + if test "z$support_for_pthreads" != "zyes"; then PIXMAN_LINK_WITH_ENV( [$1], [pthread_test_program], [PTHREAD_CFLAGS="$CFLAGS" PTHREAD_LIBS="$LIBS" PTHREAD_LDFLAGS="$LDFLAGS" - support_for_pthread_setspecific=yes]) + support_for_pthreads=yes]) fi ]) -if test $ac_cv_tls = none ; then - support_for_pthread_setspecific=no +support_for_pthreads=no - AC_MSG_CHECKING(for pthread_setspecific) +AC_MSG_CHECKING(for pthreads) - PIXMAN_CHECK_PTHREAD([CFLAGS=""; LIBS="-L/usr/X11R6/lib -lpthread-stubs"]) - PIXMAN_CHECK_PTHREAD([CFLAGS="-pthread"; LDFLAGS="-pthread"]) - PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LIBS="-lpthread"]) - PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LDFLAGS="-lroot"]) +PIXMAN_CHECK_PTHREAD([CFLAGS=""; LIBS="-L/usr/X11R6/lib -lpthread-stubs"]) +PIXMAN_CHECK_PTHREAD([CFLAGS="-pthread"; LDFLAGS="-pthread"]) +PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LIBS="-lpthread"]) +PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LDFLAGS="-lroot"]) - if test $support_for_pthread_setspecific = yes; then - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - AC_DEFINE([HAVE_PTHREAD_SETSPECIFIC], [], [Whether pthread_setspecific() is supported]) +if test $support_for_pthreads = yes; then + AC_DEFINE([HAVE_PTHREADS], [], [Whether pthreads is supported]) + if test $ac_cv_tls = none ; then + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" fi - - AC_MSG_RESULT($support_for_pthread_setspecific); fi +AC_MSG_RESULT($support_for_pthreads) + AC_SUBST(TOOLCHAIN_SUPPORTS__THREAD) -AC_SUBST(HAVE_PTHREAD_SETSPECIFIC) +AC_SUBST(HAVE_PTHREADS) AC_SUBST(PTHREAD_LDFLAGS) AC_SUBST(PTHREAD_LIBS) +AC_SUBST(PTHREAD_CFLAGS) dnl ===================================== dnl __attribute__((constructor)) @@ -992,6 +1045,22 @@ fi AC_MSG_RESULT($support_for_float128) +dnl ===================================== +dnl __builtin_clz + +support_for_builtin_clz=no + +AC_MSG_CHECKING(for __builtin_clz) +AC_LINK_IFELSE([AC_LANG_SOURCE([[ +unsigned int x = 11; int main (void) { return __builtin_clz(x); } +]])], 
support_for_builtin_clz=yes) + +if test x$support_for_builtin_clz = xyes; then + AC_DEFINE([HAVE_BUILTIN_CLZ], [], [Whether the compiler supports __builtin_clz]) +fi + +AC_MSG_RESULT($support_for_builtin_clz) + dnl ================== dnl libpng diff --git a/lib/pixman/demos/Makefile.am b/lib/pixman/demos/Makefile.am index 9be9ab670..e04743d7f 100644 --- a/lib/pixman/demos/Makefile.am +++ b/lib/pixman/demos/Makefile.am @@ -1,3 +1,5 @@ +EXTRA_DIST = parrot.c parrot.jpg scale.ui + if HAVE_GTK AM_CFLAGS = $(OPENMP_CFLAGS) @@ -28,8 +30,6 @@ DEMOS = \ srgb-test \ scale -EXTRA_DIST = parrot.c parrot.jpg scale.ui - gradient_test_SOURCES = gradient-test.c $(GTK_UTILS) alpha_test_SOURCES = alpha-test.c $(GTK_UTILS) composite_test_SOURCES = composite-test.c $(GTK_UTILS) diff --git a/lib/pixman/demos/Makefile.in b/lib/pixman/demos/Makefile.in index 277649371..1b1ce9d7a 100644 --- a/lib/pixman/demos/Makefile.in +++ b/lib/pixman/demos/Makefile.in @@ -337,7 +337,7 @@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ HAVE_LIBPNG = @HAVE_LIBPNG@ -HAVE_PTHREAD_SETSPECIFIC = @HAVE_PTHREAD_SETSPECIFIC@ +HAVE_PTHREADS = @HAVE_PTHREADS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ @@ -383,6 +383,7 @@ PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ PNG_CFLAGS = @PNG_CFLAGS@ PNG_LIBS = @PNG_LIBS@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ PTHREAD_LDFLAGS = @PTHREAD_LDFLAGS@ PTHREAD_LIBS = @PTHREAD_LIBS@ RANLIB = @RANLIB@ @@ -391,6 +392,7 @@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SSE2_CFLAGS = @SSE2_CFLAGS@ SSE2_LDFLAGS = @SSE2_LDFLAGS@ +SSSE3_CFLAGS = @SSSE3_CFLAGS@ STRIP = @STRIP@ TESTPROGS_EXTRA_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR = @TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR@ @@ -449,6 +451,7 @@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ +EXTRA_DIST = parrot.c parrot.jpg scale.ui 
@HAVE_GTK_TRUE@AM_CFLAGS = $(OPENMP_CFLAGS) @HAVE_GTK_TRUE@AM_LDFLAGS = $(OPENMP_CFLAGS) @HAVE_GTK_TRUE@LDADD = $(top_builddir)/pixman/libpixman-1.la -lm $(GTK_LIBS) $(PNG_LIBS) @@ -475,7 +478,6 @@ top_srcdir = @top_srcdir@ @HAVE_GTK_TRUE@ srgb-test \ @HAVE_GTK_TRUE@ scale -@HAVE_GTK_TRUE@EXTRA_DIST = parrot.c parrot.jpg scale.ui @HAVE_GTK_TRUE@gradient_test_SOURCES = gradient-test.c $(GTK_UTILS) @HAVE_GTK_TRUE@alpha_test_SOURCES = alpha-test.c $(GTK_UTILS) @HAVE_GTK_TRUE@composite_test_SOURCES = composite-test.c $(GTK_UTILS) diff --git a/lib/pixman/demos/scale.c b/lib/pixman/demos/scale.c index 869ada12b..d00307e44 100644 --- a/lib/pixman/demos/scale.c +++ b/lib/pixman/demos/scale.c @@ -103,8 +103,8 @@ compute_extents (pixman_f_transform_t *trans, double *sx, double *sy) typedef struct { - char name [20]; - pixman_kernel_t value; + char name [20]; + int value; } named_int_t; static const named_int_t filters[] = @@ -127,7 +127,7 @@ static const named_int_t repeats[] = { "Pad", PIXMAN_REPEAT_PAD }, }; -static pixman_kernel_t +static int get_value (app_t *app, const named_int_t table[], const char *box_name) { GtkComboBox *box = GTK_COMBO_BOX (get_widget (app, box_name)); diff --git a/lib/pixman/demos/scale.ui b/lib/pixman/demos/scale.ui index b3450d34d..ee985dd1c 100644 --- a/lib/pixman/demos/scale.ui +++ b/lib/pixman/demos/scale.ui @@ -24,7 +24,7 @@ <property name="page_size">10</property> </object> <object class="GtkAdjustment" id="subsample_adjustment"> - <property name="lower">1</property> + <property name="lower">0</property> <property name="upper">12</property> <property name="step_increment">1</property> <property name="page_increment">1</property> diff --git a/lib/pixman/pixman/Makefile.am b/lib/pixman/pixman/Makefile.am index b9ea75424..b376d9aeb 100644 --- a/lib/pixman/pixman/Makefile.am +++ b/lib/pixman/pixman/Makefile.am @@ -52,6 +52,18 @@ libpixman_1_la_LIBADD += libpixman-sse2.la ASM_CFLAGS_sse2=$(SSE2_CFLAGS) endif +# ssse3 code +if USE_SSSE3 
+noinst_LTLIBRARIES += libpixman-ssse3.la +libpixman_ssse3_la_SOURCES = \ + pixman-ssse3.c +libpixman_ssse3_la_CFLAGS = $(SSSE3_CFLAGS) +libpixman_1_la_LDFLAGS += $(SSSE3_LDFLAGS) +libpixman_1_la_LIBADD += libpixman-ssse3.la + +ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS) +endif + # arm simd code if USE_ARM_SIMD noinst_LTLIBRARIES += libpixman-arm-simd.la diff --git a/lib/pixman/pixman/Makefile.in b/lib/pixman/pixman/Makefile.in index ec79ebf6d..26c5fbe4f 100644 --- a/lib/pixman/pixman/Makefile.in +++ b/lib/pixman/pixman/Makefile.in @@ -69,24 +69,29 @@ DIST_COMMON = $(libpixmaninclude_HEADERS) $(srcdir)/Makefile.am \ @USE_SSE2_TRUE@am__append_7 = $(SSE2_LDFLAGS) @USE_SSE2_TRUE@am__append_8 = libpixman-sse2.la +# ssse3 code +@USE_SSSE3_TRUE@am__append_9 = libpixman-ssse3.la +@USE_SSSE3_TRUE@am__append_10 = $(SSSE3_LDFLAGS) +@USE_SSSE3_TRUE@am__append_11 = libpixman-ssse3.la + # arm simd code -@USE_ARM_SIMD_TRUE@am__append_9 = libpixman-arm-simd.la -@USE_ARM_SIMD_TRUE@am__append_10 = libpixman-arm-simd.la +@USE_ARM_SIMD_TRUE@am__append_12 = libpixman-arm-simd.la +@USE_ARM_SIMD_TRUE@am__append_13 = libpixman-arm-simd.la # arm neon code -@USE_ARM_NEON_TRUE@am__append_11 = libpixman-arm-neon.la -@USE_ARM_NEON_TRUE@am__append_12 = libpixman-arm-neon.la -@USE_ARM_IWMMXT_TRUE@am__append_13 = libpixman-iwmmxt.la -@USE_ARM_IWMMXT_TRUE@am__append_14 = libpixman-iwmmxt.la +@USE_ARM_NEON_TRUE@am__append_14 = libpixman-arm-neon.la +@USE_ARM_NEON_TRUE@am__append_15 = libpixman-arm-neon.la +@USE_ARM_IWMMXT_TRUE@am__append_16 = libpixman-iwmmxt.la +@USE_ARM_IWMMXT_TRUE@am__append_17 = libpixman-iwmmxt.la # mips dspr2 code -@USE_MIPS_DSPR2_TRUE@am__append_15 = libpixman-mips-dspr2.la -@USE_MIPS_DSPR2_TRUE@am__append_16 = libpixman-mips-dspr2.la +@USE_MIPS_DSPR2_TRUE@am__append_18 = libpixman-mips-dspr2.la +@USE_MIPS_DSPR2_TRUE@am__append_19 = libpixman-mips-dspr2.la # loongson code -@USE_LOONGSON_MMI_TRUE@am__append_17 = libpixman-loongson-mmi.la -@USE_LOONGSON_MMI_TRUE@am__append_18 = 
$(LS_LDFLAGS) -@USE_LOONGSON_MMI_TRUE@am__append_19 = libpixman-loongson-mmi.la +@USE_LOONGSON_MMI_TRUE@am__append_20 = libpixman-loongson-mmi.la +@USE_LOONGSON_MMI_TRUE@am__append_21 = $(LS_LDFLAGS) +@USE_LOONGSON_MMI_TRUE@am__append_22 = libpixman-loongson-mmi.la subdir = pixman ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/configure.ac @@ -127,8 +132,9 @@ am__installdirs = "$(DESTDIR)$(libdir)" \ "$(DESTDIR)$(libpixmanincludedir)" LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES) libpixman_1_la_DEPENDENCIES = $(am__append_3) $(am__append_5) \ - $(am__append_8) $(am__append_10) $(am__append_12) \ - $(am__append_14) $(am__append_16) $(am__append_19) + $(am__append_8) $(am__append_11) $(am__append_13) \ + $(am__append_15) $(am__append_17) $(am__append_19) \ + $(am__append_22) am__objects_1 = pixman.lo pixman-access.lo pixman-access-accessors.lo \ pixman-bits-image.lo pixman-combine32.lo \ pixman-combine-float.lo pixman-conical-gradient.lo \ @@ -216,6 +222,16 @@ libpixman_sse2_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ $(libpixman_sse2_la_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ -o $@ @USE_SSE2_TRUE@am_libpixman_sse2_la_rpath = +libpixman_ssse3_la_LIBADD = +am__libpixman_ssse3_la_SOURCES_DIST = pixman-ssse3.c +@USE_SSSE3_TRUE@am_libpixman_ssse3_la_OBJECTS = \ +@USE_SSSE3_TRUE@ libpixman_ssse3_la-pixman-ssse3.lo +libpixman_ssse3_la_OBJECTS = $(am_libpixman_ssse3_la_OBJECTS) +libpixman_ssse3_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libpixman_ssse3_la_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +@USE_SSSE3_TRUE@am_libpixman_ssse3_la_rpath = libpixman_vmx_la_LIBADD = am__libpixman_vmx_la_SOURCES_DIST = pixman-vmx.c pixman-combine32.h @USE_VMX_TRUE@am_libpixman_vmx_la_OBJECTS = \ @@ -275,7 +291,8 @@ SOURCES = $(libpixman_1_la_SOURCES) $(libpixman_arm_neon_la_SOURCES) \ $(libpixman_iwmmxt_la_SOURCES) \ $(libpixman_loongson_mmi_la_SOURCES) \ 
$(libpixman_mips_dspr2_la_SOURCES) $(libpixman_mmx_la_SOURCES) \ - $(libpixman_sse2_la_SOURCES) $(libpixman_vmx_la_SOURCES) + $(libpixman_sse2_la_SOURCES) $(libpixman_ssse3_la_SOURCES) \ + $(libpixman_vmx_la_SOURCES) DIST_SOURCES = $(libpixman_1_la_SOURCES) \ $(am__libpixman_arm_neon_la_SOURCES_DIST) \ $(am__libpixman_arm_simd_la_SOURCES_DIST) \ @@ -284,6 +301,7 @@ DIST_SOURCES = $(libpixman_1_la_SOURCES) \ $(am__libpixman_mips_dspr2_la_SOURCES_DIST) \ $(am__libpixman_mmx_la_SOURCES_DIST) \ $(am__libpixman_sse2_la_SOURCES_DIST) \ + $(am__libpixman_ssse3_la_SOURCES_DIST) \ $(am__libpixman_vmx_la_SOURCES_DIST) am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ @@ -326,7 +344,7 @@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ HAVE_LIBPNG = @HAVE_LIBPNG@ -HAVE_PTHREAD_SETSPECIFIC = @HAVE_PTHREAD_SETSPECIFIC@ +HAVE_PTHREADS = @HAVE_PTHREADS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ @@ -372,6 +390,7 @@ PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ PNG_CFLAGS = @PNG_CFLAGS@ PNG_LIBS = @PNG_LIBS@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ PTHREAD_LDFLAGS = @PTHREAD_LDFLAGS@ PTHREAD_LIBS = @PTHREAD_LIBS@ RANLIB = @RANLIB@ @@ -380,6 +399,7 @@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SSE2_CFLAGS = @SSE2_CFLAGS@ SSE2_LDFLAGS = @SSE2_LDFLAGS@ +SSSE3_CFLAGS = @SSSE3_CFLAGS@ STRIP = @STRIP@ TESTPROGS_EXTRA_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR = @TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR@ @@ -484,17 +504,17 @@ libpixman_headers = \ lib_LTLIBRARIES = libpixman-1.la libpixman_1_la_LDFLAGS = -version-info $(LT_VERSION_INFO) \ -no-undefined @PTHREAD_LDFLAGS@ $(am__append_2) \ - $(am__append_7) $(am__append_18) + $(am__append_7) $(am__append_10) $(am__append_21) libpixman_1_la_LIBADD = @PTHREAD_LIBS@ -lm $(am__append_3) \ - $(am__append_5) $(am__append_8) $(am__append_10) \ - $(am__append_12) $(am__append_14) $(am__append_16) \ - $(am__append_19) + 
$(am__append_5) $(am__append_8) $(am__append_11) \ + $(am__append_13) $(am__append_15) $(am__append_17) \ + $(am__append_19) $(am__append_22) libpixman_1_la_SOURCES = $(libpixman_sources) $(libpixman_headers) libpixmanincludedir = $(includedir)/pixman-1 libpixmaninclude_HEADERS = pixman.h pixman-version.h noinst_LTLIBRARIES = $(am__append_1) $(am__append_4) $(am__append_6) \ - $(am__append_9) $(am__append_11) $(am__append_13) \ - $(am__append_15) $(am__append_17) + $(am__append_9) $(am__append_12) $(am__append_14) \ + $(am__append_16) $(am__append_18) $(am__append_20) EXTRA_DIST = \ Makefile.win32 \ pixman-region.c \ @@ -517,6 +537,11 @@ EXTRA_DIST = \ @USE_SSE2_TRUE@libpixman_sse2_la_CFLAGS = $(SSE2_CFLAGS) @USE_SSE2_TRUE@ASM_CFLAGS_sse2 = $(SSE2_CFLAGS) +@USE_SSSE3_TRUE@libpixman_ssse3_la_SOURCES = \ +@USE_SSSE3_TRUE@ pixman-ssse3.c + +@USE_SSSE3_TRUE@libpixman_ssse3_la_CFLAGS = $(SSSE3_CFLAGS) +@USE_SSSE3_TRUE@ASM_CFLAGS_ssse3 = $(SSSE3_CFLAGS) @USE_ARM_SIMD_TRUE@libpixman_arm_simd_la_SOURCES = \ @USE_ARM_SIMD_TRUE@ pixman-arm-simd.c \ @USE_ARM_SIMD_TRUE@ pixman-arm-common.h \ @@ -650,6 +675,8 @@ libpixman-mmx.la: $(libpixman_mmx_la_OBJECTS) $(libpixman_mmx_la_DEPENDENCIES) $ $(AM_V_CCLD)$(libpixman_mmx_la_LINK) $(am_libpixman_mmx_la_rpath) $(libpixman_mmx_la_OBJECTS) $(libpixman_mmx_la_LIBADD) $(LIBS) libpixman-sse2.la: $(libpixman_sse2_la_OBJECTS) $(libpixman_sse2_la_DEPENDENCIES) $(EXTRA_libpixman_sse2_la_DEPENDENCIES) $(AM_V_CCLD)$(libpixman_sse2_la_LINK) $(am_libpixman_sse2_la_rpath) $(libpixman_sse2_la_OBJECTS) $(libpixman_sse2_la_LIBADD) $(LIBS) +libpixman-ssse3.la: $(libpixman_ssse3_la_OBJECTS) $(libpixman_ssse3_la_DEPENDENCIES) $(EXTRA_libpixman_ssse3_la_DEPENDENCIES) + $(AM_V_CCLD)$(libpixman_ssse3_la_LINK) $(am_libpixman_ssse3_la_rpath) $(libpixman_ssse3_la_OBJECTS) $(libpixman_ssse3_la_LIBADD) $(LIBS) libpixman-vmx.la: $(libpixman_vmx_la_OBJECTS) $(libpixman_vmx_la_DEPENDENCIES) $(EXTRA_libpixman_vmx_la_DEPENDENCIES) 
$(AM_V_CCLD)$(libpixman_vmx_la_LINK) $(am_libpixman_vmx_la_rpath) $(libpixman_vmx_la_OBJECTS) $(libpixman_vmx_la_LIBADD) $(LIBS) @@ -662,6 +689,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_loongson_mmi_la-pixman-mmx.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_mmx_la-pixman-mmx.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_sse2_la-pixman-sse2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_ssse3_la-pixman-ssse3.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_vmx_la-pixman-vmx.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-access-accessors.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-access.Plo@am__quote@ @@ -767,6 +795,13 @@ libpixman_sse2_la-pixman-sse2.lo: pixman-sse2.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_sse2_la_CFLAGS) $(CFLAGS) -c -o libpixman_sse2_la-pixman-sse2.lo `test -f 'pixman-sse2.c' || echo '$(srcdir)/'`pixman-sse2.c +libpixman_ssse3_la-pixman-ssse3.lo: pixman-ssse3.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_ssse3_la_CFLAGS) $(CFLAGS) -MT libpixman_ssse3_la-pixman-ssse3.lo -MD -MP -MF $(DEPDIR)/libpixman_ssse3_la-pixman-ssse3.Tpo -c -o libpixman_ssse3_la-pixman-ssse3.lo `test -f 'pixman-ssse3.c' || echo '$(srcdir)/'`pixman-ssse3.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libpixman_ssse3_la-pixman-ssse3.Tpo $(DEPDIR)/libpixman_ssse3_la-pixman-ssse3.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pixman-ssse3.c' 
object='libpixman_ssse3_la-pixman-ssse3.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_ssse3_la_CFLAGS) $(CFLAGS) -c -o libpixman_ssse3_la-pixman-ssse3.lo `test -f 'pixman-ssse3.c' || echo '$(srcdir)/'`pixman-ssse3.c + libpixman_vmx_la-pixman-vmx.lo: pixman-vmx.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_vmx_la_CFLAGS) $(CFLAGS) -MT libpixman_vmx_la-pixman-vmx.lo -MD -MP -MF $(DEPDIR)/libpixman_vmx_la-pixman-vmx.Tpo -c -o libpixman_vmx_la-pixman-vmx.lo `test -f 'pixman-vmx.c' || echo '$(srcdir)/'`pixman-vmx.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libpixman_vmx_la-pixman-vmx.Tpo $(DEPDIR)/libpixman_vmx_la-pixman-vmx.Plo diff --git a/lib/pixman/pixman/Makefile.win32 b/lib/pixman/pixman/Makefile.win32 index 57ed7a5dc..7b64033bc 100644 --- a/lib/pixman/pixman/Makefile.win32 +++ b/lib/pixman/pixman/Makefile.win32 @@ -14,8 +14,14 @@ ifeq ($(SSE2_VAR),) SSE2_VAR=on endif +SSSE3_VAR = $(SSSE3) +ifeq ($(SSSE3_VAR),) +SSSE3_VAR=on +endif + MMX_CFLAGS = -DUSE_X86_MMX -w14710 -w14714 SSE2_CFLAGS = -DUSE_SSE2 +SSSE3_CFLAGS = -DUSE_SSSE3 # MMX compilation flags ifeq ($(MMX_VAR),on) @@ -29,10 +35,16 @@ PIXMAN_CFLAGS += $(SSE2_CFLAGS) libpixman_sources += pixman-sse2.c endif +# SSSE3 compilation flags +ifeq ($(SSSE3_VAR),on) +PIXMAN_CFLAGS += $(SSSE3_CFLAGS) +libpixman_sources += pixman-ssse3.c +endif + OBJECTS = $(patsubst %.c, $(CFG_VAR)/%.obj, $(libpixman_sources)) # targets -all: inform informMMX informSSE2 $(CFG_VAR)/$(LIBRARY).lib +all: inform informMMX informSSE2 informSSSE3 $(CFG_VAR)/$(LIBRARY).lib informMMX: ifneq 
($(MMX),off) @@ -60,9 +72,22 @@ endif endif endif +informSSSE3: +ifneq ($(SSSE3),off) +ifneq ($(SSSE3),on) +ifneq ($(SSSE3),) + @echo "Invalid specified SSE option : "$(SSSE3)"." + @echo + @echo "Possible choices for SSSE3 are 'on' or 'off'" + @exit 1 +endif + @echo "Setting SSSE3 flag to default value 'on'... (use SSSE3=on or SSSE3=off)" +endif +endif + # pixman linking $(CFG_VAR)/$(LIBRARY).lib: $(OBJECTS) @$(AR) $(PIXMAN_ARFLAGS) -OUT:$@ $^ -.PHONY: all informMMX informSSE2 +.PHONY: all informMMX informSSE2 informSSSE3 diff --git a/lib/pixman/pixman/pixman-access.c b/lib/pixman/pixman/pixman-access.c index b5c8e4017..4f0642d77 100644 --- a/lib/pixman/pixman/pixman-access.c +++ b/lib/pixman/pixman/pixman-access.c @@ -294,14 +294,14 @@ convert_pixel (pixman_format_code_t from, pixman_format_code_t to, uint32_t pixe } static force_inline uint32_t -convert_pixel_to_a8r8g8b8 (pixman_image_t *image, +convert_pixel_to_a8r8g8b8 (bits_image_t *image, pixman_format_code_t format, uint32_t pixel) { if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY || PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR) { - return image->bits.indexed->rgba[pixel]; + return image->indexed->rgba[pixel]; } else { @@ -332,7 +332,7 @@ convert_pixel_from_a8r8g8b8 (pixman_image_t *image, } static force_inline uint32_t -fetch_and_convert_pixel (pixman_image_t * image, +fetch_and_convert_pixel (bits_image_t * image, const uint8_t * bits, int offset, pixman_format_code_t format) @@ -417,7 +417,7 @@ convert_and_store_pixel (bits_image_t * image, #define MAKE_ACCESSORS(format) \ static void \ - fetch_scanline_ ## format (pixman_image_t *image, \ + fetch_scanline_ ## format (bits_image_t *image, \ int x, \ int y, \ int width, \ @@ -425,7 +425,7 @@ convert_and_store_pixel (bits_image_t * image, const uint32_t *mask) \ { \ uint8_t *bits = \ - (uint8_t *)(image->bits.bits + y * image->bits.rowstride); \ + (uint8_t *)(image->bits + y * image->rowstride); \ int i; \ \ for (i = 0; i < width; ++i) \ @@ -461,8 
+461,8 @@ convert_and_store_pixel (bits_image_t * image, uint8_t *bits = \ (uint8_t *)(image->bits + line * image->rowstride); \ \ - return fetch_and_convert_pixel ((pixman_image_t *)image, \ - bits, offset, PIXMAN_ ## format); \ + return fetch_and_convert_pixel ( \ + image, bits, offset, PIXMAN_ ## format); \ } \ \ static const void *const __dummy__ ## format @@ -583,14 +583,14 @@ to_srgb (float f) } static void -fetch_scanline_a8r8g8b8_sRGB_float (pixman_image_t *image, +fetch_scanline_a8r8g8b8_sRGB_float (bits_image_t * image, int x, int y, int width, uint32_t * b, const uint32_t *mask) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *bits = image->bits + y * image->rowstride; const uint32_t *pixel = bits + x; const uint32_t *end = pixel + width; argb_t *buffer = (argb_t *)b; @@ -612,14 +612,14 @@ fetch_scanline_a8r8g8b8_sRGB_float (pixman_image_t *image, /* Expects a float buffer */ static void -fetch_scanline_a2r10g10b10_float (pixman_image_t *image, +fetch_scanline_a2r10g10b10_float (bits_image_t * image, int x, int y, int width, uint32_t * b, const uint32_t *mask) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *bits = image->bits + y * image->rowstride; const uint32_t *pixel = bits + x; const uint32_t *end = pixel + width; argb_t *buffer = (argb_t *)b; @@ -643,14 +643,14 @@ fetch_scanline_a2r10g10b10_float (pixman_image_t *image, /* Expects a float buffer */ static void -fetch_scanline_x2r10g10b10_float (pixman_image_t *image, +fetch_scanline_x2r10g10b10_float (bits_image_t *image, int x, int y, int width, uint32_t * b, const uint32_t *mask) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *bits = image->bits + y * image->rowstride; const uint32_t *pixel = (uint32_t *)bits + x; const uint32_t *end = pixel + width; argb_t *buffer = (argb_t *)b; @@ -673,14 +673,14 @@ fetch_scanline_x2r10g10b10_float (pixman_image_t *image, /* Expects a 
float buffer */ static void -fetch_scanline_a2b10g10r10_float (pixman_image_t *image, +fetch_scanline_a2b10g10r10_float (bits_image_t *image, int x, int y, int width, uint32_t * b, const uint32_t *mask) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *bits = image->bits + y * image->rowstride; const uint32_t *pixel = bits + x; const uint32_t *end = pixel + width; argb_t *buffer = (argb_t *)b; @@ -704,14 +704,14 @@ fetch_scanline_a2b10g10r10_float (pixman_image_t *image, /* Expects a float buffer */ static void -fetch_scanline_x2b10g10r10_float (pixman_image_t *image, +fetch_scanline_x2b10g10r10_float (bits_image_t *image, int x, int y, int width, uint32_t * b, const uint32_t *mask) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *bits = image->bits + y * image->rowstride; const uint32_t *pixel = (uint32_t *)bits + x; const uint32_t *end = pixel + width; argb_t *buffer = (argb_t *)b; @@ -733,14 +733,14 @@ fetch_scanline_x2b10g10r10_float (pixman_image_t *image, } static void -fetch_scanline_yuy2 (pixman_image_t *image, +fetch_scanline_yuy2 (bits_image_t *image, int x, int line, int width, uint32_t * buffer, const uint32_t *mask) { - const uint32_t *bits = image->bits.bits + image->bits.rowstride * line; + const uint32_t *bits = image->bits + image->rowstride * line; int i; for (i = 0; i < width; i++) @@ -767,7 +767,7 @@ fetch_scanline_yuy2 (pixman_image_t *image, } static void -fetch_scanline_yv12 (pixman_image_t *image, +fetch_scanline_yv12 (bits_image_t *image, int x, int line, int width, @@ -1121,30 +1121,30 @@ store_scanline_generic_float (bits_image_t * image, } static void -fetch_scanline_generic_float (pixman_image_t *image, +fetch_scanline_generic_float (bits_image_t * image, int x, int y, int width, uint32_t * buffer, const uint32_t *mask) { - image->bits.fetch_scanline_32 (image, x, y, width, buffer, NULL); + image->fetch_scanline_32 (image, x, y, width, buffer, NULL); - 
pixman_expand_to_float ((argb_t *)buffer, buffer, image->bits.format, width); + pixman_expand_to_float ((argb_t *)buffer, buffer, image->format, width); } /* The 32_sRGB paths should be deleted after narrow processing * is no longer invoked for formats that are considered wide. * (Also see fetch_pixel_generic_lossy_32) */ static void -fetch_scanline_a8r8g8b8_32_sRGB (pixman_image_t *image, +fetch_scanline_a8r8g8b8_32_sRGB (bits_image_t *image, int x, int y, int width, uint32_t *buffer, const uint32_t *mask) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *bits = image->bits + y * image->rowstride; const uint32_t *pixel = (uint32_t *)bits + x; const uint32_t *end = pixel + width; uint32_t tmp; diff --git a/lib/pixman/pixman/pixman-bits-image.c b/lib/pixman/pixman/pixman-bits-image.c index 75a39a115..f9121a365 100644 --- a/lib/pixman/pixman/pixman-bits-image.c +++ b/lib/pixman/pixman/pixman-bits-image.c @@ -137,221 +137,6 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image, return bilinear_interpolation (tl, tr, bl, br, distx, disty); } -static uint32_t * -bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter, - const uint32_t *mask) -{ - - pixman_image_t * ima = iter->image; - int offset = iter->x; - int line = iter->y++; - int width = iter->width; - uint32_t * buffer = iter->buffer; - - bits_image_t *bits = &ima->bits; - pixman_fixed_t x_top, x_bottom, x; - pixman_fixed_t ux_top, ux_bottom, ux; - pixman_vector_t v; - uint32_t top_mask, bottom_mask; - uint32_t *top_row; - uint32_t *bottom_row; - uint32_t *end; - uint32_t zero[2] = { 0, 0 }; - uint32_t one = 1; - int y, y1, y2; - int disty; - int mask_inc; - int w; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (bits->common.transform, &v)) - return iter->buffer; 
- - ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0]; - x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2; - - y = v.vector[1] - pixman_fixed_1/2; - disty = pixman_fixed_to_bilinear_weight (y); - - /* Load the pointers to the first and second lines from the source - * image that bilinear code must read. - * - * The main trick in this code is about the check if any line are - * outside of the image; - * - * When I realize that a line (any one) is outside, I change - * the pointer to a dummy area with zeros. Once I change this, I - * must be sure the pointer will not change, so I set the - * variables to each pointer increments inside the loop. - */ - y1 = pixman_fixed_to_int (y); - y2 = y1 + 1; - - if (y1 < 0 || y1 >= bits->height) - { - top_row = zero; - x_top = 0; - ux_top = 0; - } - else - { - top_row = bits->bits + y1 * bits->rowstride; - x_top = x; - ux_top = ux; - } - - if (y2 < 0 || y2 >= bits->height) - { - bottom_row = zero; - x_bottom = 0; - ux_bottom = 0; - } - else - { - bottom_row = bits->bits + y2 * bits->rowstride; - x_bottom = x; - ux_bottom = ux; - } - - /* Instead of checking whether the operation uses the mast in - * each loop iteration, verify this only once and prepare the - * variables to make the code smaller inside the loop. 
- */ - if (!mask) - { - mask_inc = 0; - mask = &one; - } - else - { - /* If have a mask, prepare the variables to check it */ - mask_inc = 1; - } - - /* If both are zero, then the whole thing is zero */ - if (top_row == zero && bottom_row == zero) - { - memset (buffer, 0, width * sizeof (uint32_t)); - return iter->buffer; - } - else if (bits->format == PIXMAN_x8r8g8b8) - { - if (top_row == zero) - { - top_mask = 0; - bottom_mask = 0xff000000; - } - else if (bottom_row == zero) - { - top_mask = 0xff000000; - bottom_mask = 0; - } - else - { - top_mask = 0xff000000; - bottom_mask = 0xff000000; - } - } - else - { - top_mask = 0; - bottom_mask = 0; - } - - end = buffer + width; - - /* Zero fill to the left of the image */ - while (buffer < end && x < pixman_fixed_minus_1) - { - *buffer++ = 0; - x += ux; - x_top += ux_top; - x_bottom += ux_bottom; - mask += mask_inc; - } - - /* Left edge - */ - while (buffer < end && x < 0) - { - uint32_t tr, br; - int32_t distx; - - tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask; - br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; - - distx = pixman_fixed_to_bilinear_weight (x); - - *buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty); - - x += ux; - x_top += ux_top; - x_bottom += ux_bottom; - mask += mask_inc; - } - - /* Main part */ - w = pixman_int_to_fixed (bits->width - 1); - - while (buffer < end && x < w) - { - if (*mask) - { - uint32_t tl, tr, bl, br; - int32_t distx; - - tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; - tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask; - bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; - br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; - - distx = pixman_fixed_to_bilinear_weight (x); - - *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty); - } - - buffer++; - x += ux; - x_top += ux_top; - x_bottom += ux_bottom; - mask += mask_inc; - } - - /* Right Edge */ - w = pixman_int_to_fixed (bits->width); 
- while (buffer < end && x < w) - { - if (*mask) - { - uint32_t tl, bl; - int32_t distx; - - tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; - bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; - - distx = pixman_fixed_to_bilinear_weight (x); - - *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty); - } - - buffer++; - x += ux; - x_top += ux_top; - x_bottom += ux_bottom; - mask += mask_inc; - } - - /* Zero fill to the left of the image */ - while (buffer < end) - *buffer++ = 0; - - return iter->buffer; -} - static force_inline uint32_t bits_image_fetch_pixel_convolution (bits_image_t *image, pixman_fixed_t x, @@ -720,472 +505,6 @@ bits_image_fetch_general (pixman_iter_t *iter, return buffer; } -typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x); - -static force_inline void -bits_image_fetch_separable_convolution_affine (pixman_image_t * image, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask, - - convert_pixel_t convert_pixel, - pixman_format_code_t format, - pixman_repeat_t repeat_mode) -{ - bits_image_t *bits = &image->bits; - pixman_fixed_t *params = image->common.filter_params; - int cwidth = pixman_fixed_to_int (params[0]); - int cheight = pixman_fixed_to_int (params[1]); - int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1; - int y_off = ((cheight << 16) - pixman_fixed_1) >> 1; - int x_phase_bits = pixman_fixed_to_int (params[2]); - int y_phase_bits = pixman_fixed_to_int (params[3]); - int x_phase_shift = 16 - x_phase_bits; - int y_phase_shift = 16 - y_phase_bits; - pixman_fixed_t vx, vy; - pixman_fixed_t ux, uy; - pixman_vector_t v; - int k; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (image->common.transform, &v)) - return; - - ux = image->common.transform->matrix[0][0]; - uy 
= image->common.transform->matrix[1][0]; - - vx = v.vector[0]; - vy = v.vector[1]; - - for (k = 0; k < width; ++k) - { - pixman_fixed_t *y_params; - int satot, srtot, sgtot, sbtot; - pixman_fixed_t x, y; - int32_t x1, x2, y1, y2; - int32_t px, py; - int i, j; - - if (mask && !mask[k]) - goto next; - - /* Round x and y to the middle of the closest phase before continuing. This - * ensures that the convolution matrix is aligned right, since it was - * positioned relative to a particular phase (and not relative to whatever - * exact fraction we happen to get here). - */ - x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1); - y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1); - - px = (x & 0xffff) >> x_phase_shift; - py = (y & 0xffff) >> y_phase_shift; - - x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); - y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); - x2 = x1 + cwidth; - y2 = y1 + cheight; - - satot = srtot = sgtot = sbtot = 0; - - y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight; - - for (i = y1; i < y2; ++i) - { - pixman_fixed_t fy = *y_params++; - - if (fy) - { - pixman_fixed_t *x_params = params + 4 + px * cwidth; - - for (j = x1; j < x2; ++j) - { - pixman_fixed_t fx = *x_params++; - int rx = j; - int ry = i; - - if (fx) - { - pixman_fixed_t f; - uint32_t pixel, mask; - uint8_t *row; - - mask = PIXMAN_FORMAT_A (format)? 
0 : 0xff000000; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - repeat (repeat_mode, &rx, bits->width); - repeat (repeat_mode, &ry, bits->height); - - row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry; - pixel = convert_pixel (row, rx) | mask; - } - else - { - if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height) - { - pixel = 0; - } - else - { - row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry; - pixel = convert_pixel (row, rx) | mask; - } - } - - f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16; - srtot += (int)RED_8 (pixel) * f; - sgtot += (int)GREEN_8 (pixel) * f; - sbtot += (int)BLUE_8 (pixel) * f; - satot += (int)ALPHA_8 (pixel) * f; - } - } - } - } - - satot = (satot + 0x8000) >> 16; - srtot = (srtot + 0x8000) >> 16; - sgtot = (sgtot + 0x8000) >> 16; - sbtot = (sbtot + 0x8000) >> 16; - - satot = CLIP (satot, 0, 0xff); - srtot = CLIP (srtot, 0, 0xff); - sgtot = CLIP (sgtot, 0, 0xff); - sbtot = CLIP (sbtot, 0, 0xff); - - buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0); - - next: - vx += ux; - vy += uy; - } -} - -static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - -static force_inline void -bits_image_fetch_bilinear_affine (pixman_image_t * image, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask, - - convert_pixel_t convert_pixel, - pixman_format_code_t format, - pixman_repeat_t repeat_mode) -{ - pixman_fixed_t x, y; - pixman_fixed_t ux, uy; - pixman_vector_t v; - bits_image_t *bits = &image->bits; - int i; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (image->common.transform, &v)) - return; - - ux = image->common.transform->matrix[0][0]; - uy = image->common.transform->matrix[1][0]; - - x = v.vector[0]; - y = v.vector[1]; - - for (i = 0; i < width; ++i) - { - int 
x1, y1, x2, y2; - uint32_t tl, tr, bl, br; - int32_t distx, disty; - int width = image->bits.width; - int height = image->bits.height; - const uint8_t *row1; - const uint8_t *row2; - - if (mask && !mask[i]) - goto next; - - x1 = x - pixman_fixed_1 / 2; - y1 = y - pixman_fixed_1 / 2; - - distx = pixman_fixed_to_bilinear_weight (x1); - disty = pixman_fixed_to_bilinear_weight (y1); - - y1 = pixman_fixed_to_int (y1); - y2 = y1 + 1; - x1 = pixman_fixed_to_int (x1); - x2 = x1 + 1; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - uint32_t mask; - - mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; - - repeat (repeat_mode, &x1, width); - repeat (repeat_mode, &y1, height); - repeat (repeat_mode, &x2, width); - repeat (repeat_mode, &y2, height); - - row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; - row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2; - - tl = convert_pixel (row1, x1) | mask; - tr = convert_pixel (row1, x2) | mask; - bl = convert_pixel (row2, x1) | mask; - br = convert_pixel (row2, x2) | mask; - } - else - { - uint32_t mask1, mask2; - int bpp; - - /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value, - * which means if you use it in expressions, those - * expressions become unsigned themselves. Since - * the variables below can be negative in some cases, - * that will lead to crashes on 64 bit architectures. - * - * So this line makes sure bpp is signed - */ - bpp = PIXMAN_FORMAT_BPP (format); - - if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0) - { - buffer[i] = 0; - goto next; - } - - if (y2 == 0) - { - row1 = zero; - mask1 = 0; - } - else - { - row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; - row1 += bpp / 8 * x1; - - mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; - } - - if (y1 == height - 1) - { - row2 = zero; - mask2 = 0; - } - else - { - row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2; - row2 += bpp / 8 * x1; - - mask2 = PIXMAN_FORMAT_A (format)? 
0 : 0xff000000; - } - - if (x2 == 0) - { - tl = 0; - bl = 0; - } - else - { - tl = convert_pixel (row1, 0) | mask1; - bl = convert_pixel (row2, 0) | mask2; - } - - if (x1 == width - 1) - { - tr = 0; - br = 0; - } - else - { - tr = convert_pixel (row1, 1) | mask1; - br = convert_pixel (row2, 1) | mask2; - } - } - - buffer[i] = bilinear_interpolation ( - tl, tr, bl, br, distx, disty); - - next: - x += ux; - y += uy; - } -} - -static force_inline void -bits_image_fetch_nearest_affine (pixman_image_t * image, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask, - - convert_pixel_t convert_pixel, - pixman_format_code_t format, - pixman_repeat_t repeat_mode) -{ - pixman_fixed_t x, y; - pixman_fixed_t ux, uy; - pixman_vector_t v; - bits_image_t *bits = &image->bits; - int i; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (image->common.transform, &v)) - return; - - ux = image->common.transform->matrix[0][0]; - uy = image->common.transform->matrix[1][0]; - - x = v.vector[0]; - y = v.vector[1]; - - for (i = 0; i < width; ++i) - { - int width, height, x0, y0; - const uint8_t *row; - - if (mask && !mask[i]) - goto next; - - width = image->bits.width; - height = image->bits.height; - x0 = pixman_fixed_to_int (x - pixman_fixed_e); - y0 = pixman_fixed_to_int (y - pixman_fixed_e); - - if (repeat_mode == PIXMAN_REPEAT_NONE && - (y0 < 0 || y0 >= height || x0 < 0 || x0 >= width)) - { - buffer[i] = 0; - } - else - { - uint32_t mask = PIXMAN_FORMAT_A (format)? 
0 : 0xff000000; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - repeat (repeat_mode, &x0, width); - repeat (repeat_mode, &y0, height); - } - - row = (uint8_t *)bits->bits + bits->rowstride * 4 * y0; - - buffer[i] = convert_pixel (row, x0) | mask; - } - - next: - x += ux; - y += uy; - } -} - -static force_inline uint32_t -convert_a8r8g8b8 (const uint8_t *row, int x) -{ - return *(((uint32_t *)row) + x); -} - -static force_inline uint32_t -convert_x8r8g8b8 (const uint8_t *row, int x) -{ - return *(((uint32_t *)row) + x); -} - -static force_inline uint32_t -convert_a8 (const uint8_t *row, int x) -{ - return *(row + x) << 24; -} - -static force_inline uint32_t -convert_r5g6b5 (const uint8_t *row, int x) -{ - return convert_0565_to_0888 (*((uint16_t *)row + x)); -} - -#define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode) \ - static uint32_t * \ - bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t *iter, \ - const uint32_t * mask) \ - { \ - bits_image_fetch_separable_convolution_affine ( \ - iter->image, \ - iter->x, iter->y++, \ - iter->width, \ - iter->buffer, mask, \ - convert_ ## format, \ - PIXMAN_ ## format, \ - repeat_mode); \ - \ - return iter->buffer; \ - } - -#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode) \ - static uint32_t * \ - bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t *iter, \ - const uint32_t * mask) \ - { \ - bits_image_fetch_bilinear_affine (iter->image, \ - iter->x, iter->y++, \ - iter->width, \ - iter->buffer, mask, \ - convert_ ## format, \ - PIXMAN_ ## format, \ - repeat_mode); \ - return iter->buffer; \ - } - -#define MAKE_NEAREST_FETCHER(name, format, repeat_mode) \ - static uint32_t * \ - bits_image_fetch_nearest_affine_ ## name (pixman_iter_t *iter, \ - const uint32_t * mask) \ - { \ - bits_image_fetch_nearest_affine (iter->image, \ - iter->x, iter->y++, \ - iter->width, \ - iter->buffer, mask, \ - convert_ ## format, \ - PIXMAN_ ## format, \ - repeat_mode); \ - return iter->buffer; \ - 
} - -#define MAKE_FETCHERS(name, format, repeat_mode) \ - MAKE_NEAREST_FETCHER (name, format, repeat_mode) \ - MAKE_BILINEAR_FETCHER (name, format, repeat_mode) \ - MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode) - -MAKE_FETCHERS (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD) -MAKE_FETCHERS (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE) -MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT) -MAKE_FETCHERS (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL) -MAKE_FETCHERS (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD) -MAKE_FETCHERS (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE) -MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT) -MAKE_FETCHERS (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL) -MAKE_FETCHERS (pad_a8, a8, PIXMAN_REPEAT_PAD) -MAKE_FETCHERS (none_a8, a8, PIXMAN_REPEAT_NONE) -MAKE_FETCHERS (reflect_a8, a8, PIXMAN_REPEAT_REFLECT) -MAKE_FETCHERS (normal_a8, a8, PIXMAN_REPEAT_NORMAL) -MAKE_FETCHERS (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD) -MAKE_FETCHERS (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE) -MAKE_FETCHERS (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT) -MAKE_FETCHERS (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL) - static void replicate_pixel_32 (bits_image_t * bits, int x, @@ -1253,9 +572,9 @@ bits_image_fetch_untransformed_repeat_none (bits_image_t *image, w = MIN (width, image->width - x); if (wide) - image->fetch_scanline_float ((pixman_image_t *)image, x, y, w, buffer, NULL); + image->fetch_scanline_float (image, x, y, w, buffer, NULL); else - image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL); + image->fetch_scanline_32 (image, x, y, w, buffer, NULL); width -= w; buffer += w * (wide? 
4 : 1); @@ -1301,9 +620,9 @@ bits_image_fetch_untransformed_repeat_normal (bits_image_t *image, w = MIN (width, image->width - x); if (wide) - image->fetch_scanline_float ((pixman_image_t *)image, x, y, w, buffer, NULL); + image->fetch_scanline_float (image, x, y, w, buffer, NULL); else - image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL); + image->fetch_scanline_32 (image, x, y, w, buffer, NULL); buffer += w * (wide? 4 : 1); x += w; @@ -1381,92 +700,6 @@ static const fetcher_info_t fetcher_info[] = bits_image_fetch_untransformed_float }, -#define FAST_BILINEAR_FLAGS \ - (FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_HAS_TRANSFORM | \ - FAST_PATH_AFFINE_TRANSFORM | \ - FAST_PATH_X_UNIT_POSITIVE | \ - FAST_PATH_Y_UNIT_ZERO | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_BILINEAR_FILTER) - - { PIXMAN_a8r8g8b8, - FAST_BILINEAR_FLAGS, - bits_image_fetch_bilinear_no_repeat_8888, - _pixman_image_get_scanline_generic_float - }, - - { PIXMAN_x8r8g8b8, - FAST_BILINEAR_FLAGS, - bits_image_fetch_bilinear_no_repeat_8888, - _pixman_image_get_scanline_generic_float - }, - -#define GENERAL_BILINEAR_FLAGS \ - (FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_HAS_TRANSFORM | \ - FAST_PATH_AFFINE_TRANSFORM | \ - FAST_PATH_BILINEAR_FILTER) - -#define GENERAL_NEAREST_FLAGS \ - (FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_HAS_TRANSFORM | \ - FAST_PATH_AFFINE_TRANSFORM | \ - FAST_PATH_NEAREST_FILTER) - -#define GENERAL_SEPARABLE_CONVOLUTION_FLAGS \ - (FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_HAS_TRANSFORM | \ - FAST_PATH_AFFINE_TRANSFORM | \ - FAST_PATH_SEPARABLE_CONVOLUTION_FILTER) - -#define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \ - { PIXMAN_ ## format, \ - GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ - bits_image_fetch_separable_convolution_affine_ ## name, \ - _pixman_image_get_scanline_generic_float \ - }, - -#define 
BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ - { PIXMAN_ ## format, \ - GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ - bits_image_fetch_bilinear_affine_ ## name, \ - _pixman_image_get_scanline_generic_float \ - }, - -#define NEAREST_AFFINE_FAST_PATH(name, format, repeat) \ - { PIXMAN_ ## format, \ - GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ - bits_image_fetch_nearest_affine_ ## name, \ - _pixman_image_get_scanline_generic_float \ - }, - -#define AFFINE_FAST_PATHS(name, format, repeat) \ - SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \ - BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ - NEAREST_AFFINE_FAST_PATH(name, format, repeat) - - AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD) - AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE) - AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT) - AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL) - AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD) - AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE) - AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT) - AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL) - AFFINE_FAST_PATHS (pad_a8, a8, PAD) - AFFINE_FAST_PATHS (none_a8, a8, NONE) - AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT) - AFFINE_FAST_PATHS (normal_a8, a8, NORMAL) - AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD) - AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE) - AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT) - AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL) - /* Affine, no alpha */ { PIXMAN_any, (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM), @@ -1528,7 +761,7 @@ dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) int width = iter->width; uint32_t * buffer = iter->buffer; - image->bits.fetch_scanline_32 (image, x, y, width, buffer, mask); + image->bits.fetch_scanline_32 (&image->bits, x, y, width, buffer, mask); if (image->common.alpha_map) { uint32_t *alpha; @@ -1541,8 +774,7 @@ 
dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask) y -= image->common.alpha_origin_y; image->common.alpha_map->fetch_scanline_32 ( - (pixman_image_t *)image->common.alpha_map, - x, y, width, alpha, mask); + image->common.alpha_map, x, y, width, alpha, mask); for (i = 0; i < width; ++i) { @@ -1567,7 +799,7 @@ dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) argb_t * buffer = (argb_t *)iter->buffer; image->fetch_scanline_float ( - (pixman_image_t *)image, x, y, width, (uint32_t *)buffer, mask); + image, x, y, width, (uint32_t *)buffer, mask); if (image->common.alpha_map) { argb_t *alpha; @@ -1580,8 +812,7 @@ dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) y -= image->common.alpha_origin_y; image->common.alpha_map->fetch_scanline_float ( - (pixman_image_t *)image->common.alpha_map, - x, y, width, (uint32_t *)alpha, mask); + image->common.alpha_map, x, y, width, (uint32_t *)alpha, mask); for (i = 0; i < width; ++i) buffer[i].a = alpha[i].a; diff --git a/lib/pixman/pixman/pixman-combine32.c b/lib/pixman/pixman/pixman-combine32.c index 3ac7576bd..450114a52 100644 --- a/lib/pixman/pixman/pixman-combine32.c +++ b/lib/pixman/pixman/pixman-combine32.c @@ -142,12 +142,12 @@ combine_mask (const uint32_t *src, const uint32_t *mask, int i) static void combine_clear (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { - memset (dest, 0, width * sizeof(uint32_t)); + memset (dest, 0, width * sizeof (uint32_t)); } static void @@ -155,7 +155,7 @@ combine_dst (pixman_implementation_t *imp, pixman_op_t op, uint32_t * dest, const uint32_t * src, - const uint32_t * mask, + const uint32_t * mask, int width) { return; @@ -164,9 +164,9 @@ combine_dst (pixman_implementation_t *imp, static void combine_src_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - 
const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -189,9 +189,9 @@ combine_src_u (pixman_implementation_t *imp, static void combine_over_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -254,9 +254,9 @@ combine_over_u (pixman_implementation_t *imp, static void combine_over_reverse_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -274,9 +274,9 @@ combine_over_reverse_u (pixman_implementation_t *imp, static void combine_in_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -293,9 +293,9 @@ combine_in_u (pixman_implementation_t *imp, static void combine_in_reverse_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -313,9 +313,9 @@ combine_in_reverse_u (pixman_implementation_t *imp, static void combine_out_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -332,9 +332,9 @@ combine_out_u (pixman_implementation_t *imp, static void combine_out_reverse_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -352,9 +352,9 @@ combine_out_reverse_u (pixman_implementation_t *imp, static void 
combine_atop_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -374,9 +374,9 @@ combine_atop_u (pixman_implementation_t *imp, static void combine_atop_reverse_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -396,9 +396,9 @@ combine_atop_reverse_u (pixman_implementation_t *imp, static void combine_xor_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -418,9 +418,9 @@ combine_xor_u (pixman_implementation_t *imp, static void combine_add_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -437,9 +437,9 @@ combine_add_u (pixman_implementation_t *imp, static void combine_saturate_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -463,39 +463,66 @@ combine_saturate_u (pixman_implementation_t *imp, } } + /* * PDF blend modes: + * * The following blend modes have been taken from the PDF ISO 32000 * specification, which at this point in time is available from - * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf - * The relevant chapters are 11.3.5 and 11.3.6. 
+ * + * http://www.adobe.com/devnet/pdf/pdf_reference.html + * + * The specific documents of interest are the PDF spec itself: + * + * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf + * + * chapters 11.3.5 and 11.3.6 and a later supplement for Adobe Acrobat + * 9.1 and Reader 9.1: + * + * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000_1.pdf + * + * that clarifies the specifications for blend modes ColorDodge and + * ColorBurn. + * * The formula for computing the final pixel color given in 11.3.6 is: - * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) - * with B() being the blend function. - * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs - * - * These blend modes should match the SVG filter draft specification, as - * it has been designed to mirror ISO 32000. Note that at the current point - * no released draft exists that shows this, as the formulas have not been - * updated yet after the release of ISO 32000. - * - * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and - * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an - * argument. Note that this implementation operates on premultiplied colors, - * while the PDF specification does not. Therefore the code uses the formula - * Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as) + * + * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) + * + * where B() is the blend function. When B(Cb, Cs) = Cs, this formula + * reduces to the regular OVER operator. 
+ * + * Cs and Cb are not premultiplied, so in our implementation we instead + * use: + * + * cr = (1 – αs) × cb + (1 – αb) × cs + αb × αs × B (cb/αb, cs/αs) + * + * where cr, cs, and cb are premultiplied colors, and where the + * + * αb × αs × B(cb/αb, cs/αs) + * + * part is first arithmetically simplified under the assumption that αb + * and αs are not 0, and then updated to produce a meaningful result when + * they are. + * + * For all the blend mode operators, the alpha channel is given by + * + * αr = αs + αb – αb × αs */ /* * Multiply - * B(Dca, ad, Sca, as) = Dca.Sca + * + * ad * as * B(d / ad, s / as) + * = ad * as * d/ad * s/as + * = d * s + * */ static void combine_multiply_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -519,9 +546,9 @@ combine_multiply_u (pixman_implementation_t *imp, static void combine_multiply_ca (pixman_implementation_t *imp, pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -548,13 +575,14 @@ combine_multiply_ca (pixman_implementation_t *imp, static void \ combine_ ## name ## _u (pixman_implementation_t *imp, \ pixman_op_t op, \ - uint32_t * dest, \ - const uint32_t * src, \ - const uint32_t * mask, \ + uint32_t * dest, \ + const uint32_t * src, \ + const uint32_t * mask, \ int width) \ { \ int i; \ - for (i = 0; i < width; ++i) { \ + for (i = 0; i < width; ++i) \ + { \ uint32_t s = combine_mask (src, mask, i); \ uint32_t d = *(dest + i); \ uint8_t sa = ALPHA_8 (s); \ @@ -577,13 +605,14 @@ combine_multiply_ca (pixman_implementation_t *imp, static void \ combine_ ## name ## _ca (pixman_implementation_t *imp, \ pixman_op_t op, \ - uint32_t * dest, \ - const uint32_t * src, \ - const uint32_t * mask, \ - int width) \ + uint32_t * dest, \ + const uint32_t 
* src, \ + const uint32_t * mask, \ + int width) \ { \ int i; \ - for (i = 0; i < width; ++i) { \ + for (i = 0; i < width; ++i) \ + { \ uint32_t m = *(mask + i); \ uint32_t s = *(src + i); \ uint32_t d = *(dest + i); \ @@ -608,49 +637,69 @@ combine_multiply_ca (pixman_implementation_t *imp, /* * Screen - * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca + * + * ad * as * B(d/ad, s/as) + * = ad * as * (d/ad + s/as - s/as * d/ad) + * = ad * s + as * d - s * d */ static inline uint32_t -blend_screen (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +blend_screen (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) { - return DIV_ONE_UN8 (sca * da + dca * sa - sca * dca); + return DIV_ONE_UN8 (s * ad + d * as - s * d); } PDF_SEPARABLE_BLEND_MODE (screen) /* * Overlay - * B(Dca, Da, Sca, Sa) = - * if 2.Dca < Da - * 2.Sca.Dca - * otherwise - * Sa.Da - 2.(Da - Dca).(Sa - Sca) + * + * ad * as * B(d/ad, s/as) + * = ad * as * Hardlight (s, d) + * = if (d / ad < 0.5) + * as * ad * Multiply (s/as, 2 * d/ad) + * else + * as * ad * Screen (s/as, 2 * d / ad - 1) + * = if (d < 0.5 * ad) + * as * ad * s/as * 2 * d /ad + * else + * as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1)) + * = if (2 * d < ad) + * 2 * s * d + * else + * ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1) + * = if (2 * d < ad) + * 2 * s * d + * else + * as * ad - 2 * (ad - d) * (as - s) */ static inline uint32_t -blend_overlay (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +blend_overlay (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) { - uint32_t rca; + uint32_t r; - if (2 * dca < da) - rca = 2 * sca * dca; + if (2 * d < ad) + r = 2 * s * d; else - rca = sa * da - 2 * (da - dca) * (sa - sca); - return DIV_ONE_UN8 (rca); + r = as * ad - 2 * (ad - d) * (as - s); + + return DIV_ONE_UN8 (r); } PDF_SEPARABLE_BLEND_MODE (overlay) /* * Darken - * B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa) + * + * ad * as * B(d/ad, s/as) + * = ad * as * MIN(d/ad, s/as) + * = MIN (as * d, ad * s) */ 
static inline uint32_t -blend_darken (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +blend_darken (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) { - uint32_t s, d; + s = ad * s; + d = as * d; - s = sca * da; - d = dca * sa; return DIV_ONE_UN8 (s > d ? d : s); } @@ -658,15 +707,17 @@ PDF_SEPARABLE_BLEND_MODE (darken) /* * Lighten - * B(Dca, Da, Sca, Sa) = max (Sca.Da, Dca.Sa) + * + * ad * as * B(d/ad, s/as) + * = ad * as * MAX(d/ad, s/as) + * = MAX (as * d, ad * s) */ static inline uint32_t -blend_lighten (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +blend_lighten (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) { - uint32_t s, d; - - s = sca * da; - d = dca * sa; + s = ad * s; + d = as * d; + return DIV_ONE_UN8 (s > d ? s : d); } @@ -674,152 +725,197 @@ PDF_SEPARABLE_BLEND_MODE (lighten) /* * Color dodge - * B(Dca, Da, Sca, Sa) = - * if Dca == 0 - * 0 - * if Sca == Sa - * Sa.Da - * otherwise - * Sa.Da. min (1, Dca / Da / (1 - Sca/Sa)) + * + * ad * as * B(d/ad, s/as) + * = if d/ad = 0 + * ad * as * 0 + * else if (d/ad >= (1 - s/as) + * ad * as * 1 + * else + * ad * as * ((d/ad) / (1 - s/as)) + * = if d = 0 + * 0 + * elif as * d >= ad * (as - s) + * ad * as + * else + * as * (as * d / (as - s)) + * */ static inline uint32_t -blend_color_dodge (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) -{ - if (sca >= sa) - { - return dca == 0 ? 
0 : DIV_ONE_UN8 (sa * da); - } +blend_color_dodge (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) +{ + if (d == 0) + return 0; + else if (as * d >= ad * (as - s)) + return DIV_ONE_UN8 (as * ad); + else if (as - s == 0) + return DIV_ONE_UN8 (as * ad); else - { - uint32_t rca = dca * sa / (sa - sca); - return DIV_ONE_UN8 (sa * MIN (rca, da)); - } + return DIV_ONE_UN8 (as * ((d * as) / ((as - s)))); } PDF_SEPARABLE_BLEND_MODE (color_dodge) /* * Color burn - * B(Dca, Da, Sca, Sa) = - * if Dca == Da - * Sa.Da - * if Sca == 0 - * 0 - * otherwise - * Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca)) + * + * We modify the first clause "if d = 1" to "if d >= 1" since with + * premultiplied colors d > 1 can actually happen. + * + * ad * as * B(d/ad, s/as) + * = if d/ad >= 1 + * ad * as * 1 + * elif (1 - d/ad) >= s/as + * ad * as * 0 + * else + * ad * as * (1 - ((1 - d/ad) / (s/as))) + * = if d >= ad + * ad * as + * elif as * ad - as * d >= ad * s + * 0 + * else + * ad * as - as * as * (ad - d) / s */ static inline uint32_t -blend_color_burn (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) -{ - if (sca == 0) - { - return dca < da ? 
0 : DIV_ONE_UN8 (sa * da); - } +blend_color_burn (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) +{ + if (d >= ad) + return DIV_ONE_UN8 (ad * as); + else if (as * ad - as * d >= ad * s) + return 0; + else if (s == 0) + return 0; else - { - uint32_t rca = (da - dca) * sa / sca; - return DIV_ONE_UN8 (sa * (MAX (rca, da) - rca)); - } + return DIV_ONE_UN8 (ad * as - (as * as * (ad - d)) / s); } PDF_SEPARABLE_BLEND_MODE (color_burn) /* * Hard light - * B(Dca, Da, Sca, Sa) = - * if 2.Sca < Sa - * 2.Sca.Dca - * otherwise - * Sa.Da - 2.(Da - Dca).(Sa - Sca) + * + * ad * as * B(d/ad, s/as) + * = if (s/as <= 0.5) + * ad * as * Multiply (d/ad, 2 * s/as) + * else + * ad * as * Screen (d/ad, 2 * s/as - 1) + * = if 2 * s <= as + * ad * as * d/ad * 2 * s / as + * else + * ad * as * (d/ad + (2 * s/as - 1) - d/ad * (2 * s/as - 1)) + * = if 2 * s <= as + * 2 * s * d + * else + * as * ad - 2 * (ad - d) * (as - s) */ static inline uint32_t -blend_hard_light (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +blend_hard_light (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) { - if (2 * sca < sa) - return DIV_ONE_UN8 (2 * sca * dca); + if (2 * s < as) + return DIV_ONE_UN8 (2 * s * d); else - return DIV_ONE_UN8 (sa * da - 2 * (da - dca) * (sa - sca)); + return DIV_ONE_UN8 (as * ad - 2 * (ad - d) * (as - s)); } PDF_SEPARABLE_BLEND_MODE (hard_light) /* * Soft light - * B(Dca, Da, Sca, Sa) = - * if (2.Sca <= Sa) - * Dca.(Sa - (1 - Dca/Da).(2.Sca - Sa)) - * otherwise if Dca.4 <= Da - * Dca.(Sa + (2.Sca - Sa).((16.Dca/Da - 12).Dca/Da + 3) - * otherwise - * (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa)) + * + * ad * as * B(d/ad, s/as) + * = if (s/as <= 0.5) + * ad * as * (d/ad - (1 - 2 * s/as) * d/ad * (1 - d/ad)) + * else if (d/ad <= 0.25) + * ad * as * (d/ad + (2 * s/as - 1) * ((((16 * d/ad - 12) * d/ad + 4) * d/ad) - d/ad)) + * else + * ad * as * (d/ad + (2 * s/as - 1) * (sqrt (d/ad) - d/ad)) + * = if (2 * s <= as) + * d * as - d * (ad - d) * (as - 2 * s) / ad; + * else if (4 * d <= ad) + 
* d * as + (2 * s - as) * d * ((16 * d / ad - 12) * d / ad + 3); + * else + * d * as + (sqrt (d * ad) - d) * (2 * s - as); */ static inline uint32_t -blend_soft_light (uint32_t dca_org, - uint32_t da_org, - uint32_t sca_org, - uint32_t sa_org) -{ - double dca = dca_org * (1.0 / MASK); - double da = da_org * (1.0 / MASK); - double sca = sca_org * (1.0 / MASK); - double sa = sa_org * (1.0 / MASK); - double rca; - - if (2 * sca < sa) +blend_soft_light (uint32_t d_org, + uint32_t ad_org, + uint32_t s_org, + uint32_t as_org) +{ + double d = d_org * (1.0 / MASK); + double ad = ad_org * (1.0 / MASK); + double s = s_org * (1.0 / MASK); + double as = as_org * (1.0 / MASK); + double r; + + if (2 * s < as) { - if (da == 0) - rca = dca * sa; + if (ad == 0) + r = d * as; else - rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da; + r = d * as - d * (ad - d) * (as - 2 * s) / ad; } - else if (da == 0) + else if (ad == 0) { - rca = 0; + r = 0; } - else if (4 * dca <= da) + else if (4 * d <= ad) { - rca = dca * sa + - (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3); + r = d * as + + (2 * s - as) * d * ((16 * d / ad - 12) * d / ad + 3); } else { - rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca - sa); + r = d * as + (sqrt (d * ad) - d) * (2 * s - as); } - return rca * MASK + 0.5; + return r * MASK + 0.5; } PDF_SEPARABLE_BLEND_MODE (soft_light) /* * Difference - * B(Dca, Da, Sca, Sa) = abs (Dca.Sa - Sca.Da) + * + * ad * as * B(s/as, d/ad) + * = ad * as * abs (s/as - d/ad) + * = if (s/as <= d/ad) + * ad * as * (d/ad - s/as) + * else + * ad * as * (s/as - d/ad) + * = if (ad * s <= as * d) + * as * d - ad * s + * else + * ad * s - as * d */ static inline uint32_t -blend_difference (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +blend_difference (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) { - uint32_t dcasa = dca * sa; - uint32_t scada = sca * da; + uint32_t das = d * as; + uint32_t sad = s * ad; - if (scada < dcasa) - return DIV_ONE_UN8 (dcasa - scada); + if 
(sad < das) + return DIV_ONE_UN8 (das - sad); else - return DIV_ONE_UN8 (scada - dcasa); + return DIV_ONE_UN8 (sad - das); } PDF_SEPARABLE_BLEND_MODE (difference) /* * Exclusion - * B(Dca, Da, Sca, Sa) = (Sca.Da + Dca.Sa - 2.Sca.Dca) + * + * ad * as * B(s/as, d/ad) + * = ad * as * (d/ad + s/as - 2 * d/ad * s/as) + * = as * d + ad * s - 2 * s * d */ /* This can be made faster by writing it directly and not using * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */ static inline uint32_t -blend_exclusion (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +blend_exclusion (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) { - return DIV_ONE_UN8 (sca * da + dca * sa - 2 * dca * sca); + return DIV_ONE_UN8 (s * ad + d * as - 2 * d * s); } PDF_SEPARABLE_BLEND_MODE (exclusion) @@ -834,103 +930,70 @@ PDF_SEPARABLE_BLEND_MODE (exclusion) * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue * * clip_color (C): - * l = LUM (C) - * min = Cmin - * max = Cmax - * if n < 0.0 - * C = l + ( ( ( C – l ) × l ) ⁄ ( l – min ) ) - * if x > 1.0 - * C = l + ( ( ( C – l ) × ( 1 – l ) ) ⁄ ( max – l ) ) - * return C + * l = LUM (C) + * min = Cmin + * max = Cmax + * if n < 0.0 + * C = l + (((C – l) × l) ⁄ (l – min)) + * if x > 1.0 + * C = l + (((C – l) × (1 – l) ) ⁄ (max – l)) + * return C * * set_lum (C, l): - * d = l – LUM (C) - * C += d - * return clip_color (C) + * d = l – LUM (C) + * C += d + * return clip_color (C) * * SAT (C) = CH_MAX (C) - CH_MIN (C) * * set_sat (C, s): - * if Cmax > Cmin - * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) ) - * Cmax = s - * else - * Cmid = Cmax = 0.0 - * Cmin = 0.0 - * return C + * if Cmax > Cmin + * Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) ) + * Cmax = s + * else + * Cmid = Cmax = 0.0 + * Cmin = 0.0 + * return C */ /* For premultiplied colors, we need to know what happens when C is * multiplied by a real number. 
LUM and SAT are linear: * - * LUM (r × C) = r × LUM (C) SAT (r * C) = r * SAT (C) + * LUM (r × C) = r × LUM (C) SAT (r * C) = r * SAT (C) * * If we extend clip_color with an extra argument a and change * - * if x >= 1.0 + * if x >= 1.0 * * into * - * if x >= a + * if x >= a * * then clip_color is also linear: * - * r * clip_color (C, a) = clip_color (r_c, ra); + * r * clip_color (C, a) = clip_color (r * C, r * a); * * for positive r. * * Similarly, we can extend set_lum with an extra argument that is just passed * on to clip_color: * - * r * set_lum ( C, l, a) + * r * set_lum (C, l, a) * - * = r × clip_color ( C + l - LUM (C), a) + * = r × clip_color (C + l - LUM (C), a) * - * = clip_color ( r * C + r × l - r * LUM (C), r * a) + * = clip_color (r * C + r × l - r * LUM (C), r * a) * - * = set_lum ( r * C, r * l, r * a) + * = set_lum (r * C, r * l, r * a) * * Finally, set_sat: * - * r * set_sat (C, s) = set_sat (x * C, r * s) + * r * set_sat (C, s) = set_sat (x * C, r * s) * * The above holds for all non-zero x, because the x'es in the fraction for * C_mid cancel out. 
Specifically, it holds for x = r: * - * r * set_sat (C, s) = set_sat (r_c, rs) - * - */ - -/* So, for the non-separable PDF blend modes, we have (using s, d for - * non-premultiplied colors, and S, D for premultiplied: - * - * Color: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1) - * = set_lum (S * a_d, a_s * LUM (D), a_s * a_d) - * - * - * Luminosity: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1) - * = set_lum (a_s * D, a_d * LUM(S), a_s * a_d) - * - * - * Saturation: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1) - * = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)), - * a_s * LUM (D), a_s * a_d) - * = set_lum (set_sat (a_s * D, a_d * SAT (S), a_s * LUM (D), a_s * a_d)) - * - * Hue: - * - * a_s * a_d * B(s, d) - * = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1) - * = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d) + * r * set_sat (C, s) = set_sat (r * C, r * s) * */ @@ -942,11 +1005,11 @@ PDF_SEPARABLE_BLEND_MODE (exclusion) #define PDF_NON_SEPARABLE_BLEND_MODE(name) \ static void \ combine_ ## name ## _u (pixman_implementation_t *imp, \ - pixman_op_t op, \ - uint32_t *dest, \ - const uint32_t *src, \ - const uint32_t *mask, \ - int width) \ + pixman_op_t op, \ + uint32_t * dest, \ + const uint32_t * src, \ + const uint32_t * mask, \ + int width) \ { \ int i; \ for (i = 0; i < width; ++i) \ @@ -958,7 +1021,7 @@ PDF_SEPARABLE_BLEND_MODE (exclusion) uint8_t da = ALPHA_8 (d); \ uint8_t ida = ~da; \ uint32_t result; \ - uint32_t sc[3], dc[3], c[3]; \ + uint32_t sc[3], dc[3], c[3]; \ \ result = d; \ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida); \ @@ -1104,80 +1167,94 @@ set_sat (uint32_t dest[3], uint32_t src[3], uint32_t sat) } } -/* - * Hue: - * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb)) +/* Hue: + * + * as * ad * B(s/as, d/ad) + * = as * ad * set_lum (set_sat (s/as, SAT (d/ad)), LUM 
(d/ad), 1) + * = set_lum (set_sat (ad * s, as * SAT (d)), as * LUM (d), as * ad) + * */ static inline void -blend_hsl_hue (uint32_t c[3], - uint32_t dc[3], - uint32_t da, - uint32_t sc[3], - uint32_t sa) +blend_hsl_hue (uint32_t r[3], + uint32_t d[3], + uint32_t ad, + uint32_t s[3], + uint32_t as) { - c[0] = sc[0] * da; - c[1] = sc[1] * da; - c[2] = sc[2] * da; - set_sat (c, c, SAT (dc) * sa); - set_lum (c, c, sa * da, LUM (dc) * sa); + r[0] = s[0] * ad; + r[1] = s[1] * ad; + r[2] = s[2] * ad; + set_sat (r, r, SAT (d) * as); + set_lum (r, r, as * ad, LUM (d) * as); } PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue) -/* - * Saturation: - * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb)) +/* + * Saturation + * + * as * ad * B(s/as, d/ad) + * = as * ad * set_lum (set_sat (d/ad, SAT (s/as)), LUM (d/ad), 1) + * = set_lum (as * ad * set_sat (d/ad, SAT (s/as)), + * as * LUM (d), as * ad) + * = set_lum (set_sat (as * d, ad * SAT (s)), as * LUM (d), as * ad) */ static inline void -blend_hsl_saturation (uint32_t c[3], - uint32_t dc[3], - uint32_t da, - uint32_t sc[3], - uint32_t sa) +blend_hsl_saturation (uint32_t r[3], + uint32_t d[3], + uint32_t ad, + uint32_t s[3], + uint32_t as) { - c[0] = dc[0] * sa; - c[1] = dc[1] * sa; - c[2] = dc[2] * sa; - set_sat (c, c, SAT (sc) * da); - set_lum (c, c, sa * da, LUM (dc) * sa); + r[0] = d[0] * as; + r[1] = d[1] * as; + r[2] = d[2] * as; + set_sat (r, r, SAT (s) * ad); + set_lum (r, r, as * ad, LUM (d) * as); } PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation) -/* - * Color: - * B(Cb, Cs) = set_lum (Cs, LUM (Cb)) +/* + * Color + * + * as * ad * B(s/as, d/ad) + * = as * ad * set_lum (s/as, LUM (d/ad), 1) + * = set_lum (s * ad, as * LUM (d), as * ad) */ static inline void -blend_hsl_color (uint32_t c[3], - uint32_t dc[3], - uint32_t da, - uint32_t sc[3], - uint32_t sa) +blend_hsl_color (uint32_t r[3], + uint32_t d[3], + uint32_t ad, + uint32_t s[3], + uint32_t as) { - c[0] = sc[0] * da; - c[1] = sc[1] * da; - c[2] = sc[2] * da; - set_lum (c, 
c, sa * da, LUM (dc) * sa); + r[0] = s[0] * ad; + r[1] = s[1] * ad; + r[2] = s[2] * ad; + set_lum (r, r, as * ad, LUM (d) * as); } PDF_NON_SEPARABLE_BLEND_MODE (hsl_color) /* - * Luminosity: - * B(Cb, Cs) = set_lum (Cb, LUM (Cs)) + * Luminosity + * + * as * ad * B(s/as, d/ad) + * = as * ad * set_lum (d/ad, LUM (s/as), 1) + * = set_lum (as * d, ad * LUM (s), as * ad) */ static inline void -blend_hsl_luminosity (uint32_t c[3], - uint32_t dc[3], - uint32_t da, - uint32_t sc[3], - uint32_t sa) +blend_hsl_luminosity (uint32_t r[3], + uint32_t d[3], + uint32_t ad, + uint32_t s[3], + uint32_t as) { - c[0] = dc[0] * sa; - c[1] = dc[1] * sa; - c[2] = dc[2] * sa; - set_lum (c, c, sa * da, LUM (sc) * da); + r[0] = d[0] * as; + r[1] = d[1] * as; + r[2] = d[2] * as; + set_lum (r, r, as * ad, LUM (s) * ad); } PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity) @@ -1194,7 +1271,7 @@ PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity) * come from each of the four areas of the picture -- areas covered by neither * A nor B, areas covered only by A, areas covered only by B and finally * areas covered by both A and B. 
- * + * * Disjoint Conjoint * Fa Fb Fa Fb * (0,0,0,0) 0 0 0 0 diff --git a/lib/pixman/pixman/pixman-compiler.h b/lib/pixman/pixman/pixman-compiler.h index 9b190b422..2489adc38 100644 --- a/lib/pixman/pixman/pixman-compiler.h +++ b/lib/pixman/pixman/pixman-compiler.h @@ -178,7 +178,7 @@ # define PIXMAN_GET_THREAD_LOCAL(name) \ (&name) -#elif defined(HAVE_PTHREAD_SETSPECIFIC) +#elif defined(HAVE_PTHREADS) #include <pthread.h> diff --git a/lib/pixman/pixman/pixman-fast-path.c b/lib/pixman/pixman/pixman-fast-path.c index 247aea645..c6e43de10 100644 --- a/lib/pixman/pixman/pixman-fast-path.c +++ b/lib/pixman/pixman/pixman-fast-path.c @@ -2263,87 +2263,1022 @@ fast_write_back_r5g6b5 (pixman_iter_t *iter) typedef struct { - pixman_format_code_t format; - pixman_iter_get_scanline_t get_scanline; - pixman_iter_write_back_t write_back; -} fetcher_info_t; + int y; + uint64_t * buffer; +} line_t; -static const fetcher_info_t fetchers[] = +typedef struct { - { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, - { PIXMAN_null } -}; + line_t lines[2]; + pixman_fixed_t y; + pixman_fixed_t x; + uint64_t data[1]; +} bilinear_info_t; -static pixman_bool_t -fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static void +fetch_horizontal (bits_image_t *image, line_t *line, + int y, pixman_fixed_t x, pixman_fixed_t ux, int n) { - pixman_image_t *image = iter->image; + uint32_t *bits = image->bits + y * image->rowstride; + int i; -#define FLAGS \ - (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ - FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + for (i = 0; i < n; ++i) + { + int x0 = pixman_fixed_to_int (x); + int x1 = x0 + 1; + int32_t dist_x; + + uint32_t left = *(bits + x0); + uint32_t right = *(bits + x1); + + dist_x = pixman_fixed_to_bilinear_weight (x); + dist_x <<= (8 - BILINEAR_INTERPOLATION_BITS); + +#if SIZEOF_LONG <= 4 + { + uint32_t lag, rag, ag; + uint32_t lrb, rrb, rb; + + lag = (left & 0xff00ff00) >> 8; + rag = (right & 
0xff00ff00) >> 8; + ag = (lag << 8) + dist_x * (rag - lag); + + lrb = (left & 0x00ff00ff); + rrb = (right & 0x00ff00ff); + rb = (lrb << 8) + dist_x * (rrb - lrb); + + *((uint32_t *)(line->buffer + i)) = ag; + *((uint32_t *)(line->buffer + i) + 1) = rb; + } +#else + { + uint64_t lagrb, ragrb; + uint32_t lag, rag; + uint32_t lrb, rrb; + + lag = (left & 0xff00ff00); + lrb = (left & 0x00ff00ff); + rag = (right & 0xff00ff00); + rrb = (right & 0x00ff00ff); + lagrb = (((uint64_t)lag) << 24) | lrb; + ragrb = (((uint64_t)rag) << 24) | rrb; + + line->buffer[i] = (lagrb << 8) + dist_x * (ragrb - lagrb); + } +#endif + + x += ux; + } + + line->y = y; +} + +static uint32_t * +fast_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask) +{ + pixman_fixed_t fx, ux; + bilinear_info_t *info = iter->data; + line_t *line0, *line1; + int y0, y1; + int32_t dist_y; + int i; + + fx = info->x; + ux = iter->image->common.transform->matrix[0][0]; + + y0 = pixman_fixed_to_int (info->y); + y1 = y0 + 1; + dist_y = pixman_fixed_to_bilinear_weight (info->y); + dist_y <<= (8 - BILINEAR_INTERPOLATION_BITS); + + line0 = &info->lines[y0 & 0x01]; + line1 = &info->lines[y1 & 0x01]; + + if (line0->y != y0) + { + fetch_horizontal ( + &iter->image->bits, line0, y0, fx, ux, iter->width); + } + + if (line1->y != y1) + { + fetch_horizontal ( + &iter->image->bits, line1, y1, fx, ux, iter->width); + } + + for (i = 0; i < iter->width; ++i) + { +#if SIZEOF_LONG <= 4 + uint32_t ta, tr, tg, tb; + uint32_t ba, br, bg, bb; + uint32_t tag, trb; + uint32_t bag, brb; + uint32_t a, r, g, b; + + tag = *((uint32_t *)(line0->buffer + i)); + trb = *((uint32_t *)(line0->buffer + i) + 1); + bag = *((uint32_t *)(line1->buffer + i)); + brb = *((uint32_t *)(line1->buffer + i) + 1); + + ta = tag >> 16; + ba = bag >> 16; + a = (ta << 8) + dist_y * (ba - ta); + + tr = trb >> 16; + br = brb >> 16; + r = (tr << 8) + dist_y * (br - tr); + + tg = tag & 0xffff; + bg = bag & 0xffff; + g = (tg << 8) + dist_y * (bg - tg); + + tb = 
trb & 0xffff; + bb = brb & 0xffff; + b = (tb << 8) + dist_y * (bb - tb); + + a = (a << 8) & 0xff000000; + r = (r << 0) & 0x00ff0000; + g = (g >> 8) & 0x0000ff00; + b = (b >> 16) & 0x000000ff; +#else + uint64_t top = line0->buffer[i]; + uint64_t bot = line1->buffer[i]; + uint64_t tar = (top & 0xffff0000ffff0000ULL) >> 16; + uint64_t bar = (bot & 0xffff0000ffff0000ULL) >> 16; + uint64_t tgb = (top & 0x0000ffff0000ffffULL); + uint64_t bgb = (bot & 0x0000ffff0000ffffULL); + uint64_t ar, gb; + uint32_t a, r, g, b; + + ar = (tar << 8) + dist_y * (bar - tar); + gb = (tgb << 8) + dist_y * (bgb - tgb); + + a = ((ar >> 24) & 0xff000000); + r = ((ar >> 0) & 0x00ff0000); + g = ((gb >> 40) & 0x0000ff00); + b = ((gb >> 16) & 0x000000ff); +#endif + + iter->buffer[i] = a | r | g | b; + } + + info->y += iter->image->common.transform->matrix[1][1]; + + return iter->buffer; +} + +static void +bilinear_cover_iter_fini (pixman_iter_t *iter) +{ + free (iter->data); +} + +static void +fast_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info) +{ + int width = iter->width; + bilinear_info_t *info; + pixman_vector_t v; + + /* Reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (iter->image->common.transform, &v)) + goto fail; + + info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t)); + if (!info) + goto fail; + + info->x = v.vector[0] - pixman_fixed_1 / 2; + info->y = v.vector[1] - pixman_fixed_1 / 2; + + /* It is safe to set the y coordinates to -1 initially + * because COVER_CLIP_BILINEAR ensures that we will only + * be asked to fetch lines in the [0, height) interval + */ + info->lines[0].y = -1; + info->lines[0].buffer = &(info->data[0]); + info->lines[1].y = -1; + info->lines[1].buffer = &(info->data[width]); + + iter->get_scanline = 
fast_fetch_bilinear_cover; + iter->fini = bilinear_cover_iter_fini; + + iter->data = info; + return; + +fail: + /* Something went wrong, either a bad matrix or OOM; in such cases, + * we don't guarantee any particular rendering. + */ + _pixman_log_error ( + FUNC, "Allocation failure or bad matrix, skipping rendering\n"); + + iter->get_scanline = _pixman_iter_get_scanline_noop; + iter->fini = NULL; +} + +static uint32_t * +bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter, + const uint32_t *mask) +{ + + pixman_image_t * ima = iter->image; + int offset = iter->x; + int line = iter->y++; + int width = iter->width; + uint32_t * buffer = iter->buffer; + + bits_image_t *bits = &ima->bits; + pixman_fixed_t x_top, x_bottom, x; + pixman_fixed_t ux_top, ux_bottom, ux; + pixman_vector_t v; + uint32_t top_mask, bottom_mask; + uint32_t *top_row; + uint32_t *bottom_row; + uint32_t *end; + uint32_t zero[2] = { 0, 0 }; + uint32_t one = 1; + int y, y1, y2; + int disty; + int mask_inc; + int w; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; - if ((iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FLAGS) == FLAGS) + if (!pixman_transform_point_3d (bits->common.transform, &v)) + return iter->buffer; + + ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0]; + x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2; + + y = v.vector[1] - pixman_fixed_1/2; + disty = pixman_fixed_to_bilinear_weight (y); + + /* Load the pointers to the first and second lines from the source + * image that bilinear code must read. + * + * The main trick in this code is about the check if any line are + * outside of the image; + * + * When I realize that a line (any one) is outside, I change + * the pointer to a dummy area with zeros. 
Once I change this, I + * must be sure the pointer will not change, so I set the + * variables to each pointer increments inside the loop. + */ + y1 = pixman_fixed_to_int (y); + y2 = y1 + 1; + + if (y1 < 0 || y1 >= bits->height) + { + top_row = zero; + x_top = 0; + ux_top = 0; + } + else { - const fetcher_info_t *f; + top_row = bits->bits + y1 * bits->rowstride; + x_top = x; + ux_top = ux; + } - for (f = &fetchers[0]; f->format != PIXMAN_null; f++) + if (y2 < 0 || y2 >= bits->height) + { + bottom_row = zero; + x_bottom = 0; + ux_bottom = 0; + } + else + { + bottom_row = bits->bits + y2 * bits->rowstride; + x_bottom = x; + ux_bottom = ux; + } + + /* Instead of checking whether the operation uses the mask in + * each loop iteration, verify this only once and prepare the + * variables to make the code smaller inside the loop. + */ + if (!mask) + { + mask_inc = 0; + mask = &one; + } + else + { + /* If we have a mask, prepare the variables to check it */ + mask_inc = 1; + } + + /* If both are zero, then the whole thing is zero */ + if (top_row == zero && bottom_row == zero) + { + memset (buffer, 0, width * sizeof (uint32_t)); + return iter->buffer; + } + else if (bits->format == PIXMAN_x8r8g8b8) + { + if (top_row == zero) { - if (image->common.extended_format_code == f->format) - { - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; + top_mask = 0; + bottom_mask = 0xff000000; + } + else if (bottom_row == zero) + { + top_mask = 0xff000000; + bottom_mask = 0; + } + else + { + top_mask = 0xff000000; + bottom_mask = 0xff000000; + } + } + else + { + top_mask = 0; + bottom_mask = 0; + } - iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; - iter->stride = s; + end = buffer + width; - iter->get_scanline = f->get_scanline; - return TRUE; - } + /* Zero fill to the left of the image */ + while (buffer < end && x < pixman_fixed_minus_1) + { + *buffer++ = 0; + x += ux; + x_top += ux_top; + x_bottom += ux_bottom; + mask += mask_inc; 
+ } + + /* Left edge + */ + while (buffer < end && x < 0) + { + uint32_t tr, br; + int32_t distx; + + tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask; + br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; + + distx = pixman_fixed_to_bilinear_weight (x); + + *buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty); + + x += ux; + x_top += ux_top; + x_bottom += ux_bottom; + mask += mask_inc; + } + + /* Main part */ + w = pixman_int_to_fixed (bits->width - 1); + + while (buffer < end && x < w) + { + if (*mask) + { + uint32_t tl, tr, bl, br; + int32_t distx; + + tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; + tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask; + bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; + br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask; + + distx = pixman_fixed_to_bilinear_weight (x); + + *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty); } + + buffer++; + x += ux; + x_top += ux_top; + x_bottom += ux_bottom; + mask += mask_inc; } - return FALSE; + /* Right Edge */ + w = pixman_int_to_fixed (bits->width); + while (buffer < end && x < w) + { + if (*mask) + { + uint32_t tl, bl; + int32_t distx; + + tl = top_row [pixman_fixed_to_int (x_top)] | top_mask; + bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask; + + distx = pixman_fixed_to_bilinear_weight (x); + + *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty); + } + + buffer++; + x += ux; + x_top += ux_top; + x_bottom += ux_bottom; + mask += mask_inc; + } + + /* Zero fill to the right of the image */ + while (buffer < end) + *buffer++ = 0; + + return iter->buffer; } -static pixman_bool_t -fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x); + +static force_inline void +bits_image_fetch_separable_convolution_affine (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + 
const uint32_t * mask, + + convert_pixel_t convert_pixel, + pixman_format_code_t format, + pixman_repeat_t repeat_mode) { - pixman_image_t *image = iter->image; + bits_image_t *bits = &image->bits; + pixman_fixed_t *params = image->common.filter_params; + int cwidth = pixman_fixed_to_int (params[0]); + int cheight = pixman_fixed_to_int (params[1]); + int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1; + int y_off = ((cheight << 16) - pixman_fixed_1) >> 1; + int x_phase_bits = pixman_fixed_to_int (params[2]); + int y_phase_bits = pixman_fixed_to_int (params[3]); + int x_phase_shift = 16 - x_phase_bits; + int y_phase_shift = 16 - y_phase_bits; + pixman_fixed_t vx, vy; + pixman_fixed_t ux, uy; + pixman_vector_t v; + int k; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; - if ((iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS) + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + + vx = v.vector[0]; + vy = v.vector[1]; + + for (k = 0; k < width; ++k) { - const fetcher_info_t *f; + pixman_fixed_t *y_params; + int satot, srtot, sgtot, sbtot; + pixman_fixed_t x, y; + int32_t x1, x2, y1, y2; + int32_t px, py; + int i, j; + + if (mask && !mask[k]) + goto next; + + /* Round x and y to the middle of the closest phase before continuing. This + * ensures that the convolution matrix is aligned right, since it was + * positioned relative to a particular phase (and not relative to whatever + * exact fraction we happen to get here). 
+ */ + x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1); + y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1); - for (f = &fetchers[0]; f->format != PIXMAN_null; f++) + px = (x & 0xffff) >> x_phase_shift; + py = (y & 0xffff) >> y_phase_shift; + + x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); + y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); + x2 = x1 + cwidth; + y2 = y1 + cheight; + + satot = srtot = sgtot = sbtot = 0; + + y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight; + + for (i = y1; i < y2; ++i) { - if (image->common.extended_format_code == f->format) - { - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; + pixman_fixed_t fy = *y_params++; - iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; - iter->stride = s; + if (fy) + { + pixman_fixed_t *x_params = params + 4 + px * cwidth; - if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == - (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) + for (j = x1; j < x2; ++j) { - iter->get_scanline = fast_dest_fetch_noop; + pixman_fixed_t fx = *x_params++; + int rx = j; + int ry = i; + + if (fx) + { + pixman_fixed_t f; + uint32_t pixel, mask; + uint8_t *row; + + mask = PIXMAN_FORMAT_A (format)? 
0 : 0xff000000; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &rx, bits->width); + repeat (repeat_mode, &ry, bits->height); + + row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry; + pixel = convert_pixel (row, rx) | mask; + } + else + { + if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height) + { + pixel = 0; + } + else + { + row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry; + pixel = convert_pixel (row, rx) | mask; + } + } + + f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16; + srtot += (int)RED_8 (pixel) * f; + sgtot += (int)GREEN_8 (pixel) * f; + sbtot += (int)BLUE_8 (pixel) * f; + satot += (int)ALPHA_8 (pixel) * f; + } } - else - { - iter->get_scanline = f->get_scanline; - } - iter->write_back = f->write_back; - return TRUE; } } + + satot = (satot + 0x8000) >> 16; + srtot = (srtot + 0x8000) >> 16; + sgtot = (sgtot + 0x8000) >> 16; + sbtot = (sbtot + 0x8000) >> 16; + + satot = CLIP (satot, 0, 0xff); + srtot = CLIP (srtot, 0, 0xff); + sgtot = CLIP (sgtot, 0, 0xff); + sbtot = CLIP (sbtot, 0, 0xff); + + buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0); + + next: + vx += ux; + vy += uy; } - return FALSE; } +static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + +static force_inline void +bits_image_fetch_bilinear_affine (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask, + + convert_pixel_t convert_pixel, + pixman_format_code_t format, + pixman_repeat_t repeat_mode) +{ + pixman_fixed_t x, y; + pixman_fixed_t ux, uy; + pixman_vector_t v; + bits_image_t *bits = &image->bits; + int i; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; + + ux = image->common.transform->matrix[0][0]; + uy = 
image->common.transform->matrix[1][0]; + + x = v.vector[0]; + y = v.vector[1]; + + for (i = 0; i < width; ++i) + { + int x1, y1, x2, y2; + uint32_t tl, tr, bl, br; + int32_t distx, disty; + int width = image->bits.width; + int height = image->bits.height; + const uint8_t *row1; + const uint8_t *row2; + + if (mask && !mask[i]) + goto next; + + x1 = x - pixman_fixed_1 / 2; + y1 = y - pixman_fixed_1 / 2; + + distx = pixman_fixed_to_bilinear_weight (x1); + disty = pixman_fixed_to_bilinear_weight (y1); + + y1 = pixman_fixed_to_int (y1); + y2 = y1 + 1; + x1 = pixman_fixed_to_int (x1); + x2 = x1 + 1; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + uint32_t mask; + + mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; + + repeat (repeat_mode, &x1, width); + repeat (repeat_mode, &y1, height); + repeat (repeat_mode, &x2, width); + repeat (repeat_mode, &y2, height); + + row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; + row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2; + + tl = convert_pixel (row1, x1) | mask; + tr = convert_pixel (row1, x2) | mask; + bl = convert_pixel (row2, x1) | mask; + br = convert_pixel (row2, x2) | mask; + } + else + { + uint32_t mask1, mask2; + int bpp; + + /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value, + * which means if you use it in expressions, those + * expressions become unsigned themselves. Since + * the variables below can be negative in some cases, + * that will lead to crashes on 64 bit architectures. + * + * So this line makes sure bpp is signed + */ + bpp = PIXMAN_FORMAT_BPP (format); + + if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0) + { + buffer[i] = 0; + goto next; + } + + if (y2 == 0) + { + row1 = zero; + mask1 = 0; + } + else + { + row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; + row1 += bpp / 8 * x1; + + mask1 = PIXMAN_FORMAT_A (format)? 
0 : 0xff000000; + } + + if (y1 == height - 1) + { + row2 = zero; + mask2 = 0; + } + else + { + row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2; + row2 += bpp / 8 * x1; + + mask2 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; + } + + if (x2 == 0) + { + tl = 0; + bl = 0; + } + else + { + tl = convert_pixel (row1, 0) | mask1; + bl = convert_pixel (row2, 0) | mask2; + } + + if (x1 == width - 1) + { + tr = 0; + br = 0; + } + else + { + tr = convert_pixel (row1, 1) | mask1; + br = convert_pixel (row2, 1) | mask2; + } + } + + buffer[i] = bilinear_interpolation ( + tl, tr, bl, br, distx, disty); + + next: + x += ux; + y += uy; + } +} + +static force_inline void +bits_image_fetch_nearest_affine (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask, + + convert_pixel_t convert_pixel, + pixman_format_code_t format, + pixman_repeat_t repeat_mode) +{ + pixman_fixed_t x, y; + pixman_fixed_t ux, uy; + pixman_vector_t v; + bits_image_t *bits = &image->bits; + int i; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + + x = v.vector[0]; + y = v.vector[1]; + + for (i = 0; i < width; ++i) + { + int width, height, x0, y0; + const uint8_t *row; + + if (mask && !mask[i]) + goto next; + + width = image->bits.width; + height = image->bits.height; + x0 = pixman_fixed_to_int (x - pixman_fixed_e); + y0 = pixman_fixed_to_int (y - pixman_fixed_e); + + if (repeat_mode == PIXMAN_REPEAT_NONE && + (y0 < 0 || y0 >= height || x0 < 0 || x0 >= width)) + { + buffer[i] = 0; + } + else + { + uint32_t mask = PIXMAN_FORMAT_A (format)? 
0 : 0xff000000; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &x0, width); + repeat (repeat_mode, &y0, height); + } + + row = (uint8_t *)bits->bits + bits->rowstride * 4 * y0; + + buffer[i] = convert_pixel (row, x0) | mask; + } + + next: + x += ux; + y += uy; + } +} + +static force_inline uint32_t +convert_a8r8g8b8 (const uint8_t *row, int x) +{ + return *(((uint32_t *)row) + x); +} + +static force_inline uint32_t +convert_x8r8g8b8 (const uint8_t *row, int x) +{ + return *(((uint32_t *)row) + x); +} + +static force_inline uint32_t +convert_a8 (const uint8_t *row, int x) +{ + return *(row + x) << 24; +} + +static force_inline uint32_t +convert_r5g6b5 (const uint8_t *row, int x) +{ + return convert_0565_to_0888 (*((uint16_t *)row + x)); +} + +#define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode) \ + static uint32_t * \ + bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t *iter, \ + const uint32_t * mask) \ + { \ + bits_image_fetch_separable_convolution_affine ( \ + iter->image, \ + iter->x, iter->y++, \ + iter->width, \ + iter->buffer, mask, \ + convert_ ## format, \ + PIXMAN_ ## format, \ + repeat_mode); \ + \ + return iter->buffer; \ + } + +#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode) \ + static uint32_t * \ + bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t *iter, \ + const uint32_t * mask) \ + { \ + bits_image_fetch_bilinear_affine (iter->image, \ + iter->x, iter->y++, \ + iter->width, \ + iter->buffer, mask, \ + convert_ ## format, \ + PIXMAN_ ## format, \ + repeat_mode); \ + return iter->buffer; \ + } + +#define MAKE_NEAREST_FETCHER(name, format, repeat_mode) \ + static uint32_t * \ + bits_image_fetch_nearest_affine_ ## name (pixman_iter_t *iter, \ + const uint32_t * mask) \ + { \ + bits_image_fetch_nearest_affine (iter->image, \ + iter->x, iter->y++, \ + iter->width, \ + iter->buffer, mask, \ + convert_ ## format, \ + PIXMAN_ ## format, \ + repeat_mode); \ + return iter->buffer; \ + 
} + +#define MAKE_FETCHERS(name, format, repeat_mode) \ + MAKE_NEAREST_FETCHER (name, format, repeat_mode) \ + MAKE_BILINEAR_FETCHER (name, format, repeat_mode) \ + MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode) + +MAKE_FETCHERS (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL) +MAKE_FETCHERS (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL) +MAKE_FETCHERS (pad_a8, a8, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_a8, a8, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_a8, a8, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_a8, a8, PIXMAN_REPEAT_NORMAL) +MAKE_FETCHERS (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD) +MAKE_FETCHERS (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE) +MAKE_FETCHERS (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT) +MAKE_FETCHERS (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL) + +#define IMAGE_FLAGS \ + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + +static const pixman_iter_info_t fast_iters[] = +{ + { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW | ITER_SRC, + _pixman_iter_init_bits_stride, fast_fetch_r5g6b5, NULL }, + + { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS, + ITER_NARROW | ITER_DEST, + _pixman_iter_init_bits_stride, + fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, + + { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS, + ITER_NARROW | ITER_DEST | ITER_IGNORE_RGB | ITER_IGNORE_ALPHA, + _pixman_iter_init_bits_stride, + fast_dest_fetch_noop, fast_write_back_r5g6b5 }, + + { PIXMAN_a8r8g8b8, + (FAST_PATH_STANDARD_FLAGS | + FAST_PATH_SCALE_TRANSFORM | + FAST_PATH_BILINEAR_FILTER | + FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR), + 
ITER_NARROW | ITER_SRC, + fast_bilinear_cover_iter_init, + NULL, NULL + }, + +#define FAST_BILINEAR_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_X_UNIT_POSITIVE | \ + FAST_PATH_Y_UNIT_ZERO | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_BILINEAR_FILTER) + + { PIXMAN_a8r8g8b8, + FAST_BILINEAR_FLAGS, + ITER_NARROW | ITER_SRC, + NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL + }, + + { PIXMAN_x8r8g8b8, + FAST_BILINEAR_FLAGS, + ITER_NARROW | ITER_SRC, + NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL + }, + +#define GENERAL_BILINEAR_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_BILINEAR_FILTER) + +#define GENERAL_NEAREST_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_NEAREST_FILTER) + +#define GENERAL_SEPARABLE_CONVOLUTION_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_SEPARABLE_CONVOLUTION_FILTER) + +#define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \ + { PIXMAN_ ## format, \ + GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ + ITER_NARROW | ITER_SRC, \ + NULL, bits_image_fetch_separable_convolution_affine_ ## name, NULL \ + }, + +#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ + { PIXMAN_ ## format, \ + GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ + ITER_NARROW | ITER_SRC, \ + NULL, bits_image_fetch_bilinear_affine_ ## name, NULL, \ + }, + +#define NEAREST_AFFINE_FAST_PATH(name, format, repeat) \ + { PIXMAN_ ## format, \ + GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ + ITER_NARROW | ITER_SRC, \ + NULL, bits_image_fetch_nearest_affine_ ## name, NULL \ + }, + +#define AFFINE_FAST_PATHS(name, format, 
repeat) \ + SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \ + BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ + NEAREST_AFFINE_FAST_PATH(name, format, repeat) + + AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD) + AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE) + AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT) + AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL) + AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD) + AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE) + AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT) + AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL) + AFFINE_FAST_PATHS (pad_a8, a8, PAD) + AFFINE_FAST_PATHS (none_a8, a8, NONE) + AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT) + AFFINE_FAST_PATHS (normal_a8, a8, NORMAL) + AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD) + AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE) + AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT) + AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL) + + { PIXMAN_null }, +}; pixman_implementation_t * _pixman_implementation_create_fast_path (pixman_implementation_t *fallback) @@ -2351,8 +3286,7 @@ _pixman_implementation_create_fast_path (pixman_implementation_t *fallback) pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); imp->fill = fast_path_fill; - imp->src_iter_init = fast_src_iter_init; - imp->dest_iter_init = fast_dest_iter_init; + imp->iter_info = fast_iters; return imp; } diff --git a/lib/pixman/pixman/pixman-filter.c b/lib/pixman/pixman/pixman-filter.c index 5ff7b6eaa..b2bf53fed 100644 --- a/lib/pixman/pixman/pixman-filter.c +++ b/lib/pixman/pixman/pixman-filter.c @@ -275,7 +275,7 @@ create_1d_filter (int *width, } total += c; - *p++ = (pixman_fixed_t)(c * 65535.0 + 0.5); + *p++ = (pixman_fixed_t)(c * 65536.0 + 0.5); } /* Normalize */ diff --git a/lib/pixman/pixman/pixman-general.c b/lib/pixman/pixman/pixman-general.c index 93a1b9acf..a653fa71a 100644 --- a/lib/pixman/pixman/pixman-general.c +++ 
b/lib/pixman/pixman/pixman-general.c @@ -37,43 +37,47 @@ #include <string.h> #include "pixman-private.h" -static pixman_bool_t -general_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static void +general_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *info) { pixman_image_t *image = iter->image; - if (image->type == LINEAR) - _pixman_linear_gradient_iter_init (image, iter); - else if (image->type == RADIAL) + switch (image->type) + { + case BITS: + if ((iter->iter_flags & ITER_SRC) == ITER_SRC) + _pixman_bits_image_src_iter_init (image, iter); + else + _pixman_bits_image_dest_iter_init (image, iter); + break; + + case LINEAR: + _pixman_linear_gradient_iter_init (image, iter); + break; + + case RADIAL: _pixman_radial_gradient_iter_init (image, iter); - else if (image->type == CONICAL) + break; + + case CONICAL: _pixman_conical_gradient_iter_init (image, iter); - else if (image->type == BITS) - _pixman_bits_image_src_iter_init (image, iter); - else if (image->type == SOLID) + break; + + case SOLID: _pixman_log_error (FUNC, "Solid image not handled by noop"); - else - _pixman_log_error (FUNC, "Pixman bug: unknown image type\n"); + break; - return TRUE; + default: + _pixman_log_error (FUNC, "Pixman bug: unknown image type\n"); + break; + } } -static pixman_bool_t -general_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static const pixman_iter_info_t general_iters[] = { - if (iter->image->type == BITS) - { - _pixman_bits_image_dest_iter_init (iter->image, iter); - - return TRUE; - } - else - { - _pixman_log_error (FUNC, "Trying to write to a non-writable image"); - - return FALSE; - } -} + { PIXMAN_any, 0, 0, general_iter_init, NULL, NULL }, + { PIXMAN_null }, +}; typedef struct op_info_t op_info_t; struct op_info_t @@ -110,13 +114,13 @@ general_composite_rect (pixman_implementation_t *imp, pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); - uint64_t stack_scanline_buffer[(SCANLINE_BUFFER_LENGTH * 3 + 7) / 
8]; + uint8_t stack_scanline_buffer[3 * SCANLINE_BUFFER_LENGTH]; uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer; uint8_t *src_buffer, *mask_buffer, *dest_buffer; pixman_iter_t src_iter, mask_iter, dest_iter; pixman_combine_32_func_t compose; pixman_bool_t component_alpha; - iter_flags_t narrow, src_iter_flags; + iter_flags_t width_flag, src_iter_flags; int Bpp; int i; @@ -124,28 +128,36 @@ general_composite_rect (pixman_implementation_t *imp, (!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) && (dest_image->common.flags & FAST_PATH_NARROW_FORMAT)) { - narrow = ITER_NARROW; + width_flag = ITER_NARROW; Bpp = 4; } else { - narrow = 0; + width_flag = ITER_WIDE; Bpp = 16; } - if (width * Bpp > SCANLINE_BUFFER_LENGTH) +#define ALIGN(addr) \ + ((uint8_t *)((((uintptr_t)(addr)) + 15) & (~15))) + + src_buffer = ALIGN (scanline_buffer); + mask_buffer = ALIGN (src_buffer + width * Bpp); + dest_buffer = ALIGN (mask_buffer + width * Bpp); + + if (ALIGN (dest_buffer + width * Bpp) > + scanline_buffer + sizeof (stack_scanline_buffer)) { - scanline_buffer = pixman_malloc_abc (width, 3, Bpp); + scanline_buffer = pixman_malloc_ab_plus_c (width, Bpp * 3, 32 * 3); if (!scanline_buffer) return; - } - src_buffer = scanline_buffer; - mask_buffer = src_buffer + width * Bpp; - dest_buffer = mask_buffer + width * Bpp; + src_buffer = ALIGN (scanline_buffer); + mask_buffer = ALIGN (src_buffer + width * Bpp); + dest_buffer = ALIGN (mask_buffer + width * Bpp); + } - if (!narrow) + if (width_flag == ITER_WIDE) { /* To make sure there aren't any NANs in the buffers */ memset (src_buffer, 0, width * Bpp); @@ -154,11 +166,12 @@ general_composite_rect (pixman_implementation_t *imp, } /* src iter */ - src_iter_flags = narrow | op_flags[op].src; + src_iter_flags = width_flag | op_flags[op].src | ITER_SRC; - _pixman_implementation_src_iter_init (imp->toplevel, &src_iter, src_image, - src_x, src_y, width, height, - src_buffer, src_iter_flags, info->src_flags); + 
_pixman_implementation_iter_init (imp->toplevel, &src_iter, src_image, + src_x, src_y, width, height, + src_buffer, src_iter_flags, + info->src_flags); /* mask iter */ if ((src_iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == @@ -176,17 +189,19 @@ general_composite_rect (pixman_implementation_t *imp, mask_image->common.component_alpha && PIXMAN_FORMAT_RGB (mask_image->bits.format); - _pixman_implementation_src_iter_init ( - imp->toplevel, &mask_iter, mask_image, mask_x, mask_y, width, height, - mask_buffer, narrow | (component_alpha? 0 : ITER_IGNORE_RGB), info->mask_flags); + _pixman_implementation_iter_init ( + imp->toplevel, &mask_iter, + mask_image, mask_x, mask_y, width, height, mask_buffer, + ITER_SRC | width_flag | (component_alpha? 0 : ITER_IGNORE_RGB), + info->mask_flags); /* dest iter */ - _pixman_implementation_dest_iter_init ( + _pixman_implementation_iter_init ( imp->toplevel, &dest_iter, dest_image, dest_x, dest_y, width, height, - dest_buffer, narrow | op_flags[op].dst, info->dest_flags); + dest_buffer, ITER_DEST | width_flag | op_flags[op].dst, info->dest_flags); compose = _pixman_implementation_lookup_combiner ( - imp->toplevel, op, component_alpha, narrow); + imp->toplevel, op, component_alpha, width_flag != ITER_WIDE); for (i = 0; i < height; ++i) { @@ -201,6 +216,13 @@ general_composite_rect (pixman_implementation_t *imp, dest_iter.write_back (&dest_iter); } + if (src_iter.fini) + src_iter.fini (&src_iter); + if (mask_iter.fini) + mask_iter.fini (&mask_iter); + if (dest_iter.fini) + dest_iter.fini (&dest_iter); + if (scanline_buffer != (uint8_t *) stack_scanline_buffer) free (scanline_buffer); } @@ -219,8 +241,7 @@ _pixman_implementation_create_general (void) _pixman_setup_combiner_functions_32 (imp); _pixman_setup_combiner_functions_float (imp); - imp->src_iter_init = general_src_iter_init; - imp->dest_iter_init = general_dest_iter_init; + imp->iter_info = general_iters; return imp; } diff --git a/lib/pixman/pixman/pixman-glyph.c 
b/lib/pixman/pixman/pixman-glyph.c index 5a271b64b..96a349ab4 100644 --- a/lib/pixman/pixman/pixman-glyph.c +++ b/lib/pixman/pixman/pixman-glyph.c @@ -391,6 +391,9 @@ box32_intersect (pixman_box32_t *dest, return dest->x2 > dest->x1 && dest->y2 > dest->y1; } +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) +__attribute__((__force_align_arg_pointer__)) +#endif PIXMAN_EXPORT void pixman_composite_glyphs_no_mask (pixman_op_t op, pixman_image_t *src, @@ -630,6 +633,9 @@ out: * - Trim the mask to the destination clip/image? * - Trim composite region based on sources, when the op ignores 0s. */ +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) +__attribute__((__force_align_arg_pointer__)) +#endif PIXMAN_EXPORT void pixman_composite_glyphs (pixman_op_t op, pixman_image_t *src, diff --git a/lib/pixman/pixman/pixman-image.c b/lib/pixman/pixman/pixman-image.c index 65041b43b..1ff1a4974 100644 --- a/lib/pixman/pixman/pixman-image.c +++ b/lib/pixman/pixman/pixman-image.c @@ -502,8 +502,10 @@ compute_image_info (pixman_image_t *image) break; } - /* Alpha map */ - if (!image->common.alpha_map) + /* Alpha maps are only supported for BITS images, so it's always + * safe to ignore their presence for non-BITS images + */ + if (!image->common.alpha_map || image->type != BITS) { flags |= FAST_PATH_NO_ALPHA_MAP; } @@ -918,12 +920,15 @@ _pixman_image_get_solid (pixman_implementation_t *imp, pixman_iter_t iter; otherwise: - _pixman_implementation_src_iter_init ( + _pixman_implementation_iter_init ( imp, &iter, image, 0, 0, 1, 1, (uint8_t *)&result, - ITER_NARROW, image->common.flags); + ITER_NARROW | ITER_SRC, image->common.flags); result = *iter.get_scanline (&iter, NULL); + + if (iter.fini) + iter.fini (&iter); } /* If necessary, convert RGB <--> BGR. 
*/ diff --git a/lib/pixman/pixman/pixman-implementation.c b/lib/pixman/pixman/pixman-implementation.c index cfb82bb1f..588405451 100644 --- a/lib/pixman/pixman/pixman-implementation.c +++ b/lib/pixman/pixman/pixman-implementation.c @@ -285,18 +285,26 @@ _pixman_implementation_fill (pixman_implementation_t *imp, return FALSE; } -pixman_bool_t -_pixman_implementation_src_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t iter_flags, - uint32_t image_flags) +static uint32_t * +get_scanline_null (pixman_iter_t *iter, const uint32_t *mask) { + return NULL; +} + +void +_pixman_implementation_iter_init (pixman_implementation_t *imp, + pixman_iter_t *iter, + pixman_image_t *image, + int x, + int y, + int width, + int height, + uint8_t *buffer, + iter_flags_t iter_flags, + uint32_t image_flags) +{ + pixman_format_code_t format; + iter->image = image; iter->buffer = (uint32_t *)buffer; iter->x = x; @@ -305,48 +313,40 @@ _pixman_implementation_src_iter_init (pixman_implementation_t *imp, iter->height = height; iter->iter_flags = iter_flags; iter->image_flags = image_flags; + iter->fini = NULL; - while (imp) + if (!iter->image) { - if (imp->src_iter_init && (*imp->src_iter_init) (imp, iter)) - return TRUE; - - imp = imp->fallback; + iter->get_scanline = get_scanline_null; + return; } - return FALSE; -} - -pixman_bool_t -_pixman_implementation_dest_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t iter_flags, - uint32_t image_flags) -{ - iter->image = image; - iter->buffer = (uint32_t *)buffer; - iter->x = x; - iter->y = y; - iter->width = width; - iter->height = height; - iter->iter_flags = iter_flags; - iter->image_flags = image_flags; + format = iter->image->common.extended_format_code; while (imp) { - if (imp->dest_iter_init && 
(*imp->dest_iter_init) (imp, iter)) - return TRUE; - - imp = imp->fallback; + if (imp->iter_info) + { + const pixman_iter_info_t *info; + + for (info = imp->iter_info; info->format != PIXMAN_null; ++info) + { + if ((info->format == PIXMAN_any || info->format == format) && + (info->image_flags & image_flags) == info->image_flags && + (info->iter_flags & iter_flags) == info->iter_flags) + { + iter->get_scanline = info->get_scanline; + iter->write_back = info->write_back; + + if (info->initializer) + info->initializer (iter, info); + return; + } + } + } + + imp = imp->fallback; } - - return FALSE; } pixman_bool_t diff --git a/lib/pixman/pixman/pixman-matrix.c b/lib/pixman/pixman/pixman-matrix.c index cacc05cc8..4032c137a 100644 --- a/lib/pixman/pixman/pixman-matrix.c +++ b/lib/pixman/pixman/pixman-matrix.c @@ -37,8 +37,7 @@ static force_inline int count_leading_zeros (uint32_t x) { -#if defined(__GNUC__) && \ - (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +#ifdef HAVE_BUILTIN_CLZ return __builtin_clz (x); #else int n = 0; diff --git a/lib/pixman/pixman/pixman-mmx.c b/lib/pixman/pixman/pixman-mmx.c index 14790c029..f9a92ce09 100644 --- a/lib/pixman/pixman/pixman-mmx.c +++ b/lib/pixman/pixman/pixman-mmx.c @@ -301,6 +301,29 @@ negate (__m64 mask) return _mm_xor_si64 (mask, MC (4x00ff)); } +/* Computes the product of two unsigned fixed-point 8-bit values from 0 to 1 + * and maps its result to the same range. + * + * Jim Blinn gives multiple ways to compute this in "Jim Blinn's Corner: + * Notation, Notation, Notation", the first of which is + * + * prod(a, b) = (a * b + 128) / 255. + * + * By approximating the division by 255 as 257/65536 it can be replaced by a + * multiply and a right shift. This is the implementation that we use in + * pix_multiply(), but we _mm_mulhi_pu16() by 257 (part of SSE1 or Extended + * 3DNow!, and unavailable at the time of the book's publication) to perform + * the multiplication and right shift in a single operation. 
+ * + * prod(a, b) = ((a * b + 128) * 257) >> 16. + * + * A third way (how pix_multiply() was implemented prior to 14208344) exists + * also that performs the multiplication by 257 with adds and shifts. + * + * Where temp = a * b + 128 + * + * prod(a, b) = (temp + (temp >> 8)) >> 8. + */ static force_inline __m64 pix_multiply (__m64 a, __m64 b) { @@ -3538,7 +3561,6 @@ mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp, #define BILINEAR_DECLARE_VARIABLES \ const __m64 mm_wt = _mm_set_pi16 (wt, wt, wt, wt); \ const __m64 mm_wb = _mm_set_pi16 (wb, wb, wb, wb); \ - const __m64 mm_BSHIFT = _mm_set_pi16 (BSHIFT, BSHIFT, BSHIFT, BSHIFT); \ const __m64 mm_addc7 = _mm_set_pi16 (0, 1, 0, 1); \ const __m64 mm_xorc7 = _mm_set_pi16 (0, BMSK, 0, BMSK); \ const __m64 mm_ux = _mm_set_pi16 (unit_x, unit_x, unit_x, unit_x); \ @@ -3557,36 +3579,16 @@ do { \ __m64 b_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (b, mm_zero), mm_wb); \ __m64 hi = _mm_add_pi16 (t_hi, b_hi); \ __m64 lo = _mm_add_pi16 (t_lo, b_lo); \ - vx += unit_x; \ - if (BILINEAR_INTERPOLATION_BITS < 8) \ - { \ - /* calculate horizontal weights */ \ - __m64 mm_wh = _mm_add_pi16 (mm_addc7, _mm_xor_si64 (mm_xorc7, \ + /* calculate horizontal weights */ \ + __m64 mm_wh = _mm_add_pi16 (mm_addc7, _mm_xor_si64 (mm_xorc7, \ _mm_srli_pi16 (mm_x, \ 16 - BILINEAR_INTERPOLATION_BITS))); \ - /* horizontal interpolation */ \ - __m64 p = _mm_unpacklo_pi16 (lo, hi); \ - __m64 q = _mm_unpackhi_pi16 (lo, hi); \ - lo = _mm_madd_pi16 (p, mm_wh); \ - hi = _mm_madd_pi16 (q, mm_wh); \ - } \ - else \ - { \ - /* calculate horizontal weights */ \ - __m64 mm_wh_lo = _mm_sub_pi16 (mm_BSHIFT, _mm_srli_pi16 (mm_x, \ - 16 - BILINEAR_INTERPOLATION_BITS)); \ - __m64 mm_wh_hi = _mm_srli_pi16 (mm_x, \ - 16 - BILINEAR_INTERPOLATION_BITS); \ - /* horizontal interpolation */ \ - __m64 mm_lo_lo = _mm_mullo_pi16 (lo, mm_wh_lo); \ - __m64 mm_lo_hi = _mm_mullo_pi16 (hi, mm_wh_hi); \ - __m64 mm_hi_lo = _mm_mulhi_pu16 (lo, mm_wh_lo); \ - __m64 mm_hi_hi = 
_mm_mulhi_pu16 (hi, mm_wh_hi); \ - lo = _mm_add_pi32 (_mm_unpacklo_pi16 (mm_lo_lo, mm_hi_lo), \ - _mm_unpacklo_pi16 (mm_lo_hi, mm_hi_hi)); \ - hi = _mm_add_pi32 (_mm_unpackhi_pi16 (mm_lo_lo, mm_hi_lo), \ - _mm_unpackhi_pi16 (mm_lo_hi, mm_hi_hi)); \ - } \ + /* horizontal interpolation */ \ + __m64 p = _mm_unpacklo_pi16 (lo, hi); \ + __m64 q = _mm_unpackhi_pi16 (lo, hi); \ + vx += unit_x; \ + lo = _mm_madd_pi16 (p, mm_wh); \ + hi = _mm_madd_pi16 (q, mm_wh); \ mm_x = _mm_add_pi16 (mm_x, mm_ux); \ /* shift and pack the result */ \ hi = _mm_srli_pi32 (hi, BILINEAR_INTERPOLATION_BITS * 2); \ @@ -3899,52 +3901,23 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) return iter->buffer; } -typedef struct -{ - pixman_format_code_t format; - pixman_iter_get_scanline_t get_scanline; -} fetcher_info_t; - -static const fetcher_info_t fetchers[] = -{ - { PIXMAN_x8r8g8b8, mmx_fetch_x8r8g8b8 }, - { PIXMAN_r5g6b5, mmx_fetch_r5g6b5 }, - { PIXMAN_a8, mmx_fetch_a8 }, - { PIXMAN_null } -}; - -static pixman_bool_t -mmx_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) -{ - pixman_image_t *image = iter->image; - -#define FLAGS \ +#define IMAGE_FLAGS \ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) - if ((iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FLAGS) == FLAGS) - { - const fetcher_info_t *f; - - for (f = &fetchers[0]; f->format != PIXMAN_null; f++) - { - if (image->common.extended_format_code == f->format) - { - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; - - iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; - iter->stride = s; - - iter->get_scanline = f->get_scanline; - return TRUE; - } - } - } - - return FALSE; -} +static const pixman_iter_info_t mmx_iters[] = +{ + { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL + }, + { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, + 
_pixman_iter_init_bits_stride, mmx_fetch_r5g6b5, NULL + }, + { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, mmx_fetch_a8, NULL + }, + { PIXMAN_null }, +}; static const pixman_fast_path_t mmx_fast_paths[] = { @@ -4074,7 +4047,7 @@ _pixman_implementation_create_mmx (pixman_implementation_t *fallback) imp->blt = mmx_blt; imp->fill = mmx_fill; - imp->src_iter_init = mmx_src_iter_init; + imp->iter_info = mmx_iters; return imp; } diff --git a/lib/pixman/pixman/pixman-noop.c b/lib/pixman/pixman/pixman-noop.c index e39996d9d..e59890492 100644 --- a/lib/pixman/pixman/pixman-noop.c +++ b/lib/pixman/pixman/pixman-noop.c @@ -37,12 +37,6 @@ noop_composite (pixman_implementation_t *imp, return; } -static void -dest_write_back_direct (pixman_iter_t *iter) -{ - iter->buffer += iter->image->bits.rowstride; -} - static uint32_t * noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask) { @@ -53,110 +47,102 @@ noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask) return result; } -static uint32_t * -get_scanline_null (pixman_iter_t *iter, const uint32_t *mask) -{ - return NULL; +static void +noop_init_solid_narrow (pixman_iter_t *iter, + const pixman_iter_info_t *info) +{ + pixman_image_t *image = iter->image; + uint32_t *buffer = iter->buffer; + uint32_t *end = buffer + iter->width; + uint32_t color; + + if (iter->image->type == SOLID) + color = image->solid.color_32; + else + color = image->bits.fetch_pixel_32 (&image->bits, 0, 0); + + while (buffer < end) + *(buffer++) = color; } -static pixman_bool_t -noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static void +noop_init_solid_wide (pixman_iter_t *iter, + const pixman_iter_info_t *info) { pixman_image_t *image = iter->image; + argb_t *buffer = (argb_t *)iter->buffer; + argb_t *end = buffer + iter->width; + argb_t color; -#define FLAGS \ - (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM) - - if (!image) - { - iter->get_scanline = get_scanline_null; - } - else if 
((iter->iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == - (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) - { - iter->get_scanline = _pixman_iter_get_scanline_noop; - } - else if (image->common.extended_format_code == PIXMAN_solid && - (iter->image->type == SOLID || - (iter->image_flags & FAST_PATH_NO_ALPHA_MAP))) - { - if (iter->iter_flags & ITER_NARROW) - { - uint32_t *buffer = iter->buffer; - uint32_t *end = buffer + iter->width; - uint32_t color; - - if (image->type == SOLID) - color = image->solid.color_32; - else - color = image->bits.fetch_pixel_32 (&image->bits, 0, 0); - - while (buffer < end) - *(buffer++) = color; - } - else - { - argb_t *buffer = (argb_t *)iter->buffer; - argb_t *end = buffer + iter->width; - argb_t color; - - if (image->type == SOLID) - color = image->solid.color_float; - else - color = image->bits.fetch_pixel_float (&image->bits, 0, 0); - - while (buffer < end) - *(buffer++) = color; - } - - iter->get_scanline = _pixman_iter_get_scanline_noop; - } - else if (image->common.extended_format_code == PIXMAN_a8r8g8b8 && - (iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FLAGS) == FLAGS && - iter->x >= 0 && iter->y >= 0 && - iter->x + iter->width <= image->bits.width && - iter->y + iter->height <= image->bits.height) - { - iter->buffer = - image->bits.bits + iter->y * image->bits.rowstride + iter->x; - - iter->get_scanline = noop_get_scanline; - } + if (iter->image->type == SOLID) + color = image->solid.color_float; else - { - return FALSE; - } + color = image->bits.fetch_pixel_float (&image->bits, 0, 0); - return TRUE; + while (buffer < end) + *(buffer++) = color; } -static pixman_bool_t -noop_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +static void +noop_init_direct_buffer (pixman_iter_t *iter, const pixman_iter_info_t *info) { pixman_image_t *image = iter->image; - uint32_t image_flags = iter->image_flags; - uint32_t iter_flags = iter->iter_flags; - - if ((image_flags & FAST_PATH_STD_DEST_FLAGS) == 
FAST_PATH_STD_DEST_FLAGS && - (iter_flags & ITER_NARROW) == ITER_NARROW && - ((image->common.extended_format_code == PIXMAN_a8r8g8b8) || - (image->common.extended_format_code == PIXMAN_x8r8g8b8 && - (iter_flags & (ITER_LOCALIZED_ALPHA))))) - { - iter->buffer = image->bits.bits + iter->y * image->bits.rowstride + iter->x; - - iter->get_scanline = _pixman_iter_get_scanline_noop; - iter->write_back = dest_write_back_direct; - - return TRUE; - } - else - { - return FALSE; - } + + iter->buffer = + image->bits.bits + iter->y * image->bits.rowstride + iter->x; } +static void +dest_write_back_direct (pixman_iter_t *iter) +{ + iter->buffer += iter->image->bits.rowstride; +} + +static const pixman_iter_info_t noop_iters[] = +{ + /* Source iters */ + { PIXMAN_any, + 0, ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_SRC, + NULL, + _pixman_iter_get_scanline_noop, + NULL + }, + { PIXMAN_solid, + FAST_PATH_NO_ALPHA_MAP, ITER_NARROW | ITER_SRC, + noop_init_solid_narrow, + _pixman_iter_get_scanline_noop, + NULL, + }, + { PIXMAN_solid, + FAST_PATH_NO_ALPHA_MAP, ITER_WIDE | ITER_SRC, + noop_init_solid_wide, + _pixman_iter_get_scanline_noop, + NULL + }, + { PIXMAN_a8r8g8b8, + FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, + ITER_NARROW | ITER_SRC, + noop_init_direct_buffer, + noop_get_scanline, + NULL + }, + /* Dest iters */ + { PIXMAN_a8r8g8b8, + FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST, + noop_init_direct_buffer, + _pixman_iter_get_scanline_noop, + dest_write_back_direct + }, + { PIXMAN_x8r8g8b8, + FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST | ITER_LOCALIZED_ALPHA, + noop_init_direct_buffer, + _pixman_iter_get_scanline_noop, + dest_write_back_direct + }, + { PIXMAN_null }, +}; + static const pixman_fast_path_t noop_fast_paths[] = { { PIXMAN_OP_DST, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, noop_composite }, @@ -169,8 +155,7 @@ _pixman_implementation_create_noop (pixman_implementation_t *fallback) 
pixman_implementation_t *imp = _pixman_implementation_create (fallback, noop_fast_paths); - imp->src_iter_init = noop_src_iter_init; - imp->dest_iter_init = noop_dest_iter_init; + imp->iter_info = noop_iters; return imp; } diff --git a/lib/pixman/pixman/pixman-private.h b/lib/pixman/pixman/pixman-private.h index 6d9c05321..6ca13b216 100644 --- a/lib/pixman/pixman/pixman-private.h +++ b/lib/pixman/pixman/pixman-private.h @@ -57,7 +57,7 @@ struct argb_t float b; }; -typedef void (*fetch_scanline_t) (pixman_image_t *image, +typedef void (*fetch_scanline_t) (bits_image_t *image, int x, int y, int width, @@ -209,10 +209,12 @@ union pixman_image typedef struct pixman_iter_t pixman_iter_t; typedef uint32_t *(* pixman_iter_get_scanline_t) (pixman_iter_t *iter, const uint32_t *mask); typedef void (* pixman_iter_write_back_t) (pixman_iter_t *iter); +typedef void (* pixman_iter_fini_t) (pixman_iter_t *iter); typedef enum { - ITER_NARROW = (1 << 0), + ITER_NARROW = (1 << 0), + ITER_WIDE = (1 << 1), /* "Localized alpha" is when the alpha channel is used only to compute * the alpha value of the destination. This means that the computation @@ -229,9 +231,15 @@ typedef enum * we can treat it as if it were ARGB, which means in some cases we can * avoid copying it to a temporary buffer. 
*/ - ITER_LOCALIZED_ALPHA = (1 << 1), - ITER_IGNORE_ALPHA = (1 << 2), - ITER_IGNORE_RGB = (1 << 3) + ITER_LOCALIZED_ALPHA = (1 << 2), + ITER_IGNORE_ALPHA = (1 << 3), + ITER_IGNORE_RGB = (1 << 4), + + /* These indicate whether the iterator is for a source + * or a destination image + */ + ITER_SRC = (1 << 5), + ITER_DEST = (1 << 6) } iter_flags_t; struct pixman_iter_t @@ -248,6 +256,7 @@ struct pixman_iter_t /* These function pointers are initialized by the implementation */ pixman_iter_get_scanline_t get_scanline; pixman_iter_write_back_t write_back; + pixman_iter_fini_t fini; /* These fields are scratch data that implementations can use */ void * data; @@ -255,6 +264,19 @@ struct pixman_iter_t int stride; }; +typedef struct pixman_iter_info_t pixman_iter_info_t; +typedef void (* pixman_iter_initializer_t) (pixman_iter_t *iter, + const pixman_iter_info_t *info); +struct pixman_iter_info_t +{ + pixman_format_code_t format; + uint32_t image_flags; + iter_flags_t iter_flags; + pixman_iter_initializer_t initializer; + pixman_iter_get_scanline_t get_scanline; + pixman_iter_write_back_t write_back; +}; + void _pixman_bits_image_setup_accessors (bits_image_t *image); @@ -454,8 +476,6 @@ typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp, int width, int height, uint32_t filler); -typedef pixman_bool_t (*pixman_iter_init_func_t) (pixman_implementation_t *imp, - pixman_iter_t *iter); void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp); void _pixman_setup_combiner_functions_float (pixman_implementation_t *imp); @@ -477,11 +497,10 @@ struct pixman_implementation_t pixman_implementation_t * toplevel; pixman_implementation_t * fallback; const pixman_fast_path_t * fast_paths; + const pixman_iter_info_t * iter_info; pixman_blt_func_t blt; pixman_fill_func_t fill; - pixman_iter_init_func_t src_iter_init; - pixman_iter_init_func_t dest_iter_init; pixman_combine_32_func_t combine_32[PIXMAN_N_OPERATORS]; pixman_combine_32_func_t 
combine_32_ca[PIXMAN_N_OPERATORS]; @@ -542,29 +561,17 @@ _pixman_implementation_fill (pixman_implementation_t *imp, int height, uint32_t filler); -pixman_bool_t -_pixman_implementation_src_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t flags, - uint32_t image_flags); - -pixman_bool_t -_pixman_implementation_dest_iter_init (pixman_implementation_t *imp, - pixman_iter_t *iter, - pixman_image_t *image, - int x, - int y, - int width, - int height, - uint8_t *buffer, - iter_flags_t flags, - uint32_t image_flags); +void +_pixman_implementation_iter_init (pixman_implementation_t *imp, + pixman_iter_t *iter, + pixman_image_t *image, + int x, + int y, + int width, + int height, + uint8_t *buffer, + iter_flags_t flags, + uint32_t image_flags); /* Specific implementations */ pixman_implementation_t * @@ -586,6 +593,11 @@ pixman_implementation_t * _pixman_implementation_create_sse2 (pixman_implementation_t *fallback); #endif +#ifdef USE_SSSE3 +pixman_implementation_t * +_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback); +#endif + #ifdef USE_ARM_SIMD pixman_implementation_t * _pixman_implementation_create_arm_simd (pixman_implementation_t *fallback); @@ -647,6 +659,9 @@ _pixman_compute_composite_region32 (pixman_region32_t * region, uint32_t * _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask); +void +_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info); + /* These "formats" all have depth 0, so they * will never clash with any real ones */ @@ -777,6 +792,9 @@ pixman_malloc_ab (unsigned int n, unsigned int b); void * pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c); +void * +pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c); + pixman_bool_t _pixman_multiply_overflows_size (size_t a, size_t b); diff --git a/lib/pixman/pixman/pixman-sse2.c 
b/lib/pixman/pixman/pixman-sse2.c index 8a82eda7e..a6e780815 100644 --- a/lib/pixman/pixman/pixman-sse2.c +++ b/lib/pixman/pixman/pixman-sse2.c @@ -30,6 +30,9 @@ #include <config.h> #endif +/* PSHUFD is slow on a lot of old processors, and new processors have SSSE3 */ +#define PSHUFD_IS_FAST 0 + #include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */ #include <emmintrin.h> /* for SSE2 intrinsics */ #include "pixman-private.h" @@ -5554,77 +5557,134 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, scaled_nearest_scanline_sse2_8888_n_8888_OVER, uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE) -#if BILINEAR_INTERPOLATION_BITS < 8 +#if PSHUFD_IS_FAST + +/***********************************************************************************/ + # define BILINEAR_DECLARE_VARIABLES \ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ - const __m128i xmm_ux = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \ - unit_x, -unit_x, unit_x, -unit_x); \ + const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \ + unit_x, -unit_x, unit_x, -unit_x); \ + const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4); \ const __m128i xmm_zero = _mm_setzero_si128 (); \ - __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), \ - vx, -(vx + 1), vx, -(vx + 1)) -#else + __m128i xmm_x = _mm_set_epi16 (vx + unit_x * 3, -(vx + 1) - unit_x * 3, \ + vx + unit_x * 2, -(vx + 1) - unit_x * 2, \ + vx + unit_x * 1, -(vx + 1) - unit_x * 1, \ + vx + unit_x * 0, -(vx + 1) - unit_x * 0); \ + __m128i xmm_wh_state; + +#define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase_) \ +do { \ + int phase = phase_; \ + __m128i xmm_wh, xmm_a, xmm_b; \ + /* fetch 2x2 pixel block into sse2 registers */ \ + __m128i tltr = 
_mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \ + __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \ + vx += unit_x; \ + /* vertical interpolation */ \ + xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \ + xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \ + xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \ + /* calculate horizontal weights */ \ + if (phase <= 0) \ + { \ + xmm_wh_state = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS)); \ + xmm_x = _mm_add_epi16 (xmm_x, (phase < 0) ? xmm_ux1 : xmm_ux4); \ + phase = 0; \ + } \ + xmm_wh = _mm_shuffle_epi32 (xmm_wh_state, _MM_SHUFFLE (phase, phase, \ + phase, phase)); \ + /* horizontal interpolation */ \ + xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \ + xmm_a, _MM_SHUFFLE (1, 0, 3, 2)), xmm_a), xmm_wh); \ + /* shift the result */ \ + pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS * 2); \ +} while (0) + +#else /************************************************************************/ + # define BILINEAR_DECLARE_VARIABLES \ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ - const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \ - const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x, \ - -unit_x, -unit_x, -unit_x, -unit_x); \ + const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ + const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \ + unit_x, -unit_x, unit_x, -unit_x); \ + const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4, \ + unit_x * 4, -unit_x * 4); \ const __m128i xmm_zero = _mm_setzero_si128 (); \ - __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, \ - -(vx + 1), -(vx + 1), -(vx + 1), -(vx + 1)) -#endif + __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), \ + 
vx, -(vx + 1), vx, -(vx + 1)) -#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \ +#define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase) \ do { \ - __m128i xmm_wh, xmm_lo, xmm_hi, a; \ + __m128i xmm_wh, xmm_a, xmm_b; \ /* fetch 2x2 pixel block into sse2 registers */ \ - __m128i tltr = _mm_loadl_epi64 ( \ - (__m128i *)&src_top[pixman_fixed_to_int (vx)]); \ - __m128i blbr = _mm_loadl_epi64 ( \ - (__m128i *)&src_bottom[pixman_fixed_to_int (vx)]); \ + __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \ + __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \ + (void)xmm_ux4; /* suppress warning: unused variable 'xmm_ux4' */ \ vx += unit_x; \ /* vertical interpolation */ \ - a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), \ - xmm_wt), \ - _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), \ - xmm_wb)); \ - if (BILINEAR_INTERPOLATION_BITS < 8) \ - { \ - /* calculate horizontal weights */ \ - xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ + xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \ + xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \ + xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \ + /* calculate horizontal weights */ \ + xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ 16 - BILINEAR_INTERPOLATION_BITS)); \ - xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ - /* horizontal interpolation */ \ - a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \ - a, _MM_SHUFFLE (1, 0, 3, 2)), a), xmm_wh); \ - } \ - else \ - { \ - /* calculate horizontal weights */ \ - xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ - 16 - BILINEAR_INTERPOLATION_BITS)); \ - xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ - /* horizontal interpolation */ \ - xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \ - xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \ - a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \ - _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \ - } \ - /* shift and pack the result */ \ - a 
= _mm_srli_epi32 (a, BILINEAR_INTERPOLATION_BITS * 2); \ - a = _mm_packs_epi32 (a, a); \ - a = _mm_packus_epi16 (a, a); \ - pix = _mm_cvtsi128_si32 (a); \ + xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \ + /* horizontal interpolation */ \ + xmm_b = _mm_unpacklo_epi64 (/* any value is fine here */ xmm_b, xmm_a); \ + xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); \ + /* shift the result */ \ + pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS * 2); \ } while (0) +/***********************************************************************************/ + +#endif + +#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix); \ +do { \ + __m128i xmm_pix; \ + BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix, -1); \ + xmm_pix = _mm_packs_epi32 (xmm_pix, xmm_pix); \ + xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); \ + pix = _mm_cvtsi128_si32 (xmm_pix); \ +} while(0) + +#define BILINEAR_INTERPOLATE_FOUR_PIXELS(pix); \ +do { \ + __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; \ + BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix1, 0); \ + BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix2, 1); \ + BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix3, 2); \ + BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix4, 3); \ + xmm_pix1 = _mm_packs_epi32 (xmm_pix1, xmm_pix2); \ + xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4); \ + pix = _mm_packus_epi16 (xmm_pix1, xmm_pix3); \ +} while(0) + #define BILINEAR_SKIP_ONE_PIXEL() \ do { \ vx += unit_x; \ - xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ + xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \ +} while(0) + +#define BILINEAR_SKIP_FOUR_PIXELS() \ +do { \ + vx += unit_x * 4; \ + xmm_x = _mm_add_epi16 (xmm_x, xmm_ux4); \ } while(0) +/***********************************************************************************/ + static force_inline void scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst, const uint32_t * mask, @@ -5633,24 +5693,28 @@ scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst, int32_t w, int wt, int wb, - pixman_fixed_t vx, - 
pixman_fixed_t unit_x, + pixman_fixed_t vx_, + pixman_fixed_t unit_x_, pixman_fixed_t max_vx, pixman_bool_t zero_src) { + intptr_t vx = vx_; + intptr_t unit_x = unit_x_; BILINEAR_DECLARE_VARIABLES; - uint32_t pix1, pix2, pix3, pix4; + uint32_t pix1, pix2; - while ((w -= 4) >= 0) + while (w && ((uintptr_t)dst & 15)) { BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); *dst++ = pix1; - *dst++ = pix2; - *dst++ = pix3; - *dst++ = pix4; + w--; + } + + while ((w -= 4) >= 0) { + __m128i xmm_src; + BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); + _mm_store_si128 ((__m128i *)dst, xmm_src); + dst += 4; } if (w & 2) @@ -5687,6 +5751,66 @@ FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC, NORMAL, FLAG_NONE) static force_inline void +scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx_, + pixman_fixed_t unit_x_, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + intptr_t vx = vx_; + intptr_t unit_x = unit_x_; + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1, pix2; + + while (w && ((uintptr_t)dst & 15)) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + *dst++ = pix1 | 0xFF000000; + w--; + } + + while ((w -= 4) >= 0) { + __m128i xmm_src; + BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); + _mm_store_si128 ((__m128i *)dst, _mm_or_si128 (xmm_src, mask_ff000000)); + dst += 4; + } + + if (w & 2) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); + *dst++ = pix1 | 0xFF000000; + *dst++ = pix2 | 0xFF000000; + } + + if (w & 1) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + *dst = pix1 | 0xFF000000; + } +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_cover_SRC, + scaled_bilinear_scanline_sse2_x888_8888_SRC, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON 
(sse2_x888_8888_pad_SRC, + scaled_bilinear_scanline_sse2_x888_8888_SRC, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_normal_SRC, + scaled_bilinear_scanline_sse2_x888_8888_SRC, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_NONE) + +static force_inline void scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst, const uint32_t * mask, const uint32_t * src_top, @@ -5694,13 +5818,15 @@ scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst, int32_t w, int wt, int wb, - pixman_fixed_t vx, - pixman_fixed_t unit_x, + pixman_fixed_t vx_, + pixman_fixed_t unit_x_, pixman_fixed_t max_vx, pixman_bool_t zero_src) { + intptr_t vx = vx_; + intptr_t unit_x = unit_x_; BILINEAR_DECLARE_VARIABLES; - uint32_t pix1, pix2, pix3, pix4; + uint32_t pix1, pix2; while (w && ((uintptr_t)dst & 15)) { @@ -5722,12 +5848,7 @@ scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst, __m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo; __m128i xmm_alpha_hi, xmm_alpha_lo; - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); - - xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); + BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); if (!is_zero (xmm_src)) { @@ -5794,13 +5915,15 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst, int32_t w, int wt, int wb, - pixman_fixed_t vx, - pixman_fixed_t unit_x, + pixman_fixed_t vx_, + pixman_fixed_t unit_x_, pixman_fixed_t max_vx, pixman_bool_t zero_src) { + intptr_t vx = vx_; + intptr_t unit_x = unit_x_; BILINEAR_DECLARE_VARIABLES; - uint32_t pix1, pix2, pix3, pix4; + uint32_t pix1, pix2; uint32_t m; while (w && ((uintptr_t)dst & 15)) @@ -5851,12 +5974,7 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst, if (m) { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); - 
BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); - - xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); + BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); if (m == 0xffffffff && is_opaque (xmm_src)) { @@ -5883,10 +6001,7 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst, } else { - BILINEAR_SKIP_ONE_PIXEL (); - BILINEAR_SKIP_ONE_PIXEL (); - BILINEAR_SKIP_ONE_PIXEL (); - BILINEAR_SKIP_ONE_PIXEL (); + BILINEAR_SKIP_FOUR_PIXELS (); } w -= 4; @@ -5958,13 +6073,15 @@ scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst, int32_t w, int wt, int wb, - pixman_fixed_t vx, - pixman_fixed_t unit_x, + pixman_fixed_t vx_, + pixman_fixed_t unit_x_, pixman_fixed_t max_vx, pixman_bool_t zero_src) { + intptr_t vx = vx_; + intptr_t unit_x = unit_x_; BILINEAR_DECLARE_VARIABLES; - uint32_t pix1, pix2, pix3, pix4; + uint32_t pix1; __m128i xmm_mask; if (zero_src || (*mask >> 24) == 0) @@ -5994,19 +6111,15 @@ scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst, while (w >= 4) { - BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); - BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); + __m128i xmm_src; + BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); - if (pix1 | pix2 | pix3 | pix4) + if (!is_zero (xmm_src)) { - __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_src_lo, xmm_src_hi; __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; __m128i xmm_alpha_lo, xmm_alpha_hi; - xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); - xmm_dst = load_128_aligned ((__m128i*)dst); unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); @@ -6194,6 +6307,13 @@ static const pixman_fast_path_t sse2_fast_paths[] = SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, sse2_8888_8888), SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH_PAD (SRC, 
x8r8g8b8, a8r8g8b8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), @@ -6340,54 +6460,25 @@ sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) return iter->buffer; } -typedef struct -{ - pixman_format_code_t format; - pixman_iter_get_scanline_t get_scanline; -} fetcher_info_t; - -static const fetcher_info_t fetchers[] = -{ - { PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 }, - { PIXMAN_r5g6b5, sse2_fetch_r5g6b5 }, - { PIXMAN_a8, sse2_fetch_a8 }, - { PIXMAN_null } -}; - -static pixman_bool_t -sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) -{ - pixman_image_t *image = iter->image; - -#define FLAGS \ +#define IMAGE_FLAGS \ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) - if ((iter->iter_flags & ITER_NARROW) && - (iter->image_flags & FLAGS) == FLAGS) - { - const fetcher_info_t *f; - - for (f = &fetchers[0]; f->format != PIXMAN_null; f++) - { - if (image->common.extended_format_code == f->format) - { - uint8_t *b = (uint8_t *)image->bits.bits; - int s = image->bits.rowstride * 4; - - iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; - iter->stride = s; - - iter->get_scanline = f->get_scanline; - return TRUE; - } - } - } - - return FALSE; -} +static const pixman_iter_info_t sse2_iters[] = +{ + { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, sse2_fetch_x8r8g8b8, NULL + }, + { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, + _pixman_iter_init_bits_stride, sse2_fetch_r5g6b5, NULL + }, + { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, + 
_pixman_iter_init_bits_stride, sse2_fetch_a8, NULL + }, + { PIXMAN_null }, +}; -#if defined(__GNUC__) // && !defined(__x86_64__) && !defined(__amd64__) +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) __attribute__((__force_align_arg_pointer__)) #endif pixman_implementation_t * @@ -6443,7 +6534,7 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback) imp->blt = sse2_blt; imp->fill = sse2_fill; - imp->src_iter_init = sse2_src_iter_init; + imp->iter_info = sse2_iters; return imp; } diff --git a/lib/pixman/pixman/pixman-ssse3.c b/lib/pixman/pixman/pixman-ssse3.c new file mode 100644 index 000000000..680d6b95a --- /dev/null +++ b/lib/pixman/pixman/pixman-ssse3.c @@ -0,0 +1,351 @@ +/* + * Copyright © 2013 Soren Sandmann Pedersen + * Copyright © 2013 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Author: Soren Sandmann (soren.sandmann@gmail.com) + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdlib.h> +#include <mmintrin.h> +#include <xmmintrin.h> +#include <emmintrin.h> +#include <tmmintrin.h> +#include "pixman-private.h" +#include "pixman-inlines.h" + +typedef struct +{ + int y; + uint64_t * buffer; +} line_t; + +typedef struct +{ + line_t lines[2]; + pixman_fixed_t y; + pixman_fixed_t x; + uint64_t data[1]; +} bilinear_info_t; + +static void +ssse3_fetch_horizontal (bits_image_t *image, line_t *line, + int y, pixman_fixed_t x, pixman_fixed_t ux, int n) +{ + uint32_t *bits = image->bits + y * image->rowstride; + __m128i vx = _mm_set_epi16 ( + - (x + 1), x, - (x + 1), x, + - (x + ux + 1), x + ux, - (x + ux + 1), x + ux); + __m128i vux = _mm_set_epi16 ( + - 2 * ux, 2 * ux, - 2 * ux, 2 * ux, + - 2 * ux, 2 * ux, - 2 * ux, 2 * ux); + __m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0); + __m128i *b = (__m128i *)line->buffer; + __m128i vrl0, vrl1; + + while ((n -= 2) >= 0) + { + __m128i vw, vr, s; + + vrl1 = _mm_loadl_epi64 ( + (__m128i *)(bits + pixman_fixed_to_int (x + ux))); + /* vrl1: R1, L1 */ + + final_pixel: + vrl0 = _mm_loadl_epi64 ( + (__m128i *)(bits + pixman_fixed_to_int (x))); + /* vrl0: R0, L0 */ + + /* The weights are based on vx which is a vector of + * + * - (x + 1), x, - (x + 1), x, + * - (x + ux + 1), x + ux, - (x + ux + 1), x + ux + * + * so the 16 bit weights end up like this: + * + * iw0, w0, iw0, w0, iw1, w1, iw1, w1 + * + * and after shifting and packing, we get these bytes: + * + * iw0, w0, iw0, w0, iw1, w1, iw1, w1, + * iw0, w0, iw0, w0, iw1, w1, iw1, w1, + * + * which means the first and the second input pixel + * have to be interleaved like this: + * + * la0, ra0, lr0, rr0, la1, ra1, lr1, rr1, + * lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1 + * + * before maddubsw can be used. 
+ */ + + vw = _mm_add_epi16 ( + vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS)); + /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1 + */ + + vw = _mm_packus_epi16 (vw, vw); + /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1, + * iw0, w0, iw0, w0, iw1, w1, iw1, w1 + */ + vx = _mm_add_epi16 (vx, vux); + + x += 2 * ux; + + vr = _mm_unpacklo_epi16 (vrl1, vrl0); + /* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */ + + s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2)); + /* s: lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */ + + vr = _mm_unpackhi_epi8 (vr, s); + /* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1, + * lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1 + */ + + vr = _mm_maddubs_epi16 (vr, vw); + + /* When the weight is 0, the inverse weight is + * 128 which can't be represented in a signed byte. + * As a result maddubsw computes the following: + * + * r = l * -128 + r * 0 + * + * rather than the desired + * + * r = l * 128 + r * 0 + * + * We fix this by taking the absolute value of the + * result. 
+ */ + vr = _mm_abs_epi16 (vr); + + /* vr: A0, R0, A1, R1, G0, B0, G1, B1 */ + _mm_store_si128 (b++, vr); + } + + if (n == -1) + { + vrl1 = _mm_setzero_si128(); + goto final_pixel; + } + + line->y = y; +} + +static uint32_t * +ssse3_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask) +{ + pixman_fixed_t fx, ux; + bilinear_info_t *info = iter->data; + line_t *line0, *line1; + int y0, y1; + int32_t dist_y; + __m128i vw; + int i; + + fx = info->x; + ux = iter->image->common.transform->matrix[0][0]; + + y0 = pixman_fixed_to_int (info->y); + y1 = y0 + 1; + + line0 = &info->lines[y0 & 0x01]; + line1 = &info->lines[y1 & 0x01]; + + if (line0->y != y0) + { + ssse3_fetch_horizontal ( + &iter->image->bits, line0, y0, fx, ux, iter->width); + } + + if (line1->y != y1) + { + ssse3_fetch_horizontal ( + &iter->image->bits, line1, y1, fx, ux, iter->width); + } + + dist_y = pixman_fixed_to_bilinear_weight (info->y); + dist_y <<= (16 - BILINEAR_INTERPOLATION_BITS); + + vw = _mm_set_epi16 ( + dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y); + + for (i = 0; i + 3 < iter->width; i += 4) + { + __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i)); + __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i)); + __m128i top1 = _mm_load_si128 ((__m128i *)(line0->buffer + i + 2)); + __m128i bot1 = _mm_load_si128 ((__m128i *)(line1->buffer + i + 2)); + __m128i r0, r1, tmp, p; + + r0 = _mm_mulhi_epu16 ( + _mm_sub_epi16 (bot0, top0), vw); + tmp = _mm_cmplt_epi16 (bot0, top0); + tmp = _mm_and_si128 (tmp, vw); + r0 = _mm_sub_epi16 (r0, tmp); + r0 = _mm_add_epi16 (r0, top0); + r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS); + /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */ + r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1)); + /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */ + + r1 = _mm_mulhi_epu16 ( + _mm_sub_epi16 (bot1, top1), vw); + tmp = _mm_cmplt_epi16 (bot1, top1); + tmp = _mm_and_si128 (tmp, vw); + r1 = _mm_sub_epi16 (r1, tmp); + r1 = _mm_add_epi16 (r1, top1); + r1 
= _mm_srli_epi16 (r1, BILINEAR_INTERPOLATION_BITS); + r1 = _mm_shuffle_epi32 (r1, _MM_SHUFFLE (2, 0, 3, 1)); + /* r1: A3 R3 G3 B3 A2 R2 G2 B2 */ + + p = _mm_packus_epi16 (r0, r1); + + _mm_storeu_si128 ((__m128i *)(iter->buffer + i), p); + } + + while (i < iter->width) + { + __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i)); + __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i)); + __m128i r0, tmp, p; + + r0 = _mm_mulhi_epu16 ( + _mm_sub_epi16 (bot0, top0), vw); + tmp = _mm_cmplt_epi16 (bot0, top0); + tmp = _mm_and_si128 (tmp, vw); + r0 = _mm_sub_epi16 (r0, tmp); + r0 = _mm_add_epi16 (r0, top0); + r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS); + /* r0: A0 R0 A1 R1 G0 B0 G1 B1 */ + r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1)); + /* r0: A1 R1 G1 B1 A0 R0 G0 B0 */ + + p = _mm_packus_epi16 (r0, r0); + + if (iter->width - i == 1) + { + *(uint32_t *)(iter->buffer + i) = _mm_cvtsi128_si32 (p); + i++; + } + else + { + _mm_storel_epi64 ((__m128i *)(iter->buffer + i), p); + i += 2; + } + } + + info->y += iter->image->common.transform->matrix[1][1]; + + return iter->buffer; +} + +static void +ssse3_bilinear_cover_iter_fini (pixman_iter_t *iter) +{ + free (iter->data); +} + +static void +ssse3_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info) +{ + int width = iter->width; + bilinear_info_t *info; + pixman_vector_t v; + + /* Reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (iter->image->common.transform, &v)) + goto fail; + + info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t) + 64); + if (!info) + goto fail; + + info->x = v.vector[0] - pixman_fixed_1 / 2; + info->y = v.vector[1] - pixman_fixed_1 / 2; + +#define ALIGN(addr) \ + ((void *)((((uintptr_t)(addr)) + 15) & (~15))) + + /* It is safe to 
set the y coordinates to -1 initially + * because COVER_CLIP_BILINEAR ensures that we will only + * be asked to fetch lines in the [0, height) interval + */ + info->lines[0].y = -1; + info->lines[0].buffer = ALIGN (&(info->data[0])); + info->lines[1].y = -1; + info->lines[1].buffer = ALIGN (info->lines[0].buffer + width); + + iter->get_scanline = ssse3_fetch_bilinear_cover; + iter->fini = ssse3_bilinear_cover_iter_fini; + + iter->data = info; + return; + +fail: + /* Something went wrong, either a bad matrix or OOM; in such cases, + * we don't guarantee any particular rendering. + */ + _pixman_log_error ( + FUNC, "Allocation failure or bad matrix, skipping rendering\n"); + + iter->get_scanline = _pixman_iter_get_scanline_noop; + iter->fini = NULL; +} + +static const pixman_iter_info_t ssse3_iters[] = +{ + { PIXMAN_a8r8g8b8, + (FAST_PATH_STANDARD_FLAGS | + FAST_PATH_SCALE_TRANSFORM | + FAST_PATH_BILINEAR_FILTER | + FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR), + ITER_NARROW | ITER_SRC, + ssse3_bilinear_cover_iter_init, + NULL, NULL + }, + + { PIXMAN_null }, +}; + +static const pixman_fast_path_t ssse3_fast_paths[] = +{ + { PIXMAN_OP_NONE }, +}; + +pixman_implementation_t * +_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = + _pixman_implementation_create (fallback, ssse3_fast_paths); + + imp->iter_info = ssse3_iters; + + return imp; +} diff --git a/lib/pixman/pixman/pixman-utils.c b/lib/pixman/pixman/pixman-utils.c index f31171f6d..4a3a835c4 100644 --- a/lib/pixman/pixman/pixman-utils.c +++ b/lib/pixman/pixman/pixman-utils.c @@ -49,6 +49,15 @@ _pixman_addition_overflows_int (unsigned int a, unsigned int b) } void * +pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c) +{ + if (!b || a >= INT32_MAX / b || (a * b) > INT32_MAX - c) + return NULL; + + return malloc (a * b + c); +} + +void * pixman_malloc_ab (unsigned int a, unsigned int b) { @@ -214,6 +223,17 @@ _pixman_iter_get_scanline_noop 
(pixman_iter_t *iter, const uint32_t *mask) return iter->buffer; } +void +_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info) +{ + pixman_image_t *image = iter->image; + uint8_t *b = (uint8_t *)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (info->format) / 8; + iter->stride = s; +} + #define N_TMP_BOXES (16) pixman_bool_t diff --git a/lib/pixman/pixman/pixman-vmx.c b/lib/pixman/pixman/pixman-vmx.c index f629003ab..c33631c0e 100644 --- a/lib/pixman/pixman/pixman-vmx.c +++ b/lib/pixman/pixman/pixman-vmx.c @@ -134,15 +134,11 @@ over (vector unsigned int src, source ## _mask = vec_lvsl (0, source); #define COMPUTE_SHIFT_MASKS(dest, source) \ - dest ## _mask = vec_lvsl (0, dest); \ - source ## _mask = vec_lvsl (0, source); \ - store_mask = vec_lvsr (0, dest); + source ## _mask = vec_lvsl (0, source); #define COMPUTE_SHIFT_MASKC(dest, source, mask) \ mask ## _mask = vec_lvsl (0, mask); \ - dest ## _mask = vec_lvsl (0, dest); \ - source ## _mask = vec_lvsl (0, source); \ - store_mask = vec_lvsr (0, dest); + source ## _mask = vec_lvsl (0, source); /* notice you have to declare temp vars... * Note: tmp3 and tmp4 must remain untouched! 
@@ -151,23 +147,17 @@ over (vector unsigned int src, #define LOAD_VECTORS(dest, source) \ tmp1 = (typeof(tmp1))vec_ld (0, source); \ tmp2 = (typeof(tmp2))vec_ld (15, source); \ - tmp3 = (typeof(tmp3))vec_ld (0, dest); \ v ## source = (typeof(v ## source)) \ vec_perm (tmp1, tmp2, source ## _mask); \ - tmp4 = (typeof(tmp4))vec_ld (15, dest); \ - v ## dest = (typeof(v ## dest)) \ - vec_perm (tmp3, tmp4, dest ## _mask); + v ## dest = (typeof(v ## dest))vec_ld (0, dest); #define LOAD_VECTORSC(dest, source, mask) \ tmp1 = (typeof(tmp1))vec_ld (0, source); \ tmp2 = (typeof(tmp2))vec_ld (15, source); \ - tmp3 = (typeof(tmp3))vec_ld (0, dest); \ v ## source = (typeof(v ## source)) \ vec_perm (tmp1, tmp2, source ## _mask); \ - tmp4 = (typeof(tmp4))vec_ld (15, dest); \ tmp1 = (typeof(tmp1))vec_ld (0, mask); \ - v ## dest = (typeof(v ## dest)) \ - vec_perm (tmp3, tmp4, dest ## _mask); \ + v ## dest = (typeof(v ## dest))vec_ld (0, dest); \ tmp2 = (typeof(tmp2))vec_ld (15, mask); \ v ## mask = (typeof(v ## mask)) \ vec_perm (tmp1, tmp2, mask ## _mask); @@ -178,11 +168,7 @@ over (vector unsigned int src, splat_alpha (v ## mask)); #define STORE_VECTOR(dest) \ - edges = vec_perm (tmp4, tmp3, dest ## _mask); \ - tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \ - tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \ - vec_st ((vector unsigned int) tmp3, 15, dest); \ - vec_st ((vector unsigned int) tmp1, 0, dest); + vec_st ((vector unsigned int) v ## dest, 0, dest); static void vmx_combine_over_u_no_mask (uint32_t * dest, @@ -191,8 +177,19 @@ vmx_combine_over_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + + *dest++ = d; + 
width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -230,8 +227,23 @@ vmx_combine_over_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t ia; + + UN8x4_MUL_UN8 (s, m); + + ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -286,8 +298,18 @@ vmx_combine_over_reverse_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -324,8 +346,21 @@ vmx_combine_over_reverse_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8 (s, m); + + UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -379,8 +414,17 @@ vmx_combine_in_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t a = ALPHA_8 
(*dest); + + UN8x4_MUL_UN8 (s, a); + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -415,8 +459,20 @@ vmx_combine_in_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t a = ALPHA_8 (*dest); + + UN8x4_MUL_UN8 (s, m); + UN8x4_MUL_UN8 (s, a); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -468,8 +524,18 @@ vmx_combine_in_reverse_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t d = *dest; + uint32_t a = ALPHA_8 (*src++); + + UN8x4_MUL_UN8 (d, a); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -505,8 +571,21 @@ vmx_combine_in_reverse_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t d = *dest; + uint32_t a = *src++; + + UN8x4_MUL_UN8 (a, m); + a = ALPHA_8 (a); + UN8x4_MUL_UN8 (d, a); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -559,8 +638,18 @@ vmx_combine_out_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t a = ALPHA_8 (~(*dest)); + + UN8x4_MUL_UN8 (s, a); + + *dest++ = s; + 
width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -596,8 +685,20 @@ vmx_combine_out_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t a = ALPHA_8 (~(*dest)); + + UN8x4_MUL_UN8 (s, m); + UN8x4_MUL_UN8 (s, a); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -649,8 +750,18 @@ vmx_combine_out_reverse_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t d = *dest; + uint32_t a = ALPHA_8 (~(*src++)); + + UN8x4_MUL_UN8 (d, a); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -687,8 +798,21 @@ vmx_combine_out_reverse_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t d = *dest; + uint32_t a = *src++; + + UN8x4_MUL_UN8 (a, m); + a = ALPHA_8 (~a); + UN8x4_MUL_UN8 (d, a); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -741,8 +865,20 @@ vmx_combine_atop_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t dest_a = ALPHA_8 (d); + uint32_t src_ia = ALPHA_8 (~s); + + 
UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -781,8 +917,25 @@ vmx_combine_atop_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t dest_a = ALPHA_8 (d); + uint32_t src_ia; + + UN8x4_MUL_UN8 (s, m); + + src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -840,8 +993,20 @@ vmx_combine_atop_reverse_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t src_a = ALPHA_8 (s); + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -880,8 +1045,25 @@ vmx_combine_atop_reverse_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t src_a; + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8 (s, m); + + src_a = ALPHA_8 (s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -939,8 +1121,20 @@ vmx_combine_xor_u_no_mask (uint32_t * dest, { int 
i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t src_ia = ALPHA_8 (~s); + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); @@ -979,8 +1173,25 @@ vmx_combine_xor_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t src_ia; + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8 (s, m); + + src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1038,8 +1249,18 @@ vmx_combine_add_u_no_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t s = *src++; + uint32_t d = *dest; + + UN8x4_ADD_UN8x4 (d, s); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ @@ -1074,8 +1295,20 @@ vmx_combine_add_u_mask (uint32_t * dest, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + vector unsigned char tmp1, tmp2, src_mask, mask_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t m = ALPHA_8 (*mask++); + uint32_t s = *src++; + uint32_t d = *dest; + + UN8x4_MUL_UN8 (s, m); + 
UN8x4_ADD_UN8x4 (d, s); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1130,8 +1363,18 @@ vmx_combine_src_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + + UN8x4_MUL_UN8x4 (s, a); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1170,8 +1413,22 @@ vmx_combine_over_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (s); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1214,8 +1471,21 @@ vmx_combine_over_reverse_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t ida = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1257,8 +1527,20 @@ vmx_combine_in_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, mask_mask, 
src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t da = ALPHA_8 (*dest); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (s, da); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1299,8 +1581,20 @@ vmx_combine_in_reverse_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (*src++); + + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4 (d, a); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1342,8 +1636,21 @@ vmx_combine_out_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t da = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (s, da); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1386,8 +1693,21 @@ vmx_combine_out_reverse_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (s); + + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4 (d, ~a); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1430,8 +1750,23 @@ vmx_combine_atop_ca (pixman_implementation_t *imp, { int i; vector 
unsigned int vdest, vsrc, vmask, vsrca; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1481,8 +1816,23 @@ vmx_combine_atop_reverse_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1529,8 +1879,23 @@ vmx_combine_xor_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); + + *dest++ = d; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); @@ -1577,8 +1942,20 @@ vmx_combine_add_ca (pixman_implementation_t *imp, { int i; vector unsigned int vdest, vsrc, vmask; - vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - 
dest_mask, mask_mask, src_mask, store_mask; + vector unsigned char tmp1, tmp2, mask_mask, src_mask; + + while (width && ((uintptr_t)dest & 15)) + { + uint32_t a = *mask++; + uint32_t s = *src++; + uint32_t d = *dest; + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_ADD_UN8x4 (s, d); + + *dest++ = s; + width--; + } COMPUTE_SHIFT_MASKC (dest, src, mask); diff --git a/lib/pixman/pixman/pixman-x86.c b/lib/pixman/pixman/pixman-x86.c index 57e4d1f35..05297c476 100644 --- a/lib/pixman/pixman/pixman-x86.c +++ b/lib/pixman/pixman/pixman-x86.c @@ -25,7 +25,7 @@ #include "pixman-private.h" -#if defined(USE_X86_MMX) || defined (USE_SSE2) +#if defined(USE_X86_MMX) || defined (USE_SSE2) || defined (USE_SSSE3) /* The CPU detection code needs to be in a file not compiled with * "-mmmx -msse", as gcc would generate CMOV instructions otherwise @@ -39,7 +39,8 @@ typedef enum X86_MMX_EXTENSIONS = (1 << 1), X86_SSE = (1 << 2) | X86_MMX_EXTENSIONS, X86_SSE2 = (1 << 3), - X86_CMOV = (1 << 4) + X86_CMOV = (1 << 4), + X86_SSSE3 = (1 << 5) } cpu_features_t; #ifdef HAVE_GETISAX @@ -64,6 +65,8 @@ detect_cpu_features (void) features |= X86_SSE; if (result & AV_386_SSE2) features |= X86_SSE2; + if (result & AV_386_SSSE3) + features |= X86_SSSE3; } return features; @@ -167,6 +170,8 @@ detect_cpu_features (void) features |= X86_SSE; if (d & (1 << 26)) features |= X86_SSE2; + if (c & (1 << 9)) + features |= X86_SSSE3; /* Check for AMD specific features */ if ((features & X86_MMX) && !(features & X86_SSE)) @@ -222,6 +227,7 @@ _pixman_x86_get_implementations (pixman_implementation_t *imp) { #define MMX_BITS (X86_MMX | X86_MMX_EXTENSIONS) #define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2) +#define SSSE3_BITS (X86_SSE | X86_SSE2 | X86_SSSE3) #ifdef USE_X86_MMX if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS)) @@ -233,5 +239,10 @@ _pixman_x86_get_implementations (pixman_implementation_t *imp) imp = _pixman_implementation_create_sse2 (imp); #endif +#ifdef USE_SSSE3 + if (!_pixman_disabled 
("ssse3") && have_feature (SSSE3_BITS)) + imp = _pixman_implementation_create_ssse3 (imp); +#endif + return imp; } diff --git a/lib/pixman/pixman/pixman.c b/lib/pixman/pixman/pixman.c index 184f0c4e6..9555ceaaf 100644 --- a/lib/pixman/pixman/pixman.c +++ b/lib/pixman/pixman/pixman.c @@ -605,7 +605,7 @@ pixman_image_composite32 (pixman_op_t op, else { mask_format = PIXMAN_null; - info.mask_flags = FAST_PATH_IS_OPAQUE; + info.mask_flags = FAST_PATH_IS_OPAQUE | FAST_PATH_NO_ALPHA_MAP; } dest_format = dest->common.extended_format_code; diff --git a/lib/pixman/pixman/pixman.h b/lib/pixman/pixman/pixman.h index 7ff9fb52a..509ba5e53 100644 --- a/lib/pixman/pixman/pixman.h +++ b/lib/pixman/pixman/pixman.h @@ -1030,7 +1030,7 @@ struct pixman_triangle #define pixman_trapezoid_valid(t) \ ((t)->left.p1.y != (t)->left.p2.y && \ (t)->right.p1.y != (t)->right.p2.y && \ - (int) ((t)->bottom - (t)->top) > 0) + ((t)->bottom > (t)->top)) struct pixman_span_fix { diff --git a/lib/pixman/test-driver b/lib/pixman/test-driver new file mode 100644 index 000000000..32bf39e83 --- /dev/null +++ b/lib/pixman/test-driver @@ -0,0 +1,127 @@ +#! /bin/sh +# test-driver - basic testsuite driver script. + +scriptversion=2012-06-27.10; # UTC + +# Copyright (C) 2011-2013 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to <bug-automake@gnu.org> or send patches to +# <automake-patches@gnu.org>. + +# Make unconditional expansion of undefined variables an error. This +# helps a lot in preventing typo-related bugs. +set -u + +usage_error () +{ + echo "$0: $*" >&2 + print_usage >&2 + exit 2 +} + +print_usage () +{ + cat <<END +Usage: + test-driver --test-name=NAME --log-file=PATH --trs-file=PATH + [--expect-failure={yes|no}] [--color-tests={yes|no}] + [--enable-hard-errors={yes|no}] [--] TEST-SCRIPT +The '--test-name', '--log-file' and '--trs-file' options are mandatory. +END +} + +# TODO: better error handling in option parsing (in particular, ensure +# TODO: $log_file, $trs_file and $test_name are defined). +test_name= # Used for reporting. +log_file= # Where to save the output of the test script. +trs_file= # Where to save the metadata of the test run. +expect_failure=no +color_tests=no +enable_hard_errors=yes +while test $# -gt 0; do + case $1 in + --help) print_usage; exit $?;; + --version) echo "test-driver $scriptversion"; exit $?;; + --test-name) test_name=$2; shift;; + --log-file) log_file=$2; shift;; + --trs-file) trs_file=$2; shift;; + --color-tests) color_tests=$2; shift;; + --expect-failure) expect_failure=$2; shift;; + --enable-hard-errors) enable_hard_errors=$2; shift;; + --) shift; break;; + -*) usage_error "invalid option: '$1'";; + esac + shift +done + +if test $color_tests = yes; then + # Keep this in sync with 'lib/am/check.am:$(am__tty_colors)'. + red='[0;31m' # Red. + grn='[0;32m' # Green. + lgn='[1;32m' # Light green. + blu='[1;34m' # Blue. + mgn='[0;35m' # Magenta. + std='[m' # No color. 
+else + red= grn= lgn= blu= mgn= std= +fi + +do_exit='rm -f $log_file $trs_file; (exit $st); exit $st' +trap "st=129; $do_exit" 1 +trap "st=130; $do_exit" 2 +trap "st=141; $do_exit" 13 +trap "st=143; $do_exit" 15 + +# Test script is run here. +"$@" >$log_file 2>&1 +estatus=$? +if test $enable_hard_errors = no && test $estatus -eq 99; then + estatus=1 +fi + +case $estatus:$expect_failure in + 0:yes) col=$red res=XPASS recheck=yes gcopy=yes;; + 0:*) col=$grn res=PASS recheck=no gcopy=no;; + 77:*) col=$blu res=SKIP recheck=no gcopy=yes;; + 99:*) col=$mgn res=ERROR recheck=yes gcopy=yes;; + *:yes) col=$lgn res=XFAIL recheck=no gcopy=yes;; + *:*) col=$red res=FAIL recheck=yes gcopy=yes;; +esac + +# Report outcome to console. +echo "${col}${res}${std}: $test_name" + +# Register the test result, and other relevant metadata. +echo ":test-result: $res" > $trs_file +echo ":global-test-result: $res" >> $trs_file +echo ":recheck: $recheck" >> $trs_file +echo ":copy-in-global-log: $gcopy" >> $trs_file + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "; # UTC" +# End: diff --git a/lib/pixman/test/Makefile.am b/lib/pixman/test/Makefile.am index 5d901d572..88dc36d2a 100644 --- a/lib/pixman/test/Makefile.am +++ b/lib/pixman/test/Makefile.am @@ -1,8 +1,8 @@ include $(top_srcdir)/test/Makefile.sources -AM_CFLAGS = $(OPENMP_CFLAGS) -AM_LDFLAGS = $(OPENMP_CFLAGS) $(TESTPROGS_EXTRA_LDFLAGS) -LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm $(PNG_LIBS) +AM_CFLAGS = $(OPENMP_CFLAGS) $(PTHREAD_CFLAGS) +AM_LDFLAGS = $(OPENMP_CFLAGS) $(TESTPROGS_EXTRA_LDFLAGS) $(PTHREAD_LDFLAGS) +LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm $(PNG_LIBS) $(PTHREAD_LIBS) AM_CPPFLAGS = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(PNG_CFLAGS) libutils_la_SOURCES = $(libutils_sources) 
$(libutils_headers) diff --git a/lib/pixman/test/Makefile.in b/lib/pixman/test/Makefile.in index 48974546f..d7fa0d297 100644 --- a/lib/pixman/test/Makefile.in +++ b/lib/pixman/test/Makefile.in @@ -79,162 +79,203 @@ am__EXEEXT_1 = prng-test$(EXEEXT) a1-trap-test$(EXEEXT) \ region-translate-test$(EXEEXT) combiner-test$(EXEEXT) \ pixel-test$(EXEEXT) fetch-test$(EXEEXT) rotate-test$(EXEEXT) \ oob-test$(EXEEXT) infinite-loop$(EXEEXT) trap-crasher$(EXEEXT) \ - alpha-loop$(EXEEXT) scaling-crash-test$(EXEEXT) \ - scaling-helpers-test$(EXEEXT) gradient-crash-test$(EXEEXT) \ - region-contains-test$(EXEEXT) alphamap$(EXEEXT) \ - matrix-test$(EXEEXT) stress-test$(EXEEXT) \ + alpha-loop$(EXEEXT) thread-test$(EXEEXT) \ + scaling-crash-test$(EXEEXT) scaling-helpers-test$(EXEEXT) \ + gradient-crash-test$(EXEEXT) region-contains-test$(EXEEXT) \ + alphamap$(EXEEXT) matrix-test$(EXEEXT) stress-test$(EXEEXT) \ composite-traps-test$(EXEEXT) blitters-test$(EXEEXT) \ glyph-test$(EXEEXT) scaling-test$(EXEEXT) affine-test$(EXEEXT) \ composite$(EXEEXT) am__EXEEXT_2 = lowlevel-blt-bench$(EXEEXT) radial-perf-test$(EXEEXT) \ - check-formats$(EXEEXT) + check-formats$(EXEEXT) scaling-bench$(EXEEXT) PROGRAMS = $(noinst_PROGRAMS) a1_trap_test_SOURCES = a1-trap-test.c a1_trap_test_OBJECTS = a1-trap-test.$(OBJEXT) a1_trap_test_LDADD = $(LDADD) am__DEPENDENCIES_1 = a1_trap_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) affine_test_SOURCES = affine-test.c affine_test_OBJECTS = affine-test.$(OBJEXT) affine_test_LDADD = $(LDADD) affine_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) alpha_loop_SOURCES = alpha-loop.c alpha_loop_OBJECTS = alpha-loop.$(OBJEXT) alpha_loop_LDADD = $(LDADD) alpha_loop_DEPENDENCIES = libutils.la \ - 
$(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) alphamap_SOURCES = alphamap.c alphamap_OBJECTS = alphamap.$(OBJEXT) alphamap_LDADD = $(LDADD) alphamap_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) blitters_test_SOURCES = blitters-test.c blitters_test_OBJECTS = blitters-test.$(OBJEXT) blitters_test_LDADD = $(LDADD) blitters_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) check_formats_SOURCES = check-formats.c check_formats_OBJECTS = check-formats.$(OBJEXT) check_formats_LDADD = $(LDADD) check_formats_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) combiner_test_SOURCES = combiner-test.c combiner_test_OBJECTS = combiner-test.$(OBJEXT) combiner_test_LDADD = $(LDADD) combiner_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) composite_SOURCES = composite.c composite_OBJECTS = composite.$(OBJEXT) composite_LDADD = $(LDADD) composite_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) composite_traps_test_SOURCES = composite-traps-test.c composite_traps_test_OBJECTS = composite-traps-test.$(OBJEXT) composite_traps_test_LDADD = $(LDADD) composite_traps_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + 
$(am__DEPENDENCIES_1) fetch_test_SOURCES = fetch-test.c fetch_test_OBJECTS = fetch-test.$(OBJEXT) fetch_test_LDADD = $(LDADD) fetch_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) glyph_test_SOURCES = glyph-test.c glyph_test_OBJECTS = glyph-test.$(OBJEXT) glyph_test_LDADD = $(LDADD) glyph_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) gradient_crash_test_SOURCES = gradient-crash-test.c gradient_crash_test_OBJECTS = gradient-crash-test.$(OBJEXT) gradient_crash_test_LDADD = $(LDADD) gradient_crash_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) infinite_loop_SOURCES = infinite-loop.c infinite_loop_OBJECTS = infinite-loop.$(OBJEXT) infinite_loop_LDADD = $(LDADD) infinite_loop_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) lowlevel_blt_bench_SOURCES = lowlevel-blt-bench.c lowlevel_blt_bench_OBJECTS = lowlevel-blt-bench.$(OBJEXT) lowlevel_blt_bench_LDADD = $(LDADD) lowlevel_blt_bench_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) matrix_test_SOURCES = matrix-test.c matrix_test_OBJECTS = matrix-test.$(OBJEXT) matrix_test_LDADD = $(LDADD) matrix_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) oob_test_SOURCES = oob-test.c oob_test_OBJECTS = oob-test.$(OBJEXT) oob_test_LDADD = 
$(LDADD) oob_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) pdf_op_test_SOURCES = pdf-op-test.c pdf_op_test_OBJECTS = pdf-op-test.$(OBJEXT) pdf_op_test_LDADD = $(LDADD) pdf_op_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) pixel_test_SOURCES = pixel-test.c pixel_test_OBJECTS = pixel-test.$(OBJEXT) pixel_test_LDADD = $(LDADD) pixel_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) prng_test_SOURCES = prng-test.c prng_test_OBJECTS = prng-test.$(OBJEXT) prng_test_LDADD = $(LDADD) prng_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) radial_perf_test_SOURCES = radial-perf-test.c radial_perf_test_OBJECTS = radial-perf-test.$(OBJEXT) radial_perf_test_LDADD = $(LDADD) radial_perf_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) region_contains_test_SOURCES = region-contains-test.c region_contains_test_OBJECTS = region-contains-test.$(OBJEXT) region_contains_test_LDADD = $(LDADD) region_contains_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) region_test_SOURCES = region-test.c region_test_OBJECTS = region-test.$(OBJEXT) region_test_LDADD = $(LDADD) region_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + 
$(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) region_translate_test_SOURCES = region-translate-test.c region_translate_test_OBJECTS = region-translate-test.$(OBJEXT) region_translate_test_LDADD = $(LDADD) region_translate_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) rotate_test_SOURCES = rotate-test.c rotate_test_OBJECTS = rotate-test.$(OBJEXT) rotate_test_LDADD = $(LDADD) rotate_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) +scaling_bench_SOURCES = scaling-bench.c +scaling_bench_OBJECTS = scaling-bench.$(OBJEXT) +scaling_bench_LDADD = $(LDADD) +scaling_bench_DEPENDENCIES = libutils.la \ + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) scaling_crash_test_SOURCES = scaling-crash-test.c scaling_crash_test_OBJECTS = scaling-crash-test.$(OBJEXT) scaling_crash_test_LDADD = $(LDADD) scaling_crash_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) scaling_helpers_test_SOURCES = scaling-helpers-test.c scaling_helpers_test_OBJECTS = scaling-helpers-test.$(OBJEXT) scaling_helpers_test_LDADD = $(LDADD) scaling_helpers_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) scaling_test_SOURCES = scaling-test.c scaling_test_OBJECTS = scaling-test.$(OBJEXT) scaling_test_LDADD = $(LDADD) scaling_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + 
$(am__DEPENDENCIES_1) stress_test_SOURCES = stress-test.c stress_test_OBJECTS = stress-test.$(OBJEXT) stress_test_LDADD = $(LDADD) stress_test_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) +thread_test_SOURCES = thread-test.c +thread_test_OBJECTS = thread-test.$(OBJEXT) +thread_test_LDADD = $(LDADD) +thread_test_DEPENDENCIES = libutils.la \ + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) trap_crasher_SOURCES = trap-crasher.c trap_crasher_OBJECTS = trap-crasher.$(OBJEXT) trap_crasher_LDADD = $(LDADD) trap_crasher_DEPENDENCIES = libutils.la \ - $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false @@ -276,8 +317,9 @@ SOURCES = $(libutils_la_SOURCES) a1-trap-test.c affine-test.c \ infinite-loop.c lowlevel-blt-bench.c matrix-test.c oob-test.c \ pdf-op-test.c pixel-test.c prng-test.c radial-perf-test.c \ region-contains-test.c region-test.c region-translate-test.c \ - rotate-test.c scaling-crash-test.c scaling-helpers-test.c \ - scaling-test.c stress-test.c trap-crasher.c + rotate-test.c scaling-bench.c scaling-crash-test.c \ + scaling-helpers-test.c scaling-test.c stress-test.c \ + thread-test.c trap-crasher.c DIST_SOURCES = $(libutils_la_SOURCES) a1-trap-test.c affine-test.c \ alpha-loop.c alphamap.c blitters-test.c check-formats.c \ combiner-test.c composite.c composite-traps-test.c \ @@ -285,8 +327,9 @@ DIST_SOURCES = $(libutils_la_SOURCES) a1-trap-test.c affine-test.c \ infinite-loop.c lowlevel-blt-bench.c matrix-test.c oob-test.c \ pdf-op-test.c pixel-test.c prng-test.c radial-perf-test.c \ region-contains-test.c region-test.c region-translate-test.c \ - rotate-test.c scaling-crash-test.c scaling-helpers-test.c \ - 
scaling-test.c stress-test.c trap-crasher.c + rotate-test.c scaling-bench.c scaling-crash-test.c \ + scaling-helpers-test.c scaling-test.c stress-test.c \ + thread-test.c trap-crasher.c am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ @@ -331,7 +374,7 @@ GREP = @GREP@ GTK_CFLAGS = @GTK_CFLAGS@ GTK_LIBS = @GTK_LIBS@ HAVE_LIBPNG = @HAVE_LIBPNG@ -HAVE_PTHREAD_SETSPECIFIC = @HAVE_PTHREAD_SETSPECIFIC@ +HAVE_PTHREADS = @HAVE_PTHREADS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ @@ -377,6 +420,7 @@ PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ PNG_CFLAGS = @PNG_CFLAGS@ PNG_LIBS = @PNG_LIBS@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ PTHREAD_LDFLAGS = @PTHREAD_LDFLAGS@ PTHREAD_LIBS = @PTHREAD_LIBS@ RANLIB = @RANLIB@ @@ -385,6 +429,7 @@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SSE2_CFLAGS = @SSE2_CFLAGS@ SSE2_LDFLAGS = @SSE2_LDFLAGS@ +SSSE3_CFLAGS = @SSSE3_CFLAGS@ STRIP = @STRIP@ TESTPROGS_EXTRA_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR = @TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR@ @@ -459,6 +504,7 @@ TESTPROGRAMS = \ infinite-loop \ trap-crasher \ alpha-loop \ + thread-test \ scaling-crash-test \ scaling-helpers-test \ gradient-crash-test \ @@ -480,6 +526,7 @@ OTHERPROGRAMS = \ lowlevel-blt-bench \ radial-perf-test \ check-formats \ + scaling-bench \ $(NULL) @@ -494,9 +541,9 @@ libutils_headers = \ utils-prng.h \ $(NULL) -AM_CFLAGS = $(OPENMP_CFLAGS) -AM_LDFLAGS = $(OPENMP_CFLAGS) $(TESTPROGS_EXTRA_LDFLAGS) -LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm $(PNG_LIBS) +AM_CFLAGS = $(OPENMP_CFLAGS) $(PTHREAD_CFLAGS) +AM_LDFLAGS = $(OPENMP_CFLAGS) $(TESTPROGS_EXTRA_LDFLAGS) $(PTHREAD_LDFLAGS) +LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm $(PNG_LIBS) $(PTHREAD_LIBS) AM_CPPFLAGS = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(PNG_CFLAGS) libutils_la_SOURCES = $(libutils_sources) $(libutils_headers) noinst_LTLIBRARIES = 
libutils.la @@ -629,6 +676,9 @@ region-translate-test$(EXEEXT): $(region_translate_test_OBJECTS) $(region_transl rotate-test$(EXEEXT): $(rotate_test_OBJECTS) $(rotate_test_DEPENDENCIES) $(EXTRA_rotate_test_DEPENDENCIES) @rm -f rotate-test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(rotate_test_OBJECTS) $(rotate_test_LDADD) $(LIBS) +scaling-bench$(EXEEXT): $(scaling_bench_OBJECTS) $(scaling_bench_DEPENDENCIES) $(EXTRA_scaling_bench_DEPENDENCIES) + @rm -f scaling-bench$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(scaling_bench_OBJECTS) $(scaling_bench_LDADD) $(LIBS) scaling-crash-test$(EXEEXT): $(scaling_crash_test_OBJECTS) $(scaling_crash_test_DEPENDENCIES) $(EXTRA_scaling_crash_test_DEPENDENCIES) @rm -f scaling-crash-test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(scaling_crash_test_OBJECTS) $(scaling_crash_test_LDADD) $(LIBS) @@ -641,6 +691,9 @@ scaling-test$(EXEEXT): $(scaling_test_OBJECTS) $(scaling_test_DEPENDENCIES) $(EX stress-test$(EXEEXT): $(stress_test_OBJECTS) $(stress_test_DEPENDENCIES) $(EXTRA_stress_test_DEPENDENCIES) @rm -f stress-test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(stress_test_OBJECTS) $(stress_test_LDADD) $(LIBS) +thread-test$(EXEEXT): $(thread_test_OBJECTS) $(thread_test_DEPENDENCIES) $(EXTRA_thread_test_DEPENDENCIES) + @rm -f thread-test$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(thread_test_OBJECTS) $(thread_test_LDADD) $(LIBS) trap-crasher$(EXEEXT): $(trap_crasher_OBJECTS) $(trap_crasher_DEPENDENCIES) $(EXTRA_trap_crasher_DEPENDENCIES) @rm -f trap-crasher$(EXEEXT) $(AM_V_CCLD)$(LINK) $(trap_crasher_OBJECTS) $(trap_crasher_LDADD) $(LIBS) @@ -675,10 +728,12 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/region-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/region-translate-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rotate-test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scaling-bench.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scaling-crash-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/scaling-helpers-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scaling-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stress-test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/thread-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/trap-crasher.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utils-prng.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utils.Plo@am__quote@ diff --git a/lib/pixman/test/Makefile.sources b/lib/pixman/test/Makefile.sources index b5fc740f3..2ae5d9f8d 100644 --- a/lib/pixman/test/Makefile.sources +++ b/lib/pixman/test/Makefile.sources @@ -13,6 +13,7 @@ TESTPROGRAMS = \ infinite-loop \ trap-crasher \ alpha-loop \ + thread-test \ scaling-crash-test \ scaling-helpers-test \ gradient-crash-test \ @@ -33,6 +34,7 @@ OTHERPROGRAMS = \ lowlevel-blt-bench \ radial-perf-test \ check-formats \ + scaling-bench \ $(NULL) # Utility functions diff --git a/lib/pixman/test/affine-test.c b/lib/pixman/test/affine-test.c index 2506250db..8e19023a3 100644 --- a/lib/pixman/test/affine-test.c +++ b/lib/pixman/test/affine-test.c @@ -80,6 +80,18 @@ test_composite (int testnum, prng_randmemset (srcbuf, src_stride * src_height, 0); prng_randmemset (dstbuf, dst_stride * dst_height, 0); + if (prng_rand_n (2) == 0) + { + srcbuf += (src_stride / 4) * (src_height - 1); + src_stride = - src_stride; + } + + if (prng_rand_n (2) == 0) + { + dstbuf += (dst_stride / 4) * (dst_height - 1); + dst_stride = - dst_stride; + } + src_fmt = src_bpp == 4 ? (prng_rand_n (2) == 0 ? 
PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; @@ -273,32 +285,20 @@ test_composite (int testnum, pixman_image_composite (op, src_img, NULL, dst_img, src_x, src_y, 0, 0, dst_x, dst_y, w, h); - if (dst_fmt == PIXMAN_x8r8g8b8) - { - /* ignore unused part */ - for (i = 0; i < dst_stride * dst_height / 4; i++) - dstbuf[i] &= 0xFFFFFF; - } - - image_endian_swap (dst_img); - + crc32 = compute_crc32_for_image (0, dst_img); + if (verbose) - { - int j; - - for (i = 0; i < dst_height; i++) - { - for (j = 0; j < dst_stride; j++) - printf ("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j)); - - printf ("\n"); - } - } + print_image (dst_img); pixman_image_unref (src_img); pixman_image_unref (dst_img); - crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height); + if (src_stride < 0) + srcbuf += (src_stride / 4) * (src_height - 1); + + if (dst_stride < 0) + dstbuf += (dst_stride / 4) * (dst_height - 1); + free (srcbuf); free (dstbuf); @@ -306,12 +306,10 @@ test_composite (int testnum, return crc32; } -#if BILINEAR_INTERPOLATION_BITS == 8 -#define CHECKSUM 0x2CDF1F07 -#elif BILINEAR_INTERPOLATION_BITS == 7 -#define CHECKSUM 0xBC00B1DF +#if BILINEAR_INTERPOLATION_BITS == 7 +#define CHECKSUM 0xBE724CFE #elif BILINEAR_INTERPOLATION_BITS == 4 -#define CHECKSUM 0xA227306B +#define CHECKSUM 0x79BBE501 #else #define CHECKSUM 0x00000000 #endif diff --git a/lib/pixman/test/blitters-test.c b/lib/pixman/test/blitters-test.c index a2c6ff4d8..ea03f475d 100644 --- a/lib/pixman/test/blitters-test.c +++ b/lib/pixman/test/blitters-test.c @@ -57,6 +57,13 @@ create_random_image (pixman_format_code_t *allowed_formats, prng_randmemset (buf, stride * height, RANDMEMSET_MORE_00_AND_FF); } + /* test negative stride */ + if (prng_rand_n (4) == 0) + { + buf += (stride / 4) * (height - 1); + stride = - stride; + } + img = pixman_image_create_bits (fmt, width, height, buf, stride); if (PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_COLOR) @@ -89,6 +96,9 @@ free_random_image (uint32_t initcrc, if (fmt != 
PIXMAN_null) crc32 = compute_crc32_for_image (initcrc, img); + if (img->bits.rowstride < 0) + data += img->bits.rowstride * (img->bits.height - 1); + pixman_image_unref (img); free (data); @@ -222,7 +232,6 @@ static pixman_format_code_t mask_fmt_list[] = { uint32_t test_composite (int testnum, int verbose) { - int i; pixman_image_t *src_img = NULL; pixman_image_t *dst_img = NULL; pixman_image_t *mask_img = NULL; @@ -235,7 +244,7 @@ test_composite (int testnum, int verbose) int w, h; pixman_op_t op; pixman_format_code_t src_fmt, dst_fmt, mask_fmt; - uint32_t *dstbuf, *srcbuf, *maskbuf; + uint32_t *srcbuf, *maskbuf; uint32_t crc32; int max_width, max_height, max_extra_stride; FLOAT_REGS_CORRUPTION_DETECTOR_START (); @@ -282,7 +291,6 @@ test_composite (int testnum, int verbose) dst_height = pixman_image_get_height (dst_img); dst_stride = pixman_image_get_stride (dst_img); - dstbuf = pixman_image_get_data (dst_img); srcbuf = pixman_image_get_data (src_img); src_x = prng_rand_n (src_width); @@ -355,23 +363,7 @@ test_composite (int testnum, int verbose) src_x, src_y, mask_x, mask_y, dst_x, dst_y, w, h); if (verbose) - { - int j; - - printf ("---\n"); - for (i = 0; i < dst_height; i++) - { - for (j = 0; j < dst_stride; j++) - { - if (j == (dst_width * PIXMAN_FORMAT_BPP (dst_fmt) + 7) / 8) - printf ("| "); - - printf ("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j)); - } - printf ("\n"); - } - printf ("---\n"); - } + print_image (dst_img); free_random_image (0, src_img, PIXMAN_null); crc32 = free_random_image (0, dst_img, dst_fmt); @@ -402,6 +394,6 @@ main (int argc, const char *argv[]) } return fuzzer_test_main("blitters", 2000000, - 0x0CF3283B, + 0xE0A07495, test_composite, argc, argv); } diff --git a/lib/pixman/test/composite-traps-test.c b/lib/pixman/test/composite-traps-test.c index 2983eae83..86a035564 100644 --- a/lib/pixman/test/composite-traps-test.c +++ b/lib/pixman/test/composite-traps-test.c @@ -97,19 +97,25 @@ test_composite (int testnum, int src_width = 
prng_rand_n (MAX_SRC_WIDTH) + 1; int src_height = prng_rand_n (MAX_SRC_HEIGHT) + 1; int src_stride = src_width * src_bpp + prng_rand_n (MAX_STRIDE) * src_bpp; - uint32_t *bits; + uint32_t *bits, *orig; src_x = -(src_width / 4) + prng_rand_n (src_width * 3 / 2); src_y = -(src_height / 4) + prng_rand_n (src_height * 3 / 2); src_stride = (src_stride + 3) & ~3; - bits = (uint32_t *)make_random_bytes (src_stride * src_height); + orig = bits = (uint32_t *)make_random_bytes (src_stride * src_height); + if (prng_rand_n (2) == 0) + { + bits += (src_stride / 4) * (src_height - 1); + src_stride = - src_stride; + } + src_img = pixman_image_create_bits ( src_format, src_width, src_height, bits, src_stride); - pixman_image_set_destroy_function (src_img, destroy_bits, bits); + pixman_image_set_destroy_function (src_img, destroy_bits, orig); if (prng_rand_n (8) == 0) { @@ -153,6 +159,12 @@ test_composite (int testnum, dst_bits = (uint32_t *)make_random_bytes (dst_stride * dst_height); + if (prng_rand_n (2) == 0) + { + dst_bits += (dst_stride / 4) * (dst_height - 1); + dst_stride = - dst_stride; + } + dst_x = -(dst_width / 4) + prng_rand_n (dst_width * 3 / 2); dst_y = -(dst_height / 4) + prng_rand_n (dst_height * 3 / 2); @@ -214,30 +226,14 @@ test_composite (int testnum, pixman_composite_trapezoids (op, src_img, dst_img, mask_format, src_x, src_y, dst_x, dst_y, n_traps, traps); - if (dst_format == PIXMAN_x8r8g8b8) - { - /* ignore unused part */ - for (i = 0; i < dst_stride * dst_height / 4; i++) - dst_bits[i] &= 0xFFFFFF; - } - - image_endian_swap (dst_img); + crc32 = compute_crc32_for_image (0, dst_img); if (verbose) - { - int j; - - for (i = 0; i < dst_height; i++) - { - for (j = 0; j < dst_stride; j++) - printf ("%02X ", *((uint8_t *)dst_bits + i * dst_stride + j)); - - printf ("\n"); - } - } - - crc32 = compute_crc32 (0, dst_bits, dst_stride * dst_height); + print_image (dst_img); + if (dst_stride < 0) + dst_bits += (dst_stride / 4) * (dst_height - 1); + fence_free (dst_bits); 
pixman_image_unref (src_img); @@ -251,6 +247,6 @@ test_composite (int testnum, int main (int argc, const char *argv[]) { - return fuzzer_test_main("composite traps", 40000, 0x749BCC57, + return fuzzer_test_main("composite traps", 40000, 0xAF41D210, test_composite, argc, argv); } diff --git a/lib/pixman/test/rotate-test.c b/lib/pixman/test/rotate-test.c index 9d2a620cb..18ca60d9b 100644 --- a/lib/pixman/test/rotate-test.c +++ b/lib/pixman/test/rotate-test.c @@ -61,16 +61,25 @@ static pixman_image_t * make_image (void) { pixman_format_code_t format = RANDOM_FORMAT(); - uint32_t *bytes = malloc (WIDTH * HEIGHT * 4); + uint32_t *bytes, *orig; pixman_image_t *image; + int stride; + orig = bytes = malloc (WIDTH * HEIGHT * 4); prng_randmemset (bytes, WIDTH * HEIGHT * 4, 0); + stride = WIDTH * 4; + if (prng_rand_n (2) == 0) + { + bytes += (stride / 4) * (HEIGHT - 1); + stride = - stride; + } + image = pixman_image_create_bits ( - format, WIDTH, HEIGHT, bytes, WIDTH * 4); + format, WIDTH, HEIGHT, bytes, stride); pixman_image_set_transform (image, RANDOM_TRANSFORM()); - pixman_image_set_destroy_function (image, on_destroy, bytes); + pixman_image_set_destroy_function (image, on_destroy, orig); pixman_image_set_repeat (image, PIXMAN_REPEAT_NORMAL); image_endian_swap (image); @@ -106,6 +115,6 @@ int main (int argc, const char *argv[]) { return fuzzer_test_main ("rotate", 15000, - 0xECF5E426, + 0x81E9EC2F, test_transform, argc, argv); } diff --git a/lib/pixman/test/scaling-bench.c b/lib/pixman/test/scaling-bench.c new file mode 100644 index 000000000..365e79850 --- /dev/null +++ b/lib/pixman/test/scaling-bench.c @@ -0,0 +1,80 @@ +#include <stdlib.h> +#include "utils.h" + +#define SOURCE_WIDTH 320 +#define SOURCE_HEIGHT 240 +#define TEST_REPEATS 3 + +static pixman_image_t * +make_source (void) +{ + size_t n_bytes = (SOURCE_WIDTH + 2) * (SOURCE_HEIGHT + 2) * 4; + uint32_t *data = malloc (n_bytes); + pixman_image_t *source; + + prng_randmemset (data, n_bytes, 0); + + source = 
pixman_image_create_bits ( + PIXMAN_a8r8g8b8, SOURCE_WIDTH + 2, SOURCE_HEIGHT + 2, + data, + (SOURCE_WIDTH + 2) * 4); + + pixman_image_set_filter (source, PIXMAN_FILTER_BILINEAR, NULL, 0); + + return source; +} + +int +main () +{ + double scale; + pixman_image_t *src; + + prng_srand (23874); + + src = make_source (); + printf ("# %-6s %-22s %-14s %-12s\n", + "ratio", + "resolutions", + "time / ms", + "time per pixel / ns"); + for (scale = 0.1; scale < 10.005; scale += 0.01) + { + int i; + int dest_width = SOURCE_WIDTH * scale + 0.5; + int dest_height = SOURCE_HEIGHT * scale + 0.5; + int dest_byte_stride = (dest_width * 4 + 15) & ~15; + pixman_fixed_t s = (1 / scale) * 65536.0 + 0.5; + pixman_transform_t transform; + pixman_image_t *dest; + double t1, t2, t = -1; + uint32_t *dest_buf = aligned_malloc (16, dest_byte_stride * dest_height); + memset (dest_buf, 0, dest_byte_stride * dest_height); + + pixman_transform_init_scale (&transform, s, s); + pixman_image_set_transform (src, &transform); + + dest = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, dest_width, dest_height, dest_buf, dest_byte_stride); + + for (i = 0; i < TEST_REPEATS; i++) + { + t1 = gettime(); + pixman_image_composite ( + PIXMAN_OP_OVER, src, NULL, dest, + scale, scale, 0, 0, 0, 0, dest_width, dest_height); + t2 = gettime(); + if (t < 0 || t2 - t1 < t) + t = t2 - t1; + } + + printf ("%6.2f : %4dx%-4d => %4dx%-4d : %12.4f : %12.4f\n", + scale, SOURCE_WIDTH, SOURCE_HEIGHT, dest_width, dest_height, + t * 1000, (t / (dest_width * dest_height)) * 1000000000); + + pixman_image_unref (dest); + free (dest_buf); + } + + return 0; +} diff --git a/lib/pixman/test/scaling-test.c b/lib/pixman/test/scaling-test.c index a8cb4c47b..e2f7fa9f4 100644 --- a/lib/pixman/test/scaling-test.c +++ b/lib/pixman/test/scaling-test.c @@ -147,6 +147,24 @@ test_composite (int testnum, src_fmt = get_format (src_bpp); dst_fmt = get_format (dst_bpp); + if (prng_rand_n (2)) + { + srcbuf += (src_stride / 4) * (src_height - 1); + 
src_stride = - src_stride; + } + + if (prng_rand_n (2)) + { + maskbuf += (mask_stride / 4) * (mask_height - 1); + mask_stride = - mask_stride; + } + + if (prng_rand_n (2)) + { + dstbuf += (dst_stride / 4) * (dst_height - 1); + dst_stride = - dst_stride; + } + src_img = pixman_image_create_bits ( src_fmt, src_width, src_height, srcbuf, src_stride); @@ -340,33 +358,24 @@ test_composite (int testnum, pixman_image_composite (op, src_img, mask_img, dst_img, src_x, src_y, mask_x, mask_y, dst_x, dst_y, w, h); - if (dst_fmt == PIXMAN_x8r8g8b8 || dst_fmt == PIXMAN_x8b8g8r8) - { - /* ignore unused part */ - for (i = 0; i < dst_stride * dst_height / 4; i++) - dstbuf[i] &= 0xFFFFFF; - } - - image_endian_swap (dst_img); - + crc32 = compute_crc32_for_image (0, dst_img); + if (verbose) - { - int j; - - for (i = 0; i < dst_height; i++) - { - for (j = 0; j < dst_stride; j++) - printf ("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j)); - - printf ("\n"); - } - } + print_image (dst_img); pixman_image_unref (src_img); pixman_image_unref (mask_img); pixman_image_unref (dst_img); - crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height); + if (src_stride < 0) + srcbuf += (src_stride / 4) * (src_height - 1); + + if (mask_stride < 0) + maskbuf += (mask_stride / 4) * (mask_height - 1); + + if (dst_stride < 0) + dstbuf += (dst_stride / 4) * (dst_height - 1); + free (srcbuf); free (maskbuf); free (dstbuf); @@ -375,12 +384,10 @@ test_composite (int testnum, return crc32; } -#if BILINEAR_INTERPOLATION_BITS == 8 -#define CHECKSUM 0x9096E6B6 -#elif BILINEAR_INTERPOLATION_BITS == 7 -#define CHECKSUM 0xCE8EC6BA +#if BILINEAR_INTERPOLATION_BITS == 7 +#define CHECKSUM 0x92E0F068 #elif BILINEAR_INTERPOLATION_BITS == 4 -#define CHECKSUM 0xAB1D39BE +#define CHECKSUM 0x8EFFA1E5 #else #define CHECKSUM 0x00000000 #endif diff --git a/lib/pixman/test/thread-test.c b/lib/pixman/test/thread-test.c new file mode 100644 index 000000000..0b07b269d --- /dev/null +++ b/lib/pixman/test/thread-test.c @@ -0,0 
+1,199 @@ +#include "utils.h" + +#ifndef HAVE_PTHREADS + +int main () +{ + printf ("Skipped thread-test - pthreads not supported\n"); + return 0; +} + +#else + +#include <stdlib.h> +#include <pthread.h> + +typedef struct +{ + int thread_no; + uint32_t *dst_buf; + prng_t prng_state; +} info_t; + +static const pixman_op_t operators[] = +{ + PIXMAN_OP_SRC, + PIXMAN_OP_OVER, + PIXMAN_OP_ADD, + PIXMAN_OP_CLEAR, + PIXMAN_OP_SRC, + PIXMAN_OP_DST, + PIXMAN_OP_OVER, + PIXMAN_OP_OVER_REVERSE, + PIXMAN_OP_IN, + PIXMAN_OP_IN_REVERSE, + PIXMAN_OP_OUT, + PIXMAN_OP_OUT_REVERSE, + PIXMAN_OP_ATOP, + PIXMAN_OP_ATOP_REVERSE, + PIXMAN_OP_XOR, + PIXMAN_OP_ADD, + PIXMAN_OP_SATURATE, + PIXMAN_OP_DISJOINT_CLEAR, + PIXMAN_OP_DISJOINT_SRC, + PIXMAN_OP_DISJOINT_DST, + PIXMAN_OP_DISJOINT_OVER, + PIXMAN_OP_DISJOINT_OVER_REVERSE, + PIXMAN_OP_DISJOINT_IN, + PIXMAN_OP_DISJOINT_IN_REVERSE, + PIXMAN_OP_DISJOINT_OUT, + PIXMAN_OP_DISJOINT_OUT_REVERSE, + PIXMAN_OP_DISJOINT_ATOP, + PIXMAN_OP_DISJOINT_ATOP_REVERSE, + PIXMAN_OP_DISJOINT_XOR, + PIXMAN_OP_CONJOINT_CLEAR, + PIXMAN_OP_CONJOINT_SRC, + PIXMAN_OP_CONJOINT_DST, + PIXMAN_OP_CONJOINT_OVER, + PIXMAN_OP_CONJOINT_OVER_REVERSE, + PIXMAN_OP_CONJOINT_IN, + PIXMAN_OP_CONJOINT_IN_REVERSE, + PIXMAN_OP_CONJOINT_OUT, + PIXMAN_OP_CONJOINT_OUT_REVERSE, + PIXMAN_OP_CONJOINT_ATOP, + PIXMAN_OP_CONJOINT_ATOP_REVERSE, + PIXMAN_OP_CONJOINT_XOR, + PIXMAN_OP_MULTIPLY, + PIXMAN_OP_SCREEN, + PIXMAN_OP_OVERLAY, + PIXMAN_OP_DARKEN, + PIXMAN_OP_LIGHTEN, + PIXMAN_OP_COLOR_DODGE, + PIXMAN_OP_COLOR_BURN, + PIXMAN_OP_HARD_LIGHT, + PIXMAN_OP_DIFFERENCE, + PIXMAN_OP_EXCLUSION, +}; + +static const pixman_format_code_t formats[] = +{ + PIXMAN_a8r8g8b8, + PIXMAN_r5g6b5, + PIXMAN_a8, + PIXMAN_a4, + PIXMAN_a1, + PIXMAN_b5g6r5, + PIXMAN_r8g8b8a8, + PIXMAN_a4r4g4b4 +}; + +#define N_ROUNDS 8192 + +#define RAND_ELT(arr) \ + arr[prng_rand_r(&info->prng_state) % ARRAY_LENGTH (arr)] + +#define DEST_WIDTH (7) + +static void * +thread (void *data) +{ + info_t *info = data; + uint32_t crc32 = 
0x0; + uint32_t src_buf[64]; + pixman_image_t *dst_img, *src_img; + int i; + + prng_srand_r (&info->prng_state, info->thread_no); + + for (i = 0; i < N_ROUNDS; ++i) + { + pixman_op_t op; + int rand1, rand2; + + prng_randmemset_r (&info->prng_state, info->dst_buf, + DEST_WIDTH * sizeof (uint32_t), 0); + prng_randmemset_r (&info->prng_state, src_buf, + sizeof (src_buf), 0); + + src_img = pixman_image_create_bits ( + RAND_ELT (formats), 4, 4, src_buf, 16); + dst_img = pixman_image_create_bits ( + RAND_ELT (formats), DEST_WIDTH, 1, info->dst_buf, + DEST_WIDTH * sizeof (uint32_t)); + + image_endian_swap (src_img); + image_endian_swap (dst_img); + + rand2 = prng_rand_r (&info->prng_state) % 4; + rand1 = prng_rand_r (&info->prng_state) % 4; + op = RAND_ELT (operators); + + pixman_image_composite32 ( + op, + src_img, NULL, dst_img, + rand1, rand2, 0, 0, 0, 0, DEST_WIDTH, 1); + + crc32 = compute_crc32_for_image (crc32, dst_img); + + pixman_image_unref (src_img); + pixman_image_unref (dst_img); + } + + return (void *)(uintptr_t)crc32; +} + +static inline uint32_t +byteswap32 (uint32_t x) +{ + return ((x & ((uint32_t)0xFF << 24)) >> 24) | + ((x & ((uint32_t)0xFF << 16)) >> 8) | + ((x & ((uint32_t)0xFF << 8)) << 8) | + ((x & ((uint32_t)0xFF << 0)) << 24); +} + +int +main (void) +{ + uint32_t dest[16 * DEST_WIDTH]; + info_t info[16] = { { 0 } }; + pthread_t threads[16]; + void *retvals[16]; + uint32_t crc32s[16], crc32; + int i; + + for (i = 0; i < 16; ++i) + { + info[i].thread_no = i; + info[i].dst_buf = &dest[i * DEST_WIDTH]; + } + + for (i = 0; i < 16; ++i) + pthread_create (&threads[i], NULL, thread, &info[i]); + + for (i = 0; i < 16; ++i) + pthread_join (threads[i], &retvals[i]); + + for (i = 0; i < 16; ++i) + { + crc32s[i] = (uintptr_t)retvals[i]; + + if (is_little_endian()) + crc32s[i] = byteswap32 (crc32s[i]); + } + + crc32 = compute_crc32 (0, crc32s, sizeof crc32s); + +#define EXPECTED 0xE299B18E + + if (crc32 != EXPECTED) + { + printf ("thread-test failed. 
Got checksum 0x%08X, expected 0x%08X\n", + crc32, EXPECTED); + return 1; + } + + return 0; +} + +#endif + diff --git a/lib/pixman/test/trap-crasher.c b/lib/pixman/test/trap-crasher.c index 4e4cac297..77be1c98b 100644 --- a/lib/pixman/test/trap-crasher.c +++ b/lib/pixman/test/trap-crasher.c @@ -5,7 +5,7 @@ int main() { pixman_image_t *dst; - pixman_trapezoid_t traps[1] = { + pixman_trapezoid_t traps[] = { { 2147483646, 2147483647, @@ -18,6 +18,18 @@ main() { 0, 2147483647 } } }, + { + 32768, + - 2147483647, + { + { 0, 0 }, + { 0, 2147483647 } + }, + { + { 65536, 0 }, + { 0, 2147483647 } + } + }, }; dst = pixman_image_create_bits (PIXMAN_a8, 1, 1, NULL, -1); diff --git a/lib/pixman/test/utils.c b/lib/pixman/test/utils.c index 3d1ba22ae..ebe0ccc09 100644 --- a/lib/pixman/test/utils.c +++ b/lib/pixman/test/utils.c @@ -150,6 +150,12 @@ compute_crc32_for_image_internal (uint32_t crc32, uint32_t mask = 0xffffffff; int i; + if (stride < 0) + { + data += (stride / 4) * (height - 1); + stride = - stride; + } + /* mask unused 'x' part */ if (PIXMAN_FORMAT_BPP (fmt) - PIXMAN_FORMAT_DEPTH (fmt) && PIXMAN_FORMAT_DEPTH (fmt) != 0) @@ -238,6 +244,38 @@ compute_crc32_for_image (uint32_t crc32, return crc32; } +void +print_image (pixman_image_t *image) +{ + int i, j; + int width, height, stride; + pixman_format_code_t format; + uint8_t *buffer; + int s; + + width = pixman_image_get_width (image); + height = pixman_image_get_height (image); + stride = pixman_image_get_stride (image); + format = pixman_image_get_format (image); + buffer = (uint8_t *)pixman_image_get_data (image); + + s = (stride >= 0)? 
stride : - stride; + + printf ("---\n"); + for (i = 0; i < height; i++) + { + for (j = 0; j < s; j++) + { + if (j == (width * PIXMAN_FORMAT_BPP (format) + 7) / 8) + printf ("| "); + + printf ("%02X ", *((uint8_t *)buffer + i * stride + j)); + } + printf ("\n"); + } + printf ("---\n"); +} + /* perform endian conversion of pixel data */ void @@ -259,11 +297,12 @@ image_endian_swap (pixman_image_t *img) for (i = 0; i < height; i++) { uint8_t *line_data = (uint8_t *)data + stride * i; - + int s = (stride >= 0)? stride : - stride; + switch (bpp) { case 1: - for (j = 0; j < stride; j++) + for (j = 0; j < s; j++) { line_data[j] = ((line_data[j] & 0x80) >> 7) | @@ -277,13 +316,13 @@ image_endian_swap (pixman_image_t *img) } break; case 4: - for (j = 0; j < stride; j++) + for (j = 0; j < s; j++) { line_data[j] = (line_data[j] >> 4) | (line_data[j] << 4); } break; case 16: - for (j = 0; j + 2 <= stride; j += 2) + for (j = 0; j + 2 <= s; j += 2) { char t1 = line_data[j + 0]; char t2 = line_data[j + 1]; @@ -293,7 +332,7 @@ image_endian_swap (pixman_image_t *img) } break; case 24: - for (j = 0; j + 3 <= stride; j += 3) + for (j = 0; j + 3 <= s; j += 3) { char t1 = line_data[j + 0]; char t2 = line_data[j + 1]; @@ -305,7 +344,7 @@ image_endian_swap (pixman_image_t *img) } break; case 32: - for (j = 0; j + 4 <= stride; j += 4) + for (j = 0; j + 4 <= s; j += 4) { char t1 = line_data[j + 0]; char t2 = line_data[j + 1]; @@ -602,6 +641,32 @@ draw_checkerboard (pixman_image_t *image, } } +static uint32_t +call_test_function (uint32_t (*test_function)(int testnum, int verbose), + int testnum, + int verbose) +{ + uint32_t retval; + +#if defined (__GNUC__) && defined (_WIN32) && (defined (__i386) || defined (__i386__)) + __asm__ ( + /* Deliberately avoid aligning the stack to 16 bytes */ + "pushl %1\n\t" + "pushl %2\n\t" + "call *%3\n\t" + "addl $8, %%esp\n\t" + : "=a" (retval) + : "r" (verbose), + "r" (testnum), + "r" (test_function) + : "edx", "ecx"); /* caller save registers */ +#else 
+ retval = test_function (testnum, verbose); +#endif + + return retval; +} + /* * A function, which can be used as a core part of the test programs, * intended to detect various problems with the help of fuzzing input @@ -671,7 +736,9 @@ fuzzer_test_main (const char *test_name, else if (argc >= 2) { n2 = atoi (argv[1]); - checksum = test_function (n2, 1); + + checksum = call_test_function (test_function, n2, 1); + printf ("%d: checksum=%08X\n", n2, checksum); return 0; } @@ -687,7 +754,7 @@ fuzzer_test_main (const char *test_name, #endif for (i = n1; i <= n2; i++) { - uint32_t crc = test_function (i, 0); + uint32_t crc = call_test_function (test_function, i, 0); if (verbose) printf ("%d: %08X\n", i, crc); checksum += crc; diff --git a/lib/pixman/test/utils.h b/lib/pixman/test/utils.h index c2781516f..ebb14d9e4 100644 --- a/lib/pixman/test/utils.h +++ b/lib/pixman/test/utils.h @@ -6,6 +6,11 @@ #include "pixman-private.h" /* For 'inline' definition */ #include "utils-prng.h" +#if defined(_MSC_VER) +#define snprintf _snprintf +#define strcasecmp _stricmp +#endif + #define ARRAY_LENGTH(A) ((int) (sizeof (A) / sizeof ((A) [0]))) /* A primitive pseudorandom number generator, @@ -63,6 +68,10 @@ uint32_t compute_crc32_for_image (uint32_t in_crc32, pixman_image_t *image); +/* Print the image in hexadecimal */ +void +print_image (pixman_image_t *image); + /* Returns TRUE if running on a little endian system */ static force_inline pixman_bool_t |