author | Matthieu Herrb <matthieu@cvs.openbsd.org> | 2010-03-25 21:58:53 +0000
---|---|---
committer | Matthieu Herrb <matthieu@cvs.openbsd.org> | 2010-03-25 21:58:53 +0000
commit | cea3749b11718d3b585f653d4acbb6c5287794cb (patch) |
tree | ab4cf3134d3a5b6e9049cde32c7b44ba677554ff /lib |
parent | 1a68a9b7a165123cd605727933898146a409555c (diff) |
Update to pixman 0.16.6. Tested on a full ports build by naddy@.
Diffstat (limited to 'lib')
66 files changed, 24103 insertions, 17591 deletions
diff --git a/lib/pixman/Makefile.bsd-wrapper b/lib/pixman/Makefile.bsd-wrapper
index 310364807..8f45708dc 100644
--- a/lib/pixman/Makefile.bsd-wrapper
+++ b/lib/pixman/Makefile.bsd-wrapper
@@ -1,6 +1,6 @@
-# $OpenBSD: Makefile.bsd-wrapper,v 1.8 2009/06/12 09:16:54 matthieu Exp $
+# $OpenBSD: Makefile.bsd-wrapper,v 1.9 2010/03/25 21:58:52 matthieu Exp $
 
-SHARED_LIBS=	pixman-1	15.8
+SHARED_LIBS=	pixman-1	16.6
 
 .if ${MACHINE} == amd64
 CONFIGURE_ARGS += --disable-sse2
diff --git a/lib/pixman/Makefile.in b/lib/pixman/Makefile.in
index 49e307285..d51ee56ad 100644
--- a/lib/pixman/Makefile.in
+++ b/lib/pixman/Makefile.in
@@ -146,6 +146,7 @@ PACKAGE_TARNAME = @PACKAGE_TARNAME@
 PACKAGE_VERSION = @PACKAGE_VERSION@
 PATH_SEPARATOR = @PATH_SEPARATOR@
 PERL = @PERL@
+PIXMAN_TIMERS = @PIXMAN_TIMERS@
 PIXMAN_VERSION_MAJOR = @PIXMAN_VERSION_MAJOR@
 PIXMAN_VERSION_MICRO = @PIXMAN_VERSION_MICRO@
 PIXMAN_VERSION_MINOR = @PIXMAN_VERSION_MINOR@
diff --git a/lib/pixman/README b/lib/pixman/README
index e69de29bb..843b06980 100644
--- a/lib/pixman/README
+++ b/lib/pixman/README
@@ -0,0 +1,26 @@
+pixman is a library that provides low-level pixel manipulation
+features such as image compositing and trapezoid rasterization.
+
+Please submit bugs & patches to the libpixman bugzilla:
+
+	https://bugs.freedesktop.org/enter_bug.cgi?product=pixman
+
+All questions regarding this software should be directed to either the
+Xorg mailing list:
+
+	http://lists.freedesktop.org/mailman/listinfo/xorg
+
+or the cairo mailing list:
+
+	http://lists.freedesktop.org/mailman/listinfo/cairo
+
+The master development code repository can be found at:
+
+	git://anongit.freedesktop.org/git/pixman
+
+	http://gitweb.freedesktop.org/?p=pixman;a=summary
+
+For more information on the git code manager, see:
+
+	http://wiki.x.org/wiki/GitPage
+
diff --git a/lib/pixman/TODO b/lib/pixman/TODO
index 6abeb0b0d..52d737706 100644
--- a/lib/pixman/TODO
+++ b/lib/pixman/TODO
@@ -14,6 +14,8 @@
    the required precision by simply adding offset_x/y to the
    relevant rendering API?
 
+ - Get rid of workaround for X server bug.
+
  - pixman_image_set_indexed() should copy its argument, and
    X should be ported over to use a pixman_image as the
    representation of a Picture, rather than creating one on each
diff --git a/lib/pixman/config.h.in b/lib/pixman/config.h.in
index 01d2b4edf..283eb1a1b 100644
--- a/lib/pixman/config.h.in
+++ b/lib/pixman/config.h.in
@@ -15,6 +15,9 @@
 /* Define to 1 if you have the <memory.h> header file. */
 #undef HAVE_MEMORY_H
 
+/* Whether we have posix_memalign() */
+#undef HAVE_POSIX_MEMALIGN
+
 /* Define to 1 if you have the <stdint.h> header file. */
 #undef HAVE_STDINT_H
 
@@ -54,6 +57,9 @@
 /* Define to the version of this package. */
 #undef PACKAGE_VERSION
 
+/* enable TIMER_BEGIN/TIMER_END macros */
+#undef PIXMAN_TIMERS
+
 /* Define to 1 if you have the ANSI C header files. */
 #undef STDC_HEADERS
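The two new config.h.in entries are feature gates: HAVE_POSIX_MEMALIGN is filled in by a posix_memalign link probe added to configure further down in this diff, and PIXMAN_TIMERS turns on the TIMER_BEGIN/TIMER_END instrumentation macros. As a rough sketch of how such a gate is typically consumed (illustrative only, not pixman's actual source; aligned_malloc_16 is a made-up name):

#include <stdlib.h>

/* Illustration only: allocate 16-byte-aligned memory when the
 * configure probe found posix_memalign(), else fall back to malloc(). */
static void *
aligned_malloc_16 (size_t size)
{
#ifdef HAVE_POSIX_MEMALIGN
    void *p;

    /* posix_memalign() returns 0 on success and stores the pointer in p. */
    if (posix_memalign (&p, 16, size) != 0)
	return NULL;
    return p;
#else
    /* Without the gate, callers must tolerate malloc()'s weaker alignment. */
    return malloc (size);
#endif
}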
diff --git a/lib/pixman/configure b/lib/pixman/configure
index 9a0567cee..262b1f57d 100644
--- a/lib/pixman/configure
+++ b/lib/pixman/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.62 for pixman 0.15.8.
+# Generated by GNU Autoconf 2.62 for pixman 0.16.6.
 #
 # Report bugs to <"sandmann@daimi.au.dk">.
 #
@@ -750,8 +750,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
 
 # Identity of this package.
 PACKAGE_NAME='pixman'
 PACKAGE_TARNAME='pixman'
-PACKAGE_VERSION='0.15.8'
-PACKAGE_STRING='pixman 0.15.8'
+PACKAGE_VERSION='0.16.6'
+PACKAGE_STRING='pixman 0.16.6'
 PACKAGE_BUGREPORT='"sandmann@daimi.au.dk"'
 
 # Factoring default headers for most tests.
@@ -910,13 +910,14 @@ VMX_CFLAGS
 USE_VMX_TRUE
 USE_VMX_FALSE
 ARM_SIMD_CFLAGS
-ARM_NEON_CFLAGS
 USE_ARM_SIMD_TRUE
 USE_ARM_SIMD_FALSE
+ARM_NEON_CFLAGS
 USE_ARM_NEON_TRUE
 USE_ARM_NEON_FALSE
 USE_GCC_INLINE_ASM_TRUE
 USE_GCC_INLINE_ASM_FALSE
+PIXMAN_TIMERS
 PKG_CONFIG
 GTK_CFLAGS
 GTK_LIBS
@@ -943,6 +944,7 @@ enable_vmx
 enable_arm_simd
 enable_arm_neon
 enable_gcc_inline_asm
+enable_timers
 enable_gtk
 '
 ac_precious_vars='build_alias
@@ -1515,7 +1517,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures pixman 0.15.8 to adapt to many kinds of systems.
+\`configure' configures pixman 0.16.6 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1585,7 +1587,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of pixman 0.15.8:";;
+     short | recursive ) echo "Configuration of pixman 0.16.6:";;
   esac
   cat <<\_ACEOF
 
@@ -1607,6 +1609,7 @@ Optional Features:
   --disable-arm-neon      disable ARM NEON fast paths
   --disable-gcc-inline-asm
                           disable GNU-style inline assembler
+  --enable-timers         enable TIMER_BEGIN and TIMER_END macros [default=no]
   --enable-gtk            enable tests using GTK+ [default=auto]
 
 Optional Packages:
@@ -1701,7 +1704,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-pixman configure 0.15.8
+pixman configure 0.16.6
 generated by GNU Autoconf 2.62
 
 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1715,7 +1718,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by pixman $as_me 0.15.8, which was
+It was created by pixman $as_me 0.16.6, which was
 generated by GNU Autoconf 2.62.  Invocation command line was
 
   $ $0 $@
 
@@ -2364,7 +2367,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='pixman'
- VERSION='0.15.8'
+ VERSION='0.16.6'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -2598,6 +2601,8 @@ case $host_os in
 *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;;
 esac
 
+test_CFLAGS=${CFLAGS+set} # We may override autoconf default CFLAGS.
+
 ac_ext=c
 ac_cpp='$CPP $CPPFLAGS'
 ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -4400,7 +4405,7 @@ ia64-*-hpux*)
   ;;
 *-*-irix6*)
   # Find out which ABI we are using.
-  echo '#line 4403 "configure"' > conftest.$ac_ext
+  echo '#line 4408 "configure"' > conftest.$ac_ext
   if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
   (eval $ac_compile) 2>&5
   ac_status=$?
@@ -7508,11 +7513,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:7511: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:7516: $lt_compile\"" >&5)
   (eval "$lt_compile" 2>conftest.err)
   ac_status=$?
   cat conftest.err >&5
-   echo "$as_me:7515: \$? = $ac_status" >&5
+   echo "$as_me:7520: \$? = $ac_status" >&5
   if (exit $ac_status) && test -s "$ac_outfile"; then
     # The compiler can only warn and ignore the option if not recognized
     # So say no if there are warnings other than the usual output.
@@ -7798,11 +7803,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:7801: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:7806: $lt_compile\"" >&5)
   (eval "$lt_compile" 2>conftest.err)
   ac_status=$?
   cat conftest.err >&5
-   echo "$as_me:7805: \$? = $ac_status" >&5
+   echo "$as_me:7810: \$? = $ac_status" >&5
   if (exit $ac_status) && test -s "$ac_outfile"; then
     # The compiler can only warn and ignore the option if not recognized
     # So say no if there are warnings other than the usual output.
@@ -7902,11 +7907,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:7905: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:7910: $lt_compile\"" >&5)
   (eval "$lt_compile" 2>out/conftest.err)
   ac_status=$?
   cat out/conftest.err >&5
-   echo "$as_me:7909: \$? = $ac_status" >&5
+   echo "$as_me:7914: \$? = $ac_status" >&5
   if (exit $ac_status) && test -s out/conftest2.$ac_objext
   then
     # The compiler can only warn and ignore the option if not recognized
@@ -10302,7 +10307,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 10305 "configure"
+#line 10310 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -10402,7 +10407,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 10405 "configure"
+#line 10410 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -12811,11 +12816,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:12814: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:12819: $lt_compile\"" >&5)
   (eval "$lt_compile" 2>conftest.err)
   ac_status=$?
   cat conftest.err >&5
-   echo "$as_me:12818: \$? = $ac_status" >&5
+   echo "$as_me:12823: \$? = $ac_status" >&5
  if (exit $ac_status) && test -s "$ac_outfile"; then
     # The compiler can only warn and ignore the option if not recognized
     # So say no if there are warnings other than the usual output.
@@ -12915,11 +12920,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:12918: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:12923: $lt_compile\"" >&5)
   (eval "$lt_compile" 2>out/conftest.err)
   ac_status=$?
   cat out/conftest.err >&5
-   echo "$as_me:12922: \$? = $ac_status" >&5
+   echo "$as_me:12927: \$? = $ac_status" >&5
   if (exit $ac_status) && test -s out/conftest2.$ac_objext
   then
     # The compiler can only warn and ignore the option if not recognized
@@ -14498,11 +14503,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:14501: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:14506: $lt_compile\"" >&5)
   (eval "$lt_compile" 2>conftest.err)
   ac_status=$?
   cat conftest.err >&5
-   echo "$as_me:14505: \$? = $ac_status" >&5
+   echo "$as_me:14510: \$? = $ac_status" >&5
   if (exit $ac_status) && test -s "$ac_outfile"; then
     # The compiler can only warn and ignore the option if not recognized
     # So say no if there are warnings other than the usual output.
@@ -14602,11 +14607,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:14605: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:14610: $lt_compile\"" >&5)
   (eval "$lt_compile" 2>out/conftest.err)
   ac_status=$?
   cat out/conftest.err >&5
-   echo "$as_me:14609: \$? = $ac_status" >&5
+   echo "$as_me:14614: \$? = $ac_status" >&5
   if (exit $ac_status) && test -s out/conftest2.$ac_objext
   then
     # The compiler can only warn and ignore the option if not recognized
@@ -16817,11 +16822,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:16820: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:16825: $lt_compile\"" >&5)
   (eval "$lt_compile" 2>conftest.err)
   ac_status=$?
   cat conftest.err >&5
-   echo "$as_me:16824: \$? = $ac_status" >&5
+   echo "$as_me:16829: \$? = $ac_status" >&5
   if (exit $ac_status) && test -s "$ac_outfile"; then
     # The compiler can only warn and ignore the option if not recognized
     # So say no if there are warnings other than the usual output.
@@ -17107,11 +17112,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:17110: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:17115: $lt_compile\"" >&5)
   (eval "$lt_compile" 2>conftest.err)
   ac_status=$?
   cat conftest.err >&5
-   echo "$as_me:17114: \$? = $ac_status" >&5
+   echo "$as_me:17119: \$? = $ac_status" >&5
   if (exit $ac_status) && test -s "$ac_outfile"; then
     # The compiler can only warn and ignore the option if not recognized
     # So say no if there are warnings other than the usual output.
@@ -17211,11 +17216,11 @@ else
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
-   (eval echo "\"\$as_me:17214: $lt_compile\"" >&5)
+   (eval echo "\"\$as_me:17219: $lt_compile\"" >&5)
   (eval "$lt_compile" 2>out/conftest.err)
   ac_status=$?
   cat out/conftest.err >&5
-   echo "$as_me:17218: \$? = $ac_status" >&5
+   echo "$as_me:17223: \$? = $ac_status" >&5
   if (exit $ac_status) && test -s out/conftest2.$ac_objext
   then
     # The compiler can only warn and ignore the option if not recognized
@@ -20593,6 +20598,16 @@ else
 fi
 
+# Default CFLAGS to -O -g rather than just the -g from AC_PROG_CC
+# if we're using Sun Studio and neither the user nor a config.site
+# has set CFLAGS.
+if test $SUNCC = yes && \
+   test "$test_CFLAGS" == "" && \
+   test "$CFLAGS" = "-g"
+then
+  CFLAGS="-O -g"
+fi
+
 #
 # We ignore pixman_major in the version here because the major version should
 # always be encoded in the actual library name. Ie., the soname is:
@@ -20603,13 +20618,13 @@ fi
 
-LT_VERSION_INFO="15:8:15"
+LT_VERSION_INFO="16:6:16"
 
 PIXMAN_VERSION_MAJOR=0
 
-PIXMAN_VERSION_MINOR=15
+PIXMAN_VERSION_MINOR=16
 
-PIXMAN_VERSION_MICRO=8
+PIXMAN_VERSION_MICRO=6
 
@@ -20618,10 +20633,18 @@ PIXMAN_VERSION_MICRO=8
 #PKG_CHECK_MODULES(DEP, x11)
 
 if test "x$GCC" = "xyes"; then
+
    case " $CFLAGS " in
    *[\ \ ]-Wall[\ \ ]*) ;;
    *) CFLAGS="$CFLAGS -Wall" ;;
-   esac fi
+   esac
+
+   case " $CFLAGS " in
+   *[\ \ ]-fno-strict-aliasing[\ \ ]*) ;;
+   *) CFLAGS="$CFLAGS -fno-strict-aliasing" ;;
+   esac
+
+fi
 
 # Extract the first word of "perl", so it can be a program name with args.
 set dummy perl; ac_word=$2
 { $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
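The hunk above changes two compiler-flag defaults: Sun Studio builds get -O -g when the user left CFLAGS alone, and GCC builds now always add -fno-strict-aliasing alongside -Wall. The latter matters because pixel code commonly reinterprets buffers through differently typed pointers, which C's strict-aliasing rule would otherwise let the optimizer miscompile. A minimal illustration of the hazard (a sketch, not pixman code):

#include <stdint.h>
#include <string.h>

uint32_t
load_pixel (const uint8_t *row, int x)
{
    /* Reading through (const uint32_t *) (row + 4 * x) breaks the
     * strict-aliasing rule; -fno-strict-aliasing tells the compiler
     * such loads may alias.  memcpy() is the portable spelling. */
    uint32_t px;
    memcpy (&px, row + 4 * x, sizeof px);
    return px;
}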
@@ -20969,9 +20992,53 @@ case $host_os in
 solaris*)
     # When building 32-bit binaries, apply a mapfile to ensure that the
     # binaries aren't flagged as only able to run on MMX+SSE capable CPUs
-    # since they check at runtime before using those instructions
+    # since they check at runtime before using those instructions.
+    # Not all linkers grok the mapfile format so we check for that first.
     if test "$AMD64_ABI" = "no" ; then
-        HWCAP_LDFLAGS='-Wl,-M,$(srcdir)/solaris-hwcap.mapfile'
+        use_hwcap_mapfile=no
+        { $as_echo "$as_me:$LINENO: checking whether to use a hardware capability map file" >&5
+$as_echo_n "checking whether to use a hardware capability map file... " >&6; }
+        hwcap_save_LDFLAGS="$LDFLAGS"
+        HWCAP_LDFLAGS='-Wl,-M,$(srcdir)/solaris-hwcap.mapfile'
+        LDFLAGS="$LDFLAGS -Wl,-M,pixman/solaris-hwcap.mapfile"
+        cat >conftest.$ac_ext <<_ACEOF
+int main() { return 0; }
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 $as_test_x conftest$ac_exeext
+       }; then
+  use_hwcap_mapfile=yes
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	HWCAP_LDFLAGS=""
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+        LDFLAGS="$hwcap_save_LDFLAGS"
+        { $as_echo "$as_me:$LINENO: result: $use_hwcap_mapfile" >&5
+$as_echo "$use_hwcap_mapfile" >&6; }
     fi
     if test "x$MMX_LDFLAGS" = "x" ; then
         MMX_LDFLAGS="$HWCAP_LDFLAGS"
@@ -20997,7 +21064,7 @@ have_vmx_intrinsics=no
 { $as_echo "$as_me:$LINENO: checking whether to use VMX/Altivec intrinsics" >&5
 $as_echo_n "checking whether to use VMX/Altivec intrinsics... " >&6; }
 xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$CFLAGS $VMX_CFLAGS"
+CFLAGS="$VMX_CFLAGS $CFLAGS"
 cat >conftest.$ac_ext <<_ACEOF
 
 #if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
 error "Need GCC >= 3.4 for sane altivec support"
@@ -21082,13 +21149,13 @@ else
 fi
 
-ARM_SIMD_CFLAGS=""
+ARM_SIMD_CFLAGS="-mcpu=arm1136j-s"
 
 have_arm_simd=no
 { $as_echo "$as_me:$LINENO: checking whether to use ARM SIMD assembler" >&5
 $as_echo_n "checking whether to use ARM SIMD assembler... " >&6; }
 xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$CFLAGS $ARM_SIMD_CFLAGS"
+CFLAGS="$ARM_SIMD_CFLAGS $CFLAGS"
 cat >conftest.$ac_ext <<_ACEOF
 
 int main () {
@@ -21155,13 +21222,26 @@ $as_echo "$as_me: error: ARM SIMD intrinsics not detected" >&2;}
    { (exit 1); exit 1; }; }
 fi
 
-ARM_NEON_CFLAGS="-mfpu=neon -mfloat-abi=softfp"
+
+
+
+
+if test $have_arm_simd = yes; then
+  USE_ARM_SIMD_TRUE=
+  USE_ARM_SIMD_FALSE='#'
+else
+  USE_ARM_SIMD_TRUE='#'
+  USE_ARM_SIMD_FALSE=
+fi
+
+
+ARM_NEON_CFLAGS="-mfpu=neon -mcpu=cortex-a8"
 
 have_arm_neon=no
 { $as_echo "$as_me:$LINENO: checking whether to use ARM NEON" >&5
 $as_echo_n "checking whether to use ARM NEON... " >&6; }
 xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$CFLAGS $ARM_NEON_CFLAGS"
+CFLAGS="$ARM_NEON_CFLAGS $CFLAGS"
 cat >conftest.$ac_ext <<_ACEOF
 
 #include <arm_neon.h>
 int main () {
@@ -21221,6 +21301,19 @@ else
    ARM_NEON_CFLAGS=
 fi
 
+
+
+
+
+if test $have_arm_neon = yes; then
+  USE_ARM_NEON_TRUE=
+  USE_ARM_NEON_FALSE='#'
+else
+  USE_ARM_NEON_TRUE='#'
+  USE_ARM_NEON_FALSE=
+fi
+
+
 { $as_echo "$as_me:$LINENO: result: $have_arm_neon" >&5
 $as_echo "$have_arm_neon" >&6; }
 if test $enable_arm_neon = yes && test $have_arm_neon = no ; then
@@ -21299,38 +21392,31 @@ fi
 
 
-
-
-
-
-if test $have_arm_simd = yes; then
-  USE_ARM_SIMD_TRUE=
-  USE_ARM_SIMD_FALSE='#'
+if test $have_gcc_inline_asm = yes; then
+  USE_GCC_INLINE_ASM_TRUE=
+  USE_GCC_INLINE_ASM_FALSE='#'
 else
-  USE_ARM_SIMD_TRUE='#'
-  USE_ARM_SIMD_FALSE=
+  USE_GCC_INLINE_ASM_TRUE='#'
+  USE_GCC_INLINE_ASM_FALSE=
 fi
 
-if test $have_arm_neon = yes; then
-  USE_ARM_NEON_TRUE=
-  USE_ARM_NEON_FALSE='#'
+# Check whether --enable-timers was given.
+if test "${enable_timers+set}" = set; then
+  enableval=$enable_timers; enable_timers=$enableval
 else
-  USE_ARM_NEON_TRUE='#'
-  USE_ARM_NEON_FALSE=
+  enable_timers=no
 fi
 
+if test $enable_timers = yes ; then
 
-if test $have_gcc_inline_asm = yes; then
-  USE_GCC_INLINE_ASM_TRUE=
-  USE_GCC_INLINE_ASM_FALSE='#'
-else
-  USE_GCC_INLINE_ASM_TRUE='#'
-  USE_GCC_INLINE_ASM_FALSE=
-fi
+cat >>confdefs.h <<\_ACEOF
+#define PIXMAN_TIMERS 1
+_ACEOF
 
+fi
@@ -21604,6 +21690,106 @@ fi
 
 
+
+{ $as_echo "$as_me:$LINENO: checking for posix_memalign" >&5
+$as_echo_n "checking for posix_memalign... " >&6; }
+if test "${ac_cv_func_posix_memalign+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+/* Define posix_memalign to an innocuous variant, in case <limits.h> declares posix_memalign.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define posix_memalign innocuous_posix_memalign
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char posix_memalign (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef posix_memalign
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char posix_memalign ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_posix_memalign || defined __stub___posix_memalign
+choke me
+#endif
+
+int
+main ()
+{
+return posix_memalign ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 $as_test_x conftest$ac_exeext
+       }; then
+  ac_cv_func_posix_memalign=yes
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_func_posix_memalign=no
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_func_posix_memalign" >&5
+$as_echo "$ac_cv_func_posix_memalign" >&6; }
+if test $ac_cv_func_posix_memalign = yes; then
+  have_posix_memalign=yes
+else
+  have_posix_memalign=no
+fi
+
+if test x$have_posix_memalign = xyes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_POSIX_MEMALIGN 1
+_ACEOF
+
+fi
+
 ac_config_files="$ac_config_files pixman-1.pc pixman-1-uninstalled.pc Makefile pixman/Makefile pixman/pixman-version.h test/Makefile"
 
 cat >confcache <<\_ACEOF
@@ -22096,7 +22282,7 @@ exec 6>&1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by pixman $as_me 0.15.8, which was
+This file was extended by pixman $as_me 0.16.6, which was
 generated by GNU Autoconf 2.62.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -22149,7 +22335,7 @@ Report bugs to <bug-autoconf@gnu.org>."
 
 _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_version="\\
-pixman config.status 0.15.8
+pixman config.status 0.16.6
 configured by $0, generated by GNU Autoconf 2.62,
   with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
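The posix_memalign test that configure expands above is a pure link check: it declares the symbol with a deliberately meaningless prototype and sees whether the program links, without ever running the call. Stripped of the __stub guards, the conftest boils down to something like:

/* Minimal form of the link probe above (a sketch; only linked, never run). */
char posix_memalign ();

int
main ()
{
    /* If this links, the C library exports a posix_memalign symbol. */
    return posix_memalign ();
}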
diff --git a/lib/pixman/configure.ac b/lib/pixman/configure.ac
index c9085d20b..8fa959ae4 100644
--- a/lib/pixman/configure.ac
+++ b/lib/pixman/configure.ac
@@ -31,7 +31,7 @@ AC_PREREQ([2.57])
 #
 #   - Released development versions have an odd MINOR number
 #
-#   - Released stable versions have an event MINOR number
+#   - Released stable versions have an even MINOR number
 #
 #   - Versions that break ABI must have a new MAJOR number
 #
@@ -53,8 +53,8 @@ AC_PREREQ([2.57])
 #
 m4_define([pixman_major], 0)
-m4_define([pixman_minor], 15)
-m4_define([pixman_micro], 8)
+m4_define([pixman_minor], 16)
+m4_define([pixman_micro], 6)
 
 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
 
@@ -65,6 +65,8 @@ AM_CONFIG_HEADER(config.h)
 
 AC_CANONICAL_HOST
 
+test_CFLAGS=${CFLAGS+set} # We may override autoconf default CFLAGS.
+
 AC_PROG_CC
 AC_PROG_LIBTOOL
 AC_CHECK_FUNCS([getisax])
@@ -75,6 +77,16 @@ AC_C_INLINE
 AC_CHECK_DECL([__SUNPRO_C], [SUNCC="yes"], [SUNCC="no"])
 AC_CHECK_DECL([__amd64], [AMD64_ABI="yes"], [AMD64_ABI="no"])
 
+# Default CFLAGS to -O -g rather than just the -g from AC_PROG_CC
+# if we're using Sun Studio and neither the user nor a config.site
+# has set CFLAGS.
+if test $SUNCC = yes && \
+   test "$test_CFLAGS" == "" && \
+   test "$CFLAGS" = "-g"
+then
+  CFLAGS="-O -g"
+fi
+
 #
 # We ignore pixman_major in the version here because the major version should
 # always be encoded in the actual library name. Ie., the soname is:
@@ -101,10 +113,18 @@ AC_SUBST(LT_VERSION_INFO)
 
 changequote(,)dnl
 if test "x$GCC" = "xyes"; then
+
    case " $CFLAGS " in
    *[\ \ ]-Wall[\ \ ]*) ;;
    *) CFLAGS="$CFLAGS -Wall" ;;
-   esac fi changequote([,])dnl
+   esac
+
+   case " $CFLAGS " in
+   *[\ \ ]-fno-strict-aliasing[\ \ ]*) ;;
+   *) CFLAGS="$CFLAGS -fno-strict-aliasing" ;;
+   esac
+
+fi changequote([,])dnl
 
 AC_PATH_PROG(PERL, perl, no)
 if test "x$PERL" = xno; then
@@ -259,9 +279,19 @@ case $host_os in
 solaris*)
     # When building 32-bit binaries, apply a mapfile to ensure that the
     # binaries aren't flagged as only able to run on MMX+SSE capable CPUs
-    # since they check at runtime before using those instructions
+    # since they check at runtime before using those instructions.
+    # Not all linkers grok the mapfile format so we check for that first.
     if test "$AMD64_ABI" = "no" ; then
-        HWCAP_LDFLAGS='-Wl,-M,$(srcdir)/solaris-hwcap.mapfile'
+        use_hwcap_mapfile=no
+        AC_MSG_CHECKING(whether to use a hardware capability map file)
+        hwcap_save_LDFLAGS="$LDFLAGS"
+        HWCAP_LDFLAGS='-Wl,-M,$(srcdir)/solaris-hwcap.mapfile'
+        LDFLAGS="$LDFLAGS -Wl,-M,pixman/solaris-hwcap.mapfile"
+        AC_LINK_IFELSE([int main() { return 0; }],
+                       use_hwcap_mapfile=yes,
+                       HWCAP_LDFLAGS="")
+        LDFLAGS="$hwcap_save_LDFLAGS"
+        AC_MSG_RESULT($use_hwcap_mapfile)
     fi
     if test "x$MMX_LDFLAGS" = "x" ; then
         MMX_LDFLAGS="$HWCAP_LDFLAGS"
@@ -288,7 +318,7 @@ fi
 have_vmx_intrinsics=no
 AC_MSG_CHECKING(whether to use VMX/Altivec intrinsics)
 xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$CFLAGS $VMX_CFLAGS"
+CFLAGS="$VMX_CFLAGS $CFLAGS"
 AC_COMPILE_IFELSE([
 #if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
 error "Need GCC >= 3.4 for sane altivec support"
@@ -325,13 +355,14 @@ AC_SUBST(VMX_CFLAGS)
 
 AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
 
+dnl ===========================================================================
 dnl Check for ARM SIMD instructions
-ARM_SIMD_CFLAGS=""
+ARM_SIMD_CFLAGS="-mcpu=arm1136j-s"
 
 have_arm_simd=no
 AC_MSG_CHECKING(whether to use ARM SIMD assembler)
 xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$CFLAGS $ARM_SIMD_CFLAGS"
+CFLAGS="$ARM_SIMD_CFLAGS $CFLAGS"
 AC_COMPILE_IFELSE([
 int main () {
     asm("uqadd8 r1, r1, r2");
@@ -359,13 +390,18 @@ if test $enable_arm_simd = yes && test $have_arm_simd = no ; then
    AC_MSG_ERROR([ARM SIMD intrinsics not detected])
 fi
 
+AC_SUBST(ARM_SIMD_CFLAGS)
+
+AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
+
+dnl ==========================================================================
 dnl Check for ARM NEON instructions
-ARM_NEON_CFLAGS="-mfpu=neon -mfloat-abi=softfp"
+ARM_NEON_CFLAGS="-mfpu=neon -mcpu=cortex-a8"
 
 have_arm_neon=no
 AC_MSG_CHECKING(whether to use ARM NEON)
 xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$CFLAGS $ARM_NEON_CFLAGS"
+CFLAGS="$ARM_NEON_CFLAGS $CFLAGS"
 AC_COMPILE_IFELSE([
 #include <arm_neon.h>
 int main () {
@@ -389,11 +425,16 @@ else
    ARM_NEON_CFLAGS=
 fi
 
+AC_SUBST(ARM_NEON_CFLAGS)
+
+AM_CONDITIONAL(USE_ARM_NEON, test $have_arm_neon = yes)
+
 AC_MSG_RESULT($have_arm_neon)
 if test $enable_arm_neon = yes && test $have_arm_neon = no ; then
    AC_MSG_ERROR([ARM NEON intrinsics not detected])
 fi
 
+dnl =========================================================================================
 dnl Check for GNU-style inline assembly support
 
 have_gcc_inline_asm=no
@@ -423,15 +464,23 @@ if test $enable_gcc_inline_asm = yes && test $have_gcc_inline_asm = no ; then
    AC_MSG_ERROR([GNU-style inline assembler not detected])
 fi
 
+AM_CONDITIONAL(USE_GCC_INLINE_ASM, test $have_gcc_inline_asm = yes)
 
-AC_SUBST(ARM_SIMD_CFLAGS)
-AC_SUBST(ARM_NEON_CFLAGS)
+dnl ==============================
+dnl Timers
 
-AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
-AM_CONDITIONAL(USE_ARM_NEON, test $have_arm_neon = yes)
-AM_CONDITIONAL(USE_GCC_INLINE_ASM, test $have_gcc_inline_asm = yes)
+AC_ARG_ENABLE(timers,
+   [AC_HELP_STRING([--enable-timers],
+                   [enable TIMER_BEGIN and TIMER_END macros [default=no]])],
+   [enable_timers=$enableval], [enable_timers=no])
+if test $enable_timers = yes ; then
+   AC_DEFINE(PIXMAN_TIMERS, 1, [enable TIMER_BEGIN/TIMER_END macros])
+fi
+AC_SUBST(PIXMAN_TIMERS)
 
+dnl ===================================
+dnl GTK+
 
 AC_ARG_ENABLE(gtk,
    [AC_HELP_STRING([--enable-gtk],
@@ -452,7 +501,15 @@ AC_SUBST(GTK_CFLAGS)
 AC_SUBST(GTK_LIBS)
 AC_SUBST(DEP_CFLAGS)
 AC_SUBST(DEP_LIBS)
-
+
+dnl =====================================
+dnl posix_memalign
+
+AC_CHECK_FUNC(posix_memalign, have_posix_memalign=yes, have_posix_memalign=no)
+if test x$have_posix_memalign = xyes; then
+   AC_DEFINE(HAVE_POSIX_MEMALIGN, 1, [Whether we have posix_memalign()])
+fi
+
 AC_OUTPUT([pixman-1.pc
 	   pixman-1-uninstalled.pc
 	   Makefile
diff --git a/lib/pixman/ltmain.sh b/lib/pixman/ltmain.sh
index 248cd4047..fccf69e28 100644
--- a/lib/pixman/ltmain.sh
+++ b/lib/pixman/ltmain.sh
@@ -2127,17 +2127,6 @@ EOF
 	;;
 	esac
 	for pass in $passes; do
-	  # The preopen pass in lib mode reverses $deplibs; put it back here
-	  # so that -L comes before libs that need it for instance...
-	  if test "$linkmode,$pass" = "lib,link"; then
-	    ## FIXME: Find the place where the list is rebuilt in the wrong
-	    ## order, and fix it there properly
-	    tmp_deplibs=
-	    for deplib in $deplibs; do
-	      tmp_deplibs="$deplib $tmp_deplibs"
-	    done
-	    deplibs="$tmp_deplibs"
-	  fi
 	  if test "$linkmode,$pass" = "lib,link" ||
 	     test "$linkmode,$pass" = "prog,scan"; then
 	    libs="$deplibs"
diff --git a/lib/pixman/pixman/Makefile.am b/lib/pixman/pixman/Makefile.am
index 863caa35f..e19fa6e7f 100644
--- a/lib/pixman/pixman/Makefile.am
+++ b/lib/pixman/pixman/Makefile.am
@@ -4,12 +4,14 @@ libpixman_1_la_LIBADD = @DEP_LIBS@ -lm
 libpixman_1_la_CFLAGS = -DPIXMAN_DISABLE_DEPRECATED
 libpixman_1_la_SOURCES = \
 	pixman.h \
+	pixman-accessor.h \
 	pixman-access.c \
 	pixman-access-accessors.c \
 	pixman-cpu.c \
 	pixman-gradient-walker.c \
 	pixman-region16.c \
 	pixman-region32.c \
+	pixman-compiler.h \
 	pixman-private.h \
 	pixman-image.c \
 	pixman-implementation.c \
@@ -18,21 +20,18 @@ libpixman_1_la_SOURCES = \
 	pixman-combine64.c \
 	pixman-combine64.h \
 	pixman-general.c \
-	pixman-pict.c \
+	pixman.c \
 	pixman-fast-path.c \
 	pixman-solid-fill.c \
 	pixman-conical-gradient.c \
 	pixman-linear-gradient.c \
 	pixman-radial-gradient.c \
 	pixman-bits-image.c \
-	pixman-transformed.c \
-	pixman-transformed-accessors.c \
 	pixman-utils.c \
 	pixman-edge.c \
 	pixman-edge-accessors.c \
 	pixman-edge-imp.h \
 	pixman-trap.c \
-	pixman-compute-region.c \
 	pixman-timer.c \
 	pixman-matrix.c
 
@@ -40,18 +39,20 @@ libpixmanincludedir = $(includedir)/pixman-1/
 libpixmaninclude_HEADERS = pixman.h pixman-version.h
 noinst_LTLIBRARIES = 
 
-pixman-combine32.c : combine.inc pixman-combine32.h combine.pl
-	$(PERL) $(srcdir)/combine.pl 8 < $(srcdir)/combine.inc > $@ || ($(RM) $@; exit 1)
-pixman-combine32.h : combine.h.inc combine.pl
-	$(PERL) $(srcdir)/combine.pl 8 < $(srcdir)/combine.h.inc > $@ || ($(RM) $@; exit 1)
+BUILT_SOURCES = pixman-combine32.h pixman-combine32.c pixman-combine64.h pixman-combine64.c
 
-pixman-combine64.c : combine.inc pixman-combine64.h combine.pl
-	$(PERL) $(srcdir)/combine.pl 16 < $(srcdir)/combine.inc > $@ || ($(RM) $@; exit 1)
-pixman-combine64.h : combine.h.inc combine.pl
-	$(PERL) $(srcdir)/combine.pl 16 < $(srcdir)/combine.h.inc > $@ || ($(RM) $@; exit 1)
+pixman-combine32.c : pixman-combine.c.template pixman-combine32.h make-combine.pl
+	$(PERL) $(srcdir)/make-combine.pl 8 < $(srcdir)/pixman-combine.c.template > $@ || ($(RM) $@; exit 1)
+pixman-combine32.h : pixman-combine.h.template make-combine.pl
+	$(PERL) $(srcdir)/make-combine.pl 8 < $(srcdir)/pixman-combine.h.template > $@ || ($(RM) $@; exit 1)
 
-EXTRA_DIST = Makefile.win32 combine.inc combine.pl pixman-region.c \
-	combine.h.inc solaris-hwcap.mapfile
+pixman-combine64.c : pixman-combine.c.template pixman-combine64.h make-combine.pl
+	$(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.c.template > $@ || ($(RM) $@; exit 1)
+pixman-combine64.h : pixman-combine.h.template make-combine.pl
+	$(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.h.template > $@ || ($(RM) $@; exit 1)
+
+EXTRA_DIST = Makefile.win32 pixman-combine.c.template make-combine.pl pixman-region.c \
+	pixman-combine.h.template solaris-hwcap.mapfile pixman-x64-mmx-emulation.h
 
 CLEANFILES = pixman-combine32.c pixman-combine64.c pixman-combine32.h pixman-combine64.h
 
 # mmx code
@@ -91,8 +92,7 @@ endif
 if USE_ARM_SIMD
 noinst_LTLIBRARIES += libpixman-arm-simd.la
 libpixman_arm_simd_la_SOURCES = \
-	pixman-arm-simd.c \
-	pixman-arm-simd.h
+	pixman-arm-simd.c
 libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS) $(ARM_SIMD_CFLAGS)
 libpixman_arm_simd_la_LIBADD = $(DEP_LIBS)
 libpixman_1_la_LIBADD += libpixman-arm-simd.la
@@ -102,8 +102,7 @@ endif
 if USE_ARM_NEON
 noinst_LTLIBRARIES += libpixman-arm-neon.la
 libpixman_arm_neon_la_SOURCES = \
-	pixman-arm-neon.c \
-	pixman-arm-neon.h
+	pixman-arm-neon.c
 libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS) $(ARM_NEON_CFLAGS)
 libpixman_arm_neon_la_LIBADD = $(DEP_LIBS)
 libpixman_1_la_LIBADD += libpixman-arm-neon.la
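The build changes above replace the old combine.inc/combine.pl pair with pixman-combine.{c,h}.template and make-combine.pl, and register the generated files as BUILT_SOURCES so they are created before the normal targets are built. The same template is instantiated twice, once per component width; roughly (the comp*_t names appear in the generated code below, the concrete widths are inferred from the 8/16 arguments above):

#include <stdint.h>

/* 8 bits per component -> 32-bit pixels (pixman-combine32) */
typedef uint8_t  comp1_t;   /* one color component       */
typedef uint16_t comp2_t;   /* holds a component product */
typedef uint32_t comp4_t;   /* one whole pixel           */

/* The 16-bit instantiation (pixman-combine64) would presumably use
 * uint16_t/uint32_t/uint64_t, with MASK, G_SHIFT etc. scaled to match. */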
libpixman_arm_neon_la_OBJECTS = $(am_libpixman_arm_neon_la_OBJECTS) @USE_ARM_NEON_TRUE@am_libpixman_arm_neon_la_rpath = @USE_ARM_SIMD_TRUE@libpixman_arm_simd_la_DEPENDENCIES = \ @USE_ARM_SIMD_TRUE@ $(am__DEPENDENCIES_6) -am__libpixman_arm_simd_la_SOURCES_DIST = pixman-arm-simd.c \ - pixman-arm-simd.h +am__libpixman_arm_simd_la_SOURCES_DIST = pixman-arm-simd.c @USE_ARM_SIMD_TRUE@am_libpixman_arm_simd_la_OBJECTS = \ @USE_ARM_SIMD_TRUE@ libpixman_arm_simd_la-pixman-arm-simd.lo libpixman_arm_simd_la_OBJECTS = $(am_libpixman_arm_simd_la_OBJECTS) @@ -235,6 +230,7 @@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PERL = @PERL@ +PIXMAN_TIMERS = @PIXMAN_TIMERS@ PIXMAN_VERSION_MAJOR = @PIXMAN_VERSION_MAJOR@ PIXMAN_VERSION_MICRO = @PIXMAN_VERSION_MICRO@ PIXMAN_VERSION_MINOR = @PIXMAN_VERSION_MINOR@ @@ -315,12 +311,14 @@ libpixman_1_la_LIBADD = @DEP_LIBS@ -lm $(am__append_3) $(am__append_5) \ libpixman_1_la_CFLAGS = -DPIXMAN_DISABLE_DEPRECATED libpixman_1_la_SOURCES = \ pixman.h \ + pixman-accessor.h \ pixman-access.c \ pixman-access-accessors.c \ pixman-cpu.c \ pixman-gradient-walker.c \ pixman-region16.c \ pixman-region32.c \ + pixman-compiler.h \ pixman-private.h \ pixman-image.c \ pixman-implementation.c \ @@ -329,21 +327,18 @@ libpixman_1_la_SOURCES = \ pixman-combine64.c \ pixman-combine64.h \ pixman-general.c \ - pixman-pict.c \ + pixman.c \ pixman-fast-path.c \ pixman-solid-fill.c \ pixman-conical-gradient.c \ pixman-linear-gradient.c \ pixman-radial-gradient.c \ pixman-bits-image.c \ - pixman-transformed.c \ - pixman-transformed-accessors.c \ pixman-utils.c \ pixman-edge.c \ pixman-edge-accessors.c \ pixman-edge-imp.h \ pixman-trap.c \ - pixman-compute-region.c \ pixman-timer.c \ pixman-matrix.c @@ -351,8 +346,9 @@ libpixmanincludedir = $(includedir)/pixman-1/ libpixmaninclude_HEADERS = pixman.h pixman-version.h noinst_LTLIBRARIES = $(am__append_1) $(am__append_4) $(am__append_6) \ $(am__append_9) $(am__append_11) -EXTRA_DIST = Makefile.win32 combine.inc combine.pl pixman-region.c \ - combine.h.inc solaris-hwcap.mapfile +BUILT_SOURCES = pixman-combine32.h pixman-combine32.c pixman-combine64.h pixman-combine64.c +EXTRA_DIST = Makefile.win32 pixman-combine.c.template make-combine.pl pixman-region.c \ + pixman-combine.h.template solaris-hwcap.mapfile pixman-x64-mmx-emulation.h CLEANFILES = pixman-combine32.c pixman-combine64.c pixman-combine32.h pixman-combine64.h @USE_MMX_TRUE@libpixman_mmx_la_SOURCES = \ @@ -372,18 +368,17 @@ CLEANFILES = pixman-combine32.c pixman-combine64.c pixman-combine32.h pixman-com @USE_SSE2_TRUE@libpixman_sse2_la_CFLAGS = $(DEP_CFLAGS) $(SSE2_CFLAGS) @USE_SSE2_TRUE@libpixman_sse2_la_LIBADD = $(DEP_LIBS) @USE_ARM_SIMD_TRUE@libpixman_arm_simd_la_SOURCES = \ -@USE_ARM_SIMD_TRUE@ pixman-arm-simd.c \ -@USE_ARM_SIMD_TRUE@ pixman-arm-simd.h +@USE_ARM_SIMD_TRUE@ pixman-arm-simd.c @USE_ARM_SIMD_TRUE@libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS) $(ARM_SIMD_CFLAGS) @USE_ARM_SIMD_TRUE@libpixman_arm_simd_la_LIBADD = $(DEP_LIBS) @USE_ARM_NEON_TRUE@libpixman_arm_neon_la_SOURCES = \ -@USE_ARM_NEON_TRUE@ pixman-arm-neon.c \ -@USE_ARM_NEON_TRUE@ pixman-arm-neon.h +@USE_ARM_NEON_TRUE@ pixman-arm-neon.c @USE_ARM_NEON_TRUE@libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS) $(ARM_NEON_CFLAGS) @USE_ARM_NEON_TRUE@libpixman_arm_neon_la_LIBADD = $(DEP_LIBS) -all: all-am +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am .SUFFIXES: .SUFFIXES: .c .lo .o .obj @@ -478,7 +473,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/libpixman_1_la-pixman-bits-image.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-combine32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-combine64.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-compute-region.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-conical-gradient.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-cpu.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-edge-accessors.Plo@am__quote@ @@ -490,16 +484,14 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-implementation.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-linear-gradient.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-matrix.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-pict.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-radial-gradient.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-region16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-region32.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-solid-fill.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-timer.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-transformed-accessors.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-transformed.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-trap.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-utils.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_arm_neon_la-pixman-arm-neon.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_arm_simd_la-pixman-arm-simd.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_mmx_la-pixman-mmx.Plo@am__quote@ @@ -604,12 +596,12 @@ libpixman_1_la-pixman-general.lo: pixman-general.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-general.lo `test -f 'pixman-general.c' || echo '$(srcdir)/'`pixman-general.c -libpixman_1_la-pixman-pict.lo: pixman-pict.c -@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-pict.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-pict.Tpo" -c -o libpixman_1_la-pixman-pict.lo `test -f 'pixman-pict.c' || echo '$(srcdir)/'`pixman-pict.c; \ -@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-pict.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-pict.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-pict.Tpo"; exit 1; fi -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-pict.c' object='libpixman_1_la-pixman-pict.lo' libtool=yes @AMDEPBACKSLASH@ +libpixman_1_la-pixman.lo: pixman.c +@am__fastdepCC_TRUE@ if 
$(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman.Tpo" -c -o libpixman_1_la-pixman.lo `test -f 'pixman.c' || echo '$(srcdir)/'`pixman.c; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman.Tpo" "$(DEPDIR)/libpixman_1_la-pixman.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman.c' object='libpixman_1_la-pixman.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-pict.lo `test -f 'pixman-pict.c' || echo '$(srcdir)/'`pixman-pict.c +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman.lo `test -f 'pixman.c' || echo '$(srcdir)/'`pixman.c libpixman_1_la-pixman-fast-path.lo: pixman-fast-path.c @am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-fast-path.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-fast-path.Tpo" -c -o libpixman_1_la-pixman-fast-path.lo `test -f 'pixman-fast-path.c' || echo '$(srcdir)/'`pixman-fast-path.c; \ @@ -653,20 +645,6 @@ libpixman_1_la-pixman-bits-image.lo: pixman-bits-image.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-bits-image.lo `test -f 'pixman-bits-image.c' || echo '$(srcdir)/'`pixman-bits-image.c -libpixman_1_la-pixman-transformed.lo: pixman-transformed.c -@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-transformed.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-transformed.Tpo" -c -o libpixman_1_la-pixman-transformed.lo `test -f 'pixman-transformed.c' || echo '$(srcdir)/'`pixman-transformed.c; \ -@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-transformed.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-transformed.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-transformed.Tpo"; exit 1; fi -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-transformed.c' object='libpixman_1_la-pixman-transformed.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-transformed.lo `test -f 'pixman-transformed.c' || echo '$(srcdir)/'`pixman-transformed.c - -libpixman_1_la-pixman-transformed-accessors.lo: pixman-transformed-accessors.c -@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-transformed-accessors.lo 
-MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-transformed-accessors.Tpo" -c -o libpixman_1_la-pixman-transformed-accessors.lo `test -f 'pixman-transformed-accessors.c' || echo '$(srcdir)/'`pixman-transformed-accessors.c; \ -@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-transformed-accessors.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-transformed-accessors.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-transformed-accessors.Tpo"; exit 1; fi -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-transformed-accessors.c' object='libpixman_1_la-pixman-transformed-accessors.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-transformed-accessors.lo `test -f 'pixman-transformed-accessors.c' || echo '$(srcdir)/'`pixman-transformed-accessors.c - libpixman_1_la-pixman-utils.lo: pixman-utils.c @am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-utils.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-utils.Tpo" -c -o libpixman_1_la-pixman-utils.lo `test -f 'pixman-utils.c' || echo '$(srcdir)/'`pixman-utils.c; \ @am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-utils.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-utils.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-utils.Tpo"; exit 1; fi @@ -695,13 +673,6 @@ libpixman_1_la-pixman-trap.lo: pixman-trap.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-trap.lo `test -f 'pixman-trap.c' || echo '$(srcdir)/'`pixman-trap.c -libpixman_1_la-pixman-compute-region.lo: pixman-compute-region.c -@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-compute-region.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-compute-region.Tpo" -c -o libpixman_1_la-pixman-compute-region.lo `test -f 'pixman-compute-region.c' || echo '$(srcdir)/'`pixman-compute-region.c; \ -@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-compute-region.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-compute-region.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-compute-region.Tpo"; exit 1; fi -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-compute-region.c' object='libpixman_1_la-pixman-compute-region.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-compute-region.lo `test -f 'pixman-compute-region.c' || echo '$(srcdir)/'`pixman-compute-region.c - libpixman_1_la-pixman-timer.lo: pixman-timer.c @am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-timer.lo -MD -MP -MF 
"$(DEPDIR)/libpixman_1_la-pixman-timer.Tpo" -c -o libpixman_1_la-pixman-timer.lo `test -f 'pixman-timer.c' || echo '$(srcdir)/'`pixman-timer.c; \ @am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-timer.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-timer.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-timer.Tpo"; exit 1; fi @@ -854,13 +825,15 @@ distdir: $(DISTFILES) fi; \ done check-am: all-am -check: check-am +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am all-am: Makefile $(LTLIBRARIES) $(HEADERS) installdirs: for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(libpixmanincludedir)"; do \ test -z "$$dir" || $(mkdir_p) "$$dir"; \ done -install: install-am +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am @@ -885,6 +858,7 @@ distclean-generic: maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." + -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) clean: clean-am clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ @@ -952,15 +926,15 @@ uninstall-am: uninstall-info-am uninstall-libLTLIBRARIES \ uninstall-libLTLIBRARIES uninstall-libpixmanincludeHEADERS -pixman-combine32.c : combine.inc pixman-combine32.h combine.pl - $(PERL) $(srcdir)/combine.pl 8 < $(srcdir)/combine.inc > $@ || ($(RM) $@; exit 1) -pixman-combine32.h : combine.h.inc combine.pl - $(PERL) $(srcdir)/combine.pl 8 < $(srcdir)/combine.h.inc > $@ || ($(RM) $@; exit 1) +pixman-combine32.c : pixman-combine.c.template pixman-combine32.h make-combine.pl + $(PERL) $(srcdir)/make-combine.pl 8 < $(srcdir)/pixman-combine.c.template > $@ || ($(RM) $@; exit 1) +pixman-combine32.h : pixman-combine.h.template make-combine.pl + $(PERL) $(srcdir)/make-combine.pl 8 < $(srcdir)/pixman-combine.h.template > $@ || ($(RM) $@; exit 1) -pixman-combine64.c : combine.inc pixman-combine64.h combine.pl - $(PERL) $(srcdir)/combine.pl 16 < $(srcdir)/combine.inc > $@ || ($(RM) $@; exit 1) -pixman-combine64.h : combine.h.inc combine.pl - $(PERL) $(srcdir)/combine.pl 16 < $(srcdir)/combine.h.inc > $@ || ($(RM) $@; exit 1) +pixman-combine64.c : pixman-combine.c.template pixman-combine64.h make-combine.pl + $(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.c.template > $@ || ($(RM) $@; exit 1) +pixman-combine64.h : pixman-combine.h.template make-combine.pl + $(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.h.template > $@ || ($(RM) $@; exit 1) # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
.NOEXPORT: diff --git a/lib/pixman/pixman/Makefile.win32 b/lib/pixman/pixman/Makefile.win32 index 208bb2e39..388bee23a 100644 --- a/lib/pixman/pixman/Makefile.win32 +++ b/lib/pixman/pixman/Makefile.win32 @@ -29,27 +29,30 @@ else CFLAGS += -O2 endif -SOURCES = \ - pixman-image.c \ - pixman-access.c \ - pixman-access-accessors.c \ +SOURCES = \ + pixman-image.c \ + pixman-access.c \ + pixman-access-accessors.c \ pixman-region16.c \ pixman-region32.c \ - pixman-compose.c \ - pixman-compose-accessors.c \ - pixman-combine32.c \ - pixman-combine64.c \ - pixman-pict.c \ - pixman-source.c \ - pixman-transformed.c \ - pixman-transformed-accessors.c \ - pixman-utils.c \ - pixman-edge.c \ + pixman-combine32.c \ + pixman-combine64.c \ + pixman-utils.c \ + pixman-edge.c \ pixman-edge-accessors.c \ - pixman-trap.c \ - pixman-compute-region.c \ - pixman-timer.c \ - pixman-matrix.c \ + pixman-trap.c \ + pixman-timer.c \ + pixman-matrix.c \ + pixman-gradient-walker.c \ + pixman-linear-gradient.c \ + pixman-radial-gradient.c \ + pixman-bits-image.c \ + pixman.c \ + pixman-cpu.c \ + pixman-fast-path.c \ + pixman-implementation.c \ + pixman-solid-fill.c \ + pixman-general.c \ $(NULL) # MMX compilation flags @@ -125,16 +128,16 @@ $(CFG_VAR)/%.obj: %.c $(CFG_VAR)/$(LIBRARY).lib: $(OBJECTS) lib -NOLOGO -OUT:$@ $(OBJECTS) || exit 0 -pixman-combine32.c: combine.inc pixman-combine32.h combine.pl - perl ./combine.pl 8 < $< > $@ || ($(RM) $@; exit 1) -pixman-combine32.h: combine.h.inc combine.pl - perl ./combine.pl 8 < $< > $@ || ($(RM) $@; exit 1) +pixman-combine32.c: pixman-combine.c.template pixman-combine32.h make-combine.pl + perl ./make-combine.pl 8 < $< > $@ || ($(RM) $@; exit 1) +pixman-combine32.h: pixman-combine.h.template make-combine.pl + perl ./make-combine.pl 8 < $< > $@ || ($(RM) $@; exit 1) -pixman-combine64.c: combine.inc pixman-combine64.h combine.pl - perl ./combine.pl 16 < $< > $@ || ($(RM) $@; exit 1) -pixman-combine64.h: combine.h.inc combine.pl - perl ./combine.pl 16 < $< > $@ || ($(RM) $@; exit 1) +pixman-combine64.c: pixman-combine.c.template pixman-combine64.h make-combine.pl + perl ./make-combine.pl 16 < $< > $@ || ($(RM) $@; exit 1) +pixman-combine64.h: pixman-combine.h.template make-combine.pl + perl ./make-combine.pl 16 < $< > $@ || ($(RM) $@; exit 1) clean_r: @rm -f $(CFG_VAR)/*.obj $(CFG_VAR)/*.lib $(CFG_VAR)/*.pdb $(CFG)/*.ilk || exit 0 - @rm -f $(CFG)/*.obj $(CFG)/*.lib $(CFG)/*.pdb $(CFG)/*.ilk pixman-combine32.c pixman-combine64.c || exit 0 + @rm -f $(CFG)/*.obj $(CFG)/*.lib $(CFG)/*.pdb $(CFG)/*.ilk pixman-combine32.c pixman-combine64.c pixman-combine64.c pixman-combine64.h || exit 0 diff --git a/lib/pixman/pixman/combine.h.inc b/lib/pixman/pixman/combine.h.inc deleted file mode 100644 index 6ecd30139..000000000 --- a/lib/pixman/pixman/combine.h.inc +++ /dev/null @@ -1,213 +0,0 @@ - -#define COMPONENT_SIZE -#define MASK -#define ONE_HALF - -#define A_SHIFT -#define R_SHIFT -#define G_SHIFT -#define A_MASK -#define R_MASK -#define G_MASK - -#define RB_MASK -#define AG_MASK -#define RB_ONE_HALF -#define RB_MASK_PLUS_ONE - -#define Alpha(x) ((x) >> A_SHIFT) - -/* - * Helper macros. 
- */ - -#define IntMult(a,b,t) ( (t) = (a) * (b) + ONE_HALF, ( ( ( (t)>>G_SHIFT ) + (t) )>>G_SHIFT ) ) -#define IntDiv(a,b) (((comp2_t) (a) * MASK) / (b)) - -#define GetComp(v,i) ((comp2_t) (comp1_t) ((v) >> i)) - -#define Add(x,y,i,t) ((t) = GetComp(x,i) + GetComp(y,i), \ - (comp4_t) ((comp1_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i)) - -#define FbGen(x,y,i,ax,ay,t,u,v) ((t) = (IntMult(GetComp(y,i),ay,(u)) + \ - IntMult(GetComp(x,i),ax,(v))), \ - (comp4_t) ((comp1_t) ((t) | \ - (0 - ((t) >> G_SHIFT)))) << (i)) - -/* - The methods below use some tricks to be able to do two color - components at the same time. -*/ - -/* - x_c = (x_c * a) / 255 -*/ -#define FbByteMul(x, a) do { \ - comp4_t t = ((x & RB_MASK) * a) + RB_ONE_HALF; \ - t = (t + ((t >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE; \ - t &= RB_MASK; \ - \ - x = (((x >> COMPONENT_SIZE) & RB_MASK) * a) + RB_ONE_HALF; \ - x = (x + ((x >> COMPONENT_SIZE) & RB_MASK)); \ - x &= RB_MASK << COMPONENT_SIZE; \ - x += t; \ - } while (0) - -/* - x_c = (x_c * a) / 255 + y -*/ -#define FbByteMulAdd(x, a, y) do { \ - /* multiply and divide: trunc((i + 128)*257/65536) */ \ - comp4_t t = ((x & RB_MASK) * a) + RB_ONE_HALF; \ - t = (t + ((t >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE; \ - t &= RB_MASK; \ - \ - /* add */ \ - t += y & RB_MASK; \ - \ - /* saturate */ \ - t |= RB_MASK_PLUS_ONE - ((t >> COMPONENT_SIZE) & RB_MASK); \ - t &= RB_MASK; \ - \ - /* multiply and divide */ \ - x = (((x >> COMPONENT_SIZE) & RB_MASK) * a) + RB_ONE_HALF; \ - x = (x + ((x >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE; \ - x &= RB_MASK; \ - \ - /* add */ \ - x += (y >> COMPONENT_SIZE) & RB_MASK; \ - \ - /* saturate */ \ - x |= RB_MASK_PLUS_ONE - ((x >> COMPONENT_SIZE) & RB_MASK); \ - x &= RB_MASK; \ - \ - /* recombine */ \ - x <<= COMPONENT_SIZE; \ - x += t; \ - } while (0) - -/* - x_c = (x_c * a + y_c * b) / 255 -*/ -#define FbByteAddMul(x, a, y, b) do { \ - comp4_t t; \ - comp4_t r = (x >> A_SHIFT) * a + (y >> A_SHIFT) * b + ONE_HALF; \ - r += (r >> G_SHIFT); \ - r >>= G_SHIFT; \ - \ - t = (x & G_MASK) * a + (y & G_MASK) * b; \ - t += (t >> G_SHIFT) + (ONE_HALF << G_SHIFT); \ - t >>= R_SHIFT; \ - \ - t |= r << R_SHIFT; \ - t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \ - t &= RB_MASK; \ - t <<= G_SHIFT; \ - \ - r = ((x >> R_SHIFT) & MASK) * a + \ - ((y >> R_SHIFT) & MASK) * b + ONE_HALF; \ - r += (r >> G_SHIFT); \ - r >>= G_SHIFT; \ - \ - x = (x & MASK) * a + (y & MASK) * b + ONE_HALF; \ - x += (x >> G_SHIFT); \ - x >>= G_SHIFT; \ - x |= r << R_SHIFT; \ - x |= RB_MASK_PLUS_ONE - ((x >> G_SHIFT) & RB_MASK); \ - x &= RB_MASK; \ - x |= t; \ - } while (0) - -/* - x_c = (x_c * a_c) / 255 -*/ -#define FbByteMulC(x, a) do { \ - comp4_t t; \ - comp4_t r = (x & MASK) * (a & MASK); \ - r |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \ - r += RB_ONE_HALF; \ - r = (r + ((r >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ - r &= RB_MASK; \ - \ - x >>= G_SHIFT; \ - t = (x & MASK) * ((a >> G_SHIFT) & MASK); \ - t |= (x & R_MASK) * (a >> A_SHIFT); \ - t += RB_ONE_HALF; \ - t = t + ((t >> G_SHIFT) & RB_MASK); \ - x = r | (t & AG_MASK); \ - } while (0) - -/* - x_c = (x_c * a) / 255 + y -*/ -#define FbByteMulAddC(x, a, y) do { \ - comp4_t t; \ - comp4_t r = (x & MASK) * (a & MASK); \ - r |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \ - r += RB_ONE_HALF; \ - r = (r + ((r >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ - r &= RB_MASK; \ - r += y & RB_MASK; \ - r |= RB_MASK_PLUS_ONE - ((r >> G_SHIFT) & RB_MASK); \ - r &= RB_MASK; \ - \ - x >>= G_SHIFT; \ - t = (x & MASK) * ((a >> G_SHIFT) & MASK); 
\ - t |= (x & R_MASK) * (a >> A_SHIFT); \ - t += RB_ONE_HALF; \ - t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ - t &= RB_MASK; \ - t += (y >> G_SHIFT) & RB_MASK; \ - t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \ - t &= RB_MASK; \ - x = r | (t << G_SHIFT); \ - } while (0) - -/* - x_c = (x_c * a_c + y_c * b) / 255 -*/ -#define FbByteAddMulC(x, a, y, b) do { \ - comp4_t t; \ - comp4_t r = (x >> A_SHIFT) * (a >> A_SHIFT) + \ - (y >> A_SHIFT) * b; \ - r += (r >> G_SHIFT) + ONE_HALF; \ - r >>= G_SHIFT; \ - \ - t = (x & G_MASK) * ((a >> G_SHIFT) & MASK) + (y & G_MASK) * b; \ - t += (t >> G_SHIFT) + (ONE_HALF << G_SHIFT); \ - t >>= R_SHIFT; \ - \ - t |= r << R_SHIFT; \ - t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \ - t &= RB_MASK; \ - t <<= G_SHIFT; \ - \ - r = ((x >> R_SHIFT) & MASK) * ((a >> R_SHIFT) & MASK) + \ - ((y >> R_SHIFT) & MASK) * b + ONE_HALF; \ - r += (r >> G_SHIFT); \ - r >>= G_SHIFT; \ - \ - x = (x & MASK) * (a & MASK) + (y & MASK) * b + ONE_HALF; \ - x += (x >> G_SHIFT); \ - x >>= G_SHIFT; \ - x |= r << R_SHIFT; \ - x |= RB_MASK_PLUS_ONE - ((x >> G_SHIFT) & RB_MASK); \ - x &= RB_MASK; \ - x |= t; \ - } while (0) - -/* - x_c = min(x_c + y_c, 255) -*/ -#define FbByteAdd(x, y) do { \ - comp4_t t; \ - comp4_t r = (x & RB_MASK) + (y & RB_MASK); \ - r |= RB_MASK_PLUS_ONE - ((r >> G_SHIFT) & RB_MASK); \ - r &= RB_MASK; \ - \ - t = ((x >> G_SHIFT) & RB_MASK) + ((y >> G_SHIFT) & RB_MASK); \ - t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \ - r |= (t & RB_MASK) << G_SHIFT; \ - x = r; \ - } while (0) - diff --git a/lib/pixman/pixman/combine.inc b/lib/pixman/pixman/combine.inc deleted file mode 100644 index 0d5569400..000000000 --- a/lib/pixman/pixman/combine.inc +++ /dev/null @@ -1,1339 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#include <string.h> - -#include "pixman-private.h" - -#include "pixman-combine.h" - -/* - * There are two ways of handling alpha -- either as a single unified value or - * a separate value for each component, hence each macro must have two - * versions. The unified alpha version has a 'U' at the end of the name, - * the component version has a 'C'. Similarly, functions which deal with - * this difference will have two versions using the same convention. 
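- *
- * For example (an illustrative gloss): with s = 0xAARRGGBB and mask m,
- * the unified ('U') path scales every channel of s by the single alpha
- * byte m >> 24 (see combineMask() below), while the component ('C')
- * path scales each channel by the matching byte of m, as
- * fbCombineMaskValueC() does further down.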
- */ - - -/* - * All of the composing functions - */ - -static force_inline comp4_t -combineMask (const comp4_t *src, const comp4_t *mask, int i) -{ - comp4_t s, m; - - if (mask) - { - m = *(mask + i) >> A_SHIFT; - - if (!m) - return 0; - } - - s = *(src + i); - - if (mask) - FbByteMul (s, m); - - return s; -} - -FASTCALL static void -fbCombineClear (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - memset(dest, 0, width*sizeof(comp4_t)); -} - -FASTCALL static void -fbCombineSrcU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - - if (!mask) - memcpy (dest, src, width * sizeof (comp4_t)); - else - { - for (i = 0; i < width; ++i) - { - comp4_t s = combineMask (src, mask, i); - - *(dest + i) = s; - } - } -} - -/* if the Src is opaque, call fbCombineSrcU */ -FASTCALL static void -fbCombineOverU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - for (i = 0; i < width; ++i) { - comp4_t s = combineMask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t ia = Alpha(~s); - - FbByteMulAdd(d, ia, s); - *(dest + i) = d; - } -} - -/* if the Dst is opaque, this is a noop */ -FASTCALL static void -fbCombineOverReverseU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - for (i = 0; i < width; ++i) { - comp4_t s = combineMask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t ia = Alpha(~*(dest + i)); - FbByteMulAdd(s, ia, d); - *(dest + i) = s; - } -} - -/* if the Dst is opaque, call fbCombineSrcU */ -FASTCALL static void -fbCombineInU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - for (i = 0; i < width; ++i) { - comp4_t s = combineMask (src, mask, i); - comp4_t a = Alpha(*(dest + i)); - FbByteMul(s, a); - *(dest + i) = s; - } -} - -/* if the Src is opaque, this is a noop */ -FASTCALL static void -fbCombineInReverseU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - for (i = 0; i < width; ++i) { - comp4_t s = combineMask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t a = Alpha(s); - FbByteMul(d, a); - *(dest + i) = d; - } -} - -/* if the Dst is opaque, call fbCombineClear */ -FASTCALL static void -fbCombineOutU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - for (i = 0; i < width; ++i) { - comp4_t s = combineMask (src, mask, i); - comp4_t a = Alpha(~*(dest + i)); - FbByteMul(s, a); - *(dest + i) = s; - } -} - -/* if the Src is opaque, call fbCombineClear */ -FASTCALL static void -fbCombineOutReverseU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - for (i = 0; i < width; ++i) { - comp4_t s = combineMask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t a = Alpha(~s); - FbByteMul(d, a); - *(dest + i) = d; - } -} - -/* if the Src is opaque, call fbCombineInU */ -/* if the Dst is opaque, call fbCombineOverU */ -/* if both the Src and Dst are opaque, call fbCombineSrcU */ -FASTCALL static void -fbCombineAtopU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - for (i = 0; i < width; ++i) { - 
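/* ATOP: Fa is the dest alpha and Fb is (1 - src alpha); the
-	   FbByteAddMul() below computes (s*da + d*(255 - sa)) / 255
-	   per channel. */
-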
comp4_t s = combineMask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t dest_a = Alpha(d); - comp4_t src_ia = Alpha(~s); - - FbByteAddMul(s, dest_a, d, src_ia); - *(dest + i) = s; - } -} - -/* if the Src is opaque, call fbCombineOverReverseU */ -/* if the Dst is opaque, call fbCombineInReverseU */ -/* if both the Src and Dst are opaque, call fbCombineDstU */ -FASTCALL static void -fbCombineAtopReverseU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - for (i = 0; i < width; ++i) { - comp4_t s = combineMask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t src_a = Alpha(s); - comp4_t dest_ia = Alpha(~d); - - FbByteAddMul(s, dest_ia, d, src_a); - *(dest + i) = s; - } -} - -/* if the Src is opaque, call fbCombineOverU */ -/* if the Dst is opaque, call fbCombineOverReverseU */ -/* if both the Src and Dst are opaque, call fbCombineClear */ -FASTCALL static void -fbCombineXorU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - for (i = 0; i < width; ++i) { - comp4_t s = combineMask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t src_ia = Alpha(~s); - comp4_t dest_ia = Alpha(~d); - - FbByteAddMul(s, dest_ia, d, src_ia); - *(dest + i) = s; - } -} - -FASTCALL static void -fbCombineAddU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - for (i = 0; i < width; ++i) { - comp4_t s = combineMask (src, mask, i); - comp4_t d = *(dest + i); - FbByteAdd(d, s); - *(dest + i) = d; - } -} - -/* if the Src is opaque, call fbCombineAddU */ -/* if the Dst is opaque, call fbCombineAddU */ -/* if both the Src and Dst are opaque, call fbCombineAddU */ -FASTCALL static void -fbCombineSaturateU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - for (i = 0; i < width; ++i) { - comp4_t s = combineMask (src, mask, i); - comp4_t d = *(dest + i); - comp2_t sa, da; - - sa = s >> A_SHIFT; - da = ~d >> A_SHIFT; - if (sa > da) - { - sa = IntDiv(da, sa); - FbByteMul(s, sa); - }; - FbByteAdd(d, s); - *(dest + i) = d; - } -} - - -/* - * All of the disjoint composing functions - - The four entries in the first column indicate what source contributions - come from each of the four areas of the picture -- areas covered by neither - A nor B, areas covered only by A, areas covered only by B and finally - areas covered by both A and B. 
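-
-  (Here a and b are the alphas of the two operands and Fa/Fb are the
-  fractions of each operand that contribute to the result.  The
-  disjoint operators assume the operand geometries overlap as little
-  as their alphas allow, the conjoint ones that they overlap as much
-  as possible; that is where the min()/max() clamps in the table
-  below come from.)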
- - Disjoint Conjoint - Fa Fb Fa Fb - (0,0,0,0) 0 0 0 0 - (0,A,0,A) 1 0 1 0 - (0,0,B,B) 0 1 0 1 - (0,A,B,A) 1 min((1-a)/b,1) 1 max(1-a/b,0) - (0,A,B,B) min((1-b)/a,1) 1 max(1-b/a,0) 1 - (0,0,0,A) max(1-(1-b)/a,0) 0 min(1,b/a) 0 - (0,0,0,B) 0 max(1-(1-a)/b,0) 0 min(a/b,1) - (0,A,0,0) min(1,(1-b)/a) 0 max(1-b/a,0) 0 - (0,0,B,0) 0 min(1,(1-a)/b) 0 max(1-a/b,0) - (0,0,B,A) max(1-(1-b)/a,0) min(1,(1-a)/b) min(1,b/a) max(1-a/b,0) - (0,A,0,B) min(1,(1-b)/a) max(1-(1-a)/b,0) max(1-b/a,0) min(1,a/b) - (0,A,B,0) min(1,(1-b)/a) min(1,(1-a)/b) max(1-b/a,0) max(1-a/b,0) - -*/ - -#define CombineAOut 1 -#define CombineAIn 2 -#define CombineBOut 4 -#define CombineBIn 8 - -#define CombineClear 0 -#define CombineA (CombineAOut|CombineAIn) -#define CombineB (CombineBOut|CombineBIn) -#define CombineAOver (CombineAOut|CombineBOut|CombineAIn) -#define CombineBOver (CombineAOut|CombineBOut|CombineBIn) -#define CombineAAtop (CombineBOut|CombineAIn) -#define CombineBAtop (CombineAOut|CombineBIn) -#define CombineXor (CombineAOut|CombineBOut) - -/* portion covered by a but not b */ -FASTCALL static comp1_t -fbCombineDisjointOutPart (comp1_t a, comp1_t b) -{ - /* min (1, (1-b) / a) */ - - b = ~b; /* 1 - b */ - if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */ - return MASK; /* 1 */ - return IntDiv(b,a); /* (1-b) / a */ -} - -/* portion covered by both a and b */ -FASTCALL static comp1_t -fbCombineDisjointInPart (comp1_t a, comp1_t b) -{ - /* max (1-(1-b)/a,0) */ - /* = - min ((1-b)/a - 1, 0) */ - /* = 1 - min (1, (1-b)/a) */ - - b = ~b; /* 1 - b */ - if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */ - return 0; /* 1 - 1 */ - return ~IntDiv(b,a); /* 1 - (1-b) / a */ -} - -/* portion covered by a but not b */ -FASTCALL static comp1_t -fbCombineConjointOutPart (comp1_t a, comp1_t b) -{ - /* max (1-b/a,0) */ - /* = 1-min(b/a,1) */ - - /* min (1, (1-b) / a) */ - - if (b >= a) /* b >= a -> b/a >= 1 */ - return 0x00; /* 0 */ - return ~IntDiv(b,a); /* 1 - b/a */ -} - -/* portion covered by both a and b */ -FASTCALL static comp1_t -fbCombineConjointInPart (comp1_t a, comp1_t b) -{ - /* min (1,b/a) */ - - if (b >= a) /* b >= a -> b/a >= 1 */ - return MASK; /* 1 */ - return IntDiv(b,a); /* b/a */ -} - -FASTCALL static void -fbCombineDisjointGeneralU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width, comp1_t combine) -{ - int i; - for (i = 0; i < width; ++i) { - comp4_t s = combineMask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t m,n,o,p; - comp2_t Fa, Fb, t, u, v; - comp1_t sa = s >> A_SHIFT; - comp1_t da = d >> A_SHIFT; - - switch (combine & CombineA) { - default: - Fa = 0; - break; - case CombineAOut: - Fa = fbCombineDisjointOutPart (sa, da); - break; - case CombineAIn: - Fa = fbCombineDisjointInPart (sa, da); - break; - case CombineA: - Fa = MASK; - break; - } - - switch (combine & CombineB) { - default: - Fb = 0; - break; - case CombineBOut: - Fb = fbCombineDisjointOutPart (da, sa); - break; - case CombineBIn: - Fb = fbCombineDisjointInPart (da, sa); - break; - case CombineB: - Fb = MASK; - break; - } - m = FbGen (s,d,0,Fa,Fb,t, u, v); - n = FbGen (s,d,G_SHIFT,Fa,Fb,t, u, v); - o = FbGen (s,d,R_SHIFT,Fa,Fb,t, u, v); - p = FbGen (s,d,A_SHIFT,Fa,Fb,t, u, v); - s = m|n|o|p; - *(dest + i) = s; - } -} - -FASTCALL static void -fbCombineDisjointOverU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - for (i = 0; i < width; ++i) { - comp4_t s = combineMask (src, mask, i); - comp2_t a = s >> A_SHIFT; - - if (a != 0x00) - { - if (a != MASK) 
- { - comp4_t d = *(dest + i); - a = fbCombineDisjointOutPart (d >> A_SHIFT, a); - FbByteMulAdd(d, a, s); - s = d; - } - *(dest + i) = s; - } - } -} - -FASTCALL static void -fbCombineDisjointInU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineDisjointGeneralU (dest, src, mask, width, CombineAIn); -} - -FASTCALL static void -fbCombineDisjointInReverseU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineDisjointGeneralU (dest, src, mask, width, CombineBIn); -} - -FASTCALL static void -fbCombineDisjointOutU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineDisjointGeneralU (dest, src, mask, width, CombineAOut); -} - -FASTCALL static void -fbCombineDisjointOutReverseU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineDisjointGeneralU (dest, src, mask, width, CombineBOut); -} - -FASTCALL static void -fbCombineDisjointAtopU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineDisjointGeneralU (dest, src, mask, width, CombineAAtop); -} - -FASTCALL static void -fbCombineDisjointAtopReverseU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineDisjointGeneralU (dest, src, mask, width, CombineBAtop); -} - -FASTCALL static void -fbCombineDisjointXorU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineDisjointGeneralU (dest, src, mask, width, CombineXor); -} - -FASTCALL static void -fbCombineConjointGeneralU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width, comp1_t combine) -{ - int i; - for (i = 0; i < width; ++i) { - comp4_t s = combineMask (src, mask, i); - comp4_t d = *(dest + i); - comp4_t m,n,o,p; - comp2_t Fa, Fb, t, u, v; - comp1_t sa = s >> A_SHIFT; - comp1_t da = d >> A_SHIFT; - - switch (combine & CombineA) { - default: - Fa = 0; - break; - case CombineAOut: - Fa = fbCombineConjointOutPart (sa, da); - break; - case CombineAIn: - Fa = fbCombineConjointInPart (sa, da); - break; - case CombineA: - Fa = MASK; - break; - } - - switch (combine & CombineB) { - default: - Fb = 0; - break; - case CombineBOut: - Fb = fbCombineConjointOutPart (da, sa); - break; - case CombineBIn: - Fb = fbCombineConjointInPart (da, sa); - break; - case CombineB: - Fb = MASK; - break; - } - m = FbGen (s,d,0,Fa,Fb,t, u, v); - n = FbGen (s,d,G_SHIFT,Fa,Fb,t, u, v); - o = FbGen (s,d,R_SHIFT,Fa,Fb,t, u, v); - p = FbGen (s,d,A_SHIFT,Fa,Fb,t, u, v); - s = m|n|o|p; - *(dest + i) = s; - } -} - -FASTCALL static void -fbCombineConjointOverU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralU (dest, src, mask, width, CombineAOver); -} - - -FASTCALL static void -fbCombineConjointOverReverseU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralU (dest, src, mask, width, CombineBOver); -} - - -FASTCALL static void -fbCombineConjointInU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralU (dest, 
src, mask, width, CombineAIn); -} - - -FASTCALL static void -fbCombineConjointInReverseU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralU (dest, src, mask, width, CombineBIn); -} - -FASTCALL static void -fbCombineConjointOutU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralU (dest, src, mask, width, CombineAOut); -} - -FASTCALL static void -fbCombineConjointOutReverseU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralU (dest, src, mask, width, CombineBOut); -} - -FASTCALL static void -fbCombineConjointAtopU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralU (dest, src, mask, width, CombineAAtop); -} - -FASTCALL static void -fbCombineConjointAtopReverseU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralU (dest, src, mask, width, CombineBAtop); -} - -FASTCALL static void -fbCombineConjointXorU (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralU (dest, src, mask, width, CombineXor); -} - -/********************************************************************************/ -/*************************** Per Channel functions ******************************/ -/********************************************************************************/ - -FASTCALL static void -fbCombineMaskC (comp4_t *src, comp4_t *mask) -{ - comp4_t a = *mask; - - comp4_t x; - comp2_t xa; - - if (!a) - { - *(src) = 0; - return; - } - - x = *(src); - if (a == ~0) - { - x = x >> A_SHIFT; - x |= x << G_SHIFT; - x |= x << R_SHIFT; - *(mask) = x; - return; - } - - xa = x >> A_SHIFT; - FbByteMulC(x, a); - *(src) = x; - FbByteMul(a, xa); - *(mask) = a; -} - -FASTCALL static void -fbCombineMaskValueC (comp4_t *src, const comp4_t *mask) -{ - comp4_t a = *mask; - comp4_t x; - - if (!a) - { - *(src) = 0; - return; - } - - if (a == ~0) - return; - - x = *(src); - FbByteMulC(x, a); - *(src) =x; -} - -FASTCALL static void -fbCombineMaskAlphaC (const comp4_t *src, comp4_t *mask) -{ - comp4_t a = *(mask); - comp4_t x; - - if (!a) - return; - - x = *(src) >> A_SHIFT; - if (x == MASK) - return; - if (a == ~0) - { - x = x >> A_SHIFT; - x |= x << G_SHIFT; - x |= x << R_SHIFT; - *(mask) = x; - return; - } - - FbByteMul(a, x); - *(mask) = a; -} - -FASTCALL static void -fbCombineClearC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - memset(dest, 0, width*sizeof(comp4_t)); -} - -FASTCALL static void -fbCombineSrcC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - - for (i = 0; i < width; ++i) { - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - - fbCombineMaskValueC (&s, &m); - - *(dest) = s; - } -} - -FASTCALL static void -fbCombineOverC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - - for (i = 0; i < width; ++i) { - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - comp4_t a; - - fbCombineMaskC (&s, &m); - - a = ~m; - if (a != ~0) - { - if 
(a) - { - comp4_t d = *(dest + i); - FbByteMulAddC(d, a, s); - s = d; - } - *(dest + i) = s; - } - } -} - -FASTCALL static void -fbCombineOverReverseC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - - for (i = 0; i < width; ++i) { - comp4_t d = *(dest + i); - comp4_t a = ~d >> A_SHIFT; - - if (a) - { - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - - fbCombineMaskValueC (&s, &m); - - if (a != MASK) - { - FbByteMulAdd(s, a, d); - } - *(dest + i) = s; - } - } -} - -FASTCALL static void -fbCombineInC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - - for (i = 0; i < width; ++i) { - comp4_t d = *(dest + i); - comp2_t a = d >> A_SHIFT; - comp4_t s = 0; - if (a) - { - comp4_t m = *(mask + i); - - s = *(src + i); - fbCombineMaskValueC (&s, &m); - if (a != MASK) - { - FbByteMul(s, a); - } - } - *(dest + i) = s; - } -} - -FASTCALL static void -fbCombineInReverseC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - - for (i = 0; i < width; ++i) { - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - comp4_t a; - - fbCombineMaskAlphaC (&s, &m); - - a = m; - if (a != ~0) - { - comp4_t d = 0; - if (a) - { - d = *(dest + i); - FbByteMulC(d, a); - } - *(dest + i) = d; - } - } -} - -FASTCALL static void -fbCombineOutC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - - for (i = 0; i < width; ++i) { - comp4_t d = *(dest + i); - comp2_t a = ~d >> A_SHIFT; - comp4_t s = 0; - if (a) - { - comp4_t m = *(mask + i); - - s = *(src + i); - fbCombineMaskValueC (&s, &m); - - if (a != MASK) - { - FbByteMul(s, a); - } - } - *(dest + i) = s; - } -} - -FASTCALL static void -fbCombineOutReverseC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - - for (i = 0; i < width; ++i) { - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - comp4_t a; - - fbCombineMaskAlphaC (&s, &m); - - a = ~m; - if (a != ~0) - { - comp4_t d = 0; - if (a) - { - d = *(dest + i); - FbByteMulC(d, a); - } - *(dest + i) = d; - } - } -} - -FASTCALL static void -fbCombineAtopC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - - for (i = 0; i < width; ++i) { - comp4_t d = *(dest + i); - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - comp4_t ad; - comp2_t as = d >> A_SHIFT; - - fbCombineMaskC (&s, &m); - - ad = ~m; - - FbByteAddMulC(d, ad, s, as); - *(dest + i) = d; - } -} - -FASTCALL static void -fbCombineAtopReverseC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - - for (i = 0; i < width; ++i) { - - comp4_t d = *(dest + i); - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - comp4_t ad; - comp2_t as = ~d >> A_SHIFT; - - fbCombineMaskC (&s, &m); - - ad = m; - - FbByteAddMulC(d, ad, s, as); - *(dest + i) = d; - } -} - -FASTCALL static void -fbCombineXorC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - - for (i = 0; i < width; ++i) { - comp4_t d = *(dest + i); - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - comp4_t ad; - comp2_t as = ~d >> A_SHIFT; - - fbCombineMaskC (&s, &m); - - ad = ~m; - - 
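/* Component-alpha XOR: d_c = (d_c*(255 - m_c) + s_c*(255 - da)) / 255,
-	   with the mask m already folded into s by fbCombineMaskC() above. */
-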
FbByteAddMulC(d, ad, s, as); - *(dest + i) = d; - } -} - -FASTCALL static void -fbCombineAddC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - - for (i = 0; i < width; ++i) { - comp4_t s = *(src + i); - comp4_t m = *(mask + i); - comp4_t d = *(dest + i); - - fbCombineMaskValueC (&s, &m); - - FbByteAdd(d, s); - *(dest + i) = d; - } -} - -FASTCALL static void -fbCombineSaturateC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - int i; - - for (i = 0; i < width; ++i) { - comp4_t s, d; - comp2_t sa, sr, sg, sb, da; - comp2_t t, u, v; - comp4_t m,n,o,p; - - d = *(dest + i); - s = *(src + i); - m = *(mask + i); - - fbCombineMaskC (&s, &m); - - sa = (m >> A_SHIFT); - sr = (m >> R_SHIFT) & MASK; - sg = (m >> G_SHIFT) & MASK; - sb = m & MASK; - da = ~d >> A_SHIFT; - - if (sb <= da) - m = Add(s,d,0,t); - else - m = FbGen (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v); - - if (sg <= da) - n = Add(s,d,G_SHIFT,t); - else - n = FbGen (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v); - - if (sr <= da) - o = Add(s,d,R_SHIFT,t); - else - o = FbGen (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v); - - if (sa <= da) - p = Add(s,d,A_SHIFT,t); - else - p = FbGen (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v); - - *(dest + i) = m|n|o|p; - } -} - -FASTCALL static void -fbCombineDisjointGeneralC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width, comp1_t combine) -{ - int i; - - for (i = 0; i < width; ++i) { - comp4_t s, d; - comp4_t m,n,o,p; - comp4_t Fa, Fb; - comp2_t t, u, v; - comp4_t sa; - comp1_t da; - - s = *(src + i); - m = *(mask + i); - d = *(dest + i); - da = d >> A_SHIFT; - - fbCombineMaskC (&s, &m); - - sa = m; - - switch (combine & CombineA) { - default: - Fa = 0; - break; - case CombineAOut: - m = (comp4_t)fbCombineDisjointOutPart ((comp1_t) (sa >> 0), da); - n = (comp4_t)fbCombineDisjointOutPart ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (comp4_t)fbCombineDisjointOutPart ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (comp4_t)fbCombineDisjointOutPart ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m|n|o|p; - break; - case CombineAIn: - m = (comp4_t)fbCombineDisjointInPart ((comp1_t) (sa >> 0), da); - n = (comp4_t)fbCombineDisjointInPart ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (comp4_t)fbCombineDisjointInPart ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (comp4_t)fbCombineDisjointInPart ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m|n|o|p; - break; - case CombineA: - Fa = ~0; - break; - } - - switch (combine & CombineB) { - default: - Fb = 0; - break; - case CombineBOut: - m = (comp4_t)fbCombineDisjointOutPart (da, (comp1_t) (sa >> 0)); - n = (comp4_t)fbCombineDisjointOutPart (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (comp4_t)fbCombineDisjointOutPart (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (comp4_t)fbCombineDisjointOutPart (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m|n|o|p; - break; - case CombineBIn: - m = (comp4_t)fbCombineDisjointInPart (da, (comp1_t) (sa >> 0)); - n = (comp4_t)fbCombineDisjointInPart (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (comp4_t)fbCombineDisjointInPart (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (comp4_t)fbCombineDisjointInPart (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m|n|o|p; - break; - case CombineB: - Fb = ~0; - break; - } - m = FbGen (s,d,0,GetComp(Fa,0),GetComp(Fb,0),t, u, v); - n = FbGen 
(s,d,G_SHIFT,GetComp(Fa,G_SHIFT),GetComp(Fb,G_SHIFT),t, u, v); - o = FbGen (s,d,R_SHIFT,GetComp(Fa,R_SHIFT),GetComp(Fb,R_SHIFT),t, u, v); - p = FbGen (s,d,A_SHIFT,GetComp(Fa,A_SHIFT),GetComp(Fb,A_SHIFT),t, u, v); - s = m|n|o|p; - *(dest + i) = s; - } -} - -FASTCALL static void -fbCombineDisjointOverC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOver); -} - -FASTCALL static void -fbCombineDisjointInC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineDisjointGeneralC (dest, src, mask, width, CombineAIn); -} - -FASTCALL static void -fbCombineDisjointInReverseC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineDisjointGeneralC (dest, src, mask, width, CombineBIn); -} - -FASTCALL static void -fbCombineDisjointOutC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOut); -} - -FASTCALL static void -fbCombineDisjointOutReverseC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineDisjointGeneralC (dest, src, mask, width, CombineBOut); -} - -FASTCALL static void -fbCombineDisjointAtopC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineDisjointGeneralC (dest, src, mask, width, CombineAAtop); -} - -FASTCALL static void -fbCombineDisjointAtopReverseC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineDisjointGeneralC (dest, src, mask, width, CombineBAtop); -} - -FASTCALL static void -fbCombineDisjointXorC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineDisjointGeneralC (dest, src, mask, width, CombineXor); -} - -FASTCALL static void -fbCombineConjointGeneralC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width, comp1_t combine) -{ - int i; - - for (i = 0; i < width; ++i) { - comp4_t s, d; - comp4_t m,n,o,p; - comp4_t Fa, Fb; - comp2_t t, u, v; - comp4_t sa; - comp1_t da; - - s = *(src + i); - m = *(mask + i); - d = *(dest + i); - da = d >> A_SHIFT; - - fbCombineMaskC (&s, &m); - - sa = m; - - switch (combine & CombineA) { - default: - Fa = 0; - break; - case CombineAOut: - m = (comp4_t)fbCombineConjointOutPart ((comp1_t) (sa >> 0), da); - n = (comp4_t)fbCombineConjointOutPart ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (comp4_t)fbCombineConjointOutPart ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (comp4_t)fbCombineConjointOutPart ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m|n|o|p; - break; - case CombineAIn: - m = (comp4_t)fbCombineConjointInPart ((comp1_t) (sa >> 0), da); - n = (comp4_t)fbCombineConjointInPart ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT; - o = (comp4_t)fbCombineConjointInPart ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT; - p = (comp4_t)fbCombineConjointInPart ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT; - Fa = m|n|o|p; - break; - case CombineA: - Fa = ~0; - break; - } - - switch (combine & CombineB) { - default: - Fb = 0; - break; - case CombineBOut: - m = (comp4_t)fbCombineConjointOutPart (da, (comp1_t) (sa >> 0)); - n = 
(comp4_t)fbCombineConjointOutPart (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (comp4_t)fbCombineConjointOutPart (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (comp4_t)fbCombineConjointOutPart (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m|n|o|p; - break; - case CombineBIn: - m = (comp4_t)fbCombineConjointInPart (da, (comp1_t) (sa >> 0)); - n = (comp4_t)fbCombineConjointInPart (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT; - o = (comp4_t)fbCombineConjointInPart (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT; - p = (comp4_t)fbCombineConjointInPart (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT; - Fb = m|n|o|p; - break; - case CombineB: - Fb = ~0; - break; - } - m = FbGen (s,d,0,GetComp(Fa,0),GetComp(Fb,0),t, u, v); - n = FbGen (s,d,G_SHIFT,GetComp(Fa,G_SHIFT),GetComp(Fb,G_SHIFT),t, u, v); - o = FbGen (s,d,R_SHIFT,GetComp(Fa,R_SHIFT),GetComp(Fb,R_SHIFT),t, u, v); - p = FbGen (s,d,A_SHIFT,GetComp(Fa,A_SHIFT),GetComp(Fb,A_SHIFT),t, u, v); - s = m|n|o|p; - *(dest + i) = s; - } -} - -FASTCALL static void -fbCombineConjointOverC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralC (dest, src, mask, width, CombineAOver); -} - -FASTCALL static void -fbCombineConjointOverReverseC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralC (dest, src, mask, width, CombineBOver); -} - -FASTCALL static void -fbCombineConjointInC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralC (dest, src, mask, width, CombineAIn); -} - -FASTCALL static void -fbCombineConjointInReverseC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralC (dest, src, mask, width, CombineBIn); -} - -FASTCALL static void -fbCombineConjointOutC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralC (dest, src, mask, width, CombineAOut); -} - -FASTCALL static void -fbCombineConjointOutReverseC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralC (dest, src, mask, width, CombineBOut); -} - -FASTCALL static void -fbCombineConjointAtopC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralC (dest, src, mask, width, CombineAAtop); -} - -FASTCALL static void -fbCombineConjointAtopReverseC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralC (dest, src, mask, width, CombineBAtop); -} - -FASTCALL static void -fbCombineConjointXorC (pixman_implementation_t *imp, pixman_op_t op, - comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) -{ - fbCombineConjointGeneralC (dest, src, mask, width, CombineXor); -} - -void -_pixman_setup_combiner_functions_width (pixman_implementation_t *imp) -{ - /* Unified alpha */ - imp->combine_width[PIXMAN_OP_CLEAR] = fbCombineClear; - imp->combine_width[PIXMAN_OP_SRC] = fbCombineSrcU; - /* dest */ - imp->combine_width[PIXMAN_OP_OVER] = fbCombineOverU; - imp->combine_width[PIXMAN_OP_OVER_REVERSE] = fbCombineOverReverseU; - imp->combine_width[PIXMAN_OP_IN] = fbCombineInU; - 
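/* The _REVERSE variants swap the roles of source and destination
-     * in the corresponding operator. */
-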
imp->combine_width[PIXMAN_OP_IN_REVERSE] = fbCombineInReverseU; - imp->combine_width[PIXMAN_OP_OUT] = fbCombineOutU; - imp->combine_width[PIXMAN_OP_OUT_REVERSE] = fbCombineOutReverseU; - imp->combine_width[PIXMAN_OP_ATOP] = fbCombineAtopU; - imp->combine_width[PIXMAN_OP_ATOP_REVERSE] = fbCombineAtopReverseU; - imp->combine_width[PIXMAN_OP_XOR] = fbCombineXorU; - imp->combine_width[PIXMAN_OP_ADD] = fbCombineAddU; - imp->combine_width[PIXMAN_OP_SATURATE] = fbCombineSaturateU; - - /* Disjoint, unified */ - imp->combine_width[PIXMAN_OP_DISJOINT_CLEAR] = fbCombineClear; - imp->combine_width[PIXMAN_OP_DISJOINT_SRC] = fbCombineSrcU; - /* dest */ - imp->combine_width[PIXMAN_OP_DISJOINT_OVER] = fbCombineDisjointOverU; - imp->combine_width[PIXMAN_OP_DISJOINT_OVER_REVERSE] = fbCombineSaturateU; - imp->combine_width[PIXMAN_OP_DISJOINT_IN] = fbCombineDisjointInU; - imp->combine_width[PIXMAN_OP_DISJOINT_IN_REVERSE] = fbCombineDisjointInReverseU; - imp->combine_width[PIXMAN_OP_DISJOINT_OUT] = fbCombineDisjointOutU; - imp->combine_width[PIXMAN_OP_DISJOINT_OUT_REVERSE] = fbCombineDisjointOutReverseU; - imp->combine_width[PIXMAN_OP_DISJOINT_ATOP] = fbCombineDisjointAtopU; - imp->combine_width[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = fbCombineDisjointAtopReverseU; - imp->combine_width[PIXMAN_OP_DISJOINT_XOR] = fbCombineDisjointXorU; - - /* Conjoint, unified */ - imp->combine_width[PIXMAN_OP_CONJOINT_CLEAR] = fbCombineClear; - imp->combine_width[PIXMAN_OP_CONJOINT_SRC] = fbCombineSrcU; - /* dest */ - imp->combine_width[PIXMAN_OP_CONJOINT_OVER] = fbCombineConjointOverU; - imp->combine_width[PIXMAN_OP_CONJOINT_OVER_REVERSE] = fbCombineConjointOverReverseU; - imp->combine_width[PIXMAN_OP_CONJOINT_IN] = fbCombineConjointInU; - imp->combine_width[PIXMAN_OP_CONJOINT_IN_REVERSE] = fbCombineConjointInReverseU; - imp->combine_width[PIXMAN_OP_CONJOINT_OUT] = fbCombineConjointOutU; - imp->combine_width[PIXMAN_OP_CONJOINT_OUT_REVERSE] = fbCombineConjointOutReverseU; - imp->combine_width[PIXMAN_OP_CONJOINT_ATOP] = fbCombineConjointAtopU; - imp->combine_width[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = fbCombineConjointAtopReverseU; - imp->combine_width[PIXMAN_OP_CONJOINT_XOR] = fbCombineConjointXorU; - - /* Component alpha combiners */ - imp->combine_width_ca[PIXMAN_OP_CLEAR] = fbCombineClearC; - imp->combine_width_ca[PIXMAN_OP_SRC] = fbCombineSrcC; - /* dest */ - imp->combine_width_ca[PIXMAN_OP_OVER] = fbCombineOverC; - imp->combine_width_ca[PIXMAN_OP_OVER_REVERSE] = fbCombineOverReverseC; - imp->combine_width_ca[PIXMAN_OP_IN] = fbCombineInC; - imp->combine_width_ca[PIXMAN_OP_IN_REVERSE] = fbCombineInReverseC; - imp->combine_width_ca[PIXMAN_OP_OUT] = fbCombineOutC; - imp->combine_width_ca[PIXMAN_OP_OUT_REVERSE] = fbCombineOutReverseC; - imp->combine_width_ca[PIXMAN_OP_ATOP] = fbCombineAtopC; - imp->combine_width_ca[PIXMAN_OP_ATOP_REVERSE] = fbCombineAtopReverseC; - imp->combine_width_ca[PIXMAN_OP_XOR] = fbCombineXorC; - imp->combine_width_ca[PIXMAN_OP_ADD] = fbCombineAddC; - imp->combine_width_ca[PIXMAN_OP_SATURATE] = fbCombineSaturateC; - - /* Disjoint CA */ - imp->combine_width_ca[PIXMAN_OP_DISJOINT_CLEAR] = fbCombineClearC; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_SRC] = fbCombineSrcC; - /* dest */ - imp->combine_width_ca[PIXMAN_OP_DISJOINT_OVER] = fbCombineDisjointOverC; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = fbCombineSaturateC, - imp->combine_width_ca[PIXMAN_OP_DISJOINT_IN] = fbCombineDisjointInC; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = fbCombineDisjointInReverseC; - 
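/* Note: the PIXMAN_OP_DISJOINT_OVER_REVERSE entry above ends in a
-     * comma rather than a semicolon; the comma operator harmlessly
-     * chains it onto the next assignment, but it is a typo. */
-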
imp->combine_width_ca[PIXMAN_OP_DISJOINT_OUT] = fbCombineDisjointOutC; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = fbCombineDisjointOutReverseC; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_ATOP] = fbCombineDisjointAtopC; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = fbCombineDisjointAtopReverseC; - imp->combine_width_ca[PIXMAN_OP_DISJOINT_XOR] = fbCombineDisjointXorC; - - /* Conjoint CA */ - imp->combine_width_ca[PIXMAN_OP_CONJOINT_CLEAR] = fbCombineClearC; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_SRC] = fbCombineSrcC; - /* dest */ - imp->combine_width_ca[PIXMAN_OP_CONJOINT_OVER] = fbCombineConjointOverC; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = fbCombineConjointOverReverseC; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_IN] = fbCombineConjointInC; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = fbCombineConjointInReverseC; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_OUT] = fbCombineConjointOutC; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = fbCombineConjointOutReverseC; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_ATOP] = fbCombineConjointAtopC; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = fbCombineConjointAtopReverseC; - imp->combine_width_ca[PIXMAN_OP_CONJOINT_XOR] = fbCombineConjointXorC; -} diff --git a/lib/pixman/pixman/combine.pl b/lib/pixman/pixman/make-combine.pl index 3b7536205..210a5da12 100644 --- a/lib/pixman/pixman/combine.pl +++ b/lib/pixman/pixman/make-combine.pl @@ -1,4 +1,4 @@ -$usage = "Usage: combine.pl { 8 | 16 } < combine.inc"; +$usage = "Usage: combine.pl { 8 | 16 } < pixman-combine.c.template"; $#ARGV == 0 or die $usage; @@ -27,7 +27,7 @@ print "/* WARNING: This file is generated by combine.pl from combine.inc.\n"; print " Please edit one of those files rather than this one. */\n"; print "\n"; -print "#line 1 \"combine.inc\"\n"; +print "#line 1 \"pixman-combine.c.template\"\n"; $mask_ = mask($mask); $one_half_ = mask($one_half); @@ -64,6 +64,11 @@ while (<STDIN>) { s/\bFbComposeFunctions\b/FbComposeFunctions$pixel_size/; s/combine_width/combine_$pixel_size/; s/_pixman_setup_combiner_functions_width/_pixman_setup_combiner_functions_$pixel_size/; + s/UNc/UN$size/g; + s/ALPHA_c/ALPHA_$size/g; + s/RED_c/RED_$size/g; + s/GREEN_c/GREEN_$size/g; + s/BLUE_c/BLUE_$size/g; # Convert comp*_t values into the appropriate real types. s/comp1_t/uint${size}_t/g; diff --git a/lib/pixman/pixman/pixman-access.c b/lib/pixman/pixman/pixman-access.c index 6b3ce34fa..d9fd38c15 100644 --- a/lib/pixman/pixman/pixman-access.c +++ b/lib/pixman/pixman/pixman-access.c @@ -33,674 +33,1092 @@ #include <assert.h> #include "pixman-private.h" +#include "pixman-accessor.h" -#define Red(x) (((x) >> 16) & 0xff) -#define Green(x) (((x) >> 8) & 0xff) -#define Blue(x) ((x) & 0xff) +#define CONVERT_RGB24_TO_Y15(s) \ + (((((s) >> 16) & 0xff) * 153 + \ + (((s) >> 8) & 0xff) * 301 + \ + (((s) ) & 0xff) * 58) >> 2) + +#define CONVERT_RGB24_TO_RGB15(s) \ + ((((s) >> 3) & 0x001f) | \ + (((s) >> 6) & 0x03e0) | \ + (((s) >> 9) & 0x7c00)) + +#define RGB15_TO_ENTRY(mif,rgb15) \ + ((mif)->ent[rgb15]) + +#define RGB24_TO_ENTRY(mif,rgb24) \ + RGB15_TO_ENTRY (mif,CONVERT_RGB24_TO_RGB15 (rgb24)) + +#define RGB24_TO_ENTRY_Y(mif,rgb24) \ + ((mif)->ent[CONVERT_RGB24_TO_Y15 (rgb24)]) /* * YV12 setup and access macros */ -#define YV12_SETUP(pict) \ - uint32_t *bits = pict->bits; \ - int stride = pict->rowstride; \ - int offset0 = stride < 0 ? 
\ - ((-stride) >> 1) * ((pict->height - 1) >> 1) - stride : \ - stride * pict->height; \ - int offset1 = stride < 0 ? \ - offset0 + ((-stride) >> 1) * ((pict->height) >> 1) : \ - offset0 + (offset0 >> 2) +#define YV12_SETUP(image) \ + bits_image_t *__bits_image = (bits_image_t *)image; \ + uint32_t *bits = __bits_image->bits; \ + int stride = __bits_image->rowstride; \ + int offset0 = stride < 0 ? \ + ((-stride) >> 1) * ((__bits_image->height - 1) >> 1) - stride : \ + stride * __bits_image->height; \ + int offset1 = stride < 0 ? \ + offset0 + ((-stride) >> 1) * ((__bits_image->height) >> 1) : \ + offset0 + (offset0 >> 2) + /* Note no trailing semicolon on the above macro; if it's there, then - * the typical usage of YV12_SETUP(pict); will have an extra trailing ; + * the typical usage of YV12_SETUP(image); will have an extra trailing ; * that some compilers will interpret as a statement -- and then any further * variable declarations will cause an error. */ -#define YV12_Y(line) \ +#define YV12_Y(line) \ ((uint8_t *) ((bits) + (stride) * (line))) -#define YV12_U(line) \ - ((uint8_t *) ((bits) + offset1 + \ - ((stride) >> 1) * ((line) >> 1))) - -#define YV12_V(line) \ - ((uint8_t *) ((bits) + offset0 + \ - ((stride) >> 1) * ((line) >> 1))) - -/*********************************** Fetch ************************************/ - -static FASTCALL void -fbFetch_a8r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) -{ - const uint32_t *bits = pict->bits + y*pict->rowstride; - MEMCPY_WRAPPED(pict, - buffer, (const uint32_t *)bits + x, - width*sizeof(uint32_t)); -} - -static FASTCALL void -fbFetch_x8r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) -{ - const uint32_t *bits = pict->bits + y*pict->rowstride; +#define YV12_U(line) \ + ((uint8_t *) ((bits) + offset1 + \ + ((stride) >> 1) * ((line) >> 1))) + +#define YV12_V(line) \ + ((uint8_t *) ((bits) + offset0 + \ + ((stride) >> 1) * ((line) >> 1))) + +/********************************** Fetch ************************************/ + +static void +fetch_scanline_a8r8g8b8 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) +{ + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + + MEMCPY_WRAPPED (image, + buffer, (const uint32_t *)bits + x, + width * sizeof(uint32_t)); +} + +static void +fetch_scanline_x8r8g8b8 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) +{ + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint32_t *pixel = (const uint32_t *)bits + x; const uint32_t *end = pixel + width; - while (pixel < end) { - *buffer++ = READ(pict, pixel++) | 0xff000000; - } + + while (pixel < end) + *buffer++ = READ (image, pixel++) | 0xff000000; } -static FASTCALL void -fbFetch_a8b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_a8b8g8r8 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint32_t *pixel = (uint32_t *)bits + x; const uint32_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - *buffer++ = (p & 0xff00ff00) | - ((p >> 16) & 0xff) | + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + + *buffer++ = (p & 0xff00ff00) | + ((p 
>> 16) & 0xff) | ((p & 0xff) << 16); } } -static FASTCALL void -fbFetch_x8b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_x8b8g8r8 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint32_t *pixel = (uint32_t *)bits + x; const uint32_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - *buffer++ = 0xff000000 | - (p & 0x0000ff00) | - ((p >> 16) & 0xff) | + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + + *buffer++ = 0xff000000 | + (p & 0x0000ff00) | + ((p >> 16) & 0xff) | ((p & 0xff) << 16); } } -static FASTCALL void -fbFetch_b8g8r8a8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_b8g8r8a8 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint32_t *pixel = (uint32_t *)bits + x; const uint32_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - *buffer++ = ((p & 0xff000000) >> 24) | - ((p & 0x00ff0000) >> 8) | - ((p & 0x0000ff00) << 8) | - ((p & 0x000000ff) << 24); + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + + *buffer++ = (((p & 0xff000000) >> 24) | + ((p & 0x00ff0000) >> 8) | + ((p & 0x0000ff00) << 8) | + ((p & 0x000000ff) << 24)); } } -static FASTCALL void -fbFetch_b8g8r8x8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_b8g8r8x8 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint32_t *pixel = (uint32_t *)bits + x; const uint32_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - *buffer++ = 0xff000000 | - ((p & 0xff000000) >> 24) | - ((p & 0x00ff0000) >> 8) | - ((p & 0x0000ff00) << 8); - } -} - -static FASTCALL void -fbFetch_a2b10g10r10 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer) -{ - const uint32_t *bits = pict->bits + y*pict->rowstride; + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + + *buffer++ = (0xff000000 | + ((p & 0xff000000) >> 24) | + ((p & 0x00ff0000) >> 8) | + ((p & 0x0000ff00) << 8)); + } +} + +/* Expects a uint64_t buffer */ +static void +fetch_scanline_a2r10g10b10 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask, + uint32_t mask_bits) +{ + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint32_t *pixel = bits + x; const uint32_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - uint64_t a = p >> 30; - uint64_t b = (p >> 20) & 0x3ff; - uint64_t g = (p >> 10) & 0x3ff; - uint64_t r = p & 0x3ff; + uint64_t *buffer = (uint64_t *)b; + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint64_t a = p >> 30; + uint64_t r = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t b = p & 0x3ff; - r = r << 6 | r >> 4; - g = g << 6 | g >> 4; - b = b << 6 | b >> 4; + r = r << 6 | r >> 4; + g = g << 6 | g >> 4; + b = b << 6 | b 
>> 4; - a <<= 62; - a |= a >> 2; - a |= a >> 4; - a |= a >> 8; + a <<= 14; + a |= a >> 2; + a |= a >> 4; + a |= a >> 8; - *buffer++ = a << 48 | r << 32 | g << 16 | b; + *buffer++ = a << 48 | r << 32 | g << 16 | b; } } -static FASTCALL void -fbFetch_x2b10g10r10 (bits_image_t *pict, int x, int y, int width, uint64_t *buffer) +/* Expects a uint64_t buffer */ +static void +fetch_scanline_x2r10g10b10 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask, + uint32_t mask_bits) { - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint32_t *pixel = (uint32_t *)bits + x; const uint32_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - uint64_t b = (p >> 20) & 0x3ff; - uint64_t g = (p >> 10) & 0x3ff; - uint64_t r = p & 0x3ff; - - r = r << 6 | r >> 4; - g = g << 6 | g >> 4; - b = b << 6 | b >> 4; - - *buffer++ = 0xffffULL << 48 | r << 32 | g << 16 | b; - } -} - -static FASTCALL void -fbFetch_r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) -{ - const uint32_t *bits = pict->bits + y*pict->rowstride; - const uint8_t *pixel = (const uint8_t *)bits + 3*x; - const uint8_t *end = pixel + 3*width; - while (pixel < end) { - uint32_t b = Fetch24(pict, pixel) | 0xff000000; - pixel += 3; + uint64_t *buffer = (uint64_t *)b; + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint64_t r = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t b = p & 0x3ff; + + r = r << 6 | r >> 4; + g = g << 6 | g >> 4; + b = b << 6 | b >> 4; + + *buffer++ = 0xffffULL << 48 | r << 32 | g << 16 | b; + } +} + +/* Expects a uint64_t buffer */ +static void +fetch_scanline_a2b10g10r10 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask, + uint32_t mask_bits) +{ + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *pixel = bits + x; + const uint32_t *end = pixel + width; + uint64_t *buffer = (uint64_t *)b; + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint64_t a = p >> 30; + uint64_t b = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t r = p & 0x3ff; + + r = r << 6 | r >> 4; + g = g << 6 | g >> 4; + b = b << 6 | b >> 4; + + a <<= 14; + a |= a >> 2; + a |= a >> 4; + a |= a >> 8; + + *buffer++ = a << 48 | r << 32 | g << 16 | b; + } +} + +/* Expects a uint64_t buffer */ +static void +fetch_scanline_x2b10g10r10 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t *mask, + uint32_t mask_bits) +{ + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint32_t *pixel = (uint32_t *)bits + x; + const uint32_t *end = pixel + width; + uint64_t *buffer = (uint64_t *)b; + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint64_t b = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t r = p & 0x3ff; + + r = r << 6 | r >> 4; + g = g << 6 | g >> 4; + b = b << 6 | b >> 4; + + *buffer++ = 0xffffULL << 48 | r << 32 | g << 16 | b; + } +} + +static void +fetch_scanline_r8g8b8 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) +{ + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint8_t *pixel = (const uint8_t *)bits + 3 * x; + const uint8_t *end = pixel + 3 * width; + + while (pixel < end) + { + uint32_t b = 0xff000000; + +#ifdef WORDS_BIGENDIAN + b |= (READ 
(image, pixel++) << 16); + b |= (READ (image, pixel++) << 8); + b |= (READ (image, pixel++)); +#else + b |= (READ (image, pixel++)); + b |= (READ (image, pixel++) << 8); + b |= (READ (image, pixel++) << 16); +#endif + *buffer++ = b; } } -static FASTCALL void -fbFetch_b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_b8g8r8 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - const uint32_t *bits = pict->bits + y*pict->rowstride; - const uint8_t *pixel = (const uint8_t *)bits + 3*x; - const uint8_t *end = pixel + 3*width; - while (pixel < end) { + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const uint8_t *pixel = (const uint8_t *)bits + 3 * x; + const uint8_t *end = pixel + 3 * width; + + while (pixel < end) + { uint32_t b = 0xff000000; -#if IMAGE_BYTE_ORDER == MSBFirst - b |= (READ(pict, pixel++)); - b |= (READ(pict, pixel++) << 8); - b |= (READ(pict, pixel++) << 16); +#ifdef WORDS_BIGENDIAN + b |= (READ (image, pixel++)); + b |= (READ (image, pixel++) << 8); + b |= (READ (image, pixel++) << 16); #else - b |= (READ(pict, pixel++) << 16); - b |= (READ(pict, pixel++) << 8); - b |= (READ(pict, pixel++)); + b |= (READ (image, pixel++) << 16); + b |= (READ (image, pixel++) << 8); + b |= (READ (image, pixel++)); #endif *buffer++ = b; } } -static FASTCALL void -fbFetch_r5g6b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_r5g6b5 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint16_t *pixel = (const uint16_t *)bits + x; const uint16_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); uint32_t r = (((p) << 3) & 0xf8) | (((p) << 5) & 0xfc00) | (((p) << 8) & 0xf80000); + r |= (r >> 5) & 0x70007; r |= (r >> 6) & 0x300; + *buffer++ = 0xff000000 | r; } } -static FASTCALL void -fbFetch_b5g6r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_b5g6r5 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t r,g,b; - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint16_t *pixel = (const uint16_t *)bits + x; const uint16_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint32_t r, g, b; + b = ((p & 0xf800) | ((p & 0xe000) >> 5)) >> 8; g = ((p & 0x07e0) | ((p & 0x0600) >> 6)) << 5; r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14; + *buffer++ = 0xff000000 | r | g | b; } } -static FASTCALL void -fbFetch_a1r5g5b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_a1r5g5b5 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t r,g,b, a; - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint16_t *pixel = (const uint16_t *)bits + x; const uint16_t *end = pixel + width; - while (pixel < end) { - uint32_t p = 
READ(pict, pixel++); - + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint32_t r, g, b, a; + a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24; r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9; g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6; b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2; + *buffer++ = a | r | g | b; } } -static FASTCALL void -fbFetch_x1r5g5b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_x1r5g5b5 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t r,g,b; - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint16_t *pixel = (const uint16_t *)bits + x; const uint16_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint32_t r, g, b; + r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9; g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6; b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2; + *buffer++ = 0xff000000 | r | g | b; } } -static FASTCALL void -fbFetch_a1b5g5r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_a1b5g5r5 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t r,g,b, a; - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint16_t *pixel = (const uint16_t *)bits + x; const uint16_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - + uint32_t r, g, b, a; + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24; b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7; g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6; r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14; + *buffer++ = a | r | g | b; } } -static FASTCALL void -fbFetch_x1b5g5r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_x1b5g5r5 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t r,g,b; - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint16_t *pixel = (const uint16_t *)bits + x; const uint16_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint32_t r, g, b; + b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7; g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6; r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14; + *buffer++ = 0xff000000 | r | g | b; } } -static FASTCALL void -fbFetch_a4r4g4b4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_a4r4g4b4 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t r,g,b, a; - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint16_t *pixel = (const uint16_t *)bits + x; const uint16_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint32_t 
r, g, b, a; + a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16; r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12; g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8; b = ((p & 0x000f) | ((p & 0x000f) << 4)); + *buffer++ = a | r | g | b; } } -static FASTCALL void -fbFetch_x4r4g4b4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_x4r4g4b4 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t r,g,b; - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint16_t *pixel = (const uint16_t *)bits + x; const uint16_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint32_t r, g, b; + r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12; g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8; b = ((p & 0x000f) | ((p & 0x000f) << 4)); + *buffer++ = 0xff000000 | r | g | b; } } -static FASTCALL void -fbFetch_a4b4g4r4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_a4b4g4r4 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t r,g,b, a; - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint16_t *pixel = (const uint16_t *)bits + x; const uint16_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint32_t r, g, b, a; + a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16; b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4; g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8; r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16; + *buffer++ = a | r | g | b; } } -static FASTCALL void -fbFetch_x4b4g4r4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_x4b4g4r4 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t r,g,b; - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint16_t *pixel = (const uint16_t *)bits + x; const uint16_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint32_t r, g, b; + b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4; g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8; r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16; + *buffer++ = 0xff000000 | r | g | b; } } -static FASTCALL void -fbFetch_a8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_a8 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint8_t *pixel = (const uint8_t *)bits + x; const uint8_t *end = pixel + width; - while (pixel < end) { - *buffer++ = READ(pict, pixel++) << 24; - } + + while (pixel < end) + *buffer++ = READ (image, pixel++) << 24; } -static FASTCALL void -fbFetch_r3g3b2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_r3g3b2 (pixman_image_t *image, + int x, 
+ int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t r,g,b; - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint8_t *pixel = (const uint8_t *)bits + x; const uint8_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint32_t r, g, b; + r = ((p & 0xe0) | ((p & 0xe0) >> 3) | ((p & 0xc0) >> 6)) << 16; g = ((p & 0x1c) | ((p & 0x18) >> 3) | ((p & 0x1c) << 3)) << 8; b = (((p & 0x03) ) | ((p & 0x03) << 2) | ((p & 0x03) << 4) | ((p & 0x03) << 6)); + *buffer++ = 0xff000000 | r | g | b; } } -static FASTCALL void -fbFetch_b2g3r3 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_b2g3r3 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t r,g,b; - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint8_t *pixel = (const uint8_t *)bits + x; const uint8_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint32_t r, g, b; + b = (((p & 0xc0) ) | ((p & 0xc0) >> 2) | ((p & 0xc0) >> 4) | ((p & 0xc0) >> 6)); + g = ((p & 0x38) | ((p & 0x38) >> 3) | ((p & 0x30) << 2)) << 8; + r = (((p & 0x07) ) | ((p & 0x07) << 3) | ((p & 0x06) << 6)) << 16; + *buffer++ = 0xff000000 | r | g | b; } } -static FASTCALL void -fbFetch_a2r2g2b2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_a2r2g2b2 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t a,r,g,b; - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint8_t *pixel = (const uint8_t *)bits + x; const uint8_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint32_t a, r, g, b; + a = ((p & 0xc0) * 0x55) << 18; r = ((p & 0x30) * 0x55) << 12; g = ((p & 0x0c) * 0x55) << 6; b = ((p & 0x03) * 0x55); - *buffer++ = a|r|g|b; + + *buffer++ = a | r | g | b; } } -static FASTCALL void -fbFetch_a2b2g2r2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_a2b2g2r2 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t a,r,g,b; - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint8_t *pixel = (const uint8_t *)bits + x; const uint8_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); - + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + uint32_t a, r, g, b; + a = ((p & 0xc0) * 0x55) << 18; b = ((p & 0x30) * 0x55) >> 6; g = ((p & 0x0c) * 0x55) << 6; r = ((p & 0x03) * 0x55) << 16; - *buffer++ = a|r|g|b; + + *buffer++ = a | r | g | b; } } -static FASTCALL void -fbFetch_c8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_c8 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - const 
uint32_t *bits = pict->bits + y*pict->rowstride; - const pixman_indexed_t * indexed = pict->indexed; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const pixman_indexed_t * indexed = image->bits.indexed; const uint8_t *pixel = (const uint8_t *)bits + x; const uint8_t *end = pixel + width; - while (pixel < end) { - uint32_t p = READ(pict, pixel++); + + while (pixel < end) + { + uint32_t p = READ (image, pixel++); + *buffer++ = indexed->rgba[p]; } } -static FASTCALL void -fbFetch_x4a4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_x4a4 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; const uint8_t *pixel = (const uint8_t *)bits + x; const uint8_t *end = pixel + width; - while (pixel < end) { - uint8_t p = READ(pict, pixel++) & 0xf; + + while (pixel < end) + { + uint8_t p = READ (image, pixel++) & 0xf; + *buffer++ = (p | (p << 4)) << 24; } } -#define Fetch8(img,l,o) (READ(img, (uint8_t *)(l) + ((o) >> 2))) -#if IMAGE_BYTE_ORDER == MSBFirst -#define Fetch4(img,l,o) ((o) & 2 ? Fetch8(img,l,o) & 0xf : Fetch8(img,l,o) >> 4) +#define FETCH_8(img,l,o) (READ (img, (uint8_t *)(l) + ((o) >> 2))) +#ifdef WORDS_BIGENDIAN +#define FETCH_4(img,l,o) ((o) & 2 ? FETCH_8 (img,l,o) & 0xf : FETCH_8 (img,l,o) >> 4) #else -#define Fetch4(img,l,o) ((o) & 2 ? Fetch8(img,l,o) >> 4 : Fetch8(img,l,o) & 0xf) +#define FETCH_4(img,l,o) ((o) & 2 ? FETCH_8 (img,l,o) >> 4 : FETCH_8 (img,l,o) & 0xf) #endif -static FASTCALL void -fbFetch_a4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_a4 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; int i; - for (i = 0; i < width; ++i) { - uint32_t p = Fetch4(pict, bits, i + x); - + + for (i = 0; i < width; ++i) + { + uint32_t p = FETCH_4 (image, bits, i + x); + p |= p << 4; + *buffer++ = p << 24; } } -static FASTCALL void -fbFetch_r1g2b1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_r1g2b1 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t r,g,b; - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; int i; - for (i = 0; i < width; ++i) { - uint32_t p = Fetch4(pict, bits, i + x); - + + for (i = 0; i < width; ++i) + { + uint32_t p = FETCH_4 (image, bits, i + x); + uint32_t r, g, b; + r = ((p & 0x8) * 0xff) << 13; g = ((p & 0x6) * 0x55) << 7; b = ((p & 0x1) * 0xff); - *buffer++ = 0xff000000|r|g|b; + + *buffer++ = 0xff000000 | r | g | b; } } -static FASTCALL void -fbFetch_b1g2r1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_b1g2r1 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t r,g,b; - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; int i; - for (i = 0; i < width; ++i) { - uint32_t p = Fetch4(pict, bits, i + x); - + + for (i = 0; i < width; ++i) + { + uint32_t p = 
FETCH_4 (image, bits, i + x); + uint32_t r, g, b; + b = ((p & 0x8) * 0xff) >> 3; g = ((p & 0x6) * 0x55) << 7; r = ((p & 0x1) * 0xff) << 16; - *buffer++ = 0xff000000|r|g|b; + + *buffer++ = 0xff000000 | r | g | b; } } -static FASTCALL void -fbFetch_a1r1g1b1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_a1r1g1b1 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t a,r,g,b; - const uint32_t *bits = pict->bits + y*pict->rowstride; + uint32_t a, r, g, b; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; int i; - for (i = 0; i < width; ++i) { - uint32_t p = Fetch4(pict, bits, i + x); - + + for (i = 0; i < width; ++i) + { + uint32_t p = FETCH_4 (image, bits, i + x); + a = ((p & 0x8) * 0xff) << 21; r = ((p & 0x4) * 0xff) << 14; g = ((p & 0x2) * 0xff) << 7; b = ((p & 0x1) * 0xff); - *buffer++ = a|r|g|b; + + *buffer++ = a | r | g | b; } } -static FASTCALL void -fbFetch_a1b1g1r1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_a1b1g1r1 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - uint32_t a,r,g,b; - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; int i; - for (i = 0; i < width; ++i) { - uint32_t p = Fetch4(pict, bits, i + x); - + + for (i = 0; i < width; ++i) + { + uint32_t p = FETCH_4 (image, bits, i + x); + uint32_t a, r, g, b; + a = ((p & 0x8) * 0xff) << 21; r = ((p & 0x4) * 0xff) >> 3; g = ((p & 0x2) * 0xff) << 7; b = ((p & 0x1) * 0xff) << 16; - *buffer++ = a|r|g|b; + + *buffer++ = a | r | g | b; } } -static FASTCALL void -fbFetch_c4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_c4 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - const uint32_t *bits = pict->bits + y*pict->rowstride; - const pixman_indexed_t * indexed = pict->indexed; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const pixman_indexed_t * indexed = image->bits.indexed; int i; - for (i = 0; i < width; ++i) { - uint32_t p = Fetch4(pict, bits, i + x); - + + for (i = 0; i < width; ++i) + { + uint32_t p = FETCH_4 (image, bits, i + x); + *buffer++ = indexed->rgba[p]; } } - -static FASTCALL void -fbFetch_a1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_a1 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - const uint32_t *bits = pict->bits + y*pict->rowstride; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; int i; - for (i = 0; i < width; ++i) { - uint32_t p = READ(pict, bits + ((i + x) >> 5)); - uint32_t a; -#if BITMAP_BIT_ORDER == MSBFirst - a = p >> (0x1f - ((i+x) & 0x1f)); + + for (i = 0; i < width; ++i) + { + uint32_t p = READ (image, bits + ((i + x) >> 5)); + uint32_t a; + +#ifdef WORDS_BIGENDIAN + a = p >> (0x1f - ((i + x) & 0x1f)); #else - a = p >> ((i+x) & 0x1f); + a = p >> ((i + x) & 0x1f); #endif a = a & 1; a |= a << 1; a |= a << 2; a |= a << 4; + *buffer++ = a << 24; } } -static FASTCALL void -fbFetch_g1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer) +static void +fetch_scanline_g1 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t 
*mask, + uint32_t mask_bits) { - const uint32_t *bits = pict->bits + y*pict->rowstride; - const pixman_indexed_t * indexed = pict->indexed; + const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; + const pixman_indexed_t * indexed = image->bits.indexed; int i; - for (i = 0; i < width; ++i) { - uint32_t p = READ(pict, bits + ((i+x) >> 5)); + + for (i = 0; i < width; ++i) + { + uint32_t p = READ (image, bits + ((i + x) >> 5)); uint32_t a; -#if BITMAP_BIT_ORDER == MSBFirst - a = p >> (0x1f - ((i+x) & 0x1f)); + +#ifdef WORDS_BIGENDIAN + a = p >> (0x1f - ((i + x) & 0x1f)); #else - a = p >> ((i+x) & 0x1f); + a = p >> ((i + x) & 0x1f); #endif a = a & 1; + *buffer++ = indexed->rgba[a]; } } -static FASTCALL void -fbFetch_yuy2 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer) +static void +fetch_scanline_yuy2 (pixman_image_t *image, + int x, + int line, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - int16_t y, u, v; - int32_t r, g, b; - int i; - - const uint32_t *bits = pict->bits + pict->rowstride * line; - + const uint32_t *bits = image->bits.bits + image->bits.rowstride * line; + int i; + for (i = 0; i < width; i++) { + int16_t y, u, v; + int32_t r, g, b; + y = ((uint8_t *) bits)[(x + i) << 1] - 16; - u = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 1] - 128; - v = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 3] - 128; - + u = ((uint8_t *) bits)[(((x + i) << 1) & - 4) + 1] - 128; + v = ((uint8_t *) bits)[(((x + i) << 1) & - 4) + 3] - 128; + /* R = 1.164(Y - 16) + 1.596(V - 128) */ r = 0x012b27 * y + 0x019a2e * v; /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */ g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u; /* B = 1.164(Y - 16) + 2.018(U - 128) */ b = 0x012b27 * y + 0x0206a2 * u; - - WRITE(pict, buffer++, 0xff000000 | - (r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) | - (g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) | - (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0)); + + *buffer++ = 0xff000000 | + (r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) | + (g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) | + (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0); } } -static FASTCALL void -fbFetch_yv12 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer) +static void +fetch_scanline_yv12 (pixman_image_t *image, + int x, + int line, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { - YV12_SETUP(pict); - uint8_t *pY = YV12_Y (line); - uint8_t *pU = YV12_U (line); - uint8_t *pV = YV12_V (line); - int16_t y, u, v; - int32_t r, g, b; - int i; - + YV12_SETUP (image); + uint8_t *y_line = YV12_Y (line); + uint8_t *u_line = YV12_U (line); + uint8_t *v_line = YV12_V (line); + int i; + for (i = 0; i < width; i++) { - y = pY[x + i] - 16; - u = pU[(x + i) >> 1] - 128; - v = pV[(x + i) >> 1] - 128; + int16_t y, u, v; + int32_t r, g, b; + + y = y_line[x + i] - 16; + u = u_line[(x + i) >> 1] - 128; + v = v_line[(x + i) >> 1] - 128; /* R = 1.164(Y - 16) + 1.596(V - 128) */ r = 0x012b27 * y + 0x019a2e * v; @@ -709,537 +1127,601 @@ fbFetch_yv12 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer) /* B = 1.164(Y - 16) + 2.018(U - 128) */ b = 0x012b27 * y + 0x0206a2 * u; - WRITE(pict, buffer++, 0xff000000 | + *buffer++ = 0xff000000 | (r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) | (g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) | - (b >= 0 ? b < 0x1000000 ? 
(b >> 16) & 0x0000ff : 0x0000ff : 0)); + (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0); } } -fetchProc32 ACCESS(pixman_fetchProcForPicture32) (bits_image_t * pict) -{ - switch(pict->format) { - case PIXMAN_a8r8g8b8: return fbFetch_a8r8g8b8; - case PIXMAN_x8r8g8b8: return fbFetch_x8r8g8b8; - case PIXMAN_a8b8g8r8: return fbFetch_a8b8g8r8; - case PIXMAN_x8b8g8r8: return fbFetch_x8b8g8r8; - case PIXMAN_b8g8r8a8: return fbFetch_b8g8r8a8; - case PIXMAN_b8g8r8x8: return fbFetch_b8g8r8x8; - /* These two require wide compositing */ - case PIXMAN_a2b10g10r10: return NULL; - case PIXMAN_x2b10g10r10: return NULL; - - /* 24bpp formats */ - case PIXMAN_r8g8b8: return fbFetch_r8g8b8; - case PIXMAN_b8g8r8: return fbFetch_b8g8r8; - - /* 16bpp formats */ - case PIXMAN_r5g6b5: return fbFetch_r5g6b5; - case PIXMAN_b5g6r5: return fbFetch_b5g6r5; - - case PIXMAN_a1r5g5b5: return fbFetch_a1r5g5b5; - case PIXMAN_x1r5g5b5: return fbFetch_x1r5g5b5; - case PIXMAN_a1b5g5r5: return fbFetch_a1b5g5r5; - case PIXMAN_x1b5g5r5: return fbFetch_x1b5g5r5; - case PIXMAN_a4r4g4b4: return fbFetch_a4r4g4b4; - case PIXMAN_x4r4g4b4: return fbFetch_x4r4g4b4; - case PIXMAN_a4b4g4r4: return fbFetch_a4b4g4r4; - case PIXMAN_x4b4g4r4: return fbFetch_x4b4g4r4; - - /* 8bpp formats */ - case PIXMAN_a8: return fbFetch_a8; - case PIXMAN_r3g3b2: return fbFetch_r3g3b2; - case PIXMAN_b2g3r3: return fbFetch_b2g3r3; - case PIXMAN_a2r2g2b2: return fbFetch_a2r2g2b2; - case PIXMAN_a2b2g2r2: return fbFetch_a2b2g2r2; - case PIXMAN_c8: return fbFetch_c8; - case PIXMAN_g8: return fbFetch_c8; - case PIXMAN_x4a4: return fbFetch_x4a4; - - /* 4bpp formats */ - case PIXMAN_a4: return fbFetch_a4; - case PIXMAN_r1g2b1: return fbFetch_r1g2b1; - case PIXMAN_b1g2r1: return fbFetch_b1g2r1; - case PIXMAN_a1r1g1b1: return fbFetch_a1r1g1b1; - case PIXMAN_a1b1g1r1: return fbFetch_a1b1g1r1; - case PIXMAN_c4: return fbFetch_c4; - case PIXMAN_g4: return fbFetch_c4; - - /* 1bpp formats */ - case PIXMAN_a1: return fbFetch_a1; - case PIXMAN_g1: return fbFetch_g1; +/**************************** Pixel wise fetching *****************************/ - /* YUV formats */ - case PIXMAN_yuy2: return fbFetch_yuy2; - case PIXMAN_yv12: return fbFetch_yv12; - } +/* Despite the type, expects a uint64_t buffer */ +static uint64_t +fetch_pixel_a2r10g10b10 (bits_image_t *image, + int offset, + int line) +{ + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t p = READ (image, bits + offset); + uint64_t a = p >> 30; + uint64_t r = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t b = p & 0x3ff; - return NULL; -} + r = r << 6 | r >> 4; + g = g << 6 | g >> 4; + b = b << 6 | b >> 4; -static FASTCALL void -fbFetch64_generic (bits_image_t *pict, int x, int y, int width, uint64_t *buffer) -{ - fetchProc32 fetch32 = ACCESS(pixman_fetchProcForPicture32) (pict); + a <<= 14; + a |= a >> 2; + a |= a >> 4; + a |= a >> 8; - // Fetch the pixels into the first half of buffer and then expand them in - // place. 
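
The removed fbFetch64_generic (its body continues below with the fetch32 call) and the 0.16 code that replaces it take the same approach: fetch the scanline at 32 bpp, then widen every channel to 16 bits in place with pixman_expand. pixman_expand lives in pixman-utils.c and is not part of this hunk; the sketch below only illustrates the kind of 8-to-16-bit replication it performs, with the helper name invented here:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical helper: replicate an 8-bit channel into 16 bits,
     * so 0x00 -> 0x0000 and 0xff -> 0xffff. */
    static uint16_t
    widen_8_to_16 (uint8_t c)
    {
        return (uint16_t) (c * 0x0101);    /* same as (c << 8) | c */
    }

    int
    main (void)
    {
        uint32_t argb8 = 0x80ff40c0;       /* an a8r8g8b8 pixel */
        uint64_t a = widen_8_to_16 (argb8 >> 24);
        uint64_t r = widen_8_to_16 ((argb8 >> 16) & 0xff);
        uint64_t g = widen_8_to_16 ((argb8 >> 8) & 0xff);
        uint64_t b = widen_8_to_16 (argb8 & 0xff);

        /* one 16-bit-per-channel pixel, as held in the uint64_t buffers */
        printf ("%016llx\n",
                (unsigned long long) (a << 48 | r << 32 | g << 16 | b));
        return 0;
    }

Replication rather than zero-padding keeps full-intensity channels at full intensity: 0xff becomes 0xffff, not 0xff00.
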
- fetch32(pict, x, y, width, (uint32_t*)buffer); - pixman_expand(buffer, (uint32_t*)buffer, pict->format, width); + return a << 48 | r << 32 | g << 16 | b; } -fetchProc64 ACCESS(pixman_fetchProcForPicture64) (bits_image_t * pict) +/* Despite the type, this function expects a uint64_t buffer */ +static uint64_t +fetch_pixel_x2r10g10b10 (bits_image_t *image, + int offset, + int line) { - switch(pict->format) { - case PIXMAN_a2b10g10r10: return fbFetch_a2b10g10r10; - case PIXMAN_x2b10g10r10: return fbFetch_x2b10g10r10; - default: return fbFetch64_generic; - } + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t p = READ (image, bits + offset); + uint64_t r = (p >> 20) & 0x3ff; + uint64_t g = (p >> 10) & 0x3ff; + uint64_t b = p & 0x3ff; + + r = r << 6 | r >> 4; + g = g << 6 | g >> 4; + b = b << 6 | b >> 4; + + return 0xffffULL << 48 | r << 32 | g << 16 | b; } -/**************************** Pixel wise fetching *****************************/ - -static FASTCALL uint64_t -fbFetchPixel_a2b10g10r10 (bits_image_t *pict, int offset, int line) +/* Despite the type, expects a uint64_t buffer */ +static uint64_t +fetch_pixel_a2b10g10r10 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t p = READ(pict, bits + offset); + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t p = READ (image, bits + offset); uint64_t a = p >> 30; uint64_t b = (p >> 20) & 0x3ff; uint64_t g = (p >> 10) & 0x3ff; uint64_t r = p & 0x3ff; - + r = r << 6 | r >> 4; g = g << 6 | g >> 4; b = b << 6 | b >> 4; - - a <<= 62; + + a <<= 14; a |= a >> 2; a |= a >> 4; a |= a >> 8; - + return a << 48 | r << 32 | g << 16 | b; } -static FASTCALL uint64_t -fbFetchPixel_x2b10g10r10 (bits_image_t *pict, int offset, int line) +/* Despite the type, this function expects a uint64_t buffer */ +static uint64_t +fetch_pixel_x2b10g10r10 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t p = READ(pict, bits + offset); + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t p = READ (image, bits + offset); uint64_t b = (p >> 20) & 0x3ff; uint64_t g = (p >> 10) & 0x3ff; uint64_t r = p & 0x3ff; - + r = r << 6 | r >> 4; g = g << 6 | g >> 4; b = b << 6 | b >> 4; - + return 0xffffULL << 48 | r << 32 | g << 16 | b; } -static FASTCALL uint32_t -fbFetchPixel_a8r8g8b8 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_a8r8g8b8 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - return READ(pict, (uint32_t *)bits + offset); + uint32_t *bits = image->bits + line * image->rowstride; + return READ (image, (uint32_t *)bits + offset); } -static FASTCALL uint32_t -fbFetchPixel_x8r8g8b8 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_x8r8g8b8 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - return READ(pict, (uint32_t *)bits + offset) | 0xff000000; + uint32_t *bits = image->bits + line * image->rowstride; + + return READ (image, (uint32_t *)bits + offset) | 0xff000000; } -static FASTCALL uint32_t -fbFetchPixel_a8b8g8r8 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_a8b8g8r8 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, (uint32_t *)bits + offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, 
(uint32_t *)bits + offset); + return ((pixel & 0xff000000) | ((pixel >> 16) & 0xff) | (pixel & 0x0000ff00) | ((pixel & 0xff) << 16)); } -static FASTCALL uint32_t -fbFetchPixel_x8b8g8r8 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_x8b8g8r8 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, (uint32_t *)bits + offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, (uint32_t *)bits + offset); + return ((0xff000000) | ((pixel >> 16) & 0xff) | (pixel & 0x0000ff00) | ((pixel & 0xff) << 16)); } -static FASTCALL uint32_t -fbFetchPixel_b8g8r8a8 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_b8g8r8a8 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, (uint32_t *)bits + offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, (uint32_t *)bits + offset); + return ((pixel & 0xff000000) >> 24 | (pixel & 0x00ff0000) >> 8 | (pixel & 0x0000ff00) << 8 | (pixel & 0x000000ff) << 24); } -static FASTCALL uint32_t -fbFetchPixel_b8g8r8x8 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_b8g8r8x8 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, (uint32_t *)bits + offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, (uint32_t *)bits + offset); + return ((0xff000000) | (pixel & 0xff000000) >> 24 | (pixel & 0x00ff0000) >> 8 | (pixel & 0x0000ff00) << 8); } -static FASTCALL uint32_t -fbFetchPixel_r8g8b8 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_r8g8b8 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - uint8_t *pixel = ((uint8_t *) bits) + (offset*3); -#if IMAGE_BYTE_ORDER == MSBFirst + uint32_t *bits = image->bits + line * image->rowstride; + uint8_t *pixel = ((uint8_t *) bits) + (offset * 3); + +#ifdef WORDS_BIGENDIAN return (0xff000000 | - (READ(pict, pixel + 0) << 16) | - (READ(pict, pixel + 1) << 8) | - (READ(pict, pixel + 2))); + (READ (image, pixel + 0) << 16) | + (READ (image, pixel + 1) << 8) | + (READ (image, pixel + 2))); #else return (0xff000000 | - (READ(pict, pixel + 2) << 16) | - (READ(pict, pixel + 1) << 8) | - (READ(pict, pixel + 0))); + (READ (image, pixel + 2) << 16) | + (READ (image, pixel + 1) << 8) | + (READ (image, pixel + 0))); #endif } -static FASTCALL uint32_t -fbFetchPixel_b8g8r8 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_b8g8r8 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - uint8_t *pixel = ((uint8_t *) bits) + (offset*3); -#if IMAGE_BYTE_ORDER == MSBFirst + uint32_t *bits = image->bits + line * image->rowstride; + uint8_t *pixel = ((uint8_t *) bits) + (offset * 3); +#ifdef WORDS_BIGENDIAN return (0xff000000 | - (READ(pict, pixel + 2) << 16) | - (READ(pict, pixel + 1) << 8) | - (READ(pict, pixel + 0))); + (READ (image, pixel + 2) << 16) | + (READ (image, pixel + 1) << 8) | + (READ (image, pixel + 0))); #else return (0xff000000 | - (READ(pict, pixel + 0) << 16) | - (READ(pict, pixel + 1) << 8) | - (READ(pict, pixel + 2))); + (READ (image, pixel + 0) << 16) | + (READ (image, pixel + 1) << 8) | + (READ (image, pixel + 2))); #endif } -static FASTCALL uint32_t 
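
The 16 bpp pixel fetchers that follow, beginning with fbFetchPixel_r5g6b5 / fetch_pixel_r5g6b5, widen each 5- or 6-bit channel by OR-ing the channel with a shifted copy of its own top bits, so 0 maps to 0x00 and the channel maximum to 0xff. The masked form in the code is equivalent to plain bit replication; a minimal standalone check (names chosen here for illustration):

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
        uint32_t pixel, p5, r1, r2;

        for (p5 = 0; p5 < 32; p5++)
        {
            pixel = p5 << 11;    /* red channel of an r5g6b5 pixel */

            /* masked form used by fetch_pixel_r5g6b5, shifted back down */
            r1 = (((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) << 8) >> 16;

            /* plain bit replication: 5 bits -> 8 bits */
            r2 = (p5 << 3) | (p5 >> 2);

            assert (r1 == r2);   /* 0 -> 0x00, 31 -> 0xff */
        }
        return 0;
    }

The same identity, with different masks and shifts, underlies every 15 and 16 bpp fetcher in this file.
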
-fbFetchPixel_r5g6b5 (bits_image_t *pict, int offset, int line)
+static uint32_t
+fetch_pixel_r5g6b5 (bits_image_t *image,
+                    int offset,
+                    int line)
 {
-    uint32_t r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t pixel = READ(pict, (uint16_t *) bits + offset);
-
+    uint32_t *bits = image->bits + line * image->rowstride;
+    uint32_t pixel = READ (image, (uint16_t *) bits + offset);
+    uint32_t r, g, b;
+
     r = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) << 8;
     g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5;
     b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
+
     return (0xff000000 | r | g | b);
 }
 
-static FASTCALL uint32_t
-fbFetchPixel_b5g6r5 (bits_image_t *pict, int offset, int line)
+static uint32_t
+fetch_pixel_b5g6r5 (bits_image_t *image,
+                    int offset,
+                    int line)
 {
-    uint32_t r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t pixel = READ(pict, (uint16_t *) bits + offset);
-
+    uint32_t r, g, b;
+    uint32_t *bits = image->bits + line * image->rowstride;
+    uint32_t pixel = READ (image, (uint16_t *) bits + offset);
+
     b = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) >> 8;
     g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5;
     r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
+
     return (0xff000000 | r | g | b);
 }
 
-static FASTCALL uint32_t
-fbFetchPixel_a1r5g5b5 (bits_image_t *pict, int offset, int line)
+static uint32_t
+fetch_pixel_a1r5g5b5 (bits_image_t *image,
+                      int offset,
+                      int line)
 {
-    uint32_t a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t pixel = READ(pict, (uint16_t *) bits + offset);
-
+    uint32_t *bits = image->bits + line * image->rowstride;
+    uint32_t pixel = READ (image, (uint16_t *) bits + offset);
+    uint32_t a, r, g, b;
+
     a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
     r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9;
     g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
     b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
+
     return (a | r | g | b);
 }
 
-static FASTCALL uint32_t
-fbFetchPixel_x1r5g5b5 (bits_image_t *pict, int offset, int line)
+static uint32_t
+fetch_pixel_x1r5g5b5 (bits_image_t *image,
+                      int offset,
+                      int line)
 {
-    uint32_t r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t pixel = READ(pict, (uint16_t *) bits + offset);
-
+    uint32_t *bits = image->bits + line * image->rowstride;
+    uint32_t pixel = READ (image, (uint16_t *) bits + offset);
+    uint32_t r, g, b;
+
     r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9;
     g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
     b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
+
     return (0xff000000 | r | g | b);
 }
 
-static FASTCALL uint32_t
-fbFetchPixel_a1b5g5r5 (bits_image_t *pict, int offset, int line)
+static uint32_t
+fetch_pixel_a1b5g5r5 (bits_image_t *image,
+                      int offset,
+                      int line)
 {
-    uint32_t a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t pixel = READ(pict, (uint16_t *) bits + offset);
-
+    uint32_t *bits = image->bits + line * image->rowstride;
+    uint32_t pixel = READ (image, (uint16_t *) bits + offset);
+    uint32_t a, r, g, b;
+
     a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
     b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7;
     g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
     r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
+
     return (a | r | g | b);
 }
 
-static FASTCALL uint32_t
-fbFetchPixel_x1b5g5r5 (bits_image_t *pict, int offset, int line)
+static uint32_t
+fetch_pixel_x1b5g5r5 (bits_image_t *image,
+                      int offset,
+                      int line)
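
fetch_pixel_a1r5g5b5 and fetch_pixel_a1b5g5r5 above turn the single alpha bit into a full 0x00 or 0xff byte by negating it in 8-bit arithmetic: 0 - 1 wraps to 0xff in a uint8_t, while 0 - 0 stays 0. The x1 variants, whose x1b5g5r5 body continues below, instead substitute a constant 0xff000000. A standalone check of the negation trick:

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
        uint32_t pixel;

        /* alpha bit set: 0 - 1 wraps to 0xff as a uint8_t */
        pixel = 0x8000;
        assert (((uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24)
                == 0xff000000);

        /* alpha bit clear: 0 - 0 stays 0 */
        pixel = 0x0000;
        assert (((uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24)
                == 0x00000000);

        return 0;
    }
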
{ - uint32_t r,g,b; - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, (uint16_t *) bits + offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, (uint16_t *) bits + offset); + uint32_t r, g, b; + b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7; g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6; r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14; + return (0xff000000 | r | g | b); } -static FASTCALL uint32_t -fbFetchPixel_a4r4g4b4 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_a4r4g4b4 (bits_image_t *image, + int offset, + int line) { - uint32_t a,r,g,b; - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, (uint16_t *) bits + offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, (uint16_t *) bits + offset); + uint32_t a, r, g, b; + a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16; r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12; g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8; b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)); + return (a | r | g | b); } -static FASTCALL uint32_t -fbFetchPixel_x4r4g4b4 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_x4r4g4b4 (bits_image_t *image, + int offset, + int line) { - uint32_t r,g,b; - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, (uint16_t *) bits + offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, (uint16_t *) bits + offset); + uint32_t r, g, b; + r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12; g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8; b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)); + return (0xff000000 | r | g | b); } -static FASTCALL uint32_t -fbFetchPixel_a4b4g4r4 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_a4b4g4r4 (bits_image_t *image, + int offset, + int line) { - uint32_t a,r,g,b; - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, (uint16_t *) bits + offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, (uint16_t *) bits + offset); + uint32_t a, r, g, b; + a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16; b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4; g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8; r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16; + return (a | r | g | b); } -static FASTCALL uint32_t -fbFetchPixel_x4b4g4r4 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_x4b4g4r4 (bits_image_t *image, + int offset, + int line) { - uint32_t r,g,b; - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, (uint16_t *) bits + offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, (uint16_t *) bits + offset); + uint32_t r, g, b; + b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4; g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8; r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16; + return (0xff000000 | r | g | b); } -static FASTCALL uint32_t -fbFetchPixel_a8 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_a8 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, (uint8_t *) bits + offset); - + uint32_t *bits = image->bits + line * image->rowstride; + 
uint32_t pixel = READ (image, (uint8_t *) bits + offset); + return pixel << 24; } -static FASTCALL uint32_t -fbFetchPixel_r3g3b2 (bits_image_t *pict, int offset, int line) -{ - uint32_t r,g,b; - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, (uint8_t *) bits + offset); - - r = ((pixel & 0xe0) | ((pixel & 0xe0) >> 3) | ((pixel & 0xc0) >> 6)) << 16; - g = ((pixel & 0x1c) | ((pixel & 0x18) >> 3) | ((pixel & 0x1c) << 3)) << 8; +static uint32_t +fetch_pixel_r3g3b2 (bits_image_t *image, + int offset, + int line) +{ + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, (uint8_t *) bits + offset); + uint32_t r, g, b; + + r = ((pixel & 0xe0) | + ((pixel & 0xe0) >> 3) | + ((pixel & 0xc0) >> 6)) << 16; + + g = ((pixel & 0x1c) | + ((pixel & 0x18) >> 3) | + ((pixel & 0x1c) << 3)) << 8; + b = (((pixel & 0x03) ) | ((pixel & 0x03) << 2) | ((pixel & 0x03) << 4) | ((pixel & 0x03) << 6)); + return (0xff000000 | r | g | b); } -static FASTCALL uint32_t -fbFetchPixel_b2g3r3 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_b2g3r3 (bits_image_t *image, + int offset, + int line) { - uint32_t r,g,b; - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, (uint8_t *) bits + offset); - - b = (((pixel & 0xc0) ) | - ((pixel & 0xc0) >> 2) | - ((pixel & 0xc0) >> 4) | + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, (uint8_t *) bits + offset); + uint32_t r, g, b; + + b = ((pixel & 0xc0) | + ((pixel & 0xc0) >> 2) | + ((pixel & 0xc0) >> 4) | ((pixel & 0xc0) >> 6)); - g = ((pixel & 0x38) | ((pixel & 0x38) >> 3) | ((pixel & 0x30) << 2)) << 8; - r = (((pixel & 0x07) ) | - ((pixel & 0x07) << 3) | + + g = ((pixel & 0x38) | + ((pixel & 0x38) >> 3) | + ((pixel & 0x30) << 2)) << 8; + + r = ((pixel & 0x07) | + ((pixel & 0x07) << 3) | ((pixel & 0x06) << 6)) << 16; + return (0xff000000 | r | g | b); } -static FASTCALL uint32_t -fbFetchPixel_a2r2g2b2 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_a2r2g2b2 (bits_image_t *image, + int offset, + int line) { - uint32_t a,r,g,b; - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, (uint8_t *) bits + offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, (uint8_t *) bits + offset); + uint32_t a, r, g, b; + a = ((pixel & 0xc0) * 0x55) << 18; r = ((pixel & 0x30) * 0x55) << 12; g = ((pixel & 0x0c) * 0x55) << 6; b = ((pixel & 0x03) * 0x55); - return a|r|g|b; + + return a | r | g | b; } -static FASTCALL uint32_t -fbFetchPixel_a2b2g2r2 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_a2b2g2r2 (bits_image_t *image, + int offset, + int line) { - uint32_t a,r,g,b; - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, (uint8_t *) bits + offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, (uint8_t *) bits + offset); + uint32_t a, r, g, b; + a = ((pixel & 0xc0) * 0x55) << 18; b = ((pixel & 0x30) * 0x55) >> 6; g = ((pixel & 0x0c) * 0x55) << 6; r = ((pixel & 0x03) * 0x55) << 16; - return a|r|g|b; + + return a | r | g | b; } -static FASTCALL uint32_t -fbFetchPixel_c8 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_c8 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, (uint8_t *) bits + offset); - const pixman_indexed_t * indexed = 
pict->indexed; + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, (uint8_t *) bits + offset); + const pixman_indexed_t * indexed = image->indexed; + return indexed->rgba[pixel]; } -static FASTCALL uint32_t -fbFetchPixel_x4a4 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_x4a4 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, (uint8_t *) bits + offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, (uint8_t *) bits + offset); + return ((pixel & 0xf) | ((pixel & 0xf) << 4)) << 24; } -static FASTCALL uint32_t -fbFetchPixel_a4 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_a4 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = Fetch4(pict, bits, offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = FETCH_4 (image, bits, offset); + pixel |= pixel << 4; return pixel << 24; } -static FASTCALL uint32_t -fbFetchPixel_r1g2b1 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_r1g2b1 (bits_image_t *image, + int offset, + int line) { - uint32_t r,g,b; - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = Fetch4(pict, bits, offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = FETCH_4 (image, bits, offset); + uint32_t r, g, b; + r = ((pixel & 0x8) * 0xff) << 13; g = ((pixel & 0x6) * 0x55) << 7; b = ((pixel & 0x1) * 0xff); - return 0xff000000|r|g|b; + + return 0xff000000 | r | g | b; } -static FASTCALL uint32_t -fbFetchPixel_b1g2r1 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_b1g2r1 (bits_image_t *image, + int offset, + int line) { - uint32_t r,g,b; - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = Fetch4(pict, bits, offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = FETCH_4 (image, bits, offset); + uint32_t r, g, b; + b = ((pixel & 0x8) * 0xff) >> 3; g = ((pixel & 0x6) * 0x55) << 7; r = ((pixel & 0x1) * 0xff) << 16; - return 0xff000000|r|g|b; + + return 0xff000000 | r | g | b; } -static FASTCALL uint32_t -fbFetchPixel_a1r1g1b1 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_a1r1g1b1 (bits_image_t *image, + int offset, + int line) { - uint32_t a,r,g,b; - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = Fetch4(pict, bits, offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = FETCH_4 (image, bits, offset); + uint32_t a, r, g, b; + a = ((pixel & 0x8) * 0xff) << 21; r = ((pixel & 0x4) * 0xff) << 14; g = ((pixel & 0x2) * 0xff) << 7; b = ((pixel & 0x1) * 0xff); - return a|r|g|b; + + return a | r | g | b; } -static FASTCALL uint32_t -fbFetchPixel_a1b1g1r1 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_a1b1g1r1 (bits_image_t *image, + int offset, + int line) { - uint32_t a,r,g,b; - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = Fetch4(pict, bits, offset); - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = FETCH_4 (image, bits, offset); + uint32_t a, r, g, b; + a = ((pixel & 0x8) * 0xff) << 21; r = ((pixel & 0x4) * 0xff) >> 3; g = ((pixel & 0x2) * 0xff) << 7; b = ((pixel & 0x1) * 0xff) << 16; - return a|r|g|b; + + return a | r | g | b; } -static FASTCALL uint32_t 
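
The 4 bpp fetchers above widen their sub-byte channels by multiplication: a 1-bit channel times 0xff gives 0 or 255 (e.g. ((pixel & 0x8) * 0xff) << 21 for alpha in a1r1g1b1), and a 2-bit channel times 0x55 replicates its two bits across the byte, as in a2r2g2b2 earlier. A minimal check of both identities (the indexed c4 fetcher continues below):

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
        uint32_t v;

        /* 1-bit channel: multiplying by 0xff maps 0 -> 0x00, 1 -> 0xff;
         * the masked forms above fold the bit's position into the shift. */
        for (v = 0; v < 2; v++)
            assert (v * 0xff == (v ? 0xffu : 0x00u));

        /* 2-bit channel: multiplying by 0x55 replicates the two bits
         * across the byte: 0 -> 0x00, 1 -> 0x55, 2 -> 0xaa, 3 -> 0xff. */
        for (v = 0; v < 4; v++)
            assert (v * 0x55 == ((v << 6) | (v << 4) | (v << 2) | v));

        return 0;
    }
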
-fbFetchPixel_c4 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_c4 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = Fetch4(pict, bits, offset); - const pixman_indexed_t * indexed = pict->indexed; - + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = FETCH_4 (image, bits, offset); + const pixman_indexed_t * indexed = image->indexed; + return indexed->rgba[pixel]; } - -static FASTCALL uint32_t -fbFetchPixel_a1 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_a1 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, bits + (offset >> 5)); - uint32_t a; -#if BITMAP_BIT_ORDER == MSBFirst + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, bits + (offset >> 5)); + uint32_t a; + +#ifdef WORDS_BIGENDIAN a = pixel >> (0x1f - (offset & 0x1f)); #else a = pixel >> (offset & 0x1f); @@ -1248,826 +1730,1147 @@ fbFetchPixel_a1 (bits_image_t *pict, int offset, int line) a |= a << 1; a |= a << 2; a |= a << 4; + return a << 24; } -static FASTCALL uint32_t -fbFetchPixel_g1 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_g1 (bits_image_t *image, + int offset, + int line) { - uint32_t *bits = pict->bits + line*pict->rowstride; - uint32_t pixel = READ(pict, bits + (offset >> 5)); - const pixman_indexed_t * indexed = pict->indexed; + uint32_t *bits = image->bits + line * image->rowstride; + uint32_t pixel = READ (image, bits + (offset >> 5)); + const pixman_indexed_t * indexed = image->indexed; uint32_t a; -#if BITMAP_BIT_ORDER == MSBFirst + +#ifdef WORDS_BIGENDIAN a = pixel >> (0x1f - (offset & 0x1f)); #else a = pixel >> (offset & 0x1f); #endif a = a & 1; + return indexed->rgba[a]; } -static FASTCALL uint32_t -fbFetchPixel_yuy2 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_yuy2 (bits_image_t *image, + int offset, + int line) { + const uint32_t *bits = image->bits + image->rowstride * line; + int16_t y, u, v; int32_t r, g, b; - - const uint32_t *bits = pict->bits + pict->rowstride * line; - + y = ((uint8_t *) bits)[offset << 1] - 16; - u = ((uint8_t *) bits)[((offset << 1) & -4) + 1] - 128; - v = ((uint8_t *) bits)[((offset << 1) & -4) + 3] - 128; - + u = ((uint8_t *) bits)[((offset << 1) & - 4) + 1] - 128; + v = ((uint8_t *) bits)[((offset << 1) & - 4) + 3] - 128; + /* R = 1.164(Y - 16) + 1.596(V - 128) */ r = 0x012b27 * y + 0x019a2e * v; + /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */ g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u; + /* B = 1.164(Y - 16) + 2.018(U - 128) */ b = 0x012b27 * y + 0x0206a2 * u; - + return 0xff000000 | (r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) | (g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) | (b >= 0 ? b < 0x1000000 ? 
(b >> 16) & 0x0000ff : 0x0000ff : 0); } -static FASTCALL uint32_t -fbFetchPixel_yv12 (bits_image_t *pict, int offset, int line) +static uint32_t +fetch_pixel_yv12 (bits_image_t *image, + int offset, + int line) { - YV12_SETUP(pict); + YV12_SETUP (image); int16_t y = YV12_Y (line)[offset] - 16; int16_t u = YV12_U (line)[offset >> 1] - 128; int16_t v = YV12_V (line)[offset >> 1] - 128; int32_t r, g, b; - + /* R = 1.164(Y - 16) + 1.596(V - 128) */ r = 0x012b27 * y + 0x019a2e * v; + /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */ g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u; + /* B = 1.164(Y - 16) + 2.018(U - 128) */ b = 0x012b27 * y + 0x0206a2 * u; - + return 0xff000000 | (r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) | (g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) | (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0); } -/* - * XXX: The transformed fetch path only works at 32-bpp so far. When all paths - * have wide versions, this can be removed. - * - * WARNING: This function loses precision! - */ -static FASTCALL uint32_t -fbFetchPixel32_generic_lossy (bits_image_t *pict, int offset, int line) -{ - fetchPixelProc64 fetchPixel64 = ACCESS(pixman_fetchPixelProcForPicture64) (pict); - const uint64_t argb16Pixel = fetchPixel64(pict, offset, line); - uint32_t argb8Pixel; - - pixman_contract(&argb8Pixel, &argb16Pixel, 1); - - return argb8Pixel; -} - -fetchPixelProc32 ACCESS(pixman_fetchPixelProcForPicture32) (bits_image_t * pict) -{ - switch(pict->format) { - case PIXMAN_a8r8g8b8: return fbFetchPixel_a8r8g8b8; - case PIXMAN_x8r8g8b8: return fbFetchPixel_x8r8g8b8; - case PIXMAN_a8b8g8r8: return fbFetchPixel_a8b8g8r8; - case PIXMAN_x8b8g8r8: return fbFetchPixel_x8b8g8r8; - case PIXMAN_b8g8r8a8: return fbFetchPixel_b8g8r8a8; - case PIXMAN_b8g8r8x8: return fbFetchPixel_b8g8r8x8; - /* These two require wide compositing */ - case PIXMAN_a2b10g10r10: return fbFetchPixel32_generic_lossy; - case PIXMAN_x2b10g10r10: return fbFetchPixel32_generic_lossy; - - /* 24bpp formats */ - case PIXMAN_r8g8b8: return fbFetchPixel_r8g8b8; - case PIXMAN_b8g8r8: return fbFetchPixel_b8g8r8; - - /* 16bpp formats */ - case PIXMAN_r5g6b5: return fbFetchPixel_r5g6b5; - case PIXMAN_b5g6r5: return fbFetchPixel_b5g6r5; - - case PIXMAN_a1r5g5b5: return fbFetchPixel_a1r5g5b5; - case PIXMAN_x1r5g5b5: return fbFetchPixel_x1r5g5b5; - case PIXMAN_a1b5g5r5: return fbFetchPixel_a1b5g5r5; - case PIXMAN_x1b5g5r5: return fbFetchPixel_x1b5g5r5; - case PIXMAN_a4r4g4b4: return fbFetchPixel_a4r4g4b4; - case PIXMAN_x4r4g4b4: return fbFetchPixel_x4r4g4b4; - case PIXMAN_a4b4g4r4: return fbFetchPixel_a4b4g4r4; - case PIXMAN_x4b4g4r4: return fbFetchPixel_x4b4g4r4; - - /* 8bpp formats */ - case PIXMAN_a8: return fbFetchPixel_a8; - case PIXMAN_r3g3b2: return fbFetchPixel_r3g3b2; - case PIXMAN_b2g3r3: return fbFetchPixel_b2g3r3; - case PIXMAN_a2r2g2b2: return fbFetchPixel_a2r2g2b2; - case PIXMAN_a2b2g2r2: return fbFetchPixel_a2b2g2r2; - case PIXMAN_c8: return fbFetchPixel_c8; - case PIXMAN_g8: return fbFetchPixel_c8; - case PIXMAN_x4a4: return fbFetchPixel_x4a4; - - /* 4bpp formats */ - case PIXMAN_a4: return fbFetchPixel_a4; - case PIXMAN_r1g2b1: return fbFetchPixel_r1g2b1; - case PIXMAN_b1g2r1: return fbFetchPixel_b1g2r1; - case PIXMAN_a1r1g1b1: return fbFetchPixel_a1r1g1b1; - case PIXMAN_a1b1g1r1: return fbFetchPixel_a1b1g1r1; - case PIXMAN_c4: return fbFetchPixel_c4; - case PIXMAN_g4: return fbFetchPixel_c4; - - /* 1bpp formats */ - case PIXMAN_a1: return fbFetchPixel_a1; - case PIXMAN_g1: return 
fbFetchPixel_g1; +/*********************************** Store ************************************/ - /* YUV formats */ - case PIXMAN_yuy2: return fbFetchPixel_yuy2; - case PIXMAN_yv12: return fbFetchPixel_yv12; +#define SPLIT_A(v) \ + uint32_t a = ((v) >> 24), \ + r = ((v) >> 16) & 0xff, \ + g = ((v) >> 8) & 0xff, \ + b = (v) & 0xff + +#define SPLIT(v) \ + uint32_t r = ((v) >> 16) & 0xff, \ + g = ((v) >> 8) & 0xff, \ + b = (v) & 0xff + +static void +store_scanline_a2r10g10b10 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) +{ + uint32_t *bits = image->bits + image->rowstride * y; + uint32_t *pixel = bits + x; + uint64_t *values = (uint64_t *)v; + int i; + + for (i = 0; i < width; ++i) + { + WRITE (image, pixel++, + ((values[i] >> 32) & 0xc0000000) | + ((values[i] >> 18) & 0x3ff00000) | + ((values[i] >> 12) & 0xffc00) | + ((values[i] >> 6) & 0x3ff)); } - - return NULL; } -static FASTCALL uint64_t -fbFetchPixel64_generic (bits_image_t *pict, int offset, int line) +static void +store_scanline_x2r10g10b10 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) { - fetchPixelProc32 fetchPixel32 = ACCESS(pixman_fetchPixelProcForPicture32) (pict); - uint32_t argb8Pixel = fetchPixel32(pict, offset, line); - uint64_t argb16Pixel; - - pixman_expand(&argb16Pixel, &argb8Pixel, pict->format, 1); - - return argb16Pixel; -} - -fetchPixelProc64 ACCESS(pixman_fetchPixelProcForPicture64) (bits_image_t * pict) -{ - switch(pict->format) { - case PIXMAN_a2b10g10r10: return fbFetchPixel_a2b10g10r10; - case PIXMAN_x2b10g10r10: return fbFetchPixel_x2b10g10r10; - default: return fbFetchPixel64_generic; + uint32_t *bits = image->bits + image->rowstride * y; + uint64_t *values = (uint64_t *)v; + uint32_t *pixel = bits + x; + int i; + + for (i = 0; i < width; ++i) + { + WRITE (image, pixel++, + ((values[i] >> 18) & 0x3ff00000) | + ((values[i] >> 12) & 0xffc00) | + ((values[i] >> 6) & 0x3ff)); } } -/*********************************** Store ************************************/ - -#define Splita(v) uint32_t a = ((v) >> 24), r = ((v) >> 16) & 0xff, g = ((v) >> 8) & 0xff, b = (v) & 0xff -#define Split(v) uint32_t r = ((v) >> 16) & 0xff, g = ((v) >> 8) & 0xff, b = (v) & 0xff - -static FASTCALL void -fbStore_a2b10g10r10 (pixman_image_t *image, - uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_a2b10g10r10 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint32_t *pixel = bits + x; - for (i = 0; i < width; ++i) { - WRITE(image, pixel++, - ((values[i] >> 32) & 0xc0000000) | // A - ((values[i] >> 38) & 0x3ff) | // R - ((values[i] >> 12) & 0xffc00) | // G - ((values[i] << 14) & 0x3ff00000)); // B + uint64_t *values = (uint64_t *)v; + int i; + + for (i = 0; i < width; ++i) + { + WRITE (image, pixel++, + ((values[i] >> 32) & 0xc0000000) | + ((values[i] >> 38) & 0x3ff) | + ((values[i] >> 12) & 0xffc00) | + ((values[i] << 14) & 0x3ff00000)); } } -static FASTCALL void -fbStore_x2b10g10r10 (pixman_image_t *image, - uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_x2b10g10r10 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *v) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; + uint64_t *values = (uint64_t *)v; uint32_t *pixel = bits + x; - for (i = 0; i < width; ++i) { - WRITE(image, pixel++, - ((values[i] >> 38) & 
0x3ff) | // R - ((values[i] >> 12) & 0xffc00) | // G - ((values[i] << 14) & 0x3ff00000)); // B + int i; + + for (i = 0; i < width; ++i) + { + WRITE (image, pixel++, + ((values[i] >> 38) & 0x3ff) | + ((values[i] >> 12) & 0xffc00) | + ((values[i] << 14) & 0x3ff00000)); } } -static FASTCALL void -fbStore_a8r8g8b8 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_a8r8g8b8 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - MEMCPY_WRAPPED(image, ((uint32_t *)bits) + x, values, width*sizeof(uint32_t)); + uint32_t *bits = image->bits + image->rowstride * y; + + MEMCPY_WRAPPED (image, ((uint32_t *)bits) + x, values, + width * sizeof(uint32_t)); } -static FASTCALL void -fbStore_x8r8g8b8 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_x8r8g8b8 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint32_t *pixel = (uint32_t *)bits + x; + int i; + for (i = 0; i < width; ++i) - WRITE(image, pixel++, values[i] & 0xffffff); + WRITE (image, pixel++, values[i] & 0xffffff); } -static FASTCALL void -fbStore_a8b8g8r8 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_a8b8g8r8 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint32_t *pixel = (uint32_t *)bits + x; + int i; + for (i = 0; i < width; ++i) - WRITE(image, pixel++, (values[i] & 0xff00ff00) | ((values[i] >> 16) & 0xff) | ((values[i] & 0xff) << 16)); + { + WRITE (image, pixel++, + (values[i] & 0xff00ff00) | + ((values[i] >> 16) & 0xff) | + ((values[i] & 0xff) << 16)); + } } -static FASTCALL void -fbStore_x8b8g8r8 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_x8b8g8r8 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint32_t *pixel = (uint32_t *)bits + x; + int i; + for (i = 0; i < width; ++i) - WRITE(image, pixel++, (values[i] & 0x0000ff00) | ((values[i] >> 16) & 0xff) | ((values[i] & 0xff) << 16)); + { + WRITE (image, pixel++, + (values[i] & 0x0000ff00) | + ((values[i] >> 16) & 0xff) | + ((values[i] & 0xff) << 16)); + } } -static FASTCALL void -fbStore_b8g8r8a8 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_b8g8r8a8 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint32_t *pixel = (uint32_t *)bits + x; + int i; + for (i = 0; i < width; ++i) - WRITE(image, pixel++, - ((values[i] >> 24) & 0x000000ff) | - ((values[i] >> 8) & 0x0000ff00) | - ((values[i] << 8) & 0x00ff0000) | - ((values[i] << 24) & 0xff000000)); + { + WRITE (image, pixel++, + ((values[i] >> 24) & 0x000000ff) | + ((values[i] >> 8) & 0x0000ff00) | + ((values[i] << 8) & 0x00ff0000) | + ((values[i] << 24) & 0xff000000)); + } } -static FASTCALL void -fbStore_b8g8r8x8 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void 
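
store_scanline_b8g8r8a8 above is a full byte reversal of the canonical a8r8g8b8 value, and fetch_pixel_b8g8r8a8 earlier in the diff applies the inverse shifts, so a store followed by a fetch round-trips exactly; the b8g8r8x8 variant continues below. A standalone sketch of the two expressions, with helper names invented here:

    #include <assert.h>
    #include <stdint.h>

    /* Pack a canonical a8r8g8b8 value into b8g8r8a8 memory order,
     * as in store_scanline_b8g8r8a8. */
    static uint32_t
    pack_b8g8r8a8 (uint32_t v)
    {
        return ((v >> 24) & 0x000000ff) |
               ((v >>  8) & 0x0000ff00) |
               ((v <<  8) & 0x00ff0000) |
               ((v << 24) & 0xff000000);
    }

    /* Unpack back to a8r8g8b8, as in fetch_pixel_b8g8r8a8. */
    static uint32_t
    unpack_b8g8r8a8 (uint32_t p)
    {
        return ((p & 0xff000000) >> 24 |
                (p & 0x00ff0000) >>  8 |
                (p & 0x0000ff00) <<  8 |
                (p & 0x000000ff) << 24);
    }

    int
    main (void)
    {
        uint32_t argb = 0x80ff40c0;    /* a=0x80 r=0xff g=0x40 b=0xc0 */

        assert (pack_b8g8r8a8 (argb) == 0xc040ff80);   /* bytes reversed */
        assert (unpack_b8g8r8a8 (pack_b8g8r8a8 (argb)) == argb);

        return 0;
    }
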
+store_scanline_b8g8r8x8 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint32_t *pixel = (uint32_t *)bits + x; + int i; + for (i = 0; i < width; ++i) - WRITE(image, pixel++, - ((values[i] >> 8) & 0x0000ff00) | - ((values[i] << 8) & 0x00ff0000) | - ((values[i] << 24) & 0xff000000)); + { + WRITE (image, pixel++, + ((values[i] >> 8) & 0x0000ff00) | + ((values[i] << 8) & 0x00ff0000) | + ((values[i] << 24) & 0xff000000)); + } } -static FASTCALL void -fbStore_r8g8b8 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, - const pixman_indexed_t * indexed) +static void +store_scanline_r8g8b8 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { + uint32_t *bits = image->bits + image->rowstride * y; + uint8_t *pixel = ((uint8_t *) bits) + 3 * x; int i; - uint8_t *pixel = ((uint8_t *) bits) + 3*x; - for (i = 0; i < width; ++i) { - Store24(image, pixel, values[i]); - pixel += 3; + + for (i = 0; i < width; ++i) + { + uint32_t val = values[i]; + +#ifdef WORDS_BIGENDIAN + WRITE (image, pixel++, (val & 0x00ff0000) >> 16); + WRITE (image, pixel++, (val & 0x0000ff00) >> 8); + WRITE (image, pixel++, (val & 0x000000ff) >> 0); +#else + WRITE (image, pixel++, (val & 0x000000ff) >> 0); + WRITE (image, pixel++, (val & 0x0000ff00) >> 8); + WRITE (image, pixel++, (val & 0x00ff0000) >> 16); +#endif } } -static FASTCALL void -fbStore_b8g8r8 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_b8g8r8 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { + uint32_t *bits = image->bits + image->rowstride * y; + uint8_t *pixel = ((uint8_t *) bits) + 3 * x; int i; - uint8_t *pixel = ((uint8_t *) bits) + 3*x; - for (i = 0; i < width; ++i) { + + for (i = 0; i < width; ++i) + { uint32_t val = values[i]; -#if IMAGE_BYTE_ORDER == MSBFirst - WRITE(image, pixel++, Blue(val)); - WRITE(image, pixel++, Green(val)); - WRITE(image, pixel++, Red(val)); + +#ifdef WORDS_BIGENDIAN + WRITE (image, pixel++, (val & 0x000000ff) >> 0); + WRITE (image, pixel++, (val & 0x0000ff00) >> 8); + WRITE (image, pixel++, (val & 0x00ff0000) >> 16); #else - WRITE(image, pixel++, Red(val)); - WRITE(image, pixel++, Green(val)); - WRITE(image, pixel++, Blue(val)); + WRITE (image, pixel++, (val & 0x00ff0000) >> 16); + WRITE (image, pixel++, (val & 0x0000ff00) >> 8); + WRITE (image, pixel++, (val & 0x000000ff) >> 0); #endif } } -static FASTCALL void -fbStore_r5g6b5 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_r5g6b5 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint16_t *pixel = ((uint16_t *) bits) + x; - for (i = 0; i < width; ++i) { + int i; + + for (i = 0; i < width; ++i) + { uint32_t s = values[i]; - WRITE(image, pixel++, ((s >> 3) & 0x001f) | - ((s >> 5) & 0x07e0) | - ((s >> 8) & 0xf800)); + + WRITE (image, pixel++, + ((s >> 3) & 0x001f) | + ((s >> 5) & 0x07e0) | + ((s >> 8) & 0xf800)); } } -static FASTCALL void -fbStore_b5g6r5 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_b5g6r5 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t 
*bits = image->bits + image->rowstride * y; uint16_t *pixel = ((uint16_t *) bits) + x; - for (i = 0; i < width; ++i) { - Split(values[i]); - WRITE(image, pixel++, ((b << 8) & 0xf800) | - ((g << 3) & 0x07e0) | - ((r >> 3) )); + int i; + + for (i = 0; i < width; ++i) + { + SPLIT (values[i]); + + WRITE (image, pixel++, + ((b << 8) & 0xf800) | + ((g << 3) & 0x07e0) | + ((r >> 3) )); } } -static FASTCALL void -fbStore_a1r5g5b5 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_a1r5g5b5 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint16_t *pixel = ((uint16_t *) bits) + x; - for (i = 0; i < width; ++i) { - Splita(values[i]); - WRITE(image, pixel++, ((a << 8) & 0x8000) | - ((r << 7) & 0x7c00) | - ((g << 2) & 0x03e0) | - ((b >> 3) )); + int i; + + for (i = 0; i < width; ++i) + { + SPLIT_A (values[i]); + + WRITE (image, pixel++, + ((a << 8) & 0x8000) | + ((r << 7) & 0x7c00) | + ((g << 2) & 0x03e0) | + ((b >> 3) )); } } -static FASTCALL void -fbStore_x1r5g5b5 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_x1r5g5b5 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint16_t *pixel = ((uint16_t *) bits) + x; - for (i = 0; i < width; ++i) { - Split(values[i]); - WRITE(image, pixel++, ((r << 7) & 0x7c00) | - ((g << 2) & 0x03e0) | - ((b >> 3) )); + int i; + + for (i = 0; i < width; ++i) + { + SPLIT (values[i]); + + WRITE (image, pixel++, + ((r << 7) & 0x7c00) | + ((g << 2) & 0x03e0) | + ((b >> 3) )); } } -static FASTCALL void -fbStore_a1b5g5r5 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_a1b5g5r5 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint16_t *pixel = ((uint16_t *) bits) + x; - for (i = 0; i < width; ++i) { - Splita(values[i]); - WRITE(image, pixel++, ((a << 8) & 0x8000) | - ((b << 7) & 0x7c00) | - ((g << 2) & 0x03e0) | - ((r >> 3) )); + int i; + + for (i = 0; i < width; ++i) + { + SPLIT_A (values[i]); + + WRITE (image, pixel++, + ((a << 8) & 0x8000) | + ((b << 7) & 0x7c00) | + ((g << 2) & 0x03e0) | + ((r >> 3) )); } } -static FASTCALL void -fbStore_x1b5g5r5 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_x1b5g5r5 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint16_t *pixel = ((uint16_t *) bits) + x; - for (i = 0; i < width; ++i) { - Split(values[i]); - WRITE(image, pixel++, ((b << 7) & 0x7c00) | - ((g << 2) & 0x03e0) | - ((r >> 3) )); + int i; + + for (i = 0; i < width; ++i) + { + SPLIT (values[i]); + + WRITE (image, pixel++, ((b << 7) & 0x7c00) | + ((g << 2) & 0x03e0) | + ((r >> 3) )); } } -static FASTCALL void -fbStore_a4r4g4b4 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_a4r4g4b4 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + 
image->rowstride * y; uint16_t *pixel = ((uint16_t *) bits) + x; - for (i = 0; i < width; ++i) { - Splita(values[i]); - WRITE(image, pixel++, ((a << 8) & 0xf000) | - ((r << 4) & 0x0f00) | - ((g ) & 0x00f0) | - ((b >> 4) )); + int i; + + for (i = 0; i < width; ++i) + { + SPLIT_A (values[i]); + + WRITE (image, pixel++, + ((a << 8) & 0xf000) | + ((r << 4) & 0x0f00) | + ((g ) & 0x00f0) | + ((b >> 4) )); } } -static FASTCALL void -fbStore_x4r4g4b4 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_x4r4g4b4 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint16_t *pixel = ((uint16_t *) bits) + x; - for (i = 0; i < width; ++i) { - Split(values[i]); - WRITE(image, pixel++, ((r << 4) & 0x0f00) | - ((g ) & 0x00f0) | - ((b >> 4) )); + int i; + + for (i = 0; i < width; ++i) + { + SPLIT (values[i]); + + WRITE (image, pixel++, + ((r << 4) & 0x0f00) | + ((g ) & 0x00f0) | + ((b >> 4) )); } } -static FASTCALL void -fbStore_a4b4g4r4 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_a4b4g4r4 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint16_t *pixel = ((uint16_t *) bits) + x; - for (i = 0; i < width; ++i) { - Splita(values[i]); - WRITE(image, pixel++, ((a << 8) & 0xf000) | - ((b << 4) & 0x0f00) | - ((g ) & 0x00f0) | - ((r >> 4) )); + int i; + + for (i = 0; i < width; ++i) + { + SPLIT_A (values[i]); + WRITE (image, pixel++, ((a << 8) & 0xf000) | + ((b << 4) & 0x0f00) | + ((g ) & 0x00f0) | + ((r >> 4) )); } } -static FASTCALL void -fbStore_x4b4g4r4 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_x4b4g4r4 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint16_t *pixel = ((uint16_t *) bits) + x; - for (i = 0; i < width; ++i) { - Split(values[i]); - WRITE(image, pixel++, ((b << 4) & 0x0f00) | - ((g ) & 0x00f0) | - ((r >> 4) )); + int i; + + for (i = 0; i < width; ++i) + { + SPLIT (values[i]); + + WRITE (image, pixel++, + ((b << 4) & 0x0f00) | + ((g ) & 0x00f0) | + ((r >> 4) )); } } -static FASTCALL void -fbStore_a8 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_a8 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint8_t *pixel = ((uint8_t *) bits) + x; - for (i = 0; i < width; ++i) { - WRITE(image, pixel++, values[i] >> 24); + int i; + + for (i = 0; i < width; ++i) + { + WRITE (image, pixel++, values[i] >> 24); } } -static FASTCALL void -fbStore_r3g3b2 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_r3g3b2 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint8_t *pixel = ((uint8_t *) bits) + x; - for (i = 0; i < width; ++i) { - Split(values[i]); - WRITE(image, pixel++, - ((r ) & 0xe0) | - ((g >> 3) & 0x1c) | - ((b >> 6) )); + int i; + + for (i = 0; 
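The SPLIT and SPLIT_A macros used throughout these stores are not defined in this hunk; judging from the expressions above they declare locals r, g, b (and a for SPLIT_A) holding the 8-bit channels of an a8r8g8b8 pixel. A sketch under that assumption, reproducing the a4r4g4b4 packing (macro bodies and names here are an assumption, not pixman's definitions):

#include <stdint.h>
#include <assert.h>

/* Assumed expansion of SPLIT / SPLIT_A, consistent with the usage above. */
#define SPLIT(v)                          \
    uint32_t r = ((v) >> 16) & 0xff,      \
             g = ((v) >>  8) & 0xff,      \
             b =  (v)        & 0xff

#define SPLIT_A(v)                        \
    uint32_t a = ((v) >> 24) & 0xff;      \
    SPLIT (v)

static uint16_t pack_a4r4g4b4 (uint32_t v)   /* illustrative name */
{
    SPLIT_A (v);

    return ((a << 8) & 0xf000) |
           ((r << 4) & 0x0f00) |
           ((g     ) & 0x00f0) |
           ((b >> 4)         );
}

int main (void)
{
    assert (pack_a4r4g4b4 (0xffffffff) == 0xffff);
    assert (pack_a4r4g4b4 (0xf0102030) == 0xf123);
    return 0;
}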
i < width; ++i) + { + SPLIT (values[i]); + + WRITE (image, pixel++, + ((r ) & 0xe0) | + ((g >> 3) & 0x1c) | + ((b >> 6) )); } } -static FASTCALL void -fbStore_b2g3r3 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_b2g3r3 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint8_t *pixel = ((uint8_t *) bits) + x; - for (i = 0; i < width; ++i) { - Split(values[i]); - WRITE(image, pixel++, - ((b ) & 0xc0) | - ((g >> 2) & 0x38) | - ((r >> 5) )); + int i; + + for (i = 0; i < width; ++i) + { + SPLIT (values[i]); + + WRITE (image, pixel++, + ((b ) & 0xc0) | + ((g >> 2) & 0x38) | + ((r >> 5) )); } } -static FASTCALL void -fbStore_a2r2g2b2 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_a2r2g2b2 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint8_t *pixel = ((uint8_t *) bits) + x; - for (i = 0; i < width; ++i) { - Splita(values[i]); - WRITE(image, pixel++, ((a ) & 0xc0) | - ((r >> 2) & 0x30) | - ((g >> 4) & 0x0c) | - ((b >> 6) )); + int i; + + for (i = 0; i < width; ++i) + { + SPLIT_A (values[i]); + + WRITE (image, pixel++, + ((a ) & 0xc0) | + ((r >> 2) & 0x30) | + ((g >> 4) & 0x0c) | + ((b >> 6) )); } } -static FASTCALL void -fbStore_c8 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_a2b2g2r2 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint8_t *pixel = ((uint8_t *) bits) + x; - for (i = 0; i < width; ++i) { - WRITE(image, pixel++, miIndexToEnt24(indexed,values[i])); - } + int i; + + for (i = 0; i < width; ++i) + { + SPLIT_A (values[i]); + + *(pixel++) = + ((a ) & 0xc0) | + ((b >> 2) & 0x30) | + ((g >> 4) & 0x0c) | + ((r >> 6) ); + } +} + +static void +store_scanline_c8 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) +{ + uint32_t *bits = image->bits + image->rowstride * y; + uint8_t *pixel = ((uint8_t *) bits) + x; + const pixman_indexed_t *indexed = image->indexed; + int i; + + for (i = 0; i < width; ++i) + WRITE (image, pixel++, RGB24_TO_ENTRY (indexed,values[i])); } -static FASTCALL void -fbStore_x4a4 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_x4a4 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { - int i; + uint32_t *bits = image->bits + image->rowstride * y; uint8_t *pixel = ((uint8_t *) bits) + x; - for (i = 0; i < width; ++i) { - WRITE(image, pixel++, values[i] >> 28); - } + int i; + + for (i = 0; i < width; ++i) + WRITE (image, pixel++, values[i] >> 28); } -#define Store8(img,l,o,v) (WRITE(img, (uint8_t *)(l) + ((o) >> 3), (v))) -#if IMAGE_BYTE_ORDER == MSBFirst -#define Store4(img,l,o,v) Store8(img,l,o,((o) & 4 ? \ - (Fetch8(img,l,o) & 0xf0) | (v) : \ - (Fetch8(img,l,o) & 0x0f) | ((v) << 4))) +#define STORE_8(img,l,o,v) (WRITE (img, (uint8_t *)(l) + ((o) >> 3), (v))) +#ifdef WORDS_BIGENDIAN +#define STORE_4(img,l,o,v) \ + STORE_8 (img,l,o,((o) & 4 ? 
\ + (FETCH_8 (img,l,o) & 0xf0) | (v) : \ + (FETCH_8 (img,l,o) & 0x0f) | ((v) << 4))) #else -#define Store4(img,l,o,v) Store8(img,l,o,((o) & 4 ? \ - (Fetch8(img,l,o) & 0x0f) | ((v) << 4) : \ - (Fetch8(img,l,o) & 0xf0) | (v))) +#define STORE_4(img,l,o,v) \ + STORE_8 (img,l,o,((o) & 4 ? \ + (FETCH_8 (img,l,o) & 0x0f) | ((v) << 4) : \ + (FETCH_8 (img,l,o) & 0xf0) | (v))) #endif -static FASTCALL void -fbStore_a4 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_a4 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { + uint32_t *bits = image->bits + image->rowstride * y; int i; - for (i = 0; i < width; ++i) { - Store4(image, bits, i + x, values[i]>>28); - } + + for (i = 0; i < width; ++i) + STORE_4 (image, bits, i + x, values[i] >> 28); } -static FASTCALL void -fbStore_r1g2b1 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_r1g2b1 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { + uint32_t *bits = image->bits + image->rowstride * y; int i; - for (i = 0; i < width; ++i) { - uint32_t pixel; - - Split(values[i]); + + for (i = 0; i < width; ++i) + { + uint32_t pixel; + + SPLIT (values[i]); pixel = (((r >> 4) & 0x8) | - ((g >> 5) & 0x6) | - ((b >> 7) )); - Store4(image, bits, i + x, pixel); + ((g >> 5) & 0x6) | + ((b >> 7) )); + STORE_4 (image, bits, i + x, pixel); } } -static FASTCALL void -fbStore_b1g2r1 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_b1g2r1 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { + uint32_t *bits = image->bits + image->rowstride * y; int i; - for (i = 0; i < width; ++i) { - uint32_t pixel; - - Split(values[i]); + + for (i = 0; i < width; ++i) + { + uint32_t pixel; + + SPLIT (values[i]); pixel = (((b >> 4) & 0x8) | - ((g >> 5) & 0x6) | - ((r >> 7) )); - Store4(image, bits, i + x, pixel); + ((g >> 5) & 0x6) | + ((r >> 7) )); + STORE_4 (image, bits, i + x, pixel); } } -static FASTCALL void -fbStore_a1r1g1b1 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_a1r1g1b1 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { + uint32_t *bits = image->bits + image->rowstride * y; int i; - for (i = 0; i < width; ++i) { - uint32_t pixel; - Splita(values[i]); + + for (i = 0; i < width; ++i) + { + uint32_t pixel; + + SPLIT_A (values[i]); pixel = (((a >> 4) & 0x8) | - ((r >> 5) & 0x4) | - ((g >> 6) & 0x2) | - ((b >> 7) )); - Store4(image, bits, i + x, pixel); + ((r >> 5) & 0x4) | + ((g >> 6) & 0x2) | + ((b >> 7) )); + STORE_4 (image, bits, i + x, pixel); } } -static FASTCALL void -fbStore_a1b1g1r1 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_a1b1g1r1 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { + uint32_t *bits = image->bits + image->rowstride * y; int i; - for (i = 0; i < width; ++i) { - uint32_t pixel; - Splita(values[i]); + + for (i = 0; i < width; ++i) + { + uint32_t pixel; + + SPLIT_A (values[i]); pixel = (((a >> 4) & 0x8) | - ((b >> 5) & 0x4) | - ((g >> 6) & 0x2) | - ((r >> 7) )); - Store4(image, bits, i + x, pixel); + ((b >> 5) & 0x4) | + 
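STORE_4 above picks the byte with o >> 3 and the nibble with o & 4, so its offset arithmetic is in bit units; on little-endian hosts the even nibble of a byte is the low one. A standalone little-endian sketch of that addressing, scaling a pixel index to a bit offset explicitly (names are illustrative, and the caller-side offset convention is an assumption):

#include <stdint.h>
#include <assert.h>

static void store4 (uint8_t *line, int pixel, uint8_t v)
{
    int o = pixel * 4;                /* bit offset of the nibble */
    uint8_t *p = line + (o >> 3);

    if (o & 4)                        /* odd pixel: high nibble */
        *p = (*p & 0x0f) | (uint8_t)(v << 4);
    else                              /* even pixel: low nibble */
        *p = (*p & 0xf0) | v;
}

int main (void)
{
    uint8_t line[2] = { 0, 0 };

    store4 (line, 0, 0xa);
    store4 (line, 1, 0xb);
    store4 (line, 2, 0xc);
    assert (line[0] == 0xba && line[1] == 0x0c);
    return 0;
}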
((g >> 6) & 0x2) | + ((r >> 7) )); + STORE_4 (image, bits, i + x, pixel); } } -static FASTCALL void -fbStore_c4 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_c4 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { + uint32_t *bits = image->bits + image->rowstride * y; + const pixman_indexed_t *indexed = image->indexed; int i; - for (i = 0; i < width; ++i) { - uint32_t pixel; - - pixel = miIndexToEnt24(indexed, values[i]); - Store4(image, bits, i + x, pixel); + + for (i = 0; i < width; ++i) + { + uint32_t pixel; + + pixel = RGB24_TO_ENTRY (indexed, values[i]); + STORE_4 (image, bits, i + x, pixel); } } -static FASTCALL void -fbStore_a1 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_a1 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { + uint32_t *bits = image->bits + image->rowstride * y; int i; - for (i = 0; i < width; ++i) { - uint32_t *pixel = ((uint32_t *) bits) + ((i+x) >> 5); - uint32_t mask = FbStipMask((i+x) & 0x1f, 1); - - uint32_t v = values[i] & 0x80000000 ? mask : 0; - WRITE(image, pixel, (READ(image, pixel) & ~mask) | v); + + for (i = 0; i < width; ++i) + { + uint32_t *pixel = ((uint32_t *) bits) + ((i + x) >> 5); + uint32_t mask, v; + +#ifdef WORDS_BIGENDIAN + mask = 1 << (0x1f - ((i + x) & 0x1f)); +#else + mask = 1 << ((i + x) & 0x1f); +#endif + v = values[i] & 0x80000000 ? mask : 0; + + WRITE (image, pixel, (READ (image, pixel) & ~mask) | v); } } -static FASTCALL void -fbStore_g1 (pixman_image_t *image, - uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed) +static void +store_scanline_g1 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) { + uint32_t *bits = image->bits + image->rowstride * y; + const pixman_indexed_t *indexed = image->indexed; int i; - for (i = 0; i < width; ++i) { - uint32_t *pixel = ((uint32_t *) bits) + ((i+x) >> 5); - uint32_t mask = FbStipMask((i+x) & 0x1f, 1); - - uint32_t v = miIndexToEntY24(indexed,values[i]) ? 
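The a1 store above addresses pixels inside 32-bit words: pixel p lives in word p >> 5, and within the word little-endian hosts use bit p & 31 while WORDS_BIGENDIAN hosts use the mirrored bit 31 - (p & 31). A minimal scalar version of the little-endian case (names illustrative):

#include <stdint.h>
#include <assert.h>

static void store_a1 (uint32_t *bits, int p, int on)
{
    uint32_t *word = bits + (p >> 5);
    uint32_t mask = 1u << (p & 0x1f);    /* little-endian variant */

    *word = (*word & ~mask) | (on ? mask : 0);
}

int main (void)
{
    uint32_t row[2] = { 0, 0 };

    store_a1 (row, 0, 1);     /* bit 0 of word 0 */
    store_a1 (row, 33, 1);    /* bit 1 of word 1 */
    assert (row[0] == 1 && row[1] == 2);
    return 0;
}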
mask : 0; - WRITE(image, pixel, (READ(image, pixel) & ~mask) | v); - } -} - - -storeProc32 ACCESS(pixman_storeProcForPicture32) (bits_image_t * pict) -{ - switch(pict->format) { - case PIXMAN_a8r8g8b8: return fbStore_a8r8g8b8; - case PIXMAN_x8r8g8b8: return fbStore_x8r8g8b8; - case PIXMAN_a8b8g8r8: return fbStore_a8b8g8r8; - case PIXMAN_x8b8g8r8: return fbStore_x8b8g8r8; - case PIXMAN_b8g8r8a8: return fbStore_b8g8r8a8; - case PIXMAN_b8g8r8x8: return fbStore_b8g8r8x8; - - /* 24bpp formats */ - case PIXMAN_r8g8b8: return fbStore_r8g8b8; - case PIXMAN_b8g8r8: return fbStore_b8g8r8; - - /* 16bpp formats */ - case PIXMAN_r5g6b5: return fbStore_r5g6b5; - case PIXMAN_b5g6r5: return fbStore_b5g6r5; - - case PIXMAN_a1r5g5b5: return fbStore_a1r5g5b5; - case PIXMAN_x1r5g5b5: return fbStore_x1r5g5b5; - case PIXMAN_a1b5g5r5: return fbStore_a1b5g5r5; - case PIXMAN_x1b5g5r5: return fbStore_x1b5g5r5; - case PIXMAN_a4r4g4b4: return fbStore_a4r4g4b4; - case PIXMAN_x4r4g4b4: return fbStore_x4r4g4b4; - case PIXMAN_a4b4g4r4: return fbStore_a4b4g4r4; - case PIXMAN_x4b4g4r4: return fbStore_x4b4g4r4; - - /* 8bpp formats */ - case PIXMAN_a8: return fbStore_a8; - case PIXMAN_r3g3b2: return fbStore_r3g3b2; - case PIXMAN_b2g3r3: return fbStore_b2g3r3; - case PIXMAN_a2r2g2b2: return fbStore_a2r2g2b2; - case PIXMAN_c8: return fbStore_c8; - case PIXMAN_g8: return fbStore_c8; - case PIXMAN_x4a4: return fbStore_x4a4; - - /* 4bpp formats */ - case PIXMAN_a4: return fbStore_a4; - case PIXMAN_r1g2b1: return fbStore_r1g2b1; - case PIXMAN_b1g2r1: return fbStore_b1g2r1; - case PIXMAN_a1r1g1b1: return fbStore_a1r1g1b1; - case PIXMAN_a1b1g1r1: return fbStore_a1b1g1r1; - case PIXMAN_c4: return fbStore_c4; - case PIXMAN_g4: return fbStore_c4; - - /* 1bpp formats */ - case PIXMAN_a1: return fbStore_a1; - case PIXMAN_g1: return fbStore_g1; - default: - return NULL; + + for (i = 0; i < width; ++i) + { + uint32_t *pixel = ((uint32_t *) bits) + ((i + x) >> 5); + uint32_t mask, v; + +#ifdef WORDS_BIGENDIAN + mask = 1 << (0x1f - ((i + x) & 0x1f)); +#else + mask = 1 << ((i + x) & 0x1f); +#endif + v = RGB24_TO_ENTRY_Y (indexed, values[i]) ? mask : 0; + + WRITE (image, pixel, (READ (image, pixel) & ~mask) | v); } } /* * Contracts a 64bpp image to 32bpp and then stores it using a regular 32-bit - * store proc. + * store proc. Despite the type, this function expects a uint64_t buffer. */ -static FASTCALL void -fbStore64_generic (pixman_image_t *image, - uint32_t *bits, const uint64_t *values, int x, int width, const pixman_indexed_t * indexed) -{ - bits_image_t *pict = (bits_image_t*)image; - storeProc32 store32 = ACCESS(pixman_storeProcForPicture32) (pict); - uint32_t *argb8Pixels; - - assert(image->common.type == BITS); - assert(store32); - - argb8Pixels = pixman_malloc_ab (width, sizeof(uint32_t)); - if (!argb8Pixels) return; - - // Contract the scanline. We could do this in place if values weren't - // const. - pixman_contract(argb8Pixels, values, width); - store32(image, bits, argb8Pixels, x, width, indexed); - - free(argb8Pixels); +static void +store_scanline_generic_64 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values) +{ + uint32_t *argb8_pixels; + + assert (image->common.type == BITS); + + argb8_pixels = pixman_malloc_ab (width, sizeof(uint32_t)); + if (!argb8_pixels) + return; + + /* Contract the scanline. We could do this in place if values weren't + * const. 
+ */ + pixman_contract (argb8_pixels, (uint64_t *)values, width); + + image->store_scanline_raw_32 (image, x, y, width, argb8_pixels); + + free (argb8_pixels); } -storeProc64 ACCESS(pixman_storeProcForPicture64) (bits_image_t * pict) -{ - switch(pict->format) { - case PIXMAN_a2b10g10r10: return fbStore_a2b10g10r10; - case PIXMAN_x2b10g10r10: return fbStore_x2b10g10r10; - default: return fbStore64_generic; - } +/* Despite the type, this function expects both buffer + * and mask to be uint64_t + */ +static void +fetch_scanline_generic_64 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) +{ + /* Fetch the pixels into the first half of buffer and then expand them in + * place. + */ + image->bits.fetch_scanline_raw_32 (image, x, y, width, buffer, NULL, 0); + + pixman_expand ((uint64_t *)buffer, buffer, image->bits.format, width); } -#ifndef PIXMAN_FB_ACCESSORS -/* - * Helper routine to expand a color component from 0 < n <= 8 bits to 16 bits by - * replication. - */ -static inline uint64_t expand16(const uint8_t val, int nbits) +/* Despite the type, this function expects a uint64_t *buffer */ +static uint64_t +fetch_pixel_generic_64 (bits_image_t *image, + int offset, + int line) { - // Start out with the high bit of val in the high bit of result. - uint16_t result = (uint16_t)val << (16 - nbits); - - if (nbits == 0) - return 0; - - // Copy the bits in result, doubling the number of bits each time, until we - // fill all 16 bits. - while (nbits < 16) { - result |= result >> nbits; - nbits *= 2; - } + uint32_t pixel32 = image->fetch_pixel_raw_32 (image, offset, line); + uint64_t result; + + pixman_expand ((uint64_t *)&result, &pixel32, image->format, 1); return result; } /* - * This function expands images from ARGB8 format to ARGB16. To preserve - * precision, it needs to know the original source format. For example, if the - * source was PIXMAN_x1r5g5b5 and the red component contained bits 12345, then - * the expanded value is 12345123. To correctly expand this to 16 bits, it - * should be 1234512345123451 and not 1234512312345123. + * XXX: The transformed fetch path only works at 32-bpp so far. When all + * paths have wide versions, this can be removed. + * + * WARNING: This function loses precision! */ -void pixman_expand(uint64_t *dst, const uint32_t *src, - pixman_format_code_t format, int width) +static uint32_t +fetch_pixel_generic_lossy_32 (bits_image_t *image, + int offset, + int line) { - /* - * Determine the sizes of each component and the masks and shifts required - * to extract them from the source pixel. - */ - const int a_size = PIXMAN_FORMAT_A(format), - r_size = PIXMAN_FORMAT_R(format), - g_size = PIXMAN_FORMAT_G(format), - b_size = PIXMAN_FORMAT_B(format); - const int a_shift = 32 - a_size, - r_shift = 24 - r_size, - g_shift = 16 - g_size, - b_shift = 8 - b_size; - const uint8_t a_mask = ~(~0 << a_size), - r_mask = ~(~0 << r_size), - g_mask = ~(~0 << g_size), - b_mask = ~(~0 << b_size); - int i; + uint64_t pixel64 = image->fetch_pixel_raw_64 (image, offset, line); + uint32_t result; + + pixman_contract (&result, &pixel64, 1); - /* Start at the end so that we can do the expansion in place when src == dst */ - for (i = width - 1; i >= 0; i--) - { - const uint32_t pixel = src[i]; - // Extract the components. - const uint8_t a = (pixel >> a_shift) & a_mask, - r = (pixel >> r_shift) & r_mask, - g = (pixel >> g_shift) & g_mask, - b = (pixel >> b_shift) & b_mask; - const uint64_t a16 = a_size ? 
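The expand16 helper being deleted here (the expansion logic now lives elsewhere in the library) implements widening by bit replication, as its comment describes: an n-bit channel pattern is repeated until all 16 bits are filled, so the maximum n-bit value maps exactly to 0xffff. A standalone copy with a worked 5-bit case:

#include <stdint.h>
#include <assert.h>

/* Widen an n-bit channel (0 < n <= 8) to 16 bits by replication. */
static uint16_t expand16 (uint8_t val, int nbits)
{
    /* Start with the high bit of val in the high bit of result. */
    uint16_t result = (uint16_t)val << (16 - nbits);

    /* Double the replicated width each pass until 16 bits are covered. */
    while (nbits < 16)
    {
        result |= result >> nbits;
        nbits *= 2;
    }

    return result;
}

int main (void)
{
    assert (expand16 (0x1f, 5) == 0xffff);  /* max 5-bit value -> max 16-bit   */
    assert (expand16 (0x10, 5) == 0x8421);  /* 10000 -> 1000010000100001       */
    return 0;
}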
expand16(a, a_size) : 0xffff, - r16 = expand16(r, r_size), - g16 = expand16(g, g_size), - b16 = expand16(b, b_size); + return result; +} - dst[i] = a16 << 48 | r16 << 32 | g16 << 16 | b16; +typedef struct +{ + pixman_format_code_t format; + fetch_scanline_t fetch_scanline_raw_32; + fetch_scanline_t fetch_scanline_raw_64; + fetch_pixel_32_t fetch_pixel_raw_32; + fetch_pixel_64_t fetch_pixel_raw_64; + store_scanline_t store_scanline_raw_32; + store_scanline_t store_scanline_raw_64; +} format_info_t; + +#define FORMAT_INFO(format) \ + { \ + PIXMAN_ ## format, \ + fetch_scanline_ ## format, \ + fetch_scanline_generic_64, \ + fetch_pixel_ ## format, fetch_pixel_generic_64, \ + store_scanline_ ## format, store_scanline_generic_64 \ + } + +static const format_info_t accessors[] = +{ +/* 32 bpp formats */ + FORMAT_INFO (a8r8g8b8), + FORMAT_INFO (x8r8g8b8), + FORMAT_INFO (a8b8g8r8), + FORMAT_INFO (x8b8g8r8), + FORMAT_INFO (b8g8r8a8), + FORMAT_INFO (b8g8r8x8), + +/* 24bpp formats */ + FORMAT_INFO (r8g8b8), + FORMAT_INFO (b8g8r8), + +/* 16bpp formats */ + FORMAT_INFO (r5g6b5), + FORMAT_INFO (b5g6r5), + + FORMAT_INFO (a1r5g5b5), + FORMAT_INFO (x1r5g5b5), + FORMAT_INFO (a1b5g5r5), + FORMAT_INFO (x1b5g5r5), + FORMAT_INFO (a4r4g4b4), + FORMAT_INFO (x4r4g4b4), + FORMAT_INFO (a4b4g4r4), + FORMAT_INFO (x4b4g4r4), + +/* 8bpp formats */ + FORMAT_INFO (a8), + FORMAT_INFO (r3g3b2), + FORMAT_INFO (b2g3r3), + FORMAT_INFO (a2r2g2b2), + FORMAT_INFO (a2b2g2r2), + + FORMAT_INFO (c8), + +#define fetch_scanline_g8 fetch_scanline_c8 +#define fetch_pixel_g8 fetch_pixel_c8 +#define store_scanline_g8 store_scanline_c8 + FORMAT_INFO (g8), + +#define fetch_scanline_x4c4 fetch_scanline_c8 +#define fetch_pixel_x4c4 fetch_pixel_c8 +#define store_scanline_x4c4 store_scanline_c8 + FORMAT_INFO (x4c4), + +#define fetch_scanline_x4g4 fetch_scanline_c8 +#define fetch_pixel_x4g4 fetch_pixel_c8 +#define store_scanline_x4g4 store_scanline_c8 + FORMAT_INFO (x4g4), + + FORMAT_INFO (x4a4), + +/* 4bpp formats */ + FORMAT_INFO (a4), + FORMAT_INFO (r1g2b1), + FORMAT_INFO (b1g2r1), + FORMAT_INFO (a1r1g1b1), + FORMAT_INFO (a1b1g1r1), + + FORMAT_INFO (c4), + +#define fetch_scanline_g4 fetch_scanline_c4 +#define fetch_pixel_g4 fetch_pixel_c4 +#define store_scanline_g4 store_scanline_c4 + FORMAT_INFO (g4), + +/* 1bpp formats */ + FORMAT_INFO (a1), + FORMAT_INFO (g1), + +/* Wide formats */ + + { PIXMAN_a2r10g10b10, + NULL, fetch_scanline_a2r10g10b10, + fetch_pixel_generic_lossy_32, fetch_pixel_a2r10g10b10, + NULL, store_scanline_a2r10g10b10 }, + + { PIXMAN_x2r10g10b10, + NULL, fetch_scanline_x2r10g10b10, + fetch_pixel_generic_lossy_32, fetch_pixel_x2r10g10b10, + NULL, store_scanline_x2r10g10b10 }, + + { PIXMAN_a2b10g10r10, + NULL, fetch_scanline_a2b10g10r10, + fetch_pixel_generic_lossy_32, fetch_pixel_a2b10g10r10, + NULL, store_scanline_a2b10g10r10 }, + + { PIXMAN_x2b10g10r10, + NULL, fetch_scanline_x2b10g10r10, + fetch_pixel_generic_lossy_32, fetch_pixel_x2b10g10r10, + NULL, store_scanline_x2b10g10r10 }, + +/* YUV formats */ + { PIXMAN_yuy2, + fetch_scanline_yuy2, fetch_scanline_generic_64, + fetch_pixel_yuy2, fetch_pixel_generic_64, + NULL, NULL }, + + { PIXMAN_yv12, + fetch_scanline_yv12, fetch_scanline_generic_64, + fetch_pixel_yv12, fetch_pixel_generic_64, + NULL, NULL }, + + { PIXMAN_null }, +}; + +static void +setup_accessors (bits_image_t *image) +{ + const format_info_t *info = accessors; + + while (info->format != PIXMAN_null) + { + if (info->format == image->format) + { + image->fetch_scanline_raw_32 = info->fetch_scanline_raw_32; + 
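The accessors[] table above replaces the old per-format switch: each entry carries the raw 32- and 64-bit fetch/store pointers for one format, and setup_accessors walks the table until it finds the requested format or hits the PIXMAN_null sentinel. A reduced sketch of that lookup pattern (types and names simplified, not pixman's):

#include <assert.h>
#include <stddef.h>

typedef enum { FMT_NULL, FMT_A8R8G8B8, FMT_R5G6B5 } fmt_t;

typedef struct
{
    fmt_t format;
    const char *name;    /* stand-in for the fetch/store pointers */
} info_t;

static const info_t table[] =
{
    { FMT_A8R8G8B8, "a8r8g8b8 accessors" },
    { FMT_R5G6B5,   "r5g6b5 accessors"   },
    { FMT_NULL }                           /* sentinel terminates the walk */
};

static const info_t *lookup (fmt_t f)
{
    const info_t *info;

    for (info = table; info->format != FMT_NULL; info++)
    {
        if (info->format == f)
            return info;
    }

    return NULL;    /* unknown format: accessors stay unset */
}

int main (void)
{
    assert (lookup (FMT_R5G6B5) != NULL);
    assert (lookup (FMT_NULL) == NULL);
    return 0;
}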
image->fetch_scanline_raw_64 = info->fetch_scanline_raw_64; + image->fetch_pixel_raw_32 = info->fetch_pixel_raw_32; + image->fetch_pixel_raw_64 = info->fetch_pixel_raw_64; + image->store_scanline_raw_32 = info->store_scanline_raw_32; + image->store_scanline_raw_64 = info->store_scanline_raw_64; + + return; + } + + info++; } } -/* - * Contracting is easier than expanding. We just need to truncate the - * components. - */ -void pixman_contract(uint32_t *dst, const uint64_t *src, int width) +#ifndef PIXMAN_FB_ACCESSORS +void +_pixman_bits_image_setup_raw_accessors_accessors (bits_image_t *image); + +void +_pixman_bits_image_setup_raw_accessors (bits_image_t *image) { - int i; + if (image->read_func || image->write_func) + _pixman_bits_image_setup_raw_accessors_accessors (image); + else + setup_accessors (image); +} - /* Start at the beginning so that we can do the contraction in place when - * src == dst */ - for (i = 0; i < width; i++) - { - const uint8_t a = src[i] >> 56, - r = src[i] >> 40, - g = src[i] >> 24, - b = src[i] >> 8; - dst[i] = a << 24 | r << 16 | g << 8 | b; - } +#else + +void +_pixman_bits_image_setup_raw_accessors_accessors (bits_image_t *image) +{ + setup_accessors (image); } -#endif // PIXMAN_FB_ACCESSORS + +#endif diff --git a/lib/pixman/pixman/pixman-accessor.h b/lib/pixman/pixman/pixman-accessor.h new file mode 100644 index 000000000..90c8ea7b7 --- /dev/null +++ b/lib/pixman/pixman/pixman-accessor.h @@ -0,0 +1,40 @@ +#ifdef PIXMAN_FB_ACCESSORS + +#define ACCESS(sym) sym##_accessors + +#define READ(img, ptr) \ + (((bits_image_t *)(img))->read_func ((ptr), sizeof(*(ptr)))) +#define WRITE(img, ptr,val) \ + (((bits_image_t *)(img))->write_func ((ptr), (val), sizeof (*(ptr)))) + +#define MEMCPY_WRAPPED(img, dst, src, size) \ + do { \ + size_t _i; \ + uint8_t *_dst = (uint8_t*)(dst), *_src = (uint8_t*)(src); \ + for(_i = 0; _i < size; _i++) { \ + WRITE((img), _dst +_i, READ((img), _src + _i)); \ + } \ + } while (0) + +#define MEMSET_WRAPPED(img, dst, val, size) \ + do { \ + size_t _i; \ + uint8_t *_dst = (uint8_t*)(dst); \ + for(_i = 0; _i < (size_t) size; _i++) { \ + WRITE((img), _dst +_i, (val)); \ + } \ + } while (0) + +#else + +#define ACCESS(sym) sym + +#define READ(img, ptr) (*(ptr)) +#define WRITE(img, ptr, val) (*(ptr) = (val)) +#define MEMCPY_WRAPPED(img, dst, src, size) \ + memcpy(dst, src, size) +#define MEMSET_WRAPPED(img, dst, val, size) \ + memset(dst, val, size) + +#endif + diff --git a/lib/pixman/pixman/pixman-arm-neon.c b/lib/pixman/pixman/pixman-arm-neon.c index 5453dbbaf..8a2d72ea3 100644 --- a/lib/pixman/pixman/pixman-arm-neon.c +++ b/lib/pixman/pixman/pixman-arm-neon.c @@ -1,5 +1,5 @@ /* - * Copyright © 2009 ARM Ltd + * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that @@ -20,7 +20,9 @@ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS * SOFTWARE. 
* - * Author: Ian Rickards (ian.rickards@arm.com) + * Author: Ian Rickards (ian.rickards@arm.com) + * Author: Jonathan Morton (jonathan.morton@movial.com) + * Author: Markku Vire (markku.vire@movial.com) * */ @@ -28,1523 +30,2752 @@ #include <config.h> #endif -#include "pixman-arm-neon.h" - #include <arm_neon.h> +#include <string.h> +#include "pixman-private.h" +/* Deal with an intrinsic that is defined differently in GCC */ +#if !defined(__ARMCC_VERSION) && !defined(__pld) +#define __pld(_x) __builtin_prefetch (_x) +#endif -static force_inline uint8x8x4_t unpack0565(uint16x8_t rgb) +static force_inline uint8x8x4_t +unpack0565 (uint16x8_t rgb) { uint16x8_t gb, b; uint8x8x4_t res; - res.val[3] = vdup_n_u8(0); - gb = vshrq_n_u16(rgb, 5); - b = vshrq_n_u16(rgb, 5+6); - res.val[0] = vmovn_u16(rgb); // get low 5 bits - res.val[1] = vmovn_u16(gb); // get mid 6 bits - res.val[2] = vmovn_u16(b); // get top 5 bits + res.val[3] = vdup_n_u8 (0); + gb = vshrq_n_u16 (rgb, 5); + b = vshrq_n_u16 (rgb, 5 + 6); - res.val[0] = vshl_n_u8(res.val[0], 3); // shift to top - res.val[1] = vshl_n_u8(res.val[1], 2); // shift to top - res.val[2] = vshl_n_u8(res.val[2], 3); // shift to top + res.val[0] = vmovn_u16 (rgb); /* get low 5 bits */ + res.val[1] = vmovn_u16 (gb); /* get mid 6 bits */ + res.val[2] = vmovn_u16 (b); /* get top 5 bits */ - res.val[0] = vsri_n_u8(res.val[0], res.val[0], 5); - res.val[1] = vsri_n_u8(res.val[1], res.val[1], 6); - res.val[2] = vsri_n_u8(res.val[2], res.val[2], 5); + res.val[0] = vshl_n_u8 (res.val[0], 3); /* shift to top */ + res.val[1] = vshl_n_u8 (res.val[1], 2); /* shift to top */ + res.val[2] = vshl_n_u8 (res.val[2], 3); /* shift to top */ + + res.val[0] = vsri_n_u8 (res.val[0], res.val[0], 5); + res.val[1] = vsri_n_u8 (res.val[1], res.val[1], 6); + res.val[2] = vsri_n_u8 (res.val[2], res.val[2], 5); return res; } -static force_inline uint16x8_t pack0565(uint8x8x4_t s) +#ifdef USE_GCC_INLINE_ASM +/* Some versions of gcc have problems with vshll_n_u8 intrinsic (Bug 23576) */ +#define vshll_n_u8(a, n) ({ uint16x8_t r; \ + asm ("vshll.u8 %q0, %P1, %2\n" : "=w" (r) : "w" (a), "i" (n)); r; }) +#endif + +static force_inline uint16x8_t +pack0565 (uint8x8x4_t s) { uint16x8_t rgb, val_g, val_r; - rgb = vshll_n_u8(s.val[2],8); - val_g = vshll_n_u8(s.val[1],8); - val_r = vshll_n_u8(s.val[0],8); - rgb = vsriq_n_u16(rgb, val_g, 5); - rgb = vsriq_n_u16(rgb, val_r, 5+6); + rgb = vshll_n_u8 (s.val[2], 8); + val_g = vshll_n_u8 (s.val[1], 8); + val_r = vshll_n_u8 (s.val[0], 8); + rgb = vsriq_n_u16 (rgb, val_g, 5); + rgb = vsriq_n_u16 (rgb, val_r, 5 + 6); return rgb; } -static force_inline uint8x8_t neon2mul(uint8x8_t x, uint8x8_t alpha) +static force_inline uint8x8_t +neon2mul (uint8x8_t x, + uint8x8_t alpha) { - uint16x8_t tmp,tmp2; + uint16x8_t tmp, tmp2; uint8x8_t res; - tmp = vmull_u8(x,alpha); - tmp2 = vrshrq_n_u16(tmp,8); - res = vraddhn_u16(tmp,tmp2); + tmp = vmull_u8 (x, alpha); + tmp2 = vrshrq_n_u16 (tmp, 8); + res = vraddhn_u16 (tmp, tmp2); return res; } -static force_inline uint8x8x4_t neon8mul(uint8x8x4_t x, uint8x8_t alpha) +static force_inline uint8x8x4_t +neon8mul (uint8x8x4_t x, + uint8x8_t alpha) { uint16x8x4_t tmp; uint8x8x4_t res; - uint16x8_t qtmp1,qtmp2; - - tmp.val[0] = vmull_u8(x.val[0],alpha); - tmp.val[1] = vmull_u8(x.val[1],alpha); - tmp.val[2] = vmull_u8(x.val[2],alpha); - tmp.val[3] = vmull_u8(x.val[3],alpha); - - qtmp1 = vrshrq_n_u16(tmp.val[0],8); - qtmp2 = vrshrq_n_u16(tmp.val[1],8); - res.val[0] = vraddhn_u16(tmp.val[0],qtmp1); - qtmp1 = vrshrq_n_u16(tmp.val[2],8); 
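neon2mul above is the usual exact divide-by-255 with rounding, vectorized: vmull.u8 widens x * alpha to 16 bits, and the rounding shift plus rounding add-high-narrow effectively compute (t + 128 + ((t + 128) >> 8)) >> 8 per lane. A scalar model that checks the identity exhaustively (function names are illustrative):

#include <stdint.h>
#include <assert.h>

/* Multiply an 8-bit channel by an 8-bit alpha and divide by 255,
 * rounding to nearest, without a real divide. */
static uint8_t mul_div_255 (uint8_t x, uint8_t a)
{
    uint16_t t = (uint16_t)x * a;

    return (uint8_t)((t + 128 + ((t + 128) >> 8)) >> 8);
}

int main (void)
{
    int x, a;

    for (x = 0; x < 256; x++)
        for (a = 0; a < 256; a++)
            assert (mul_div_255 (x, a) == (x * a + 127) / 255);
    return 0;
}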
- res.val[1] = vraddhn_u16(tmp.val[1],qtmp2); - qtmp2 = vrshrq_n_u16(tmp.val[3],8); - res.val[2] = vraddhn_u16(tmp.val[2],qtmp1); - res.val[3] = vraddhn_u16(tmp.val[3],qtmp2); + uint16x8_t qtmp1, qtmp2; + + tmp.val[0] = vmull_u8 (x.val[0], alpha); + tmp.val[1] = vmull_u8 (x.val[1], alpha); + tmp.val[2] = vmull_u8 (x.val[2], alpha); + tmp.val[3] = vmull_u8 (x.val[3], alpha); + + qtmp1 = vrshrq_n_u16 (tmp.val[0], 8); + qtmp2 = vrshrq_n_u16 (tmp.val[1], 8); + res.val[0] = vraddhn_u16 (tmp.val[0], qtmp1); + qtmp1 = vrshrq_n_u16 (tmp.val[2], 8); + res.val[1] = vraddhn_u16 (tmp.val[1], qtmp2); + qtmp2 = vrshrq_n_u16 (tmp.val[3], 8); + res.val[2] = vraddhn_u16 (tmp.val[2], qtmp1); + res.val[3] = vraddhn_u16 (tmp.val[3], qtmp2); return res; } -static force_inline uint8x8x4_t neon8qadd(uint8x8x4_t x, uint8x8x4_t y) +static force_inline uint8x8x4_t +neon8qadd (uint8x8x4_t x, + uint8x8x4_t y) { uint8x8x4_t res; - res.val[0] = vqadd_u8(x.val[0],y.val[0]); - res.val[1] = vqadd_u8(x.val[1],y.val[1]); - res.val[2] = vqadd_u8(x.val[2],y.val[2]); - res.val[3] = vqadd_u8(x.val[3],y.val[3]); + res.val[0] = vqadd_u8 (x.val[0], y.val[0]); + res.val[1] = vqadd_u8 (x.val[1], y.val[1]); + res.val[2] = vqadd_u8 (x.val[2], y.val[2]); + res.val[3] = vqadd_u8 (x.val[3], y.val[3]); return res; } - -void -fbCompositeSrcAdd_8000x8000neon ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +static void +neon_composite_add_8000_8000 (pixman_implementation_t * impl, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint8_t *dstLine, *dst; - uint8_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + uint16_t w; - fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1); - fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - if (width>=8) + if (width >= 8) { - // Use overlapping 8-pixel method - while (height--) - { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; + /* Use overlapping 8-pixel method */ + while (height--) + { + uint8_t *keep_dst = 0; + uint8x8_t sval, dval, temp; - uint8_t *keep_dst; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; #ifndef USE_GCC_INLINE_ASM - uint8x8_t sval,dval,temp; + sval = vld1_u8 ((void *)src); + dval = vld1_u8 ((void *)dst); + keep_dst = dst; - sval = vld1_u8((void*)src); - dval = vld1_u8((void*)dst); - keep_dst = dst; + temp = vqadd_u8 (dval, sval); - temp = vqadd_u8(dval,sval); + src += (w & 7); + dst += (w & 7); + w -= (w & 7); - src += (w & 7); - dst += (w & 7); - w -= (w & 7); + while (w) + { + sval = vld1_u8 ((void *)src); + dval = vld1_u8 ((void *)dst); - while (w) - { - sval = vld1_u8((void*)src); - dval = vld1_u8((void*)dst); + vst1_u8 ((void *)keep_dst, temp); + keep_dst = dst; - vst1_u8((void*)keep_dst,temp); - keep_dst = dst; + temp = vqadd_u8 (dval, sval); - temp = 
vqadd_u8(dval,sval); + src += 8; + dst += 8; + w -= 8; + } - src+=8; - dst+=8; - w-=8; - } - vst1_u8((void*)keep_dst,temp); + vst1_u8 ((void *)keep_dst, temp); #else - asm volatile ( -// avoid using d8-d15 (q4-q7) aapcs callee-save registers - "vld1.8 {d0}, [%[src]]\n\t" - "vld1.8 {d4}, [%[dst]]\n\t" - "mov %[keep_dst], %[dst]\n\t" - - "and ip, %[w], #7\n\t" - "add %[src], %[src], ip\n\t" - "add %[dst], %[dst], ip\n\t" - "subs %[w], %[w], ip\n\t" - "b 9f\n\t" -// LOOP - "2:\n\t" - "vld1.8 {d0}, [%[src]]!\n\t" - "vld1.8 {d4}, [%[dst]]!\n\t" - "vst1.8 {d20}, [%[keep_dst]]\n\t" - "sub %[keep_dst], %[dst], #8\n\t" - "subs %[w], %[w], #8\n\t" - "9:\n\t" - "vqadd.u8 d20, d0, d4\n\t" - - "bne 2b\n\t" - - "1:\n\t" - "vst1.8 {d20}, [%[keep_dst]]\n\t" - - : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "=r" (keep_dst) - : - : "ip", "cc", "memory", "d0","d4", - "d20" - ); + asm volatile ( +/* avoid using d8-d15 (q4-q7) aapcs callee-save registers */ + "vld1.8 {d0}, [%[src]]\n\t" + "vld1.8 {d4}, [%[dst]]\n\t" + "mov %[keep_dst], %[dst]\n\t" + + "and ip, %[w], #7\n\t" + "add %[src], %[src], ip\n\t" + "add %[dst], %[dst], ip\n\t" + "subs %[w], %[w], ip\n\t" + "b 9f\n\t" +/* LOOP */ + "2:\n\t" + "vld1.8 {d0}, [%[src]]!\n\t" + "vld1.8 {d4}, [%[dst]]!\n\t" + "vst1.8 {d20}, [%[keep_dst]]\n\t" + "sub %[keep_dst], %[dst], #8\n\t" + "subs %[w], %[w], #8\n\t" + "9:\n\t" + "vqadd.u8 d20, d0, d4\n\t" + + "bne 2b\n\t" + + "1:\n\t" + "vst1.8 {d20}, [%[keep_dst]]\n\t" + + : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "=r" (keep_dst) + : + : "ip", "cc", "memory", "d0", "d4", + "d20" + ); #endif - } + } } else { - while (height--) - { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; - uint8x8_t sval, dval; - uint8_t *dst4, *dst2; - - if (w&4) - { - sval = vreinterpret_u8_u32(vld1_lane_u32((void*)src,vreinterpret_u32_u8(sval),1)); - dval = vreinterpret_u8_u32(vld1_lane_u32((void*)dst,vreinterpret_u32_u8(dval),1)); - dst4=dst; - src+=4; - dst+=4; - } - if (w&2) - { - sval = vreinterpret_u8_u16(vld1_lane_u16((void*)src,vreinterpret_u16_u8(sval),1)); - dval = vreinterpret_u8_u16(vld1_lane_u16((void*)dst,vreinterpret_u16_u8(dval),1)); - dst2=dst; - src+=2; - dst+=2; - } - if (w&1) - { - sval = vld1_lane_u8(src,sval,1); - dval = vld1_lane_u8(dst,dval,1); - } - - dval = vqadd_u8(dval,sval); - - if (w&1) - vst1_lane_u8(dst,dval,1); - if (w&2) - vst1_lane_u16((void*)dst2,vreinterpret_u16_u8(dval),1); - if (w&4) - vst1_lane_u32((void*)dst4,vreinterpret_u32_u8(dval),1); - } + const uint8_t nil = 0; + const uint8x8_t vnil = vld1_dup_u8 (&nil); + + while (height--) + { + uint8x8_t sval = vnil, dval = vnil; + uint8_t *dst4 = 0, *dst2 = 0; + + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + if (w & 4) + { + sval = vreinterpret_u8_u32 ( + vld1_lane_u32 ((void *)src, vreinterpret_u32_u8 (sval), 1)); + dval = vreinterpret_u8_u32 ( + vld1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (dval), 1)); + + dst4 = dst; + src += 4; + dst += 4; + } + + if (w & 2) + { + sval = vreinterpret_u8_u16 ( + vld1_lane_u16 ((void *)src, vreinterpret_u16_u8 (sval), 1)); + dval = vreinterpret_u8_u16 ( + vld1_lane_u16 ((void *)dst, vreinterpret_u16_u8 (dval), 1)); + + dst2 = dst; + src += 2; + dst += 2; + } + + if (w & 1) + { + sval = vld1_lane_u8 (src, sval, 1); + dval = vld1_lane_u8 (dst, dval, 1); + } + + dval = vqadd_u8 (dval, sval); + + if (w & 1) + vst1_lane_u8 (dst, dval, 1); + + if (w & 2) + vst1_lane_u16 ((void *)dst2, 
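The "overlapping 8-pixel method" in neon_composite_add_8000_8000 above avoids a scalar tail loop: it processes pixels 0..7 once, re-aligns to w & 7, then walks full 8-pixel blocks, deferring each store by one iteration so the overlapped pixels are always loaded before the previous result is written back. A scalar model of that schedule (helper names are illustrative):

#include <assert.h>
#include <stdint.h>
#include <string.h>

static uint8_t sat_add (int a, int b)
{
    return (a + b > 255) ? 255 : (uint8_t)(a + b);
}

static void add_row (uint8_t *dst, const uint8_t *src, int w)
{
    uint8_t temp[8], cur[8];
    int pos, keep = 0, j;

    assert (w >= 8);

    for (j = 0; j < 8; j++)                 /* head block: pixels 0..7 */
        temp[j] = sat_add (dst[j], src[j]);

    for (pos = w & 7; pos + 8 <= w; pos += 8)
    {
        for (j = 0; j < 8; j++)             /* load before the store below,      */
            cur[j] = sat_add (dst[pos + j], src[pos + j]); /* so the overlap reads
                                                            * the original dst    */
        memcpy (dst + keep, temp, 8);       /* flush the previous block */
        memcpy (temp, cur, 8);
        keep = pos;
    }

    memcpy (dst + keep, temp, 8);           /* flush the last block */
}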
vreinterpret_u16_u8 (dval), 1); + + if (w & 4) + vst1_lane_u32 ((void *)dst4, vreinterpret_u32_u8 (dval), 1); + } } } - -void -fbCompositeSrc_8888x8888neon ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +static void +neon_composite_over_8888_8888 (pixman_implementation_t * impl, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - int dstStride, srcStride; - uint32_t w; + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + uint32_t w; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - if (width>=8) + if (width >= 8) { - // Use overlapping 8-pixel method - while (height--) - { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; + /* Use overlapping 8-pixel method */ + while (height--) + { + uint32_t *keep_dst = 0; + uint8x8x4_t sval, dval, temp; - uint32_t *keep_dst; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; #ifndef USE_GCC_INLINE_ASM - uint8x8x4_t sval,dval,temp; + sval = vld4_u8 ((void *)src); + dval = vld4_u8 ((void *)dst); + keep_dst = dst; - sval = vld4_u8((void*)src); - dval = vld4_u8((void*)dst); - keep_dst = dst; + temp = neon8mul (dval, vmvn_u8 (sval.val[3])); + temp = neon8qadd (sval, temp); - temp = neon8mul(dval,vmvn_u8(sval.val[3])); - temp = neon8qadd(sval,temp); + src += (w & 7); + dst += (w & 7); + w -= (w & 7); - src += (w & 7); - dst += (w & 7); - w -= (w & 7); + while (w) + { + sval = vld4_u8 ((void *)src); + dval = vld4_u8 ((void *)dst); - while (w) - { - sval = vld4_u8((void*)src); - dval = vld4_u8((void*)dst); + vst4_u8 ((void *)keep_dst, temp); + keep_dst = dst; - vst4_u8((void*)keep_dst,temp); - keep_dst = dst; + temp = neon8mul (dval, vmvn_u8 (sval.val[3])); + temp = neon8qadd (sval, temp); - temp = neon8mul(dval,vmvn_u8(sval.val[3])); - temp = neon8qadd(sval,temp); + src += 8; + dst += 8; + w -= 8; + } - src+=8; - dst+=8; - w-=8; - } - vst4_u8((void*)keep_dst,temp); + vst4_u8 ((void *)keep_dst, temp); #else - asm volatile ( -// avoid using d8-d15 (q4-q7) aapcs callee-save registers - "vld4.8 {d0-d3}, [%[src]]\n\t" - "vld4.8 {d4-d7}, [%[dst]]\n\t" - "mov %[keep_dst], %[dst]\n\t" - - "and ip, %[w], #7\n\t" - "add %[src], %[src], ip, LSL#2\n\t" - "add %[dst], %[dst], ip, LSL#2\n\t" - "subs %[w], %[w], ip\n\t" - "b 9f\n\t" -// LOOP - "2:\n\t" - "vld4.8 {d0-d3}, [%[src]]!\n\t" - "vld4.8 {d4-d7}, [%[dst]]!\n\t" - "vst4.8 {d20-d23}, [%[keep_dst]]\n\t" - "sub %[keep_dst], %[dst], #8*4\n\t" - "subs %[w], %[w], #8\n\t" - "9:\n\t" - "vmvn.8 d31, d3\n\t" - "vmull.u8 q10, d31, d4\n\t" - "vmull.u8 q11, d31, d5\n\t" - "vmull.u8 q12, d31, d6\n\t" - "vmull.u8 q13, d31, d7\n\t" - "vrshr.u16 q8, q10, #8\n\t" - "vrshr.u16 q9, q11, #8\n\t" - "vraddhn.u16 d20, q10, q8\n\t" - "vraddhn.u16 d21, q11, q9\n\t" - "vrshr.u16 q8, 
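The core of neon_composite_over_8888_8888 is Porter-Duff OVER on premultiplied pixels: dst' = src + dst * (255 - src.alpha) / 255 per channel, with a saturating add; that is what neon8mul of the inverted alpha lane followed by neon8qadd computes. A scalar model (names and the sample values are illustrative):

#include <stdint.h>
#include <stdio.h>

static uint8_t div255 (uint16_t t)    /* exact round(t / 255) */
{
    return (uint8_t)((t + 128 + ((t + 128) >> 8)) >> 8);
}

static uint32_t over (uint32_t src, uint32_t dst)
{
    uint32_t ia = 255 - (src >> 24);  /* inverted source alpha */
    uint32_t res = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8)
    {
        uint32_t s = (src >> shift) & 0xff;
        uint32_t d = div255 ((uint16_t)(((dst >> shift) & 0xff) * ia));
        uint32_t c = s + d;

        res |= (c > 255 ? 255 : c) << shift;
    }
    return res;
}

int main (void)
{
    /* 50%-alpha premultiplied red over opaque green */
    printf ("%08x\n", over (0x80800000, 0xff00ff00));  /* prints ff807f00 */
    return 0;
}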
q12, #8\n\t" - "vrshr.u16 q9, q13, #8\n\t" - "vraddhn.u16 d22, q12, q8\n\t" - "vraddhn.u16 d23, q13, q9\n\t" -// result in d20-d23 - "vqadd.u8 d20, d0, d20\n\t" - "vqadd.u8 d21, d1, d21\n\t" - "vqadd.u8 d22, d2, d22\n\t" - "vqadd.u8 d23, d3, d23\n\t" - - "bne 2b\n\t" - - "1:\n\t" - "vst4.8 {d20-d23}, [%[keep_dst]]\n\t" - - : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "=r" (keep_dst) - : - : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7", - "d16","d17","d18","d19","d20","d21","d22","d23" - ); + asm volatile ( +/* avoid using d8-d15 (q4-q7) aapcs callee-save registers */ + "vld4.8 {d0-d3}, [%[src]]\n\t" + "vld4.8 {d4-d7}, [%[dst]]\n\t" + "mov %[keep_dst], %[dst]\n\t" + + "and ip, %[w], #7\n\t" + "add %[src], %[src], ip, LSL#2\n\t" + "add %[dst], %[dst], ip, LSL#2\n\t" + "subs %[w], %[w], ip\n\t" + "b 9f\n\t" +/* LOOP */ + "2:\n\t" + "vld4.8 {d0-d3}, [%[src]]!\n\t" + "vld4.8 {d4-d7}, [%[dst]]!\n\t" + "vst4.8 {d20-d23}, [%[keep_dst]]\n\t" + "sub %[keep_dst], %[dst], #8*4\n\t" + "subs %[w], %[w], #8\n\t" + "9:\n\t" + "vmvn.8 d31, d3\n\t" + "vmull.u8 q10, d31, d4\n\t" + "vmull.u8 q11, d31, d5\n\t" + "vmull.u8 q12, d31, d6\n\t" + "vmull.u8 q13, d31, d7\n\t" + "vrshr.u16 q8, q10, #8\n\t" + "vrshr.u16 q9, q11, #8\n\t" + "vraddhn.u16 d20, q10, q8\n\t" + "vraddhn.u16 d21, q11, q9\n\t" + "vrshr.u16 q8, q12, #8\n\t" + "vrshr.u16 q9, q13, #8\n\t" + "vraddhn.u16 d22, q12, q8\n\t" + "vraddhn.u16 d23, q13, q9\n\t" +/* result in d20-d23 */ + "vqadd.u8 d20, d0, d20\n\t" + "vqadd.u8 d21, d1, d21\n\t" + "vqadd.u8 d22, d2, d22\n\t" + "vqadd.u8 d23, d3, d23\n\t" + + "bne 2b\n\t" + + "1:\n\t" + "vst4.8 {d20-d23}, [%[keep_dst]]\n\t" + + : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "=r" (keep_dst) + : + : "ip", "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23" + ); #endif - } + } } else { - uint8x8_t alpha_selector=vreinterpret_u8_u64(vcreate_u64(0x0707070703030303ULL)); - - // Handle width<8 - while (height--) - { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; - - while (w>=2) - { - uint8x8_t sval,dval; - - /* two 32-bit pixels packed into D-reg; ad-hoc vectorization */ - sval = vreinterpret_u8_u32(vld1_u32((void*)src)); - dval = vreinterpret_u8_u32(vld1_u32((void*)dst)); - dval = neon2mul(dval,vtbl1_u8(vmvn_u8(sval),alpha_selector)); - vst1_u8((void*)dst,vqadd_u8(sval,dval)); - - src+=2; - dst+=2; - w-=2; - } - - if (w) - { - uint8x8_t sval,dval; - - /* single 32-bit pixel in lane 0 */ - sval = vreinterpret_u8_u32(vld1_dup_u32((void*)src)); // only interested in lane 0 - dval = vreinterpret_u8_u32(vld1_dup_u32((void*)dst)); // only interested in lane 0 - dval = neon2mul(dval,vtbl1_u8(vmvn_u8(sval),alpha_selector)); - vst1_lane_u32((void*)dst,vreinterpret_u32_u8(vqadd_u8(sval,dval)),0); - } - } - } -} + uint8x8_t alpha_selector = vreinterpret_u8_u64 ( + vcreate_u64 (0x0707070703030303ULL)); + /* Handle width < 8 */ + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + while (w >= 2) + { + uint8x8_t sval, dval; -void -fbCompositeSrc_x888x0565neon ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint16_t *dstLine, *dst; - uint32_t *srcLine, *src; - int dstStride, 
srcStride; - uint32_t w; + /* two 32-bit pixels packed into D-reg; ad-hoc vectorization */ + sval = vreinterpret_u8_u32 (vld1_u32 ((void *)src)); + dval = vreinterpret_u8_u32 (vld1_u32 ((void *)dst)); + dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector)); + vst1_u8 ((void *)dst, vqadd_u8 (sval, dval)); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); + src += 2; + dst += 2; + w -= 2; + } - if (width>=8) - { - while (height--) - { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; - - do { - while (w>=8) - { -#ifndef USE_GCC_INLINE_ASM - vst1q_u16(dst, pack0565(vld4_u8((void*)src))); -#else - asm volatile ( - "vld4.8 {d4-d7}, [%[src]]\n\t" - "vshll.u8 q0, d6, #8\n\t" - "vshll.u8 q1, d5, #8\n\t" - "vsriq.u16 q0, q1, #5\t\n" - "vshll.u8 q1, d4, #8\n\t" - "vsriq.u16 q0, q1, #11\t\n" - "vst1.16 {q0}, [%[dst]]\n\t" - : - : [dst] "r" (dst), [src] "r" (src) - : "memory", "d0","d1","d2","d3","d4","d5","d6","d7" - ); -#endif - src+=8; - dst+=8; - w-=8; - } - if (w != 0) - { - src -= (8-w); - dst -= (8-w); - w = 8; // do another vector - } - } while (w!=0); - } - } - else - { - // Handle width<8 - while (height--) - { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; - - while (w>=2) + if (w) { - uint32x2_t sval, rgb, g, b; - sval = vld1_u32(src); - rgb = vshr_n_u32(sval,8-5); // r (5 bits) - g = vshr_n_u32(sval,8+8-6); // g to bottom byte - rgb = vsli_n_u32(rgb, g, 5); - b = vshr_n_u32(sval,8+8+8-5); // b to bottom byte - rgb = vsli_n_u32(rgb, b, 11); - vst1_lane_u16(dst++,vreinterpret_u16_u32(rgb),0); - vst1_lane_u16(dst++,vreinterpret_u16_u32(rgb),2); - src+=2; - w-=2; + uint8x8_t sval, dval; + + /* single 32-bit pixel in lane 0 */ + sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)src)); /* only interested in lane 0 */ + dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst)); /* only interested in lane 0 */ + dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector)); + vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0); } - if (w) - { - uint32x2_t sval, rgb, g, b; - sval = vld1_dup_u32(src); - rgb = vshr_n_u32(sval,8-5); // r (5 bits) - g = vshr_n_u32(sval,8+8-6); // g to bottom byte - rgb = vsli_n_u32(rgb, g, 5); - b = vshr_n_u32(sval,8+8+8-5); // b to bottom byte - rgb = vsli_n_u32(rgb, b, 11); - vst1_lane_u16(dst++,vreinterpret_u16_u32(rgb),0); - } } } } - -void -fbCompositeSrc_8888x8x8888neon ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +static void +neon_composite_over_8888_n_8888 (pixman_implementation_t * impl, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - uint32_t mask; - int dstStride, srcStride; - uint32_t w; + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + uint32_t mask; + int dst_stride, src_stride; + uint32_t w; uint8x8_t mask_alpha; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 
1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - fbComposeGetSolid (pMask, mask, pDst->bits.format); - mask_alpha = vdup_n_u8((mask) >> 24); + mask = _pixman_image_get_solid (mask_image, dst_image->bits.format); + mask_alpha = vdup_n_u8 ((mask) >> 24); - if (width>=8) + if (width >= 8) { - // Use overlapping 8-pixel method - while (height--) - { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; + /* Use overlapping 8-pixel method */ + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; - uint32_t *keep_dst; + uint32_t *keep_dst = 0; #ifndef USE_GCC_INLINE_ASM - uint8x8x4_t sval,dval,temp; + uint8x8x4_t sval, dval, temp; - sval = vld4_u8((void*)src); - dval = vld4_u8((void*)dst); - keep_dst = dst; + sval = vld4_u8 ((void *)src); + dval = vld4_u8 ((void *)dst); + keep_dst = dst; - sval = neon8mul(sval,mask_alpha); - temp = neon8mul(dval,vmvn_u8(sval.val[3])); - temp = neon8qadd(sval,temp); + sval = neon8mul (sval, mask_alpha); + temp = neon8mul (dval, vmvn_u8 (sval.val[3])); + temp = neon8qadd (sval, temp); - src += (w & 7); - dst += (w & 7); - w -= (w & 7); + src += (w & 7); + dst += (w & 7); + w -= (w & 7); - while (w) - { - sval = vld4_u8((void*)src); - dval = vld4_u8((void*)dst); + while (w) + { + sval = vld4_u8 ((void *)src); + dval = vld4_u8 ((void *)dst); - vst4_u8((void*)keep_dst,temp); - keep_dst = dst; + vst4_u8 ((void *)keep_dst, temp); + keep_dst = dst; - sval = neon8mul(sval,mask_alpha); - temp = neon8mul(dval,vmvn_u8(sval.val[3])); - temp = neon8qadd(sval,temp); + sval = neon8mul (sval, mask_alpha); + temp = neon8mul (dval, vmvn_u8 (sval.val[3])); + temp = neon8qadd (sval, temp); - src+=8; - dst+=8; - w-=8; - } - vst4_u8((void*)keep_dst,temp); + src += 8; + dst += 8; + w -= 8; + } + vst4_u8 ((void *)keep_dst, temp); #else - asm volatile ( -// avoid using d8-d15 (q4-q7) aapcs callee-save registers - "vdup.32 d30, %[mask]\n\t" - "vdup.8 d30, d30[3]\n\t" - - "vld4.8 {d0-d3}, [%[src]]\n\t" - "vld4.8 {d4-d7}, [%[dst]]\n\t" - "mov %[keep_dst], %[dst]\n\t" - - "and ip, %[w], #7\n\t" - "add %[src], %[src], ip, LSL#2\n\t" - "add %[dst], %[dst], ip, LSL#2\n\t" - "subs %[w], %[w], ip\n\t" - "b 9f\n\t" -// LOOP - "2:\n\t" - "vld4.8 {d0-d3}, [%[src]]!\n\t" - "vld4.8 {d4-d7}, [%[dst]]!\n\t" - "vst4.8 {d20-d23}, [%[keep_dst]]\n\t" - "sub %[keep_dst], %[dst], #8*4\n\t" - "subs %[w], %[w], #8\n\t" - - "9:\n\t" - "vmull.u8 q10, d30, d0\n\t" - "vmull.u8 q11, d30, d1\n\t" - "vmull.u8 q12, d30, d2\n\t" - "vmull.u8 q13, d30, d3\n\t" - "vrshr.u16 q8, q10, #8\n\t" - "vrshr.u16 q9, q11, #8\n\t" - "vraddhn.u16 d0, q10, q8\n\t" - "vraddhn.u16 d1, q11, q9\n\t" - "vrshr.u16 q9, q13, #8\n\t" - "vrshr.u16 q8, q12, #8\n\t" - "vraddhn.u16 d3, q13, q9\n\t" - "vraddhn.u16 d2, q12, q8\n\t" - - "vmvn.8 d31, d3\n\t" - "vmull.u8 q10, d31, d4\n\t" - "vmull.u8 q11, d31, d5\n\t" - "vmull.u8 q12, d31, d6\n\t" - "vmull.u8 q13, d31, d7\n\t" - "vrshr.u16 q8, q10, #8\n\t" - "vrshr.u16 q9, q11, #8\n\t" - "vraddhn.u16 d20, q10, q8\n\t" - "vrshr.u16 q8, q12, #8\n\t" - "vraddhn.u16 d21, q11, q9\n\t" - "vrshr.u16 q9, q13, #8\n\t" - "vraddhn.u16 d22, q12, q8\n\t" - "vraddhn.u16 d23, q13, q9\n\t" -// result in d20-d23 - "vqadd.u8 d20, d0, d20\n\t" - "vqadd.u8 d21, d1, d21\n\t" - "vqadd.u8 d22, d2, d22\n\t" - "vqadd.u8 d23, d3, d23\n\t" - - "bne 2b\n\t" - - "1:\n\t" - "vst4.8 {d20-d23}, 
[%[keep_dst]]\n\t" - - : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "=r" (keep_dst) - : [mask] "r" (mask) - : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7", - "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27", - "d30","d31" - ); + asm volatile ( +/* avoid using d8-d15 (q4-q7) aapcs callee-save registers */ + "vdup.32 d30, %[mask]\n\t" + "vdup.8 d30, d30[3]\n\t" + + "vld4.8 {d0-d3}, [%[src]]\n\t" + "vld4.8 {d4-d7}, [%[dst]]\n\t" + "mov %[keep_dst], %[dst]\n\t" + + "and ip, %[w], #7\n\t" + "add %[src], %[src], ip, LSL#2\n\t" + "add %[dst], %[dst], ip, LSL#2\n\t" + "subs %[w], %[w], ip\n\t" + "b 9f\n\t" +/* LOOP */ + "2:\n\t" + "vld4.8 {d0-d3}, [%[src]]!\n\t" + "vld4.8 {d4-d7}, [%[dst]]!\n\t" + "vst4.8 {d20-d23}, [%[keep_dst]]\n\t" + "sub %[keep_dst], %[dst], #8*4\n\t" + "subs %[w], %[w], #8\n\t" + + "9:\n\t" + "vmull.u8 q10, d30, d0\n\t" + "vmull.u8 q11, d30, d1\n\t" + "vmull.u8 q12, d30, d2\n\t" + "vmull.u8 q13, d30, d3\n\t" + "vrshr.u16 q8, q10, #8\n\t" + "vrshr.u16 q9, q11, #8\n\t" + "vraddhn.u16 d0, q10, q8\n\t" + "vraddhn.u16 d1, q11, q9\n\t" + "vrshr.u16 q9, q13, #8\n\t" + "vrshr.u16 q8, q12, #8\n\t" + "vraddhn.u16 d3, q13, q9\n\t" + "vraddhn.u16 d2, q12, q8\n\t" + + "vmvn.8 d31, d3\n\t" + "vmull.u8 q10, d31, d4\n\t" + "vmull.u8 q11, d31, d5\n\t" + "vmull.u8 q12, d31, d6\n\t" + "vmull.u8 q13, d31, d7\n\t" + "vrshr.u16 q8, q10, #8\n\t" + "vrshr.u16 q9, q11, #8\n\t" + "vraddhn.u16 d20, q10, q8\n\t" + "vrshr.u16 q8, q12, #8\n\t" + "vraddhn.u16 d21, q11, q9\n\t" + "vrshr.u16 q9, q13, #8\n\t" + "vraddhn.u16 d22, q12, q8\n\t" + "vraddhn.u16 d23, q13, q9\n\t" + +/* result in d20-d23 */ + "vqadd.u8 d20, d0, d20\n\t" + "vqadd.u8 d21, d1, d21\n\t" + "vqadd.u8 d22, d2, d22\n\t" + "vqadd.u8 d23, d3, d23\n\t" + + "bne 2b\n\t" + + "1:\n\t" + "vst4.8 {d20-d23}, [%[keep_dst]]\n\t" + + : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "=r" (keep_dst) + : [mask] "r" (mask) + : "ip", "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", + "d30", "d31" + ); #endif - } + } } else { - uint8x8_t alpha_selector=vreinterpret_u8_u64(vcreate_u64(0x0707070703030303ULL)); + uint8x8_t alpha_selector = vreinterpret_u8_u64 (vcreate_u64 (0x0707070703030303ULL)); - // Handle width<8 - while (height--) - { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; + /* Handle width < 8 */ + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; - while (w>=2) - { - uint8x8_t sval,dval; + while (w >= 2) + { + uint8x8_t sval, dval; - sval = vreinterpret_u8_u32(vld1_u32((void*)src)); - dval = vreinterpret_u8_u32(vld1_u32((void*)dst)); + sval = vreinterpret_u8_u32 (vld1_u32 ((void *)src)); + dval = vreinterpret_u8_u32 (vld1_u32 ((void *)dst)); - /* sval * const alpha_mul */ - sval = neon2mul(sval,mask_alpha); + /* sval * const alpha_mul */ + sval = neon2mul (sval, mask_alpha); - /* dval * 255-(src alpha) */ - dval = neon2mul(dval,vtbl1_u8(vmvn_u8(sval), alpha_selector)); + /* dval * 255-(src alpha) */ + dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector)); - vst1_u8((void*)dst,vqadd_u8(sval,dval)); + vst1_u8 ((void *)dst, vqadd_u8 (sval, dval)); - src+=2; - dst+=2; - w-=2; - } + src += 2; + dst += 2; + w -= 2; + } - if (w) - { - uint8x8_t sval,dval; + if (w) + { + uint8x8_t sval, dval; - sval = vreinterpret_u8_u32(vld1_dup_u32((void*)src)); - dval 
= vreinterpret_u8_u32(vld1_dup_u32((void*)dst)); + sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)src)); + dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst)); - /* sval * const alpha_mul */ - sval = neon2mul(sval,mask_alpha); + /* sval * const alpha_mul */ + sval = neon2mul (sval, mask_alpha); - /* dval * 255-(src alpha) */ - dval = neon2mul(dval,vtbl1_u8(vmvn_u8(sval), alpha_selector)); + /* dval * 255-(src alpha) */ + dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector)); - vst1_lane_u32((void*)dst,vreinterpret_u32_u8(vqadd_u8(sval,dval)),0); - } - } + vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0); + } + } } } - - -void -fbCompositeSolidMask_nx8x0565neon ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +static void +neon_composite_over_n_8_0565 (pixman_implementation_t * impl, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { uint32_t src, srca; - uint16_t *dstLine, *dst; - uint8_t *maskLine, *mask; - int dstStride, maskStride; + uint16_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; uint32_t w; uint8x8_t sval2; uint8x8x4_t sval8; - fbComposeGetSolid(pSrc, src, pDst->bits.format); + src = _pixman_image_get_solid (src_image, dst_image->bits.format); srca = src >> 24; if (src == 0) - return; + return; - sval2=vreinterpret_u8_u32(vdup_n_u32(src)); - sval8.val[0]=vdup_lane_u8(sval2,0); - sval8.val[1]=vdup_lane_u8(sval2,1); - sval8.val[2]=vdup_lane_u8(sval2,2); - sval8.val[3]=vdup_lane_u8(sval2,3); + sval2=vreinterpret_u8_u32 (vdup_n_u32 (src)); + sval8.val[0]=vdup_lane_u8 (sval2,0); + sval8.val[1]=vdup_lane_u8 (sval2,1); + sval8.val[2]=vdup_lane_u8 (sval2,2); + sval8.val[3]=vdup_lane_u8 (sval2,3); - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); if (width>=8) { - // Use overlapping 8-pixel method, modified to avoid rewritten dest being reused - while (height--) - { - uint16_t *keep_dst; + /* Use overlapping 8-pixel method, modified to avoid rewritten dest being reused */ + while (height--) + { + uint16_t *keep_dst=0; - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; - w = width; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; #ifndef USE_GCC_INLINE_ASM - uint8x8_t alpha; - uint16x8_t dval, temp; - uint8x8x4_t sval8temp; + uint8x8_t alpha; + uint16x8_t dval, temp; + uint8x8x4_t sval8temp; - alpha = vld1_u8((void*)mask); - dval = vld1q_u16((void*)dst); - keep_dst = dst; + alpha = vld1_u8 ((void *)mask); + dval = vld1q_u16 ((void *)dst); + keep_dst = dst; - sval8temp = neon8mul(sval8,alpha); - temp = pack0565(neon8qadd(sval8temp,neon8mul(unpack0565(dval),vmvn_u8(sval8temp.val[3])))); + sval8temp = neon8mul (sval8, alpha); + temp = pack0565 (neon8qadd (sval8temp, neon8mul (unpack0565 (dval), vmvn_u8 (sval8temp.val[3])))); - mask += (w & 7); - 
dst += (w & 7); - w -= (w & 7); + mask += (w & 7); + dst += (w & 7); + w -= (w & 7); - while (w) - { - dval = vld1q_u16((void*)dst); - alpha = vld1_u8((void*)mask); + while (w) + { + dval = vld1q_u16 ((void *)dst); + alpha = vld1_u8 ((void *)mask); - vst1q_u16((void*)keep_dst,temp); - keep_dst = dst; + vst1q_u16 ((void *)keep_dst, temp); + keep_dst = dst; - sval8temp = neon8mul(sval8,alpha); - temp = pack0565(neon8qadd(sval8temp,neon8mul(unpack0565(dval),vmvn_u8(sval8temp.val[3])))); + sval8temp = neon8mul (sval8, alpha); + temp = pack0565 (neon8qadd (sval8temp, neon8mul (unpack0565 (dval), vmvn_u8 (sval8temp.val[3])))); - mask+=8; - dst+=8; - w-=8; - } - vst1q_u16((void*)keep_dst,temp); + mask+=8; + dst+=8; + w-=8; + } + vst1q_u16 ((void *)keep_dst, temp); #else - asm volatile ( - "vdup.32 d0, %[src]\n\t" - "vdup.8 d1, d0[1]\n\t" - "vdup.8 d2, d0[2]\n\t" - "vdup.8 d3, d0[3]\n\t" - "vdup.8 d0, d0[0]\n\t" - - "vld1.8 {q12}, [%[dst]]\n\t" - "vld1.8 {d31}, [%[mask]]\n\t" - "mov %[keep_dst], %[dst]\n\t" - - "and ip, %[w], #7\n\t" - "add %[mask], %[mask], ip\n\t" - "add %[dst], %[dst], ip, LSL#1\n\t" - "subs %[w], %[w], ip\n\t" - "b 9f\n\t" -// LOOP - "2:\n\t" - - "vld1.16 {q12}, [%[dst]]!\n\t" - "vld1.8 {d31}, [%[mask]]!\n\t" - "vst1.16 {q10}, [%[keep_dst]]\n\t" - "sub %[keep_dst], %[dst], #8*2\n\t" - "subs %[w], %[w], #8\n\t" - "9:\n\t" -// expand 0565 q12 to 8888 {d4-d7} - "vmovn.u16 d4, q12\t\n" - "vshr.u16 q11, q12, #5\t\n" - "vshr.u16 q10, q12, #6+5\t\n" - "vmovn.u16 d5, q11\t\n" - "vmovn.u16 d6, q10\t\n" - "vshl.u8 d4, d4, #3\t\n" - "vshl.u8 d5, d5, #2\t\n" - "vshl.u8 d6, d6, #3\t\n" - "vsri.u8 d4, d4, #5\t\n" - "vsri.u8 d5, d5, #6\t\n" - "vsri.u8 d6, d6, #5\t\n" - - "vmull.u8 q10, d31, d0\n\t" - "vmull.u8 q11, d31, d1\n\t" - "vmull.u8 q12, d31, d2\n\t" - "vmull.u8 q13, d31, d3\n\t" - "vrshr.u16 q8, q10, #8\n\t" - "vrshr.u16 q9, q11, #8\n\t" - "vraddhn.u16 d20, q10, q8\n\t" - "vraddhn.u16 d21, q11, q9\n\t" - "vrshr.u16 q9, q13, #8\n\t" - "vrshr.u16 q8, q12, #8\n\t" - "vraddhn.u16 d23, q13, q9\n\t" - "vraddhn.u16 d22, q12, q8\n\t" - -// duplicate in 4/2/1 & 8pix vsns - "vmvn.8 d30, d23\n\t" - "vmull.u8 q14, d30, d6\n\t" - "vmull.u8 q13, d30, d5\n\t" - "vmull.u8 q12, d30, d4\n\t" - "vrshr.u16 q8, q14, #8\n\t" - "vrshr.u16 q9, q13, #8\n\t" - "vraddhn.u16 d6, q14, q8\n\t" - "vrshr.u16 q8, q12, #8\n\t" - "vraddhn.u16 d5, q13, q9\n\t" - "vqadd.u8 d6, d6, d22\n\t" // moved up - "vraddhn.u16 d4, q12, q8\n\t" -// intentionally don't calculate alpha -// result in d4-d6 - -// "vqadd.u8 d6, d6, d22\n\t" ** moved up - "vqadd.u8 d5, d5, d21\n\t" - "vqadd.u8 d4, d4, d20\n\t" - -// pack 8888 {d20-d23} to 0565 q10 - "vshll.u8 q10, d6, #8\n\t" - "vshll.u8 q3, d5, #8\n\t" - "vshll.u8 q2, d4, #8\n\t" - "vsri.u16 q10, q3, #5\t\n" - "vsri.u16 q10, q2, #11\t\n" - - "bne 2b\n\t" - - "1:\n\t" - "vst1.16 {q10}, [%[keep_dst]]\n\t" - - : [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [keep_dst] "=r" (keep_dst) - : [src] "r" (src) - : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7", - "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29", - "d30","d31" - ); + asm volatile ( + "vdup.32 d0, %[src]\n\t" + "vdup.8 d1, d0[1]\n\t" + "vdup.8 d2, d0[2]\n\t" + "vdup.8 d3, d0[3]\n\t" + "vdup.8 d0, d0[0]\n\t" + + "vld1.8 {q12}, [%[dst]]\n\t" + "vld1.8 {d31}, [%[mask]]\n\t" + "mov %[keep_dst], %[dst]\n\t" + + "and ip, %[w], #7\n\t" + "add %[mask], %[mask], ip\n\t" + "add %[dst], %[dst], ip, LSL#1\n\t" + "subs %[w], %[w], ip\n\t" + "b 9f\n\t" +/* LOOP */ + "2:\n\t" + + "vld1.16 
{q12}, [%[dst]]!\n\t" + "vld1.8 {d31}, [%[mask]]!\n\t" + "vst1.16 {q10}, [%[keep_dst]]\n\t" + "sub %[keep_dst], %[dst], #8*2\n\t" + "subs %[w], %[w], #8\n\t" + "9:\n\t" +/* expand 0565 q12 to 8888 {d4-d7} */ + "vmovn.u16 d4, q12\t\n" + "vshr.u16 q11, q12, #5\t\n" + "vshr.u16 q10, q12, #6+5\t\n" + "vmovn.u16 d5, q11\t\n" + "vmovn.u16 d6, q10\t\n" + "vshl.u8 d4, d4, #3\t\n" + "vshl.u8 d5, d5, #2\t\n" + "vshl.u8 d6, d6, #3\t\n" + "vsri.u8 d4, d4, #5\t\n" + "vsri.u8 d5, d5, #6\t\n" + "vsri.u8 d6, d6, #5\t\n" + + "vmull.u8 q10, d31, d0\n\t" + "vmull.u8 q11, d31, d1\n\t" + "vmull.u8 q12, d31, d2\n\t" + "vmull.u8 q13, d31, d3\n\t" + "vrshr.u16 q8, q10, #8\n\t" + "vrshr.u16 q9, q11, #8\n\t" + "vraddhn.u16 d20, q10, q8\n\t" + "vraddhn.u16 d21, q11, q9\n\t" + "vrshr.u16 q9, q13, #8\n\t" + "vrshr.u16 q8, q12, #8\n\t" + "vraddhn.u16 d23, q13, q9\n\t" + "vraddhn.u16 d22, q12, q8\n\t" + +/* duplicate in 4/2/1 & 8pix vsns */ + "vmvn.8 d30, d23\n\t" + "vmull.u8 q14, d30, d6\n\t" + "vmull.u8 q13, d30, d5\n\t" + "vmull.u8 q12, d30, d4\n\t" + "vrshr.u16 q8, q14, #8\n\t" + "vrshr.u16 q9, q13, #8\n\t" + "vraddhn.u16 d6, q14, q8\n\t" + "vrshr.u16 q8, q12, #8\n\t" + "vraddhn.u16 d5, q13, q9\n\t" + "vqadd.u8 d6, d6, d22\n\t" /* moved up */ + "vraddhn.u16 d4, q12, q8\n\t" +/* intentionally don't calculate alpha */ +/* result in d4-d6 */ + +/* "vqadd.u8 d6, d6, d22\n\t" ** moved up */ + "vqadd.u8 d5, d5, d21\n\t" + "vqadd.u8 d4, d4, d20\n\t" + +/* pack 8888 {d20-d23} to 0565 q10 */ + "vshll.u8 q10, d6, #8\n\t" + "vshll.u8 q3, d5, #8\n\t" + "vshll.u8 q2, d4, #8\n\t" + "vsri.u16 q10, q3, #5\t\n" + "vsri.u16 q10, q2, #11\t\n" + + "bne 2b\n\t" + + "1:\n\t" + "vst1.16 {q10}, [%[keep_dst]]\n\t" + + : [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [keep_dst] "=r" (keep_dst) + : [src] "r" (src) + : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7", + "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29", + "d30","d31" + ); #endif - } + } } else { - while (height--) - { - void *dst4, *dst2; + while (height--) + { + void *dst4=0, *dst2=0; - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; - w = width; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; -#ifndef USE_GCC_INLINE_ASM - uint8x8_t alpha; - uint16x8_t dval, temp; - uint8x8x4_t sval8temp; - - if (w&4) - { - alpha = vreinterpret_u8_u32(vld1_lane_u32((void*)mask,vreinterpret_u32_u8(alpha),1)); - dval = vreinterpretq_u16_u64(vld1q_lane_u64((void*)dst,vreinterpretq_u64_u16(dval),1)); - dst4=dst; - mask+=4; - dst+=4; - } - if (w&2) - { - alpha = vreinterpret_u8_u16(vld1_lane_u16((void*)mask,vreinterpret_u16_u8(alpha),1)); - dval = vreinterpretq_u16_u32(vld1q_lane_u32((void*)dst,vreinterpretq_u32_u16(dval),1)); - dst2=dst; - mask+=2; - dst+=2; - } - if (w&1) - { - alpha = vld1_lane_u8((void*)mask,alpha,1); - dval = vld1q_lane_u16((void*)dst,dval,1); - } - - sval8temp = neon8mul(sval8,alpha); - temp = pack0565(neon8qadd(sval8temp,neon8mul(unpack0565(dval),vmvn_u8(sval8temp.val[3])))); - - if (w&1) - vst1q_lane_u16((void*)dst,temp,1); - if (w&2) - vst1q_lane_u32((void*)dst2,vreinterpretq_u32_u16(temp),1); - if (w&4) - vst1q_lane_u64((void*)dst4,vreinterpretq_u64_u16(temp),1); +#if 1 /* #ifndef USE_GCC_INLINE_ASM */ + uint8x8_t alpha; + uint16x8_t dval, temp; + uint8x8x4_t sval8temp; + + if (w&4) + { + alpha = vreinterpret_u8_u32 (vld1_lane_u32 ((void *)mask, vreinterpret_u32_u8 (alpha),1)); + dval = vreinterpretq_u16_u64 (vld1q_lane_u64 
((void *)dst, vreinterpretq_u64_u16 (dval),1)); + dst4=dst; + mask+=4; + dst+=4; + } + if (w&2) + { + alpha = vreinterpret_u8_u16 (vld1_lane_u16 ((void *)mask, vreinterpret_u16_u8 (alpha),1)); + dval = vreinterpretq_u16_u32 (vld1q_lane_u32 ((void *)dst, vreinterpretq_u32_u16 (dval),1)); + dst2=dst; + mask+=2; + dst+=2; + } + if (w&1) + { + alpha = vld1_lane_u8 ((void *)mask, alpha,1); + dval = vld1q_lane_u16 ((void *)dst, dval,1); + } + + sval8temp = neon8mul (sval8, alpha); + temp = pack0565 (neon8qadd (sval8temp, neon8mul (unpack0565 (dval), vmvn_u8 (sval8temp.val[3])))); + + if (w&1) + vst1q_lane_u16 ((void *)dst, temp,1); + if (w&2) + vst1q_lane_u32 ((void *)dst2, vreinterpretq_u32_u16 (temp),1); + if (w&4) + vst1q_lane_u64 ((void *)dst4, vreinterpretq_u64_u16 (temp),1); #else - asm volatile ( - "vdup.32 d0, %[src]\n\t" - "vdup.8 d1, d0[1]\n\t" - "vdup.8 d2, d0[2]\n\t" - "vdup.8 d3, d0[3]\n\t" - "vdup.8 d0, d0[0]\n\t" - - "tst %[w], #4\t\n" - "beq skip_load4\t\n" - - "vld1.64 {d25}, [%[dst]]\n\t" - "vld1.32 {d31[1]}, [%[mask]]\n\t" - "mov %[dst4], %[dst]\t\n" - "add %[mask], %[mask], #4\t\n" - "add %[dst], %[dst], #4*2\t\n" - - "skip_load4:\t\n" - "tst %[w], #2\t\n" - "beq skip_load2\t\n" - "vld1.32 {d24[1]}, [%[dst]]\n\t" - "vld1.16 {d31[1]}, [%[mask]]\n\t" - "mov %[dst2], %[dst]\t\n" - "add %[mask], %[mask], #2\t\n" - "add %[dst], %[dst], #2*2\t\n" - - "skip_load2:\t\n" - "tst %[w], #1\t\n" - "beq skip_load1\t\n" - "vld1.16 {d24[1]}, [%[dst]]\n\t" - "vld1.8 {d31[1]}, [%[mask]]\n\t" - - "skip_load1:\t\n" -// expand 0565 q12 to 8888 {d4-d7} - "vmovn.u16 d4, q12\t\n" - "vshr.u16 q11, q12, #5\t\n" - "vshr.u16 q10, q12, #6+5\t\n" - "vmovn.u16 d5, q11\t\n" - "vmovn.u16 d6, q10\t\n" - "vshl.u8 d4, d4, #3\t\n" - "vshl.u8 d5, d5, #2\t\n" - "vshl.u8 d6, d6, #3\t\n" - "vsri.u8 d4, d4, #5\t\n" - "vsri.u8 d5, d5, #6\t\n" - "vsri.u8 d6, d6, #5\t\n" - - "vmull.u8 q10, d31, d0\n\t" - "vmull.u8 q11, d31, d1\n\t" - "vmull.u8 q12, d31, d2\n\t" - "vmull.u8 q13, d31, d3\n\t" - "vrshr.u16 q8, q10, #8\n\t" - "vrshr.u16 q9, q11, #8\n\t" - "vraddhn.u16 d20, q10, q8\n\t" - "vraddhn.u16 d21, q11, q9\n\t" - "vrshr.u16 q9, q13, #8\n\t" - "vrshr.u16 q8, q12, #8\n\t" - "vraddhn.u16 d23, q13, q9\n\t" - "vraddhn.u16 d22, q12, q8\n\t" - -// duplicate in 4/2/1 & 8pix vsns - "vmvn.8 d30, d23\n\t" - "vmull.u8 q14, d30, d6\n\t" - "vmull.u8 q13, d30, d5\n\t" - "vmull.u8 q12, d30, d4\n\t" - "vrshr.u16 q8, q14, #8\n\t" - "vrshr.u16 q9, q13, #8\n\t" - "vraddhn.u16 d6, q14, q8\n\t" - "vrshr.u16 q8, q12, #8\n\t" - "vraddhn.u16 d5, q13, q9\n\t" - "vqadd.u8 d6, d6, d22\n\t" // moved up - "vraddhn.u16 d4, q12, q8\n\t" -// intentionally don't calculate alpha -// result in d4-d6 - -// "vqadd.u8 d6, d6, d22\n\t" ** moved up - "vqadd.u8 d5, d5, d21\n\t" - "vqadd.u8 d4, d4, d20\n\t" - -// pack 8888 {d20-d23} to 0565 q10 - "vshll.u8 q10, d6, #8\n\t" - "vshll.u8 q3, d5, #8\n\t" - "vshll.u8 q2, d4, #8\n\t" - "vsri.u16 q10, q3, #5\t\n" - "vsri.u16 q10, q2, #11\t\n" - - "tst %[w], #1\n\t" - "beq skip_store1\t\n" - "vst1.16 {d20[1]}, [%[dst]]\t\n" - "skip_store1:\t\n" - "tst %[w], #2\n\t" - "beq skip_store2\t\n" - "vst1.32 {d20[1]}, [%[dst2]]\t\n" - "skip_store2:\t\n" - "tst %[w], #4\n\t" - "beq skip_store4\t\n" - "vst1.16 {d21}, [%[dst4]]\t\n" - "skip_store4:\t\n" - - : [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [dst4] "+r" (dst4), [dst2] "+r" (dst2) - : [src] "r" (src) - : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7", - "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29", - 
"d30","d31" - ); + /* this code has some bug (does not pass blitters-test) */ + asm volatile ( + "vdup.32 d0, %[src]\n\t" + "vdup.8 d1, d0[1]\n\t" + "vdup.8 d2, d0[2]\n\t" + "vdup.8 d3, d0[3]\n\t" + "vdup.8 d0, d0[0]\n\t" + + "tst %[w], #4\t\n" + "beq skip_load4\t\n" + + "vld1.64 {d25}, [%[dst]]\n\t" + "vld1.32 {d31[1]}, [%[mask]]\n\t" + "mov %[dst4], %[dst]\t\n" + "add %[mask], %[mask], #4\t\n" + "add %[dst], %[dst], #4*2\t\n" + + "skip_load4:\t\n" + "tst %[w], #2\t\n" + "beq skip_load2\t\n" + "vld1.32 {d24[1]}, [%[dst]]\n\t" + "vld1.16 {d31[1]}, [%[mask]]\n\t" + "mov %[dst2], %[dst]\t\n" + "add %[mask], %[mask], #2\t\n" + "add %[dst], %[dst], #2*2\t\n" + + "skip_load2:\t\n" + "tst %[w], #1\t\n" + "beq skip_load1\t\n" + "vld1.16 {d24[1]}, [%[dst]]\n\t" + "vld1.8 {d31[1]}, [%[mask]]\n\t" + + "skip_load1:\t\n" +/* expand 0565 q12 to 8888 {d4-d7} */ + "vmovn.u16 d4, q12\t\n" + "vshr.u16 q11, q12, #5\t\n" + "vshr.u16 q10, q12, #6+5\t\n" + "vmovn.u16 d5, q11\t\n" + "vmovn.u16 d6, q10\t\n" + "vshl.u8 d4, d4, #3\t\n" + "vshl.u8 d5, d5, #2\t\n" + "vshl.u8 d6, d6, #3\t\n" + "vsri.u8 d4, d4, #5\t\n" + "vsri.u8 d5, d5, #6\t\n" + "vsri.u8 d6, d6, #5\t\n" + + "vmull.u8 q10, d31, d0\n\t" + "vmull.u8 q11, d31, d1\n\t" + "vmull.u8 q12, d31, d2\n\t" + "vmull.u8 q13, d31, d3\n\t" + "vrshr.u16 q8, q10, #8\n\t" + "vrshr.u16 q9, q11, #8\n\t" + "vraddhn.u16 d20, q10, q8\n\t" + "vraddhn.u16 d21, q11, q9\n\t" + "vrshr.u16 q9, q13, #8\n\t" + "vrshr.u16 q8, q12, #8\n\t" + "vraddhn.u16 d23, q13, q9\n\t" + "vraddhn.u16 d22, q12, q8\n\t" + +/* duplicate in 4/2/1 & 8pix vsns */ + "vmvn.8 d30, d23\n\t" + "vmull.u8 q14, d30, d6\n\t" + "vmull.u8 q13, d30, d5\n\t" + "vmull.u8 q12, d30, d4\n\t" + "vrshr.u16 q8, q14, #8\n\t" + "vrshr.u16 q9, q13, #8\n\t" + "vraddhn.u16 d6, q14, q8\n\t" + "vrshr.u16 q8, q12, #8\n\t" + "vraddhn.u16 d5, q13, q9\n\t" + "vqadd.u8 d6, d6, d22\n\t" /* moved up */ + "vraddhn.u16 d4, q12, q8\n\t" +/* intentionally don't calculate alpha */ +/* result in d4-d6 */ + +/* "vqadd.u8 d6, d6, d22\n\t" ** moved up */ + "vqadd.u8 d5, d5, d21\n\t" + "vqadd.u8 d4, d4, d20\n\t" + +/* pack 8888 {d20-d23} to 0565 q10 */ + "vshll.u8 q10, d6, #8\n\t" + "vshll.u8 q3, d5, #8\n\t" + "vshll.u8 q2, d4, #8\n\t" + "vsri.u16 q10, q3, #5\t\n" + "vsri.u16 q10, q2, #11\t\n" + + "tst %[w], #1\n\t" + "beq skip_store1\t\n" + "vst1.16 {d20[1]}, [%[dst]]\t\n" + "skip_store1:\t\n" + "tst %[w], #2\n\t" + "beq skip_store2\t\n" + "vst1.32 {d20[1]}, [%[dst2]]\t\n" + "skip_store2:\t\n" + "tst %[w], #4\n\t" + "beq skip_store4\t\n" + "vst1.16 {d21}, [%[dst4]]\t\n" + "skip_store4:\t\n" + + : [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [dst4] "+r" (dst4), [dst2] "+r" (dst2) + : [src] "r" (src) + : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7", + "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29", + "d30","d31" + ); #endif - } + } } } - - -void -fbCompositeSolidMask_nx8x8888neon ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +static void +neon_composite_over_n_8_8888 (pixman_implementation_t * impl, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t src, srca; - 
uint32_t *dstLine, *dst; - uint8_t *maskLine, *mask; - int dstStride, maskStride; - uint32_t w; - uint8x8_t sval2; - uint8x8x4_t sval8; - uint8x8_t mask_selector=vreinterpret_u8_u64(vcreate_u64(0x0101010100000000ULL)); - uint8x8_t alpha_selector=vreinterpret_u8_u64(vcreate_u64(0x0707070703030303ULL)); - - fbComposeGetSolid(pSrc, src, pDst->bits.format); - + uint32_t src, srca; + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint32_t w; + uint8x8_t sval2; + uint8x8x4_t sval8; + uint8x8_t mask_selector = vreinterpret_u8_u64 (vcreate_u64 (0x0101010100000000ULL)); + uint8x8_t alpha_selector = vreinterpret_u8_u64 (vcreate_u64 (0x0707070703030303ULL)); + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); + + /* bail out if fully transparent */ srca = src >> 24; if (src == 0) return; - sval2=vreinterpret_u8_u32(vdup_n_u32(src)); - sval8.val[0]=vdup_lane_u8(sval2,0); - sval8.val[1]=vdup_lane_u8(sval2,1); - sval8.val[2]=vdup_lane_u8(sval2,2); - sval8.val[3]=vdup_lane_u8(sval2,3); + sval2 = vreinterpret_u8_u32 (vdup_n_u32 (src)); + sval8.val[0] = vdup_lane_u8 (sval2, 0); + sval8.val[1] = vdup_lane_u8 (sval2, 1); + sval8.val[2] = vdup_lane_u8 (sval2, 2); + sval8.val[3] = vdup_lane_u8 (sval2, 3); - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - if (width>=8) + if (width >= 8) { - // Use overlapping 8-pixel method, modified to avoid rewritten dest being reused - while (height--) - { - uint32_t *keep_dst; + /* Use overlapping 8-pixel method, modified to avoid + * rewritten dest being reused + */ + while (height--) + { + uint32_t *keep_dst = 0; - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; - w = width; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; #ifndef USE_GCC_INLINE_ASM - uint8x8_t alpha; - uint8x8x4_t dval, temp; - - alpha = vld1_u8((void*)mask); - dval = vld4_u8((void*)dst); - keep_dst = dst; - - temp = neon8mul(sval8,alpha); - dval = neon8mul(dval,vmvn_u8(temp.val[3])); - temp = neon8qadd(temp,dval); - - mask += (w & 7); - dst += (w & 7); - w -= (w & 7); - - while (w) - { - alpha = vld1_u8((void*)mask); - dval = vld4_u8((void*)dst); - - vst4_u8((void*)keep_dst,temp); - keep_dst = dst; - - temp = neon8mul(sval8,alpha); - dval = neon8mul(dval,vmvn_u8(temp.val[3])); - temp = neon8qadd(temp,dval); - - mask+=8; - dst+=8; - w-=8; - } - vst4_u8((void*)keep_dst,temp); + uint8x8_t alpha; + uint8x8x4_t dval, temp; + + alpha = vld1_u8 ((void *)mask); + dval = vld4_u8 ((void *)dst); + keep_dst = dst; + + temp = neon8mul (sval8, alpha); + dval = neon8mul (dval, vmvn_u8 (temp.val[3])); + temp = neon8qadd (temp, dval); + + mask += (w & 7); + dst += (w & 7); + w -= (w & 7); + + while (w) + { + alpha = vld1_u8 ((void *)mask); + dval = vld4_u8 ((void *)dst); + + vst4_u8 ((void *)keep_dst, temp); + keep_dst = dst; + + temp = neon8mul (sval8, alpha); + dval = neon8mul (dval, vmvn_u8 (temp.val[3])); + temp = neon8qadd (temp, dval); + + mask += 8; + dst += 8; + w -= 8; + } + vst4_u8 ((void *)keep_dst, temp); #else - asm volatile ( - "vdup.32 d0, %[src]\n\t" - "vdup.8 d1, d0[1]\n\t" - "vdup.8 d2, d0[2]\n\t" - "vdup.8 d3, d0[3]\n\t" - "vdup.8 d0, d0[0]\n\t" - - "vld4.8 {d4-d7}, 
[%[dst]]\n\t" - "vld1.8 {d31}, [%[mask]]\n\t" - "mov %[keep_dst], %[dst]\n\t" - - "and ip, %[w], #7\n\t" - "add %[mask], %[mask], ip\n\t" - "add %[dst], %[dst], ip, LSL#2\n\t" - "subs %[w], %[w], ip\n\t" - "b 9f\n\t" -// LOOP - "2:\n\t" - "vld4.8 {d4-d7}, [%[dst]]!\n\t" - "vld1.8 {d31}, [%[mask]]!\n\t" - "vst4.8 {d20-d23}, [%[keep_dst]]\n\t" - "sub %[keep_dst], %[dst], #8*4\n\t" - "subs %[w], %[w], #8\n\t" - "9:\n\t" - - "vmull.u8 q10, d31, d0\n\t" - "vmull.u8 q11, d31, d1\n\t" - "vmull.u8 q12, d31, d2\n\t" - "vmull.u8 q13, d31, d3\n\t" - "vrshr.u16 q8, q10, #8\n\t" - "vrshr.u16 q9, q11, #8\n\t" - "vraddhn.u16 d20, q10, q8\n\t" - "vraddhn.u16 d21, q11, q9\n\t" - "vrshr.u16 q9, q13, #8\n\t" - "vrshr.u16 q8, q12, #8\n\t" - "vraddhn.u16 d23, q13, q9\n\t" - "vraddhn.u16 d22, q12, q8\n\t" - - "vmvn.8 d30, d23\n\t" - "vmull.u8 q12, d30, d4\n\t" - "vmull.u8 q13, d30, d5\n\t" - "vmull.u8 q14, d30, d6\n\t" - "vmull.u8 q15, d30, d7\n\t" - - "vrshr.u16 q8, q12, #8\n\t" - "vrshr.u16 q9, q13, #8\n\t" - "vraddhn.u16 d4, q12, q8\n\t" - "vrshr.u16 q8, q14, #8\n\t" - "vraddhn.u16 d5, q13, q9\n\t" - "vrshr.u16 q9, q15, #8\n\t" - "vraddhn.u16 d6, q14, q8\n\t" - "vraddhn.u16 d7, q15, q9\n\t" -// result in d4-d7 - - "vqadd.u8 d20, d4, d20\n\t" - "vqadd.u8 d21, d5, d21\n\t" - "vqadd.u8 d22, d6, d22\n\t" - "vqadd.u8 d23, d7, d23\n\t" - - "bne 2b\n\t" - - "1:\n\t" - "vst4.8 {d20-d23}, [%[keep_dst]]\n\t" - - : [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [keep_dst] "=r" (keep_dst) - : [src] "r" (src) - : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7", - "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29", - "d30","d31" - ); + asm volatile ( + "vdup.32 d0, %[src]\n\t" + "vdup.8 d1, d0[1]\n\t" + "vdup.8 d2, d0[2]\n\t" + "vdup.8 d3, d0[3]\n\t" + "vdup.8 d0, d0[0]\n\t" + + "vld4.8 {d4-d7}, [%[dst]]\n\t" + "vld1.8 {d31}, [%[mask]]\n\t" + "mov %[keep_dst], %[dst]\n\t" + + "and ip, %[w], #7\n\t" + "add %[mask], %[mask], ip\n\t" + "add %[dst], %[dst], ip, LSL#2\n\t" + "subs %[w], %[w], ip\n\t" + "b 9f\n\t" +/* LOOP */ + "2:\n\t" + "vld4.8 {d4-d7}, [%[dst]]!\n\t" + "vld1.8 {d31}, [%[mask]]!\n\t" + "vst4.8 {d20-d23}, [%[keep_dst]]\n\t" + "sub %[keep_dst], %[dst], #8*4\n\t" + "subs %[w], %[w], #8\n\t" + "9:\n\t" + + "vmull.u8 q10, d31, d0\n\t" + "vmull.u8 q11, d31, d1\n\t" + "vmull.u8 q12, d31, d2\n\t" + "vmull.u8 q13, d31, d3\n\t" + "vrshr.u16 q8, q10, #8\n\t" + "vrshr.u16 q9, q11, #8\n\t" + "vraddhn.u16 d20, q10, q8\n\t" + "vraddhn.u16 d21, q11, q9\n\t" + "vrshr.u16 q9, q13, #8\n\t" + "vrshr.u16 q8, q12, #8\n\t" + "vraddhn.u16 d23, q13, q9\n\t" + "vraddhn.u16 d22, q12, q8\n\t" + + "vmvn.8 d30, d23\n\t" + "vmull.u8 q12, d30, d4\n\t" + "vmull.u8 q13, d30, d5\n\t" + "vmull.u8 q14, d30, d6\n\t" + "vmull.u8 q15, d30, d7\n\t" + + "vrshr.u16 q8, q12, #8\n\t" + "vrshr.u16 q9, q13, #8\n\t" + "vraddhn.u16 d4, q12, q8\n\t" + "vrshr.u16 q8, q14, #8\n\t" + "vraddhn.u16 d5, q13, q9\n\t" + "vrshr.u16 q9, q15, #8\n\t" + "vraddhn.u16 d6, q14, q8\n\t" + "vraddhn.u16 d7, q15, q9\n\t" +/* result in d4-d7 */ + + "vqadd.u8 d20, d4, d20\n\t" + "vqadd.u8 d21, d5, d21\n\t" + "vqadd.u8 d22, d6, d22\n\t" + "vqadd.u8 d23, d7, d23\n\t" + + "bne 2b\n\t" + + "1:\n\t" + "vst4.8 {d20-d23}, [%[keep_dst]]\n\t" + + : [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [keep_dst] "=r" (keep_dst) + : [src] "r" (src) + : "ip", "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", + "d30", "d31" + ); 
#endif - } + } + } + else + { + while (height--) + { + uint8x8_t alpha; + + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w >= 2) + { + uint8x8_t dval, temp, res; + + alpha = vtbl1_u8 ( + vreinterpret_u8_u16 (vld1_dup_u16 ((void *)mask)), mask_selector); + dval = vld1_u8 ((void *)dst); + + temp = neon2mul (sval2, alpha); + res = vqadd_u8 ( + temp, neon2mul (dval, vtbl1_u8 (vmvn_u8 (temp), alpha_selector))); + + vst1_u8 ((void *)dst, res); + + mask += 2; + dst += 2; + w -= 2; + } + + if (w) + { + uint8x8_t dval, temp, res; + + alpha = vtbl1_u8 (vld1_dup_u8 ((void *)mask), mask_selector); + dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst)); + + temp = neon2mul (sval2, alpha); + res = vqadd_u8 ( + temp, neon2mul (dval, vtbl1_u8 (vmvn_u8 (temp), alpha_selector))); + + vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (res), 0); + } + } + } +} + +static void +neon_composite_add_8888_8_8 (pixman_implementation_t * impl, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint32_t w; + uint32_t src; + uint8x8_t sa; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + src = _pixman_image_get_solid (src_image, dst_image->bits.format); + sa = vdup_n_u8 ((src) >> 24); + + if (width >= 8) + { + /* Use overlapping 8-pixel method, modified to avoid rewritten dest being reused */ + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + uint8x8_t mval, dval, res; + uint8_t *keep_dst; + + mval = vld1_u8 ((void *)mask); + dval = vld1_u8 ((void *)dst); + keep_dst = dst; + + res = vqadd_u8 (neon2mul (mval, sa), dval); + + mask += (w & 7); + dst += (w & 7); + w -= w & 7; + + while (w) + { + mval = vld1_u8 ((void *)mask); + dval = vld1_u8 ((void *)dst); + vst1_u8 ((void *)keep_dst, res); + keep_dst = dst; + + res = vqadd_u8 (neon2mul (mval, sa), dval); + + mask += 8; + dst += 8; + w -= 8; + } + vst1_u8 ((void *)keep_dst, res); + } } else { - while (height--) - { - uint8x8_t alpha; + /* Use 4/2/1 load/store method to handle 1-7 pixels */ + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; - w = width; + uint8x8_t mval = sa, dval = sa, res; + uint8_t *dst4 = 0, *dst2 = 0; - while (w>=2) - { - uint8x8_t dval, temp, res; + if (w & 4) + { + mval = vreinterpret_u8_u32 ( + vld1_lane_u32 ((void *)mask, vreinterpret_u32_u8 (mval), 1)); + dval = vreinterpret_u8_u32 ( + vld1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (dval), 1)); + + dst4 = dst; + mask += 4; + dst += 4; + } - alpha = vtbl1_u8(vreinterpret_u8_u16(vld1_dup_u16((void*)mask)), mask_selector); - dval = vld1_u8((void*)dst); + if (w & 2) + { + mval = vreinterpret_u8_u16 ( + vld1_lane_u16 ((void *)mask, vreinterpret_u16_u8 (mval), 1)); + dval = vreinterpret_u8_u16 ( + vld1_lane_u16 ((void *)dst, vreinterpret_u16_u8 (dval), 1)); + dst2 = dst; + mask += 2; + dst += 2; + } - temp = neon2mul(sval2,alpha); - res = 
vqadd_u8(temp,neon2mul(dval,vtbl1_u8(vmvn_u8(temp), alpha_selector))); + if (w & 1) + { + mval = vld1_lane_u8 (mask, mval, 1); + dval = vld1_lane_u8 (dst, dval, 1); + } - vst1_u8((void*)dst,res); + res = vqadd_u8 (neon2mul (mval, sa), dval); - mask+=2; - dst+=2; - w-=2; - } - if (w) - { - uint8x8_t dval, temp, res; + if (w & 1) + vst1_lane_u8 (dst, res, 1); + if (w & 2) + vst1_lane_u16 ((void *)dst2, vreinterpret_u16_u8 (res), 1); + if (w & 4) + vst1_lane_u32 ((void *)dst4, vreinterpret_u32_u8 (res), 1); + } + } +} - alpha = vtbl1_u8(vld1_dup_u8((void*)mask), mask_selector); - dval = vreinterpret_u8_u32(vld1_dup_u32((void*)dst)); +#ifdef USE_GCC_INLINE_ASM - temp = neon2mul(sval2,alpha); - res = vqadd_u8(temp,neon2mul(dval,vtbl1_u8(vmvn_u8(temp), alpha_selector))); +static void +neon_composite_src_16_16 (pixman_implementation_t * impl, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint16_t *dst_line, *src_line; + uint32_t dst_stride, src_stride; - vst1_lane_u32((void*)dst,vreinterpret_u32_u8(res),0); - } - } + if (!height || !width) + return; + + /* We simply copy 16-bit-aligned pixels from one place to another. */ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + /* Preload the first input scanline */ + { + uint16_t *src_ptr = src_line; + uint32_t count = width; + + asm volatile ( + "0: @ loop \n" + " subs %[count], %[count], #32 \n" + " pld [%[src]] \n" + " add %[src], %[src], #64 \n" + " bgt 0b \n" + + /* Clobbered input registers marked as input/outputs */ + : [src] "+r" (src_ptr), [count] "+r" (count) + : /* no unclobbered inputs */ + : "cc" + ); + } + + while (height--) + { + uint16_t *dst_ptr = dst_line; + uint16_t *src_ptr = src_line; + uint32_t count = width; + uint32_t tmp = 0; + + /* Uses multi-register access and preloading to maximise bandwidth. + * Each pixel is one halfword, so a quadword contains 8px. + * Preload frequency assumed a 64-byte cacheline. + */ + asm volatile ( + " cmp %[count], #64 \n" + " blt 1f @ skip oversized fragments \n" + "0: @ start with eight quadwords at a time \n" + /* preload from next scanline */ + " pld [%[src], %[src_stride], LSL #1] \n" + " sub %[count], %[count], #64 \n" + " vld1.16 {d16, d17, d18, d19}, [%[src]]! \n" + " vld1.16 {d20, d21, d22, d23}, [%[src]]! \n" + /* preload from next scanline */ + " pld [%[src], %[src_stride], LSL #1] \n" + " vld1.16 {d24, d25, d26, d27}, [%[src]]! \n" + " vld1.16 {d28, d29, d30, d31}, [%[src]]! \n" + " cmp %[count], #64 \n" + " vst1.16 {d16, d17, d18, d19}, [%[dst]]! \n" + " vst1.16 {d20, d21, d22, d23}, [%[dst]]! \n" + " vst1.16 {d24, d25, d26, d27}, [%[dst]]! \n" + " vst1.16 {d28, d29, d30, d31}, [%[dst]]! \n" + " bge 0b \n" + " cmp %[count], #0 \n" + " beq 7f @ aligned fastpath \n" + "1: @ four quadwords \n" + " tst %[count], #32 \n" + " beq 2f @ skip oversized fragment \n" + /* preload from next scanline */ + " pld [%[src], %[src_stride], LSL #1] \n" + " vld1.16 {d16, d17, d18, d19}, [%[src]]! \n" + " vld1.16 {d20, d21, d22, d23}, [%[src]]! \n" + " vst1.16 {d16, d17, d18, d19}, [%[dst]]! \n" + " vst1.16 {d20, d21, d22, d23}, [%[dst]]! 
\n" + "2: @ two quadwords \n" + " tst %[count], #16 \n" + " beq 3f @ skip oversized fragment \n" + /* preload from next scanline */ + " pld [%[src], %[src_stride], LSL #1] \n" + " vld1.16 {d16, d17, d18, d19}, [%[src]]! \n" + " vst1.16 {d16, d17, d18, d19}, [%[dst]]! \n" + "3: @ one quadword \n" + " tst %[count], #8 \n" + " beq 4f @ skip oversized fragment \n" + " vld1.16 {d16, d17}, [%[src]]! \n" + " vst1.16 {d16, d17}, [%[dst]]! \n" + "4: @ one doubleword \n" + " tst %[count], #4 \n" + " beq 5f @ skip oversized fragment \n" + " vld1.16 {d16}, [%[src]]! \n" + " vst1.16 {d16}, [%[dst]]! \n" + "5: @ one word \n" + " tst %[count], #2 \n" + " beq 6f @ skip oversized fragment \n" + " ldr %[tmp], [%[src]], #4 \n" + " str %[tmp], [%[dst]], #4 \n" + "6: @ one halfword \n" + " tst %[count], #1 \n" + " beq 7f @ skip oversized fragment \n" + " ldrh %[tmp], [%[src]] \n" + " strh %[tmp], [%[dst]] \n" + "7: @ end \n" + + /* Clobbered input registers marked as input/outputs */ + : [dst] "+r" (dst_ptr), [src] "+r" (src_ptr), + [count] "+r" (count), [tmp] "+r" (tmp) + + /* Unclobbered input */ + : [src_stride] "r" (src_stride) + + /* Clobbered vector registers */ + : "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "cc", "memory" + ); + + src_line += src_stride; + dst_line += dst_stride; } } +#endif /* USE_GCC_INLINE_ASM */ -void -fbCompositeSrcAdd_8888x8x8neon ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +static void +neon_composite_src_24_16 (pixman_implementation_t * impl, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint8_t *dstLine, *dst; - uint8_t *maskLine, *mask; - int dstStride, maskStride; - uint32_t w; - uint32_t src; - uint8x8_t sa; - - fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); - fbComposeGetSolid (pSrc, src, pDst->bits.format); - sa = vdup_n_u8((src) >> 24); + uint16_t *dst_line; + uint32_t *src_line; + uint32_t dst_stride, src_stride; - if (width>=8) + if (!width || !height) + return; + + /* We simply copy pixels from one place to another, + * assuming that the source's alpha is opaque. 
+ */ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + /* Preload the first input scanline */ { - // Use overlapping 8-pixel method, modified to avoid rewritten dest being reused - while (height--) - { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; - w = width; - - uint8x8_t mval, dval, res; - uint8_t *keep_dst; - - mval = vld1_u8((void *)mask); - dval = vld1_u8((void *)dst); - keep_dst = dst; - - res = vqadd_u8(neon2mul(mval,sa),dval); - - mask += (w & 7); - dst += (w & 7); - w -= w & 7; - - while (w) - { - mval = vld1_u8((void *)mask); - dval = vld1_u8((void *)dst); - vst1_u8((void *)keep_dst, res); - keep_dst = dst; - - res = vqadd_u8(neon2mul(mval,sa),dval); - - mask += 8; - dst += 8; - w -= 8; - } - vst1_u8((void *)keep_dst, res); - } + uint8_t *src_ptr = (uint8_t*) src_line; + uint32_t count = (width + 15) / 16; + +#ifdef USE_GCC_INLINE_ASM + asm volatile ( + "0: @ loop \n" + " subs %[count], %[count], #1 \n" + " pld [%[src]] \n" + " add %[src], %[src], #64 \n" + " bgt 0b \n" + + /* Clobbered input registers marked as input/outputs */ + : [src] "+r" (src_ptr), [count] "+r" (count) + : /* no unclobbered inputs */ + : "cc" + ); +#else + do + { + __pld (src_ptr); + src_ptr += 64; + } + while (--count); +#endif + } + + while (height--) + { + uint16_t *dst_ptr = dst_line; + uint32_t *src_ptr = src_line; + uint32_t count = width; + const uint32_t rb_mask = 0x1F; + const uint32_t g_mask = 0x3F; + + /* If you're going to complain about a goto, take a long hard look + * at the massive blocks of assembler this skips over. ;-) + */ + if (count < 8) + goto small_stuff; + +#ifdef USE_GCC_INLINE_ASM + + /* This is not as aggressive as the RGB565-source case. + * Generally the source is in cached RAM when the formats are + * different, so we use preload. + * + * We don't need to blend, so we are not reading from the + * uncached framebuffer. + */ + asm volatile ( + " cmp %[count], #16 \n" + " blt 1f @ skip oversized fragments \n" + "0: @ start with sixteen pixels at a time \n" + " sub %[count], %[count], #16 \n" + " pld [%[src], %[src_stride], lsl #2] @ preload from next scanline \n" + " vld4.8 {d0, d1, d2, d3}, [%[src]]! @ d3 is alpha and ignored, d2-0 are rgb. \n" + " vld4.8 {d4, d5, d6, d7}, [%[src]]! @ d7 is alpha and ignored, d6-4 are rgb. \n" + " vshll.u8 q8, d2, #8 @ expand first red for repacking \n" + " vshll.u8 q10, d1, #8 @ expand first green for repacking \n" + " vshll.u8 q11, d0, #8 @ expand first blue for repacking \n" + " vshll.u8 q9, d6, #8 @ expand second red for repacking \n" + " vsri.u16 q8, q10, #5 @ insert first green after red \n" + " vshll.u8 q10, d5, #8 @ expand second green for repacking \n" + " vsri.u16 q8, q11, #11 @ insert first blue after green \n" + " vshll.u8 q11, d4, #8 @ expand second blue for repacking \n" + " vsri.u16 q9, q10, #5 @ insert second green after red \n" + " vsri.u16 q9, q11, #11 @ insert second blue after green \n" + " cmp %[count], #16 \n" + " vst1.16 {d16, d17, d18, d19}, [%[dst]]! @ store 16 pixels \n" + " bge 0b \n" + "1: @ end of main loop \n" + " cmp %[count], #8 @ can we still do an 8-pixel block? \n" + " blt 2f \n" + " sub %[count], %[count], #8 \n" + " pld [%[src], %[src_stride], lsl #2] @ preload from next scanline \n" + " vld4.8 {d0, d1, d2, d3}, [%[src]]! @ d3 is alpha and ignored, d2-0 are rgb. 
\n" + " vshll.u8 q8, d2, #8 @ expand first red for repacking \n" + " vshll.u8 q10, d1, #8 @ expand first green for repacking \n" + " vshll.u8 q11, d0, #8 @ expand first blue for repacking \n" + " vsri.u16 q8, q10, #5 @ insert first green after red \n" + " vsri.u16 q8, q11, #11 @ insert first blue after green \n" + " vst1.16 {d16, d17}, [%[dst]]! @ store 8 pixels \n" + "2: @ end \n" + + /* Clobbered input and working registers marked as input/outputs */ + : [dst] "+r" (dst_ptr), [src] "+r" (src_ptr), [count] "+r" (count) + + /* Unclobbered input */ + : [src_stride] "r" (src_stride) + + /* Clobbered vector registers */ + + /* NB: these are the quad aliases of the + * double registers used in the asm + */ + : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16", "d17", + "d18", "d19", "d20", "d21", "d22", "d23", "cc", "memory" + ); +#else + /* A copy of the above code, in intrinsics-form. */ + while (count >= 16) + { + uint8x8x4_t pixel_set_a, pixel_set_b; + uint16x8_t red_a, green_a, blue_a; + uint16x8_t red_b, green_b, blue_b; + uint16x8_t dest_pixels_a, dest_pixels_b; + + count -= 16; + __pld (src_ptr + src_stride); + pixel_set_a = vld4_u8 ((uint8_t*)(src_ptr)); + pixel_set_b = vld4_u8 ((uint8_t*)(src_ptr + 8)); + src_ptr += 16; + + red_a = vshll_n_u8 (pixel_set_a.val[2], 8); + green_a = vshll_n_u8 (pixel_set_a.val[1], 8); + blue_a = vshll_n_u8 (pixel_set_a.val[0], 8); + + red_b = vshll_n_u8 (pixel_set_b.val[2], 8); + green_b = vshll_n_u8 (pixel_set_b.val[1], 8); + blue_b = vshll_n_u8 (pixel_set_b.val[0], 8); + + dest_pixels_a = vsriq_n_u16 (red_a, green_a, 5); + dest_pixels_b = vsriq_n_u16 (red_b, green_b, 5); + + dest_pixels_a = vsriq_n_u16 (dest_pixels_a, blue_a, 11); + dest_pixels_b = vsriq_n_u16 (dest_pixels_b, blue_b, 11); + + /* There doesn't seem to be an intrinsic for the + * double-quadword variant + */ + vst1q_u16 (dst_ptr, dest_pixels_a); + vst1q_u16 (dst_ptr + 8, dest_pixels_b); + dst_ptr += 16; + } + + /* 8-pixel loop */ + if (count >= 8) + { + uint8x8x4_t pixel_set_a; + uint16x8_t red_a, green_a, blue_a; + uint16x8_t dest_pixels_a; + + __pld (src_ptr + src_stride); + count -= 8; + pixel_set_a = vld4_u8 ((uint8_t*)(src_ptr)); + src_ptr += 8; + + red_a = vshll_n_u8 (pixel_set_a.val[2], 8); + green_a = vshll_n_u8 (pixel_set_a.val[1], 8); + blue_a = vshll_n_u8 (pixel_set_a.val[0], 8); + + dest_pixels_a = vsriq_n_u16 (red_a, green_a, 5); + dest_pixels_a = vsriq_n_u16 (dest_pixels_a, blue_a, 11); + + vst1q_u16 (dst_ptr, dest_pixels_a); + dst_ptr += 8; + } + +#endif /* USE_GCC_INLINE_ASM */ + + small_stuff: + if (count) + __pld (src_ptr + src_stride); + + while (count >= 2) + { + uint32_t src_pixel_a = *src_ptr++; + uint32_t src_pixel_b = *src_ptr++; + + /* ARM is really good at shift-then-ALU ops. */ + /* This should be a total of six shift-ANDs and five shift-ORs. */ + uint32_t dst_pixels_a; + uint32_t dst_pixels_b; + + dst_pixels_a = ((src_pixel_a >> 3) & rb_mask); + dst_pixels_a |= ((src_pixel_a >> 10) & g_mask) << 5; + dst_pixels_a |= ((src_pixel_a >> 19) & rb_mask) << 11; + + dst_pixels_b = ((src_pixel_b >> 3) & rb_mask); + dst_pixels_b |= ((src_pixel_b >> 10) & g_mask) << 5; + dst_pixels_b |= ((src_pixel_b >> 19) & rb_mask) << 11; + + /* little-endian mode only */ + *((uint32_t*) dst_ptr) = dst_pixels_a | (dst_pixels_b << 16); + dst_ptr += 2; + count -= 2; + } + + if (count) + { + uint32_t src_pixel = *src_ptr++; + + /* ARM is really good at shift-then-ALU ops. + * This block should end up as three shift-ANDs + * and two shift-ORs. 
+ */ + uint32_t tmp_blue = (src_pixel >> 3) & rb_mask; + uint32_t tmp_green = (src_pixel >> 10) & g_mask; + uint32_t tmp_red = (src_pixel >> 19) & rb_mask; + uint16_t dst_pixel = (tmp_red << 11) | (tmp_green << 5) | tmp_blue; + + *dst_ptr++ = dst_pixel; + count--; + } + + src_line += src_stride; + dst_line += dst_stride; + } +} + +static pixman_bool_t +pixman_fill_neon (uint32_t *bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t _xor) +{ + uint32_t byte_stride, color; + char *dst; + + /* stride is always multiple of 32bit units in pixman */ + byte_stride = stride * sizeof(uint32_t); + + switch (bpp) + { + case 8: + dst = ((char *) bits) + y * byte_stride + x; + _xor &= 0xff; + color = _xor << 24 | _xor << 16 | _xor << 8 | _xor; + break; + + case 16: + dst = ((char *) bits) + y * byte_stride + x * 2; + _xor &= 0xffff; + color = _xor << 16 | _xor; + width *= 2; /* width to bytes */ + break; + + case 32: + dst = ((char *) bits) + y * byte_stride + x * 4; + color = _xor; + width *= 4; /* width to bytes */ + break; + + default: + return FALSE; + } + +#ifdef USE_GCC_INLINE_ASM + if (width < 16) + { + /* We have a special case for such small widths that don't allow + * us to use wide 128-bit stores anyway. We don't waste time + * trying to align writes, since there are only very few of them anyway + */ + asm volatile ( + "cmp %[height], #0\n"/* Check if empty fill */ + "beq 3f\n" + "vdup.32 d0, %[color]\n"/* Fill the color to neon req */ + + /* Check if we have a such width that can easily be handled by single + * operation for each scanline. This significantly reduces the number + * of test/branch instructions for each scanline + */ + "cmp %[width], #8\n" + "beq 4f\n" + "cmp %[width], #4\n" + "beq 5f\n" + "cmp %[width], #2\n" + "beq 6f\n" + + /* Loop starts here for each scanline */ + "1:\n" + "mov r4, %[dst]\n" /* Starting address of the current line */ + "tst %[width], #8\n" + "beq 2f\n" + "vst1.8 {d0}, [r4]!\n" + "2:\n" + "tst %[width], #4\n" + "beq 2f\n" + "str %[color], [r4], #4\n" + "2:\n" + "tst %[width], #2\n" + "beq 2f\n" + "strh %[color], [r4], #2\n" + "2:\n" + "tst %[width], #1\n" + "beq 2f\n" + "strb %[color], [r4], #1\n" + "2:\n" + + "subs %[height], %[height], #1\n" + "add %[dst], %[dst], %[byte_stride]\n" + "bne 1b\n" + "b 3f\n" + + /* Special fillers for those widths that we can do with single operation */ + "4:\n" + "subs %[height], %[height], #1\n" + "vst1.8 {d0}, [%[dst]]\n" + "add %[dst], %[dst], %[byte_stride]\n" + "bne 4b\n" + "b 3f\n" + + "5:\n" + "subs %[height], %[height], #1\n" + "str %[color], [%[dst]]\n" + "add %[dst], %[dst], %[byte_stride]\n" + "bne 5b\n" + "b 3f\n" + + "6:\n" + "subs %[height], %[height], #1\n" + "strh %[color], [%[dst]]\n" + "add %[dst], %[dst], %[byte_stride]\n" + "bne 6b\n" + + "3:\n" + : [height] "+r" (height), [dst] "+r" (dst) + : [color] "r" (color), [width] "r" (width), + [byte_stride] "r" (byte_stride) + : "memory", "cc", "d0", "r4"); } else { - // Use 4/2/1 load/store method to handle 1-7 pixels - while (height--) - { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; - w = width; - - uint8x8_t mval, dval, res; - uint8_t *dst4, *dst2; - - if (w&4) - { - mval = vreinterpret_u8_u32(vld1_lane_u32((void *)mask, vreinterpret_u32_u8(mval), 1)); - dval = vreinterpret_u8_u32(vld1_lane_u32((void *)dst, vreinterpret_u32_u8(dval), 1)); - - dst4 = dst; - mask += 4; - dst += 4; - } - if (w&2) - { - mval = vreinterpret_u8_u16(vld1_lane_u16((void *)mask, vreinterpret_u16_u8(mval), 
1));
-			dval = vreinterpret_u8_u16(vld1_lane_u16((void *)dst, vreinterpret_u16_u8(dval), 1));
-			dst2 = dst;
-			mask += 2;
-			dst += 2;
-		}
-		if (w&1)
-		{
-			mval = vld1_lane_u8(mask, mval, 1);
-			dval = vld1_lane_u8(dst, dval, 1);
-		}
-
-		res = vqadd_u8(neon2mul(mval,sa),dval);
-
-		if (w&1)
-			vst1_lane_u8(dst, res, 1);
-		if (w&2)
-			vst1_lane_u16((void *)dst2, vreinterpret_u16_u8(res), 1);
-		if (w&4)
-			vst1_lane_u32((void *)dst4, vreinterpret_u32_u8(res), 1);
-	}
+	asm volatile (
+	    "cmp %[height], #0\n" /* Check for an empty fill */
+	    "beq 5f\n"
+	    "vdup.32 q0, %[color]\n" /* Duplicate the fill color into a NEON register */
+
+	    /* Loop starts here for each scanline */
+	    "1:\n"
+	    "mov r4, %[dst]\n" /* Starting address of the current line */
+	    "mov r5, %[width]\n" /* We're going to write this many bytes */
+	    "ands r6, r4, #15\n" /* Are we at a 128-bit aligned address? */
+	    "beq 2f\n" /* Jump to the best case */
+
+	    /* We are not 128-bit aligned: however, we know that we can get to the
+	       next aligned location, since the fill is at least 16 bytes wide */
+	    "rsb r6, r6, #16\n" /* We would need to go forward this much */
+	    "sub r5, r5, r6\n" /* Update bytes left */
+	    "tst r6, #1\n"
+	    "beq 6f\n"
+	    "vst1.8 {d0[0]}, [r4]!\n" /* Store a byte; now we are halfword aligned */
+	    "6:\n"
+	    "tst r6, #2\n"
+	    "beq 6f\n"
+	    "vst1.16 {d0[0]}, [r4, :16]!\n" /* Store a halfword; now we are word aligned */
+	    "6:\n"
+	    "tst r6, #4\n"
+	    "beq 6f\n"
+	    "vst1.32 {d0[0]}, [r4, :32]!\n" /* Store a word; now we are doubleword aligned */
+	    "6:\n"
+	    "tst r6, #8\n"
+	    "beq 2f\n"
+	    "vst1.64 {d0}, [r4, :64]!\n" /* Store a doubleword; now we are quadword aligned */
+
+	    /* The good case: we are 128-bit aligned for this scanline */
+	    "2:\n"
+	    "and r6, r5, #15\n" /* Number of trailing bytes */
+	    "cmp r5, r6\n" /* Do we have at least one quadword to write? */
+	    "beq 6f\n" /* No, we just write the tail */
+	    "lsr r5, r5, #4\n" /* This many full quadwords to write */
+
+	    /* The main block: do 128-bit aligned writes */
+	    "3:\n"
+	    "subs r5, r5, #1\n"
+	    "vst1.64 {d0, d1}, [r4, :128]!\n"
+	    "bne 3b\n"
+
+	    /* Handle the trailing bytes: do 64-, 32-, 16- and 8-bit aligned writes
+	       as needed. We know that we are currently at a 128-bit aligned address,
+	       so we can simply pick the biggest operations that the remaining write
+	       width allows */
+	    "6:\n"
+	    "cmp r6, #0\n"
+	    "beq 4f\n"
+	    "tst r6, #8\n"
+	    "beq 6f\n"
+	    "vst1.64 {d0}, [r4, :64]!\n"
+	    "6:\n"
+	    "tst r6, #4\n"
+	    "beq 6f\n"
+	    "vst1.32 {d0[0]}, [r4, :32]!\n"
+	    "6:\n"
+	    "tst r6, #2\n"
+	    "beq 6f\n"
+	    "vst1.16 {d0[0]}, [r4, :16]!\n"
+	    "6:\n"
+	    "tst r6, #1\n"
+	    "beq 4f\n"
+	    "vst1.8 {d0[0]}, [r4]!\n"
+	    "4:\n"
+
+	    /* Handle the next scanline */
+	    "subs %[height], %[height], #1\n"
+	    "add %[dst], %[dst], %[byte_stride]\n"
+	    "bne 1b\n"
+	    "5:\n"
+	    : [height] "+r" (height), [dst] "+r" (dst)
+	    : [color] "r" (color), [width] "r" (width),
+	      [byte_stride] "r" (byte_stride)
+	    : "memory", "cc", "d0", "d1", "r4", "r5", "r6");
+    }
+    return TRUE;
+
+#else
+
+    /* TODO: intrinsic version for armcc */
+    return FALSE;
+
+#endif
+}

-static const FastPathInfo arm_neon_fast_path_array[] =
+/* TODO: is a more generic way of doing this being introduced?
*/ +#define NEON_SCANLINE_BUFFER_PIXELS (1024) + +static inline void +neon_quadword_copy (void * dst, + void * src, + uint32_t count, /* of quadwords */ + uint32_t trailer_count /* of bytes */) { - { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeSrcAdd_8888x8x8neon, 0 }, - { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcAdd_8000x8000neon, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565neon, 0 }, - { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565neon, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565neon, 0 }, - { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565neon, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888neon, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888neon, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888neon, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888neon, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888neon, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888neon, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, fbCompositeSolidMask_nx8x0565neon, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, fbCompositeSolidMask_nx8x0565neon, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888neon, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888neon, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888neon, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888neon, 0 }, - { PIXMAN_OP_NONE }, -}; + uint8_t *t_dst = dst, *t_src = src; + + /* Uses aligned multi-register loads to maximise read bandwidth + * on uncached memory such as framebuffers + * The accesses do not have the aligned qualifiers, so that the copy + * may convert between aligned-uncached and unaligned-cached memory. + * It is assumed that the CPU can infer alignedness from the address. + */ + +#ifdef USE_GCC_INLINE_ASM + + asm volatile ( + " cmp %[count], #8 \n" + " blt 1f @ skip oversized fragments \n" + "0: @ start with eight quadwords at a time \n" + " sub %[count], %[count], #8 \n" + " vld1.8 {d16, d17, d18, d19}, [%[src]]! \n" + " vld1.8 {d20, d21, d22, d23}, [%[src]]! \n" + " vld1.8 {d24, d25, d26, d27}, [%[src]]! \n" + " vld1.8 {d28, d29, d30, d31}, [%[src]]! \n" + " cmp %[count], #8 \n" + " vst1.8 {d16, d17, d18, d19}, [%[dst]]! \n" + " vst1.8 {d20, d21, d22, d23}, [%[dst]]! \n" + " vst1.8 {d24, d25, d26, d27}, [%[dst]]! \n" + " vst1.8 {d28, d29, d30, d31}, [%[dst]]! \n" + " bge 0b \n" + "1: @ four quadwords \n" + " tst %[count], #4 \n" + " beq 2f @ skip oversized fragment \n" + " vld1.8 {d16, d17, d18, d19}, [%[src]]! \n" + " vld1.8 {d20, d21, d22, d23}, [%[src]]! \n" + " vst1.8 {d16, d17, d18, d19}, [%[dst]]! \n" + " vst1.8 {d20, d21, d22, d23}, [%[dst]]! \n" + "2: @ two quadwords \n" + " tst %[count], #2 \n" + " beq 3f @ skip oversized fragment \n" + " vld1.8 {d16, d17, d18, d19}, [%[src]]! \n" + " vst1.8 {d16, d17, d18, d19}, [%[dst]]! 
\n" + "3: @ one quadword \n" + " tst %[count], #1 \n" + " beq 4f @ skip oversized fragment \n" + " vld1.8 {d16, d17}, [%[src]]! \n" + " vst1.8 {d16, d17}, [%[dst]]! \n" + "4: @ end \n" + + /* Clobbered input registers marked as input/outputs */ + : [dst] "+r" (t_dst), [src] "+r" (t_src), [count] "+r" (count) + + /* No unclobbered inputs */ + : + + /* Clobbered vector registers */ + : "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", + "d26", "d27", "d28", "d29", "d30", "d31", "cc", "memory"); + +#else + + while (count >= 8) + { + uint8x16x4_t t1 = vld4q_u8 (t_src); + uint8x16x4_t t2 = vld4q_u8 (t_src + sizeof(uint8x16x4_t)); + + t_src += sizeof(uint8x16x4_t) * 2; + vst4q_u8 (t_dst, t1); + vst4q_u8 (t_dst + sizeof(uint8x16x4_t), t2); + t_dst += sizeof(uint8x16x4_t) * 2; + count -= 8; + } + + if (count & 4) + { + uint8x16x4_t t1 = vld4q_u8 (t_src); + + t_src += sizeof(uint8x16x4_t); + vst4q_u8 (t_dst, t1); + t_dst += sizeof(uint8x16x4_t); + } + + if (count & 2) + { + uint8x8x4_t t1 = vld4_u8 (t_src); + + t_src += sizeof(uint8x8x4_t); + vst4_u8 (t_dst, t1); + t_dst += sizeof(uint8x8x4_t); + } + + if (count & 1) + { + uint8x16_t t1 = vld1q_u8 (t_src); + + t_src += sizeof(uint8x16_t); + vst1q_u8 (t_dst, t1); + t_dst += sizeof(uint8x16_t); + } + +#endif /* !USE_GCC_INLINE_ASM */ + + if (trailer_count) + { + if (trailer_count & 8) + { + uint8x8_t t1 = vld1_u8 (t_src); + + t_src += sizeof(uint8x8_t); + vst1_u8 (t_dst, t1); + t_dst += sizeof(uint8x8_t); + } + + if (trailer_count & 4) + { + *((uint32_t*) t_dst) = *((uint32_t*) t_src); + + t_dst += 4; + t_src += 4; + } + + if (trailer_count & 2) + { + *((uint16_t*) t_dst) = *((uint16_t*) t_src); + + t_dst += 2; + t_src += 2; + } + + if (trailer_count & 1) + { + *t_dst++ = *t_src++; + } + } +} + +static inline void +solid_over_565_8_pix_neon (uint32_t glyph_colour, + uint16_t *dest, + uint8_t * in_mask, + uint32_t dest_stride, /* bytes, not elements */ + uint32_t mask_stride, + uint32_t count /* 8-pixel groups */) +{ + /* Inner loop of glyph blitter (solid colour, alpha mask) */ + +#ifdef USE_GCC_INLINE_ASM + + asm volatile ( + " vld4.8 {d20[], d21[], d22[], d23[]}, [%[glyph_colour]] @ splat solid colour components \n" + "0: @ loop \n" + " vld1.16 {d0, d1}, [%[dest]] @ load first pixels from framebuffer \n" + " vld1.8 {d17}, [%[in_mask]] @ load alpha mask of glyph \n" + " vmull.u8 q9, d17, d23 @ apply glyph colour alpha to mask \n" + " vshrn.u16 d17, q9, #8 @ reformat it to match original mask \n" + " vmvn d18, d17 @ we need the inverse mask for the background \n" + " vsli.u16 q3, q0, #5 @ duplicate framebuffer blue bits \n" + " vshrn.u16 d2, q0, #8 @ unpack red from framebuffer pixels \n" + " vshrn.u16 d4, q0, #3 @ unpack green \n" + " vsri.u8 d2, d2, #5 @ duplicate red bits (extend 5 to 8) \n" + " vshrn.u16 d6, q3, #2 @ unpack extended blue (truncate 10 to 8) \n" + " vsri.u8 d4, d4, #6 @ duplicate green bits (extend 6 to 8) \n" + " vmull.u8 q1, d2, d18 @ apply inverse mask to background red... \n" + " vmull.u8 q2, d4, d18 @ ...green... \n" + " vmull.u8 q3, d6, d18 @ ...blue \n" + " subs %[count], %[count], #1 @ decrement/test loop counter \n" + " vmlal.u8 q1, d17, d22 @ add masked foreground red... \n" + " vmlal.u8 q2, d17, d21 @ ...green... 
\n" + " vmlal.u8 q3, d17, d20 @ ...blue \n" + " add %[in_mask], %[in_mask], %[mask_stride] @ advance mask pointer, while we wait \n" + " vsri.16 q1, q2, #5 @ pack green behind red \n" + " vsri.16 q1, q3, #11 @ pack blue into pixels \n" + " vst1.16 {d2, d3}, [%[dest]] @ store composited pixels \n" + " add %[dest], %[dest], %[dest_stride] @ advance framebuffer pointer \n" + " bne 0b @ next please \n" + + /* Clobbered registers marked as input/outputs */ + : [dest] "+r" (dest), [in_mask] "+r" (in_mask), [count] "+r" (count) + + /* Inputs */ + : [dest_stride] "r" (dest_stride), [mask_stride] "r" (mask_stride), [glyph_colour] "r" (&glyph_colour) + + /* Clobbers, including the inputs we modify, and potentially lots of memory */ + : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d17", "d18", "d19", + "d20", "d21", "d22", "d23", "d24", "d25", "cc", "memory" + ); -const FastPathInfo *const arm_neon_fast_paths = arm_neon_fast_path_array; +#else + + uint8x8x4_t solid_colour = vld4_dup_u8 ((uint8_t*) &glyph_colour); + + while (count--) + { + uint16x8_t pixels = vld1q_u16 (dest); + uint8x8_t mask = vshrn_n_u16 (vmull_u8 (solid_colour.val[3], vld1_u8 (in_mask)), 8); + uint8x8_t mask_image = vmvn_u8 (mask); + + uint8x8_t t_red = vshrn_n_u16 (pixels, 8); + uint8x8_t t_green = vshrn_n_u16 (pixels, 3); + uint8x8_t t_blue = vshrn_n_u16 (vsli_n_u8 (pixels, pixels, 5), 2); + + uint16x8_t s_red = vmull_u8 (vsri_n_u8 (t_red, t_red, 5), mask_image); + uint16x8_t s_green = vmull_u8 (vsri_n_u8 (t_green, t_green, 6), mask_image); + uint16x8_t s_blue = vmull_u8 (t_blue, mask_image); + + s_red = vmlal (s_red, mask, solid_colour.val[2]); + s_green = vmlal (s_green, mask, solid_colour.val[1]); + s_blue = vmlal (s_blue, mask, solid_colour.val[0]); + + pixels = vsri_n_u16 (s_red, s_green, 5); + pixels = vsri_n_u16 (pixels, s_blue, 11); + vst1q_u16 (dest, pixels); + + dest += dest_stride; + mask += mask_stride; + } +#endif +} + +#if 0 /* this is broken currently */ static void -arm_neon_composite (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t *src, - pixman_image_t *mask, - pixman_image_t *dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +neon_composite_over_n_8_0565 (pixman_implementation_t * impl, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint16_t *dst_line, *aligned_line; + uint8_t *mask_line; + uint32_t dst_stride, mask_stride; + uint32_t kernel_count, copy_count, copy_tail; + uint8_t kernel_offset, copy_offset; + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); + + /* bail out if fully transparent or degenerate */ + srca = src >> 24; + if (src == 0) + return; + + if (width == 0 || height == 0) + return; + + if (width > NEON_SCANLINE_BUFFER_PIXELS) + { + /* split the blit, so we can use a fixed-size scanline buffer + * TODO: there must be a more elegant way of doing this. + */ + int x; + for (x = 0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS) + { + neon_composite_over_n_8_0565 ( + impl, op, + src_image, mask_image, dst_image, + src_x + x, src_y, mask_x + x, mask_y, dest_x + x, dest_y, + (x + NEON_SCANLINE_BUFFER_PIXELS > width) ? 
width - x : NEON_SCANLINE_BUFFER_PIXELS, height); + } + + return; + } + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + /* keep within minimum number of aligned quadwords on width + * while also keeping the minimum number of columns to process + */ + { + unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF; + unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF; + unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF; + + /* the fast copy should be quadword aligned */ + copy_offset = dst_line - ((uint16_t*) aligned_left); + aligned_line = dst_line - copy_offset; + copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4); + copy_tail = 0; + + if (aligned_right - aligned_left > ceiling_length) + { + /* unaligned routine is tightest */ + kernel_count = (uint32_t) (ceiling_length >> 4); + kernel_offset = copy_offset; + } + else + { + /* aligned routine is equally tight, so it is safer to align */ + kernel_count = copy_count; + kernel_offset = 0; + } + + /* We should avoid reading beyond scanline ends for safety */ + if (aligned_line < (dst_line - dest_x) || + (aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width)) + { + /* switch to precise read */ + copy_offset = kernel_offset = 0; + aligned_line = dst_line; + kernel_count = (uint32_t) (ceiling_length >> 4); + copy_count = (width * sizeof(*dst_line)) >> 4; + copy_tail = (width * sizeof(*dst_line)) & 0xF; + } + } + + { + uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8]; /* deliberately not initialised */ + uint8_t glyph_line[NEON_SCANLINE_BUFFER_PIXELS + 8]; + int y = height; + + /* row-major order */ + /* left edge, middle block, right edge */ + for ( ; y--; mask_line += mask_stride, aligned_line += dst_stride, dst_line += dst_stride) + { + /* We don't want to overrun the edges of the glyph, + * so realign the edge data into known buffers + */ + neon_quadword_copy (glyph_line + copy_offset, mask_line, width >> 4, width & 0xF); + + /* Uncached framebuffer access is really, really slow + * if we do it piecemeal. It should be much faster if we + * grab it all at once. One scanline should easily fit in + * L1 cache, so this should not waste RAM bandwidth. 
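/* Illustrative sketch (not from the patch) of the alignment bookkeeping
 * above: the destination window is rounded out to 16-byte boundaries so
 * the staging copy can run on whole quadwords, and copy_offset records
 * how many uint16_t pixels of slack precede the first real pixel.
 */
#include <stdint.h>

static void
quadword_window (const uint16_t *dst_line, int width,
                 int *copy_offset, uint32_t *copy_count)
{
    unsigned long aligned_left = (unsigned long) dst_line & ~0xFUL;
    unsigned long aligned_right =
        ((unsigned long) (dst_line + width) + 0xF) & ~0xFUL;

    /* pixels between the aligned start and the first pixel to composite */
    *copy_offset = (int) (dst_line - (const uint16_t *) aligned_left);

    /* 16-byte quadwords covering the rounded-out window */
    *copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4);
}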
+ */ + neon_quadword_copy (scan_line, aligned_line, copy_count, copy_tail); + + /* Apply the actual filter */ + solid_over_565_8_pix_neon ( + src, scan_line + kernel_offset, + glyph_line + kernel_offset, 8 * sizeof(*dst_line), + 8, kernel_count); + + /* Copy the modified scanline back */ + neon_quadword_copy (dst_line, scan_line + copy_offset, + width >> 3, (width & 7) * 2); + } + } +} +#endif + +#ifdef USE_GCC_INLINE_ASM + +static inline void +plain_over_565_8_pix_neon (uint32_t colour, + uint16_t *dest, + uint32_t dest_stride, /* bytes, not elements */ + uint32_t count /* 8-pixel groups */) { - if (_pixman_run_fast_path (arm_neon_fast_paths, imp, - op, src, mask, dest, - src_x, src_y, - mask_x, mask_y, - dest_x, dest_y, - width, height)) + /* Inner loop for plain translucent rects + * (solid colour without alpha mask) + */ + asm volatile ( + " vld4.8 {d20[], d21[], d22[], d23[]}, [%[colour]] @ solid colour load/splat \n" + " vmull.u8 q12, d23, d22 @ premultiply alpha red \n" + " vmull.u8 q13, d23, d21 @ premultiply alpha green \n" + " vmull.u8 q14, d23, d20 @ premultiply alpha blue \n" + " vmvn d18, d23 @ inverse alpha for background \n" + "0: @ loop\n" + " vld1.16 {d0, d1}, [%[dest]] @ load first pixels from framebuffer \n" + " vshrn.u16 d2, q0, #8 @ unpack red from framebuffer pixels \n" + " vshrn.u16 d4, q0, #3 @ unpack green \n" + " vsli.u16 q3, q0, #5 @ duplicate framebuffer blue bits \n" + " vsri.u8 d2, d2, #5 @ duplicate red bits (extend 5 to 8) \n" + " vsri.u8 d4, d4, #6 @ duplicate green bits (extend 6 to 8) \n" + " vshrn.u16 d6, q3, #2 @ unpack extended blue (truncate 10 to 8) \n" + " vmov q0, q12 @ retrieve foreground red \n" + " vmlal.u8 q0, d2, d18 @ blend red - my kingdom for a four-operand MLA \n" + " vmov q1, q13 @ retrieve foreground green \n" + " vmlal.u8 q1, d4, d18 @ blend green \n" + " vmov q2, q14 @ retrieve foreground blue \n" + " vmlal.u8 q2, d6, d18 @ blend blue \n" + " subs %[count], %[count], #1 @ decrement/test loop counter \n" + " vsri.16 q0, q1, #5 @ pack green behind red \n" + " vsri.16 q0, q2, #11 @ pack blue into pixels \n" + " vst1.16 {d0, d1}, [%[dest]] @ store composited pixels \n" + " add %[dest], %[dest], %[dest_stride] @ advance framebuffer pointer \n" + " bne 0b @ next please \n" + + /* Clobbered registers marked as input/outputs */ + : [dest] "+r" (dest), [count] "+r" (count) + + /* Inputs */ + : [dest_stride] "r" (dest_stride), [colour] "r" (&colour) + + /* Clobbers, including the inputs we modify, and + * potentially lots of memory + */ + : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d18", "d19", + "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", + "cc", "memory" + ); +} + +static void +neon_composite_over_n_0565 (pixman_implementation_t * impl, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint16_t *dst_line, *aligned_line; + uint32_t dst_stride; + uint32_t kernel_count, copy_count, copy_tail; + uint8_t kernel_offset, copy_offset; + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); + + /* bail out if fully transparent */ + srca = src >> 24; + if (src == 0) + return; + + if (width == 0 || height == 0) + return; + + if (width > NEON_SCANLINE_BUFFER_PIXELS) + { + /* split the blit, so we can use a fixed-size scanline buffer * + * TODO: there must be a more elegant way of doing 
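/* Illustrative sketch (not from the patch) of the per-pixel arithmetic
 * that plain_over_565_8_pix_neon above attempts (the path is registered
 * under #if 0 as buggy): OVER of a constant colour onto r5g6b5. The
 * 5->8 and 6->8 expansions replicate the top bits into the low ones,
 * exactly what the vsli/vsri/vshrn sequences do, and the blend keeps
 * the same >>8 approximation as the vmull/vmlal code rather than an
 * exact divide by 255. Note that it multiplies the colour by its own
 * alpha, i.e. it treats the colour as non-premultiplied.
 */
#include <stdint.h>

static uint16_t
over_solid_565 (uint32_t argb, uint16_t dst)
{
    uint32_t a = argb >> 24;
    uint32_t ia = 255 - a;

    /* unpack the framebuffer pixel, replicating high bits downwards */
    uint32_t r = (dst >> 8) & 0xf8;  r |= r >> 5;
    uint32_t g = (dst >> 3) & 0xfc;  g |= g >> 6;
    uint32_t b = (dst << 3) & 0xf8;  b |= b >> 5;

    /* foreground scaled by alpha plus background scaled by inverse alpha */
    r = (((argb >> 16) & 0xff) * a + r * ia) >> 8;
    g = (((argb >>  8) & 0xff) * a + g * ia) >> 8;
    b = (((argb >>  0) & 0xff) * a + b * ia) >> 8;

    /* repack to 565 */
    return (uint16_t) (((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3));
}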
this. + */ + int x; + + for (x = 0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS) + { + neon_composite_over_n_0565 ( + impl, op, + src_image, mask_image, dst_image, + src_x + x, src_y, mask_x + x, mask_y, dest_x + x, dest_y, + (x + NEON_SCANLINE_BUFFER_PIXELS > width) ? width - x : NEON_SCANLINE_BUFFER_PIXELS, height); + } + return; + } + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + /* keep within minimum number of aligned quadwords on width + * while also keeping the minimum number of columns to process + */ + { + unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF; + unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF; + unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF; + + /* the fast copy should be quadword aligned */ + copy_offset = dst_line - ((uint16_t*) aligned_left); + aligned_line = dst_line - copy_offset; + copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4); + copy_tail = 0; + + if (aligned_right - aligned_left > ceiling_length) { - return; + /* unaligned routine is tightest */ + kernel_count = (uint32_t) (ceiling_length >> 4); + kernel_offset = copy_offset; + } + else + { + /* aligned routine is equally tight, so it is safer to align */ + kernel_count = copy_count; + kernel_offset = 0; } - _pixman_implementation_composite (imp->delegate, op, - src, mask, dest, - src_x, src_y, - mask_x, mask_y, - dest_x, dest_y, - width, height); + /* We should avoid reading beyond scanline ends for safety */ + if (aligned_line < (dst_line - dest_x) || + (aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width)) + { + /* switch to precise read */ + copy_offset = kernel_offset = 0; + aligned_line = dst_line; + kernel_count = (uint32_t) (ceiling_length >> 4); + copy_count = (width * sizeof(*dst_line)) >> 4; + copy_tail = (width * sizeof(*dst_line)) & 0xF; + } + } + + { + uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8]; /* deliberately not initialised */ + + /* row-major order */ + /* left edge, middle block, right edge */ + for ( ; height--; aligned_line += dst_stride, dst_line += dst_stride) + { + /* Uncached framebuffer access is really, really slow if we do it piecemeal. + * It should be much faster if we grab it all at once. + * One scanline should easily fit in L1 cache, so this should + * not waste RAM bandwidth. + */ + neon_quadword_copy (scan_line, aligned_line, copy_count, copy_tail); + + /* Apply the actual filter */ + plain_over_565_8_pix_neon ( + src, scan_line + kernel_offset, 8 * sizeof(*dst_line), kernel_count); + + /* Copy the modified scanline back */ + neon_quadword_copy ( + dst_line, scan_line + copy_offset, width >> 3, (width & 7) * 2); + } + } } -pixman_bool_t -pixman_blt_neon ( - void *src_bits, - void *dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, int src_y, - int dst_x, int dst_y, - int width, int height) +static inline void +ARGB8_over_565_8_pix_neon (uint32_t *src, + uint16_t *dest, + uint32_t src_stride, /* bytes, not elements */ + uint32_t count /* 8-pixel groups */) { + asm volatile ( + "0: @ loop\n" + " pld [%[src], %[src_stride]] @ preload from next scanline \n" + " vld1.16 {d0, d1}, [%[dest]] @ load pixels from framebuffer \n" + " vld4.8 {d20, d21, d22, d23},[%[src]]! 
@ load source image pixels \n" + " vsli.u16 q3, q0, #5 @ duplicate framebuffer blue bits \n" + " vshrn.u16 d2, q0, #8 @ unpack red from framebuffer pixels \n" + " vshrn.u16 d4, q0, #3 @ unpack green \n" + " vmvn d18, d23 @ we need the inverse alpha for the background \n" + " vsri.u8 d2, d2, #5 @ duplicate red bits (extend 5 to 8) \n" + " vshrn.u16 d6, q3, #2 @ unpack extended blue (truncate 10 to 8) \n" + " vsri.u8 d4, d4, #6 @ duplicate green bits (extend 6 to 8) \n" + " vmull.u8 q1, d2, d18 @ apply inverse alpha to background red... \n" + " vmull.u8 q2, d4, d18 @ ...green... \n" + " vmull.u8 q3, d6, d18 @ ...blue \n" + " subs %[count], %[count], #1 @ decrement/test loop counter \n" + " vmlal.u8 q1, d23, d22 @ add blended foreground red... \n" + " vmlal.u8 q2, d23, d21 @ ...green... \n" + " vmlal.u8 q3, d23, d20 @ ...blue \n" + " vsri.16 q1, q2, #5 @ pack green behind red \n" + " vsri.16 q1, q3, #11 @ pack blue into pixels \n" + " vst1.16 {d2, d3}, [%[dest]]! @ store composited pixels \n" + " bne 0b @ next please \n" + + /* Clobbered registers marked as input/outputs */ + : [dest] "+r" (dest), [src] "+r" (src), [count] "+r" (count) + + /* Inputs */ + : [src_stride] "r" (src_stride) + + /* Clobbers, including the inputs we modify, and potentially lots of memory */ + : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d17", "d18", "d20", + "d21", "d22", "d23", "cc", "memory" + ); +} -#if 0 // Relies on code which isn't upstreamed yet +static void +neon_composite_over_8888_0565 (pixman_implementation_t * impl, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *src_line; + uint16_t *dst_line, *aligned_line; + uint32_t dst_stride, src_stride; + uint32_t kernel_count, copy_count, copy_tail; + uint8_t kernel_offset, copy_offset; + + /* we assume mask is opaque + * so the only alpha to deal with is embedded in src + */ + if (width > NEON_SCANLINE_BUFFER_PIXELS) + { + /* split the blit, so we can use a fixed-size scanline buffer */ + int x; + for (x = 0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS) + { + neon_composite_over_8888_0565 ( + impl, op, + src_image, mask_image, dst_image, + src_x + x, src_y, mask_x + x, mask_y, dest_x + x, dest_y, + (x + NEON_SCANLINE_BUFFER_PIXELS > width) ? 
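/* Illustrative sketch (not from the patch): the vld4.8 {d20-d23} load
 * above de-interleaves eight a8r8g8b8 pixels into four per-channel byte
 * planes (memory order b, g, r, a on a little-endian target), so each
 * channel can be blended with one 8x8->16 vector multiply. Scalar
 * equivalent:
 */
#include <stdint.h>

static void
deinterleave_argb (const uint32_t *src,
                   uint8_t b[8], uint8_t g[8], uint8_t r[8], uint8_t a[8])
{
    int i;

    for (i = 0; i < 8; i++)
    {
        b[i] = (uint8_t) (src[i] >>  0);
        g[i] = (uint8_t) (src[i] >>  8);
        r[i] = (uint8_t) (src[i] >> 16);
        a[i] = (uint8_t) (src[i] >> 24);
    }
}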
width - x : NEON_SCANLINE_BUFFER_PIXELS, height); + } + return; + } - // accelerate only straight copies - if(src_bpp != dst_bpp || (src_bpp & 7) || !width || !height) - return FALSE; + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + /* keep within minimum number of aligned quadwords on width + * while also keeping the minimum number of columns to process + */ + { + unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF; + unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF; + unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF; + + /* the fast copy should be quadword aligned */ + copy_offset = dst_line - ((uint16_t*) aligned_left); + aligned_line = dst_line - copy_offset; + copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4); + copy_tail = 0; + + if (aligned_right - aligned_left > ceiling_length) { - uint32_t bytes_per_pixel = src_bpp >> 3; - uint32_t byte_width = width * bytes_per_pixel; - int32_t src_stride_bytes = src_stride * 4; // parameter is in words for some reason - int32_t dst_stride_bytes = dst_stride * 4; - uint8_t *src_bytes = ((uint8_t*) src_bits) + src_y * src_stride_bytes + src_x * bytes_per_pixel; - uint8_t *dst_bytes = ((uint8_t*) dst_bits) + dst_y * dst_stride_bytes + dst_x * bytes_per_pixel; - uint32_t quadword_count = byte_width / 16; - uint32_t offset = byte_width % 16; - - while(height--) { - QuadwordCopy_neon(dst_bytes, src_bytes, quadword_count, offset); - src_bytes += src_stride_bytes; - dst_bytes += dst_stride_bytes; - } + /* unaligned routine is tightest */ + kernel_count = (uint32_t) (ceiling_length >> 4); + kernel_offset = copy_offset; + } + else + { + /* aligned routine is equally tight, so it is safer to align */ + kernel_count = copy_count; + kernel_offset = 0; } - return TRUE; + /* We should avoid reading beyond scanline ends for safety */ + if (aligned_line < (dst_line - dest_x) || + (aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width)) + { + /* switch to precise read */ + copy_offset = kernel_offset = 0; + aligned_line = dst_line; + kernel_count = (uint32_t) (ceiling_length >> 4); + copy_count = (width * sizeof(*dst_line)) >> 4; + copy_tail = (width * sizeof(*dst_line)) & 0xF; + } + } -#else /* USE_GCC_INLINE_ASM */ + /* Preload the first input scanline */ + { + uint8_t *src_ptr = (uint8_t*) src_line; + uint32_t count = (width + 15) / 16; + +#ifdef USE_GCC_INLINE_ASM + asm volatile ( + "0: @ loop \n" + " subs %[count], %[count], #1 \n" + " pld [%[src]] \n" + " add %[src], %[src], #64 \n" + " bgt 0b \n" + + /* Clobbered input registers marked as input/outputs */ + : [src] "+r" (src_ptr), [count] "+r" (count) + : /* no unclobbered inputs */ + : "cc" + ); +#else + do + { + __pld (src_ptr); + src_ptr += 64; + } + while (--count); +#endif + } - // TODO: intrinsic version for armcc - return FALSE; + { + uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8]; /* deliberately not initialised */ + /* row-major order */ + /* left edge, middle block, right edge */ + for ( ; height--; src_line += src_stride, aligned_line += dst_stride) + { + /* Uncached framebuffer access is really, really slow if we do + * it piecemeal. It should be much faster if we grab it all at + * once. One scanline should easily fit in L1 cache, so this + * should not waste RAM bandwidth. 
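/* Illustrative sketch (not from the patch) of the scanline preload
 * above: touch one cache line per step ahead of the blend loop. The
 * 64-byte stride is an assumption matching the asm; on GCC,
 * __builtin_prefetch emits pld on ARM, and __pld is the equivalent
 * armcc intrinsic used in the non-asm branch.
 */
static void
preload_scanline (const void *src, unsigned bytes)
{
    const char *p = (const char *) src;
    const char *end = p + bytes;

    while (p < end)
    {
        __builtin_prefetch (p); /* read prefetch, default locality */
        p += 64;
    }
}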
+ */ + neon_quadword_copy (scan_line, aligned_line, copy_count, copy_tail); + + /* Apply the actual filter */ + ARGB8_over_565_8_pix_neon ( + src_line, scan_line + kernel_offset, + src_stride * sizeof(*src_line), kernel_count); + + /* Copy the modified scanline back */ + neon_quadword_copy (dst_line, + scan_line + copy_offset, + width >> 3, (width & 7) * 2); + } + } +} + +#endif /* USE_GCC_INLINE_ASM */ + +static const pixman_fast_path_t arm_neon_fast_path_array[] = +{ + { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, neon_composite_add_8888_8_8, 0 }, + { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, neon_composite_add_8000_8000, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, neon_composite_over_n_8_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, neon_composite_over_n_8_0565, 0 }, + { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_src_24_16, 0 }, + { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_src_24_16, 0 }, + { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_src_24_16, 0 }, + { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_src_24_16, 0 }, +#ifdef USE_GCC_INLINE_ASM + { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_src_16_16, 0 }, + { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_src_16_16, 0 }, +#if 0 /* this code has some bugs */ + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_over_n_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_over_n_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_over_8888_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_over_8888_0565, 0 }, +#endif #endif + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, neon_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, neon_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, neon_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, neon_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, neon_composite_over_8888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, neon_composite_over_8888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, neon_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, neon_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, neon_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, neon_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_NONE }, +}; + +const pixman_fast_path_t *const arm_neon_fast_paths = arm_neon_fast_path_array; + +static void +arm_neon_composite (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src, + pixman_image_t * mask, + pixman_image_t * dest, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + if (_pixman_run_fast_path (arm_neon_fast_paths, imp, + op, src, mask, dest, + src_x, src_y, + mask_x, mask_y, + dest_x, dest_y, + width, height)) + { + return; + } + + _pixman_implementation_composite (imp->delegate, op, + src, 
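/* Illustrative sketch (not from the patch) of how a table like
 * arm_neon_fast_path_array above is consulted. The real matcher,
 * _pixman_run_fast_path, also checks image attributes (transforms,
 * filters, flags such as NEED_SOLID_MASK); the struct here is a
 * pared-down illustration, not the pixman type.
 */
typedef struct
{
    int op;          /* PIXMAN_OP_... */
    int src_format;  /* PIXMAN_a8r8g8b8, PIXMAN_solid, ... */
    int mask_format; /* PIXMAN_null when there is no mask */
    int dest_format;
    void (*func) (void);
} fast_path_entry_t;

static const fast_path_entry_t *
lookup_fast_path (const fast_path_entry_t *table,
                  int op, int src, int mask, int dest)
{
    const fast_path_entry_t *p;

    /* first match wins, so more specific entries come first */
    for (p = table; p->func != NULL; p++)
    {
        if (p->op == op && p->src_format == src &&
            p->mask_format == mask && p->dest_format == dest)
            return p;
    }

    return NULL; /* caller falls through to the delegate */
}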
mask, dest, + src_x, src_y, + mask_x, mask_y, + dest_x, dest_y, + width, height); +} + +static pixman_bool_t +pixman_blt_neon (void *src_bits, + void *dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) +{ + if (!width || !height) + return TRUE; + + /* accelerate only straight copies involving complete bytes */ + if (src_bpp != dst_bpp || (src_bpp & 7)) + return FALSE; + + { + uint32_t bytes_per_pixel = src_bpp >> 3; + uint32_t byte_width = width * bytes_per_pixel; + /* parameter is in words for some reason */ + int32_t src_stride_bytes = src_stride * 4; + int32_t dst_stride_bytes = dst_stride * 4; + uint8_t *src_bytes = ((uint8_t*) src_bits) + + src_y * src_stride_bytes + src_x * bytes_per_pixel; + uint8_t *dst_bytes = ((uint8_t*) dst_bits) + + dst_y * dst_stride_bytes + dst_x * bytes_per_pixel; + uint32_t quadword_count = byte_width / 16; + uint32_t offset = byte_width % 16; + + while (height--) + { + neon_quadword_copy (dst_bytes, src_bytes, quadword_count, offset); + src_bytes += src_stride_bytes; + dst_bytes += dst_stride_bytes; + } + } + + return TRUE; } static pixman_bool_t arm_neon_blt (pixman_implementation_t *imp, - uint32_t *src_bits, - uint32_t *dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, int src_y, - int dst_x, int dst_y, - int width, int height) + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) { - if (pixman_blt_neon ( - src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, - src_x, src_y, dst_x, dst_y, width, height)) - return TRUE; - - return _pixman_implementation_blt ( - imp->delegate, - src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, - src_x, src_y, dst_x, dst_y, width, height); + if (pixman_blt_neon ( + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, + src_x, src_y, dst_x, dst_y, width, height)) + { + return TRUE; + } + + return _pixman_implementation_blt ( + imp->delegate, + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, + src_x, src_y, dst_x, dst_y, width, height); } static pixman_bool_t arm_neon_fill (pixman_implementation_t *imp, - uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor) + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) { - if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor)) - return TRUE; + if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor)) + return TRUE; - return _pixman_implementation_fill ( - imp->delegate, bits, stride, bpp, x, y, width, height, xor); + return _pixman_implementation_fill ( + imp->delegate, bits, stride, bpp, x, y, width, height, xor); } pixman_implementation_t * -_pixman_implementation_create_arm_neon (pixman_implementation_t *toplevel) +_pixman_implementation_create_arm_neon (void) { - pixman_implementation_t *simd = _pixman_implementation_create_arm_simd (NULL); - pixman_implementation_t *imp = _pixman_implementation_create (toplevel, simd); + pixman_implementation_t *general = _pixman_implementation_create_fast_path (); + pixman_implementation_t *imp = _pixman_implementation_create (general); - imp->composite = arm_neon_composite; -// imp->blt = arm_neon_blt; -// imp->fill = arm_neon_fill; -- Relies on code which is not upstreamed yet + imp->composite = 
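/* Illustrative sketch (not from the patch) of the delegation pattern in
 * arm_neon_blt and arm_neon_fill above: each implementation tries its
 * own specialised routine and, if that reports failure, forwards the
 * same arguments down the delegate chain. The types here are toy
 * stand-ins for pixman_implementation_t.
 */
#include <stdint.h>

typedef struct toy_impl toy_impl_t;
struct toy_impl
{
    toy_impl_t *delegate;
    int (*fill) (toy_impl_t *imp, uint32_t *bits, int w, int h, uint32_t xor);
};

static int
chained_fill (toy_impl_t *imp, uint32_t *bits, int w, int h, uint32_t xor)
{
    if (imp->fill && imp->fill (imp, bits, w, h, xor))
        return 1;              /* handled at this level */

    if (imp->delegate)         /* otherwise punt down the chain */
        return chained_fill (imp->delegate, bits, w, h, xor);

    return 0;
}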
arm_neon_composite; +#if 0 /* this code has some bugs */ + imp->blt = arm_neon_blt; +#endif + imp->fill = arm_neon_fill; - return imp; + return imp; } + diff --git a/lib/pixman/pixman/pixman-arm-neon.h b/lib/pixman/pixman/pixman-arm-neon.h deleted file mode 100644 index aed7a4dfd..000000000 --- a/lib/pixman/pixman/pixman-arm-neon.h +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright © 2009 ARM Ltd - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of ARM Ltd not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. ARM Ltd makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Ian Rickards (ian.rickards@arm.com) - * - */ - -#include "pixman-private.h" - -#ifdef USE_ARM_NEON - -pixman_bool_t pixman_have_arm_neon(void); - -#else -#define pixman_have_arm_neon() FALSE -#endif - -#ifdef USE_ARM_NEON - -extern const FastPathInfo *const arm_neon_fast_paths; - -void -fbCompositeSrcAdd_8000x8000neon ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height); - -void -fbCompositeSrc_8888x8888neon ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height); - -void -fbCompositeSrc_8888x8x8888neon ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height); - -void -fbCompositeSolidMask_nx8x0565neon ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height); - -void -fbCompositeSolidMask_nx8x8888neon ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height); - -void -fbCompositeSrcAdd_8888x8x8neon ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - 
int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height); - -void -fbCompositeSrc_16x16neon ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height); - -void -fbCompositeSrc_24x16neon ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height); - -pixman_bool_t -pixman_fill_neon (uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t _xor); - -#endif /* USE_ARM_NEON */ diff --git a/lib/pixman/pixman/pixman-arm-simd.c b/lib/pixman/pixman/pixman-arm-simd.c index 42503fc85..fb7bf3da8 100644 --- a/lib/pixman/pixman/pixman-arm-simd.c +++ b/lib/pixman/pixman/pixman-arm-simd.c @@ -27,48 +27,48 @@ #include <config.h> #endif -#include "pixman-arm-simd.h" - -void -fbCompositeSrcAdd_8000x8000arm ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +#include "pixman-private.h" + +static void +arm_composite_add_8000_8000 (pixman_implementation_t * impl, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint8_t *dstLine, *dst; - uint8_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; - uint8_t s, d; + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + uint16_t w; + uint8_t s, d; - fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1); - fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; - /* ensure both src and dst are properly aligned before doing 32 bit reads - * we'll stay in this loop if src and dst have differing alignments */ + /* ensure both src and dst are properly aligned before doing 32 bit reads + * we'll stay in this loop if src and dst have differing alignments + */ while (w && (((unsigned long)dst & 3) || ((unsigned long)src & 3))) { s = *src; d = *dst; - asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s)); + asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s)); *dst = d; dst++; @@ -78,7 +78,9 @@ fbCompositeSrcAdd_8000x8000arm ( while (w >= 4) { - asm("uqadd8 %0, %1, %2" : "=r"(*(uint32_t*)dst) : "r"(*(uint32_t*)src), "r"(*(uint32_t*)dst)); + asm ("uqadd8 %0, %1, %2" + : "=r" (*(uint32_t*)dst) + : "r" (*(uint32_t*)src), "r" (*(uint32_t*)dst)); dst += 4; src += 4; w -= 4; @@ -88,7 +90,7 @@ fbCompositeSrcAdd_8000x8000arm ( { s = *src; d = *dst; - asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s)); + asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s)); *dst = d; dst++; @@ 
-99,232 +101,233 @@ fbCompositeSrcAdd_8000x8000arm ( } -void -fbCompositeSrc_8888x8888arm ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +static void +arm_composite_over_8888_8888 (pixman_implementation_t * impl, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + uint16_t w; uint32_t component_half = 0x800080; uint32_t upper_component_mask = 0xff00ff00; uint32_t alpha_mask = 0xff; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; -//#define inner_branch +/* #define inner_branch */ asm volatile ( - "cmp %[w], #0\n\t" - "beq 2f\n\t" - "1:\n\t" - /* load src */ - "ldr r5, [%[src]], #4\n\t" + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load src */ + "ldr r5, [%[src]], #4\n\t" #ifdef inner_branch - /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. - * The 0x0 case also allows us to avoid doing an unecessary data - * write which is more valuable so we only check for that */ - "cmp r5, #0\n\t" - "beq 3f\n\t" + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. 
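/* Illustrative sketch (not from the patch): uqadd8, used in
 * arm_composite_add_8000_8000 above, performs four independent unsigned
 * byte additions that saturate at 0xff instead of wrapping into the
 * neighbouring lane. Scalar equivalent:
 */
#include <stdint.h>

static uint32_t
saturating_add_4x8 (uint32_t a, uint32_t b)
{
    uint32_t result = 0;
    int i;

    for (i = 0; i < 32; i += 8)
    {
        uint32_t sum = ((a >> i) & 0xff) + ((b >> i) & 0xff);

        if (sum > 0xff)
            sum = 0xff; /* clamp the lane */

        result |= sum << i;
    }

    return result;
}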
+ * The 0x0 case also allows us to avoid doing an unecessary data + * write which is more valuable so we only check for that + */ + "cmp r5, #0\n\t" + "beq 3f\n\t" - /* = 255 - alpha */ - "sub r8, %[alpha_mask], r5, lsr #24\n\t" + /* = 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" - "ldr r4, [%[dest]] \n\t" + "ldr r4, [%[dest]] \n\t" #else - "ldr r4, [%[dest]] \n\t" + "ldr r4, [%[dest]] \n\t" - /* = 255 - alpha */ - "sub r8, %[alpha_mask], r5, lsr #24\n\t" + /* = 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" #endif - "uxtb16 r6, r4\n\t" - "uxtb16 r7, r4, ror #8\n\t" + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" - /* multiply by 257 and divide by 65536 */ - "mla r6, r6, r8, %[component_half]\n\t" - "mla r7, r7, r8, %[component_half]\n\t" + /* multiply by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" - /* recombine the 0xff00ff00 bytes of r6 and r7 */ - "and r7, r7, %[upper_component_mask]\n\t" - "uxtab16 r6, r7, r6, ror #8\n\t" + /* recombine the 0xff00ff00 bytes of r6 and r7 */ + "and r7, r7, %[upper_component_mask]\n\t" + "uxtab16 r6, r7, r6, ror #8\n\t" - "uqadd8 r5, r6, r5\n\t" + "uqadd8 r5, r6, r5\n\t" #ifdef inner_branch - "3:\n\t" + "3:\n\t" #endif - "str r5, [%[dest]], #4\n\t" - /* increment counter and jmp to top */ - "subs %[w], %[w], #1\n\t" - "bne 1b\n\t" - "2:\n\t" - : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) - : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask), - [alpha_mask] "r" (alpha_mask) - : "r4", "r5", "r6", "r7", "r8", "cc", "memory" - ); + "str r5, [%[dest]], #4\n\t" + /* increment counter and jmp to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) + : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask), + [alpha_mask] "r" (alpha_mask) + : "r4", "r5", "r6", "r7", "r8", "cc", "memory" + ); } } -void -fbCompositeSrc_8888x8x8888arm ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +static void +arm_composite_over_8888_n_8888 ( + pixman_implementation_t * impl, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - uint32_t mask; - int dstStride, srcStride; - uint16_t w; + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + uint32_t mask; + int dst_stride, src_stride; + uint16_t w; uint32_t component_half = 0x800080; uint32_t alpha_mask = 0xff; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - fbComposeGetSolid (pMask, mask, pDst->bits.format); + mask = _pixman_image_get_solid (mask_image, dst_image->bits.format); mask = (mask) >> 24; while (height--) { - dst = 
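/* Illustrative sketch (not from the patch) of the mla/uxtab16/uxtb16
 * sequence above: an exact, rounded x*a/255 computed without a divide,
 * namely t = x*a + 0x80; result = (t + (t >> 8)) >> 8. The SIMD code
 * runs this on two channels at once by spacing them 16 bits apart;
 * 0x800080 is the per-lane +0x80 rounding term.
 */
#include <stdint.h>

static uint8_t
mul_div_255 (uint8_t x, uint8_t a)
{
    uint32_t t = (uint32_t) x * a + 0x80;

    return (uint8_t) ((t + (t >> 8)) >> 8);
}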
dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; -//#define inner_branch +/* #define inner_branch */ asm volatile ( - "cmp %[w], #0\n\t" - "beq 2f\n\t" - "1:\n\t" - /* load src */ - "ldr r5, [%[src]], #4\n\t" + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load src */ + "ldr r5, [%[src]], #4\n\t" #ifdef inner_branch - /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. - * The 0x0 case also allows us to avoid doing an unecessary data - * write which is more valuable so we only check for that */ - "cmp r5, #0\n\t" - "beq 3f\n\t" + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. + * The 0x0 case also allows us to avoid doing an unecessary data + * write which is more valuable so we only check for that + */ + "cmp r5, #0\n\t" + "beq 3f\n\t" #endif - "ldr r4, [%[dest]] \n\t" + "ldr r4, [%[dest]] \n\t" - "uxtb16 r6, r5\n\t" - "uxtb16 r7, r5, ror #8\n\t" + "uxtb16 r6, r5\n\t" + "uxtb16 r7, r5, ror #8\n\t" - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, r6, %[mask_alpha], %[component_half]\n\t" - "mla r7, r7, %[mask_alpha], %[component_half]\n\t" + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, %[mask_alpha], %[component_half]\n\t" + "mla r7, r7, %[mask_alpha], %[component_half]\n\t" - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" - /* recombine */ - "orr r5, r6, r7, lsl #8\n\t" + /* recombine */ + "orr r5, r6, r7, lsl #8\n\t" - "uxtb16 r6, r4\n\t" - "uxtb16 r7, r4, ror #8\n\t" + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" - /* 255 - alpha */ - "sub r8, %[alpha_mask], r5, lsr #24\n\t" + /* 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, r6, r8, %[component_half]\n\t" - "mla r7, r7, r8, %[component_half]\n\t" + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" - /* recombine */ - "orr r6, r6, r7, lsl #8\n\t" + /* recombine */ + "orr r6, r6, r7, lsl #8\n\t" - "uqadd8 r5, r6, r5\n\t" + "uqadd8 r5, r6, r5\n\t" #ifdef inner_branch - "3:\n\t" + "3:\n\t" #endif - "str r5, [%[dest]], #4\n\t" - /* increment counter and jmp to top */ - "subs %[w], %[w], #1\n\t" - "bne 1b\n\t" - "2:\n\t" - : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) - : [component_half] "r" (component_half), [mask_alpha] "r" (mask), - [alpha_mask] "r" (alpha_mask) - : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory" - ); + "str r5, [%[dest]], #4\n\t" + /* increment counter and jmp to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) + : [component_half] "r" (component_half), [mask_alpha] "r" (mask), + [alpha_mask] "r" (alpha_mask) + : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory" + ); } } -void -fbCompositeSolidMask_nx8x8888arm ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * 
pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +static void +arm_composite_over_n_8_8888 (pixman_implementation_t * impl, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t src, srca; - uint32_t *dstLine, *dst; - uint8_t *maskLine, *mask; - int dstStride, maskStride; - uint16_t w; + uint32_t src, srca; + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint16_t w; - fbComposeGetSolid(pSrc, src, pDst->bits.format); + src = _pixman_image_get_solid (src_image, dst_image->bits.format); + /* bail out if fully transparent */ srca = src >> 24; if (src == 0) return; @@ -335,148 +338,149 @@ fbCompositeSolidMask_nx8x8888arm ( uint32_t src_hi = (src >> 8) & component_mask; uint32_t src_lo = src & component_mask; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; -//#define inner_branch +/* #define inner_branch */ asm volatile ( - "cmp %[w], #0\n\t" - "beq 2f\n\t" - "1:\n\t" - /* load mask */ - "ldrb r5, [%[mask]], #1\n\t" + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load mask */ + "ldrb r5, [%[mask]], #1\n\t" #ifdef inner_branch - /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. - * The 0x0 case also allows us to avoid doing an unecessary data - * write which is more valuable so we only check for that */ - "cmp r5, #0\n\t" - "beq 3f\n\t" + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. 
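/* Illustrative sketch (not from the patch): the src_hi/src_lo split
 * above and the uxtb16 instructions in these loops place a packed
 * a8r8g8b8 pixel into two halfword-spaced lane pairs, so two channels
 * can be scaled with one 32-bit mla. Bytes 0 and 2 (b, r) land in one
 * word, bytes 1 and 3 (g, a) in the other.
 */
#include <stdint.h>

static void
split_even_odd (uint32_t pixel, uint32_t *even, uint32_t *odd)
{
    *even = pixel & 0x00ff00ff;        /* uxtb16 rd, rm         */
    *odd  = (pixel >> 8) & 0x00ff00ff; /* uxtb16 rd, rm, ror #8 */
}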
+ * The 0x0 case also allows us to avoid doing an unecessary data + * write which is more valuable so we only check for that + */ + "cmp r5, #0\n\t" + "beq 3f\n\t" #endif - "ldr r4, [%[dest]] \n\t" + "ldr r4, [%[dest]] \n\t" - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, %[src_lo], r5, %[component_half]\n\t" - "mla r7, %[src_hi], r5, %[component_half]\n\t" + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, %[src_lo], r5, %[component_half]\n\t" + "mla r7, %[src_hi], r5, %[component_half]\n\t" - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" - /* recombine */ - "orr r5, r6, r7, lsl #8\n\t" + /* recombine */ + "orr r5, r6, r7, lsl #8\n\t" - "uxtb16 r6, r4\n\t" - "uxtb16 r7, r4, ror #8\n\t" + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" - /* we could simplify this to use 'sub' if we were - * willing to give up a register for alpha_mask */ - "mvn r8, r5\n\t" - "mov r8, r8, lsr #24\n\t" + /* we could simplify this to use 'sub' if we were + * willing to give up a register for alpha_mask */ + "mvn r8, r5\n\t" + "mov r8, r8, lsr #24\n\t" - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, r6, r8, %[component_half]\n\t" - "mla r7, r7, r8, %[component_half]\n\t" + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" - /* recombine */ - "orr r6, r6, r7, lsl #8\n\t" + /* recombine */ + "orr r6, r6, r7, lsl #8\n\t" - "uqadd8 r5, r6, r5\n\t" + "uqadd8 r5, r6, r5\n\t" #ifdef inner_branch - "3:\n\t" + "3:\n\t" #endif - "str r5, [%[dest]], #4\n\t" - /* increment counter and jmp to top */ - "subs %[w], %[w], #1\n\t" - "bne 1b\n\t" - "2:\n\t" - : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask) - : [component_half] "r" (component_half), - [src_hi] "r" (src_hi), [src_lo] "r" (src_lo) - : "r4", "r5", "r6", "r7", "r8", "cc", "memory" - ); + "str r5, [%[dest]], #4\n\t" + /* increment counter and jmp to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask) + : [component_half] "r" (component_half), + [src_hi] "r" (src_hi), [src_lo] "r" (src_lo) + : "r4", "r5", "r6", "r7", "r8", "cc", "memory"); } } -static const FastPathInfo arm_simd_fast_path_array[] = +static const pixman_fast_path_t arm_simd_fast_path_array[] = { - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888arm, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888arm, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888arm, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888arm, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888arm, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888arm, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, 
PIXMAN_a8r8g8b8, arm_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, arm_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, arm_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, arm_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, arm_composite_over_8888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, arm_composite_over_8888_n_8888, NEED_SOLID_MASK }, - { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcAdd_8000x8000arm, 0 }, + { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, arm_composite_add_8000_8000, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888arm, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888arm, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888arm, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888arm, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, arm_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, arm_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, arm_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, arm_composite_over_n_8_8888, 0 }, { PIXMAN_OP_NONE }, }; -const FastPathInfo *const arm_simd_fast_paths = arm_simd_fast_path_array; +const pixman_fast_path_t *const arm_simd_fast_paths = arm_simd_fast_path_array; static void arm_simd_composite (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t *src, - pixman_image_t *mask, - pixman_image_t *dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_op_t op, + pixman_image_t * src, + pixman_image_t * mask, + pixman_image_t * dest, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { if (_pixman_run_fast_path (arm_simd_fast_paths, imp, - op, src, mask, dest, - src_x, src_y, - mask_x, mask_y, - dest_x, dest_y, - width, height)) + op, src, mask, dest, + src_x, src_y, + mask_x, mask_y, + dest_x, dest_y, + width, height)) { return; } _pixman_implementation_composite (imp->delegate, op, - src, mask, dest, - src_x, src_y, - mask_x, mask_y, - dest_x, dest_y, - width, height); + src, mask, dest, + src_x, src_y, + mask_x, mask_y, + dest_x, dest_y, + width, height); } pixman_implementation_t * -_pixman_implementation_create_arm_simd (pixman_implementation_t *toplevel) +_pixman_implementation_create_arm_simd (void) { - pixman_implementation_t *general = _pixman_implementation_create_fast_path (NULL); - pixman_implementation_t *imp = _pixman_implementation_create (toplevel, general); + pixman_implementation_t *general = _pixman_implementation_create_fast_path (); + pixman_implementation_t *imp = _pixman_implementation_create (general); imp->composite = arm_simd_composite; return imp; } + diff --git a/lib/pixman/pixman/pixman-arm-simd.h b/lib/pixman/pixman/pixman-arm-simd.h deleted file mode 100644 index 8c1f88342..000000000 --- a/lib/pixman/pixman/pixman-arm-simd.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright © 2008 Mozilla Corporation - * - * Permission to 
use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Mozilla Corporation not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Mozilla Corporation makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Jeff Muizelaar (jeff@infidigm.net) - * - */ - -#include "pixman-private.h" - -#ifdef USE_ARM_SIMD - -pixman_bool_t pixman_have_arm_simd(void); - -#else -#define pixman_have_arm_simd() FALSE -#endif - -#ifdef USE_ARM_SIMD - -extern const FastPathInfo *const arm_simd_fast_paths; - -void -fbCompositeSrcAdd_8000x8000arm ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height); - -void -fbCompositeSrc_8888x8888arm ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height); - -void -fbCompositeSrc_8888x8x8888arm ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height); - -void -fbCompositeSolidMask_nx8x8888arm ( - pixman_implementation_t * impl, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height); - - -#endif /* USE_ARM */ diff --git a/lib/pixman/pixman/pixman-bits-image.c b/lib/pixman/pixman/pixman-bits-image.c index 888e487e9..7a1910935 100644 --- a/lib/pixman/pixman/pixman-bits-image.c +++ b/lib/pixman/pixman/pixman-bits-image.c @@ -1,346 +1,773 @@ /* + * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. + * 2005 Lars Knoll & Zack Rusin, Trolltech + * 2008 Aaron Plattner, NVIDIA Corporation * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. + * Copyright © 2007, 2009 Red Hat, Inc. 
* * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. + * documentation, and that the name of Keith Packard not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Keith Packard makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. */ +#ifdef HAVE_CONFIG_H #include <config.h> +#endif #include <stdlib.h> +#include <string.h> #include "pixman-private.h" +#include "pixman-combine32.h" - -#define READ_ACCESS(f) ((image->common.read_func)? f##_accessors : f) -#define WRITE_ACCESS(f) ((image->common.write_func)? 
f##_accessors : f) +/* Store functions */ static void -fbFetchSolid(bits_image_t * image, - int x, int y, int width, - uint32_t *buffer, - uint32_t *mask, uint32_t maskBits) +bits_image_store_scanline_32 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *buffer) { - uint32_t color; - uint32_t *end; - fetchPixelProc32 fetch = - READ_ACCESS(pixman_fetchPixelProcForPicture32)(image); - - color = fetch(image, 0, 0); - - end = buffer + width; - while (buffer < end) - *(buffer++) = color; + image->store_scanline_raw_32 (image, x, y, width, buffer); + + if (image->common.alpha_map) + { + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + bits_image_store_scanline_32 (image->common.alpha_map, x, y, width, buffer); + } } static void -fbFetchSolid64(bits_image_t * image, - int x, int y, int width, - uint64_t *buffer, void *unused, uint32_t unused2) +bits_image_store_scanline_64 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *buffer) { - uint64_t color; - uint64_t *end; - fetchPixelProc64 fetch = - READ_ACCESS(pixman_fetchPixelProcForPicture64)(image); - - color = fetch(image, 0, 0); - - end = buffer + width; - while (buffer < end) - *(buffer++) = color; + image->store_scanline_raw_64 (image, x, y, width, buffer); + + if (image->common.alpha_map) + { + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + bits_image_store_scanline_64 (image->common.alpha_map, x, y, width, buffer); + } } -static void -fbFetch(bits_image_t * image, - int x, int y, int width, - uint32_t *buffer, uint32_t *mask, uint32_t maskBits) +void +_pixman_image_store_scanline_32 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *buffer) +{ + image->store_scanline_32 (image, x, y, width, buffer); +} + +void +_pixman_image_store_scanline_64 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *buffer) { - fetchProc32 fetch = READ_ACCESS(pixman_fetchProcForPicture32)(image); - - fetch(image, x, y, width, buffer); + image->store_scanline_64 (image, x, y, width, buffer); +} + +/* Fetch functions */ + +static uint32_t +bits_image_fetch_pixel_alpha (bits_image_t *image, int x, int y) +{ + uint32_t pixel; + uint32_t pixel_a; + + pixel = image->fetch_pixel_raw_32 (image, x, y); + + assert (image->common.alpha_map); + + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; + + if (x < 0 || x >= image->common.alpha_map->width || + y < 0 || y >= image->common.alpha_map->height) + { + pixel_a = 0; + } + else + { + pixel_a = image->common.alpha_map->fetch_pixel_raw_32 ( + image->common.alpha_map, x, y); + pixel_a = ALPHA_8 (pixel_a); + } + + pixel &= 0x00ffffff; + pixel |= (pixel_a << 24); + + return pixel; +} + +static force_inline uint32_t +get_pixel (bits_image_t *image, int x, int y, pixman_bool_t check_bounds) +{ + if (check_bounds && + (x < 0 || x >= image->width || y < 0 || y >= image->height)) + { + return 0; + } + + return image->fetch_pixel_32 (image, x, y); +} + +static force_inline void +repeat (pixman_repeat_t repeat, int size, int *coord) +{ + switch (repeat) + { + case PIXMAN_REPEAT_NORMAL: + *coord = MOD (*coord, size); + break; + + case PIXMAN_REPEAT_PAD: + *coord = CLIP (*coord, 0, size - 1); + break; + + case PIXMAN_REPEAT_REFLECT: + *coord = MOD (*coord, size * 2); + + if (*coord >= size) + *coord = size * 2 - *coord - 1; + break; + + case PIXMAN_REPEAT_NONE: + break; + } +} + +static force_inline uint32_t +bits_image_fetch_pixel_nearest (bits_image_t *image, + pixman_fixed_t x, 
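/* Illustrative aside (not from the patch): worked examples of the
 * repeat() helper above for size = 4 (valid coordinates 0..3):
 *
 *   coord   NORMAL   PAD   REFLECT
 *     -1       3      0       0
 *      4       0      3       3
 *      6       2      3       1
 *
 * NORMAL tiles modulo the size, PAD clamps to the nearest edge, REFLECT
 * bounces back and forth, and NONE leaves the coordinate alone so the
 * caller bounds-checks instead (get_pixel with check_bounds = TRUE).
 * The REFLECT case in plain C, using a euclidean modulo like MOD:
 */
static int
reflect_coord (int coord, int size)
{
    int c = coord % (2 * size);

    if (c < 0)
        c += 2 * size; /* euclidean modulo, matching pixman's MOD */

    return (c >= size) ? 2 * size - c - 1 : c;
}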
+ pixman_fixed_t y) +{ + int x0 = pixman_fixed_to_int (x - pixman_fixed_e); + int y0 = pixman_fixed_to_int (y - pixman_fixed_e); + + if (image->common.repeat != PIXMAN_REPEAT_NONE) + { + repeat (image->common.repeat, image->width, &x0); + repeat (image->common.repeat, image->height, &y0); + + return get_pixel (image, x0, y0, FALSE); + } + else + { + return get_pixel (image, x0, y0, TRUE); + } +} + +static force_inline uint32_t +bits_image_fetch_pixel_bilinear (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y) +{ + pixman_repeat_t repeat_mode = image->common.repeat; + int width = image->width; + int height = image->height; + int x1, y1, x2, y2; + uint32_t tl, tr, bl, br, r; + int32_t distx, disty, idistx, idisty; + uint32_t ft, fb; + + x1 = x - pixman_fixed_1 / 2; + y1 = y - pixman_fixed_1 / 2; + + distx = (x1 >> 8) & 0xff; + disty = (y1 >> 8) & 0xff; + + x1 = pixman_fixed_to_int (x1); + y1 = pixman_fixed_to_int (y1); + x2 = x1 + 1; + y2 = y1 + 1; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, width, &x1); + repeat (repeat_mode, height, &y1); + repeat (repeat_mode, width, &x2); + repeat (repeat_mode, height, &y2); + + tl = get_pixel (image, x1, y1, FALSE); + bl = get_pixel (image, x1, y2, FALSE); + tr = get_pixel (image, x2, y1, FALSE); + br = get_pixel (image, x2, y2, FALSE); + } + else + { + tl = get_pixel (image, x1, y1, TRUE); + tr = get_pixel (image, x2, y1, TRUE); + bl = get_pixel (image, x1, y2, TRUE); + br = get_pixel (image, x2, y2, TRUE); + } + + idistx = 256 - distx; + idisty = 256 - disty; + +#define GET8(v, i) ((uint16_t) (uint8_t) ((v) >> i)) + ft = GET8 (tl, 0) * idistx + GET8 (tr, 0) * distx; + fb = GET8 (bl, 0) * idistx + GET8 (br, 0) * distx; + r = (((ft * idisty + fb * disty) >> 16) & 0xff); + ft = GET8 (tl, 8) * idistx + GET8 (tr, 8) * distx; + fb = GET8 (bl, 8) * idistx + GET8 (br, 8) * distx; + r |= (((ft * idisty + fb * disty) >> 8) & 0xff00); + ft = GET8 (tl, 16) * idistx + GET8 (tr, 16) * distx; + fb = GET8 (bl, 16) * idistx + GET8 (br, 16) * distx; + r |= (((ft * idisty + fb * disty)) & 0xff0000); + ft = GET8 (tl, 24) * idistx + GET8 (tr, 24) * distx; + fb = GET8 (bl, 24) * idistx + GET8 (br, 24) * distx; + r |= (((ft * idisty + fb * disty) << 8) & 0xff000000); + + return r; +} + +static force_inline uint32_t +bits_image_fetch_pixel_convolution (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y) +{ + pixman_fixed_t *params = image->common.filter_params; + int x_off = (params[0] - pixman_fixed_1) >> 1; + int y_off = (params[1] - pixman_fixed_1) >> 1; + int32_t cwidth = pixman_fixed_to_int (params[0]); + int32_t cheight = pixman_fixed_to_int (params[1]); + int32_t srtot, sgtot, sbtot, satot; + int32_t i, j, x1, x2, y1, y2; + pixman_repeat_t repeat_mode = image->common.repeat; + int width = image->width; + int height = image->height; + + params += 2; + + x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); + y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); + x2 = x1 + cwidth; + y2 = y1 + cheight; + + srtot = sgtot = sbtot = satot = 0; + + for (i = y1; i < y2; ++i) + { + for (j = x1; j < x2; ++j) + { + int rx = j; + int ry = i; + + pixman_fixed_t f = *params; + + if (f) + { + uint32_t pixel; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, width, &rx); + repeat (repeat_mode, height, &ry); + + pixel = get_pixel (image, rx, ry, FALSE); + } + else + { + pixel = get_pixel (image, rx, ry, TRUE); + } + + srtot += RED_8 (pixel) * f; + sgtot += GREEN_8 (pixel) * f; + sbtot += BLUE_8 (pixel) * f; + satot 
+= ALPHA_8 (pixel) * f; + } + + params++; + } + } + + satot >>= 16; + srtot >>= 16; + sgtot >>= 16; + sbtot >>= 16; + + satot = CLIP (satot, 0, 0xff); + srtot = CLIP (srtot, 0, 0xff); + sgtot = CLIP (sgtot, 0, 0xff); + sbtot = CLIP (sbtot, 0, 0xff); + + return ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot)); +} + +static force_inline uint32_t +bits_image_fetch_pixel_filtered (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y) +{ + switch (image->common.filter) + { + case PIXMAN_FILTER_NEAREST: + case PIXMAN_FILTER_FAST: + return bits_image_fetch_pixel_nearest (image, x, y); + break; + + case PIXMAN_FILTER_BILINEAR: + case PIXMAN_FILTER_GOOD: + case PIXMAN_FILTER_BEST: + return bits_image_fetch_pixel_bilinear (image, x, y); + break; + + case PIXMAN_FILTER_CONVOLUTION: + return bits_image_fetch_pixel_convolution (image, x, y); + break; + } + + return 0; } static void -fbFetch64(bits_image_t * image, - int x, int y, int width, - uint64_t *buffer, void *unused, uint32_t unused2) +bits_image_fetch_transformed (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask, + uint32_t mask_bits) { - fetchProc64 fetch = READ_ACCESS(pixman_fetchProcForPicture64)(image); - - fetch(image, x, y, width, buffer); + pixman_fixed_t x, y, w; + pixman_fixed_t ux, uy, uw; + pixman_vector_t v; + int i; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + /* when using convolution filters or PIXMAN_REPEAT_PAD one + * might get here without a transform */ + if (image->common.transform) + { + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + uw = image->common.transform->matrix[2][0]; + } + else + { + ux = pixman_fixed_1; + uy = 0; + uw = 0; + } + + x = v.vector[0]; + y = v.vector[1]; + w = v.vector[2]; + + if (w == pixman_fixed_1 && uw == 0) /* Affine */ + { + for (i = 0; i < width; ++i) + { + if (!mask || (mask[i] & mask_bits)) + { + buffer[i] = + bits_image_fetch_pixel_filtered (&image->bits, x, y); + } + + x += ux; + y += uy; + } + } + else + { + for (i = 0; i < width; ++i) + { + pixman_fixed_t x0, y0; + + if (!mask || (mask[i] & mask_bits)) + { + x0 = ((pixman_fixed_48_16_t)x << 16) / w; + y0 = ((pixman_fixed_48_16_t)y << 16) / w; + + buffer[i] = + bits_image_fetch_pixel_filtered (&image->bits, x0, y0); + } + + x += ux; + y += uy; + w += uw; + } + } } static void -fbStore(bits_image_t * image, int x, int y, int width, uint32_t *buffer) +bits_image_fetch_solid_32 (pixman_image_t * image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t * mask, + uint32_t mask_bits) { - uint32_t *bits; - int32_t stride; - storeProc32 store = WRITE_ACCESS(pixman_storeProcForPicture32)(image); - const pixman_indexed_t * indexed = image->indexed; - - bits = image->bits; - stride = image->rowstride; - bits += y*stride; - store((pixman_image_t *)image, bits, buffer, x, width, indexed); + uint32_t color; + uint32_t *end; + + color = image->bits.fetch_pixel_raw_32 (&image->bits, 0, 0); + + end = buffer + width; + while (buffer < end) + *(buffer++) = color; } static void -fbStore64 (bits_image_t * image, int x, int y, int width, uint64_t *buffer) +bits_image_fetch_solid_64 (pixman_image_t * image, + int x, + int y, + int width, + uint32_t * b, + const uint32_t * unused, 
+ uint32_t unused2) { - uint32_t *bits; - int32_t stride; - storeProc64 store = WRITE_ACCESS(pixman_storeProcForPicture64)(image); - const pixman_indexed_t * indexed = image->indexed; - - bits = image->bits; - stride = image->rowstride; - bits += y*stride; - store((pixman_image_t *)image, bits, buffer, x, width, indexed); + uint64_t color; + uint64_t *buffer = (uint64_t *)b; + uint64_t *end; + + color = image->bits.fetch_pixel_raw_64 (&image->bits, 0, 0); + + end = buffer + width; + while (buffer < end) + *(buffer++) = color; } static void -fbStoreExternalAlpha (bits_image_t * image, int x, int y, int width, - uint32_t *buffer) +bits_image_fetch_untransformed_repeat_none (bits_image_t *image, + pixman_bool_t wide, + int x, + int y, + int width, + uint32_t * buffer) { - uint32_t *bits, *alpha_bits; - int32_t stride, astride; - int ax, ay; - storeProc32 store; - storeProc32 astore; - const pixman_indexed_t * indexed = image->indexed; - const pixman_indexed_t * aindexed; - - if (!image->common.alpha_map) { - // XXX[AGP]: This should never happen! - // fbStore(image, x, y, width, buffer); - abort(); + uint32_t w; + + if (y < 0 || y >= image->height) + { + memset (buffer, 0, width * (wide? 8 : 4)); return; } - store = WRITE_ACCESS(pixman_storeProcForPicture32)(image); - astore = WRITE_ACCESS(pixman_storeProcForPicture32)(image->common.alpha_map); - aindexed = image->common.alpha_map->indexed; + if (x < 0) + { + w = MIN (width, -x); - ax = x; - ay = y; + memset (buffer, 0, w * (wide ? 8 : 4)); - bits = image->bits; - stride = image->rowstride; + width -= w; + buffer += w * (wide? 2 : 1); + x += w; + } - alpha_bits = image->common.alpha_map->bits; - astride = image->common.alpha_map->rowstride; + if (x < image->width) + { + w = MIN (width, image->width - x); - bits += y*stride; - alpha_bits += (ay - image->common.alpha_origin.y)*astride; + if (wide) + image->fetch_scanline_raw_64 ((pixman_image_t *)image, x, y, w, buffer, NULL, 0); + else + image->fetch_scanline_raw_32 ((pixman_image_t *)image, x, y, w, buffer, NULL, 0); + width -= w; + buffer += w * (wide? 2 : 1); + x += w; + } - store((pixman_image_t *)image, bits, buffer, x, width, indexed); - astore((pixman_image_t *)image->common.alpha_map, - alpha_bits, buffer, ax - image->common.alpha_origin.x, width, aindexed); + memset (buffer, 0, width * (wide ? 8 : 4)); } static void -fbStoreExternalAlpha64 (bits_image_t * image, int x, int y, int width, - uint64_t *buffer) +bits_image_fetch_untransformed_repeat_normal (bits_image_t *image, + pixman_bool_t wide, + int x, + int y, + int width, + uint32_t * buffer) { - uint32_t *bits, *alpha_bits; - int32_t stride, astride; - int ax, ay; - storeProc64 store; - storeProc64 astore; - const pixman_indexed_t * indexed = image->indexed; - const pixman_indexed_t * aindexed; + uint32_t w; - store = ACCESS(pixman_storeProcForPicture64)(image); - astore = ACCESS(pixman_storeProcForPicture64)(image->common.alpha_map); - aindexed = image->common.alpha_map->indexed; + while (y < 0) + y += image->height; - ax = x; - ay = y; + while (y >= image->height) + y -= image->height; - bits = image->bits; - stride = image->rowstride; + while (width) + { + while (x < 0) + x += image->width; + while (x >= image->width) + x -= image->width; + + w = MIN (width, image->width - x); + + if (wide) + image->fetch_scanline_raw_64 ((pixman_image_t *)image, x, y, w, buffer, NULL, 0); + else + image->fetch_scanline_raw_32 ((pixman_image_t *)image, x, y, w, buffer, NULL, 0); + + buffer += w * (wide? 
2 : 1); + x += w; + width -= w; + } +} + +static void +bits_image_fetch_untransformed_32 (pixman_image_t * image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t * mask, + uint32_t mask_bits) +{ + if (image->common.repeat == PIXMAN_REPEAT_NONE) + { + bits_image_fetch_untransformed_repeat_none ( + &image->bits, FALSE, x, y, width, buffer); + } + else + { + bits_image_fetch_untransformed_repeat_normal ( + &image->bits, FALSE, x, y, width, buffer); + } +} + +static void +bits_image_fetch_untransformed_64 (pixman_image_t * image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t * unused, + uint32_t unused2) +{ + if (image->common.repeat == PIXMAN_REPEAT_NONE) + { + bits_image_fetch_untransformed_repeat_none ( + &image->bits, TRUE, x, y, width, buffer); + } + else + { + bits_image_fetch_untransformed_repeat_normal ( + &image->bits, TRUE, x, y, width, buffer); + } +} - alpha_bits = image->common.alpha_map->bits; - astride = image->common.alpha_map->rowstride; +static pixman_bool_t out_of_bounds_workaround = TRUE; - bits += y*stride; - alpha_bits += (ay - image->common.alpha_origin.y)*astride; +/* Old X servers rely on out-of-bounds accesses when they are asked + * to composite with a window as the source. They create a pixman image + * pointing to some bogus position in memory, but then they set a clip + * region to the position where the actual bits are. + * + * Due to a bug in old versions of pixman, where it would not clip + * against the image bounds when a clip region was set, this would + * actually work. So by default we allow certain out-of-bound access + * to happen unless explicitly disabled. + * + * Fixed X servers should call this function to disable the workaround. + */ +PIXMAN_EXPORT void +pixman_disable_out_of_bounds_workaround (void) +{ + out_of_bounds_workaround = FALSE; +} +static pixman_bool_t +source_image_needs_out_of_bounds_workaround (bits_image_t *image) +{ + if (image->common.clip_sources && + image->common.repeat == PIXMAN_REPEAT_NONE && + image->common.have_clip_region && + out_of_bounds_workaround) + { + if (!image->common.client_clip) + { + /* There is no client clip, so if the clip region extends beyond the + * drawable geometry, it must be because the X server generated the + * bogus clip region. 
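+	     * If instead the clip region stays within the drawable
+	     * geometry, clipping to the image bounds is safe, and the
+	     * extents check below detects that case.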
+ */ + const pixman_box32_t *extents = pixman_region32_extents (&image->common.clip_region); + + if (extents->x1 >= 0 && extents->x2 <= image->width && + extents->y1 >= 0 && extents->y2 <= image->height) + { + return FALSE; + } + } + + return TRUE; + } - store((pixman_image_t *)image, bits, buffer, x, width, indexed); - astore((pixman_image_t *)image->common.alpha_map, - alpha_bits, buffer, ax - image->common.alpha_origin.x, width, aindexed); + return FALSE; } static void bits_image_property_changed (pixman_image_t *image) { bits_image_t *bits = (bits_image_t *)image; - + + _pixman_bits_image_setup_raw_accessors (bits); + + image->bits.fetch_pixel_32 = image->bits.fetch_pixel_raw_32; + if (bits->common.alpha_map) { image->common.get_scanline_64 = - (scanFetchProc)_pixman_image_get_scanline_64_generic; + _pixman_image_get_scanline_generic_64; image->common.get_scanline_32 = - (scanFetchProc)READ_ACCESS(fbFetchExternalAlpha); + bits_image_fetch_transformed; + + image->bits.fetch_pixel_32 = bits_image_fetch_pixel_alpha; } else if ((bits->common.repeat != PIXMAN_REPEAT_NONE) && - bits->width == 1 && - bits->height == 1) + bits->width == 1 && + bits->height == 1) { - image->common.get_scanline_64 = (scanFetchProc)fbFetchSolid64; - image->common.get_scanline_32 = (scanFetchProc)fbFetchSolid; + image->common.get_scanline_64 = bits_image_fetch_solid_64; + image->common.get_scanline_32 = bits_image_fetch_solid_32; } else if (!bits->common.transform && - bits->common.filter != PIXMAN_FILTER_CONVOLUTION && - bits->common.repeat != PIXMAN_REPEAT_PAD && - bits->common.repeat != PIXMAN_REPEAT_REFLECT) + bits->common.filter != PIXMAN_FILTER_CONVOLUTION && + (bits->common.repeat == PIXMAN_REPEAT_NONE || + bits->common.repeat == PIXMAN_REPEAT_NORMAL)) { - image->common.get_scanline_64 = (scanFetchProc)fbFetch64; - image->common.get_scanline_32 = (scanFetchProc)fbFetch; + image->common.get_scanline_64 = bits_image_fetch_untransformed_64; + image->common.get_scanline_32 = bits_image_fetch_untransformed_32; } else { image->common.get_scanline_64 = - (scanFetchProc)_pixman_image_get_scanline_64_generic; + _pixman_image_get_scanline_generic_64; image->common.get_scanline_32 = - (scanFetchProc)READ_ACCESS(fbFetchTransformed); + bits_image_fetch_transformed; } - - if (bits->common.alpha_map) - { - bits->store_scanline_64 = (scanStoreProc)fbStoreExternalAlpha64; - bits->store_scanline_32 = fbStoreExternalAlpha; - } - else - { - bits->store_scanline_64 = (scanStoreProc)fbStore64; - bits->store_scanline_32 = fbStore; - } -} -void -_pixman_image_store_scanline_32 (bits_image_t *image, int x, int y, int width, - uint32_t *buffer) -{ - image->store_scanline_32 (image, x, y, width, buffer); -} + bits->store_scanline_64 = bits_image_store_scanline_64; + bits->store_scanline_32 = bits_image_store_scanline_32; -void -_pixman_image_store_scanline_64 (bits_image_t *image, int x, int y, int width, - uint32_t *buffer) -{ - image->store_scanline_64 (image, x, y, width, buffer); + bits->common.need_workaround = + source_image_needs_out_of_bounds_workaround (bits); } static uint32_t * create_bits (pixman_format_code_t format, - int width, - int height, - int *rowstride_bytes) + int width, + int height, + int * rowstride_bytes) { int stride; int buf_size; int bpp; - + /* what follows is a long-winded way, avoiding any possibility of integer * overflows, of saying: - * stride = ((width * bpp + FB_MASK) >> FB_SHIFT) * sizeof (uint32_t); + * stride = ((width * bpp + 0x1f) >> 5) * sizeof (uint32_t); */ - + bpp = PIXMAN_FORMAT_BPP 
(format); if (pixman_multiply_overflows_int (width, bpp)) return NULL; - + stride = width * bpp; - if (pixman_addition_overflows_int (stride, FB_MASK)) - return NULL; - - stride += FB_MASK; - stride >>= FB_SHIFT; - -#if FB_SHIFT < 2 - if (pixman_multiply_overflows_int (stride, sizeof (uint32_t))) + if (pixman_addition_overflows_int (stride, 0x1f)) return NULL; -#endif + + stride += 0x1f; + stride >>= 5; + stride *= sizeof (uint32_t); - + if (pixman_multiply_overflows_int (height, stride)) return NULL; - + buf_size = height * stride; - + if (rowstride_bytes) *rowstride_bytes = stride; - + return calloc (buf_size, 1); } PIXMAN_EXPORT pixman_image_t * -pixman_image_create_bits (pixman_format_code_t format, - int width, - int height, - uint32_t *bits, - int rowstride_bytes) +pixman_image_create_bits (pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride_bytes) { pixman_image_t *image; uint32_t *free_me = NULL; - + /* must be a whole number of uint32_t's */ return_val_if_fail (bits == NULL || - (rowstride_bytes % sizeof (uint32_t)) == 0, NULL); - + (rowstride_bytes % sizeof (uint32_t)) == 0, NULL); + if (!bits && width && height) { free_me = bits = create_bits (format, width, height, &rowstride_bytes); if (!bits) return NULL; } - - image = _pixman_image_allocate(); - - if (!image) { + + image = _pixman_image_allocate (); + + if (!image) + { if (free_me) free (free_me); + return NULL; } - + image->type = BITS; image->bits.format = format; image->bits.width = width; image->bits.height = height; image->bits.bits = bits; image->bits.free_me = free_me; - - image->bits.rowstride = rowstride_bytes / (int) sizeof (uint32_t); /* we store it in number - * of uint32_t's - */ + image->bits.read_func = NULL; + image->bits.write_func = NULL; + + /* The rowstride is stored in number of uint32_t */ + image->bits.rowstride = rowstride_bytes / (int) sizeof (uint32_t); + image->bits.indexed = NULL; - - pixman_region32_fini (&image->common.full_region); - pixman_region32_init_rect (&image->common.full_region, 0, 0, - image->bits.width, image->bits.height); - + image->common.property_changed = bits_image_property_changed; - - bits_image_property_changed (image); - + _pixman_image_reset_clip_region (image); - + return image; } diff --git a/lib/pixman/pixman/pixman-combine.c.template b/lib/pixman/pixman/pixman-combine.c.template new file mode 100644 index 000000000..c129980a8 --- /dev/null +++ b/lib/pixman/pixman/pixman-combine.c.template @@ -0,0 +1,2436 @@ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <math.h> +#include <string.h> + +#include "pixman-private.h" + +#include "pixman-combine.h" + +/*** per channel helper functions ***/ + +static void +combine_mask_ca (comp4_t *src, comp4_t *mask) +{ + comp4_t a = *mask; + + comp4_t x; + comp2_t xa; + + if (!a) + { + *(src) = 0; + return; + } + + x = *(src); + if (a == ~0) + { + x = x >> A_SHIFT; + x |= x << G_SHIFT; + x |= x << R_SHIFT; + *(mask) = x; + return; + } + + xa = x >> A_SHIFT; + UNcx4_MUL_UNcx4 (x, a); + *(src) = x; + + UNcx4_MUL_UNc (a, xa); + *(mask) = a; +} + +static void +combine_mask_value_ca (comp4_t *src, const comp4_t *mask) +{ + comp4_t a = *mask; + comp4_t x; + + if (!a) + { + *(src) = 0; + return; + } + + if (a == ~0) + return; + + x = *(src); + UNcx4_MUL_UNcx4 (x, a); + *(src) = x; +} + +static void +combine_mask_alpha_ca (const comp4_t *src, comp4_t *mask) +{ + comp4_t a = *(mask); + comp4_t x; + + if (!a) + return; + + x = *(src) >> A_SHIFT; + if (x == MASK) + return; + + if (a == ~0) + { + 
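+	/* fully opaque mask: the combined alpha is the source alpha
+	 * replicated into all four channels */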
x |= x << G_SHIFT; + x |= x << R_SHIFT; + *(mask) = x; + return; + } + + UNcx4_MUL_UNc (a, x); + *(mask) = a; +} + +/* + * There are two ways of handling alpha -- either as a single unified value or + * a separate value for each component, hence each macro must have two + * versions. The unified alpha version has a 'U' at the end of the name, + * the component version has a 'C'. Similarly, functions which deal with + * this difference will have two versions using the same convention. + */ + +/* + * All of the composing functions + */ + +static force_inline comp4_t +combine_mask (const comp4_t *src, const comp4_t *mask, int i) +{ + comp4_t s, m; + + if (mask) + { + m = *(mask + i) >> A_SHIFT; + + if (!m) + return 0; + } + + s = *(src + i); + + if (mask) + UNcx4_MUL_UNc (s, m); + + return s; +} + +static void +combine_clear (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + memset (dest, 0, width * sizeof(comp4_t)); +} + +static void +combine_src_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + if (!mask) + memcpy (dest, src, width * sizeof (comp4_t)); + else + { + for (i = 0; i < width; ++i) + { + comp4_t s = combine_mask (src, mask, i); + + *(dest + i) = s; + } + } +} + +/* if the Src is opaque, call combine_src_u */ +static void +combine_over_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = combine_mask (src, mask, i); + comp4_t d = *(dest + i); + comp4_t ia = ALPHA_c (~s); + + UNcx4_MUL_UNc_ADD_UNcx4 (d, ia, s); + *(dest + i) = d; + } +} + +/* if the Dst is opaque, this is a noop */ +static void +combine_over_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = combine_mask (src, mask, i); + comp4_t d = *(dest + i); + comp4_t ia = ALPHA_c (~*(dest + i)); + UNcx4_MUL_UNc_ADD_UNcx4 (s, ia, d); + *(dest + i) = s; + } +} + +/* if the Dst is opaque, call combine_src_u */ +static void +combine_in_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = combine_mask (src, mask, i); + comp4_t a = ALPHA_c (*(dest + i)); + UNcx4_MUL_UNc (s, a); + *(dest + i) = s; + } +} + +/* if the Src is opaque, this is a noop */ +static void +combine_in_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = combine_mask (src, mask, i); + comp4_t d = *(dest + i); + comp4_t a = ALPHA_c (s); + UNcx4_MUL_UNc (d, a); + *(dest + i) = d; + } +} + +/* if the Dst is opaque, call combine_clear */ +static void +combine_out_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = combine_mask (src, mask, i); + comp4_t a = ALPHA_c (~*(dest + i)); + UNcx4_MUL_UNc (s, a); + *(dest + i) = s; + } +} + +/* if the Src is opaque, call combine_clear */ +static void +combine_out_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * 
src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = combine_mask (src, mask, i); + comp4_t d = *(dest + i); + comp4_t a = ALPHA_c (~s); + UNcx4_MUL_UNc (d, a); + *(dest + i) = d; + } +} + +/* if the Src is opaque, call combine_in_u */ +/* if the Dst is opaque, call combine_over_u */ +/* if both the Src and Dst are opaque, call combine_src_u */ +static void +combine_atop_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = combine_mask (src, mask, i); + comp4_t d = *(dest + i); + comp4_t dest_a = ALPHA_c (d); + comp4_t src_ia = ALPHA_c (~s); + + UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (s, dest_a, d, src_ia); + *(dest + i) = s; + } +} + +/* if the Src is opaque, call combine_over_reverse_u */ +/* if the Dst is opaque, call combine_in_reverse_u */ +/* if both the Src and Dst are opaque, call combine_dst_u */ +static void +combine_atop_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = combine_mask (src, mask, i); + comp4_t d = *(dest + i); + comp4_t src_a = ALPHA_c (s); + comp4_t dest_ia = ALPHA_c (~d); + + UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (s, dest_ia, d, src_a); + *(dest + i) = s; + } +} + +/* if the Src is opaque, call combine_over_u */ +/* if the Dst is opaque, call combine_over_reverse_u */ +/* if both the Src and Dst are opaque, call combine_clear */ +static void +combine_xor_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = combine_mask (src, mask, i); + comp4_t d = *(dest + i); + comp4_t src_ia = ALPHA_c (~s); + comp4_t dest_ia = ALPHA_c (~d); + + UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (s, dest_ia, d, src_ia); + *(dest + i) = s; + } +} + +static void +combine_add_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = combine_mask (src, mask, i); + comp4_t d = *(dest + i); + UNcx4_ADD_UNcx4 (d, s); + *(dest + i) = d; + } +} + +/* if the Src is opaque, call combine_add_u */ +/* if the Dst is opaque, call combine_add_u */ +/* if both the Src and Dst are opaque, call combine_add_u */ +static void +combine_saturate_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = combine_mask (src, mask, i); + comp4_t d = *(dest + i); + comp2_t sa, da; + + sa = s >> A_SHIFT; + da = ~d >> A_SHIFT; + if (sa > da) + { + sa = DIV_UNc (da, sa); + UNcx4_MUL_UNc (s, sa); + } + ; + UNcx4_ADD_UNcx4 (d, s); + *(dest + i) = d; + } +} + +/* + * PDF blend modes: + * The following blend modes have been taken from the PDF ISO 32000 + * specification, which at this point in time is available from + * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf + * The relevant chapters are 11.3.5 and 11.3.6. + * The formula for computing the final pixel color given in 11.3.6 is: + * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) + * with B() being the blend function. 
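+ * Only the B() term depends on the blend mode; the first two terms are
+ * simply the destination and source contributions in the areas covered by
+ * only one of the two.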
+ * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs + * + * These blend modes should match the SVG filter draft specification, as + * it has been designed to mirror ISO 32000. Note that at the current point + * no released draft exists that shows this, as the formulas have not been + * updated yet after the release of ISO 32000. + * + * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and + * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an + * argument. Note that this implementation operates on premultiplied colors, + * while the PDF specification does not. Therefore the code uses the formula + * ar.Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as) + */ + +/* + * Multiply + * B(Dca, ad, Sca, as) = Dca.Sca + */ + +static void +combine_multiply_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = combine_mask (src, mask, i); + comp4_t d = *(dest + i); + comp4_t ss = s; + comp4_t src_ia = ALPHA_c (~s); + comp4_t dest_ia = ALPHA_c (~d); + + UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (ss, dest_ia, d, src_ia); + UNcx4_MUL_UNcx4 (d, s); + UNcx4_ADD_UNcx4 (d, ss); + + *(dest + i) = d; + } +} + +static void +combine_multiply_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t m = *(mask + i); + comp4_t s = *(src + i); + comp4_t d = *(dest + i); + comp4_t r = d; + comp4_t dest_ia = ALPHA_c (~d); + + combine_mask_value_ca (&s, &m); + + UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (r, ~m, s, dest_ia); + UNcx4_MUL_UNcx4 (d, s); + UNcx4_ADD_UNcx4 (r, d); + + *(dest + i) = r; + } +} + +#define PDF_SEPARABLE_BLEND_MODE(name) \ + static void \ + combine_ ## name ## _u (pixman_implementation_t *imp, \ + pixman_op_t op, \ + comp4_t * dest, \ + const comp4_t * src, \ + const comp4_t * mask, \ + int width) \ + { \ + int i; \ + for (i = 0; i < width; ++i) { \ + comp4_t s = combine_mask (src, mask, i); \ + comp4_t d = *(dest + i); \ + comp1_t sa = ALPHA_c (s); \ + comp1_t isa = ~sa; \ + comp1_t da = ALPHA_c (d); \ + comp1_t ida = ~da; \ + comp4_t result; \ + \ + result = d; \ + UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (result, isa, s, ida); \ + \ + *(dest + i) = result + \ + (DIV_ONE_UNc (sa * da) << A_SHIFT) + \ + (blend_ ## name (RED_c (d), da, RED_c (s), sa) << R_SHIFT) + \ + (blend_ ## name (GREEN_c (d), da, GREEN_c (s), sa) << G_SHIFT) + \ + (blend_ ## name (BLUE_c (d), da, BLUE_c (s), sa)); \ + } \ + } \ + \ + static void \ + combine_ ## name ## _ca (pixman_implementation_t *imp, \ + pixman_op_t op, \ + comp4_t * dest, \ + const comp4_t * src, \ + const comp4_t * mask, \ + int width) \ + { \ + int i; \ + for (i = 0; i < width; ++i) { \ + comp4_t m = *(mask + i); \ + comp4_t s = *(src + i); \ + comp4_t d = *(dest + i); \ + comp1_t da = ALPHA_c (d); \ + comp1_t ida = ~da; \ + comp4_t result; \ + \ + combine_mask_value_ca (&s, &m); \ + \ + result = d; \ + UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (result, ~m, s, ida); \ + \ + result += \ + (DIV_ONE_UNc (ALPHA_c (m) * da) << A_SHIFT) + \ + (blend_ ## name (RED_c (d), da, RED_c (s), RED_c (m)) << R_SHIFT) + \ + (blend_ ## name (GREEN_c (d), da, GREEN_c (s), GREEN_c (m)) << G_SHIFT) + \ + (blend_ ## name (BLUE_c (d), da, BLUE_c (s), BLUE_c (m))); \ + \ + *(dest + i) = result; \ + } \ + } + +/* + * Screen + * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca 
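+ *
+ * (This is the non-premultiplied screen function B(Cb, Cs) = Cb + Cs - Cb.Cs
+ * rewritten as described above: substitute Cb = Dca/da, Cs = Sca/sa and
+ * multiply through by sa.da.)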
+ */ +static inline comp4_t +blend_screen (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa) +{ + return DIV_ONE_UNc (sca * da + dca * sa - sca * dca); +} + +PDF_SEPARABLE_BLEND_MODE (screen) + +/* + * Overlay + * B(Dca, Da, Sca, Sa) = + * if 2.Dca < Da + * 2.Sca.Dca + * otherwise + * Sa.Da - 2.(Da - Dca).(Sa - Sca) + */ +static inline comp4_t +blend_overlay (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa) +{ + comp4_t rca; + + if (2 * dca < da) + rca = 2 * sca * dca; + else + rca = sa * da - 2 * (da - dca) * (sa - sca); + return DIV_ONE_UNc (rca); +} + +PDF_SEPARABLE_BLEND_MODE (overlay) + +/* + * Darken + * B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa) + */ +static inline comp4_t +blend_darken (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa) +{ + comp4_t s, d; + + s = sca * da; + d = dca * sa; + return DIV_ONE_UNc (s > d ? d : s); +} + +PDF_SEPARABLE_BLEND_MODE (darken) + +/* + * Lighten + * B(Dca, Da, Sca, Sa) = max (Sca.Da, Dca.Sa) + */ +static inline comp4_t +blend_lighten (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa) +{ + comp4_t s, d; + + s = sca * da; + d = dca * sa; + return DIV_ONE_UNc (s > d ? s : d); +} + +PDF_SEPARABLE_BLEND_MODE (lighten) + +/* + * Color dodge + * B(Dca, Da, Sca, Sa) = + * if Dca == 0 + * 0 + * if Sca == Sa + * Sa.Da + * otherwise + * Sa.Da. min (1, Dca / Da / (1 - Sca/Sa)) + */ +static inline comp4_t +blend_color_dodge (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa) +{ + if (sca >= sa) + { + return dca == 0 ? 0 : DIV_ONE_UNc (sa * da); + } + else + { + comp4_t rca = dca * sa / (sa - sca); + return DIV_ONE_UNc (sa * MIN (rca, da)); + } +} + +PDF_SEPARABLE_BLEND_MODE (color_dodge) + +/* + * Color burn + * B(Dca, Da, Sca, Sa) = + * if Dca == Da + * Sa.Da + * if Sca == 0 + * 0 + * otherwise + * Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca)) + */ +static inline comp4_t +blend_color_burn (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa) +{ + if (sca == 0) + { + return dca < da ? 
0 : DIV_ONE_UNc (sa * da);
+    }
+    else
+    {
+	comp4_t rca = (da - dca) * sa / sca;
+	return DIV_ONE_UNc (sa * (MAX (rca, da) - rca));
+    }
+}
+
+PDF_SEPARABLE_BLEND_MODE (color_burn)
+
+/*
+ * Hard light
+ * B(Dca, Da, Sca, Sa) =
+ *   if 2.Sca < Sa
+ *      2.Sca.Dca
+ *   otherwise
+ *      Sa.Da - 2.(Da - Dca).(Sa - Sca)
+ */
+static inline comp4_t
+blend_hard_light (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
+{
+    if (2 * sca < sa)
+	return DIV_ONE_UNc (2 * sca * dca);
+    else
+	return DIV_ONE_UNc (sa * da - 2 * (da - dca) * (sa - sca));
+}
+
+PDF_SEPARABLE_BLEND_MODE (hard_light)
+
+/*
+ * Soft light
+ * B(Dca, Da, Sca, Sa) =
+ *   if (2.Sca <= Sa)
+ *      Dca.(Sa - (1 - Dca/Da).(2.Sca - Sa))
+ *   otherwise if 4.Dca <= Da
+ *      Dca.(Sa + (2.Sca - Sa).((16.Dca/Da - 12).Dca/Da + 3))
+ *   otherwise
+ *      (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa))
+ */
+static inline comp4_t
+blend_soft_light (comp4_t dca_org,
+		  comp4_t da_org,
+		  comp4_t sca_org,
+		  comp4_t sa_org)
+{
+    double dca = dca_org * (1.0 / MASK);
+    double da = da_org * (1.0 / MASK);
+    double sca = sca_org * (1.0 / MASK);
+    double sa = sa_org * (1.0 / MASK);
+    double rca;
+
+    if (2 * sca < sa)
+    {
+	if (da == 0)
+	    rca = dca * sa;
+	else
+	    rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da;
+    }
+    else if (da == 0)
+    {
+	rca = 0;
+    }
+    else if (4 * dca <= da)
+    {
+	rca = dca * sa +
+	    (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3);
+    }
+    else
+    {
+	rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca - sa);
+    }
+    return rca * MASK + 0.5;
+}
+
+PDF_SEPARABLE_BLEND_MODE (soft_light)
+
+/*
+ * Difference
+ * B(Dca, Da, Sca, Sa) = abs (Dca.Sa - Sca.Da)
+ */
+static inline comp4_t
+blend_difference (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
+{
+    comp4_t dcasa = dca * sa;
+    comp4_t scada = sca * da;
+
+    if (scada < dcasa)
+	return DIV_ONE_UNc (dcasa - scada);
+    else
+	return DIV_ONE_UNc (scada - dcasa);
+}
+
+PDF_SEPARABLE_BLEND_MODE (difference)
+
+/*
+ * Exclusion
+ * B(Dca, Da, Sca, Sa) = (Sca.Da + Dca.Sa - 2.Sca.Dca)
+ */
+
+/* This can be made faster by writing it directly and not using
+ * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */
+
+static inline comp4_t
+blend_exclusion (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
+{
+    return DIV_ONE_UNc (sca * da + dca * sa - 2 * dca * sca);
+}
+
+PDF_SEPARABLE_BLEND_MODE (exclusion)
+
+#undef PDF_SEPARABLE_BLEND_MODE
+
+/*
+ * PDF non-separable blend modes are implemented using the following functions
+ * to operate in HSL space, with Cmax, Cmid, Cmin referring to the max, mid
+ * and min value of the red, green and blue components.
+ *
+ * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
+ *
+ * clip_color (C):
+ *     l = LUM (C)
+ *     min = Cmin
+ *     max = Cmax
+ *     if min < 0.0
+ *         C = l + ( ( ( C – l ) × l ) ⁄ ( l – min ) )
+ *     if max > 1.0
+ *         C = l + ( ( ( C – l ) × ( 1 – l ) ) ⁄ ( max – l ) )
+ *     return C
+ *
+ * set_lum (C, l):
+ *     d = l – LUM (C)
+ *     C += d
+ *     return clip_color (C)
+ *
+ * SAT (C) = CH_MAX (C) - CH_MIN (C)
+ *
+ * set_sat (C, s):
+ *     if Cmax > Cmin
+ *         Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
+ *         Cmax = s
+ *     else
+ *         Cmid = Cmax = 0.0
+ *     Cmin = 0.0
+ *     return C
+ */
+
+/* For premultiplied colors, we need to know what happens when C is
+ * multiplied by a real number. LUM and SAT are linear:
+ *
+ *     LUM (r × C) = r × LUM (C)           SAT (r * C) = r * SAT (C)
+ *
+ * If we extend clip_color with an extra argument a and change
+ *
+ *     if max > 1.0
+ *
+ * into
+ *
+ *     if max > a
+ *
+ * then clip_color is also linear:
+ *
+ *     r * clip_color (C, a) = clip_color (r * C, r * a);
+ *
+ * for positive r.
+ *
+ * Similarly, we can extend set_lum with an extra argument that is just passed
+ * on to clip_color:
+ *
+ *     r * set_lum ( C, l, a)
+ *
+ *         = r × clip_color ( C + l - LUM (C), a)
+ *
+ *         = clip_color ( r * C + r × l - r * LUM (C), r * a)
+ *
+ *         = set_lum ( r * C, r * l, r * a)
+ *
+ * Finally, set_sat:
+ *
+ *     r * set_sat (C, s) = set_sat (x * C, r * s)
+ *
+ * The above holds for all non-zero x, because the x'es in the fraction for
+ * C_mid cancel out. Specifically, it holds for x = r:
+ *
+ *     r * set_sat (C, s) = set_sat (r * C, r * s)
+ *
+ */
+
+/* So, for the non-separable PDF blend modes, we have (using s, d for
+ * non-premultiplied colors, and S, D for premultiplied):
+ *
+ * Color:
+ *
+ *     a_s * a_d * B(s, d)
+ *   = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1)
+ *   = set_lum (S * a_d, a_s * LUM (D), a_s * a_d)
+ *
+ *
+ * Luminosity:
+ *
+ *     a_s * a_d * B(s, d)
+ *   = a_s * a_d * set_lum (D/a_d, LUM (S/a_s), 1)
+ *   = set_lum (a_s * D, a_d * LUM (S), a_s * a_d)
+ *
+ *
+ * Saturation:
+ *
+ *     a_s * a_d * B(s, d)
+ *   = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
+ *   = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
+ *              a_s * LUM (D), a_s * a_d)
+ *   = set_lum (set_sat (a_s * D, a_d * SAT (S)), a_s * LUM (D), a_s * a_d)
+ *
+ * Hue:
+ *
+ *     a_s * a_d * B(s, d)
+ *   = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
+ *   = set_lum (set_sat (a_d * S, a_s * SAT (D)),
+ *              a_s * LUM (D), a_s * a_d)
+ *
+ */
+
+#define CH_MIN(c) (c[0] < c[1] ? (c[0] < c[2] ? c[0] : c[2]) : (c[1] < c[2] ? c[1] : c[2]))
+#define CH_MAX(c) (c[0] > c[1] ? (c[0] > c[2] ? c[0] : c[2]) : (c[1] > c[2] ?
c[1] : c[2])) +#define LUM(c) ((c[0] * 30 + c[1] * 59 + c[2] * 11) / 100) +#define SAT(c) (CH_MAX (c) - CH_MIN (c)) + +#define PDF_NON_SEPARABLE_BLEND_MODE(name) \ + static void \ + combine_ ## name ## _u (pixman_implementation_t *imp, \ + pixman_op_t op, \ + comp4_t *dest, \ + const comp4_t *src, \ + const comp4_t *mask, \ + int width) \ + { \ + int i; \ + for (i = 0; i < width; ++i) \ + { \ + comp4_t s = combine_mask (src, mask, i); \ + comp4_t d = *(dest + i); \ + comp1_t sa = ALPHA_c (s); \ + comp1_t isa = ~sa; \ + comp1_t da = ALPHA_c (d); \ + comp1_t ida = ~da; \ + comp4_t result; \ + comp4_t sc[3], dc[3], c[3]; \ + \ + result = d; \ + UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (result, isa, s, ida); \ + dc[0] = RED_c (d); \ + sc[0] = RED_c (s); \ + dc[1] = GREEN_c (d); \ + sc[1] = GREEN_c (s); \ + dc[2] = BLUE_c (d); \ + sc[2] = BLUE_c (s); \ + blend_ ## name (c, dc, da, sc, sa); \ + \ + *(dest + i) = result + \ + (DIV_ONE_UNc (sa * da) << A_SHIFT) + \ + (DIV_ONE_UNc (c[0]) << R_SHIFT) + \ + (DIV_ONE_UNc (c[1]) << G_SHIFT) + \ + (DIV_ONE_UNc (c[2])); \ + } \ + } + +static void +set_lum (comp4_t dest[3], comp4_t src[3], comp4_t sa, comp4_t lum) +{ + double a, l, min, max; + double tmp[3]; + + a = sa * (1.0 / MASK); + + l = lum * (1.0 / MASK); + tmp[0] = src[0] * (1.0 / MASK); + tmp[1] = src[1] * (1.0 / MASK); + tmp[2] = src[2] * (1.0 / MASK); + + l = l - LUM (tmp); + tmp[0] += l; + tmp[1] += l; + tmp[2] += l; + + /* clip_color */ + l = LUM (tmp); + min = CH_MIN (tmp); + max = CH_MAX (tmp); + + if (min < 0) + { + tmp[0] = l + (tmp[0] - l) * l / (l - min); + tmp[1] = l + (tmp[1] - l) * l / (l - min); + tmp[2] = l + (tmp[2] - l) * l / (l - min); + } + if (max > a) + { + tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l); + tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l); + tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l); + } + + dest[0] = tmp[0] * MASK + 0.5; + dest[1] = tmp[1] * MASK + 0.5; + dest[2] = tmp[2] * MASK + 0.5; +} + +static void +set_sat (comp4_t dest[3], comp4_t src[3], comp4_t sat) +{ + int id[3]; + comp4_t min, max; + + if (src[0] > src[1]) + { + if (src[0] > src[2]) + { + id[0] = 0; + if (src[1] > src[2]) + { + id[1] = 1; + id[2] = 2; + } + else + { + id[1] = 2; + id[2] = 1; + } + } + else + { + id[0] = 2; + id[1] = 0; + id[2] = 1; + } + } + else + { + if (src[0] > src[2]) + { + id[0] = 1; + id[1] = 0; + id[2] = 2; + } + else + { + id[2] = 0; + if (src[1] > src[2]) + { + id[0] = 1; + id[1] = 2; + } + else + { + id[0] = 2; + id[1] = 1; + } + } + } + + max = dest[id[0]]; + min = dest[id[2]]; + if (max > min) + { + dest[id[1]] = (dest[id[1]] - min) * sat / (max - min); + dest[id[0]] = sat; + dest[id[2]] = 0; + } + else + { + dest[0] = dest[1] = dest[2] = 0; + } +} + +/* + * Hue: + * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb)) + */ +static inline void +blend_hsl_hue (comp4_t c[3], + comp4_t dc[3], + comp4_t da, + comp4_t sc[3], + comp4_t sa) +{ + c[0] = sc[0] * da; + c[1] = sc[1] * da; + c[2] = sc[2] * da; + set_sat (c, c, SAT (dc) * sa); + set_lum (c, c, sa * da, LUM (dc) * sa); +} + +PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue) + +/* + * Saturation: + * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb)) + */ +static inline void +blend_hsl_saturation (comp4_t c[3], + comp4_t dc[3], + comp4_t da, + comp4_t sc[3], + comp4_t sa) +{ + c[0] = dc[0] * sa; + c[1] = dc[1] * sa; + c[2] = dc[2] * sa; + set_sat (c, c, SAT (sc) * da); + set_lum (c, c, sa * da, LUM (dc) * sa); +} + +PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation) + +/* + * Color: + * B(Cb, Cs) = set_lum (Cs, LUM (Cb)) + */ 
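+/* Premultiplied form, from the derivation further up:
+ * set_lum (S * a_d, a_s * LUM (D), a_s * a_d) */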
+static inline void +blend_hsl_color (comp4_t c[3], + comp4_t dc[3], + comp4_t da, + comp4_t sc[3], + comp4_t sa) +{ + c[0] = sc[0] * da; + c[1] = sc[1] * da; + c[2] = sc[2] * da; + set_lum (c, c, sa * da, LUM (dc) * sa); +} + +PDF_NON_SEPARABLE_BLEND_MODE (hsl_color) + +/* + * Luminosity: + * B(Cb, Cs) = set_lum (Cb, LUM (Cs)) + */ +static inline void +blend_hsl_luminosity (comp4_t c[3], + comp4_t dc[3], + comp4_t da, + comp4_t sc[3], + comp4_t sa) +{ + c[0] = dc[0] * sa; + c[1] = dc[1] * sa; + c[2] = dc[2] * sa; + set_lum (c, c, sa * da, LUM (sc) * da); +} + +PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity) + +#undef SAT +#undef LUM +#undef CH_MAX +#undef CH_MIN +#undef PDF_NON_SEPARABLE_BLEND_MODE + +/* Overlay + * + * All of the disjoint composing functions + * + * The four entries in the first column indicate what source contributions + * come from each of the four areas of the picture -- areas covered by neither + * A nor B, areas covered only by A, areas covered only by B and finally + * areas covered by both A and B. + * + * Disjoint Conjoint + * Fa Fb Fa Fb + * (0,0,0,0) 0 0 0 0 + * (0,A,0,A) 1 0 1 0 + * (0,0,B,B) 0 1 0 1 + * (0,A,B,A) 1 min((1-a)/b,1) 1 max(1-a/b,0) + * (0,A,B,B) min((1-b)/a,1) 1 max(1-b/a,0) 1 + * (0,0,0,A) max(1-(1-b)/a,0) 0 min(1,b/a) 0 + * (0,0,0,B) 0 max(1-(1-a)/b,0) 0 min(a/b,1) + * (0,A,0,0) min(1,(1-b)/a) 0 max(1-b/a,0) 0 + * (0,0,B,0) 0 min(1,(1-a)/b) 0 max(1-a/b,0) + * (0,0,B,A) max(1-(1-b)/a,0) min(1,(1-a)/b) min(1,b/a) max(1-a/b,0) + * (0,A,0,B) min(1,(1-b)/a) max(1-(1-a)/b,0) max(1-b/a,0) min(1,a/b) + * (0,A,B,0) min(1,(1-b)/a) min(1,(1-a)/b) max(1-b/a,0) max(1-a/b,0) + */ + +#define COMBINE_A_OUT 1 +#define COMBINE_A_IN 2 +#define COMBINE_B_OUT 4 +#define COMBINE_B_IN 8 + +#define COMBINE_CLEAR 0 +#define COMBINE_A (COMBINE_A_OUT | COMBINE_A_IN) +#define COMBINE_B (COMBINE_B_OUT | COMBINE_B_IN) +#define COMBINE_A_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN) +#define COMBINE_B_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_B_IN) +#define COMBINE_A_ATOP (COMBINE_B_OUT | COMBINE_A_IN) +#define COMBINE_B_ATOP (COMBINE_A_OUT | COMBINE_B_IN) +#define COMBINE_XOR (COMBINE_A_OUT | COMBINE_B_OUT) + +/* portion covered by a but not b */ +static comp1_t +combine_disjoint_out_part (comp1_t a, comp1_t b) +{ + /* min (1, (1-b) / a) */ + + b = ~b; /* 1 - b */ + if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */ + return MASK; /* 1 */ + return DIV_UNc (b, a); /* (1-b) / a */ +} + +/* portion covered by both a and b */ +static comp1_t +combine_disjoint_in_part (comp1_t a, comp1_t b) +{ + /* max (1-(1-b)/a,0) */ + /* = - min ((1-b)/a - 1, 0) */ + /* = 1 - min (1, (1-b)/a) */ + + b = ~b; /* 1 - b */ + if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */ + return 0; /* 1 - 1 */ + return ~DIV_UNc(b, a); /* 1 - (1-b) / a */ +} + +/* portion covered by a but not b */ +static comp1_t +combine_conjoint_out_part (comp1_t a, comp1_t b) +{ + /* max (1-b/a,0) */ + /* = 1-min(b/a,1) */ + + /* min (1, (1-b) / a) */ + + if (b >= a) /* b >= a -> b/a >= 1 */ + return 0x00; /* 0 */ + return ~DIV_UNc(b, a); /* 1 - b/a */ +} + +/* portion covered by both a and b */ +static comp1_t +combine_conjoint_in_part (comp1_t a, comp1_t b) +{ + /* min (1,b/a) */ + + if (b >= a) /* b >= a -> b/a >= 1 */ + return MASK; /* 1 */ + return DIV_UNc (b, a); /* b/a */ +} + +#define GET_COMP(v, i) ((comp2_t) (comp1_t) ((v) >> i)) + +#define ADD(x, y, i, t) \ + ((t) = GET_COMP (x, i) + GET_COMP (y, i), \ + (comp4_t) ((comp1_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i)) + +#define GENERIC(x, y, i, ax, ay, t, u, v) \ + 
((t) = (MUL_UNc (GET_COMP (y, i), ay, (u)) + \ + MUL_UNc (GET_COMP (x, i), ax, (v))), \ + (comp4_t) ((comp1_t) ((t) | \ + (0 - ((t) >> G_SHIFT)))) << (i)) + +static void +combine_disjoint_general_u (comp4_t * dest, + const comp4_t *src, + const comp4_t *mask, + int width, + comp1_t combine) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = combine_mask (src, mask, i); + comp4_t d = *(dest + i); + comp4_t m, n, o, p; + comp2_t Fa, Fb, t, u, v; + comp1_t sa = s >> A_SHIFT; + comp1_t da = d >> A_SHIFT; + + switch (combine & COMBINE_A) + { + default: + Fa = 0; + break; + + case COMBINE_A_OUT: + Fa = combine_disjoint_out_part (sa, da); + break; + + case COMBINE_A_IN: + Fa = combine_disjoint_in_part (sa, da); + break; + + case COMBINE_A: + Fa = MASK; + break; + } + + switch (combine & COMBINE_B) + { + default: + Fb = 0; + break; + + case COMBINE_B_OUT: + Fb = combine_disjoint_out_part (da, sa); + break; + + case COMBINE_B_IN: + Fb = combine_disjoint_in_part (da, sa); + break; + + case COMBINE_B: + Fb = MASK; + break; + } + m = GENERIC (s, d, 0, Fa, Fb, t, u, v); + n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v); + o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v); + p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v); + s = m | n | o | p; + *(dest + i) = s; + } +} + +static void +combine_disjoint_over_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = combine_mask (src, mask, i); + comp2_t a = s >> A_SHIFT; + + if (a != 0x00) + { + if (a != MASK) + { + comp4_t d = *(dest + i); + a = combine_disjoint_out_part (d >> A_SHIFT, a); + UNcx4_MUL_UNc_ADD_UNcx4 (d, a, s); + s = d; + } + + *(dest + i) = s; + } + } +} + +static void +combine_disjoint_in_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN); +} + +static void +combine_disjoint_in_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN); +} + +static void +combine_disjoint_out_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT); +} + +static void +combine_disjoint_out_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT); +} + +static void +combine_disjoint_atop_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP); +} + +static void +combine_disjoint_atop_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP); +} + +static void +combine_disjoint_xor_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR); +} + +static void +combine_conjoint_general_u (comp4_t * dest, + const 
comp4_t *src, + const comp4_t *mask, + int width, + comp1_t combine) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = combine_mask (src, mask, i); + comp4_t d = *(dest + i); + comp4_t m, n, o, p; + comp2_t Fa, Fb, t, u, v; + comp1_t sa = s >> A_SHIFT; + comp1_t da = d >> A_SHIFT; + + switch (combine & COMBINE_A) + { + default: + Fa = 0; + break; + + case COMBINE_A_OUT: + Fa = combine_conjoint_out_part (sa, da); + break; + + case COMBINE_A_IN: + Fa = combine_conjoint_in_part (sa, da); + break; + + case COMBINE_A: + Fa = MASK; + break; + } + + switch (combine & COMBINE_B) + { + default: + Fb = 0; + break; + + case COMBINE_B_OUT: + Fb = combine_conjoint_out_part (da, sa); + break; + + case COMBINE_B_IN: + Fb = combine_conjoint_in_part (da, sa); + break; + + case COMBINE_B: + Fb = MASK; + break; + } + + m = GENERIC (s, d, 0, Fa, Fb, t, u, v); + n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v); + o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v); + p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v); + + s = m | n | o | p; + + *(dest + i) = s; + } +} + +static void +combine_conjoint_over_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER); +} + +static void +combine_conjoint_over_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER); +} + +static void +combine_conjoint_in_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN); +} + +static void +combine_conjoint_in_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN); +} + +static void +combine_conjoint_out_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT); +} + +static void +combine_conjoint_out_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT); +} + +static void +combine_conjoint_atop_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP); +} + +static void +combine_conjoint_atop_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP); +} + +static void +combine_conjoint_xor_u (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR); +} + +/************************************************************************/ +/*********************** Per Channel functions **************************/ +/************************************************************************/ + +static void +combine_clear_ca 
(pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + memset (dest, 0, width * sizeof(comp4_t)); +} + +static void +combine_src_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = *(src + i); + comp4_t m = *(mask + i); + + combine_mask_value_ca (&s, &m); + + *(dest + i) = s; + } +} + +static void +combine_over_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = *(src + i); + comp4_t m = *(mask + i); + comp4_t a; + + combine_mask_ca (&s, &m); + + a = ~m; + if (a) + { + comp4_t d = *(dest + i); + UNcx4_MUL_UNcx4_ADD_UNcx4 (d, a, s); + s = d; + } + + *(dest + i) = s; + } +} + +static void +combine_over_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t d = *(dest + i); + comp4_t a = ~d >> A_SHIFT; + + if (a) + { + comp4_t s = *(src + i); + comp4_t m = *(mask + i); + + UNcx4_MUL_UNcx4 (s, m); + UNcx4_MUL_UNc_ADD_UNcx4 (s, a, d); + + *(dest + i) = s; + } + } +} + +static void +combine_in_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t d = *(dest + i); + comp2_t a = d >> A_SHIFT; + comp4_t s = 0; + + if (a) + { + comp4_t m = *(mask + i); + + s = *(src + i); + combine_mask_value_ca (&s, &m); + + if (a != MASK) + UNcx4_MUL_UNc (s, a); + } + + *(dest + i) = s; + } +} + +static void +combine_in_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = *(src + i); + comp4_t m = *(mask + i); + comp4_t a; + + combine_mask_alpha_ca (&s, &m); + + a = m; + if (a != ~0) + { + comp4_t d = 0; + + if (a) + { + d = *(dest + i); + UNcx4_MUL_UNcx4 (d, a); + } + + *(dest + i) = d; + } + } +} + +static void +combine_out_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t d = *(dest + i); + comp2_t a = ~d >> A_SHIFT; + comp4_t s = 0; + + if (a) + { + comp4_t m = *(mask + i); + + s = *(src + i); + combine_mask_value_ca (&s, &m); + + if (a != MASK) + UNcx4_MUL_UNc (s, a); + } + + *(dest + i) = s; + } +} + +static void +combine_out_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = *(src + i); + comp4_t m = *(mask + i); + comp4_t a; + + combine_mask_alpha_ca (&s, &m); + + a = ~m; + if (a != ~0) + { + comp4_t d = 0; + + if (a) + { + d = *(dest + i); + UNcx4_MUL_UNcx4 (d, a); + } + + *(dest + i) = d; + } + } +} + +static void +combine_atop_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t d = *(dest + i); + comp4_t s = *(src + i); + comp4_t m = *(mask + i); + comp4_t ad; + comp2_t as = d >> A_SHIFT; + + combine_mask_ca (&s, &m); + + ad 
= ~m; + + UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as); + + *(dest + i) = d; + } +} + +static void +combine_atop_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t d = *(dest + i); + comp4_t s = *(src + i); + comp4_t m = *(mask + i); + comp4_t ad; + comp2_t as = ~d >> A_SHIFT; + + combine_mask_ca (&s, &m); + + ad = m; + + UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as); + + *(dest + i) = d; + } +} + +static void +combine_xor_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t d = *(dest + i); + comp4_t s = *(src + i); + comp4_t m = *(mask + i); + comp4_t ad; + comp2_t as = ~d >> A_SHIFT; + + combine_mask_ca (&s, &m); + + ad = ~m; + + UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as); + + *(dest + i) = d; + } +} + +static void +combine_add_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s = *(src + i); + comp4_t m = *(mask + i); + comp4_t d = *(dest + i); + + combine_mask_value_ca (&s, &m); + + UNcx4_ADD_UNcx4 (d, s); + + *(dest + i) = d; + } +} + +static void +combine_saturate_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s, d; + comp2_t sa, sr, sg, sb, da; + comp2_t t, u, v; + comp4_t m, n, o, p; + + d = *(dest + i); + s = *(src + i); + m = *(mask + i); + + combine_mask_ca (&s, &m); + + sa = (m >> A_SHIFT); + sr = (m >> R_SHIFT) & MASK; + sg = (m >> G_SHIFT) & MASK; + sb = m & MASK; + da = ~d >> A_SHIFT; + + if (sb <= da) + m = ADD (s, d, 0, t); + else + m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v); + + if (sg <= da) + n = ADD (s, d, G_SHIFT, t); + else + n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v); + + if (sr <= da) + o = ADD (s, d, R_SHIFT, t); + else + o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v); + + if (sa <= da) + p = ADD (s, d, A_SHIFT, t); + else + p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v); + + *(dest + i) = m | n | o | p; + } +} + +static void +combine_disjoint_general_ca (comp4_t * dest, + const comp4_t *src, + const comp4_t *mask, + int width, + comp1_t combine) +{ + int i; + + for (i = 0; i < width; ++i) + { + comp4_t s, d; + comp4_t m, n, o, p; + comp4_t Fa, Fb; + comp2_t t, u, v; + comp4_t sa; + comp1_t da; + + s = *(src + i); + m = *(mask + i); + d = *(dest + i); + da = d >> A_SHIFT; + + combine_mask_ca (&s, &m); + + sa = m; + + switch (combine & COMBINE_A) + { + default: + Fa = 0; + break; + + case COMBINE_A_OUT: + m = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> 0), da); + n = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT; + o = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT; + p = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT; + Fa = m | n | o | p; + break; + + case COMBINE_A_IN: + m = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> 0), da); + n = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT; + o = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT; + p = 
(comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT; + Fa = m | n | o | p; + break; + + case COMBINE_A: + Fa = ~0; + break; + } + + switch (combine & COMBINE_B) + { + default: + Fb = 0; + break; + + case COMBINE_B_OUT: + m = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> 0)); + n = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT; + o = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT; + p = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT; + Fb = m | n | o | p; + break; + + case COMBINE_B_IN: + m = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> 0)); + n = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT; + o = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT; + p = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT; + Fb = m | n | o | p; + break; + + case COMBINE_B: + Fb = ~0; + break; + } + m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v); + n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v); + o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v); + p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v); + + s = m | n | o | p; + + *(dest + i) = s; + } +} + +static void +combine_disjoint_over_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER); +} + +static void +combine_disjoint_in_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN); +} + +static void +combine_disjoint_in_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN); +} + +static void +combine_disjoint_out_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT); +} + +static void +combine_disjoint_out_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT); +} + +static void +combine_disjoint_atop_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP); +} + +static void +combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP); +} + +static void +combine_disjoint_xor_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR); +} + +static void +combine_conjoint_general_ca (comp4_t * dest, + const comp4_t *src, + const comp4_t *mask, + int width, + comp1_t combine) +{ + int i; + + for (i = 0; i < width; 
++i) + { + comp4_t s, d; + comp4_t m, n, o, p; + comp4_t Fa, Fb; + comp2_t t, u, v; + comp4_t sa; + comp1_t da; + + s = *(src + i); + m = *(mask + i); + d = *(dest + i); + da = d >> A_SHIFT; + + combine_mask_ca (&s, &m); + + sa = m; + + switch (combine & COMBINE_A) + { + default: + Fa = 0; + break; + + case COMBINE_A_OUT: + m = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> 0), da); + n = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT; + o = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT; + p = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT; + Fa = m | n | o | p; + break; + + case COMBINE_A_IN: + m = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> 0), da); + n = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT; + o = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT; + p = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT; + Fa = m | n | o | p; + break; + + case COMBINE_A: + Fa = ~0; + break; + } + + switch (combine & COMBINE_B) + { + default: + Fb = 0; + break; + + case COMBINE_B_OUT: + m = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> 0)); + n = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT; + o = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT; + p = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT; + Fb = m | n | o | p; + break; + + case COMBINE_B_IN: + m = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> 0)); + n = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT; + o = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT; + p = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT; + Fb = m | n | o | p; + break; + + case COMBINE_B: + Fb = ~0; + break; + } + m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v); + n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v); + o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v); + p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v); + + s = m | n | o | p; + + *(dest + i) = s; + } +} + +static void +combine_conjoint_over_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER); +} + +static void +combine_conjoint_over_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER); +} + +static void +combine_conjoint_in_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN); +} + +static void +combine_conjoint_in_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN); +} + +static void +combine_conjoint_out_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, 
width, COMBINE_A_OUT); +} + +static void +combine_conjoint_out_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT); +} + +static void +combine_conjoint_atop_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP); +} + +static void +combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP); +} + +static void +combine_conjoint_xor_ca (pixman_implementation_t *imp, + pixman_op_t op, + comp4_t * dest, + const comp4_t * src, + const comp4_t * mask, + int width) +{ + combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR); +} + +void +_pixman_setup_combiner_functions_width (pixman_implementation_t *imp) +{ + /* Unified alpha */ + imp->combine_width[PIXMAN_OP_CLEAR] = combine_clear; + imp->combine_width[PIXMAN_OP_SRC] = combine_src_u; + /* dest */ + imp->combine_width[PIXMAN_OP_OVER] = combine_over_u; + imp->combine_width[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u; + imp->combine_width[PIXMAN_OP_IN] = combine_in_u; + imp->combine_width[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u; + imp->combine_width[PIXMAN_OP_OUT] = combine_out_u; + imp->combine_width[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u; + imp->combine_width[PIXMAN_OP_ATOP] = combine_atop_u; + imp->combine_width[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u; + imp->combine_width[PIXMAN_OP_XOR] = combine_xor_u; + imp->combine_width[PIXMAN_OP_ADD] = combine_add_u; + imp->combine_width[PIXMAN_OP_SATURATE] = combine_saturate_u; + + /* Disjoint, unified */ + imp->combine_width[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear; + imp->combine_width[PIXMAN_OP_DISJOINT_SRC] = combine_src_u; + /* dest */ + imp->combine_width[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u; + imp->combine_width[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u; + imp->combine_width[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u; + imp->combine_width[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u; + imp->combine_width[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u; + imp->combine_width[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u; + imp->combine_width[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u; + imp->combine_width[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u; + imp->combine_width[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u; + + /* Conjoint, unified */ + imp->combine_width[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear; + imp->combine_width[PIXMAN_OP_CONJOINT_SRC] = combine_src_u; + /* dest */ + imp->combine_width[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u; + imp->combine_width[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u; + imp->combine_width[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u; + imp->combine_width[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u; + imp->combine_width[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u; + imp->combine_width[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u; + imp->combine_width[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u; + imp->combine_width[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = 
combine_conjoint_atop_reverse_u; + imp->combine_width[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u; + + imp->combine_width[PIXMAN_OP_MULTIPLY] = combine_multiply_u; + imp->combine_width[PIXMAN_OP_SCREEN] = combine_screen_u; + imp->combine_width[PIXMAN_OP_OVERLAY] = combine_overlay_u; + imp->combine_width[PIXMAN_OP_DARKEN] = combine_darken_u; + imp->combine_width[PIXMAN_OP_LIGHTEN] = combine_lighten_u; + imp->combine_width[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u; + imp->combine_width[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u; + imp->combine_width[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u; + imp->combine_width[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u; + imp->combine_width[PIXMAN_OP_DIFFERENCE] = combine_difference_u; + imp->combine_width[PIXMAN_OP_EXCLUSION] = combine_exclusion_u; + imp->combine_width[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u; + imp->combine_width[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u; + imp->combine_width[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u; + imp->combine_width[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u; + + /* Component alpha combiners */ + imp->combine_width_ca[PIXMAN_OP_CLEAR] = combine_clear_ca; + imp->combine_width_ca[PIXMAN_OP_SRC] = combine_src_ca; + /* dest */ + imp->combine_width_ca[PIXMAN_OP_OVER] = combine_over_ca; + imp->combine_width_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca; + imp->combine_width_ca[PIXMAN_OP_IN] = combine_in_ca; + imp->combine_width_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca; + imp->combine_width_ca[PIXMAN_OP_OUT] = combine_out_ca; + imp->combine_width_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca; + imp->combine_width_ca[PIXMAN_OP_ATOP] = combine_atop_ca; + imp->combine_width_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca; + imp->combine_width_ca[PIXMAN_OP_XOR] = combine_xor_ca; + imp->combine_width_ca[PIXMAN_OP_ADD] = combine_add_ca; + imp->combine_width_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca; + + /* Disjoint CA */ + imp->combine_width_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca; + imp->combine_width_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca; + /* dest */ + imp->combine_width_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca; + imp->combine_width_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca; + imp->combine_width_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca; + imp->combine_width_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca; + imp->combine_width_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca; + imp->combine_width_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca; + imp->combine_width_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca; + imp->combine_width_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca; + imp->combine_width_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca; + + /* Conjoint CA */ + imp->combine_width_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca; + imp->combine_width_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca; + /* dest */ + imp->combine_width_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca; + imp->combine_width_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca; + imp->combine_width_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca; + imp->combine_width_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca; + imp->combine_width_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca; + imp->combine_width_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca; + 
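The *_ca entries being registered in these tables are the component-alpha variants: each evaluates its Porter-Duff operator with a separate mask alpha per channel rather than a single scalar alpha. As a readability sketch of what combine_over_ca above computes, written per channel in floating point (illustrative code and channel layout, not pixman's; source is premultiplied):

    /* Component-alpha OVER: each channel c uses its own mask value, so
     * dest_c = src_c * mask_c + dest_c * (1 - src_alpha * mask_c).
     * combine_mask_ca performs the two multiplications; the loop in
     * combine_over_ca then does the multiply-add against ~m. */
    static void
    over_ca_sketch (float dest[4], const float src[4], const float mask[4])
    {
        float src_alpha = src[0];          /* channel 0 holds alpha here */
        int c;

        for (c = 0; c < 4; ++c)
        {
            float s = src[c] * mask[c];    /* masked source */
            float a = src_alpha * mask[c]; /* per-channel source alpha */

            dest[c] = s + dest[c] * (1.0f - a);
        }
    }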
imp->combine_width_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca; + imp->combine_width_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca; + imp->combine_width_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca; + + imp->combine_width_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca; + imp->combine_width_ca[PIXMAN_OP_SCREEN] = combine_screen_ca; + imp->combine_width_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca; + imp->combine_width_ca[PIXMAN_OP_DARKEN] = combine_darken_ca; + imp->combine_width_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca; + imp->combine_width_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca; + imp->combine_width_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca; + imp->combine_width_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca; + imp->combine_width_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca; + imp->combine_width_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca; + imp->combine_width_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca; + + /* It is not clear that these make sense, so leave them out for now */ + imp->combine_width_ca[PIXMAN_OP_HSL_HUE] = NULL; + imp->combine_width_ca[PIXMAN_OP_HSL_SATURATION] = NULL; + imp->combine_width_ca[PIXMAN_OP_HSL_COLOR] = NULL; + imp->combine_width_ca[PIXMAN_OP_HSL_LUMINOSITY] = NULL; +} + diff --git a/lib/pixman/pixman/pixman-combine.h.template b/lib/pixman/pixman/pixman-combine.h.template new file mode 100644 index 000000000..2f6392f96 --- /dev/null +++ b/lib/pixman/pixman/pixman-combine.h.template @@ -0,0 +1,226 @@ + +#define COMPONENT_SIZE +#define MASK +#define ONE_HALF + +#define A_SHIFT +#define R_SHIFT +#define G_SHIFT +#define A_MASK +#define R_MASK +#define G_MASK + +#define RB_MASK +#define AG_MASK +#define RB_ONE_HALF +#define RB_MASK_PLUS_ONE + +#define ALPHA_c(x) ((x) >> A_SHIFT) +#define RED_c(x) (((x) >> R_SHIFT) & MASK) +#define GREEN_c(x) (((x) >> G_SHIFT) & MASK) +#define BLUE_c(x) ((x) & MASK) + +/* + * Helper macros. + */ + +#define MUL_UNc(a, b, t) \ + ((t) = (a) * (b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT )) + +#define DIV_UNc(a, b) \ + (((comp2_t) (a) * MASK) / (b)) + +#define ADD_UNc(x, y, t) \ + ((t) = x + y, \ + (comp4_t) (comp1_t) ((t) | (0 - ((t) >> G_SHIFT)))) + +#define DIV_ONE_UNc(x) \ + (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT) + +/* + * The methods below use some tricks to be able to do two color + * components at the same time. 
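Concretely, the trick the template's comment refers to is SWAR arithmetic: the red and blue channels of a 32-bit ARGB pixel are kept in one register, separated by the zeroed green/alpha bytes, so each channel has a 16-bit lane to overflow into and one multiply processes both at once. A sketch with the 8-bit constants filled in (0x00ff00ff, 0x00800080 and 0x10000100 are the values pixman instantiates for 32-bit pixels in pixman-combine32.h; the function names are illustrative):

    #include <stdint.h>

    /* Multiply the red and blue channels of an ARGB pixel by an 8-bit
     * value a in one pass: 0x00800080 adds the rounding half to both
     * lanes, and (t + ((t >> 8) & 0x00ff00ff)) >> 8 approximates t / 255
     * as (t + t/256) / 256. */
    static uint32_t
    mul_rb (uint32_t x, uint32_t a)
    {
        uint32_t t = (x & 0x00ff00ff) * a;

        t += 0x00800080;
        t = (t + ((t >> 8) & 0x00ff00ff)) >> 8;

        return t & 0x00ff00ff;
    }

    /* Saturating add of the same two lanes: subtracting each lane's
     * overflow bit from 0x10000100 produces an all-ones byte exactly in
     * the lanes that overflowed, clamping them to 0xff. */
    static uint32_t
    add_rb (uint32_t x, uint32_t y)
    {
        uint32_t t = (x & 0x00ff00ff) + (y & 0x00ff00ff);

        t |= 0x10000100 - ((t >> 8) & 0x00ff00ff);

        return t & 0x00ff00ff;
    }

    /* All four channels: red/blue in place, then alpha/green shifted
     * down into the same lane positions, and recombine. */
    static uint32_t
    mul_un8x4 (uint32_t pixel, uint32_t a)
    {
        uint32_t rb = mul_rb (pixel, a);
        uint32_t ag = mul_rb (pixel >> 8, a);

        return rb | (ag << 8);
    }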
+ */ + +/* + * x_rb = (x_rb * a) / 255 + */ +#define UNc_rb_MUL_UNc(x, a, t) \ + do \ + { \ + t = ((x) & RB_MASK) * (a); \ + t += RB_ONE_HALF; \ + x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ + x &= RB_MASK; \ + } while (0) + +/* + * x_rb = min (x_rb + y_rb, 255) + */ +#define UNc_rb_ADD_UNc_rb(x, y, t) \ + do \ + { \ + t = ((x) + (y)); \ + t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \ + x = (t & RB_MASK); \ + } while (0) + +/* + * x_rb = (x_rb * a_rb) / 255 + */ +#define UNc_rb_MUL_UNc_rb(x, a, t) \ + do \ + { \ + t = (x & MASK) * (a & MASK); \ + t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \ + t += RB_ONE_HALF; \ + t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \ + x = t & RB_MASK; \ + } while (0) + +/* + * x_c = (x_c * a) / 255 + */ +#define UNcx4_MUL_UNc(x, a) \ + do \ + { \ + comp4_t r1, r2, t; \ + \ + r1 = (x); \ + UNc_rb_MUL_UNc (r1, a, t); \ + \ + r2 = (x) >> G_SHIFT; \ + UNc_rb_MUL_UNc (r2, a, t); \ + \ + x = r1 | (r2 << G_SHIFT); \ + } while (0) + +/* + * x_c = (x_c * a) / 255 + y_c + */ +#define UNcx4_MUL_UNc_ADD_UNcx4(x, a, y) \ + do \ + { \ + comp4_t r1, r2, r3, t; \ + \ + r1 = (x); \ + r2 = (y) & RB_MASK; \ + UNc_rb_MUL_UNc (r1, a, t); \ + UNc_rb_ADD_UNc_rb (r1, r2, t); \ + \ + r2 = (x) >> G_SHIFT; \ + r3 = ((y) >> G_SHIFT) & RB_MASK; \ + UNc_rb_MUL_UNc (r2, a, t); \ + UNc_rb_ADD_UNc_rb (r2, r3, t); \ + \ + x = r1 | (r2 << G_SHIFT); \ + } while (0) + +/* + * x_c = (x_c * a + y_c * b) / 255 + */ +#define UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc(x, a, y, b) \ + do \ + { \ + comp4_t r1, r2, r3, t; \ + \ + r1 = x; \ + r2 = y; \ + UNc_rb_MUL_UNc (r1, a, t); \ + UNc_rb_MUL_UNc (r2, b, t); \ + UNc_rb_ADD_UNc_rb (r1, r2, t); \ + \ + r2 = (x >> G_SHIFT); \ + r3 = (y >> G_SHIFT); \ + UNc_rb_MUL_UNc (r2, a, t); \ + UNc_rb_MUL_UNc (r3, b, t); \ + UNc_rb_ADD_UNc_rb (r2, r3, t); \ + \ + x = r1 | (r2 << G_SHIFT); \ + } while (0) + +/* + * x_c = (x_c * a_c) / 255 + */ +#define UNcx4_MUL_UNcx4(x, a) \ + do \ + { \ + comp4_t r1, r2, r3, t; \ + \ + r1 = x; \ + r2 = a; \ + UNc_rb_MUL_UNc_rb (r1, r2, t); \ + \ + r2 = x >> G_SHIFT; \ + r3 = a >> G_SHIFT; \ + UNc_rb_MUL_UNc_rb (r2, r3, t); \ + \ + x = r1 | (r2 << G_SHIFT); \ + } while (0) + +/* + * x_c = (x_c * a_c) / 255 + y_c + */ +#define UNcx4_MUL_UNcx4_ADD_UNcx4(x, a, y) \ + do \ + { \ + comp4_t r1, r2, r3, t; \ + \ + r1 = x; \ + r2 = a; \ + UNc_rb_MUL_UNc_rb (r1, r2, t); \ + r2 = y & RB_MASK; \ + UNc_rb_ADD_UNc_rb (r1, r2, t); \ + \ + r2 = (x >> G_SHIFT); \ + r3 = (a >> G_SHIFT); \ + UNc_rb_MUL_UNc_rb (r2, r3, t); \ + r3 = (y >> G_SHIFT) & RB_MASK; \ + UNc_rb_ADD_UNc_rb (r2, r3, t); \ + \ + x = r1 | (r2 << G_SHIFT); \ + } while (0) + +/* + * x_c = (x_c * a_c + y_c * b) / 255 + */ +#define UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc(x, a, y, b) \ + do \ + { \ + comp4_t r1, r2, r3, t; \ + \ + r1 = x; \ + r2 = a; \ + UNc_rb_MUL_UNc_rb (r1, r2, t); \ + r2 = y; \ + UNc_rb_MUL_UNc (r2, b, t); \ + UNc_rb_ADD_UNc_rb (r1, r2, t); \ + \ + r2 = x >> G_SHIFT; \ + r3 = a >> G_SHIFT; \ + UNc_rb_MUL_UNc_rb (r2, r3, t); \ + r3 = y >> G_SHIFT; \ + UNc_rb_MUL_UNc (r3, b, t); \ + UNc_rb_ADD_UNc_rb (r2, r3, t); \ + \ + x = r1 | (r2 << G_SHIFT); \ + } while (0) + +/* + x_c = min(x_c + y_c, 255) + */ +#define UNcx4_ADD_UNcx4(x, y) \ + do \ + { \ + comp4_t r1, r2, r3, t; \ + \ + r1 = x & RB_MASK; \ + r2 = y & RB_MASK; \ + UNc_rb_ADD_UNc_rb (r1, r2, t); \ + \ + r2 = (x >> G_SHIFT) & RB_MASK; \ + r3 = (y >> G_SHIFT) & RB_MASK; \ + UNc_rb_ADD_UNc_rb (r2, r3, t); \ + \ + x = r1 | (r2 << G_SHIFT); \ + } while (0) diff --git a/lib/pixman/pixman/pixman-compiler.h 
b/lib/pixman/pixman/pixman-compiler.h new file mode 100644 index 000000000..9647dbb48 --- /dev/null +++ b/lib/pixman/pixman/pixman-compiler.h @@ -0,0 +1,71 @@ +/* Pixman uses some non-standard compiler features. This file ensures + * they exist + * + * The features are: + * + * FUNC must be defined to expand to the current function + * PIXMAN_EXPORT should be defined to whatever is required to + * export functions from a shared library + * limits limits for various types must be defined + * inline must be defined + * force_inline must be defined + */ +#if defined (__GNUC__) +# define FUNC ((const char*) (__PRETTY_FUNCTION__)) +#elif defined (__sun) || (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) +# define FUNC ((const char*) (__func__)) +#else +# define FUNC ((const char*) ("???")) +#endif + +#ifndef INT16_MIN +# define INT16_MIN (-32767-1) +#endif + +#ifndef INT16_MAX +# define INT16_MAX (32767) +#endif + +#ifndef INT32_MIN +# define INT32_MIN (-2147483647-1) +#endif + +#ifndef INT32_MAX +# define INT32_MAX (2147483647) +#endif + +#ifndef UINT32_MIN +# define UINT32_MIN (0) +#endif + +#ifndef UINT32_MAX +# define UINT32_MAX (4294967295U) +#endif + +#ifndef M_PI +# define M_PI 3.14159265358979323846 +#endif + +#ifdef _MSC_VER +/* 'inline' is available only in C++ in MSVC */ +# define inline __inline +# define force_inline __forceinline +#elif defined __GNUC__ || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) +# define inline __inline__ +# define force_inline __inline__ __attribute__ ((__always_inline__)) +#else +# ifndef force_inline +# define force_inline inline +# endif +#endif + +/* GCC visibility */ +#if defined(__GNUC__) && __GNUC__ >= 4 +# define PIXMAN_EXPORT __attribute__ ((visibility("default"))) +/* Sun Studio 8 visibility */ +#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550) +# define PIXMAN_EXPORT __global +#else +# define PIXMAN_EXPORT +#endif + diff --git a/lib/pixman/pixman/pixman-compute-region.c b/lib/pixman/pixman/pixman-compute-region.c deleted file mode 100644 index 31eaee8e3..000000000 --- a/lib/pixman/pixman/pixman-compute-region.c +++ /dev/null @@ -1,242 +0,0 @@ -/* - * - * Copyright © 1999 Keith Packard - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#include <stdlib.h> -#include <stdio.h> -#include "pixman-private.h" - -#define BOUND(v) (int16_t) ((v) < INT16_MIN ? INT16_MIN : (v) > INT16_MAX ? 
INT16_MAX : (v)) - -static inline pixman_bool_t -miClipPictureReg (pixman_region32_t * pRegion, - pixman_region32_t * pClip, - int dx, - int dy) -{ - if (pixman_region32_n_rects(pRegion) == 1 && - pixman_region32_n_rects(pClip) == 1) - { - pixman_box32_t * pRbox = pixman_region32_rectangles(pRegion, NULL); - pixman_box32_t * pCbox = pixman_region32_rectangles(pClip, NULL); - int v; - - if (pRbox->x1 < (v = pCbox->x1 + dx)) - pRbox->x1 = BOUND(v); - if (pRbox->x2 > (v = pCbox->x2 + dx)) - pRbox->x2 = BOUND(v); - if (pRbox->y1 < (v = pCbox->y1 + dy)) - pRbox->y1 = BOUND(v); - if (pRbox->y2 > (v = pCbox->y2 + dy)) - pRbox->y2 = BOUND(v); - if (pRbox->x1 >= pRbox->x2 || - pRbox->y1 >= pRbox->y2) - { - pixman_region32_init (pRegion); - } - } - else if (!pixman_region32_not_empty (pClip)) - return FALSE; - else - { - if (dx || dy) - pixman_region32_translate (pRegion, -dx, -dy); - if (!pixman_region32_intersect (pRegion, pRegion, pClip)) - return FALSE; - if (dx || dy) - pixman_region32_translate(pRegion, dx, dy); - } - return pixman_region32_not_empty(pRegion); -} - - -static inline pixman_bool_t -miClipPictureSrc (pixman_region32_t * pRegion, - pixman_image_t * pPicture, - int dx, - int dy) -{ - /* XXX what to do with clipping from transformed pictures? */ - if (pPicture->common.transform || pPicture->type != BITS) - return TRUE; - - if (pPicture->common.repeat) - { - /* If the clip region was set by a client, then it should be intersected - * with the composite region since it's interpreted as happening - * after the repeat algorithm. - * - * If the clip region was not set by a client, then it was imposed by - * boundaries of the pixmap, or by sibling or child windows, which means - * it should in theory be repeated along. FIXME: we ignore that case. - * It is only relevant for windows that are (a) clipped by siblings/children - * and (b) used as source. However this case is not useful anyway due - * to lack of GraphicsExpose events. - */ - if (pPicture->common.has_client_clip) - { - pixman_region32_translate (pRegion, dx, dy); - - if (!pixman_region32_intersect (pRegion, pRegion, - pPicture->common.src_clip)) - return FALSE; - - pixman_region32_translate ( pRegion, -dx, -dy); - } - - return TRUE; - } - else - { - return miClipPictureReg (pRegion, - pPicture->common.src_clip, - dx, - dy); - } -} - -/* - * returns FALSE if the final region is empty. Indistinguishable from - * an allocation failure, but rendering ignores those anyways. 
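The BOUND macro and the single-box fast path of the (now deleted) miClipPictureReg reduce, in the common case, to clamping one rectangle against a translated clip rectangle in 16-bit coordinates. A self-contained sketch of that fast path (box16_t, bound and clip_box are illustrative names, not pixman's):

    #include <stdint.h>

    typedef struct { int16_t x1, y1, x2, y2; } box16_t;

    /* Clamp a coordinate into the range a 16-bit region can represent. */
    static int16_t
    bound (int v)
    {
        if (v < INT16_MIN)
            return INT16_MIN;
        if (v > INT16_MAX)
            return INT16_MAX;
        return (int16_t) v;
    }

    /* Intersect one region box with one clip box translated by (dx, dy);
     * returns nonzero if the result is nonempty. */
    static int
    clip_box (box16_t *b, const box16_t *clip, int dx, int dy)
    {
        if (b->x1 < clip->x1 + dx) b->x1 = bound (clip->x1 + dx);
        if (b->x2 > clip->x2 + dx) b->x2 = bound (clip->x2 + dx);
        if (b->y1 < clip->y1 + dy) b->y1 = bound (clip->y1 + dy);
        if (b->y2 > clip->y2 + dy) b->y2 = bound (clip->y2 + dy);

        return b->x1 < b->x2 && b->y1 < b->y2;
    }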
- */ - -pixman_bool_t -pixman_compute_composite_region32 (pixman_region32_t * pRegion, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height) -{ - int v; - - pRegion->extents.x1 = xDst; - v = xDst + width; - pRegion->extents.x2 = BOUND(v); - pRegion->extents.y1 = yDst; - v = yDst + height; - pRegion->extents.y2 = BOUND(v); - pRegion->data = 0; - /* Check for empty operation */ - if (pRegion->extents.x1 >= pRegion->extents.x2 || - pRegion->extents.y1 >= pRegion->extents.y2) - { - pixman_region32_init (pRegion); - return FALSE; - } - /* clip against dst */ - if (!miClipPictureReg (pRegion, &pDst->common.clip_region, 0, 0)) - { - pixman_region32_fini (pRegion); - return FALSE; - } - if (pDst->common.alpha_map) - { - if (!miClipPictureReg (pRegion, &pDst->common.alpha_map->common.clip_region, - -pDst->common.alpha_origin.x, - -pDst->common.alpha_origin.y)) - { - pixman_region32_fini (pRegion); - return FALSE; - } - } - /* clip against src */ - if (!miClipPictureSrc (pRegion, pSrc, xDst - xSrc, yDst - ySrc)) - { - pixman_region32_fini (pRegion); - return FALSE; - } - if (pSrc->common.alpha_map) - { - if (!miClipPictureSrc (pRegion, (pixman_image_t *)pSrc->common.alpha_map, - xDst - (xSrc - pSrc->common.alpha_origin.x), - yDst - (ySrc - pSrc->common.alpha_origin.y))) - { - pixman_region32_fini (pRegion); - return FALSE; - } - } - /* clip against mask */ - if (pMask) - { - if (!miClipPictureSrc (pRegion, pMask, xDst - xMask, yDst - yMask)) - { - pixman_region32_fini (pRegion); - return FALSE; - } - if (pMask->common.alpha_map) - { - if (!miClipPictureSrc (pRegion, (pixman_image_t *)pMask->common.alpha_map, - xDst - (xMask - pMask->common.alpha_origin.x), - yDst - (yMask - pMask->common.alpha_origin.y))) - { - pixman_region32_fini (pRegion); - return FALSE; - } - } - } - - return TRUE; -} - -PIXMAN_EXPORT pixman_bool_t -pixman_compute_composite_region (pixman_region16_t * pRegion, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height) -{ - pixman_region32_t r32; - pixman_bool_t retval; - - pixman_region32_init (&r32); - - retval = pixman_compute_composite_region32 (&r32, pSrc, pMask, pDst, - xSrc, ySrc, xMask, yMask, xDst, yDst, - width, height); - - if (retval) - { - if (!pixman_region16_copy_from_region32 (pRegion, &r32)) - retval = FALSE; - } - - pixman_region32_fini (&r32); - return retval; -} diff --git a/lib/pixman/pixman/pixman-conical-gradient.c b/lib/pixman/pixman/pixman-conical-gradient.c index 023256aae..d720db3d4 100644 --- a/lib/pixman/pixman/pixman-conical-gradient.c +++ b/lib/pixman/pixman/pixman-conical-gradient.c @@ -29,14 +29,19 @@ #include "pixman-private.h" static void -conical_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width, - uint32_t *buffer, uint32_t *mask, uint32_t maskBits) +conical_gradient_get_scanline_32 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { source_image_t *source = (source_image_t *)image; gradient_t *gradient = (gradient_t *)source; conical_gradient_t *conical = (conical_gradient_t *)image; uint32_t *end = buffer + width; - GradientWalker walker; + pixman_gradient_walker_t walker; pixman_bool_t affine = TRUE; double cx = 1.; double cy = 0.; @@ -44,73 
+49,92 @@ conical_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width double rx = x + 0.5; double ry = y + 0.5; double rz = 1.; - double a = conical->angle/(180.*65536); + double a = conical->angle / (180. * 65536); _pixman_gradient_walker_init (&walker, gradient, source->common.repeat); - - if (source->common.transform) { + + if (source->common.transform) + { pixman_vector_t v; + /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2; - v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2; + v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2; v.vector[2] = pixman_fixed_1; + if (!pixman_transform_point_3d (source->common.transform, &v)) return; + + cx = source->common.transform->matrix[0][0] / 65536.; + cy = source->common.transform->matrix[1][0] / 65536.; + cz = source->common.transform->matrix[2][0] / 65536.; - cx = source->common.transform->matrix[0][0]/65536.; - cy = source->common.transform->matrix[1][0]/65536.; - cz = source->common.transform->matrix[2][0]/65536.; - rx = v.vector[0]/65536.; - ry = v.vector[1]/65536.; - rz = v.vector[2]/65536.; - affine = source->common.transform->matrix[2][0] == 0 && v.vector[2] == pixman_fixed_1; - } - - if (affine) { - rx -= conical->center.x/65536.; - ry -= conical->center.y/65536.; + rx = v.vector[0] / 65536.; + ry = v.vector[1] / 65536.; + rz = v.vector[2] / 65536.; - while (buffer < end) { + affine = + source->common.transform->matrix[2][0] == 0 && + v.vector[2] == pixman_fixed_1; + } + + if (affine) + { + rx -= conical->center.x / 65536.; + ry -= conical->center.y / 65536.; + + while (buffer < end) + { double angle; - - if (!mask || *mask++ & maskBits) + + if (!mask || *mask++ & mask_bits) { - pixman_fixed_48_16_t t; - - angle = atan2(ry, rx) + a; - t = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI))); - - *(buffer) = _pixman_gradient_walker_pixel (&walker, t); + pixman_fixed_48_16_t t; + + angle = atan2 (ry, rx) + a; + t = (pixman_fixed_48_16_t) (angle * (65536. / (2 * M_PI))); + + *buffer = _pixman_gradient_walker_pixel (&walker, t); } - + ++buffer; + rx += cx; ry += cy; } - } else { - while (buffer < end) { + } + else + { + while (buffer < end) + { double x, y; double angle; - - if (!mask || *mask++ & maskBits) + + if (!mask || *mask++ & mask_bits) { - pixman_fixed_48_16_t t; - - if (rz != 0) { - x = rx/rz; - y = ry/rz; - } else { + pixman_fixed_48_16_t t; + + if (rz != 0) + { + x = rx / rz; + y = ry / rz; + } + else + { x = y = 0.; } - x -= conical->center.x/65536.; - y -= conical->center.y/65536.; - angle = atan2(y, x) + a; - t = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI))); + + x -= conical->center.x / 65536.; + y -= conical->center.y / 65536.; - *(buffer) = _pixman_gradient_walker_pixel (&walker, t); + angle = atan2 (y, x) + a; + t = (pixman_fixed_48_16_t) (angle * (65536. 
/ (2 * M_PI))); + + *buffer = _pixman_gradient_walker_pixel (&walker, t); } - + ++buffer; + rx += cx; ry += cy; rz += cz; @@ -121,37 +145,36 @@ conical_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width static void conical_gradient_property_changed (pixman_image_t *image) { - image->common.get_scanline_32 = (scanFetchProc)conical_gradient_get_scanline_32; - image->common.get_scanline_64 = (scanFetchProc)_pixman_image_get_scanline_64_generic; + image->common.get_scanline_32 = conical_gradient_get_scanline_32; + image->common.get_scanline_64 = _pixman_image_get_scanline_generic_64; } PIXMAN_EXPORT pixman_image_t * -pixman_image_create_conical_gradient (pixman_point_fixed_t *center, - pixman_fixed_t angle, - const pixman_gradient_stop_t *stops, - int n_stops) +pixman_image_create_conical_gradient (pixman_point_fixed_t * center, + pixman_fixed_t angle, + const pixman_gradient_stop_t *stops, + int n_stops) { - pixman_image_t *image = _pixman_image_allocate(); + pixman_image_t *image = _pixman_image_allocate (); conical_gradient_t *conical; - + if (!image) return NULL; - + conical = &image->conical; - + if (!_pixman_init_gradient (&conical->common, stops, n_stops)) { free (image); return NULL; } - + image->type = CONICAL; conical->center = *center; conical->angle = angle; - + image->common.property_changed = conical_gradient_property_changed; - - conical_gradient_property_changed (image); - + return image; } + diff --git a/lib/pixman/pixman/pixman-cpu.c b/lib/pixman/pixman/pixman-cpu.c index 057c13418..5d5469bb8 100644 --- a/lib/pixman/pixman/pixman-cpu.c +++ b/lib/pixman/pixman/pixman-cpu.c @@ -47,12 +47,16 @@ static volatile pixman_bool_t have_vmx = TRUE; static pixman_bool_t pixman_have_vmx (void) { - if(!initialized) { - size_t length = sizeof(have_vmx); - int error = - sysctlbyname("hw.optional.altivec", &have_vmx, &length, NULL, 0); - if(error) have_vmx = FALSE; - initialized = TRUE; + if (!initialized) + { + size_t length = sizeof(have_vmx); + int error = + sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0); + + if (error) + have_vmx = FALSE; + + initialized = TRUE; } return have_vmx; } @@ -69,39 +73,47 @@ pixman_have_vmx (void) static pixman_bool_t pixman_have_vmx (void) { - if (!initialized) { + if (!initialized) + { char fname[64]; unsigned long buf[64]; ssize_t count = 0; pid_t pid; int fd, i; - pid = getpid(); - snprintf(fname, sizeof(fname)-1, "/proc/%d/auxv", pid); + pid = getpid (); + snprintf (fname, sizeof(fname) - 1, "/proc/%d/auxv", pid); - fd = open(fname, O_RDONLY); - if (fd >= 0) { - for (i = 0; i <= (count / sizeof(unsigned long)); i += 2) { + fd = open (fname, O_RDONLY); + if (fd >= 0) + { + for (i = 0; i <= (count / sizeof(unsigned long)); i += 2) + { /* Read more if buf is empty... */ - if (i == (count / sizeof(unsigned long))) { - count = read(fd, buf, sizeof(buf)); + if (i == (count / sizeof(unsigned long))) + { + count = read (fd, buf, sizeof(buf)); if (count <= 0) break; i = 0; } - if (buf[i] == AT_HWCAP) { - have_vmx = !!(buf[i+1] & PPC_FEATURE_HAS_ALTIVEC); + if (buf[i] == AT_HWCAP) + { + have_vmx = !!(buf[i + 1] & PPC_FEATURE_HAS_ALTIVEC); initialized = TRUE; break; - } else if (buf[i] == AT_NULL) { + } + else if (buf[i] == AT_NULL) + { break; } } - close(fd); + close (fd); } } - if (!initialized) { + if (!initialized) + { /* Something went wrong. Assume 'no' rather than playing fragile tricks with catching SIGILL. 
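Reading /proc/<pid>/auxv by hand, as the Linux branch above does, predates a simpler interface: newer C libraries (glibc 2.16 and later) expose the same AT_HWCAP word through getauxval(3). A sketch of the equivalent AltiVec check; the PPC_FEATURE_HAS_ALTIVEC value is the kernel's, from asm/cputable.h:

    #include <sys/auxv.h>   /* getauxval, AT_HWCAP; glibc >= 2.16 */

    #ifndef PPC_FEATURE_HAS_ALTIVEC
    #define PPC_FEATURE_HAS_ALTIVEC 0x10000000
    #endif

    static int
    have_altivec (void)
    {
        return (getauxval (AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC) != 0;
    }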
*/ have_vmx = FALSE; @@ -110,35 +122,45 @@ pixman_have_vmx (void) return have_vmx; } + #else /* !__APPLE__ && !__linux__ */ #include <signal.h> #include <setjmp.h> static jmp_buf jump_env; -static void vmx_test(int sig, siginfo_t *si, void *unused) { +static void +vmx_test (int sig, + siginfo_t *si, + void * unused) +{ longjmp (jump_env, 1); } static pixman_bool_t -pixman_have_vmx (void) { +pixman_have_vmx (void) +{ struct sigaction sa, osa; int jmp_result; - if (!initialized) { - sa.sa_flags = SA_SIGINFO; - sigemptyset(&sa.sa_mask); - sa.sa_sigaction = vmx_test; - sigaction(SIGILL, &sa, &osa); + + if (!initialized) + { + sa.sa_flags = SA_SIGINFO; + sigemptyset (&sa.sa_mask); + sa.sa_sigaction = vmx_test; + sigaction (SIGILL, &sa, &osa); jmp_result = setjmp (jump_env); - if (jmp_result == 0) { + if (jmp_result == 0) + { asm volatile ( "vor 0, 0, 0" ); } - sigaction(SIGILL, &osa, NULL); + sigaction (SIGILL, &osa, NULL); have_vmx = (jmp_result == 0); - initialized = TRUE; + initialized = TRUE; } return have_vmx; } + #endif /* __APPLE__ */ #endif /* USE_VMX */ @@ -147,7 +169,7 @@ pixman_have_vmx (void) { #if defined(_MSC_VER) #if defined(USE_ARM_SIMD) -extern int pixman_msvc_try_arm_simd_op(); +extern int pixman_msvc_try_arm_simd_op (); pixman_bool_t pixman_have_arm_simd (void) @@ -155,22 +177,24 @@ pixman_have_arm_simd (void) static pixman_bool_t initialized = FALSE; static pixman_bool_t have_arm_simd = FALSE; - if (!initialized) { - __try { - pixman_msvc_try_arm_simd_op(); - have_arm_simd = TRUE; - } __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { - have_arm_simd = FALSE; - } + if (!initialized) + { + __try { + pixman_msvc_try_arm_simd_op (); + have_arm_simd = TRUE; + } __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) { + have_arm_simd = FALSE; + } initialized = TRUE; } return have_arm_simd; } + #endif /* USE_ARM_SIMD */ #if defined(USE_ARM_NEON) -extern int pixman_msvc_try_arm_neon_op(); +extern int pixman_msvc_try_arm_neon_op (); pixman_bool_t pixman_have_arm_neon (void) @@ -178,18 +202,23 @@ pixman_have_arm_neon (void) static pixman_bool_t initialized = FALSE; static pixman_bool_t have_arm_neon = FALSE; - if (!initialized) { - __try { - pixman_msvc_try_arm_neon_op(); - have_arm_neon = TRUE; - } __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { - have_arm_neon = FALSE; - } + if (!initialized) + { + __try + { + pixman_msvc_try_arm_neon_op (); + have_arm_neon = TRUE; + } + __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) + { + have_arm_neon = FALSE; + } initialized = TRUE; } return have_arm_neon; } + #endif /* USE_ARM_NEON */ #else /* linux ELF */ @@ -211,40 +240,51 @@ static pixman_bool_t arm_has_iwmmxt = FALSE; static pixman_bool_t arm_tests_initialized = FALSE; static void -pixman_arm_read_auxv() { +pixman_arm_read_auxv () +{ int fd; Elf32_auxv_t aux; - fd = open("/proc/self/auxv", O_RDONLY); - if (fd >= 0) { - while (read(fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) { - if (aux.a_type == AT_HWCAP) { + fd = open ("/proc/self/auxv", O_RDONLY); + if (fd >= 0) + { + while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) + { + if (aux.a_type == AT_HWCAP) + { uint32_t hwcap = aux.a_un.a_val; - if (getenv("ARM_FORCE_HWCAP")) - hwcap = strtoul(getenv("ARM_FORCE_HWCAP"), NULL, 0); - // hardcode these values to avoid depending on specific versions - // of the hwcap header, e.g. 
HWCAP_NEON + if (getenv ("ARM_FORCE_HWCAP")) + hwcap = strtoul (getenv ("ARM_FORCE_HWCAP"), NULL, 0); + /* hardcode these values to avoid depending on specific + * versions of the hwcap header, e.g. HWCAP_NEON + */ arm_has_vfp = (hwcap & 64) != 0; arm_has_iwmmxt = (hwcap & 512) != 0; - // this flag is only present on kernel 2.6.29 + /* this flag is only present on kernel 2.6.29 */ arm_has_neon = (hwcap & 4096) != 0; - } else if (aux.a_type == AT_PLATFORM) { + } + else if (aux.a_type == AT_PLATFORM) + { const char *plat = (const char*) aux.a_un.a_val; - if (getenv("ARM_FORCE_PLATFORM")) - plat = getenv("ARM_FORCE_PLATFORM"); - if (strncmp(plat, "v7l", 3) == 0) { + if (getenv ("ARM_FORCE_PLATFORM")) + plat = getenv ("ARM_FORCE_PLATFORM"); + if (strncmp (plat, "v7l", 3) == 0) + { arm_has_v7 = TRUE; arm_has_v6 = TRUE; - } else if (strncmp(plat, "v6l", 3) == 0) { + } + else if (strncmp (plat, "v6l", 3) == 0) + { arm_has_v6 = TRUE; } - } - } - close (fd); + } + } + close (fd); - // if we don't have 2.6.29, we have to do this hack; set - // the env var to trust HWCAP. - if (!getenv("ARM_TRUST_HWCAP") && arm_has_v7) + /* if we don't have 2.6.29, we have to do this hack; set + * the env var to trust HWCAP. + */ + if (!getenv ("ARM_TRUST_HWCAP") && arm_has_v7) arm_has_neon = TRUE; } @@ -256,10 +296,11 @@ pixman_bool_t pixman_have_arm_simd (void) { if (!arm_tests_initialized) - pixman_arm_read_auxv(); + pixman_arm_read_auxv (); return arm_has_v6; } + #endif /* USE_ARM_SIMD */ #if defined(USE_ARM_NEON) @@ -267,10 +308,11 @@ pixman_bool_t pixman_have_arm_neon (void) { if (!arm_tests_initialized) - pixman_arm_read_auxv(); + pixman_arm_read_auxv (); return arm_has_neon; } + #endif /* USE_ARM_NEON */ #endif /* linux */ @@ -283,37 +325,42 @@ pixman_have_arm_neon (void) * that would lead to SIGILL instructions on old CPUs that don't have * it. */ -#if !defined(__amd64__) && !defined(__x86_64__) +#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64) #ifdef HAVE_GETISAX #include <sys/auxv.h> #endif -enum CPUFeatures { - NoFeatures = 0, +typedef enum +{ + NO_FEATURES = 0, MMX = 0x1, - MMX_Extensions = 0x2, + MMX_EXTENSIONS = 0x2, SSE = 0x6, SSE2 = 0x8, CMOV = 0x10 -}; +} cpu_features_t; -static unsigned int detectCPUFeatures(void) { + +static unsigned int +detect_cpu_features (void) +{ unsigned int features = 0; unsigned int result = 0; #ifdef HAVE_GETISAX - if (getisax(&result, 1)) { - if (result & AV_386_CMOV) - features |= CMOV; - if (result & AV_386_MMX) - features |= MMX; - if (result & AV_386_AMD_MMX) - features |= MMX_Extensions; - if (result & AV_386_SSE) - features |= SSE; - if (result & AV_386_SSE2) - features |= SSE2; + if (getisax (&result, 1)) + { + if (result & AV_386_CMOV) + features |= CMOV; + if (result & AV_386_MMX) + features |= MMX; + if (result & AV_386_AMD_MMX) + features |= MMX_EXTENSIONS; + if (result & AV_386_SSE) + features |= SSE; + if (result & AV_386_SSE2) + features |= SSE2; } #else char vendor[13]; @@ -333,128 +380,130 @@ static unsigned int detectCPUFeatures(void) { * original values when we access the output operands. 
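On compilers that ship <cpuid.h> (GCC and clang), the same leaf-1 feature query can be written without inline assembly; __get_cpuid also performs the CPUID-availability check that the EFLAGS bit-21 toggling prologue above does by hand. A sketch using the same CPUID.1:EDX bits the code tests:

    #include <cpuid.h>      /* __get_cpuid; GCC and clang only */

    enum { HAS_CMOV = 1, HAS_MMX = 2, HAS_SSE = 4, HAS_SSE2 = 8 };

    static unsigned
    x86_features (void)
    {
        unsigned eax, ebx, ecx, edx;
        unsigned features = 0;

        /* leaf 1, EDX: bit 15 = CMOV, 23 = MMX, 25 = SSE, 26 = SSE2 */
        if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
        {
            if (edx & (1u << 15)) features |= HAS_CMOV;
            if (edx & (1u << 23)) features |= HAS_MMX;
            if (edx & (1u << 25)) features |= HAS_SSE;
            if (edx & (1u << 26)) features |= HAS_SSE2;
        }
        return features;
    }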
*/ __asm__ ( - "pushf\n" - "pop %%eax\n" - "mov %%eax, %%ecx\n" - "xor $0x00200000, %%eax\n" - "push %%eax\n" - "popf\n" - "pushf\n" - "pop %%eax\n" - "mov $0x0, %%edx\n" - "xor %%ecx, %%eax\n" - "jz 1f\n" - - "mov $0x00000000, %%eax\n" - "push %%ebx\n" - "cpuid\n" - "mov %%ebx, %%eax\n" - "pop %%ebx\n" - "mov %%eax, %1\n" - "mov %%edx, %2\n" - "mov %%ecx, %3\n" - "mov $0x00000001, %%eax\n" - "push %%ebx\n" - "cpuid\n" - "pop %%ebx\n" - "1:\n" - "mov %%edx, %0\n" + "pushf\n" + "pop %%eax\n" + "mov %%eax, %%ecx\n" + "xor $0x00200000, %%eax\n" + "push %%eax\n" + "popf\n" + "pushf\n" + "pop %%eax\n" + "mov $0x0, %%edx\n" + "xor %%ecx, %%eax\n" + "jz 1f\n" + + "mov $0x00000000, %%eax\n" + "push %%ebx\n" + "cpuid\n" + "mov %%ebx, %%eax\n" + "pop %%ebx\n" + "mov %%eax, %1\n" + "mov %%edx, %2\n" + "mov %%ecx, %3\n" + "mov $0x00000001, %%eax\n" + "push %%ebx\n" + "cpuid\n" + "pop %%ebx\n" + "1:\n" + "mov %%edx, %0\n" : "=r" (result), - "=m" (vendor[0]), - "=m" (vendor[4]), - "=m" (vendor[8]) + "=m" (vendor[0]), + "=m" (vendor[4]), + "=m" (vendor[8]) : : "%eax", "%ecx", "%edx" ); - + #elif defined (_MSC_VER) _asm { - pushfd - pop eax - mov ecx, eax - xor eax, 00200000h - push eax - popfd - pushfd - pop eax - mov edx, 0 - xor eax, ecx - jz nocpuid - - mov eax, 0 - push ebx - cpuid - mov eax, ebx - pop ebx - mov vendor0, eax - mov vendor1, edx - mov vendor2, ecx - mov eax, 1 - push ebx - cpuid - pop ebx + pushfd + pop eax + mov ecx, eax + xor eax, 00200000h + push eax + popfd + pushfd + pop eax + mov edx, 0 + xor eax, ecx + jz nocpuid + + mov eax, 0 + push ebx + cpuid + mov eax, ebx + pop ebx + mov vendor0, eax + mov vendor1, edx + mov vendor2, ecx + mov eax, 1 + push ebx + cpuid + pop ebx nocpuid: - mov result, edx + mov result, edx } - memmove (vendor+0, &vendor0, 4); - memmove (vendor+4, &vendor1, 4); - memmove (vendor+8, &vendor2, 4); + memmove (vendor + 0, &vendor0, 4); + memmove (vendor + 4, &vendor1, 4); + memmove (vendor + 8, &vendor2, 4); #else # error unsupported compiler #endif features = 0; - if (result) { - /* result now contains the standard feature bits */ - if (result & (1 << 15)) - features |= CMOV; - if (result & (1 << 23)) - features |= MMX; - if (result & (1 << 25)) - features |= SSE; - if (result & (1 << 26)) - features |= SSE2; - if ((features & MMX) && !(features & SSE) && - (strcmp(vendor, "AuthenticAMD") == 0 || - strcmp(vendor, "Geode by NSC") == 0)) { - /* check for AMD MMX extensions */ + if (result) + { + /* result now contains the standard feature bits */ + if (result & (1 << 15)) + features |= CMOV; + if (result & (1 << 23)) + features |= MMX; + if (result & (1 << 25)) + features |= SSE; + if (result & (1 << 26)) + features |= SSE2; + if ((features & MMX) && !(features & SSE) && + (strcmp (vendor, "AuthenticAMD") == 0 || + strcmp (vendor, "Geode by NSC") == 0)) + { + /* check for AMD MMX extensions */ #ifdef __GNUC__ - __asm__( - " push %%ebx\n" - " mov $0x80000000, %%eax\n" - " cpuid\n" - " xor %%edx, %%edx\n" - " cmp $0x1, %%eax\n" - " jge 2f\n" - " mov $0x80000001, %%eax\n" - " cpuid\n" - "2:\n" - " pop %%ebx\n" - " mov %%edx, %0\n" + __asm__ ( + " push %%ebx\n" + " mov $0x80000000, %%eax\n" + " cpuid\n" + " xor %%edx, %%edx\n" + " cmp $0x1, %%eax\n" + " jge 2f\n" + " mov $0x80000001, %%eax\n" + " cpuid\n" + "2:\n" + " pop %%ebx\n" + " mov %%edx, %0\n" : "=r" (result) : : "%eax", "%ecx", "%edx" - ); + ); #elif defined _MSC_VER - _asm { - push ebx - mov eax, 80000000h - cpuid - xor edx, edx - cmp eax, 1 - jge notamd - mov eax, 80000001h - cpuid - notamd: - pop ebx - 
mov result, edx - } + _asm { + push ebx + mov eax, 80000000h + cpuid + xor edx, edx + cmp eax, 1 + jge notamd + mov eax, 80000001h + cpuid + notamd: + pop ebx + mov result, edx + } #endif - if (result & (1<<22)) - features |= MMX_Extensions; - } + if (result & (1 << 22)) + features |= MMX_EXTENSIONS; + } } #endif /* HAVE_GETISAX */ @@ -469,9 +518,9 @@ pixman_have_mmx (void) if (!initialized) { - unsigned int features = detectCPUFeatures(); - mmx_present = (features & (MMX|MMX_Extensions)) == (MMX|MMX_Extensions); - initialized = TRUE; + unsigned int features = detect_cpu_features (); + mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS); + initialized = TRUE; } return mmx_present; @@ -486,13 +535,14 @@ pixman_have_sse2 (void) if (!initialized) { - unsigned int features = detectCPUFeatures(); - sse2_present = (features & (MMX|MMX_Extensions|SSE|SSE2)) == (MMX|MMX_Extensions|SSE|SSE2); - initialized = TRUE; + unsigned int features = detect_cpu_features (); + sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2); + initialized = TRUE; } return sse2_present; } + #endif #else /* __amd64__ */ @@ -510,25 +560,26 @@ _pixman_choose_implementation (void) { #ifdef USE_SSE2 if (pixman_have_sse2 ()) - return _pixman_implementation_create_sse2 (NULL); + return _pixman_implementation_create_sse2 (); #endif #ifdef USE_MMX - if (pixman_have_mmx()) - return _pixman_implementation_create_mmx (NULL); + if (pixman_have_mmx ()) + return _pixman_implementation_create_mmx (); #endif #ifdef USE_ARM_NEON - if (pixman_have_arm_neon()) - return _pixman_implementation_create_arm_neon (NULL); + if (pixman_have_arm_neon ()) + return _pixman_implementation_create_arm_neon (); #endif #ifdef USE_ARM_SIMD - if (pixman_have_arm_simd()) - return _pixman_implementation_create_arm_simd (NULL); + if (pixman_have_arm_simd ()) + return _pixman_implementation_create_arm_simd (); #endif #ifdef USE_VMX - if (pixman_have_vmx()) - return _pixman_implementation_create_vmx (NULL); + if (pixman_have_vmx ()) + return _pixman_implementation_create_vmx (); #endif - - return _pixman_implementation_create_fast_path (NULL); + + return _pixman_implementation_create_fast_path (); } + diff --git a/lib/pixman/pixman/pixman-edge-imp.h b/lib/pixman/pixman/pixman-edge-imp.h index 016bfaba7..a30f82108 100644 --- a/lib/pixman/pixman/pixman-edge-imp.h +++ b/lib/pixman/pixman/pixman-edge-imp.h @@ -20,11 +20,11 @@ * PERFORMANCE OF THIS SOFTWARE. */ -#ifndef rasterizeSpan +#ifndef rasterize_span #endif static void -rasterizeEdges (pixman_image_t *image, +RASTERIZE_EDGES (pixman_image_t *image, pixman_edge_t *l, pixman_edge_t *r, pixman_fixed_t t, @@ -50,7 +50,7 @@ rasterizeEdges (pixman_image_t *image, #if N_BITS == 1 /* For the non-antialiased case, round the coordinates up, in effect * sampling the center of the pixel. (The AA case does a similar - * adjustment in RenderSamplesX) */ + * adjustment in RENDER_SAMPLES_X) */ lx += X_FRAC_FIRST(1); rx += X_FRAC_FIRST(1); #endif @@ -78,53 +78,85 @@ rasterizeEdges (pixman_image_t *image, #if N_BITS == 1 { + +#ifdef WORDS_BIGENDIAN +# define SCREEN_SHIFT_LEFT(x,n) ((x) << (n)) +# define SCREEN_SHIFT_RIGHT(x,n) ((x) >> (n)) +#else +# define SCREEN_SHIFT_LEFT(x,n) ((x) >> (n)) +# define SCREEN_SHIFT_RIGHT(x,n) ((x) << (n)) +#endif + +#define LEFT_MASK(x) \ + (((x) & 0x1f) ? \ + SCREEN_SHIFT_RIGHT (0xffffffff, (x) & 0x1f) : 0) +#define RIGHT_MASK(x) \ + (((32 - (x)) & 0x1f) ? 
\ + SCREEN_SHIFT_LEFT (0xffffffff, (32 - (x)) & 0x1f) : 0) + +#define MASK_BITS(x,w,l,n,r) { \ + n = (w); \ + r = RIGHT_MASK ((x) + n); \ + l = LEFT_MASK (x); \ + if (l) { \ + n -= 32 - ((x) & 0x1f); \ + if (n < 0) { \ + n = 0; \ + l &= r; \ + r = 0; \ + } \ + } \ + n >>= 5; \ + } + uint32_t *a = line; uint32_t startmask; uint32_t endmask; int nmiddle; int width = rxi - lxi; int x = lxi; - - a += x >> FB_SHIFT; - x &= FB_MASK; - - FbMaskBits (x, width, startmask, nmiddle, endmask); - if (startmask) { - WRITE(image, a, READ(image, a) | startmask); - a++; - } - while (nmiddle--) - WRITE(image, a++, FB_ALLONES); - if (endmask) - WRITE(image, a, READ(image, a) | endmask); + + a += x >> 5; + x &= 0x1f; + + MASK_BITS (x, width, startmask, nmiddle, endmask); + + if (startmask) { + WRITE(image, a, READ(image, a) | startmask); + a++; + } + while (nmiddle--) + WRITE(image, a++, 0xffffffff); + if (endmask) + WRITE(image, a, READ(image, a) | endmask); } #else { - DefineAlpha(line,lxi); + DEFINE_ALPHA(line,lxi); int lxs; int rxs; /* Sample coverage for edge pixels */ - lxs = RenderSamplesX (lx, N_BITS); - rxs = RenderSamplesX (rx, N_BITS); + lxs = RENDER_SAMPLES_X (lx, N_BITS); + rxs = RENDER_SAMPLES_X (rx, N_BITS); /* Add coverage across row */ if (lxi == rxi) { - AddAlpha (rxs - lxs); + ADD_ALPHA (rxs - lxs); } else { int xi; - AddAlpha (N_X_FRAC(N_BITS) - lxs); - StepAlpha; + ADD_ALPHA (N_X_FRAC(N_BITS) - lxs); + STEP_ALPHA; for (xi = lxi + 1; xi < rxi; xi++) { - AddAlpha (N_X_FRAC(N_BITS)); - StepAlpha; + ADD_ALPHA (N_X_FRAC(N_BITS)); + STEP_ALPHA; } - AddAlpha (rxs); + ADD_ALPHA (rxs); } } #endif @@ -136,19 +168,19 @@ rasterizeEdges (pixman_image_t *image, #if N_BITS > 1 if (pixman_fixed_frac (y) != Y_FRAC_LAST(N_BITS)) { - RenderEdgeStepSmall (l); - RenderEdgeStepSmall (r); + RENDER_EDGE_STEP_SMALL (l); + RENDER_EDGE_STEP_SMALL (r); y += STEP_Y_SMALL(N_BITS); } else #endif { - RenderEdgeStepBig (l); - RenderEdgeStepBig (r); + RENDER_EDGE_STEP_BIG (l); + RENDER_EDGE_STEP_BIG (r); y += STEP_Y_BIG(N_BITS); line += stride; } } } -#undef rasterizeSpan +#undef rasterize_span diff --git a/lib/pixman/pixman/pixman-edge.c b/lib/pixman/pixman/pixman-edge.c index b9246af5f..81a2e960a 100644 --- a/lib/pixman/pixman/pixman-edge.c +++ b/lib/pixman/pixman/pixman-edge.c @@ -27,6 +27,35 @@ #include <string.h> #include "pixman-private.h" +#include "pixman-accessor.h" + +/* + * Step across a small sample grid gap + */ +#define RENDER_EDGE_STEP_SMALL(edge) \ + { \ + edge->x += edge->stepx_small; \ + edge->e += edge->dx_small; \ + if (edge->e > 0) \ + { \ + edge->e -= edge->dy; \ + edge->x += edge->signdx; \ + } \ + } + +/* + * Step across a large sample grid gap + */ +#define RENDER_EDGE_STEP_BIG(edge) \ + { \ + edge->x += edge->stepx_big; \ + edge->e += edge->dx_big; \ + if (edge->e > 0) \ + { \ + edge->e -= edge->dy; \ + edge->x += edge->signdx; \ + } \ + } #ifdef PIXMAN_FB_ACCESSORS #define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_accessors @@ -38,36 +67,38 @@ * 4 bit alpha */ -#define N_BITS 4 -#define rasterizeEdges fbRasterizeEdges4 +#define N_BITS 4 +#define RASTERIZE_EDGES rasterize_edges_4 -#if BITMAP_BIT_ORDER == LSBFirst -#define Shift4(o) ((o) << 2) +#ifndef WORDS_BIG_ENDIAN +#define SHIFT_4(o) ((o) << 2) #else -#define Shift4(o) ((1-(o)) << 2) +#define SHIFT_4(o) ((1 - (o)) << 2) #endif -#define Get4(x,o) (((x) >> Shift4(o)) & 0xf) -#define Put4(x,o,v) (((x) & ~(0xf << Shift4(o))) | (((v) & 0xf) << Shift4(o))) +#define GET_4(x, o) (((x) >> SHIFT_4 (o)) & 0xf) +#define PUT_4(x, o, v) \ + (((x) & 
~(0xf << SHIFT_4 (o))) | (((v) & 0xf) << SHIFT_4 (o))) -#define DefineAlpha(line,x) \ - uint8_t *__ap = (uint8_t *) line + ((x) >> 1); \ - int __ao = (x) & 1 +#define DEFINE_ALPHA(line, x) \ + uint8_t *__ap = (uint8_t *) line + ((x) >> 1); \ + int __ao = (x) & 1 -#define StepAlpha ((__ap += __ao), (__ao ^= 1)) +#define STEP_ALPHA ((__ap += __ao), (__ao ^= 1)) -#define AddAlpha(a) { \ - uint8_t __o = READ(image, __ap); \ - uint8_t __a = (a) + Get4(__o, __ao); \ - WRITE(image, __ap, Put4 (__o, __ao, __a | (0 - ((__a) >> 4)))); \ +#define ADD_ALPHA(a) \ + { \ + uint8_t __o = READ (image, __ap); \ + uint8_t __a = (a) + GET_4 (__o, __ao); \ + WRITE (image, __ap, PUT_4 (__o, __ao, __a | (0 - ((__a) >> 4)))); \ } #include "pixman-edge-imp.h" -#undef AddAlpha -#undef StepAlpha -#undef DefineAlpha -#undef rasterizeEdges +#undef ADD_ALPHA +#undef STEP_ALPHA +#undef DEFINE_ALPHA +#undef RASTERIZE_EDGES #undef N_BITS @@ -76,35 +107,38 @@ */ #define N_BITS 1 -#define rasterizeEdges fbRasterizeEdges1 +#define RASTERIZE_EDGES rasterize_edges_1 #include "pixman-edge-imp.h" -#undef rasterizeEdges +#undef RASTERIZE_EDGES #undef N_BITS /* * 8 bit alpha */ -static inline uint8_t +static force_inline uint8_t clip255 (int x) { - if (x > 255) return 255; + if (x > 255) + return 255; + return x; } -#define add_saturate_8(buf,val,length) \ - do { \ - int i__ = (length); \ - uint8_t *buf__ = (buf); \ - int val__ = (val); \ - \ - while (i__--) \ - { \ - WRITE(image, (buf__), clip255 (READ(image, (buf__)) + (val__))); \ - (buf__)++; \ - } \ +#define ADD_SATURATE_8(buf, val, length) \ + do \ + { \ + int i__ = (length); \ + uint8_t *buf__ = (buf); \ + int val__ = (val); \ + \ + while (i__--) \ + { \ + WRITE (image, (buf__), clip255 (READ (image, (buf__)) + (val__))); \ + (buf__)++; \ + } \ } while (0) /* @@ -119,13 +153,13 @@ clip255 (int x) * fill_start fill_end */ static void -fbRasterizeEdges8 (pixman_image_t *image, - pixman_edge_t *l, - pixman_edge_t *r, - pixman_fixed_t t, - pixman_fixed_t b) +rasterize_edges_8 (pixman_image_t *image, + pixman_edge_t * l, + pixman_edge_t * r, + pixman_fixed_t t, + pixman_fixed_t b) { - pixman_fixed_t y = t; + pixman_fixed_t y = t; uint32_t *line; int fill_start = -1, fill_end = -1; int fill_size = 0; @@ -138,153 +172,165 @@ fbRasterizeEdges8 (pixman_image_t *image, for (;;) { uint8_t *ap = (uint8_t *) line; - pixman_fixed_t lx, rx; - int lxi, rxi; + pixman_fixed_t lx, rx; + int lxi, rxi; - /* clip X */ - lx = l->x; - if (lx < 0) + /* clip X */ + lx = l->x; + if (lx < 0) lx = 0; - rx = r->x; - if (pixman_fixed_to_int (rx) >= width) + + rx = r->x; + + if (pixman_fixed_to_int (rx) >= width) + { /* Use the last pixel of the scanline, covered 100%. * We can't use the first pixel following the scanline, * because accessing it could result in a buffer overrun. */ rx = pixman_int_to_fixed (width) - 1; + } - /* Skip empty (or backwards) sections */ - if (rx > lx) - { + /* Skip empty (or backwards) sections */ + if (rx > lx) + { int lxs, rxs; - /* Find pixel bounds for span. */ - lxi = pixman_fixed_to_int (lx); - rxi = pixman_fixed_to_int (rx); + /* Find pixel bounds for span. 
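The RENDER_EDGE_STEP_SMALL/BIG macros defined at the top of pixman-edge.c advance a polygon edge with a fixed-point Bresenham step: x moves by a precomputed amount per sample row while an error term accumulates the remainder and triggers one extra signdx correction when it goes positive. A stripped-down sketch (the _SMALL and _BIG variants differ only in which precomputed step and increment they apply; field names loosely mirror pixman_edge_t):

    typedef struct
    {
        int x;        /* current intercept, fixed point */
        int e;        /* running error term */
        int stepx;    /* whole step per sample row */
        int signdx;   /* +1 or -1 correction */
        int dx, dy;   /* error increment and wrap */
    } edge_sketch_t;

    static void
    edge_step (edge_sketch_t *edge)
    {
        edge->x += edge->stepx;       /* integer part of the slope */
        edge->e += edge->dx;          /* accumulate the remainder */
        if (edge->e > 0)
        {
            edge->e -= edge->dy;      /* wrap the error term */
            edge->x += edge->signdx;  /* ...and take the extra step */
        }
    }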
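The expression __a | (0 - ((__a) >> 4)) in ADD_ALPHA is a branch-free clamp: the sum of two 4-bit coverage values fits in five bits, so the shifted-out carry, negated, yields an all-ones mask exactly when the addition overflowed. As a standalone function (illustrative):

    /* x and y are 4-bit coverage values in [0, 15]. */
    static unsigned
    add_4bit_saturate (unsigned x, unsigned y)
    {
        unsigned sum = x + y;        /* at most 0x1e */

        sum |= 0u - (sum >> 4);      /* all ones on overflow */

        return sum & 0xf;            /* 0xf if overflowed, else sum */
    }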
*/ + lxi = pixman_fixed_to_int (lx); + rxi = pixman_fixed_to_int (rx); /* Sample coverage for edge pixels */ - lxs = RenderSamplesX (lx, 8); - rxs = RenderSamplesX (rx, 8); + lxs = RENDER_SAMPLES_X (lx, 8); + rxs = RENDER_SAMPLES_X (rx, 8); /* Add coverage across row */ - if (lxi == rxi) - { - WRITE(image, ap +lxi, clip255 (READ(image, ap + lxi) + rxs - lxs)); + if (lxi == rxi) + { + WRITE (image, ap + lxi, + clip255 (READ (image, ap + lxi) + rxs - lxs)); } - else - { - WRITE(image, ap + lxi, clip255 (READ(image, ap + lxi) + N_X_FRAC(8) - lxs)); + else + { + WRITE (image, ap + lxi, + clip255 (READ (image, ap + lxi) + N_X_FRAC (8) - lxs)); - /* Move forward so that lxi/rxi is the pixel span */ - lxi++; + /* Move forward so that lxi/rxi is the pixel span */ + lxi++; - /* Don't bother trying to optimize the fill unless + /* Don't bother trying to optimize the fill unless * the span is longer than 4 pixels. */ - if (rxi - lxi > 4) - { - if (fill_start < 0) - { - fill_start = lxi; - fill_end = rxi; - fill_size++; + if (rxi - lxi > 4) + { + if (fill_start < 0) + { + fill_start = lxi; + fill_end = rxi; + fill_size++; } - else - { - if (lxi >= fill_end || rxi < fill_start) - { - /* We're beyond what we saved, just fill it */ - add_saturate_8 (ap + fill_start, - fill_size * N_X_FRAC(8), - fill_end - fill_start); - fill_start = lxi; - fill_end = rxi; - fill_size = 1; + else + { + if (lxi >= fill_end || rxi < fill_start) + { + /* We're beyond what we saved, just fill it */ + ADD_SATURATE_8 (ap + fill_start, + fill_size * N_X_FRAC (8), + fill_end - fill_start); + fill_start = lxi; + fill_end = rxi; + fill_size = 1; } - else - { - /* Update fill_start */ - if (lxi > fill_start) - { - add_saturate_8 (ap + fill_start, - fill_size * N_X_FRAC(8), - lxi - fill_start); - fill_start = lxi; + else + { + /* Update fill_start */ + if (lxi > fill_start) + { + ADD_SATURATE_8 (ap + fill_start, + fill_size * N_X_FRAC (8), + lxi - fill_start); + fill_start = lxi; } - else if (lxi < fill_start) - { - add_saturate_8 (ap + lxi, N_X_FRAC(8), - fill_start - lxi); + else if (lxi < fill_start) + { + ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), + fill_start - lxi); } - /* Update fill_end */ - if (rxi < fill_end) - { - add_saturate_8 (ap + rxi, - fill_size * N_X_FRAC(8), - fill_end - rxi); - fill_end = rxi; + /* Update fill_end */ + if (rxi < fill_end) + { + ADD_SATURATE_8 (ap + rxi, + fill_size * N_X_FRAC (8), + fill_end - rxi); + fill_end = rxi; } - else if (fill_end < rxi) - { - add_saturate_8 (ap + fill_end, - N_X_FRAC(8), - rxi - fill_end); + else if (fill_end < rxi) + { + ADD_SATURATE_8 (ap + fill_end, + N_X_FRAC (8), + rxi - fill_end); } - fill_size++; + fill_size++; } } } - else - { - add_saturate_8 (ap + lxi, N_X_FRAC(8), rxi - lxi); + else + { + ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), rxi - lxi); } - WRITE(image, ap + rxi, clip255 (READ(image, ap + rxi) + rxs)); + WRITE (image, ap + rxi, clip255 (READ (image, ap + rxi) + rxs)); } } - if (y == b) { + if (y == b) + { /* We're done, make sure we clean up any remaining fill. 
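The fill_start/fill_end bookkeeping in this function is a batching optimization: interior pixels of a span all receive the same N_X_FRAC(8) coverage per subsample row, so the rasterizer remembers one pending span plus a row count and only touches memory when the next span no longer lines up (or uses MEMSET_WRAPPED once the accumulated coverage is already full). The flush step amounts to a deferred saturating add, roughly (flush_span is an illustrative name):

    #include <stdint.h>

    /* Add `count` rows' worth of interior coverage to a pending span,
     * clamping each byte at 255 -- the deferred equivalent of calling
     * ADD_SATURATE_8 once per subsample row. */
    static void
    flush_span (uint8_t *row, int start, int end, int count, int frac)
    {
        int i;

        for (i = start; i < end; ++i)
        {
            int v = row[i] + count * frac;

            row[i] = v > 255 ? 255 : (uint8_t) v;
        }
    }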
*/ - if (fill_start != fill_end) { - if (fill_size == N_Y_FRAC(8)) - { - MEMSET_WRAPPED (image, ap + fill_start, 0xff, fill_end - fill_start); + if (fill_start != fill_end) + { + if (fill_size == N_Y_FRAC (8)) + { + MEMSET_WRAPPED (image, ap + fill_start, + 0xff, fill_end - fill_start); } - else - { - add_saturate_8 (ap + fill_start, fill_size * N_X_FRAC(8), - fill_end - fill_start); + else + { + ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8), + fill_end - fill_start); } - } - break; - } + } + break; + } - if (pixman_fixed_frac (y) != Y_FRAC_LAST(8)) - { - RenderEdgeStepSmall (l); - RenderEdgeStepSmall (r); - y += STEP_Y_SMALL(8); + if (pixman_fixed_frac (y) != Y_FRAC_LAST (8)) + { + RENDER_EDGE_STEP_SMALL (l); + RENDER_EDGE_STEP_SMALL (r); + y += STEP_Y_SMALL (8); } - else - { - RenderEdgeStepBig (l); - RenderEdgeStepBig (r); - y += STEP_Y_BIG(8); + else + { + RENDER_EDGE_STEP_BIG (l); + RENDER_EDGE_STEP_BIG (r); + y += STEP_Y_BIG (8); if (fill_start != fill_end) { - if (fill_size == N_Y_FRAC(8)) - { - MEMSET_WRAPPED (image, ap + fill_start, 0xff, fill_end - fill_start); + if (fill_size == N_Y_FRAC (8)) + { + MEMSET_WRAPPED (image, ap + fill_start, + 0xff, fill_end - fill_start); } - else - { - add_saturate_8 (ap + fill_start, fill_size * N_X_FRAC(8), - fill_end - fill_start); + else + { + ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8), + fill_end - fill_start); } + fill_start = fill_end = -1; fill_size = 0; - } - line += stride; + } + + line += stride; } } } @@ -294,21 +340,23 @@ static #endif void PIXMAN_RASTERIZE_EDGES (pixman_image_t *image, - pixman_edge_t *l, - pixman_edge_t *r, - pixman_fixed_t t, - pixman_fixed_t b) + pixman_edge_t * l, + pixman_edge_t * r, + pixman_fixed_t t, + pixman_fixed_t b) { switch (PIXMAN_FORMAT_BPP (image->bits.format)) { case 1: - fbRasterizeEdges1 (image, l, r, t, b); + rasterize_edges_1 (image, l, r, t, b); break; + case 4: - fbRasterizeEdges4 (image, l, r, t, b); + rasterize_edges_4 (image, l, r, t, b); break; + case 8: - fbRasterizeEdges8 (image, l, r, t, b); + rasterize_edges_8 (image, l, r, t, b); break; } } @@ -317,12 +365,14 @@ PIXMAN_RASTERIZE_EDGES (pixman_image_t *image, PIXMAN_EXPORT void pixman_rasterize_edges (pixman_image_t *image, - pixman_edge_t *l, - pixman_edge_t *r, - pixman_fixed_t t, - pixman_fixed_t b) + pixman_edge_t * l, + pixman_edge_t * r, + pixman_fixed_t t, + pixman_fixed_t b) { - if (image->common.read_func || image->common.write_func) + return_if_fail (image->type == BITS); + + if (image->bits.read_func || image->bits.write_func) pixman_rasterize_edges_accessors (image, l, r, t, b); else pixman_rasterize_edges_no_accessors (image, l, r, t, b); diff --git a/lib/pixman/pixman/pixman-fast-path.c b/lib/pixman/pixman/pixman-fast-path.c index 5f78bc335..5ab8d8c99 100644 --- a/lib/pixman/pixman/pixman-fast-path.c +++ b/lib/pixman/pixman/pixman-fast-path.c @@ -23,110 +23,139 @@ * Author: Keith Packard, SuSE, Inc. */ +#ifdef HAVE_CONFIG_H #include <config.h> +#endif #include <string.h> #include "pixman-private.h" #include "pixman-combine32.h" -#define FbFullMask(n) ((n) == 32 ? 
(uint32_t)-1 : ((((uint32_t) 1) << n) - 1)) - -#undef READ -#undef WRITE -#define READ(img,x) (*(x)) -#define WRITE(img,ptr,v) ((*(ptr)) = (v)) static force_inline uint32_t -fbOver (uint32_t src, uint32_t dest) +fetch_24 (uint8_t *a) { - // dest = (dest * (255 - alpha)) / 255 + src - uint32_t a = ~src >> 24; // 255 - alpha == 255 + (~alpha + 1) == ~alpha - FbByteMulAdd(dest, a, src); + if (((unsigned long)a) & 1) + { +#ifdef WORDS_BIGENDIAN + return (*a << 16) | (*(uint16_t *)(a + 1)); +#else + return *a | (*(uint16_t *)(a + 1) << 8); +#endif + } + else + { +#ifdef WORDS_BIGENDIAN + return (*(uint16_t *)a << 8) | *(a + 2); +#else + return *(uint16_t *)a | (*(a + 2) << 16); +#endif + } +} - return dest; +static force_inline void +store_24 (uint8_t *a, + uint32_t v) +{ + if (((unsigned long)a) & 1) + { +#ifdef WORDS_BIGENDIAN + *a = (uint8_t) (v >> 16); + *(uint16_t *)(a + 1) = (uint16_t) (v); +#else + *a = (uint8_t) (v); + *(uint16_t *)(a + 1) = (uint16_t) (v >> 8); +#endif + } + else + { +#ifdef WORDS_BIGENDIAN + *(uint16_t *)a = (uint16_t)(v >> 8); + *(a + 2) = (uint8_t)v; +#else + *(uint16_t *)a = (uint16_t)v; + *(a + 2) = (uint8_t)(v >> 16); +#endif + } } -static uint32_t -fbOver24 (uint32_t x, uint32_t y) +static force_inline uint32_t +over (uint32_t src, + uint32_t dest) { - uint16_t a = ~x >> 24; - uint16_t t; - uint32_t m,n,o; - - m = FbOverU(x,y,0,a,t); - n = FbOverU(x,y,8,a,t); - o = FbOverU(x,y,16,a,t); - return m|n|o; + uint32_t a = ~src >> 24; + + UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src); + + return dest; } static uint32_t -fbIn (uint32_t x, uint8_t y) +in (uint32_t x, + uint8_t y) { - uint16_t a = y; - uint16_t t; - uint32_t m,n,o,p; - - m = FbInU(x,0,a,t); - n = FbInU(x,8,a,t); - o = FbInU(x,16,a,t); - p = FbInU(x,24,a,t); - return m|n|o|p; + uint16_t a = y; + + UN8x4_MUL_UN8 (x, a); + + return x; } /* * Naming convention: * - * opSRCxMASKxDST + * op_src_mask_dest */ - static void -fbCompositeOver_x888x8x8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_over_x888_8_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t *src, *srcLine; - uint32_t *dst, *dstLine; - uint8_t *mask, *maskLine; - int srcStride, maskStride, dstStride; + uint32_t *src, *src_line; + uint32_t *dst, *dst_line; + uint8_t *mask, *mask_line; + int src_stride, mask_stride, dst_stride; uint8_t m; uint32_t s, d; uint16_t w; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { - src = srcLine; - srcLine += srcStride; - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = 
mask_line; + mask_line += mask_stride; w = width; while (w--) { - m = READ(pMask, mask++); + m = *mask++; if (m) { - s = READ(pSrc, src) | 0xff000000; + s = *src | 0xff000000; if (m == 0xff) - WRITE(pDst, dst, s); + { + *dst = s; + } else { - d = fbIn (s, m); - WRITE(pDst, dst, fbOver (d, READ(pDst, dst))); + d = in (s, m); + *dst = over (d, *dst); } } src++; @@ -136,55 +165,53 @@ fbCompositeOver_x888x8x8888 (pixman_implementation_t *imp, } static void -fbCompositeSolidMaskIn_nx8x8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t *iSrc, - pixman_image_t *iMask, - pixman_image_t *iDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_in_n_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dest_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t src, srca; - uint8_t *dstLine, *dst, dstMask; - uint8_t *maskLine, *mask, m; - int dstStride, maskStride; - uint16_t w; - uint16_t t; + uint32_t src, srca; + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + uint16_t w; + uint16_t t; - fbComposeGetSolid(iSrc, src, iDst->bits.format); + src = _pixman_image_get_solid (src_image, dest_image->bits.format); - dstMask = FbFullMask (PIXMAN_FORMAT_DEPTH (iDst->bits.format)); srca = src >> 24; - fbComposeGetStart (iDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); - fbComposeGetStart (iMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - if (srca == 0xff) { + if (srca == 0xff) + { while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) { m = *mask++; + if (m == 0) - { *dst = 0; - } else if (m != 0xff) - { - *dst = FbIntMult(m, *dst, t); - } + *dst = MUL_UN8 (m, *dst, t); + dst++; } } @@ -193,133 +220,127 @@ fbCompositeSolidMaskIn_nx8x8 (pixman_implementation_t *imp, { while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) { m = *mask++; - m = FbIntMult(m, srca, t); + m = MUL_UN8 (m, srca, t); + if (m == 0) - { *dst = 0; - } else if (m != 0xff) - { - *dst = FbIntMult(m, *dst, t); - } + *dst = MUL_UN8 (m, *dst, t); + dst++; } } } } - static void -fbCompositeSrcIn_8x8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t *iSrc, - pixman_image_t *iMask, - pixman_image_t *iDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_in_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dest_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint8_t *dstLine, *dst; - uint8_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; - uint8_t s; - uint16_t t; + uint8_t *dst_line, *dst; + 
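    /* A sketch (not the actual macro text) of the divisionless exact
     * a * b / 255 that MUL_UN8 from pixman-combine32.h computes, as
     * used by the IN loops below:
     *
     *     t = a * b + 0x80;
     *     result = (t + (t >> 8)) >> 8;
     *
     * For a, b in 0..255 this equals (a * b + 127) / 255 exactly.
     */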
uint8_t *src_line, *src; + int dst_stride, src_stride; + uint16_t w; + uint8_t s; + uint16_t t; - fbComposeGetStart (iSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1); - fbComposeGetStart (iDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w--) { s = *src++; + if (s == 0) - { *dst = 0; - } else if (s != 0xff) - { - *dst = FbIntMult(s, *dst, t); - } + *dst = MUL_UN8 (s, *dst, t); + dst++; } } } static void -fbCompositeSolidMask_nx8x8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_over_n_8_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t src, srca; - uint32_t *dstLine, *dst, d, dstMask; - uint8_t *maskLine, *mask, m; - int dstStride, maskStride; - uint16_t w; + uint32_t src, srca; + uint32_t *dst_line, *dst, d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + uint16_t w; - fbComposeGetSolid(pSrc, src, pDst->bits.format); + src = _pixman_image_get_solid (src_image, dst_image->bits.format); - dstMask = FbFullMask (PIXMAN_FORMAT_DEPTH (pDst->bits.format)); srca = src >> 24; if (src == 0) return; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) { - m = READ(pMask, mask++); + m = *mask++; if (m == 0xff) { if (srca == 0xff) - WRITE(pDst, dst, src & dstMask); + *dst = src; else - WRITE(pDst, dst, fbOver (src, READ(pDst, dst)) & dstMask); + *dst = over (src, *dst); } else if (m) { - d = fbIn (src, m); - WRITE(pDst, dst, fbOver (d, READ(pDst, dst)) & dstMask); + d = in (src, m); + *dst = over (d, *dst); } dst++; } @@ -327,136 +348,187 @@ fbCompositeSolidMask_nx8x8888 (pixman_implementation_t *imp, } static void -fbCompositeSolidMask_nx8888x8888C (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t src, srca; - uint32_t *dstLine, 
*dst, d, dstMask; - uint32_t *maskLine, *mask, ma; - int dstStride, maskStride; - uint16_t w; - uint32_t m, n, o, p; + uint32_t src, srca, s; + uint32_t *dst_line, *dst, d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + uint16_t w; - fbComposeGetSolid(pSrc, src, pDst->bits.format); + src = _pixman_image_get_solid (src_image, dst_image->bits.format); - dstMask = FbFullMask (PIXMAN_FORMAT_DEPTH (pDst->bits.format)); srca = src >> 24; if (src == 0) return; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint32_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) { - ma = READ(pMask, mask++); + ma = *mask++; + + if (ma) + { + d = *dst; + s = src; + + UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d); + + *dst = s; + } + + dst++; + } + } +} + +static void +fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca, s; + uint32_t *dst_line, *dst, d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + uint16_t w; + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + ma = *mask++; if (ma == 0xffffffff) { if (srca == 0xff) - WRITE(pDst, dst, src & dstMask); + *dst = src; else - WRITE(pDst, dst, fbOver (src, READ(pDst, dst)) & dstMask); + *dst = over (src, *dst); } else if (ma) { - d = READ(pDst, dst); -#define FbInOverC(src,srca,msk,dst,i,result) { \ - uint16_t __a = FbGet8(msk,i); \ - uint32_t __t, __ta; \ - uint32_t __i; \ - __t = FbIntMult (FbGet8(src,i), __a,__i); \ - __ta = (uint8_t) ~FbIntMult (srca, __a,__i); \ - __t = __t + FbIntMult(FbGet8(dst,i),__ta,__i); \ - __t = (uint32_t) (uint8_t) (__t | (-(__t >> 8))); \ - result = __t << (i); \ -} - FbInOverC (src, srca, ma, d, 0, m); - FbInOverC (src, srca, ma, d, 8, n); - FbInOverC (src, srca, ma, d, 16, o); - FbInOverC (src, srca, ma, d, 24, p); - WRITE(pDst, dst, m|n|o|p); + d = *dst; + s = src; + + UN8x4_MUL_UN8x4 (s, ma); + UN8x4_MUL_UN8 (ma, srca); + ma = ~ma; + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); + + *dst = d; } + dst++; } } } static void -fbCompositeSolidMask_nx8x0888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_over_n_8_0888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t 
src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t src, srca; - uint8_t *dstLine, *dst; - uint32_t d; - uint8_t *maskLine, *mask, m; - int dstStride, maskStride; - uint16_t w; + uint32_t src, srca; + uint8_t *dst_line, *dst; + uint32_t d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + uint16_t w; - fbComposeGetSolid(pSrc, src, pDst->bits.format); + src = _pixman_image_get_solid (src_image, dst_image->bits.format); srca = src >> 24; if (src == 0) return; - fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 3); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) { - m = READ(pMask, mask++); + m = *mask++; if (m == 0xff) { if (srca == 0xff) + { d = src; + } else { - d = Fetch24(pDst, dst); - d = fbOver24 (src, d); + d = fetch_24 (dst); + d = over (src, d); } - Store24(pDst, dst,d); + store_24 (dst, d); } else if (m) { - d = fbOver24 (fbIn(src,m), Fetch24(pDst, dst)); - Store24(pDst, dst, d); + d = over (in (src, m), fetch_24 (dst)); + store_24 (dst, d); } dst += 3; } @@ -464,63 +536,65 @@ fbCompositeSolidMask_nx8x0888 (pixman_implementation_t *imp, } static void -fbCompositeSolidMask_nx8x0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_over_n_8_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t src, srca; - uint16_t *dstLine, *dst; - uint32_t d; - uint8_t *maskLine, *mask, m; - int dstStride, maskStride; - uint16_t w; + uint32_t src, srca; + uint16_t *dst_line, *dst; + uint32_t d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + uint16_t w; - fbComposeGetSolid(pSrc, src, pDst->bits.format); + src = _pixman_image_get_solid (src_image, dst_image->bits.format); srca = src >> 24; if (src == 0) return; - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) { - m = READ(pMask, mask++); + m = *mask++; if (m == 0xff) { if (srca == 0xff) + { d = src; + } else { - d = READ(pDst, dst); - d = fbOver24 (src, cvt0565to0888(d)); + d = *dst; + d = over (src, CONVERT_0565_TO_0888 (d)); } - WRITE(pDst, dst, cvt8888to0565(d)); + *dst = CONVERT_8888_TO_0565 (d); } else if (m) { - d = READ(pDst, dst); - d = fbOver24 
(fbIn(src,m), cvt0565to0888(d)); - WRITE(pDst, dst, cvt8888to0565(d)); + d = *dst; + d = over (in (src, m), CONVERT_0565_TO_0888 (d)); + *dst = CONVERT_8888_TO_0565 (d); } dst++; } @@ -528,73 +602,76 @@ fbCompositeSolidMask_nx8x0565 (pixman_implementation_t *imp, } static void -fbCompositeSolidMask_nx8888x0565C (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t src, srca; - uint16_t src16; - uint16_t *dstLine, *dst; - uint32_t d; - uint32_t *maskLine, *mask, ma; - int dstStride, maskStride; - uint16_t w; - uint32_t m, n, o; + uint32_t src, srca, s; + uint16_t src16; + uint16_t *dst_line, *dst; + uint32_t d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + uint16_t w; - fbComposeGetSolid(pSrc, src, pDst->bits.format); + src = _pixman_image_get_solid (src_image, dst_image->bits.format); srca = src >> 24; if (src == 0) return; - src16 = cvt8888to0565(src); + src16 = CONVERT_8888_TO_0565 (src); - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint32_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) { - ma = READ(pMask, mask++); + ma = *mask++; if (ma == 0xffffffff) { if (srca == 0xff) { - WRITE(pDst, dst, src16); + *dst = src16; } else { - d = READ(pDst, dst); - d = fbOver24 (src, cvt0565to0888(d)); - WRITE(pDst, dst, cvt8888to0565(d)); + d = *dst; + d = over (src, CONVERT_0565_TO_0888 (d)); + *dst = CONVERT_8888_TO_0565 (d); } } else if (ma) { - d = READ(pDst, dst); - d = cvt0565to0888(d); - FbInOverC (src, srca, ma, d, 0, m); - FbInOverC (src, srca, ma, d, 8, n); - FbInOverC (src, srca, ma, d, 16, o); - d = m|n|o; - WRITE(pDst, dst, cvt8888to0565(d)); + d = *dst; + d = CONVERT_0565_TO_0888 (d); + + s = src; + + UN8x4_MUL_UN8x4 (s, ma); + UN8x4_MUL_UN8 (ma, srca); + ma = ~ma; + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); + + *dst = CONVERT_8888_TO_0565 (d); } dst++; } @@ -602,96 +679,95 @@ fbCompositeSolidMask_nx8888x0565C (pixman_implementation_t *imp, } static void -fbCompositeSrc_8888x8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_over_8888_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t *dstLine, *dst, dstMask; - uint32_t *srcLine, *src, s; 
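    /* The over () helper used by the rewritten function implements the
     * usual OVER identity; per channel, conceptually:
     *
     *     dest_c = src_c + dest_c * (255 - src_a) / 255
     *
     * (division by 255 rounded).  UN8x4_MUL_UN8_ADD_UN8x4 evaluates
     * this for all four 8-bit channels at once by splitting each pixel
     * into two 16-bit-lane halves with 0x00ff00ff-style masks; see
     * pixman-combine32.h.
     */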
- int dstStride, srcStride; - uint8_t a; - uint16_t w; - - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + uint8_t a; + uint16_t w; - dstMask = FbFullMask (PIXMAN_FORMAT_DEPTH (pDst->bits.format)); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w--) { - s = READ(pSrc, src++); + s = *src++; a = s >> 24; if (a == 0xff) - WRITE(pDst, dst, s & dstMask); + *dst = s; else if (s) - WRITE(pDst, dst, fbOver (s, READ(pDst, dst)) & dstMask); + *dst = over (s, *dst); dst++; } } } static void -fbCompositeSrc_8888x0888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_over_8888_0888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint8_t *dstLine, *dst; - uint32_t d; - uint32_t *srcLine, *src, s; - uint8_t a; - int dstStride, srcStride; - uint16_t w; + uint8_t *dst_line, *dst; + uint32_t d; + uint32_t *src_line, *src, s; + uint8_t a; + int dst_stride, src_stride; + uint16_t w; - fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 3); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w--) { - s = READ(pSrc, src++); + s = *src++; a = s >> 24; if (a) { if (a == 0xff) d = s; else - d = fbOver24 (s, Fetch24(pDst, dst)); - Store24(pDst, dst, d); + d = over (s, fetch_24 (dst)); + + store_24 (dst, d); } dst += 3; } @@ -699,52 +775,54 @@ fbCompositeSrc_8888x0888 (pixman_implementation_t *imp, } static void -fbCompositeSrc_8888x0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_over_8888_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint16_t *dstLine, *dst; - uint32_t d; - uint32_t *srcLine, *src, s; - uint8_t a; - int dstStride, srcStride; - uint16_t w; + uint16_t *dst_line, *dst; + uint32_t d; + uint32_t *src_line, *src, s; + uint8_t a; + int dst_stride, src_stride; + uint16_t 
w; - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w--) { - s = READ(pSrc, src++); + s = *src++; a = s >> 24; if (s) { if (a == 0xff) + { d = s; + } else { - d = READ(pDst, dst); - d = fbOver24 (s, cvt0565to0888(d)); + d = *dst; + d = over (s, CONVERT_0565_TO_0888 (d)); } - WRITE(pDst, dst, cvt8888to0565(d)); + *dst = CONVERT_8888_TO_0565 (d); } dst++; } @@ -752,90 +830,90 @@ fbCompositeSrc_8888x0565 (pixman_implementation_t *imp, } static void -fbCompositeSrc_x888x0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_src_x888_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint16_t *dstLine, *dst; - uint32_t *srcLine, *src, s; - int dstStride, srcStride; - uint16_t w; + uint16_t *dst_line, *dst; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + uint16_t w; - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w--) { - s = READ(pSrc, src++); - WRITE(pDst, dst, cvt8888to0565(s)); + s = *src++; + *dst = CONVERT_8888_TO_0565 (s); dst++; } } } static void -fbCompositeSrcAdd_8000x8000 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_add_8000_8000 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint8_t *dstLine, *dst; - uint8_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; - uint8_t s, d; - uint16_t t; + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + uint16_t w; + uint8_t s, d; + uint16_t t; - fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1); - fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, 
dst_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w--) { - s = READ(pSrc, src++); + s = *src++; if (s) { if (s != 0xff) { - d = READ(pDst, dst); + d = *dst; t = d + s; s = t | (0 - (t >> 8)); } - WRITE(pDst, dst, s); + *dst = s; } dst++; } @@ -843,57 +921,49 @@ fbCompositeSrcAdd_8000x8000 (pixman_implementation_t *imp, } static void -fbCompositeSrcAdd_8888x8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_add_8888_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; - uint32_t s, d; - uint16_t t; - uint32_t m,n,o,p; + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + uint16_t w; + uint32_t s, d; - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w--) { - s = READ(pSrc, src++); + s = *src++; if (s) { if (s != 0xffffffff) { - d = READ(pDst, dst); + d = *dst; if (d) - { - m = FbAdd(s,d,0,t); - n = FbAdd(s,d,8,t); - o = FbAdd(s,d,16,t); - p = FbAdd(s,d,24,t); - s = m|n|o|p; - } + UN8x4_ADD_UN8x4 (s, d); } - WRITE(pDst, dst, s); + *dst = s; } dst++; } @@ -901,54 +971,54 @@ fbCompositeSrcAdd_8888x8888 (pixman_implementation_t *imp, } static void -fbCompositeSrcAdd_8888x8x8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_add_8888_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint8_t *dstLine, *dst; - uint8_t *maskLine, *mask; - int dstStride, maskStride; - uint16_t w; - uint32_t src; - uint8_t sa; - - fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); - fbComposeGetSolid (pSrc, src, pDst->bits.format); + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint16_t w; + uint32_t src; + uint8_t sa; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + src = 
_pixman_image_get_solid (src_image, dst_image->bits.format); sa = (src >> 24); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) { - uint16_t tmp; - uint16_t a; - uint32_t m, d; - uint32_t r; + uint16_t tmp; + uint16_t a; + uint32_t m, d; + uint32_t r; - a = READ(pMask, mask++); - d = READ(pDst, dst); + a = *mask++; + d = *dst; - m = FbInU (sa, 0, a, tmp); - r = FbAdd (m, d, 0, tmp); + m = MUL_UN8 (sa, a, tmp); + r = ADD_UN8 (m, d, tmp); - WRITE(pDst, dst++, r); + *dst++ = r; } } } @@ -958,229 +1028,249 @@ fbCompositeSrcAdd_8888x8x8 (pixman_implementation_t *imp, */ static void -fbCompositeSolidFill (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_solid_fill (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t src; + uint32_t src; - fbComposeGetSolid(pSrc, src, pDst->bits.format); + src = _pixman_image_get_solid (src_image, dst_image->bits.format); - if (pDst->bits.format == PIXMAN_a8) + if (dst_image->bits.format == PIXMAN_a8) + { src = src >> 24; - else if (pDst->bits.format == PIXMAN_r5g6b5 || - pDst->bits.format == PIXMAN_b5g6r5) - src = cvt8888to0565 (src); - - pixman_fill (pDst->bits.bits, pDst->bits.rowstride, - PIXMAN_FORMAT_BPP (pDst->bits.format), - xDst, yDst, - width, height, - src); + } + else if (dst_image->bits.format == PIXMAN_r5g6b5 || + dst_image->bits.format == PIXMAN_b5g6r5) + { + src = CONVERT_8888_TO_0565 (src); + } + + pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride, + PIXMAN_FORMAT_BPP (dst_image->bits.format), + dest_x, dest_y, + width, height, + src); } static void -fbCompositeSrc_8888xx888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_src_8888_x888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - uint32_t *dst; + uint32_t *dst; uint32_t *src; - int dstStride, srcStride; - uint32_t n_bytes = width * sizeof (uint32_t); + int dst_stride, src_stride; + uint32_t n_bytes = width * sizeof (uint32_t); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, src, 1); - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dst, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst, 1); while (height--) { memcpy (dst, src, n_bytes); - dst += dstStride; - src += srcStride; + dst += dst_stride; + src += src_stride; } } -static const FastPathInfo c_fast_paths[] = +static const pixman_fast_path_t c_fast_paths[] = { - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, 
PIXMAN_r5g6b5, fbCompositeSolidMask_nx8x0565, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, fbCompositeSolidMask_nx8x0565, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r8g8b8, fbCompositeSolidMask_nx8x0888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b8g8r8, fbCompositeSolidMask_nx8x0888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8888x8888C, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8888x8888C, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, fbCompositeSolidMask_nx8888x0565C, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8888x8888C, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8888x8888C, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, fbCompositeSolidMask_nx8888x0565C, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeOver_x888x8x8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeOver_x888x8x8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeOver_x888x8x8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeOver_x888x8x8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_8888x0565, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_8888x0565, 0 }, - { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrcAdd_8888x8888, 0 }, - { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrcAdd_8888x8888, 0 }, - { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcAdd_8000x8000, 0 }, - { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeSrcAdd_8888x8x8, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSolidFill, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSolidFill, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSolidFill, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeSolidFill, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_a8, fbCompositeSolidFill, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSolidFill, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSrc_8888xx888, 0 }, - { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, 
fbCompositeSrc_8888xx888, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeSrc_8888xx888, 0 }, - { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeSrc_8888xx888, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565, 0 }, - { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565, 0 }, - { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565, 0 }, - { PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcIn_8x8, 0 }, - { PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeSolidMaskIn_nx8x8, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, fast_composite_over_n_8_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, fast_composite_over_n_8_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r8g8b8, fast_composite_over_n_8_0888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b8g8r8, fast_composite_over_n_8_0888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fast_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fast_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fast_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fast_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fast_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, fast_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, fast_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, fast_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, fast_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, fast_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fast_composite_over_x888_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fast_composite_over_x888_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, fast_composite_over_x888_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, fast_composite_over_x888_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fast_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fast_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fast_composite_over_8888_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fast_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fast_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fast_composite_over_8888_0565, 0 }, + { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fast_composite_add_8888_8888, 0 }, + { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fast_composite_add_8888_8888, 0 }, + { PIXMAN_OP_ADD, 
PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fast_composite_add_8000_8000, 0 }, + { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fast_composite_add_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fast_composite_add_8888_8_8, 0 }, + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, fast_composite_solid_fill, 0 }, + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_x8r8g8b8, fast_composite_solid_fill, 0 }, + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_a8b8g8r8, fast_composite_solid_fill, 0 }, + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_x8b8g8r8, fast_composite_solid_fill, 0 }, + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_a8, fast_composite_solid_fill, 0 }, + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, fast_composite_solid_fill, 0 }, + { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fast_composite_src_8888_x888, 0 }, + { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fast_composite_src_8888_x888, 0 }, + { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fast_composite_src_8888_x888, 0 }, + { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fast_composite_src_8888_x888, 0 }, + { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fast_composite_src_x888_0565, 0 }, + { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fast_composite_src_x888_0565, 0 }, + { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fast_composite_src_x888_0565, 0 }, + { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fast_composite_src_x888_0565, 0 }, + { PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fast_composite_in_8_8, 0 }, + { PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fast_composite_in_n_8_8, 0 }, { PIXMAN_OP_NONE }, }; static void -fbCompositeSrcScaleNearest (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t *pSrc, - pixman_image_t *pMask, - pixman_image_t *pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +fast_composite_src_scale_nearest (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { uint32_t *dst; uint32_t *src; - int dstStride, srcStride; - int i, j; + int dst_stride, src_stride; + int i, j; pixman_vector_t v; - - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dst, 1); - /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst, 1); + /* pass in 0 instead of src_x and src_y because src_x and src_y need to be * transformed from destination space to source space */ - fbComposeGetStart (pSrc, 0, 0, uint32_t, srcStride, src, 1); - + PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src, 1); + /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed(xSrc) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed(ySrc) + pixman_fixed_1 / 2; + v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (pSrc->common.transform, &v)) - return; - + + if (!pixman_transform_point_3d 
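    /* A worked example of the pixman_fixed_e adjustment made below
     * (assuming src_x = 0 and a pure 2x downscale, i.e. matrix[0][0]
     * == 2 * pixman_fixed_1): destination pixel centers 0.5 and 1.5
     * map to source coordinates 1.0 and 3.0, each exactly halfway
     * between two source pixel centers; subtracting pixman_fixed_e
     * before the '>> 16' truncation resolves them to source pixels
     * 0 and 2 rather than 1 and 3, so ties round down, never up.
     */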
(src_image->common.transform, &v)) + return; + /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ v.vector[0] -= pixman_fixed_e; v.vector[1] -= pixman_fixed_e; - - for (j = 0; j < height; j++) { - pixman_fixed_t vx = v.vector[0]; - pixman_fixed_t vy = v.vector[1]; - for (i = 0; i < width; ++i) { - pixman_bool_t inside_bounds; - uint32_t result; - int x, y; - x = vx >> 16; - y = vy >> 16; - - /* apply the repeat function */ - switch (pSrc->common.repeat) { + + for (j = 0; j < height; j++) + { + pixman_fixed_t vx = v.vector[0]; + pixman_fixed_t vy = v.vector[1]; + + for (i = 0; i < width; ++i) + { + pixman_bool_t inside_bounds; + uint32_t result; + int x, y; + x = vx >> 16; + y = vy >> 16; + + /* apply the repeat function */ + switch (src_image->common.repeat) + { case PIXMAN_REPEAT_NORMAL: - x = MOD (x, pSrc->bits.width); - y = MOD (y, pSrc->bits.height); + x = MOD (x, src_image->bits.width); + y = MOD (y, src_image->bits.height); inside_bounds = TRUE; break; - + case PIXMAN_REPEAT_PAD: - x = CLIP (x, 0, pSrc->bits.width-1); - y = CLIP (y, 0, pSrc->bits.height-1); + x = CLIP (x, 0, src_image->bits.width - 1); + y = CLIP (y, 0, src_image->bits.height - 1); inside_bounds = TRUE; break; - + case PIXMAN_REPEAT_REFLECT: - x = MOD (x, pSrc->bits.width * 2); - if (x >= pSrc->bits.width) - x = pSrc->bits.width * 2 - x - 1; - y = MOD (y, pSrc->bits.height * 2); - if (y >= pSrc->bits.height) - y = pSrc->bits.height * 2 - y - 1; + x = MOD (x, src_image->bits.width * 2); + if (x >= src_image->bits.width) + x = src_image->bits.width * 2 - x - 1; + y = MOD (y, src_image->bits.height * 2); + if (y >= src_image->bits.height) + y = src_image->bits.height * 2 - y - 1; inside_bounds = TRUE; break; - + case PIXMAN_REPEAT_NONE: default: - inside_bounds = (x >= 0 && x < pSrc->bits.width && y >= 0 && y < pSrc->bits.height); + inside_bounds = + (x >= 0 && + x < src_image->bits.width && + y >= 0 && + y < src_image->bits.height); break; - } - - if (inside_bounds) { - //XXX: we should move this multiplication out of the loop - result = READ(pSrc, src + y * srcStride + x); - } else { - result = 0; - } - WRITE(pDst, dst + i, result); - - /* adjust the x location by a unit vector in the x direction: - * this is equivalent to transforming x+1 of the destination point to source space */ - vx += pSrc->common.transform->matrix[0][0]; - } - /* adjust the y location by a unit vector in the y direction - * this is equivalent to transforming y+1 of the destination point to source space */ - v.vector[1] += pSrc->common.transform->matrix[1][1]; - dst += dstStride; + } + + if (inside_bounds) + { + /* XXX: we should move this multiplication out of the loop */ + result = *(src + y * src_stride + x); + } + else + { + result = 0; + } + *(dst + i) = result; + + /* adjust the x location by a unit vector in the x direction: + * this is equivalent to transforming x+1 of the destination + * point to source space + */ + vx += src_image->common.transform->matrix[0][0]; + } + /* adjust the y location by a unit vector in the y direction + * this is equivalent to transforming y+1 of the destination point + * to source space + */ + v.vector[1] += src_image->common.transform->matrix[1][1]; + dst += dst_stride; } } static void fast_path_composite (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t *src, - pixman_image_t *mask, - pixman_image_t *dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_op_t op, + 
pixman_image_t * src, + pixman_image_t * mask, + pixman_image_t * dest, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { if (src->type == BITS && src->common.transform @@ -1188,57 +1278,55 @@ fast_path_composite (pixman_implementation_t *imp, && op == PIXMAN_OP_SRC && !src->common.alpha_map && !dest->common.alpha_map && (src->common.filter == PIXMAN_FILTER_NEAREST) - && PIXMAN_FORMAT_BPP(dest->bits.format) == 32 + && PIXMAN_FORMAT_BPP (dest->bits.format) == 32 && src->bits.format == dest->bits.format - && src->common.src_clip == &(src->common.full_region) - && !src->common.read_func && !src->common.write_func - && !dest->common.read_func && !dest->common.write_func) + && !src->bits.read_func && !src->bits.write_func + && !dest->bits.read_func && !dest->bits.write_func) { - /* ensure that the transform matrix only has a scale */ - if (src->common.transform->matrix[0][1] == 0 && - src->common.transform->matrix[1][0] == 0 && - src->common.transform->matrix[2][0] == 0 && - src->common.transform->matrix[2][1] == 0 && - src->common.transform->matrix[2][2] == pixman_fixed_1) + /* ensure that the transform matrix only has a scale */ + if (src->common.transform->matrix[0][1] == 0 && + src->common.transform->matrix[1][0] == 0 && + src->common.transform->matrix[2][0] == 0 && + src->common.transform->matrix[2][1] == 0 && + src->common.transform->matrix[2][2] == pixman_fixed_1) { _pixman_walk_composite_region (imp, op, - src, mask, dest, - src_x, src_y, - mask_x, mask_y, - dest_x, dest_y, - width, height, - FALSE, FALSE, - fbCompositeSrcScaleNearest); + src, mask, dest, + src_x, src_y, + mask_x, mask_y, + dest_x, dest_y, + width, height, + fast_composite_src_scale_nearest); return; } } if (_pixman_run_fast_path (c_fast_paths, imp, - op, src, mask, dest, - src_x, src_y, - mask_x, mask_y, - dest_x, dest_y, - width, height)) + op, src, mask, dest, + src_x, src_y, + mask_x, mask_y, + dest_x, dest_y, + width, height)) { return; } _pixman_implementation_composite (imp->delegate, op, - src, mask, dest, - src_x, src_y, - mask_x, mask_y, - dest_x, dest_y, - width, height); + src, mask, dest, + src_x, src_y, + mask_x, mask_y, + dest_x, dest_y, + width, height); } static void -pixman_fill8 (uint32_t *bits, - int stride, - int x, - int y, - int width, - int height, - uint32_t xor) +pixman_fill8 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) { int byte_stride = stride * (int) sizeof (uint32_t); uint8_t *dst = (uint8_t *) bits; @@ -1258,14 +1346,15 @@ pixman_fill8 (uint32_t *bits, static void pixman_fill16 (uint32_t *bits, - int stride, - int x, - int y, - int width, - int height, - uint32_t xor) + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) { - int short_stride = (stride * (int) sizeof (uint32_t)) / (int) sizeof (uint16_t); + int short_stride = + (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t); uint16_t *dst = (uint16_t *)bits; uint16_t v = xor & 0xffff; int i; @@ -1283,12 +1372,12 @@ pixman_fill16 (uint32_t *bits, static void pixman_fill32 (uint32_t *bits, - int stride, - int x, - int y, - int width, - int height, - uint32_t xor) + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) { int i; @@ -1305,46 +1394,47 @@ pixman_fill32 (uint32_t *bits, static pixman_bool_t fast_path_fill (pixman_implementation_t *imp, - uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor) + 
uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) { switch (bpp) { case 8: pixman_fill8 (bits, stride, x, y, width, height, xor); break; - + case 16: pixman_fill16 (bits, stride, x, y, width, height, xor); break; - + case 32: pixman_fill32 (bits, stride, x, y, width, height, xor); break; - + default: return _pixman_implementation_fill ( imp->delegate, bits, stride, bpp, x, y, width, height, xor); break; } - + return TRUE; } pixman_implementation_t * -_pixman_implementation_create_fast_path (pixman_implementation_t *toplevel) +_pixman_implementation_create_fast_path (void) { - pixman_implementation_t *general = _pixman_implementation_create_general (NULL); - pixman_implementation_t *imp = _pixman_implementation_create (toplevel, general); + pixman_implementation_t *general = _pixman_implementation_create_general (); + pixman_implementation_t *imp = _pixman_implementation_create (general); imp->composite = fast_path_composite; imp->fill = fast_path_fill; - + return imp; } + diff --git a/lib/pixman/pixman/pixman-general.c b/lib/pixman/pixman/pixman-general.c index 1d0e10963..3ead3dac7 100644 --- a/lib/pixman/pixman/pixman-general.c +++ b/lib/pixman/pixman/pixman-general.c @@ -25,11 +25,12 @@ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS * SOFTWARE. */ +#ifdef HAVE_CONFIG_H #include <config.h> +#endif #include <stdlib.h> #include <string.h> #include <math.h> -#include <assert.h> #include <limits.h> #include <stdio.h> #include <stdlib.h> @@ -42,86 +43,87 @@ static void general_composite_rect (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t *src, - pixman_image_t *mask, - pixman_image_t *dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_op_t op, + pixman_image_t * src, + pixman_image_t * mask, + pixman_image_t * dest, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - return_if_fail (src != NULL); - return_if_fail (dest != NULL); - { uint8_t stack_scanline_buffer[SCANLINE_BUFFER_LENGTH * 3]; - const pixman_format_code_t srcFormat = src->type == BITS ? src->bits.format : 0; - const pixman_format_code_t maskFormat = mask && mask->type == BITS ? mask->bits.format : 0; - const pixman_format_code_t destFormat = dest->type == BITS ? dest->bits.format : 0; - const int srcWide = PIXMAN_FORMAT_16BPC(srcFormat); - const int maskWide = mask && PIXMAN_FORMAT_16BPC(maskFormat); - const int destWide = PIXMAN_FORMAT_16BPC(destFormat); - const int wide = srcWide || maskWide || destWide; + const pixman_format_code_t src_format = + src->type == BITS ? src->bits.format : 0; + const pixman_format_code_t mask_format = + mask && mask->type == BITS ? mask->bits.format : 0; + const pixman_format_code_t dest_format = + dest->type == BITS ? dest->bits.format : 0; + const int src_wide = PIXMAN_FORMAT_IS_WIDE (src_format); + const int mask_wide = mask && PIXMAN_FORMAT_IS_WIDE (mask_format); + const int dest_wide = PIXMAN_FORMAT_IS_WIDE (dest_format); + const int wide = src_wide || mask_wide || dest_wide; const int Bpp = wide ? 
8 : 4; uint8_t *scanline_buffer = stack_scanline_buffer; uint8_t *src_buffer, *mask_buffer, *dest_buffer; - scanFetchProc fetchSrc = NULL, fetchMask = NULL, fetchDest = NULL; + fetch_scanline_t fetch_src = NULL, fetch_mask = NULL, fetch_dest = NULL; pixman_combine_32_func_t compose; - scanStoreProc store; - source_pict_class_t srcClass, maskClass; + store_scanline_t store; + source_image_class_t src_class, mask_class; pixman_bool_t component_alpha; uint32_t *bits; int32_t stride; int i; - + if (width * Bpp > SCANLINE_BUFFER_LENGTH) { scanline_buffer = pixman_malloc_abc (width, 3, Bpp); - + if (!scanline_buffer) return; } - + src_buffer = scanline_buffer; mask_buffer = src_buffer + width * Bpp; dest_buffer = mask_buffer + width * Bpp; - - srcClass = _pixman_image_classify (src, - src_x, src_y, - width, height); - - maskClass = SOURCE_IMAGE_CLASS_UNKNOWN; + + src_class = _pixman_image_classify (src, + src_x, src_y, + width, height); + + mask_class = SOURCE_IMAGE_CLASS_UNKNOWN; + if (mask) { - maskClass = _pixman_image_classify (mask, - src_x, src_y, - width, height); + mask_class = _pixman_image_classify (mask, + src_x, src_y, + width, height); } - + if (op == PIXMAN_OP_CLEAR) - fetchSrc = NULL; + fetch_src = NULL; else if (wide) - fetchSrc = _pixman_image_get_scanline_64; + fetch_src = _pixman_image_get_scanline_64; else - fetchSrc = _pixman_image_get_scanline_32; - + fetch_src = _pixman_image_get_scanline_32; + if (!mask || op == PIXMAN_OP_CLEAR) - fetchMask = NULL; + fetch_mask = NULL; else if (wide) - fetchMask = _pixman_image_get_scanline_64; + fetch_mask = _pixman_image_get_scanline_64; else - fetchMask = _pixman_image_get_scanline_32; - + fetch_mask = _pixman_image_get_scanline_32; + if (op == PIXMAN_OP_CLEAR || op == PIXMAN_OP_SRC) - fetchDest = NULL; + fetch_dest = NULL; else if (wide) - fetchDest = _pixman_image_get_scanline_64; + fetch_dest = _pixman_image_get_scanline_64; else - fetchDest = _pixman_image_get_scanline_32; + fetch_dest = _pixman_image_get_scanline_32; if (wide) store = _pixman_image_store_scanline_64; @@ -133,15 +135,15 @@ general_composite_rect (pixman_implementation_t *imp, * the destination format. 
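general_composite_rect above stages everything through one scratch allocation: three width * Bpp slices for the source, mask and destination scanlines, where Bpp is 8 whenever any operand is wide (16 bits per channel) and 4 otherwise. It uses the on-stack buffer when the slices fit and falls back to pixman_malloc_abc, which guards the size multiplication against overflow. A minimal sketch of that layout, with hypothetical names and a made-up stack size:

    #include <stdint.h>
    #include <stdlib.h>

    #define STACK_LEN 2048   /* hypothetical stand-in for SCANLINE_BUFFER_LENGTH */

    /* Carve one allocation into src/mask/dest scanline slices. */
    static uint8_t *
    get_scanline_buffers (int width, int wide, uint8_t *stack_buf,
                          uint8_t **src, uint8_t **mask, uint8_t **dest)
    {
        int      Bpp = wide ? 8 : 4;   /* 16bpc pixels need 8 bytes */
        uint8_t *buf = stack_buf;

        if (width * Bpp > STACK_LEN)
            buf = malloc ((size_t)width * 3 * Bpp); /* like pixman_malloc_abc */

        if (buf)
        {
            *src  = buf;
            *mask = buf + width * Bpp;
            *dest = buf + 2 * width * Bpp;
        }

        return buf;   /* caller frees it iff it differs from stack_buf */
    }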
*/ if (!wide && - !dest->common.alpha_map && - !dest->common.write_func && - (op == PIXMAN_OP_ADD || op == PIXMAN_OP_OVER) && - (dest->bits.format == PIXMAN_a8r8g8b8 || - dest->bits.format == PIXMAN_x8r8g8b8)) + !dest->common.alpha_map && + !dest->bits.write_func && + (op == PIXMAN_OP_ADD || op == PIXMAN_OP_OVER) && + (dest->bits.format == PIXMAN_a8r8g8b8 || + dest->bits.format == PIXMAN_x8r8g8b8)) { store = NULL; } - + if (!store) { bits = dest->bits.bits; @@ -152,15 +154,15 @@ general_composite_rect (pixman_implementation_t *imp, bits = NULL; stride = 0; } - + component_alpha = - fetchSrc && - fetchMask && - mask && - mask->common.type == BITS && - mask->common.component_alpha && - PIXMAN_FORMAT_RGB (mask->bits.format); - + fetch_src && + fetch_mask && + mask && + mask->common.type == BITS && + mask->common.component_alpha && + PIXMAN_FORMAT_RGB (mask->bits.format); + if (wide) { if (component_alpha) @@ -175,186 +177,148 @@ general_composite_rect (pixman_implementation_t *imp, else compose = _pixman_implementation_combine_32; } - + if (!compose) return; - - if (!fetchMask) + + if (!fetch_mask) mask_buffer = NULL; - + for (i = 0; i < height; ++i) { /* fill first half of scanline with source */ - if (fetchSrc) + if (fetch_src) { - if (fetchMask) + if (fetch_mask) { /* fetch mask before source so that fetching of source can be optimized */ - fetchMask (mask, mask_x, mask_y + i, - width, (void *)mask_buffer, 0, 0); - - if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL) - fetchMask = NULL; + fetch_mask (mask, mask_x, mask_y + i, + width, (void *)mask_buffer, 0, 0); + + if (mask_class == SOURCE_IMAGE_CLASS_HORIZONTAL) + fetch_mask = NULL; } - - if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL) + + if (src_class == SOURCE_IMAGE_CLASS_HORIZONTAL) { - fetchSrc (src, src_x, src_y + i, - width, (void *)src_buffer, 0, 0); - fetchSrc = NULL; + fetch_src (src, src_x, src_y + i, + width, (void *)src_buffer, 0, 0); + fetch_src = NULL; } else { - fetchSrc (src, src_x, src_y + i, - width, (void *)src_buffer, (void *)mask_buffer, - 0xffffffff); + fetch_src (src, src_x, src_y + i, + width, (void *)src_buffer, (void *)mask_buffer, + 0xffffffff); } } - else if (fetchMask) + else if (fetch_mask) { - fetchMask (mask, mask_x, mask_y + i, - width, (void *)mask_buffer, 0, 0); + fetch_mask (mask, mask_x, mask_y + i, + width, (void *)mask_buffer, 0, 0); } - + if (store) { /* fill dest into second half of scanline */ - if (fetchDest) - fetchDest (dest, dest_x, dest_y + i, - width, (void *)dest_buffer, 0, 0); - + if (fetch_dest) + { + fetch_dest (dest, dest_x, dest_y + i, + width, (void *)dest_buffer, 0, 0); + } + /* blend */ - compose (imp->toplevel, op, (void *)dest_buffer, (void *)src_buffer, (void *)mask_buffer, width); - + compose (imp->toplevel, op, + (void *)dest_buffer, + (void *)src_buffer, + (void *)mask_buffer, + width); + /* write back */ store (&(dest->bits), dest_x, dest_y + i, width, - (void *)dest_buffer); + (void *)dest_buffer); } else { /* blend */ - compose (imp->toplevel, op, bits + (dest_y + i) * stride + - dest_x, - (void *)src_buffer, (void *)mask_buffer, width); + compose (imp->toplevel, op, + bits + (dest_y + i) * stride + dest_x, + (void *)src_buffer, (void *)mask_buffer, width); } } - + if (scanline_buffer != stack_scanline_buffer) free (scanline_buffer); - } } static void -general_composite (pixman_implementation_t * imp, - pixman_op_t op, - pixman_image_t * src, - pixman_image_t * mask, - pixman_image_t * dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t 
dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +general_composite (pixman_implementation_t * imp, + pixman_op_t op, + pixman_image_t * src, + pixman_image_t * mask, + pixman_image_t * dest, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - pixman_bool_t srcRepeat = src->type == BITS && src->common.repeat == PIXMAN_REPEAT_NORMAL; - pixman_bool_t maskRepeat = FALSE; - pixman_bool_t srcTransform = src->common.transform != NULL; - pixman_bool_t maskTransform = FALSE; - - if (srcRepeat && srcTransform && - src->bits.width == 1 && - src->bits.height == 1) - { - srcTransform = FALSE; - } - - if (mask && mask->type == BITS) - { - maskRepeat = mask->common.repeat == PIXMAN_REPEAT_NORMAL; - - maskTransform = mask->common.transform != 0; - if (mask->common.filter == PIXMAN_FILTER_CONVOLUTION) - maskTransform = TRUE; - - if (maskRepeat && maskTransform && - mask->bits.width == 1 && - mask->bits.height == 1) - { - maskTransform = FALSE; - } - } - - /* CompositeGeneral optimizes 1x1 repeating images itself */ - if (src->type == BITS && - src->bits.width == 1 && src->bits.height == 1) - { - srcRepeat = FALSE; - } - - if (mask && mask->type == BITS && - mask->bits.width == 1 && mask->bits.height == 1) - { - maskRepeat = FALSE; - } - - /* if we are transforming, repeats are handled in fbFetchTransformed */ - if (srcTransform) - srcRepeat = FALSE; - - if (maskTransform) - maskRepeat = FALSE; - _pixman_walk_composite_region (imp, op, src, mask, dest, src_x, src_y, - mask_x, mask_y, dest_x, dest_y, width, height, - srcRepeat, maskRepeat, general_composite_rect); + mask_x, mask_y, dest_x, dest_y, + width, height, + general_composite_rect); } static pixman_bool_t general_blt (pixman_implementation_t *imp, - uint32_t *src_bits, - uint32_t *dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, int src_y, - int dst_x, int dst_y, - int width, int height) + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) { /* We can't blit unless we have sse2 or mmx */ - + return FALSE; } static pixman_bool_t general_fill (pixman_implementation_t *imp, - uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor) + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) { return FALSE; } pixman_implementation_t * -_pixman_implementation_create_general (pixman_implementation_t *toplevel) +_pixman_implementation_create_general (void) { - pixman_implementation_t *imp = _pixman_implementation_create (toplevel, NULL); + pixman_implementation_t *imp = _pixman_implementation_create (NULL); _pixman_setup_combiner_functions_32 (imp); _pixman_setup_combiner_functions_64 (imp); - + imp->composite = general_composite; imp->blt = general_blt; imp->fill = general_fill; - + return imp; } + diff --git a/lib/pixman/pixman/pixman-gradient-walker.c b/lib/pixman/pixman/pixman-gradient-walker.c index 6a47a8ea3..dd666b412 100644 --- a/lib/pixman/pixman/pixman-gradient-walker.c +++ b/lib/pixman/pixman/pixman-gradient-walker.c @@ -23,13 +23,15 @@ * SOFTWARE. 
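The scanline loop above follows a fixed fetch/combine/store rhythm: the mask is fetched before the source (so a source fetcher can in principle skip masked-out pixels), horizontal sources and masks are fetched once and their fetcher cleared, and when store is NULL the combiner writes straight into the destination bits. A simplified sketch of the loop; the callback signatures are hypothetical and the direct-write case is folded into store:

    #include <stddef.h>
    #include <stdint.h>

    typedef void (*fetch_fn)   (int y, uint32_t *out, int width);
    typedef void (*combine_fn) (uint32_t *dest, const uint32_t *src,
                                const uint32_t *mask, int width);
    typedef void (*store_fn)   (int y, const uint32_t *in, int width);

    /* One combine pass per scanline; a NULL fetcher means the buffer is
     * either unused (e.g. no mask) or already valid (horizontal case). */
    static void
    composite_rows (int height, int width,
                    fetch_fn fetch_src, fetch_fn fetch_mask,
                    fetch_fn fetch_dest,
                    combine_fn combine, store_fn store,
                    uint32_t *src_buf, uint32_t *mask_buf, uint32_t *dest_buf)
    {
        int i;

        for (i = 0; i < height; ++i)
        {
            /* Mask first, so the source fetch can be optimized against it. */
            if (fetch_mask)
                fetch_mask (i, mask_buf, width);
            if (fetch_src)
                fetch_src (i, src_buf, width);
            if (fetch_dest)
                fetch_dest (i, dest_buf, width);

            combine (dest_buf, src_buf, fetch_mask ? mask_buf : NULL, width);
            store (i, dest_buf, width);
        }
    }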
*/ +#ifdef HAVE_CONFIG_H #include <config.h> +#endif #include "pixman-private.h" void -_pixman_gradient_walker_init (GradientWalker *walker, - gradient_t *gradient, - unsigned int spread) +_pixman_gradient_walker_init (pixman_gradient_walker_t *walker, + gradient_t * gradient, + unsigned int spread) { walker->num_stops = gradient->n_stops; walker->stops = gradient->stops; @@ -41,21 +43,21 @@ _pixman_gradient_walker_init (GradientWalker *walker, walker->right_ag = 0; walker->right_rb = 0; walker->spread = spread; - + walker->need_reset = TRUE; } void -_pixman_gradient_walker_reset (GradientWalker *walker, - pixman_fixed_32_32_t pos) +_pixman_gradient_walker_reset (pixman_gradient_walker_t *walker, + pixman_fixed_32_32_t pos) { - int32_t x, left_x, right_x; + int32_t x, left_x, right_x; pixman_color_t *left_c, *right_c; - int n, count = walker->num_stops; + int n, count = walker->num_stops; pixman_gradient_stop_t * stops = walker->stops; - - static const pixman_color_t transparent_black = { 0, 0, 0, 0 }; - + + static const pixman_color_t transparent_black = { 0, 0, 0, 0 }; + switch (walker->spread) { case PIXMAN_REPEAT_NORMAL: @@ -63,47 +65,59 @@ _pixman_gradient_walker_reset (GradientWalker *walker, for (n = 0; n < count; n++) if (x < stops[n].x) break; - if (n == 0) { - left_x = stops[count-1].x - 0x10000; - left_c = &stops[count-1].color; - } else { - left_x = stops[n-1].x; - left_c = &stops[n-1].color; - } - - if (n == count) { + if (n == 0) + { + left_x = stops[count - 1].x - 0x10000; + left_c = &stops[count - 1].color; + } + else + { + left_x = stops[n - 1].x; + left_c = &stops[n - 1].color; + } + + if (n == count) + { right_x = stops[0].x + 0x10000; right_c = &stops[0].color; - } else { + } + else + { right_x = stops[n].x; right_c = &stops[n].color; } left_x += (pos - x); right_x += (pos - x); break; - + case PIXMAN_REPEAT_PAD: for (n = 0; n < count; n++) if (pos < stops[n].x) break; - - if (n == 0) { + + if (n == 0) + { left_x = INT32_MIN; left_c = &stops[0].color; - } else { - left_x = stops[n-1].x; - left_c = &stops[n-1].color; } - - if (n == count) { + else + { + left_x = stops[n - 1].x; + left_c = &stops[n - 1].color; + } + + if (n == count) + { right_x = INT32_MAX; - right_c = &stops[n-1].color; - } else { + right_c = &stops[n - 1].color; + } + else + { right_x = stops[n].x; right_c = &stops[n].color; } break; - + case PIXMAN_REPEAT_REFLECT: x = (int32_t)pos & 0xFFFF; if ((int32_t)pos & 0x10000) @@ -111,46 +125,53 @@ _pixman_gradient_walker_reset (GradientWalker *walker, for (n = 0; n < count; n++) if (x < stops[n].x) break; - - if (n == 0) { + + if (n == 0) + { left_x = -stops[0].x; left_c = &stops[0].color; - } else { - left_x = stops[n-1].x; - left_c = &stops[n-1].color; - } - - if (n == count) { - right_x = 0x20000 - stops[n-1].x; - right_c = &stops[n-1].color; - } else { + } + else + { + left_x = stops[n - 1].x; + left_c = &stops[n - 1].color; + } + + if (n == count) + { + right_x = 0x20000 - stops[n - 1].x; + right_c = &stops[n - 1].color; + } + else + { right_x = stops[n].x; right_c = &stops[n].color; } - - if ((int32_t)pos & 0x10000) { + + if ((int32_t)pos & 0x10000) + { pixman_color_t *tmp_c; - int32_t tmp_x; - + int32_t tmp_x; + tmp_x = 0x10000 - right_x; right_x = 0x10000 - left_x; left_x = tmp_x; - + tmp_c = right_c; right_c = left_c; left_c = tmp_c; - + x = 0x10000 - x; } left_x += (pos - x); right_x += (pos - x); break; - - default: /* RepeatNone */ + + default: /* REPEAT_NONE */ for (n = 0; n < count; n++) if (pos < stops[n].x) break; - + if (n == 0) { left_x = 
INT32_MIN; @@ -159,74 +180,75 @@ _pixman_gradient_walker_reset (GradientWalker *walker, } else if (n == count) { - left_x = stops[n-1].x; + left_x = stops[n - 1].x; right_x = INT32_MAX; left_c = right_c = (pixman_color_t*) &transparent_black; } else { - left_x = stops[n-1].x; + left_x = stops[n - 1].x; right_x = stops[n].x; - left_c = &stops[n-1].color; + left_c = &stops[n - 1].color; right_c = &stops[n].color; } } - + walker->left_x = left_x; walker->right_x = right_x; walker->left_ag = ((left_c->alpha >> 8) << 16) | (left_c->green >> 8); walker->left_rb = ((left_c->red & 0xff00) << 8) | (left_c->blue >> 8); walker->right_ag = ((right_c->alpha >> 8) << 16) | (right_c->green >> 8); walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8); - - if ( walker->left_x == walker->right_x || - ( walker->left_ag == walker->right_ag && - walker->left_rb == walker->right_rb ) ) + + if (walker->left_x == walker->right_x || + ( walker->left_ag == walker->right_ag && + walker->left_rb == walker->right_rb ) ) { walker->stepper = 0; } else { int32_t width = right_x - left_x; - walker->stepper = ((1 << 24) + width/2)/width; + walker->stepper = ((1 << 24) + width / 2) / width; } - + walker->need_reset = FALSE; } -#define PIXMAN_GRADIENT_WALKER_NEED_RESET(w,x) \ +#define PIXMAN_GRADIENT_WALKER_NEED_RESET(w, x) \ ( (w)->need_reset || (x) < (w)->left_x || (x) >= (w)->right_x) /* the following assumes that PIXMAN_GRADIENT_WALKER_NEED_RESET(w,x) is FALSE */ uint32_t -_pixman_gradient_walker_pixel (GradientWalker *walker, - pixman_fixed_32_32_t x) +_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker, + pixman_fixed_32_32_t x) { - int dist, idist; - uint32_t t1, t2, a, color; - + int dist, idist; + uint32_t t1, t2, a, color; + if (PIXMAN_GRADIENT_WALKER_NEED_RESET (walker, x)) - _pixman_gradient_walker_reset (walker, x); - - dist = ((int)(x - walker->left_x)*walker->stepper) >> 16; + _pixman_gradient_walker_reset (walker, x); + + dist = ((int)(x - walker->left_x) * walker->stepper) >> 16; idist = 256 - dist; - + /* combined INTERPOLATE and premultiply */ - t1 = walker->left_rb*idist + walker->right_rb*dist; + t1 = walker->left_rb * idist + walker->right_rb * dist; t1 = (t1 >> 8) & 0xff00ff; - - t2 = walker->left_ag*idist + walker->right_ag*dist; + + t2 = walker->left_ag * idist + walker->right_ag * dist; t2 &= 0xff00ff00; - + color = t2 & 0xff000000; a = t2 >> 24; - - t1 = t1*a + 0x800080; + + t1 = t1 * a + 0x800080; t1 = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8; - - t2 = (t2 >> 8)*a + 0x800080; + + t2 = (t2 >> 8) * a + 0x800080; t2 = (t2 + ((t2 >> 8) & 0xff00ff)); - + return (color | (t1 & 0xff00ff) | (t2 & 0xff00)); } + diff --git a/lib/pixman/pixman/pixman-image.c b/lib/pixman/pixman/pixman-image.c index c8295f882..fff0497f1 100644 --- a/lib/pixman/pixman/pixman-image.c +++ b/lib/pixman/pixman/pixman-image.c @@ -30,13 +30,12 @@ #include <assert.h> #include "pixman-private.h" - -#define Alpha(x) ((x) >> 24) +#include "pixman-combine32.h" pixman_bool_t -_pixman_init_gradient (gradient_t *gradient, - const pixman_gradient_stop_t *stops, - int n_stops) +_pixman_init_gradient (gradient_t * gradient, + const pixman_gradient_stop_t *stops, + int n_stops) { return_val_if_fail (n_stops > 0, FALSE); @@ -59,33 +58,40 @@ _pixman_init_gradient (gradient_t *gradient, /* * By default, just evaluate the image at 32bpp and expand. Individual image * types can plug in a better scanline getter if they want to. 
For example - * we could produce smoother gradients by evaluating them at higher color depth, but - * that's a project for the future. + * we could produce smoother gradients by evaluating them at higher color + * depth, but that's a project for the future. */ void -_pixman_image_get_scanline_64_generic (pixman_image_t * pict, int x, int y, int width, - uint64_t *buffer, uint64_t *mask, uint32_t maskBits) +_pixman_image_get_scanline_generic_64 (pixman_image_t * image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t * mask, + uint32_t mask_bits) { uint32_t *mask8 = NULL; - // Contract the mask image, if one exists, so that the 32-bit fetch function - // can use it. - if (mask) { - mask8 = pixman_malloc_ab(width, sizeof(uint32_t)); + /* Contract the mask image, if one exists, so that the 32-bit fetch + * function can use it. + */ + if (mask) + { + mask8 = pixman_malloc_ab (width, sizeof(uint32_t)); if (!mask8) return; - - pixman_contract(mask8, mask, width); + + pixman_contract (mask8, (uint64_t *)mask, width); } - // Fetch the source image into the first half of buffer. - _pixman_image_get_scanline_32 (pict, x, y, width, (uint32_t*)buffer, mask8, - maskBits); + /* Fetch the source image into the first half of buffer. */ + _pixman_image_get_scanline_32 (image, x, y, width, (uint32_t*)buffer, mask8, + mask_bits); - // Expand from 32bpp to 64bpp in place. - pixman_expand(buffer, (uint32_t*)buffer, PIXMAN_a8r8g8b8, width); + /* Expand from 32bpp to 64bpp in place. */ + pixman_expand ((uint64_t *)buffer, buffer, PIXMAN_a8r8g8b8, width); - free(mask8); + free (mask8); } pixman_image_t * @@ -97,10 +103,10 @@ _pixman_image_allocate (void) { image_common_t *common = &image->common; - pixman_region32_init (&common->full_region); pixman_region32_init (&common->clip_region); - common->src_clip = &common->full_region; - common->has_client_clip = FALSE; + + common->have_clip_region = FALSE; + common->clip_sources = FALSE; common->transform = NULL; common->repeat = PIXMAN_REPEAT_NONE; common->filter = PIXMAN_FILTER_NEAREST; @@ -109,20 +115,23 @@ _pixman_image_allocate (void) common->alpha_map = NULL; common->component_alpha = FALSE; common->ref_count = 1; - common->read_func = NULL; - common->write_func = NULL; common->classify = NULL; + common->client_clip = FALSE; + common->destroy_func = NULL; + common->destroy_data = NULL; + common->need_workaround = FALSE; + common->dirty = TRUE; } return image; } -source_pict_class_t +source_image_class_t _pixman_image_classify (pixman_image_t *image, - int x, - int y, - int width, - int height) + int x, + int y, + int width, + int height) { if (image->common.classify) return image->common.classify (image, x, y, width, height); @@ -131,44 +140,36 @@ _pixman_image_classify (pixman_image_t *image, } void -_pixman_image_get_scanline_32 (pixman_image_t *image, int x, int y, int width, - uint32_t *buffer, uint32_t *mask, uint32_t mask_bits) +_pixman_image_get_scanline_32 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { image->common.get_scanline_32 (image, x, y, width, buffer, mask, mask_bits); } -void -_pixman_image_get_scanline_64 (pixman_image_t *image, int x, int y, int width, - uint32_t *buffer, uint32_t *unused, uint32_t unused2) -{ - image->common.get_scanline_64 (image, x, y, width, buffer, unused, unused2); -} - /* Even thought the type of buffer is uint32_t *, the function actually expects * a uint64_t *buffer. 
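The generic 64-bit scanline path above never fetches wide data directly: it contracts the mask from 16 to 8 bits per channel so the ordinary 32-bit fetcher can consume it, fetches at 32bpp, then widens the result in place with pixman_expand. For 8-bit channels the widening is exact byte replication, since (v << 8) | v equals v * 65535 / 255; and because the output overlaps and outgrows the input, the in-place expansion has to walk the buffer from the end. A sketch of the per-pixel widening step:

    #include <stdint.h>

    /* Widen one a8r8g8b8 pixel to 16 bits per channel by byte replication:
     * an 8-bit value v maps to (v << 8) | v, which is exactly
     * v * 65535 / 255. */
    static uint64_t
    expand_8888_to_16bpc (uint32_t p)
    {
        uint64_t a = (p >> 24) & 0xff;
        uint64_t r = (p >> 16) & 0xff;
        uint64_t g = (p >>  8) & 0xff;
        uint64_t b = (p >>  0) & 0xff;

        return ((a << 8 | a) << 48) | ((r << 8 | r) << 32) |
               ((g << 8 | g) << 16) |  (b << 8 | b);
    }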
*/ - -scanFetchProc -_pixman_image_get_fetcher (pixman_image_t *image, - int wide) +void +_pixman_image_get_scanline_64 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *unused, + uint32_t unused2) { - assert (image->common.get_scanline_64); - assert (image->common.get_scanline_32); - - if (wide) - return image->common.get_scanline_64; - else - return image->common.get_scanline_32; + image->common.get_scanline_64 (image, x, y, width, buffer, unused, unused2); } -#define WRITE_ACCESS(f) ((image->common.write_func)? f##_accessors : f) - static void image_property_changed (pixman_image_t *image) { - - - image->common.property_changed (image); + image->common.dirty = TRUE; } /* Ref Counting */ @@ -190,8 +191,10 @@ pixman_image_unref (pixman_image_t *image) if (common->ref_count == 0) { + if (image->common.destroy_func) + image->common.destroy_func (image, image->common.destroy_data); + pixman_region32_fini (&common->clip_region); - pixman_region32_fini (&common->full_region); if (common->transform) free (common->transform); @@ -202,21 +205,14 @@ pixman_image_unref (pixman_image_t *image) if (common->alpha_map) pixman_image_unref ((pixman_image_t *)common->alpha_map); -#if 0 - if (image->type == BITS && image->bits.indexed) - free (image->bits.indexed); -#endif - -#if 0 - memset (image, 0xaa, sizeof (pixman_image_t)); -#endif - if (image->type == LINEAR || image->type == RADIAL || image->type == CONICAL) + if (image->type == LINEAR || + image->type == RADIAL || + image->type == CONICAL) { if (image->gradient.stops) free (image->gradient.stops); } - if (image->type == BITS && image->bits.free_me) free (image->bits.free_me); @@ -228,34 +224,45 @@ pixman_image_unref (pixman_image_t *image) return FALSE; } -/* Constructors */ +PIXMAN_EXPORT void +pixman_image_set_destroy_function (pixman_image_t * image, + pixman_image_destroy_func_t func, + void * data) +{ + image->common.destroy_func = func; + image->common.destroy_data = data; +} void _pixman_image_reset_clip_region (pixman_image_t *image) { - pixman_region32_fini (&image->common.clip_region); + image->common.have_clip_region = FALSE; +} - if (image->type == BITS) - { - pixman_region32_init_rect (&image->common.clip_region, 0, 0, - image->bits.width, image->bits.height); - } - else +void +_pixman_image_validate (pixman_image_t *image) +{ + if (image->common.dirty) { - pixman_region32_init (&image->common.clip_region); + image->common.property_changed (image); + image->common.dirty = FALSE; } + + if (image->common.alpha_map) + _pixman_image_validate (image->common.alpha_map); } PIXMAN_EXPORT pixman_bool_t -pixman_image_set_clip_region32 (pixman_image_t *image, - pixman_region32_t *region) +pixman_image_set_clip_region32 (pixman_image_t * image, + pixman_region32_t *region) { image_common_t *common = (image_common_t *)image; pixman_bool_t result; if (region) { - result = pixman_region32_copy (&common->clip_region, region); + if ((result = pixman_region32_copy (&common->clip_region, region))) + image->common.have_clip_region = TRUE; } else { @@ -269,17 +276,17 @@ pixman_image_set_clip_region32 (pixman_image_t *image, return result; } - PIXMAN_EXPORT pixman_bool_t -pixman_image_set_clip_region (pixman_image_t *image, - pixman_region16_t *region) +pixman_image_set_clip_region (pixman_image_t * image, + pixman_region16_t *region) { image_common_t *common = (image_common_t *)image; pixman_bool_t result; if (region) { - result = pixman_region32_copy_from_region16 (&common->clip_region, region); + if ((result = 
pixman_region32_copy_from_region16 (&common->clip_region, region))) + image->common.have_clip_region = TRUE; } else { @@ -293,27 +300,22 @@ pixman_image_set_clip_region (pixman_image_t *image, return result; } -/* Sets whether the clip region includes a clip region set by the client - */ PIXMAN_EXPORT void pixman_image_set_has_client_clip (pixman_image_t *image, - pixman_bool_t client_clip) + pixman_bool_t client_clip) { - image->common.has_client_clip = client_clip; - - image_property_changed (image); + image->common.client_clip = client_clip; } PIXMAN_EXPORT pixman_bool_t -pixman_image_set_transform (pixman_image_t *image, - const pixman_transform_t *transform) +pixman_image_set_transform (pixman_image_t * image, + const pixman_transform_t *transform) { static const pixman_transform_t id = { { { pixman_fixed_1, 0, 0 }, { 0, pixman_fixed_1, 0 }, - { 0, 0, pixman_fixed_1 } - } + { 0, 0, pixman_fixed_1 } } }; image_common_t *common = (image_common_t *)image; @@ -324,9 +326,10 @@ pixman_image_set_transform (pixman_image_t *image, if (memcmp (&id, transform, sizeof (pixman_transform_t)) == 0) { - free(common->transform); + free (common->transform); common->transform = NULL; result = TRUE; + goto out; } @@ -336,20 +339,23 @@ pixman_image_set_transform (pixman_image_t *image, if (common->transform == NULL) { result = FALSE; + goto out; } - memcpy(common->transform, transform, sizeof(pixman_transform_t)); + memcpy (common->transform, transform, sizeof(pixman_transform_t)); + + result = TRUE; out: image_property_changed (image); - - return TRUE; + + return result; } PIXMAN_EXPORT void -pixman_image_set_repeat (pixman_image_t *image, - pixman_repeat_t repeat) +pixman_image_set_repeat (pixman_image_t *image, + pixman_repeat_t repeat) { image->common.repeat = repeat; @@ -357,10 +363,10 @@ pixman_image_set_repeat (pixman_image_t *image, } PIXMAN_EXPORT pixman_bool_t -pixman_image_set_filter (pixman_image_t *image, - pixman_filter_t filter, - const pixman_fixed_t *params, - int n_params) +pixman_image_set_filter (pixman_image_t * image, + pixman_filter_t filter, + const pixman_fixed_t *params, + int n_params) { image_common_t *common = (image_common_t *)image; pixman_fixed_t *new_params; @@ -376,7 +382,7 @@ pixman_image_set_filter (pixman_image_t *image, return FALSE; memcpy (new_params, - params, n_params * sizeof (pixman_fixed_t)); + params, n_params * sizeof (pixman_fixed_t)); } common->filter = filter; @@ -392,15 +398,10 @@ pixman_image_set_filter (pixman_image_t *image, } PIXMAN_EXPORT void -pixman_image_set_source_clipping (pixman_image_t *image, - pixman_bool_t source_clipping) +pixman_image_set_source_clipping (pixman_image_t *image, + pixman_bool_t clip_sources) { - image_common_t *common = &image->common; - - if (source_clipping) - common->src_clip = &common->clip_region; - else - common->src_clip = &common->full_region; + image->common.clip_sources = clip_sources; image_property_changed (image); } @@ -410,8 +411,8 @@ pixman_image_set_source_clipping (pixman_image_t *image, * way, way too expensive. 
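The setters above now funnel through image_property_changed, which no longer recomputes anything: it just records that the image is dirty, and _pixman_image_validate performs the single property_changed call right before the image is actually used, recursing into the alpha map. A miniature of the dirty-flag pattern, with a trimmed hypothetical struct:

    #include <stdbool.h>

    typedef struct image image_t;
    struct image
    {
        bool dirty;
        void (*property_changed) (image_t *);
    };

    static void
    set_some_property (image_t *image)
    {
        /* ... store the new property value ... */
        image->dirty = true;             /* cheap: just record the change */
    }

    static void
    validate (image_t *image)
    {
        if (image->dirty)
        {
            image->property_changed (image); /* recompute fetchers etc. once */
            image->dirty = false;
        }
    }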
*/ PIXMAN_EXPORT void -pixman_image_set_indexed (pixman_image_t *image, - const pixman_indexed_t *indexed) +pixman_image_set_indexed (pixman_image_t * image, + const pixman_indexed_t *indexed) { bits_image_t *bits = (bits_image_t *)image; @@ -422,9 +423,9 @@ pixman_image_set_indexed (pixman_image_t *image, PIXMAN_EXPORT void pixman_image_set_alpha_map (pixman_image_t *image, - pixman_image_t *alpha_map, - int16_t x, - int16_t y) + pixman_image_t *alpha_map, + int16_t x, + int16_t y) { image_common_t *common = (image_common_t *)image; @@ -441,33 +442,35 @@ pixman_image_set_alpha_map (pixman_image_t *image, common->alpha_map = NULL; } - common->alpha_origin.x = x; - common->alpha_origin.y = y; + common->alpha_origin_x = x; + common->alpha_origin_y = y; image_property_changed (image); } PIXMAN_EXPORT void -pixman_image_set_component_alpha (pixman_image_t *image, - pixman_bool_t component_alpha) +pixman_image_set_component_alpha (pixman_image_t *image, + pixman_bool_t component_alpha) { image->common.component_alpha = component_alpha; image_property_changed (image); } - PIXMAN_EXPORT void -pixman_image_set_accessors (pixman_image_t *image, - pixman_read_memory_func_t read_func, - pixman_write_memory_func_t write_func) +pixman_image_set_accessors (pixman_image_t * image, + pixman_read_memory_func_t read_func, + pixman_write_memory_func_t write_func) { return_if_fail (image != NULL); - image->common.read_func = read_func; - image->common.write_func = write_func; + if (image->type == BITS) + { + image->bits.read_func = read_func; + image->bits.write_func = write_func; - image_property_changed (image); + image_property_changed (image); + } } PIXMAN_EXPORT uint32_t * @@ -515,234 +518,92 @@ pixman_image_get_depth (pixman_image_t *image) return 0; } -static uint32_t -color_to_uint32 (const pixman_color_t *color) +pixman_bool_t +_pixman_image_is_solid (pixman_image_t *image) { - return - (color->alpha >> 8 << 24) | - (color->red >> 8 << 16) | - (color->green & 0xff00) | - (color->blue >> 8); -} + if (image->type == SOLID) + return TRUE; -static pixman_bool_t -color_to_pixel (pixman_color_t *color, - uint32_t *pixel, - pixman_format_code_t format) -{ - uint32_t c = color_to_uint32 (color); - - if (!(format == PIXMAN_a8r8g8b8 || - format == PIXMAN_x8r8g8b8 || - format == PIXMAN_a8b8g8r8 || - format == PIXMAN_x8b8g8r8 || - format == PIXMAN_b8g8r8a8 || - format == PIXMAN_b8g8r8x8 || - format == PIXMAN_r5g6b5 || - format == PIXMAN_b5g6r5 || - format == PIXMAN_a8)) + if (image->type != BITS || + image->bits.width != 1 || + image->bits.height != 1) { return FALSE; } - if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_ABGR) - { - c = ((c & 0xff000000) >> 0) | - ((c & 0x00ff0000) >> 16) | - ((c & 0x0000ff00) >> 0) | - ((c & 0x000000ff) << 16); - } - if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_BGRA) - { - c = ((c & 0xff000000) >> 24) | - ((c & 0x00ff0000) >> 8) | - ((c & 0x0000ff00) << 8) | - ((c & 0x000000ff) << 24); - } - - if (format == PIXMAN_a8) - c = c >> 24; - else if (format == PIXMAN_r5g6b5 || - format == PIXMAN_b5g6r5) - c = cvt8888to0565 (c); - -#if 0 - printf ("color: %x %x %x %x\n", color->alpha, color->red, color->green, color->blue); - printf ("pixel: %x\n", c); -#endif + if (image->common.repeat == PIXMAN_REPEAT_NONE) + return FALSE; - *pixel = c; return TRUE; } -PIXMAN_EXPORT pixman_bool_t -pixman_image_fill_rectangles (pixman_op_t op, - pixman_image_t *dest, - pixman_color_t *color, - int n_rects, - const pixman_rectangle16_t *rects) +uint32_t +_pixman_image_get_solid (pixman_image_t * image, 
+ pixman_format_code_t format) { - pixman_image_t *solid; - pixman_color_t c; - int i; - - if (color->alpha == 0xffff) - { - if (op == PIXMAN_OP_OVER) - op = PIXMAN_OP_SRC; - } - - if (op == PIXMAN_OP_CLEAR) - { - c.red = 0; - c.green = 0; - c.blue = 0; - c.alpha = 0; - - color = &c; - - op = PIXMAN_OP_SRC; - } - - if (op == PIXMAN_OP_SRC) - { - uint32_t pixel; - - if (color_to_pixel (color, &pixel, dest->bits.format)) - { - for (i = 0; i < n_rects; ++i) - { - pixman_region32_t fill_region; - int n_boxes, j; - pixman_box32_t *boxes; - - pixman_region32_init_rect (&fill_region, rects[i].x, rects[i].y, rects[i].width, rects[i].height); - if (!pixman_region32_intersect (&fill_region, - &fill_region, - &dest->common.clip_region)) - return FALSE; - - - boxes = pixman_region32_rectangles (&fill_region, &n_boxes); - for (j = 0; j < n_boxes; ++j) - { - const pixman_box32_t *box = &(boxes[j]); - pixman_fill (dest->bits.bits, dest->bits.rowstride, PIXMAN_FORMAT_BPP (dest->bits.format), - box->x1, box->y1, box->x2 - box->x1, box->y2 - box->y1, - pixel); - } - - pixman_region32_fini (&fill_region); - } - return TRUE; - } - } + uint32_t result; - solid = pixman_image_create_solid_fill (color); - if (!solid) - return FALSE; + _pixman_image_get_scanline_32 (image, 0, 0, 1, &result, NULL, 0); - for (i = 0; i < n_rects; ++i) + /* If necessary, convert RGB <--> BGR. */ + if (PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB) { - const pixman_rectangle16_t *rect = &(rects[i]); - - pixman_image_composite (op, solid, NULL, dest, - 0, 0, 0, 0, - rect->x, rect->y, - rect->width, rect->height); + result = (((result & 0xff000000) >> 0) | + ((result & 0x00ff0000) >> 16) | + ((result & 0x0000ff00) >> 0) | + ((result & 0x000000ff) << 16)); } - pixman_image_unref (solid); - - return TRUE; + return result; } pixman_bool_t -pixman_image_can_get_solid (pixman_image_t *image) +_pixman_image_is_opaque (pixman_image_t *image) { - if (image->type == SOLID) - return TRUE; - - if (image->type != BITS || - image->bits.width != 1 || - image->bits.height != 1) - { - return FALSE; - } + int i; - if (image->common.repeat != PIXMAN_REPEAT_NORMAL) + if (image->common.alpha_map) return FALSE; - switch (image->bits.format) + switch (image->type) { - case PIXMAN_a8r8g8b8: - case PIXMAN_x8r8g8b8: - case PIXMAN_a8b8g8r8: - case PIXMAN_x8b8g8r8: - case PIXMAN_b8g8r8a8: - case PIXMAN_b8g8r8x8: - case PIXMAN_r8g8b8: - case PIXMAN_b8g8r8: - case PIXMAN_r5g6b5: - case PIXMAN_b5g6r5: - return TRUE; - default: - return FALSE; - } -} + case BITS: + if (image->common.repeat == PIXMAN_REPEAT_NONE) + return FALSE; -pixman_bool_t -pixman_image_is_opaque(pixman_image_t *image) -{ - int i = 0; - int gradientNumberOfColors = 0; + if (PIXMAN_FORMAT_A (image->bits.format)) + return FALSE; + break; - if(image->common.alpha_map) - return FALSE; + case LINEAR: + case RADIAL: + if (image->common.repeat == PIXMAN_REPEAT_NONE) + return FALSE; - switch(image->type) - { - case BITS: - if(PIXMAN_FORMAT_A(image->bits.format)) - return FALSE; - break; + for (i = 0; i < image->gradient.n_stops; ++i) + { + if (image->gradient.stops[i].color.alpha != 0xffff) + return FALSE; + } + break; - case LINEAR: case CONICAL: - case RADIAL: - gradientNumberOfColors = image->gradient.n_stops; - i=0; - while(i<gradientNumberOfColors) - { - if(image->gradient.stops[i].color.alpha != 0xffff) - return FALSE; - i++; - } - break; + /* Conical gradients always have a transparent border */ + return FALSE; + break; case SOLID: - if(Alpha(image->solid.color) != 0xff) - return FALSE; - break; 
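_pixman_image_get_solid above reads one pixel through the ordinary a8r8g8b8 scanline fetch and then, for target formats whose type is not ARGB, swaps the red and blue bytes while leaving alpha and green in place; for instance 0xff336699 (ARGB) becomes 0xff996633. The same swizzle in isolation, with a worked check:

    #include <assert.h>
    #include <stdint.h>

    /* Swap R and B within an 0xAARRGGBB pixel; A and G are untouched. */
    static uint32_t
    swap_r_b (uint32_t argb)
    {
        return (argb & 0xff00ff00) |          /* keep A and G       */
               ((argb & 0x00ff0000) >> 16) |  /* R down to B's byte */
               ((argb & 0x000000ff) << 16);   /* B up to R's byte   */
    }

    int
    main (void)
    {
        assert (swap_r_b (0xff336699) == 0xff996633);
        return 0;
    }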
+ if (ALPHA_8 (image->solid.color) != 0xff) + return FALSE; + break; } - /* Convolution filters can introduce translucency if the sum of the weights - is lower than 1. */ + /* Convolution filters can introduce translucency if the sum of the + * weights is lower than 1. + */ if (image->common.filter == PIXMAN_FILTER_CONVOLUTION) - return FALSE; - - if (image->common.repeat == PIXMAN_REPEAT_NONE) - { - if (image->common.filter != PIXMAN_FILTER_NEAREST) - return FALSE; - - if (image->common.transform) - return FALSE; - - /* Gradients do not necessarily cover the entire compositing area */ - if (image->type == LINEAR || image->type == CONICAL || image->type == RADIAL) - return FALSE; - } + return FALSE; - return TRUE; + return TRUE; } + diff --git a/lib/pixman/pixman/pixman-implementation.c b/lib/pixman/pixman/pixman-implementation.c index 86c2f3773..bcda9fe85 100644 --- a/lib/pixman/pixman/pixman-implementation.c +++ b/lib/pixman/pixman/pixman-implementation.c @@ -21,142 +21,141 @@ * SOFTWARE. */ +#ifdef HAVE_CONFIG_H #include <config.h> +#endif #include <stdlib.h> #include "pixman-private.h" static void -delegate_composite (pixman_implementation_t * imp, - pixman_op_t op, - pixman_image_t * src, - pixman_image_t * mask, - pixman_image_t * dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +delegate_composite (pixman_implementation_t * imp, + pixman_op_t op, + pixman_image_t * src, + pixman_image_t * mask, + pixman_image_t * dest, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { _pixman_implementation_composite (imp->delegate, - op, - src, mask, dest, - src_x, src_y, - mask_x, mask_y, - dest_x, dest_y, - width, height); + op, + src, mask, dest, + src_x, src_y, + mask_x, mask_y, + dest_x, dest_y, + width, height); } static void -delegate_combine_32 (pixman_implementation_t * imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) +delegate_combine_32 (pixman_implementation_t * imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { _pixman_implementation_combine_32 (imp->delegate, - op, dest, src, mask, width); + op, dest, src, mask, width); } static void -delegate_combine_64 (pixman_implementation_t * imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) +delegate_combine_64 (pixman_implementation_t * imp, + pixman_op_t op, + uint64_t * dest, + const uint64_t * src, + const uint64_t * mask, + int width) { _pixman_implementation_combine_64 (imp->delegate, - op, dest, src, mask, width); + op, dest, src, mask, width); } static void -delegate_combine_32_ca (pixman_implementation_t * imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) +delegate_combine_32_ca (pixman_implementation_t * imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { _pixman_implementation_combine_32_ca (imp->delegate, - op, dest, src, mask, width); + op, dest, src, mask, width); } static void -delegate_combine_64_ca (pixman_implementation_t * imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) +delegate_combine_64_ca (pixman_implementation_t * imp, + pixman_op_t op, + uint64_t * dest, + const uint64_t * src, + const uint64_t * mask, + int 
width) { _pixman_implementation_combine_64_ca (imp->delegate, - op, dest, src, mask, width); + op, dest, src, mask, width); } static pixman_bool_t -delegate_blt (pixman_implementation_t * imp, - uint32_t * src_bits, - uint32_t * dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dst_x, - int dst_y, - int width, - int height) +delegate_blt (pixman_implementation_t * imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) { - return _pixman_implementation_blt (imp->delegate, src_bits, dst_bits, src_stride, dst_stride, - src_bpp, dst_bpp, src_x, src_y, dst_x, dst_y, - width, height); + return _pixman_implementation_blt ( + imp->delegate, src_bits, dst_bits, src_stride, dst_stride, + src_bpp, dst_bpp, src_x, src_y, dst_x, dst_y, + width, height); } static pixman_bool_t delegate_fill (pixman_implementation_t *imp, - uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor) + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) { - return _pixman_implementation_fill (imp->delegate, bits, stride, bpp, x, y, width, height, xor); + return _pixman_implementation_fill ( + imp->delegate, bits, stride, bpp, x, y, width, height, xor); } pixman_implementation_t * -_pixman_implementation_create (pixman_implementation_t *toplevel, - pixman_implementation_t *delegate) +_pixman_implementation_create (pixman_implementation_t *delegate) { pixman_implementation_t *imp = malloc (sizeof (pixman_implementation_t)); + pixman_implementation_t *d; int i; - + if (!imp) return NULL; - - if (toplevel) - imp->toplevel = toplevel; - else - imp->toplevel = imp; - - if (delegate) - delegate->toplevel = imp->toplevel; - + + /* Make sure the whole delegate chain has the right toplevel */ imp->delegate = delegate; - + for (d = imp; d != NULL; d = d->delegate) + d->toplevel = imp; + /* Fill out function pointers with ones that just delegate */ imp->composite = delegate_composite; imp->blt = delegate_blt; imp->fill = delegate_fill; - + for (i = 0; i < PIXMAN_OP_LAST; ++i) { imp->combine_32[i] = delegate_combine_32; @@ -164,105 +163,106 @@ _pixman_implementation_create (pixman_implementation_t *toplevel, imp->combine_32_ca[i] = delegate_combine_32_ca; imp->combine_64_ca[i] = delegate_combine_64_ca; } - + return imp; } void -_pixman_implementation_combine_32 (pixman_implementation_t * imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) +_pixman_implementation_combine_32 (pixman_implementation_t * imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { - (* imp->combine_32[op]) (imp, op, dest, src, mask, width); + (*imp->combine_32[op]) (imp, op, dest, src, mask, width); } void -_pixman_implementation_combine_64 (pixman_implementation_t * imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) +_pixman_implementation_combine_64 (pixman_implementation_t * imp, + pixman_op_t op, + uint64_t * dest, + const uint64_t * src, + const uint64_t * mask, + int width) { - (* imp->combine_64[op]) (imp, op, dest, src, mask, width); + (*imp->combine_64[op]) (imp, op, dest, src, mask, width); } void -_pixman_implementation_combine_32_ca (pixman_implementation_t * imp, - pixman_op_t op, - uint32_t * dest, - const 
uint32_t * src, - const uint32_t * mask, - int width) +_pixman_implementation_combine_32_ca (pixman_implementation_t * imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { - (* imp->combine_32_ca[op]) (imp, op, dest, src, mask, width); + (*imp->combine_32_ca[op]) (imp, op, dest, src, mask, width); } void -_pixman_implementation_combine_64_ca (pixman_implementation_t * imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) +_pixman_implementation_combine_64_ca (pixman_implementation_t * imp, + pixman_op_t op, + uint64_t * dest, + const uint64_t * src, + const uint64_t * mask, + int width) { - (* imp->combine_64_ca[op]) (imp, op, dest, src, mask, width); + (*imp->combine_64_ca[op]) (imp, op, dest, src, mask, width); } void -_pixman_implementation_composite (pixman_implementation_t * imp, - pixman_op_t op, - pixman_image_t * src, - pixman_image_t * mask, - pixman_image_t * dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +_pixman_implementation_composite (pixman_implementation_t * imp, + pixman_op_t op, + pixman_image_t * src, + pixman_image_t * mask, + pixman_image_t * dest, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { - (* imp->composite) (imp, op, - src, mask, dest, - src_x, src_y, mask_x, mask_y, dest_x, dest_y, - width, height); + (*imp->composite) (imp, op, + src, mask, dest, + src_x, src_y, mask_x, mask_y, dest_x, dest_y, + width, height); } pixman_bool_t -_pixman_implementation_blt (pixman_implementation_t * imp, - uint32_t * src_bits, - uint32_t * dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dst_x, - int dst_y, - int width, - int height) +_pixman_implementation_blt (pixman_implementation_t * imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) { - return (* imp->blt) (imp, src_bits, dst_bits, src_stride, dst_stride, - src_bpp, dst_bpp, src_x, src_y, dst_x, dst_y, - width, height); + return (*imp->blt) (imp, src_bits, dst_bits, src_stride, dst_stride, + src_bpp, dst_bpp, src_x, src_y, dst_x, dst_y, + width, height); } pixman_bool_t _pixman_implementation_fill (pixman_implementation_t *imp, - uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor) + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) { - return (* imp->fill) (imp, bits, stride, bpp, x, y, width, height, xor); + return (*imp->fill) (imp, bits, stride, bpp, x, y, width, height, xor); } + diff --git a/lib/pixman/pixman/pixman-linear-gradient.c b/lib/pixman/pixman/pixman-linear-gradient.c index ea2975036..d9409fe50 100644 --- a/lib/pixman/pixman/pixman-linear-gradient.c +++ b/lib/pixman/pixman/pixman-linear-gradient.c @@ -24,29 +24,33 @@ * SOFTWARE. 
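_pixman_implementation_create above is what keeps the delegate chain consistent: each new level wraps its delegate and re-aims every toplevel pointer in the chain at itself, so operations invoked through imp->toplevel always start from the outermost (fastest) implementation, while the delegate_* stubs forward anything a level does not override to the next one down. The chain bookkeeping in isolation, with the struct trimmed to the two pointers:

    #include <stdlib.h>

    typedef struct impl impl_t;
    struct impl
    {
        impl_t *delegate;   /* next, more general implementation    */
        impl_t *toplevel;   /* outermost wrapper, used for dispatch */
    };

    /* Wrap `delegate`, making the new level the toplevel of the chain. */
    static impl_t *
    impl_create (impl_t *delegate)
    {
        impl_t *imp = malloc (sizeof *imp);
        impl_t *d;

        if (!imp)
            return NULL;

        imp->delegate = delegate;

        /* Make sure the whole delegate chain dispatches from here. */
        for (d = imp; d != NULL; d = d->delegate)
            d->toplevel = imp;

        return imp;
    }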
*/ +#ifdef HAVE_CONFIG_H #include <config.h> +#endif #include <stdlib.h> #include "pixman-private.h" -static source_pict_class_t +static source_image_class_t linear_gradient_classify (pixman_image_t *image, - int x, - int y, - int width, - int height) + int x, + int y, + int width, + int height) { linear_gradient_t *linear = (linear_gradient_t *)image; - pixman_vector_t v; + pixman_vector_t v; pixman_fixed_32_32_t l; pixman_fixed_48_16_t dx, dy, a, b, off; pixman_fixed_48_16_t factors[4]; - int i; - + int i; + image->source.class = SOURCE_IMAGE_CLASS_UNKNOWN; - + dx = linear->p2.x - linear->p1.x; dy = linear->p2.y - linear->p1.y; + l = dx * dx + dy * dy; + if (l) { a = (dx << 32) / l; @@ -56,40 +60,45 @@ linear_gradient_classify (pixman_image_t *image, { a = b = 0; } - + off = (-a * linear->p1.x - -b * linear->p1.y) >> 16; - + -b * linear->p1.y) >> 16; + for (i = 0; i < 3; i++) { v.vector[0] = pixman_int_to_fixed ((i % 2) * (width - 1) + x); v.vector[1] = pixman_int_to_fixed ((i / 2) * (height - 1) + y); v.vector[2] = pixman_fixed_1; - + if (image->common.transform) { if (!pixman_transform_point_3d (image->common.transform, &v)) { image->source.class = SOURCE_IMAGE_CLASS_UNKNOWN; - + return image->source.class; } } - + factors[i] = ((a * v.vector[0] + b * v.vector[1]) >> 16) + off; } - + if (factors[2] == factors[0]) image->source.class = SOURCE_IMAGE_CLASS_HORIZONTAL; else if (factors[1] == factors[0]) image->source.class = SOURCE_IMAGE_CLASS_VERTICAL; - + return image->source.class; } static void -linear_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width, - uint32_t *buffer, uint32_t *mask, uint32_t maskBits) +linear_gradient_get_scanline_32 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { pixman_vector_t v, unit; pixman_fixed_32_32_t l; @@ -97,83 +106,102 @@ linear_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width, gradient_t *gradient = (gradient_t *)image; source_image_t *source = (source_image_t *)image; linear_gradient_t *linear = (linear_gradient_t *)image; - uint32_t *end = buffer + width; - GradientWalker walker; - + uint32_t *end = buffer + width; + pixman_gradient_walker_t walker; + _pixman_gradient_walker_init (&walker, gradient, source->common.repeat); - + /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2; - v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2; + v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2; v.vector[2] = pixman_fixed_1; - if (source->common.transform) { + + if (source->common.transform) + { if (!pixman_transform_point_3d (source->common.transform, &v)) return; + unit.vector[0] = source->common.transform->matrix[0][0]; unit.vector[1] = source->common.transform->matrix[1][0]; unit.vector[2] = source->common.transform->matrix[2][0]; - } else { + } + else + { unit.vector[0] = pixman_fixed_1; unit.vector[1] = 0; unit.vector[2] = 0; } - + dx = linear->p2.x - linear->p1.x; dy = linear->p2.y - linear->p1.y; - l = dx*dx + dy*dy; - if (l != 0) { + + l = dx * dx + dy * dy; + + if (l != 0) + { a = (dx << 32) / l; b = (dy << 32) / l; - off = (-a*linear->p1.x - b*linear->p1.y)>>16; + off = (-a * linear->p1.x + -b * linear->p1.y) >> 16; } - if (l == 0 || (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)) { + + if (l == 0 || (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)) + { pixman_fixed_48_16_t inc, t; + /* 
affine transformation only */ - if (l == 0) { + if (l == 0) + { t = 0; inc = 0; - } else { - t = ((a*v.vector[0] + b*v.vector[1]) >> 16) + off; + } + else + { + t = ((a * v.vector[0] + b * v.vector[1]) >> 16) + off; inc = (a * unit.vector[0] + b * unit.vector[1]) >> 16; } - + if (source->class == SOURCE_IMAGE_CLASS_VERTICAL) { register uint32_t color; - - color = _pixman_gradient_walker_pixel( &walker, t ); + + color = _pixman_gradient_walker_pixel (&walker, t); while (buffer < end) - *(buffer++) = color; + *buffer++ = color; } else { - if (!mask) { + if (!mask) + { while (buffer < end) { - *(buffer) = _pixman_gradient_walker_pixel (&walker, t); - buffer += 1; - t += inc; + *buffer++ = _pixman_gradient_walker_pixel (&walker, t); + + t += inc; } - } else { - while (buffer < end) { - if (*mask++ & maskBits) - { - *(buffer) = _pixman_gradient_walker_pixel (&walker, t); - } - buffer += 1; - t += inc; + } + else + { + while (buffer < end) + { + if (*mask++ & mask_bits) + *buffer = _pixman_gradient_walker_pixel (&walker, t); + + buffer++; + t += inc; } } } } - else /* projective transformation */ + else { + /* projective transformation */ pixman_fixed_48_16_t t; - + if (source->class == SOURCE_IMAGE_CLASS_VERTICAL) { register uint32_t color; - + if (v.vector[2] == 0) { t = 0; @@ -181,33 +209,39 @@ linear_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width, else { pixman_fixed_48_16_t x, y; - + x = ((pixman_fixed_48_16_t) v.vector[0] << 16) / v.vector[2]; y = ((pixman_fixed_48_16_t) v.vector[1] << 16) / v.vector[2]; t = ((a * x + b * y) >> 16) + off; } - - color = _pixman_gradient_walker_pixel( &walker, t ); + + color = _pixman_gradient_walker_pixel (&walker, t); while (buffer < end) - *(buffer++) = color; + *buffer++ = color; } else { while (buffer < end) { - if (!mask || *mask++ & maskBits) + if (!mask || *mask++ & mask_bits) { - if (v.vector[2] == 0) { + if (v.vector[2] == 0) + { t = 0; - } else { + } + else + { pixman_fixed_48_16_t x, y; x = ((pixman_fixed_48_16_t)v.vector[0] << 16) / v.vector[2]; y = ((pixman_fixed_48_16_t)v.vector[1] << 16) / v.vector[2]; - t = ((a*x + b*y) >> 16) + off; + t = ((a * x + b * y) >> 16) + off; } - *(buffer) = _pixman_gradient_walker_pixel (&walker, t); + + *buffer = _pixman_gradient_walker_pixel (&walker, t); } + ++buffer; + v.vector[0] += unit.vector[0]; v.vector[1] += unit.vector[1]; v.vector[2] += unit.vector[2]; @@ -219,43 +253,42 @@ linear_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width, static void linear_gradient_property_changed (pixman_image_t *image) { - image->common.get_scanline_32 = (scanFetchProc)linear_gradient_get_scanline_32; - image->common.get_scanline_64 = (scanFetchProc)_pixman_image_get_scanline_64_generic; + image->common.get_scanline_32 = linear_gradient_get_scanline_32; + image->common.get_scanline_64 = _pixman_image_get_scanline_generic_64; } PIXMAN_EXPORT pixman_image_t * -pixman_image_create_linear_gradient (pixman_point_fixed_t *p1, - pixman_point_fixed_t *p2, - const pixman_gradient_stop_t *stops, - int n_stops) +pixman_image_create_linear_gradient (pixman_point_fixed_t * p1, + pixman_point_fixed_t * p2, + const pixman_gradient_stop_t *stops, + int n_stops) { pixman_image_t *image; linear_gradient_t *linear; - + return_val_if_fail (n_stops >= 2, NULL); - - image = _pixman_image_allocate(); - + + image = _pixman_image_allocate (); + if (!image) return NULL; - + linear = &image->linear; - + if (!_pixman_init_gradient (&linear->common, stops, n_stops)) { free (image); return NULL; } - + 
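    /* The endpoints stored below define the gradient axis.  The scanline
     * code above projects each pixel onto it in fixed point: with
     * dx = p2.x - p1.x, dy = p2.y - p1.y and l = dx*dx + dy*dy,
     *     a = (dx << 32) / l,   b = (dy << 32) / l,
     *     t = ((a * x + b * y) >> 16) + off,
     * so t is 0 at p1 and pixman_fixed_1 at p2, and along an affine row it
     * advances by the constant (a * unit[0] + b * unit[1]) >> 16.
     */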
linear->p1 = *p1; linear->p2 = *p2; - + image->type = LINEAR; image->source.class = SOURCE_IMAGE_CLASS_UNKNOWN; image->common.classify = linear_gradient_classify; image->common.property_changed = linear_gradient_property_changed; - - linear_gradient_property_changed (image); - + return image; } + diff --git a/lib/pixman/pixman/pixman-matrix.c b/lib/pixman/pixman/pixman-matrix.c index 79dae8de1..abdfa0525 100644 --- a/lib/pixman/pixman/pixman-matrix.c +++ b/lib/pixman/pixman/pixman-matrix.c @@ -32,595 +32,737 @@ #include <string.h> #include "pixman-private.h" -#define F(x) pixman_int_to_fixed(x) +#define F(x) pixman_int_to_fixed (x) PIXMAN_EXPORT void -pixman_transform_init_identity(struct pixman_transform *matrix) +pixman_transform_init_identity (struct pixman_transform *matrix) { - int i; + int i; - memset(matrix, '\0', sizeof (struct pixman_transform)); - for (i = 0; i < 3; i++) - matrix->matrix[i][i] = F(1); + memset (matrix, '\0', sizeof (struct pixman_transform)); + for (i = 0; i < 3; i++) + matrix->matrix[i][i] = F (1); } -typedef pixman_fixed_32_32_t pixman_fixed_34_30_t; +typedef pixman_fixed_32_32_t pixman_fixed_34_30_t; PIXMAN_EXPORT pixman_bool_t -pixman_transform_point_3d(const struct pixman_transform *transform, - struct pixman_vector *vector) +pixman_transform_point_3d (const struct pixman_transform *transform, + struct pixman_vector * vector) { - struct pixman_vector result; - pixman_fixed_32_32_t partial; - pixman_fixed_48_16_t v; - int i, j; + struct pixman_vector result; + pixman_fixed_32_32_t partial; + pixman_fixed_48_16_t v; + int i, j; - for (j = 0; j < 3; j++) + for (j = 0; j < 3; j++) + { + v = 0; + for (i = 0; i < 3; i++) { - v = 0; - for (i = 0; i < 3; i++) - { - partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] * - (pixman_fixed_48_16_t) vector->vector[i]); - v += partial >> 16; - } - if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) - return FALSE; - result.vector[j] = (pixman_fixed_t) v; + partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] * + (pixman_fixed_48_16_t) vector->vector[i]); + v += partial >> 16; } - *vector = result; - if (!result.vector[2]) - return FALSE; - return TRUE; + + if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) + return FALSE; + + result.vector[j] = (pixman_fixed_t) v; + } + + *vector = result; + + if (!result.vector[2]) + return FALSE; + + return TRUE; } PIXMAN_EXPORT pixman_bool_t -pixman_transform_point(const struct pixman_transform *transform, - struct pixman_vector *vector) +pixman_transform_point (const struct pixman_transform *transform, + struct pixman_vector * vector) { - pixman_fixed_32_32_t partial; - pixman_fixed_34_30_t v[3]; - pixman_fixed_48_16_t quo; - int i, j; + pixman_fixed_32_32_t partial; + pixman_fixed_34_30_t v[3]; + pixman_fixed_48_16_t quo; + int i, j; - for (j = 0; j < 3; j++) - { - v[j] = 0; - for (i = 0; i < 3; i++) - { - partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] * - (pixman_fixed_32_32_t) vector->vector[i]); - v[j] += partial >> 2; - } - } - if (!(v[2] >> 16)) - return FALSE; - for (j = 0; j < 2; j++) + for (j = 0; j < 3; j++) + { + v[j] = 0; + + for (i = 0; i < 3; i++) { - quo = v[j] / (v[2] >> 16); - if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16) - return FALSE; - vector->vector[j] = (pixman_fixed_t) quo; + partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] * + (pixman_fixed_32_32_t) vector->vector[i]); + v[j] += partial >> 2; } - vector->vector[2] = pixman_fixed_1; - return TRUE; + } + + if (!(v[2] >> 16)) + return FALSE; + 
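    /* The v[] accumulators hold 34.30 fixed-point values: each 16.16 by
     * 16.16 partial product is 32.32, and the >> 2 above repacks it as
     * 34.30 before summing.  The loop below performs the homogeneous
     * divide: v[j] (30 fractional bits) over v[2] >> 16 (14 fractional
     * bits) leaves the 16 fractional bits of a pixman_fixed_t, which is
     * range-checked before being stored.
     */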
+ for (j = 0; j < 2; j++) + { + quo = v[j] / (v[2] >> 16); + if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16) + return FALSE; + vector->vector[j] = (pixman_fixed_t) quo; + } + + vector->vector[2] = pixman_fixed_1; + return TRUE; } PIXMAN_EXPORT pixman_bool_t -pixman_transform_multiply (struct pixman_transform *dst, - const struct pixman_transform *l, - const struct pixman_transform *r) -{ - struct pixman_transform d; - int dx, dy; - int o; - - for (dy = 0; dy < 3; dy++) - for (dx = 0; dx < 3; dx++) { - pixman_fixed_48_16_t v; - pixman_fixed_32_32_t partial; - v = 0; - for (o = 0; o < 3; o++) { - partial = (pixman_fixed_32_32_t) l->matrix[dy][o] * (pixman_fixed_32_32_t) r->matrix[o][dx]; - v += partial >> 16; - } - if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) - return FALSE; - d.matrix[dy][dx] = (pixman_fixed_t) v; - } - *dst = d; - return TRUE; +pixman_transform_multiply (struct pixman_transform * dst, + const struct pixman_transform *l, + const struct pixman_transform *r) +{ + struct pixman_transform d; + int dx, dy; + int o; + + for (dy = 0; dy < 3; dy++) + { + for (dx = 0; dx < 3; dx++) + { + pixman_fixed_48_16_t v; + pixman_fixed_32_32_t partial; + + v = 0; + for (o = 0; o < 3; o++) + { + partial = + (pixman_fixed_32_32_t) l->matrix[dy][o] * + (pixman_fixed_32_32_t) r->matrix[o][dx]; + + v += partial >> 16; + } + + if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) + return FALSE; + + d.matrix[dy][dx] = (pixman_fixed_t) v; + } + } + + *dst = d; + return TRUE; } PIXMAN_EXPORT void pixman_transform_init_scale (struct pixman_transform *t, - pixman_fixed_t sx, - pixman_fixed_t sy) + pixman_fixed_t sx, + pixman_fixed_t sy) { - memset (t, '\0', sizeof (struct pixman_transform)); - t->matrix[0][0] = sx; - t->matrix[1][1] = sy; - t->matrix[2][2] = F (1); + memset (t, '\0', sizeof (struct pixman_transform)); + + t->matrix[0][0] = sx; + t->matrix[1][1] = sy; + t->matrix[2][2] = F (1); } static pixman_fixed_t -fixed_inverse(pixman_fixed_t x) +fixed_inverse (pixman_fixed_t x) { - return (pixman_fixed_t) ((((pixman_fixed_48_16_t) F(1)) * F(1)) / x); + return (pixman_fixed_t) ((((pixman_fixed_48_16_t) F (1)) * F (1)) / x); } PIXMAN_EXPORT pixman_bool_t -pixman_transform_scale(struct pixman_transform *forward, - struct pixman_transform *reverse, - pixman_fixed_t sx, pixman_fixed_t sy) -{ - struct pixman_transform t; - - if (sx == 0 || sy == 0) - return FALSE; - - if (forward) { - pixman_transform_init_scale (&t, sx, sy); - if (!pixman_transform_multiply (forward, &t, forward)) - return FALSE; - } - if (reverse) { - pixman_transform_init_scale (&t, fixed_inverse (sx), - fixed_inverse (sy)); - if (!pixman_transform_multiply (reverse, reverse, &t)) - return FALSE; - } - return TRUE; +pixman_transform_scale (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t sx, + pixman_fixed_t sy) +{ + struct pixman_transform t; + + if (sx == 0 || sy == 0) + return FALSE; + + if (forward) + { + pixman_transform_init_scale (&t, sx, sy); + if (!pixman_transform_multiply (forward, &t, forward)) + return FALSE; + } + + if (reverse) + { + pixman_transform_init_scale (&t, fixed_inverse (sx), + fixed_inverse (sy)); + if (!pixman_transform_multiply (reverse, reverse, &t)) + return FALSE; + } + + return TRUE; } PIXMAN_EXPORT void -pixman_transform_init_rotate(struct pixman_transform *t, - pixman_fixed_t c, - pixman_fixed_t s) +pixman_transform_init_rotate (struct pixman_transform *t, + pixman_fixed_t c, + pixman_fixed_t s) { - memset(t, '\0', 
sizeof (struct pixman_transform)); - t->matrix[0][0] = c; - t->matrix[0][1] = -s; - t->matrix[1][0] = s; - t->matrix[1][1] = c; - t->matrix[2][2] = F (1); + memset (t, '\0', sizeof (struct pixman_transform)); + + t->matrix[0][0] = c; + t->matrix[0][1] = -s; + t->matrix[1][0] = s; + t->matrix[1][1] = c; + t->matrix[2][2] = F (1); } PIXMAN_EXPORT pixman_bool_t -pixman_transform_rotate(struct pixman_transform *forward, - struct pixman_transform *reverse, - pixman_fixed_t c, pixman_fixed_t s) -{ - struct pixman_transform t; - - if (forward) { - pixman_transform_init_rotate(&t, c, s); - if (!pixman_transform_multiply(forward, &t, forward)) - return FALSE; - } - - if (reverse) { - pixman_transform_init_rotate(&t, c, -s); - if (!pixman_transform_multiply (reverse, reverse, &t)) - return FALSE; - } - return TRUE; +pixman_transform_rotate (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t c, + pixman_fixed_t s) +{ + struct pixman_transform t; + + if (forward) + { + pixman_transform_init_rotate (&t, c, s); + if (!pixman_transform_multiply (forward, &t, forward)) + return FALSE; + } + + if (reverse) + { + pixman_transform_init_rotate (&t, c, -s); + if (!pixman_transform_multiply (reverse, reverse, &t)) + return FALSE; + } + + return TRUE; } PIXMAN_EXPORT void -pixman_transform_init_translate(struct pixman_transform *t, - pixman_fixed_t tx, pixman_fixed_t ty) +pixman_transform_init_translate (struct pixman_transform *t, + pixman_fixed_t tx, + pixman_fixed_t ty) { - memset(t, '\0', sizeof (struct pixman_transform)); - t->matrix[0][0] = F (1); - t->matrix[0][2] = tx; - t->matrix[1][1] = F (1); - t->matrix[1][2] = ty; - t->matrix[2][2] = F (1); + memset (t, '\0', sizeof (struct pixman_transform)); + + t->matrix[0][0] = F (1); + t->matrix[0][2] = tx; + t->matrix[1][1] = F (1); + t->matrix[1][2] = ty; + t->matrix[2][2] = F (1); } PIXMAN_EXPORT pixman_bool_t -pixman_transform_translate(struct pixman_transform *forward, - struct pixman_transform *reverse, - pixman_fixed_t tx, pixman_fixed_t ty) +pixman_transform_translate (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t tx, + pixman_fixed_t ty) { - struct pixman_transform t; + struct pixman_transform t; - if (forward) { - pixman_transform_init_translate(&t, tx, ty); - if (!pixman_transform_multiply(forward, &t, forward)) - return FALSE; - } + if (forward) + { + pixman_transform_init_translate (&t, tx, ty); - if (reverse) { - pixman_transform_init_translate(&t, -tx, -ty); - if (!pixman_transform_multiply(reverse, reverse, &t)) - return FALSE; - } - return TRUE; + if (!pixman_transform_multiply (forward, &t, forward)) + return FALSE; + } + + if (reverse) + { + pixman_transform_init_translate (&t, -tx, -ty); + + if (!pixman_transform_multiply (reverse, reverse, &t)) + return FALSE; + } + return TRUE; } PIXMAN_EXPORT pixman_bool_t -pixman_transform_bounds(const struct pixman_transform *matrix, - struct pixman_box16 *b) - -{ - struct pixman_vector v[4]; - int i; - int x1, y1, x2, y2; - - v[0].vector[0] = F (b->x1); v[0].vector[1] = F (b->y1); v[0].vector[2] = F(1); - v[1].vector[0] = F (b->x2); v[1].vector[1] = F (b->y1); v[1].vector[2] = F(1); - v[2].vector[0] = F (b->x2); v[2].vector[1] = F (b->y2); v[2].vector[2] = F(1); - v[3].vector[0] = F (b->x1); v[3].vector[1] = F (b->y2); v[3].vector[2] = F(1); - for (i = 0; i < 4; i++) +pixman_transform_bounds (const struct pixman_transform *matrix, + struct pixman_box16 * b) + +{ + struct pixman_vector v[4]; + int i; + int x1, y1, x2, y2; + + 
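    /*
     * The loop below maps all four corners of the box through the
     * matrix, then keeps the truncated integer parts of the minima and
     * pixman_fixed_ceil () of the maxima, so the integer box handed
     * back is a conservative cover of the transformed (possibly
     * rotated or sheared) quadrilateral.
     */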
v[0].vector[0] = F (b->x1); + v[0].vector[1] = F (b->y1); + v[0].vector[2] = F (1); + + v[1].vector[0] = F (b->x2); + v[1].vector[1] = F (b->y1); + v[1].vector[2] = F (1); + + v[2].vector[0] = F (b->x2); + v[2].vector[1] = F (b->y2); + v[2].vector[2] = F (1); + + v[3].vector[0] = F (b->x1); + v[3].vector[1] = F (b->y2); + v[3].vector[2] = F (1); + + for (i = 0; i < 4; i++) + { + if (!pixman_transform_point (matrix, &v[i])) + return FALSE; + + x1 = pixman_fixed_to_int (v[i].vector[0]); + y1 = pixman_fixed_to_int (v[i].vector[1]); + x2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[0])); + y2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[1])); + + if (i == 0) + { + b->x1 = x1; + b->y1 = y1; + b->x2 = x2; + b->y2 = y2; + } + else { - if (!pixman_transform_point(matrix, &v[i])) - return FALSE; - x1 = pixman_fixed_to_int(v[i].vector[0]); - y1 = pixman_fixed_to_int(v[i].vector[1]); - x2 = pixman_fixed_to_int(pixman_fixed_ceil (v[i].vector[0])); - y2 = pixman_fixed_to_int(pixman_fixed_ceil (v[i].vector[1])); - if (i == 0) - { - b->x1 = x1; b->y1 = y1; - b->x2 = x2; b->y2 = y2; - } - else - { - if (x1 < b->x1) b->x1 = x1; - if (y1 < b->y1) b->y1 = y1; - if (x2 > b->x2) b->x2 = x2; - if (y2 > b->y2) b->y2 = y2; - } + if (x1 < b->x1) b->x1 = x1; + if (y1 < b->y1) b->y1 = y1; + if (x2 > b->x2) b->x2 = x2; + if (y2 > b->y2) b->y2 = y2; } - return TRUE; + } + + return TRUE; } PIXMAN_EXPORT pixman_bool_t -pixman_transform_invert (struct pixman_transform *dst, - const struct pixman_transform *src) +pixman_transform_invert (struct pixman_transform * dst, + const struct pixman_transform *src) { - struct pixman_f_transform m, r; + struct pixman_f_transform m, r; - pixman_f_transform_from_pixman_transform (&m, src); - if (!pixman_f_transform_invert (&r, &m)) - return FALSE; - if (!pixman_transform_from_pixman_f_transform (dst, &r)) - return FALSE; - return TRUE; + pixman_f_transform_from_pixman_transform (&m, src); + + if (!pixman_f_transform_invert (&r, &m)) + return FALSE; + + if (!pixman_transform_from_pixman_f_transform (dst, &r)) + return FALSE; + + return TRUE; } static pixman_bool_t -within_epsilon(pixman_fixed_t a, pixman_fixed_t b, pixman_fixed_t epsilon) +within_epsilon (pixman_fixed_t a, + pixman_fixed_t b, + pixman_fixed_t epsilon) { - pixman_fixed_t t = a - b; - if (t < 0) t = -t; - return t <= epsilon; + pixman_fixed_t t = a - b; + + if (t < 0) + t = -t; + + return t <= epsilon; } -#define epsilon (pixman_fixed_t) (2) +#define EPSILON (pixman_fixed_t) (2) -#define is_same(a,b) (within_epsilon(a, b, epsilon)) -#define is_zero(a) (within_epsilon(a, 0, epsilon)) -#define is_one(a) (within_epsilon(a, F(1), epsilon)) -#define is_unit(a) (within_epsilon(a, F( 1), epsilon) || \ - within_epsilon(a, F(-1), epsilon) || \ - is_zero(a)) -#define is_int(a) (is_zero(pixman_fixed_frac(a))) +#define IS_SAME(a, b) (within_epsilon (a, b, EPSILON)) +#define IS_ZERO(a) (within_epsilon (a, 0, EPSILON)) +#define IS_ONE(a) (within_epsilon (a, F (1), EPSILON)) +#define IS_UNIT(a) \ + (within_epsilon (a, F (1), EPSILON) || \ + within_epsilon (a, F (-1), EPSILON) || \ + IS_ZERO (a)) +#define IS_INT(a) (IS_ZERO (pixman_fixed_frac (a))) PIXMAN_EXPORT pixman_bool_t -pixman_transform_is_identity(const struct pixman_transform *t) +pixman_transform_is_identity (const struct pixman_transform *t) { - return ( is_same(t->matrix[0][0], t->matrix[1][1]) && - is_same(t->matrix[0][0], t->matrix[2][2]) && - !is_zero(t->matrix[0][0]) && - is_zero(t->matrix[0][1]) && - is_zero(t->matrix[0][2]) && - 
is_zero(t->matrix[1][0]) && - is_zero(t->matrix[1][2]) && - is_zero(t->matrix[2][0]) && - is_zero(t->matrix[2][1])); + return (IS_SAME (t->matrix[0][0], t->matrix[1][1]) && + IS_SAME (t->matrix[0][0], t->matrix[2][2]) && + !IS_ZERO (t->matrix[0][0]) && + IS_ZERO (t->matrix[0][1]) && + IS_ZERO (t->matrix[0][2]) && + IS_ZERO (t->matrix[1][0]) && + IS_ZERO (t->matrix[1][2]) && + IS_ZERO (t->matrix[2][0]) && + IS_ZERO (t->matrix[2][1])); } PIXMAN_EXPORT pixman_bool_t -pixman_transform_is_scale(const struct pixman_transform *t) +pixman_transform_is_scale (const struct pixman_transform *t) { - return (!is_zero(t->matrix[0][0]) && - is_zero(t->matrix[0][1]) && - is_zero(t->matrix[0][2]) && + return (!IS_ZERO (t->matrix[0][0]) && + IS_ZERO (t->matrix[0][1]) && + IS_ZERO (t->matrix[0][2]) && - is_zero(t->matrix[1][0]) && - !is_zero(t->matrix[1][1]) && - is_zero(t->matrix[1][2]) && + IS_ZERO (t->matrix[1][0]) && + !IS_ZERO (t->matrix[1][1]) && + IS_ZERO (t->matrix[1][2]) && - is_zero(t->matrix[2][0]) && - is_zero(t->matrix[2][1]) && - !is_zero(t->matrix[2][2])); + IS_ZERO (t->matrix[2][0]) && + IS_ZERO (t->matrix[2][1]) && + !IS_ZERO (t->matrix[2][2])); } PIXMAN_EXPORT pixman_bool_t -pixman_transform_is_int_translate(const struct pixman_transform *t) +pixman_transform_is_int_translate (const struct pixman_transform *t) { - return (is_one (t->matrix[0][0]) && - is_zero(t->matrix[0][1]) && - is_int (t->matrix[0][2]) && + return (IS_ONE (t->matrix[0][0]) && + IS_ZERO (t->matrix[0][1]) && + IS_INT (t->matrix[0][2]) && - is_zero(t->matrix[1][0]) && - is_one (t->matrix[1][1]) && - is_int (t->matrix[1][2]) && + IS_ZERO (t->matrix[1][0]) && + IS_ONE (t->matrix[1][1]) && + IS_INT (t->matrix[1][2]) && - is_zero(t->matrix[2][0]) && - is_zero(t->matrix[2][1]) && - is_one (t->matrix[2][2])); + IS_ZERO (t->matrix[2][0]) && + IS_ZERO (t->matrix[2][1]) && + IS_ONE (t->matrix[2][2])); } PIXMAN_EXPORT pixman_bool_t -pixman_transform_is_inverse(const struct pixman_transform *a, - const struct pixman_transform *b) +pixman_transform_is_inverse (const struct pixman_transform *a, + const struct pixman_transform *b) { - struct pixman_transform t; + struct pixman_transform t; - pixman_transform_multiply(&t, a, b); - return pixman_transform_is_identity(&t); + pixman_transform_multiply (&t, a, b); + + return pixman_transform_is_identity (&t); } PIXMAN_EXPORT void -pixman_f_transform_from_pixman_transform (struct pixman_f_transform *ft, - const struct pixman_transform *t) +pixman_f_transform_from_pixman_transform (struct pixman_f_transform * ft, + const struct pixman_transform *t) { - int i, j; + int i, j; - for (j = 0; j < 3; j++) - for (i = 0; i < 3; i++) - ft->m[j][i] = pixman_fixed_to_double (t->matrix[j][i]); + for (j = 0; j < 3; j++) + { + for (i = 0; i < 3; i++) + ft->m[j][i] = pixman_fixed_to_double (t->matrix[j][i]); + } } PIXMAN_EXPORT pixman_bool_t -pixman_transform_from_pixman_f_transform (struct pixman_transform *t, - const struct pixman_f_transform *ft) +pixman_transform_from_pixman_f_transform (struct pixman_transform * t, + const struct pixman_f_transform *ft) { - int i, j; + int i, j; - for (j = 0; j < 3; j++) - for (i = 0; i < 3; i++) - { - double d = ft->m[j][i]; - if (d < -32767.0 || d > 32767.0) - return FALSE; - d = d * 65536.0 + 0.5; - t->matrix[j][i] = (pixman_fixed_t) floor (d); - } - return TRUE; + for (j = 0; j < 3; j++) + { + for (i = 0; i < 3; i++) + { + double d = ft->m[j][i]; + if (d < -32767.0 || d > 32767.0) + return FALSE; + d = d * 65536.0 + 0.5; + t->matrix[j][i] = (pixman_fixed_t) floor 
(d); + } + } + + return TRUE; } -static const int a[3] = { 3, 3, 2 }; -static const int b[3] = { 2, 1, 1 }; +static const int a[3] = { 3, 3, 2 }; +static const int b[3] = { 2, 1, 1 }; PIXMAN_EXPORT pixman_bool_t -pixman_f_transform_invert(struct pixman_f_transform *dst, - const struct pixman_f_transform *src) -{ - double det; - int i, j; - static int a[3] = { 2, 2, 1 }; - static int b[3] = { 1, 0, 0 }; - - det = 0; - for (i = 0; i < 3; i++) { - double p; - int ai = a[i]; - int bi = b[i]; - p = src->m[i][0] * (src->m[ai][2] * src->m[bi][1] - - src->m[ai][1] * src->m[bi][2]); - if (i == 1) - p = -p; - det += p; - } - if (det == 0) - return FALSE; - det = 1/det; - for (j = 0; j < 3; j++) { - for (i = 0; i < 3; i++) { - double p; - int ai = a[i]; - int aj = a[j]; - int bi = b[i]; - int bj = b[j]; - - p = (src->m[ai][aj] * src->m[bi][bj] - - src->m[ai][bj] * src->m[bi][aj]); - if (((i + j) & 1) != 0) - p = -p; - dst->m[j][i] = det * p; - } +pixman_f_transform_invert (struct pixman_f_transform * dst, + const struct pixman_f_transform *src) +{ + double det; + int i, j; + static int a[3] = { 2, 2, 1 }; + static int b[3] = { 1, 0, 0 }; + + det = 0; + for (i = 0; i < 3; i++) + { + double p; + int ai = a[i]; + int bi = b[i]; + p = src->m[i][0] * (src->m[ai][2] * src->m[bi][1] - + src->m[ai][1] * src->m[bi][2]); + if (i == 1) + p = -p; + det += p; + } + + if (det == 0) + return FALSE; + + det = 1 / det; + for (j = 0; j < 3; j++) + { + for (i = 0; i < 3; i++) + { + double p; + int ai = a[i]; + int aj = a[j]; + int bi = b[i]; + int bj = b[j]; + + p = (src->m[ai][aj] * src->m[bi][bj] - + src->m[ai][bj] * src->m[bi][aj]); + + if (((i + j) & 1) != 0) + p = -p; + + dst->m[j][i] = det * p; } - return TRUE; + } + + return TRUE; } PIXMAN_EXPORT pixman_bool_t -pixman_f_transform_point(const struct pixman_f_transform *t, - struct pixman_f_vector *v) +pixman_f_transform_point (const struct pixman_f_transform *t, + struct pixman_f_vector * v) { - struct pixman_f_vector result; - int i, j; - double a; + struct pixman_f_vector result; + int i, j; + double a; - for (j = 0; j < 3; j++) - { - a = 0; - for (i = 0; i < 3; i++) - a += t->m[j][i] * v->v[i]; - result.v[j] = a; - } - if (!result.v[2]) - return FALSE; - for (j = 0; j < 2; j++) - v->v[j] = result.v[j] / result.v[2]; - v->v[2] = 1; - return TRUE; + for (j = 0; j < 3; j++) + { + a = 0; + for (i = 0; i < 3; i++) + a += t->m[j][i] * v->v[i]; + result.v[j] = a; + } + + if (!result.v[2]) + return FALSE; + + for (j = 0; j < 2; j++) + v->v[j] = result.v[j] / result.v[2]; + + v->v[2] = 1; + + return TRUE; } PIXMAN_EXPORT void -pixman_f_transform_point_3d(const struct pixman_f_transform *t, - struct pixman_f_vector *v) +pixman_f_transform_point_3d (const struct pixman_f_transform *t, + struct pixman_f_vector * v) { - struct pixman_f_vector result; - int i, j; - double a; + struct pixman_f_vector result; + int i, j; + double a; - for (j = 0; j < 3; j++) - { - a = 0; - for (i = 0; i < 3; i++) - a += t->m[j][i] * v->v[i]; - result.v[j] = a; - } - *v = result; + for (j = 0; j < 3; j++) + { + a = 0; + for (i = 0; i < 3; i++) + a += t->m[j][i] * v->v[i]; + result.v[j] = a; + } + + *v = result; } PIXMAN_EXPORT void -pixman_f_transform_multiply(struct pixman_f_transform *dst, - const struct pixman_f_transform *l, - const struct pixman_f_transform *r) +pixman_f_transform_multiply (struct pixman_f_transform * dst, + const struct pixman_f_transform *l, + const struct pixman_f_transform *r) { - struct pixman_f_transform d; - int dx, dy; - int o; + struct pixman_f_transform d; + int 
dx, dy; + int o; - for (dy = 0; dy < 3; dy++) - for (dx = 0; dx < 3; dx++) - { - double v = 0; - for (o = 0; o < 3; o++) - v += l->m[dy][o] * r->m[o][dx]; - d.m[dy][dx] = v; - } - *dst = d; + for (dy = 0; dy < 3; dy++) + { + for (dx = 0; dx < 3; dx++) + { + double v = 0; + for (o = 0; o < 3; o++) + v += l->m[dy][o] * r->m[o][dx]; + d.m[dy][dx] = v; + } + } + + *dst = d; } PIXMAN_EXPORT void -pixman_f_transform_init_scale (struct pixman_f_transform *t, double sx, double sy) +pixman_f_transform_init_scale (struct pixman_f_transform *t, + double sx, + double sy) { - t->m[0][0] = sx; t->m[0][1] = 0; t->m[0][2] = 0; - t->m[1][0] = 0; t->m[1][1] = sy; t->m[1][2] = 0; - t->m[2][0] = 0; t->m[2][1] = 0; t->m[2][2] = 1; + t->m[0][0] = sx; + t->m[0][1] = 0; + t->m[0][2] = 0; + t->m[1][0] = 0; + t->m[1][1] = sy; + t->m[1][2] = 0; + t->m[2][0] = 0; + t->m[2][1] = 0; + t->m[2][2] = 1; } PIXMAN_EXPORT pixman_bool_t pixman_f_transform_scale (struct pixman_f_transform *forward, - struct pixman_f_transform *reverse, - double sx, double sy) -{ - struct pixman_f_transform t; - - if (sx == 0 || sy == 0) - return FALSE; - - if (forward) { - pixman_f_transform_init_scale (&t, sx, sy); - pixman_f_transform_multiply (forward, &t, forward); - } - if (reverse) { - pixman_f_transform_init_scale (&t, 1/sx, 1/sy); - pixman_f_transform_multiply (reverse, reverse, &t); - } - return TRUE; + struct pixman_f_transform *reverse, + double sx, + double sy) +{ + struct pixman_f_transform t; + + if (sx == 0 || sy == 0) + return FALSE; + + if (forward) + { + pixman_f_transform_init_scale (&t, sx, sy); + pixman_f_transform_multiply (forward, &t, forward); + } + + if (reverse) + { + pixman_f_transform_init_scale (&t, 1 / sx, 1 / sy); + pixman_f_transform_multiply (reverse, reverse, &t); + } + + return TRUE; } PIXMAN_EXPORT void -pixman_f_transform_init_rotate (struct pixman_f_transform *t, double c, double s) +pixman_f_transform_init_rotate (struct pixman_f_transform *t, + double c, + double s) { - t->m[0][0] = c; t->m[0][1] = -s; t->m[0][2] = 0; - t->m[1][0] = s; t->m[1][1] = c; t->m[1][2] = 0; - t->m[2][0] = 0; t->m[2][1] = 0; t->m[2][2] = 1; + t->m[0][0] = c; + t->m[0][1] = -s; + t->m[0][2] = 0; + t->m[1][0] = s; + t->m[1][1] = c; + t->m[1][2] = 0; + t->m[2][0] = 0; + t->m[2][1] = 0; + t->m[2][2] = 1; } PIXMAN_EXPORT pixman_bool_t pixman_f_transform_rotate (struct pixman_f_transform *forward, - struct pixman_f_transform *reverse, - double c, double s) + struct pixman_f_transform *reverse, + double c, + double s) { - struct pixman_f_transform t; + struct pixman_f_transform t; - if (forward) { - pixman_f_transform_init_rotate (&t, c, s); - pixman_f_transform_multiply (forward, &t, forward); - } - if (reverse) { - pixman_f_transform_init_rotate (&t, c, -s); - pixman_f_transform_multiply (reverse, reverse, &t); - } - return TRUE; + if (forward) + { + pixman_f_transform_init_rotate (&t, c, s); + pixman_f_transform_multiply (forward, &t, forward); + } + + if (reverse) + { + pixman_f_transform_init_rotate (&t, c, -s); + pixman_f_transform_multiply (reverse, reverse, &t); + } + + return TRUE; } PIXMAN_EXPORT void -pixman_f_transform_init_translate (struct pixman_f_transform *t, double tx, double ty) +pixman_f_transform_init_translate (struct pixman_f_transform *t, + double tx, + double ty) { - t->m[0][0] = 1; t->m[0][1] = 0; t->m[0][2] = tx; - t->m[1][0] = 0; t->m[1][1] = 1; t->m[1][2] = ty; - t->m[2][0] = 0; t->m[2][1] = 0; t->m[2][2] = 1; + t->m[0][0] = 1; + t->m[0][1] = 0; + t->m[0][2] = tx; + t->m[1][0] = 0; + t->m[1][1] = 1; + 
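    /* tx and ty land in the third column because pixman applies its
     * matrices to column vectors: [x' y' w']^T = M [x y 1]^T, as in
     * pixman_f_transform_point_3d () above. */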
t->m[1][2] = ty; + t->m[2][0] = 0; + t->m[2][1] = 0; + t->m[2][2] = 1; } PIXMAN_EXPORT pixman_bool_t pixman_f_transform_translate (struct pixman_f_transform *forward, - struct pixman_f_transform *reverse, - double tx, double ty) + struct pixman_f_transform *reverse, + double tx, + double ty) { - struct pixman_f_transform t; + struct pixman_f_transform t; - if (forward) { - pixman_f_transform_init_translate (&t, tx, ty); - pixman_f_transform_multiply (forward, &t, forward); - } - if (reverse) { - pixman_f_transform_init_translate (&t, -tx, -ty); - pixman_f_transform_multiply (reverse, reverse, &t); - } - return TRUE; + if (forward) + { + pixman_f_transform_init_translate (&t, tx, ty); + pixman_f_transform_multiply (forward, &t, forward); + } + + if (reverse) + { + pixman_f_transform_init_translate (&t, -tx, -ty); + pixman_f_transform_multiply (reverse, reverse, &t); + } + + return TRUE; } PIXMAN_EXPORT pixman_bool_t -pixman_f_transform_bounds(const struct pixman_f_transform *t, struct pixman_box16 *b) -{ - struct pixman_f_vector v[4]; - int i; - int x1, y1, x2, y2; - - v[0].v[0] = b->x1; v[0].v[1] = b->y1; v[0].v[2] = 1; - v[1].v[0] = b->x2; v[1].v[1] = b->y1; v[1].v[2] = 1; - v[2].v[0] = b->x2; v[2].v[1] = b->y2; v[2].v[2] = 1; - v[3].v[0] = b->x1; v[3].v[1] = b->y2; v[3].v[2] = 1; - for (i = 0; i < 4; i++) +pixman_f_transform_bounds (const struct pixman_f_transform *t, + struct pixman_box16 * b) +{ + struct pixman_f_vector v[4]; + int i; + int x1, y1, x2, y2; + + v[0].v[0] = b->x1; + v[0].v[1] = b->y1; + v[0].v[2] = 1; + v[1].v[0] = b->x2; + v[1].v[1] = b->y1; + v[1].v[2] = 1; + v[2].v[0] = b->x2; + v[2].v[1] = b->y2; + v[2].v[2] = 1; + v[3].v[0] = b->x1; + v[3].v[1] = b->y2; + v[3].v[2] = 1; + + for (i = 0; i < 4; i++) + { + if (!pixman_f_transform_point (t, &v[i])) + return FALSE; + + x1 = floor (v[i].v[0]); + y1 = floor (v[i].v[1]); + x2 = ceil (v[i].v[0]); + y2 = ceil (v[i].v[1]); + + if (i == 0) + { + b->x1 = x1; + b->y1 = y1; + b->x2 = x2; + b->y2 = y2; + } + else { - if (!pixman_f_transform_point (t, &v[i])) - return FALSE; - x1 = floor (v[i].v[0]); - y1 = floor (v[i].v[1]); - x2 = ceil (v[i].v[0]); - y2 = ceil (v[i].v[1]); - if (i == 0) - { - b->x1 = x1; b->y1 = y1; - b->x2 = x2; b->y2 = y2; - } - else - { - if (x1 < b->x1) b->x1 = x1; - if (y1 < b->y1) b->y1 = y1; - if (x2 > b->x2) b->x2 = x2; - if (y2 > b->y2) b->y2 = y2; - } + if (x1 < b->x1) b->x1 = x1; + if (y1 < b->y1) b->y1 = y1; + if (x2 > b->x2) b->x2 = x2; + if (y2 > b->y2) b->y2 = y2; } - return TRUE; + } + + return TRUE; } PIXMAN_EXPORT void pixman_f_transform_init_identity (struct pixman_f_transform *t) { - int i, j; + int i, j; - for (j = 0; j < 3; j++) - for (i = 0; i < 3; i++) - t->m[j][i] = i == j ? 1 : 0; + for (j = 0; j < 3; j++) + { + for (i = 0; i < 3; i++) + t->m[j][i] = i == j ? 
1 : 0; + } } diff --git a/lib/pixman/pixman/pixman-mmx.c b/lib/pixman/pixman/pixman-mmx.c index db87b1987..7dcc1dc96 100644 --- a/lib/pixman/pixman/pixman-mmx.c +++ b/lib/pixman/pixman/pixman-mmx.c @@ -37,11 +37,12 @@ #include <mmintrin.h> #include "pixman-private.h" +#include "pixman-combine32.h" -#define noVERBOSE +#define no_vERBOSE #ifdef VERBOSE -#define CHECKPOINT() ErrorF ("at %s %d\n", __FUNCTION__, __LINE__) +#define CHECKPOINT() error_f ("at %s %d\n", __FUNCTION__, __LINE__) #else #define CHECKPOINT() #endif @@ -97,43 +98,43 @@ typedef struct mmxdatafield mmx_ffff0000ffff0000; mmxdatafield mmx_0000ffff00000000; mmxdatafield mmx_000000000000ffff; -} MMXData; +} mmx_data_t; #if defined(_MSC_VER) -# define MMXDATA_INIT(field, val) { val##UI64 } -#elif defined(M64_MEMBER) /* __m64 is a struct, not an integral type */ -# define MMXDATA_INIT(field, val) field = { val##ULL } -#else /* __m64 is an integral type */ -# define MMXDATA_INIT(field, val) field = val##ULL +# define MMXDATA_INIT(field, val) { val ## UI64 } +#elif defined(M64_MEMBER) /* __m64 is a struct, not an integral type */ +# define MMXDATA_INIT(field, val) field = { val ## ULL } +#else /* __m64 is an integral type */ +# define MMXDATA_INIT(field, val) field = val ## ULL #endif -static const MMXData c = -{ - MMXDATA_INIT(.mmx_4x00ff, 0x00ff00ff00ff00ff), - MMXDATA_INIT(.mmx_4x0080, 0x0080008000800080), - MMXDATA_INIT(.mmx_565_rgb, 0x000001f0003f001f), - MMXDATA_INIT(.mmx_565_unpack_multiplier, 0x0000008404100840), - MMXDATA_INIT(.mmx_565_r, 0x000000f800000000), - MMXDATA_INIT(.mmx_565_g, 0x0000000000fc0000), - MMXDATA_INIT(.mmx_565_b, 0x00000000000000f8), - MMXDATA_INIT(.mmx_mask_0, 0xffffffffffff0000), - MMXDATA_INIT(.mmx_mask_1, 0xffffffff0000ffff), - MMXDATA_INIT(.mmx_mask_2, 0xffff0000ffffffff), - MMXDATA_INIT(.mmx_mask_3, 0x0000ffffffffffff), - MMXDATA_INIT(.mmx_full_alpha, 0x00ff000000000000), - MMXDATA_INIT(.mmx_ffff0000ffff0000, 0xffff0000ffff0000), - MMXDATA_INIT(.mmx_0000ffff00000000, 0x0000ffff00000000), - MMXDATA_INIT(.mmx_000000000000ffff, 0x000000000000ffff), +static const mmx_data_t c = +{ + MMXDATA_INIT (.mmx_4x00ff, 0x00ff00ff00ff00ff), + MMXDATA_INIT (.mmx_4x0080, 0x0080008000800080), + MMXDATA_INIT (.mmx_565_rgb, 0x000001f0003f001f), + MMXDATA_INIT (.mmx_565_unpack_multiplier, 0x0000008404100840), + MMXDATA_INIT (.mmx_565_r, 0x000000f800000000), + MMXDATA_INIT (.mmx_565_g, 0x0000000000fc0000), + MMXDATA_INIT (.mmx_565_b, 0x00000000000000f8), + MMXDATA_INIT (.mmx_mask_0, 0xffffffffffff0000), + MMXDATA_INIT (.mmx_mask_1, 0xffffffff0000ffff), + MMXDATA_INIT (.mmx_mask_2, 0xffff0000ffffffff), + MMXDATA_INIT (.mmx_mask_3, 0x0000ffffffffffff), + MMXDATA_INIT (.mmx_full_alpha, 0x00ff000000000000), + MMXDATA_INIT (.mmx_ffff0000ffff0000, 0xffff0000ffff0000), + MMXDATA_INIT (.mmx_0000ffff00000000, 0x0000ffff00000000), + MMXDATA_INIT (.mmx_000000000000ffff, 0x000000000000ffff), }; #ifdef __GNUC__ # ifdef __ICC -# define MC(x) M64(c.mmx_##x) +# define MC(x) M64 (c.mmx_ ## x) # else -# define MC(x) ((__m64)c.mmx_##x) +# define MC(x) ((__m64)c.mmx_ ## x) # endif #else -# define MC(x) c.mmx_##x +# define MC(x) c.mmx_ ## x #endif static force_inline __m64 @@ -141,12 +142,12 @@ M64 (uint64_t x) { #ifdef __ICC return _mm_cvtsi64_m64 (x); -#elif defined M64_MEMBER /* __m64 is a struct, not an integral type */ +#elif defined M64_MEMBER /* __m64 is a struct, not an integral type */ __m64 res; res.M64_MEMBER = x; return res; -#else /* __m64 is an integral type */ +#else /* __m64 is an integral type */ return (__m64)x; #endif } 
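The pix_multiply () helper in the hunks below performs the classic exact
rounded divide-by-255: form t = a * b + 0x80 in each 16-bit lane, add t's
own high byte back in, and shift right by 8.  A standalone scalar sketch of
the same identity -- mul_un8 is an illustrative name for this note, the
in-tree equivalent being the MUL_UN8 macro from pixman-combine32.h -- which
can be checked exhaustively:

#include <assert.h>
#include <stdint.h>

/* Scalar model of pix_multiply ()'s rounding: with t = a * b + 0x80,
 * (t + (t >> 8)) >> 8 equals the exactly rounded a * b / 255 for all
 * 8-bit a and b (255 is odd, so no product lands exactly halfway). */
static uint8_t
mul_un8 (uint8_t a, uint8_t b)
{
    uint32_t t = (uint32_t)a * b + 0x80;

    return (uint8_t) ((t + (t >> 8)) >> 8);
}

int
main (void)
{
    unsigned a, b;

    for (a = 0; a < 256; a++)
    {
	for (b = 0; b < 256; b++)
	    assert (mul_un8 (a, b) == (a * b + 127) / 255);
    }

    return 0;
}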
@@ -156,16 +157,17 @@ UINT64 (__m64 x) { #ifdef __ICC return _mm_cvtm64_si64 (x); -#elif defined M64_MEMBER /* __m64 is a struct, not an integral type */ +#elif defined M64_MEMBER /* __m64 is a struct, not an integral type */ uint64_t res = x.M64_MEMBER; return res; -#else /* __m64 is an integral type */ +#else /* __m64 is an integral type */ return (uint64_t)x; #endif } static force_inline __m64 -shift (__m64 v, int s) +shift (__m64 v, + int s) { if (s > 0) return _mm_slli_si64 (v, s); @@ -178,7 +180,7 @@ shift (__m64 v, int s) static force_inline __m64 negate (__m64 mask) { - return _mm_xor_si64 (mask, MC(4x00ff)); + return _mm_xor_si64 (mask, MC (4x00ff)); } static force_inline __m64 @@ -187,7 +189,7 @@ pix_multiply (__m64 a, __m64 b) __m64 res; res = _mm_mullo_pi16 (a, b); - res = _mm_adds_pu16 (res, MC(4x0080)); + res = _mm_adds_pu16 (res, MC (4x0080)); res = _mm_adds_pu16 (res, _mm_srli_pi16 (res, 8)); res = _mm_srli_pi16 (res, 8); @@ -197,7 +199,7 @@ pix_multiply (__m64 a, __m64 b) static force_inline __m64 pix_add (__m64 a, __m64 b) { - return _mm_adds_pu8 (a, b); + return _mm_adds_pu8 (a, b); } static force_inline __m64 @@ -238,9 +240,9 @@ invert_colors (__m64 pixel) x = y = z = pixel; - x = _mm_and_si64 (x, MC(ffff0000ffff0000)); - y = _mm_and_si64 (y, MC(000000000000ffff)); - z = _mm_and_si64 (z, MC(0000ffff00000000)); + x = _mm_and_si64 (x, MC (ffff0000ffff0000)); + y = _mm_and_si64 (y, MC (000000000000ffff)); + z = _mm_and_si64 (z, MC (0000ffff00000000)); y = shift (y, 32); z = shift (z, -32); @@ -252,23 +254,24 @@ invert_colors (__m64 pixel) } static force_inline __m64 -over (__m64 src, __m64 srca, __m64 dest) +over (__m64 src, + __m64 srca, + __m64 dest) { - return _mm_adds_pu8 (src, pix_multiply(dest, negate(srca))); + return _mm_adds_pu8 (src, pix_multiply (dest, negate (srca))); } static force_inline __m64 over_rev_non_pre (__m64 src, __m64 dest) { __m64 srca = expand_alpha (src); - __m64 srcfaaa = _mm_or_si64 (srca, MC(full_alpha)); + __m64 srcfaaa = _mm_or_si64 (srca, MC (full_alpha)); - return over(pix_multiply(invert_colors(src), srcfaaa), srca, dest); + return over (pix_multiply (invert_colors (src), srcfaaa), srca, dest); } static force_inline __m64 -in (__m64 src, - __m64 mask) +in (__m64 src, __m64 mask) { return pix_multiply (src, mask); } @@ -276,28 +279,29 @@ in (__m64 src, static force_inline __m64 in_over_full_src_alpha (__m64 src, __m64 mask, __m64 dest) { - src = _mm_or_si64 (src, MC(full_alpha)); + src = _mm_or_si64 (src, MC (full_alpha)); - return over(in (src, mask), mask, dest); + return over (in (src, mask), mask, dest); } #ifndef _MSC_VER static force_inline __m64 -in_over (__m64 src, - __m64 srca, - __m64 mask, - __m64 dest) +in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest) { - return over(in(src, mask), pix_multiply(srca, mask), dest); + return over (in (src, mask), pix_multiply (srca, mask), dest); } + #else -#define in_over(src, srca, mask, dest) over(in(src, mask), pix_multiply(srca, mask), dest) + +#define in_over(src, srca, mask, dest) \ + over (in (src, mask), pix_multiply (srca, mask), dest) + #endif static force_inline __m64 load8888 (uint32_t v) { - return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64()); + return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64 ()); } static force_inline __m64 @@ -309,7 +313,7 @@ pack8888 (__m64 lo, __m64 hi) static force_inline uint32_t store8888 (__m64 v) { - return _mm_cvtsi64_si32(pack8888(v, _mm_setzero_si64())); + return _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ())); 
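    /* load8888 () widens an a8r8g8b8 pixel into four 16-bit lanes of
     * an __m64; store8888 () narrows back through a saturating
     * pack8888 ().  Keeping each channel in the low byte of a 16-bit
     * lane is what lets pix_multiply () do all four channel products
     * in a single _mm_mullo_pi16. */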
} /* Expand 16 bits positioned at @pos (0-3) of a mmx register into @@ -340,9 +344,9 @@ expand565 (__m64 pixel, int pos) p = _mm_or_si64 (t1, p); p = _mm_or_si64 (t2, p); - p = _mm_and_si64 (p, MC(565_rgb)); + p = _mm_and_si64 (p, MC (565_rgb)); - pixel = _mm_mullo_pi16 (p, MC(565_unpack_multiplier)); + pixel = _mm_mullo_pi16 (p, MC (565_unpack_multiplier)); return _mm_srli_pi16 (pixel, 8); } @@ -350,40 +354,40 @@ static force_inline __m64 expand8888 (__m64 in, int pos) { if (pos == 0) - return _mm_unpacklo_pi8 (in, _mm_setzero_si64()); + return _mm_unpacklo_pi8 (in, _mm_setzero_si64 ()); else - return _mm_unpackhi_pi8 (in, _mm_setzero_si64()); + return _mm_unpackhi_pi8 (in, _mm_setzero_si64 ()); } static force_inline __m64 expandx888 (__m64 in, int pos) { - return _mm_or_si64 (expand8888 (in, pos), MC(full_alpha)); + return _mm_or_si64 (expand8888 (in, pos), MC (full_alpha)); } static force_inline __m64 -pack565 (__m64 pixel, __m64 target, int pos) +pack_565 (__m64 pixel, __m64 target, int pos) { __m64 p = pixel; __m64 t = target; __m64 r, g, b; - r = _mm_and_si64 (p, MC(565_r)); - g = _mm_and_si64 (p, MC(565_g)); - b = _mm_and_si64 (p, MC(565_b)); + r = _mm_and_si64 (p, MC (565_r)); + g = _mm_and_si64 (p, MC (565_g)); + b = _mm_and_si64 (p, MC (565_b)); - r = shift (r, - (32 - 8) + pos * 16); - g = shift (g, - (16 - 3) + pos * 16); - b = shift (b, - (0 + 3) + pos * 16); + r = shift (r, -(32 - 8) + pos * 16); + g = shift (g, -(16 - 3) + pos * 16); + b = shift (b, -(0 + 3) + pos * 16); if (pos == 0) - t = _mm_and_si64 (t, MC(mask_0)); + t = _mm_and_si64 (t, MC (mask_0)); else if (pos == 1) - t = _mm_and_si64 (t, MC(mask_1)); + t = _mm_and_si64 (t, MC (mask_1)); else if (pos == 2) - t = _mm_and_si64 (t, MC(mask_2)); + t = _mm_and_si64 (t, MC (mask_2)); else if (pos == 3) - t = _mm_and_si64 (t, MC(mask_3)); + t = _mm_and_si64 (t, MC (mask_3)); p = _mm_or_si64 (r, t); p = _mm_or_si64 (g, p); @@ -392,26 +396,23 @@ pack565 (__m64 pixel, __m64 target, int pos) } #ifndef _MSC_VER + static force_inline __m64 pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b) { - x = _mm_mullo_pi16 (x, a); - y = _mm_mullo_pi16 (y, b); - x = _mm_adds_pu16 (x, MC(4x0080)); - x = _mm_adds_pu16 (x, y); - x = _mm_adds_pu16 (x, _mm_srli_pi16 (x, 8)); - x = _mm_srli_pi16 (x, 8); + x = pix_multiply (x, a); + y = pix_multiply (y, b); - return x; + return pix_add (x, y); } + #else -#define pix_add_mul(x, a, y, b) \ -( x = _mm_mullo_pi16 (x, a), \ - y = _mm_mullo_pi16 (y, b), \ - x = _mm_adds_pu16 (x, MC(4x0080)), \ - x = _mm_adds_pu16 (x, y), \ - x = _mm_adds_pu16 (x, _mm_srli_pi16 (x, 8)), \ - _mm_srli_pi16 (x, 8) ) + +#define pix_add_mul(x, a, y, b) \ + ( x = pix_multiply (x, a), \ + y = pix_multiply (y, a), \ + pix_add (x, y) ) + #endif /* --------------- MMX code patch for fbcompose.c --------------------- */ @@ -435,532 +436,699 @@ combine (const uint32_t *src, const uint32_t *mask) return ssrc; } -static FASTCALL void -mmxCombineOverU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_over_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = dest + width; - while (dest < end) { + while (dest < end) + { uint32_t ssrc = combine (src, mask); uint32_t a = ssrc >> 24; - if (a == 0xff) { + + if (a == 0xff) + { *dest = ssrc; - } else if (ssrc) { + } + else if (ssrc) + { __m64 s, sa; - s = load8888(ssrc); - sa = expand_alpha(s); - 
*dest = store8888(over(s, sa, load8888(*dest))); + s = load8888 (ssrc); + sa = expand_alpha (s); + *dest = store8888 (over (s, sa, load8888 (*dest))); } + ++dest; ++src; if (mask) ++mask; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineOverReverseU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_over_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = dest + width; - while (dest < end) { + while (dest < end) + { __m64 d, da; uint32_t s = combine (src, mask); - d = load8888(*dest); - da = expand_alpha(d); - *dest = store8888(over (d, da, load8888(s))); - ++dest; - ++src; + + d = load8888 (*dest); + da = expand_alpha (d); + *dest = store8888 (over (d, da, load8888 (s))); + + ++dest; + ++src; if (mask) mask++; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineInU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_in_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = dest + width; - while (dest < end) { - __m64 x, a; - x = load8888 (combine (src, mask)); - a = load8888(*dest); - a = expand_alpha(a); - x = pix_multiply(x, a); - *dest = store8888(x); - ++dest; - ++src; + while (dest < end) + { + __m64 x, a; + + x = load8888 (combine (src, mask)); + a = load8888 (*dest); + a = expand_alpha (a); + x = pix_multiply (x, a); + + *dest = store8888 (x); + + ++dest; + ++src; if (mask) mask++; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineInReverseU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_in_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = dest + width; - while (dest < end) { - __m64 x, a; - x = load8888(*dest); - a = load8888(combine (src, mask)); - a = expand_alpha(a); - x = pix_multiply(x, a); - *dest = store8888(x); - ++dest; - ++src; + while (dest < end) + { + __m64 x, a; + + x = load8888 (*dest); + a = load8888 (combine (src, mask)); + a = expand_alpha (a); + x = pix_multiply (x, a); + *dest = store8888 (x); + + ++dest; + ++src; if (mask) mask++; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineOutU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_out_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = dest + width; - while (dest < end) { - __m64 x, a; - x = load8888(combine (src, mask)); - a = load8888(*dest); - a = expand_alpha(a); - a = negate(a); - x = pix_multiply(x, a); - *dest = store8888(x); - ++dest; - ++src; + while (dest < end) + { + __m64 x, a; + + x = load8888 (combine (src, mask)); + a = load8888 (*dest); + a = expand_alpha (a); + a = negate (a); + x = pix_multiply (x, a); + *dest = store8888 (x); + + ++dest; + ++src; if (mask) mask++; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineOutReverseU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t 
*src, const uint32_t *mask, int width) +static void +mmx_combine_out_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = dest + width; - while (dest < end) { - __m64 x, a; - x = load8888(*dest); - a = load8888(combine (src, mask)); - a = expand_alpha(a); - a = negate(a); - x = pix_multiply(x, a); - *dest = store8888(x); - ++dest; - ++src; + while (dest < end) + { + __m64 x, a; + + x = load8888 (*dest); + a = load8888 (combine (src, mask)); + a = expand_alpha (a); + a = negate (a); + x = pix_multiply (x, a); + + *dest = store8888 (x); + + ++dest; + ++src; if (mask) mask++; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineAtopU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_atop_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = dest + width; - while (dest < end) { - __m64 s, da, d, sia; - s = load8888(combine (src, mask)); - d = load8888(*dest); - sia = expand_alpha(s); - sia = negate(sia); - da = expand_alpha(d); - s = pix_add_mul (s, da, d, sia); - *dest = store8888(s); - ++dest; - ++src; + while (dest < end) + { + __m64 s, da, d, sia; + + s = load8888 (combine (src, mask)); + d = load8888 (*dest); + sia = expand_alpha (s); + sia = negate (sia); + da = expand_alpha (d); + s = pix_add_mul (s, da, d, sia); + *dest = store8888 (s); + + ++dest; + ++src; if (mask) mask++; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineAtopReverseU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_atop_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end; end = dest + width; - while (dest < end) { - __m64 s, dia, d, sa; - s = load8888(combine(src, mask)); - d = load8888(*dest); - sa = expand_alpha(s); - dia = expand_alpha(d); - dia = negate(dia); + while (dest < end) + { + __m64 s, dia, d, sa; + + s = load8888 (combine (src, mask)); + d = load8888 (*dest); + sa = expand_alpha (s); + dia = expand_alpha (d); + dia = negate (dia); s = pix_add_mul (s, dia, d, sa); - *dest = store8888(s); - ++dest; - ++src; + *dest = store8888 (s); + + ++dest; + ++src; if (mask) mask++; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineXorU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_xor_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = dest + width; - while (dest < end) { - __m64 s, dia, d, sia; - s = load8888(combine(src, mask)); - d = load8888(*dest); - sia = expand_alpha(s); - dia = expand_alpha(d); - sia = negate(sia); - dia = negate(dia); + while (dest < end) + { + __m64 s, dia, d, sia; + + s = load8888 (combine (src, mask)); + d = load8888 (*dest); + sia = expand_alpha (s); + dia = expand_alpha (d); + sia = negate (sia); + dia = negate (dia); s = pix_add_mul (s, dia, d, sia); - *dest = store8888(s); - ++dest; - ++src; + *dest = store8888 (s); + + ++dest; + ++src; if (mask) mask++; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineAddU 
(pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_add_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = dest + width; - while (dest < end) { - __m64 s, d; - s = load8888(combine(src,mask)); - d = load8888(*dest); - s = pix_add(s, d); - *dest = store8888(s); - ++dest; - ++src; + + while (dest < end) + { + __m64 s, d; + + s = load8888 (combine (src, mask)); + d = load8888 (*dest); + s = pix_add (s, d); + *dest = store8888 (s); + + ++dest; + ++src; if (mask) mask++; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineSaturateU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_saturate_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = dest + width; - while (dest < end) { - uint32_t s = combine(src,mask); - uint32_t d = *dest; - __m64 ms = load8888(s); - __m64 md = load8888(d); - uint32_t sa = s >> 24; - uint32_t da = ~d >> 24; - - if (sa > da) { - __m64 msa = load8888(FbIntDiv(da, sa) << 24); - msa = expand_alpha(msa); - ms = pix_multiply(ms, msa); - } - md = pix_add(md, ms); - *dest = store8888(md); - ++src; - ++dest; + + while (dest < end) + { + uint32_t s = combine (src, mask); + uint32_t d = *dest; + __m64 ms = load8888 (s); + __m64 md = load8888 (d); + uint32_t sa = s >> 24; + uint32_t da = ~d >> 24; + + if (sa > da) + { + __m64 msa = load8888 (DIV_UN8 (da, sa) << 24); + msa = expand_alpha (msa); + ms = pix_multiply (ms, msa); + } + + md = pix_add (md, ms); + *dest = store8888 (md); + + ++src; + ++dest; if (mask) mask++; } - _mm_empty(); + _mm_empty (); } - -static FASTCALL void -mmxCombineSrcC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_src_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = src + width; - while (src < end) { - __m64 a = load8888(*mask); - __m64 s = load8888(*src); - s = pix_multiply(s, a); - *dest = store8888(s); - ++src; - ++mask; - ++dest; + + while (src < end) + { + __m64 a = load8888 (*mask); + __m64 s = load8888 (*src); + + s = pix_multiply (s, a); + *dest = store8888 (s); + + ++src; + ++mask; + ++dest; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineOverC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_over_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = src + width; - while (src < end) { - __m64 a = load8888(*mask); - __m64 s = load8888(*src); - __m64 d = load8888(*dest); - __m64 sa = expand_alpha(s); - *dest = store8888(in_over (s, sa, a, d)); + while (src < end) + { + __m64 a = load8888 (*mask); + __m64 s = load8888 (*src); + __m64 d = load8888 (*dest); + __m64 sa = expand_alpha (s); - ++src; - ++dest; - ++mask; + *dest = store8888 (in_over (s, sa, a, d)); + + ++src; + ++dest; + ++mask; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineOverReverseC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t 
*dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_over_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = src + width; - while (src < end) { - __m64 a = load8888(*mask); - __m64 s = load8888(*src); - __m64 d = load8888(*dest); - __m64 da = expand_alpha(d); - *dest = store8888(over (d, da, in (s, a))); + while (src < end) + { + __m64 a = load8888 (*mask); + __m64 s = load8888 (*src); + __m64 d = load8888 (*dest); + __m64 da = expand_alpha (d); + + *dest = store8888 (over (d, da, in (s, a))); - ++src; - ++dest; - ++mask; + ++src; + ++dest; + ++mask; } - _mm_empty(); + _mm_empty (); } - -static FASTCALL void -mmxCombineInC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_in_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = src + width; - while (src < end) { - __m64 a = load8888(*mask); - __m64 s = load8888(*src); - __m64 d = load8888(*dest); - __m64 da = expand_alpha(d); - s = pix_multiply(s, a); - s = pix_multiply(s, da); - *dest = store8888(s); - ++src; - ++dest; - ++mask; + + while (src < end) + { + __m64 a = load8888 (*mask); + __m64 s = load8888 (*src); + __m64 d = load8888 (*dest); + __m64 da = expand_alpha (d); + + s = pix_multiply (s, a); + s = pix_multiply (s, da); + *dest = store8888 (s); + + ++src; + ++dest; + ++mask; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineInReverseC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_in_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = src + width; - while (src < end) { - __m64 a = load8888(*mask); - __m64 s = load8888(*src); - __m64 d = load8888(*dest); - __m64 sa = expand_alpha(s); - a = pix_multiply(a, sa); - d = pix_multiply(d, a); - *dest = store8888(d); - ++src; - ++dest; - ++mask; + + while (src < end) + { + __m64 a = load8888 (*mask); + __m64 s = load8888 (*src); + __m64 d = load8888 (*dest); + __m64 sa = expand_alpha (s); + + a = pix_multiply (a, sa); + d = pix_multiply (d, a); + *dest = store8888 (d); + + ++src; + ++dest; + ++mask; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineOutC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_out_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = src + width; - while (src < end) { - __m64 a = load8888(*mask); - __m64 s = load8888(*src); - __m64 d = load8888(*dest); - __m64 da = expand_alpha(d); - da = negate(da); - s = pix_multiply(s, a); - s = pix_multiply(s, da); - *dest = store8888(s); - ++src; - ++dest; - ++mask; + + while (src < end) + { + __m64 a = load8888 (*mask); + __m64 s = load8888 (*src); + __m64 d = load8888 (*dest); + __m64 da = expand_alpha (d); + + da = negate (da); + s = pix_multiply (s, a); + s = pix_multiply (s, da); + *dest = store8888 (s); + + ++src; + ++dest; + ++mask; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineOutReverseC (pixman_implementation_t *imp, 
pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_out_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = src + width; - while (src < end) { - __m64 a = load8888(*mask); - __m64 s = load8888(*src); - __m64 d = load8888(*dest); - __m64 sa = expand_alpha(s); - a = pix_multiply(a, sa); - a = negate(a); - d = pix_multiply(d, a); - *dest = store8888(d); - ++src; - ++dest; - ++mask; + + while (src < end) + { + __m64 a = load8888 (*mask); + __m64 s = load8888 (*src); + __m64 d = load8888 (*dest); + __m64 sa = expand_alpha (s); + + a = pix_multiply (a, sa); + a = negate (a); + d = pix_multiply (d, a); + *dest = store8888 (d); + + ++src; + ++dest; + ++mask; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineAtopC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_atop_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = src + width; - while (src < end) { - __m64 a = load8888(*mask); - __m64 s = load8888(*src); - __m64 d = load8888(*dest); - __m64 da = expand_alpha(d); - __m64 sa = expand_alpha(s); - s = pix_multiply(s, a); - a = pix_multiply(a, sa); - a = negate(a); + + while (src < end) + { + __m64 a = load8888 (*mask); + __m64 s = load8888 (*src); + __m64 d = load8888 (*dest); + __m64 da = expand_alpha (d); + __m64 sa = expand_alpha (s); + + s = pix_multiply (s, a); + a = pix_multiply (a, sa); + a = negate (a); d = pix_add_mul (d, a, s, da); - *dest = store8888(d); - ++src; - ++dest; - ++mask; + *dest = store8888 (d); + + ++src; + ++dest; + ++mask; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineAtopReverseC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_atop_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = src + width; - while (src < end) { - __m64 a = load8888(*mask); - __m64 s = load8888(*src); - __m64 d = load8888(*dest); - __m64 da = expand_alpha(d); - __m64 sa = expand_alpha(s); - s = pix_multiply(s, a); - a = pix_multiply(a, sa); - da = negate(da); + + while (src < end) + { + __m64 a = load8888 (*mask); + __m64 s = load8888 (*src); + __m64 d = load8888 (*dest); + __m64 da = expand_alpha (d); + __m64 sa = expand_alpha (s); + + s = pix_multiply (s, a); + a = pix_multiply (a, sa); + da = negate (da); d = pix_add_mul (d, a, s, da); - *dest = store8888(d); - ++src; - ++dest; - ++mask; + *dest = store8888 (d); + + ++src; + ++dest; + ++mask; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineXorC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_xor_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = src + width; - while (src < end) { - __m64 a = load8888(*mask); - __m64 s = load8888(*src); - __m64 d = load8888(*dest); - __m64 da = expand_alpha(d); - __m64 sa = expand_alpha(s); - s = pix_multiply(s, a); - a = pix_multiply(a, sa); - da = negate(da); - a = negate(a); + + 
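    /*
     * Component-alpha XOR, per channel, with m the per-channel mask:
     *
     *     dest = s * m * (1 - da) + d * (1 - sa * m)
     *
     * The rewritten body below forms s * m first, builds the two
     * negated factors with negate (), and combines everything in one
     * pix_add_mul () step.
     */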
while (src < end) + { + __m64 a = load8888 (*mask); + __m64 s = load8888 (*src); + __m64 d = load8888 (*dest); + __m64 da = expand_alpha (d); + __m64 sa = expand_alpha (s); + + s = pix_multiply (s, a); + a = pix_multiply (a, sa); + da = negate (da); + a = negate (a); d = pix_add_mul (d, a, s, da); - *dest = store8888(d); - ++src; - ++dest; - ++mask; + *dest = store8888 (d); + + ++src; + ++dest; + ++mask; } - _mm_empty(); + _mm_empty (); } -static FASTCALL void -mmxCombineAddC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +mmx_combine_add_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { const uint32_t *end = src + width; - while (src < end) { - __m64 a = load8888(*mask); - __m64 s = load8888(*src); - __m64 d = load8888(*dest); - s = pix_multiply(s, a); - d = pix_add(s, d); - *dest = store8888(d); - ++src; - ++dest; - ++mask; + + while (src < end) + { + __m64 a = load8888 (*mask); + __m64 s = load8888 (*src); + __m64 d = load8888 (*dest); + + s = pix_multiply (s, a); + d = pix_add (s, d); + *dest = store8888 (d); + + ++src; + ++dest; + ++mask; } - _mm_empty(); + _mm_empty (); } -/* ------------------ MMX code paths called from fbpict.c ----------------------- */ +/* ------------- MMX code paths called from fbpict.c -------------------- */ static void -fbCompositeSolid_nx8888mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t src; - uint32_t *dstLine, *dst; - uint16_t w; - int dstStride; - __m64 vsrc, vsrca; - - CHECKPOINT(); - - fbComposeGetSolid(pSrc, src, pDst->bits.format); - - if (src >> 24 == 0) +mmx_composite_over_n_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src; + uint32_t *dst_line, *dst; + uint16_t w; + int dst_stride; + __m64 vsrc, vsrca; + + CHECKPOINT (); + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); + + if (src == 0) return; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); vsrc = load8888 (src); vsrca = expand_alpha (vsrc); while (height--) { - dst = dstLine; - dstLine += dstStride; + dst = dst_line; + dst_line += dst_stride; w = width; - CHECKPOINT(); + CHECKPOINT (); while (w && (unsigned long)dst & 7) { - *dst = store8888(over(vsrc, vsrca, load8888(*dst))); + *dst = store8888 (over (vsrc, vsrca, load8888 (*dst))); w--; dst++; @@ -973,76 +1141,77 @@ fbCompositeSolid_nx8888mmx (pixman_implementation_t *imp, vdest = *(__m64 *)dst; - dest0 = over(vsrc, vsrca, expand8888(vdest, 0)); - dest1 = over(vsrc, vsrca, expand8888(vdest, 1)); + dest0 = over (vsrc, vsrca, expand8888 (vdest, 0)); + dest1 = over (vsrc, vsrca, expand8888 (vdest, 1)); - *(__m64 *)dst = pack8888(dest0, dest1); + *(__m64 *)dst = pack8888 (dest0, dest1); dst += 2; w -= 2; } - CHECKPOINT(); + CHECKPOINT (); while (w) { - *dst = store8888(over(vsrc, vsrca, load8888(*dst))); + *dst = store8888 (over (vsrc, vsrca, load8888 (*dst))); w--; dst++; 
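	    /* Tail loop: pixels left over after the aligned
	     * two-at-a-time __m64 body are composited one at a time,
	     * mirroring the head loop that brought dst up to 8-byte
	     * alignment. */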
} } - _mm_empty(); + _mm_empty (); } static void -fbCompositeSolid_nx0565mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t src; - uint16_t *dstLine, *dst; - uint16_t w; - int dstStride; - __m64 vsrc, vsrca; - - CHECKPOINT(); - - fbComposeGetSolid(pSrc, src, pDst->bits.format); - - if (src >> 24 == 0) +mmx_composite_over_n_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src; + uint16_t *dst_line, *dst; + uint16_t w; + int dst_stride; + __m64 vsrc, vsrca; + + CHECKPOINT (); + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); + + if (src == 0) return; - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); vsrc = load8888 (src); vsrca = expand_alpha (vsrc); while (height--) { - dst = dstLine; - dstLine += dstStride; + dst = dst_line; + dst_line += dst_stride; w = width; - CHECKPOINT(); + CHECKPOINT (); while (w && (unsigned long)dst & 7) { uint64_t d = *dst; - __m64 vdest = expand565 (M64(d), 0); - vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0); - *dst = UINT64(vdest); + __m64 vdest = expand565 (M64 (d), 0); + + vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0); + *dst = UINT64 (vdest); w--; dst++; @@ -1054,10 +1223,10 @@ fbCompositeSolid_nx0565mmx (pixman_implementation_t *imp, vdest = *(__m64 *)dst; - vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 0)), vdest, 0); - vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 1)), vdest, 1); - vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 2)), vdest, 2); - vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 3)), vdest, 3); + vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 0)), vdest, 0); + vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 1)), vdest, 1); + vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 2)), vdest, 2); + vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 3)), vdest, 3); *(__m64 *)dst = vdest; @@ -1065,63 +1234,64 @@ fbCompositeSolid_nx0565mmx (pixman_implementation_t *imp, w -= 4; } - CHECKPOINT(); + CHECKPOINT (); while (w) { uint64_t d = *dst; - __m64 vdest = expand565 (M64(d), 0); - vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0); - *dst = UINT64(vdest); + __m64 vdest = expand565 (M64 (d), 0); + + vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0); + *dst = UINT64 (vdest); w--; dst++; } } - _mm_empty(); + _mm_empty (); } static void -fbCompositeSolidMask_nx8888x8888Cmmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t src, srca; - uint32_t *dstLine; - uint32_t *maskLine; - int dstStride, maskStride; - __m64 vsrc, vsrca; - - CHECKPOINT(); - - fbComposeGetSolid(pSrc, src, pDst->bits.format); +mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t 
src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint32_t *dst_line; + uint32_t *mask_line; + int dst_stride, mask_stride; + __m64 vsrc, vsrca; + + CHECKPOINT (); + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); srca = src >> 24; - if (srca == 0) + if (src == 0) return; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint32_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - vsrc = load8888(src); - vsrca = expand_alpha(vsrc); + vsrc = load8888 (src); + vsrca = expand_alpha (vsrc); while (height--) { int twidth = width; - uint32_t *p = (uint32_t *)maskLine; - uint32_t *q = (uint32_t *)dstLine; + uint32_t *p = (uint32_t *)mask_line; + uint32_t *q = (uint32_t *)dst_line; while (twidth && (unsigned long)q & 7) { @@ -1129,9 +1299,9 @@ fbCompositeSolidMask_nx8888x8888Cmmx (pixman_implementation_t *imp, if (m) { - __m64 vdest = load8888(*q); - vdest = in_over(vsrc, vsrca, load8888(m), vdest); - *q = store8888(vdest); + __m64 vdest = load8888 (*q); + vdest = in_over (vsrc, vsrca, load8888 (m), vdest); + *q = store8888 (vdest); } twidth--; @@ -1150,12 +1320,12 @@ fbCompositeSolidMask_nx8888x8888Cmmx (pixman_implementation_t *imp, __m64 dest0, dest1; __m64 vdest = *(__m64 *)q; - dest0 = in_over(vsrc, vsrca, load8888(m0), - expand8888 (vdest, 0)); - dest1 = in_over(vsrc, vsrca, load8888(m1), - expand8888 (vdest, 1)); + dest0 = in_over (vsrc, vsrca, load8888 (m0), + expand8888 (vdest, 0)); + dest1 = in_over (vsrc, vsrca, load8888 (m1), + expand8888 (vdest, 1)); - *(__m64 *)q = pack8888(dest0, dest1); + *(__m64 *)q = pack8888 (dest0, dest1); } p += 2; @@ -1169,9 +1339,9 @@ fbCompositeSolidMask_nx8888x8888Cmmx (pixman_implementation_t *imp, if (m) { - __m64 vdest = load8888(*q); - vdest = in_over(vsrc, vsrca, load8888(m), vdest); - *q = store8888(vdest); + __m64 vdest = load8888 (*q); + vdest = in_over (vsrc, vsrca, load8888 (m), vdest); + *q = store8888 (vdest); } twidth--; @@ -1179,52 +1349,52 @@ fbCompositeSolidMask_nx8888x8888Cmmx (pixman_implementation_t *imp, q++; } - dstLine += dstStride; - maskLine += maskStride; + dst_line += dst_stride; + mask_line += mask_stride; } - _mm_empty(); + _mm_empty (); } static void -fbCompositeSrc_8888x8x8888mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - uint32_t mask; - __m64 vmask; - int dstStride, srcStride; - uint16_t w; - __m64 srca; - - CHECKPOINT(); - - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); - - fbComposeGetSolid (pMask, mask, pDst->bits.format); +mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + uint32_t mask; + __m64 
vmask; + int dst_stride, src_stride; + uint16_t w; + __m64 srca; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + mask = _pixman_image_get_solid (mask_image, dst_image->bits.format); mask = mask | mask >> 8 | mask >> 16 | mask >> 24; vmask = load8888 (mask); - srca = MC(4x00ff); + srca = MC (4x00ff); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w && (unsigned long)dst & 7) @@ -1247,8 +1417,8 @@ fbCompositeSrc_8888x8x8888mmx (pixman_implementation_t *imp, __m64 vsrc1 = expand8888 (vs, 1); *(__m64 *)dst = pack8888 ( - in_over (vsrc0, expand_alpha (vsrc0), vmask, expand8888 (vd, 0)), - in_over (vsrc1, expand_alpha (vsrc1), vmask, expand8888 (vd, 1))); + in_over (vsrc0, expand_alpha (vsrc0), vmask, expand8888 (vd, 0)), + in_over (vsrc1, expand_alpha (vsrc1), vmask, expand8888 (vd, 1))); w -= 2; dst += 2; @@ -1268,48 +1438,48 @@ fbCompositeSrc_8888x8x8888mmx (pixman_implementation_t *imp, } } - _mm_empty(); + _mm_empty (); } static void -fbCompositeSrc_x888xnx8888mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - uint32_t mask; - __m64 vmask; - int dstStride, srcStride; - uint16_t w; - __m64 srca; - - CHECKPOINT(); - - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); - fbComposeGetSolid (pMask, mask, pDst->bits.format); +mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + uint32_t mask; + __m64 vmask; + int dst_stride, src_stride; + uint16_t w; + __m64 srca; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + mask = _pixman_image_get_solid (mask_image, dst_image->bits.format); mask = mask | mask >> 8 | mask >> 16 | mask >> 24; vmask = load8888 (mask); - srca = MC(4x00ff); + srca = MC (4x00ff); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w && (unsigned long)dst & 7) @@ -1345,36 +1515,36 @@ fbCompositeSrc_x888xnx8888mmx (pixman_implementation_t *imp, __m64 vs7 = *(__m64 *)(src + 14); vd0 = pack8888 ( - in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)), - in_over (expandx888 (vs0, 1), srca, vmask, expand8888 (vd0, 1))); + in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)), + in_over (expandx888 (vs0, 1), srca, vmask, expand8888 (vd0, 1))); vd1 = pack8888 ( - in_over (expandx888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)), - in_over (expandx888 (vs1, 1), srca, vmask, expand8888 (vd1, 1))); + 
in_over (expandx888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)), + in_over (expandx888 (vs1, 1), srca, vmask, expand8888 (vd1, 1))); vd2 = pack8888 ( - in_over (expandx888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)), - in_over (expandx888 (vs2, 1), srca, vmask, expand8888 (vd2, 1))); + in_over (expandx888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)), + in_over (expandx888 (vs2, 1), srca, vmask, expand8888 (vd2, 1))); vd3 = pack8888 ( - in_over (expandx888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)), - in_over (expandx888 (vs3, 1), srca, vmask, expand8888 (vd3, 1))); + in_over (expandx888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)), + in_over (expandx888 (vs3, 1), srca, vmask, expand8888 (vd3, 1))); vd4 = pack8888 ( - in_over (expandx888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)), - in_over (expandx888 (vs4, 1), srca, vmask, expand8888 (vd4, 1))); + in_over (expandx888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)), + in_over (expandx888 (vs4, 1), srca, vmask, expand8888 (vd4, 1))); vd5 = pack8888 ( - in_over (expandx888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)), - in_over (expandx888 (vs5, 1), srca, vmask, expand8888 (vd5, 1))); + in_over (expandx888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)), + in_over (expandx888 (vs5, 1), srca, vmask, expand8888 (vd5, 1))); - vd6 = pack8888 ( - in_over (expandx888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)), - in_over (expandx888 (vs6, 1), srca, vmask, expand8888 (vd6, 1))); + vd6 = pack8888 ( + in_over (expandx888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)), + in_over (expandx888 (vs6, 1), srca, vmask, expand8888 (vd6, 1))); vd7 = pack8888 ( - in_over (expandx888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)), - in_over (expandx888 (vs7, 1), srca, vmask, expand8888 (vd7, 1))); + in_over (expandx888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)), + in_over (expandx888 (vs7, 1), srca, vmask, expand8888 (vd7, 1))); *(__m64 *)(dst + 0) = vd0; *(__m64 *)(dst + 2) = vd1; @@ -1403,135 +1573,141 @@ fbCompositeSrc_x888xnx8888mmx (pixman_implementation_t *imp, } } - _mm_empty(); + _mm_empty (); } static void -fbCompositeSrc_8888x8888mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - uint32_t s; - int dstStride, srcStride; - uint8_t a; - uint16_t w; - - CHECKPOINT(); - - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); +mmx_composite_over_8888_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + uint32_t s; + int dst_stride, src_stride; + uint8_t a; + uint16_t w; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w--) { s = *src++; a = s >> 24; + if (a == 0xff) + { *dst = s; - 
else if (s) { + } + else if (s) + { __m64 ms, sa; - ms = load8888(s); - sa = expand_alpha(ms); - *dst = store8888(over(ms, sa, load8888(*dst))); + ms = load8888 (s); + sa = expand_alpha (ms); + *dst = store8888 (over (ms, sa, load8888 (*dst))); } + dst++; } } - _mm_empty(); + _mm_empty (); } static void -fbCompositeSrc_8888x0565mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint16_t *dstLine, *dst; - uint32_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; - - CHECKPOINT(); - - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); +mmx_composite_over_8888_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint16_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + uint16_t w; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); #if 0 /* FIXME */ - assert (pSrc->pDrawable == pMask->pDrawable); + assert (src_image->drawable == mask_image->drawable); #endif while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; - CHECKPOINT(); + CHECKPOINT (); while (w && (unsigned long)dst & 7) { __m64 vsrc = load8888 (*src); uint64_t d = *dst; - __m64 vdest = expand565 (M64(d), 0); + __m64 vdest = expand565 (M64 (d), 0); - vdest = pack565(over(vsrc, expand_alpha(vsrc), vdest), vdest, 0); + vdest = pack_565 ( + over (vsrc, expand_alpha (vsrc), vdest), vdest, 0); - *dst = UINT64(vdest); + *dst = UINT64 (vdest); w--; dst++; src++; } - CHECKPOINT(); + CHECKPOINT (); while (w >= 4) { __m64 vsrc0, vsrc1, vsrc2, vsrc3; __m64 vdest; - vsrc0 = load8888(*(src + 0)); - vsrc1 = load8888(*(src + 1)); - vsrc2 = load8888(*(src + 2)); - vsrc3 = load8888(*(src + 3)); + vsrc0 = load8888 (*(src + 0)); + vsrc1 = load8888 (*(src + 1)); + vsrc2 = load8888 (*(src + 2)); + vsrc3 = load8888 (*(src + 3)); vdest = *(__m64 *)dst; - vdest = pack565(over(vsrc0, expand_alpha(vsrc0), expand565(vdest, 0)), vdest, 0); - vdest = pack565(over(vsrc1, expand_alpha(vsrc1), expand565(vdest, 1)), vdest, 1); - vdest = pack565(over(vsrc2, expand_alpha(vsrc2), expand565(vdest, 2)), vdest, 2); - vdest = pack565(over(vsrc3, expand_alpha(vsrc3), expand565(vdest, 3)), vdest, 3); + vdest = pack_565 (over (vsrc0, expand_alpha (vsrc0), expand565 (vdest, 0)), vdest, 0); + vdest = pack_565 (over (vsrc1, expand_alpha (vsrc1), expand565 (vdest, 1)), vdest, 1); + vdest = pack_565 (over (vsrc2, expand_alpha (vsrc2), expand565 (vdest, 2)), vdest, 2); + vdest = pack_565 (over (vsrc3, expand_alpha (vsrc3), expand565 (vdest, 3)), vdest, 3); *(__m64 *)dst = vdest; @@ -1540,17 +1716,17 @@ fbCompositeSrc_8888x0565mmx (pixman_implementation_t *imp, src += 4; } - CHECKPOINT(); + CHECKPOINT (); while (w) { __m64 vsrc = load8888 (*src); uint64_t d = *dst; - __m64 vdest = expand565 (M64(d), 0); + __m64 vdest = 
expand565 (M64 (d), 0); - vdest = pack565(over(vsrc, expand_alpha(vsrc), vdest), vdest, 0); + vdest = pack_565 (over (vsrc, expand_alpha (vsrc), vdest), vdest, 0); - *dst = UINT64(vdest); + *dst = UINT64 (vdest); w--; dst++; @@ -1558,57 +1734,57 @@ fbCompositeSrc_8888x0565mmx (pixman_implementation_t *imp, } } - _mm_empty(); + _mm_empty (); } static void -fbCompositeSolidMask_nx8x8888mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t src, srca; - uint32_t *dstLine, *dst; - uint8_t *maskLine, *mask; - int dstStride, maskStride; - uint16_t w; - __m64 vsrc, vsrca; - uint64_t srcsrc; - - CHECKPOINT(); - - fbComposeGetSolid(pSrc, src, pDst->bits.format); +mmx_composite_over_n_8_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint16_t w; + __m64 vsrc, vsrca; + uint64_t srcsrc; + + CHECKPOINT (); + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); srca = src >> 24; - if (srca == 0) + if (src == 0) return; srcsrc = (uint64_t)src << 32 | src; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); vsrc = load8888 (src); vsrca = expand_alpha (vsrc); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; - CHECKPOINT(); + CHECKPOINT (); while (w && (unsigned long)dst & 7) { @@ -1616,8 +1792,11 @@ fbCompositeSolidMask_nx8x8888mmx (pixman_implementation_t *imp, if (m) { - __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev (M64(m)), load8888(*dst)); - *dst = store8888(vdest); + __m64 vdest = in_over (vsrc, vsrca, + expand_alpha_rev (M64 (m)), + load8888 (*dst)); + + *dst = store8888 (vdest); } w--; @@ -1625,11 +1804,12 @@ fbCompositeSolidMask_nx8x8888mmx (pixman_implementation_t *imp, dst++; } - CHECKPOINT(); + CHECKPOINT (); while (w >= 2) { uint64_t m0, m1; + m0 = *mask; m1 = *(mask + 1); @@ -1644,10 +1824,12 @@ fbCompositeSolidMask_nx8x8888mmx (pixman_implementation_t *imp, vdest = *(__m64 *)dst; - dest0 = in_over(vsrc, vsrca, expand_alpha_rev (M64(m0)), expand8888(vdest, 0)); - dest1 = in_over(vsrc, vsrca, expand_alpha_rev (M64(m1)), expand8888(vdest, 1)); + dest0 = in_over (vsrc, vsrca, expand_alpha_rev (M64 (m0)), + expand8888 (vdest, 0)); + dest1 = in_over (vsrc, vsrca, expand_alpha_rev (M64 (m1)), + expand8888 (vdest, 1)); - *(__m64 *)dst = pack8888(dest0, dest1); + *(__m64 *)dst = pack8888 (dest0, dest1); } mask += 2; @@ -1655,7 +1837,7 @@ fbCompositeSolidMask_nx8x8888mmx (pixman_implementation_t *imp, w -= 2; } - CHECKPOINT(); + CHECKPOINT (); while (w) { @@ -1663,9 +1845,11 @@ fbCompositeSolidMask_nx8x8888mmx (pixman_implementation_t *imp, if (m) { - __m64 vdest = load8888(*dst); - vdest = 
in_over(vsrc, vsrca, expand_alpha_rev (M64(m)), vdest); - *dst = store8888(vdest); + __m64 vdest = load8888 (*dst); + + vdest = in_over ( + vsrc, vsrca, expand_alpha_rev (M64 (m)), vdest); + *dst = store8888 (vdest); } w--; @@ -1674,25 +1858,26 @@ fbCompositeSolidMask_nx8x8888mmx (pixman_implementation_t *imp, } } - _mm_empty(); + _mm_empty (); } pixman_bool_t pixman_fill_mmx (uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor) -{ - uint64_t fill; - __m64 vfill; - uint32_t byte_width; - uint8_t *byte_line; + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + uint64_t fill; + __m64 vfill; + uint32_t byte_width; + uint8_t *byte_line; + #ifdef __GNUC__ - __m64 v1, v2, v3, v4, v5, v6, v7; + __m64 v1, v2, v3, v4, v5, v6, v7; #endif if (bpp != 16 && bpp != 32 && bpp != 8) @@ -1702,12 +1887,12 @@ pixman_fill_mmx (uint32_t *bits, return FALSE; if (bpp == 8 && - ((xor >> 16 != (xor & 0xffff)) || - (xor >> 24 != (xor & 0x00ff) >> 16))) + ((xor >> 16 != (xor & 0xffff)) || + (xor >> 24 != (xor & 0x00ff) >> 16))) { return FALSE; } - + if (bpp == 8) { stride = stride * (int) sizeof (uint32_t) / 1; @@ -1731,19 +1916,19 @@ pixman_fill_mmx (uint32_t *bits, } fill = ((uint64_t)xor << 32) | xor; - vfill = M64(fill); + vfill = M64 (fill); #ifdef __GNUC__ __asm__ ( - "movq %7, %0\n" - "movq %7, %1\n" - "movq %7, %2\n" - "movq %7, %3\n" - "movq %7, %4\n" - "movq %7, %5\n" - "movq %7, %6\n" + "movq %7, %0\n" + "movq %7, %1\n" + "movq %7, %2\n" + "movq %7, %3\n" + "movq %7, %4\n" + "movq %7, %5\n" + "movq %7, %6\n" : "=y" (v1), "=y" (v2), "=y" (v3), - "=y" (v4), "=y" (v5), "=y" (v6), "=y" (v7) + "=y" (v4), "=y" (v5), "=y" (v6), "=y" (v7) : "y" (vfill)); #endif @@ -1751,6 +1936,7 @@ pixman_fill_mmx (uint32_t *bits, { int w; uint8_t *d = byte_line; + byte_line += stride; w = byte_width; @@ -1760,7 +1946,7 @@ pixman_fill_mmx (uint32_t *bits, w--; d++; } - + while (w >= 2 && ((unsigned long)d & 3)) { *(uint16_t *)d = xor; @@ -1780,18 +1966,18 @@ pixman_fill_mmx (uint32_t *bits, { #ifdef __GNUC__ __asm__ ( - "movq %1, (%0)\n" - "movq %2, 8(%0)\n" - "movq %3, 16(%0)\n" - "movq %4, 24(%0)\n" - "movq %5, 32(%0)\n" - "movq %6, 40(%0)\n" - "movq %7, 48(%0)\n" - "movq %8, 56(%0)\n" + "movq %1, (%0)\n" + "movq %2, 8(%0)\n" + "movq %3, 16(%0)\n" + "movq %4, 24(%0)\n" + "movq %5, 32(%0)\n" + "movq %6, 40(%0)\n" + "movq %7, 48(%0)\n" + "movq %8, 56(%0)\n" : : "r" (d), - "y" (vfill), "y" (v1), "y" (v2), "y" (v3), - "y" (v4), "y" (v5), "y" (v6), "y" (v7) + "y" (vfill), "y" (v1), "y" (v2), "y" (v3), + "y" (v4), "y" (v5), "y" (v6), "y" (v7) : "memory"); #else *(__m64*) (d + 0) = vfill; @@ -1826,65 +2012,66 @@ pixman_fill_mmx (uint32_t *bits, w--; d++; } - + } - _mm_empty(); + _mm_empty (); return TRUE; } static void -fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t src, srca; - uint32_t *dstLine, *dst; - uint8_t *maskLine, *mask; - int dstStride, maskStride; - uint16_t w; - __m64 vsrc, vsrca; - uint64_t srcsrc; - - CHECKPOINT(); - - fbComposeGetSolid(pSrc, src, pDst->bits.format); +mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t 
mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint16_t w; + __m64 vsrc, vsrca; + uint64_t srcsrc; + + CHECKPOINT (); + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); srca = src >> 24; - if (srca == 0) + if (src == 0) { - pixman_fill_mmx (pDst->bits.bits, pDst->bits.rowstride, PIXMAN_FORMAT_BPP (pDst->bits.format), - xDst, yDst, width, height, 0); + pixman_fill_mmx (dst_image->bits.bits, dst_image->bits.rowstride, + PIXMAN_FORMAT_BPP (dst_image->bits.format), + dest_x, dest_y, width, height, 0); return; } srcsrc = (uint64_t)src << 32 | src; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); vsrc = load8888 (src); vsrca = expand_alpha (vsrc); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; - CHECKPOINT(); + CHECKPOINT (); while (w && (unsigned long)dst & 7) { @@ -1892,8 +2079,9 @@ fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_implementation_t *imp, if (m) { - __m64 vdest = in(vsrc, expand_alpha_rev (M64(m))); - *dst = store8888(vdest); + __m64 vdest = in (vsrc, expand_alpha_rev (M64 (m))); + + *dst = store8888 (vdest); } else { @@ -1905,7 +2093,7 @@ fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_implementation_t *imp, dst++; } - CHECKPOINT(); + CHECKPOINT (); while (w >= 2) { @@ -1924,10 +2112,10 @@ fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_implementation_t *imp, vdest = *(__m64 *)dst; - dest0 = in(vsrc, expand_alpha_rev (M64(m0))); - dest1 = in(vsrc, expand_alpha_rev (M64(m1))); + dest0 = in (vsrc, expand_alpha_rev (M64 (m0))); + dest1 = in (vsrc, expand_alpha_rev (M64 (m1))); - *(__m64 *)dst = pack8888(dest0, dest1); + *(__m64 *)dst = pack8888 (dest0, dest1); } else { @@ -1939,7 +2127,7 @@ fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_implementation_t *imp, w -= 2; } - CHECKPOINT(); + CHECKPOINT (); while (w) { @@ -1947,9 +2135,10 @@ fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_implementation_t *imp, if (m) { - __m64 vdest = load8888(*dst); - vdest = in(vsrc, expand_alpha_rev (M64(m))); - *dst = store8888(vdest); + __m64 vdest = load8888 (*dst); + + vdest = in (vsrc, expand_alpha_rev (M64 (m))); + *dst = store8888 (vdest); } else { @@ -1962,61 +2151,62 @@ fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_implementation_t *imp, } } - _mm_empty(); + _mm_empty (); } static void -fbCompositeSolidMask_nx8x0565mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t src, srca; - uint16_t *dstLine, *dst; - uint8_t *maskLine, *mask; - int dstStride, maskStride; - uint16_t w; - __m64 vsrc, vsrca, tmp; +mmx_composite_over_n_8_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t 
width, + int32_t height) +{ + uint32_t src, srca; + uint16_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint16_t w; + __m64 vsrc, vsrca, tmp; uint64_t srcsrcsrcsrc, src16; - CHECKPOINT(); + CHECKPOINT (); - fbComposeGetSolid(pSrc, src, pDst->bits.format); + src = _pixman_image_get_solid (src_image, dst_image->bits.format); srca = src >> 24; - if (srca == 0) + if (src == 0) return; - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); vsrc = load8888 (src); vsrca = expand_alpha (vsrc); - tmp = pack565(vsrc, _mm_setzero_si64(), 0); - src16 = UINT64(tmp); + tmp = pack_565 (vsrc, _mm_setzero_si64 (), 0); + src16 = UINT64 (tmp); - srcsrcsrcsrc = (uint64_t)src16 << 48 | (uint64_t)src16 << 32 | + srcsrcsrcsrc = + (uint64_t)src16 << 48 | (uint64_t)src16 << 32 | (uint64_t)src16 << 16 | (uint64_t)src16; while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; - CHECKPOINT(); + CHECKPOINT (); while (w && (unsigned long)dst & 7) { @@ -2025,10 +2215,12 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_implementation_t *imp, if (m) { uint64_t d = *dst; - __m64 vd = M64(d); - __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev (M64 (m)), expand565(vd, 0)); - vd = pack565(vdest, _mm_setzero_si64(), 0); - *dst = UINT64(vd); + __m64 vd = M64 (d); + __m64 vdest = in_over ( + vsrc, vsrca, expand_alpha_rev (M64 (m)), expand565 (vd, 0)); + + vd = pack_565 (vdest, _mm_setzero_si64 (), 0); + *dst = UINT64 (vd); } w--; @@ -2036,7 +2228,7 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_implementation_t *imp, dst++; } - CHECKPOINT(); + CHECKPOINT (); while (w >= 4) { @@ -2057,14 +2249,18 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_implementation_t *imp, vdest = *(__m64 *)dst; - vm0 = M64(m0); - vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm0), expand565(vdest, 0)), vdest, 0); - vm1 = M64(m1); - vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm1), expand565(vdest, 1)), vdest, 1); - vm2 = M64(m2); - vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm2), expand565(vdest, 2)), vdest, 2); - vm3 = M64(m3); - vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm3), expand565(vdest, 3)), vdest, 3); + vm0 = M64 (m0); + vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm0), + expand565 (vdest, 0)), vdest, 0); + vm1 = M64 (m1); + vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm1), + expand565 (vdest, 1)), vdest, 1); + vm2 = M64 (m2); + vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm2), + expand565 (vdest, 2)), vdest, 2); + vm3 = M64 (m3); + vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm3), + expand565 (vdest, 3)), vdest, 3); *(__m64 *)dst = vdest; } @@ -2074,7 +2270,7 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_implementation_t *imp, dst += 4; } - CHECKPOINT(); + CHECKPOINT (); while (w) { @@ -2083,10 +2279,11 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_implementation_t *imp, if (m) { uint64_t d = *dst; - __m64 vd = M64(d); - __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev (M64(m)), expand565(vd, 0)); - vd = pack565(vdest, _mm_setzero_si64(), 0); - *dst = UINT64(vd); + __m64 vd = M64 (d); + __m64 vdest = 
in_over (vsrc, vsrca, expand_alpha_rev (M64 (m)), + expand565 (vd, 0)); + vd = pack_565 (vdest, _mm_setzero_si64 (), 0); + *dst = UINT64 (vd); } w--; @@ -2095,65 +2292,65 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_implementation_t *imp, } } - _mm_empty(); + _mm_empty (); } static void -fbCompositeSrc_8888RevNPx0565mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint16_t *dstLine, *dst; - uint32_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; - - CHECKPOINT(); - - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); +mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint16_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + uint16_t w; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); #if 0 /* FIXME */ - assert (pSrc->pDrawable == pMask->pDrawable); + assert (src_image->drawable == mask_image->drawable); #endif while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; - CHECKPOINT(); + CHECKPOINT (); while (w && (unsigned long)dst & 7) { __m64 vsrc = load8888 (*src); uint64_t d = *dst; - __m64 vdest = expand565 (M64(d), 0); + __m64 vdest = expand565 (M64 (d), 0); - vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0); + vdest = pack_565 (over_rev_non_pre (vsrc, vdest), vdest, 0); - *dst = UINT64(vdest); + *dst = UINT64 (vdest); w--; dst++; src++; } - CHECKPOINT(); + CHECKPOINT (); while (w >= 4) { @@ -2173,10 +2370,10 @@ fbCompositeSrc_8888RevNPx0565mmx (pixman_implementation_t *imp, if ((a0 & a1 & a2 & a3) == 0xFF) { __m64 vdest; - vdest = pack565(invert_colors(load8888(s0)), _mm_setzero_si64(), 0); - vdest = pack565(invert_colors(load8888(s1)), vdest, 1); - vdest = pack565(invert_colors(load8888(s2)), vdest, 2); - vdest = pack565(invert_colors(load8888(s3)), vdest, 3); + vdest = pack_565 (invert_colors (load8888 (s0)), _mm_setzero_si64 (), 0); + vdest = pack_565 (invert_colors (load8888 (s1)), vdest, 1); + vdest = pack_565 (invert_colors (load8888 (s2)), vdest, 2); + vdest = pack_565 (invert_colors (load8888 (s3)), vdest, 3); *(__m64 *)dst = vdest; } @@ -2184,10 +2381,10 @@ fbCompositeSrc_8888RevNPx0565mmx (pixman_implementation_t *imp, { __m64 vdest = *(__m64 *)dst; - vdest = pack565(over_rev_non_pre(load8888(s0), expand565(vdest, 0)), vdest, 0); - vdest = pack565(over_rev_non_pre(load8888(s1), expand565(vdest, 1)), vdest, 1); - vdest = pack565(over_rev_non_pre(load8888(s2), expand565(vdest, 2)), vdest, 2); - vdest = pack565(over_rev_non_pre(load8888(s3), expand565(vdest, 3)), vdest, 3); + vdest = pack_565 (over_rev_non_pre (load8888 (s0), expand565 (vdest, 0)), vdest, 0); + vdest = pack_565 (over_rev_non_pre (load8888 (s1), expand565 (vdest, 1)), vdest, 1); + vdest = pack_565 (over_rev_non_pre 
(load8888 (s2), expand565 (vdest, 2)), vdest, 2); + vdest = pack_565 (over_rev_non_pre (load8888 (s3), expand565 (vdest, 3)), vdest, 3); *(__m64 *)dst = vdest; } @@ -2197,17 +2394,17 @@ fbCompositeSrc_8888RevNPx0565mmx (pixman_implementation_t *imp, src += 4; } - CHECKPOINT(); + CHECKPOINT (); while (w) { __m64 vsrc = load8888 (*src); uint64_t d = *dst; - __m64 vdest = expand565 (M64(d), 0); + __m64 vdest = expand565 (M64 (d), 0); - vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0); + vdest = pack_565 (over_rev_non_pre (vsrc, vdest), vdest, 0); - *dst = UINT64(vdest); + *dst = UINT64 (vdest); w--; dst++; @@ -2215,47 +2412,45 @@ fbCompositeSrc_8888RevNPx0565mmx (pixman_implementation_t *imp, } } - _mm_empty(); + _mm_empty (); } -/* "8888RevNP" is GdkPixbuf's format: ABGR, non premultiplied */ - static void -fbCompositeSrc_8888RevNPx8888mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; - - CHECKPOINT(); - - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); +mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + uint16_t w; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); #if 0 /* FIXME */ - assert (pSrc->pDrawable == pMask->pDrawable); + assert (src_image->drawable == mask_image->drawable); #endif while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w && (unsigned long)dst & 7) @@ -2284,8 +2479,8 @@ fbCompositeSrc_8888RevNPx8888mmx (pixman_implementation_t *imp, if ((a0 & a1) == 0xFF) { - d0 = invert_colors(load8888(s0)); - d1 = invert_colors(load8888(s1)); + d0 = invert_colors (load8888 (s0)); + d1 = invert_colors (load8888 (s1)); *(__m64 *)dst = pack8888 (d0, d1); } @@ -2293,8 +2488,8 @@ fbCompositeSrc_8888RevNPx8888mmx (pixman_implementation_t *imp, { __m64 vdest = *(__m64 *)dst; - d0 = over_rev_non_pre (load8888(s0), expand8888 (vdest, 0)); - d1 = over_rev_non_pre (load8888(s1), expand8888 (vdest, 1)); + d0 = over_rev_non_pre (load8888 (s0), expand8888 (vdest, 0)); + d1 = over_rev_non_pre (load8888 (s1), expand8888 (vdest, 1)); *(__m64 *)dst = pack8888 (d0, d1); } @@ -2317,40 +2512,40 @@ fbCompositeSrc_8888RevNPx8888mmx (pixman_implementation_t *imp, } } - _mm_empty(); + _mm_empty (); } static void -fbCompositeSolidMask_nx8888x0565Cmmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t src, srca; - uint16_t *dstLine; - uint32_t 
*maskLine; - int dstStride, maskStride; - __m64 vsrc, vsrca; - - CHECKPOINT(); - - fbComposeGetSolid(pSrc, src, pDst->bits.format); +mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint16_t *dst_line; + uint32_t *mask_line; + int dst_stride, mask_stride; + __m64 vsrc, vsrca; + + CHECKPOINT (); + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); srca = src >> 24; - if (srca == 0) + if (src == 0) return; - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint32_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); vsrc = load8888 (src); vsrca = expand_alpha (vsrc); @@ -2358,8 +2553,8 @@ fbCompositeSolidMask_nx8888x0565Cmmx (pixman_implementation_t *imp, while (height--) { int twidth = width; - uint32_t *p = (uint32_t *)maskLine; - uint16_t *q = (uint16_t *)dstLine; + uint32_t *p = (uint32_t *)mask_line; + uint16_t *q = (uint16_t *)dst_line; while (twidth && ((unsigned long)q & 7)) { @@ -2368,9 +2563,9 @@ fbCompositeSolidMask_nx8888x0565Cmmx (pixman_implementation_t *imp, if (m) { uint64_t d = *q; - __m64 vdest = expand565 (M64(d), 0); - vdest = pack565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0); - *q = UINT64(vdest); + __m64 vdest = expand565 (M64 (d), 0); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0); + *q = UINT64 (vdest); } twidth--; @@ -2391,10 +2586,10 @@ fbCompositeSolidMask_nx8888x0565Cmmx (pixman_implementation_t *imp, { __m64 vdest = *(__m64 *)q; - vdest = pack565(in_over(vsrc, vsrca, load8888(m0), expand565(vdest, 0)), vdest, 0); - vdest = pack565(in_over(vsrc, vsrca, load8888(m1), expand565(vdest, 1)), vdest, 1); - vdest = pack565(in_over(vsrc, vsrca, load8888(m2), expand565(vdest, 2)), vdest, 2); - vdest = pack565(in_over(vsrc, vsrca, load8888(m3), expand565(vdest, 3)), vdest, 3); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m0), expand565 (vdest, 0)), vdest, 0); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m1), expand565 (vdest, 1)), vdest, 1); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m2), expand565 (vdest, 2)), vdest, 2); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m3), expand565 (vdest, 3)), vdest, 3); *(__m64 *)q = vdest; } @@ -2411,9 +2606,9 @@ fbCompositeSolidMask_nx8888x0565Cmmx (pixman_implementation_t *imp, if (m) { uint64_t d = *q; - __m64 vdest = expand565(M64(d), 0); - vdest = pack565 (in_over(vsrc, vsrca, load8888(m), vdest), vdest, 0); - *q = UINT64(vdest); + __m64 vdest = expand565 (M64 (d), 0); + vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0); + *q = UINT64 (vdest); } twidth--; @@ -2421,58 +2616,56 @@ fbCompositeSolidMask_nx8888x0565Cmmx (pixman_implementation_t *imp, q++; } - maskLine += maskStride; - dstLine += dstStride; + mask_line += mask_stride; + dst_line += dst_stride; } _mm_empty (); } static void -fbCompositeIn_nx8x8mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t 
yDst, - int32_t width, - int32_t height) -{ - uint8_t *dstLine, *dst; - uint8_t *maskLine, *mask; - int dstStride, maskStride; - uint16_t w; - uint32_t src; - uint8_t sa; - __m64 vsrc, vsrca; - - fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); - - fbComposeGetSolid(pSrc, src, pDst->bits.format); +mmx_composite_in_n_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint16_t w; + uint32_t src; + uint8_t sa; + __m64 vsrc, vsrca; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); sa = src >> 24; - if (sa == 0) - return; - vsrc = load8888(src); - vsrca = expand_alpha(vsrc); + vsrc = load8888 (src); + vsrca = expand_alpha (vsrc); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; - if ((((unsigned long)pDst & 3) == 0) && - (((unsigned long)pSrc & 3) == 0)) + if ((((unsigned long)dst_image & 3) == 0) && + (((unsigned long)src_image & 3) == 0)) { while (w >= 4) { @@ -2495,57 +2688,56 @@ fbCompositeIn_nx8x8mmx (pixman_implementation_t *imp, while (w--) { - uint16_t tmp; - uint8_t a; - uint32_t m, d; - uint32_t r; + uint16_t tmp; + uint8_t a; + uint32_t m, d; a = *mask++; d = *dst; - m = FbInU (sa, 0, a, tmp); - r = FbInU (m, 0, d, tmp); + m = MUL_UN8 (sa, a, tmp); + d = MUL_UN8 (m, d, tmp); - *dst++ = r; + *dst++ = d; } } - _mm_empty(); + _mm_empty (); } static void -fbCompositeIn_8x8mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint8_t *dstLine, *dst; - uint8_t *srcLine, *src; - int srcStride, dstStride; - uint16_t w; - - fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1); +mmx_composite_in_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int src_stride, dst_stride; + uint16_t w; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; - if ((((unsigned long)pDst & 3) == 0) && - (((unsigned long)pSrc & 3) == 0)) + if ((((unsigned long)dst_image & 3) == 0) && + (((unsigned long)src_image & 3) == 0)) { while (w >= 4) { @@ 
-2568,7 +2760,7 @@ fbCompositeIn_8x8mmx (pixman_implementation_t *imp, s = *src; d = *dst; - *dst = FbInU (s, 0, d, tmp); + *dst = MUL_UN8 (s, d, tmp); src++; dst++; @@ -2579,50 +2771,51 @@ fbCompositeIn_8x8mmx (pixman_implementation_t *imp, } static void -fbCompositeSrcAdd_8888x8x8mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint8_t *dstLine, *dst; - uint8_t *maskLine, *mask; - int dstStride, maskStride; - uint16_t w; - uint32_t src; - uint8_t sa; - __m64 vsrc, vsrca; - - fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); - - fbComposeGetSolid(pSrc, src, pDst->bits.format); +mmx_composite_add_n_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint16_t w; + uint32_t src; + uint8_t sa; + __m64 vsrc, vsrca; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); sa = src >> 24; - if (sa == 0) + + if (src == 0) return; - vsrc = load8888(src); - vsrca = expand_alpha(vsrc); + vsrc = load8888 (src); + vsrca = expand_alpha (vsrc); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; - if ((((unsigned long)pMask & 3) == 0) && - (((unsigned long)pDst & 3) == 0)) + if ((((unsigned long)mask_image & 3) == 0) && + (((unsigned long)dst_image & 3) == 0)) { while (w >= 4) { @@ -2639,57 +2832,57 @@ fbCompositeSrcAdd_8888x8x8mmx (pixman_implementation_t *imp, while (w--) { - uint16_t tmp; - uint16_t a; - uint32_t m, d; - uint32_t r; + uint16_t tmp; + uint16_t a; + uint32_t m, d; + uint32_t r; a = *mask++; d = *dst; - m = FbInU (sa, 0, a, tmp); - r = FbAdd (m, d, 0, tmp); + m = MUL_UN8 (sa, a, tmp); + r = ADD_UN8 (m, d, tmp); *dst++ = r; } } - _mm_empty(); + _mm_empty (); } static void -fbCompositeSrcAdd_8000x8000mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint8_t *dstLine, *dst; - uint8_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; - uint8_t s, d; - uint16_t t; - - CHECKPOINT(); - - fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1); - fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); +mmx_composite_add_8000_8000 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int 
dst_stride, src_stride; + uint16_t w; + uint8_t s, d; + uint16_t t; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w && (unsigned long)dst & 7) @@ -2707,7 +2900,7 @@ fbCompositeSrcAdd_8000x8000mmx (pixman_implementation_t *imp, while (w >= 8) { - *(__m64*)dst = _mm_adds_pu8(*(__m64*)src, *(__m64*)dst); + *(__m64*)dst = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst); dst += 8; src += 8; w -= 8; @@ -2727,47 +2920,47 @@ fbCompositeSrcAdd_8000x8000mmx (pixman_implementation_t *imp, } } - _mm_empty(); + _mm_empty (); } static void -fbCompositeSrcAdd_8888x8888mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) +mmx_composite_add_8888_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { __m64 dst64; - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + uint16_t w; - CHECKPOINT(); + CHECKPOINT (); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w && (unsigned long)dst & 7) { - *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src), - _mm_cvtsi32_si64(*dst))); + *dst = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (*src), + _mm_cvtsi32_si64 (*dst))); dst++; src++; w--; @@ -2775,8 +2968,8 @@ fbCompositeSrcAdd_8888x8888mmx (pixman_implementation_t *imp, while (w >= 2) { - dst64 = _mm_adds_pu8(*(__m64*)src, *(__m64*)dst); - *(uint64_t*)dst = UINT64(dst64); + dst64 = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst); + *(uint64_t*)dst = UINT64 (dst64); dst += 2; src += 2; w -= 2; @@ -2784,29 +2977,32 @@ fbCompositeSrcAdd_8888x8888mmx (pixman_implementation_t *imp, if (w) { - *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src), - _mm_cvtsi32_si64(*dst))); + *dst = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (*src), + _mm_cvtsi32_si64 (*dst))); } } - _mm_empty(); + _mm_empty (); } static pixman_bool_t pixman_blt_mmx (uint32_t *src_bits, - uint32_t *dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, int src_y, - int dst_x, int dst_y, - int width, int height) -{ - uint8_t * src_bytes; - uint8_t * dst_bytes; - int byte_width; + uint32_t *dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int 
height) +{ + uint8_t * src_bytes; + uint8_t * dst_bytes; + int byte_width; if (src_bpp != dst_bpp) return FALSE; @@ -2820,7 +3016,9 @@ pixman_blt_mmx (uint32_t *src_bits, byte_width = 2 * width; src_stride *= 2; dst_stride *= 2; - } else if (src_bpp == 32) { + } + else if (src_bpp == 32) + { src_stride = src_stride * (int) sizeof (uint32_t) / 4; dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); @@ -2828,7 +3026,9 @@ pixman_blt_mmx (uint32_t *src_bits, byte_width = 4 * width; src_stride *= 4; dst_stride *= 4; - } else { + } + else + { return FALSE; } @@ -2862,28 +3062,28 @@ pixman_blt_mmx (uint32_t *src_bits, { #if defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) __asm__ ( - "movq (%1), %%mm0\n" - "movq 8(%1), %%mm1\n" - "movq 16(%1), %%mm2\n" - "movq 24(%1), %%mm3\n" - "movq 32(%1), %%mm4\n" - "movq 40(%1), %%mm5\n" - "movq 48(%1), %%mm6\n" - "movq 56(%1), %%mm7\n" - - "movq %%mm0, (%0)\n" - "movq %%mm1, 8(%0)\n" - "movq %%mm2, 16(%0)\n" - "movq %%mm3, 24(%0)\n" - "movq %%mm4, 32(%0)\n" - "movq %%mm5, 40(%0)\n" - "movq %%mm6, 48(%0)\n" - "movq %%mm7, 56(%0)\n" + "movq (%1), %%mm0\n" + "movq 8(%1), %%mm1\n" + "movq 16(%1), %%mm2\n" + "movq 24(%1), %%mm3\n" + "movq 32(%1), %%mm4\n" + "movq 40(%1), %%mm5\n" + "movq 48(%1), %%mm6\n" + "movq 56(%1), %%mm7\n" + + "movq %%mm0, (%0)\n" + "movq %%mm1, 8(%0)\n" + "movq %%mm2, 16(%0)\n" + "movq %%mm3, 24(%0)\n" + "movq %%mm4, 32(%0)\n" + "movq %%mm5, 40(%0)\n" + "movq %%mm6, 48(%0)\n" + "movq %%mm7, 56(%0)\n" : : "r" (d), "r" (s) : "memory", - "%mm0", "%mm1", "%mm2", "%mm3", - "%mm4", "%mm5", "%mm6", "%mm7"); + "%mm0", "%mm1", "%mm2", "%mm3", + "%mm4", "%mm5", "%mm6", "%mm7"); #else __m64 v0 = *(__m64 *)(s + 0); __m64 v1 = *(__m64 *)(s + 8); @@ -2924,68 +3124,68 @@ pixman_blt_mmx (uint32_t *src_bits, } } - _mm_empty(); + _mm_empty (); return TRUE; } static void -fbCompositeCopyAreammx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - pixman_blt_mmx (pSrc->bits.bits, - pDst->bits.bits, - pSrc->bits.rowstride, - pDst->bits.rowstride, - PIXMAN_FORMAT_BPP (pSrc->bits.format), - PIXMAN_FORMAT_BPP (pDst->bits.format), - xSrc, ySrc, xDst, yDst, width, height); +mmx_composite_copy_area (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + pixman_blt_mmx (src_image->bits.bits, + dst_image->bits.bits, + src_image->bits.rowstride, + dst_image->bits.rowstride, + PIXMAN_FORMAT_BPP (src_image->bits.format), + PIXMAN_FORMAT_BPP (dst_image->bits.format), + src_x, src_y, dest_x, dest_y, width, height); } static void -fbCompositeOver_x888x8x8888mmx (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t *src, *srcLine; - uint32_t *dst, *dstLine; - uint8_t *mask, *maskLine; - int srcStride, maskStride, dstStride; +mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * 
src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *src, *src_line; + uint32_t *dst, *dst_line; + uint8_t *mask, *mask_line; + int src_stride, mask_stride, dst_stride; uint16_t w; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { - src = srcLine; - srcLine += srcStride; - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; @@ -2998,12 +3198,14 @@ fbCompositeOver_x888x8x8888mmx (pixman_implementation_t *imp, __m64 s = load8888 (*src | 0xff000000); if (m == 0xff) + { *dst = store8888 (s); + } else { __m64 sa = expand_alpha (s); - __m64 vm = expand_alpha_rev (M64(m)); - __m64 vdest = in_over(s, sa, vm, load8888 (*dst)); + __m64 vm = expand_alpha_rev (M64 (m)); + __m64 vdest = in_over (s, sa, vm, load8888 (*dst)); *dst = store8888 (vdest); } @@ -3015,137 +3217,142 @@ fbCompositeOver_x888x8x8888mmx (pixman_implementation_t *imp, } } - _mm_empty(); -} - -static const FastPathInfo mmx_fast_paths[] = -{ - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, fbCompositeSolidMask_nx8x0565mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, fbCompositeSolidMask_nx8x0565mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8888x8888Cmmx, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8888x8888Cmmx, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, fbCompositeSolidMask_nx8888x0565Cmmx, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8888x8888Cmmx, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8888x8888Cmmx, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, fbCompositeSolidMask_nx8888x0565Cmmx, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_x8r8g8b8, 
fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, fbCompositeSrc_8888RevNPx0565mmx, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_r5g6b5, fbCompositeSrc_8888RevNPx0565mmx, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8b8g8r8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_x8b8g8r8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_b5g6r5, fbCompositeSrc_8888RevNPx0565mmx, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, fbCompositeSrc_8888RevNPx0565mmx, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSrc_x888xnx8888mmx, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSrc_x888xnx8888mmx, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSrc_x888xnx8888mmx, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSrc_x888xnx8888mmx, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888mmx, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888mmx, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8x8888mmx, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8x8888mmx, NEED_SOLID_MASK }, + _mm_empty (); +} + +static const pixman_fast_path_t mmx_fast_paths[] = +{ + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, mmx_composite_over_n_8_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, mmx_composite_over_n_8_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, mmx_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, mmx_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, mmx_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, mmx_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, mmx_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, mmx_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, mmx_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, mmx_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, mmx_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, mmx_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, mmx_composite_over_pixbuf_8888, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_a8r8g8b8, mmx_composite_over_pixbuf_8888, 
NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, mmx_composite_over_pixbuf_8888, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_x8r8g8b8, mmx_composite_over_pixbuf_8888, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, mmx_composite_over_pixbuf_0565, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_r5g6b5, mmx_composite_over_pixbuf_0565, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8b8g8r8, mmx_composite_over_pixbuf_8888, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, mmx_composite_over_pixbuf_8888, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_x8b8g8r8, mmx_composite_over_pixbuf_8888, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, mmx_composite_over_pixbuf_8888, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_b5g6r5, mmx_composite_over_pixbuf_0565, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, mmx_composite_over_pixbuf_0565, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, mmx_composite_over_x888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, mmx_composite_over_x888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, mmx_composite_over_x888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, mmx_composite_over_x888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, mmx_composite_over_8888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, mmx_composite_over_8888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, mmx_composite_over_8888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, mmx_composite_over_8888_n_8888, NEED_SOLID_MASK }, #if 0 /* FIXME: This code is commented out since it's apparently not actually faster than the generic code. 
*/ - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeOver_x888x8x8888mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeOver_x888x8x8888mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8b8r8g8, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeOver_x888x8x8888mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8b8r8g8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeOver_x888x8x8888mmx, 0 }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, mmx_composite_over_x888_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, mmx_composite_over_x888_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_x8b8r8g8, PIXMAN_a8, PIXMAN_x8b8g8r8, mmx_composite_over_x888_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_x8b8r8g8, PIXMAN_a8, PIXMAN_a8r8g8b8, mmx_composite_over_x888_8_8888, 0 }, #endif - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSolid_nx8888mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSolid_nx8888mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSolid_nx0565mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeCopyAreammx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeCopyAreammx, 0 }, - - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_8888x0565mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888mmx, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_8888x0565mmx, 0 }, - - { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrcAdd_8888x8888mmx, 0 }, - { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrcAdd_8888x8888mmx, 0 }, - { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcAdd_8000x8000mmx, 0 }, - { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeSrcAdd_8888x8x8mmx, 0 }, - - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSolidMaskSrc_nx8x8888mmx, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSolidMaskSrc_nx8x8888mmx, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMaskSrc_nx8x8888mmx, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMaskSrc_nx8x8888mmx, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeCopyAreammx, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeCopyAreammx, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeCopyAreammx, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeCopyAreammx, 0 }, - { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeCopyAreammx, 0 }, - { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeCopyAreammx, 0 }, - { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeCopyAreammx, 0 }, - { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeCopyAreammx, 0 }, - - { PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeIn_8x8mmx, 0 }, - { 
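/* Note on the OVER/copy_area rows further down: an x8r8g8b8 or
 * x8b8g8r8 source has no alpha channel, so every pixel is effectively
 * opaque.  For an opaque source, OVER reduces to SRC:
 *
 *     dest = src + (1 - alpha_src)·dest = src + 0·dest = src
 *
 * which is why a straight copy (mmx_composite_copy_area) is a valid
 * fast path for those entries.
 */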
PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeIn_nx8x8mmx, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, mmx_composite_over_n_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_x8r8g8b8, mmx_composite_over_n_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, mmx_composite_over_n_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, mmx_composite_copy_area, 0 }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, mmx_composite_copy_area, 0 }, + + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, mmx_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, mmx_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, mmx_composite_over_8888_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, mmx_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, mmx_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, mmx_composite_over_8888_0565, 0 }, + + { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, mmx_composite_add_8888_8888, 0 }, + { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, mmx_composite_add_8888_8888, 0 }, + { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, mmx_composite_add_8000_8000, 0 }, + { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, mmx_composite_add_n_8_8, 0 }, + + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, mmx_composite_src_n_8_8888, 0 }, + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, mmx_composite_src_n_8_8888, 0 }, + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, mmx_composite_src_n_8_8888, 0 }, + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, mmx_composite_src_n_8_8888, 0 }, + { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, mmx_composite_copy_area, 0 }, + { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, mmx_composite_copy_area, 0 }, + { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, mmx_composite_copy_area, 0 }, + { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, mmx_composite_copy_area, 0 }, + { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, mmx_composite_copy_area, 0 }, + { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, mmx_composite_copy_area, 0 }, + { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, mmx_composite_copy_area, 0 }, + { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, mmx_composite_copy_area, 0 }, + + { PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, mmx_composite_in_8_8, 0 }, + { PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, mmx_composite_in_n_8_8, 0 }, { PIXMAN_OP_NONE }, }; static void mmx_composite (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t *src, - pixman_image_t *mask, - pixman_image_t *dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_op_t op, + pixman_image_t * src, + pixman_image_t * mask, + pixman_image_t * dest, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { if (_pixman_run_fast_path (mmx_fast_paths, imp, - op, src, mask, dest, - src_x, src_y, - mask_x, mask_y, - dest_x, dest_y, - width, height)) + op, src, mask, 
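/* A simplified sketch of how a table like mmx_fast_paths is consumed
 * (the real _pixman_run_fast_path also derives the PIXMAN_solid and
 * PIXMAN_null pseudo-formats from the images and validates transforms,
 * repeat modes and the NEED_* flags before committing to an entry):
 *
 *     const pixman_fast_path_t *info;
 *
 *     for (info = paths; info->op != PIXMAN_OP_NONE; ++info)
 *     {
 *         if (info->op == op &&
 *             info->src_format == src_format &&
 *             info->mask_format == mask_format &&
 *             info->dest_format == dest_format)
 *         {
 *             info->func (imp, op, src, mask, dest, src_x, src_y,
 *                         mask_x, mask_y, dest_x, dest_y, width, height);
 *             return TRUE;
 *         }
 *     }
 *     return FALSE;
 */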
dest, + src_x, src_y, + mask_x, mask_y, + dest_x, dest_y, + width, height)) + { return; + } _pixman_implementation_composite (imp->delegate, - op, src, mask, dest, src_x, src_y, - mask_x, mask_y, dest_x, dest_y, - width, height); + op, src, mask, dest, src_x, src_y, + mask_x, mask_y, dest_x, dest_y, + width, height); } static pixman_bool_t mmx_blt (pixman_implementation_t *imp, - uint32_t *src_bits, - uint32_t *dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, int src_y, - int dst_x, int dst_y, - int width, int height) + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) { if (!pixman_blt_mmx ( - src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, - src_x, src_y, dst_x, dst_y, width, height)) + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, + src_x, src_y, dst_x, dst_y, width, height)) { return _pixman_implementation_blt ( - imp->delegate, - src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, - src_x, src_y, dst_x, dst_y, width, height); + imp->delegate, + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, + src_x, src_y, dst_x, dst_y, width, height); } return TRUE; @@ -3153,58 +3360,58 @@ mmx_blt (pixman_implementation_t *imp, static pixman_bool_t mmx_fill (pixman_implementation_t *imp, - uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor) + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) { if (!pixman_fill_mmx (bits, stride, bpp, x, y, width, height, xor)) { return _pixman_implementation_fill ( - imp->delegate, bits, stride, bpp, x, y, width, height, xor); + imp->delegate, bits, stride, bpp, x, y, width, height, xor); } return TRUE; } pixman_implementation_t * -_pixman_implementation_create_mmx (pixman_implementation_t *toplevel) -{ - pixman_implementation_t *general = _pixman_implementation_create_fast_path (NULL); - pixman_implementation_t *imp = _pixman_implementation_create (toplevel, general); - - imp->combine_32[PIXMAN_OP_OVER] = mmxCombineOverU; - imp->combine_32[PIXMAN_OP_OVER_REVERSE] = mmxCombineOverReverseU; - imp->combine_32[PIXMAN_OP_IN] = mmxCombineInU; - imp->combine_32[PIXMAN_OP_IN_REVERSE] = mmxCombineInReverseU; - imp->combine_32[PIXMAN_OP_OUT] = mmxCombineOutU; - imp->combine_32[PIXMAN_OP_OUT_REVERSE] = mmxCombineOutReverseU; - imp->combine_32[PIXMAN_OP_ATOP] = mmxCombineAtopU; - imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = mmxCombineAtopReverseU; - imp->combine_32[PIXMAN_OP_XOR] = mmxCombineXorU; - imp->combine_32[PIXMAN_OP_ADD] = mmxCombineAddU; - imp->combine_32[PIXMAN_OP_SATURATE] = mmxCombineSaturateU; - - imp->combine_32_ca[PIXMAN_OP_SRC] = mmxCombineSrcC; - imp->combine_32_ca[PIXMAN_OP_OVER] = mmxCombineOverC; - imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = mmxCombineOverReverseC; - imp->combine_32_ca[PIXMAN_OP_IN] = mmxCombineInC; - imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = mmxCombineInReverseC; - imp->combine_32_ca[PIXMAN_OP_OUT] = mmxCombineOutC; - imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = mmxCombineOutReverseC; - imp->combine_32_ca[PIXMAN_OP_ATOP] = mmxCombineAtopC; - imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = mmxCombineAtopReverseC; - imp->combine_32_ca[PIXMAN_OP_XOR] = mmxCombineXorC; - imp->combine_32_ca[PIXMAN_OP_ADD] = mmxCombineAddC; +_pixman_implementation_create_mmx (void) +{ + pixman_implementation_t *general = 
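/* Note the delegation chain being built here: the MMX implementation
 * wraps the fast-path implementation, which in turn wraps the general
 * (plain C) one.  Each hook tries its own specialized code and falls
 * back to imp->delegate on failure, exactly as mmx_fill and mmx_blt
 * above do, so the general implementation only ever runs when nothing
 * faster applied.
 */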
_pixman_implementation_create_fast_path (); + pixman_implementation_t *imp = _pixman_implementation_create (general); + + imp->combine_32[PIXMAN_OP_OVER] = mmx_combine_over_u; + imp->combine_32[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_u; + imp->combine_32[PIXMAN_OP_IN] = mmx_combine_in_u; + imp->combine_32[PIXMAN_OP_IN_REVERSE] = mmx_combine_in_reverse_u; + imp->combine_32[PIXMAN_OP_OUT] = mmx_combine_out_u; + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = mmx_combine_out_reverse_u; + imp->combine_32[PIXMAN_OP_ATOP] = mmx_combine_atop_u; + imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = mmx_combine_atop_reverse_u; + imp->combine_32[PIXMAN_OP_XOR] = mmx_combine_xor_u; + imp->combine_32[PIXMAN_OP_ADD] = mmx_combine_add_u; + imp->combine_32[PIXMAN_OP_SATURATE] = mmx_combine_saturate_u; + + imp->combine_32_ca[PIXMAN_OP_SRC] = mmx_combine_src_ca; + imp->combine_32_ca[PIXMAN_OP_OVER] = mmx_combine_over_ca; + imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_IN] = mmx_combine_in_ca; + imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = mmx_combine_in_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_OUT] = mmx_combine_out_ca; + imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = mmx_combine_out_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP] = mmx_combine_atop_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = mmx_combine_atop_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_XOR] = mmx_combine_xor_ca; + imp->combine_32_ca[PIXMAN_OP_ADD] = mmx_combine_add_ca; imp->composite = mmx_composite; imp->blt = mmx_blt; imp->fill = mmx_fill; - + return imp; } diff --git a/lib/pixman/pixman/pixman-pict.c b/lib/pixman/pixman/pixman-pict.c deleted file mode 100644 index 17bd566e2..000000000 --- a/lib/pixman/pixman/pixman-pict.c +++ /dev/null @@ -1,175 +0,0 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Author: Keith Packard, SuSE, Inc. 
- */ - -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif -#include "pixman-private.h" - -/* - * Operator optimizations based on source or destination opacity - */ -typedef struct -{ - pixman_op_t op; - pixman_op_t opSrcDstOpaque; - pixman_op_t opSrcOpaque; - pixman_op_t opDstOpaque; -} OptimizedOperatorInfo; - -static const OptimizedOperatorInfo optimized_operators[] = -{ - /* Input Operator SRC&DST Opaque SRC Opaque DST Opaque */ - { PIXMAN_OP_OVER, PIXMAN_OP_SRC, PIXMAN_OP_SRC, PIXMAN_OP_OVER }, - { PIXMAN_OP_OVER_REVERSE, PIXMAN_OP_DST, PIXMAN_OP_OVER_REVERSE, PIXMAN_OP_DST }, - { PIXMAN_OP_IN, PIXMAN_OP_SRC, PIXMAN_OP_IN, PIXMAN_OP_SRC }, - { PIXMAN_OP_IN_REVERSE, PIXMAN_OP_DST, PIXMAN_OP_DST, PIXMAN_OP_IN_REVERSE }, - { PIXMAN_OP_OUT, PIXMAN_OP_CLEAR, PIXMAN_OP_OUT, PIXMAN_OP_CLEAR }, - { PIXMAN_OP_OUT_REVERSE, PIXMAN_OP_CLEAR, PIXMAN_OP_CLEAR, PIXMAN_OP_OUT_REVERSE }, - { PIXMAN_OP_ATOP, PIXMAN_OP_SRC, PIXMAN_OP_IN, PIXMAN_OP_OVER }, - { PIXMAN_OP_ATOP_REVERSE, PIXMAN_OP_DST, PIXMAN_OP_OVER_REVERSE, PIXMAN_OP_IN_REVERSE }, - { PIXMAN_OP_XOR, PIXMAN_OP_CLEAR, PIXMAN_OP_OUT, PIXMAN_OP_OUT_REVERSE }, - { PIXMAN_OP_SATURATE, PIXMAN_OP_DST, PIXMAN_OP_OVER_REVERSE, PIXMAN_OP_DST }, - { PIXMAN_OP_NONE } -}; - -/* - * Check if the current operator could be optimized - */ -static const OptimizedOperatorInfo* -pixman_operator_can_be_optimized(pixman_op_t op) -{ - const OptimizedOperatorInfo *info; - - for (info = optimized_operators; info->op != PIXMAN_OP_NONE; info++) - { - if(info->op == op) - return info; - } - return NULL; -} - -/* - * Optimize the current operator based on opacity of source or destination - * The output operator should be mathematically equivalent to the source. - */ -static pixman_op_t -pixman_optimize_operator(pixman_op_t op, pixman_image_t *pSrc, pixman_image_t *pMask, pixman_image_t *pDst ) -{ - pixman_bool_t is_source_opaque; - pixman_bool_t is_dest_opaque; - const OptimizedOperatorInfo *info = pixman_operator_can_be_optimized(op); - - if(!info || pMask) - return op; - - is_source_opaque = pixman_image_is_opaque(pSrc); - is_dest_opaque = pixman_image_is_opaque(pDst); - - if(is_source_opaque == FALSE && is_dest_opaque == FALSE) - return op; - - if(is_source_opaque && is_dest_opaque) - return info->opSrcDstOpaque; - else if(is_source_opaque) - return info->opSrcOpaque; - else if(is_dest_opaque) - return info->opDstOpaque; - - return op; - -} - -static pixman_implementation_t *imp; - -PIXMAN_EXPORT void -pixman_image_composite (pixman_op_t op, - pixman_image_t * src, - pixman_image_t * mask, - pixman_image_t * dest, - int16_t src_x, - int16_t src_y, - int16_t mask_x, - int16_t mask_y, - int16_t dest_x, - int16_t dest_y, - uint16_t width, - uint16_t height) -{ - /* - * Check if we can replace our operator by a simpler one if the src or dest are opaque - * The output operator should be mathematically equivalent to the source. 
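 *
 * Worked example: OVER computes dest = src + (1 - alpha_src)·dest.
 * With an opaque source, alpha_src = 1, so dest = src and OVER can be
 * replaced by the cheaper SRC (see the optimized_operators table
 * above).  Similarly IN, which computes dest = src·alpha_dest,
 * collapses to SRC when the destination is opaque.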
- */ - op = pixman_optimize_operator(op, src, mask, dest); - if(op == PIXMAN_OP_DST) - return; - - if (!imp) - imp = _pixman_choose_implementation(); - - _pixman_implementation_composite (imp, op, - src, mask, dest, - src_x, src_y, - mask_x, mask_y, - dest_x, dest_y, - width, height); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_blt (uint32_t *src_bits, - uint32_t *dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, int src_y, - int dst_x, int dst_y, - int width, int height) -{ - if (!imp) - imp = _pixman_choose_implementation(); - - return _pixman_implementation_blt (imp, src_bits, dst_bits, src_stride, dst_stride, - src_bpp, dst_bpp, - src_x, src_y, - dst_x, dst_y, - width, height); -} - -PIXMAN_EXPORT pixman_bool_t -pixman_fill (uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor) -{ - if (!imp) - imp = _pixman_choose_implementation(); - - return _pixman_implementation_fill (imp, bits, stride, bpp, x, y, width, height, xor); -} diff --git a/lib/pixman/pixman/pixman-private.h b/lib/pixman/pixman/pixman-private.h index 9e770f6f5..ff7a65f88 100644 --- a/lib/pixman/pixman/pixman-private.h +++ b/lib/pixman/pixman/pixman-private.h @@ -9,133 +9,11 @@ #include <time.h> #include <assert.h> -#ifndef FALSE -#define FALSE 0 -#endif - -#ifndef TRUE -#define TRUE 1 -#endif - -#define MSBFirst 0 -#define LSBFirst 1 - -#ifdef WORDS_BIGENDIAN -# define IMAGE_BYTE_ORDER MSBFirst -# define BITMAP_BIT_ORDER MSBFirst -#else -# define IMAGE_BYTE_ORDER LSBFirst -# define BITMAP_BIT_ORDER LSBFirst -#endif - -#undef DEBUG -#define DEBUG 0 - -#if defined (__GNUC__) -# define FUNC ((const char*) (__PRETTY_FUNCTION__)) -#elif defined (__sun) || (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) -# define FUNC ((const char*) (__func__)) -#else -# define FUNC ((const char*) ("???")) -#endif - -#ifndef INT16_MIN -# define INT16_MIN (-32767-1) -#endif - -#ifndef INT16_MAX -# define INT16_MAX (32767) -#endif - -#ifndef INT32_MIN -# define INT32_MIN (-2147483647-1) -#endif - -#ifndef INT32_MAX -# define INT32_MAX (2147483647) -#endif - -#ifndef UINT32_MIN -# define UINT32_MIN (0) -#endif - -#ifndef UINT32_MAX -# define UINT32_MAX (4294967295U) -#endif - -#ifndef M_PI -# define M_PI 3.14159265358979323846 -#endif - -#ifdef _MSC_VER -/* 'inline' is available only in C++ in MSVC */ -# define inline __inline -# define force_inline __forceinline -#elif defined __GNUC__ || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)) -# define inline __inline__ -# define force_inline __inline__ __attribute__ ((__always_inline__)) -#else -# ifndef force_inline -# define force_inline inline -# endif -#endif - -#define FB_SHIFT 5 -#define FB_UNIT (1 << FB_SHIFT) -#define FB_HALFUNIT (1 << (FB_SHIFT-1)) -#define FB_MASK (FB_UNIT - 1) -#define FB_ALLONES ((uint32_t) -1) - -/* Memory allocation helpers */ -void *pixman_malloc_ab (unsigned int n, unsigned int b); -void *pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c); -pixman_bool_t pixman_multiply_overflows_int (unsigned int a, unsigned int b); -pixman_bool_t pixman_addition_overflows_int (unsigned int a, unsigned int b); - -#if DEBUG - -#define return_if_fail(expr) \ - do \ - { \ - if (!(expr)) \ - { \ - fprintf(stderr, "In %s: %s failed\n", FUNC, #expr); \ - return; \ - } \ - } \ - while (0) - -#define return_val_if_fail(expr, retval) \ - do \ - { \ - if (!(expr)) \ - { \ - fprintf(stderr, "In %s: %s failed\n", FUNC, #expr); \ - return (retval); \ - } \ - } \ - while (0) - -#else - 
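/* The overflow-checked allocation helpers declared above guard the
 * usual width · height · bpp size computations.  A minimal sketch of
 * the idea behind pixman_malloc_ab (assumed shape, not the verbatim
 * implementation):
 *
 *     void *
 *     pixman_malloc_ab (unsigned int a, unsigned int b)
 *     {
 *         if (b && a >= INT32_MAX / b)
 *             return NULL;            // a * b would overflow
 *
 *         return malloc (a * b);
 *     }
 */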
-#define return_if_fail(expr) \ - do \ - { \ - if (!(expr)) \ - return; \ - } \ - while (0) - -#define return_val_if_fail(expr, retval) \ - do \ - { \ - if (!(expr)) \ - return (retval); \ - } \ - while (0) - -#endif +#include "pixman-compiler.h" +/* + * Images + */ typedef struct image_common image_common_t; typedef struct source_image source_image_t; typedef struct solid_fill solid_fill_t; @@ -147,86 +25,28 @@ typedef struct conical_gradient conical_gradient_t; typedef struct radial_gradient radial_gradient_t; typedef struct bits_image bits_image_t; typedef struct circle circle_t; -typedef struct point point_t; -/* FIXME - the types and structures below should be give proper names - */ +typedef void (*fetch_scanline_t) (pixman_image_t *image, + int x, + int y, + int width, + uint32_t *buffer, + const uint32_t *mask, + uint32_t mask_bits); + +typedef uint32_t (*fetch_pixel_32_t) (bits_image_t *image, + int x, + int y); -#define FASTCALL -typedef FASTCALL void (*CombineFunc32) (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width); -typedef FASTCALL void (*fetchProc32)(bits_image_t *pict, int x, int y, int width, - uint32_t *buffer); -typedef FASTCALL uint32_t (*fetchPixelProc32)(bits_image_t *pict, int offset, int line); -typedef FASTCALL void (*storeProc32)(pixman_image_t *, uint32_t *bits, - const uint32_t *values, int x, int width, - const pixman_indexed_t *); - -typedef FASTCALL void (*CombineFunc64) (uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width); -typedef FASTCALL void (*fetchProc64)(bits_image_t *pict, int x, int y, int width, - uint64_t *buffer); -typedef FASTCALL uint64_t (*fetchPixelProc64)(bits_image_t *pict, int offset, int line); -typedef FASTCALL void (*storeProc64)(pixman_image_t *, uint32_t *bits, - const uint64_t *values, int x, int width, - const pixman_indexed_t *); - -typedef struct _FbComposeData { - uint8_t op; - pixman_image_t *src; - pixman_image_t *mask; - pixman_image_t *dest; - int16_t xSrc; - int16_t ySrc; - int16_t xMask; - int16_t yMask; - int16_t xDest; - int16_t yDest; - uint16_t width; - uint16_t height; -} FbComposeData; - -void pixman_composite_rect_general_accessors (const FbComposeData *data, - void *src_buffer, - void *mask_buffer, - void *dest_buffer, - const int wide); -void pixman_composite_rect_general (const FbComposeData *data); - -fetchProc32 pixman_fetchProcForPicture32 (bits_image_t *); -fetchPixelProc32 pixman_fetchPixelProcForPicture32 (bits_image_t *); -storeProc32 pixman_storeProcForPicture32 (bits_image_t *); -fetchProc32 pixman_fetchProcForPicture32_accessors (bits_image_t *); -fetchPixelProc32 pixman_fetchPixelProcForPicture32_accessors (bits_image_t *); -storeProc32 pixman_storeProcForPicture32_accessors (bits_image_t *); - -fetchProc64 pixman_fetchProcForPicture64 (bits_image_t *); -fetchPixelProc64 pixman_fetchPixelProcForPicture64 (bits_image_t *); -storeProc64 pixman_storeProcForPicture64 (bits_image_t *); -fetchProc64 pixman_fetchProcForPicture64_accessors (bits_image_t *); -fetchPixelProc64 pixman_fetchPixelProcForPicture64_accessors (bits_image_t *); -storeProc64 pixman_storeProcForPicture64_accessors (bits_image_t *); - -void pixman_expand(uint64_t *dst, const uint32_t *src, pixman_format_code_t, int width); -void pixman_contract(uint32_t *dst, const uint64_t *src, int width); - -void pixmanFetchGradient (gradient_t *, int x, int y, int width, - uint32_t *buffer, uint32_t *mask, uint32_t maskBits); -void _pixman_image_get_scanline_64_generic (pixman_image_t * pict, int x, int y, int 
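/* The fetch_scanline_t/fetch_pixel_32_t typedefs above define the
 * per-format accessor interface.  For a format that already matches
 * the canonical 32-bit layout the fetcher degenerates to a copy;
 * roughly (a sketch only, ignoring the mask/mask_bits arguments,
 * which such a trivial fetcher does not need):
 *
 *     static void
 *     fetch_scanline_a8r8g8b8 (pixman_image_t *image,
 *                              int x, int y, int width,
 *                              uint32_t *buffer,
 *                              const uint32_t *mask, uint32_t mask_bits)
 *     {
 *         const uint32_t *bits =
 *             image->bits.bits + y * image->bits.rowstride;
 *
 *         memcpy (buffer, bits + x, width * sizeof (uint32_t));
 *     }
 */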
width, - uint64_t *buffer, uint64_t *mask, uint32_t maskBits); -void fbFetchTransformed(bits_image_t *, int x, int y, int width, - uint32_t *buffer, uint32_t *mask, uint32_t maskBits); -void fbFetchExternalAlpha(bits_image_t *, int x, int y, int width, - uint32_t *buffer, uint32_t *mask, uint32_t maskBits); - -void fbFetchTransformed_accessors(bits_image_t *, int x, int y, int width, - uint32_t *buffer, uint32_t *mask, - uint32_t maskBits); -void fbStoreExternalAlpha_accessors(bits_image_t *, int x, int y, int width, - uint32_t *buffer); -void fbFetchExternalAlpha_accessors(bits_image_t *, int x, int y, int width, - uint32_t *buffer, uint32_t *mask, - uint32_t maskBits); - -/* end */ +typedef uint64_t (*fetch_pixel_64_t) (bits_image_t *image, + int x, + int y); + +typedef void (*store_scanline_t) (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *values); typedef enum { @@ -237,119 +57,78 @@ typedef enum SOLID } image_type_t; -#define IS_SOURCE_IMAGE(img) (((image_common_t *)img)->type > BITS) - typedef enum { SOURCE_IMAGE_CLASS_UNKNOWN, SOURCE_IMAGE_CLASS_HORIZONTAL, SOURCE_IMAGE_CLASS_VERTICAL, -} source_pict_class_t; - -typedef void (*scanStoreProc)(bits_image_t *img, int x, int y, int width, uint32_t *buffer); -typedef void (*scanFetchProc)(pixman_image_t *, int, int, int, uint32_t *, - uint32_t *, uint32_t); - -source_pict_class_t _pixman_image_classify (pixman_image_t *image, - int x, - int y, - int width, - int height); - -void -_pixman_image_get_scanline_32 (pixman_image_t *image, int x, int y, int width, - uint32_t *buffer, uint32_t *mask, uint32_t mask_bits); - -/* Even thought the type of buffer is uint32_t *, the function actually expects - * a uint64_t *buffer. - */ -void -_pixman_image_get_scanline_64 (pixman_image_t *image, int x, int y, int width, - uint32_t *buffer, uint32_t *unused, uint32_t unused2); - -void -_pixman_image_store_scanline_32 (bits_image_t *image, int x, int y, int width, - uint32_t *buffer); -/* Even thought the type of buffer is uint32_t *, the function actually expects - * a uint64_t *buffer. 
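 * (The 64-bit variants exist for formats wider than 8 bits per
 * channel: scanlines are widened to 16 bits per component first,
 * where an 8-bit value x expands to x·257 (so 0xff becomes 0xffff),
 * and pixman_contract() narrows the result back after compositing.)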
- */ -void -_pixman_image_store_scanline_64 (bits_image_t *image, int x, int y, int width, - uint32_t *buffer); - -pixman_image_t * -_pixman_image_allocate (void); - -pixman_bool_t -_pixman_init_gradient (gradient_t *gradient, - const pixman_gradient_stop_t *stops, - int n_stops); -void -_pixman_image_reset_clip_region (pixman_image_t *image); +} source_image_class_t; -struct point -{ - int16_t x, y; -}; - -typedef source_pict_class_t (* classify_func_t) (pixman_image_t *image, - int x, - int y, - int width, - int height); -typedef void (* property_changed_func_t) (pixman_image_t *image); +typedef source_image_class_t (*classify_func_t) (pixman_image_t *image, + int x, + int y, + int width, + int height); +typedef void (*property_changed_func_t) (pixman_image_t *image); struct image_common { - image_type_t type; - int32_t ref_count; - pixman_region32_t full_region; - pixman_region32_t clip_region; - pixman_region32_t *src_clip; - pixman_bool_t has_client_clip; - pixman_transform_t *transform; - pixman_repeat_t repeat; - pixman_filter_t filter; - pixman_fixed_t *filter_params; - int n_filter_params; - bits_image_t *alpha_map; - point_t alpha_origin; - pixman_bool_t component_alpha; - pixman_read_memory_func_t read_func; - pixman_write_memory_func_t write_func; - classify_func_t classify; - property_changed_func_t property_changed; - scanFetchProc get_scanline_32; - scanFetchProc get_scanline_64; + image_type_t type; + int32_t ref_count; + pixman_region32_t clip_region; + pixman_bool_t have_clip_region; /* FALSE if there is no clip */ + pixman_bool_t client_clip; /* Whether the source clip was + set by a client */ + pixman_bool_t clip_sources; /* Whether the clip applies when + * the image is used as a source + */ + pixman_bool_t dirty; + pixman_bool_t need_workaround; + pixman_transform_t * transform; + pixman_repeat_t repeat; + pixman_filter_t filter; + pixman_fixed_t * filter_params; + int n_filter_params; + bits_image_t * alpha_map; + int alpha_origin_x; + int alpha_origin_y; + pixman_bool_t component_alpha; + classify_func_t classify; + property_changed_func_t property_changed; + fetch_scanline_t get_scanline_32; + fetch_scanline_t get_scanline_64; + + pixman_image_destroy_func_t destroy_func; + void * destroy_data; }; struct source_image { - image_common_t common; - source_pict_class_t class; + image_common_t common; + source_image_class_t class; }; struct solid_fill { - source_image_t common; - uint32_t color; /* FIXME: shouldn't this be a pixman_color_t? */ + source_image_t common; + uint32_t color; /* FIXME: shouldn't this be a pixman_color_t? 
*/ }; struct gradient { - source_image_t common; - int n_stops; - pixman_gradient_stop_t * stops; - int stop_range; - uint32_t * color_table; - int color_table_size; + source_image_t common; + int n_stops; + pixman_gradient_stop_t *stops; + int stop_range; + uint32_t * color_table; + int color_table_size; }; struct linear_gradient { - gradient_t common; - pixman_point_fixed_t p1; - pixman_point_fixed_t p2; + gradient_t common; + pixman_point_fixed_t p1; + pixman_point_fixed_t p2; }; struct circle @@ -361,721 +140,658 @@ struct circle struct radial_gradient { - gradient_t common; - - circle_t c1; - circle_t c2; - double cdx; - double cdy; - double dr; - double A; + gradient_t common; + + circle_t c1; + circle_t c2; + double cdx; + double cdy; + double dr; + double A; }; struct conical_gradient { - gradient_t common; - pixman_point_fixed_t center; - pixman_fixed_t angle; + gradient_t common; + pixman_point_fixed_t center; + pixman_fixed_t angle; }; struct bits_image { - image_common_t common; - pixman_format_code_t format; - const pixman_indexed_t *indexed; - int width; - int height; - uint32_t * bits; - uint32_t * free_me; - int rowstride; /* in number of uint32_t's */ - - scanStoreProc store_scanline_32; - scanStoreProc store_scanline_64; + image_common_t common; + pixman_format_code_t format; + const pixman_indexed_t * indexed; + int width; + int height; + uint32_t * bits; + uint32_t * free_me; + int rowstride; /* in number of uint32_t's */ + + /* Fetch a pixel, disregarding alpha maps, transformations etc. */ + fetch_pixel_32_t fetch_pixel_raw_32; + fetch_pixel_64_t fetch_pixel_raw_64; + + /* Fetch a pixel, taking alpha maps into account */ + fetch_pixel_32_t fetch_pixel_32; + fetch_pixel_64_t fetch_pixel_64; + + /* Fetch raw scanlines, with no regard for transformations, alpha maps etc. 
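 * The raw/non-raw split keeps the concerns layered: a raw accessor
 * reads or writes the bits in the image's own format and nothing
 * else, while the non-raw wrapper calls the raw one and then merges
 * in the alpha component fetched from common.alpha_map, so per-format
 * code never has to know about alpha maps.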
*/ + fetch_scanline_t fetch_scanline_raw_32; + fetch_scanline_t fetch_scanline_raw_64; + + /* Store scanlines with no regard for alpha maps */ + store_scanline_t store_scanline_raw_32; + store_scanline_t store_scanline_raw_64; + + /* Store a scanline, taking alpha maps into account */ + store_scanline_t store_scanline_32; + store_scanline_t store_scanline_64; + + /* Used for indirect access to the bits */ + pixman_read_memory_func_t read_func; + pixman_write_memory_func_t write_func; }; union pixman_image { - image_type_t type; - image_common_t common; - bits_image_t bits; - source_image_t source; - gradient_t gradient; - linear_gradient_t linear; - conical_gradient_t conical; - radial_gradient_t radial; - solid_fill_t solid; + image_type_t type; + image_common_t common; + bits_image_t bits; + source_image_t source; + gradient_t gradient; + linear_gradient_t linear; + conical_gradient_t conical; + radial_gradient_t radial; + solid_fill_t solid; }; -/* Gradient walker - */ -typedef struct -{ - uint32_t left_ag; - uint32_t left_rb; - uint32_t right_ag; - uint32_t right_rb; - int32_t left_x; - int32_t right_x; - int32_t stepper; - pixman_gradient_stop_t *stops; - int num_stops; - unsigned int spread; +void +_pixman_bits_image_setup_raw_accessors (bits_image_t *image); - int need_reset; -} GradientWalker; +void +_pixman_image_get_scanline_generic_64 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits); + +source_image_class_t +_pixman_image_classify (pixman_image_t *image, + int x, + int y, + int width, + int height); void -_pixman_gradient_walker_init (GradientWalker *walker, - gradient_t *gradient, - unsigned int spread); +_pixman_image_get_scanline_32 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits); +/* Even thought the type of buffer is uint32_t *, the function actually expects + * a uint64_t *buffer. + */ void -_pixman_gradient_walker_reset (GradientWalker *walker, - pixman_fixed_32_32_t pos); +_pixman_image_get_scanline_64 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *unused, + uint32_t unused2); -uint32_t -_pixman_gradient_walker_pixel (GradientWalker *walker, - pixman_fixed_32_32_t x); +void +_pixman_image_store_scanline_32 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *buffer); +void +_pixman_image_fetch_pixels (bits_image_t *image, + uint32_t * buffer, + int n_pixels); +/* Even though the type of buffer is uint32_t *, the function + * actually expects a uint64_t *buffer. 
+ */ +void +_pixman_image_store_scanline_64 (bits_image_t * image, + int x, + int y, + int width, + const uint32_t *buffer); +pixman_image_t * +_pixman_image_allocate (void); -#define LOG2_BITMAP_PAD 5 -#define FB_STIP_SHIFT LOG2_BITMAP_PAD -#define FB_STIP_UNIT (1 << FB_STIP_SHIFT) -#define FB_STIP_MASK (FB_STIP_UNIT - 1) -#define FB_STIP_ALLONES ((uint32_t) -1) +pixman_bool_t +_pixman_init_gradient (gradient_t * gradient, + const pixman_gradient_stop_t *stops, + int n_stops); +void +_pixman_image_reset_clip_region (pixman_image_t *image); -#if BITMAP_BIT_ORDER == LSBFirst -#define FbScrLeft(x,n) ((x) >> (n)) -#define FbScrRight(x,n) ((x) << (n)) -#define FbLeftStipBits(x,n) ((x) & ((((uint32_t) 1) << (n)) - 1)) -#else -#define FbScrLeft(x,n) ((x) << (n)) -#define FbScrRight(x,n) ((x) >> (n)) -#define FbLeftStipBits(x,n) ((x) >> (FB_STIP_UNIT - (n))) -#endif +void +_pixman_image_validate (pixman_image_t *image); -#define FbStipLeft(x,n) FbScrLeft(x,n) -#define FbStipRight(x,n) FbScrRight(x,n) -#define FbStipMask(x,w) (FbStipRight(FB_STIP_ALLONES,(x) & FB_STIP_MASK) & \ - FbStipLeft(FB_STIP_ALLONES,(FB_STIP_UNIT - ((x)+(w))) & FB_STIP_MASK)) - -#define FbLeftMask(x) ( ((x) & FB_MASK) ? \ - FbScrRight(FB_ALLONES,(x) & FB_MASK) : 0) -#define FbRightMask(x) ( ((FB_UNIT - (x)) & FB_MASK) ? \ - FbScrLeft(FB_ALLONES,(FB_UNIT - (x)) & FB_MASK) : 0) - -#define FbMaskBits(x,w,l,n,r) { \ - n = (w); \ - r = FbRightMask((x)+n); \ - l = FbLeftMask(x); \ - if (l) { \ - n -= FB_UNIT - ((x) & FB_MASK); \ - if (n < 0) { \ - n = 0; \ - l &= r; \ - r = 0; \ - } \ - } \ - n >>= FB_SHIFT; \ - } +pixman_bool_t +_pixman_image_is_opaque (pixman_image_t *image); -#if IMAGE_BYTE_ORDER == MSBFirst -#define Fetch24(img, a) ((unsigned long) (a) & 1 ? \ - ((READ(img, a) << 16) | READ(img, (uint16_t *) ((a)+1))) : \ - ((READ(img, (uint16_t *) (a)) << 8) | READ(img, (a)+2))) -#define Store24(img,a,v) ((unsigned long) (a) & 1 ? \ - (WRITE(img, a, (uint8_t) ((v) >> 16)), \ - WRITE(img, (uint16_t *) ((a)+1), (uint16_t) (v))) : \ - (WRITE(img, (uint16_t *) (a), (uint16_t) ((v) >> 8)), \ - WRITE(img, (a)+2, (uint8_t) (v)))) -#else -#define Fetch24(img,a) ((unsigned long) (a) & 1 ? \ - (READ(img, a) | (READ(img, (uint16_t *) ((a)+1)) << 8)) : \ - (READ(img, (uint16_t *) (a)) | (READ(img, (a)+2) << 16))) -#define Store24(img,a,v) ((unsigned long) (a) & 1 ? 
\ - (WRITE(img, a, (uint8_t) (v)), \ - WRITE(img, (uint16_t *) ((a)+1), (uint16_t) ((v) >> 8))) : \ - (WRITE(img, (uint16_t *) (a), (uint16_t) (v)), \ - WRITE(img, (a)+2, (uint8_t) ((v) >> 16)))) -#endif +pixman_bool_t +_pixman_image_is_solid (pixman_image_t *image); -#define CvtR8G8B8toY15(s) (((((s) >> 16) & 0xff) * 153 + \ - (((s) >> 8) & 0xff) * 301 + \ - (((s) ) & 0xff) * 58) >> 2) -#define miCvtR8G8B8to15(s) ((((s) >> 3) & 0x001f) | \ - (((s) >> 6) & 0x03e0) | \ - (((s) >> 9) & 0x7c00)) -#define miIndexToEnt15(mif,rgb15) ((mif)->ent[rgb15]) -#define miIndexToEnt24(mif,rgb24) miIndexToEnt15(mif,miCvtR8G8B8to15(rgb24)) +uint32_t +_pixman_image_get_solid (pixman_image_t * image, + pixman_format_code_t format); + +#define PIXMAN_IMAGE_GET_LINE(image, x, y, type, out_stride, line, mul) \ + do \ + { \ + uint32_t *__bits__; \ + int __stride__; \ + \ + __bits__ = image->bits.bits; \ + __stride__ = image->bits.rowstride; \ + (out_stride) = \ + __stride__ * (int) sizeof (uint32_t) / (int) sizeof (type); \ + (line) = \ + ((type *) __bits__) + (out_stride) * (y) + (mul) * (x); \ + } while (0) -#define miIndexToEntY24(mif,rgb24) ((mif)->ent[CvtR8G8B8toY15(rgb24)]) +/* + * Gradient walker + */ +typedef struct +{ + uint32_t left_ag; + uint32_t left_rb; + uint32_t right_ag; + uint32_t right_rb; + int32_t left_x; + int32_t right_x; + int32_t stepper; + pixman_gradient_stop_t *stops; + int num_stops; + unsigned int spread; -#define FbIntMult(a,b,t) ( (t) = (a) * (b) + 0x80, ( ( ( (t)>>8 ) + (t) )>>8 ) ) -#define FbIntDiv(a,b) (((uint16_t) (a) * 255) / (b)) + int need_reset; +} pixman_gradient_walker_t; -#define FbGet8(v,i) ((uint16_t) (uint8_t) ((v) >> i)) +void +_pixman_gradient_walker_init (pixman_gradient_walker_t *walker, + gradient_t * gradient, + unsigned int spread); +void +_pixman_gradient_walker_reset (pixman_gradient_walker_t *walker, + pixman_fixed_32_32_t pos); -#define cvt8888to0565(s) ((((s) >> 3) & 0x001f) | \ - (((s) >> 5) & 0x07e0) | \ - (((s) >> 8) & 0xf800)) -#define cvt0565to0888(s) (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) | \ - ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) | \ - ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000))) +uint32_t +_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker, + pixman_fixed_32_32_t x); /* - * There are two ways of handling alpha -- either as a single unified value or - * a separate value for each component, hence each macro must have two - * versions. The unified alpha version has a 'U' at the end of the name, - * the component version has a 'C'. Similarly, functions which deal with - * this difference will have two versions using the same convention. + * Edges */ -#define FbOverU(x,y,i,a,t) ((t) = FbIntMult(FbGet8(y,i),(a),(t)) + FbGet8(x,i), \ - (uint32_t) ((uint8_t) ((t) | (0 - ((t) >> 8)))) << (i)) +#define MAX_ALPHA(n) ((1 << (n)) - 1) +#define N_Y_FRAC(n) ((n) == 1 ? 1 : (1 << ((n) / 2)) - 1) +#define N_X_FRAC(n) ((n) == 1 ? 
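/* Worked example of the sampling grid these macros describe: with
 * n = 4 bits of alpha, MAX_ALPHA(4) = 15 coverage levels,
 * N_Y_FRAC(4) = (1 << 2) - 1 = 3 sample rows per pixel and
 * N_X_FRAC(4) = (1 << 2) + 1 = 5 sample columns per row, i.e.
 * 3 · 5 = 15 samples, one increment of coverage per sample.
 */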
1 : (1 << ((n) / 2)) + 1) -#define FbOverC(x,y,i,a,t) ((t) = FbIntMult(FbGet8(y,i),FbGet8(a,i),(t)) + FbGet8(x,i), \ - (uint32_t) ((uint8_t) ((t) | (0 - ((t) >> 8)))) << (i)) +#define STEP_Y_SMALL(n) (pixman_fixed_1 / N_Y_FRAC (n)) +#define STEP_Y_BIG(n) (pixman_fixed_1 - (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n)) -#define FbInU(x,i,a,t) ((uint32_t) FbIntMult(FbGet8(x,i),(a),(t)) << (i)) +#define Y_FRAC_FIRST(n) (STEP_Y_SMALL (n) / 2) +#define Y_FRAC_LAST(n) (Y_FRAC_FIRST (n) + (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n)) -#define FbInC(x,i,a,t) ((uint32_t) FbIntMult(FbGet8(x,i),FbGet8(a,i),(t)) << (i)) +#define STEP_X_SMALL(n) (pixman_fixed_1 / N_X_FRAC (n)) +#define STEP_X_BIG(n) (pixman_fixed_1 - (N_X_FRAC (n) - 1) * STEP_X_SMALL (n)) -#define FbAdd(x,y,i,t) ((t) = FbGet8(x,i) + FbGet8(y,i), \ - (uint32_t) ((uint8_t) ((t) | (0 - ((t) >> 8)))) << (i)) +#define X_FRAC_FIRST(n) (STEP_X_SMALL (n) / 2) +#define X_FRAC_LAST(n) (X_FRAC_FIRST (n) + (N_X_FRAC (n) - 1) * STEP_X_SMALL (n)) -#define div_255(x) (((x) + 0x80 + (((x) + 0x80) >> 8)) >> 8) -#define div_65535(x) (((x) + 0x8000 + (((x) + 0x8000) >> 16)) >> 16) +#define RENDER_SAMPLES_X(x, n) \ + ((n) == 1? 0 : (pixman_fixed_frac (x) + \ + X_FRAC_FIRST (n)) / STEP_X_SMALL (n)) -#define MOD(a,b) ((a) < 0 ? ((b) - ((-(a) - 1) % (b))) - 1 : (a) % (b)) +void +pixman_rasterize_edges_accessors (pixman_image_t *image, + pixman_edge_t * l, + pixman_edge_t * r, + pixman_fixed_t t, + pixman_fixed_t b); -#define DIV(a,b) ((((a) < 0) == ((b) < 0)) ? (a) / (b) : \ - ((a) - (b) + 1 - (((b) < 0) << 1)) / (b)) +/* + * Implementations + */ -#define CLIP(a,b,c) ((a) < (b) ? (b) : ((a) > (c) ? (c) : (a))) +typedef struct pixman_implementation_t pixman_implementation_t; -#if 0 -/* FIXME: the MOD macro above is equivalent, but faster I think */ -#define mod(a,b) ((b) == 1 ? 0 : (a) >= 0 ? (a) % (b) : (b) - (-a) % (b)) -#endif +typedef void (*pixman_combine_32_func_t) (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width); + +typedef void (*pixman_combine_64_func_t) (pixman_implementation_t *imp, + pixman_op_t op, + uint64_t * dest, + const uint64_t * src, + const uint64_t * mask, + int width); + +typedef void (*pixman_composite_func_t) (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src, + pixman_image_t * mask, + pixman_image_t * dest, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height); +typedef pixman_bool_t (*pixman_blt_func_t) (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height); +typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor); -/* FIXME: the (void)__read_func hides lots of warnings (which is what they - * are supposed to do), but some of them are real. 
For example the one - * where Fetch4 doesn't have a READ - */ +void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp); +void _pixman_setup_combiner_functions_64 (pixman_implementation_t *imp); -#if 0 -/* Framebuffer access support macros */ -#define ACCESS_MEM(code) \ - do { \ - const image_common_t *const com__ = \ - (image_common_t *)image; \ - \ - if (!com__->read_func && !com__->write_func) \ - { \ - const int do_access__ = 0; \ - const pixman_read_memory_func_t read_func__ = NULL; \ - const pixman_write_memory_func_t write_func__ = NULL; \ - (void)read_func__; \ - (void)write_func__; \ - (void)do_access__; \ - \ - {code} \ - } \ - else \ - { \ - const int do_access__ = 1; \ - const pixman_read_memory_func_t read_func__ = \ - com__->read_func; \ - const pixman_write_memory_func_t write_func__ = \ - com__->write_func; \ - (void)read_func__; \ - (void)write_func__; \ - (void)do_access__; \ - \ - {code} \ - } \ - } while (0) -#endif +struct pixman_implementation_t +{ + pixman_implementation_t *toplevel; + pixman_implementation_t *delegate; -#ifdef PIXMAN_FB_ACCESSORS + pixman_composite_func_t composite; + pixman_blt_func_t blt; + pixman_fill_func_t fill; -#define ACCESS(sym) sym##_accessors + pixman_combine_32_func_t combine_32[PIXMAN_OP_LAST]; + pixman_combine_32_func_t combine_32_ca[PIXMAN_OP_LAST]; + pixman_combine_64_func_t combine_64[PIXMAN_OP_LAST]; + pixman_combine_64_func_t combine_64_ca[PIXMAN_OP_LAST]; +}; -#define READ(img, ptr) \ - ((img)->common.read_func ((ptr), sizeof(*(ptr)))) -#define WRITE(img, ptr,val) \ - ((img)->common.write_func ((ptr), (val), sizeof (*(ptr)))) +pixman_implementation_t * +_pixman_implementation_create (pixman_implementation_t *delegate); -#define MEMCPY_WRAPPED(img, dst, src, size) \ - do { \ - size_t _i; \ - uint8_t *_dst = (uint8_t*)(dst), *_src = (uint8_t*)(src); \ - for(_i = 0; _i < size; _i++) { \ - WRITE((img), _dst +_i, READ((img), _src + _i)); \ - } \ - } while (0) +void +_pixman_implementation_combine_32 (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width); +void +_pixman_implementation_combine_64 (pixman_implementation_t *imp, + pixman_op_t op, + uint64_t * dest, + const uint64_t * src, + const uint64_t * mask, + int width); +void +_pixman_implementation_combine_32_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width); +void +_pixman_implementation_combine_64_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint64_t * dest, + const uint64_t * src, + const uint64_t * mask, + int width); +void +_pixman_implementation_composite (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src, + pixman_image_t * mask, + pixman_image_t * dest, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height); -#define MEMSET_WRAPPED(img, dst, val, size) \ - do { \ - size_t _i; \ - uint8_t *_dst = (uint8_t*)(dst); \ - for(_i = 0; _i < (size_t) size; _i++) { \ - WRITE((img), _dst +_i, (val)); \ - } \ - } while (0) +pixman_bool_t +_pixman_implementation_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height); -#else +pixman_bool_t +_pixman_implementation_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + 
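/* PIXMAN_FB_ACCESSORS above is what produces the *_accessors symbol
 * variants seen elsewhere in this header: the same sources compile
 * twice, once with READ/WRITE as plain pointer dereferences and once
 * routed through the installed read_func/write_func, so indirect
 * framebuffer access costs nothing unless a caller actually set up
 * memory accessors.
 */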
int bpp, + int x, + int y, + int width, + int height, + uint32_t xor); -#define ACCESS(sym) sym +/* Specific implementations */ +pixman_implementation_t * +_pixman_implementation_create_general (void); -#define READ(img, ptr) (*(ptr)) -#define WRITE(img, ptr, val) (*(ptr) = (val)) -#define MEMCPY_WRAPPED(img, dst, src, size) \ - memcpy(dst, src, size) -#define MEMSET_WRAPPED(img, dst, val, size) \ - memset(dst, val, size) +pixman_implementation_t * +_pixman_implementation_create_fast_path (void); +#ifdef USE_MMX +pixman_implementation_t * +_pixman_implementation_create_mmx (void); #endif -#define fbComposeGetSolid(img, res, fmt) \ - do \ - { \ - pixman_format_code_t format__; \ - if (img->type == SOLID) \ - { \ - format__ = PIXMAN_a8r8g8b8; \ - (res) = img->solid.color; \ - } \ - else \ - { \ - uint32_t *bits__ = (img)->bits.bits; \ - format__ = (img)->bits.format; \ - \ - switch (PIXMAN_FORMAT_BPP((img)->bits.format)) \ - { \ - case 32: \ - (res) = READ(img, (uint32_t *)bits__); \ - break; \ - case 24: \ - (res) = Fetch24(img, (uint8_t *) bits__); \ - break; \ - case 16: \ - (res) = READ(img, (uint16_t *) bits__); \ - (res) = cvt0565to0888(res); \ - break; \ - case 8: \ - (res) = READ(img, (uint8_t *) bits__); \ - (res) = (res) << 24; \ - break; \ - case 1: \ - (res) = READ(img, (uint32_t *) bits__); \ - (res) = FbLeftStipBits((res),1) ? 0xff000000 : 0x00000000; \ - break; \ - default: \ - return; \ - } \ - /* manage missing src alpha */ \ - if (!PIXMAN_FORMAT_A((img)->bits.format)) \ - (res) |= 0xff000000; \ - } \ - \ - /* If necessary, convert RGB <--> BGR. */ \ - if (PIXMAN_FORMAT_TYPE (format__) != PIXMAN_FORMAT_TYPE(fmt)) \ - { \ - (res) = ((((res) & 0xff000000) >> 0) | \ - (((res) & 0x00ff0000) >> 16) | \ - (((res) & 0x0000ff00) >> 0) | \ - (((res) & 0x000000ff) << 16)); \ - } \ - } \ - while (0) +#ifdef USE_SSE2 +pixman_implementation_t * +_pixman_implementation_create_sse2 (void); +#endif + +#ifdef USE_ARM_SIMD +pixman_implementation_t * +_pixman_implementation_create_arm_simd (void); +#endif + +#ifdef USE_ARM_NEON +pixman_implementation_t * +_pixman_implementation_create_arm_neon (void); +#endif + +#ifdef USE_VMX +pixman_implementation_t * +_pixman_implementation_create_vmx (void); +#endif + +pixman_implementation_t * +_pixman_choose_implementation (void); -#define fbComposeGetStart(pict,x,y,type,out_stride,line,mul) do { \ - uint32_t *__bits__; \ - int __stride__; \ - int __bpp__; \ - \ - __bits__ = pict->bits.bits; \ - __stride__ = pict->bits.rowstride; \ - __bpp__ = PIXMAN_FORMAT_BPP(pict->bits.format); \ - (out_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (type); \ - (line) = ((type *) __bits__) + \ - (out_stride) * (y) + (mul) * (x); \ - } while (0) -#define PIXMAN_FORMAT_16BPC(f) (PIXMAN_FORMAT_A(f) > 8 || \ - PIXMAN_FORMAT_R(f) > 8 || \ - PIXMAN_FORMAT_G(f) > 8 || \ - PIXMAN_FORMAT_B(f) > 8) /* - * Edges + * Utilities + */ + +/* These "formats" both have depth 0, so they + * will never clash with any real ones */ +#define PIXMAN_null PIXMAN_FORMAT (0, 0, 0, 0, 0, 0) +#define PIXMAN_solid PIXMAN_FORMAT (0, 1, 0, 0, 0, 0) -#define MAX_ALPHA(n) ((1 << (n)) - 1) -#define N_Y_FRAC(n) ((n) == 1 ? 1 : (1 << ((n)/2)) - 1) -#define N_X_FRAC(n) ((n) == 1 ? 
1 : (1 << ((n)/2)) + 1) +#define NEED_COMPONENT_ALPHA (1 << 0) +#define NEED_PIXBUF (1 << 1) +#define NEED_SOLID_MASK (1 << 2) -#define STEP_Y_SMALL(n) (pixman_fixed_1 / N_Y_FRAC(n)) -#define STEP_Y_BIG(n) (pixman_fixed_1 - (N_Y_FRAC(n) - 1) * STEP_Y_SMALL(n)) +typedef struct +{ + pixman_op_t op; + pixman_format_code_t src_format; + pixman_format_code_t mask_format; + pixman_format_code_t dest_format; + pixman_composite_func_t func; + uint32_t flags; +} pixman_fast_path_t; -#define Y_FRAC_FIRST(n) (STEP_Y_SMALL(n) / 2) -#define Y_FRAC_LAST(n) (Y_FRAC_FIRST(n) + (N_Y_FRAC(n) - 1) * STEP_Y_SMALL(n)) +/* Memory allocation helpers */ +void * +pixman_malloc_ab (unsigned int n, unsigned int b); -#define STEP_X_SMALL(n) (pixman_fixed_1 / N_X_FRAC(n)) -#define STEP_X_BIG(n) (pixman_fixed_1 - (N_X_FRAC(n) - 1) * STEP_X_SMALL(n)) +void * +pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c); -#define X_FRAC_FIRST(n) (STEP_X_SMALL(n) / 2) -#define X_FRAC_LAST(n) (X_FRAC_FIRST(n) + (N_X_FRAC(n) - 1) * STEP_X_SMALL(n)) +pixman_bool_t +pixman_multiply_overflows_int (unsigned int a, unsigned int b); -#define RenderSamplesX(x,n) ((n) == 1 ? 0 : (pixman_fixed_frac (x) + X_FRAC_FIRST(n)) / STEP_X_SMALL(n)) +pixman_bool_t +pixman_addition_overflows_int (unsigned int a, unsigned int b); -/* - * Step across a small sample grid gap - */ -#define RenderEdgeStepSmall(edge) { \ - edge->x += edge->stepx_small; \ - edge->e += edge->dx_small; \ - if (edge->e > 0) \ - { \ - edge->e -= edge->dy; \ - edge->x += edge->signdx; \ - } \ -} +/* Compositing utilities */ +pixman_bool_t +_pixman_run_fast_path (const pixman_fast_path_t *paths, + pixman_implementation_t * imp, + pixman_op_t op, + pixman_image_t * src, + pixman_image_t * mask, + pixman_image_t * dest, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height); -/* - * Step across a large sample grid gap - */ -#define RenderEdgeStepBig(edge) { \ - edge->x += edge->stepx_big; \ - edge->e += edge->dx_big; \ - if (edge->e > 0) \ - { \ - edge->e -= edge->dy; \ - edge->x += edge->signdx; \ - } \ -} +void +_pixman_walk_composite_region (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int16_t src_x, + int16_t src_y, + int16_t mask_x, + int16_t mask_y, + int16_t dest_x, + int16_t dest_y, + uint16_t width, + uint16_t height, + pixman_composite_func_t composite_rect); void -pixman_rasterize_edges_accessors (pixman_image_t *image, - pixman_edge_t *l, - pixman_edge_t *r, - pixman_fixed_t t, - pixman_fixed_t b); +pixman_expand (uint64_t * dst, + const uint32_t * src, + pixman_format_code_t format, + int width); -pixman_bool_t -pixman_image_is_opaque(pixman_image_t *image); +void +pixman_contract (uint32_t * dst, + const uint64_t *src, + int width); + +/* Region Helpers */ pixman_bool_t -pixman_image_can_get_solid (pixman_image_t *image); +pixman_region32_copy_from_region16 (pixman_region32_t *dst, + pixman_region16_t *src); pixman_bool_t -pixman_compute_composite_region32 (pixman_region32_t * pRegion, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height); - -/* GCC visibility */ -#if defined(__GNUC__) && __GNUC__ >= 4 -#define PIXMAN_EXPORT __attribute__ ((visibility("default"))) -/* Sun Studio 8 visibility */ -#elif defined(__SUNPRO_C) 
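/* The NEED_* flags defined above qualify a fast-path entry beyond its
 * format triple (roughly): NEED_COMPONENT_ALPHA requires the mask to
 * be in component-alpha mode, NEED_PIXBUF requires source and mask to
 * be the same pixbuf-style image, and NEED_SOLID_MASK requires the
 * mask to be a solid image, as in the mmx_fast_paths table earlier.
 */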
&& (__SUNPRO_C >= 0x550) -#define PIXMAN_EXPORT __global -#else -#define PIXMAN_EXPORT +pixman_region16_copy_from_region32 (pixman_region16_t *dst, + pixman_region32_t *src); + + +/* Misc macros */ + +#ifndef FALSE +# define FALSE 0 #endif -/* Region Helpers */ -pixman_bool_t pixman_region32_copy_from_region16 (pixman_region32_t *dst, - pixman_region16_t *src); -pixman_bool_t pixman_region16_copy_from_region32 (pixman_region16_t *dst, - pixman_region32_t *src); -void pixman_region_internal_set_static_pointers (pixman_box16_t *empty_box, - pixman_region16_data_t *empty_data, - pixman_region16_data_t *broken_data); +#ifndef TRUE +# define TRUE 1 +#endif -#ifdef PIXMAN_TIMING +#ifndef MIN +# define MIN(a, b) ((a < b) ? a : b) +#endif -/* Timing */ -static inline uint64_t -oil_profile_stamp_rdtsc (void) -{ - uint64_t ts; - __asm__ __volatile__("rdtsc\n" : "=A" (ts)); - return ts; -} -#define OIL_STAMP oil_profile_stamp_rdtsc +#ifndef MAX +# define MAX(a, b) ((a > b) ? a : b) +#endif -typedef struct PixmanTimer PixmanTimer; +/* Integer division that rounds towards -infinity */ +#define DIV(a, b) \ + ((((a) < 0) == ((b) < 0)) ? (a) / (b) : \ + ((a) - (b) + 1 - (((b) < 0) << 1)) / (b)) -struct PixmanTimer -{ - int initialized; - const char *name; - uint64_t n_times; - uint64_t total; - PixmanTimer *next; -}; +/* Modulus that produces the remainder wrt. DIV */ +#define MOD(a, b) ((a) < 0 ? ((b) - ((-(a) - 1) % (b))) - 1 : (a) % (b)) -extern int timer_defined; -void pixman_timer_register (PixmanTimer *timer); +#define CLIP(v, low, high) ((v) < (low) ? (low) : ((v) > (high) ? (high) : (v))) -#define TIMER_BEGIN(tname) \ - { \ - static PixmanTimer timer##tname; \ - uint64_t begin##tname; \ - \ - if (!timer##tname.initialized) \ - { \ - timer##tname.initialized = 1; \ - timer##tname.name = #tname; \ - pixman_timer_register (&timer##tname); \ - } \ - \ - timer##tname.n_times++; \ - begin##tname = OIL_STAMP(); +/* Conversion between 8888 and 0565 */ -#define TIMER_END(tname) \ - timer##tname.total += OIL_STAMP() - begin##tname; \ - } +#define CONVERT_8888_TO_0565(s) \ + ((((s) >> 3) & 0x001f) | \ + (((s) >> 5) & 0x07e0) | \ + (((s) >> 8) & 0xf800)) -#endif /* PIXMAN_TIMING */ +#define CONVERT_0565_TO_0888(s) \ + (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) | \ + ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) | \ + ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000))) -typedef struct pixman_implementation_t pixman_implementation_t; +#define PIXMAN_FORMAT_IS_WIDE(f) \ + (PIXMAN_FORMAT_A (f) > 8 || \ + PIXMAN_FORMAT_R (f) > 8 || \ + PIXMAN_FORMAT_G (f) > 8 || \ + PIXMAN_FORMAT_B (f) > 8) -typedef void (* pixman_combine_32_func_t) (pixman_implementation_t * imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width); - -typedef void (* pixman_combine_64_func_t) (pixman_implementation_t * imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width); - -typedef void (* pixman_composite_func_t) (pixman_implementation_t * imp, - pixman_op_t op, - pixman_image_t * src, - pixman_image_t * mask, - pixman_image_t * dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height); -typedef pixman_bool_t (* pixman_blt_func_t) (pixman_implementation_t * imp, - uint32_t * src_bits, - uint32_t * dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dst_x, - int dst_y, - int width, - int height); -typedef 
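/* Worked example for the DIV/MOD macros defined above: C division
 * truncates towards zero, so -7 / 2 == -3 with remainder -1, which is
 * useless for tiling.  DIV(-7, 2) evaluates (-7 - 2 + 1) / 2 == -4
 * and MOD(-7, 2) == (2 - (6 % 2)) - 1 == 1, so DIV(a,b)·b + MOD(a,b)
 * still equals a while the remainder stays in [0, b), which is what
 * NORMAL repeat needs for pixels left of the origin.
 */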
pixman_bool_t (* pixman_fill_func_t) (pixman_implementation_t *imp, - uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor); +/* + * Various debugging code + */ -void -_pixman_walk_composite_region (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height, - pixman_bool_t srcRepeat, - pixman_bool_t maskRepeat, - pixman_composite_func_t compositeRect); +#undef DEBUG +#define DEBUG 0 -void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp); -void _pixman_setup_combiner_functions_64 (pixman_implementation_t *imp); +#if DEBUG -/* These "formats" both have depth 0, so they - * will never clash with any real ones +#define return_if_fail(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "In %s: %s failed\n", FUNC, # expr); \ + return; \ + } \ + } \ + while (0) + +#define return_val_if_fail(expr, retval) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "In %s: %s failed\n", FUNC, # expr); \ + return (retval); \ + } \ + } \ + while (0) + +#else + +#define return_if_fail(expr) \ + do \ + { \ + if (!(expr)) \ + return; \ + } \ + while (0) + +#define return_val_if_fail(expr, retval) \ + do \ + { \ + if (!(expr)) \ + return (retval); \ + } \ + while (0) + +#endif + +/* + * Timers */ -#define PIXMAN_null PIXMAN_FORMAT(0,0,0,0,0,0) -#define PIXMAN_solid PIXMAN_FORMAT(0,1,0,0,0,0) -#define NEED_COMPONENT_ALPHA (1 << 0) -#define NEED_PIXBUF (1 << 1) -#define NEED_SOLID_MASK (1 << 2) +#ifdef PIXMAN_TIMERS -typedef struct +static inline uint64_t +oil_profile_stamp_rdtsc (void) { - pixman_op_t op; - pixman_format_code_t src_format; - pixman_format_code_t mask_format; - pixman_format_code_t dest_format; - pixman_composite_func_t func; - uint32_t flags; -} FastPathInfo; + uint64_t ts; -struct pixman_implementation_t + __asm__ __volatile__ ("rdtsc\n" : "=A" (ts)); + return ts; +} + +#define OIL_STAMP oil_profile_stamp_rdtsc + +typedef struct pixman_timer_t pixman_timer_t; + +struct pixman_timer_t { - pixman_implementation_t * toplevel; - pixman_implementation_t * delegate; - - pixman_composite_func_t composite; - pixman_blt_func_t blt; - pixman_fill_func_t fill; - - pixman_combine_32_func_t combine_32[PIXMAN_OP_LAST]; - pixman_combine_32_func_t combine_32_ca[PIXMAN_OP_LAST]; - pixman_combine_64_func_t combine_64[PIXMAN_OP_LAST]; - pixman_combine_64_func_t combine_64_ca[PIXMAN_OP_LAST]; + int initialized; + const char * name; + uint64_t n_times; + uint64_t total; + pixman_timer_t *next; }; -pixman_implementation_t * -_pixman_implementation_create (pixman_implementation_t *toplevel, - pixman_implementation_t *delegate); +extern int timer_defined; -void -_pixman_implementation_combine_32 (pixman_implementation_t * imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width); -void -_pixman_implementation_combine_64 (pixman_implementation_t * imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width); -void -_pixman_implementation_combine_32_ca (pixman_implementation_t * imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width); -void -_pixman_implementation_combine_64_ca (pixman_implementation_t * imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width); 
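/* All combine_32 hooks share one contract: composite `width` pixels
 * from src into dest in place.  A plain C reference for the OVER
 * combiner (a sketch of the unified-alpha case; the real combiners
 * also receive the implementation pointer and an optional mask):
 *
 *     static void
 *     combine_over_u (uint32_t *dest, const uint32_t *src, int width)
 *     {
 *         int i;
 *
 *         for (i = 0; i < width; ++i)
 *         {
 *             uint32_t s = src[i];
 *             uint32_t ia = 0xff - (s >> 24);  // inverse source alpha
 *             uint32_t d = dest[i], r = 0;
 *             int shift;
 *
 *             // per channel: dest = src + dest · (1 - alpha_src)
 *             for (shift = 0; shift < 32; shift += 8)
 *             {
 *                 uint32_t t = ((d >> shift) & 0xff) * ia + 0x80;
 *                 uint32_t c = ((s >> shift) & 0xff)
 *                            + ((t + (t >> 8)) >> 8);  // div_255
 *
 *                 r |= (c > 0xff ? 0xff : c) << shift;
 *             }
 *             dest[i] = r;
 *         }
 *     }
 */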
-void -_pixman_implementation_composite (pixman_implementation_t * imp, - pixman_op_t op, - pixman_image_t * src, - pixman_image_t * mask, - pixman_image_t * dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height); +void pixman_timer_register (pixman_timer_t *timer); + +#define TIMER_BEGIN(tname) \ + { \ + static pixman_timer_t timer ## tname; \ + uint64_t begin ## tname; \ + \ + if (!timer ## tname.initialized) \ + { \ + timer ## tname.initialized = 1; \ + timer ## tname.name = # tname; \ + pixman_timer_register (&timer ## tname); \ + } \ + \ + timer ## tname.n_times++; \ + begin ## tname = OIL_STAMP (); -pixman_bool_t -_pixman_implementation_blt (pixman_implementation_t * imp, - uint32_t * src_bits, - uint32_t * dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, - int src_y, - int dst_x, - int dst_y, - int width, - int height); -pixman_bool_t -_pixman_implementation_fill (pixman_implementation_t * imp, - uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor); - -/* Specific implementations */ -pixman_implementation_t * -_pixman_implementation_create_general (pixman_implementation_t *toplevel); -pixman_implementation_t * -_pixman_implementation_create_fast_path (pixman_implementation_t *toplevel); -#ifdef USE_MMX -pixman_implementation_t * -_pixman_implementation_create_mmx (pixman_implementation_t *toplevel); -#endif -#ifdef USE_SSE2 -pixman_implementation_t * -_pixman_implementation_create_sse2 (pixman_implementation_t *toplevel); -#endif -#ifdef USE_ARM_SIMD -pixman_implementation_t * -_pixman_implementation_create_arm_simd (pixman_implementation_t *toplevel); -#endif -#ifdef USE_ARM_NEON -pixman_implementation_t * -_pixman_implementation_create_arm_neon (pixman_implementation_t *toplevel); -#endif -#ifdef USE_VMX -pixman_implementation_t * -_pixman_implementation_create_vmx (pixman_implementation_t *toplevel); -#endif +#define TIMER_END(tname) \ + timer ## tname.total += OIL_STAMP () - begin ## tname; \ + } -pixman_bool_t -_pixman_run_fast_path (const FastPathInfo *paths, - pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t *src, - pixman_image_t *mask, - pixman_image_t *dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height); - -pixman_implementation_t * -_pixman_choose_implementation (void); +#endif /* PIXMAN_TIMERS */ #endif /* PIXMAN_PRIVATE_H */ diff --git a/lib/pixman/pixman/pixman-radial-gradient.c b/lib/pixman/pixman/pixman-radial-gradient.c index 4a4543001..022157b9b 100644 --- a/lib/pixman/pixman/pixman-radial-gradient.c +++ b/lib/pixman/pixman/pixman-radial-gradient.c @@ -26,14 +26,21 @@ * SOFTWARE. 
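With the new PIXMAN_TIMERS configure knob enabled, the TIMER_BEGIN/TIMER_END pair above brackets a code section, counts invocations, and accumulates rdtsc cycle stamps into a static pixman_timer_t that registers itself on first use. A portable approximation of the same pattern, assuming nothing from the patch beyond its structure; clock () replaces the x86-only rdtsc stamp and the registry is a single global list:

    #include <stdio.h>
    #include <stdint.h>
    #include <time.h>

    typedef struct sketch_timer sketch_timer;
    struct sketch_timer
    {
        int           initialized;
        const char   *name;
        uint64_t      n_times;
        uint64_t      total;
        sketch_timer *next;
    };

    static sketch_timer *all_timers;

    static void
    timer_register (sketch_timer *timer)
    {
        timer->next = all_timers;
        all_timers = timer;
    }

    /* First use initializes and registers the static timer, exactly
     * like the pixman macros; later uses only count and stamp. */
    #define TIMER_BEGIN(tname)                       \
        {                                            \
            static sketch_timer timer ## tname;      \
            uint64_t begin ## tname;                 \
                                                     \
            if (!timer ## tname.initialized)         \
            {                                        \
                timer ## tname.initialized = 1;      \
                timer ## tname.name = # tname;       \
                timer_register (&timer ## tname);    \
            }                                        \
            timer ## tname.n_times++;                \
            begin ## tname = (uint64_t) clock ();

    #define TIMER_END(tname)                                              \
            timer ## tname.total += (uint64_t) clock () - begin ## tname; \
        }

    int main (void)
    {
        volatile long sink = 0;
        sketch_timer *t;
        int i;
        long j;

        for (i = 0; i < 100; i++)
        {
            TIMER_BEGIN (busy_loop)
            for (j = 0; j < 100000; j++)
                sink += j;
            TIMER_END (busy_loop)
        }

        for (t = all_timers; t; t = t->next)
        {
            printf ("%s: %llu calls, %llu ticks\n", t->name,
                    (unsigned long long) t->n_times,
                    (unsigned long long) t->total);
        }
        return 0;
    }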
*/ +#ifdef HAVE_CONFIG_H #include <config.h> +#endif #include <stdlib.h> #include <math.h> #include "pixman-private.h" static void -radial_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width, - uint32_t *buffer, uint32_t *mask, uint32_t maskBits) +radial_gradient_get_scanline_32 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { /* * In the radial gradient problem we are given two circles (c₁,r₁) and @@ -153,8 +160,8 @@ radial_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width, gradient_t *gradient = (gradient_t *)image; source_image_t *source = (source_image_t *)image; radial_gradient_t *radial = (radial_gradient_t *)image; - uint32_t *end = buffer + width; - GradientWalker walker; + uint32_t *end = buffer + width; + pixman_gradient_walker_t walker; pixman_bool_t affine = TRUE; double cx = 1.; double cy = 0.; @@ -162,67 +169,101 @@ radial_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width, double rx = x + 0.5; double ry = y + 0.5; double rz = 1.; - + _pixman_gradient_walker_init (&walker, gradient, source->common.repeat); - - if (source->common.transform) { + + if (source->common.transform) + { pixman_vector_t v; /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2; - v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2; + v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2; v.vector[2] = pixman_fixed_1; + if (!pixman_transform_point_3d (source->common.transform, &v)) return; + + cx = source->common.transform->matrix[0][0] / 65536.; + cy = source->common.transform->matrix[1][0] / 65536.; + cz = source->common.transform->matrix[2][0] / 65536.; - cx = source->common.transform->matrix[0][0]/65536.; - cy = source->common.transform->matrix[1][0]/65536.; - cz = source->common.transform->matrix[2][0]/65536.; - rx = v.vector[0]/65536.; - ry = v.vector[1]/65536.; - rz = v.vector[2]/65536.; - affine = source->common.transform->matrix[2][0] == 0 && v.vector[2] == pixman_fixed_1; + rx = v.vector[0] / 65536.; + ry = v.vector[1] / 65536.; + rz = v.vector[2] / 65536.; + + affine = + source->common.transform->matrix[2][0] == 0 && + v.vector[2] == pixman_fixed_1; } - - if (affine) { - while (buffer < end) { - if (!mask || *mask++ & maskBits) + + if (affine) + { + /* When computing t over a scanline, we notice that some expressions + * are constant so we can compute them just once. Given: + * + * t = (-2·B ± ⎷(B² - 4·A·C)) / 2·A + * + * where + * + * A = cdx² + cdy² - dr² [precomputed as radial->A] + * B = -2·(pdx·cdx + pdy·cdy + r₁·dr) + * C = pdx² + pdy² - r₁² + * + * Since we have an affine transformation, we know that (pdx, pdy) + * increase linearly with each pixel, + * + * pdx = pdx₀ + n·cx, + * pdy = pdy₀ + n·cy, + * + * we can then express B in terms of an linear increment along + * the scanline: + * + * B = B₀ + n·cB, with + * B₀ = -2·(pdx₀·cdx + pdy₀·cdy + r₁·dr) and + * cB = -2·(cx·cdx + cy·cdy) + * + * Thus we can replace the full evaluation of B per-pixel (4 multiplies, + * 2 additions) with a single addition. + */ + double r1 = radial->c1.radius / 65536.; + double r1sq = r1 * r1; + double pdx = rx - radial->c1.x / 65536.; + double pdy = ry - radial->c1.y / 65536.; + double A = radial->A; + double invA = -65536. / (2. * A); + double A4 = -4. * A; + double B = -2. * (pdx*radial->cdx + pdy*radial->cdy + r1*radial->dr); + double cB = -2. 
* (cx*radial->cdx + cy*radial->cdy); + pixman_bool_t invert = A * radial->dr < 0; + + while (buffer < end) + { + if (!mask || *mask++ & mask_bits) { - double pdx, pdy; - double B, C; - double det; - double c1x = radial->c1.x / 65536.0; - double c1y = radial->c1.y / 65536.0; - double r1 = radial->c1.radius / 65536.0; pixman_fixed_48_16_t t; - - pdx = rx - c1x; - pdy = ry - c1y; - - B = -2 * ( pdx * radial->cdx - + pdy * radial->cdy - + r1 * radial->dr); - C = (pdx * pdx + pdy * pdy - r1 * r1); - - det = (B * B) - (4 * radial->A * C); - if (det < 0.0) - det = 0.0; - - if (radial->A < 0) - t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536); + double det = B * B + A4 * (pdx * pdx + pdy * pdy - r1sq); + if (det <= 0.) + t = (pixman_fixed_48_16_t) (B * invA); + else if (invert) + t = (pixman_fixed_48_16_t) ((B + sqrt (det)) * invA); else - t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536); - - *(buffer) = _pixman_gradient_walker_pixel (&walker, t); + t = (pixman_fixed_48_16_t) ((B - sqrt (det)) * invA); + + *buffer = _pixman_gradient_walker_pixel (&walker, t); } ++buffer; - - rx += cx; - ry += cy; + + pdx += cx; + pdy += cy; + B += cB; } - } else { + } + else + { /* projective */ - while (buffer < end) { - if (!mask || *mask++ & maskBits) + while (buffer < end) + { + if (!mask || *mask++ & mask_bits) { double pdx, pdy; double B, C; @@ -232,78 +273,81 @@ radial_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width, double r1 = radial->c1.radius / 65536.0; pixman_fixed_48_16_t t; double x, y; - - if (rz != 0) { - x = rx/rz; - y = ry/rz; - } else { + + if (rz != 0) + { + x = rx / rz; + y = ry / rz; + } + else + { x = y = 0.; } - + pdx = x - c1x; pdy = y - c1y; - - B = -2 * ( pdx * radial->cdx - + pdy * radial->cdy - + r1 * radial->dr); + + B = -2 * (pdx * radial->cdx + + pdy * radial->cdy + + r1 * radial->dr); C = (pdx * pdx + pdy * pdy - r1 * r1); - + det = (B * B) - (4 * radial->A * C); if (det < 0.0) det = 0.0; - - if (radial->A < 0) - t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536); + + if (radial->A * radial->dr < 0) + t = (pixman_fixed_48_16_t) ((-B - sqrt (det)) / (2.0 * radial->A) * 65536); else - t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536); - - *(buffer) = _pixman_gradient_walker_pixel (&walker, t); + t = (pixman_fixed_48_16_t) ((-B + sqrt (det)) / (2.0 * radial->A) * 65536); + + *buffer = _pixman_gradient_walker_pixel (&walker, t); } - ++buffer; + ++buffer; + rx += cx; ry += cy; rz += cz; } } - } static void radial_gradient_property_changed (pixman_image_t *image) { - image->common.get_scanline_32 = (scanFetchProc)radial_gradient_get_scanline_32; - image->common.get_scanline_64 = (scanFetchProc)_pixman_image_get_scanline_64_generic; + image->common.get_scanline_32 = radial_gradient_get_scanline_32; + image->common.get_scanline_64 = _pixman_image_get_scanline_generic_64; } PIXMAN_EXPORT pixman_image_t * -pixman_image_create_radial_gradient (pixman_point_fixed_t *inner, - pixman_point_fixed_t *outer, - pixman_fixed_t inner_radius, - pixman_fixed_t outer_radius, - const pixman_gradient_stop_t *stops, - int n_stops) +pixman_image_create_radial_gradient (pixman_point_fixed_t * inner, + pixman_point_fixed_t * outer, + pixman_fixed_t inner_radius, + pixman_fixed_t outer_radius, + const pixman_gradient_stop_t *stops, + int n_stops) { pixman_image_t *image; radial_gradient_t *radial; - + return_val_if_fail (n_stops >= 2, NULL); - - image = _pixman_image_allocate(); - + + 
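The derivation in the comment above is the point of this rewrite of the affine path: since (pdx, pdy) advance linearly along the scanline, B = -2·(pdx·cdx + pdy·cdy + r₁·dr) can be carried forward with a single addition per pixel instead of four multiplies and two additions. A standalone check of that identity; the circle geometry and per-pixel increments below are made up for illustration and do not come from the patch:

    #include <stdio.h>

    int main (void)
    {
        /* hypothetical gradient geometry and per-pixel step */
        double cdx = 3.0, cdy = 1.0, dr = 2.0, r1 = 1.5;
        double cx  = 0.25, cy = 0.0;
        double pdx = -4.0, pdy = 2.0;   /* first pixel of the scanline */

        double B  = -2.0 * (pdx * cdx + pdy * cdy + r1 * dr);
        double cB = -2.0 * (cx * cdx + cy * cdy);
        int    n;

        for (n = 0; n < 4; n++)
        {
            double x = pdx + n * cx;
            double y = pdy + n * cy;
            double direct = -2.0 * (x * cdx + y * cdy + r1 * dr);

            /* incremental and per-pixel evaluation must agree */
            printf ("n=%d  incremental=%g  direct=%g\n", n, B, direct);

            B += cB;  /* the single addition replacing the re-evaluation */
        }
        return 0;
    }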
image = _pixman_image_allocate (); + if (!image) return NULL; - + radial = &image->radial; - + if (!_pixman_init_gradient (&radial->common, stops, n_stops)) { free (image); return NULL; } - + image->type = RADIAL; - + radial->c1.x = inner->x; radial->c1.y = inner->y; radial->c1.radius = inner_radius; @@ -313,14 +357,12 @@ pixman_image_create_radial_gradient (pixman_point_fixed_t *inner, radial->cdx = pixman_fixed_to_double (radial->c2.x - radial->c1.x); radial->cdy = pixman_fixed_to_double (radial->c2.y - radial->c1.y); radial->dr = pixman_fixed_to_double (radial->c2.radius - radial->c1.radius); - radial->A = (radial->cdx * radial->cdx - + radial->cdy * radial->cdy - - radial->dr * radial->dr); - + radial->A = (radial->cdx * radial->cdx + + radial->cdy * radial->cdy - + radial->dr * radial->dr); + image->common.property_changed = radial_gradient_property_changed; - - radial_gradient_property_changed (image); - + return image; } diff --git a/lib/pixman/pixman/pixman-region.c b/lib/pixman/pixman/pixman-region.c index dec2c9de3..8ce5deb77 100644 --- a/lib/pixman/pixman/pixman-region.c +++ b/lib/pixman/pixman/pixman-region.c @@ -1,112 +1,156 @@ -/*********************************************************** - -Copyright 1987, 1988, 1989, 1998 The Open Group - -Permission to use, copy, modify, distribute, and sell this software and its -documentation for any purpose is hereby granted without fee, provided that -the above copyright notice appear in all copies and that both that -copyright notice and this permission notice appear in supporting -documentation. - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN -AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -Except as contained in this notice, the name of The Open Group shall not be -used in advertising or otherwise to promote the sale, use or other dealings -in this Software without prior written authorization from The Open Group. - -Copyright 1987, 1988, 1989 by -Digital Equipment Corporation, Maynard, Massachusetts. - - All Rights Reserved - -Permission to use, copy, modify, and distribute this software and its -documentation for any purpose and without fee is hereby granted, -provided that the above copyright notice appear in all copies and that -both that copyright notice and this permission notice appear in -supporting documentation, and that the name of Digital not be -used in advertising or publicity pertaining to distribution of the -software without specific, written prior permission. - -DIGITAL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL -DIGITAL BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR -ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS -SOFTWARE. 
- -******************************************************************/ +/* + * Copyright 1987, 1988, 1989, 1998 The Open Group + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation. + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name of The Open Group shall not be + * used in advertising or otherwise to promote the sale, use or other dealings + * in this Software without prior written authorization from The Open Group. + * + * Copyright 1987, 1988, 1989 by + * Digital Equipment Corporation, Maynard, Massachusetts. + * + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose and without fee is hereby granted, + * provided that the above copyright notice appear in all copies and that + * both that copyright notice and this permission notice appear in + * supporting documentation, and that the name of Digital not be + * used in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. + * + * DIGITAL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING + * ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL + * DIGITAL BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR + * ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + */ #include <stdlib.h> #include <limits.h> #include <string.h> #include <stdio.h> +#include "pixman-private.h" #define PIXREGION_NIL(reg) ((reg)->data && !(reg)->data->numRects) /* not a region */ -#define PIXREGION_NAR(reg) ((reg)->data == pixman_brokendata) -#define PIXREGION_NUM_RECTS(reg) ((reg)->data ? (reg)->data->numRects : 1) +#define PIXREGION_NAR(reg) ((reg)->data == pixman_broken_data) +#define PIXREGION_NUMRECTS(reg) ((reg)->data ? (reg)->data->numRects : 1) #define PIXREGION_SIZE(reg) ((reg)->data ? (reg)->data->size : 0) -#define PIXREGION_RECTS(reg) ((reg)->data ? (box_type_t *)((reg)->data + 1) \ - : &(reg)->extents) +#define PIXREGION_RECTS(reg) \ + ((reg)->data ? 
(box_type_t *)((reg)->data + 1) \ + : &(reg)->extents) #define PIXREGION_BOXPTR(reg) ((box_type_t *)((reg)->data + 1)) -#define PIXREGION_BOX(reg,i) (&PIXREGION_BOXPTR(reg)[i]) -#define PIXREGION_TOP(reg) PIXREGION_BOX(reg, (reg)->data->numRects) -#define PIXREGION_END(reg) PIXREGION_BOX(reg, (reg)->data->numRects - 1) +#define PIXREGION_BOX(reg, i) (&PIXREGION_BOXPTR (reg)[i]) +#define PIXREGION_TOP(reg) PIXREGION_BOX (reg, (reg)->data->numRects) +#define PIXREGION_END(reg) PIXREGION_BOX (reg, (reg)->data->numRects - 1) +#define GOOD_RECT(rect) ((rect)->x1 < (rect)->x2 && (rect)->y1 < (rect)->y2) +#define BAD_RECT(rect) ((rect)->x1 > (rect)->x2 || (rect)->y1 > (rect)->y2) -#undef assert -#ifdef DEBUG_PIXREGION -#define assert(expr) {if (!(expr)) \ - FatalError("Assertion failed file %s, line %d: expr\n", \ - __FILE__, __LINE__); } +/* Turn on debugging depending on what type of release this is + */ + +#if ((PIXMAN_VERSION_MICRO % 2) == 1) +/* Random git checkout. + * + * Those are often used for performance work, so we don't turn on the + * full self-checking, but we do turn on the asserts. + */ +# define FATAL_BUGS +# define noSELF_CHECKS +#elif ((PIXMAN_VERSION_MINOR % 2) == 0) +/* Stable release. + * + * We don't want assertions because the X server should stay alive + * if possible. We also don't want self-checks for performance-reasons. + */ +# define noFATAL_BUGS +# define noSELF_CHECKS #else -#define assert(expr) +/* Development snapshot. + * + * These are the things that get shipped in development distributions + * such as Rawhide. We want both self-checking and fatal assertions + * to catch as many bugs as possible. + */ +# define FATAL_BUGS +# define SELF_CHECKS #endif -#define good(reg) assert(PREFIX(_selfcheck) (reg)) +#ifndef FATAL_BUGS +# undef assert +# undef abort +# define assert(expr) +# define abort() +#endif -#undef MIN -#define MIN(a,b) ((a) < (b) ? (a) : (b)) -#undef MAX -#define MAX(a,b) ((a) > (b) ? (a) : (b)) +#ifdef SELF_CHECKS -static const box_type_t PREFIX(_emptyBox_) = {0, 0, 0, 0}; -static const region_data_type_t PREFIX(_emptyData_) = {0, 0}; -static const region_data_type_t PREFIX(_brokendata_) = {0, 0}; +static void +log_region_error (const char *function, const char *message) +{ + static int n_messages = 0; -static box_type_t *pixman_region_emptyBox = (box_type_t *)&PREFIX(_emptyBox_); -static region_data_type_t *pixman_region_emptyData = (region_data_type_t *)&PREFIX(_emptyData_); -static region_data_type_t *pixman_brokendata = (region_data_type_t *)&PREFIX(_brokendata_); + if (n_messages < 50) + { + fprintf (stderr, + "*** BUG ***\n" + "%s: %s\n" + "Set a breakpoint on 'log_region_error' to debug\n\n", + function, message); -/* This function exists only to make it possible to preserve the X ABI - it should - * go away at first opportunity. - * - * The problem is that the X ABI exports the three structs and has used - * them through macros. So the X server calls this function with - * the addresses of those structs which makes the existing code continue to - * work. 
- */ -void -PREFIX(_internal_set_static_pointers) (box_type_t *empty_box, - region_data_type_t *empty_data, - region_data_type_t *broken_data) -{ - pixman_region_emptyBox = empty_box; - pixman_region_emptyData = empty_data; - pixman_brokendata = broken_data; + abort (); /* This is #defined away unless FATAL_BUGS is defined */ + + n_messages++; + } } +#define GOOD(reg) \ + do \ + { \ + if (!PREFIX (_selfcheck (reg))) \ + log_region_error (FUNC, "Malformed region " # reg); \ + } while (0) + +#else + +#define log_region_error(function, name) +#define GOOD(reg) + +#endif + +static const box_type_t PREFIX (_empty_box_) = { 0, 0, 0, 0 }; +static const region_data_type_t PREFIX (_empty_data_) = { 0, 0 }; +static const region_data_type_t PREFIX (_broken_data_) = { 0, 0 }; + +static box_type_t *pixman_region_empty_box = + (box_type_t *)&PREFIX (_empty_box_); +static region_data_type_t *pixman_region_empty_data = + (region_data_type_t *)&PREFIX (_empty_data_); +static region_data_type_t *pixman_broken_data = + (region_data_type_t *)&PREFIX (_broken_data_); + static pixman_bool_t -pixman_break (region_type_t *pReg); +pixman_break (region_type_t *region); /* * The functions in this file implement the Region abstraction used extensively @@ -149,197 +193,275 @@ pixman_break (region_type_t *pReg); * * Adam de Boor wrote most of the original region code. Joel McCormack * substantially modified or rewrote most of the core arithmetic routines, and - * added pixman_region_validate in order to support several speed improvements to - * pixman_region_validateTree. Bob Scheifler changed the representation to be more - * compact when empty or a single rectangle, and did a bunch of gratuitous - * reformatting. Carl Worth did further gratuitous reformatting while re-merging - * the server and client region code into libpixregion. + * added pixman_region_validate in order to support several speed improvements + * to pixman_region_validate_tree. Bob Scheifler changed the representation + * to be more compact when empty or a single rectangle, and did a bunch of + * gratuitous reformatting. Carl Worth did further gratuitous reformatting + * while re-merging the server and client region code into libpixregion. + * Soren Sandmann did even more gratuitous reformatting. 
*/ /* true iff two Boxes overlap */ -#define EXTENTCHECK(r1,r2) \ - (!( ((r1)->x2 <= (r2)->x1) || \ - ((r1)->x1 >= (r2)->x2) || \ - ((r1)->y2 <= (r2)->y1) || \ - ((r1)->y1 >= (r2)->y2) ) ) +#define EXTENTCHECK(r1, r2) \ + (!( ((r1)->x2 <= (r2)->x1) || \ + ((r1)->x1 >= (r2)->x2) || \ + ((r1)->y2 <= (r2)->y1) || \ + ((r1)->y1 >= (r2)->y2) ) ) /* true iff (x,y) is in Box */ -#define INBOX(r,x,y) \ - ( ((r)->x2 > x) && \ - ((r)->x1 <= x) && \ - ((r)->y2 > y) && \ - ((r)->y1 <= y) ) +#define INBOX(r, x, y) \ + ( ((r)->x2 > x) && \ + ((r)->x1 <= x) && \ + ((r)->y2 > y) && \ + ((r)->y1 <= y) ) /* true iff Box r1 contains Box r2 */ -#define SUBSUMES(r1,r2) \ - ( ((r1)->x1 <= (r2)->x1) && \ - ((r1)->x2 >= (r2)->x2) && \ - ((r1)->y1 <= (r2)->y1) && \ - ((r1)->y2 >= (r2)->y2) ) +#define SUBSUMES(r1, r2) \ + ( ((r1)->x1 <= (r2)->x1) && \ + ((r1)->x2 >= (r2)->x2) && \ + ((r1)->y1 <= (r2)->y1) && \ + ((r1)->y2 >= (r2)->y2) ) static size_t -PIXREGION_SZOF(size_t n) +PIXREGION_SZOF (size_t n) { size_t size = n * sizeof(box_type_t); + if (n > UINT32_MAX / sizeof(box_type_t)) - return 0; + return 0; if (sizeof(region_data_type_t) > UINT32_MAX - size) - return 0; + return 0; return size + sizeof(region_data_type_t); } static void * -allocData(size_t n) +alloc_data (size_t n) { - size_t sz = PIXREGION_SZOF(n); + size_t sz = PIXREGION_SZOF (n); + if (!sz) return NULL; - return malloc(sz); + return malloc (sz); } -#define freeData(reg) if ((reg)->data && (reg)->data->size) free((reg)->data) +#define FREE_DATA(reg) if ((reg)->data && (reg)->data->size) free ((reg)->data) -#define RECTALLOC_BAIL(pReg,n,bail) \ -if (!(pReg)->data || (((pReg)->data->numRects + (n)) > (pReg)->data->size)) \ - if (!pixman_rect_alloc(pReg, n)) { goto bail; } - -#define RECTALLOC(pReg,n) \ -if (!(pReg)->data || (((pReg)->data->numRects + (n)) > (pReg)->data->size)) \ - if (!pixman_rect_alloc(pReg, n)) { return FALSE; } - -#define ADDRECT(pNextRect,nx1,ny1,nx2,ny2) \ -{ \ - pNextRect->x1 = nx1; \ - pNextRect->y1 = ny1; \ - pNextRect->x2 = nx2; \ - pNextRect->y2 = ny2; \ - pNextRect++; \ -} +#define RECTALLOC_BAIL(region, n, bail) \ + do \ + { \ + if (!(region)->data || \ + (((region)->data->numRects + (n)) > (region)->data->size)) \ + { \ + if (!pixman_rect_alloc (region, n)) \ + goto bail; \ + } \ + } while (0) -#define NEWRECT(pReg,pNextRect,nx1,ny1,nx2,ny2) \ -{ \ - if (!(pReg)->data || ((pReg)->data->numRects == (pReg)->data->size))\ +#define RECTALLOC(region, n) \ + do \ { \ - if (!pixman_rect_alloc(pReg, 1)) \ - return FALSE; \ - pNextRect = PIXREGION_TOP(pReg); \ - } \ - ADDRECT(pNextRect,nx1,ny1,nx2,ny2); \ - pReg->data->numRects++; \ - assert(pReg->data->numRects<=pReg->data->size); \ -} + if (!(region)->data || \ + (((region)->data->numRects + (n)) > (region)->data->size)) \ + { \ + if (!pixman_rect_alloc (region, n)) { \ + return FALSE; \ + } \ + } \ + } while (0) + +#define ADDRECT(next_rect, nx1, ny1, nx2, ny2) \ + do \ + { \ + next_rect->x1 = nx1; \ + next_rect->y1 = ny1; \ + next_rect->x2 = nx2; \ + next_rect->y2 = ny2; \ + next_rect++; \ + } \ + while (0) + +#define NEWRECT(region, next_rect, nx1, ny1, nx2, ny2) \ + do \ + { \ + if (!(region)->data || \ + ((region)->data->numRects == (region)->data->size)) \ + { \ + if (!pixman_rect_alloc (region, 1)) \ + return FALSE; \ + next_rect = PIXREGION_TOP (region); \ + } \ + ADDRECT (next_rect, nx1, ny1, nx2, ny2); \ + region->data->numRects++; \ + assert (region->data->numRects <= region->data->size); \ + } while (0) -#define DOWNSIZE(reg,numRects) \ - if (((numRects) < 
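PIXREGION_SZOF above refuses to produce a wrapped-around byte count: it returns 0 whenever n boxes plus the region header would exceed UINT32_MAX, and alloc_data maps that 0 to an allocation failure. The same pattern in a compressed, self-contained form, with the box and header types reduced to stand-in structs:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct { short x1, y1, x2, y2; } box_t;    /* stand-in box    */
    typedef struct { long size, num_rects; } header_t; /* stand-in header */

    /* Returns 0 on overflow; callers treat 0 as "cannot allocate" */
    static size_t
    region_size_of (size_t n)
    {
        size_t size = n * sizeof (box_t);

        if (n > UINT32_MAX / sizeof (box_t))
            return 0;
        if (sizeof (header_t) > UINT32_MAX - size)
            return 0;

        return size + sizeof (header_t);
    }

    static void *
    alloc_region_data (size_t n)
    {
        size_t sz = region_size_of (n);

        return sz ? malloc (sz) : NULL;   /* overflow becomes NULL */
    }

    int main (void)
    {
        void *ok  = alloc_region_data (16);
        void *bad = alloc_region_data ((size_t) UINT32_MAX);

        printf ("16 boxes: %s, 2^32-1 boxes: %s\n",
                ok ? "allocated" : "NULL", bad ? "allocated" : "NULL");
        free (ok);
        return 0;
    }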
((reg)->data->size >> 1)) && ((reg)->data->size > 50)) \ +#define DOWNSIZE(reg, numRects) \ + do \ { \ - region_data_type_t * NewData; \ - size_t data_size = PIXREGION_SZOF(numRects); \ - if (!data_size) \ - NewData = NULL; \ - else \ - NewData = (region_data_type_t *)realloc((reg)->data, data_size); \ - if (NewData) \ + if (((numRects) < ((reg)->data->size >> 1)) && \ + ((reg)->data->size > 50)) \ { \ - NewData->size = (numRects); \ - (reg)->data = NewData; \ + region_data_type_t * new_data; \ + size_t data_size = PIXREGION_SZOF (numRects); \ + \ + if (!data_size) \ + { \ + new_data = NULL; \ + } \ + else \ + { \ + new_data = (region_data_type_t *) \ + realloc ((reg)->data, data_size); \ + } \ + \ + if (new_data) \ + { \ + new_data->size = (numRects); \ + (reg)->data = new_data; \ + } \ } \ - } + } while (0) PIXMAN_EXPORT pixman_bool_t -PREFIX(_equal) (reg1, reg2) - region_type_t * reg1; - region_type_t * reg2; +PREFIX (_equal) (region_type_t *reg1, region_type_t *reg2) { int i; box_type_t *rects1; box_type_t *rects2; - if (reg1->extents.x1 != reg2->extents.x1) return FALSE; - if (reg1->extents.x2 != reg2->extents.x2) return FALSE; - if (reg1->extents.y1 != reg2->extents.y1) return FALSE; - if (reg1->extents.y2 != reg2->extents.y2) return FALSE; - if (PIXREGION_NUM_RECTS(reg1) != PIXREGION_NUM_RECTS(reg2)) return FALSE; + if (reg1->extents.x1 != reg2->extents.x1) + return FALSE; + + if (reg1->extents.x2 != reg2->extents.x2) + return FALSE; + + if (reg1->extents.y1 != reg2->extents.y1) + return FALSE; + + if (reg1->extents.y2 != reg2->extents.y2) + return FALSE; + + if (PIXREGION_NUMRECTS (reg1) != PIXREGION_NUMRECTS (reg2)) + return FALSE; - rects1 = PIXREGION_RECTS(reg1); - rects2 = PIXREGION_RECTS(reg2); - for (i = 0; i != PIXREGION_NUM_RECTS(reg1); i++) { - if (rects1[i].x1 != rects2[i].x1) return FALSE; - if (rects1[i].x2 != rects2[i].x2) return FALSE; - if (rects1[i].y1 != rects2[i].y1) return FALSE; - if (rects1[i].y2 != rects2[i].y2) return FALSE; + rects1 = PIXREGION_RECTS (reg1); + rects2 = PIXREGION_RECTS (reg2); + + for (i = 0; i != PIXREGION_NUMRECTS (reg1); i++) + { + if (rects1[i].x1 != rects2[i].x1) + return FALSE; + + if (rects1[i].x2 != rects2[i].x2) + return FALSE; + + if (rects1[i].y1 != rects2[i].y1) + return FALSE; + + if (rects1[i].y2 != rects2[i].y2) + return FALSE; } + return TRUE; } int -PREFIX(_print) (rgn) - region_type_t * rgn; +PREFIX (_print) (region_type_t *rgn) { int num, size; int i; box_type_t * rects; - num = PIXREGION_NUM_RECTS(rgn); - size = PIXREGION_SIZE(rgn); - rects = PIXREGION_RECTS(rgn); - fprintf(stderr, "num: %d size: %d\n", num, size); - fprintf(stderr, "extents: %d %d %d %d\n", - rgn->extents.x1, rgn->extents.y1, rgn->extents.x2, rgn->extents.y2); + num = PIXREGION_NUMRECTS (rgn); + size = PIXREGION_SIZE (rgn); + rects = PIXREGION_RECTS (rgn); + + fprintf (stderr, "num: %d size: %d\n", num, size); + fprintf (stderr, "extents: %d %d %d %d\n", + rgn->extents.x1, + rgn->extents.y1, + rgn->extents.x2, + rgn->extents.y2); + for (i = 0; i < num; i++) - fprintf(stderr, "%d %d %d %d \n", - rects[i].x1, rects[i].y1, rects[i].x2, rects[i].y2); - fprintf(stderr, "\n"); + { + fprintf (stderr, "%d %d %d %d \n", + rects[i].x1, rects[i].y1, rects[i].x2, rects[i].y2); + } + + fprintf (stderr, "\n"); + return(num); } PIXMAN_EXPORT void -PREFIX(_init) (region_type_t *region) +PREFIX (_init) (region_type_t *region) { - region->extents = *pixman_region_emptyBox; - region->data = pixman_region_emptyData; + region->extents = *pixman_region_empty_box; + 
region->data = pixman_region_empty_data; } PIXMAN_EXPORT void -PREFIX(_init_rect) (region_type_t *region, - int x, int y, unsigned int width, unsigned int height) +PREFIX (_init_rect) (region_type_t * region, + int x, + int y, + unsigned int width, + unsigned int height) { region->extents.x1 = x; region->extents.y1 = y; region->extents.x2 = x + width; region->extents.y2 = y + height; + + if (!GOOD_RECT (®ion->extents)) + { + if (BAD_RECT (®ion->extents)) + log_region_error (FUNC, "Invalid rectangle passed"); + PREFIX (_init) (region); + return; + } + region->data = NULL; } PIXMAN_EXPORT void -PREFIX(_init_with_extents) (region_type_t *region, box_type_t *extents) +PREFIX (_init_with_extents) (region_type_t *region, box_type_t *extents) { + if (!GOOD_RECT (extents)) + { + if (BAD_RECT (extents)) + log_region_error (FUNC, "Invalid rectangle passed"); + PREFIX (_init) (region); + return; + } region->extents = *extents; + region->data = NULL; } PIXMAN_EXPORT void -PREFIX(_fini) (region_type_t *region) +PREFIX (_fini) (region_type_t *region) { - good (region); - freeData (region); + GOOD (region); + FREE_DATA (region); } PIXMAN_EXPORT int -PREFIX(_n_rects) (region_type_t *region) +PREFIX (_n_rects) (region_type_t *region) { - return PIXREGION_NUM_RECTS (region); + return PIXREGION_NUMRECTS (region); } PIXMAN_EXPORT box_type_t * -PREFIX(_rectangles) (region_type_t *region, - int *n_rects) +PREFIX (_rectangles) (region_type_t *region, + int *n_rects) { if (n_rects) - *n_rects = PIXREGION_NUM_RECTS (region); + *n_rects = PIXREGION_NUMRECTS (region); return PIXREGION_RECTS (region); } @@ -347,81 +469,110 @@ PREFIX(_rectangles) (region_type_t *region, static pixman_bool_t pixman_break (region_type_t *region) { - freeData (region); - region->extents = *pixman_region_emptyBox; - region->data = pixman_brokendata; + FREE_DATA (region); + + region->extents = *pixman_region_empty_box; + region->data = pixman_broken_data; + return FALSE; } static pixman_bool_t -pixman_rect_alloc (region_type_t * region, int n) +pixman_rect_alloc (region_type_t * region, + int n) { region_data_type_t *data; if (!region->data) { n++; - region->data = allocData(n); + region->data = alloc_data (n); + if (!region->data) return pixman_break (region); + region->data->numRects = 1; - *PIXREGION_BOXPTR(region) = region->extents; + *PIXREGION_BOXPTR (region) = region->extents; } else if (!region->data->size) { - region->data = allocData(n); + region->data = alloc_data (n); + if (!region->data) return pixman_break (region); + region->data->numRects = 0; } else { size_t data_size; + if (n == 1) { n = region->data->numRects; if (n > 500) /* XXX pick numbers out of a hat */ n = 250; } + n += region->data->numRects; - data_size = PIXREGION_SZOF(n); + data_size = PIXREGION_SZOF (n); + if (!data_size) + { data = NULL; + } else - data = (region_data_type_t *)realloc(region->data, PIXREGION_SZOF(n)); + { + data = (region_data_type_t *) + realloc (region->data, PIXREGION_SZOF (n)); + } + if (!data) return pixman_break (region); + region->data = data; } + region->data->size = n; + return TRUE; } PIXMAN_EXPORT pixman_bool_t -PREFIX(_copy) (region_type_t *dst, region_type_t *src) +PREFIX (_copy) (region_type_t *dst, region_type_t *src) { - good(dst); - good(src); + GOOD (dst); + GOOD (src); + if (dst == src) return TRUE; + dst->extents = src->extents; + if (!src->data || !src->data->size) { - freeData(dst); + FREE_DATA (dst); dst->data = src->data; return TRUE; } + if (!dst->data || (dst->data->size < src->data->numRects)) { - freeData(dst); - 
dst->data = allocData(src->data->numRects); + FREE_DATA (dst); + + dst->data = alloc_data (src->data->numRects); + if (!dst->data) return pixman_break (dst); + dst->data->size = src->data->numRects; } + dst->data->numRects = src->data->numRects; - memmove((char *)PIXREGION_BOXPTR(dst),(char *)PIXREGION_BOXPTR(src), - dst->data->numRects * sizeof(box_type_t)); + + memmove ((char *)PIXREGION_BOXPTR (dst), (char *)PIXREGION_BOXPTR (src), + dst->data->numRects * sizeof(box_type_t)); + return TRUE; } @@ -448,30 +599,30 @@ PREFIX(_copy) (region_type_t *dst, region_type_t *src) *----------------------------------------------------------------------- */ static inline int -pixman_coalesce ( - region_type_t * region, /* Region to coalesce */ - int prevStart, /* Index of start of previous band */ - int curStart) /* Index of start of current band */ +pixman_coalesce (region_type_t * region, /* Region to coalesce */ + int prev_start, /* Index of start of previous band */ + int cur_start) /* Index of start of current band */ { - box_type_t * pPrevBox; /* Current box in previous band */ - box_type_t * pCurBox; /* Current box in current band */ - int numRects; /* Number rectangles in both bands */ - int y2; /* Bottom of current band */ + box_type_t *prev_box; /* Current box in previous band */ + box_type_t *cur_box; /* Current box in current band */ + int numRects; /* Number rectangles in both bands */ + int y2; /* Bottom of current band */ + /* * Figure out how many rectangles are in the band. */ - numRects = curStart - prevStart; - assert(numRects == region->data->numRects - curStart); + numRects = cur_start - prev_start; + assert (numRects == region->data->numRects - cur_start); - if (!numRects) return curStart; + if (!numRects) return cur_start; /* * The bands may only be coalesced if the bottom of the previous * matches the top scanline of the current. */ - pPrevBox = PIXREGION_BOX(region, prevStart); - pCurBox = PIXREGION_BOX(region, curStart); - if (pPrevBox->y2 != pCurBox->y1) return curStart; + prev_box = PIXREGION_BOX (region, prev_start); + cur_box = PIXREGION_BOX (region, cur_start); + if (prev_box->y2 != cur_box->y1) return cur_start; /* * Make sure the bands have boxes in the same places. This @@ -479,43 +630,51 @@ pixman_coalesce ( * cover the most area possible. I.e. two boxes in a band must * have some horizontal space between them. */ - y2 = pCurBox->y2; + y2 = cur_box->y2; - do { - if ((pPrevBox->x1 != pCurBox->x1) || (pPrevBox->x2 != pCurBox->x2)) { - return (curStart); - } - pPrevBox++; - pCurBox++; + do + { + if ((prev_box->x1 != cur_box->x1) || (prev_box->x2 != cur_box->x2)) + return (cur_start); + + prev_box++; + cur_box++; numRects--; - } while (numRects); + } + while (numRects); /* * The bands may be merged, so set the bottom y of each box * in the previous band to the bottom y of the current band. 
*/ - numRects = curStart - prevStart; + numRects = cur_start - prev_start; region->data->numRects -= numRects; - do { - pPrevBox--; - pPrevBox->y2 = y2; + + do + { + prev_box--; + prev_box->y2 = y2; numRects--; - } while (numRects); - return prevStart; + } + while (numRects); + + return prev_start; } /* Quicky macro to avoid trivial reject procedure calls to pixman_coalesce */ -#define Coalesce(newReg, prevBand, curBand) \ - if (curBand - prevBand == newReg->data->numRects - curBand) { \ - prevBand = pixman_coalesce(newReg, prevBand, curBand); \ - } else { \ - prevBand = curBand; \ - } +#define COALESCE(new_reg, prev_band, cur_band) \ + do \ + { \ + if (cur_band - prev_band == new_reg->data->numRects - cur_band) \ + prev_band = pixman_coalesce (new_reg, prev_band, cur_band); \ + else \ + prev_band = cur_band; \ + } while (0) /*- *----------------------------------------------------------------------- - * pixman_region_appendNonO -- + * pixman_region_append_non_o -- * Handle a non-overlapping band for the union and subtract operations. * Just adds the (top/bottom-clipped) rectangles into the region. * Doesn't have to check for subsumption or anything. @@ -529,55 +688,58 @@ pixman_coalesce ( * *----------------------------------------------------------------------- */ - static inline pixman_bool_t -pixman_region_appendNonO ( - region_type_t * region, - box_type_t * r, - box_type_t * rEnd, - int y1, - int y2) +pixman_region_append_non_o (region_type_t * region, + box_type_t * r, + box_type_t * r_end, + int y1, + int y2) { - box_type_t * pNextRect; - int newRects; + box_type_t *next_rect; + int new_rects; - newRects = rEnd - r; + new_rects = r_end - r; - assert(y1 < y2); - assert(newRects != 0); + assert (y1 < y2); + assert (new_rects != 0); /* Make sure we have enough space for all rectangles to be added */ - RECTALLOC(region, newRects); - pNextRect = PIXREGION_TOP(region); - region->data->numRects += newRects; - do { - assert(r->x1 < r->x2); - ADDRECT(pNextRect, r->x1, y1, r->x2, y2); + RECTALLOC (region, new_rects); + next_rect = PIXREGION_TOP (region); + region->data->numRects += new_rects; + + do + { + assert (r->x1 < r->x2); + ADDRECT (next_rect, r->x1, y1, r->x2, y2); r++; - } while (r != rEnd); + } + while (r != r_end); return TRUE; } -#define FindBand(r, rBandEnd, rEnd, ry1) \ -{ \ - ry1 = r->y1; \ - rBandEnd = r+1; \ - while ((rBandEnd != rEnd) && (rBandEnd->y1 == ry1)) { \ - rBandEnd++; \ - } \ -} - -#define AppendRegions(newReg, r, rEnd) \ -{ \ - int newRects; \ - if ((newRects = rEnd - r)) { \ - RECTALLOC_BAIL(newReg, newRects, bail); \ - memmove((char *)PIXREGION_TOP(newReg),(char *)r, \ - newRects * sizeof(box_type_t)); \ - newReg->data->numRects += newRects; \ - } \ -} +#define FIND_BAND(r, r_band_end, r_end, ry1) \ + do \ + { \ + ry1 = r->y1; \ + r_band_end = r + 1; \ + while ((r_band_end != r_end) && (r_band_end->y1 == ry1)) { \ + r_band_end++; \ + } \ + } while (0) + +#define APPEND_REGIONS(new_reg, r, r_end) \ + do \ + { \ + int new_rects; \ + if ((new_rects = r_end - r)) { \ + RECTALLOC_BAIL (new_reg, new_rects, bail); \ + memmove ((char *)PIXREGION_TOP (new_reg), (char *)r, \ + new_rects * sizeof(box_type_t)); \ + new_reg->data->numRects += new_rects; \ + } \ + } while (0) /*- *----------------------------------------------------------------------- @@ -591,15 +753,15 @@ pixman_region_appendNonO ( * * Side Effects: * The new region is overwritten. - * pOverlap set to TRUE if overlapFunc ever returns TRUE. + * overlap set to TRUE if overlap_func ever returns TRUE. 
* * Notes: * The idea behind this function is to view the two regions as sets. * Together they cover a rectangle of area that this function divides * into horizontal bands where points are covered only by one region - * or by both. For the first case, the nonOverlapFunc is called with + * or by both. For the first case, the non_overlap_func is called with * each the band and the band's upper and lower extents. For the - * second, the overlapFunc is called to process the entire band. It + * second, the overlap_func is called to process the entire band. It * is responsible for clipping the rectangles in the band, though * this function provides the boundaries. * At the end of each band, the new region is coalesced, if possible, @@ -608,92 +770,101 @@ pixman_region_appendNonO ( *----------------------------------------------------------------------- */ -typedef pixman_bool_t (*OverlapProcPtr)( - region_type_t *region, - box_type_t *r1, - box_type_t *r1End, - box_type_t *r2, - box_type_t *r2End, - int y1, - int y2, - int *pOverlap); +typedef pixman_bool_t (*overlap_proc_ptr) (region_type_t *region, + box_type_t * r1, + box_type_t * r1_end, + box_type_t * r2, + box_type_t * r2_end, + int y1, + int y2, + int * overlap); static pixman_bool_t -pixman_op( - region_type_t *newReg, /* Place to store result */ - region_type_t * reg1, /* First region in operation */ - region_type_t * reg2, /* 2d region in operation */ - OverlapProcPtr overlapFunc, /* Function to call for over- - * lapping bands */ - int appendNon1, /* Append non-overlapping bands */ - /* in region 1 ? */ - int appendNon2, /* Append non-overlapping bands */ - /* in region 2 ? */ - int *pOverlap) +pixman_op (region_type_t * new_reg, /* Place to store result */ + region_type_t * reg1, /* First region in operation */ + region_type_t * reg2, /* 2d region in operation */ + overlap_proc_ptr overlap_func, /* Function to call for over- + * lapping bands */ + int append_non1, /* Append non-overlapping bands + * in region 1 ? + */ + int append_non2, /* Append non-overlapping bands + * in region 2 ? 
+ */ + int * overlap) { - box_type_t * r1; /* Pointer into first region */ - box_type_t * r2; /* Pointer into 2d region */ - box_type_t * r1End; /* End of 1st region */ - box_type_t * r2End; /* End of 2d region */ - int ybot; /* Bottom of intersection */ - int ytop; /* Top of intersection */ - region_data_type_t * oldData; /* Old data for newReg */ - int prevBand; /* Index of start of - * previous band in newReg */ - int curBand; /* Index of start of current - * band in newReg */ - box_type_t * r1BandEnd; /* End of current band in r1 */ - box_type_t * r2BandEnd; /* End of current band in r2 */ - int top; /* Top of non-overlapping band */ - int bot; /* Bottom of non-overlapping band*/ - int r1y1; /* Temps for r1->y1 and r2->y1 */ - int r2y1; - int newSize; - int numRects; + box_type_t *r1; /* Pointer into first region */ + box_type_t *r2; /* Pointer into 2d region */ + box_type_t *r1_end; /* End of 1st region */ + box_type_t *r2_end; /* End of 2d region */ + int ybot; /* Bottom of intersection */ + int ytop; /* Top of intersection */ + region_data_type_t *old_data; /* Old data for new_reg */ + int prev_band; /* Index of start of + * previous band in new_reg */ + int cur_band; /* Index of start of current + * band in new_reg */ + box_type_t * r1_band_end; /* End of current band in r1 */ + box_type_t * r2_band_end; /* End of current band in r2 */ + int top; /* Top of non-overlapping band */ + int bot; /* Bottom of non-overlapping band*/ + int r1y1; /* Temps for r1->y1 and r2->y1 */ + int r2y1; + int new_size; + int numRects; /* * Break any region computed from a broken region */ - if (PIXREGION_NAR (reg1) || PIXREGION_NAR(reg2)) - return pixman_break (newReg); + if (PIXREGION_NAR (reg1) || PIXREGION_NAR (reg2)) + return pixman_break (new_reg); /* * Initialization: - * set r1, r2, r1End and r2End appropriately, save the rectangles + * set r1, r2, r1_end and r2_end appropriately, save the rectangles * of the destination region until the end in case it's one of * the two source regions, then mark the "new" region empty, allocating * another array of rectangles for it to use. 
*/ - r1 = PIXREGION_RECTS(reg1); - newSize = PIXREGION_NUM_RECTS(reg1); - r1End = r1 + newSize; - numRects = PIXREGION_NUM_RECTS(reg2); - r2 = PIXREGION_RECTS(reg2); - r2End = r2 + numRects; - assert(r1 != r1End); - assert(r2 != r2End); + r1 = PIXREGION_RECTS (reg1); + new_size = PIXREGION_NUMRECTS (reg1); + r1_end = r1 + new_size; + + numRects = PIXREGION_NUMRECTS (reg2); + r2 = PIXREGION_RECTS (reg2); + r2_end = r2 + numRects; + + assert (r1 != r1_end); + assert (r2 != r2_end); - oldData = (region_data_type_t *)NULL; - if (((newReg == reg1) && (newSize > 1)) || - ((newReg == reg2) && (numRects > 1))) + old_data = (region_data_type_t *)NULL; + + if (((new_reg == reg1) && (new_size > 1)) || + ((new_reg == reg2) && (numRects > 1))) { - oldData = newReg->data; - newReg->data = pixman_region_emptyData; + old_data = new_reg->data; + new_reg->data = pixman_region_empty_data; } + /* guess at new size */ - if (numRects > newSize) - newSize = numRects; - newSize <<= 1; - if (!newReg->data) - newReg->data = pixman_region_emptyData; - else if (newReg->data->size) - newReg->data->numRects = 0; - if (newSize > newReg->data->size) { - if (!pixman_rect_alloc(newReg, newSize)) { - if (oldData) - free (oldData); - return FALSE; + if (numRects > new_size) + new_size = numRects; + + new_size <<= 1; + + if (!new_reg->data) + new_reg->data = pixman_region_empty_data; + else if (new_reg->data->size) + new_reg->data->numRects = 0; + + if (new_size > new_reg->data->size) + { + if (!pixman_rect_alloc (new_reg, new_size)) + { + if (old_data) + free (old_data); + return FALSE; } } @@ -702,7 +873,7 @@ pixman_op( * In the upcoming loop, ybot and ytop serve different functions depending * on whether the band being handled is an overlapping or non-overlapping * band. - * In the case of a non-overlapping band (only one of the regions + * In the case of a non-overlapping band (only one of the regions * has points in the band), ybot is the bottom of the most recent * intersection and thus clips the top of the rectangles in that band. * ytop is the top of the next intersection between the two regions and @@ -711,34 +882,35 @@ pixman_op( * the top of the rectangles of both regions and ybot clips the bottoms. */ - ybot = MIN(r1->y1, r2->y1); + ybot = MIN (r1->y1, r2->y1); /* - * prevBand serves to mark the start of the previous band so rectangles + * prev_band serves to mark the start of the previous band so rectangles * can be coalesced into larger rectangles. qv. pixman_coalesce, above. - * In the beginning, there is no previous band, so prevBand == curBand - * (curBand is set later on, of course, but the first band will always - * start at index 0). prevBand and curBand must be indices because of + * In the beginning, there is no previous band, so prev_band == cur_band + * (cur_band is set later on, of course, but the first band will always + * start at index 0). prev_band and cur_band must be indices because of * the possible expansion, and resultant moving, of the new region's * array of rectangles. */ - prevBand = 0; + prev_band = 0; - do { - /* + do + { + /* * This algorithm proceeds one source-band (as opposed to a * destination band, which is determined by where the two regions - * intersect) at a time. r1BandEnd and r2BandEnd serve to mark the + * intersect) at a time. r1_band_end and r2_band_end serve to mark the * rectangle after the last one in the current band for their * respective regions. 
*/ - assert(r1 != r1End); - assert(r2 != r2End); + assert (r1 != r1_end); + assert (r2 != r2_end); - FindBand(r1, r1BandEnd, r1End, r1y1); - FindBand(r2, r2BandEnd, r2End, r2y1); + FIND_BAND (r1, r1_band_end, r1_end, r1y1); + FIND_BAND (r2, r2_band_end, r2_end, r2y1); - /* + /* * First handle the band that doesn't intersect, if any. * * Note that attention is restricted to one band in the @@ -746,58 +918,79 @@ pixman_op( * bands between the current position and the next place it overlaps * the other, this entire loop will be passed through n times. */ - if (r1y1 < r2y1) { - if (appendNon1) { - top = MAX(r1y1, ybot); - bot = MIN(r1->y2, r2y1); - if (top != bot) { - curBand = newReg->data->numRects; - if (!pixman_region_appendNonO(newReg, r1, r1BandEnd, top, bot)) + if (r1y1 < r2y1) + { + if (append_non1) + { + top = MAX (r1y1, ybot); + bot = MIN (r1->y2, r2y1); + if (top != bot) + { + cur_band = new_reg->data->numRects; + if (!pixman_region_append_non_o (new_reg, r1, r1_band_end, top, bot)) goto bail; - Coalesce(newReg, prevBand, curBand); + COALESCE (new_reg, prev_band, cur_band); } } - ytop = r2y1; - } else if (r2y1 < r1y1) { - if (appendNon2) { - top = MAX(r2y1, ybot); - bot = MIN(r2->y2, r1y1); - if (top != bot) { - curBand = newReg->data->numRects; - if (!pixman_region_appendNonO(newReg, r2, r2BandEnd, top, bot)) + ytop = r2y1; + } + else if (r2y1 < r1y1) + { + if (append_non2) + { + top = MAX (r2y1, ybot); + bot = MIN (r2->y2, r1y1); + + if (top != bot) + { + cur_band = new_reg->data->numRects; + + if (!pixman_region_append_non_o (new_reg, r2, r2_band_end, top, bot)) goto bail; - Coalesce(newReg, prevBand, curBand); + + COALESCE (new_reg, prev_band, cur_band); } } - ytop = r1y1; - } else { - ytop = r1y1; + ytop = r1y1; + } + else + { + ytop = r1y1; } - /* + /* * Now see if we've hit an intersecting band. The two bands only * intersect if ybot > ytop */ - ybot = MIN(r1->y2, r2->y2); - if (ybot > ytop) { - curBand = newReg->data->numRects; - if (!(* overlapFunc)(newReg, - r1, r1BandEnd, - r2, r2BandEnd, - ytop, ybot, - pOverlap)) + ybot = MIN (r1->y2, r2->y2); + if (ybot > ytop) + { + cur_band = new_reg->data->numRects; + + if (!(*overlap_func)(new_reg, + r1, r1_band_end, + r2, r2_band_end, + ytop, ybot, + overlap)) + { goto bail; - Coalesce(newReg, prevBand, curBand); + } + + COALESCE (new_reg, prev_band, cur_band); } - /* + /* * If we've finished with a band (y2 == ybot) we skip forward * in the region to the next band. */ - if (r1->y2 == ybot) r1 = r1BandEnd; - if (r2->y2 == ybot) r2 = r2BandEnd; + if (r1->y2 == ybot) + r1 = r1_band_end; + + if (r2->y2 == ybot) + r2 = r2_band_end; - } while (r1 != r1End && r2 != r2End); + } + while (r1 != r1_end && r2 != r2_end); /* * Deal with whichever region (if any) still has rectangles left. @@ -807,64 +1000,79 @@ pixman_op( * regardless of how many bands, into one final append to the list. 
*/ - if ((r1 != r1End) && appendNon1) { - /* Do first nonOverlap1Func call, which may be able to coalesce */ - FindBand(r1, r1BandEnd, r1End, r1y1); - curBand = newReg->data->numRects; - if (!pixman_region_appendNonO(newReg, - r1, r1BandEnd, - MAX(r1y1, ybot), r1->y2)) + if ((r1 != r1_end) && append_non1) + { + /* Do first non_overlap1Func call, which may be able to coalesce */ + FIND_BAND (r1, r1_band_end, r1_end, r1y1); + + cur_band = new_reg->data->numRects; + + if (!pixman_region_append_non_o (new_reg, + r1, r1_band_end, + MAX (r1y1, ybot), r1->y2)) + { goto bail; - Coalesce(newReg, prevBand, curBand); - /* Just append the rest of the boxes */ - AppendRegions(newReg, r1BandEnd, r1End); - - } else if ((r2 != r2End) && appendNon2) { - /* Do first nonOverlap2Func call, which may be able to coalesce */ - FindBand(r2, r2BandEnd, r2End, r2y1); - curBand = newReg->data->numRects; - if (!pixman_region_appendNonO(newReg, - r2, r2BandEnd, - MAX(r2y1, ybot), r2->y2)) + } + + COALESCE (new_reg, prev_band, cur_band); + + /* Just append the rest of the boxes */ + APPEND_REGIONS (new_reg, r1_band_end, r1_end); + } + else if ((r2 != r2_end) && append_non2) + { + /* Do first non_overlap2Func call, which may be able to coalesce */ + FIND_BAND (r2, r2_band_end, r2_end, r2y1); + + cur_band = new_reg->data->numRects; + + if (!pixman_region_append_non_o (new_reg, + r2, r2_band_end, + MAX (r2y1, ybot), r2->y2)) + { goto bail; - Coalesce(newReg, prevBand, curBand); - /* Append rest of boxes */ - AppendRegions(newReg, r2BandEnd, r2End); + } + + COALESCE (new_reg, prev_band, cur_band); + + /* Append rest of boxes */ + APPEND_REGIONS (new_reg, r2_band_end, r2_end); } - if (oldData) - free(oldData); + if (old_data) + free (old_data); - if (!(numRects = newReg->data->numRects)) + if (!(numRects = new_reg->data->numRects)) { - freeData(newReg); - newReg->data = pixman_region_emptyData; + FREE_DATA (new_reg); + new_reg->data = pixman_region_empty_data; } else if (numRects == 1) { - newReg->extents = *PIXREGION_BOXPTR(newReg); - freeData(newReg); - newReg->data = (region_data_type_t *)NULL; + new_reg->extents = *PIXREGION_BOXPTR (new_reg); + FREE_DATA (new_reg); + new_reg->data = (region_data_type_t *)NULL; } else { - DOWNSIZE(newReg, numRects); + DOWNSIZE (new_reg, numRects); } return TRUE; bail: - if (oldData) - free(oldData); - return pixman_break (newReg); + if (old_data) + free (old_data); + + return pixman_break (new_reg); } /*- *----------------------------------------------------------------------- * pixman_set_extents -- * Reset the extents of a region to what they should be. Called by - * pixman_region_subtract and pixman_region_intersect as they can't figure it out along the - * way or do so easily, as pixman_region_union can. + * pixman_region_subtract and pixman_region_intersect as they can't + * figure it out along the way or do so easily, as pixman_region_union can. * * Results: * None. 
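pixman_op above is the band machine shared by intersection, union, and subtraction, and pixman_coalesce is what keeps its output minimal: after each band is emitted, it is merged with the previous band when the two cover identical x-spans and touch vertically. Both behaviours can be observed through the public region API declared in pixman.h; in this sketch (shape coordinates invented) two stacked, x-aligned rectangles coalesce into a single box, while an offset pair stays banded as two:

    #include <stdio.h>
    #include <pixman.h>

    static void
    dump (const char *label, pixman_region16_t *r)
    {
        int i, n;
        pixman_box16_t *b = pixman_region_rectangles (r, &n);

        printf ("%s: %d box(es)\n", label, n);
        for (i = 0; i < n; i++)
            printf ("  [%d,%d - %d,%d]\n",
                    b[i].x1, b[i].y1, b[i].x2, b[i].y2);
    }

    int main (void)
    {
        pixman_region16_t a, b, u;

        pixman_region_init (&u);

        /* x-aligned stacked rectangles: the two bands coalesce */
        pixman_region_init_rect (&a, 0, 0, 10, 5);
        pixman_region_init_rect (&b, 0, 5, 10, 5);
        pixman_region_union (&u, &a, &b);
        dump ("stacked", &u);            /* one 10x10 box */

        /* offset rectangles: the union stays banded */
        pixman_region_fini (&b);
        pixman_region_init_rect (&b, 5, 5, 10, 5);
        pixman_region_union (&u, &a, &b);
        dump ("L-shape", &u);            /* two boxes */

        pixman_region_fini (&a);
        pixman_region_fini (&b);
        pixman_region_fini (&u);
        return 0;
    }

Building against the installed library is the usual pkg-config dance, e.g. cc demo.c $(pkg-config --cflags --libs pixman-1).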
@@ -877,42 +1085,45 @@ bail: static void pixman_set_extents (region_type_t *region) { - box_type_t *box, *boxEnd; + box_type_t *box, *box_end; if (!region->data) return; + if (!region->data->size) { - region->extents.x2 = region->extents.x1; - region->extents.y2 = region->extents.y1; - return; + region->extents.x2 = region->extents.x1; + region->extents.y2 = region->extents.y1; + return; } - box = PIXREGION_BOXPTR(region); - boxEnd = PIXREGION_END(region); + box = PIXREGION_BOXPTR (region); + box_end = PIXREGION_END (region); /* * Since box is the first rectangle in the region, it must have the - * smallest y1 and since boxEnd is the last rectangle in the region, + * smallest y1 and since box_end is the last rectangle in the region, * it must have the largest y2, because of banding. Initialize x1 and - * x2 from box and boxEnd, resp., as good things to initialize them + * x2 from box and box_end, resp., as good things to initialize them * to... */ region->extents.x1 = box->x1; region->extents.y1 = box->y1; - region->extents.x2 = boxEnd->x2; - region->extents.y2 = boxEnd->y2; + region->extents.x2 = box_end->x2; + region->extents.y2 = box_end->y2; - assert(region->extents.y1 < region->extents.y2); - while (box <= boxEnd) { - if (box->x1 < region->extents.x1) + assert (region->extents.y1 < region->extents.y2); + + while (box <= box_end) + { + if (box->x1 < region->extents.x1) region->extents.x1 = box->x1; - if (box->x2 > region->extents.x2) + if (box->x2 > region->extents.x2) region->extents.x2 = box->x2; - box++; - }; + box++; + } - assert(region->extents.x1 < region->extents.x2); + assert (region->extents.x1 < region->extents.x2); } /*====================================================================== @@ -920,7 +1131,7 @@ pixman_set_extents (region_type_t *region) *====================================================================*/ /*- *----------------------------------------------------------------------- - * pixman_region_intersectO -- + * pixman_region_intersect_o -- * Handle an overlapping band for pixman_region_intersect. * * Results: @@ -933,125 +1144,145 @@ pixman_set_extents (region_type_t *region) */ /*ARGSUSED*/ static pixman_bool_t -pixman_region_intersectO (region_type_t *region, - box_type_t *r1, - box_type_t *r1End, - box_type_t *r2, - box_type_t *r2End, - int y1, - int y2, - int *pOverlap) +pixman_region_intersect_o (region_type_t *region, + box_type_t * r1, + box_type_t * r1_end, + box_type_t * r2, + box_type_t * r2_end, + int y1, + int y2, + int * overlap) { - int x1; - int x2; - box_type_t * pNextRect; + int x1; + int x2; + box_type_t * next_rect; - pNextRect = PIXREGION_TOP(region); + next_rect = PIXREGION_TOP (region); - assert(y1 < y2); - assert(r1 != r1End && r2 != r2End); + assert (y1 < y2); + assert (r1 != r1_end && r2 != r2_end); - do { - x1 = MAX(r1->x1, r2->x1); - x2 = MIN(r1->x2, r2->x2); + do + { + x1 = MAX (r1->x1, r2->x1); + x2 = MIN (r1->x2, r2->x2); - /* + /* * If there's any overlap between the two rectangles, add that * overlap to the new region. */ - if (x1 < x2) - NEWRECT(region, pNextRect, x1, y1, x2, y2); + if (x1 < x2) + NEWRECT (region, next_rect, x1, y1, x2, y2); - /* + /* * Advance the pointer(s) with the leftmost right side, since the next * rectangle on that list may still overlap the other region's * current rectangle. 
*/ - if (r1->x2 == x2) { - r1++; + if (r1->x2 == x2) + { + r1++; } - if (r2->x2 == x2) { - r2++; + if (r2->x2 == x2) + { + r2++; } - } while ((r1 != r1End) && (r2 != r2End)); + } + while ((r1 != r1_end) && (r2 != r2_end)); return TRUE; } PIXMAN_EXPORT pixman_bool_t -PREFIX(_intersect) (region_type_t * newReg, - region_type_t * reg1, - region_type_t * reg2) +PREFIX (_intersect) (region_type_t * new_reg, + region_type_t * reg1, + region_type_t * reg2) { - good(reg1); - good(reg2); - good(newReg); - /* check for trivial reject */ - if (PIXREGION_NIL(reg1) || PIXREGION_NIL(reg2) || - !EXTENTCHECK(®1->extents, ®2->extents)) - { - /* Covers about 20% of all cases */ - freeData(newReg); - newReg->extents.x2 = newReg->extents.x1; - newReg->extents.y2 = newReg->extents.y1; - if (PIXREGION_NAR(reg1) || PIXREGION_NAR(reg2)) + GOOD (reg1); + GOOD (reg2); + GOOD (new_reg); + + /* check for trivial reject */ + if (PIXREGION_NIL (reg1) || PIXREGION_NIL (reg2) || + !EXTENTCHECK (®1->extents, ®2->extents)) + { + /* Covers about 20% of all cases */ + FREE_DATA (new_reg); + new_reg->extents.x2 = new_reg->extents.x1; + new_reg->extents.y2 = new_reg->extents.y1; + if (PIXREGION_NAR (reg1) || PIXREGION_NAR (reg2)) + { + new_reg->data = pixman_broken_data; + return FALSE; + } + else { - newReg->data = pixman_brokendata; - return FALSE; + new_reg->data = pixman_region_empty_data; } - else - newReg->data = pixman_region_emptyData; } else if (!reg1->data && !reg2->data) { - /* Covers about 80% of cases that aren't trivially rejected */ - newReg->extents.x1 = MAX(reg1->extents.x1, reg2->extents.x1); - newReg->extents.y1 = MAX(reg1->extents.y1, reg2->extents.y1); - newReg->extents.x2 = MIN(reg1->extents.x2, reg2->extents.x2); - newReg->extents.y2 = MIN(reg1->extents.y2, reg2->extents.y2); - freeData(newReg); - newReg->data = (region_data_type_t *)NULL; + /* Covers about 80% of cases that aren't trivially rejected */ + new_reg->extents.x1 = MAX (reg1->extents.x1, reg2->extents.x1); + new_reg->extents.y1 = MAX (reg1->extents.y1, reg2->extents.y1); + new_reg->extents.x2 = MIN (reg1->extents.x2, reg2->extents.x2); + new_reg->extents.y2 = MIN (reg1->extents.y2, reg2->extents.y2); + + FREE_DATA (new_reg); + + new_reg->data = (region_data_type_t *)NULL; } - else if (!reg2->data && SUBSUMES(®2->extents, ®1->extents)) + else if (!reg2->data && SUBSUMES (®2->extents, ®1->extents)) { - return PREFIX(_copy) (newReg, reg1); + return PREFIX (_copy) (new_reg, reg1); } - else if (!reg1->data && SUBSUMES(®1->extents, ®2->extents)) + else if (!reg1->data && SUBSUMES (®1->extents, ®2->extents)) { - return PREFIX(_copy) (newReg, reg2); + return PREFIX (_copy) (new_reg, reg2); } else if (reg1 == reg2) { - return PREFIX(_copy) (newReg, reg1); + return PREFIX (_copy) (new_reg, reg1); } else { - /* General purpose intersection */ - int overlap; /* result ignored */ - if (!pixman_op(newReg, reg1, reg2, pixman_region_intersectO, FALSE, FALSE, - &overlap)) + /* General purpose intersection */ + int overlap; /* result ignored */ + + if (!pixman_op (new_reg, reg1, reg2, pixman_region_intersect_o, FALSE, FALSE, + &overlap)) + { return FALSE; - pixman_set_extents(newReg); + } + + pixman_set_extents (new_reg); } - good(newReg); + GOOD (new_reg); return(TRUE); } -#define MERGERECT(r) \ -{ \ - if (r->x1 <= x2) { \ - /* Merge with current rectangle */ \ - if (r->x1 < x2) *pOverlap = TRUE; \ - if (x2 < r->x2) x2 = r->x2; \ - } else { \ - /* Add current rectangle, start new one */ \ - NEWRECT(region, pNextRect, x1, y1, x2, y2); \ - x1 = r->x1; \ - x2 = 
-    }								\
-    r++;							\
-}
+#define MERGERECT(r)						\
+    do								\
+    {								\
+        if (r->x1 <= x2)					\
+        {							\
+            /* Merge with current rectangle */			\
+            if (r->x1 < x2)					\
+                *overlap = TRUE;				\
+								\
+            if (x2 < r->x2)					\
+                x2 = r->x2;					\
+        }							\
+        else							\
+        {							\
+            /* Add current rectangle, start new one */		\
+            NEWRECT (region, next_rect, x1, y1, x2, y2);	\
+            x1 = r->x1;						\
+            x2 = r->x2;						\
+        }							\
+        r++;							\
+    } while (0)
 
 /*======================================================================
  *	    Region Union
@@ -1059,7 +1290,7 @@ PREFIX(_intersect) (region_type_t * newReg,
 
 /*-
  *-----------------------------------------------------------------------
- * pixman_region_unionO --
+ * pixman_region_union_o --
  *	Handle an overlapping band for the union operation. Picks the
  *	left-most rectangle each time and merges it into the region.
  *
@@ -1068,66 +1299,70 @@ PREFIX(_intersect) (region_type_t * newReg,
  *
  * Side Effects:
  *	region is overwritten.
- *	pOverlap is set to TRUE if any boxes overlap.
+ *	overlap is set to TRUE if any boxes overlap.
 *
 *-----------------------------------------------------------------------
 */
 static pixman_bool_t
-pixman_region_unionO (
-    region_type_t *region,
-    box_type_t    *r1,
-    box_type_t    *r1End,
-    box_type_t    *r2,
-    box_type_t    *r2End,
-    int            y1,
-    int            y2,
-    int           *pOverlap)
+pixman_region_union_o (region_type_t *region,
+                       box_type_t *   r1,
+                       box_type_t *   r1_end,
+                       box_type_t *   r2,
+                       box_type_t *   r2_end,
+                       int            y1,
+                       int            y2,
+                       int *          overlap)
 {
-    box_type_t * pNextRect;
-    int x1;     /* left and right side of current union */
-    int x2;
+    box_type_t *next_rect;
+    int x1;     /* left and right side of current union */
+    int x2;
 
     assert (y1 < y2);
-    assert(r1 != r1End && r2 != r2End);
+    assert (r1 != r1_end && r2 != r2_end);
 
-    pNextRect = PIXREGION_TOP(region);
+    next_rect = PIXREGION_TOP (region);
 
     /* Start off current rectangle */
     if (r1->x1 < r2->x1)
     {
-	x1 = r1->x1;
-	x2 = r1->x2;
-	r1++;
+	x1 = r1->x1;
+	x2 = r1->x2;
+	r1++;
     }
     else
     {
-	x1 = r2->x1;
-	x2 = r2->x2;
-	r2++;
+	x1 = r2->x1;
+	x2 = r2->x2;
+	r2++;
     }
-    while (r1 != r1End && r2 != r2End)
+    while (r1 != r1_end && r2 != r2_end)
     {
-	if (r1->x1 < r2->x1) MERGERECT(r1) else MERGERECT(r2);
+	if (r1->x1 < r2->x1)
+	    MERGERECT (r1);
+	else
+	    MERGERECT (r2);
     }
 
     /* Finish off whoever (if any) is left */
-    if (r1 != r1End)
+    if (r1 != r1_end)
     {
-	do
-	{
-	    MERGERECT(r1);
-	} while (r1 != r1End);
+	do
+	{
+	    MERGERECT (r1);
+	}
+	while (r1 != r1_end);
     }
-    else if (r2 != r2End)
+    else if (r2 != r2_end)
     {
-	do
-	{
-	    MERGERECT(r2);
-	} while (r2 != r2End);
+	do
+	{
+	    MERGERECT (r2);
+	}
+	while (r2 != r2_end);
     }
 
     /* Add current rectangle */
-    NEWRECT(region, pNextRect, x1, y1, x2, y2);
+    NEWRECT (region, next_rect, x1, y1, x2, y2);
 
     return TRUE;
 }
 
@@ -1136,99 +1371,114 @@ pixman_region_unionO (
 * single rectangle
 */
 PIXMAN_EXPORT pixman_bool_t
-PREFIX(_union_rect) (region_type_t *dest,
-		     region_type_t *source,
-		     int x, int y,
-		     unsigned int width, unsigned int height)
+PREFIX (_union_rect) (region_type_t *dest,
+                      region_type_t *source,
+                      int            x,
+                      int            y,
+                      unsigned int   width,
+                      unsigned int   height)
 {
     region_type_t region;
 
-    if (!width || !height)
-	return PREFIX(_copy) (dest, source);
-    region.data = NULL;
     region.extents.x1 = x;
     region.extents.y1 = y;
     region.extents.x2 = x + width;
     region.extents.y2 = y + height;
 
-    return PREFIX(_union) (dest, source, &region);
+    if (!GOOD_RECT (&region.extents))
+    {
+	if (BAD_RECT (&region.extents))
+	    log_region_error (FUNC, "Invalid rectangle passed");
+	return PREFIX (_copy) (dest, source);
+    }
+
+    region.data = NULL;
+
+    return PREFIX (_union) (dest, source, &region);
 }
 
 PIXMAN_EXPORT pixman_bool_t
-PREFIX(_union) (region_type_t *newReg,
-		region_type_t *reg1,
-		region_type_t *reg2)
+PREFIX (_union) (region_type_t *new_reg,
+                 region_type_t *reg1,
+                 region_type_t *reg2)
 {
     int overlap; /* result ignored */
 
     /* Return TRUE if some overlap
     * between reg1, reg2
     */
-    good(reg1);
-    good(reg2);
-    good(newReg);
+    GOOD (reg1);
+    GOOD (reg2);
+    GOOD (new_reg);
+
     /*  checks all the simple cases */
 
     /*
     * Region 1 and 2 are the same
     */
     if (reg1 == reg2)
-    {
-	return PREFIX(_copy) (newReg, reg1);
-    }
+	return PREFIX (_copy) (new_reg, reg1);
 
     /*
     * Region 1 is empty
     */
-    if (PIXREGION_NIL(reg1))
+    if (PIXREGION_NIL (reg1))
     {
-	if (PIXREGION_NAR(reg1))
-	    return pixman_break (newReg);
-	if (newReg != reg2)
-	    return PREFIX(_copy) (newReg, reg2);
-	return TRUE;
+	if (PIXREGION_NAR (reg1))
+	    return pixman_break (new_reg);
+
+	if (new_reg != reg2)
+	    return PREFIX (_copy) (new_reg, reg2);
+
+	return TRUE;
     }
 
     /*
     * Region 2 is empty
    */
-    if (PIXREGION_NIL(reg2))
+    if (PIXREGION_NIL (reg2))
     {
-	if (PIXREGION_NAR(reg2))
-	    return pixman_break (newReg);
-	if (newReg != reg1)
-	    return PREFIX(_copy) (newReg, reg1);
-	return TRUE;
+	if (PIXREGION_NAR (reg2))
+	    return pixman_break (new_reg);
+
+	if (new_reg != reg1)
+	    return PREFIX (_copy) (new_reg, reg1);
+
+	return TRUE;
     }
 
     /*
     * Region 1 completely subsumes region 2
    */
-    if (!reg1->data && SUBSUMES(&reg1->extents, &reg2->extents))
+    if (!reg1->data && SUBSUMES (&reg1->extents, &reg2->extents))
     {
-	if (newReg != reg1)
-	    return PREFIX(_copy) (newReg, reg1);
-	return TRUE;
+	if (new_reg != reg1)
+	    return PREFIX (_copy) (new_reg, reg1);
+
+	return TRUE;
     }
 
     /*
    * Region 2 completely subsumes region 1
    */
-    if (!reg2->data && SUBSUMES(&reg2->extents, &reg1->extents))
+    if (!reg2->data && SUBSUMES (&reg2->extents, &reg1->extents))
     {
-	if (newReg != reg2)
-	    return PREFIX(_copy) (newReg, reg2);
-	return TRUE;
+	if (new_reg != reg2)
+	    return PREFIX (_copy) (new_reg, reg2);
+
+	return TRUE;
     }
 
-    if (!pixman_op(newReg, reg1, reg2, pixman_region_unionO, TRUE, TRUE, &overlap))
+    if (!pixman_op (new_reg, reg1, reg2, pixman_region_union_o, TRUE, TRUE, &overlap))
	return FALSE;
 
-    newReg->extents.x1 = MIN(reg1->extents.x1, reg2->extents.x1);
-    newReg->extents.y1 = MIN(reg1->extents.y1, reg2->extents.y1);
-    newReg->extents.x2 = MAX(reg1->extents.x2, reg2->extents.x2);
-    newReg->extents.y2 = MAX(reg1->extents.y2, reg2->extents.y2);
-    good(newReg);
+    new_reg->extents.x1 = MIN (reg1->extents.x1, reg2->extents.x1);
+    new_reg->extents.y1 = MIN (reg1->extents.y1, reg2->extents.y1);
+    new_reg->extents.x2 = MAX (reg1->extents.x2, reg2->extents.x2);
+    new_reg->extents.y2 = MAX (reg1->extents.y2, reg2->extents.y2);
+
+    GOOD (new_reg);
+
     return TRUE;
 }
 
@@ -1236,71 +1486,83 @@ PREFIX(_union) (region_type_t *newReg,
 *	    Batch Rectangle Union
 *====================================================================*/
 
-#define ExchangeRects(a, b)	\
-{				\
-    box_type_t t;		\
-    t = rects[a];		\
-    rects[a] = rects[b];	\
-    rects[b] = t;		\
-}
+#define EXCHANGE_RECTS(a, b)	\
+    {				\
+        box_type_t t;		\
+        t = rects[a];		\
+        rects[a] = rects[b];	\
+        rects[b] = t;		\
+    }
 
 static void
-QuickSortRects(
-    box_type_t rects[],
+quick_sort_rects (
+    box_type_t rects[],
     int numRects)
 {
     int y1;
     int x1;
     int i, j;
     box_type_t *r;
 
     /* Always called with numRects > 1 */
 
     do
     {
-	if (numRects == 2)
-	{
-	    if (rects[0].y1 > rects[1].y1 ||
-
(rects[0].y1 == rects[1].y1 && rects[0].x1 > rects[1].x1)) + { + EXCHANGE_RECTS (0, 1); + } + + return; } - /* Choose partition element, stick in location 0 */ - ExchangeRects(0, numRects >> 1); - y1 = rects[0].y1; - x1 = rects[0].x1; + /* Choose partition element, stick in location 0 */ + EXCHANGE_RECTS (0, numRects >> 1); + y1 = rects[0].y1; + x1 = rects[0].x1; /* Partition array */ i = 0; j = numRects; + do - { - r = &(rects[i]); - do - { - r++; - i++; - } while (i != numRects && - (r->y1 < y1 || (r->y1 == y1 && r->x1 < x1))); + { + r = &(rects[i]); + do + { + r++; + i++; + } + + while (i != numRects && (r->y1 < y1 || (r->y1 == y1 && r->x1 < x1))) + ; + r = &(rects[j]); - do - { - r--; - j--; - } while (y1 < r->y1 || (y1 == r->y1 && x1 < r->x1)); + do + { + r--; + j--; + } + while (y1 < r->y1 || (y1 == r->y1 && x1 < r->x1)); + if (i < j) - ExchangeRects(i, j); - } while (i < j); + EXCHANGE_RECTS (i, j); + } + while (i < j); /* Move partition element back to middle */ - ExchangeRects(0, j); + EXCHANGE_RECTS (0, j); + + /* Recurse */ + if (numRects - j - 1 > 1) + quick_sort_rects (&rects[j + 1], numRects - j - 1); - /* Recurse */ - if (numRects-j-1 > 1) - QuickSortRects(&rects[j+1], numRects-j-1); numRects = j; - } while (numRects > 1); + } + while (numRects > 1); } /*- @@ -1316,7 +1578,7 @@ QuickSortRects( * * Side Effects: * The passed-in ``region'' may be modified. - * pOverlap set to TRUE if any retangles overlapped, + * overlap set to TRUE if any retangles overlapped, * else FALSE; * * Strategy: @@ -1338,208 +1600,247 @@ QuickSortRects( static pixman_bool_t validate (region_type_t * badreg, - int *pOverlap) + int * overlap) { /* Descriptor for regions under construction in Step 2. */ - typedef struct { - region_type_t reg; - int prevBand; - int curBand; - } RegionInfo; - - RegionInfo stack_regions[64]; - - int numRects; /* Original numRects for badreg */ - RegionInfo *ri; /* Array of current regions */ - int numRI; /* Number of entries used in ri */ - int sizeRI; /* Number of entries available in ri */ - int i; /* Index into rects */ - int j; /* Index into ri */ - RegionInfo *rit; /* &ri[j] */ - region_type_t * reg; /* ri[j].reg */ - box_type_t * box; /* Current box in rects */ - box_type_t * riBox; /* Last box in ri[j].reg */ - region_type_t * hreg; /* ri[j_half].reg */ + typedef struct + { + region_type_t reg; + int prev_band; + int cur_band; + } region_info_t; + + region_info_t stack_regions[64]; + + int numRects; /* Original numRects for badreg */ + region_info_t *ri; /* Array of current regions */ + int num_ri; /* Number of entries used in ri */ + int size_ri; /* Number of entries available in ri */ + int i; /* Index into rects */ + int j; /* Index into ri */ + region_info_t *rit; /* &ri[j] */ + region_type_t *reg; /* ri[j].reg */ + box_type_t *box; /* Current box in rects */ + box_type_t *ri_box; /* Last box in ri[j].reg */ + region_type_t *hreg; /* ri[j_half].reg */ pixman_bool_t ret = TRUE; - *pOverlap = FALSE; + *overlap = FALSE; if (!badreg->data) { - good(badreg); - return TRUE; + GOOD (badreg); + return TRUE; } + numRects = badreg->data->numRects; if (!numRects) { - if (PIXREGION_NAR(badreg)) + if (PIXREGION_NAR (badreg)) return FALSE; - good(badreg); - return TRUE; + GOOD (badreg); + return TRUE; } + if (badreg->extents.x1 < badreg->extents.x2) { - if ((numRects) == 1) - { - freeData(badreg); - badreg->data = (region_data_type_t *) NULL; + if ((numRects) == 1) + { + FREE_DATA (badreg); + badreg->data = (region_data_type_t *) NULL; } - else - { - DOWNSIZE(badreg, 
numRects); + else + { + DOWNSIZE (badreg, numRects); } - good(badreg); + + GOOD (badreg); + return TRUE; } /* Step 1: Sort the rects array into ascending (y1, x1) order */ - QuickSortRects(PIXREGION_BOXPTR(badreg), numRects); + quick_sort_rects (PIXREGION_BOXPTR (badreg), numRects); /* Step 2: Scatter the sorted array into the minimum number of regions */ /* Set up the first region to be the first rectangle in badreg */ /* Note that step 2 code will never overflow the ri[0].reg rects array */ ri = stack_regions; - sizeRI = sizeof (stack_regions) / sizeof (stack_regions[0]); - numRI = 1; - ri[0].prevBand = 0; - ri[0].curBand = 0; + size_ri = sizeof (stack_regions) / sizeof (stack_regions[0]); + num_ri = 1; + ri[0].prev_band = 0; + ri[0].cur_band = 0; ri[0].reg = *badreg; - box = PIXREGION_BOXPTR(&ri[0].reg); + box = PIXREGION_BOXPTR (&ri[0].reg); ri[0].reg.extents = *box; ri[0].reg.data->numRects = 1; - badreg->extents = *pixman_region_emptyBox; - badreg->data = pixman_region_emptyData; + badreg->extents = *pixman_region_empty_box; + badreg->data = pixman_region_empty_data; /* Now scatter rectangles into the minimum set of valid regions. If the - next rectangle to be added to a region would force an existing rectangle - in the region to be split up in order to maintain y-x banding, just - forget it. Try the next region. If it doesn't fit cleanly into any - region, make a new one. */ + * next rectangle to be added to a region would force an existing rectangle + * in the region to be split up in order to maintain y-x banding, just + * forget it. Try the next region. If it doesn't fit cleanly into any + * region, make a new one. + */ for (i = numRects; --i > 0;) { - box++; - /* Look for a region to append box to */ - for (j = numRI, rit = ri; --j >= 0; rit++) - { - reg = &rit->reg; - riBox = PIXREGION_END(reg); - - if (box->y1 == riBox->y1 && box->y2 == riBox->y2) - { - /* box is in same band as riBox. Merge or append it */ - if (box->x1 <= riBox->x2) - { - /* Merge it with riBox */ - if (box->x1 < riBox->x2) *pOverlap = TRUE; - if (box->x2 > riBox->x2) riBox->x2 = box->x2; + box++; + /* Look for a region to append box to */ + for (j = num_ri, rit = ri; --j >= 0; rit++) + { + reg = &rit->reg; + ri_box = PIXREGION_END (reg); + + if (box->y1 == ri_box->y1 && box->y2 == ri_box->y2) + { + /* box is in same band as ri_box. 
Merge or append it */ + if (box->x1 <= ri_box->x2) + { + /* Merge it with ri_box */ + if (box->x1 < ri_box->x2) + *overlap = TRUE; + + if (box->x2 > ri_box->x2) + ri_box->x2 = box->x2; } - else - { - RECTALLOC_BAIL(reg, 1, bail); - *PIXREGION_TOP(reg) = *box; - reg->data->numRects++; + else + { + RECTALLOC_BAIL (reg, 1, bail); + *PIXREGION_TOP (reg) = *box; + reg->data->numRects++; } - goto NextRect; /* So sue me */ + + goto next_rect; /* So sue me */ } - else if (box->y1 >= riBox->y2) - { - /* Put box into new band */ - if (reg->extents.x2 < riBox->x2) reg->extents.x2 = riBox->x2; - if (reg->extents.x1 > box->x1) reg->extents.x1 = box->x1; - Coalesce(reg, rit->prevBand, rit->curBand); - rit->curBand = reg->data->numRects; - RECTALLOC_BAIL(reg, 1, bail); - *PIXREGION_TOP(reg) = *box; - reg->data->numRects++; - goto NextRect; + else if (box->y1 >= ri_box->y2) + { + /* Put box into new band */ + if (reg->extents.x2 < ri_box->x2) + reg->extents.x2 = ri_box->x2; + + if (reg->extents.x1 > box->x1) + reg->extents.x1 = box->x1; + + COALESCE (reg, rit->prev_band, rit->cur_band); + rit->cur_band = reg->data->numRects; + RECTALLOC_BAIL (reg, 1, bail); + *PIXREGION_TOP (reg) = *box; + reg->data->numRects++; + + goto next_rect; } - /* Well, this region was inappropriate. Try the next one. */ + /* Well, this region was inappropriate. Try the next one. */ } /* for j */ - /* Uh-oh. No regions were appropriate. Create a new one. */ - if (sizeRI == numRI) - { - size_t data_size; - - /* Oops, allocate space for new region information */ - sizeRI <<= 1; - - data_size = sizeRI * sizeof(RegionInfo); - if (data_size / sizeRI != sizeof(RegionInfo)) - goto bail; - if (ri == stack_regions) { - rit = malloc (data_size); - if (!rit) + /* Uh-oh. No regions were appropriate. Create a new one. 
*/ + if (size_ri == num_ri) + { + size_t data_size; + + /* Oops, allocate space for new region information */ + size_ri <<= 1; + + data_size = size_ri * sizeof(region_info_t); + if (data_size / size_ri != sizeof(region_info_t)) + goto bail; + + if (ri == stack_regions) + { + rit = malloc (data_size); + if (!rit) goto bail; - memcpy (rit, ri, numRI * sizeof (RegionInfo)); - } else { - rit = (RegionInfo *) realloc(ri, data_size); - if (!rit) + memcpy (rit, ri, num_ri * sizeof (region_info_t)); + } + else + { + rit = (region_info_t *) realloc (ri, data_size); + if (!rit) goto bail; } - ri = rit; - rit = &ri[numRI]; + ri = rit; + rit = &ri[num_ri]; } - numRI++; - rit->prevBand = 0; - rit->curBand = 0; - rit->reg.extents = *box; - rit->reg.data = (region_data_type_t *)NULL; - if (!pixman_rect_alloc(&rit->reg, (i+numRI) / numRI)) /* MUST force allocation */ + num_ri++; + rit->prev_band = 0; + rit->cur_band = 0; + rit->reg.extents = *box; + rit->reg.data = (region_data_type_t *)NULL; + + /* MUST force allocation */ + if (!pixman_rect_alloc (&rit->reg, (i + num_ri) / num_ri)) goto bail; -NextRect: ; + + next_rect: ; } /* for i */ - /* Make a final pass over each region in order to Coalesce and set - extents.x2 and extents.y2 */ - - for (j = numRI, rit = ri; --j >= 0; rit++) + /* Make a final pass over each region in order to COALESCE and set + * extents.x2 and extents.y2 + */ + for (j = num_ri, rit = ri; --j >= 0; rit++) { - reg = &rit->reg; - riBox = PIXREGION_END(reg); - reg->extents.y2 = riBox->y2; - if (reg->extents.x2 < riBox->x2) reg->extents.x2 = riBox->x2; - Coalesce(reg, rit->prevBand, rit->curBand); + reg = &rit->reg; + ri_box = PIXREGION_END (reg); + reg->extents.y2 = ri_box->y2; + + if (reg->extents.x2 < ri_box->x2) + reg->extents.x2 = ri_box->x2; + + COALESCE (reg, rit->prev_band, rit->cur_band); + if (reg->data->numRects == 1) /* keep unions happy below */ - { - freeData(reg); - reg->data = (region_data_type_t *)NULL; + { + FREE_DATA (reg); + reg->data = (region_data_type_t *)NULL; } } /* Step 3: Union all regions into a single region */ - while (numRI > 1) + while (num_ri > 1) { - int half = numRI/2; - for (j = numRI & 1; j < (half + (numRI & 1)); j++) - { - reg = &ri[j].reg; - hreg = &ri[j+half].reg; - if (!pixman_op(reg, reg, hreg, pixman_region_unionO, TRUE, TRUE, pOverlap)) + int half = num_ri / 2; + for (j = num_ri & 1; j < (half + (num_ri & 1)); j++) + { + reg = &ri[j].reg; + hreg = &ri[j + half].reg; + + if (!pixman_op (reg, reg, hreg, pixman_region_union_o, TRUE, TRUE, overlap)) ret = FALSE; - if (hreg->extents.x1 < reg->extents.x1) + + if (hreg->extents.x1 < reg->extents.x1) reg->extents.x1 = hreg->extents.x1; - if (hreg->extents.y1 < reg->extents.y1) + + if (hreg->extents.y1 < reg->extents.y1) reg->extents.y1 = hreg->extents.y1; - if (hreg->extents.x2 > reg->extents.x2) + + if (hreg->extents.x2 > reg->extents.x2) reg->extents.x2 = hreg->extents.x2; - if (hreg->extents.y2 > reg->extents.y2) + + if (hreg->extents.y2 > reg->extents.y2) reg->extents.y2 = hreg->extents.y2; - freeData(hreg); + + FREE_DATA (hreg); } - numRI -= half; + + num_ri -= half; + if (!ret) goto bail; } + *badreg = ri[0].reg; + if (ri != stack_regions) - free(ri); - good(badreg); + free (ri); + + GOOD (badreg); return ret; + bail: - for (i = 0; i < numRI; i++) - freeData(&ri[i].reg); + for (i = 0; i < num_ri; i++) + FREE_DATA (&ri[i].reg); + if (ri != stack_regions) free (ri); @@ -1547,12 +1848,12 @@ bail: } /*====================================================================== - * Region Subtraction + * 
Region Subtraction *====================================================================*/ /*- *----------------------------------------------------------------------- - * pixman_region_subtractO -- + * pixman_region_subtract_o -- * Overlapping band subtraction. x1 is the left-most point not yet * checked. * @@ -1566,109 +1867,113 @@ bail: */ /*ARGSUSED*/ static pixman_bool_t -pixman_region_subtractO ( - region_type_t * region, - box_type_t * r1, - box_type_t * r1End, - box_type_t * r2, - box_type_t * r2End, - int y1, - int y2, - int *pOverlap) +pixman_region_subtract_o (region_type_t * region, + box_type_t * r1, + box_type_t * r1_end, + box_type_t * r2, + box_type_t * r2_end, + int y1, + int y2, + int * overlap) { - box_type_t * pNextRect; - int x1; + box_type_t * next_rect; + int x1; x1 = r1->x1; - assert(y1<y2); - assert(r1 != r1End && r2 != r2End); + assert (y1 < y2); + assert (r1 != r1_end && r2 != r2_end); - pNextRect = PIXREGION_TOP(region); + next_rect = PIXREGION_TOP (region); do { - if (r2->x2 <= x1) - { - /* + if (r2->x2 <= x1) + { + /* * Subtrahend entirely to left of minuend: go to next subtrahend. */ - r2++; + r2++; } - else if (r2->x1 <= x1) - { - /* + else if (r2->x1 <= x1) + { + /* * Subtrahend preceeds minuend: nuke left edge of minuend. */ - x1 = r2->x2; - if (x1 >= r1->x2) - { - /* + x1 = r2->x2; + if (x1 >= r1->x2) + { + /* * Minuend completely covered: advance to next minuend and * reset left fence to edge of new minuend. */ - r1++; - if (r1 != r1End) + r1++; + if (r1 != r1_end) x1 = r1->x1; } - else - { - /* + else + { + /* * Subtrahend now used up since it doesn't extend beyond * minuend */ - r2++; + r2++; } } - else if (r2->x1 < r1->x2) - { - /* + else if (r2->x1 < r1->x2) + { + /* * Left part of subtrahend covers part of minuend: add uncovered * part of minuend to region and skip to next subtrahend. */ - assert(x1<r2->x1); - NEWRECT(region, pNextRect, x1, y1, r2->x1, y2); + assert (x1 < r2->x1); + NEWRECT (region, next_rect, x1, y1, r2->x1, y2); - x1 = r2->x2; - if (x1 >= r1->x2) - { - /* + x1 = r2->x2; + if (x1 >= r1->x2) + { + /* * Minuend used up: advance to new... */ - r1++; - if (r1 != r1End) + r1++; + if (r1 != r1_end) x1 = r1->x1; } - else - { - /* + else + { + /* * Subtrahend used up */ - r2++; + r2++; } } - else - { - /* + else + { + /* * Minuend used up: add any remaining piece before advancing. */ - if (r1->x2 > x1) - NEWRECT(region, pNextRect, x1, y1, r1->x2, y2); - r1++; - if (r1 != r1End) + if (r1->x2 > x1) + NEWRECT (region, next_rect, x1, y1, r1->x2, y2); + + r1++; + + if (r1 != r1_end) x1 = r1->x1; } - } while ((r1 != r1End) && (r2 != r2End)); + } + while ((r1 != r1_end) && (r2 != r2_end)); /* * Add remaining minuend rectangles to region. */ - while (r1 != r1End) + while (r1 != r1_end) { - assert(x1<r1->x2); - NEWRECT(region, pNextRect, x1, y1, r1->x2, y2); - r1++; - if (r1 != r1End) + assert (x1 < r1->x2); + + NEWRECT (region, next_rect, x1, y1, r1->x2, y2); + + r1++; + if (r1 != r1_end) x1 = r1->x1; } return TRUE; @@ -1677,59 +1982,62 @@ pixman_region_subtractO ( /*- *----------------------------------------------------------------------- * pixman_region_subtract -- - * Subtract regS from regM and leave the result in regD. + * Subtract reg_s from reg_m and leave the result in reg_d. * S stands for subtrahend, M for minuend and D for difference. * * Results: * TRUE if successful. * * Side Effects: - * regD is overwritten. + * reg_d is overwritten. 
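
The overlapping-band walk above is easier to follow in scalar form: a left fence x1 sweeps across the minuend while subtrahend boxes chop pieces off it from the left. A minimal sketch of the same interval logic for one minuend box against the sorted subtrahend boxes of its band (emit_rect () is a hypothetical stand-in for the NEWRECT macro; all coordinates are half-open):

    /* Emit the parts of minuend box m not covered by the sorted
     * subtrahend boxes s[0..ns-1] sharing m's band. */
    static void
    subtract_one_box (box_type_t m, const box_type_t *s, int ns)
    {
        int i, x1 = m.x1;

        for (i = 0; i < ns && x1 < m.x2; i++)
        {
            if (s[i].x2 <= x1)
                continue;               /* subtrahend entirely to the left */

            if (s[i].x1 > x1)           /* uncovered piece before s[i] */
                emit_rect (x1, m.y1, MIN (s[i].x1, m.x2), m.y2);

            x1 = MAX (x1, s[i].x2);     /* advance the left fence */
        }

        if (x1 < m.x2)                  /* uncovered tail of the minuend */
            emit_rect (x1, m.y1, m.x2, m.y2);
    }
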
 *
 *-----------------------------------------------------------------------
 */
 PIXMAN_EXPORT pixman_bool_t
-PREFIX(_subtract) (region_type_t *	regD,
-		   region_type_t * 	regM,
-		   region_type_t *	regS)
+PREFIX (_subtract) (region_type_t *reg_d,
+                    region_type_t *reg_m,
+                    region_type_t *reg_s)
 {
     int overlap; /* result ignored */
 
-    good(regM);
-    good(regS);
-    good(regD);
-    /* check for trivial rejects */
-    if (PIXREGION_NIL(regM) || PIXREGION_NIL(regS) ||
-	!EXTENTCHECK(&regM->extents, &regS->extents))
+    GOOD (reg_m);
+    GOOD (reg_s);
+    GOOD (reg_d);
+
+    /* check for trivial rejects */
+    if (PIXREGION_NIL (reg_m) || PIXREGION_NIL (reg_s) ||
+	!EXTENTCHECK (&reg_m->extents, &reg_s->extents))
     {
-	if (PIXREGION_NAR (regS))
-	    return pixman_break (regD);
-	return PREFIX(_copy) (regD, regM);
+	if (PIXREGION_NAR (reg_s))
+	    return pixman_break (reg_d);
+
+	return PREFIX (_copy) (reg_d, reg_m);
     }
-    else if (regM == regS)
+    else if (reg_m == reg_s)
     {
-	freeData(regD);
-	regD->extents.x2 = regD->extents.x1;
-	regD->extents.y2 = regD->extents.y1;
-	regD->data = pixman_region_emptyData;
-	return TRUE;
+	FREE_DATA (reg_d);
+	reg_d->extents.x2 = reg_d->extents.x1;
+	reg_d->extents.y2 = reg_d->extents.y1;
+	reg_d->data = pixman_region_empty_data;
+
+	return TRUE;
     }
 
     /* Add those rectangles in region 1 that aren't in region 2,
       do yucky substraction for overlaps, and
      just throw away rectangles in region 2 that aren't in region 1 */
-    if (!pixman_op(regD, regM, regS, pixman_region_subtractO, TRUE, FALSE, &overlap))
+    if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE, &overlap))
	return FALSE;
 
     /*
-     * Can't alter RegD's extents before we call pixman_op because
+     * Can't alter reg_d's extents before we call pixman_op because
     * it might be one of the source regions and pixman_op depends
     * on the extents of those regions being unaltered. Besides, this
     * way there's no checking against rectangles that will be nuked
     * due to coalescing, so we have to examine fewer rectangles.
     */
-    pixman_set_extents(regD);
-    good(regD);
+    pixman_set_extents (reg_d);
+    GOOD (reg_d);
     return TRUE;
 }
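
In terms of the public API, the function above backs pixman_region_subtract () and pixman_region32_subtract (). A small usage sketch with the 32-bit variant (real entry points; the coordinates are arbitrary):

    pixman_region32_t window, dirty, exposed;

    pixman_region32_init_rect (&window, 0, 0, 640, 480);
    pixman_region32_init_rect (&dirty, 100, 100, 32, 32);
    pixman_region32_init (&exposed);

    /* exposed = window - dirty  (D = M - S in the terms used above) */
    pixman_region32_subtract (&exposed, &window, &dirty);

    pixman_region32_fini (&window);
    pixman_region32_fini (&dirty);
    pixman_region32_fini (&exposed);
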
@@ -1748,408 +2056,497 @@ PREFIX(_subtract) (region_type_t * regD,
 *	TRUE.
 *
 * Side Effects:
- *	newReg is overwritten.
+ *	new_reg is overwritten.
 *
 *-----------------------------------------------------------------------
 */
 pixman_bool_t
-PIXMAN_EXPORT PREFIX(_inverse) (region_type_t * newReg,  /* Destination region */
-	       region_type_t * reg1,    /* Region to invert */
-	       box_type_t *    invRect) /* Bounding box for inversion */
+PIXMAN_EXPORT PREFIX (_inverse) (region_type_t *new_reg,  /* Destination region */
+                                 region_type_t *reg1,     /* Region to invert */
+                                 box_type_t *   inv_rect) /* Bounding box for inversion */
 {
-    region_type_t invReg;  /* Quick and dirty region made from the
-			    * bounding box */
-    int overlap;           /* result ignored */
-
-    good(reg1);
-    good(newReg);
-    /* check for trivial rejects */
-    if (PIXREGION_NIL(reg1) || !EXTENTCHECK(invRect, &reg1->extents))
-    {
-	if (PIXREGION_NAR(reg1))
-	    return pixman_break (newReg);
-	newReg->extents = *invRect;
-	freeData(newReg);
-	newReg->data = (region_data_type_t *)NULL;
+    region_type_t inv_reg; /* Quick and dirty region made from the
+                            * bounding box */
+    int overlap;           /* result ignored */
+
+    GOOD (reg1);
+    GOOD (new_reg);
+
+    /* check for trivial rejects */
+    if (PIXREGION_NIL (reg1) || !EXTENTCHECK (inv_rect, &reg1->extents))
+    {
+	if (PIXREGION_NAR (reg1))
+	    return pixman_break (new_reg);
+
+	new_reg->extents = *inv_rect;
+	FREE_DATA (new_reg);
+	new_reg->data = (region_data_type_t *)NULL;
+
	return TRUE;
     }
 
     /* Add those rectangles in region 1 that aren't in region 2,
-       do yucky substraction for overlaps, and
-       just throw away rectangles in region 2 that aren't in region 1 */
-    invReg.extents = *invRect;
-    invReg.data = (region_data_type_t *)NULL;
-    if (!pixman_op(newReg, &invReg, reg1, pixman_region_subtractO, TRUE, FALSE, &overlap))
+     * do yucky substraction for overlaps, and
+     * just throw away rectangles in region 2 that aren't in region 1
+     */
+    inv_reg.extents = *inv_rect;
+    inv_reg.data = (region_data_type_t *)NULL;
+    if (!pixman_op (new_reg, &inv_reg, reg1, pixman_region_subtract_o, TRUE, FALSE, &overlap))
	return FALSE;
 
     /*
-     * Can't alter newReg's extents before we call pixman_op because
+     * Can't alter new_reg's extents before we call pixman_op because
     * it might be one of the source regions and pixman_op depends
     * on the extents of those regions being unaltered. Besides, this
     * way there's no checking against rectangles that will be nuked
     * due to coalescing, so we have to examine fewer rectangles.
     */
-    pixman_set_extents(newReg);
-    good(newReg);
+    pixman_set_extents (new_reg);
+    GOOD (new_reg);
     return TRUE;
 }
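
The routine documented and defined next is exposed as pixman_region_contains_rectangle () / pixman_region32_contains_rectangle () and returns one of three overlap classes. A usage sketch (real API; the box values are arbitrary):

    pixman_box32_t box = { 10, 10, 20, 20 };    /* x1, y1, x2, y2 */

    switch (pixman_region32_contains_rectangle (&region, &box))
    {
    case PIXMAN_REGION_IN:   /* box is entirely inside the region */
        break;
    case PIXMAN_REGION_OUT:  /* box does not touch the region     */
        break;
    case PIXMAN_REGION_PART: /* box straddles the region boundary */
        break;
    }
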
 /*
- *   RectIn(region, rect)
+ * rect_in(region, rect)
 *   This routine takes a pointer to a region and a pointer to a box
 *   and determines if the box is outside/inside/partly inside the region.
 *
 *   The idea is to travel through the list of rectangles trying to cover the
 *   passed box with them. Anytime a piece of the rectangle isn't covered
- *   by a band of rectangles, partOut is set TRUE. Any time a rectangle in
- *   the region covers part of the box, partIn is set TRUE. The process ends
+ * by a band of rectangles, part_out is set TRUE. Any time a rectangle in
+ * the region covers part of the box, part_in is set TRUE. The process ends
 * when either the box has been completely covered (we reached a band that
- * doesn't overlap the box, partIn is TRUE and partOut is false), the
- * box has been partially covered (partIn == partOut == TRUE -- because of
+ * doesn't overlap the box, part_in is TRUE and part_out is false), the
+ * box has been partially covered (part_in == part_out == TRUE -- because of
 * the banding, the first time this is true we know the box is only
 * partially in the region) or is outside the region (we reached a band
- * that doesn't overlap the box at all and partIn is false)
+ * that doesn't overlap the box at all and part_in is false)
 */
 pixman_region_overlap_t
-PIXMAN_EXPORT PREFIX(_contains_rectangle) (region_type_t * region,
-					   box_type_t * prect)
+PIXMAN_EXPORT PREFIX (_contains_rectangle) (region_type_t * region,
+                                            box_type_t *    prect)
 {
-    int x;
-    int y;
     box_type_t *     pbox;
-    box_type_t *     pboxEnd;
-    int              partIn, partOut;
-    int              numRects;
+    box_type_t *     pbox_end;
+    int              part_in, part_out;
+    int              numRects;
+    int              x, y;
+
+    GOOD (region);
+
+    numRects = PIXREGION_NUMRECTS (region);
 
-    good(region);
-    numRects = PIXREGION_NUM_RECTS(region); /* useful optimization */
-    if (!numRects || !EXTENTCHECK(&region->extents, prect))
-	return(PIXMAN_REGION_OUT);
+
+    /* useful optimization */
+    if (!numRects || !EXTENTCHECK (&region->extents, prect))
+	return(PIXMAN_REGION_OUT);
 
     if (numRects == 1)
     {
-	/* We know that it must be PIXMAN_REGION_IN or PIXMAN_REGION_PART */
-	if (SUBSUMES(&region->extents, prect))
+	/* We know that it must be PIXMAN_REGION_IN or PIXMAN_REGION_PART */
+	if (SUBSUMES (&region->extents, prect))
	    return(PIXMAN_REGION_IN);
-	else
+	else
	    return(PIXMAN_REGION_PART);
     }
 
-    partOut = FALSE;
-    partIn = FALSE;
+    part_out = FALSE;
+    part_in = FALSE;
 
     /* (x,y) starts at upper left of rect, moving to the right and down */
     x = prect->x1;
     y = prect->y1;
 
-    /* can stop when both partOut and partIn are TRUE, or we reach prect->y2 */
-    for (pbox = PIXREGION_BOXPTR(region), pboxEnd = pbox + numRects;
-	 pbox != pboxEnd;
+    /* can stop when both part_out and part_in are TRUE, or we reach prect->y2 */
+    for (pbox = PIXREGION_BOXPTR (region), pbox_end = pbox + numRects;
+	 pbox != pbox_end;
	 pbox++)
     {
	if (pbox->y2 <= y)
-	    continue;	/* getting up to speed or skipping remainder of band */
+	    continue;	/* getting up to speed or skipping remainder of band */
 
	if (pbox->y1 > y)
	{
-	    partOut = TRUE;	/* missed part of rectangle above */
-	    if (partIn || (pbox->y1 >= prect->y2))
-		break;
-	    y = pbox->y1;	/* x guaranteed to be == prect->x1 */
-	}
+	    part_out = TRUE;	/* missed part of rectangle above */
+	    if (part_in || (pbox->y1 >= prect->y2))
+		break;
+	    y = pbox->y1;	/* x guaranteed to be == prect->x1 */
+	}
 
	if (pbox->x2 <= x)
-	    continue;		/* not far enough over yet */
+	    continue;		/* not far enough over yet */
 
	if (pbox->x1 > x)
	{
-	    partOut = TRUE;	/* missed part of rectangle to left */
-	    if (partIn)
-		break;
-	}
+	    part_out = TRUE;	/* missed part of rectangle to left */
+	    if (part_in)
+		break;
+	}
 
	if (pbox->x1 < prect->x2)
	{
-	    partIn = TRUE;	/* definitely overlap */
-	    if (partOut)
-		break;
-	}
+	    part_in = TRUE;	/* definitely overlap */
+	    if (part_out)
+		break;
+	}
 
	if (pbox->x2 >= prect->x2)
	{
-	    y = pbox->y2;	/* finished with this band */
-	    if (y >= prect->y2)
-		break;
-	    x = prect->x1;	/* reset x out to left again */
-	}
-	else
-	{
-	    /*
+	    y = pbox->y2;	/* finished with this band */
+	    if (y >= prect->y2)
+		break;
+	    x = prect->x1;	/* reset x out to left again */
+	}
+	else
+	{
+	    /*
	     * Because boxes in a band are maximal width, if the first box
	     * to overlap the
rectangle doesn't completely cover it in that * band, the rectangle must be partially out, since some of it - * will be uncovered in that band. partIn will have been set true + * will be uncovered in that band. part_in will have been set true * by now... */ - partOut = TRUE; - break; + part_out = TRUE; + break; } } - if (partIn) + if (part_in) { - if (y < prect->y2) + if (y < prect->y2) return PIXMAN_REGION_PART; - else + else return PIXMAN_REGION_IN; } else { - return PIXMAN_REGION_OUT; + return PIXMAN_REGION_OUT; } } /* PREFIX(_translate) (region, x, y) - translates in place -*/ + * translates in place + */ PIXMAN_EXPORT void -PREFIX(_translate) (region_type_t * region, int x, int y) +PREFIX (_translate) (region_type_t *region, int x, int y) { int x1, x2, y1, y2; int nbox; box_type_t * pbox; - good(region); + GOOD (region); region->extents.x1 = x1 = region->extents.x1 + x; region->extents.y1 = y1 = region->extents.y1 + y; region->extents.x2 = x2 = region->extents.x2 + x; region->extents.y2 = y2 = region->extents.y2 + y; - if (((x1 - SHRT_MIN)|(y1 - SHRT_MIN)|(SHRT_MAX - x2)|(SHRT_MAX - y2)) >= 0) + + if (((x1 - SHRT_MIN) | (y1 - SHRT_MIN) | (SHRT_MAX - x2) | (SHRT_MAX - y2)) >= 0) { - if (region->data && (nbox = region->data->numRects)) - { - for (pbox = PIXREGION_BOXPTR(region); nbox--; pbox++) - { - pbox->x1 += x; - pbox->y1 += y; - pbox->x2 += x; - pbox->y2 += y; + if (region->data && (nbox = region->data->numRects)) + { + for (pbox = PIXREGION_BOXPTR (region); nbox--; pbox++) + { + pbox->x1 += x; + pbox->y1 += y; + pbox->x2 += x; + pbox->y2 += y; } } - return; + return; } - if (((x2 - SHRT_MIN)|(y2 - SHRT_MIN)|(SHRT_MAX - x1)|(SHRT_MAX - y1)) <= 0) + + if (((x2 - SHRT_MIN) | (y2 - SHRT_MIN) | (SHRT_MAX - x1) | (SHRT_MAX - y1)) <= 0) { - region->extents.x2 = region->extents.x1; - region->extents.y2 = region->extents.y1; - freeData(region); - region->data = pixman_region_emptyData; - return; + region->extents.x2 = region->extents.x1; + region->extents.y2 = region->extents.y1; + FREE_DATA (region); + region->data = pixman_region_empty_data; + return; } + if (x1 < SHRT_MIN) region->extents.x1 = SHRT_MIN; else if (x2 > SHRT_MAX) region->extents.x2 = SHRT_MAX; + if (y1 < SHRT_MIN) region->extents.y1 = SHRT_MIN; else if (y2 > SHRT_MAX) region->extents.y2 = SHRT_MAX; + if (region->data && (nbox = region->data->numRects)) { - box_type_t * pboxout; + box_type_t * pbox_out; - for (pboxout = pbox = PIXREGION_BOXPTR(region); nbox--; pbox++) - { - pboxout->x1 = x1 = pbox->x1 + x; - pboxout->y1 = y1 = pbox->y1 + y; - pboxout->x2 = x2 = pbox->x2 + x; - pboxout->y2 = y2 = pbox->y2 + y; - if (((x2 - SHRT_MIN)|(y2 - SHRT_MIN)| - (SHRT_MAX - x1)|(SHRT_MAX - y1)) <= 0) - { - region->data->numRects--; - continue; + for (pbox_out = pbox = PIXREGION_BOXPTR (region); nbox--; pbox++) + { + pbox_out->x1 = x1 = pbox->x1 + x; + pbox_out->y1 = y1 = pbox->y1 + y; + pbox_out->x2 = x2 = pbox->x2 + x; + pbox_out->y2 = y2 = pbox->y2 + y; + + if (((x2 - SHRT_MIN) | (y2 - SHRT_MIN) | + (SHRT_MAX - x1) | (SHRT_MAX - y1)) <= 0) + { + region->data->numRects--; + continue; } - if (x1 < SHRT_MIN) - pboxout->x1 = SHRT_MIN; - else if (x2 > SHRT_MAX) - pboxout->x2 = SHRT_MAX; - if (y1 < SHRT_MIN) - pboxout->y1 = SHRT_MIN; - else if (y2 > SHRT_MAX) - pboxout->y2 = SHRT_MAX; - pboxout++; + + if (x1 < SHRT_MIN) + pbox_out->x1 = SHRT_MIN; + else if (x2 > SHRT_MAX) + pbox_out->x2 = SHRT_MAX; + + if (y1 < SHRT_MIN) + pbox_out->y1 = SHRT_MIN; + else if (y2 > SHRT_MAX) + pbox_out->y2 = SHRT_MAX; + + pbox_out++; } - if (pboxout != pbox) 
-	{
-	    if (region->data->numRects == 1)
+
+	if (pbox_out != pbox)
+	{
+	    if (region->data->numRects == 1)
+	    {
+		region->extents = *PIXREGION_BOXPTR (region);
+		FREE_DATA (region);
+		region->data = (region_data_type_t *)NULL;
+	    }
+	    else
	    {
-		region->extents = *PIXREGION_BOXPTR(region);
-		freeData(region);
-		region->data = (region_data_type_t *)NULL;
+		pixman_set_extents (region);
	    }
-	    else
-		pixman_set_extents(region);
	}
     }
+
+    GOOD (region);
 }
 
 PIXMAN_EXPORT void
-PREFIX(_reset) (region_type_t *region, box_type_t *box)
+PREFIX (_reset) (region_type_t *region, box_type_t *box)
 {
-    good(region);
-    assert(box->x1<=box->x2);
-    assert(box->y1<=box->y2);
+    GOOD (region);
+
+    assert (GOOD_RECT (box));
+
     region->extents = *box;
-    freeData(region);
-    region->data = (region_data_type_t *)NULL;
+
+    FREE_DATA (region);
+
+    region->data = NULL;
 }
 
 /* box is "return" value */
 PIXMAN_EXPORT int
-PREFIX(_contains_point) (region_type_t * region,
-			 int x, int y,
-			 box_type_t * box)
+PREFIX (_contains_point) (region_type_t * region,
+                          int x, int y,
+                          box_type_t * box)
 {
-    box_type_t *pbox, *pboxEnd;
+    box_type_t *pbox, *pbox_end;
     int numRects;
 
-    good(region);
-    numRects = PIXREGION_NUM_RECTS(region);
-    if (!numRects || !INBOX(&region->extents, x, y))
-	return(FALSE);
+    GOOD (region);
+    numRects = PIXREGION_NUMRECTS (region);
+
+    if (!numRects || !INBOX (&region->extents, x, y))
+	return(FALSE);
+
     if (numRects == 1)
     {
	if (box)
	    *box = region->extents;
 
-	return(TRUE);
+	return(TRUE);
     }
-    for (pbox = PIXREGION_BOXPTR(region), pboxEnd = pbox + numRects;
-	 pbox != pboxEnd;
+
+    for (pbox = PIXREGION_BOXPTR (region), pbox_end = pbox + numRects;
+	 pbox != pbox_end;
	 pbox++)
     {
	if (y >= pbox->y2)
-	    continue;		/* not there yet */
-	if ((y < pbox->y1) || (x < pbox->x1))
-	    break;		/* missed it */
-	if (x >= pbox->x2)
-	    continue;		/* not there yet */
+	    continue;		/* not there yet */
+
+	if ((y < pbox->y1) || (x < pbox->x1))
+	    break;		/* missed it */
+
+	if (x >= pbox->x2)
+	    continue;		/* not there yet */
 
	if (box)
	    *box = *pbox;
 
-	return(TRUE);
+	return(TRUE);
     }
+
     return(FALSE);
 }
 
 PIXMAN_EXPORT int
-PREFIX(_not_empty) (region_type_t * region)
+PREFIX (_not_empty) (region_type_t * region)
 {
-    good(region);
-    return(!PIXREGION_NIL(region));
+    GOOD (region);
+
+    return(!PIXREGION_NIL (region));
 }
 
 PIXMAN_EXPORT box_type_t *
-PREFIX(_extents) (region_type_t * region)
+PREFIX (_extents) (region_type_t * region)
 {
-    good(region);
+    GOOD (region);
+
     return(&region->extents);
 }
 
 /*
-    Clip a list of scanlines to a region.  The caller has allocated the
-    space.  FSorted is non-zero if the scanline origins are in ascending
-    order.
-    returns the number of new, clipped scanlines.
-*/
+ * Clip a list of scanlines to a region.  The caller has allocated the
+ * space.  FSorted is non-zero if the scanline origins are in ascending order.
+ *
+ * returns the number of new, clipped scanlines.
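
PREFIX (_selfcheck), which follows, verifies the y-x banding invariant that all of the code above relies on: boxes are stored sorted by (y1, x1); boxes within one band share exactly the same y1 and y2 and are disjoint and ascending in x; and each later band starts at a strictly greater y1. A compact restatement of the per-pair check (a sketch, not the library's code):

    /* TRUE if box n may legally follow box p in a y-x banded region */
    static int
    banded_order_ok (const box_type_t *p, const box_type_t *n)
    {
        if (n->x1 >= n->x2 || n->y1 >= n->y2)
            return 0;                   /* malformed box */

        if (n->y1 > p->y1)
            return 1;                   /* start of a later band */

        return n->y1 == p->y1 &&        /* same band: same y span,  */
               n->y2 == p->y2 &&        /* sorted and disjoint in x */
               n->x1 >= p->x2;
    }
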
+ */ PIXMAN_EXPORT pixman_bool_t -PREFIX(_selfcheck) (reg) - region_type_t * reg; +PREFIX (_selfcheck) (region_type_t *reg) { int i, numRects; if ((reg->extents.x1 > reg->extents.x2) || - (reg->extents.y1 > reg->extents.y2)) + (reg->extents.y1 > reg->extents.y2)) + { return FALSE; - numRects = PIXREGION_NUM_RECTS(reg); + } + + numRects = PIXREGION_NUMRECTS (reg); if (!numRects) + { return ((reg->extents.x1 == reg->extents.x2) && - (reg->extents.y1 == reg->extents.y2) && - (reg->data->size || (reg->data == pixman_region_emptyData))); + (reg->extents.y1 == reg->extents.y2) && + (reg->data->size || (reg->data == pixman_region_empty_data))); + } else if (numRects == 1) + { return (!reg->data); + } else { - box_type_t * pboxP, * pboxN; - box_type_t box; + box_type_t * pbox_p, * pbox_n; + box_type_t box; - pboxP = PIXREGION_RECTS(reg); - box = *pboxP; - box.y2 = pboxP[numRects-1].y2; - pboxN = pboxP + 1; - for (i = numRects; --i > 0; pboxP++, pboxN++) - { - if ((pboxN->x1 >= pboxN->x2) || - (pboxN->y1 >= pboxN->y2)) + pbox_p = PIXREGION_RECTS (reg); + box = *pbox_p; + box.y2 = pbox_p[numRects - 1].y2; + pbox_n = pbox_p + 1; + + for (i = numRects; --i > 0; pbox_p++, pbox_n++) + { + if ((pbox_n->x1 >= pbox_n->x2) || + (pbox_n->y1 >= pbox_n->y2)) + { return FALSE; - if (pboxN->x1 < box.x1) - box.x1 = pboxN->x1; - if (pboxN->x2 > box.x2) - box.x2 = pboxN->x2; - if ((pboxN->y1 < pboxP->y1) || - ((pboxN->y1 == pboxP->y1) && - ((pboxN->x1 < pboxP->x2) || (pboxN->y2 != pboxP->y2)))) + } + + if (pbox_n->x1 < box.x1) + box.x1 = pbox_n->x1; + + if (pbox_n->x2 > box.x2) + box.x2 = pbox_n->x2; + + if ((pbox_n->y1 < pbox_p->y1) || + ((pbox_n->y1 == pbox_p->y1) && + ((pbox_n->x1 < pbox_p->x2) || (pbox_n->y2 != pbox_p->y2)))) + { return FALSE; + } } - return ((box.x1 == reg->extents.x1) && - (box.x2 == reg->extents.x2) && - (box.y1 == reg->extents.y1) && - (box.y2 == reg->extents.y2)); + + return ((box.x1 == reg->extents.x1) && + (box.x2 == reg->extents.x2) && + (box.y1 == reg->extents.y1) && + (box.y2 == reg->extents.y2)); } } PIXMAN_EXPORT pixman_bool_t -PREFIX(_init_rects) (region_type_t *region, - box_type_t *boxes, int count) +PREFIX (_init_rects) (region_type_t *region, + box_type_t *boxes, int count) { - int overlap; + box_type_t *rects; + int displacement; + int i; /* if it's 1, then we just want to set the extents, so call * the existing method. */ - if (count == 1) { - PREFIX(_init_rect) (region, - boxes[0].x1, - boxes[0].y1, - boxes[0].x2 - boxes[0].x1, - boxes[0].y2 - boxes[0].y1); - return TRUE; + if (count == 1) + { + PREFIX (_init_rect) (region, + boxes[0].x1, + boxes[0].y1, + boxes[0].x2 - boxes[0].x1, + boxes[0].y2 - boxes[0].y1); + return TRUE; } - PREFIX(_init) (region); + PREFIX (_init) (region); /* if it's 0, don't call pixman_rect_alloc -- 0 rectangles is * a special case, and causing pixman_rect_alloc would cause * us to leak memory (because the 0-rect case should be the - * static pixman_region_emptyData data). + * static pixman_region_empty_data data). 
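
A usage sketch for this entry point, exposed as pixman_region_init_rects () / pixman_region32_init_rects () (real API; the boxes are arbitrary, and as the loop added further below shows, they may now contain empty or malformed entries, which get culled before validation):

    pixman_box32_t boxes[3] = {
        {  0,  0, 100, 10 },    /* x1, y1, x2, y2 */
        {  0, 10,  50, 20 },
        {  5,  5,   5,  5 }     /* degenerate: culled by the new code */
    };
    pixman_region32_t r;

    if (!pixman_region32_init_rects (&r, boxes, 3))
        ;   /* allocation failed or validation broke the region */
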
*/ if (count == 0) - return TRUE; + return TRUE; - if (!pixman_rect_alloc(region, count)) + if (!pixman_rect_alloc (region, count)) return FALSE; + rects = PIXREGION_RECTS (region); + /* Copy in the rects */ - memcpy (PIXREGION_RECTS(region), boxes, sizeof(box_type_t) * count); + memcpy (rects, boxes, sizeof(box_type_t) * count); region->data->numRects = count; + /* Eliminate empty and malformed rectangles */ + displacement = 0; + + for (i = 0; i < count; ++i) + { + box_type_t *box = &rects[i]; + + if (box->x1 >= box->x2 || box->y1 >= box->y2) + displacement++; + else if (displacement) + rects[i - displacement] = rects[i]; + } + + region->data->numRects -= displacement; + + /* If eliminating empty rectangles caused there + * to be only 0 or 1 rectangles, deal with that. + */ + if (region->data->numRects == 0) + { + FREE_DATA (region); + PREFIX (_init) (region); + + return TRUE; + } + + if (region->data->numRects == 1) + { + region->extents = rects[0]; + + FREE_DATA (region); + region->data = NULL; + + GOOD (region); + + return TRUE; + } + /* Validate */ region->extents.x1 = region->extents.x2 = 0; - return validate (region, &overlap); + + return validate (region, &i); } diff --git a/lib/pixman/pixman/pixman-region16.c b/lib/pixman/pixman/pixman-region16.c index acee0946b..46f5e26ea 100644 --- a/lib/pixman/pixman/pixman-region16.c +++ b/lib/pixman/pixman/pixman-region16.c @@ -42,42 +42,22 @@ typedef struct { #define PREFIX(x) pixman_region##x +#include "pixman-region.c" + +/* This function exists only to make it possible to preserve the X ABI - + * it should go away at first opportunity. + * + * The problem is that the X ABI exports the three structs and has used + * them through macros. So the X server calls this function with + * the addresses of those structs which makes the existing code continue to + * work. 
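
The #include of pixman-region.c just above is the mechanism behind the file pair: pixman-region16.c and pixman-region32.c each define the box and region typedefs plus a PREFIX macro, then include the same implementation, so every PREFIX (_foo) in it expands to pixman_region_foo or pixman_region32_foo respectively. Schematically (a sketch of the pattern; the typedef names follow what the hidden context of these files declares):

    /* pixman-region16.c */
    typedef pixman_box16_t         box_type_t;
    typedef pixman_region16_data_t region_data_type_t;
    typedef pixman_region16_t      region_type_t;
    #define PREFIX(x) pixman_region##x     /* PREFIX(_union) -> pixman_region_union */
    #include "pixman-region.c"

    /* pixman-region32.c */
    typedef pixman_box32_t         box_type_t;
    typedef pixman_region32_data_t region_data_type_t;
    typedef pixman_region32_t      region_type_t;
    #define PREFIX(x) pixman_region32##x   /* PREFIX(_union) -> pixman_region32_union */
    #include "pixman-region.c"
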
+ */ PIXMAN_EXPORT void pixman_region_set_static_pointers (pixman_box16_t *empty_box, pixman_region16_data_t *empty_data, pixman_region16_data_t *broken_data) { - pixman_region_internal_set_static_pointers (empty_box, empty_data, broken_data); -} - -pixman_bool_t -pixman_region16_copy_from_region32 (pixman_region16_t *dst, - pixman_region32_t *src) -{ - int n_boxes, i; - pixman_box32_t *boxes32; - pixman_box16_t *boxes16; - pixman_bool_t retval; - - boxes32 = pixman_region32_rectangles (src, &n_boxes); - - boxes16 = pixman_malloc_ab (n_boxes, sizeof (pixman_box16_t)); - - if (!boxes16) - return FALSE; - - for (i = 0; i < n_boxes; ++i) - { - boxes16[i].x1 = boxes32[i].x1; - boxes16[i].y1 = boxes32[i].y1; - boxes16[i].x2 = boxes32[i].x2; - boxes16[i].y2 = boxes32[i].y2; - } - - pixman_region_fini (dst); - retval = pixman_region_init_rects (dst, boxes16, n_boxes); - free (boxes16); - return retval; + pixman_region_empty_box = empty_box; + pixman_region_empty_data = empty_data; + pixman_broken_data = broken_data; } - -#include "pixman-region.c" diff --git a/lib/pixman/pixman/pixman-region32.c b/lib/pixman/pixman/pixman-region32.c index aac74f68f..aeee86cf9 100644 --- a/lib/pixman/pixman/pixman-region32.c +++ b/lib/pixman/pixman/pixman-region32.c @@ -40,43 +40,4 @@ typedef struct { #define PREFIX(x) pixman_region32##x -#define N_TMP_BOXES (16) - -pixman_bool_t -pixman_region32_copy_from_region16 (pixman_region32_t *dst, - pixman_region16_t *src) -{ - int n_boxes, i; - pixman_box16_t *boxes16; - pixman_box32_t *boxes32; - pixman_box32_t tmp_boxes[N_TMP_BOXES]; - pixman_bool_t retval; - - boxes16 = pixman_region_rectangles (src, &n_boxes); - - if (n_boxes > N_TMP_BOXES) - boxes32 = pixman_malloc_ab (n_boxes, sizeof (pixman_box32_t)); - else - boxes32 = tmp_boxes; - - if (!boxes32) - return FALSE; - - for (i = 0; i < n_boxes; ++i) - { - boxes32[i].x1 = boxes16[i].x1; - boxes32[i].y1 = boxes16[i].y1; - boxes32[i].x2 = boxes16[i].x2; - boxes32[i].y2 = boxes16[i].y2; - } - - pixman_region32_fini (dst); - retval = pixman_region32_init_rects (dst, boxes32, n_boxes); - - if (boxes32 != tmp_boxes) - free (boxes32); - - return retval; -} - #include "pixman-region.c" diff --git a/lib/pixman/pixman/pixman-solid-fill.c b/lib/pixman/pixman/pixman-solid-fill.c index 1805600d8..38675dca8 100644 --- a/lib/pixman/pixman/pixman-solid-fill.c +++ b/lib/pixman/pixman/pixman-solid-fill.c @@ -21,28 +21,35 @@ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
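
The color_to_uint32 () helper further down in this file packs a 16-bit-per-channel pixman_color_t into an a8r8g8b8 pixel by keeping the top byte of each channel; green is masked rather than shifted because its destination byte (bits 8-15) already lines up with the high byte of the 16-bit value. A worked example with arbitrary channel values:

    pixman_color_t c = { 0x8000, 0x4000, 0xc000, 0xffff };  /* r, g, b, a */

    /* alpha: 0xffff >> 8 << 24  ->  0xff000000
     * red:   0x8000 >> 8 << 16  ->  0x00800000
     * green: 0x4000 & 0xff00    ->  0x00004000
     * blue:  0xc000 >> 8        ->  0x000000c0
     * OR'd together             ->  0xff8040c0             */
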
*/ +#ifdef HAVE_CONFIG_H #include <config.h> +#endif #include "pixman-private.h" static void -solid_fill_get_scanline_32 (pixman_image_t *image, int x, int y, int width, - uint32_t *buffer, uint32_t *mask, uint32_t maskBits) +solid_fill_get_scanline_32 (pixman_image_t *image, + int x, + int y, + int width, + uint32_t * buffer, + const uint32_t *mask, + uint32_t mask_bits) { uint32_t *end = buffer + width; register uint32_t color = ((solid_fill_t *)image)->color; - + while (buffer < end) *(buffer++) = color; - + return; } -static source_pict_class_t +static source_image_class_t solid_fill_classify (pixman_image_t *image, - int x, - int y, - int width, - int height) + int x, + int y, + int width, + int height) { return (image->source.class = SOURCE_IMAGE_CLASS_HORIZONTAL); } @@ -50,25 +57,25 @@ solid_fill_classify (pixman_image_t *image, static void solid_fill_property_changed (pixman_image_t *image) { - image->common.get_scanline_32 = (scanFetchProc)solid_fill_get_scanline_32; - image->common.get_scanline_64 = (scanFetchProc)_pixman_image_get_scanline_64_generic; + image->common.get_scanline_32 = solid_fill_get_scanline_32; + image->common.get_scanline_64 = _pixman_image_get_scanline_generic_64; } static uint32_t color_to_uint32 (const pixman_color_t *color) { return - (color->alpha >> 8 << 24) | - (color->red >> 8 << 16) | + (color->alpha >> 8 << 24) | + (color->red >> 8 << 16) | (color->green & 0xff00) | - (color->blue >> 8); + (color->blue >> 8); } PIXMAN_EXPORT pixman_image_t * pixman_image_create_solid_fill (pixman_color_t *color) { - pixman_image_t *img = _pixman_image_allocate(); - + pixman_image_t *img = _pixman_image_allocate (); + if (!img) return NULL; @@ -79,7 +86,6 @@ pixman_image_create_solid_fill (pixman_color_t *color) img->common.classify = solid_fill_classify; img->common.property_changed = solid_fill_property_changed; - solid_fill_property_changed (img); - return img; } + diff --git a/lib/pixman/pixman/pixman-sse2.c b/lib/pixman/pixman/pixman-sse2.c index 40e222893..bb74882b2 100644 --- a/lib/pixman/pixman/pixman-sse2.c +++ b/lib/pixman/pixman/pixman-sse2.c @@ -23,7 +23,7 @@ * * Author: Rodrigo Kumpera (kumpera@gmail.com) * André Tupinambá (andrelrt@gmail.com) - * + * * Based on work by Owen Taylor and Søren Sandmann */ #ifdef HAVE_CONFIG_H @@ -34,94 +34,110 @@ #include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */ #include <emmintrin.h> /* for SSE2 intrinsics */ #include "pixman-private.h" +#include "pixman-combine32.h" + +#if defined(_MSC_VER) && defined(_M_AMD64) +/* Windows 64 doesn't allow MMX to be used, so + * the pixman-x64-mmx-emulation.h file contains + * implementations of those MMX intrinsics that + * are used in the SSE2 implementation. 
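
The mask_0080 / mask_0101 constants declared just below implement pixman's exact divide-by-255 for 8-bit multiplies: mulhi (x * a + 0x0080, 0x0101) computes ((x * a + 128) * 257) >> 16, which equals x * a / 255 correctly rounded for all 8-bit x and a. The scalar equivalent is the MUL_UN8 macro from pixman-combine32.h, roughly:

    static inline uint8_t
    mul_un8 (uint8_t x, uint8_t a)
    {
        uint16_t t = x * a + 0x80;

        return (t + (t >> 8)) >> 8;    /* same value as (t * 0x101) >> 16 */
    }
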
+ */ +# include "pixman-x64-mmx-emulation.h" +#endif #ifdef USE_SSE2 -/* ------------------------------------------------------------------------------------------------- +/* -------------------------------------------------------------------- * Locals */ -static __m64 xMask0080; -static __m64 xMask00ff; -static __m64 xMask0101; -static __m64 xMaskAlpha; +static __m64 mask_x0080; +static __m64 mask_x00ff; +static __m64 mask_x0101; +static __m64 mask_x_alpha; -static __m64 xMask565rgb; -static __m64 xMask565Unpack; +static __m64 mask_x565_rgb; +static __m64 mask_x565_unpack; -static __m128i Mask0080; -static __m128i Mask00ff; -static __m128i Mask0101; -static __m128i Maskffff; -static __m128i Maskff000000; -static __m128i MaskAlpha; +static __m128i mask_0080; +static __m128i mask_00ff; +static __m128i mask_0101; +static __m128i mask_ffff; +static __m128i mask_ff000000; +static __m128i mask_alpha; -static __m128i Mask565r; -static __m128i Mask565g1, Mask565g2; -static __m128i Mask565b; -static __m128i MaskRed; -static __m128i MaskGreen; -static __m128i MaskBlue; +static __m128i mask_565_r; +static __m128i mask_565_g1, mask_565_g2; +static __m128i mask_565_b; +static __m128i mask_red; +static __m128i mask_green; +static __m128i mask_blue; -static __m128i Mask565FixRB; -static __m128i Mask565FixG; +static __m128i mask_565_fix_rb; +static __m128i mask_565_fix_g; -/* ------------------------------------------------------------------------------------------------- +/* ---------------------------------------------------------------------- * SSE2 Inlines */ static force_inline __m128i unpack_32_1x128 (uint32_t data) { - return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128()); + return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128 ()); } static force_inline void -unpack_128_2x128 (__m128i data, __m128i* dataLo, __m128i* dataHi) +unpack_128_2x128 (__m128i data, __m128i* data_lo, __m128i* data_hi) { - *dataLo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ()); - *dataHi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ()); + *data_lo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ()); + *data_hi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ()); } static force_inline __m128i -unpack565to8888 (__m128i lo) +unpack_565_to_8888 (__m128i lo) { __m128i r, g, b, rb, t; - - r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), MaskRed); - g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), MaskGreen); - b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), MaskBlue); + + r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), mask_red); + g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), mask_green); + b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), mask_blue); rb = _mm_or_si128 (r, b); - t = _mm_and_si128 (rb, Mask565FixRB); + t = _mm_and_si128 (rb, mask_565_fix_rb); t = _mm_srli_epi32 (t, 5); rb = _mm_or_si128 (rb, t); - t = _mm_and_si128 (g, Mask565FixG); + t = _mm_and_si128 (g, mask_565_fix_g); t = _mm_srli_epi32 (t, 6); g = _mm_or_si128 (g, t); - + return _mm_or_si128 (rb, g); } static force_inline void -unpack565_128_4x128 (__m128i data, __m128i* data0, __m128i* data1, __m128i* data2, __m128i* data3) +unpack_565_128_4x128 (__m128i data, + __m128i* data0, + __m128i* data1, + __m128i* data2, + __m128i* data3) { __m128i lo, hi; lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ()); hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ()); - lo = unpack565to8888 (lo); - hi = unpack565to8888 (hi); + lo = unpack_565_to_8888 (lo); + hi = unpack_565_to_8888 (hi); unpack_128_2x128 (lo, data0, data1); unpack_128_2x128 (hi, data2, data3); } static 
force_inline uint16_t -pack565_32_16 (uint32_t pixel) +pack_565_32_16 (uint32_t pixel) { - return (uint16_t) (((pixel>>8) & 0xf800) | ((pixel>>5) & 0x07e0) | ((pixel>>3) & 0x001f)); + return (uint16_t) (((pixel >> 8) & 0xf800) | + ((pixel >> 5) & 0x07e0) | + ((pixel >> 3) & 0x001f)); } static force_inline __m128i @@ -131,308 +147,358 @@ pack_2x128_128 (__m128i lo, __m128i hi) } static force_inline __m128i -pack565_2x128_128 (__m128i lo, __m128i hi) +pack_565_2x128_128 (__m128i lo, __m128i hi) { __m128i data; __m128i r, g1, g2, b; - data = pack_2x128_128 ( lo, hi ); + data = pack_2x128_128 (lo, hi); - r = _mm_and_si128 (data , Mask565r); - g1 = _mm_and_si128 (_mm_slli_epi32 (data , 3), Mask565g1); - g2 = _mm_and_si128 (_mm_srli_epi32 (data , 5), Mask565g2); - b = _mm_and_si128 (_mm_srli_epi32 (data , 3), Mask565b); + r = _mm_and_si128 (data, mask_565_r); + g1 = _mm_and_si128 (_mm_slli_epi32 (data, 3), mask_565_g1); + g2 = _mm_and_si128 (_mm_srli_epi32 (data, 5), mask_565_g2); + b = _mm_and_si128 (_mm_srli_epi32 (data, 3), mask_565_b); return _mm_or_si128 (_mm_or_si128 (_mm_or_si128 (r, g1), g2), b); } static force_inline __m128i -pack565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3) +pack_565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3) { - return _mm_packus_epi16 (pack565_2x128_128 (*xmm0, *xmm1), pack565_2x128_128 (*xmm2, *xmm3)); + return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1), + pack_565_2x128_128 (*xmm2, *xmm3)); } static force_inline int -isOpaque (__m128i x) +is_opaque (__m128i x) { __m128i ffs = _mm_cmpeq_epi8 (x, x); + return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888; } static force_inline int -isZero (__m128i x) +is_zero (__m128i x) { - return _mm_movemask_epi8 (_mm_cmpeq_epi8 (x, _mm_setzero_si128())) == 0xffff; + return _mm_movemask_epi8 ( + _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff; } static force_inline int -isTransparent (__m128i x) +is_transparent (__m128i x) { - return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, _mm_setzero_si128())) & 0x8888) == 0x8888; + return (_mm_movemask_epi8 ( + _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) & 0x8888) == 0x8888; } static force_inline __m128i -expandPixel_32_1x128 (uint32_t data) +expand_pixel_32_1x128 (uint32_t data) { - return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE(1, 0, 1, 0)); + return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE (1, 0, 1, 0)); } static force_inline __m128i -expandAlpha_1x128 (__m128i data) +expand_alpha_1x128 (__m128i data) { - return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); + return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data, + _MM_SHUFFLE (3, 3, 3, 3)), + _MM_SHUFFLE (3, 3, 3, 3)); } static force_inline void -expandAlpha_2x128 (__m128i dataLo, __m128i dataHi, __m128i* alphaLo, __m128i* alphaHi) +expand_alpha_2x128 (__m128i data_lo, + __m128i data_hi, + __m128i* alpha_lo, + __m128i* alpha_hi) { __m128i lo, hi; - lo = _mm_shufflelo_epi16 (dataLo, _MM_SHUFFLE(3, 3, 3, 3)); - hi = _mm_shufflelo_epi16 (dataHi, _MM_SHUFFLE(3, 3, 3, 3)); - *alphaLo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE(3, 3, 3, 3)); - *alphaHi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE(3, 3, 3, 3)); + lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 3, 3, 3)); + hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 3, 3, 3)); + + *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 3, 3, 3)); + *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 3, 3, 3)); } static force_inline 
void -expandAlphaRev_2x128 (__m128i dataLo, __m128i dataHi, __m128i* alphaLo, __m128i* alphaHi) +expand_alpha_rev_2x128 (__m128i data_lo, + __m128i data_hi, + __m128i* alpha_lo, + __m128i* alpha_hi) { __m128i lo, hi; - lo = _mm_shufflelo_epi16 (dataLo, _MM_SHUFFLE(0, 0, 0, 0)); - hi = _mm_shufflelo_epi16 (dataHi, _MM_SHUFFLE(0, 0, 0, 0)); - *alphaLo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE(0, 0, 0, 0)); - *alphaHi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE(0, 0, 0, 0)); + lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (0, 0, 0, 0)); + hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (0, 0, 0, 0)); + *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (0, 0, 0, 0)); + *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (0, 0, 0, 0)); } static force_inline void -pixMultiply_2x128 (__m128i* dataLo, __m128i* dataHi, __m128i* alphaLo, __m128i* alphaHi, __m128i* retLo, __m128i* retHi) +pix_multiply_2x128 (__m128i* data_lo, + __m128i* data_hi, + __m128i* alpha_lo, + __m128i* alpha_hi, + __m128i* ret_lo, + __m128i* ret_hi) { __m128i lo, hi; - lo = _mm_mullo_epi16 (*dataLo, *alphaLo); - hi = _mm_mullo_epi16 (*dataHi, *alphaHi); - lo = _mm_adds_epu16 (lo, Mask0080); - hi = _mm_adds_epu16 (hi, Mask0080); - *retLo = _mm_mulhi_epu16 (lo, Mask0101); - *retHi = _mm_mulhi_epu16 (hi, Mask0101); + lo = _mm_mullo_epi16 (*data_lo, *alpha_lo); + hi = _mm_mullo_epi16 (*data_hi, *alpha_hi); + lo = _mm_adds_epu16 (lo, mask_0080); + hi = _mm_adds_epu16 (hi, mask_0080); + *ret_lo = _mm_mulhi_epu16 (lo, mask_0101); + *ret_hi = _mm_mulhi_epu16 (hi, mask_0101); } static force_inline void -pixAddMultiply_2x128 (__m128i* srcLo, __m128i* srcHi, __m128i* alphaDstLo, __m128i* alphaDstHi, - __m128i* dstLo, __m128i* dstHi, __m128i* alphaSrcLo, __m128i* alphaSrcHi, - __m128i* retLo, __m128i* retHi) +pix_add_multiply_2x128 (__m128i* src_lo, + __m128i* src_hi, + __m128i* alpha_dst_lo, + __m128i* alpha_dst_hi, + __m128i* dst_lo, + __m128i* dst_hi, + __m128i* alpha_src_lo, + __m128i* alpha_src_hi, + __m128i* ret_lo, + __m128i* ret_hi) { - __m128i lo, hi; - __m128i mulLo, mulHi; + __m128i t1_lo, t1_hi; + __m128i t2_lo, t2_hi; + + pix_multiply_2x128 (src_lo, src_hi, alpha_dst_lo, alpha_dst_hi, &t1_lo, &t1_hi); + pix_multiply_2x128 (dst_lo, dst_hi, alpha_src_lo, alpha_src_hi, &t2_lo, &t2_hi); - lo = _mm_mullo_epi16 (*srcLo, *alphaDstLo); - hi = _mm_mullo_epi16 (*srcHi, *alphaDstHi); - mulLo = _mm_mullo_epi16 (*dstLo, *alphaSrcLo); - mulHi = _mm_mullo_epi16 (*dstHi, *alphaSrcHi); - lo = _mm_adds_epu16 (lo, Mask0080); - hi = _mm_adds_epu16 (hi, Mask0080); - lo = _mm_adds_epu16 (lo, mulLo); - hi = _mm_adds_epu16 (hi, mulHi); - *retLo = _mm_mulhi_epu16 (lo, Mask0101); - *retHi = _mm_mulhi_epu16 (hi, Mask0101); + *ret_lo = _mm_adds_epu8 (t1_lo, t2_lo); + *ret_hi = _mm_adds_epu8 (t1_hi, t2_hi); } static force_inline void -negate_2x128 (__m128i dataLo, __m128i dataHi, __m128i* negLo, __m128i* negHi) +negate_2x128 (__m128i data_lo, + __m128i data_hi, + __m128i* neg_lo, + __m128i* neg_hi) { - *negLo = _mm_xor_si128 (dataLo, Mask00ff); - *negHi = _mm_xor_si128 (dataHi, Mask00ff); + *neg_lo = _mm_xor_si128 (data_lo, mask_00ff); + *neg_hi = _mm_xor_si128 (data_hi, mask_00ff); } static force_inline void -invertColors_2x128 (__m128i dataLo, __m128i dataHi, __m128i* invLo, __m128i* invHi) +invert_colors_2x128 (__m128i data_lo, + __m128i data_hi, + __m128i* inv_lo, + __m128i* inv_hi) { __m128i lo, hi; - lo = _mm_shufflelo_epi16 (dataLo, _MM_SHUFFLE(3, 0, 1, 2)); - hi = _mm_shufflelo_epi16 (dataHi, _MM_SHUFFLE(3, 0, 1, 2)); - *invLo = _mm_shufflehi_epi16 (lo, 
_MM_SHUFFLE(3, 0, 1, 2)); - *invHi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE(3, 0, 1, 2)); + lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 0, 1, 2)); + hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 0, 1, 2)); + *inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 0, 1, 2)); + *inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 0, 1, 2)); } static force_inline void -over_2x128 (__m128i* srcLo, __m128i* srcHi, __m128i* alphaLo, __m128i* alphaHi, __m128i* dstLo, __m128i* dstHi) +over_2x128 (__m128i* src_lo, + __m128i* src_hi, + __m128i* alpha_lo, + __m128i* alpha_hi, + __m128i* dst_lo, + __m128i* dst_hi) { __m128i t1, t2; - negate_2x128 (*alphaLo, *alphaHi, &t1, &t2); + negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2); - pixMultiply_2x128 (dstLo, dstHi, &t1, &t2, dstLo, dstHi); + pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi); - *dstLo = _mm_adds_epu8 (*srcLo, *dstLo); - *dstHi = _mm_adds_epu8 (*srcHi, *dstHi); + *dst_lo = _mm_adds_epu8 (*src_lo, *dst_lo); + *dst_hi = _mm_adds_epu8 (*src_hi, *dst_hi); } static force_inline void -overRevNonPre_2x128 (__m128i srcLo, __m128i srcHi, __m128i* dstLo, __m128i* dstHi) +over_rev_non_pre_2x128 (__m128i src_lo, + __m128i src_hi, + __m128i* dst_lo, + __m128i* dst_hi) { __m128i lo, hi; - __m128i alphaLo, alphaHi; + __m128i alpha_lo, alpha_hi; - expandAlpha_2x128 (srcLo, srcHi, &alphaLo, &alphaHi); + expand_alpha_2x128 (src_lo, src_hi, &alpha_lo, &alpha_hi); - lo = _mm_or_si128 (alphaLo, MaskAlpha); - hi = _mm_or_si128 (alphaHi, MaskAlpha); + lo = _mm_or_si128 (alpha_lo, mask_alpha); + hi = _mm_or_si128 (alpha_hi, mask_alpha); - invertColors_2x128 (srcLo, srcHi, &srcLo, &srcHi); + invert_colors_2x128 (src_lo, src_hi, &src_lo, &src_hi); - pixMultiply_2x128 (&srcLo, &srcHi, &lo, &hi, &lo, &hi); + pix_multiply_2x128 (&src_lo, &src_hi, &lo, &hi, &lo, &hi); - over_2x128 (&lo, &hi, &alphaLo, &alphaHi, dstLo, dstHi); + over_2x128 (&lo, &hi, &alpha_lo, &alpha_hi, dst_lo, dst_hi); } static force_inline void -inOver_2x128 (__m128i* srcLo, __m128i* srcHi, __m128i* alphaLo, __m128i* alphaHi, - __m128i* maskLo, __m128i* maskHi, __m128i* dstLo, __m128i* dstHi) +in_over_2x128 (__m128i* src_lo, + __m128i* src_hi, + __m128i* alpha_lo, + __m128i* alpha_hi, + __m128i* mask_lo, + __m128i* mask_hi, + __m128i* dst_lo, + __m128i* dst_hi) { - __m128i sLo, sHi; - __m128i aLo, aHi; + __m128i s_lo, s_hi; + __m128i a_lo, a_hi; - pixMultiply_2x128 ( srcLo, srcHi, maskLo, maskHi, &sLo, &sHi); - pixMultiply_2x128 (alphaLo, alphaHi, maskLo, maskHi, &aLo, &aHi); + pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi); + pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi); - over_2x128 (&sLo, &sHi, &aLo, &aHi, dstLo, dstHi); + over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi); } static force_inline void -cachePrefetch (__m128i* addr) +cache_prefetch (__m128i* addr) { - _mm_prefetch (addr, _MM_HINT_T0); + _mm_prefetch ((void const*)addr, _MM_HINT_T0); } static force_inline void -cachePrefetchNext (__m128i* addr) +cache_prefetch_next (__m128i* addr) { - _mm_prefetch (addr + 4, _MM_HINT_T0); // 64 bytes ahead + _mm_prefetch ((void const *)(addr + 4), _MM_HINT_T0); /* 64 bytes ahead */ } /* load 4 pixels from a 16-byte boundary aligned address */ static force_inline __m128i -load128Aligned (__m128i* src) +load_128_aligned (__m128i* src) { return _mm_load_si128 (src); } /* load 4 pixels from an unaligned address */ static force_inline __m128i -load128Unaligned (const __m128i* src) +load_128_unaligned (const __m128i* src) { return 
_mm_loadu_si128 (src); } -/* save 4 pixels using Write Combining memory on a 16-byte boundary aligned address */ +/* save 4 pixels using Write Combining memory on a 16-byte + * boundary aligned address + */ static force_inline void -save128WriteCombining (__m128i* dst, __m128i data) +save_128_write_combining (__m128i* dst, + __m128i data) { _mm_stream_si128 (dst, data); } /* save 4 pixels on a 16-byte boundary aligned address */ static force_inline void -save128Aligned (__m128i* dst, __m128i data) +save_128_aligned (__m128i* dst, + __m128i data) { _mm_store_si128 (dst, data); } /* save 4 pixels on an unaligned address */ static force_inline void -save128Unaligned (__m128i* dst, __m128i data) +save_128_unaligned (__m128i* dst, + __m128i data) { _mm_storeu_si128 (dst, data); } -/* ------------------------------------------------------------------------------------------------- +/* ------------------------------------------------------------------ * MMX inlines */ static force_inline __m64 unpack_32_1x64 (uint32_t data) { - return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (data), _mm_setzero_si64()); + return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (data), _mm_setzero_si64 ()); } static force_inline __m64 -expandAlpha_1x64 (__m64 data) +expand_alpha_1x64 (__m64 data) { - return _mm_shuffle_pi16 (data, _MM_SHUFFLE(3, 3, 3, 3)); + return _mm_shuffle_pi16 (data, _MM_SHUFFLE (3, 3, 3, 3)); } static force_inline __m64 -expandAlphaRev_1x64 (__m64 data) +expand_alpha_rev_1x64 (__m64 data) { - return _mm_shuffle_pi16 (data, _MM_SHUFFLE(0, 0, 0, 0)); + return _mm_shuffle_pi16 (data, _MM_SHUFFLE (0, 0, 0, 0)); } static force_inline __m64 -expandPixel_8_1x64 (uint8_t data) +expand_pixel_8_1x64 (uint8_t data) { - return _mm_shuffle_pi16 (unpack_32_1x64 ((uint32_t)data), _MM_SHUFFLE(0, 0, 0, 0)); + return _mm_shuffle_pi16 ( + unpack_32_1x64 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0)); } static force_inline __m64 -pixMultiply_1x64 (__m64 data, __m64 alpha) +pix_multiply_1x64 (__m64 data, + __m64 alpha) { return _mm_mulhi_pu16 (_mm_adds_pu16 (_mm_mullo_pi16 (data, alpha), - xMask0080), - xMask0101); + mask_x0080), + mask_x0101); } static force_inline __m64 -pixAddMultiply_1x64 (__m64* src, __m64* alphaDst, __m64* dst, __m64* alphaSrc) +pix_add_multiply_1x64 (__m64* src, + __m64* alpha_dst, + __m64* dst, + __m64* alpha_src) { - return _mm_mulhi_pu16 (_mm_adds_pu16 (_mm_adds_pu16 (_mm_mullo_pi16 (*src, *alphaDst), - xMask0080), - _mm_mullo_pi16 (*dst, *alphaSrc)), - xMask0101); + __m64 t1 = pix_multiply_1x64 (*src, *alpha_dst); + __m64 t2 = pix_multiply_1x64 (*dst, *alpha_src); + + return _mm_adds_pu8 (t1, t2); } static force_inline __m64 negate_1x64 (__m64 data) { - return _mm_xor_si64 (data, xMask00ff); + return _mm_xor_si64 (data, mask_x00ff); } static force_inline __m64 -invertColors_1x64 (__m64 data) +invert_colors_1x64 (__m64 data) { - return _mm_shuffle_pi16 (data, _MM_SHUFFLE(3, 0, 1, 2)); + return _mm_shuffle_pi16 (data, _MM_SHUFFLE (3, 0, 1, 2)); } static force_inline __m64 over_1x64 (__m64 src, __m64 alpha, __m64 dst) { - return _mm_adds_pu8 (src, pixMultiply_1x64 (dst, negate_1x64 (alpha))); + return _mm_adds_pu8 (src, pix_multiply_1x64 (dst, negate_1x64 (alpha))); } static force_inline __m64 -inOver_1x64 (__m64* src, __m64* alpha, __m64* mask, __m64* dst) +in_over_1x64 (__m64* src, __m64* alpha, __m64* mask, __m64* dst) { - return over_1x64 (pixMultiply_1x64 (*src, *mask), - pixMultiply_1x64 (*alpha, *mask), + return over_1x64 (pix_multiply_1x64 (*src, *mask), + pix_multiply_1x64 (*alpha, *mask), *dst); }
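The pix_multiply_1x64 and pix_multiply_2x128 helpers above are the workhorse of every combiner in this file: per 8-bit channel they compute x * a / 255, rounded to nearest, without a division, by adding a 0x0080 bias (mask_0080/mask_x0080) and then keeping the high 16 bits of a multiply by 0x0101 (mask_0101/mask_x0101). A minimal scalar sketch of the same identity, exhaustively checkable against the rounded quotient; mul_un8 is a name chosen here for illustration (pixman's scalar macros are the UN8 family, cf. DIV_UN8 further down):

#include <assert.h>
#include <stdint.h>

/* Scalar equivalent of pix_multiply_1x64: with t = x * a + 0x80,
 * (t * 0x0101) >> 16 equals (t + (t >> 8)) >> 8, which is exactly
 * x * a / 255 rounded to nearest for all 8-bit x and a. */
static uint8_t
mul_un8 (uint8_t x, uint8_t a)
{
    unsigned int t = (unsigned int) x * a + 0x80;

    return (uint8_t) ((t + (t >> 8)) >> 8);
}

int
main (void)
{
    unsigned int x, a;

    for (x = 0; x < 256; x++)
	for (a = 0; a < 256; a++)
	    assert (mul_un8 (x, a) == (x * a + 127) / 255);

    return 0;
}

Given that primitive, over_1x64 above is the Porter-Duff OVER operator per channel, dst = src + mul_un8 (dst, 255 - alpha), with a saturating add; in_over_1x64 is the same operation with the source and its alpha first multiplied by the mask.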
static force_inline __m64 -overRevNonPre_1x64 (__m64 src, __m64 dst) +over_rev_non_pre_1x64 (__m64 src, __m64 dst) { - __m64 alpha = expandAlpha_1x64 (src); + __m64 alpha = expand_alpha_1x64 (src); - return over_1x64 (pixMultiply_1x64 (invertColors_1x64 (src), - _mm_or_si64 (alpha, xMaskAlpha)), + return over_1x64 (pix_multiply_1x64 (invert_colors_1x64 (src), + _mm_or_si64 (alpha, mask_x_alpha)), alpha, dst); } static force_inline uint32_t -pack_1x64_32( __m64 data ) +pack_1x64_32 (__m64 data) { - return _mm_cvtsi64_si32 (_mm_packs_pu16 (data, _mm_setzero_si64())); + return _mm_cvtsi64_si32 (_mm_packs_pu16 (data, _mm_setzero_si64 ())); } /* Expand 16 bits positioned at @pos (0-3) of an mmx register into @@ -462,31 +528,32 @@ expand565_16_1x64 (uint16_t pixel) p = _mm_or_si64 (t1, p); p = _mm_or_si64 (t2, p); - p = _mm_and_si64 (p, xMask565rgb); - p = _mm_mullo_pi16 (p, xMask565Unpack); + p = _mm_and_si64 (p, mask_x565_rgb); + p = _mm_mullo_pi16 (p, mask_x565_unpack); return _mm_srli_pi16 (p, 8); } -/* ------------------------------------------------------------------------------------------------- +/* ---------------------------------------------------------------------------- * Compose Core transformations */ static force_inline uint32_t -coreCombineOverUPixelsse2 (uint32_t src, uint32_t dst) +core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst) { - uint8_t a; - __m64 ms; + uint8_t a; + __m64 ms; a = src >> 24; if (a == 0xff) { - return src; + return src; } else if (src) { - ms = unpack_32_1x64 (src); - return pack_1x64_32 (over_1x64 (ms, expandAlpha_1x64 (ms), unpack_32_1x64 (dst))); + ms = unpack_32_1x64 (src); + return pack_1x64_32 ( + over_1x64 (ms, expand_alpha_1x64 (ms), unpack_32_1x64 (dst))); } return dst; @@ -502,10 +569,10 @@ combine1 (const uint32_t *ps, const uint32_t *pm) __m64 ms, mm; mm = unpack_32_1x64 (*pm); - mm = expandAlpha_1x64 (mm); - + mm = expand_alpha_1x64 (mm); + ms = unpack_32_1x64 (s); - ms = pixMultiply_1x64 (ms, mm); + ms = pix_multiply_1x64 (ms, mm); s = pack_1x64_32 (ms); } @@ -516,270 +583,299 @@ combine1 (const uint32_t *ps, const uint32_t *pm) static force_inline __m128i combine4 (const __m128i *ps, const __m128i *pm) { - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmMskLo, xmmMskHi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_msk_lo, xmm_msk_hi; __m128i s; - + if (pm) { - xmmMskLo = load128Unaligned (pm); + xmm_msk_lo = load_128_unaligned (pm); - if (isTransparent (xmmMskLo)) + if (is_transparent (xmm_msk_lo)) return _mm_setzero_si128 (); } - - s = load128Unaligned (ps); - + + s = load_128_unaligned (ps); + if (pm) { - unpack_128_2x128 (s, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmMskLo, &xmmMskLo, &xmmMskHi); - - expandAlpha_2x128 (xmmMskLo, xmmMskHi, &xmmMskLo, &xmmMskHi); - - pixMultiply_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmMskLo, &xmmMskHi, &xmmSrcLo, &xmmSrcHi); - - s = pack_2x128_128 (xmmSrcLo, xmmSrcHi); + unpack_128_2x128 (s, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_msk_lo, &xmm_msk_lo, &xmm_msk_hi); + + expand_alpha_2x128 (xmm_msk_lo, xmm_msk_hi, &xmm_msk_lo, &xmm_msk_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_msk_lo, &xmm_msk_hi, + &xmm_src_lo, &xmm_src_hi); + + s = pack_2x128_128 (xmm_src_lo, xmm_src_hi); } return s; } static force_inline void -coreCombineOverUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w) +core_combine_over_u_sse2 (uint32_t* pd, + const uint32_t* ps, + const uint32_t* pm, + int w) { uint32_t s, d; - __m128i xmmDstLo, xmmDstHi; - __m128i xmmSrcLo, xmmSrcHi; - __m128i 
xmmAlphaLo, xmmAlphaHi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_alpha_lo, xmm_alpha_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); /* Align dst on a 16-byte boundary */ - while (w && - ((unsigned long)pd & 15)) + while (w && ((unsigned long)pd & 15)) { - d = *pd; - s = combine1 (ps, pm); + d = *pd; + s = combine1 (ps, pm); - *pd++ = coreCombineOverUPixelsse2 (s, d); + *pd++ = core_combine_over_u_pixel_sse2 (s, d); ps++; if (pm) pm++; - w--; + w--; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); - - /* I'm loading unaligned because I'm not sure about the address alignment. */ - xmmSrcHi = combine4 ((__m128i*)ps, (__m128i*)pm); - - if (isOpaque (xmmSrcHi)) - { - save128Aligned ((__m128i*)pd, xmmSrcHi); - } - else if (!isZero (xmmSrcHi)) - { - xmmDstHi = load128Aligned ((__m128i*) pd); - - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); - - expandAlpha_2x128 (xmmSrcLo, xmmSrcHi, &xmmAlphaLo, &xmmAlphaHi); - - over_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmAlphaLo, &xmmAlphaHi, &xmmDstLo, &xmmDstHi); - - /* rebuid the 4 pixel data and save*/ - save128Aligned ((__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); - } - - w -= 4; - ps += 4; - pd += 4; + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); + + /* I'm loading unaligned because I'm not sure about + * the address alignment. 
+ */ + xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); + + if (is_opaque (xmm_src_hi)) + { + save_128_aligned ((__m128i*)pd, xmm_src_hi); + } + else if (!is_zero (xmm_src_hi)) + { + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 ( + xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi); + + over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst_lo, &xmm_dst_hi); + + /* rebuild the 4 pixel data and save */ + save_128_aligned ((__m128i*)pd, + pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + w -= 4; + ps += 4; + pd += 4; if (pm) pm += 4; } while (w) { - d = *pd; - s = combine1 (ps, pm); + d = *pd; + s = combine1 (ps, pm); - *pd++ = coreCombineOverUPixelsse2 (s, d); + *pd++ = core_combine_over_u_pixel_sse2 (s, d); ps++; if (pm) pm++; - w--; + + w--; } } static force_inline void -coreCombineOverReverseUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w) +core_combine_over_reverse_u_sse2 (uint32_t* pd, + const uint32_t* ps, + const uint32_t* pm, + int w) { uint32_t s, d; - __m128i xmmDstLo, xmmDstHi; - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmAlphaLo, xmmAlphaHi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_alpha_lo, xmm_alpha_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); /* Align dst on a 16-byte boundary */ while (w && ((unsigned long)pd & 15)) { - d = *pd; - s = combine1 (ps, pm); + d = *pd; + s = combine1 (ps, pm); - *pd++ = coreCombineOverUPixelsse2 (d, s); - w--; + *pd++ = core_combine_over_u_pixel_sse2 (d, s); + w--; ps++; if (pm) pm++; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); - /* I'm loading unaligned because I'm not sure about the address alignment. */ - xmmSrcHi = combine4 ((__m128i*)ps, (__m128i*)pm); - xmmDstHi = load128Aligned ((__m128i*) pd); + /* I'm loading unaligned because I'm not sure + * about the address alignment. 
+ */ + xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - expandAlpha_2x128 (xmmDstLo, xmmDstHi, &xmmAlphaLo, &xmmAlphaHi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi); - over_2x128 (&xmmDstLo, &xmmDstHi, &xmmAlphaLo, &xmmAlphaHi, &xmmSrcLo, &xmmSrcHi); + over_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_src_lo, &xmm_src_hi); - /* rebuid the 4 pixel data and save*/ - save128Aligned ((__m128i*)pd, pack_2x128_128 (xmmSrcLo, xmmSrcHi)); + /* rebuild the 4 pixel data and save */ + save_128_aligned ((__m128i*)pd, + pack_2x128_128 (xmm_src_lo, xmm_src_hi)); + + w -= 4; + ps += 4; + pd += 4; - w -= 4; - ps += 4; - pd += 4; if (pm) pm += 4; } while (w) { - d = *pd; - s = combine1 (ps, pm); + d = *pd; + s = combine1 (ps, pm); - *pd++ = coreCombineOverUPixelsse2 (d, s); + *pd++ = core_combine_over_u_pixel_sse2 (d, s); ps++; - w--; + w--; if (pm) pm++; } } static force_inline uint32_t -coreCombineInUPixelsse2 (uint32_t src, uint32_t dst) +core_combine_in_u_pixelsse2 (uint32_t src, uint32_t dst) { uint32_t maska = src >> 24; if (maska == 0) { - return 0; + return 0; } else if (maska != 0xff) { - return pack_1x64_32(pixMultiply_1x64 (unpack_32_1x64 (dst), expandAlpha_1x64 (unpack_32_1x64 (src)))); + return pack_1x64_32 ( + pix_multiply_1x64 (unpack_32_1x64 (dst), + expand_alpha_1x64 (unpack_32_1x64 (src)))); } return dst; } static force_inline void -coreCombineInUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w) +core_combine_in_u_sse2 (uint32_t* pd, + const uint32_t* ps, + const uint32_t* pm, + int w) { uint32_t s, d; - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && ((unsigned long) pd & 15)) { - s = combine1 (ps, pm); - d = *pd; + s = combine1 (ps, pm); + d = *pd; - *pd++ = coreCombineInUPixelsse2 (d, s); - w--; + *pd++ = core_combine_in_u_pixelsse2 (d, s); + w--; ps++; if (pm) pm++; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); - xmmDstHi = load128Aligned ((__m128i*) pd); - xmmSrcHi = combine4 ((__m128i*) ps, (__m128i*) pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm); - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); - expandAlpha_2x128 (xmmDstLo, xmmDstHi, &xmmDstLo, &xmmDstHi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, 
&xmmSrcHi); - pixMultiply_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmDstLo, &xmmDstHi, &xmmDstLo, &xmmDstHi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); - save128Aligned ((__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); + save_128_aligned ((__m128i*)pd, + pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - ps += 4; - pd += 4; - w -= 4; + ps += 4; + pd += 4; + w -= 4; if (pm) pm += 4; } while (w) { - s = combine1 (ps, pm); - d = *pd; + s = combine1 (ps, pm); + d = *pd; - *pd++ = coreCombineInUPixelsse2 (d, s); - w--; + *pd++ = core_combine_in_u_pixelsse2 (d, s); + w--; ps++; if (pm) pm++; @@ -787,67 +883,73 @@ coreCombineInUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w) } static force_inline void -coreCombineReverseInUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t *pm, int w) +core_combine_reverse_in_u_sse2 (uint32_t* pd, + const uint32_t* ps, + const uint32_t *pm, + int w) { uint32_t s, d; - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && ((unsigned long) pd & 15)) { - s = combine1 (ps, pm); - d = *pd; + s = combine1 (ps, pm); + d = *pd; - *pd++ = coreCombineInUPixelsse2 (s, d); + *pd++ = core_combine_in_u_pixelsse2 (s, d); ps++; - w--; + w--; if (pm) pm++; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); - xmmDstHi = load128Aligned ((__m128i*) pd); - xmmSrcHi = combine4 ((__m128i*) ps, (__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); + xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - expandAlpha_2x128 (xmmSrcLo, xmmSrcHi, &xmmSrcLo, &xmmSrcHi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); - pixMultiply_2x128 (&xmmDstLo, &xmmDstHi, &xmmSrcLo, &xmmSrcHi, &xmmDstLo, &xmmDstHi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_src_lo, &xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi); - save128Aligned ((__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - ps += 4; - pd += 4; - w -= 4; + ps += 4; + pd += 4; + w -= 4; if (pm) pm += 4; } while (w) { - s = combine1 (ps, pm); - d = *pd; + s = combine1 (ps, pm); + d = *pd; - *pd++ = coreCombineInUPixelsse2 (s, d); - w--; + *pd++ = core_combine_in_u_pixelsse2 (s, d); + w--; ps++; if (pm) pm++; @@ -855,135 +957,161 @@ coreCombineReverseInUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t *pm, } static force_inline void 
-coreCombineReverseOutUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w) +core_combine_reverse_out_u_sse2 (uint32_t* pd, + const uint32_t* ps, + const uint32_t* pm, + int w) { /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && ((unsigned long) pd & 15)) { - uint32_t s = combine1 (ps, pm); - uint32_t d = *pd; + uint32_t s = combine1 (ps, pm); + uint32_t d = *pd; - *pd++ = pack_1x64_32 (pixMultiply_1x64 (unpack_32_1x64 (d), negate_1x64 (expandAlpha_1x64 (unpack_32_1x64 (s))))); + *pd++ = pack_1x64_32 ( + pix_multiply_1x64 ( + unpack_32_1x64 (d), negate_1x64 ( + expand_alpha_1x64 (unpack_32_1x64 (s))))); + if (pm) pm++; ps++; - w--; + w--; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); - xmmSrcHi = combine4 ((__m128i*)ps, (__m128i*)pm); - xmmDstHi = load128Aligned ((__m128i*) pd); + xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - expandAlpha_2x128 (xmmSrcLo, xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - negate_2x128 (xmmSrcLo, xmmSrcHi, &xmmSrcLo, &xmmSrcHi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + negate_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi); - pixMultiply_2x128 (&xmmDstLo, &xmmDstHi, &xmmSrcLo, &xmmSrcHi, &xmmDstLo, &xmmDstHi); + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_src_lo, &xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi); - save128Aligned ((__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - ps += 4; - pd += 4; + ps += 4; + pd += 4; if (pm) pm += 4; - w -= 4; + + w -= 4; } while (w) { - uint32_t s = combine1 (ps, pm); - uint32_t d = *pd; + uint32_t s = combine1 (ps, pm); + uint32_t d = *pd; - *pd++ = pack_1x64_32 (pixMultiply_1x64 (unpack_32_1x64 (d), negate_1x64 (expandAlpha_1x64 (unpack_32_1x64 (s))))); + *pd++ = pack_1x64_32 ( + pix_multiply_1x64 ( + unpack_32_1x64 (d), negate_1x64 ( + expand_alpha_1x64 (unpack_32_1x64 (s))))); ps++; if (pm) pm++; - w--; + w--; } } static force_inline void -coreCombineOutUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w) +core_combine_out_u_sse2 (uint32_t* pd, + const uint32_t* ps, + const uint32_t* pm, + int w) { /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && ((unsigned long) pd & 
15)) { - uint32_t s = combine1 (ps, pm); - uint32_t d = *pd; - - *pd++ = pack_1x64_32 (pixMultiply_1x64 (unpack_32_1x64 (s), negate_1x64 (expandAlpha_1x64 (unpack_32_1x64 (d))))); - w--; + uint32_t s = combine1 (ps, pm); + uint32_t d = *pd; + + *pd++ = pack_1x64_32 ( + pix_multiply_1x64 ( + unpack_32_1x64 (s), negate_1x64 ( + expand_alpha_1x64 (unpack_32_1x64 (d))))); + w--; ps++; if (pm) pm++; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); - xmmSrcHi = combine4 ((__m128i*) ps, (__m128i*)pm); - xmmDstHi = load128Aligned ((__m128i*) pd); + xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - expandAlpha_2x128 (xmmDstLo, xmmDstHi, &xmmDstLo, &xmmDstHi); - negate_2x128 (xmmDstLo, xmmDstHi, &xmmDstLo, &xmmDstHi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + negate_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - pixMultiply_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmDstLo, &xmmDstHi, &xmmDstLo, &xmmDstHi); + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); - save128Aligned ((__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - ps += 4; - pd += 4; - w -= 4; + ps += 4; + pd += 4; + w -= 4; if (pm) pm += 4; } while (w) { - uint32_t s = combine1 (ps, pm); - uint32_t d = *pd; - - *pd++ = pack_1x64_32 (pixMultiply_1x64 (unpack_32_1x64 (s), negate_1x64 (expandAlpha_1x64 (unpack_32_1x64 (d))))); - w--; + uint32_t s = combine1 (ps, pm); + uint32_t d = *pd; + + *pd++ = pack_1x64_32 ( + pix_multiply_1x64 ( + unpack_32_1x64 (s), negate_1x64 ( + expand_alpha_1x64 (unpack_32_1x64 (d))))); + w--; ps++; if (pm) pm++; @@ -991,87 +1119,96 @@ coreCombineOutUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w } static force_inline uint32_t -coreCombineAtopUPixelsse2 (uint32_t src, uint32_t dst) +core_combine_atop_u_pixel_sse2 (uint32_t src, + uint32_t dst) { __m64 s = unpack_32_1x64 (src); __m64 d = unpack_32_1x64 (dst); - __m64 sa = negate_1x64 (expandAlpha_1x64 (s)); - __m64 da = expandAlpha_1x64 (d); + __m64 sa = negate_1x64 (expand_alpha_1x64 (s)); + __m64 da = expand_alpha_1x64 (d); - return pack_1x64_32 (pixAddMultiply_1x64 (&s, &da, &d, &sa)); + return pack_1x64_32 (pix_add_multiply_1x64 (&s, &da, &d, &sa)); } static force_inline void -coreCombineAtopUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w) +core_combine_atop_u_sse2 (uint32_t* pd, + const uint32_t* ps, + const uint32_t* pm, + int w) { uint32_t s, d; - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; - __m128i xmmAlphaSrcLo, xmmAlphaSrcHi; - 
__m128i xmmAlphaDstLo, xmmAlphaDstHi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && ((unsigned long) pd & 15)) { - s = combine1 (ps, pm); - d = *pd; + s = combine1 (ps, pm); + d = *pd; - *pd++ = coreCombineAtopUPixelsse2 (s, d); - w--; + *pd++ = core_combine_atop_u_pixel_sse2 (s, d); + w--; ps++; if (pm) pm++; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); - xmmSrcHi = combine4 ((__m128i*)ps, (__m128i*)pm); - xmmDstHi = load128Aligned ((__m128i*) pd); + xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - expandAlpha_2x128 (xmmSrcLo, xmmSrcHi, &xmmAlphaSrcLo, &xmmAlphaSrcHi); - expandAlpha_2x128 (xmmDstLo, xmmDstHi, &xmmAlphaDstLo, &xmmAlphaDstHi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); - negate_2x128 (xmmAlphaSrcLo, xmmAlphaSrcHi, &xmmAlphaSrcLo, &xmmAlphaSrcHi); + negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); - pixAddMultiply_2x128 ( &xmmSrcLo, &xmmSrcHi, &xmmAlphaDstLo, &xmmAlphaDstHi, - &xmmDstLo, &xmmDstHi, &xmmAlphaSrcLo, &xmmAlphaSrcHi, - &xmmDstLo, &xmmDstHi ); + pix_add_multiply_2x128 ( + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_dst_lo, &xmm_dst_hi); - save128Aligned ((__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - ps += 4; - pd += 4; - w -= 4; + ps += 4; + pd += 4; + w -= 4; if (pm) pm += 4; } while (w) { - s = combine1 (ps, pm); - d = *pd; + s = combine1 (ps, pm); + d = *pd; - *pd++ = coreCombineAtopUPixelsse2 (s, d); - w--; + *pd++ = core_combine_atop_u_pixel_sse2 (s, d); + w--; ps++; if (pm) pm++; @@ -1079,180 +1216,199 @@ coreCombineAtopUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int } static force_inline uint32_t -coreCombineReverseAtopUPixelsse2 (uint32_t src, uint32_t dst) +core_combine_reverse_atop_u_pixel_sse2 (uint32_t src, + uint32_t dst) { __m64 s = unpack_32_1x64 (src); __m64 d = unpack_32_1x64 (dst); - __m64 sa = expandAlpha_1x64 (s); - __m64 da = negate_1x64 (expandAlpha_1x64 (d)); + __m64 sa = expand_alpha_1x64 (s); + __m64 da = negate_1x64 (expand_alpha_1x64 (d)); - return pack_1x64_32 (pixAddMultiply_1x64 (&s, &da, &d, &sa)); + return pack_1x64_32 
(pix_add_multiply_1x64 (&s, &da, &d, &sa)); } static force_inline void -coreCombineReverseAtopUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w) +core_combine_reverse_atop_u_sse2 (uint32_t* pd, + const uint32_t* ps, + const uint32_t* pm, + int w) { uint32_t s, d; - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; - __m128i xmmAlphaSrcLo, xmmAlphaSrcHi; - __m128i xmmAlphaDstLo, xmmAlphaDstHi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && ((unsigned long) pd & 15)) { - s = combine1 (ps, pm); - d = *pd; + s = combine1 (ps, pm); + d = *pd; - *pd++ = coreCombineReverseAtopUPixelsse2 (s, d); + *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d); ps++; - w--; + w--; if (pm) pm++; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); - xmmSrcHi = combine4 ((__m128i*)ps, (__m128i*)pm); - xmmDstHi = load128Aligned ((__m128i*) pd); + xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*) pd); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); - expandAlpha_2x128 (xmmSrcLo, xmmSrcHi, &xmmAlphaSrcLo, &xmmAlphaSrcHi); - expandAlpha_2x128 (xmmDstLo, xmmDstHi, &xmmAlphaDstLo, &xmmAlphaDstHi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); - negate_2x128 (xmmAlphaDstLo, xmmAlphaDstHi, &xmmAlphaDstLo, &xmmAlphaDstHi); + negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); - pixAddMultiply_2x128 ( &xmmSrcLo, &xmmSrcHi, &xmmAlphaDstLo, &xmmAlphaDstHi, - &xmmDstLo, &xmmDstHi, &xmmAlphaSrcLo, &xmmAlphaSrcHi, - &xmmDstLo, &xmmDstHi ); + pix_add_multiply_2x128 ( + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_dst_lo, &xmm_dst_hi); - save128Aligned ((__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - ps += 4; - pd += 4; - w -= 4; + ps += 4; + pd += 4; + w -= 4; if (pm) pm += 4; } while (w) { - s = combine1 (ps, pm); - d = *pd; + s = combine1 (ps, pm); + d = *pd; - *pd++ = coreCombineReverseAtopUPixelsse2 (s, d); + *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d); ps++; - w--; + w--; if (pm) pm++; } } static force_inline uint32_t -coreCombineXorUPixelsse2 (uint32_t src, uint32_t dst) +core_combine_xor_u_pixel_sse2 (uint32_t src, + uint32_t dst) { __m64 s = unpack_32_1x64 (src); 
__m64 d = unpack_32_1x64 (dst); - __m64 negD = negate_1x64 (expandAlpha_1x64 (d)); - __m64 negS = negate_1x64 (expandAlpha_1x64 (s)); + __m64 neg_d = negate_1x64 (expand_alpha_1x64 (d)); + __m64 neg_s = negate_1x64 (expand_alpha_1x64 (s)); - return pack_1x64_32 (pixAddMultiply_1x64 (&s, &negD, &d, &negS)); + return pack_1x64_32 (pix_add_multiply_1x64 (&s, &neg_d, &d, &neg_s)); } static force_inline void -coreCombineXorUsse2 (uint32_t* dst, const uint32_t* src, const uint32_t *mask, int width) +core_combine_xor_u_sse2 (uint32_t* dst, + const uint32_t* src, + const uint32_t *mask, + int width) { int w = width; uint32_t s, d; uint32_t* pd = dst; const uint32_t* ps = src; const uint32_t* pm = mask; - - __m128i xmmSrc, xmmSrcLo, xmmSrcHi; - __m128i xmmDst, xmmDstLo, xmmDstHi; - __m128i xmmAlphaSrcLo, xmmAlphaSrcHi; - __m128i xmmAlphaDstLo, xmmAlphaDstHi; + + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && ((unsigned long) pd & 15)) { - s = combine1 (ps, pm); - d = *pd; + s = combine1 (ps, pm); + d = *pd; - *pd++ = coreCombineXorUPixelsse2 (s, d); - w--; + *pd++ = core_combine_xor_u_pixel_sse2 (s, d); + w--; ps++; if (pm) pm++; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); - - xmmSrc = combine4 ((__m128i*) ps, (__m128i*) pm); - xmmDst = load128Aligned ((__m128i*) pd); - - unpack_128_2x128 (xmmSrc, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmDst, &xmmDstLo, &xmmDstHi); - - expandAlpha_2x128 (xmmSrcLo, xmmSrcHi, &xmmAlphaSrcLo, &xmmAlphaSrcHi); - expandAlpha_2x128 (xmmDstLo, xmmDstHi, &xmmAlphaDstLo, &xmmAlphaDstHi); - - negate_2x128 (xmmAlphaSrcLo, xmmAlphaSrcHi, &xmmAlphaSrcLo, &xmmAlphaSrcHi); - negate_2x128 (xmmAlphaDstLo, xmmAlphaDstHi, &xmmAlphaDstLo, &xmmAlphaDstHi); - - pixAddMultiply_2x128 ( &xmmSrcLo, &xmmSrcHi, &xmmAlphaDstLo, &xmmAlphaDstHi, - &xmmDstLo, &xmmDstHi, &xmmAlphaSrcLo, &xmmAlphaSrcHi, - &xmmDstLo, &xmmDstHi ); - - save128Aligned ((__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); - - ps += 4; - pd += 4; - w -= 4; + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); + + xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm); + xmm_dst = load_128_aligned ((__m128i*) pd); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_add_multiply_2x128 ( + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + 
&xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + w -= 4; if (pm) pm += 4; } while (w) { - s = combine1 (ps, pm); - d = *pd; + s = combine1 (ps, pm); + d = *pd; - *pd++ = coreCombineXorUPixelsse2 (s, d); - w--; + *pd++ = core_combine_xor_u_pixel_sse2 (s, d); + w--; ps++; if (pm) pm++; @@ -1260,68 +1416,77 @@ coreCombineXorUsse2 (uint32_t* dst, const uint32_t* src, const uint32_t *mask, i } static force_inline void -coreCombineAddUsse2 (uint32_t* dst, const uint32_t* src, const uint32_t* mask, int width) +core_combine_add_u_sse2 (uint32_t* dst, + const uint32_t* src, + const uint32_t* mask, + int width) { int w = width; - uint32_t s,d; + uint32_t s, d; uint32_t* pd = dst; const uint32_t* ps = src; const uint32_t* pm = mask; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && (unsigned long)pd & 15) { - s = combine1 (ps, pm); - d = *pd; + s = combine1 (ps, pm); + d = *pd; + ps++; if (pm) pm++; - *pd++ = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d))); - w--; + *pd++ = _mm_cvtsi64_si32 ( + _mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d))); + w--; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { __m128i s; - - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); - s = combine4((__m128i*)ps,(__m128i*)pm); - - save128Aligned( (__m128i*)pd, - _mm_adds_epu8( s, load128Aligned ((__m128i*)pd)) ); - pd += 4; - ps += 4; + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); + + s = combine4 ((__m128i*)ps, (__m128i*)pm); + + save_128_aligned ( + (__m128i*)pd, _mm_adds_epu8 (s, load_128_aligned ((__m128i*)pd))); + + pd += 4; + ps += 4; if (pm) pm += 4; - w -= 4; + w -= 4; } while (w--) { - s = combine1 (ps, pm); - d = *pd; + s = combine1 (ps, pm); + d = *pd; + ps++; - *pd++ = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d))); + *pd++ = _mm_cvtsi64_si32 ( + _mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d))); if (pm) pm++; } } static force_inline uint32_t -coreCombineSaturateUPixelsse2 (uint32_t src, uint32_t dst) +core_combine_saturate_u_pixel_sse2 (uint32_t src, + uint32_t dst) { __m64 ms = unpack_32_1x64 (src); __m64 md = unpack_32_1x64 (dst); @@ -1330,99 +1495,107 @@ coreCombineSaturateUPixelsse2 (uint32_t src, uint32_t dst) if (sa > da) { - ms = pixMultiply_1x64 (ms, expandAlpha_1x64 (unpack_32_1x64 (FbIntDiv(da, sa) << 24))); + ms = pix_multiply_1x64 ( + ms, expand_alpha_1x64 (unpack_32_1x64 (DIV_UN8 (da, sa) << 24))); } return pack_1x64_32 (_mm_adds_pu16 (md, ms)); } static force_inline void -coreCombineSaturateUsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w) +core_combine_saturate_u_sse2 (uint32_t * pd, + const uint32_t *ps, + const uint32_t *pm, + int w) { - uint32_t s,d; + uint32_t s, d; - uint32_t packCmp; - __m128i xmmSrc, xmmDst; + uint32_t pack_cmp; + 
__m128i xmm_src, xmm_dst; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && (unsigned long)pd & 15) { - s = combine1 (ps, pm); - d = *pd; - *pd++ = coreCombineSaturateUPixelsse2 (s, d); - w--; + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); + w--; ps++; if (pm) pm++; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); - - xmmDst = load128Aligned ((__m128i*)pd); - xmmSrc = combine4 ((__m128i*)ps, (__m128i*)pm); - - packCmp = _mm_movemask_epi8 (_mm_cmpgt_epi32 (_mm_srli_epi32 (xmmSrc, 24), - _mm_srli_epi32 (_mm_xor_si128 (xmmDst, Maskff000000), 24))); - - /* if some alpha src is grater than respective ~alpha dst */ - if (packCmp) - { - s = combine1 (ps++, pm); - d = *pd; - *pd++ = coreCombineSaturateUPixelsse2 (s, d); + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); + + xmm_dst = load_128_aligned ((__m128i*)pd); + xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm); + + pack_cmp = _mm_movemask_epi8 ( + _mm_cmpgt_epi32 ( + _mm_srli_epi32 (xmm_src, 24), + _mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24))); + + /* if some alpha src is greater than respective ~alpha dst */ + if (pack_cmp) + { + s = combine1 (ps++, pm); + d = *pd; + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); if (pm) pm++; - s = combine1 (ps++, pm); - d = *pd; - *pd++ = coreCombineSaturateUPixelsse2 (s, d); + s = combine1 (ps++, pm); + d = *pd; + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); if (pm) pm++; - s = combine1 (ps++, pm); - d = *pd; - *pd++ = coreCombineSaturateUPixelsse2 (s, d); + s = combine1 (ps++, pm); + d = *pd; + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); if (pm) pm++; - s = combine1 (ps++, pm); - d = *pd; - *pd++ = coreCombineSaturateUPixelsse2 (s, d); + s = combine1 (ps++, pm); + d = *pd; + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); if (pm) pm++; - } - else - { - save128Aligned ((__m128i*)pd, _mm_adds_epu8 (xmmDst, xmmSrc)); + } + else + { + save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src)); - pd += 4; - ps += 4; + pd += 4; + ps += 4; if (pm) pm += 4; - } + } - w -= 4; + w -= 4; } while (w--) { - s = combine1 (ps, pm); - d = *pd; - *pd++ = coreCombineSaturateUPixelsse2 (s, d); + s = combine1 (ps, pm); + d = *pd; + + *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); ps++; if (pm) pm++; @@ -1430,1683 +1603,2098 @@ coreCombineSaturateUsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, } static force_inline void -coreCombineSrcCsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t *pm, int w) +core_combine_src_ca_sse2 (uint32_t* pd, + const uint32_t* ps, + const uint32_t *pm, + int w) { uint32_t s, m; - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmMaskLo, xmmMaskHi; - __m128i xmmDstLo, xmmDstHi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst_lo, xmm_dst_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch 
((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && (unsigned long)pd & 15) { - s = *ps++; - m = *pm++; - *pd++ = pack_1x64_32 (pixMultiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m))); - w--; + s = *ps++; + m = *pm++; + *pd++ = pack_1x64_32 ( + pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m))); + w--; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); - xmmSrcHi = load128Unaligned ((__m128i*)ps); - xmmMaskHi = load128Unaligned ((__m128i*)pm); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmMaskHi, &xmmMaskLo, &xmmMaskHi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - pixMultiply_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmMaskLo, &xmmMaskHi, &xmmDstLo, &xmmDstHi); + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); - save128Aligned( (__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - ps += 4; - pd += 4; - pm += 4; - w -= 4; + ps += 4; + pd += 4; + pm += 4; + w -= 4; } while (w) { - s = *ps++; - m = *pm++; - *pd++ = pack_1x64_32 (pixMultiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m))); - w--; + s = *ps++; + m = *pm++; + *pd++ = pack_1x64_32 ( + pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m))); + w--; } } static force_inline uint32_t -coreCombineOverCPixelsse2 (uint32_t src, uint32_t mask, uint32_t dst) +core_combine_over_ca_pixel_sse2 (uint32_t src, + uint32_t mask, + uint32_t dst) { __m64 s = unpack_32_1x64 (src); - __m64 expAlpha = expandAlpha_1x64 (s); - __m64 unpkMask = unpack_32_1x64 (mask); - __m64 unpkDst = unpack_32_1x64 (dst); + __m64 expAlpha = expand_alpha_1x64 (s); + __m64 unpk_mask = unpack_32_1x64 (mask); + __m64 unpk_dst = unpack_32_1x64 (dst); - return pack_1x64_32 (inOver_1x64 (&s, &expAlpha, &unpkMask, &unpkDst)); + return pack_1x64_32 (in_over_1x64 (&s, &expAlpha, &unpk_mask, &unpk_dst)); } static force_inline void -coreCombineOverCsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t *pm, int w) +core_combine_over_ca_sse2 (uint32_t* pd, + const uint32_t* ps, + const uint32_t *pm, + int w) { uint32_t s, m, d; - __m128i xmmAlphaLo, xmmAlphaHi; - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; - __m128i xmmMaskLo, xmmMaskHi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && (unsigned long)pd & 15) { - s = *ps++; - m = *pm++; - d = 
*pd; + s = *ps++; + m = *pm++; + d = *pd; - *pd++ = coreCombineOverCPixelsse2 (s, m, d); - w--; + *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d); + w--; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); - - xmmDstHi = load128Aligned ((__m128i*)pd); - xmmSrcHi = load128Unaligned ((__m128i*)ps); - xmmMaskHi = load128Unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - - expandAlpha_2x128 (xmmSrcLo, xmmSrcHi, &xmmAlphaLo, &xmmAlphaHi); - - inOver_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmAlphaLo, &xmmAlphaHi, &xmmMaskLo, &xmmMaskHi, &xmmDstLo, &xmmDstHi); - - save128Aligned( (__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); + + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; } while (w) { - s = *ps++; - m = *pm++; - d = *pd; + s = *ps++; + m = *pm++; + d = *pd; - *pd++ = coreCombineOverCPixelsse2 (s, m, d); - w--; + *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d); + w--; } } static force_inline uint32_t -coreCombineOverReverseCPixelsse2 (uint32_t src, uint32_t mask, uint32_t dst) +core_combine_over_reverse_ca_pixel_sse2 (uint32_t src, + uint32_t mask, + uint32_t dst) { __m64 d = unpack_32_1x64 (dst); - return pack_1x64_32(over_1x64 (d, expandAlpha_1x64 (d), pixMultiply_1x64 (unpack_32_1x64 (src), unpack_32_1x64 (mask)))); + return pack_1x64_32 ( + over_1x64 (d, expand_alpha_1x64 (d), + pix_multiply_1x64 (unpack_32_1x64 (src), + unpack_32_1x64 (mask)))); } static force_inline void -coreCombineOverReverseCsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t *pm, int w) +core_combine_over_reverse_ca_sse2 (uint32_t* pd, + const uint32_t* ps, + const uint32_t *pm, + int w) { uint32_t s, m, d; - __m128i xmmAlphaLo, xmmAlphaHi; - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; - __m128i xmmMaskLo, xmmMaskHi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && (unsigned long)pd & 15) { - s = *ps++; - m = *pm++; - d = *pd; + s = *ps++; + m = *pm++; + d = 
*pd; - *pd++ = coreCombineOverReverseCPixelsse2 (s, m, d); - w--; + *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d); + w--; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); - - xmmDstHi = load128Aligned ((__m128i*)pd); - xmmSrcHi = load128Unaligned ((__m128i*)ps); - xmmMaskHi = load128Unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - - expandAlpha_2x128 (xmmDstLo, xmmDstHi, &xmmAlphaLo, &xmmAlphaHi); - pixMultiply_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmMaskLo, &xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - - over_2x128 (&xmmDstLo, &xmmDstHi, &xmmAlphaLo, &xmmAlphaHi, &xmmMaskLo, &xmmMaskHi); - - save128Aligned( (__m128i*)pd, pack_2x128_128 (xmmMaskLo, xmmMaskHi)); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); + + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + over_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask_lo, &xmm_mask_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; } while (w) { - s = *ps++; - m = *pm++; - d = *pd; + s = *ps++; + m = *pm++; + d = *pd; - *pd++ = coreCombineOverReverseCPixelsse2 (s, m, d); - w--; + *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d); + w--; } } static force_inline void -coreCombineInCsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w) +core_combine_in_ca_sse2 (uint32_t * pd, + const uint32_t *ps, + const uint32_t *pm, + int w) { uint32_t s, m, d; - __m128i xmmAlphaLo, xmmAlphaHi; - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; - __m128i xmmMaskLo, xmmMaskHi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && (unsigned long)pd & 15) { - s = *ps++; - m = *pm++; - d = *pd; + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x64_32 ( + pix_multiply_1x64 ( + pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)), + expand_alpha_1x64 (unpack_32_1x64 (d)))); - *pd++ = pack_1x64_32 (pixMultiply_1x64 (pixMultiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)), - expandAlpha_1x64 (unpack_32_1x64 (d)))); - w--; + w--; } /* call prefetch hint to optimize cache load*/ 
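/* Note the shape shared by every combiner in this file: a scalar
 * head loop runs until pd reaches a 16-byte boundary, the main loop
 * then handles four pixels per iteration with the 2x128 helpers, and
 * a scalar tail finishes the remaining w % 4 pixels.  The prefetch
 * hints are re-issued between the phases, presumably because the
 * head loop may have advanced ps/pd/pm past the cache lines that
 * were hinted on entry. */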
- cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); + + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); - xmmDstHi = load128Aligned ((__m128i*)pd); - xmmSrcHi = load128Unaligned ((__m128i*)ps); - xmmMaskHi = load128Unaligned ((__m128i*)pm); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmMaskHi, &xmmMaskLo, &xmmMaskHi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi); - expandAlpha_2x128 (xmmDstLo, xmmDstHi, &xmmAlphaLo, &xmmAlphaHi); - pixMultiply_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmMaskLo, &xmmMaskHi, &xmmDstLo, &xmmDstHi); + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); - pixMultiply_2x128 (&xmmDstLo, &xmmDstHi, &xmmAlphaLo, &xmmAlphaHi, &xmmDstLo, &xmmDstHi); + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst_lo, &xmm_dst_hi); - save128Aligned( (__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - ps += 4; - pd += 4; - pm += 4; - w -= 4; + ps += 4; + pd += 4; + pm += 4; + w -= 4; } while (w) { - s = *ps++; - m = *pm++; - d = *pd; + s = *ps++; + m = *pm++; + d = *pd; - *pd++ = pack_1x64_32 (pixMultiply_1x64 (pixMultiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)), - expandAlpha_1x64 (unpack_32_1x64 (d)))); - w--; + *pd++ = pack_1x64_32 ( + pix_multiply_1x64 ( + pix_multiply_1x64 ( + unpack_32_1x64 (s), unpack_32_1x64 (m)), + expand_alpha_1x64 (unpack_32_1x64 (d)))); + + w--; } } static force_inline void -coreCombineInReverseCsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w) +core_combine_in_reverse_ca_sse2 (uint32_t * pd, + const uint32_t *ps, + const uint32_t *pm, + int w) { uint32_t s, m, d; - __m128i xmmAlphaLo, xmmAlphaHi; - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; - __m128i xmmMaskLo, xmmMaskHi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && (unsigned long)pd & 15) { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x64_32 (pixMultiply_1x64 (unpack_32_1x64 (d), - pixMultiply_1x64 (unpack_32_1x64 (m), - expandAlpha_1x64 (unpack_32_1x64 (s))))); - w--; + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x64_32 ( + pix_multiply_1x64 ( + unpack_32_1x64 (d), + pix_multiply_1x64 (unpack_32_1x64 (m), + expand_alpha_1x64 (unpack_32_1x64 (s))))); + w--; } /* call prefetch hint to optimize cache 
load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); - - xmmDstHi = load128Aligned ((__m128i*)pd); - xmmSrcHi = load128Unaligned ((__m128i*)ps); - xmmMaskHi = load128Unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - - expandAlpha_2x128 (xmmSrcLo, xmmSrcHi, &xmmAlphaLo, &xmmAlphaHi); - pixMultiply_2x128 (&xmmMaskLo, &xmmMaskHi, &xmmAlphaLo, &xmmAlphaHi, &xmmAlphaLo, &xmmAlphaHi); - - pixMultiply_2x128 (&xmmDstLo, &xmmDstHi, &xmmAlphaLo, &xmmAlphaHi, &xmmDstLo, &xmmDstHi); - - save128Aligned( (__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); + + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; } while (w) { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x64_32 (pixMultiply_1x64 (unpack_32_1x64 (d), - pixMultiply_1x64 (unpack_32_1x64 (m), - expandAlpha_1x64 (unpack_32_1x64 (s))))); - w--; + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x64_32 ( + pix_multiply_1x64 ( + unpack_32_1x64 (d), + pix_multiply_1x64 (unpack_32_1x64 (m), + expand_alpha_1x64 (unpack_32_1x64 (s))))); + w--; } } static force_inline void -coreCombineOutCsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w) +core_combine_out_ca_sse2 (uint32_t * pd, + const uint32_t *ps, + const uint32_t *pm, + int w) { uint32_t s, m, d; - __m128i xmmAlphaLo, xmmAlphaHi; - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; - __m128i xmmMaskLo, xmmMaskHi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && (unsigned long)pd & 15) { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x64_32 (pixMultiply_1x64 (pixMultiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)), - negate_1x64 (expandAlpha_1x64 (unpack_32_1x64 (d))))); - w--; + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x64_32 ( + pix_multiply_1x64 ( + pix_multiply_1x64 ( + unpack_32_1x64 (s), unpack_32_1x64 (m)), + negate_1x64 (expand_alpha_1x64 
(unpack_32_1x64 (d))))); + w--; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); - - xmmDstHi = load128Aligned ((__m128i*)pd); - xmmSrcHi = load128Unaligned ((__m128i*)ps); - xmmMaskHi = load128Unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - - expandAlpha_2x128 (xmmDstLo, xmmDstHi, &xmmAlphaLo, &xmmAlphaHi); - negate_2x128 (xmmAlphaLo, xmmAlphaHi, &xmmAlphaLo, &xmmAlphaHi); - - pixMultiply_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmMaskLo, &xmmMaskHi, &xmmDstLo, &xmmDstHi); - pixMultiply_2x128 (&xmmDstLo, &xmmDstHi, &xmmAlphaLo, &xmmAlphaHi, &xmmDstLo, &xmmDstHi); - - save128Aligned( (__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); + + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + negate_2x128 (xmm_alpha_lo, xmm_alpha_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; } while (w) { - s = *ps++; - m = *pm++; - d = *pd; + s = *ps++; + m = *pm++; + d = *pd; - *pd++ = pack_1x64_32 (pixMultiply_1x64 (pixMultiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)), - negate_1x64 (expandAlpha_1x64 (unpack_32_1x64 (d))))); - w--; + *pd++ = pack_1x64_32 ( + pix_multiply_1x64 ( + pix_multiply_1x64 ( + unpack_32_1x64 (s), unpack_32_1x64 (m)), + negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (d))))); + + w--; } } static force_inline void -coreCombineOutReverseCsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w) +core_combine_out_reverse_ca_sse2 (uint32_t * pd, + const uint32_t *ps, + const uint32_t *pm, + int w) { uint32_t s, m, d; - __m128i xmmAlphaLo, xmmAlphaHi; - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; - __m128i xmmMaskLo, xmmMaskHi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && (unsigned long)pd & 15) { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x64_32 (pixMultiply_1x64 (unpack_32_1x64 (d), - negate_1x64 (pixMultiply_1x64 (unpack_32_1x64 (m), - 
expandAlpha_1x64 (unpack_32_1x64 (s)))))); - w--; + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x64_32 ( + pix_multiply_1x64 ( + unpack_32_1x64 (d), + negate_1x64 (pix_multiply_1x64 ( + unpack_32_1x64 (m), + expand_alpha_1x64 (unpack_32_1x64 (s)))))); + w--; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); - xmmDstHi = load128Aligned ((__m128i*)pd); - xmmSrcHi = load128Unaligned ((__m128i*)ps); - xmmMaskHi = load128Unaligned ((__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmMaskHi, &xmmMaskLo, &xmmMaskHi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); - expandAlpha_2x128 (xmmSrcLo, xmmSrcHi, &xmmAlphaLo, &xmmAlphaHi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); - pixMultiply_2x128 (&xmmMaskLo, &xmmMaskHi, &xmmAlphaLo, &xmmAlphaHi, &xmmMaskLo, &xmmMaskHi); + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask_lo, &xmm_mask_hi); - negate_2x128 (xmmMaskLo, xmmMaskHi, &xmmMaskLo, &xmmMaskHi); + negate_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); - pixMultiply_2x128 (&xmmDstLo, &xmmDstHi, &xmmMaskLo, &xmmMaskHi, &xmmDstLo, &xmmDstHi); + pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); - save128Aligned( (__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - ps += 4; - pd += 4; - pm += 4; - w -= 4; + ps += 4; + pd += 4; + pm += 4; + w -= 4; } while (w) { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x64_32 (pixMultiply_1x64 (unpack_32_1x64 (d), - negate_1x64 (pixMultiply_1x64 (unpack_32_1x64 (m), - expandAlpha_1x64 (unpack_32_1x64 (s)))))); - w--; + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x64_32 ( + pix_multiply_1x64 ( + unpack_32_1x64 (d), + negate_1x64 (pix_multiply_1x64 ( + unpack_32_1x64 (m), + expand_alpha_1x64 (unpack_32_1x64 (s)))))); + w--; } } static force_inline uint32_t -coreCombineAtopCPixelsse2 (uint32_t src, uint32_t mask, uint32_t dst) +core_combine_atop_ca_pixel_sse2 (uint32_t src, + uint32_t mask, + uint32_t dst) { __m64 m = unpack_32_1x64 (mask); __m64 s = unpack_32_1x64 (src); __m64 d = unpack_32_1x64 (dst); - __m64 sa = expandAlpha_1x64 (s); - __m64 da = expandAlpha_1x64 (d); + __m64 sa = expand_alpha_1x64 (s); + __m64 da = expand_alpha_1x64 (d); - s = pixMultiply_1x64 (s, m); - m = negate_1x64 (pixMultiply_1x64 (m, sa)); + s = pix_multiply_1x64 (s, m); + m = negate_1x64 (pix_multiply_1x64 (m, sa)); - return pack_1x64_32 (pixAddMultiply_1x64 (&d, &m, &s, &da)); + return pack_1x64_32 (pix_add_multiply_1x64 (&d, &m, &s, &da)); } static force_inline 
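/* Note: core_combine_atop_ca_pixel_sse2 above is the per-pixel
 * component-alpha ATOP equation, dest = d*(1 - m*sa) + (s*m)*da,
 * with pix_add_multiply_1x64 evaluating the whole
 * (x*a + y*b)/255 sum in one step per channel. */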
void -coreCombineAtopCsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w) +core_combine_atop_ca_sse2 (uint32_t * pd, + const uint32_t *ps, + const uint32_t *pm, + int w) { uint32_t s, m, d; - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; - __m128i xmmAlphaSrcLo, xmmAlphaSrcHi; - __m128i xmmAlphaDstLo, xmmAlphaDstHi; - __m128i xmmMaskLo, xmmMaskHi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && (unsigned long)pd & 15) { - s = *ps++; - m = *pm++; - d = *pd; + s = *ps++; + m = *pm++; + d = *pd; - *pd++ = coreCombineAtopCPixelsse2 (s, m, d); - w--; + *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d); + w--; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); - - xmmDstHi = load128Aligned ((__m128i*)pd); - xmmSrcHi = load128Unaligned ((__m128i*)ps); - xmmMaskHi = load128Unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - - expandAlpha_2x128 (xmmSrcLo, xmmSrcHi, &xmmAlphaSrcLo, &xmmAlphaSrcHi); - expandAlpha_2x128 (xmmDstLo, xmmDstHi, &xmmAlphaDstLo, &xmmAlphaDstHi); - - pixMultiply_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmMaskLo, &xmmMaskHi, &xmmSrcLo, &xmmSrcHi); - pixMultiply_2x128 (&xmmMaskLo, &xmmMaskHi, &xmmAlphaSrcLo, &xmmAlphaSrcHi, &xmmMaskLo, &xmmMaskHi); - - negate_2x128 (xmmMaskLo, xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - - pixAddMultiply_2x128 (&xmmDstLo, &xmmDstHi, &xmmMaskLo, &xmmMaskHi, - &xmmSrcLo, &xmmSrcHi, &xmmAlphaDstLo, &xmmAlphaDstHi, - &xmmDstLo, &xmmDstHi); - - save128Aligned( (__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); + + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi); + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_mask_lo, &xmm_mask_hi); + + negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + pix_add_multiply_2x128 ( + &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + 
&xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; } while (w) { - s = *ps++; - m = *pm++; - d = *pd; + s = *ps++; + m = *pm++; + d = *pd; - *pd++ = coreCombineAtopCPixelsse2 (s, m, d); - w--; + *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d); + w--; } } static force_inline uint32_t -coreCombineReverseAtopCPixelsse2 (uint32_t src, uint32_t mask, uint32_t dst) +core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src, + uint32_t mask, + uint32_t dst) { __m64 m = unpack_32_1x64 (mask); __m64 s = unpack_32_1x64 (src); __m64 d = unpack_32_1x64 (dst); - __m64 da = negate_1x64 (expandAlpha_1x64 (d)); - __m64 sa = expandAlpha_1x64 (s); + __m64 da = negate_1x64 (expand_alpha_1x64 (d)); + __m64 sa = expand_alpha_1x64 (s); - s = pixMultiply_1x64 (s, m); - m = pixMultiply_1x64 (m, sa); + s = pix_multiply_1x64 (s, m); + m = pix_multiply_1x64 (m, sa); - return pack_1x64_32 (pixAddMultiply_1x64 (&d, &m, &s, &da)); + return pack_1x64_32 (pix_add_multiply_1x64 (&d, &m, &s, &da)); } static force_inline void -coreCombineReverseAtopCsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w) +core_combine_reverse_atop_ca_sse2 (uint32_t * pd, + const uint32_t *ps, + const uint32_t *pm, + int w) { uint32_t s, m, d; - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; - __m128i xmmAlphaSrcLo, xmmAlphaSrcHi; - __m128i xmmAlphaDstLo, xmmAlphaDstHi; - __m128i xmmMaskLo, xmmMaskHi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && (unsigned long)pd & 15) { - s = *ps++; - m = *pm++; - d = *pd; + s = *ps++; + m = *pm++; + d = *pd; - *pd++ = coreCombineReverseAtopCPixelsse2 (s, m, d); - w--; + *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d); + w--; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); - - xmmDstHi = load128Aligned ((__m128i*)pd); - xmmSrcHi = load128Unaligned ((__m128i*)ps); - xmmMaskHi = load128Unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - - expandAlpha_2x128 (xmmSrcLo, xmmSrcHi, &xmmAlphaSrcLo, &xmmAlphaSrcHi); - expandAlpha_2x128 (xmmDstLo, xmmDstHi, &xmmAlphaDstLo, &xmmAlphaDstHi); - - pixMultiply_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmMaskLo, &xmmMaskHi, &xmmSrcLo, &xmmSrcHi); - pixMultiply_2x128 (&xmmMaskLo, &xmmMaskHi, &xmmAlphaSrcLo, &xmmAlphaSrcHi, &xmmMaskLo, &xmmMaskHi); - - negate_2x128 (xmmAlphaDstLo, xmmAlphaDstHi, &xmmAlphaDstLo, &xmmAlphaDstHi); - - pixAddMultiply_2x128 (&xmmDstLo, &xmmDstHi, &xmmMaskLo, &xmmMaskHi, - &xmmSrcLo, &xmmSrcHi, &xmmAlphaDstLo, &xmmAlphaDstHi, - &xmmDstLo, &xmmDstHi); - - save128Aligned( (__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); - - ps += 4; - pd 
+= 4; - pm += 4; - w -= 4; + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); + + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi); + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_mask_lo, &xmm_mask_hi); + + negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_add_multiply_2x128 ( + &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; } while (w) { - s = *ps++; - m = *pm++; - d = *pd; + s = *ps++; + m = *pm++; + d = *pd; - *pd++ = coreCombineReverseAtopCPixelsse2 (s, m, d); - w--; + *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d); + w--; } } static force_inline uint32_t -coreCombineXorCPixelsse2 (uint32_t src, uint32_t mask, uint32_t dst) +core_combine_xor_ca_pixel_sse2 (uint32_t src, + uint32_t mask, + uint32_t dst) { __m64 a = unpack_32_1x64 (mask); __m64 s = unpack_32_1x64 (src); __m64 d = unpack_32_1x64 (dst); - __m64 alphaDst = negate_1x64 (pixMultiply_1x64 (a, expandAlpha_1x64 (s))); - __m64 dest = pixMultiply_1x64 (s, a); - __m64 alphaSrc = negate_1x64 (expandAlpha_1x64 (d)); + __m64 alpha_dst = negate_1x64 (pix_multiply_1x64 ( + a, expand_alpha_1x64 (s))); + __m64 dest = pix_multiply_1x64 (s, a); + __m64 alpha_src = negate_1x64 (expand_alpha_1x64 (d)); - return pack_1x64_32 (pixAddMultiply_1x64 (&d, - &alphaDst, - &dest, - &alphaSrc)); + return pack_1x64_32 (pix_add_multiply_1x64 (&d, + &alpha_dst, + &dest, + &alpha_src)); } static force_inline void -coreCombineXorCsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w) +core_combine_xor_ca_sse2 (uint32_t * pd, + const uint32_t *ps, + const uint32_t *pm, + int w) { uint32_t s, m, d; - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; - __m128i xmmAlphaSrcLo, xmmAlphaSrcHi; - __m128i xmmAlphaDstLo, xmmAlphaDstHi; - __m128i xmmMaskLo, xmmMaskHi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; + __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w && (unsigned long)pd & 15) { - s = *ps++; - m = *pm++; - d = *pd; + s = *ps++; + m = *pm++; + d = *pd; - *pd++ = coreCombineXorCPixelsse2 (s, m, d); - w--; + *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d); + w--; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + 
cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); - - xmmDstHi = load128Aligned ((__m128i*)pd); - xmmSrcHi = load128Unaligned ((__m128i*)ps); - xmmMaskHi = load128Unaligned ((__m128i*)pm); - - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - - expandAlpha_2x128 (xmmSrcLo, xmmSrcHi, &xmmAlphaSrcLo, &xmmAlphaSrcHi); - expandAlpha_2x128 (xmmDstLo, xmmDstHi, &xmmAlphaDstLo, &xmmAlphaDstHi); - - pixMultiply_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmMaskLo, &xmmMaskHi, &xmmSrcLo, &xmmSrcHi); - pixMultiply_2x128 (&xmmMaskLo, &xmmMaskHi, &xmmAlphaSrcLo, &xmmAlphaSrcHi, &xmmMaskLo, &xmmMaskHi); - - negate_2x128 (xmmAlphaDstLo, xmmAlphaDstHi, &xmmAlphaDstLo, &xmmAlphaDstHi); - negate_2x128 (xmmMaskLo, xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - - pixAddMultiply_2x128 (&xmmDstLo, &xmmDstHi, &xmmMaskLo, &xmmMaskHi, - &xmmSrcLo, &xmmSrcHi, &xmmAlphaDstLo, &xmmAlphaDstHi, - &xmmDstLo, &xmmDstHi); - - save128Aligned( (__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); + + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi); + expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi); + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_alpha_src_lo, &xmm_alpha_src_hi, + &xmm_mask_lo, &xmm_mask_hi); + + negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, + &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); + negate_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + pix_add_multiply_2x128 ( + &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; } while (w) { - s = *ps++; - m = *pm++; - d = *pd; + s = *ps++; + m = *pm++; + d = *pd; - *pd++ = coreCombineXorCPixelsse2 (s, m, d); - w--; + *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d); + w--; } } static force_inline void -coreCombineAddCsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w) +core_combine_add_ca_sse2 (uint32_t * pd, + const uint32_t *ps, + const uint32_t *pm, + int w) { uint32_t s, m, d; - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; - __m128i xmmMaskLo, xmmMaskHi; + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask_lo, xmm_mask_hi; /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch 
((__m128i*)pm); while (w && (unsigned long)pd & 15) { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x64_32 (_mm_adds_pu8 (pixMultiply_1x64 (unpack_32_1x64 (s), - unpack_32_1x64 (m)), - unpack_32_1x64 (d))); - w--; + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x64_32 ( + _mm_adds_pu8 (pix_multiply_1x64 (unpack_32_1x64 (s), + unpack_32_1x64 (m)), + unpack_32_1x64 (d))); + w--; } /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)ps); - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); + cache_prefetch ((__m128i*)ps); + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); while (w >= 4) { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)ps); - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); - - xmmSrcHi = load128Unaligned ((__m128i*)ps); - xmmMaskHi = load128Unaligned ((__m128i*)pm); - xmmDstHi = load128Aligned ((__m128i*)pd); - - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); - - pixMultiply_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmMaskLo, &xmmMaskHi, &xmmSrcLo, &xmmSrcHi); - - save128Aligned( (__m128i*)pd, pack_2x128_128 (_mm_adds_epu8 (xmmSrcLo, xmmDstLo), - _mm_adds_epu8 (xmmSrcHi, xmmDstHi))); - - ps += 4; - pd += 4; - pm += 4; - w -= 4; + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)ps); + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); + + xmm_src_hi = load_128_unaligned ((__m128i*)ps); + xmm_mask_hi = load_128_unaligned ((__m128i*)pm); + xmm_dst_hi = load_128_aligned ((__m128i*)pd); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_src_lo, &xmm_src_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 ( + _mm_adds_epu8 (xmm_src_lo, xmm_dst_lo), + _mm_adds_epu8 (xmm_src_hi, xmm_dst_hi))); + + ps += 4; + pd += 4; + pm += 4; + w -= 4; } while (w) { - s = *ps++; - m = *pm++; - d = *pd; - - *pd++ = pack_1x64_32 (_mm_adds_pu8 (pixMultiply_1x64 (unpack_32_1x64 (s), - unpack_32_1x64 (m)), - unpack_32_1x64 (d))); - w--; + s = *ps++; + m = *pm++; + d = *pd; + + *pd++ = pack_1x64_32 ( + _mm_adds_pu8 (pix_multiply_1x64 (unpack_32_1x64 (s), + unpack_32_1x64 (m)), + unpack_32_1x64 (d))); + w--; } } -/* ------------------------------------------------------------------------------------------------- - * fbComposeSetupSSE2 +/* --------------------------------------------------- + * fb_compose_setup_sSE2 */ static force_inline __m64 -createMask_16_64 (uint16_t mask) +create_mask_16_64 (uint16_t mask) { return _mm_set1_pi16 (mask); } static force_inline __m128i -createMask_16_128 (uint16_t mask) +create_mask_16_128 (uint16_t mask) { return _mm_set1_epi16 (mask); } static force_inline __m64 -createMask_2x32_64 (uint32_t mask0, uint32_t mask1) +create_mask_2x32_64 (uint32_t mask0, + uint32_t mask1) { return _mm_set_pi32 (mask0, mask1); } +/* Work around a code generation bug in Sun Studio 12. 
*/ +#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590) +# define create_mask_2x32_128(mask0, mask1) \ + (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1))) +#else static force_inline __m128i -createMask_2x32_128 (uint32_t mask0, uint32_t mask1) +create_mask_2x32_128 (uint32_t mask0, + uint32_t mask1) { return _mm_set_epi32 (mask0, mask1, mask0, mask1); } +#endif /* SSE2 code patch for fbcompose.c */ -static FASTCALL void -sse2CombineOverU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineOverUsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_over_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_over_u_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineOverReverseU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineOverReverseUsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_over_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_over_reverse_u_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineInU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineInUsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_in_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_in_u_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineInReverseU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineReverseInUsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_in_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_reverse_in_u_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineOutU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineOutUsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_out_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_out_u_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineOutReverseU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineReverseOutUsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_out_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_reverse_out_u_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineAtopU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineAtopUsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_atop_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + 
const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_atop_u_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineAtopReverseU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineReverseAtopUsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_atop_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_reverse_atop_u_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineXorU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineXorUsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_xor_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_xor_u_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineAddU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineAddUsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_add_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_add_u_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineSaturateU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineSaturateUsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_saturate_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_saturate_u_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineSrcC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineSrcCsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_src_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_src_ca_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineOverC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineOverCsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_over_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_over_ca_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineOverReverseC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineOverReverseCsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_over_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_over_reverse_ca_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineInC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const 
uint32_t *mask, int width) -{ - coreCombineInCsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_in_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_in_ca_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineInReverseC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineInReverseCsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_in_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_in_reverse_ca_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineOutC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineOutCsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_out_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_out_ca_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineOutReverseC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineOutReverseCsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_out_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_out_reverse_ca_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineAtopC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineAtopCsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_atop_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_atop_ca_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineAtopReverseC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineReverseAtopCsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_atop_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_reverse_atop_ca_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineXorC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineXorCsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_xor_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ + core_combine_xor_ca_sse2 (dst, src, mask, width); + _mm_empty (); } -static FASTCALL void -sse2CombineAddC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width) -{ - coreCombineAddCsse2 (dst, src, mask, width); - _mm_empty(); +static void +sse2_combine_add_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dst, + const uint32_t * src, + const uint32_t * mask, + int width) +{ 
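/* These thin wrappers only adapt the core combiners to the
 * combiner entry-point signature pixman dispatches on; imp and
 * op are unused.  The trailing _mm_empty () is load-bearing:
 * the 1x64 helpers run in MMX registers, which alias the x87
 * floating-point stack, so EMMS must execute before any caller
 * touches floating point again. */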
+ core_combine_add_ca_sse2 (dst, src, mask, width); + _mm_empty (); } -/* ------------------------------------------------------------------------------------------------- - * fbCompositeSolid_nx8888 +/* ------------------------------------------------------------------- + * composite_over_n_8888 */ static void -fbCompositeSolid_nx8888sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t src; - uint32_t *dstLine, *dst, d; - uint16_t w; - int dstStride; - __m128i xmmSrc, xmmAlpha; - __m128i xmmDst, xmmDstLo, xmmDstHi; - - fbComposeGetSolid(pSrc, src, pDst->bits.format); +sse2_composite_over_n_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src; + uint32_t *dst_line, *dst, d; + uint16_t w; + int dst_stride; + __m128i xmm_src, xmm_alpha; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); if (src == 0) return; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - xmmSrc = expandPixel_32_1x128 (src); - xmmAlpha = expandAlpha_1x128 (xmmSrc); + xmm_src = expand_pixel_32_1x128 (src); + xmm_alpha = expand_alpha_1x128 (xmm_src); while (height--) { - dst = dstLine; + dst = dst_line; - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)dst); + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)dst); - dstLine += dstStride; - w = width; + dst_line += dst_stride; + w = width; - while (w && (unsigned long)dst & 15) - { - d = *dst; - *dst++ = pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmmSrc), - _mm_movepi64_pi64 (xmmAlpha), - unpack_32_1x64 (d))); - w--; - } + while (w && (unsigned long)dst & 15) + { + d = *dst; + *dst++ = pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src), + _mm_movepi64_pi64 (xmm_alpha), + unpack_32_1x64 (d))); + w--; + } - cachePrefetch ((__m128i*)dst); + cache_prefetch ((__m128i*)dst); - while (w >= 4) - { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)dst); + while (w >= 4) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)dst); - xmmDst = load128Aligned ((__m128i*)dst); + xmm_dst = load_128_aligned ((__m128i*)dst); - unpack_128_2x128 (xmmDst, &xmmDstLo, &xmmDstHi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); - over_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmDstLo, &xmmDstHi); + over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_dst_lo, &xmm_dst_hi); - /* rebuid the 4 pixel data and save*/ - save128Aligned ((__m128i*)dst, pack_2x128_128 (xmmDstLo, xmmDstHi)); + /* rebuid the 4 pixel data and save*/ + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); - w -= 4; - dst += 4; - } + w -= 4; + dst += 4; + } - while (w) - { - d = *dst; - *dst++ = pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmmSrc), - _mm_movepi64_pi64 (xmmAlpha), - unpack_32_1x64 (d))); - w--; - } + while (w) + { + d = *dst; + *dst++ = pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src), + _mm_movepi64_pi64 (xmm_alpha), + 
unpack_32_1x64 (d))); + w--; + } } - _mm_empty(); + _mm_empty (); } -/* ------------------------------------------------------------------------------------------------- - * fbCompositeSolid_nx0565 +/* --------------------------------------------------------------------- + * composite_over_n_0565 */ static void -fbCompositeSolid_nx0565sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t src; - uint16_t *dstLine, *dst, d; - uint16_t w; - int dstStride; - __m128i xmmSrc, xmmAlpha; - __m128i xmmDst, xmmDst0, xmmDst1, xmmDst2, xmmDst3; - - fbComposeGetSolid(pSrc, src, pDst->bits.format); +sse2_composite_over_n_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src; + uint16_t *dst_line, *dst, d; + uint16_t w; + int dst_stride; + __m128i xmm_src, xmm_alpha; + __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); if (src == 0) - return; + return; - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - xmmSrc = expandPixel_32_1x128 (src); - xmmAlpha = expandAlpha_1x128 (xmmSrc); + xmm_src = expand_pixel_32_1x128 (src); + xmm_alpha = expand_alpha_1x128 (xmm_src); while (height--) { - dst = dstLine; - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)dst); - - dstLine += dstStride; - w = width; - - while (w && (unsigned long)dst & 15) - { - d = *dst; - - *dst++ = pack565_32_16 (pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmmSrc), - _mm_movepi64_pi64 (xmmAlpha), - expand565_16_1x64 (d)))); - w--; - } - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)dst); - - while (w >= 8) - { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)dst); - - xmmDst = load128Aligned ((__m128i*)dst); - - unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3); - - over_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmDst0, &xmmDst1); - over_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmDst2, &xmmDst3); - - xmmDst = pack565_4x128_128 (&xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3); - save128Aligned ((__m128i*)dst, xmmDst); - - dst += 8; - w -= 8; - } - - while (w--) - { - d = *dst; - *dst++ = pack565_32_16 (pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmmSrc), - _mm_movepi64_pi64 (xmmAlpha), - expand565_16_1x64 (d)))); - } + dst = dst_line; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)dst); + + dst_line += dst_stride; + w = width; + + while (w && (unsigned long)dst & 15) + { + d = *dst; + + *dst++ = pack_565_32_16 ( + pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src), + _mm_movepi64_pi64 (xmm_alpha), + expand565_16_1x64 (d)))); + w--; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)dst); + + while (w >= 8) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)dst); + + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_565_128_4x128 (xmm_dst, + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + + 
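/* The r5g6b5 path reuses the 8888 helpers: unpack_565_128_4x128
 * widens eight 16-bit pixels into four registers of unpacked
 * channels, the usual over_2x128 runs on those, and
 * pack_565_4x128_128 narrows the result back to 565. */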
over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_dst0, &xmm_dst1); + over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_dst2, &xmm_dst3); + + xmm_dst = pack_565_4x128_128 ( + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + + save_128_aligned ((__m128i*)dst, xmm_dst); + + dst += 8; + w -= 8; + } + + while (w--) + { + d = *dst; + *dst++ = pack_565_32_16 ( + pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src), + _mm_movepi64_pi64 (xmm_alpha), + expand565_16_1x64 (d)))); + } } - _mm_empty(); + _mm_empty (); } -/* ------------------------------------------------------------------------------------------------- - * fbCompositeSolidMask_nx8888x8888C +/* ------------------------------ + * composite_add_n_8888_8888_ca */ - static void -fbCompositeSolidMask_nx8888x8888Csse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t src; - uint32_t *dstLine, d; - uint32_t *maskLine, m; - uint32_t packCmp; - int dstStride, maskStride; - - __m128i xmmSrc, xmmAlpha; - __m128i xmmDst, xmmDstLo, xmmDstHi; - __m128i xmmMask, xmmMaskLo, xmmMaskHi; - - __m64 mmxSrc, mmxAlpha, mmxMask, mmxDst; - - fbComposeGetSolid(pSrc, src, pDst->bits.format); - +sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint32_t *dst_line, d; + uint32_t *mask_line, m; + uint32_t pack_cmp; + int dst_stride, mask_stride; + + __m128i xmm_src, xmm_alpha; + __m128i xmm_dst; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest; + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); + srca = src >> 24; + if (src == 0) return; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint32_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - xmmSrc = _mm_unpacklo_epi8 (createMask_2x32_128 (src, src), _mm_setzero_si128 ()); - xmmAlpha = expandAlpha_1x128 (xmmSrc); - mmxSrc = _mm_movepi64_pi64 (xmmSrc); - mmxAlpha = _mm_movepi64_pi64 (xmmAlpha); + xmm_src = _mm_unpacklo_epi8 ( + create_mask_2x32_128 (src, src), _mm_setzero_si128 ()); + xmm_alpha = expand_alpha_1x128 (xmm_src); + mmx_src = _mm_movepi64_pi64 (xmm_src); + mmx_alpha = _mm_movepi64_pi64 (xmm_alpha); while (height--) { - int w = width; - const uint32_t *pm = (uint32_t *)maskLine; - uint32_t *pd = (uint32_t *)dstLine; - - dstLine += dstStride; - maskLine += maskStride; - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); - - while (w && (unsigned long)pd & 15) - { - m = *pm++; - - if (m) - { - d = *pd; - mmxMask = unpack_32_1x64 (m); - mmxDst = unpack_32_1x64 (d); - - *pd = pack_1x64_32 (inOver_1x64 (&mmxSrc, - &mmxAlpha, - &mmxMask, - &mmxDst)); - } - - pd++; - w--; - } - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)pd); - cachePrefetch ((__m128i*)pm); - - while (w >= 4) - { - /* fill cache 
line with next memory */ - cachePrefetchNext ((__m128i*)pd); - cachePrefetchNext ((__m128i*)pm); - - xmmMask = load128Unaligned ((__m128i*)pm); - - packCmp = _mm_movemask_epi8 (_mm_cmpeq_epi32 (xmmMask, _mm_setzero_si128())); - - /* if all bits in mask are zero, packCmp are equal to 0xffff */ - if (packCmp != 0xffff) - { - xmmDst = load128Aligned ((__m128i*)pd); - - unpack_128_2x128 (xmmMask, &xmmMaskLo, &xmmMaskHi); - unpack_128_2x128 (xmmDst, &xmmDstLo, &xmmDstHi); - - inOver_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmMaskLo, &xmmMaskHi, &xmmDstLo, &xmmDstHi); - - save128Aligned ((__m128i*)pd, pack_2x128_128 (xmmDstLo, xmmDstHi)); - } - - pd += 4; - pm += 4; - w -= 4; - } - - while (w) - { - m = *pm++; - - if (m) - { - d = *pd; - mmxMask = unpack_32_1x64 (m); - mmxDst = unpack_32_1x64 (d); - - *pd = pack_1x64_32 (inOver_1x64 (&mmxSrc, - &mmxAlpha, - &mmxMask, - &mmxDst)); - } - - pd++; - w--; - } + int w = width; + const uint32_t *pm = (uint32_t *)mask_line; + uint32_t *pd = (uint32_t *)dst_line; + + dst_line += dst_stride; + mask_line += mask_stride; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); + + while (w && (unsigned long)pd & 15) + { + m = *pm++; + + if (m) + { + d = *pd; + + mmx_mask = unpack_32_1x64 (m); + mmx_dest = unpack_32_1x64 (d); + + *pd = pack_1x64_32 ( + _mm_adds_pu8 (pix_multiply_1x64 (mmx_mask, mmx_src), mmx_dest)); + } + + pd++; + w--; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); + + while (w >= 4) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); + + xmm_mask = load_128_unaligned ((__m128i*)pm); + + pack_cmp = + _mm_movemask_epi8 ( + _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); + + /* if all bits in mask are zero, pack_cmp are equal to 0xffff */ + if (pack_cmp != 0xffff) + { + xmm_dst = load_128_aligned ((__m128i*)pd); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_src, &xmm_src, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + xmm_mask_hi = pack_2x128_128 (xmm_mask_lo, xmm_mask_hi); + + save_128_aligned ( + (__m128i*)pd, _mm_adds_epu8 (xmm_mask_hi, xmm_dst)); + } + + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + m = *pm++; + + if (m) + { + d = *pd; + + mmx_mask = unpack_32_1x64 (m); + mmx_dest = unpack_32_1x64 (d); + + *pd = pack_1x64_32 ( + _mm_adds_pu8 (pix_multiply_1x64 (mmx_mask, mmx_src), mmx_dest)); + } + + pd++; + w--; + } } - _mm_empty(); + _mm_empty (); } - -/* ------------------------------------------------------------------------------------------------- - * fbCompositeSrc_8888x8x8888 +/* --------------------------------------------------------------------------- + * composite_over_n_8888_8888_ca */ static void -fbCompositeSrc_8888x8x8888sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - uint32_t mask; - uint16_t w; - int dstStride, srcStride; - - __m128i xmmMask; - __m128i xmmSrc, xmmSrcLo, xmmSrcHi; - __m128i xmmDst, xmmDstLo, xmmDstHi; - __m128i xmmAlphaLo, xmmAlphaHi; - - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, 
srcStride, srcLine, 1); - fbComposeGetSolid (pMask, mask, pDst->bits.format); - - xmmMask = createMask_16_128 (mask >> 24); - - while (height--) - { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)dst); - cachePrefetch ((__m128i*)src); - - while (w && (unsigned long)dst & 15) - { - uint32_t s = *src++; - uint32_t d = *dst; - - __m64 ms = unpack_32_1x64 (s); - __m64 alpha = expandAlpha_1x64 (ms); - __m64 dest = _mm_movepi64_pi64 (xmmMask); - __m64 alphaDst = unpack_32_1x64 (d); - - *dst++ = pack_1x64_32 (inOver_1x64 (&ms, - &alpha, - &dest, - &alphaDst)); +sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src; + uint32_t *dst_line, d; + uint32_t *mask_line, m; + uint32_t pack_cmp; + int dst_stride, mask_stride; + + __m128i xmm_src, xmm_alpha; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest; + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); - w--; - } + if (src == 0) + return; - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)dst); - cachePrefetch ((__m128i*)src); + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - while (w >= 4) - { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)dst); - cachePrefetchNext ((__m128i*)src); + xmm_src = _mm_unpacklo_epi8 ( + create_mask_2x32_128 (src, src), _mm_setzero_si128 ()); + xmm_alpha = expand_alpha_1x128 (xmm_src); + mmx_src = _mm_movepi64_pi64 (xmm_src); + mmx_alpha = _mm_movepi64_pi64 (xmm_alpha); - xmmSrc = load128Unaligned ((__m128i*)src); - xmmDst = load128Aligned ((__m128i*)dst); + while (height--) + { + int w = width; + const uint32_t *pm = (uint32_t *)mask_line; + uint32_t *pd = (uint32_t *)dst_line; + + dst_line += dst_stride; + mask_line += mask_stride; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); + + while (w && (unsigned long)pd & 15) + { + m = *pm++; + + if (m) + { + d = *pd; + mmx_mask = unpack_32_1x64 (m); + mmx_dest = unpack_32_1x64 (d); + + *pd = pack_1x64_32 (in_over_1x64 (&mmx_src, + &mmx_alpha, + &mmx_mask, + &mmx_dest)); + } + + pd++; + w--; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)pd); + cache_prefetch ((__m128i*)pm); + + while (w >= 4) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)pd); + cache_prefetch_next ((__m128i*)pm); + + xmm_mask = load_128_unaligned ((__m128i*)pm); + + pack_cmp = + _mm_movemask_epi8 ( + _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); + + /* if all bits in mask are zero, pack_cmp are equal to 0xffff */ + if (pack_cmp != 0xffff) + { + xmm_dst = load_128_aligned ((__m128i*)pd); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + in_over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)pd, pack_2x128_128 
(xmm_dst_lo, xmm_dst_hi)); + } + + pd += 4; + pm += 4; + w -= 4; + } + + while (w) + { + m = *pm++; + + if (m) + { + d = *pd; + mmx_mask = unpack_32_1x64 (m); + mmx_dest = unpack_32_1x64 (d); + + *pd = pack_1x64_32 ( + in_over_1x64 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)); + } + + pd++; + w--; + } + } - unpack_128_2x128 (xmmSrc, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmDst, &xmmDstLo, &xmmDstHi); - expandAlpha_2x128 (xmmSrcLo, xmmSrcHi, &xmmAlphaLo, &xmmAlphaHi); + _mm_empty (); +} - inOver_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmAlphaLo, &xmmAlphaHi, &xmmMask, &xmmMask, &xmmDstLo, &xmmDstHi); +/*--------------------------------------------------------------------- + * composite_over_8888_n_8888 + */ - save128Aligned( (__m128i*)dst, pack_2x128_128 (xmmDstLo, xmmDstHi)); +static void +sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + uint32_t mask; + uint16_t w; + int dst_stride, src_stride; - dst += 4; - src += 4; - w -= 4; - } + __m128i xmm_mask; + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_lo, xmm_alpha_hi; - while (w) - { - uint32_t s = *src++; - uint32_t d = *dst; + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - __m64 ms = unpack_32_1x64 (s); - __m64 alpha = expandAlpha_1x64 (ms); - __m64 mask = _mm_movepi64_pi64 (xmmMask); - __m64 dest = unpack_32_1x64 (d); + mask = _pixman_image_get_solid (mask_image, dst_image->bits.format); - *dst++ = pack_1x64_32 (inOver_1x64 (&ms, - &alpha, - &mask, - &dest)); + xmm_mask = create_mask_16_128 (mask >> 24); - w--; - } + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)dst); + cache_prefetch ((__m128i*)src); + + while (w && (unsigned long)dst & 15) + { + uint32_t s = *src++; + uint32_t d = *dst; + + __m64 ms = unpack_32_1x64 (s); + __m64 alpha = expand_alpha_1x64 (ms); + __m64 dest = _mm_movepi64_pi64 (xmm_mask); + __m64 alpha_dst = unpack_32_1x64 (d); + + *dst++ = pack_1x64_32 ( + in_over_1x64 (&ms, &alpha, &dest, &alpha_dst)); + + w--; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)dst); + cache_prefetch ((__m128i*)src); + + while (w >= 4) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)dst); + cache_prefetch_next ((__m128i*)src); + + xmm_src = load_128_unaligned ((__m128i*)src); + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask, &xmm_mask, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + dst += 4; + src += 4; + w -= 4; + } + + while (w) + { + uint32_t s = *src++; + uint32_t d = *dst; + + __m64 ms = unpack_32_1x64 (s); + __m64 alpha = expand_alpha_1x64 (ms); + __m64 mask = 
_mm_movepi64_pi64 (xmm_mask); + __m64 dest = unpack_32_1x64 (d); + + *dst++ = pack_1x64_32 ( + in_over_1x64 (&ms, &alpha, &mask, &dest)); + + w--; + } } - _mm_empty(); + _mm_empty (); } -/* ------------------------------------------------------------------------------------------------- - * fbCompositeSrc_x888xnx8888 +/* --------------------------------------------------------------------- + * composite_over_x888_n_8888 */ static void -fbCompositeSrc_x888xnx8888sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - uint32_t mask; - int dstStride, srcStride; - uint16_t w; - - __m128i xmmMask, xmmAlpha; - __m128i xmmSrc, xmmSrcLo, xmmSrcHi; - __m128i xmmDst, xmmDstLo, xmmDstHi; - - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); - fbComposeGetSolid (pMask, mask, pDst->bits.format); - - xmmMask = createMask_16_128 (mask >> 24); - xmmAlpha = Mask00ff; - - while (height--) - { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)dst); - cachePrefetch ((__m128i*)src); - - while (w && (unsigned long)dst & 15) - { - uint32_t s = (*src++) | 0xff000000; - uint32_t d = *dst; - - __m64 src = unpack_32_1x64 (s); - __m64 alpha = _mm_movepi64_pi64 (xmmAlpha); - __m64 mask = _mm_movepi64_pi64 (xmmMask); - __m64 dest = unpack_32_1x64 (d); - - *dst++ = pack_1x64_32 (inOver_1x64 (&src, - &alpha, - &mask, - &dest)); - - w--; - } - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)dst); - cachePrefetch ((__m128i*)src); - - while (w >= 4) - { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)dst); - cachePrefetchNext ((__m128i*)src); - - xmmSrc = _mm_or_si128 (load128Unaligned ((__m128i*)src), Maskff000000); - xmmDst = load128Aligned ((__m128i*)dst); - - unpack_128_2x128 (xmmSrc, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmDst, &xmmDstLo, &xmmDstHi); - - inOver_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmAlpha, &xmmAlpha, &xmmMask, &xmmMask, &xmmDstLo, &xmmDstHi); - - save128Aligned( (__m128i*)dst, pack_2x128_128 (xmmDstLo, xmmDstHi)); - - dst += 4; - src += 4; - w -= 4; +sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + uint32_t mask; + int dst_stride, src_stride; + uint16_t w; - } + __m128i xmm_mask, xmm_alpha; + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - while (w) - { - uint32_t s = (*src++) | 0xff000000; - uint32_t d = *dst; + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - __m64 src = unpack_32_1x64 (s); - __m64 alpha = _mm_movepi64_pi64 (xmmAlpha); - __m64 mask = _mm_movepi64_pi64 (xmmMask); - __m64 dest = unpack_32_1x64 (d); + mask = _pixman_image_get_solid (mask_image, dst_image->bits.format); - *dst++ 
= pack_1x64_32 (inOver_1x64 (&src, - &alpha, - &mask, - &dest)); + xmm_mask = create_mask_16_128 (mask >> 24); + xmm_alpha = mask_00ff; - w--; - } + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)dst); + cache_prefetch ((__m128i*)src); + + while (w && (unsigned long)dst & 15) + { + uint32_t s = (*src++) | 0xff000000; + uint32_t d = *dst; + + __m64 src = unpack_32_1x64 (s); + __m64 alpha = _mm_movepi64_pi64 (xmm_alpha); + __m64 mask = _mm_movepi64_pi64 (xmm_mask); + __m64 dest = unpack_32_1x64 (d); + + *dst++ = pack_1x64_32 ( + in_over_1x64 (&src, &alpha, &mask, &dest)); + + w--; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)dst); + cache_prefetch ((__m128i*)src); + + while (w >= 4) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)dst); + cache_prefetch_next ((__m128i*)src); + + xmm_src = _mm_or_si128 ( + load_128_unaligned ((__m128i*)src), mask_ff000000); + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha, &xmm_alpha, + &xmm_mask, &xmm_mask, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + dst += 4; + src += 4; + w -= 4; + + } + + while (w) + { + uint32_t s = (*src++) | 0xff000000; + uint32_t d = *dst; + + __m64 src = unpack_32_1x64 (s); + __m64 alpha = _mm_movepi64_pi64 (xmm_alpha); + __m64 mask = _mm_movepi64_pi64 (xmm_mask); + __m64 dest = unpack_32_1x64 (d); + + *dst++ = pack_1x64_32 ( + in_over_1x64 (&src, &alpha, &mask, &dest)); + + w--; + } } - _mm_empty(); + _mm_empty (); } -/* ------------------------------------------------------------------------------------------------- - * fbCompositeSrc_8888x8888 +/* -------------------------------------------------------------------- + * composite_over_8888_8888 */ static void -fbCompositeSrc_8888x8888sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - int dstStride, srcStride; - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); - - dst = dstLine; - src = srcLine; +sse2_composite_over_8888_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + int dst_stride, src_stride; + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + dst = dst_line; + src = src_line; while (height--) { - coreCombineOverUsse2 (dst, src, NULL, width); + core_combine_over_u_sse2 (dst, src, NULL, width); - dst += dstStride; - src += srcStride; + dst += dst_stride; + src += src_stride; } - _mm_empty(); + _mm_empty (); } -/* 
------------------------------------------------------------------------------------------------- - * fbCompositeSrc_8888x0565 +/* ------------------------------------------------------------------ + * composite_over_8888_0565 */ static force_inline uint16_t -fbCompositeSrc_8888x0565pixel (uint32_t src, uint16_t dst) +composite_over_8888_0565pixel (uint32_t src, uint16_t dst) { - __m64 ms; + __m64 ms; ms = unpack_32_1x64 (src); - return pack565_32_16( pack_1x64_32 (over_1x64 (ms, - expandAlpha_1x64 (ms), - expand565_16_1x64 (dst)))); + return pack_565_32_16 ( + pack_1x64_32 ( + over_1x64 ( + ms, expand_alpha_1x64 (ms), expand565_16_1x64 (dst)))); } static void -fbCompositeSrc_8888x0565sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint16_t *dstLine, *dst, d; - uint32_t *srcLine, *src, s; - int dstStride, srcStride; - uint16_t w; - - __m128i xmmAlphaLo, xmmAlphaHi; - __m128i xmmSrc, xmmSrcLo, xmmSrcHi; - __m128i xmmDst, xmmDst0, xmmDst1, xmmDst2, xmmDst3; - - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); +sse2_composite_over_8888_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint16_t *dst_line, *dst, d; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + uint16_t w; + + __m128i xmm_alpha_lo, xmm_alpha_hi; + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; + + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); #if 0 /* FIXME @@ -3114,242 +3702,262 @@ fbCompositeSrc_8888x0565sse2 (pixman_implementation_t *imp, * I copy the code from MMX one and keep the fixme. * If it's a problem there, probably is a problem here. */ - assert (pSrc->pDrawable == pMask->pDrawable); + assert (src_image->drawable == mask_image->drawable); #endif while (height--) { - dst = dstLine; - src = srcLine; - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)src); - cachePrefetch ((__m128i*)dst); - - dstLine += dstStride; - srcLine += srcStride; - w = width; - - /* Align dst on a 16-byte boundary */ - while (w && - ((unsigned long)dst & 15)) - { - s = *src++; - d = *dst; - - *dst++ = fbCompositeSrc_8888x0565pixel (s, d); - w--; - } - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)src); - cachePrefetch ((__m128i*)dst); - - /* It's a 8 pixel loop */ - while (w >= 8) - { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)src); - cachePrefetchNext ((__m128i*)dst); - - /* I'm loading unaligned because I'm not sure about the address alignment. 
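- * (the head loop above realigned only dst; src may sit at any 4-byte
- * offset, and an aligned 128-bit load from it could fault)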
*/ - xmmSrc = load128Unaligned ((__m128i*) src); - xmmDst = load128Aligned ((__m128i*) dst); - - /* Unpacking */ - unpack_128_2x128 (xmmSrc, &xmmSrcLo, &xmmSrcHi); - unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3); - expandAlpha_2x128 (xmmSrcLo, xmmSrcHi, &xmmAlphaLo, &xmmAlphaHi); - - /* I'm loading next 4 pixels from memory before to optimze the memory read. */ - xmmSrc = load128Unaligned ((__m128i*) (src+4)); - - over_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmAlphaLo, &xmmAlphaHi, &xmmDst0, &xmmDst1); - - /* Unpacking */ - unpack_128_2x128 (xmmSrc, &xmmSrcLo, &xmmSrcHi); - expandAlpha_2x128 (xmmSrcLo, xmmSrcHi, &xmmAlphaLo, &xmmAlphaHi); - - over_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmAlphaLo, &xmmAlphaHi, &xmmDst2, &xmmDst3); - - save128Aligned ((__m128i*)dst, pack565_4x128_128 (&xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3)); - - w -= 8; - dst += 8; - src += 8; - } - - while (w--) - { - s = *src++; - d = *dst; - - *dst++ = fbCompositeSrc_8888x0565pixel (s, d); - } + dst = dst_line; + src = src_line; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)src); + cache_prefetch ((__m128i*)dst); + + dst_line += dst_stride; + src_line += src_stride; + w = width; + + /* Align dst on a 16-byte boundary */ + while (w && + ((unsigned long)dst & 15)) + { + s = *src++; + d = *dst; + + *dst++ = composite_over_8888_0565pixel (s, d); + w--; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)src); + cache_prefetch ((__m128i*)dst); + + /* It's a 8 pixel loop */ + while (w >= 8) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)src); + cache_prefetch_next ((__m128i*)dst); + + /* I'm loading unaligned because I'm not sure + * about the address alignment. + */ + xmm_src = load_128_unaligned ((__m128i*) src); + xmm_dst = load_128_aligned ((__m128i*) dst); + + /* Unpacking */ + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_565_128_4x128 (xmm_dst, + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + /* I'm loading next 4 pixels from memory + * before to optimze the memory read. 
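+ * (software pipelining: the load for the next iteration is issued
+ * early so it overlaps with the unpack/over arithmetic below)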
+ */ + xmm_src = load_128_unaligned ((__m128i*) (src + 4)); + + over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst0, &xmm_dst1); + + /* Unpacking */ + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_dst2, &xmm_dst3); + + save_128_aligned ( + (__m128i*)dst, pack_565_4x128_128 ( + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); + + w -= 8; + dst += 8; + src += 8; + } + + while (w--) + { + s = *src++; + d = *dst; + + *dst++ = composite_over_8888_0565pixel (s, d); + } } - _mm_empty(); + _mm_empty (); } -/* ------------------------------------------------------------------------------------------------- - * fbCompositeSolidMask_nx8x8888 +/* ----------------------------------------------------------------- + * composite_over_n_8_8888 */ static void -fbCompositeSolidMask_nx8x8888sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t src, srca; - uint32_t *dstLine, *dst; - uint8_t *maskLine, *mask; - int dstStride, maskStride; - uint16_t w; +sse2_composite_over_n_8_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint16_t w; uint32_t m, d; - __m128i xmmSrc, xmmAlpha, xmmDef; - __m128i xmmDst, xmmDstLo, xmmDstHi; - __m128i xmmMask, xmmMaskLo, xmmMaskHi; + __m128i xmm_src, xmm_alpha, xmm_def; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; - __m64 mmxSrc, mmxAlpha, mmxMask, mmxDest; + __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest; - fbComposeGetSolid(pSrc, src, pDst->bits.format); + src = _pixman_image_get_solid (src_image, dst_image->bits.format); srca = src >> 24; if (src == 0) return; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - xmmDef = createMask_2x32_128 (src, src); - xmmSrc = expandPixel_32_1x128 (src); - xmmAlpha = expandAlpha_1x128 (xmmSrc); - mmxSrc = _mm_movepi64_pi64 (xmmSrc); - mmxAlpha = _mm_movepi64_pi64 (xmmAlpha); + xmm_def = create_mask_2x32_128 (src, src); + xmm_src = expand_pixel_32_1x128 (src); + xmm_alpha = expand_alpha_1x128 (xmm_src); + mmx_src = _mm_movepi64_pi64 (xmm_src); + mmx_alpha = _mm_movepi64_pi64 (xmm_alpha); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; - w = width; - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)mask); - cachePrefetch ((__m128i*)dst); - - while (w && (unsigned long)dst & 15) - { - uint8_t m = *mask++; - - if (m) - { - d = *dst; - mmxMask = expandPixel_8_1x64 (m); - mmxDest = unpack_32_1x64 (d); - - *dst = pack_1x64_32 (inOver_1x64 (&mmxSrc, - &mmxAlpha, - &mmxMask, - &mmxDest)); - } 
- - w--; - dst++; - } - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)mask); - cachePrefetch ((__m128i*)dst); - - while (w >= 4) - { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)mask); - cachePrefetchNext ((__m128i*)dst); - - m = *((uint32_t*)mask); - - if (srca == 0xff && m == 0xffffffff) - { - save128Aligned ((__m128i*)dst, xmmDef); - } - else if (m) - { - xmmDst = load128Aligned ((__m128i*) dst); - xmmMask = unpack_32_1x128 (m); - xmmMask = _mm_unpacklo_epi8 (xmmMask, _mm_setzero_si128()); - - /* Unpacking */ - unpack_128_2x128 (xmmDst, &xmmDstLo, &xmmDstHi); - unpack_128_2x128 (xmmMask, &xmmMaskLo, &xmmMaskHi); - - expandAlphaRev_2x128 (xmmMaskLo, xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - - inOver_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmMaskLo, &xmmMaskHi, &xmmDstLo, &xmmDstHi); - - save128Aligned ((__m128i*)dst, pack_2x128_128 (xmmDstLo, xmmDstHi)); - } - - w -= 4; - dst += 4; - mask += 4; - } - - while (w) - { - uint8_t m = *mask++; - - if (m) - { - d = *dst; - mmxMask = expandPixel_8_1x64 (m); - mmxDest = unpack_32_1x64 (d); - - *dst = pack_1x64_32 (inOver_1x64 (&mmxSrc, - &mmxAlpha, - &mmxMask, - &mmxDest)); - } - - w--; - dst++; - } + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)mask); + cache_prefetch ((__m128i*)dst); + + while (w && (unsigned long)dst & 15) + { + uint8_t m = *mask++; + + if (m) + { + d = *dst; + mmx_mask = expand_pixel_8_1x64 (m); + mmx_dest = unpack_32_1x64 (d); + + *dst = pack_1x64_32 (in_over_1x64 (&mmx_src, + &mmx_alpha, + &mmx_mask, + &mmx_dest)); + } + + w--; + dst++; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)mask); + cache_prefetch ((__m128i*)dst); + + while (w >= 4) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)mask); + cache_prefetch_next ((__m128i*)dst); + + m = *((uint32_t*)mask); + + if (srca == 0xff && m == 0xffffffff) + { + save_128_aligned ((__m128i*)dst, xmm_def); + } + else if (m) + { + xmm_dst = load_128_aligned ((__m128i*) dst); + xmm_mask = unpack_32_1x128 (m); + xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); + + /* Unpacking */ + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + w -= 4; + dst += 4; + mask += 4; + } + + while (w) + { + uint8_t m = *mask++; + + if (m) + { + d = *dst; + mmx_mask = expand_pixel_8_1x64 (m); + mmx_dest = unpack_32_1x64 (d); + + *dst = pack_1x64_32 (in_over_1x64 (&mmx_src, + &mmx_alpha, + &mmx_mask, + &mmx_dest)); + } + + w--; + dst++; + } } - _mm_empty(); + _mm_empty (); } -/* ------------------------------------------------------------------------------------------------- - * fbCompositeSolidMask_nx8x8888 +/* ---------------------------------------------------------------- + * composite_over_n_8_8888 */ pixman_bool_t -pixmanFillsse2 (uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t data) +pixman_fill_sse2 (uint32_t *bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t data) { - uint32_t 
byte_width; - uint8_t *byte_line; + uint32_t byte_width; + uint8_t *byte_line; - __m128i xmmDef; + __m128i xmm_def; if (bpp == 16 && (data >> 16 != (data & 0xffff))) return FALSE; @@ -3359,433 +3967,459 @@ pixmanFillsse2 (uint32_t *bits, if (bpp == 16) { - stride = stride * (int) sizeof (uint32_t) / 2; - byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); - byte_width = 2 * width; - stride *= 2; + stride = stride * (int) sizeof (uint32_t) / 2; + byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); + byte_width = 2 * width; + stride *= 2; } else { - stride = stride * (int) sizeof (uint32_t) / 4; - byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); - byte_width = 4 * width; - stride *= 4; + stride = stride * (int) sizeof (uint32_t) / 4; + byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); + byte_width = 4 * width; + stride *= 4; } - cachePrefetch ((__m128i*)byte_line); - xmmDef = createMask_2x32_128 (data, data); + cache_prefetch ((__m128i*)byte_line); + xmm_def = create_mask_2x32_128 (data, data); while (height--) { - int w; - uint8_t *d = byte_line; - byte_line += stride; - w = byte_width; - - - cachePrefetchNext ((__m128i*)d); - - while (w >= 2 && ((unsigned long)d & 3)) - { - *(uint16_t *)d = data; - w -= 2; - d += 2; - } - - while (w >= 4 && ((unsigned long)d & 15)) - { - *(uint32_t *)d = data; - - w -= 4; - d += 4; - } - - cachePrefetchNext ((__m128i*)d); - - while (w >= 128) - { - cachePrefetch (((__m128i*)d) + 12); - - save128Aligned ((__m128i*)(d), xmmDef); - save128Aligned ((__m128i*)(d+16), xmmDef); - save128Aligned ((__m128i*)(d+32), xmmDef); - save128Aligned ((__m128i*)(d+48), xmmDef); - save128Aligned ((__m128i*)(d+64), xmmDef); - save128Aligned ((__m128i*)(d+80), xmmDef); - save128Aligned ((__m128i*)(d+96), xmmDef); - save128Aligned ((__m128i*)(d+112), xmmDef); - - d += 128; - w -= 128; - } - - if (w >= 64) - { - cachePrefetch (((__m128i*)d) + 8); - - save128Aligned ((__m128i*)(d), xmmDef); - save128Aligned ((__m128i*)(d+16), xmmDef); - save128Aligned ((__m128i*)(d+32), xmmDef); - save128Aligned ((__m128i*)(d+48), xmmDef); - - d += 64; - w -= 64; - } - - cachePrefetchNext ((__m128i*)d); - - if (w >= 32) - { - save128Aligned ((__m128i*)(d), xmmDef); - save128Aligned ((__m128i*)(d+16), xmmDef); - - d += 32; - w -= 32; - } - - if (w >= 16) - { - save128Aligned ((__m128i*)(d), xmmDef); - - d += 16; - w -= 16; - } - - cachePrefetchNext ((__m128i*)d); - - while (w >= 4) - { - *(uint32_t *)d = data; - - w -= 4; - d += 4; - } - - if (w >= 2) - { - *(uint16_t *)d = data; - w -= 2; - d += 2; - } + int w; + uint8_t *d = byte_line; + byte_line += stride; + w = byte_width; + + + cache_prefetch_next ((__m128i*)d); + + while (w >= 2 && ((unsigned long)d & 3)) + { + *(uint16_t *)d = data; + w -= 2; + d += 2; + } + + while (w >= 4 && ((unsigned long)d & 15)) + { + *(uint32_t *)d = data; + + w -= 4; + d += 4; + } + + cache_prefetch_next ((__m128i*)d); + + while (w >= 128) + { + cache_prefetch (((__m128i*)d) + 12); + + save_128_aligned ((__m128i*)(d), xmm_def); + save_128_aligned ((__m128i*)(d + 16), xmm_def); + save_128_aligned ((__m128i*)(d + 32), xmm_def); + save_128_aligned ((__m128i*)(d + 48), xmm_def); + save_128_aligned ((__m128i*)(d + 64), xmm_def); + save_128_aligned ((__m128i*)(d + 80), xmm_def); + save_128_aligned ((__m128i*)(d + 96), xmm_def); + save_128_aligned ((__m128i*)(d + 112), xmm_def); + + d += 128; + w -= 128; + } + + if (w >= 64) + { + cache_prefetch (((__m128i*)d) + 8); + + save_128_aligned ((__m128i*)(d), xmm_def); + 
save_128_aligned ((__m128i*)(d + 16), xmm_def); + save_128_aligned ((__m128i*)(d + 32), xmm_def); + save_128_aligned ((__m128i*)(d + 48), xmm_def); + + d += 64; + w -= 64; + } + + cache_prefetch_next ((__m128i*)d); + + if (w >= 32) + { + save_128_aligned ((__m128i*)(d), xmm_def); + save_128_aligned ((__m128i*)(d + 16), xmm_def); + + d += 32; + w -= 32; + } + + if (w >= 16) + { + save_128_aligned ((__m128i*)(d), xmm_def); + + d += 16; + w -= 16; + } + + cache_prefetch_next ((__m128i*)d); + + while (w >= 4) + { + *(uint32_t *)d = data; + + w -= 4; + d += 4; + } + + if (w >= 2) + { + *(uint16_t *)d = data; + w -= 2; + d += 2; + } } - _mm_empty(); + _mm_empty (); return TRUE; } static void -fbCompositeSolidMaskSrc_nx8x8888sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t src, srca; - uint32_t *dstLine, *dst; - uint8_t *maskLine, *mask; - int dstStride, maskStride; - uint16_t w; - uint32_t m; - - __m128i xmmSrc, xmmDef; - __m128i xmmMask, xmmMaskLo, xmmMaskHi; - - fbComposeGetSolid(pSrc, src, pDst->bits.format); +sse2_composite_src_n_8_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint16_t w; + uint32_t m; + + __m128i xmm_src, xmm_def; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); srca = src >> 24; if (src == 0) { - pixmanFillsse2 (pDst->bits.bits, pDst->bits.rowstride, - PIXMAN_FORMAT_BPP (pDst->bits.format), - xDst, yDst, width, height, 0); - return; + pixman_fill_sse2 (dst_image->bits.bits, dst_image->bits.rowstride, + PIXMAN_FORMAT_BPP (dst_image->bits.format), + dest_x, dest_y, width, height, 0); + return; } - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - xmmDef = createMask_2x32_128 (src, src); - xmmSrc = expandPixel_32_1x128 (src); + xmm_def = create_mask_2x32_128 (src, src); + xmm_src = expand_pixel_32_1x128 (src); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; - w = width; - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)mask); - cachePrefetch ((__m128i*)dst); - - while (w && (unsigned long)dst & 15) - { - uint8_t m = *mask++; - - if (m) - { - *dst = pack_1x64_32 (pixMultiply_1x64 (_mm_movepi64_pi64 (xmmSrc), expandPixel_8_1x64 (m))); - } - else - { - *dst = 0; - } - - w--; - dst++; - } - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)mask); - cachePrefetch ((__m128i*)dst); - - while (w >= 4) - { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)mask); - cachePrefetchNext ((__m128i*)dst); - - m = *((uint32_t*)mask); - - if (srca == 0xff && m == 0xffffffff) - { - save128Aligned ((__m128i*)dst, xmmDef); - } - else if (m) - { - 
xmmMask = unpack_32_1x128 (m); - xmmMask = _mm_unpacklo_epi8 (xmmMask, _mm_setzero_si128()); - - /* Unpacking */ - unpack_128_2x128 (xmmMask, &xmmMaskLo, &xmmMaskHi); - - expandAlphaRev_2x128 (xmmMaskLo, xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - - pixMultiply_2x128 (&xmmSrc, &xmmSrc, &xmmMaskLo, &xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - - save128Aligned ((__m128i*)dst, pack_2x128_128 (xmmMaskLo, xmmMaskHi)); - } - else - { - save128Aligned ((__m128i*)dst, _mm_setzero_si128()); - } - - w -= 4; - dst += 4; - mask += 4; - } - - while (w) - { - uint8_t m = *mask++; - - if (m) - { - *dst = pack_1x64_32 (pixMultiply_1x64 (_mm_movepi64_pi64 (xmmSrc), expandPixel_8_1x64 (m))); - } - else - { - *dst = 0; - } - - w--; - dst++; - } + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)mask); + cache_prefetch ((__m128i*)dst); + + while (w && (unsigned long)dst & 15) + { + uint8_t m = *mask++; + + if (m) + { + *dst = pack_1x64_32 ( + pix_multiply_1x64 ( + _mm_movepi64_pi64 (xmm_src), expand_pixel_8_1x64 (m))); + } + else + { + *dst = 0; + } + + w--; + dst++; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)mask); + cache_prefetch ((__m128i*)dst); + + while (w >= 4) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)mask); + cache_prefetch_next ((__m128i*)dst); + + m = *((uint32_t*)mask); + + if (srca == 0xff && m == 0xffffffff) + { + save_128_aligned ((__m128i*)dst, xmm_def); + } + else if (m) + { + xmm_mask = unpack_32_1x128 (m); + xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); + + /* Unpacking */ + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_src, &xmm_src, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi)); + } + else + { + save_128_aligned ((__m128i*)dst, _mm_setzero_si128 ()); + } + + w -= 4; + dst += 4; + mask += 4; + } + + while (w) + { + uint8_t m = *mask++; + + if (m) + { + *dst = pack_1x64_32 ( + pix_multiply_1x64 ( + _mm_movepi64_pi64 (xmm_src), expand_pixel_8_1x64 (m))); + } + else + { + *dst = 0; + } + + w--; + dst++; + } } - _mm_empty(); + _mm_empty (); } -/* ------------------------------------------------------------------------------------------------- - * fbCompositeSolidMask_nx8x0565 +/*----------------------------------------------------------------------- + * composite_over_n_8_0565 */ static void -fbCompositeSolidMask_nx8x0565sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t src, srca; - uint16_t *dstLine, *dst, d; - uint8_t *maskLine, *mask; - int dstStride, maskStride; - uint16_t w; +sse2_composite_over_n_8_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint16_t *dst_line, *dst, d; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint16_t w; uint32_t m; - __m64 mmxSrc, 
mmxAlpha, mmxMask, mmxDest; + __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest; - __m128i xmmSrc, xmmAlpha; - __m128i xmmMask, xmmMaskLo, xmmMaskHi; - __m128i xmmDst, xmmDst0, xmmDst1, xmmDst2, xmmDst3; + __m128i xmm_src, xmm_alpha; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; - fbComposeGetSolid(pSrc, src, pDst->bits.format); + src = _pixman_image_get_solid (src_image, dst_image->bits.format); srca = src >> 24; if (src == 0) return; - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - xmmSrc = expandPixel_32_1x128 (src); - xmmAlpha = expandAlpha_1x128 (xmmSrc); - mmxSrc = _mm_movepi64_pi64 (xmmSrc); - mmxAlpha = _mm_movepi64_pi64 (xmmAlpha); + xmm_src = expand_pixel_32_1x128 (src); + xmm_alpha = expand_alpha_1x128 (xmm_src); + mmx_src = _mm_movepi64_pi64 (xmm_src); + mmx_alpha = _mm_movepi64_pi64 (xmm_alpha); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; - w = width; - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)mask); - cachePrefetch ((__m128i*)dst); - - while (w && (unsigned long)dst & 15) - { - m = *mask++; - - if (m) - { - d = *dst; - mmxMask = expandAlphaRev_1x64 (unpack_32_1x64 (m)); - mmxDest = expand565_16_1x64 (d); - - *dst = pack565_32_16 (pack_1x64_32 (inOver_1x64 (&mmxSrc, - &mmxAlpha, - &mmxMask, - &mmxDest))); - } - - w--; - dst++; - } - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)mask); - cachePrefetch ((__m128i*)dst); - - while (w >= 8) - { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)mask); - cachePrefetchNext ((__m128i*)dst); - - xmmDst = load128Aligned ((__m128i*) dst); - unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3); - - m = *((uint32_t*)mask); - mask += 4; - - if (m) - { - xmmMask = unpack_32_1x128 (m); - xmmMask = _mm_unpacklo_epi8 (xmmMask, _mm_setzero_si128()); - - /* Unpacking */ - unpack_128_2x128 (xmmMask, &xmmMaskLo, &xmmMaskHi); - - expandAlphaRev_2x128 (xmmMaskLo, xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - inOver_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmMaskLo, &xmmMaskHi, &xmmDst0, &xmmDst1); - } - - m = *((uint32_t*)mask); - mask += 4; - - if (m) - { - xmmMask = unpack_32_1x128 (m); - xmmMask = _mm_unpacklo_epi8 (xmmMask, _mm_setzero_si128()); - - /* Unpacking */ - unpack_128_2x128 (xmmMask, &xmmMaskLo, &xmmMaskHi); - - expandAlphaRev_2x128 (xmmMaskLo, xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - inOver_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmMaskLo, &xmmMaskHi, &xmmDst2, &xmmDst3); - } - - save128Aligned ((__m128i*)dst, pack565_4x128_128 (&xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3)); - - w -= 8; - dst += 8; - } - - while (w) - { - m = *mask++; - - if (m) - { - d = *dst; - mmxMask = expandAlphaRev_1x64 (unpack_32_1x64 (m)); - mmxDest = expand565_16_1x64 (d); - - *dst = pack565_32_16 (pack_1x64_32 (inOver_1x64 (&mmxSrc, - &mmxAlpha, - &mmxMask, - &mmxDest))); - } - - w--; - dst++; - } + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)mask); + cache_prefetch ((__m128i*)dst); + + while (w && (unsigned long)dst & 15) + { + 
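+ /* head loop: single pixels via MMX until dst reaches a 16-byte
+ * boundary, so the 8-pixel loop below can use aligned 128-bit access */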
m = *mask++; + + if (m) + { + d = *dst; + mmx_mask = expand_alpha_rev_1x64 (unpack_32_1x64 (m)); + mmx_dest = expand565_16_1x64 (d); + + *dst = pack_565_32_16 ( + pack_1x64_32 ( + in_over_1x64 ( + &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); + } + + w--; + dst++; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)mask); + cache_prefetch ((__m128i*)dst); + + while (w >= 8) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)mask); + cache_prefetch_next ((__m128i*)dst); + + xmm_dst = load_128_aligned ((__m128i*) dst); + unpack_565_128_4x128 (xmm_dst, + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + + m = *((uint32_t*)mask); + mask += 4; + + if (m) + { + xmm_mask = unpack_32_1x128 (m); + xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); + + /* Unpacking */ + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst0, &xmm_dst1); + } + + m = *((uint32_t*)mask); + mask += 4; + + if (m) + { + xmm_mask = unpack_32_1x128 (m); + xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); + + /* Unpacking */ + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + in_over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst2, &xmm_dst3); + } + + save_128_aligned ( + (__m128i*)dst, pack_565_4x128_128 ( + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); + + w -= 8; + dst += 8; + } + + while (w) + { + m = *mask++; + + if (m) + { + d = *dst; + mmx_mask = expand_alpha_rev_1x64 (unpack_32_1x64 (m)); + mmx_dest = expand565_16_1x64 (d); + + *dst = pack_565_32_16 ( + pack_1x64_32 ( + in_over_1x64 ( + &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); + } + + w--; + dst++; + } } - _mm_empty(); + _mm_empty (); } -/* ------------------------------------------------------------------------------------------------- - * fbCompositeSrc_8888RevNPx0565 +/* ----------------------------------------------------------------------- + * composite_over_pixbuf_0565 */ static void -fbCompositeSrc_8888RevNPx0565sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint16_t *dstLine, *dst, d; - uint32_t *srcLine, *src, s; - int dstStride, srcStride; - uint16_t w; - uint32_t opaque, zero; +sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint16_t *dst_line, *dst, d; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + uint16_t w; + uint32_t opaque, zero; __m64 ms; - __m128i xmmSrc, xmmSrcLo, xmmSrcHi; - __m128i xmmDst, xmmDst0, xmmDst1, xmmDst2, xmmDst3; + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint16_t, 
dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); #if 0 /* FIXME @@ -3793,133 +4427,144 @@ fbCompositeSrc_8888RevNPx0565sse2 (pixman_implementation_t *imp, * I copy the code from MMX one and keep the fixme. * If it's a problem there, probably is a problem here. */ - assert (pSrc->pDrawable == pMask->pDrawable); + assert (src_image->drawable == mask_image->drawable); #endif while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)src); - cachePrefetch ((__m128i*)dst); - - while (w && (unsigned long)dst & 15) - { - s = *src++; - d = *dst; - - ms = unpack_32_1x64 (s); - - *dst++ = pack565_32_16 (pack_1x64_32 (overRevNonPre_1x64(ms, expand565_16_1x64 (d)))); - w--; - } - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)src); - cachePrefetch ((__m128i*)dst); - - while (w >= 8) - { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)src); - cachePrefetchNext ((__m128i*)dst); - - /* First round */ - xmmSrc = load128Unaligned((__m128i*)src); - xmmDst = load128Aligned ((__m128i*)dst); - - opaque = isOpaque (xmmSrc); - zero = isZero (xmmSrc); - - unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3); - unpack_128_2x128 (xmmSrc, &xmmSrcLo, &xmmSrcHi); - - /* preload next round*/ - xmmSrc = load128Unaligned((__m128i*)(src+4)); - - if (opaque) - { - invertColors_2x128 (xmmSrcLo, xmmSrcHi, &xmmDst0, &xmmDst1); - } - else if (!zero) - { - overRevNonPre_2x128 (xmmSrcLo, xmmSrcHi, &xmmDst0, &xmmDst1); - } - - /* Second round */ - opaque = isOpaque (xmmSrc); - zero = isZero (xmmSrc); - - unpack_128_2x128 (xmmSrc, &xmmSrcLo, &xmmSrcHi); - - if (opaque) - { - invertColors_2x128 (xmmSrcLo, xmmSrcHi, &xmmDst2, &xmmDst3); - } - else if (zero) - { - overRevNonPre_2x128 (xmmSrcLo, xmmSrcHi, &xmmDst2, &xmmDst3); - } - - save128Aligned ((__m128i*)dst, pack565_4x128_128 (&xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3)); - - w -= 8; - src += 8; - dst += 8; - } - - while (w) - { - s = *src++; - d = *dst; - - ms = unpack_32_1x64 (s); - - *dst++ = pack565_32_16 (pack_1x64_32 (overRevNonPre_1x64(ms, expand565_16_1x64 (d)))); - w--; - } + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)src); + cache_prefetch ((__m128i*)dst); + + while (w && (unsigned long)dst & 15) + { + s = *src++; + d = *dst; + + ms = unpack_32_1x64 (s); + + *dst++ = pack_565_32_16 ( + pack_1x64_32 ( + over_rev_non_pre_1x64 (ms, expand565_16_1x64 (d)))); + w--; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)src); + cache_prefetch ((__m128i*)dst); + + while (w >= 8) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)src); + cache_prefetch_next ((__m128i*)dst); + + /* First round */ + xmm_src = load_128_unaligned ((__m128i*)src); + xmm_dst = load_128_aligned ((__m128i*)dst); + + opaque = is_opaque (xmm_src); + zero = is_zero (xmm_src); + + unpack_565_128_4x128 (xmm_dst, + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + + /* preload next round*/ + xmm_src = load_128_unaligned ((__m128i*)(src + 4)); + + if (opaque) + { + invert_colors_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_dst0, &xmm_dst1); + } + else if (!zero) + { + over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, + 
&xmm_dst0, &xmm_dst1); + } + + /* Second round */ + opaque = is_opaque (xmm_src); + zero = is_zero (xmm_src); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + + if (opaque) + { + invert_colors_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_dst2, &xmm_dst3); + } + else if (!zero) + { + over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_dst2, &xmm_dst3); + } + + save_128_aligned ( + (__m128i*)dst, pack_565_4x128_128 ( + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); + + w -= 8; + src += 8; + dst += 8; + } + + while (w) + { + s = *src++; + d = *dst; + + ms = unpack_32_1x64 (s); + + *dst++ = pack_565_32_16 ( + pack_1x64_32 ( + over_rev_non_pre_1x64 (ms, expand565_16_1x64 (d)))); + w--; + } } - _mm_empty(); + _mm_empty (); } -/* "8888RevNP" is GdkPixbuf's format: ABGR, non premultiplied */ - -/* ------------------------------------------------------------------------------------------------- - * fbCompositeSrc_8888RevNPx8888 +/* ------------------------------------------------------------------------- + * composite_over_pixbuf_8888 */ static void -fbCompositeSrc_8888RevNPx8888sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t *dstLine, *dst, d; - uint32_t *srcLine, *src, s; - int dstStride, srcStride; - uint16_t w; - uint32_t opaque, zero; - - __m128i xmmSrcLo, xmmSrcHi; - __m128i xmmDstLo, xmmDstHi; - - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); +sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst, d; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + uint16_t w; + uint32_t opaque, zero; + + __m128i xmm_src_lo, xmm_src_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); #if 0 /* FIXME @@ -3927,1031 +4572,1103 @@ fbCompositeSrc_8888RevNPx8888sse2 (pixman_implementation_t *imp, * I copy the code from MMX one and keep the fixme. * If it's a problem there, probably is a problem here. 
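 * (the disabled assert below records the pixbuf assumption that the
 * source and mask images share one drawable)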
*/ - assert (pSrc->pDrawable == pMask->pDrawable); + assert (src_image->drawable == mask_image->drawable); #endif while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)src); - cachePrefetch ((__m128i*)dst); - - while (w && (unsigned long)dst & 15) - { - s = *src++; - d = *dst; - - *dst++ = pack_1x64_32 (overRevNonPre_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (d))); - - w--; - } - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)src); - cachePrefetch ((__m128i*)dst); - - while (w >= 4) - { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)src); - cachePrefetchNext ((__m128i*)dst); - - xmmSrcHi = load128Unaligned((__m128i*)src); - - opaque = isOpaque (xmmSrcHi); - zero = isZero (xmmSrcHi); - - unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi); - - if (opaque) - { - invertColors_2x128( xmmSrcLo, xmmSrcHi, &xmmDstLo, &xmmDstHi); - - save128Aligned ((__m128i*)dst, pack_2x128_128 (xmmDstLo, xmmDstHi)); - } - else if (!zero) - { - xmmDstHi = load128Aligned ((__m128i*)dst); - - unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi); - - overRevNonPre_2x128 (xmmSrcLo, xmmSrcHi, &xmmDstLo, &xmmDstHi); - - save128Aligned ((__m128i*)dst, pack_2x128_128 (xmmDstLo, xmmDstHi)); - } - - w -= 4; - dst += 4; - src += 4; - } - - while (w) - { - s = *src++; - d = *dst; - - *dst++ = pack_1x64_32 (overRevNonPre_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (d))); - - w--; - } + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)src); + cache_prefetch ((__m128i*)dst); + + while (w && (unsigned long)dst & 15) + { + s = *src++; + d = *dst; + + *dst++ = pack_1x64_32 ( + over_rev_non_pre_1x64 ( + unpack_32_1x64 (s), unpack_32_1x64 (d))); + + w--; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)src); + cache_prefetch ((__m128i*)dst); + + while (w >= 4) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)src); + cache_prefetch_next ((__m128i*)dst); + + xmm_src_hi = load_128_unaligned ((__m128i*)src); + + opaque = is_opaque (xmm_src_hi); + zero = is_zero (xmm_src_hi); + + unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); + + if (opaque) + { + invert_colors_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + else if (!zero) + { + xmm_dst_hi = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); + + over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + w -= 4; + dst += 4; + src += 4; + } + + while (w) + { + s = *src++; + d = *dst; + + *dst++ = pack_1x64_32 ( + over_rev_non_pre_1x64 ( + unpack_32_1x64 (s), unpack_32_1x64 (d))); + + w--; + } } - _mm_empty(); + _mm_empty (); } /* ------------------------------------------------------------------------------------------------- - * fbCompositeSolidMask_nx8888x0565C + * composite_over_n_8888_0565_ca */ static void -fbCompositeSolidMask_nx8888x0565Csse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t 
yDst, - int32_t width, - int32_t height) -{ - uint32_t src; - uint16_t *dstLine, *dst, d; - uint32_t *maskLine, *mask, m; - int dstStride, maskStride; +sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src; + uint16_t *dst_line, *dst, d; + uint32_t *mask_line, *mask, m; + int dst_stride, mask_stride; int w; - uint32_t packCmp; + uint32_t pack_cmp; - __m128i xmmSrc, xmmAlpha; - __m128i xmmMask, xmmMaskLo, xmmMaskHi; - __m128i xmmDst, xmmDst0, xmmDst1, xmmDst2, xmmDst3; + __m128i xmm_src, xmm_alpha; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; - __m64 mmxSrc, mmxAlpha, mmxMask, mmxDest; + __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest; - fbComposeGetSolid(pSrc, src, pDst->bits.format); + src = _pixman_image_get_solid (src_image, dst_image->bits.format); if (src == 0) - return; + return; - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint32_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - xmmSrc = expandPixel_32_1x128 (src); - xmmAlpha = expandAlpha_1x128 (xmmSrc); - mmxSrc = _mm_movepi64_pi64 (xmmSrc); - mmxAlpha = _mm_movepi64_pi64 (xmmAlpha); + xmm_src = expand_pixel_32_1x128 (src); + xmm_alpha = expand_alpha_1x128 (xmm_src); + mmx_src = _mm_movepi64_pi64 (xmm_src); + mmx_alpha = _mm_movepi64_pi64 (xmm_alpha); while (height--) { - w = width; - mask = maskLine; - dst = dstLine; - maskLine += maskStride; - dstLine += dstStride; - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)mask); - cachePrefetch ((__m128i*)dst); - - while (w && ((unsigned long)dst & 15)) - { - m = *(uint32_t *) mask; - - if (m) - { - d = *dst; - mmxMask = unpack_32_1x64 (m); - mmxDest = expand565_16_1x64 (d); - - *dst = pack565_32_16 (pack_1x64_32 (inOver_1x64 (&mmxSrc, - &mmxAlpha, - &mmxMask, - &mmxDest))); - } - - w--; - dst++; - mask++; - } - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)mask); - cachePrefetch ((__m128i*)dst); - - while (w >= 8) - { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)mask); - cachePrefetchNext ((__m128i*)dst); - - /* First round */ - xmmMask = load128Unaligned((__m128i*)mask); - xmmDst = load128Aligned((__m128i*)dst); - - packCmp = _mm_movemask_epi8 (_mm_cmpeq_epi32 (xmmMask, _mm_setzero_si128())); - - unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3); - unpack_128_2x128 (xmmMask, &xmmMaskLo, &xmmMaskHi); - - /* preload next round*/ - xmmMask = load128Unaligned((__m128i*)(mask+4)); - /* preload next round*/ - - if (packCmp != 0xffff) - { - inOver_2x128(&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmMaskLo, &xmmMaskHi, &xmmDst0, &xmmDst1); - } - - /* Second round */ - packCmp = _mm_movemask_epi8 (_mm_cmpeq_epi32 (xmmMask, _mm_setzero_si128())); - - unpack_128_2x128 (xmmMask, &xmmMaskLo, &xmmMaskHi); - - if (packCmp != 0xffff) - { - inOver_2x128(&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmMaskLo, &xmmMaskHi, &xmmDst2, &xmmDst3); - } - - save128Aligned ((__m128i*)dst, pack565_4x128_128 (&xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3)); - - 
w -= 8; - dst += 8; - mask += 8; - } - - while (w) - { - m = *(uint32_t *) mask; - - if (m) - { - d = *dst; - mmxMask = unpack_32_1x64 (m); - mmxDest = expand565_16_1x64 (d); - - *dst = pack565_32_16 (pack_1x64_32 (inOver_1x64 (&mmxSrc, - &mmxAlpha, - &mmxMask, - &mmxDest))); - } - - w--; - dst++; - mask++; - } + w = width; + mask = mask_line; + dst = dst_line; + mask_line += mask_stride; + dst_line += dst_stride; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)mask); + cache_prefetch ((__m128i*)dst); + + while (w && ((unsigned long)dst & 15)) + { + m = *(uint32_t *) mask; + + if (m) + { + d = *dst; + mmx_mask = unpack_32_1x64 (m); + mmx_dest = expand565_16_1x64 (d); + + *dst = pack_565_32_16 ( + pack_1x64_32 ( + in_over_1x64 ( + &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); + } + + w--; + dst++; + mask++; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)mask); + cache_prefetch ((__m128i*)dst); + + while (w >= 8) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)mask); + cache_prefetch_next ((__m128i*)dst); + + /* First round */ + xmm_mask = load_128_unaligned ((__m128i*)mask); + xmm_dst = load_128_aligned ((__m128i*)dst); + + pack_cmp = _mm_movemask_epi8 ( + _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); + + unpack_565_128_4x128 (xmm_dst, + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + /* preload next round */ + xmm_mask = load_128_unaligned ((__m128i*)(mask + 4)); + + /* preload next round */ + if (pack_cmp != 0xffff) + { + in_over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst0, &xmm_dst1); + } + + /* Second round */ + pack_cmp = _mm_movemask_epi8 ( + _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + + if (pack_cmp != 0xffff) + { + in_over_2x128 (&xmm_src, &xmm_src, + &xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_dst2, &xmm_dst3); + } + + save_128_aligned ( + (__m128i*)dst, pack_565_4x128_128 ( + &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); + + w -= 8; + dst += 8; + mask += 8; + } + + while (w) + { + m = *(uint32_t *) mask; + + if (m) + { + d = *dst; + mmx_mask = unpack_32_1x64 (m); + mmx_dest = expand565_16_1x64 (d); + + *dst = pack_565_32_16 ( + pack_1x64_32 ( + in_over_1x64 ( + &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); + } + + w--; + dst++; + mask++; + } } _mm_empty (); } -/* ------------------------------------------------------------------------------------------------- - * fbCompositeIn_nx8x8 +/* ----------------------------------------------------------------------- + * composite_in_n_8_8 */ static void -fbCompositeIn_nx8x8sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint8_t *dstLine, *dst; - uint8_t *maskLine, *mask; - int dstStride, maskStride; - uint16_t w, d, m; - uint32_t src; - uint8_t sa; - - __m128i xmmAlpha; - __m128i xmmMask, xmmMaskLo, xmmMaskHi; - __m128i xmmDst, xmmDstLo, xmmDstHi; - - fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); - - fbComposeGetSolid(pSrc, src, pDst->bits.format); +sse2_composite_in_n_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + 
pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint16_t w, d, m; + uint32_t src; + uint8_t sa; + + __m128i xmm_alpha; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + src = _pixman_image_get_solid (src_image, dst_image->bits.format); sa = src >> 24; - if (sa == 0) - return; - xmmAlpha = expandAlpha_1x128 (expandPixel_32_1x128 (src)); + xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; - w = width; - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)mask); - cachePrefetch ((__m128i*)dst); - - while (w && ((unsigned long)dst & 15)) - { - m = (uint32_t) *mask++; - d = (uint32_t) *dst; - - *dst++ = (uint8_t) pack_1x64_32 (pixMultiply_1x64 (pixMultiply_1x64 (_mm_movepi64_pi64 (xmmAlpha), unpack_32_1x64 (m)), - unpack_32_1x64 (d))); - w--; - } - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)mask); - cachePrefetch ((__m128i*)dst); - - while (w >= 16) - { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)mask); - cachePrefetchNext ((__m128i*)dst); - - xmmMask = load128Unaligned((__m128i*)mask); - xmmDst = load128Aligned((__m128i*)dst); - - unpack_128_2x128 (xmmMask, &xmmMaskLo, &xmmMaskHi); - unpack_128_2x128 (xmmDst, &xmmDstLo, &xmmDstHi); - - pixMultiply_2x128 (&xmmAlpha, &xmmAlpha, &xmmMaskLo, &xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - pixMultiply_2x128 (&xmmMaskLo, &xmmMaskHi, &xmmDstLo, &xmmDstHi, &xmmDstLo, &xmmDstHi); - - save128Aligned ((__m128i*)dst, pack_2x128_128 (xmmDstLo, xmmDstHi)); - - mask += 16; - dst += 16; - w -= 16; - } - - while (w) - { - m = (uint32_t) *mask++; - d = (uint32_t) *dst; - - *dst++ = (uint8_t) pack_1x64_32 (pixMultiply_1x64 (pixMultiply_1x64 (_mm_movepi64_pi64 (xmmAlpha), unpack_32_1x64 (m)), - unpack_32_1x64 (d))); - w--; - } + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)mask); + cache_prefetch ((__m128i*)dst); + + while (w && ((unsigned long)dst & 15)) + { + m = (uint32_t) *mask++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x64_32 ( + pix_multiply_1x64 ( + pix_multiply_1x64 (_mm_movepi64_pi64 (xmm_alpha), + unpack_32_1x64 (m)), + unpack_32_1x64 (d))); + w--; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)mask); + cache_prefetch ((__m128i*)dst); + + while (w >= 16) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)mask); + cache_prefetch_next ((__m128i*)dst); + + xmm_mask = load_128_unaligned ((__m128i*)mask); + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi, + &xmm_dst_lo, 
&xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + mask += 16; + dst += 16; + w -= 16; + } + + while (w) + { + m = (uint32_t) *mask++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x64_32 ( + pix_multiply_1x64 ( + pix_multiply_1x64 ( + _mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)), + unpack_32_1x64 (d))); + w--; + } } - _mm_empty(); + _mm_empty (); } -/* ------------------------------------------------------------------------------------------------- - * fbCompositeIn_8x8 +/* --------------------------------------------------------------------------- + * composite_in_8_8 */ static void -fbCompositeIn_8x8sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint8_t *dstLine, *dst; - uint8_t *srcLine, *src; - int srcStride, dstStride; - uint16_t w; - uint32_t s, d; - - __m128i xmmSrc, xmmSrcLo, xmmSrcHi; - __m128i xmmDst, xmmDstLo, xmmDstHi; - - fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1); +sse2_composite_in_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int src_stride, dst_stride; + uint16_t w; + uint32_t s, d; + + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)src); - cachePrefetch ((__m128i*)dst); - - while (w && ((unsigned long)dst & 15)) - { - s = (uint32_t) *src++; - d = (uint32_t) *dst; - - *dst++ = (uint8_t) pack_1x64_32 (pixMultiply_1x64 (unpack_32_1x64 (s),unpack_32_1x64 (d))); - w--; - } - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)src); - cachePrefetch ((__m128i*)dst); - - while (w >= 16) - { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)src); - cachePrefetchNext ((__m128i*)dst); - - xmmSrc = load128Unaligned((__m128i*)src); - xmmDst = load128Aligned((__m128i*)dst); - - unpack_128_2x128 (xmmSrc, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmDst, &xmmDstLo, &xmmDstHi); - - pixMultiply_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmDstLo, &xmmDstHi, &xmmDstLo, &xmmDstHi); - - save128Aligned ((__m128i*)dst, pack_2x128_128 (xmmDstLo, xmmDstHi)); - - src += 16; - dst += 16; - w -= 16; - } - - while (w) - { - s = (uint32_t) *src++; - d = (uint32_t) *dst; - - *dst++ = (uint8_t) pack_1x64_32 (pixMultiply_1x64 (unpack_32_1x64 (s),unpack_32_1x64 (d))); - w--; - } + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)src); + cache_prefetch ((__m128i*)dst); + + while (w && ((unsigned long)dst & 15)) + { + s = (uint32_t) *src++; + d = (uint32_t) *dst; + + *dst++ = 
(uint8_t) pack_1x64_32 ( + pix_multiply_1x64 ( + unpack_32_1x64 (s), unpack_32_1x64 (d))); + w--; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)src); + cache_prefetch ((__m128i*)dst); + + while (w >= 16) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)src); + cache_prefetch_next ((__m128i*)dst); + + xmm_src = load_128_unaligned ((__m128i*)src); + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_dst_lo, &xmm_dst_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + src += 16; + dst += 16; + w -= 16; + } + + while (w) + { + s = (uint32_t) *src++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x64_32 ( + pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (d))); + w--; + } } _mm_empty (); } -/* ------------------------------------------------------------------------------------------------- - * fbCompositeSrcAdd_8888x8x8 +/* ------------------------------------------------------------------------- + * composite_add_8888_8_8 */ static void -fbCompositeSrcAdd_8888x8x8sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint8_t *dstLine, *dst; - uint8_t *maskLine, *mask; - int dstStride, maskStride; - uint16_t w; - uint32_t src; - uint8_t sa; +sse2_composite_add_8888_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + uint16_t w; + uint32_t src; + uint8_t sa; uint32_t m, d; - __m128i xmmAlpha; - __m128i xmmMask, xmmMaskLo, xmmMaskHi; - __m128i xmmDst, xmmDstLo, xmmDstHi; + __m128i xmm_alpha; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; - fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - fbComposeGetSolid(pSrc, src, pDst->bits.format); + src = _pixman_image_get_solid (src_image, dst_image->bits.format); sa = src >> 24; - if (sa == 0) - return; - xmmAlpha = expandAlpha_1x128 (expandPixel_32_1x128 (src)); + xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; - w = width; - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)mask); - cachePrefetch ((__m128i*)dst); - - while (w && ((unsigned long)dst & 15)) - { - m = (uint32_t) *mask++; - d = (uint32_t) *dst; - - *dst++ = (uint8_t) pack_1x64_32 (_mm_adds_pu16 (pixMultiply_1x64 (_mm_movepi64_pi64 (xmmAlpha), unpack_32_1x64 (m)), - unpack_32_1x64 (d))); - w--; - } - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)mask); - cachePrefetch 
((__m128i*)dst); - - while (w >= 16) - { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)mask); - cachePrefetchNext ((__m128i*)dst); - - xmmMask = load128Unaligned((__m128i*)mask); - xmmDst = load128Aligned((__m128i*)dst); - - unpack_128_2x128 (xmmMask, &xmmMaskLo, &xmmMaskHi); - unpack_128_2x128 (xmmDst, &xmmDstLo, &xmmDstHi); - - pixMultiply_2x128 (&xmmAlpha, &xmmAlpha, &xmmMaskLo, &xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - - xmmDstLo = _mm_adds_epu16 (xmmMaskLo, xmmDstLo); - xmmDstHi = _mm_adds_epu16 (xmmMaskHi, xmmDstHi); - - save128Aligned ((__m128i*)dst, pack_2x128_128 (xmmDstLo, xmmDstHi)); - - mask += 16; - dst += 16; - w -= 16; - } - - while (w) - { - m = (uint32_t) *mask++; - d = (uint32_t) *dst; - - *dst++ = (uint8_t) pack_1x64_32 (_mm_adds_pu16 (pixMultiply_1x64 (_mm_movepi64_pi64 (xmmAlpha), unpack_32_1x64 (m)), - unpack_32_1x64 (d))); - w--; - } + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)mask); + cache_prefetch ((__m128i*)dst); + + while (w && ((unsigned long)dst & 15)) + { + m = (uint32_t) *mask++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x64_32 ( + _mm_adds_pu16 ( + pix_multiply_1x64 ( + _mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)), + unpack_32_1x64 (d))); + w--; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)mask); + cache_prefetch ((__m128i*)dst); + + while (w >= 16) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)mask); + cache_prefetch_next ((__m128i*)dst); + + xmm_mask = load_128_unaligned ((__m128i*)mask); + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo); + xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + + mask += 16; + dst += 16; + w -= 16; + } + + while (w) + { + m = (uint32_t) *mask++; + d = (uint32_t) *dst; + + *dst++ = (uint8_t) pack_1x64_32 ( + _mm_adds_pu16 ( + pix_multiply_1x64 ( + _mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)), + unpack_32_1x64 (d))); + + w--; + } } - _mm_empty(); + _mm_empty (); } -/* ------------------------------------------------------------------------------------------------- - * fbCompositeSrcAdd_8000x8000 +/* ---------------------------------------------------------------------- + * composite_add_8000_8000 */ static void -fbCompositeSrcAdd_8000x8000sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint8_t *dstLine, *dst; - uint8_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; - uint16_t t; - - fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1); - fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); +sse2_composite_add_8000_8000 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t 
dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + uint16_t w; + uint16_t t; + + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); while (height--) { - dst = dstLine; - src = srcLine; - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)src); - cachePrefetch ((__m128i*)dst); - - dstLine += dstStride; - srcLine += srcStride; - w = width; - - /* Small head */ - while (w && (unsigned long)dst & 3) - { - t = (*dst) + (*src++); - *dst++ = t | (0 - (t >> 8)); - w--; - } - - coreCombineAddUsse2 ((uint32_t*)dst, (uint32_t*)src, NULL, w >> 2); - - /* Small tail */ - dst += w & 0xfffc; - src += w & 0xfffc; - - w &= 3; - - while (w) - { - t = (*dst) + (*src++); - *dst++ = t | (0 - (t >> 8)); - w--; - } + dst = dst_line; + src = src_line; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)src); + cache_prefetch ((__m128i*)dst); + + dst_line += dst_stride; + src_line += src_stride; + w = width; + + /* Small head */ + while (w && (unsigned long)dst & 3) + { + t = (*dst) + (*src++); + *dst++ = t | (0 - (t >> 8)); + w--; + } + + core_combine_add_u_sse2 ((uint32_t*)dst, (uint32_t*)src, NULL, w >> 2); + + /* Small tail */ + dst += w & 0xfffc; + src += w & 0xfffc; + + w &= 3; + + while (w) + { + t = (*dst) + (*src++); + *dst++ = t | (0 - (t >> 8)); + w--; + } } - _mm_empty(); + _mm_empty (); } -/* ------------------------------------------------------------------------------------------------- - * fbCompositeSrcAdd_8888x8888 +/* --------------------------------------------------------------------- + * composite_add_8888_8888 */ static void -fbCompositeSrcAdd_8888x8888sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - int dstStride, srcStride; - - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); +sse2_composite_add_8888_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; - coreCombineAddUsse2 (dst, src, NULL, width); + core_combine_add_u_sse2 (dst, src, NULL, width); } - _mm_empty(); + _mm_empty (); } /* ------------------------------------------------------------------------------------------------- - * fbCompositeCopyAreasse2 + * sse2_composite_copy_area */ static pixman_bool_t -pixmanBltsse2 (uint32_t *src_bits, - uint32_t *dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, 
int src_y, - int dst_x, int dst_y, - int width, int height) -{ - uint8_t * src_bytes; - uint8_t * dst_bytes; - int byte_width; +pixman_blt_sse2 (uint32_t *src_bits, + uint32_t *dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) +{ + uint8_t * src_bytes; + uint8_t * dst_bytes; + int byte_width; if (src_bpp != dst_bpp) - return FALSE; + return FALSE; if (src_bpp == 16) { - src_stride = src_stride * (int) sizeof (uint32_t) / 2; - dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; - src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); - dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); - byte_width = 2 * width; - src_stride *= 2; - dst_stride *= 2; + src_stride = src_stride * (int) sizeof (uint32_t) / 2; + dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; + src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); + dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); + byte_width = 2 * width; + src_stride *= 2; + dst_stride *= 2; } else if (src_bpp == 32) { - src_stride = src_stride * (int) sizeof (uint32_t) / 4; - dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; - src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); - dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); - byte_width = 4 * width; - src_stride *= 4; - dst_stride *= 4; + src_stride = src_stride * (int) sizeof (uint32_t) / 4; + dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; + src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); + dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); + byte_width = 4 * width; + src_stride *= 4; + dst_stride *= 4; } else { - return FALSE; + return FALSE; } - cachePrefetch ((__m128i*)src_bytes); - cachePrefetch ((__m128i*)dst_bytes); + cache_prefetch ((__m128i*)src_bytes); + cache_prefetch ((__m128i*)dst_bytes); while (height--) { - int w; - uint8_t *s = src_bytes; - uint8_t *d = dst_bytes; - src_bytes += src_stride; - dst_bytes += dst_stride; - w = byte_width; - - cachePrefetchNext ((__m128i*)s); - cachePrefetchNext ((__m128i*)d); - - while (w >= 2 && ((unsigned long)d & 3)) - { - *(uint16_t *)d = *(uint16_t *)s; - w -= 2; - s += 2; - d += 2; - } - - while (w >= 4 && ((unsigned long)d & 15)) - { - *(uint32_t *)d = *(uint32_t *)s; - - w -= 4; - s += 4; - d += 4; - } - - cachePrefetchNext ((__m128i*)s); - cachePrefetchNext ((__m128i*)d); - - while (w >= 64) - { - __m128i xmm0, xmm1, xmm2, xmm3; - - /* 128 bytes ahead */ - cachePrefetch (((__m128i*)s) + 8); - cachePrefetch (((__m128i*)d) + 8); - - xmm0 = load128Unaligned ((__m128i*)(s)); - xmm1 = load128Unaligned ((__m128i*)(s+16)); - xmm2 = load128Unaligned ((__m128i*)(s+32)); - xmm3 = load128Unaligned ((__m128i*)(s+48)); - - save128Aligned ((__m128i*)(d), xmm0); - save128Aligned ((__m128i*)(d+16), xmm1); - save128Aligned ((__m128i*)(d+32), xmm2); - save128Aligned ((__m128i*)(d+48), xmm3); - - s += 64; - d += 64; - w -= 64; - } - - cachePrefetchNext ((__m128i*)s); - cachePrefetchNext ((__m128i*)d); - - while (w >= 16) - { - save128Aligned ((__m128i*)d, load128Unaligned ((__m128i*)s) ); - - w -= 16; - d += 16; - s += 16; - } - - cachePrefetchNext ((__m128i*)s); - cachePrefetchNext ((__m128i*)d); - - while (w >= 4) - { - *(uint32_t *)d = *(uint32_t *)s; - - w -= 4; - s += 4; - d += 4; - } - - if (w >= 
2) - { - *(uint16_t *)d = *(uint16_t *)s; - w -= 2; - s += 2; - d += 2; - } + int w; + uint8_t *s = src_bytes; + uint8_t *d = dst_bytes; + src_bytes += src_stride; + dst_bytes += dst_stride; + w = byte_width; + + cache_prefetch_next ((__m128i*)s); + cache_prefetch_next ((__m128i*)d); + + while (w >= 2 && ((unsigned long)d & 3)) + { + *(uint16_t *)d = *(uint16_t *)s; + w -= 2; + s += 2; + d += 2; + } + + while (w >= 4 && ((unsigned long)d & 15)) + { + *(uint32_t *)d = *(uint32_t *)s; + + w -= 4; + s += 4; + d += 4; + } + + cache_prefetch_next ((__m128i*)s); + cache_prefetch_next ((__m128i*)d); + + while (w >= 64) + { + __m128i xmm0, xmm1, xmm2, xmm3; + + /* 128 bytes ahead */ + cache_prefetch (((__m128i*)s) + 8); + cache_prefetch (((__m128i*)d) + 8); + + xmm0 = load_128_unaligned ((__m128i*)(s)); + xmm1 = load_128_unaligned ((__m128i*)(s + 16)); + xmm2 = load_128_unaligned ((__m128i*)(s + 32)); + xmm3 = load_128_unaligned ((__m128i*)(s + 48)); + + save_128_aligned ((__m128i*)(d), xmm0); + save_128_aligned ((__m128i*)(d + 16), xmm1); + save_128_aligned ((__m128i*)(d + 32), xmm2); + save_128_aligned ((__m128i*)(d + 48), xmm3); + + s += 64; + d += 64; + w -= 64; + } + + cache_prefetch_next ((__m128i*)s); + cache_prefetch_next ((__m128i*)d); + + while (w >= 16) + { + save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) ); + + w -= 16; + d += 16; + s += 16; + } + + cache_prefetch_next ((__m128i*)s); + cache_prefetch_next ((__m128i*)d); + + while (w >= 4) + { + *(uint32_t *)d = *(uint32_t *)s; + + w -= 4; + s += 4; + d += 4; + } + + if (w >= 2) + { + *(uint16_t *)d = *(uint16_t *)s; + w -= 2; + s += 2; + d += 2; + } } - _mm_empty(); + _mm_empty (); return TRUE; } static void -fbCompositeCopyAreasse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - pixmanBltsse2 (pSrc->bits.bits, - pDst->bits.bits, - pSrc->bits.rowstride, - pDst->bits.rowstride, - PIXMAN_FORMAT_BPP (pSrc->bits.format), - PIXMAN_FORMAT_BPP (pDst->bits.format), - xSrc, ySrc, xDst, yDst, width, height); +sse2_composite_copy_area (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + pixman_blt_sse2 (src_image->bits.bits, + dst_image->bits.bits, + src_image->bits.rowstride, + dst_image->bits.rowstride, + PIXMAN_FORMAT_BPP (src_image->bits.format), + PIXMAN_FORMAT_BPP (dst_image->bits.format), + src_x, src_y, dest_x, dest_y, width, height); } #if 0 /* This code are buggy in MMX version, now the bug was translated to SSE2 version */ void -fbCompositeOver_x888x8x8888sse2 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int32_t xSrc, - int32_t ySrc, - int32_t xMask, - int32_t yMask, - int32_t xDst, - int32_t yDst, - int32_t width, - int32_t height) -{ - uint32_t *src, *srcLine, s; - uint32_t *dst, *dstLine, d; - uint8_t *mask, *maskLine; - uint32_t m; - int srcStride, maskStride, dstStride; +sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + 
int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *src, *src_line, s; + uint32_t *dst, *dst_line, d; + uint8_t *mask, *mask_line; + uint32_t m; + int src_stride, mask_stride, dst_stride; uint16_t w; - __m128i xmmSrc, xmmSrcLo, xmmSrcHi; - __m128i xmmDst, xmmDstLo, xmmDstHi; - __m128i xmmMask, xmmMaskLo, xmmMaskHi; + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); + PIXMAN_IMAGE_GET_LINE ( + dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { - src = srcLine; - srcLine += srcStride; - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; - - w = width; - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)src); - cachePrefetch ((__m128i*)dst); - cachePrefetch ((__m128i*)mask); - - while (w && (unsigned long)dst & 15) - { - s = 0xff000000 | *src++; - m = (uint32_t) *mask++; - d = *dst; - - __m64 ms = unpack_32_1x64 (s); - - if (m != 0xff) - { - ms = inOver_1x64 (ms, - xMask00ff, - expandAlphaRev_1x64 (unpack_32_1x64 (m)), - unpack_32_1x64 (d)); - } - - *dst++ = pack_1x64_32 (ms); - w--; - } - - /* call prefetch hint to optimize cache load*/ - cachePrefetch ((__m128i*)src); - cachePrefetch ((__m128i*)dst); - cachePrefetch ((__m128i*)mask); - - while (w >= 4) - { - /* fill cache line with next memory */ - cachePrefetchNext ((__m128i*)src); - cachePrefetchNext ((__m128i*)dst); - cachePrefetchNext ((__m128i*)mask); - - m = *(uint32_t*) mask; - xmmSrc = _mm_or_si128 (load128Unaligned ((__m128i*)src), Maskff000000); - - if (m == 0xffffffff) - { - save128Aligned ((__m128i*)dst, xmmSrc); - } - else - { - xmmDst = load128Aligned ((__m128i*)dst); - - xmmMask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); - - unpack_128_2x128 (xmmSrc, &xmmSrcLo, &xmmSrcHi); - unpack_128_2x128 (xmmMask, &xmmMaskLo, &xmmMaskHi); - unpack_128_2x128 (xmmDst, &xmmDstLo, &xmmDstHi); - - expandAlphaRev_2x128 (xmmMaskLo, xmmMaskHi, &xmmMaskLo, &xmmMaskHi); - - inOver_2x128 (xmmSrcLo, xmmSrcHi, Mask00ff, Mask00ff, xmmMaskLo, xmmMaskHi, &xmmDstLo, &xmmDstHi); - - save128Aligned( (__m128i*)dst, pack_2x128_128 (xmmDstLo, xmmDstHi)); - } - - src += 4; - dst += 4; - mask += 4; - w -= 4; - } - - while (w) - { - m = (uint32_t) *mask++; - - if (m) - { - s = 0xff000000 | *src; - - if (m == 0xff) - { - *dst = s; - } - else - { - d = *dst; - - *dst = pack_1x64_32 (inOver_1x64 (unpack_32_1x64 (s), - xMask00ff, - expandAlphaRev_1x64 (unpack_32_1x64 (m)), - unpack_32_1x64 (d))); - } - - } - - src++; - dst++; - w--; - } + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + w = width; + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)src); + cache_prefetch ((__m128i*)dst); + cache_prefetch ((__m128i*)mask); + + while (w && (unsigned long)dst & 15) + { + s = 0xff000000 | *src++; + m = (uint32_t) *mask++; + d = *dst; + + __m64 ms = unpack_32_1x64 (s); + + if (m != 0xff) + { + ms = in_over_1x64 (ms, + 
mask_x00ff, + expand_alpha_rev_1x64 (unpack_32_1x64 (m)), + unpack_32_1x64 (d)); + } + + *dst++ = pack_1x64_32 (ms); + w--; + } + + /* call prefetch hint to optimize cache load*/ + cache_prefetch ((__m128i*)src); + cache_prefetch ((__m128i*)dst); + cache_prefetch ((__m128i*)mask); + + while (w >= 4) + { + /* fill cache line with next memory */ + cache_prefetch_next ((__m128i*)src); + cache_prefetch_next ((__m128i*)dst); + cache_prefetch_next ((__m128i*)mask); + + m = *(uint32_t*) mask; + xmm_src = _mm_or_si128 (load_128_unaligned ((__m128i*)src), mask_ff000000); + + if (m == 0xffffffff) + { + save_128_aligned ((__m128i*)dst, xmm_src); + } + else + { + xmm_dst = load_128_aligned ((__m128i*)dst); + + xmm_mask = _mm_unpacklo_epi16 ( + unpack_32_1x128 (m), _mm_setzero_si128 ()); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + in_over_2x128 (xmm_src_lo, xmm_src_hi, + mask_00ff, mask_00ff, + xmm_mask_lo, xmm_mask_hi, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + src += 4; + dst += 4; + mask += 4; + w -= 4; + } + + while (w) + { + m = (uint32_t) *mask++; + + if (m) + { + s = 0xff000000 | *src; + + if (m == 0xff) + { + *dst = s; + } + else + { + d = *dst; + + *dst = pack_1x64_32 ( + in_over_1x64 ( + unpack_32_1x64 (s), + mask_x00ff, + expand_alpha_rev_1x64 (unpack_32_1x64 (m)), + unpack_32_1x64 (d))); + } + + } + + src++; + dst++; + w--; + } } - _mm_empty(); + _mm_empty (); } + #endif -static const FastPathInfo sse2_fast_paths[] = -{ - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, fbCompositeSolidMask_nx8x0565sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, fbCompositeSolidMask_nx8x0565sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSolid_nx8888sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSolid_nx8888sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSolid_nx0565sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_8888x0565sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_8888x0565sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888sse2, 0 }, +static const pixman_fast_path_t sse2_fast_paths[] = +{ + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, sse2_composite_over_n_8_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, sse2_composite_over_n_8_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, 
sse2_composite_over_n_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_over_n_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, sse2_composite_over_n_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, sse2_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, sse2_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, sse2_composite_over_8888_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, sse2_composite_over_8888_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, sse2_composite_over_8888_0565, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_over_n_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_over_n_8_8888, 0 }, #if 0 /* FIXME: This code are buggy in MMX version, now the bug was translated to SSE2 version */ - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeOver_x888x8x8888sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeOver_x888x8x8888sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeOver_x888x8x8888sse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeOver_x888x8x8888sse2, 0 }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_over_x888_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_over_x888_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_over_x888_8_8888, 0 }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_over_x888_8_8888, 0 }, #endif - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSrc_x888xnx8888sse2, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSrc_x888xnx8888sse2, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSrc_x888xnx8888sse2, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSrc_x888xnx8888sse2, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888sse2, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888sse2, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8x8888sse2, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8x8888sse2, NEED_SOLID_MASK }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8888x8888Csse2, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8888x8888Csse2, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8888x8888Csse2, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, 
fbCompositeSolidMask_nx8888x8888Csse2, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, fbCompositeSolidMask_nx8888x0565Csse2, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, fbCompositeSolidMask_nx8888x0565Csse2, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888RevNPx8888sse2, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888RevNPx8888sse2, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888RevNPx8888sse2, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888RevNPx8888sse2, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8b8g8r8, fbCompositeSrc_8888RevNPx8888sse2, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, fbCompositeSrc_8888RevNPx8888sse2, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_x8b8g8r8, fbCompositeSrc_8888RevNPx8888sse2, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, fbCompositeSrc_8888RevNPx8888sse2, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, fbCompositeSrc_8888RevNPx0565sse2, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_r5g6b5, fbCompositeSrc_8888RevNPx0565sse2, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_b5g6r5, fbCompositeSrc_8888RevNPx0565sse2, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, fbCompositeSrc_8888RevNPx0565sse2, NEED_PIXBUF }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeCopyAreasse2, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeCopyAreasse2, 0 }, - - { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcAdd_8000x8000sse2, 0 }, - { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrcAdd_8888x8888sse2, 0 }, - { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrcAdd_8888x8888sse2, 0 }, - { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeSrcAdd_8888x8x8sse2, 0 }, - - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSolidMaskSrc_nx8x8888sse2, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSolidMaskSrc_nx8x8888sse2, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMaskSrc_nx8x8888sse2, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMaskSrc_nx8x8888sse2, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeCopyAreasse2, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeCopyAreasse2, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeCopyAreasse2, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeCopyAreasse2, 0 }, - { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeCopyAreasse2, 0 }, - { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeCopyAreasse2, 0 }, - { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeCopyAreasse2, 0 }, - { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeCopyAreasse2, 0 }, - - { PIXMAN_OP_IN, 
PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeIn_8x8sse2, 0 }, - { PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeIn_nx8x8sse2, 0 }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_over_x888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_over_x888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_over_x888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_over_x888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_over_8888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_over_8888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_over_8888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_over_8888_n_8888, NEED_SOLID_MASK }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, sse2_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, sse2_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, sse2_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, sse2_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, sse2_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, sse2_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_a8r8g8b8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_x8r8g8b8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8b8g8r8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_x8b8g8r8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, sse2_composite_over_pixbuf_0565, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_r5g6b5, sse2_composite_over_pixbuf_0565, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_b5g6r5, sse2_composite_over_pixbuf_0565, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, sse2_composite_over_pixbuf_0565, NEED_PIXBUF }, + { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_copy_area, 0 }, + { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, sse2_composite_copy_area, 0 }, + + { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, 
sse2_composite_add_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, + { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, sse2_composite_add_8000_8000, 0 }, + { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, sse2_composite_add_8888_8888, 0 }, + { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, sse2_composite_add_8888_8888, 0 }, + { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, sse2_composite_add_8888_8_8, 0 }, + + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_src_n_8_8888, 0 }, + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_src_n_8_8888, 0 }, + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_src_n_8_8888, 0 }, + { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_src_n_8_8888, 0 }, + { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, sse2_composite_copy_area, 0 }, + { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, sse2_composite_copy_area, 0 }, + { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_copy_area, 0 }, + { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, sse2_composite_copy_area, 0 }, + { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_copy_area, 0 }, + { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, sse2_composite_copy_area, 0 }, + { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, sse2_composite_copy_area, 0 }, + { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, sse2_composite_copy_area, 0 }, + + { PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, sse2_composite_in_8_8, 0 }, + { PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, sse2_composite_in_n_8_8, 0 }, { PIXMAN_OP_NONE }, }; /* * Work around GCC bug causing crashes in Mozilla with SSE2 - * - * When using SSE2 intrinsics, gcc assumes that the stack is 16 byte - * aligned. Unfortunately some code, such as Mozilla and Mono contain - * code that aligns the stack to 4 bytes. + * + * When using -msse, gcc generates movdqa instructions assuming that + * the stack is 16 byte aligned. Unfortunately some applications, such + * as Mozilla and Mono, end up aligning the stack to 4 bytes, which + * causes the movdqa instructions to fail. * * The __force_align_arg_pointer__ makes gcc generate a prologue that * realigns the stack pointer to 16 bytes. 
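/*
 * For reference, each sse2_fast_paths row above keys a specialized
 * routine by (operator, source format, mask format, destination
 * format) plus a flags word such as NEED_SOLID_MASK. A simplified
 * sketch of the table scan, assuming the pixman_fast_path_t fields
 * mirror the table columns; the real _pixman_run_fast_path also
 * honors the flags and rejects transformed, repeating or
 * alpha-mapped images before using an entry:
 */
static const pixman_fast_path_t *
lookup_fast_path (const pixman_fast_path_t *paths,
                  pixman_op_t               op,
                  pixman_format_code_t      src_format,
                  pixman_format_code_t      mask_format,
                  pixman_format_code_t      dest_format)
{
    const pixman_fast_path_t *p;

    for (p = paths; p->op != PIXMAN_OP_NONE; p++)
    {
	if (p->op == op &&
	    p->src_format == src_format &&
	    p->mask_format == mask_format &&
	    p->dest_format == dest_format)
	{
	    return p;	/* first match wins; table order encodes priority */
	}
    }

    return NULL;	/* caller falls through to the delegate implementation */
}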
@@ -4961,56 +5678,63 @@ static const FastPathInfo sse2_fast_paths[] = * * See https://bugs.freedesktop.org/show_bug.cgi?id=15693 */ +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) __attribute__((__force_align_arg_pointer__)) +#endif static void sse2_composite (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t *src, - pixman_image_t *mask, - pixman_image_t *dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_op_t op, + pixman_image_t * src, + pixman_image_t * mask, + pixman_image_t * dest, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { if (_pixman_run_fast_path (sse2_fast_paths, imp, - op, src, mask, dest, - src_x, src_y, - mask_x, mask_y, - dest_x, dest_y, - width, height)) + op, src, mask, dest, + src_x, src_y, + mask_x, mask_y, + dest_x, dest_y, + width, height)) { return; } _pixman_implementation_composite (imp->delegate, op, - src, mask, dest, - src_x, src_y, - mask_x, mask_y, - dest_x, dest_y, - width, height); + src, mask, dest, + src_x, src_y, + mask_x, mask_y, + dest_x, dest_y, + width, height); } +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) __attribute__((__force_align_arg_pointer__)) +#endif static pixman_bool_t sse2_blt (pixman_implementation_t *imp, - uint32_t *src_bits, - uint32_t *dst_bits, - int src_stride, - int dst_stride, - int src_bpp, - int dst_bpp, - int src_x, int src_y, - int dst_x, int dst_y, - int width, int height) -{ - if (!pixmanBltsse2 ( - src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, - src_x, src_y, dst_x, dst_y, width, height)) + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) +{ + if (!pixman_blt_sse2 ( + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, + src_x, src_y, dst_x, dst_y, width, height)) { return _pixman_implementation_blt ( @@ -5022,19 +5746,21 @@ sse2_blt (pixman_implementation_t *imp, return TRUE; } +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) __attribute__((__force_align_arg_pointer__)) +#endif static pixman_bool_t sse2_fill (pixman_implementation_t *imp, - uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor) -{ - if (!pixmanFillsse2 (bits, stride, bpp, x, y, width, height, xor)) + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + if (!pixman_fill_sse2 (bits, stride, bpp, x, y, width, height, xor)) { return _pixman_implementation_fill ( imp->delegate, bits, stride, bpp, x, y, width, height, xor); @@ -5043,72 +5769,75 @@ sse2_fill (pixman_implementation_t *imp, return TRUE; } +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) +__attribute__((__force_align_arg_pointer__)) +#endif pixman_implementation_t * -_pixman_implementation_create_sse2 (pixman_implementation_t *toplevel) +_pixman_implementation_create_sse2 (void) { - pixman_implementation_t *mmx = _pixman_implementation_create_mmx (NULL); - pixman_implementation_t *imp = _pixman_implementation_create (toplevel, mmx); + pixman_implementation_t *mmx = _pixman_implementation_create_mmx (); + pixman_implementation_t *imp = _pixman_implementation_create (mmx); /* SSE2 constants */ - Mask565r = createMask_2x32_128 
(0x00f80000, 0x00f80000); - Mask565g1 = createMask_2x32_128 (0x00070000, 0x00070000); - Mask565g2 = createMask_2x32_128 (0x000000e0, 0x000000e0); - Mask565b = createMask_2x32_128 (0x0000001f, 0x0000001f); - MaskRed = createMask_2x32_128 (0x00f80000, 0x00f80000); - MaskGreen = createMask_2x32_128 (0x0000fc00, 0x0000fc00); - MaskBlue = createMask_2x32_128 (0x000000f8, 0x000000f8); - Mask565FixRB = createMask_2x32_128 (0x00e000e0, 0x00e000e0); - Mask565FixG = createMask_2x32_128 (0x0000c000, 0x0000c000); - Mask0080 = createMask_16_128 (0x0080); - Mask00ff = createMask_16_128 (0x00ff); - Mask0101 = createMask_16_128 (0x0101); - Maskffff = createMask_16_128 (0xffff); - Maskff000000 = createMask_2x32_128 (0xff000000, 0xff000000); - MaskAlpha = createMask_2x32_128 (0x00ff0000, 0x00000000); - + mask_565_r = create_mask_2x32_128 (0x00f80000, 0x00f80000); + mask_565_g1 = create_mask_2x32_128 (0x00070000, 0x00070000); + mask_565_g2 = create_mask_2x32_128 (0x000000e0, 0x000000e0); + mask_565_b = create_mask_2x32_128 (0x0000001f, 0x0000001f); + mask_red = create_mask_2x32_128 (0x00f80000, 0x00f80000); + mask_green = create_mask_2x32_128 (0x0000fc00, 0x0000fc00); + mask_blue = create_mask_2x32_128 (0x000000f8, 0x000000f8); + mask_565_fix_rb = create_mask_2x32_128 (0x00e000e0, 0x00e000e0); + mask_565_fix_g = create_mask_2x32_128 (0x0000c000, 0x0000c000); + mask_0080 = create_mask_16_128 (0x0080); + mask_00ff = create_mask_16_128 (0x00ff); + mask_0101 = create_mask_16_128 (0x0101); + mask_ffff = create_mask_16_128 (0xffff); + mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000); + mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000); + /* MMX constants */ - xMask565rgb = createMask_2x32_64 (0x000001f0, 0x003f001f); - xMask565Unpack = createMask_2x32_64 (0x00000084, 0x04100840); - - xMask0080 = createMask_16_64 (0x0080); - xMask00ff = createMask_16_64 (0x00ff); - xMask0101 = createMask_16_64 (0x0101); - xMaskAlpha = createMask_2x32_64 (0x00ff0000, 0x00000000); + mask_x565_rgb = create_mask_2x32_64 (0x000001f0, 0x003f001f); + mask_x565_unpack = create_mask_2x32_64 (0x00000084, 0x04100840); + + mask_x0080 = create_mask_16_64 (0x0080); + mask_x00ff = create_mask_16_64 (0x00ff); + mask_x0101 = create_mask_16_64 (0x0101); + mask_x_alpha = create_mask_2x32_64 (0x00ff0000, 0x00000000); - _mm_empty(); + _mm_empty (); /* Set up function pointers */ - + /* SSE code patch for fbcompose.c */ - imp->combine_32[PIXMAN_OP_OVER] = sse2CombineOverU; - imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2CombineOverReverseU; - imp->combine_32[PIXMAN_OP_IN] = sse2CombineInU; - imp->combine_32[PIXMAN_OP_IN_REVERSE] = sse2CombineInReverseU; - imp->combine_32[PIXMAN_OP_OUT] = sse2CombineOutU; - imp->combine_32[PIXMAN_OP_OUT_REVERSE] = sse2CombineOutReverseU; - imp->combine_32[PIXMAN_OP_ATOP] = sse2CombineAtopU; - imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = sse2CombineAtopReverseU; - imp->combine_32[PIXMAN_OP_XOR] = sse2CombineXorU; - imp->combine_32[PIXMAN_OP_ADD] = sse2CombineAddU; - - imp->combine_32[PIXMAN_OP_SATURATE] = sse2CombineSaturateU; - - imp->combine_32_ca[PIXMAN_OP_SRC] = sse2CombineSrcC; - imp->combine_32_ca[PIXMAN_OP_OVER] = sse2CombineOverC; - imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = sse2CombineOverReverseC; - imp->combine_32_ca[PIXMAN_OP_IN] = sse2CombineInC; - imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = sse2CombineInReverseC; - imp->combine_32_ca[PIXMAN_OP_OUT] = sse2CombineOutC; - imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = sse2CombineOutReverseC; - imp->combine_32_ca[PIXMAN_OP_ATOP] = 
sse2CombineAtopC; - imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = sse2CombineAtopReverseC; - imp->combine_32_ca[PIXMAN_OP_XOR] = sse2CombineXorC; - imp->combine_32_ca[PIXMAN_OP_ADD] = sse2CombineAddC; - + imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u; + imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u; + imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u; + imp->combine_32[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_u; + imp->combine_32[PIXMAN_OP_OUT] = sse2_combine_out_u; + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_u; + imp->combine_32[PIXMAN_OP_ATOP] = sse2_combine_atop_u; + imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_u; + imp->combine_32[PIXMAN_OP_XOR] = sse2_combine_xor_u; + imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u; + + imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u; + + imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca; + imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca; + imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_IN] = sse2_combine_in_ca; + imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_OUT] = sse2_combine_out_ca; + imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP] = sse2_combine_atop_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_XOR] = sse2_combine_xor_ca; + imp->combine_32_ca[PIXMAN_OP_ADD] = sse2_combine_add_ca; + imp->composite = sse2_composite; imp->blt = sse2_blt; imp->fill = sse2_fill; - + return imp; } diff --git a/lib/pixman/pixman/pixman-timer.c b/lib/pixman/pixman/pixman-timer.c index c76264431..f5ae18e89 100644 --- a/lib/pixman/pixman/pixman-timer.c +++ b/lib/pixman/pixman/pixman-timer.c @@ -19,41 +19,48 @@ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#ifdef HAVE_CONFIG_H #include <config.h> +#endif + #include <stdlib.h> #include <stdio.h> #include "pixman-private.h" -static PixmanTimer *timers; +#ifdef PIXMAN_TIMERS + +static pixman_timer_t *timers; static void dump_timers (void) { - PixmanTimer *timer; + pixman_timer_t *timer; for (timer = timers; timer != NULL; timer = timer->next) { printf ("%s: total: %llu n: %llu avg: %f\n", - timer->name, - timer->total, - timer->n_times, - timer->total / (double)timer->n_times); + timer->name, + timer->total, + timer->n_times, + timer->total / (double)timer->n_times); } } void -pixman_timer_register (PixmanTimer *timer) +pixman_timer_register (pixman_timer_t *timer) { static int initialized; - int atexit(void (*function)(void)); + int atexit (void (*function)(void)); if (!initialized) { atexit (dump_timers); initialized = 1; } - + timer->next = timers; timers = timer; } + +#endif diff --git a/lib/pixman/pixman/pixman-transformed-accessors.c b/lib/pixman/pixman/pixman-transformed-accessors.c deleted file mode 100644 index 442ca2474..000000000 --- a/lib/pixman/pixman/pixman-transformed-accessors.c +++ /dev/null @@ -1,3 +0,0 @@ -#define PIXMAN_FB_ACCESSORS - -#include "pixman-transformed.c" diff --git a/lib/pixman/pixman/pixman-transformed.c b/lib/pixman/pixman/pixman-transformed.c deleted file mode 100644 index d721b35a2..000000000 --- a/lib/pixman/pixman/pixman-transformed.c +++ /dev/null @@ -1,510 +0,0 @@ -/* - * - * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc. 
- * 2005 Lars Knoll & Zack Rusin, Trolltech - * 2008 Aaron Plattner, NVIDIA Corporation - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Keith Packard not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Keith Packard makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#include <stdlib.h> - -#include "pixman-private.h" - -#define Alpha(x) ((x) >> 24) -#define Red(x) (((x) >> 16) & 0xff) -#define Green(x) (((x) >> 8) & 0xff) -#define Blue(x) ((x) & 0xff) - -#define Alpha64(x) ((x) >> 48) -#define Red64(x) (((x) >> 32) & 0xffff) -#define Green64(x) (((x) >> 16) & 0xffff) -#define Blue64(x) ((x) & 0xffff) - -/* - * Fetch from region strategies - */ -typedef FASTCALL uint32_t (*fetchFromRegionProc)(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc32 fetch, pixman_box32_t *box); - -/* - * There are two properties we can make use of when fetching pixels - * - * (a) Is the source clip just the image itself? - * - * (b) Do we know the coordinates of the pixel to fetch are - * within the image boundaries; - * - * Source clips are almost never used, so the important case to optimize - * for is when src_clip is false. Since inside_bounds is statically known, - * the last part of the if statement will normally be optimized away. 
- */ -static force_inline uint32_t -do_fetch (bits_image_t *pict, int x, int y, fetchPixelProc32 fetch, - pixman_bool_t src_clip, - pixman_bool_t inside_bounds) -{ - if (src_clip) - { - if (pixman_region32_contains_point (pict->common.src_clip, x, y,NULL)) - return fetch (pict, x, y); - else - return 0; - } - else if (inside_bounds) - { - return fetch (pict, x, y); - } - else - { - if (x >= 0 && x < pict->width && y >= 0 && y < pict->height) - return fetch (pict, x, y); - else - return 0; - } -} - -/* - * Fetching Algorithms - */ -static inline uint32_t -fetch_nearest (bits_image_t *pict, - fetchPixelProc32 fetch, - pixman_bool_t affine, - pixman_repeat_t repeat, - pixman_bool_t has_src_clip, - const pixman_vector_t *v) -{ - if (!v->vector[2]) - { - return 0; - } - else - { - int x, y; - pixman_bool_t inside_bounds; - - if (!affine) - { - x = DIV(v->vector[0], v->vector[2]); - y = DIV(v->vector[1], v->vector[2]); - } - else - { - x = v->vector[0]>>16; - y = v->vector[1]>>16; - } - - switch (repeat) - { - case PIXMAN_REPEAT_NORMAL: - x = MOD (x, pict->width); - y = MOD (y, pict->height); - inside_bounds = TRUE; - break; - - case PIXMAN_REPEAT_PAD: - x = CLIP (x, 0, pict->width-1); - y = CLIP (y, 0, pict->height-1); - inside_bounds = TRUE; - break; - - case PIXMAN_REPEAT_REFLECT: - x = MOD (x, pict->width * 2); - if (x >= pict->width) - x = pict->width * 2 - x - 1; - y = MOD (y, pict->height * 2); - if (y >= pict->height) - y = pict->height * 2 - y - 1; - inside_bounds = TRUE; - break; - - case PIXMAN_REPEAT_NONE: - inside_bounds = FALSE; - break; - - default: - return 0; - } - - return do_fetch (pict, x, y, fetch, has_src_clip, inside_bounds); - } -} - -static inline uint32_t -fetch_bilinear (bits_image_t *pict, - fetchPixelProc32 fetch, - pixman_bool_t affine, - pixman_repeat_t repeat, - pixman_bool_t has_src_clip, - const pixman_vector_t *v) -{ - if (!v->vector[2]) - { - return 0; - } - else - { - int x1, x2, y1, y2, distx, idistx, disty, idisty; - uint32_t tl, tr, bl, br, r; - uint32_t ft, fb; - pixman_bool_t inside_bounds; - - if (!affine) - { - pixman_fixed_48_16_t div; - div = ((pixman_fixed_48_16_t)v->vector[0] << 16)/v->vector[2]; - x1 = div >> 16; - distx = ((pixman_fixed_t)div >> 8) & 0xff; - div = ((pixman_fixed_48_16_t)v->vector[1] << 16)/v->vector[2]; - y1 = div >> 16; - disty = ((pixman_fixed_t)div >> 8) & 0xff; - } - else - { - x1 = v->vector[0] >> 16; - distx = (v->vector[0] >> 8) & 0xff; - y1 = v->vector[1] >> 16; - disty = (v->vector[1] >> 8) & 0xff; - } - x2 = x1 + 1; - y2 = y1 + 1; - - idistx = 256 - distx; - idisty = 256 - disty; - - switch (repeat) - { - case PIXMAN_REPEAT_NORMAL: - x1 = MOD (x1, pict->width); - x2 = MOD (x2, pict->width); - y1 = MOD (y1, pict->height); - y2 = MOD (y2, pict->height); - inside_bounds = TRUE; - break; - - case PIXMAN_REPEAT_PAD: - x1 = CLIP (x1, 0, pict->width-1); - x2 = CLIP (x2, 0, pict->width-1); - y1 = CLIP (y1, 0, pict->height-1); - y2 = CLIP (y2, 0, pict->height-1); - inside_bounds = TRUE; - break; - - case PIXMAN_REPEAT_REFLECT: - x1 = MOD (x1, pict->width * 2); - if (x1 >= pict->width) - x1 = pict->width * 2 - x1 - 1; - x2 = MOD (x2, pict->width * 2); - if (x2 >= pict->width) - x2 = pict->width * 2 - x2 - 1; - y1 = MOD (y1, pict->height * 2); - if (y1 >= pict->height) - y1 = pict->height * 2 - y1 - 1; - y2 = MOD (y2, pict->height * 2); - if (y2 >= pict->height) - y2 = pict->height * 2 - y2 - 1; - inside_bounds = TRUE; - break; - - case PIXMAN_REPEAT_NONE: - inside_bounds = FALSE; - break; - - default: - return 0; - } - - tl = 
do_fetch(pict, x1, y1, fetch, has_src_clip, inside_bounds); - tr = do_fetch(pict, x2, y1, fetch, has_src_clip, inside_bounds); - bl = do_fetch(pict, x1, y2, fetch, has_src_clip, inside_bounds); - br = do_fetch(pict, x2, y2, fetch, has_src_clip, inside_bounds); - - ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx; - fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx; - r = (((ft * idisty + fb * disty) >> 16) & 0xff); - ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx; - fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx; - r |= (((ft * idisty + fb * disty) >> 8) & 0xff00); - ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx; - fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx; - r |= (((ft * idisty + fb * disty)) & 0xff0000); - ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx; - fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx; - r |= (((ft * idisty + fb * disty) << 8) & 0xff000000); - - return r; - } -} - -static void -fbFetchTransformed_Convolution(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, - pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit) -{ - fetchPixelProc32 fetch; - int i; - - pixman_fixed_t *params = pict->common.filter_params; - int32_t cwidth = pixman_fixed_to_int(params[0]); - int32_t cheight = pixman_fixed_to_int(params[1]); - int xoff = (params[0] - pixman_fixed_1) >> 1; - int yoff = (params[1] - pixman_fixed_1) >> 1; - fetch = ACCESS(pixman_fetchPixelProcForPicture32)(pict); - - params += 2; - for (i = 0; i < width; ++i) { - if (!mask || mask[i] & maskBits) - { - if (!v.vector[2]) { - *(buffer + i) = 0; - } else { - int x1, x2, y1, y2, x, y; - int32_t srtot, sgtot, sbtot, satot; - pixman_fixed_t *p = params; - - if (!affine) { - pixman_fixed_48_16_t tmp; - tmp = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2] - xoff; - x1 = pixman_fixed_to_int(tmp); - tmp = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2] - yoff; - y1 = pixman_fixed_to_int(tmp); - } else { - x1 = pixman_fixed_to_int(v.vector[0] - xoff); - y1 = pixman_fixed_to_int(v.vector[1] - yoff); - } - x2 = x1 + cwidth; - y2 = y1 + cheight; - - srtot = sgtot = sbtot = satot = 0; - - for (y = y1; y < y2; y++) { - int ty; - switch (pict->common.repeat) { - case PIXMAN_REPEAT_NORMAL: - ty = MOD (y, pict->height); - break; - case PIXMAN_REPEAT_PAD: - ty = CLIP (y, 0, pict->height-1); - break; - case PIXMAN_REPEAT_REFLECT: - ty = MOD (y, pict->height * 2); - if (ty >= pict->height) - ty = pict->height * 2 - ty - 1; - break; - default: - ty = y; - } - for (x = x1; x < x2; x++) { - if (*p) { - int tx; - switch (pict->common.repeat) { - case PIXMAN_REPEAT_NORMAL: - tx = MOD (x, pict->width); - break; - case PIXMAN_REPEAT_PAD: - tx = CLIP (x, 0, pict->width-1); - break; - case PIXMAN_REPEAT_REFLECT: - tx = MOD (x, pict->width * 2); - if (tx >= pict->width) - tx = pict->width * 2 - tx - 1; - break; - default: - tx = x; - } - if (pixman_region32_contains_point (pict->common.src_clip, tx, ty, NULL)) { - uint32_t c = fetch(pict, tx, ty); - - srtot += Red(c) * *p; - sgtot += Green(c) * *p; - sbtot += Blue(c) * *p; - satot += Alpha(c) * *p; - } - } - p++; - } - } - - satot >>= 16; - srtot >>= 16; - sgtot >>= 16; - sbtot >>= 16; - - if (satot < 0) satot = 0; else if (satot > 0xff) satot = 0xff; - if (srtot < 0) srtot = 0; else if (srtot > 0xff) srtot = 0xff; - if (sgtot < 0) sgtot = 0; else if (sgtot > 0xff) sgtot = 0xff; - if (sbtot < 0) sbtot = 0; else if (sbtot > 0xff) sbtot = 0xff; - - *(buffer + i) = ((satot << 24) | - (srtot << 16) | - (sgtot << 8) | - 
(sbtot )); - } - } - v.vector[0] += unit.vector[0]; - v.vector[1] += unit.vector[1]; - v.vector[2] += unit.vector[2]; - } -} - -static void -adjust (pixman_vector_t *v, pixman_vector_t *u, pixman_fixed_t adjustment) -{ - int delta_v = (adjustment * v->vector[2]) >> 16; - int delta_u = (adjustment * u->vector[2]) >> 16; - - v->vector[0] += delta_v; - v->vector[1] += delta_v; - - u->vector[0] += delta_u; - u->vector[1] += delta_u; -} - -void -ACCESS(fbFetchTransformed)(bits_image_t * pict, int x, int y, int width, - uint32_t *buffer, uint32_t *mask, uint32_t maskBits) -{ - uint32_t *bits; - int32_t stride; - pixman_vector_t v; - pixman_vector_t unit; - pixman_bool_t affine = TRUE; - - bits = pict->bits; - stride = pict->rowstride; - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - /* when using convolution filters or PIXMAN_REPEAT_PAD one might get here without a transform */ - if (pict->common.transform) - { - if (!pixman_transform_point_3d (pict->common.transform, &v)) - return; - unit.vector[0] = pict->common.transform->matrix[0][0]; - unit.vector[1] = pict->common.transform->matrix[1][0]; - unit.vector[2] = pict->common.transform->matrix[2][0]; - - affine = (v.vector[2] == pixman_fixed_1 && unit.vector[2] == 0); - } - else - { - unit.vector[0] = pixman_fixed_1; - unit.vector[1] = 0; - unit.vector[2] = 0; - } - - if (pict->common.filter == PIXMAN_FILTER_NEAREST || pict->common.filter == PIXMAN_FILTER_FAST) - { - fetchPixelProc32 fetch; - pixman_bool_t src_clip; - int i; - - /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ - adjust (&v, &unit, - pixman_fixed_e); - - fetch = ACCESS(pixman_fetchPixelProcForPicture32)(pict); - - src_clip = pict->common.src_clip != &(pict->common.full_region); - - for ( i = 0; i < width; ++i) - { - if (!mask || mask[i] & maskBits) - *(buffer + i) = fetch_nearest (pict, fetch, affine, pict->common.repeat, src_clip, &v); - - v.vector[0] += unit.vector[0]; - v.vector[1] += unit.vector[1]; - v.vector[2] += unit.vector[2]; - } - } - else if (pict->common.filter == PIXMAN_FILTER_BILINEAR || - pict->common.filter == PIXMAN_FILTER_GOOD || - pict->common.filter == PIXMAN_FILTER_BEST) - { - pixman_bool_t src_clip; - fetchPixelProc32 fetch; - int i; - - /* Let the bilinear code pretend that pixels fall on integer coordinaters */ - adjust (&v, &unit, -(pixman_fixed_1 / 2)); - - fetch = ACCESS(pixman_fetchPixelProcForPicture32)(pict); - src_clip = pict->common.src_clip != &(pict->common.full_region); - - for (i = 0; i < width; ++i) - { - if (!mask || mask[i] & maskBits) - *(buffer + i) = fetch_bilinear (pict, fetch, affine, pict->common.repeat, src_clip, &v); - - v.vector[0] += unit.vector[0]; - v.vector[1] += unit.vector[1]; - v.vector[2] += unit.vector[2]; - } - } - else if (pict->common.filter == PIXMAN_FILTER_CONVOLUTION) - { - /* Round to closest integer, ensuring that 0.5 rounds to 0, not 1 */ - adjust (&v, &unit, - pixman_fixed_e); - - fbFetchTransformed_Convolution(pict, width, buffer, mask, maskBits, affine, v, unit); - } -} - -#define SCANLINE_BUFFER_LENGTH 2048 - -void -ACCESS(fbFetchExternalAlpha)(bits_image_t * pict, int x, int y, int width, - uint32_t *buffer, uint32_t *mask, - uint32_t maskBits) -{ - int i; - uint32_t _alpha_buffer[SCANLINE_BUFFER_LENGTH]; - uint32_t *alpha_buffer = _alpha_buffer; - - if (!pict->common.alpha_map) { - ACCESS(fbFetchTransformed) (pict, x, y, 
width, buffer, mask, maskBits); - return; - } - if (width > SCANLINE_BUFFER_LENGTH) - alpha_buffer = (uint32_t *) pixman_malloc_ab (width, sizeof(uint32_t)); - - ACCESS(fbFetchTransformed)(pict, x, y, width, buffer, mask, maskBits); - ACCESS(fbFetchTransformed)((bits_image_t *)pict->common.alpha_map, x - pict->common.alpha_origin.x, - y - pict->common.alpha_origin.y, width, - alpha_buffer, mask, maskBits); - for (i = 0; i < width; ++i) { - if (!mask || mask[i] & maskBits) - { - int a = alpha_buffer[i]>>24; - *(buffer + i) = (a << 24) - | (div_255(Red(*(buffer + i)) * a) << 16) - | (div_255(Green(*(buffer + i)) * a) << 8) - | (div_255(Blue(*(buffer + i)) * a)); - } - } - - if (alpha_buffer != _alpha_buffer) - free(alpha_buffer); -} diff --git a/lib/pixman/pixman/pixman-trap.c b/lib/pixman/pixman/pixman-trap.c index 28dacafcc..962cbb39e 100644 --- a/lib/pixman/pixman/pixman-trap.c +++ b/lib/pixman/pixman/pixman-trap.c @@ -27,30 +27,230 @@ #include <stdio.h> #include "pixman-private.h" -typedef uint32_t FbBits; +/* + * Compute the smallest value no less than y which is on a + * grid row + */ + +PIXMAN_EXPORT pixman_fixed_t +pixman_sample_ceil_y (pixman_fixed_t y, int n) +{ + pixman_fixed_t f = pixman_fixed_frac (y); + pixman_fixed_t i = pixman_fixed_floor (y); + + f = ((f + Y_FRAC_FIRST (n)) / STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) + + Y_FRAC_FIRST (n); + + if (f > Y_FRAC_LAST (n)) + { + if (pixman_fixed_to_int (i) == 0x7fff) + { + f = 0xffff; /* saturate */ + } + else + { + f = Y_FRAC_FIRST (n); + i += pixman_fixed_1; + } + } + return (i | f); +} + +/* + * Compute the largest value no greater than y which is on a + * grid row + */ +PIXMAN_EXPORT pixman_fixed_t +pixman_sample_floor_y (pixman_fixed_t y, + int n) +{ + pixman_fixed_t f = pixman_fixed_frac (y); + pixman_fixed_t i = pixman_fixed_floor (y); + + f = DIV (f - Y_FRAC_FIRST (n), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) + + Y_FRAC_FIRST (n); + + if (f < Y_FRAC_FIRST (n)) + { + if (pixman_fixed_to_int (i) == 0x8000) + { + f = 0; /* saturate */ + } + else + { + f = Y_FRAC_LAST (n); + i -= pixman_fixed_1; + } + } + return (i | f); +} + +/* + * Step an edge by any amount (including negative values) + */ +PIXMAN_EXPORT void +pixman_edge_step (pixman_edge_t *e, + int n) +{ + pixman_fixed_48_16_t ne; + + e->x += n * e->stepx; + + ne = e->e + n * (pixman_fixed_48_16_t) e->dx; + + if (n >= 0) + { + if (ne > 0) + { + int nx = (ne + e->dy - 1) / e->dy; + e->e = ne - nx * (pixman_fixed_48_16_t) e->dy; + e->x += nx * e->signdx; + } + } + else + { + if (ne <= -e->dy) + { + int nx = (-ne) / e->dy; + e->e = ne + nx * (pixman_fixed_48_16_t) e->dy; + e->x -= nx * e->signdx; + } + } +} + +/* + * A private routine to initialize the multi-step + * elements of an edge structure + */ +static void +_pixman_edge_multi_init (pixman_edge_t * e, + int n, + pixman_fixed_t *stepx_p, + pixman_fixed_t *dx_p) +{ + pixman_fixed_t stepx; + pixman_fixed_48_16_t ne; + + ne = n * (pixman_fixed_48_16_t) e->dx; + stepx = n * e->stepx; + + if (ne > 0) + { + int nx = ne / e->dy; + ne -= nx * e->dy; + stepx += nx * e->signdx; + } + + *dx_p = ne; + *stepx_p = stepx; +} + +/* + * Initialize one edge structure given the line endpoints and a + * starting y value + */ +PIXMAN_EXPORT void +pixman_edge_init (pixman_edge_t *e, + int n, + pixman_fixed_t y_start, + pixman_fixed_t x_top, + pixman_fixed_t y_top, + pixman_fixed_t x_bot, + pixman_fixed_t y_bot) +{ + pixman_fixed_t dx, dy; + + e->x = x_top; + e->e = 0; + dx = x_bot - x_top; + dy = y_bot - y_top; + e->dy = dy; + e->dx = 0; + + if 
(dy) + { + if (dx >= 0) + { + e->signdx = 1; + e->stepx = dx / dy; + e->dx = dx % dy; + e->e = -dy; + } + else + { + e->signdx = -1; + e->stepx = -(-dx / dy); + e->dx = -dx % dy; + e->e = 0; + } + + _pixman_edge_multi_init (e, STEP_Y_SMALL (n), + &e->stepx_small, &e->dx_small); + + _pixman_edge_multi_init (e, STEP_Y_BIG (n), + &e->stepx_big, &e->dx_big); + } + pixman_edge_step (e, y_start - y_top); +} + +/* + * Initialize one edge structure given a line, starting y value + * and a pixel offset for the line + */ +PIXMAN_EXPORT void +pixman_line_fixed_edge_init (pixman_edge_t * e, + int n, + pixman_fixed_t y, + const pixman_line_fixed_t *line, + int x_off, + int y_off) +{ + pixman_fixed_t x_off_fixed = pixman_int_to_fixed (x_off); + pixman_fixed_t y_off_fixed = pixman_int_to_fixed (y_off); + const pixman_point_fixed_t *top, *bot; + + if (line->p1.y <= line->p2.y) + { + top = &line->p1; + bot = &line->p2; + } + else + { + top = &line->p2; + bot = &line->p1; + } + + pixman_edge_init (e, n, y, + top->x + x_off_fixed, + top->y + y_off_fixed, + bot->x + x_off_fixed, + bot->y + y_off_fixed); +} PIXMAN_EXPORT void -pixman_add_traps (pixman_image_t * image, - int16_t x_off, - int16_t y_off, - int ntrap, - pixman_trap_t *traps) +pixman_add_traps (pixman_image_t * image, + int16_t x_off, + int16_t y_off, + int ntrap, + pixman_trap_t * traps) { - int bpp; - int width; - int height; + int bpp; + int width; + int height; - pixman_fixed_t x_off_fixed; - pixman_fixed_t y_off_fixed; - pixman_edge_t l, r; - pixman_fixed_t t, b; + pixman_fixed_t x_off_fixed; + pixman_fixed_t y_off_fixed; + pixman_edge_t l, r; + pixman_fixed_t t, b; + _pixman_image_validate (image); + width = image->bits.width; height = image->bits.height; bpp = PIXMAN_FORMAT_BPP (image->bits.format); - - x_off_fixed = pixman_int_to_fixed(x_off); - y_off_fixed = pixman_int_to_fixed(y_off); + + x_off_fixed = pixman_int_to_fixed (x_off); + y_off_fixed = pixman_int_to_fixed (y_off); while (ntrap--) { @@ -58,83 +258,82 @@ pixman_add_traps (pixman_image_t * image, if (t < 0) t = 0; t = pixman_sample_ceil_y (t, bpp); - + b = traps->bot.y + y_off_fixed; if (pixman_fixed_to_int (b) >= height) b = pixman_int_to_fixed (height) - 1; b = pixman_sample_floor_y (b, bpp); - + if (b >= t) { /* initialize edge walkers */ pixman_edge_init (&l, bpp, t, - traps->top.l + x_off_fixed, - traps->top.y + y_off_fixed, - traps->bot.l + x_off_fixed, - traps->bot.y + y_off_fixed); - + traps->top.l + x_off_fixed, + traps->top.y + y_off_fixed, + traps->bot.l + x_off_fixed, + traps->bot.y + y_off_fixed); + pixman_edge_init (&r, bpp, t, - traps->top.r + x_off_fixed, - traps->top.y + y_off_fixed, - traps->bot.r + x_off_fixed, - traps->bot.y + y_off_fixed); - + traps->top.r + x_off_fixed, + traps->top.y + y_off_fixed, + traps->bot.r + x_off_fixed, + traps->bot.y + y_off_fixed); + pixman_rasterize_edges (image, &l, &r, t, b); } + traps++; } } +#if 0 static void dump_image (pixman_image_t *image, - const char *title) + const char * title) { int i, j; - + if (!image->type == BITS) - { printf ("%s is not a regular image\n", title); - } if (!image->bits.format == PIXMAN_a8) - { printf ("%s is not an alpha mask\n", title); - } printf ("\n\n\n%s: \n", title); - + for (i = 0; i < image->bits.height; ++i) { uint8_t *line = (uint8_t *)&(image->bits.bits[i * image->bits.rowstride]); - + for (j = 0; j < image->bits.width; ++j) - printf ("%c", line[j]? '#' : ' '); + printf ("%c", line[j] ? 
'#' : ' '); printf ("\n"); } } +#endif PIXMAN_EXPORT void -pixman_add_trapezoids (pixman_image_t *image, - int16_t x_off, - int y_off, - int ntraps, - const pixman_trapezoid_t *traps) +pixman_add_trapezoids (pixman_image_t * image, + int16_t x_off, + int y_off, + int ntraps, + const pixman_trapezoid_t *traps) { int i; #if 0 dump_image (image, "before"); #endif - + for (i = 0; i < ntraps; ++i) { const pixman_trapezoid_t *trap = &(traps[i]); - + if (!pixman_trapezoid_valid (trap)) continue; - + pixman_rasterize_trapezoid (image, trap, x_off, y_off); } @@ -144,21 +343,23 @@ pixman_add_trapezoids (pixman_image_t *image, } PIXMAN_EXPORT void -pixman_rasterize_trapezoid (pixman_image_t * image, - const pixman_trapezoid_t *trap, - int x_off, - int y_off) +pixman_rasterize_trapezoid (pixman_image_t * image, + const pixman_trapezoid_t *trap, + int x_off, + int y_off) { - int bpp; - int width; - int height; + int bpp; + int width; + int height; - pixman_fixed_t x_off_fixed; - pixman_fixed_t y_off_fixed; - pixman_edge_t l, r; - pixman_fixed_t t, b; + pixman_fixed_t x_off_fixed; + pixman_fixed_t y_off_fixed; + pixman_edge_t l, r; + pixman_fixed_t t, b; return_if_fail (image->type == BITS); + + _pixman_image_validate (image); if (!pixman_trapezoid_valid (trap)) return; @@ -166,9 +367,10 @@ pixman_rasterize_trapezoid (pixman_image_t * image, width = image->bits.width; height = image->bits.height; bpp = PIXMAN_FORMAT_BPP (image->bits.format); - - x_off_fixed = pixman_int_to_fixed(x_off); - y_off_fixed = pixman_int_to_fixed(y_off); + + x_off_fixed = pixman_int_to_fixed (x_off); + y_off_fixed = pixman_int_to_fixed (y_off); + t = trap->top + y_off_fixed; if (t < 0) t = 0; @@ -178,7 +380,7 @@ pixman_rasterize_trapezoid (pixman_image_t * image, if (pixman_fixed_to_int (b) >= height) b = pixman_int_to_fixed (height) - 1; b = pixman_sample_floor_y (b, bpp); - + if (b >= t) { /* initialize edge walkers */ @@ -188,97 +390,3 @@ pixman_rasterize_trapezoid (pixman_image_t * image, pixman_rasterize_edges (image, &l, &r, t, b); } } - -#if 0 -static int -_GreaterY (pixman_point_fixed_t *a, pixman_point_fixed_t *b) -{ - if (a->y == b->y) - return a->x > b->x; - return a->y > b->y; -} - -/* - * Note that the definition of this function is a bit odd because - * of the X coordinate space (y increasing downwards). 
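- *
- * Concretely, it forms the edge vectors ad = a - ref and bd = b - ref
- * and tests the sign of the 2-D cross product ad.x * bd.y - ad.y * bd.x,
- * widened to pixman_fixed_32_32_t so the product of two 16.16 values
- * cannot overflow.  Flipping the y axis, as X's screen coordinates do,
- * reverses which sign of the cross product means "clockwise", which is
- * the oddness referred to above.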
- */ -static int -_Clockwise (pixman_point_fixed_t *ref, pixman_point_fixed_t *a, pixman_point_fixed_t *b) -{ - pixman_point_fixed_t ad, bd; - - ad.x = a->x - ref->x; - ad.y = a->y - ref->y; - bd.x = b->x - ref->x; - bd.y = b->y - ref->y; - - return ((pixman_fixed_32_32_t) bd.y * ad.x - (pixman_fixed_32_32_t) ad.y * bd.x) < 0; -} - -/* FIXME -- this could be made more efficient */ -void -fbAddTriangles (pixman_image_t * pPicture, - int16_t x_off, - int16_t y_off, - int ntri, - xTriangle *tris) -{ - pixman_point_fixed_t *top, *left, *right, *tmp; - xTrapezoid trap; - - for (; ntri; ntri--, tris++) - { - top = &tris->p1; - left = &tris->p2; - right = &tris->p3; - if (_GreaterY (top, left)) { - tmp = left; left = top; top = tmp; - } - if (_GreaterY (top, right)) { - tmp = right; right = top; top = tmp; - } - if (_Clockwise (top, right, left)) { - tmp = right; right = left; left = tmp; - } - - /* - * Two cases: - * - * + + - * / \ / \ - * / \ / \ - * / + + \ - * / -- -- \ - * / -- -- \ - * / --- --- \ - * +-- --+ - */ - - trap.top = top->y; - trap.left.p1 = *top; - trap.left.p2 = *left; - trap.right.p1 = *top; - trap.right.p2 = *right; - if (right->y < left->y) - trap.bottom = right->y; - else - trap.bottom = left->y; - fbRasterizeTrapezoid (pPicture, &trap, x_off, y_off); - if (right->y < left->y) - { - trap.top = right->y; - trap.bottom = left->y; - trap.right.p1 = *right; - trap.right.p2 = *left; - } - else - { - trap.top = left->y; - trap.bottom = right->y; - trap.left.p1 = *left; - trap.left.p2 = *right; - } - fbRasterizeTrapezoid (pPicture, &trap, x_off, y_off); - } -} -#endif diff --git a/lib/pixman/pixman/pixman-utils.c b/lib/pixman/pixman/pixman-utils.c index ffb14445e..71282062c 100644 --- a/lib/pixman/pixman/pixman-utils.c +++ b/lib/pixman/pixman/pixman-utils.c @@ -1,5 +1,6 @@ /* * Copyright © 2000 SuSE, Inc. + * Copyright © 1999 Keith Packard * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that @@ -24,210 +25,233 @@ #ifdef HAVE_CONFIG_H #include <config.h> #endif - +#include <stdio.h> #include <stdlib.h> #include "pixman-private.h" /* - * Compute the smallest value no less than y which is on a - * grid row + * Computing composite region */ +#define BOUND(v) (int16_t) ((v) < INT16_MIN ? INT16_MIN : (v) > INT16_MAX ? 
INT16_MAX : (v)) -PIXMAN_EXPORT pixman_fixed_t -pixman_sample_ceil_y (pixman_fixed_t y, int n) +static inline pixman_bool_t +clip_general_image (pixman_region32_t * region, + pixman_region32_t * clip, + int dx, + int dy) { - pixman_fixed_t f = pixman_fixed_frac(y); - pixman_fixed_t i = pixman_fixed_floor(y); - - f = ((f + Y_FRAC_FIRST(n)) / STEP_Y_SMALL(n)) * STEP_Y_SMALL(n) + Y_FRAC_FIRST(n); - if (f > Y_FRAC_LAST(n)) + if (pixman_region32_n_rects (region) == 1 && + pixman_region32_n_rects (clip) == 1) { - if (pixman_fixed_to_int(i) == 0x7fff) + pixman_box32_t * rbox = pixman_region32_rectangles (region, NULL); + pixman_box32_t * cbox = pixman_region32_rectangles (clip, NULL); + int v; + + if (rbox->x1 < (v = cbox->x1 + dx)) + rbox->x1 = BOUND (v); + if (rbox->x2 > (v = cbox->x2 + dx)) + rbox->x2 = BOUND (v); + if (rbox->y1 < (v = cbox->y1 + dy)) + rbox->y1 = BOUND (v); + if (rbox->y2 > (v = cbox->y2 + dy)) + rbox->y2 = BOUND (v); + if (rbox->x1 >= rbox->x2 || + rbox->y1 >= rbox->y2) { - f = 0xffff; /* saturate */ - } else { - f = Y_FRAC_FIRST(n); - i += pixman_fixed_1; + pixman_region32_init (region); } } - return (i | f); + else if (!pixman_region32_not_empty (clip)) + { + return FALSE; + } + else + { + if (dx || dy) + pixman_region32_translate (region, -dx, -dy); + if (!pixman_region32_intersect (region, region, clip)) + return FALSE; + if (dx || dy) + pixman_region32_translate (region, dx, dy); + } + return pixman_region32_not_empty (region); } -#define _div(a,b) ((a) >= 0 ? (a) / (b) : -((-(a) + (b) - 1) / (b))) - -/* - * Compute the largest value no greater than y which is on a - * grid row - */ -PIXMAN_EXPORT pixman_fixed_t -pixman_sample_floor_y (pixman_fixed_t y, int n) +static inline pixman_bool_t +clip_source_image (pixman_region32_t * region, + pixman_image_t * image, + int dx, + int dy) { - pixman_fixed_t f = pixman_fixed_frac(y); - pixman_fixed_t i = pixman_fixed_floor (y); + /* Source clips are ignored, unless they are explicitly turned on + * and the clip in question was set by an X client. (Because if + * the clip was not set by a client, then it is a hierarchy + * clip and those should always be ignored for sources). + */ + if (!image->common.clip_sources || !image->common.client_clip) + return TRUE; - f = _div(f - Y_FRAC_FIRST(n), STEP_Y_SMALL(n)) * STEP_Y_SMALL(n) + Y_FRAC_FIRST(n); - if (f < Y_FRAC_FIRST(n)) - { - if (pixman_fixed_to_int(i) == 0x8000) - { - f = 0; /* saturate */ - } else { - f = Y_FRAC_LAST(n); - i -= pixman_fixed_1; - } - } - return (i | f); + return clip_general_image (region, + &image->common.clip_region, + dx, dy); } /* - * Step an edge by any amount (including negative values) + * returns FALSE if the final region is empty. Indistinguishable from + * an allocation failure, but rendering ignores those anyways. 
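+ *
+ * The intended call pattern, also used by _pixman_walk_composite_region()
+ * further down, treats FALSE as "nothing to composite":
+ *
+ *     pixman_region32_t region;
+ *
+ *     pixman_region32_init (&region);
+ *     if (pixman_compute_composite_region32 (&region, src, mask, dest,
+ *                                            src_x, src_y, mask_x, mask_y,
+ *                                            dest_x, dest_y, width, height))
+ *     {
+ *         ... composite each box of the region ...
+ *         pixman_region32_fini (&region);
+ *     }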
*/ -PIXMAN_EXPORT void -pixman_edge_step (pixman_edge_t *e, int n) +static pixman_bool_t +pixman_compute_composite_region32 (pixman_region32_t * region, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int16_t src_x, + int16_t src_y, + int16_t mask_x, + int16_t mask_y, + int16_t dest_x, + int16_t dest_y, + uint16_t width, + uint16_t height) { - pixman_fixed_48_16_t ne; + int v; + + region->extents.x1 = dest_x; + v = dest_x + width; + region->extents.x2 = BOUND (v); + region->extents.y1 = dest_y; + v = dest_y + height; + region->extents.y2 = BOUND (v); - e->x += n * e->stepx; + region->extents.x1 = MAX (region->extents.x1, 0); + region->extents.y1 = MAX (region->extents.y1, 0); + region->extents.x2 = MIN (region->extents.x2, dst_image->bits.width); + region->extents.y2 = MIN (region->extents.y2, dst_image->bits.height); - ne = e->e + n * (pixman_fixed_48_16_t) e->dx; + region->data = 0; - if (n >= 0) + /* Check for empty operation */ + if (region->extents.x1 >= region->extents.x2 || + region->extents.y1 >= region->extents.y2) { - if (ne > 0) + pixman_region32_init (region); + return FALSE; + } + + if (dst_image->common.have_clip_region) + { + if (!clip_general_image (region, &dst_image->common.clip_region, 0, 0)) { - int nx = (ne + e->dy - 1) / e->dy; - e->e = ne - nx * (pixman_fixed_48_16_t) e->dy; - e->x += nx * e->signdx; + pixman_region32_fini (region); + return FALSE; } } - else + + if (dst_image->common.alpha_map && dst_image->common.alpha_map->common.have_clip_region) { - if (ne <= -e->dy) + if (!clip_general_image (region, &dst_image->common.alpha_map->common.clip_region, + -dst_image->common.alpha_origin_x, + -dst_image->common.alpha_origin_y)) { - int nx = (-ne) / e->dy; - e->e = ne + nx * (pixman_fixed_48_16_t) e->dy; - e->x -= nx * e->signdx; + pixman_region32_fini (region); + return FALSE; } } -} - -/* - * A private routine to initialize the multi-step - * elements of an edge structure - */ -static void -_pixman_edge_multi_init (pixman_edge_t *e, int n, pixman_fixed_t *stepx_p, pixman_fixed_t *dx_p) -{ - pixman_fixed_t stepx; - pixman_fixed_48_16_t ne; - ne = n * (pixman_fixed_48_16_t) e->dx; - stepx = n * e->stepx; - if (ne > 0) + /* clip against src */ + if (src_image->common.have_clip_region) { - int nx = ne / e->dy; - ne -= nx * e->dy; - stepx += nx * e->signdx; + if (!clip_source_image (region, src_image, dest_x - src_x, dest_y - src_y)) + { + pixman_region32_fini (region); + return FALSE; + } } - *dx_p = ne; - *stepx_p = stepx; -} - -/* - * Initialize one edge structure given the line endpoints and a - * starting y value - */ -PIXMAN_EXPORT void -pixman_edge_init (pixman_edge_t *e, - int n, - pixman_fixed_t y_start, - pixman_fixed_t x_top, - pixman_fixed_t y_top, - pixman_fixed_t x_bot, - pixman_fixed_t y_bot) -{ - pixman_fixed_t dx, dy; - - e->x = x_top; - e->e = 0; - dx = x_bot - x_top; - dy = y_bot - y_top; - e->dy = dy; - e->dx = 0; - if (dy) + if (src_image->common.alpha_map && src_image->common.alpha_map->common.have_clip_region) { - if (dx >= 0) + if (!clip_source_image (region, (pixman_image_t *)src_image->common.alpha_map, + dest_x - (src_x - src_image->common.alpha_origin_x), + dest_y - (src_y - src_image->common.alpha_origin_y))) { - e->signdx = 1; - e->stepx = dx / dy; - e->dx = dx % dy; - e->e = -dy; + pixman_region32_fini (region); + return FALSE; } - else + } + /* clip against mask */ + if (mask_image && mask_image->common.have_clip_region) + { + if (!clip_source_image (region, mask_image, dest_x - mask_x, dest_y 
- mask_y)) { - e->signdx = -1; - e->stepx = -(-dx / dy); - e->dx = -dx % dy; - e->e = 0; + pixman_region32_fini (region); + return FALSE; + } + if (mask_image->common.alpha_map && mask_image->common.alpha_map->common.have_clip_region) + { + if (!clip_source_image (region, (pixman_image_t *)mask_image->common.alpha_map, + dest_x - (mask_x - mask_image->common.alpha_origin_x), + dest_y - (mask_y - mask_image->common.alpha_origin_y))) + { + pixman_region32_fini (region); + return FALSE; + } } - - _pixman_edge_multi_init (e, STEP_Y_SMALL(n), &e->stepx_small, &e->dx_small); - _pixman_edge_multi_init (e, STEP_Y_BIG(n), &e->stepx_big, &e->dx_big); } - pixman_edge_step (e, y_start - y_top); + + return TRUE; } -/* - * Initialize one edge structure given a line, starting y value - * and a pixel offset for the line - */ -PIXMAN_EXPORT void -pixman_line_fixed_edge_init (pixman_edge_t *e, - int n, - pixman_fixed_t y, - const pixman_line_fixed_t *line, - int x_off, - int y_off) +PIXMAN_EXPORT pixman_bool_t +pixman_compute_composite_region (pixman_region16_t * region, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int16_t src_x, + int16_t src_y, + int16_t mask_x, + int16_t mask_y, + int16_t dest_x, + int16_t dest_y, + uint16_t width, + uint16_t height) { - pixman_fixed_t x_off_fixed = pixman_int_to_fixed(x_off); - pixman_fixed_t y_off_fixed = pixman_int_to_fixed(y_off); - const pixman_point_fixed_t *top, *bot; + pixman_region32_t r32; + pixman_bool_t retval; - if (line->p1.y <= line->p2.y) - { - top = &line->p1; - bot = &line->p2; - } - else + pixman_region32_init (&r32); + + retval = pixman_compute_composite_region32 ( + &r32, src_image, mask_image, dst_image, + src_x, src_y, mask_x, mask_y, dest_x, dest_y, + width, height); + + if (retval) { - top = &line->p2; - bot = &line->p1; + if (!pixman_region16_copy_from_region32 (region, &r32)) + retval = FALSE; } - pixman_edge_init (e, n, y, - top->x + x_off_fixed, - top->y + y_off_fixed, - bot->x + x_off_fixed, - bot->y + y_off_fixed); + + pixman_region32_fini (&r32); + return retval; } pixman_bool_t pixman_multiply_overflows_int (unsigned int a, - unsigned int b) + unsigned int b) { return a >= INT32_MAX / b; } pixman_bool_t pixman_addition_overflows_int (unsigned int a, - unsigned int b) + unsigned int b) { return a > INT32_MAX - b; } void * -pixman_malloc_ab(unsigned int a, - unsigned int b) +pixman_malloc_ab (unsigned int a, + unsigned int b) { if (a >= INT32_MAX / b) return NULL; @@ -237,8 +261,8 @@ pixman_malloc_ab(unsigned int a, void * pixman_malloc_abc (unsigned int a, - unsigned int b, - unsigned int c) + unsigned int b, + unsigned int c) { if (a >= INT32_MAX / b) return NULL; @@ -248,286 +272,233 @@ pixman_malloc_abc (unsigned int a, return malloc (a * b * c); } - -/** - * pixman_version: - * - * Returns the version of the pixman library encoded in a single - * integer as per %PIXMAN_VERSION_ENCODE. The encoding ensures that - * later versions compare greater than earlier versions. - * - * A run-time comparison to check that pixman's version is greater than - * or equal to version X.Y.Z could be performed as follows: - * - * <informalexample><programlisting> - * if (pixman_version() >= PIXMAN_VERSION_ENCODE(X,Y,Z)) {...} - * </programlisting></informalexample> - * - * See also pixman_version_string() as well as the compile-time - * equivalents %PIXMAN_VERSION and %PIXMAN_VERSION_STRING. - * - * Return value: the encoded version. 
- **/ -PIXMAN_EXPORT int -pixman_version (void) +/* + * Helper routine to expand a color component from 0 < n <= 8 bits to 16 + * bits by replication. + */ +static inline uint64_t +expand16 (const uint8_t val, int nbits) { - return PIXMAN_VERSION; -} + /* Start out with the high bit of val in the high bit of result. */ + uint16_t result = (uint16_t)val << (16 - nbits); -/** - * pixman_version_string: - * - * Returns the version of the pixman library as a human-readable string - * of the form "X.Y.Z". - * - * See also pixman_version() as well as the compile-time equivalents - * %PIXMAN_VERSION_STRING and %PIXMAN_VERSION. - * - * Return value: a string containing the version. - **/ -PIXMAN_EXPORT const char* -pixman_version_string (void) -{ - return PIXMAN_VERSION_STRING; -} + if (nbits == 0) + return 0; -/** - * pixman_format_supported_destination: - * @format: A pixman_format_code_t format - * - * Return value: whether the provided format code is a supported - * format for a pixman surface used as a destination in - * rendering. - * - * Currently, all pixman_format_code_t values are supported - * except for the YUV formats. - **/ -PIXMAN_EXPORT pixman_bool_t -pixman_format_supported_destination (pixman_format_code_t format) -{ - switch (format) { - /* 32 bpp formats */ - case PIXMAN_a2b10g10r10: - case PIXMAN_x2b10g10r10: - case PIXMAN_a8r8g8b8: - case PIXMAN_x8r8g8b8: - case PIXMAN_a8b8g8r8: - case PIXMAN_x8b8g8r8: - case PIXMAN_b8g8r8a8: - case PIXMAN_b8g8r8x8: - case PIXMAN_r8g8b8: - case PIXMAN_b8g8r8: - case PIXMAN_r5g6b5: - case PIXMAN_b5g6r5: - /* 16 bpp formats */ - case PIXMAN_a1r5g5b5: - case PIXMAN_x1r5g5b5: - case PIXMAN_a1b5g5r5: - case PIXMAN_x1b5g5r5: - case PIXMAN_a4r4g4b4: - case PIXMAN_x4r4g4b4: - case PIXMAN_a4b4g4r4: - case PIXMAN_x4b4g4r4: - /* 8bpp formats */ - case PIXMAN_a8: - case PIXMAN_r3g3b2: - case PIXMAN_b2g3r3: - case PIXMAN_a2r2g2b2: - case PIXMAN_a2b2g2r2: - case PIXMAN_c8: - case PIXMAN_g8: - case PIXMAN_x4a4: - /* Collides with PIXMAN_c8 - case PIXMAN_x4c4: - */ - /* Collides with PIXMAN_g8 - case PIXMAN_x4g4: - */ - /* 4bpp formats */ - case PIXMAN_a4: - case PIXMAN_r1g2b1: - case PIXMAN_b1g2r1: - case PIXMAN_a1r1g1b1: - case PIXMAN_a1b1g1r1: - case PIXMAN_c4: - case PIXMAN_g4: - /* 1bpp formats */ - case PIXMAN_a1: - case PIXMAN_g1: - return TRUE; - - /* YUV formats */ - case PIXMAN_yuy2: - case PIXMAN_yv12: - default: - return FALSE; + /* Copy the bits in result, doubling the number of bits each time, until + * we fill all 16 bits. + */ + while (nbits < 16) + { + result |= result >> nbits; + nbits *= 2; } + + return result; } -/** - * pixman_format_supported_source: - * @format: A pixman_format_code_t format - * - * Return value: whether the provided format code is a supported - * format for a pixman surface used as a source in - * rendering. - * - * Currently, all pixman_format_code_t values are supported. - **/ -PIXMAN_EXPORT pixman_bool_t -pixman_format_supported_source (pixman_format_code_t format) +/* + * This function expands images from ARGB8 format to ARGB16. To preserve + * precision, it needs to know the original source format. For example, if the + * source was PIXMAN_x1r5g5b5 and the red component contained bits 12345, then + * the expanded value is 12345123. To correctly expand this to 16 bits, it + * should be 1234512345123451 and not 1234512312345123. 
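+ *
+ * The expand16() helper above achieves this by replication: the component
+ * is shifted into the top bits and then OR-ed with copies of itself at
+ * successively doubling shifts.  A worked example for a 5-bit component
+ * r = 10101:
+ *
+ *     1010100000000000    r << (16 - 5)
+ *     1010110101000000    after result |= result >> 5
+ *     1010110101101011    after result |= result >> 10, i.e. 10101 repeated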
+ */ +void +pixman_expand (uint64_t * dst, + const uint32_t * src, + pixman_format_code_t format, + int width) { - switch (format) { - /* 32 bpp formats */ - case PIXMAN_a2b10g10r10: - case PIXMAN_x2b10g10r10: - case PIXMAN_a8r8g8b8: - case PIXMAN_x8r8g8b8: - case PIXMAN_a8b8g8r8: - case PIXMAN_x8b8g8r8: - case PIXMAN_b8g8r8a8: - case PIXMAN_b8g8r8x8: - case PIXMAN_r8g8b8: - case PIXMAN_b8g8r8: - case PIXMAN_r5g6b5: - case PIXMAN_b5g6r5: - /* 16 bpp formats */ - case PIXMAN_a1r5g5b5: - case PIXMAN_x1r5g5b5: - case PIXMAN_a1b5g5r5: - case PIXMAN_x1b5g5r5: - case PIXMAN_a4r4g4b4: - case PIXMAN_x4r4g4b4: - case PIXMAN_a4b4g4r4: - case PIXMAN_x4b4g4r4: - /* 8bpp formats */ - case PIXMAN_a8: - case PIXMAN_r3g3b2: - case PIXMAN_b2g3r3: - case PIXMAN_a2r2g2b2: - case PIXMAN_a2b2g2r2: - case PIXMAN_c8: - case PIXMAN_g8: - case PIXMAN_x4a4: - /* Collides with PIXMAN_c8 - case PIXMAN_x4c4: - */ - /* Collides with PIXMAN_g8 - case PIXMAN_x4g4: - */ - /* 4bpp formats */ - case PIXMAN_a4: - case PIXMAN_r1g2b1: - case PIXMAN_b1g2r1: - case PIXMAN_a1r1g1b1: - case PIXMAN_a1b1g1r1: - case PIXMAN_c4: - case PIXMAN_g4: - /* 1bpp formats */ - case PIXMAN_a1: - case PIXMAN_g1: - /* YUV formats */ - case PIXMAN_yuy2: - case PIXMAN_yv12: - return TRUE; - - default: - return FALSE; + /* + * Determine the sizes of each component and the masks and shifts + * required to extract them from the source pixel. + */ + const int a_size = PIXMAN_FORMAT_A (format), + r_size = PIXMAN_FORMAT_R (format), + g_size = PIXMAN_FORMAT_G (format), + b_size = PIXMAN_FORMAT_B (format); + const int a_shift = 32 - a_size, + r_shift = 24 - r_size, + g_shift = 16 - g_size, + b_shift = 8 - b_size; + const uint8_t a_mask = ~(~0 << a_size), + r_mask = ~(~0 << r_size), + g_mask = ~(~0 << g_size), + b_mask = ~(~0 << b_size); + int i; + + /* Start at the end so that we can do the expansion in place + * when src == dst + */ + for (i = width - 1; i >= 0; i--) + { + const uint32_t pixel = src[i]; + const uint8_t a = (pixel >> a_shift) & a_mask, + r = (pixel >> r_shift) & r_mask, + g = (pixel >> g_shift) & g_mask, + b = (pixel >> b_shift) & b_mask; + const uint64_t a16 = a_size ? expand16 (a, a_size) : 0xffff, + r16 = expand16 (r, r_size), + g16 = expand16 (g, g_size), + b16 = expand16 (b, b_size); + + dst[i] = a16 << 48 | r16 << 32 | g16 << 16 | b16; } } +/* + * Contracting is easier than expanding. We just need to truncate the + * components. 
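+ * Each 16-bit component keeps only its high byte, so, for example, the
+ * 64-bit pixel 0x123456789abcdef0 contracts to 0x12569ade
+ * (a = 0x12, r = 0x56, g = 0x9a, b = 0xde).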
+ */ void -_pixman_walk_composite_region (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height, - pixman_bool_t srcRepeat, - pixman_bool_t maskRepeat, - pixman_composite_func_t compositeRect) +pixman_contract (uint32_t * dst, + const uint64_t *src, + int width) { - int n; - const pixman_box32_t *pbox; - int w, h, w_this, h_this; - int x_msk, y_msk, x_src, y_src, x_dst, y_dst; - pixman_region32_t reg; - pixman_region32_t *region; - - pixman_region32_init (®); - if (!pixman_compute_composite_region32 (®, pSrc, pMask, pDst, - xSrc, ySrc, xMask, yMask, xDst, yDst, width, height)) + int i; + + /* Start at the beginning so that we can do the contraction in + * place when src == dst + */ + for (i = 0; i < width; i++) { - return; + const uint8_t a = src[i] >> 56, + r = src[i] >> 40, + g = src[i] >> 24, + b = src[i] >> 8; + + dst[i] = a << 24 | r << 16 | g << 8 | b; } +} - region = ® +static void +walk_region_internal (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int16_t src_x, + int16_t src_y, + int16_t mask_x, + int16_t mask_y, + int16_t dest_x, + int16_t dest_y, + uint16_t width, + uint16_t height, + pixman_bool_t src_repeat, + pixman_bool_t mask_repeat, + pixman_region32_t * region, + pixman_composite_func_t composite_rect) +{ + int n; + const pixman_box32_t *pbox; + int w, h, w_this, h_this; + int x_msk, y_msk, x_src, y_src, x_dst, y_dst; pbox = pixman_region32_rectangles (region, &n); while (n--) { h = pbox->y2 - pbox->y1; - y_src = pbox->y1 - yDst + ySrc; - y_msk = pbox->y1 - yDst + yMask; + y_src = pbox->y1 - dest_y + src_y; + y_msk = pbox->y1 - dest_y + mask_y; y_dst = pbox->y1; + while (h) { h_this = h; w = pbox->x2 - pbox->x1; - x_src = pbox->x1 - xDst + xSrc; - x_msk = pbox->x1 - xDst + xMask; + x_src = pbox->x1 - dest_x + src_x; + x_msk = pbox->x1 - dest_x + mask_x; x_dst = pbox->x1; - if (maskRepeat) + + if (mask_repeat) { - y_msk = MOD (y_msk, pMask->bits.height); - if (h_this > pMask->bits.height - y_msk) - h_this = pMask->bits.height - y_msk; + y_msk = MOD (y_msk, mask_image->bits.height); + if (h_this > mask_image->bits.height - y_msk) + h_this = mask_image->bits.height - y_msk; } - if (srcRepeat) + + if (src_repeat) { - y_src = MOD (y_src, pSrc->bits.height); - if (h_this > pSrc->bits.height - y_src) - h_this = pSrc->bits.height - y_src; + y_src = MOD (y_src, src_image->bits.height); + if (h_this > src_image->bits.height - y_src) + h_this = src_image->bits.height - y_src; } + while (w) { w_this = w; - if (maskRepeat) + + if (mask_repeat) { - x_msk = MOD (x_msk, pMask->bits.width); - if (w_this > pMask->bits.width - x_msk) - w_this = pMask->bits.width - x_msk; + x_msk = MOD (x_msk, mask_image->bits.width); + if (w_this > mask_image->bits.width - x_msk) + w_this = mask_image->bits.width - x_msk; } - if (srcRepeat) + + if (src_repeat) { - x_src = MOD (x_src, pSrc->bits.width); - if (w_this > pSrc->bits.width - x_src) - w_this = pSrc->bits.width - x_src; + x_src = MOD (x_src, src_image->bits.width); + if (w_this > src_image->bits.width - x_src) + w_this = src_image->bits.width - x_src; } - (*compositeRect) (imp, - op, pSrc, pMask, pDst, - x_src, y_src, x_msk, y_msk, x_dst, y_dst, - w_this, h_this); + + (*composite_rect) (imp, op, + src_image, mask_image, dst_image, + x_src, y_src, x_msk, 
y_msk, x_dst, y_dst, + w_this, h_this); w -= w_this; + x_src += w_this; x_msk += w_this; x_dst += w_this; } + h -= h_this; y_src += h_this; y_msk += h_this; y_dst += h_this; } + pbox++; } - pixman_region32_fini (®); +} + +void +_pixman_walk_composite_region (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int16_t src_x, + int16_t src_y, + int16_t mask_x, + int16_t mask_y, + int16_t dest_x, + int16_t dest_y, + uint16_t width, + uint16_t height, + pixman_composite_func_t composite_rect) +{ + pixman_region32_t region; + + pixman_region32_init (®ion); + + if (pixman_compute_composite_region32 ( + ®ion, src_image, mask_image, dst_image, + src_x, src_y, mask_x, mask_y, dest_x, dest_y, + width, height)) + { + walk_region_internal (imp, op, + src_image, mask_image, dst_image, + src_x, src_y, mask_x, mask_y, dest_x, dest_y, + width, height, FALSE, FALSE, + ®ion, + composite_rect); + + pixman_region32_fini (®ion); + } } static pixman_bool_t @@ -537,9 +508,9 @@ mask_is_solid (pixman_image_t *mask) return TRUE; if (mask->type == BITS && - mask->common.repeat == PIXMAN_REPEAT_NORMAL && - mask->bits.width == 1 && - mask->bits.height == 1) + mask->common.repeat == PIXMAN_REPEAT_NORMAL && + mask->bits.width == 1 && + mask->bits.height == 1) { return TRUE; } @@ -547,26 +518,28 @@ mask_is_solid (pixman_image_t *mask) return FALSE; } -static const FastPathInfo * -get_fast_path (const FastPathInfo *fast_paths, - pixman_op_t op, - pixman_image_t *pSrc, - pixman_image_t *pMask, - pixman_image_t *pDst, - pixman_bool_t is_pixbuf) +static const pixman_fast_path_t * +get_fast_path (const pixman_fast_path_t *fast_paths, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + pixman_bool_t is_pixbuf) { - const FastPathInfo *info; + const pixman_fast_path_t *info; for (info = fast_paths; info->op != PIXMAN_OP_NONE; info++) { - pixman_bool_t valid_src = FALSE; - pixman_bool_t valid_mask = FALSE; + pixman_bool_t valid_src = FALSE; + pixman_bool_t valid_mask = FALSE; if (info->op != op) continue; - if ((info->src_format == PIXMAN_solid && pixman_image_can_get_solid (pSrc)) || - (pSrc->type == BITS && info->src_format == pSrc->bits.format)) + if ((info->src_format == PIXMAN_solid && + _pixman_image_is_solid (src_image)) || + (src_image->type == BITS && + info->src_format == src_image->bits.format)) { valid_src = TRUE; } @@ -574,28 +547,29 @@ get_fast_path (const FastPathInfo *fast_paths, if (!valid_src) continue; - if ((info->mask_format == PIXMAN_null && !pMask) || - (pMask && pMask->type == BITS && info->mask_format == pMask->bits.format)) + if ((info->mask_format == PIXMAN_null && !mask_image) || + (mask_image && mask_image->type == BITS && + info->mask_format == mask_image->bits.format)) { valid_mask = TRUE; if (info->flags & NEED_SOLID_MASK) { - if (!pMask || !mask_is_solid (pMask)) + if (!mask_image || !mask_is_solid (mask_image)) valid_mask = FALSE; } if (info->flags & NEED_COMPONENT_ALPHA) { - if (!pMask || !pMask->common.component_alpha) + if (!mask_image || !mask_image->common.component_alpha) valid_mask = FALSE; } } if (!valid_mask) continue; - - if (info->dest_format != pDst->bits.format) + + if (info->dest_format != dst_image->bits.format) continue; if ((info->flags & NEED_PIXBUF) && !is_pixbuf) @@ -607,72 +581,142 @@ get_fast_path (const FastPathInfo *fast_paths, return NULL; } +static force_inline pixman_bool_t +image_covers (pixman_image_t *image, + 
pixman_box32_t *extents, + int x, + int y) +{ + if (image->common.type == BITS && + image->common.repeat == PIXMAN_REPEAT_NONE) + { + if (x > extents->x1 || y > extents->y1 || + x + image->bits.width < extents->x2 || + y + image->bits.height < extents->y2) + { + return FALSE; + } + } + + return TRUE; +} + +static force_inline pixman_bool_t +sources_cover (pixman_image_t *src, + pixman_image_t *mask, + pixman_box32_t *extents, + int src_x, + int src_y, + int mask_x, + int mask_y, + int dest_x, + int dest_y) +{ + if (!image_covers (src, extents, dest_x - src_x, dest_y - src_y)) + return FALSE; + + if (!mask) + return TRUE; + + if (!image_covers (mask, extents, dest_x - mask_x, dest_y - mask_y)) + return FALSE; + + return TRUE; +} + pixman_bool_t -_pixman_run_fast_path (const FastPathInfo *paths, - pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t *src, - pixman_image_t *mask, - pixman_image_t *dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +_pixman_run_fast_path (const pixman_fast_path_t *paths, + pixman_implementation_t * imp, + pixman_op_t op, + pixman_image_t * src, + pixman_image_t * mask, + pixman_image_t * dest, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) { pixman_composite_func_t func = NULL; - pixman_bool_t src_repeat = src->common.repeat == PIXMAN_REPEAT_NORMAL; - pixman_bool_t mask_repeat = mask && mask->common.repeat == PIXMAN_REPEAT_NORMAL; - - if ((src->type == BITS || pixman_image_can_get_solid (src)) && - (!mask || mask->type == BITS) - && !src->common.transform && !(mask && mask->common.transform) - && !(mask && mask->common.alpha_map) && !src->common.alpha_map && !dest->common.alpha_map - && (src->common.filter != PIXMAN_FILTER_CONVOLUTION) - && (src->common.repeat != PIXMAN_REPEAT_PAD) - && (src->common.repeat != PIXMAN_REPEAT_REFLECT) - && (!mask || (mask->common.filter != PIXMAN_FILTER_CONVOLUTION && - mask->common.repeat != PIXMAN_REPEAT_PAD && - mask->common.repeat != PIXMAN_REPEAT_REFLECT)) - && !src->common.read_func && !src->common.write_func - && !(mask && mask->common.read_func) - && !(mask && mask->common.write_func) - && !dest->common.read_func - && !dest->common.write_func) + pixman_bool_t src_repeat = + src->common.repeat == PIXMAN_REPEAT_NORMAL; + pixman_bool_t mask_repeat = + mask && mask->common.repeat == PIXMAN_REPEAT_NORMAL; + pixman_bool_t result; + pixman_bool_t has_fast_path; + + has_fast_path = !dest->common.alpha_map && + !dest->bits.read_func && + !dest->bits.write_func; + + if (has_fast_path) + { + has_fast_path = (src->type == BITS || _pixman_image_is_solid (src)) && + !src->common.transform && + !src->common.alpha_map && + src->common.filter != PIXMAN_FILTER_CONVOLUTION && + src->common.repeat != PIXMAN_REPEAT_PAD && + src->common.repeat != PIXMAN_REPEAT_REFLECT; + if (has_fast_path && src->type == BITS) + { + has_fast_path = !src->bits.read_func && + !src->bits.write_func && + !PIXMAN_FORMAT_IS_WIDE (src->bits.format); + } + } + + if (mask && has_fast_path) + { + has_fast_path = + mask->type == BITS && + !mask->common.transform && + !mask->common.alpha_map && + !mask->bits.read_func && + !mask->bits.write_func && + mask->common.filter != PIXMAN_FILTER_CONVOLUTION && + mask->common.repeat != PIXMAN_REPEAT_PAD && + mask->common.repeat != PIXMAN_REPEAT_REFLECT && + !PIXMAN_FORMAT_IS_WIDE (mask->bits.format); + } + + if (has_fast_path) { - 
const FastPathInfo *info; + const pixman_fast_path_t *info; pixman_bool_t pixbuf; pixbuf = - src && src->type == BITS && - mask && mask->type == BITS && - src->bits.bits == mask->bits.bits && - src_x == mask_x && - src_y == mask_y && - !mask->common.component_alpha && + src && src->type == BITS && + mask && mask->type == BITS && + src->bits.bits == mask->bits.bits && + src_x == mask_x && + src_y == mask_y && + !mask->common.component_alpha && !mask_repeat; - + info = get_fast_path (paths, op, src, mask, dest, pixbuf); if (info) { func = info->func; - + if (info->src_format == PIXMAN_solid) src_repeat = FALSE; - if (info->mask_format == PIXMAN_solid || info->flags & NEED_SOLID_MASK) + if (info->mask_format == PIXMAN_solid || + info->flags & NEED_SOLID_MASK) + { mask_repeat = FALSE; + } - if ((src_repeat && - src->bits.width == 1 && - src->bits.height == 1) || - (mask_repeat && - mask->bits.width == 1 && - mask->bits.height == 1)) + if ((src_repeat && + src->bits.width == 1 && + src->bits.height == 1) || + (mask_repeat && + mask->bits.width == 1 && + mask->bits.height == 1)) { /* If src or mask are repeating 1x1 images and src_repeat or * mask_repeat are still TRUE, it means the fast path we @@ -687,17 +731,107 @@ _pixman_run_fast_path (const FastPathInfo *paths, } } + result = FALSE; + if (func) { - _pixman_walk_composite_region (imp, op, - src, mask, dest, - src_x, src_y, mask_x, mask_y, - dest_x, dest_y, - width, height, - src_repeat, mask_repeat, - func); - return TRUE; + pixman_region32_t region; + pixman_region32_init (®ion); + + if (pixman_compute_composite_region32 ( + ®ion, src, mask, dest, + src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height)) + { + pixman_box32_t *extents = pixman_region32_extents (®ion); + + if (sources_cover ( + src, mask, extents, + src_x, src_y, mask_x, mask_y, dest_x, dest_y)) + { + walk_region_internal (imp, op, + src, mask, dest, + src_x, src_y, mask_x, mask_y, + dest_x, dest_y, + width, height, + src_repeat, mask_repeat, + ®ion, + func); + + result = TRUE; + } + + pixman_region32_fini (®ion); + } } - - return FALSE; + + return result; +} + +#define N_TMP_BOXES (16) + +pixman_bool_t +pixman_region16_copy_from_region32 (pixman_region16_t *dst, + pixman_region32_t *src) +{ + int n_boxes, i; + pixman_box32_t *boxes32; + pixman_box16_t *boxes16; + pixman_bool_t retval; + + boxes32 = pixman_region32_rectangles (src, &n_boxes); + + boxes16 = pixman_malloc_ab (n_boxes, sizeof (pixman_box16_t)); + + if (!boxes16) + return FALSE; + + for (i = 0; i < n_boxes; ++i) + { + boxes16[i].x1 = boxes32[i].x1; + boxes16[i].y1 = boxes32[i].y1; + boxes16[i].x2 = boxes32[i].x2; + boxes16[i].y2 = boxes32[i].y2; + } + + pixman_region_fini (dst); + retval = pixman_region_init_rects (dst, boxes16, n_boxes); + free (boxes16); + return retval; +} + +pixman_bool_t +pixman_region32_copy_from_region16 (pixman_region32_t *dst, + pixman_region16_t *src) +{ + int n_boxes, i; + pixman_box16_t *boxes16; + pixman_box32_t *boxes32; + pixman_box32_t tmp_boxes[N_TMP_BOXES]; + pixman_bool_t retval; + + boxes16 = pixman_region_rectangles (src, &n_boxes); + + if (n_boxes > N_TMP_BOXES) + boxes32 = pixman_malloc_ab (n_boxes, sizeof (pixman_box32_t)); + else + boxes32 = tmp_boxes; + + if (!boxes32) + return FALSE; + + for (i = 0; i < n_boxes; ++i) + { + boxes32[i].x1 = boxes16[i].x1; + boxes32[i].y1 = boxes16[i].y1; + boxes32[i].x2 = boxes16[i].x2; + boxes32[i].y2 = boxes16[i].y2; + } + + pixman_region32_fini (dst); + retval = pixman_region32_init_rects (dst, boxes32, n_boxes); + + if (boxes32 
!= tmp_boxes) + free (boxes32); + + return retval; } diff --git a/lib/pixman/pixman/pixman-vmx.c b/lib/pixman/pixman/pixman-vmx.c index e371f7f52..06325a7c0 100644 --- a/lib/pixman/pixman/pixman-vmx.c +++ b/lib/pixman/pixman/pixman-vmx.c @@ -33,27 +33,31 @@ #define AVV(x...) {x} static force_inline vector unsigned int -splat_alpha (vector unsigned int pix) { +splat_alpha (vector unsigned int pix) +{ return vec_perm (pix, pix, - (vector unsigned char)AVV(0x00,0x00,0x00,0x00, 0x04,0x04,0x04,0x04, - 0x08,0x08,0x08,0x08, 0x0C,0x0C,0x0C,0x0C)); + (vector unsigned char)AVV ( + 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04, + 0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C)); } static force_inline vector unsigned int pix_multiply (vector unsigned int p, vector unsigned int a) { vector unsigned short hi, lo, mod; + /* unpack to short */ hi = (vector unsigned short) - vec_mergeh ((vector unsigned char)AVV(0), - (vector unsigned char)p); + vec_mergeh ((vector unsigned char)AVV (0), + (vector unsigned char)p); + mod = (vector unsigned short) - vec_mergeh ((vector unsigned char)AVV(0), - (vector unsigned char)a); + vec_mergeh ((vector unsigned char)AVV (0), + (vector unsigned char)a); hi = vec_mladd (hi, mod, (vector unsigned short) - AVV(0x0080,0x0080,0x0080,0x0080, - 0x0080,0x0080,0x0080,0x0080)); + AVV (0x0080, 0x0080, 0x0080, 0x0080, + 0x0080, 0x0080, 0x0080, 0x0080)); hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8))); @@ -61,15 +65,15 @@ pix_multiply (vector unsigned int p, vector unsigned int a) /* unpack to short */ lo = (vector unsigned short) - vec_mergel ((vector unsigned char)AVV(0), - (vector unsigned char)p); + vec_mergel ((vector unsigned char)AVV (0), + (vector unsigned char)p); mod = (vector unsigned short) - vec_mergel ((vector unsigned char)AVV(0), - (vector unsigned char)a); + vec_mergel ((vector unsigned char)AVV (0), + (vector unsigned char)a); lo = vec_mladd (lo, mod, (vector unsigned short) - AVV(0x0080,0x0080,0x0080,0x0080, - 0x0080,0x0080,0x0080,0x0080)); + AVV (0x0080, 0x0080, 0x0080, 0x0080, + 0x0080, 0x0080, 0x0080, 0x0080)); lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8))); @@ -82,63 +86,21 @@ static force_inline vector unsigned int pix_add (vector unsigned int a, vector unsigned int b) { return (vector unsigned int)vec_adds ((vector unsigned char)a, - (vector unsigned char)b); + (vector unsigned char)b); } static force_inline vector unsigned int -pix_add_mul (vector unsigned int x, vector unsigned int a, - vector unsigned int y, vector unsigned int b) +pix_add_mul (vector unsigned int x, + vector unsigned int a, + vector unsigned int y, + vector unsigned int b) { - vector unsigned short hi, lo, mod, hiy, loy, mody; + vector unsigned int t1, t2; - hi = (vector unsigned short) - vec_mergeh ((vector unsigned char)AVV(0), - (vector unsigned char)x); - mod = (vector unsigned short) - vec_mergeh ((vector unsigned char)AVV(0), - (vector unsigned char)a); - hiy = (vector unsigned short) - vec_mergeh ((vector unsigned char)AVV(0), - (vector unsigned char)y); - mody = (vector unsigned short) - vec_mergeh ((vector unsigned char)AVV(0), - (vector unsigned char)b); - - hi = vec_mladd (hi, mod, (vector unsigned short) - AVV(0x0080,0x0080,0x0080,0x0080, - 0x0080,0x0080,0x0080,0x0080)); + t1 = pix_multiply (x, a); + t2 = pix_multiply (y, b); - hi = vec_mladd (hiy, mody, hi); - - hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8))); - - hi = vec_sr (hi, vec_splat_u16 (8)); - - lo = (vector unsigned short) - vec_mergel ((vector unsigned char)AVV(0), - (vector unsigned char)x); - mod = 
(vector unsigned short) - vec_mergel ((vector unsigned char)AVV(0), - (vector unsigned char)a); - - loy = (vector unsigned short) - vec_mergel ((vector unsigned char)AVV(0), - (vector unsigned char)y); - mody = (vector unsigned short) - vec_mergel ((vector unsigned char)AVV(0), - (vector unsigned char)b); - - lo = vec_mladd (lo, mod, (vector unsigned short) - AVV(0x0080,0x0080,0x0080,0x0080, - 0x0080,0x0080,0x0080,0x0080)); - - lo = vec_mladd (loy, mody, lo); - - lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8))); - - lo = vec_sr (lo, vec_splat_u16 (8)); - - return (vector unsigned int)vec_packsu (hi, lo); + return pix_add (t1, t2); } static force_inline vector unsigned int @@ -146,1450 +108,1536 @@ negate (vector unsigned int src) { return vec_nor (src, src); } + /* dest*~srca + src */ static force_inline vector unsigned int -over (vector unsigned int src, vector unsigned int srca, +over (vector unsigned int src, + vector unsigned int srca, vector unsigned int dest) { vector unsigned char tmp = (vector unsigned char) - pix_multiply (dest, negate (srca)); + pix_multiply (dest, negate (srca)); + tmp = vec_adds ((vector unsigned char)src, tmp); return (vector unsigned int)tmp; } /* in == pix_multiply */ -#define in_over(src, srca, mask, dest) over (pix_multiply (src, mask),\ - pix_multiply (srca, mask), dest) +#define in_over(src, srca, mask, dest) \ + over (pix_multiply (src, mask), \ + pix_multiply (srca, mask), dest) -#define COMPUTE_SHIFT_MASK(source) \ +#define COMPUTE_SHIFT_MASK(source) \ source ## _mask = vec_lvsl (0, source); -#define COMPUTE_SHIFT_MASKS(dest, source) \ - dest ## _mask = vec_lvsl (0, dest); \ - source ## _mask = vec_lvsl (0, source); \ +#define COMPUTE_SHIFT_MASKS(dest, source) \ + dest ## _mask = vec_lvsl (0, dest); \ + source ## _mask = vec_lvsl (0, source); \ store_mask = vec_lvsr (0, dest); -#define COMPUTE_SHIFT_MASKC(dest, source, mask) \ - mask ## _mask = vec_lvsl (0, mask); \ - dest ## _mask = vec_lvsl (0, dest); \ - source ## _mask = vec_lvsl (0, source); \ +#define COMPUTE_SHIFT_MASKC(dest, source, mask) \ + mask ## _mask = vec_lvsl (0, mask); \ + dest ## _mask = vec_lvsl (0, dest); \ + source ## _mask = vec_lvsl (0, source); \ store_mask = vec_lvsr (0, dest); /* notice you have to declare temp vars... * Note: tmp3 and tmp4 must remain untouched! 
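 * (tmp3 and tmp4 hold the two aligned loads covering the unaligned
 * destination; STORE_VECTOR re-merges their edge bytes around the
 * computed pixels).  A combiner is therefore expected to declare the
 * full set, as the functions below do:
 *
 *     vector unsigned int vdest, vsrc;
 *     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 *                          dest_mask, src_mask, store_mask;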
*/ -#define LOAD_VECTORS(dest, source) \ - tmp1 = (typeof(tmp1))vec_ld(0, source); \ - tmp2 = (typeof(tmp2))vec_ld(15, source); \ - tmp3 = (typeof(tmp3))vec_ld(0, dest); \ - v ## source = (typeof(v ## source)) \ - vec_perm(tmp1, tmp2, source ## _mask); \ - tmp4 = (typeof(tmp4))vec_ld(15, dest); \ - v ## dest = (typeof(v ## dest)) \ - vec_perm(tmp3, tmp4, dest ## _mask); - -#define LOAD_VECTORSC(dest, source, mask) \ - tmp1 = (typeof(tmp1))vec_ld(0, source); \ - tmp2 = (typeof(tmp2))vec_ld(15, source); \ - tmp3 = (typeof(tmp3))vec_ld(0, dest); \ - v ## source = (typeof(v ## source)) \ - vec_perm(tmp1, tmp2, source ## _mask); \ - tmp4 = (typeof(tmp4))vec_ld(15, dest); \ - tmp1 = (typeof(tmp1))vec_ld(0, mask); \ - v ## dest = (typeof(v ## dest)) \ - vec_perm(tmp3, tmp4, dest ## _mask); \ - tmp2 = (typeof(tmp2))vec_ld(15, mask); \ - v ## mask = (typeof(v ## mask)) \ - vec_perm(tmp1, tmp2, mask ## _mask); - -#define LOAD_VECTORSM(dest, source, mask) \ - LOAD_VECTORSC(dest, source, mask) \ - v ## source = pix_multiply(v ## source, \ - splat_alpha (v ## mask)); - -#define STORE_VECTOR(dest) \ - edges = vec_perm (tmp4, tmp3, dest ## _mask); \ - tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \ - tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \ - vec_st ((vector unsigned int) tmp3, 15, dest ); \ - vec_st ((vector unsigned int) tmp1, 0, dest ); - -static FASTCALL void -vmxCombineOverUnomask (uint32_t *dest, const uint32_t *src, int width) +#define LOAD_VECTORS(dest, source) \ + tmp1 = (typeof(tmp1))vec_ld (0, source); \ + tmp2 = (typeof(tmp2))vec_ld (15, source); \ + tmp3 = (typeof(tmp3))vec_ld (0, dest); \ + v ## source = (typeof(v ## source)) \ + vec_perm (tmp1, tmp2, source ## _mask); \ + tmp4 = (typeof(tmp4))vec_ld (15, dest); \ + v ## dest = (typeof(v ## dest)) \ + vec_perm (tmp3, tmp4, dest ## _mask); + +#define LOAD_VECTORSC(dest, source, mask) \ + tmp1 = (typeof(tmp1))vec_ld (0, source); \ + tmp2 = (typeof(tmp2))vec_ld (15, source); \ + tmp3 = (typeof(tmp3))vec_ld (0, dest); \ + v ## source = (typeof(v ## source)) \ + vec_perm (tmp1, tmp2, source ## _mask); \ + tmp4 = (typeof(tmp4))vec_ld (15, dest); \ + tmp1 = (typeof(tmp1))vec_ld (0, mask); \ + v ## dest = (typeof(v ## dest)) \ + vec_perm (tmp3, tmp4, dest ## _mask); \ + tmp2 = (typeof(tmp2))vec_ld (15, mask); \ + v ## mask = (typeof(v ## mask)) \ + vec_perm (tmp1, tmp2, mask ## _mask); + +#define LOAD_VECTORSM(dest, source, mask) \ + LOAD_VECTORSC (dest, source, mask) \ + v ## source = pix_multiply (v ## source, \ + splat_alpha (v ## mask)); + +#define STORE_VECTOR(dest) \ + edges = vec_perm (tmp4, tmp3, dest ## _mask); \ + tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \ + tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \ + vec_st ((vector unsigned int) tmp3, 15, dest); \ + vec_st ((vector unsigned int) tmp1, 0, dest); + +static void +vmx_combine_over_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + dest_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKS(dest, src) + COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { + for (i = width / 4; i > 0; i--) + { - LOAD_VECTORS(dest, src) + LOAD_VECTORS (dest, src); - vdest = over (vsrc, splat_alpha (vsrc), vdest); + vdest = over (vsrc, splat_alpha (vsrc), vdest); - 
STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; + src += 4; + dest += 4; } - for (i = width%4; --i >=0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t ia = Alpha (~s); + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t ia = ALPHA_8 (~s); - FbByteMulAdd (d, ia, s); - dest[i] = d; + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + + dest[i] = d; } } -static FASTCALL void -vmxCombineOverUmask (uint32_t *dest, - const uint32_t *src, - const uint32_t *mask, - int width) +static void +vmx_combine_over_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + dest_mask, src_mask, mask_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSM(dest, src, mask); + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); - vdest = over (vsrc, splat_alpha (vsrc), vdest); + vdest = over (vsrc, splat_alpha (vsrc), vdest); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t m = Alpha (mask[i]); - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t ia; + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t ia; - FbByteMul (s, m); + UN8x4_MUL_UN8 (s, m); - ia = Alpha (~s); + ia = ALPHA_8 (~s); - FbByteMulAdd (d, ia, s); - dest[i] = d; + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + dest[i] = d; } } -static FASTCALL void -vmxCombineOverU(pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, - int width) +static void +vmx_combine_over_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { if (mask) - vmxCombineOverUmask(dest, src, mask, width); + vmx_combine_over_u_mask (dest, src, mask, width); else - vmxCombineOverUnomask(dest, src, width); + vmx_combine_over_u_no_mask (dest, src, width); } -static FASTCALL void -vmxCombineOverReverseUnomask (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_over_reverse_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + dest_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKS(dest, src) + COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { + for (i = width / 4; i > 0; i--) + { - LOAD_VECTORS(dest, src) + LOAD_VECTORS (dest, src); - vdest = over (vdest, splat_alpha (vdest) , vsrc); + vdest = over (vdest, splat_alpha (vdest), vsrc); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; + src += 4; + dest += 4; } - for (i = width%4; --i >=0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t ia = Alpha (~dest[i]); + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t ia = ALPHA_8 (~dest[i]); - FbByteMulAdd (s, ia, d); - dest[i] = s; + UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); + dest[i] = s; } } -static FASTCALL void 
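
Every combiner in this file bottoms out in the over() primitive defined earlier: per channel, dest' = src + dest * (255 - src_a) / 255, evaluated on premultiplied pixels. The same cleanup lets pix_add_mul collapse into two pix_multiply calls plus a saturating pix_add instead of the open-coded unpack/vec_mladd sequence it replaces. A minimal scalar restatement of one channel follows — illustrative only, not code from the patch; the bias-and-fold idiom is the standard rounding division by 255 that the vector code performs with vec_mladd against 0x0080 followed by the shift-and-add:

    #include <stdint.h>

    /* One premultiplied channel of OVER: dest * (255 - src_a) / 255 + src.
     * Adding 0x80 and folding the high byte back in rounds the division
     * by 255; for valid premultiplied input the final sum cannot exceed
     * 255 (the vector code uses a saturating vec_adds regardless). */
    static uint8_t
    over_channel (uint8_t src, uint8_t src_alpha, uint8_t dest)
    {
        uint16_t t = (uint16_t) dest * (255 - src_alpha) + 0x80;

        return (uint8_t) (src + ((t + (t >> 8)) >> 8));
    }
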
-vmxCombineOverReverseUmask (uint32_t *dest, - const uint32_t *src, - const uint32_t *mask, - int width) +static void +vmx_combine_over_reverse_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + dest_mask, src_mask, mask_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { + for (i = width / 4; i > 0; i--) + { - LOAD_VECTORSM(dest, src, mask) + LOAD_VECTORSM (dest, src, mask); - vdest = over (vdest, splat_alpha (vdest) , vsrc); + vdest = over (vdest, splat_alpha (vdest), vsrc); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t m = Alpha (mask[i]); - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t ia = Alpha (~dest[i]); + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t ia = ALPHA_8 (~dest[i]); - FbByteMul (s, m); + UN8x4_MUL_UN8 (s, m); - FbByteMulAdd (s, ia, d); - dest[i] = s; + UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d); + dest[i] = s; } } -static FASTCALL void -vmxCombineOverReverseU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, - const uint32_t *mask, int width) +static void +vmx_combine_over_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { if (mask) - vmxCombineOverReverseUmask(dest, src, mask, width); + vmx_combine_over_reverse_u_mask (dest, src, mask, width); else - vmxCombineOverReverseUnomask(dest, src, width); + vmx_combine_over_reverse_u_no_mask (dest, src, width); } -static FASTCALL void -vmxCombineInUnomask (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_in_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + dest_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKS(dest, src) + COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS(dest, src) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORS (dest, src); - vdest = pix_multiply (vsrc, splat_alpha (vdest)); + vdest = pix_multiply (vsrc, splat_alpha (vdest)); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; + src += 4; + dest += 4; } - for (i = width%4; --i >=0;) { + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t a = ALPHA_8 (dest[i]); - uint32_t s = src[i]; - uint32_t a = Alpha (dest[i]); - FbByteMul (s, a); - dest[i] = s; + UN8x4_MUL_UN8 (s, a); + dest[i] = s; } } -static FASTCALL void -vmxCombineInUmask (uint32_t *dest, - const uint32_t *src, - const uint32_t *mask, - int width) +static void +vmx_combine_in_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + dest_mask, src_mask, mask_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, 
mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSM(dest, src, mask) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); - vdest = pix_multiply (vsrc, splat_alpha (vdest)); + vdest = pix_multiply (vsrc, splat_alpha (vdest)); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t m = Alpha (mask[i]); - uint32_t s = src[i]; - uint32_t a = Alpha (dest[i]); + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t a = ALPHA_8 (dest[i]); - FbByteMul (s, m); + UN8x4_MUL_UN8 (s, m); + UN8x4_MUL_UN8 (s, a); - FbByteMul (s, a); - dest[i] = s; + dest[i] = s; } } -static FASTCALL void -vmxCombineInU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, - int width) +static void +vmx_combine_in_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { if (mask) - vmxCombineInUmask(dest, src, mask, width); + vmx_combine_in_u_mask (dest, src, mask, width); else - vmxCombineInUnomask(dest, src, width); + vmx_combine_in_u_no_mask (dest, src, width); } -static FASTCALL void -vmxCombineInReverseUnomask (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_in_reverse_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + dest_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKS(dest, src) + COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS(dest, src) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORS (dest, src); - vdest = pix_multiply (vdest, splat_alpha (vsrc)); + vdest = pix_multiply (vdest, splat_alpha (vsrc)); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; + src += 4; + dest += 4; } - for (i = width%4; --i >=0;) { - uint32_t d = dest[i]; - uint32_t a = Alpha (src[i]); - FbByteMul (d, a); - dest[i] = d; + for (i = width % 4; --i >= 0;) + { + uint32_t d = dest[i]; + uint32_t a = ALPHA_8 (src[i]); + + UN8x4_MUL_UN8 (d, a); + + dest[i] = d; } } -static FASTCALL void -vmxCombineInReverseUmask (uint32_t *dest, - const uint32_t *src, - const uint32_t *mask, - int width) +static void +vmx_combine_in_reverse_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + dest_mask, src_mask, mask_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSM(dest, src, mask) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); - vdest = pix_multiply (vdest, splat_alpha (vsrc)); + vdest = pix_multiply (vdest, splat_alpha (vsrc)); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t m = Alpha (mask[i]); - uint32_t d = dest[i]; - uint32_t a = src[i]; + for (i = width % 4; --i >= 0;) 
+ { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t d = dest[i]; + uint32_t a = src[i]; - FbByteMul (a, m); + UN8x4_MUL_UN8 (a, m); + a = ALPHA_8 (a); + UN8x4_MUL_UN8 (d, a); - a = Alpha (a); - FbByteMul (d, a); - dest[i] = d; + dest[i] = d; } } -static FASTCALL void -vmxCombineInReverseU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, - const uint32_t *mask, int width) +static void +vmx_combine_in_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { if (mask) - vmxCombineInReverseUmask(dest, src, mask, width); + vmx_combine_in_reverse_u_mask (dest, src, mask, width); else - vmxCombineInReverseUnomask(dest, src, width); + vmx_combine_in_reverse_u_no_mask (dest, src, width); } -static FASTCALL void -vmxCombineOutUnomask (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_out_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + dest_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKS(dest, src) + COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS(dest, src) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORS (dest, src); - vdest = pix_multiply (vsrc, splat_alpha (negate (vdest))); + vdest = pix_multiply (vsrc, splat_alpha (negate (vdest))); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; + src += 4; + dest += 4; } - for (i = width%4; --i >=0;) { - uint32_t s = src[i]; - uint32_t a = Alpha (~dest[i]); - FbByteMul (s, a); - dest[i] = s; + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t a = ALPHA_8 (~dest[i]); + + UN8x4_MUL_UN8 (s, a); + + dest[i] = s; } } -static FASTCALL void -vmxCombineOutUmask (uint32_t *dest, - const uint32_t *src, - const uint32_t *mask, - int width) +static void +vmx_combine_out_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + dest_mask, src_mask, mask_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSM(dest, src, mask) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); - vdest = pix_multiply (vsrc, splat_alpha (negate (vdest))); + vdest = pix_multiply (vsrc, splat_alpha (negate (vdest))); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t m = Alpha (mask[i]); - uint32_t s = src[i]; - uint32_t a = Alpha (~dest[i]); + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t a = ALPHA_8 (~dest[i]); - FbByteMul (s, m); + UN8x4_MUL_UN8 (s, m); + UN8x4_MUL_UN8 (s, a); - FbByteMul (s, a); - dest[i] = s; + dest[i] = s; } } -static FASTCALL void -vmxCombineOutU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, - int width) +static void +vmx_combine_out_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const 
uint32_t * mask, + int width) { if (mask) - vmxCombineOutUmask(dest, src, mask, width); + vmx_combine_out_u_mask (dest, src, mask, width); else - vmxCombineOutUnomask(dest, src, width); + vmx_combine_out_u_no_mask (dest, src, width); } -static FASTCALL void -vmxCombineOutReverseUnomask (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_out_reverse_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + dest_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKS(dest, src) + COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { + for (i = width / 4; i > 0; i--) + { - LOAD_VECTORS(dest, src) + LOAD_VECTORS (dest, src); - vdest = pix_multiply (vdest, splat_alpha (negate (vsrc))); + vdest = pix_multiply (vdest, splat_alpha (negate (vsrc))); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; + src += 4; + dest += 4; } - for (i = width%4; --i >=0;) { - uint32_t d = dest[i]; - uint32_t a = Alpha (~src[i]); - FbByteMul (d, a); - dest[i] = d; + for (i = width % 4; --i >= 0;) + { + uint32_t d = dest[i]; + uint32_t a = ALPHA_8 (~src[i]); + + UN8x4_MUL_UN8 (d, a); + + dest[i] = d; } } -static FASTCALL void -vmxCombineOutReverseUmask (uint32_t *dest, - const uint32_t *src, - const uint32_t *mask, - int width) +static void +vmx_combine_out_reverse_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + dest_mask, src_mask, mask_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSM(dest, src, mask) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); - vdest = pix_multiply (vdest, splat_alpha (negate (vsrc))); + vdest = pix_multiply (vdest, splat_alpha (negate (vsrc))); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t m = Alpha (mask[i]); - uint32_t d = dest[i]; - uint32_t a = src[i]; + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t d = dest[i]; + uint32_t a = src[i]; - FbByteMul (a, m); + UN8x4_MUL_UN8 (a, m); + a = ALPHA_8 (~a); + UN8x4_MUL_UN8 (d, a); - a = Alpha (~a); - FbByteMul (d, a); - dest[i] = d; + dest[i] = d; } } -static FASTCALL void -vmxCombineOutReverseU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, - const uint32_t *src, - const uint32_t *mask, - int width) +static void +vmx_combine_out_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { if (mask) - vmxCombineOutReverseUmask(dest, src, mask, width); + vmx_combine_out_reverse_u_mask (dest, src, mask, width); else - vmxCombineOutReverseUnomask(dest, src, width); + vmx_combine_out_reverse_u_no_mask (dest, src, width); } -static FASTCALL void -vmxCombineAtopUnomask (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_atop_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) { int i; - vector unsigned int vdest, vsrc; + vector 
unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + dest_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKS(dest, src) + COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS(dest, src) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORS (dest, src); - vdest = pix_add_mul (vsrc, splat_alpha (vdest), - vdest, splat_alpha (negate (vsrc))); + vdest = pix_add_mul (vsrc, splat_alpha (vdest), + vdest, splat_alpha (negate (vsrc))); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; + src += 4; + dest += 4; } - for (i = width%4; --i >=0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t dest_a = Alpha (d); - uint32_t src_ia = Alpha (~s); + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t dest_a = ALPHA_8 (d); + uint32_t src_ia = ALPHA_8 (~s); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); - FbByteAddMul (s, dest_a, d, src_ia); - dest[i] = s; + dest[i] = s; } } -static FASTCALL void -vmxCombineAtopUmask (uint32_t *dest, - const uint32_t *src, - const uint32_t *mask, - int width) +static void +vmx_combine_atop_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + dest_mask, src_mask, mask_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSM(dest, src, mask) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); - vdest = pix_add_mul (vsrc, splat_alpha (vdest), - vdest, splat_alpha (negate (vsrc))); + vdest = pix_add_mul (vsrc, splat_alpha (vdest), + vdest, splat_alpha (negate (vsrc))); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t m = Alpha (mask[i]); - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t dest_a = Alpha (d); - uint32_t src_ia; + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t dest_a = ALPHA_8 (d); + uint32_t src_ia; + + UN8x4_MUL_UN8 (s, m); - FbByteMul (s, m); + src_ia = ALPHA_8 (~s); - src_ia = Alpha (~s); + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia); - FbByteAddMul (s, dest_a, d, src_ia); - dest[i] = s; + dest[i] = s; } } -static FASTCALL void -vmxCombineAtopU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, - const uint32_t *src, - const uint32_t *mask, - int width) +static void +vmx_combine_atop_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { if (mask) - vmxCombineAtopUmask(dest, src, mask, width); + vmx_combine_atop_u_mask (dest, src, mask, width); else - vmxCombineAtopUnomask(dest, src, width); + vmx_combine_atop_u_no_mask (dest, src, width); } -static FASTCALL void -vmxCombineAtopReverseUnomask (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_atop_reverse_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - 
dest_mask, src_mask, store_mask; + dest_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKS(dest, src) + COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS(dest, src) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORS (dest, src); - vdest = pix_add_mul (vdest, splat_alpha (vsrc), - vsrc, splat_alpha (negate (vdest))); + vdest = pix_add_mul (vdest, splat_alpha (vsrc), + vsrc, splat_alpha (negate (vdest))); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; + src += 4; + dest += 4; } - for (i = width%4; --i >=0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t src_a = Alpha (s); - uint32_t dest_ia = Alpha (~d); + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t src_a = ALPHA_8 (s); + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); - FbByteAddMul (s, dest_ia, d, src_a); - dest[i] = s; + dest[i] = s; } } -static FASTCALL void -vmxCombineAtopReverseUmask (uint32_t *dest, - const uint32_t *src, - const uint32_t *mask, - int width) +static void +vmx_combine_atop_reverse_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + dest_mask, src_mask, mask_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSM(dest, src, mask) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); - vdest = pix_add_mul (vdest, splat_alpha (vsrc), - vsrc, splat_alpha (negate (vdest))); + vdest = pix_add_mul (vdest, splat_alpha (vsrc), + vsrc, splat_alpha (negate (vdest))); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t m = Alpha (mask[i]); - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t src_a; - uint32_t dest_ia = Alpha (~d); + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t src_a; + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8 (s, m); - FbByteMul (s, m); + src_a = ALPHA_8 (s); - src_a = Alpha (s); + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a); - FbByteAddMul (s, dest_ia, d, src_a); - dest[i] = s; + dest[i] = s; } } -static FASTCALL void -vmxCombineAtopReverseU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, - const uint32_t *src, - const uint32_t *mask, - int width) +static void +vmx_combine_atop_reverse_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { if (mask) - vmxCombineAtopReverseUmask(dest, src, mask, width); + vmx_combine_atop_reverse_u_mask (dest, src, mask, width); else - vmxCombineAtopReverseUnomask(dest, src, width); + vmx_combine_atop_reverse_u_no_mask (dest, src, width); } -static FASTCALL void -vmxCombineXorUnomask (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_xor_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + dest_mask, 
src_mask, store_mask; - COMPUTE_SHIFT_MASKS(dest, src) + COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS (dest, src) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORS (dest, src); - vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), - vdest, splat_alpha (negate (vsrc))); + vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), + vdest, splat_alpha (negate (vsrc))); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; + src += 4; + dest += 4; } - for (i = width%4; --i >=0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t src_ia = Alpha (~s); - uint32_t dest_ia = Alpha (~d); + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t src_ia = ALPHA_8 (~s); + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); - FbByteAddMul (s, dest_ia, d, src_ia); - dest[i] = s; + dest[i] = s; } } -static FASTCALL void -vmxCombineXorUmask (uint32_t *dest, - const uint32_t *src, - const uint32_t *mask, - int width) +static void +vmx_combine_xor_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + dest_mask, src_mask, mask_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSM(dest, src, mask) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); - vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), - vdest, splat_alpha (negate (vsrc))); + vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)), + vdest, splat_alpha (negate (vsrc))); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t m = Alpha (mask[i]); - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t src_ia; - uint32_t dest_ia = Alpha (~d); + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t src_ia; + uint32_t dest_ia = ALPHA_8 (~d); + + UN8x4_MUL_UN8 (s, m); - FbByteMul (s, m); + src_ia = ALPHA_8 (~s); - src_ia = Alpha (~s); + UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia); - FbByteAddMul (s, dest_ia, d, src_ia); - dest[i] = s; + dest[i] = s; } } -static FASTCALL void -vmxCombineXorU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, - const uint32_t *src, - const uint32_t *mask, - int width) +static void +vmx_combine_xor_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { if (mask) - vmxCombineXorUmask(dest, src, mask, width); + vmx_combine_xor_u_mask (dest, src, mask, width); else - vmxCombineXorUnomask(dest, src, width); + vmx_combine_xor_u_no_mask (dest, src, width); } -static FASTCALL void -vmxCombineAddUnomask (uint32_t *dest, const uint32_t *src, int width) +static void +vmx_combine_add_u_no_mask (uint32_t * dest, + const uint32_t *src, + int width) { int i; - vector unsigned int vdest, vsrc; + vector unsigned int vdest, vsrc; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, store_mask; + dest_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKS(dest, src) + 
COMPUTE_SHIFT_MASKS (dest, src); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORS(dest, src) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORS (dest, src); - vdest = pix_add (vsrc, vdest); + vdest = pix_add (vsrc, vdest); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; + src += 4; + dest += 4; } - for (i = width%4; --i >=0;) { - uint32_t s = src[i]; - uint32_t d = dest[i]; - FbByteAdd (d, s); - dest[i] = d; + for (i = width % 4; --i >= 0;) + { + uint32_t s = src[i]; + uint32_t d = dest[i]; + + UN8x4_ADD_UN8x4 (d, s); + + dest[i] = d; } } -static FASTCALL void -vmxCombineAddUmask (uint32_t *dest, - const uint32_t *src, - const uint32_t *mask, - int width) +static void +vmx_combine_add_u_mask (uint32_t * dest, + const uint32_t *src, + const uint32_t *mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, src_mask, mask_mask, store_mask; + dest_mask, src_mask, mask_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSM(dest, src, mask) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSM (dest, src, mask); - vdest = pix_add (vsrc, vdest); + vdest = pix_add (vsrc, vdest); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t m = Alpha (mask[i]); - uint32_t s = src[i]; - uint32_t d = dest[i]; + for (i = width % 4; --i >= 0;) + { + uint32_t m = ALPHA_8 (mask[i]); + uint32_t s = src[i]; + uint32_t d = dest[i]; - FbByteMul (s, m); + UN8x4_MUL_UN8 (s, m); + UN8x4_ADD_UN8x4 (d, s); - FbByteAdd (d, s); - dest[i] = d; + dest[i] = d; } } -static FASTCALL void -vmxCombineAddU (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, - const uint32_t *src, - const uint32_t *mask, - int width) +static void +vmx_combine_add_u (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { if (mask) - vmxCombineAddUmask(dest, src, mask, width); + vmx_combine_add_u_mask (dest, src, mask, width); else - vmxCombineAddUnomask(dest, src, width); + vmx_combine_add_u_no_mask (dest, src, width); } -static FASTCALL void -vmxCombineSrcC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +vmx_combine_src_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + dest_mask, mask_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask); - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { + COMPUTE_SHIFT_MASKC (dest, src, mask); - LOAD_VECTORSC(dest, src, mask) + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); - vdest = pix_multiply (vsrc, vmask); + vdest = pix_multiply (vsrc, vmask); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - mask+=4; - src+=4; - dest+=4; + mask += 4; + src += 4; + dest += 4; } - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - 
FbByteMulC (s, a); - dest[i] = s; + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + + UN8x4_MUL_UN8x4 (s, a); + + dest[i] = s; } } -static FASTCALL void -vmxCombineOverC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +vmx_combine_over_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + dest_mask, mask_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask); - /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { + COMPUTE_SHIFT_MASKC (dest, src, mask); - LOAD_VECTORSC(dest, src, mask) + /* printf ("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); - vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest); + vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - mask+=4; - src+=4; - dest+=4; + mask += 4; + src += 4; + dest += 4; } - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - FbByteMulC (s, a); - FbByteMulAddC (d, ~a, s); - dest[i] = d; + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (s); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s); + + dest[i] = d; } } -static FASTCALL void -vmxCombineOverReverseC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +vmx_combine_over_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + dest_mask, mask_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask); - /* printf("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { + COMPUTE_SHIFT_MASKC (dest, src, mask); - LOAD_VECTORSC (dest, src, mask) + /* printf("%s\n",__PRETTY_FUNCTION__); */ + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); - vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask)); + vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask)); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - mask+=4; - src+=4; - dest+=4; + mask += 4; + src += 4; + dest += 4; } - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t da = Alpha (d); - FbByteMulC (s, a); - FbByteMulAddC (s, ~da, d); - dest[i] = s; + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t ida = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d); + + dest[i] = s; } } -static FASTCALL void -vmxCombineInC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +vmx_combine_in_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const 
uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + dest_mask, mask_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC(dest, src, mask) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); - vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest)); + vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest)); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t da = Alpha (dest[i]); - FbByteMul (s, a); - FbByteMul (s, da); - dest[i] = s; + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t da = ALPHA_8 (dest[i]); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (s, da); + + dest[i] = s; } } -static FASTCALL void -vmxCombineInReverseC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +vmx_combine_in_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + dest_mask, mask_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { + for (i = width / 4; i > 0; i--) + { - LOAD_VECTORSC(dest, src, mask) + LOAD_VECTORSC (dest, src, mask); - vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc))); + vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc))); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t d = dest[i]; - uint32_t sa = Alpha (src[i]); - FbByteMul (a, sa); - FbByteMulC (d, a); - dest[i] = d; + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (src[i]); + + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4 (d, a); + + dest[i] = d; } } -static FASTCALL void -vmxCombineOutC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +vmx_combine_out_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + dest_mask, mask_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC(dest, src, mask) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); - vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest)); + vdest = pix_multiply ( + pix_multiply 
(vsrc, vmask), splat_alpha (negate (vdest))); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t da = Alpha (~d); - FbByteMulC (s, a); - FbByteMulC (s, da); - dest[i] = s; + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t da = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (s, da); + + dest[i] = s; } } -static FASTCALL void -vmxCombineOutReverseC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +vmx_combine_out_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + dest_mask, mask_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC(dest, src, mask) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); - vdest = pix_multiply (vdest, - negate (pix_multiply (vmask, splat_alpha (vsrc)))); + vdest = pix_multiply ( + vdest, negate (pix_multiply (vmask, splat_alpha (vsrc)))); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t sa = Alpha (s); - FbByteMulC (a, sa); - FbByteMulC (d, ~a); - dest[i] = d; + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (s); + + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4 (d, ~a); + + dest[i] = d; } } -static FASTCALL void -vmxCombineAtopC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +vmx_combine_atop_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask, vsrca; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + dest_mask, mask_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); + + vsrca = splat_alpha (vsrc); - LOAD_VECTORSC(dest, src, mask) + vsrc = pix_multiply (vsrc, vmask); + vmask = pix_multiply (vmask, vsrca); - vdest = pix_add_mul (pix_multiply (vsrc, vmask), splat_alpha (vdest), - vdest, - negate (pix_multiply (vmask, - splat_alpha (vmask)))); + vdest = pix_add_mul (vsrc, splat_alpha (vdest), + negate (vmask), vdest); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t sa = Alpha (s); - uint32_t da = Alpha (d); - - FbByteMulC (s, a); - FbByteMul (a, 
sa); - FbByteAddMulC (d, ~a, s, da); - dest[i] = d; + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); + + dest[i] = d; } } -static FASTCALL void -vmxCombineAtopReverseC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +vmx_combine_atop_reverse_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + dest_mask, mask_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC(dest, src, mask) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); - vdest = pix_add_mul (vdest, - pix_multiply (vmask, splat_alpha (vsrc)), - pix_multiply (vsrc, vmask), - negate (splat_alpha (vdest))); + vdest = pix_add_mul (vdest, + pix_multiply (vmask, splat_alpha (vsrc)), + pix_multiply (vsrc, vmask), + negate (splat_alpha (vdest))); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t sa = Alpha (s); - uint32_t da = Alpha (d); - - FbByteMulC (s, a); - FbByteMul (a, sa); - FbByteAddMulC (d, a, s, ~da); - dest[i] = d; + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da); + + dest[i] = d; } } -static FASTCALL void -vmxCombineXorC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +vmx_combine_xor_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + dest_mask, mask_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC(dest, src, mask) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); - vdest = pix_add_mul (vdest, - negate (pix_multiply (vmask, splat_alpha (vsrc))), - pix_multiply (vsrc, vmask), - negate (splat_alpha (vdest))); + vdest = pix_add_mul (vdest, + negate (pix_multiply (vmask, splat_alpha (vsrc))), + pix_multiply (vsrc, vmask), + negate (splat_alpha (vdest))); - STORE_VECTOR(dest) + STORE_VECTOR (dest); - src+=4; - dest+=4; - mask+=4; + src += 4; + dest += 4; + mask += 4; } - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; - uint32_t sa = Alpha (s); - uint32_t da = Alpha (d); - - FbByteMulC (s, a); - FbByteMul (a, sa); - 
FbByteAddMulC (d, ~a, s, ~da); - dest[i] = d; + for (i = width % 4; --i >= 0;) + { + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; + uint32_t sa = ALPHA_8 (s); + uint32_t da = ALPHA_8 (~d); + + UN8x4_MUL_UN8x4 (s, a); + UN8x4_MUL_UN8 (a, sa); + UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da); + + dest[i] = d; } } -static FASTCALL void -vmxCombineAddC (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) +static void +vmx_combine_add_ca (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, + int width) { int i; - vector unsigned int vdest, vsrc, vmask; + vector unsigned int vdest, vsrc, vmask; vector unsigned char tmp1, tmp2, tmp3, tmp4, edges, - dest_mask, mask_mask, src_mask, store_mask; + dest_mask, mask_mask, src_mask, store_mask; - COMPUTE_SHIFT_MASKC(dest, src, mask) + COMPUTE_SHIFT_MASKC (dest, src, mask); /* printf ("%s\n",__PRETTY_FUNCTION__); */ - for (i = width/4; i > 0; i--) { - - LOAD_VECTORSC(dest, src, mask) - - vdest = pix_add (pix_multiply (vsrc, vmask), vdest); - - STORE_VECTOR(dest) + for (i = width / 4; i > 0; i--) + { + LOAD_VECTORSC (dest, src, mask); - src+=4; - dest+=4; - mask+=4; - } + vdest = pix_add (pix_multiply (vsrc, vmask), vdest); - for (i = width%4; --i >=0;) { - uint32_t a = mask[i]; - uint32_t s = src[i]; - uint32_t d = dest[i]; + STORE_VECTOR (dest); - FbByteMulC (s, a); - FbByteAdd (s, d); - dest[i] = s; + src += 4; + dest += 4; + mask += 4; } -} - - -#if 0 -void -fbCompositeSolid_nx8888vmx (pixman_operator_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height) -{ - uint32_t src; - uint32_t *dstLine, *dst; - int dstStride; - - fbComposeGetSolid (pSrc, pDst, src); - - if (src >> 24 == 0) - return; - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - - while (height--) + for (i = width % 4; --i >= 0;) { - dst = dstLine; - dstLine += dstStride; - /* XXX vmxCombineOverU (dst, src, width); */ - } -} + uint32_t a = mask[i]; + uint32_t s = src[i]; + uint32_t d = dest[i]; -void -fbCompositeSolid_nx0565vmx (pixman_operator_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height) -{ - uint32_t src; - uint16_t *dstLine, *dst; - uint16_t w; - int dstStride; - - fbComposeGetSolid (pSrc, pDst, src); - - if (src >> 24 == 0) - return; + UN8x4_MUL_UN8x4 (s, a); + UN8x4_ADD_UN8x4 (s, d); - fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); - - while (height--) - { - dst = dstLine; - dstLine += dstStride; - vmxCombineOverU565(dst, src, width); + dest[i] = s; } } -static const FastPathInfo vmx_fast_path_array[] = -{ - { PIXMAN_OP_NONE }, -}; - -const FastPathInfo *const vmx_fast_paths = vmx_fast_path_array; - -#endif - pixman_implementation_t * -_pixman_implementation_create_vmx (pixman_implementation_t *toplevel) +_pixman_implementation_create_vmx (void) { - pixman_implementation_t *fast = _pixman_implementation_create_fast_path (NULL); - pixman_implementation_t *imp = _pixman_implementation_create (toplevel, fast); + pixman_implementation_t *fast = _pixman_implementation_create_fast_path (); + pixman_implementation_t *imp = _pixman_implementation_create (fast); /* 
Set up function pointers */ - - /* SSE code patch for fbcompose.c */ - imp->combine_32[PIXMAN_OP_OVER] = vmxCombineOverU; - imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmxCombineOverReverseU; - imp->combine_32[PIXMAN_OP_IN] = vmxCombineInU; - imp->combine_32[PIXMAN_OP_IN_REVERSE] = vmxCombineInReverseU; - imp->combine_32[PIXMAN_OP_OUT] = vmxCombineOutU; - imp->combine_32[PIXMAN_OP_OUT_REVERSE] = vmxCombineOutReverseU; - imp->combine_32[PIXMAN_OP_ATOP] = vmxCombineAtopU; - imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmxCombineAtopReverseU; - imp->combine_32[PIXMAN_OP_XOR] = vmxCombineXorU; - - imp->combine_32[PIXMAN_OP_ADD] = vmxCombineAddU; - - imp->combine_32_ca[PIXMAN_OP_SRC] = vmxCombineSrcC; - imp->combine_32_ca[PIXMAN_OP_OVER] = vmxCombineOverC; - imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmxCombineOverReverseC; - imp->combine_32_ca[PIXMAN_OP_IN] = vmxCombineInC; - imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = vmxCombineInReverseC; - imp->combine_32_ca[PIXMAN_OP_OUT] = vmxCombineOutC; - imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = vmxCombineOutReverseC; - imp->combine_32_ca[PIXMAN_OP_ATOP] = vmxCombineAtopC; - imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmxCombineAtopReverseC; - imp->combine_32_ca[PIXMAN_OP_XOR] = vmxCombineXorC; - imp->combine_32_ca[PIXMAN_OP_ADD] = vmxCombineAddC; - + + imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u; + imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_u; + imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u; + imp->combine_32[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_u; + imp->combine_32[PIXMAN_OP_OUT] = vmx_combine_out_u; + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_u; + imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u; + imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u; + imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u; + + imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u; + + imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca; + imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca; + imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_IN] = vmx_combine_in_ca; + imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_OUT] = vmx_combine_out_ca; + imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP] = vmx_combine_atop_ca; + imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_ca; + imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca; + imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca; + return imp; } - diff --git a/lib/pixman/pixman/pixman-x64-mmx-emulation.h b/lib/pixman/pixman/pixman-x64-mmx-emulation.h new file mode 100644 index 000000000..378019cf2 --- /dev/null +++ b/lib/pixman/pixman/pixman-x64-mmx-emulation.h @@ -0,0 +1,263 @@ +#ifndef MMX_X64_H_INCLUDED +#define MMX_X64_H_INCLUDED + +/* Implementation of x64 MMX substitition functions, before + * pixman is reimplemented not to use __m64 type on Visual C++ + * + * Copyright (C)2009 by George Yohng + * Released in public domain. 
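
The emulation header that follows exists because 64-bit Visual C++ provides no MMX intrinsics at all, so each __m64 operation is re-expressed as ordinary 64-bit integer arithmetic, mostly SWAR (SIMD within a register). The saturating byte add is the densest of these tricks; restated as a self-contained function with the same logic as the _m_paddusb shim below (renamed here for illustration):

    #include <stdint.h>

    /* Eight byte-wise saturating adds in two 64-bit additions.  Even and
     * odd bytes are widened into alternating 16-bit lanes, added, and any
     * lane that carried into bit 8 is forced to 0xFF before the lanes are
     * re-interleaved. */
    static uint64_t
    paddusb_swar (uint64_t a, uint64_t b)
    {
        uint64_t even = (a & 0x00FF00FF00FF00FFULL) +
                        (b & 0x00FF00FF00FF00FFULL);
        uint64_t odd  = ((a >> 8) & 0x00FF00FF00FF00FFULL) +
                        ((b >> 8) & 0x00FF00FF00FF00FFULL);

        /* lanes whose sum overflowed a byte saturate to 0xFF */
        even |= ((even & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;
        odd  |= ((odd  & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;

        return (even & 0x00FF00FF00FF00FFULL) |
               ((odd & 0x00FF00FF00FF00FFULL) << 8);
    }

Splitting even and odd bytes into alternating 16-bit lanes leaves headroom for the carry bit, so all eight saturations are detected without branching.
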
+ */ + +#include <intrin.h> + +#define M64C(a) (*(const __m64 *)(&a)) +#define M64U(a) (*(const unsigned long long *)(&a)) + +__inline __m64 +_m_from_int (int a) +{ + long long i64 = a; + + return M64C (i64); +} + +__inline __m64 +_mm_setzero_si64 () +{ + long long i64 = 0; + + return M64C (i64); +} + +__inline __m64 +_mm_set_pi32 (int i1, int i0) +{ + unsigned long long i64 = ((unsigned)i0) + (((unsigned long long)(unsigned)i1) << 32); + + return M64C (i64); +} + +__inline void +_m_empty () +{ +} + +__inline __m64 +_mm_set1_pi16 (short w) +{ + unsigned long long i64 = ((unsigned long long)(unsigned short)(w)) * 0x0001000100010001ULL; + + return M64C (i64); +} + +__inline int +_m_to_int (__m64 m) +{ + return m.m64_i32[0]; +} + +__inline __m64 +_mm_movepi64_pi64 (__m128i a) +{ + return M64C (a.m128i_i64[0]); +} + +__inline __m64 +_m_pand (__m64 a, __m64 b) +{ + unsigned long long i64 = M64U (a) & M64U (b); + + return M64C (i64); +} + +__inline __m64 +_m_por (__m64 a, __m64 b) +{ + unsigned long long i64 = M64U (a) | M64U (b); + + return M64C (i64); +} + +__inline __m64 +_m_pxor (__m64 a, __m64 b) +{ + unsigned long long i64 = M64U (a) ^ M64U (b); + + return M64C (i64); +} + +__inline __m64 +_m_pmulhuw (__m64 a, __m64 b) /* unoptimized */ +{ + unsigned short d[4] = + { + (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]) >> 16), + (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]) >> 16), + (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]) >> 16), + (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]) >> 16) + }; + + return M64C (d[0]); +} + +__inline __m64 +_m_pmullw2 (__m64 a, __m64 b) /* unoptimized */ +{ + unsigned short d[4] = + { + (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])), + (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1])), + (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2])), + (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3])) + }; + + return M64C (d[0]); +} + +__inline __m64 +_m_pmullw (__m64 a, __m64 b) /* unoptimized */ +{ + unsigned long long x = + ((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]))) + + (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]))) << 16) + + (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]))) << 32) + + (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))) << 48); + + return M64C (x); +} + +__inline __m64 +_m_paddusb (__m64 a, __m64 b) /* unoptimized */ +{ + unsigned long long x = (M64U (a) & 0x00FF00FF00FF00FFULL) + + (M64U (b) & 0x00FF00FF00FF00FFULL); + + unsigned long long y = ((M64U (a) >> 8) & 0x00FF00FF00FF00FFULL) + + ((M64U (b) >> 8) & 0x00FF00FF00FF00FFULL); + + x |= ((x & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF; + y |= ((y & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF; + + x = (x & 0x00FF00FF00FF00FFULL) | ((y & 0x00FF00FF00FF00FFULL) << 8); + + return M64C (x); +} + +__inline __m64 +_m_paddusw (__m64 a, __m64 b) /* unoptimized */ +{ + unsigned long long x = (M64U (a) & 0x0000FFFF0000FFFFULL) + + (M64U (b) & 0x0000FFFF0000FFFFULL); + + unsigned long long y = ((M64U (a) >> 16) & 0x0000FFFF0000FFFFULL) + + ((M64U (b) >> 16) & 0x0000FFFF0000FFFFULL); + + x |= ((x & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF; + y |= ((y & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF; + + x = (x & 0x0000FFFF0000FFFFULL) | ((y & 0x0000FFFF0000FFFFULL) << 16); + + return M64C (x); +} + +__inline __m64 +_m_pshufw (__m64 a, int n) /* unoptimized */ +{ + unsigned short d[4] = + { + a.m64_u16[n & 3], + 
a.m64_u16[(n >> 2) & 3], + a.m64_u16[(n >> 4) & 3], + a.m64_u16[(n >> 6) & 3] + }; + + return M64C (d[0]); +} + +__inline unsigned char +sat16 (unsigned short d) +{ + if (d > 0xFF) return 0xFF; + else return d & 0xFF; +} + +__inline __m64 +_m_packuswb (__m64 m1, __m64 m2) /* unoptimized */ +{ + unsigned char d[8] = + { + sat16 (m1.m64_u16[0]), + sat16 (m1.m64_u16[1]), + sat16 (m1.m64_u16[2]), + sat16 (m1.m64_u16[3]), + sat16 (m2.m64_u16[0]), + sat16 (m2.m64_u16[1]), + sat16 (m2.m64_u16[2]), + sat16 (m2.m64_u16[3]) + }; + + return M64C (d[0]); +} + +__inline __m64 _m_punpcklbw (__m64 m1, __m64 m2) /* unoptimized */ +{ + unsigned char d[8] = + { + m1.m64_u8[0], + m2.m64_u8[0], + m1.m64_u8[1], + m2.m64_u8[1], + m1.m64_u8[2], + m2.m64_u8[2], + m1.m64_u8[3], + m2.m64_u8[3], + }; + + return M64C (d[0]); +} + +__inline __m64 _m_punpckhbw (__m64 m1, __m64 m2) /* unoptimized */ +{ + unsigned char d[8] = + { + m1.m64_u8[4], + m2.m64_u8[4], + m1.m64_u8[5], + m2.m64_u8[5], + m1.m64_u8[6], + m2.m64_u8[6], + m1.m64_u8[7], + m2.m64_u8[7], + }; + + return M64C (d[0]); +} + +__inline __m64 _m_psrlwi (__m64 a, int n) /* unoptimized */ +{ + unsigned short d[4] = + { + a.m64_u16[0] >> n, + a.m64_u16[1] >> n, + a.m64_u16[2] >> n, + a.m64_u16[3] >> n + }; + + return M64C (d[0]); +} + +__inline __m64 _m_psrlqi (__m64 m, int n) +{ + unsigned long long x = M64U (m) >> n; + + return M64C (x); +} + +__inline __m64 _m_psllqi (__m64 m, int n) +{ + unsigned long long x = M64U (m) << n; + + return M64C (x); +} + +#endif /* MMX_X64_H_INCLUDED */ diff --git a/lib/pixman/pixman/pixman.c b/lib/pixman/pixman/pixman.c new file mode 100644 index 000000000..0edd967cf --- /dev/null +++ b/lib/pixman/pixman/pixman.c @@ -0,0 +1,543 @@ +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Author: Keith Packard, SuSE, Inc. 
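+ * + * This file holds the top-level entry points: pixman_image_composite() validates its images, strength-reduces the operator when the source or destination is opaque, applies the clip-region workaround for old X servers, and then dispatches to the implementation returned by _pixman_choose_implementation(); pixman_blt() and pixman_fill() dispatch directly.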
+ */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include "pixman-private.h" + +/* + * Operator optimizations based on source or destination opacity + */ +typedef struct +{ + pixman_op_t op; + pixman_op_t op_src_dst_opaque; + pixman_op_t op_src_opaque; + pixman_op_t op_dst_opaque; +} optimized_operator_info_t; + +static const optimized_operator_info_t optimized_operators[] = +{ + /* Input Operator SRC&DST Opaque SRC Opaque DST Opaque */ + { PIXMAN_OP_OVER, PIXMAN_OP_SRC, PIXMAN_OP_SRC, PIXMAN_OP_OVER }, + { PIXMAN_OP_OVER_REVERSE, PIXMAN_OP_DST, PIXMAN_OP_OVER_REVERSE, PIXMAN_OP_DST }, + { PIXMAN_OP_IN, PIXMAN_OP_SRC, PIXMAN_OP_IN, PIXMAN_OP_SRC }, + { PIXMAN_OP_IN_REVERSE, PIXMAN_OP_DST, PIXMAN_OP_DST, PIXMAN_OP_IN_REVERSE }, + { PIXMAN_OP_OUT, PIXMAN_OP_CLEAR, PIXMAN_OP_OUT, PIXMAN_OP_CLEAR }, + { PIXMAN_OP_OUT_REVERSE, PIXMAN_OP_CLEAR, PIXMAN_OP_CLEAR, PIXMAN_OP_OUT_REVERSE }, + { PIXMAN_OP_ATOP, PIXMAN_OP_SRC, PIXMAN_OP_IN, PIXMAN_OP_OVER }, + { PIXMAN_OP_ATOP_REVERSE, PIXMAN_OP_DST, PIXMAN_OP_OVER_REVERSE, PIXMAN_OP_IN_REVERSE }, + { PIXMAN_OP_XOR, PIXMAN_OP_CLEAR, PIXMAN_OP_OUT, PIXMAN_OP_OUT_REVERSE }, + { PIXMAN_OP_SATURATE, PIXMAN_OP_DST, PIXMAN_OP_OVER_REVERSE, PIXMAN_OP_DST }, + { PIXMAN_OP_NONE } +}; + +static pixman_implementation_t *imp; + +/* + * Check if the current operator could be optimized + */ +static const optimized_operator_info_t* +pixman_operator_can_be_optimized (pixman_op_t op) +{ + const optimized_operator_info_t *info; + + for (info = optimized_operators; info->op != PIXMAN_OP_NONE; info++) + { + if (info->op == op) + return info; + } + return NULL; +} + +/* + * Optimize the current operator based on opacity of source or destination + * The output operator should be mathematically equivalent to the source. + */ +static pixman_op_t +pixman_optimize_operator (pixman_op_t op, + pixman_image_t *src_image, + pixman_image_t *mask_image, + pixman_image_t *dst_image) +{ + pixman_bool_t is_source_opaque; + pixman_bool_t is_dest_opaque; + const optimized_operator_info_t *info = pixman_operator_can_be_optimized (op); + + if (!info || mask_image) + return op; + + is_source_opaque = _pixman_image_is_opaque (src_image); + is_dest_opaque = _pixman_image_is_opaque (dst_image); + + if (is_source_opaque == FALSE && is_dest_opaque == FALSE) + return op; + + if (is_source_opaque && is_dest_opaque) + return info->op_src_dst_opaque; + else if (is_source_opaque) + return info->op_src_opaque; + else if (is_dest_opaque) + return info->op_dst_opaque; + + return op; + +} + +static void +apply_workaround (pixman_image_t *image, + int16_t * x, + int16_t * y, + uint32_t ** save_bits, + int * save_dx, + int * save_dy) +{ + /* Some X servers generate images that point to the + * wrong place in memory, but then set the clip region + * to point to the right place. Because of an old bug + * in pixman, this would actually work. 
+ * + * Here we try and undo the damage + */ + int bpp = PIXMAN_FORMAT_BPP (image->bits.format) / 8; + pixman_box32_t *extents; + uint8_t *t; + int dx, dy; + + extents = pixman_region32_extents (&(image->common.clip_region)); + dx = extents->x1; + dy = extents->y1; + + *save_bits = image->bits.bits; + + *x -= dx; + *y -= dy; + pixman_region32_translate (&(image->common.clip_region), -dx, -dy); + + t = (uint8_t *)image->bits.bits; + t += dy * image->bits.rowstride * 4 + dx * bpp; + image->bits.bits = (uint32_t *)t; + + *save_dx = dx; + *save_dy = dy; +} + +static void +unapply_workaround (pixman_image_t *image, uint32_t *bits, int dx, int dy) +{ + image->bits.bits = bits; + pixman_region32_translate (&image->common.clip_region, dx, dy); +} + +PIXMAN_EXPORT void +pixman_image_composite (pixman_op_t op, + pixman_image_t * src, + pixman_image_t * mask, + pixman_image_t * dest, + int16_t src_x, + int16_t src_y, + int16_t mask_x, + int16_t mask_y, + int16_t dest_x, + int16_t dest_y, + uint16_t width, + uint16_t height) +{ + uint32_t *src_bits; + int src_dx, src_dy; + uint32_t *mask_bits; + int mask_dx, mask_dy; + uint32_t *dest_bits; + int dest_dx, dest_dy; + + _pixman_image_validate (src); + if (mask) + _pixman_image_validate (mask); + _pixman_image_validate (dest); + + /* + * Check if we can replace our operator by a simpler one + * if the src or dest are opaque. The output operator should be + * mathematically equivalent to the source. + */ + op = pixman_optimize_operator(op, src, mask, dest); + if (op == PIXMAN_OP_DST || + op == PIXMAN_OP_CONJOINT_DST || + op == PIXMAN_OP_DISJOINT_DST) + { + return; + } + + if (!imp) + imp = _pixman_choose_implementation (); + + if (src->common.need_workaround) + apply_workaround (src, &src_x, &src_y, &src_bits, &src_dx, &src_dy); + if (mask && mask->common.need_workaround) + apply_workaround (mask, &mask_x, &mask_y, &mask_bits, &mask_dx, &mask_dy); + if (dest->common.need_workaround) + apply_workaround (dest, &dest_x, &dest_y, &dest_bits, &dest_dx, &dest_dy); + + _pixman_implementation_composite (imp, op, + src, mask, dest, + src_x, src_y, + mask_x, mask_y, + dest_x, dest_y, + width, height); + + if (src->common.need_workaround) + unapply_workaround (src, src_bits, src_dx, src_dy); + if (mask && mask->common.need_workaround) + unapply_workaround (mask, mask_bits, mask_dx, mask_dy); + if (dest->common.need_workaround) + unapply_workaround (dest, dest_bits, dest_dx, dest_dy); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_blt (uint32_t *src_bits, + uint32_t *dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dst_x, + int dst_y, + int width, + int height) +{ + if (!imp) + imp = _pixman_choose_implementation (); + + return _pixman_implementation_blt (imp, src_bits, dst_bits, src_stride, dst_stride, + src_bpp, dst_bpp, + src_x, src_y, + dst_x, dst_y, + width, height); +} + +PIXMAN_EXPORT pixman_bool_t +pixman_fill (uint32_t *bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + if (!imp) + imp = _pixman_choose_implementation (); + + return _pixman_implementation_fill (imp, bits, stride, bpp, x, y, width, height, xor); +} + +static uint32_t +color_to_uint32 (const pixman_color_t *color) +{ + return + (color->alpha >> 8 << 24) | + (color->red >> 8 << 16) | + (color->green & 0xff00) | + (color->blue >> 8); +} + +static pixman_bool_t +color_to_pixel (pixman_color_t * color, + uint32_t * pixel, + pixman_format_code_t format) +{ + uint32_t c = color_to_uint32 (color); + + if 
(!(format == PIXMAN_a8r8g8b8 || + format == PIXMAN_x8r8g8b8 || + format == PIXMAN_a8b8g8r8 || + format == PIXMAN_x8b8g8r8 || + format == PIXMAN_b8g8r8a8 || + format == PIXMAN_b8g8r8x8 || + format == PIXMAN_r5g6b5 || + format == PIXMAN_b5g6r5 || + format == PIXMAN_a8)) + { + return FALSE; + } + + if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_ABGR) + { + c = ((c & 0xff000000) >> 0) | + ((c & 0x00ff0000) >> 16) | + ((c & 0x0000ff00) >> 0) | + ((c & 0x000000ff) << 16); + } + if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_BGRA) + { + c = ((c & 0xff000000) >> 24) | + ((c & 0x00ff0000) >> 8) | + ((c & 0x0000ff00) << 8) | + ((c & 0x000000ff) << 24); + } + + if (format == PIXMAN_a8) + c = c >> 24; + else if (format == PIXMAN_r5g6b5 || + format == PIXMAN_b5g6r5) + c = CONVERT_8888_TO_0565 (c); + +#if 0 + printf ("color: %x %x %x %x\n", color->alpha, color->red, color->green, color->blue); + printf ("pixel: %x\n", c); +#endif + + *pixel = c; + return TRUE; +} + +PIXMAN_EXPORT pixman_bool_t +pixman_image_fill_rectangles (pixman_op_t op, + pixman_image_t * dest, + pixman_color_t * color, + int n_rects, + const pixman_rectangle16_t *rects) +{ + pixman_image_t *solid; + pixman_color_t c; + int i; + + _pixman_image_validate (dest); + + if (color->alpha == 0xffff) + { + if (op == PIXMAN_OP_OVER) + op = PIXMAN_OP_SRC; + } + + if (op == PIXMAN_OP_CLEAR) + { + c.red = 0; + c.green = 0; + c.blue = 0; + c.alpha = 0; + + color = &c; + + op = PIXMAN_OP_SRC; + } + + if (op == PIXMAN_OP_SRC) + { + uint32_t pixel; + + if (color_to_pixel (color, &pixel, dest->bits.format)) + { + for (i = 0; i < n_rects; ++i) + { + pixman_region32_t fill_region; + int n_boxes, j; + pixman_box32_t *boxes; + + pixman_region32_init_rect (&fill_region, rects[i].x, rects[i].y, rects[i].width, rects[i].height); + + if (dest->common.have_clip_region) + { + if (!pixman_region32_intersect (&fill_region, + &fill_region, + &dest->common.clip_region)) + return FALSE; + } + + boxes = pixman_region32_rectangles (&fill_region, &n_boxes); + for (j = 0; j < n_boxes; ++j) + { + const pixman_box32_t *box = &(boxes[j]); + pixman_fill (dest->bits.bits, dest->bits.rowstride, PIXMAN_FORMAT_BPP (dest->bits.format), + box->x1, box->y1, box->x2 - box->x1, box->y2 - box->y1, + pixel); + } + + pixman_region32_fini (&fill_region); + } + return TRUE; + } + } + + solid = pixman_image_create_solid_fill (color); + if (!solid) + return FALSE; + + for (i = 0; i < n_rects; ++i) + { + const pixman_rectangle16_t *rect = &(rects[i]); + + pixman_image_composite (op, solid, NULL, dest, + 0, 0, 0, 0, + rect->x, rect->y, + rect->width, rect->height); + } + + pixman_image_unref (solid); + + return TRUE; +} + +/** + * pixman_version: + * + * Returns the version of the pixman library encoded in a single + * integer as per %PIXMAN_VERSION_ENCODE. The encoding ensures that + * later versions compare greater than earlier versions. + * + * A run-time comparison to check that pixman's version is greater than + * or equal to version X.Y.Z could be performed as follows: + * + * <informalexample><programlisting> + * if (pixman_version() >= PIXMAN_VERSION_ENCODE(X,Y,Z)) {...} + * </programlisting></informalexample> + * + * See also pixman_version_string() as well as the compile-time + * equivalents %PIXMAN_VERSION and %PIXMAN_VERSION_STRING. + * + * Return value: the encoded version. 
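+ * + * (PIXMAN_VERSION_ENCODE (major, minor, micro) evaluates to + * major * 10000 + minor * 100 + micro, so this 0.16.6 release is + * encoded as 1606.)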
+ **/ +PIXMAN_EXPORT int +pixman_version (void) +{ + return PIXMAN_VERSION; +} + +/** + * pixman_version_string: + * + * Returns the version of the pixman library as a human-readable string + * of the form "X.Y.Z". + * + * See also pixman_version() as well as the compile-time equivalents + * %PIXMAN_VERSION_STRING and %PIXMAN_VERSION. + * + * Return value: a string containing the version. + **/ +PIXMAN_EXPORT const char* +pixman_version_string (void) +{ + return PIXMAN_VERSION_STRING; +} + +/** + * pixman_format_supported_source: + * @format: A pixman_format_code_t format + * + * Return value: whether the provided format code is a supported + * format for a pixman surface used as a source in + * rendering. + * + * Currently, all pixman_format_code_t values are supported. + **/ +PIXMAN_EXPORT pixman_bool_t +pixman_format_supported_source (pixman_format_code_t format) +{ + switch (format) + { + /* 32 bpp formats */ + case PIXMAN_a2b10g10r10: + case PIXMAN_x2b10g10r10: + case PIXMAN_a2r10g10b10: + case PIXMAN_x2r10g10b10: + case PIXMAN_a8r8g8b8: + case PIXMAN_x8r8g8b8: + case PIXMAN_a8b8g8r8: + case PIXMAN_x8b8g8r8: + case PIXMAN_b8g8r8a8: + case PIXMAN_b8g8r8x8: + case PIXMAN_r8g8b8: + case PIXMAN_b8g8r8: + case PIXMAN_r5g6b5: + case PIXMAN_b5g6r5: + /* 16 bpp formats */ + case PIXMAN_a1r5g5b5: + case PIXMAN_x1r5g5b5: + case PIXMAN_a1b5g5r5: + case PIXMAN_x1b5g5r5: + case PIXMAN_a4r4g4b4: + case PIXMAN_x4r4g4b4: + case PIXMAN_a4b4g4r4: + case PIXMAN_x4b4g4r4: + /* 8bpp formats */ + case PIXMAN_a8: + case PIXMAN_r3g3b2: + case PIXMAN_b2g3r3: + case PIXMAN_a2r2g2b2: + case PIXMAN_a2b2g2r2: + case PIXMAN_c8: + case PIXMAN_g8: + case PIXMAN_x4a4: + /* Collides with PIXMAN_c8 + case PIXMAN_x4c4: + */ + /* Collides with PIXMAN_g8 + case PIXMAN_x4g4: + */ + /* 4bpp formats */ + case PIXMAN_a4: + case PIXMAN_r1g2b1: + case PIXMAN_b1g2r1: + case PIXMAN_a1r1g1b1: + case PIXMAN_a1b1g1r1: + case PIXMAN_c4: + case PIXMAN_g4: + /* 1bpp formats */ + case PIXMAN_a1: + case PIXMAN_g1: + /* YUV formats */ + case PIXMAN_yuy2: + case PIXMAN_yv12: + return TRUE; + + default: + return FALSE; + } +} + +/** + * pixman_format_supported_destination: + * @format: A pixman_format_code_t format + * + * Return value: whether the provided format code is a supported + * format for a pixman surface used as a destination in + * rendering. + * + * Currently, all pixman_format_code_t values are supported + * except for the YUV formats. 
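+ * + * A caller that renders into an arbitrary format can use this to fall + * back to a format that is always writable, for example: + * + * <informalexample><programlisting> + * if (!pixman_format_supported_destination (format)) + *     format = PIXMAN_a8r8g8b8; + * </programlisting></informalexample>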
+ **/ +PIXMAN_EXPORT pixman_bool_t +pixman_format_supported_destination (pixman_format_code_t format) +{ + /* YUV formats cannot be written to at the moment */ + if (format == PIXMAN_yuy2 || format == PIXMAN_yv12) + return FALSE; + + return pixman_format_supported_source (format); +} + diff --git a/lib/pixman/pixman/pixman.h b/lib/pixman/pixman/pixman.h index 29c054a6f..5b90a0c8d 100644 --- a/lib/pixman/pixman/pixman.h +++ b/lib/pixman/pixman/pixman.h @@ -166,147 +166,96 @@ struct pixman_transform /* forward declaration (sorry) */ struct pixman_box16; -void -pixman_transform_init_identity(struct pixman_transform *matrix); - -pixman_bool_t -pixman_transform_point_3d (const struct pixman_transform *transform, - struct pixman_vector *vector); - -pixman_bool_t -pixman_transform_point(const struct pixman_transform *transform, - struct pixman_vector *vector); - -pixman_bool_t -pixman_transform_multiply (struct pixman_transform *dst, - const struct pixman_transform *l, - const struct pixman_transform *r); - -void -pixman_transform_init_scale (struct pixman_transform *t, - pixman_fixed_t sx, - pixman_fixed_t sy); - -pixman_bool_t -pixman_transform_scale(struct pixman_transform *forward, - struct pixman_transform *reverse, - pixman_fixed_t sx, pixman_fixed_t sy); - -void -pixman_transform_init_rotate(struct pixman_transform *t, - pixman_fixed_t cos, - pixman_fixed_t sin); - -pixman_bool_t -pixman_transform_rotate(struct pixman_transform *forward, - struct pixman_transform *reverse, - pixman_fixed_t c, pixman_fixed_t s); - -void -pixman_transform_init_translate(struct pixman_transform *t, - pixman_fixed_t tx, pixman_fixed_t ty); - - -pixman_bool_t -pixman_transform_translate(struct pixman_transform *forward, - struct pixman_transform *reverse, - pixman_fixed_t tx, pixman_fixed_t ty); - -pixman_bool_t -pixman_transform_bounds(const struct pixman_transform *matrix, - struct pixman_box16 *b); - - -pixman_bool_t -pixman_transform_invert (struct pixman_transform *dst, - const struct pixman_transform *src); - -pixman_bool_t -pixman_transform_is_identity(const struct pixman_transform *t); - -pixman_bool_t -pixman_transform_is_scale(const struct pixman_transform *t); - -pixman_bool_t -pixman_transform_is_int_translate(const struct pixman_transform *t); - -pixman_bool_t -pixman_transform_is_inverse (const struct pixman_transform *a, - const struct pixman_transform *b); - +void pixman_transform_init_identity (struct pixman_transform *matrix); +pixman_bool_t pixman_transform_point_3d (const struct pixman_transform *transform, + struct pixman_vector *vector); +pixman_bool_t pixman_transform_point (const struct pixman_transform *transform, + struct pixman_vector *vector); +pixman_bool_t pixman_transform_multiply (struct pixman_transform *dst, + const struct pixman_transform *l, + const struct pixman_transform *r); +void pixman_transform_init_scale (struct pixman_transform *t, + pixman_fixed_t sx, + pixman_fixed_t sy); +pixman_bool_t pixman_transform_scale (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t sx, + pixman_fixed_t sy); +void pixman_transform_init_rotate (struct pixman_transform *t, + pixman_fixed_t cos, + pixman_fixed_t sin); +pixman_bool_t pixman_transform_rotate (struct pixman_transform *forward, + struct pixman_transform *reverse, + pixman_fixed_t c, + pixman_fixed_t s); +void pixman_transform_init_translate (struct pixman_transform *t, + pixman_fixed_t tx, + pixman_fixed_t ty); +pixman_bool_t pixman_transform_translate (struct pixman_transform *forward, + 
struct pixman_transform *reverse, + pixman_fixed_t tx, + pixman_fixed_t ty); +pixman_bool_t pixman_transform_bounds (const struct pixman_transform *matrix, + struct pixman_box16 *b); +pixman_bool_t pixman_transform_invert (struct pixman_transform *dst, + const struct pixman_transform *src); +pixman_bool_t pixman_transform_is_identity (const struct pixman_transform *t); +pixman_bool_t pixman_transform_is_scale (const struct pixman_transform *t); +pixman_bool_t pixman_transform_is_int_translate (const struct pixman_transform *t); +pixman_bool_t pixman_transform_is_inverse (const struct pixman_transform *a, + const struct pixman_transform *b); /* * Floating point matrices */ -struct pixman_f_vector { +struct pixman_f_vector +{ double v[3]; }; -struct pixman_f_transform { +struct pixman_f_transform +{ double m[3][3]; }; -pixman_bool_t -pixman_transform_from_pixman_f_transform (struct pixman_transform *t, - const struct pixman_f_transform *ft); - -void -pixman_f_transform_from_pixman_transform (struct pixman_f_transform *ft, - const struct pixman_transform *t); - -pixman_bool_t -pixman_transform_from_pixman_f_transform (struct pixman_transform *t, - const struct pixman_f_transform *ft); - -pixman_bool_t -pixman_f_transform_invert (struct pixman_f_transform *dst, - const struct pixman_f_transform *src); - -pixman_bool_t -pixman_f_transform_point (const struct pixman_f_transform *t, - struct pixman_f_vector *v); - -void -pixman_f_transform_point_3d (const struct pixman_f_transform *t, - struct pixman_f_vector *v); - +pixman_bool_t pixman_transform_from_pixman_f_transform (struct pixman_transform *t, + const struct pixman_f_transform *ft); +void pixman_f_transform_from_pixman_transform (struct pixman_f_transform *ft, + const struct pixman_transform *t); +pixman_bool_t pixman_f_transform_invert (struct pixman_f_transform *dst, + const struct pixman_f_transform *src); +pixman_bool_t pixman_f_transform_point (const struct pixman_f_transform *t, + struct pixman_f_vector *v); +void pixman_f_transform_point_3d (const struct pixman_f_transform *t, + struct pixman_f_vector *v); +void pixman_f_transform_multiply (struct pixman_f_transform *dst, + const struct pixman_f_transform *l, + const struct pixman_f_transform *r); +void pixman_f_transform_init_scale (struct pixman_f_transform *t, + double sx, + double sy); +pixman_bool_t pixman_f_transform_scale (struct pixman_f_transform *forward, + struct pixman_f_transform *reverse, + double sx, + double sy); +void pixman_f_transform_init_rotate (struct pixman_f_transform *t, + double cos, + double sin); +pixman_bool_t pixman_f_transform_rotate (struct pixman_f_transform *forward, + struct pixman_f_transform *reverse, + double c, + double s); +void pixman_f_transform_init_translate (struct pixman_f_transform *t, + double tx, + double ty); +pixman_bool_t pixman_f_transform_translate (struct pixman_f_transform *forward, + struct pixman_f_transform *reverse, + double tx, + double ty); +pixman_bool_t pixman_f_transform_bounds (const struct pixman_f_transform *t, + struct pixman_box16 *b); +void pixman_f_transform_init_identity (struct pixman_f_transform *t); -void -pixman_f_transform_multiply (struct pixman_f_transform *dst, - const struct pixman_f_transform *l, - const struct pixman_f_transform *r); - -void -pixman_f_transform_init_scale (struct pixman_f_transform *t, double sx, double sy); - -pixman_bool_t -pixman_f_transform_scale (struct pixman_f_transform *forward, - struct pixman_f_transform *reverse, - double sx, double sy); - -void 
-pixman_f_transform_init_rotate (struct pixman_f_transform *t, double cos, double sin); - -pixman_bool_t -pixman_f_transform_rotate (struct pixman_f_transform *forward, - struct pixman_f_transform *reverse, - double c, double s); - -void -pixman_f_transform_init_translate (struct pixman_f_transform *t, double tx, double ty); - -pixman_bool_t -pixman_f_transform_translate (struct pixman_f_transform *forward, - struct pixman_f_transform *reverse, - double tx, double ty); - -pixman_bool_t -pixman_f_transform_bounds (const struct pixman_f_transform *t, struct pixman_box16 *b); - -void -pixman_f_transform_init_identity (struct pixman_f_transform *t); - -/* Don't blame me, blame XRender */ typedef enum { PIXMAN_REPEAT_NONE, @@ -368,6 +317,22 @@ typedef enum PIXMAN_OP_CONJOINT_ATOP_REVERSE = 0x2a, PIXMAN_OP_CONJOINT_XOR = 0x2b, + PIXMAN_OP_MULTIPLY = 0x30, + PIXMAN_OP_SCREEN = 0x31, + PIXMAN_OP_OVERLAY = 0x32, + PIXMAN_OP_DARKEN = 0x33, + PIXMAN_OP_LIGHTEN = 0x34, + PIXMAN_OP_COLOR_DODGE = 0x35, + PIXMAN_OP_COLOR_BURN = 0x36, + PIXMAN_OP_HARD_LIGHT = 0x37, + PIXMAN_OP_SOFT_LIGHT = 0x38, + PIXMAN_OP_DIFFERENCE = 0x39, + PIXMAN_OP_EXCLUSION = 0x3a, + PIXMAN_OP_HSL_HUE = 0x3b, + PIXMAN_OP_HSL_SATURATION = 0x3c, + PIXMAN_OP_HSL_COLOR = 0x3d, + PIXMAN_OP_HSL_LUMINOSITY = 0x3e, + PIXMAN_OP_NONE, PIXMAN_OP_LAST = PIXMAN_OP_NONE } pixman_op_t; @@ -388,8 +353,8 @@ struct pixman_region16_data { struct pixman_rectangle16 { - int16_t x, y; - uint16_t width, height; + int16_t x, y; + uint16_t width, height; }; struct pixman_box16 @@ -400,7 +365,7 @@ struct pixman_box16 struct pixman_region16 { pixman_box16_t extents; - pixman_region16_data_t *data; + pixman_region16_data_t *data; }; typedef enum @@ -410,70 +375,69 @@ typedef enum PIXMAN_REGION_PART } pixman_region_overlap_t; -/* This function exists only to make it possible to preserve the X ABI - it should - * go away at first opportunity. +/* This function exists only to make it possible to preserve + * the X ABI - it should go away at first opportunity. 
*/ -void pixman_region_set_static_pointers (pixman_box16_t *empty_box, - pixman_region16_data_t *empty_data, - pixman_region16_data_t *broken_data); - +void pixman_region_set_static_pointers (pixman_box16_t *empty_box, + pixman_region16_data_t *empty_data, + pixman_region16_data_t *broken_data); /* creation/destruction */ -void pixman_region_init (pixman_region16_t *region); -void pixman_region_init_rect (pixman_region16_t *region, - int x, - int y, - unsigned int width, - unsigned int height); -pixman_bool_t pixman_region_init_rects (pixman_region16_t *region, - pixman_box16_t *boxes, - int count); -void pixman_region_init_with_extents (pixman_region16_t *region, - pixman_box16_t *extents); -void pixman_region_fini (pixman_region16_t *region); +void pixman_region_init (pixman_region16_t *region); +void pixman_region_init_rect (pixman_region16_t *region, + int x, + int y, + unsigned int width, + unsigned int height); +pixman_bool_t pixman_region_init_rects (pixman_region16_t *region, + pixman_box16_t *boxes, + int count); +void pixman_region_init_with_extents (pixman_region16_t *region, + pixman_box16_t *extents); +void pixman_region_fini (pixman_region16_t *region); -/* manipulation */ -void pixman_region_translate (pixman_region16_t *region, - int x, - int y); -pixman_bool_t pixman_region_copy (pixman_region16_t *dest, - pixman_region16_t *source); -pixman_bool_t pixman_region_intersect (pixman_region16_t *newReg, - pixman_region16_t *reg1, - pixman_region16_t *reg2); -pixman_bool_t pixman_region_union (pixman_region16_t *newReg, - pixman_region16_t *reg1, - pixman_region16_t *reg2); -pixman_bool_t pixman_region_union_rect (pixman_region16_t *dest, - pixman_region16_t *source, - int x, - int y, - unsigned int width, - unsigned int height); -pixman_bool_t pixman_region_subtract (pixman_region16_t *regD, - pixman_region16_t *regM, - pixman_region16_t *regS); -pixman_bool_t pixman_region_inverse (pixman_region16_t *newReg, - pixman_region16_t *reg1, - pixman_box16_t *invRect); -pixman_bool_t pixman_region_contains_point (pixman_region16_t *region, - int x, - int y, - pixman_box16_t *box); -pixman_region_overlap_t pixman_region_contains_rectangle (pixman_region16_t *pixman_region16_t, - pixman_box16_t *prect); -pixman_bool_t pixman_region_not_empty (pixman_region16_t *region); -pixman_box16_t * pixman_region_extents (pixman_region16_t *region); -int pixman_region_n_rects (pixman_region16_t *region); -pixman_box16_t * pixman_region_rectangles (pixman_region16_t *region, - int *n_rects); -pixman_bool_t pixman_region_equal (pixman_region16_t *region1, - pixman_region16_t *region2); -pixman_bool_t pixman_region_selfcheck (pixman_region16_t *region); -void pixman_region_reset (pixman_region16_t *region, - pixman_box16_t *box); +/* manipulation */ +void pixman_region_translate (pixman_region16_t *region, + int x, + int y); +pixman_bool_t pixman_region_copy (pixman_region16_t *dest, + pixman_region16_t *source); +pixman_bool_t pixman_region_intersect (pixman_region16_t *new_reg, + pixman_region16_t *reg1, + pixman_region16_t *reg2); +pixman_bool_t pixman_region_union (pixman_region16_t *new_reg, + pixman_region16_t *reg1, + pixman_region16_t *reg2); +pixman_bool_t pixman_region_union_rect (pixman_region16_t *dest, + pixman_region16_t *source, + int x, + int y, + unsigned int width, + unsigned int height); +pixman_bool_t pixman_region_subtract (pixman_region16_t *reg_d, + pixman_region16_t *reg_m, + pixman_region16_t *reg_s); +pixman_bool_t pixman_region_inverse (pixman_region16_t *new_reg, + 
pixman_region16_t *reg1, + pixman_box16_t *inv_rect); +pixman_bool_t pixman_region_contains_point (pixman_region16_t *region, + int x, + int y, + pixman_box16_t *box); +pixman_region_overlap_t pixman_region_contains_rectangle (pixman_region16_t *pixman_region16_t, + pixman_box16_t *prect); +pixman_bool_t pixman_region_not_empty (pixman_region16_t *region); +pixman_box16_t * pixman_region_extents (pixman_region16_t *region); +int pixman_region_n_rects (pixman_region16_t *region); +pixman_box16_t * pixman_region_rectangles (pixman_region16_t *region, + int *n_rects); +pixman_bool_t pixman_region_equal (pixman_region16_t *region1, + pixman_region16_t *region2); +pixman_bool_t pixman_region_selfcheck (pixman_region16_t *region); +void pixman_region_reset (pixman_region16_t *region, + pixman_box16_t *box); /* * 32 bit regions */ @@ -526,10 +490,10 @@ void pixman_region32_translate (pixman_region32_t *r int y); pixman_bool_t pixman_region32_copy (pixman_region32_t *dest, pixman_region32_t *source); -pixman_bool_t pixman_region32_intersect (pixman_region32_t *newReg, +pixman_bool_t pixman_region32_intersect (pixman_region32_t *new_reg, pixman_region32_t *reg1, pixman_region32_t *reg2); -pixman_bool_t pixman_region32_union (pixman_region32_t *newReg, +pixman_bool_t pixman_region32_union (pixman_region32_t *new_reg, pixman_region32_t *reg1, pixman_region32_t *reg2); pixman_bool_t pixman_region32_union_rect (pixman_region32_t *dest, @@ -538,12 +502,12 @@ pixman_bool_t pixman_region32_union_rect (pixman_region32_t *d int y, unsigned int width, unsigned int height); -pixman_bool_t pixman_region32_subtract (pixman_region32_t *regD, - pixman_region32_t *regM, - pixman_region32_t *regS); -pixman_bool_t pixman_region32_inverse (pixman_region32_t *newReg, +pixman_bool_t pixman_region32_subtract (pixman_region32_t *reg_d, + pixman_region32_t *reg_m, + pixman_region32_t *reg_s); +pixman_bool_t pixman_region32_inverse (pixman_region32_t *new_reg, pixman_region32_t *reg1, - pixman_box32_t *invRect); + pixman_box32_t *inv_rect); pixman_bool_t pixman_region32_contains_point (pixman_region32_t *region, int x, int y, @@ -597,6 +561,8 @@ typedef struct pixman_gradient_stop pixman_gradient_stop_t; typedef uint32_t (* pixman_read_memory_func_t) (const void *src, int size); typedef void (* pixman_write_memory_func_t) (void *dst, uint32_t value, int size); +typedef void (* pixman_image_destroy_func_t) (pixman_image_t *image, void *data); + struct pixman_gradient_stop { pixman_fixed_t x; pixman_color_t color; @@ -657,65 +623,67 @@ struct pixman_indexed /* 32bpp formats */ typedef enum { - PIXMAN_a8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,8,8,8,8), - PIXMAN_x8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,8,8,8), - PIXMAN_a8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,8,8,8,8), - PIXMAN_x8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,8,8,8), - PIXMAN_b8g8r8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,8,8,8,8), - PIXMAN_b8g8r8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,0,8,8,8), + PIXMAN_a8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,8,8,8,8), + PIXMAN_x8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,8,8,8), + PIXMAN_a8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,8,8,8,8), + PIXMAN_x8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,8,8,8), + PIXMAN_b8g8r8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,8,8,8,8), + PIXMAN_b8g8r8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,0,8,8,8), + PIXMAN_x2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,10,10,10), + PIXMAN_a2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,2,10,10,10), PIXMAN_x2b10g10r10 = 
PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,10,10,10), PIXMAN_a2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,2,10,10,10), /* 24bpp formats */ - PIXMAN_r8g8b8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8), - PIXMAN_b8g8r8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8), + PIXMAN_r8g8b8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8), + PIXMAN_b8g8r8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8), /* 16bpp formats */ - PIXMAN_r5g6b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,6,5), - PIXMAN_b5g6r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,6,5), + PIXMAN_r5g6b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,6,5), + PIXMAN_b5g6r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,6,5), - PIXMAN_a1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,1,5,5,5), - PIXMAN_x1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,5,5), - PIXMAN_a1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,1,5,5,5), - PIXMAN_x1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,5,5), - PIXMAN_a4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,4,4,4,4), - PIXMAN_x4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,4,4,4), - PIXMAN_a4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,4,4,4,4), - PIXMAN_x4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,4,4,4), + PIXMAN_a1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,1,5,5,5), + PIXMAN_x1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,5,5), + PIXMAN_a1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,1,5,5,5), + PIXMAN_x1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,5,5), + PIXMAN_a4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,4,4,4,4), + PIXMAN_x4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,4,4,4), + PIXMAN_a4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,4,4,4,4), + PIXMAN_x4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,4,4,4), /* 8bpp formats */ - PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0), - PIXMAN_r3g3b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,0,3,3,2), - PIXMAN_b2g3r3 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,0,3,3,2), - PIXMAN_a2r2g2b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,2,2,2,2), - PIXMAN_a2b2g2r2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,2,2,2,2), + PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0), + PIXMAN_r3g3b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,0,3,3,2), + PIXMAN_b2g3r3 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,0,3,3,2), + PIXMAN_a2r2g2b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,2,2,2,2), + PIXMAN_a2b2g2r2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,2,2,2,2), - PIXMAN_c8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0), - PIXMAN_g8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0), + PIXMAN_c8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0), + PIXMAN_g8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0), - PIXMAN_x4a4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,4,0,0,0), + PIXMAN_x4a4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,4,0,0,0), - PIXMAN_x4c4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0), - PIXMAN_x4g4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0), + PIXMAN_x4c4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0), + PIXMAN_x4g4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0), /* 4bpp formats */ - PIXMAN_a4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_A,4,0,0,0), - PIXMAN_r1g2b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,0,1,2,1), - PIXMAN_b1g2r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,0,1,2,1), - PIXMAN_a1r1g1b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,1,1,1,1), - PIXMAN_a1b1g1r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,1,1,1,1), + PIXMAN_a4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_A,4,0,0,0), + PIXMAN_r1g2b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,0,1,2,1), + PIXMAN_b1g2r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,0,1,2,1), + PIXMAN_a1r1g1b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,1,1,1,1), + PIXMAN_a1b1g1r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,1,1,1,1), - PIXMAN_c4 = 
PIXMAN_FORMAT(4,PIXMAN_TYPE_COLOR,0,0,0,0), - PIXMAN_g4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_GRAY,0,0,0,0), + PIXMAN_c4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_COLOR,0,0,0,0), + PIXMAN_g4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_GRAY,0,0,0,0), /* 1bpp formats */ - PIXMAN_a1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0), + PIXMAN_a1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0), - PIXMAN_g1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0), + PIXMAN_g1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0), /* YUV formats */ - PIXMAN_yuy2 = PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0), - PIXMAN_yv12 = PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0) + PIXMAN_yuy2 = PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0), + PIXMAN_yv12 = PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0) } pixman_format_code_t; /* Querying supported format values. */ @@ -748,6 +716,9 @@ pixman_image_t *pixman_image_create_bits (pixman_format_code_t pixman_image_t *pixman_image_ref (pixman_image_t *image); pixman_bool_t pixman_image_unref (pixman_image_t *image); +void pixman_image_set_destroy_function (pixman_image_t *image, + pixman_image_destroy_func_t function, + void *data); /* Set properties */ pixman_bool_t pixman_image_set_clip_region (pixman_image_t *image, @@ -789,16 +760,16 @@ pixman_bool_t pixman_image_fill_rectangles (pixman_op_t op, const pixman_rectangle16_t *rects); /* Composite */ -pixman_bool_t pixman_compute_composite_region (pixman_region16_t *pRegion, - pixman_image_t *pSrc, - pixman_image_t *pMask, - pixman_image_t *pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, +pixman_bool_t pixman_compute_composite_region (pixman_region16_t *region, + pixman_image_t *src_image, + pixman_image_t *mask_image, + pixman_image_t *dst_image, + int16_t src_x, + int16_t src_y, + int16_t mask_x, + int16_t mask_y, + int16_t dest_x, + int16_t dest_y, uint16_t width, uint16_t height); void pixman_image_composite (pixman_op_t op, @@ -814,6 +785,20 @@ void pixman_image_composite (pixman_op_t op, uint16_t width, uint16_t height); +/* Old X servers rely on out-of-bounds accesses when they are asked + * to composite with a window as the source. They create a pixman image + * pointing to some bogus position in memory, but then they set a clip + * region to the position where the actual bits are. + * + * Due to a bug in old versions of pixman, where it would not clip + * against the image bounds when a clip region was set, this would + * actually work. So by default we allow certain out-of-bound access + * to happen unless explicitly disabled. + * + * Fixed X servers should call this function to disable the workaround. 
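+ * + * A fixed server needs to make this call only once, before doing any + * compositing: + * + *     pixman_disable_out_of_bounds_workaround ();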
+ */ +void pixman_disable_out_of_bounds_workaround (void); + /* * Trapezoids */ @@ -831,26 +816,26 @@ struct pixman_edge { pixman_fixed_t x; pixman_fixed_t e; - pixman_fixed_t stepx; - pixman_fixed_t signdx; - pixman_fixed_t dy; - pixman_fixed_t dx; - - pixman_fixed_t stepx_small; - pixman_fixed_t stepx_big; - pixman_fixed_t dx_small; - pixman_fixed_t dx_big; + pixman_fixed_t stepx; + pixman_fixed_t signdx; + pixman_fixed_t dy; + pixman_fixed_t dx; + + pixman_fixed_t stepx_small; + pixman_fixed_t stepx_big; + pixman_fixed_t dx_small; + pixman_fixed_t dx_big; }; struct pixman_trapezoid { - pixman_fixed_t top, bottom; + pixman_fixed_t top, bottom; pixman_line_fixed_t left, right; }; /* whether 't' is a well defined not obviously empty trapezoid */ -#define pixman_trapezoid_valid(t) \ +#define pixman_trapezoid_valid(t) \ ((t)->left.p1.y != (t)->left.p2.y && \ (t)->right.p1.y != (t)->right.p2.y && \ (int) ((t)->bottom - (t)->top) > 0) @@ -904,5 +889,4 @@ void pixman_rasterize_trapezoid (pixman_image_t *image, int x_off, int y_off); - #endif /* PIXMAN_H__ */ diff --git a/lib/pixman/test/Makefile.am b/lib/pixman/test/Makefile.am index be76dc814..c56f62de7 100644 --- a/lib/pixman/test/Makefile.am +++ b/lib/pixman/test/Makefile.am @@ -4,13 +4,19 @@ INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman TESTPROGRAMS = \ region-test \ scaling-test \ + blitters-test \ fetch-test \ + oob-test \ + window-test \ trap-crasher fetch_test_LDADD = $(TEST_LDADD) region_test_LDADD = $(TEST_LDADD) scaling_test_LDADD = $(TEST_LDADD) +blitters_test_LDADD = $(TEST_LDADD) trap_crasher_LDADD = $(TEST_LDADD) +oob_test_LDADD = $(TEST_LDADD) +window_test_LDADD = $(TEST_LDADD) # GTK using test programs @@ -24,10 +30,10 @@ TESTPROGRAMS += \ composite-test \ gradient-test \ alpha-test \ + screen-test \ + convolution-test \ trap-test -noinst_PROGRAMS = $(TESTPROGRAMS) - INCLUDES += $(GTK_CFLAGS) gradient_test_LDADD = $(GTK_LDADD) @@ -48,5 +54,13 @@ clip_in_SOURCES = clip-in.c utils.c utils.h trap_test_LDADD = $(GTK_LDADD) trap_test_SOURCES = trap-test.c utils.c utils.h +screen_test_LDADD = $(GTK_LDADD) +screen_test_SOURCES = screen-test.c utils.c utils.h + +convolution_test_LDADD = $(GTK_LDADD) +convolution_test_SOURCES = convolution-test.c utils.c utils.h + endif +noinst_PROGRAMS = $(TESTPROGRAMS) + diff --git a/lib/pixman/test/Makefile.in b/lib/pixman/test/Makefile.in index 265f5d0df..f270165db 100644 --- a/lib/pixman/test/Makefile.in +++ b/lib/pixman/test/Makefile.in @@ -42,10 +42,12 @@ host_triplet = @host@ @HAVE_GTK_TRUE@ composite-test \ @HAVE_GTK_TRUE@ gradient-test \ @HAVE_GTK_TRUE@ alpha-test \ +@HAVE_GTK_TRUE@ screen-test \ +@HAVE_GTK_TRUE@ convolution-test \ @HAVE_GTK_TRUE@ trap-test -@HAVE_GTK_TRUE@noinst_PROGRAMS = $(am__EXEEXT_2) @HAVE_GTK_TRUE@am__append_2 = $(GTK_CFLAGS) +noinst_PROGRAMS = $(am__EXEEXT_2) subdir = test DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 @@ -57,9 +59,11 @@ CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = @HAVE_GTK_TRUE@am__EXEEXT_1 = clip-test$(EXEEXT) clip-in$(EXEEXT) \ @HAVE_GTK_TRUE@ composite-test$(EXEEXT) gradient-test$(EXEEXT) \ -@HAVE_GTK_TRUE@ alpha-test$(EXEEXT) trap-test$(EXEEXT) +@HAVE_GTK_TRUE@ alpha-test$(EXEEXT) screen-test$(EXEEXT) \ +@HAVE_GTK_TRUE@ convolution-test$(EXEEXT) trap-test$(EXEEXT) am__EXEEXT_2 = region-test$(EXEEXT) scaling-test$(EXEEXT) \ - fetch-test$(EXEEXT) trap-crasher$(EXEEXT) $(am__EXEEXT_1) + blitters-test$(EXEEXT) fetch-test$(EXEEXT) oob-test$(EXEEXT) \ + window-test$(EXEEXT) 
trap-crasher$(EXEEXT) $(am__EXEEXT_1) PROGRAMS = $(noinst_PROGRAMS) am__alpha_test_SOURCES_DIST = alpha-test.c utils.c utils.h @HAVE_GTK_TRUE@am_alpha_test_OBJECTS = alpha-test.$(OBJEXT) \ @@ -70,6 +74,9 @@ am__DEPENDENCIES_2 = @HAVE_GTK_TRUE@am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1) \ @HAVE_GTK_TRUE@ $(am__DEPENDENCIES_2) @HAVE_GTK_TRUE@alpha_test_DEPENDENCIES = $(am__DEPENDENCIES_3) +blitters_test_SOURCES = blitters-test.c +blitters_test_OBJECTS = blitters-test.$(OBJEXT) +blitters_test_DEPENDENCIES = $(am__DEPENDENCIES_1) am__clip_in_SOURCES_DIST = clip-in.c utils.c utils.h @HAVE_GTK_TRUE@am_clip_in_OBJECTS = clip-in.$(OBJEXT) utils.$(OBJEXT) clip_in_OBJECTS = $(am_clip_in_OBJECTS) @@ -84,6 +91,11 @@ am__composite_test_SOURCES_DIST = composite-test.c utils.c utils.h @HAVE_GTK_TRUE@ utils.$(OBJEXT) composite_test_OBJECTS = $(am_composite_test_OBJECTS) @HAVE_GTK_TRUE@composite_test_DEPENDENCIES = $(am__DEPENDENCIES_3) +am__convolution_test_SOURCES_DIST = convolution-test.c utils.c utils.h +@HAVE_GTK_TRUE@am_convolution_test_OBJECTS = \ +@HAVE_GTK_TRUE@ convolution-test.$(OBJEXT) utils.$(OBJEXT) +convolution_test_OBJECTS = $(am_convolution_test_OBJECTS) +@HAVE_GTK_TRUE@convolution_test_DEPENDENCIES = $(am__DEPENDENCIES_3) fetch_test_SOURCES = fetch-test.c fetch_test_OBJECTS = fetch-test.$(OBJEXT) fetch_test_DEPENDENCIES = $(am__DEPENDENCIES_1) @@ -92,12 +104,20 @@ am__gradient_test_SOURCES_DIST = gradient-test.c utils.c utils.h @HAVE_GTK_TRUE@ utils.$(OBJEXT) gradient_test_OBJECTS = $(am_gradient_test_OBJECTS) @HAVE_GTK_TRUE@gradient_test_DEPENDENCIES = $(am__DEPENDENCIES_3) +oob_test_SOURCES = oob-test.c +oob_test_OBJECTS = oob-test.$(OBJEXT) +oob_test_DEPENDENCIES = $(am__DEPENDENCIES_1) region_test_SOURCES = region-test.c region_test_OBJECTS = region-test.$(OBJEXT) region_test_DEPENDENCIES = $(am__DEPENDENCIES_1) scaling_test_SOURCES = scaling-test.c scaling_test_OBJECTS = scaling-test.$(OBJEXT) scaling_test_DEPENDENCIES = $(am__DEPENDENCIES_1) +am__screen_test_SOURCES_DIST = screen-test.c utils.c utils.h +@HAVE_GTK_TRUE@am_screen_test_OBJECTS = screen-test.$(OBJEXT) \ +@HAVE_GTK_TRUE@ utils.$(OBJEXT) +screen_test_OBJECTS = $(am_screen_test_OBJECTS) +@HAVE_GTK_TRUE@screen_test_DEPENDENCIES = $(am__DEPENDENCIES_3) trap_crasher_SOURCES = trap-crasher.c trap_crasher_OBJECTS = trap-crasher.$(OBJEXT) trap_crasher_DEPENDENCIES = $(am__DEPENDENCIES_1) @@ -106,6 +126,9 @@ am__trap_test_SOURCES_DIST = trap-test.c utils.c utils.h @HAVE_GTK_TRUE@ utils.$(OBJEXT) trap_test_OBJECTS = $(am_trap_test_OBJECTS) @HAVE_GTK_TRUE@trap_test_DEPENDENCIES = $(am__DEPENDENCIES_3) +window_test_SOURCES = window-test.c +window_test_OBJECTS = window-test.$(OBJEXT) +window_test_DEPENDENCIES = $(am__DEPENDENCIES_1) DEFAULT_INCLUDES = -I. 
-I$(srcdir) -I$(top_builddir) depcomp = $(SHELL) $(top_srcdir)/depcomp am__depfiles_maybe = depfiles @@ -117,15 +140,19 @@ LTCOMPILE = $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) \ CCLD = $(CC) LINK = $(LIBTOOL) --tag=CC --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(AM_LDFLAGS) $(LDFLAGS) -o $@ -SOURCES = $(alpha_test_SOURCES) $(clip_in_SOURCES) \ - $(clip_test_SOURCES) $(composite_test_SOURCES) fetch-test.c \ - $(gradient_test_SOURCES) region-test.c scaling-test.c \ - trap-crasher.c $(trap_test_SOURCES) -DIST_SOURCES = $(am__alpha_test_SOURCES_DIST) \ +SOURCES = $(alpha_test_SOURCES) blitters-test.c $(clip_in_SOURCES) \ + $(clip_test_SOURCES) $(composite_test_SOURCES) \ + $(convolution_test_SOURCES) fetch-test.c \ + $(gradient_test_SOURCES) oob-test.c region-test.c \ + scaling-test.c $(screen_test_SOURCES) trap-crasher.c \ + $(trap_test_SOURCES) window-test.c +DIST_SOURCES = $(am__alpha_test_SOURCES_DIST) blitters-test.c \ $(am__clip_in_SOURCES_DIST) $(am__clip_test_SOURCES_DIST) \ - $(am__composite_test_SOURCES_DIST) fetch-test.c \ - $(am__gradient_test_SOURCES_DIST) region-test.c scaling-test.c \ - trap-crasher.c $(am__trap_test_SOURCES_DIST) + $(am__composite_test_SOURCES_DIST) \ + $(am__convolution_test_SOURCES_DIST) fetch-test.c \ + $(am__gradient_test_SOURCES_DIST) oob-test.c region-test.c \ + scaling-test.c $(am__screen_test_SOURCES_DIST) trap-crasher.c \ + $(am__trap_test_SOURCES_DIST) window-test.c ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) @@ -192,6 +219,7 @@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PERL = @PERL@ +PIXMAN_TIMERS = @PIXMAN_TIMERS@ PIXMAN_VERSION_MAJOR = @PIXMAN_VERSION_MAJOR@ PIXMAN_VERSION_MICRO = @PIXMAN_VERSION_MICRO@ PIXMAN_VERSION_MINOR = @PIXMAN_VERSION_MINOR@ @@ -267,12 +295,15 @@ target_alias = @target_alias@ TEST_LDADD = $(top_builddir)/pixman/libpixman-1.la INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman \ $(am__append_2) -TESTPROGRAMS = region-test scaling-test fetch-test trap-crasher \ - $(am__append_1) +TESTPROGRAMS = region-test scaling-test blitters-test fetch-test \ + oob-test window-test trap-crasher $(am__append_1) fetch_test_LDADD = $(TEST_LDADD) region_test_LDADD = $(TEST_LDADD) scaling_test_LDADD = $(TEST_LDADD) +blitters_test_LDADD = $(TEST_LDADD) trap_crasher_LDADD = $(TEST_LDADD) +oob_test_LDADD = $(TEST_LDADD) +window_test_LDADD = $(TEST_LDADD) # GTK using test programs @HAVE_GTK_TRUE@GTK_LDADD = $(TEST_LDADD) $(GTK_LIBS) @@ -288,6 +319,10 @@ trap_crasher_LDADD = $(TEST_LDADD) @HAVE_GTK_TRUE@clip_in_SOURCES = clip-in.c utils.c utils.h @HAVE_GTK_TRUE@trap_test_LDADD = $(GTK_LDADD) @HAVE_GTK_TRUE@trap_test_SOURCES = trap-test.c utils.c utils.h +@HAVE_GTK_TRUE@screen_test_LDADD = $(GTK_LDADD) +@HAVE_GTK_TRUE@screen_test_SOURCES = screen-test.c utils.c utils.h +@HAVE_GTK_TRUE@convolution_test_LDADD = $(GTK_LDADD) +@HAVE_GTK_TRUE@convolution_test_SOURCES = convolution-test.c utils.c utils.h all: all-am .SUFFIXES: @@ -331,6 +366,9 @@ clean-noinstPROGRAMS: alpha-test$(EXEEXT): $(alpha_test_OBJECTS) $(alpha_test_DEPENDENCIES) @rm -f alpha-test$(EXEEXT) $(LINK) $(alpha_test_LDFLAGS) $(alpha_test_OBJECTS) $(alpha_test_LDADD) $(LIBS) +blitters-test$(EXEEXT): $(blitters_test_OBJECTS) $(blitters_test_DEPENDENCIES) + @rm -f blitters-test$(EXEEXT) + $(LINK) $(blitters_test_LDFLAGS) $(blitters_test_OBJECTS) $(blitters_test_LDADD) $(LIBS) clip-in$(EXEEXT): $(clip_in_OBJECTS) $(clip_in_DEPENDENCIES) @rm -f clip-in$(EXEEXT) 
$(LINK) $(clip_in_LDFLAGS) $(clip_in_OBJECTS) $(clip_in_LDADD) $(LIBS) @@ -340,24 +378,36 @@ clip-test$(EXEEXT): $(clip_test_OBJECTS) $(clip_test_DEPENDENCIES) composite-test$(EXEEXT): $(composite_test_OBJECTS) $(composite_test_DEPENDENCIES) @rm -f composite-test$(EXEEXT) $(LINK) $(composite_test_LDFLAGS) $(composite_test_OBJECTS) $(composite_test_LDADD) $(LIBS) +convolution-test$(EXEEXT): $(convolution_test_OBJECTS) $(convolution_test_DEPENDENCIES) + @rm -f convolution-test$(EXEEXT) + $(LINK) $(convolution_test_LDFLAGS) $(convolution_test_OBJECTS) $(convolution_test_LDADD) $(LIBS) fetch-test$(EXEEXT): $(fetch_test_OBJECTS) $(fetch_test_DEPENDENCIES) @rm -f fetch-test$(EXEEXT) $(LINK) $(fetch_test_LDFLAGS) $(fetch_test_OBJECTS) $(fetch_test_LDADD) $(LIBS) gradient-test$(EXEEXT): $(gradient_test_OBJECTS) $(gradient_test_DEPENDENCIES) @rm -f gradient-test$(EXEEXT) $(LINK) $(gradient_test_LDFLAGS) $(gradient_test_OBJECTS) $(gradient_test_LDADD) $(LIBS) +oob-test$(EXEEXT): $(oob_test_OBJECTS) $(oob_test_DEPENDENCIES) + @rm -f oob-test$(EXEEXT) + $(LINK) $(oob_test_LDFLAGS) $(oob_test_OBJECTS) $(oob_test_LDADD) $(LIBS) region-test$(EXEEXT): $(region_test_OBJECTS) $(region_test_DEPENDENCIES) @rm -f region-test$(EXEEXT) $(LINK) $(region_test_LDFLAGS) $(region_test_OBJECTS) $(region_test_LDADD) $(LIBS) scaling-test$(EXEEXT): $(scaling_test_OBJECTS) $(scaling_test_DEPENDENCIES) @rm -f scaling-test$(EXEEXT) $(LINK) $(scaling_test_LDFLAGS) $(scaling_test_OBJECTS) $(scaling_test_LDADD) $(LIBS) +screen-test$(EXEEXT): $(screen_test_OBJECTS) $(screen_test_DEPENDENCIES) + @rm -f screen-test$(EXEEXT) + $(LINK) $(screen_test_LDFLAGS) $(screen_test_OBJECTS) $(screen_test_LDADD) $(LIBS) trap-crasher$(EXEEXT): $(trap_crasher_OBJECTS) $(trap_crasher_DEPENDENCIES) @rm -f trap-crasher$(EXEEXT) $(LINK) $(trap_crasher_LDFLAGS) $(trap_crasher_OBJECTS) $(trap_crasher_LDADD) $(LIBS) trap-test$(EXEEXT): $(trap_test_OBJECTS) $(trap_test_DEPENDENCIES) @rm -f trap-test$(EXEEXT) $(LINK) $(trap_test_LDFLAGS) $(trap_test_OBJECTS) $(trap_test_LDADD) $(LIBS) +window-test$(EXEEXT): $(window_test_OBJECTS) $(window_test_DEPENDENCIES) + @rm -f window-test$(EXEEXT) + $(LINK) $(window_test_LDFLAGS) $(window_test_OBJECTS) $(window_test_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) @@ -366,16 +416,21 @@ distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alpha-test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blitters-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clip-in.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clip-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/composite-test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/convolution-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fetch-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gradient-test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oob-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/region-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scaling-test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/screen-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/trap-crasher.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/trap-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utils.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/window-test.Po@am__quote@ .c.o: 
@am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \ diff --git a/lib/pixman/test/blitters-test.c b/lib/pixman/test/blitters-test.c new file mode 100644 index 000000000..d5201e541 --- /dev/null +++ b/lib/pixman/test/blitters-test.c @@ -0,0 +1,655 @@ +/* + * Test program, which stresses the use of different color formats and + * compositing operations. + * + * Just run it without any command line arguments, and it will report either + * "blitters test passed" - everything is ok + * "blitters test failed!" - there is some problem + * + * In the case of failure, finding the problem involves the following steps: + * 1. Get the reference 'blitters-test' binary. It makes sense to disable all + * the cpu specific optimizations in pixman and also configure it with + * '--disable-shared' option. Those who are paranoid can also tweak the + * sources to disable all fastpath functions. The resulting binary + * can be renamed to something like 'blitters-test.ref'. + * 2. Compile the buggy binary (also with the '--disable-shared' option). + * 3. Run 'ruby blitters-test-bisect.rb ./blitters-test.ref ./blitters-test' + * 4. Look at the information about failed case (destination buffer content + * will be shown) and try to figure out what is wrong. Loading + * test program in gdb, specifying failed test number in the command + * line with '-' character prepended and setting breakpoint on + * 'pixman_image_composite' function can provide detailed information + * about function arguments + */ +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <config.h> +#include "pixman.h" + +/* A primitive pseudorandom number generator, taken from POSIX.1-2001 example */ + +static uint32_t lcg_seed; + +static inline uint32_t +lcg_rand (void) +{ + lcg_seed = lcg_seed * 1103515245 + 12345; + return ((uint32_t)(lcg_seed / 65536) % 32768); +} + +static inline void +lcg_srand (uint32_t seed) +{ + lcg_seed = seed; +} + +static inline uint32_t +lcg_rand_n (int max) +{ + return lcg_rand () % max; +} + +static void * +aligned_malloc (size_t align, size_t size) +{ + void *result; + +#ifdef HAVE_POSIX_MEMALIGN + posix_memalign (&result, align, size); +#else + result = malloc (size); +#endif + + return result; +} + +/*----------------------------------------------------------------------------*\ + * CRC-32 version 2.0.0 by Craig Bruce, 2006-04-29. + * + * This program generates the CRC-32 values for the files named in the + * command-line arguments. These are the same CRC-32 values used by GZIP, + * PKZIP, and ZMODEM. The Crc32_ComputeBuf () can also be detached and + * used independently. + * + * THIS PROGRAM IS PUBLIC-DOMAIN SOFTWARE. + * + * Based on the byte-oriented implementation "File Verification Using CRC" + * by Mark R. Nelson in Dr. Dobb's Journal, May 1992, pp. 64-67. + * + * v1.0.0: original release. + * v1.0.1: fixed printf formats. + * v1.0.2: fixed something else. + * v1.0.3: replaced CRC constant table by generator function. + * v1.0.4: reformatted code, made ANSI C. 1994-12-05. + * v2.0.0: rewrote to use memory buffer & static table, 2006-04-29. +\*----------------------------------------------------------------------------*/ + +/*----------------------------------------------------------------------------*\ + * NAME: + * Crc32_ComputeBuf () - computes the CRC-32 value of a memory buffer + * DESCRIPTION: + * Computes or accumulates the CRC-32 value for a memory buffer. 
+ * The 'inCrc32' gives a previously accumulated CRC-32 value to allow + * a CRC to be generated for multiple sequential buffer-fuls of data. + * The 'inCrc32' for the first buffer must be zero. + * ARGUMENTS: + * inCrc32 - accumulated CRC-32 value, must be 0 on first call + * buf - buffer to compute CRC-32 value for + * bufLen - number of bytes in buffer + * RETURNS: + * crc32 - computed CRC-32 value + * ERRORS: + * (no errors are possible) +\*----------------------------------------------------------------------------*/ + +static uint32_t +compute_crc32 (uint32_t in_crc32, + const void *buf, + size_t buf_len) +{ + static const uint32_t crc_table[256] = { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, + 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, + 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, + 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, + 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, + 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, + 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, + 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, + 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, + 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, + 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, + 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, + 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, + 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, + 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, + 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, + 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, + 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, + 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, + 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, + 
0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, + 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + }; + + uint32_t crc32; + unsigned char * byte_buf; + size_t i; + + /* accumulate crc32 for buffer */ + crc32 = in_crc32 ^ 0xFFFFFFFF; + byte_buf = (unsigned char*) buf; + + for (i = 0; i < buf_len; i++) + crc32 = (crc32 >> 8) ^ crc_table[(crc32 ^ byte_buf[i]) & 0xFF]; + + return (crc32 ^ 0xFFFFFFFF); +} + +/* perform endian conversion of pixel data */ +static void +image_endian_swap (pixman_image_t *img, int bpp) +{ + int stride = pixman_image_get_stride (img); + uint32_t *data = pixman_image_get_data (img); + int height = pixman_image_get_height (img); + int i, j; + + /* swap bytes only on big endian systems */ + volatile uint16_t endian_check_var = 0x1234; + if (*(volatile uint8_t *)&endian_check_var != 0x12) + return; + + for (i = 0; i < height; i++) + { + uint8_t *line_data = (uint8_t *)data + stride * i; + /* swap bytes only for 16, 24 and 32 bpp for now */ + switch (bpp) + { + case 1: + for (j = 0; j < stride; j++) + { + line_data[j] = + ((line_data[j] & 0x80) >> 7) | + ((line_data[j] & 0x40) >> 5) | + ((line_data[j] & 0x20) >> 3) | + ((line_data[j] & 0x10) >> 1) | + ((line_data[j] & 0x08) << 1) | + ((line_data[j] & 0x04) << 3) | + ((line_data[j] & 0x02) << 5) | + ((line_data[j] & 0x01) << 7); + } + break; + case 4: + for (j = 0; j < stride; j++) + { + line_data[j] = (line_data[j] >> 4) | (line_data[j] << 4); + } + break; + case 16: + for (j = 0; j + 2 <= stride; j += 2) + { + char t1 = line_data[j + 0]; + char t2 = line_data[j + 1]; + + line_data[j + 1] = t1; + line_data[j + 0] = t2; + } + break; + case 24: + for (j = 0; j + 3 <= stride; j += 3) + { + char t1 = line_data[j + 0]; + char t2 = line_data[j + 1]; + char t3 = line_data[j + 2]; + + line_data[j + 2] = t1; + line_data[j + 1] = t2; + line_data[j + 0] = t3; + } + break; + case 32: + for (j = 0; j + 4 <= stride; j += 4) + { + char t1 = line_data[j + 0]; + char t2 = line_data[j + 1]; + char t3 = line_data[j + 2]; + char t4 = line_data[j + 3]; + + line_data[j + 3] = t1; + line_data[j + 2] = t2; + line_data[j + 1] = t3; + line_data[j + 0] = t4; + } + break; + default: + break; + } + } +} + +/* Create random image for testing purposes */ +static pixman_image_t * +create_random_image (pixman_format_code_t *allowed_formats, + int max_width, + int max_height, + int max_extra_stride, + pixman_format_code_t *used_fmt) +{ + int n = 0, i, width, height, stride; + pixman_format_code_t fmt; + uint32_t *buf; + pixman_image_t *img; + + while (allowed_formats[n] != -1) + n++; + fmt = allowed_formats[lcg_rand_n (n)]; + width = lcg_rand_n (max_width) + 1; + height = lcg_rand_n (max_height) + 1; + stride = (width * PIXMAN_FORMAT_BPP (fmt) + 7) / 8 + + lcg_rand_n (max_extra_stride + 1); + stride = (stride + 3) & ~3; + + /* do the allocation */ + buf = aligned_malloc (64, stride * height); + + /* initialize image with random data */ + for (i = 0; i < stride * height; i++) + { + /* generation is biased to having more 0 or 255 bytes as + * they are more likely to be special-cased in code + */ + *((uint8_t *)buf + i) = lcg_rand_n (4) ? lcg_rand_n (256) : + (lcg_rand_n (2) ? 
0 : 255);
+    }
+
+    img = pixman_image_create_bits (fmt, width, height, buf, stride);
+
+    image_endian_swap (img, PIXMAN_FORMAT_BPP (fmt));
+
+    if (used_fmt) *used_fmt = fmt;
+    return img;
+}
+
+/* Free random image, and optionally update crc32 based on its data */
+static uint32_t
+free_random_image (uint32_t initcrc,
+		   pixman_image_t *img,
+		   pixman_format_code_t fmt)
+{
+    uint32_t crc32 = 0;
+    int stride = pixman_image_get_stride (img);
+    uint32_t *data = pixman_image_get_data (img);
+    int height = pixman_image_get_height (img);
+
+    if (fmt != -1)
+    {
+	/* mask unused 'x' part */
+	if (PIXMAN_FORMAT_BPP (fmt) - PIXMAN_FORMAT_DEPTH (fmt) &&
+	    PIXMAN_FORMAT_DEPTH (fmt) != 0)
+	{
+	    int i;
+	    uint32_t *data = pixman_image_get_data (img);
+	    uint32_t mask = (1 << PIXMAN_FORMAT_DEPTH (fmt)) - 1;
+
+	    if (PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_BGRA)
+		mask <<= (PIXMAN_FORMAT_BPP (fmt) - PIXMAN_FORMAT_DEPTH (fmt));
+
+	    for (i = 0; i < 32; i++)
+		mask |= mask << (i * PIXMAN_FORMAT_BPP (fmt));
+
+	    for (i = 0; i < stride * height / 4; i++)
+		data[i] &= mask;
+	}
+
+	/* swap endianness in order to provide identical results on both big
+	 * and little endian systems
+	 */
+	image_endian_swap (img, PIXMAN_FORMAT_BPP (fmt));
+	crc32 = compute_crc32 (initcrc, data, stride * height);
+    }
+
+    pixman_image_unref (img);
+    free (data);
+
+    return crc32;
+}
+
+static pixman_op_t op_list[] = {
+    PIXMAN_OP_SRC,
+    PIXMAN_OP_OVER,
+    PIXMAN_OP_ADD,
+    PIXMAN_OP_CLEAR,
+    PIXMAN_OP_SRC,
+    PIXMAN_OP_DST,
+    PIXMAN_OP_OVER,
+    PIXMAN_OP_OVER_REVERSE,
+    PIXMAN_OP_IN,
+    PIXMAN_OP_IN_REVERSE,
+    PIXMAN_OP_OUT,
+    PIXMAN_OP_OUT_REVERSE,
+    PIXMAN_OP_ATOP,
+    PIXMAN_OP_ATOP_REVERSE,
+    PIXMAN_OP_XOR,
+    PIXMAN_OP_ADD,
+    PIXMAN_OP_SATURATE,
+    PIXMAN_OP_DISJOINT_CLEAR,
+    PIXMAN_OP_DISJOINT_SRC,
+    PIXMAN_OP_DISJOINT_DST,
+    PIXMAN_OP_DISJOINT_OVER,
+    PIXMAN_OP_DISJOINT_OVER_REVERSE,
+    PIXMAN_OP_DISJOINT_IN,
+    PIXMAN_OP_DISJOINT_IN_REVERSE,
+    PIXMAN_OP_DISJOINT_OUT,
+    PIXMAN_OP_DISJOINT_OUT_REVERSE,
+    PIXMAN_OP_DISJOINT_ATOP,
+    PIXMAN_OP_DISJOINT_ATOP_REVERSE,
+    PIXMAN_OP_DISJOINT_XOR,
+    PIXMAN_OP_CONJOINT_CLEAR,
+    PIXMAN_OP_CONJOINT_SRC,
+    PIXMAN_OP_CONJOINT_DST,
+    PIXMAN_OP_CONJOINT_OVER,
+    PIXMAN_OP_CONJOINT_OVER_REVERSE,
+    PIXMAN_OP_CONJOINT_IN,
+    PIXMAN_OP_CONJOINT_IN_REVERSE,
+    PIXMAN_OP_CONJOINT_OUT,
+    PIXMAN_OP_CONJOINT_OUT_REVERSE,
+    PIXMAN_OP_CONJOINT_ATOP,
+    PIXMAN_OP_CONJOINT_ATOP_REVERSE,
+    PIXMAN_OP_CONJOINT_XOR,
+    PIXMAN_OP_MULTIPLY,
+    PIXMAN_OP_SCREEN,
+    PIXMAN_OP_OVERLAY,
+    PIXMAN_OP_DARKEN,
+    PIXMAN_OP_LIGHTEN,
+    PIXMAN_OP_COLOR_DODGE,
+    PIXMAN_OP_COLOR_BURN,
+    PIXMAN_OP_HARD_LIGHT,
+    PIXMAN_OP_DIFFERENCE,
+    PIXMAN_OP_EXCLUSION,
+#if 0 /* these use floating point math and are not always bitexact on different platforms */
+    PIXMAN_OP_SOFT_LIGHT,
+    PIXMAN_OP_HSL_HUE,
+    PIXMAN_OP_HSL_SATURATION,
+    PIXMAN_OP_HSL_COLOR,
+    PIXMAN_OP_HSL_LUMINOSITY,
+#endif
+};
+
+static pixman_format_code_t img_fmt_list[] = {
+    PIXMAN_a8r8g8b8,
+    PIXMAN_x8r8g8b8,
+    PIXMAN_r5g6b5,
+    PIXMAN_r3g3b2,
+    PIXMAN_a8,
+    PIXMAN_a8b8g8r8,
+    PIXMAN_x8b8g8r8,
+    PIXMAN_b8g8r8a8,
+    PIXMAN_b8g8r8x8,
+    PIXMAN_r8g8b8,
+    PIXMAN_b8g8r8,
+    PIXMAN_r5g6b5,
+    PIXMAN_b5g6r5,
+    PIXMAN_x2r10g10b10,
+    PIXMAN_a2r10g10b10,
+    PIXMAN_x2b10g10r10,
+    PIXMAN_a2b10g10r10,
+    PIXMAN_a1r5g5b5,
+    PIXMAN_x1r5g5b5,
+    PIXMAN_a1b5g5r5,
+    PIXMAN_x1b5g5r5,
+    PIXMAN_a4r4g4b4,
+    PIXMAN_x4r4g4b4,
+    PIXMAN_a4b4g4r4,
+    PIXMAN_x4b4g4r4,
+    PIXMAN_a8,
+    PIXMAN_r3g3b2,
+    PIXMAN_b2g3r3,
+    PIXMAN_a2r2g2b2,
+    PIXMAN_a2b2g2r2,
+#if 0 /* using these crashes the test */
+    PIXMAN_c8,
+    PIXMAN_g8,
+
PIXMAN_x4c4, + PIXMAN_x4g4, + PIXMAN_c4, + PIXMAN_g4, + PIXMAN_g1, +#endif + PIXMAN_x4a4, + PIXMAN_a4, + PIXMAN_r1g2b1, + PIXMAN_b1g2r1, + PIXMAN_a1r1g1b1, + PIXMAN_a1b1g1r1, + PIXMAN_a1, + -1 +}; + +static pixman_format_code_t mask_fmt_list[] = { + PIXMAN_a8r8g8b8, + PIXMAN_a8, + PIXMAN_a4, + PIXMAN_a1, + -1 +}; + + +/* + * Composite operation with pseudorandom images + */ +uint32_t +test_composite (uint32_t initcrc, int testnum, int verbose) +{ + int i; + pixman_image_t *src_img = NULL; + pixman_image_t *dst_img = NULL; + pixman_image_t *mask_img = NULL; + int src_width, src_height; + int dst_width, dst_height; + int src_stride, dst_stride; + int src_x, src_y; + int dst_x, dst_y; + int w, h; + int op; + pixman_format_code_t src_fmt, dst_fmt, mask_fmt; + uint32_t *dstbuf; + uint32_t crc32; + int max_width, max_height, max_extra_stride; + + max_width = max_height = 24 + testnum / 10000; + max_extra_stride = 4 + testnum / 1000000; + + if (max_width > 256) + max_width = 256; + + if (max_height > 16) + max_height = 16; + + if (max_extra_stride > 8) + max_extra_stride = 8; + + lcg_srand (testnum); + + op = op_list[lcg_rand_n (sizeof (op_list) / sizeof (op_list[0]))]; + + if (lcg_rand_n (8)) + { + /* normal image */ + src_img = create_random_image (img_fmt_list, max_width, max_height, + max_extra_stride, &src_fmt); + } + else + { + /* solid case */ + src_img = create_random_image (img_fmt_list, 1, 1, + max_extra_stride, &src_fmt); + + pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL); + } + + dst_img = create_random_image (img_fmt_list, max_width, max_height, + max_extra_stride, &dst_fmt); + + mask_img = NULL; + mask_fmt = -1; + + if (lcg_rand_n (2)) + { + if (lcg_rand_n (2)) + { + mask_img = create_random_image (mask_fmt_list, max_width, max_height, + max_extra_stride, &mask_fmt); + } + else + { + /* solid case */ + mask_img = create_random_image (mask_fmt_list, 1, 1, + max_extra_stride, &mask_fmt); + pixman_image_set_repeat (mask_img, PIXMAN_REPEAT_NORMAL); + } + + if (lcg_rand_n (2)) + pixman_image_set_component_alpha (mask_img, 1); + } + + src_width = pixman_image_get_width (src_img); + src_height = pixman_image_get_height (src_img); + src_stride = pixman_image_get_stride (src_img); + + dst_width = pixman_image_get_width (dst_img); + dst_height = pixman_image_get_height (dst_img); + dst_stride = pixman_image_get_stride (dst_img); + + dstbuf = pixman_image_get_data (dst_img); + + src_x = lcg_rand_n (src_width); + src_y = lcg_rand_n (src_height); + dst_x = lcg_rand_n (dst_width); + dst_y = lcg_rand_n (dst_height); + + w = lcg_rand_n (dst_width - dst_x + 1); + h = lcg_rand_n (dst_height - dst_y + 1); + + if (verbose) + { + printf ("op=%d, src_fmt=%08X, dst_fmt=%08X, mask_fmt=%08X\n", + op, src_fmt, dst_fmt, mask_fmt); + printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n", + src_width, src_height, dst_width, dst_height); + printf ("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n", + src_x, src_y, dst_x, dst_y); + printf ("src_stride=%d, dst_stride=%d\n", + src_stride, dst_stride); + printf ("w=%d, h=%d\n", w, h); + } + + pixman_image_composite (op, src_img, mask_img, dst_img, + src_x, src_y, src_x, src_y, dst_x, dst_y, w, h); + + if (verbose) + { + int j; + + printf ("---\n"); + for (i = 0; i < dst_height; i++) + { + for (j = 0; j < dst_stride; j++) + { + if (j == (dst_width * PIXMAN_FORMAT_BPP (dst_fmt) + 7) / 8) + printf ("| "); + + printf ("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j)); + } + printf ("\n"); + } + printf ("---\n"); + } + + free_random_image (initcrc, 
src_img, -1); + crc32 = free_random_image (initcrc, dst_img, dst_fmt); + + if (mask_img) + free_random_image (initcrc, mask_img, -1); + + return crc32; +} + +int +main (int argc, char *argv[]) +{ + int i, n1 = 1, n2 = 0; + uint32_t crc = 0; + int verbose = getenv ("VERBOSE") != NULL; + + if (argc >= 3) + { + n1 = atoi (argv[1]); + n2 = atoi (argv[2]); + } + else if (argc >= 2) + { + n2 = atoi (argv[1]); + } + else + { + n1 = 1; + n2 = 2000000; + } + + if (n2 < 0) + { + crc = test_composite (0, abs (n2), 1); + printf ("crc32=%08X\n", crc); + } + else + { + for (i = n1; i <= n2; i++) + { + crc = test_composite (crc, i, 0); + + if (verbose) + printf ("%d: %08X\n", i, crc); + } + printf ("crc32=%08X\n", crc); + + if (n2 == 2000000) + { + /* Predefined value for running with all the fastpath functions + disabled. It needs to be updated every time when changes are + introduced to this program or behavior of pixman changes! */ + if (crc == 0x06D8EDB6) + { + printf ("blitters test passed\n"); + } + else + { + printf ("blitters test failed!\n"); + return 1; + } + } + } + return 0; +} diff --git a/lib/pixman/test/clip-test.c b/lib/pixman/test/clip-test.c index 90310f415..900013718 100644 --- a/lib/pixman/test/clip-test.c +++ b/lib/pixman/test/clip-test.c @@ -71,6 +71,7 @@ main (int argc, char **argv) pixman_region32_init_rect (&clip_region, 50, 0, 100, 200); pixman_image_set_clip_region32 (src_img, &clip_region); pixman_image_set_source_clipping (src_img, TRUE); + pixman_image_set_has_client_clip (src_img, TRUE); pixman_image_set_transform (src_img, &trans); pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL); diff --git a/lib/pixman/test/composite-test.c b/lib/pixman/test/composite-test.c index 393e15d8f..49e0220a4 100644 --- a/lib/pixman/test/composite-test.c +++ b/lib/pixman/test/composite-test.c @@ -1,10 +1,49 @@ +#include <gtk/gtk.h> #include <stdlib.h> #include <stdio.h> #include "pixman.h" #include "utils.h" -#define WIDTH 100 -#define HEIGHT 100 +#define WIDTH 60 +#define HEIGHT 60 + +typedef struct { + const char *name; + pixman_op_t op; +} operator_t; + +static const operator_t operators[] = { + { "CLEAR", PIXMAN_OP_CLEAR }, + { "SRC", PIXMAN_OP_SRC }, + { "DST", PIXMAN_OP_DST }, + { "OVER", PIXMAN_OP_OVER }, + { "OVER_REVERSE", PIXMAN_OP_OVER_REVERSE }, + { "IN", PIXMAN_OP_IN }, + { "IN_REVERSE", PIXMAN_OP_IN_REVERSE }, + { "OUT", PIXMAN_OP_OUT }, + { "OUT_REVERSE", PIXMAN_OP_OUT_REVERSE }, + { "ATOP", PIXMAN_OP_ATOP }, + { "ATOP_REVERSE", PIXMAN_OP_ATOP_REVERSE }, + { "XOR", PIXMAN_OP_XOR }, + { "ADD", PIXMAN_OP_ADD }, + { "SATURATE", PIXMAN_OP_SATURATE }, + + { "MULTIPLY", PIXMAN_OP_MULTIPLY }, + { "SCREEN", PIXMAN_OP_SCREEN }, + { "OVERLAY", PIXMAN_OP_OVERLAY }, + { "DARKEN", PIXMAN_OP_DARKEN }, + { "LIGHTEN", PIXMAN_OP_LIGHTEN }, + { "COLOR_DODGE", PIXMAN_OP_COLOR_DODGE }, + { "COLOR_BURN", PIXMAN_OP_COLOR_BURN }, + { "HARD_LIGHT", PIXMAN_OP_HARD_LIGHT }, + { "SOFT_LIGHT", PIXMAN_OP_SOFT_LIGHT }, + { "DIFFERENCE", PIXMAN_OP_DIFFERENCE }, + { "EXCLUSION", PIXMAN_OP_EXCLUSION }, + { "HSL_HUE", PIXMAN_OP_HSL_HUE }, + { "HSL_SATURATION", PIXMAN_OP_HSL_SATURATION }, + { "HSL_COLOR", PIXMAN_OP_HSL_COLOR }, + { "HSL_LUMINOSITY", PIXMAN_OP_HSL_LUMINOSITY }, +}; static uint32_t reader (const void *src, int size) @@ -44,40 +83,107 @@ writer (void *src, uint32_t value, int size) int main (int argc, char **argv) { - uint32_t *src = malloc (WIDTH * HEIGHT * 4); +#define d2f pixman_double_to_fixed + + GtkWidget *window, *swindow; + GtkWidget *table; uint32_t *dest = malloc (WIDTH * HEIGHT * 4); 
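+    /* The rewritten demo composites a six-stop linear gradient over a
+     * flat 0x7f6f6f00 background once per entry in the operators[]
+     * table above, and packs the resulting swatches into a scrolled
+     * GTK grid. */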
+ uint32_t *src = malloc (WIDTH * HEIGHT * 4); pixman_image_t *src_img; pixman_image_t *dest_img; + pixman_point_fixed_t p1 = { -10 << 0, 0 }; + pixman_point_fixed_t p2 = { WIDTH << 16, (HEIGHT - 10) << 16 }; + uint16_t full = 0xcfff; + uint16_t low = 0x5000; + uint16_t alpha = 0xffff; + pixman_gradient_stop_t stops[6] = + { + { d2f (0.0), { full, low, low, alpha } }, + { d2f (0.25), { full, full, low, alpha } }, + { d2f (0.4), { low, full, low, alpha } }, + { d2f (0.5), { low, full, full, alpha } }, + { d2f (0.8), { low, low, full, alpha } }, + { d2f (1.0), { full, low, full, alpha } }, + }; + + int i; - for (i = 0; i < WIDTH * HEIGHT; ++i) - src[i] = 0x7f7f0000; /* red */ + gtk_init (&argc, &argv); - for (i = 0; i < WIDTH * HEIGHT; ++i) - dest[i] = 0x7f00007f; /* blue */ + window = gtk_window_new (GTK_WINDOW_TOPLEVEL); + + gtk_window_set_default_size (window, 800, 600); - src_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, - WIDTH, HEIGHT, - src, - WIDTH * 4); + g_signal_connect (window, "delete-event", + G_CALLBACK (gtk_main_quit), + NULL); + table = gtk_table_new (G_N_ELEMENTS (operators) / 6, 6, TRUE); + + src_img = pixman_image_create_linear_gradient (&p1, &p2, stops, + sizeof (stops) / sizeof (stops[0])); + pixman_image_set_repeat (src_img, PIXMAN_REPEAT_PAD); + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, dest, WIDTH * 4); - - pixman_image_set_accessors (src_img, reader, writer); pixman_image_set_accessors (dest_img, reader, writer); - - pixman_image_composite (PIXMAN_OP_OVER, src_img, NULL, dest_img, - 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); - show_image (dest_img); - + for (i = 0; i < G_N_ELEMENTS (operators); ++i) + { + GtkWidget *image; + GdkPixbuf *pixbuf; + GtkWidget *vbox; + GtkWidget *label; + int j, k; + + vbox = gtk_vbox_new (FALSE, 0); + + label = gtk_label_new (operators[i].name); + gtk_box_pack_start (GTK_BOX (vbox), label, FALSE, FALSE, 6); + gtk_widget_show (label); + + for (j = 0; j < HEIGHT; ++j) + { + for (k = 0; k < WIDTH; ++k) + dest[j * WIDTH + k] = 0x7f6f6f00; + } + pixman_image_composite (operators[i].op, src_img, NULL, dest_img, + 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + pixbuf = pixbuf_from_argb32 (pixman_image_get_data (dest_img), TRUE, + WIDTH, HEIGHT, WIDTH * 4); + image = gtk_image_new_from_pixbuf (pixbuf); + gtk_box_pack_start (GTK_BOX (vbox), image, FALSE, FALSE, 0); + gtk_widget_show (image); + + gtk_table_attach_defaults (GTK_TABLE (table), vbox, + i % 6, (i % 6) + 1, i / 6, (i / 6) + 1); + gtk_widget_show (vbox); + + g_object_unref (pixbuf); + } + pixman_image_unref (src_img); - pixman_image_unref (dest_img); free (src); + pixman_image_unref (dest_img); free (dest); + + swindow = gtk_scrolled_window_new (NULL, NULL); + gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow), + GTK_POLICY_AUTOMATIC, + GTK_POLICY_AUTOMATIC); + gtk_scrolled_window_add_with_viewport (GTK_SCROLLED_WINDOW (swindow), table); + gtk_widget_show (table); + + gtk_container_add (GTK_CONTAINER (window), swindow); + gtk_widget_show (swindow); + + gtk_widget_show (window); + + gtk_main (); + return 0; } diff --git a/lib/pixman/test/convolution-test.c b/lib/pixman/test/convolution-test.c new file mode 100644 index 000000000..8609d38a0 --- /dev/null +++ b/lib/pixman/test/convolution-test.c @@ -0,0 +1,47 @@ +#include <stdio.h> +#include <stdlib.h> +#include "pixman.h" +#include "utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 200 +#define HEIGHT 200 + +#define d2f pixman_double_to_fixed + + uint32_t *src = malloc (WIDTH * HEIGHT * 4); + uint32_t 
*mask = malloc (WIDTH * HEIGHT * 4); + uint32_t *dest = malloc (WIDTH * HEIGHT * 4); + pixman_fixed_t convolution[] = + { + d2f (3), d2f (3), + d2f (0.5), d2f (0.5), d2f (0.5), + d2f (0.5), d2f (0.5), d2f (0.5), + d2f (0.5), d2f (0.5), d2f (0.5), + }; + pixman_image_t *simg, *mimg, *dimg; + + int i; + + for (i = 0; i < WIDTH * HEIGHT; ++i) + { + src[i] = 0x7f007f00; + mask[i] = (i % 256) * 0x01000000; + dest[i] = 0; + } + + simg = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src, WIDTH * 4); + mimg = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, mask, WIDTH * 4); + dimg = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, dest, WIDTH * 4); + + pixman_image_set_filter (mimg, PIXMAN_FILTER_CONVOLUTION, + convolution, 11); + + pixman_image_composite (PIXMAN_OP_OVER, simg, mimg, dimg, 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + + show_image (dimg); + + return 0; +} diff --git a/lib/pixman/test/fetch-test.c b/lib/pixman/test/fetch-test.c index c41f1a63e..6306a4c42 100644 --- a/lib/pixman/test/fetch-test.c +++ b/lib/pixman/test/fetch-test.c @@ -2,6 +2,7 @@ #include <stdlib.h> #include <stdio.h> #include "pixman.h" +#include <config.h> #define SIZE 1024 @@ -34,8 +35,13 @@ testcase_t testcases[] = { .format = PIXMAN_g1, .width = 8, .height = 2, .stride = 4, +#ifdef WORDS_BIGENDIAN + .src = { 0xaa000000, + 0x55000000 }, +#else .src = { 0x00000055, 0x000000aa }, +#endif .dst = { 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff }, .indexed = &mono_pallete, @@ -51,14 +57,24 @@ testcase_t testcases[] = { 0x00898989, 0x00ababab, 0x00cdcdcd, 0x00efefef, }, }, #endif + /* FIXME: make this work on big endian */ { .format = PIXMAN_yv12, .width = 8, .height = 2, .stride = 8, +#ifdef WORDS_BIGENDIAN + .src = { 0x00ff00ff, 0x00ff00ff, + 0xff00ff00, 0xff00ff00, + 0x80ff8000, + 0x800080ff + }, +#else .src = { 0xff00ff00, 0xff00ff00, 0x00ff00ff, 0x00ff00ff, 0x0080ff80, - 0xff800080}, + 0xff800080 + }, +#endif .dst = { 0xff000000, 0xffffffff, 0xffb80000, 0xffffe113, 0xff000000, 0xffffffff, 0xff0023ee, 0xff4affff, diff --git a/lib/pixman/test/oob-test.c b/lib/pixman/test/oob-test.c new file mode 100644 index 000000000..4f9e5a244 --- /dev/null +++ b/lib/pixman/test/oob-test.c @@ -0,0 +1,101 @@ +#include <stdio.h> +#include <stdlib.h> +#include "pixman.h" + +typedef struct +{ + int width; + int height; + int stride; + pixman_format_code_t format; + +} image_info_t; + +typedef struct +{ + pixman_op_t op; + + image_info_t src; + image_info_t dest; + + int src_x; + int src_y; + int dest_x; + int dest_y; + int width; + int height; +} composite_info_t; + +const composite_info_t info[] = +{ + { + PIXMAN_OP_SRC, + { 3, 6, 16, PIXMAN_a8r8g8b8 }, + { 5, 7, 20, PIXMAN_x8r8g8b8 }, + 1, 8, + 1, -1, + 1, 8 + }, + { + PIXMAN_OP_SRC, + { 7, 5, 36, PIXMAN_a8r8g8b8 }, + { 6, 5, 28, PIXMAN_x8r8g8b8 }, + 8, 5, + 5, 3, + 1, 2 + }, + { + PIXMAN_OP_OVER, + { 10, 10, 40, PIXMAN_a2b10g10r10 }, + { 10, 10, 40, PIXMAN_a2b10g10r10 }, + 0, 0, + 0, 0, + 10, 10 + }, + { + PIXMAN_OP_OVER, + { 10, 10, 40, PIXMAN_x2b10g10r10 }, + { 10, 10, 40, PIXMAN_x2b10g10r10 }, + 0, 0, + 0, 0, + 10, 10 + }, +}; + +static pixman_image_t * +make_image (const image_info_t *info) +{ + char *data = malloc (info->stride * info->height); + int i; + + for (i = 0; i < info->height * info->stride; ++i) + data[i] = (i % 255) ^ (((i % 16) << 4) | (i & 0xf0)); + + return pixman_image_create_bits (info->format, 
info->width, info->height, (uint32_t *)data, info->stride); +} + +static void +test_composite (const composite_info_t *info) +{ + pixman_image_t *src = make_image (&info->src); + pixman_image_t *dest = make_image (&info->dest); + + pixman_image_composite (PIXMAN_OP_SRC, src, NULL, dest, + info->src_x, info->src_y, + 0, 0, + info->dest_x, info->dest_y, + info->width, info->height); +} + + + +int +main (int argc, char **argv) +{ + int i; + + for (i = 0; i < sizeof (info) / sizeof (info[0]); ++i) + test_composite (&info[i]); + + return 0; +} diff --git a/lib/pixman/test/region-test.c b/lib/pixman/test/region-test.c index e214e9b89..3568969f1 100644 --- a/lib/pixman/test/region-test.c +++ b/lib/pixman/test/region-test.c @@ -3,21 +3,76 @@ #include <stdio.h> #include "pixman.h" -/* This used to go into an infinite loop before pixman-region.c - * was fixed to not use explict "short" variables - */ int main () { pixman_region32_t r1; pixman_region32_t r2; pixman_region32_t r3; + pixman_box32_t boxes[] = { + { 10, 10, 20, 20 }, + { 30, 30, 30, 40 }, + { 50, 45, 60, 44 }, + }; + pixman_box32_t boxes2[] = { + { 2, 6, 7, 6 }, + { 4, 1, 6, 7 }, + }; + pixman_box32_t boxes3[] = { + { 2, 6, 7, 6 }, + { 4, 1, 6, 1 }, + }; + int i; + pixman_box32_t *b; + /* This used to go into an infinite loop before pixman-region.c + * was fixed to not use explict "short" variables + */ pixman_region32_init_rect (&r1, 0, 0, 20, 64000); pixman_region32_init_rect (&r2, 0, 0, 20, 64000); pixman_region32_init_rect (&r3, 0, 0, 20, 64000); pixman_region32_subtract (&r1, &r2, &r3); -} + /* This would produce a region containing an empty + * rectangle in it. Such regions are considered malformed, + * but using an empty rectangle for initialization should + * work. + */ + pixman_region32_init_rects (&r1, boxes, 3); + + b = pixman_region32_rectangles (&r1, &i); + + assert (i == 1); + + while (i--) + { + assert (b[i].x1 < b[i].x2); + assert (b[i].y1 < b[i].y2); + } + + /* This would produce a rectangle containing the bounding box + * of the two rectangles. The correct result is to eliminate + * the broken rectangle. + */ + pixman_region32_init_rects (&r1, boxes2, 2); + + b = pixman_region32_rectangles (&r1, &i); + + assert (i == 1); + + assert (b[0].x1 == 4); + assert (b[0].y1 == 1); + assert (b[0].x2 == 6); + assert (b[0].y2 == 7); + + /* This should produce an empty region */ + pixman_region32_init_rects (&r1, boxes3, 2); + + b = pixman_region32_rectangles (&r1, &i); + + assert (i == 0); + + return 0; +} diff --git a/lib/pixman/test/scaling-test.c b/lib/pixman/test/scaling-test.c index c85908ddc..8899c594f 100644 --- a/lib/pixman/test/scaling-test.c +++ b/lib/pixman/test/scaling-test.c @@ -29,115 +29,192 @@ static uint32_t lcg_seed; -uint32_t lcg_rand(void) +uint32_t +lcg_rand (void) { lcg_seed = lcg_seed * 1103515245 + 12345; return ((uint32_t)(lcg_seed / 65536) % 32768); } -void lcg_srand(uint32_t seed) +void +lcg_srand (uint32_t seed) { lcg_seed = seed; } -uint32_t lcg_rand_n(int max) +uint32_t +lcg_rand_n (int max) { - return lcg_rand() % max; + return lcg_rand () % max; } /*----------------------------------------------------------------------------*\ - * CRC-32 version 2.0.0 by Craig Bruce, 2006-04-29. - * - * This program generates the CRC-32 values for the files named in the - * command-line arguments. These are the same CRC-32 values used by GZIP, - * PKZIP, and ZMODEM. The Crc32_ComputeBuf() can also be detached and - * used independently. - * - * THIS PROGRAM IS PUBLIC-DOMAIN SOFTWARE. 
- * - * Based on the byte-oriented implementation "File Verification Using CRC" - * by Mark R. Nelson in Dr. Dobb's Journal, May 1992, pp. 64-67. - * - * v1.0.0: original release. - * v1.0.1: fixed printf formats. - * v1.0.2: fixed something else. - * v1.0.3: replaced CRC constant table by generator function. - * v1.0.4: reformatted code, made ANSI C. 1994-12-05. - * v2.0.0: rewrote to use memory buffer & static table, 2006-04-29. +* CRC-32 version 2.0.0 by Craig Bruce, 2006-04-29. +* +* This program generates the CRC-32 values for the files named in the +* command-line arguments. These are the same CRC-32 values used by GZIP, +* PKZIP, and ZMODEM. The compute_crc32() can also be detached and +* used independently. +* +* THIS PROGRAM IS PUBLIC-DOMAIN SOFTWARE. +* +* Based on the byte-oriented implementation "File Verification Using CRC" +* by Mark R. Nelson in Dr. Dobb's Journal, May 1992, pp. 64-67. +* +* v1.0.0: original release. +* v1.0.1: fixed printf formats. +* v1.0.2: fixed something else. +* v1.0.3: replaced CRC constant table by generator function. +* v1.0.4: reformatted code, made ANSI C. 1994-12-05. +* v2.0.0: rewrote to use memory buffer & static table, 2006-04-29. \*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*\ - * NAME: - * Crc32_ComputeBuf() - computes the CRC-32 value of a memory buffer - * DESCRIPTION: - * Computes or accumulates the CRC-32 value for a memory buffer. - * The 'inCrc32' gives a previously accumulated CRC-32 value to allow - * a CRC to be generated for multiple sequential buffer-fuls of data. - * The 'inCrc32' for the first buffer must be zero. - * ARGUMENTS: - * inCrc32 - accumulated CRC-32 value, must be 0 on first call - * buf - buffer to compute CRC-32 value for - * bufLen - number of bytes in buffer - * RETURNS: - * crc32 - computed CRC-32 value - * ERRORS: - * (no errors are possible) +* NAME: +* compute_crc32() - computes the CRC-32 value of a memory buffer +* DESCRIPTION: +* Computes or accumulates the CRC-32 value for a memory buffer. +* The 'in_crc32' gives a previously accumulated CRC-32 value to allow +* a CRC to be generated for multiple sequential buffer-fuls of data. +* The 'in_crc32' for the first buffer must be zero. 
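+* (This is the same public-domain routine as in blitters-test.c; the
+* rename from Crc32_ComputeBuf () to compute_crc32 () only tracks the
+* new coding style.)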
+* ARGUMENTS: +* in_crc32 - accumulated CRC-32 value, must be 0 on first call +* buf - buffer to compute CRC-32 value for +* buf_len - number of bytes in buffer +* RETURNS: +* crc32 - computed CRC-32 value +* ERRORS: +* (no errors are possible) \*----------------------------------------------------------------------------*/ -static uint32_t Crc32_ComputeBuf( uint32_t inCrc32, const void *buf, - size_t bufLen ) +static uint32_t +compute_crc32 (uint32_t in_crc32, + const void *buf, + size_t buf_len) { - static const uint32_t crcTable[256] = { - 0x00000000,0x77073096,0xEE0E612C,0x990951BA,0x076DC419,0x706AF48F,0xE963A535, - 0x9E6495A3,0x0EDB8832,0x79DCB8A4,0xE0D5E91E,0x97D2D988,0x09B64C2B,0x7EB17CBD, - 0xE7B82D07,0x90BF1D91,0x1DB71064,0x6AB020F2,0xF3B97148,0x84BE41DE,0x1ADAD47D, - 0x6DDDE4EB,0xF4D4B551,0x83D385C7,0x136C9856,0x646BA8C0,0xFD62F97A,0x8A65C9EC, - 0x14015C4F,0x63066CD9,0xFA0F3D63,0x8D080DF5,0x3B6E20C8,0x4C69105E,0xD56041E4, - 0xA2677172,0x3C03E4D1,0x4B04D447,0xD20D85FD,0xA50AB56B,0x35B5A8FA,0x42B2986C, - 0xDBBBC9D6,0xACBCF940,0x32D86CE3,0x45DF5C75,0xDCD60DCF,0xABD13D59,0x26D930AC, - 0x51DE003A,0xC8D75180,0xBFD06116,0x21B4F4B5,0x56B3C423,0xCFBA9599,0xB8BDA50F, - 0x2802B89E,0x5F058808,0xC60CD9B2,0xB10BE924,0x2F6F7C87,0x58684C11,0xC1611DAB, - 0xB6662D3D,0x76DC4190,0x01DB7106,0x98D220BC,0xEFD5102A,0x71B18589,0x06B6B51F, - 0x9FBFE4A5,0xE8B8D433,0x7807C9A2,0x0F00F934,0x9609A88E,0xE10E9818,0x7F6A0DBB, - 0x086D3D2D,0x91646C97,0xE6635C01,0x6B6B51F4,0x1C6C6162,0x856530D8,0xF262004E, - 0x6C0695ED,0x1B01A57B,0x8208F4C1,0xF50FC457,0x65B0D9C6,0x12B7E950,0x8BBEB8EA, - 0xFCB9887C,0x62DD1DDF,0x15DA2D49,0x8CD37CF3,0xFBD44C65,0x4DB26158,0x3AB551CE, - 0xA3BC0074,0xD4BB30E2,0x4ADFA541,0x3DD895D7,0xA4D1C46D,0xD3D6F4FB,0x4369E96A, - 0x346ED9FC,0xAD678846,0xDA60B8D0,0x44042D73,0x33031DE5,0xAA0A4C5F,0xDD0D7CC9, - 0x5005713C,0x270241AA,0xBE0B1010,0xC90C2086,0x5768B525,0x206F85B3,0xB966D409, - 0xCE61E49F,0x5EDEF90E,0x29D9C998,0xB0D09822,0xC7D7A8B4,0x59B33D17,0x2EB40D81, - 0xB7BD5C3B,0xC0BA6CAD,0xEDB88320,0x9ABFB3B6,0x03B6E20C,0x74B1D29A,0xEAD54739, - 0x9DD277AF,0x04DB2615,0x73DC1683,0xE3630B12,0x94643B84,0x0D6D6A3E,0x7A6A5AA8, - 0xE40ECF0B,0x9309FF9D,0x0A00AE27,0x7D079EB1,0xF00F9344,0x8708A3D2,0x1E01F268, - 0x6906C2FE,0xF762575D,0x806567CB,0x196C3671,0x6E6B06E7,0xFED41B76,0x89D32BE0, - 0x10DA7A5A,0x67DD4ACC,0xF9B9DF6F,0x8EBEEFF9,0x17B7BE43,0x60B08ED5,0xD6D6A3E8, - 0xA1D1937E,0x38D8C2C4,0x4FDFF252,0xD1BB67F1,0xA6BC5767,0x3FB506DD,0x48B2364B, - 0xD80D2BDA,0xAF0A1B4C,0x36034AF6,0x41047A60,0xDF60EFC3,0xA867DF55,0x316E8EEF, - 0x4669BE79,0xCB61B38C,0xBC66831A,0x256FD2A0,0x5268E236,0xCC0C7795,0xBB0B4703, - 0x220216B9,0x5505262F,0xC5BA3BBE,0xB2BD0B28,0x2BB45A92,0x5CB36A04,0xC2D7FFA7, - 0xB5D0CF31,0x2CD99E8B,0x5BDEAE1D,0x9B64C2B0,0xEC63F226,0x756AA39C,0x026D930A, - 0x9C0906A9,0xEB0E363F,0x72076785,0x05005713,0x95BF4A82,0xE2B87A14,0x7BB12BAE, - 0x0CB61B38,0x92D28E9B,0xE5D5BE0D,0x7CDCEFB7,0x0BDBDF21,0x86D3D2D4,0xF1D4E242, - 0x68DDB3F8,0x1FDA836E,0x81BE16CD,0xF6B9265B,0x6FB077E1,0x18B74777,0x88085AE6, - 0xFF0F6A70,0x66063BCA,0x11010B5C,0x8F659EFF,0xF862AE69,0x616BFFD3,0x166CCF45, - 0xA00AE278,0xD70DD2EE,0x4E048354,0x3903B3C2,0xA7672661,0xD06016F7,0x4969474D, - 0x3E6E77DB,0xAED16A4A,0xD9D65ADC,0x40DF0B66,0x37D83BF0,0xA9BCAE53,0xDEBB9EC5, - 0x47B2CF7F,0x30B5FFE9,0xBDBDF21C,0xCABAC28A,0x53B39330,0x24B4A3A6,0xBAD03605, - 0xCDD70693,0x54DE5729,0x23D967BF,0xB3667A2E,0xC4614AB8,0x5D681B02,0x2A6F2B94, - 0xB40BBE37,0xC30C8EA1,0x5A05DF1B,0x2D02EF8D }; - uint32_t crc32; - unsigned char *byteBuf; - size_t i; + static const 
uint32_t crc_table[256] = { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, + 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, + 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, + 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, + 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, + 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, + 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, + 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, + 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, + 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, + 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, + 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, + 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, + 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, + 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, + 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, + 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, + 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, + 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, + 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, + 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, + 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D + }; + + uint32_t crc32; + unsigned char * byte_buf; + size_t i; /** accumulate crc32 for buffer **/ - crc32 = inCrc32 ^ 0xFFFFFFFF; - byteBuf = (unsigned char*) buf; - for (i=0; i < bufLen; i++) { - crc32 = (crc32 >> 8) ^ crcTable[ (crc32 ^ byteBuf[i]) & 0xFF ]; - } - return( crc32 ^ 0xFFFFFFFF ); + crc32 = in_crc32 ^ 0xFFFFFFFF; + byte_buf = (unsigned char*) buf; + 
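+    /* Each step folds one message byte into the running CRC: index the
+     * table with the low byte of (crc ^ byte) and shift eight bits out;
+     * the table encodes the reflected CRC-32 polynomial 0xEDB88320. */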
+ for (i = 0; i < buf_len; i++) + crc32 = (crc32 >> 8) ^ crc_table[(crc32 ^ byte_buf[i]) & 0xFF]; + + return (crc32 ^ 0xFFFFFFFF); } +/* perform endian conversion of pixel data */ +static void +image_endian_swap (pixman_image_t *img, + int bpp) +{ + int stride = pixman_image_get_stride (img); + uint32_t *data = pixman_image_get_data (img); + int height = pixman_image_get_height (img); + int i, j; + + /* swap bytes only on big endian systems */ + volatile uint16_t endian_check_var = 0x1234; + if (*(volatile uint8_t *)&endian_check_var != 0x12) + return; + + for (i = 0; i < height; i++) + { + char *line_data = (char *)data + stride * i; + + /* swap bytes only for 16, 24 and 32 bpp for now */ + switch (bpp) + { + case 16: + for (j = 0; j + 2 <= stride; j += 2) + { + char t1 = line_data[j + 0]; + char t2 = line_data[j + 1]; + line_data[j + 1] = t1; + line_data[j + 0] = t2; + } + break; + + case 24: + for (j = 0; j + 3 <= stride; j += 3) + { + char t1 = line_data[j + 0]; + char t2 = line_data[j + 1]; + char t3 = line_data[j + 2]; + line_data[j + 2] = t1; + line_data[j + 1] = t2; + line_data[j + 0] = t3; + } + break; + + case 32: + for (j = 0; j + 4 <= stride; j += 4) + { + char t1 = line_data[j + 0]; + char t2 = line_data[j + 1]; + char t3 = line_data[j + 2]; + char t4 = line_data[j + 3]; + line_data[j + 3] = t1; + line_data[j + 2] = t2; + line_data[j + 1] = t3; + line_data[j + 0] = t4; + } + break; + + default: + break; + } + } +} #define MAX_SRC_WIDTH 10 #define MAX_SRC_HEIGHT 10 @@ -148,194 +225,250 @@ static uint32_t Crc32_ComputeBuf( uint32_t inCrc32, const void *buf, /* * Composite operation with pseudorandom images */ -uint32_t test_composite(uint32_t initcrc, int testnum, int verbose) +uint32_t +test_composite (uint32_t initcrc, + int testnum, + int verbose) { - int i; - pixman_image_t *src_img; - pixman_image_t *dst_img; + int i; + pixman_image_t * src_img; + pixman_image_t * dst_img; pixman_transform_t transform; - pixman_region16_t clip; - int src_width, src_height; - int dst_width, dst_height; - int src_stride, dst_stride; - int src_x, src_y; - int dst_x, dst_y; - int src_bpp; - int dst_bpp; - int w, h; - int scale_x = 32768, scale_y = 32768; - int op; - int repeat = 0; - int src_fmt, dst_fmt; - uint32_t *srcbuf; - uint32_t *dstbuf; - uint32_t crc32; - - lcg_srand(testnum); - - src_bpp = (lcg_rand_n(2) == 0) ? 2 : 4; - dst_bpp = (lcg_rand_n(2) == 0) ? 2 : 4; - op = (lcg_rand_n(2) == 0) ? 
PIXMAN_OP_SRC : PIXMAN_OP_OVER; - - src_width = lcg_rand_n(MAX_SRC_WIDTH) + 1; - src_height = lcg_rand_n(MAX_SRC_HEIGHT) + 1; - dst_width = lcg_rand_n(MAX_DST_WIDTH) + 1; - dst_height = lcg_rand_n(MAX_DST_HEIGHT) + 1; - src_stride = src_width * src_bpp + lcg_rand_n(MAX_STRIDE) * src_bpp; - dst_stride = dst_width * dst_bpp + lcg_rand_n(MAX_STRIDE) * dst_bpp; - if (src_stride & 3) src_stride += 2; - if (dst_stride & 3) dst_stride += 2; - - src_x = -(src_width / 4) + lcg_rand_n(src_width * 3 / 2); - src_y = -(src_height / 4) + lcg_rand_n(src_height * 3 / 2); - dst_x = -(dst_width / 4) + lcg_rand_n(dst_width * 3 / 2); - dst_y = -(dst_height / 4) + lcg_rand_n(dst_height * 3 / 2); - w = lcg_rand_n(dst_width * 3 / 2 - dst_x); - h = lcg_rand_n(dst_height * 3 / 2 - dst_y); - - srcbuf = (uint32_t *)malloc(src_stride * src_height); - dstbuf = (uint32_t *)malloc(dst_stride * dst_height); + pixman_region16_t clip; + int src_width, src_height; + int dst_width, dst_height; + int src_stride, dst_stride; + int src_x, src_y; + int dst_x, dst_y; + int src_bpp; + int dst_bpp; + int w, h; + int scale_x = 32768, scale_y = 32768; + int op; + int repeat = 0; + int src_fmt, dst_fmt; + uint32_t * srcbuf; + uint32_t * dstbuf; + uint32_t crc32; + + lcg_srand (testnum); + + src_bpp = (lcg_rand_n (2) == 0) ? 2 : 4; + dst_bpp = (lcg_rand_n (2) == 0) ? 2 : 4; + op = (lcg_rand_n (2) == 0) ? PIXMAN_OP_SRC : PIXMAN_OP_OVER; + + src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1; + src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1; + dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1; + dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1; + src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp; + dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp; + + if (src_stride & 3) + src_stride += 2; + + if (dst_stride & 3) + dst_stride += 2; + + src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2); + src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2); + dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2); + dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2); + w = lcg_rand_n (dst_width * 3 / 2 - dst_x); + h = lcg_rand_n (dst_height * 3 / 2 - dst_y); + + srcbuf = (uint32_t *)malloc (src_stride * src_height); + dstbuf = (uint32_t *)malloc (dst_stride * dst_height); + for (i = 0; i < src_stride * src_height; i++) - *((uint8_t *)srcbuf + i) = lcg_rand_n(256); + *((uint8_t *)srcbuf + i) = lcg_rand_n (256); + for (i = 0; i < dst_stride * dst_height; i++) - *((uint8_t *)dstbuf + i) = lcg_rand_n(256); + *((uint8_t *)dstbuf + i) = lcg_rand_n (256); - src_fmt = src_bpp == 4 ? (lcg_rand_n(2) == 0 ? - PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; + src_fmt = src_bpp == 4 ? (lcg_rand_n (2) == 0 ? + PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; - dst_fmt = dst_bpp == 4 ? (lcg_rand_n(2) == 0 ? - PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; + dst_fmt = dst_bpp == 4 ? (lcg_rand_n (2) == 0 ? 
+ PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; - src_img = pixman_image_create_bits( + src_img = pixman_image_create_bits ( src_fmt, src_width, src_height, srcbuf, src_stride); - dst_img = pixman_image_create_bits( + dst_img = pixman_image_create_bits ( dst_fmt, dst_width, dst_height, dstbuf, dst_stride); - if (lcg_rand_n(8) > 0) { - scale_x = 32768 + lcg_rand_n(65536); - scale_y = 32768 + lcg_rand_n(65536); - pixman_transform_init_scale(&transform, scale_x, scale_y); - pixman_image_set_transform(src_img, &transform); + image_endian_swap (src_img, src_bpp * 8); + image_endian_swap (dst_img, dst_bpp * 8); + + if (lcg_rand_n (8) > 0) + { + scale_x = 32768 + lcg_rand_n (65536); + scale_y = 32768 + lcg_rand_n (65536); + pixman_transform_init_scale (&transform, scale_x, scale_y); + pixman_image_set_transform (src_img, &transform); } - switch (lcg_rand_n(4)) { - case 0: repeat = PIXMAN_REPEAT_NONE; break; - case 1: repeat = PIXMAN_REPEAT_NORMAL; break; - case 2: repeat = PIXMAN_REPEAT_PAD; break; - case 3: repeat = PIXMAN_REPEAT_REFLECT; break; + switch (lcg_rand_n (4)) + { + case 0: + repeat = PIXMAN_REPEAT_NONE; + break; + + case 1: + repeat = PIXMAN_REPEAT_NORMAL; + break; + + case 2: + repeat = PIXMAN_REPEAT_PAD; + break; + + case 3: + repeat = PIXMAN_REPEAT_REFLECT; + break; } - pixman_image_set_repeat(src_img, repeat); - - if (verbose) { - printf("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt); - printf("op=%d, scale_x=%d, scale_y=%d, repeat=%d\n", - op, scale_x, scale_y, repeat); - printf("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n", - src_width, src_height, dst_width, dst_height); - printf("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n", - src_x, src_y, dst_x, dst_y); - printf("w=%d, h=%d\n", w, h); + pixman_image_set_repeat (src_img, repeat); + + if (verbose) + { + printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt); + printf ("op=%d, scale_x=%d, scale_y=%d, repeat=%d\n", + op, scale_x, scale_y, repeat); + printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n", + src_width, src_height, dst_width, dst_height); + printf ("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n", + src_x, src_y, dst_x, dst_y); + printf ("w=%d, h=%d\n", w, h); } - if (lcg_rand_n(8) == 0) { - pixman_box16_t clip_boxes[2]; - int n = lcg_rand_n(2) + 1; - for (i = 0; i < n; i++) { - clip_boxes[i].x1 = lcg_rand_n(src_width); - clip_boxes[i].y1 = lcg_rand_n(src_height); - clip_boxes[i].x2 = clip_boxes[i].x1 + lcg_rand_n(src_width - clip_boxes[i].x1); - clip_boxes[i].y2 = clip_boxes[i].y1 + lcg_rand_n(src_height - clip_boxes[i].y1); - if (verbose) { - printf("source clip box: [%d,%d-%d,%d]\n", - clip_boxes[i].x1, clip_boxes[i].y1, - clip_boxes[i].x2, clip_boxes[i].y2); - } - } - pixman_region_init_rects(&clip, clip_boxes, n); - pixman_image_set_clip_region(src_img, &clip); - pixman_image_set_source_clipping(src_img, 1); - pixman_region_fini(&clip); + if (lcg_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = lcg_rand_n (2) + 1; + + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = lcg_rand_n (src_width); + clip_boxes[i].y1 = lcg_rand_n (src_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("source clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (src_img, &clip); + 
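+	/* Source clipping (enabled below) makes the clip region above
+	 * also restrict reads from src_img; by default a clip region
+	 * only affects an image used as a destination. */
+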
pixman_image_set_source_clipping (src_img, 1); + pixman_region_fini (&clip); } - if (lcg_rand_n(8) == 0) { - pixman_box16_t clip_boxes[2]; - int n = lcg_rand_n(2) + 1; - for (i = 0; i < n; i++) { - clip_boxes[i].x1 = lcg_rand_n(dst_width); - clip_boxes[i].y1 = lcg_rand_n(dst_height); - clip_boxes[i].x2 = clip_boxes[i].x1 + lcg_rand_n(dst_width - clip_boxes[i].x1); - clip_boxes[i].y2 = clip_boxes[i].y1 + lcg_rand_n(dst_height - clip_boxes[i].y1); - if (verbose) { - printf("destination clip box: [%d,%d-%d,%d]\n", - clip_boxes[i].x1, clip_boxes[i].y1, - clip_boxes[i].x2, clip_boxes[i].y2); - } - } - pixman_region_init_rects(&clip, clip_boxes, n); - pixman_image_set_clip_region(dst_img, &clip); - pixman_region_fini(&clip); + if (lcg_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = lcg_rand_n (2) + 1; + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = lcg_rand_n (dst_width); + clip_boxes[i].y1 = lcg_rand_n (dst_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("destination clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (dst_img, &clip); + pixman_region_fini (&clip); } pixman_image_composite (op, src_img, NULL, dst_img, src_x, src_y, 0, 0, dst_x, dst_y, w, h); - if (dst_fmt == PIXMAN_x8r8g8b8) { - /* ignore unused part */ - for (i = 0; i < dst_stride * dst_height / 4; i++) - dstbuf[i] &= 0xFFFFFF; + if (dst_fmt == PIXMAN_x8r8g8b8) + { + /* ignore unused part */ + for (i = 0; i < dst_stride * dst_height / 4; i++) + dstbuf[i] &= 0xFFFFFF; } - if (verbose) { - int j; - for (i = 0; i < dst_height; i++) { - for (j = 0; j < dst_stride; j++) { - printf("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j)); - } - printf("\n"); - } + image_endian_swap (dst_img, dst_bpp * 8); + + if (verbose) + { + int j; + + for (i = 0; i < dst_height; i++) + { + for (j = 0; j < dst_stride; j++) + printf ("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j)); + + printf ("\n"); + } } pixman_image_unref (src_img); pixman_image_unref (dst_img); - crc32 = Crc32_ComputeBuf(initcrc, dstbuf, dst_stride * dst_height); - free(srcbuf); - free(dstbuf); + crc32 = compute_crc32 (initcrc, dstbuf, dst_stride * dst_height); + free (srcbuf); + free (dstbuf); return crc32; } -int main(int argc, char *argv[]) +int +main (int argc, char *argv[]) { - int i, n = 0; + int i, n = 0; uint32_t crc = 0; + pixman_disable_out_of_bounds_workaround (); + if (argc >= 2) - n = atoi(argv[1]); + n = atoi (argv[1]); if (n == 0) n = 3000000; - if (n < 0) { - crc = test_composite(0, -n, 1); - printf("crc32=%08X\n", crc); + if (n < 0) + { + crc = test_composite (0, -n, 1); + printf ("crc32=%08X\n", crc); } - else { - for (i = 1; i <= n; i++) - { - crc = test_composite(crc, i, 0); - } - printf("crc32=%08X\n", crc); -#ifdef LITTLE_ENDIAN - if (n == 3000000) { - /* predefined value for running with all the fastpath functions disabled */ - /* it needs to be updated every time changes are introduced to this program! 
*/ - if (crc == 0xC950E5BB) { - printf("scaling test passed\n"); - } else { - printf("scaling test failed!\n"); - } - } -#endif + else + { + for (i = 1; i <= n; i++) + crc = test_composite (crc, i, 0); + + printf ("crc32=%08X\n", crc); + + if (n == 3000000) + { + /* predefined value for running with all the fastpath functions disabled */ + /* it needs to be updated every time changes are introduced to this program! */ + + if (crc == 0x0B633CF4) + { + printf ("scaling test passed\n"); + } + else + { + printf ("scaling test failed!\n"); + return 1; + } + } } + return 0; } diff --git a/lib/pixman/test/screen-test.c b/lib/pixman/test/screen-test.c new file mode 100644 index 000000000..5e02eee08 --- /dev/null +++ b/lib/pixman/test/screen-test.c @@ -0,0 +1,44 @@ +#include <stdio.h> +#include <stdlib.h> +#include "pixman.h" +#include "utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 40 +#define HEIGHT 40 + + uint32_t *src1 = malloc (WIDTH * HEIGHT * 4); + uint32_t *src2 = malloc (WIDTH * HEIGHT * 4); + uint32_t *src3 = malloc (WIDTH * HEIGHT * 4); + uint32_t *dest = malloc (3 * WIDTH * 2 * HEIGHT * 4); + pixman_image_t *simg1, *simg2, *simg3, *dimg; + + int i; + + for (i = 0; i < WIDTH * HEIGHT; ++i) + { + src1[i] = 0x7ff00000; + src2[i] = 0x7f00ff00; + src3[i] = 0x7f0000ff; + } + + for (i = 0; i < 3 * WIDTH * 2 * HEIGHT; ++i) + { + dest[i] = 0x0; + } + + simg1 = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src1, WIDTH * 4); + simg2 = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src2, WIDTH * 4); + simg3 = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src3, WIDTH * 4); + dimg = pixman_image_create_bits (PIXMAN_a8r8g8b8, 3 * WIDTH, 2 * HEIGHT, dest, 3 * WIDTH * 4); + + pixman_image_composite (PIXMAN_OP_SCREEN, simg1, NULL, dimg, 0, 0, 0, 0, WIDTH, HEIGHT / 4, WIDTH, HEIGHT); + pixman_image_composite (PIXMAN_OP_SCREEN, simg2, NULL, dimg, 0, 0, 0, 0, (WIDTH/2), HEIGHT / 4 + HEIGHT / 2, WIDTH, HEIGHT); + pixman_image_composite (PIXMAN_OP_SCREEN, simg3, NULL, dimg, 0, 0, 0, 0, (4 * WIDTH) / 3, HEIGHT, WIDTH, HEIGHT); + + show_image (dimg); + + return 0; +} diff --git a/lib/pixman/test/window-test.c b/lib/pixman/test/window-test.c new file mode 100644 index 000000000..bbaa3e211 --- /dev/null +++ b/lib/pixman/test/window-test.c @@ -0,0 +1,173 @@ +#include <stdio.h> +#include <stdlib.h> +#include <config.h> +#include "pixman.h" +#include "pixman-private.h" + +#define FALSE 0 +#define TRUE 1 + +/* Randomly decide between 32 and 16 bit + * + * Allocate bits with random width, stride and height + * + * Then make up some random offset (dx, dy) + * + * Then make an image with those values. + * + * Do this for both source and destination + * + * Composite them together using OVER. + * + * The bits in the source and the destination should have + * recognizable colors so that the result can be verified. + * + * Ie., walk the bits and verify that they have been composited. 
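+ *
+ * (Concretely, the "bogus X server translations" simulated below move
+ * the destination bits pointer back by (dy * stride + dx) pixels and
+ * install a clip region offset to (dx, dy), mimicking the
+ * drawable-relative pointers the server used to hand to pixman.)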
+ */ + +static int +get_rand (int bound) +{ + return rand () % bound; +} + +static pixman_image_t * +make_image (int width, int height, pixman_bool_t src, int *rx, int *ry) +{ + pixman_format_code_t format; + pixman_image_t *image; + pixman_region32_t region; + uint8_t *bits; + int stride; + int bpp; + int dx, dy; + int i, j; + + if (src) + format = PIXMAN_a8r8g8b8; + else + format = PIXMAN_r5g6b5; + + bpp = PIXMAN_FORMAT_BPP (format) / 8; + + stride = width + get_rand (width); + stride += (stride & 1); /* Make it an even number */ + + bits = malloc (height * stride * bpp); + + for (j = 0; j < height; ++j) + { + for (i = 0; i < width; ++i) + { + uint8_t *pixel = bits + (stride * j + i) * bpp; + + if (src) + *(uint32_t *)pixel = 0x7f00007f; + else + *(uint16_t *)pixel = 0xf100; + } + } + + dx = dy = 0; + + dx = get_rand (500); + dy = get_rand (500); + + if (!src) + { + /* Now simulate the bogus X server translations */ + bits -= (dy * stride + dx) * bpp; + } + + image = pixman_image_create_bits ( + format, width, height, (uint32_t *)bits, stride * bpp); + + if (!src) + { + /* And add the bogus clip region */ + pixman_region32_init_rect (®ion, dx, dy, dx + width, dy + height); + + pixman_image_set_clip_region32 (image, ®ion); + } + + pixman_image_set_source_clipping (image, TRUE); + + if (src) + { + pixman_transform_t trans; + + pixman_transform_init_identity (&trans); + + pixman_transform_translate (&trans, + NULL, + - pixman_int_to_fixed (width / 2), + - pixman_int_to_fixed (height / 2)); + + pixman_transform_scale (&trans, + NULL, + pixman_double_to_fixed (0.5), + pixman_double_to_fixed (0.5)); + + pixman_transform_translate (&trans, + NULL, + pixman_int_to_fixed (width / 2), + pixman_int_to_fixed (height / 2)); + + pixman_image_set_transform (image, &trans); + pixman_image_set_filter (image, PIXMAN_FILTER_BILINEAR, NULL, 0); + pixman_image_set_repeat (image, PIXMAN_REPEAT_PAD); + } + + if (!src) + { + *rx = dx; + *ry = dy; + } + else + { + *rx = *ry = 0; + } + + return image; +} + +int +main () +{ + pixman_image_t *src, *dest; + int src_x, src_y, dest_x, dest_y; + int i, j; + int width = get_rand (500); + int height = get_rand (500); + + src = make_image (width, height, TRUE, &src_x, &src_y); + dest = make_image (width, height, FALSE, &dest_x, &dest_y); + + pixman_image_composite ( + PIXMAN_OP_OVER, src, NULL, dest, + src_x, src_y, + -1, -1, + dest_x, dest_y, + width, height); + + for (i = 0; i < height; ++i) + { + for (j = 0; j < width; ++j) + { + uint8_t *bits = (uint8_t *)dest->bits.bits; + int bpp = PIXMAN_FORMAT_BPP (dest->bits.format) / 8; + int stride = dest->bits.rowstride * 4; + + uint8_t *pixel = + bits + (i + dest_y) * stride + (j + dest_x) * bpp; + + if (*(uint16_t *)pixel != 0x788f) + { + printf ("bad pixel %x\n", *(uint16_t *)pixel); + assert (*(uint16_t *)pixel == 0x788f); + } + } + } + + return 0; +} |