author     Matthieu Herrb <matthieu@cvs.openbsd.org>    2010-10-03 18:30:05 +0000
committer  Matthieu Herrb <matthieu@cvs.openbsd.org>    2010-10-03 18:30:05 +0000
commit     519bd19882b18b3cfcccca5fe8e0e6ab6eb3b937 (patch)
tree       1ed8f61276ba41eeaf1ffa509465cd2f767cc3aa /lib/pixman
parent     9b631ded21a25e9a701bb5c1be5a29597ce2e3c9 (diff)
Update to pixman 0.18.4.
Tweak build to use libpthread-stubs for TLS emulation instead of forcing every application using pixman to use -pthread. Tested by jasper@ and landry@ on a bulk ports build.
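For context, the TLS emulation in question follows the same pattern the new configure checks below exercise: use the compiler's __thread keyword where the toolchain supports it, and otherwise fall back to POSIX thread-specific data, whose pthread_* symbols libpthread-stubs can satisfy without linking -pthread. The following minimal C sketch illustrates that fallback; TOOLCHAIN_SUPPORTS__THREAD and HAVE_PTHREAD_SETSPECIFIC are the config.h defines added by this update, while the get_context() wrapper is a hypothetical example rather than pixman's actual internal API.

/*
 * Sketch of the two-level TLS fallback (hypothetical wrapper, not
 * pixman's real internals).  The config.h macros referenced here are
 * the ones this update introduces.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#ifdef TOOLCHAIN_SUPPORTS__THREAD

/* Fast path: compiler-provided thread-local storage. */
static __thread int *context;

static int *
get_context (void)
{
    if (!context)
        context = calloc (1, sizeof (int));
    return context;
}

#else /* e.g. HAVE_PTHREAD_SETSPECIFIC */

/*
 * Fallback: POSIX thread-specific data.  In a single-threaded program
 * these pthread_* calls can be resolved by libpthread-stubs, which is
 * why applications no longer have to be built with -pthread.
 */
static pthread_once_t once_control = PTHREAD_ONCE_INIT;
static pthread_key_t key;

static void
make_key (void)
{
    pthread_key_create (&key, free);
}

static int *
get_context (void)
{
    int *value;

    pthread_once (&once_control, make_key);
    value = pthread_getspecific (key);
    if (!value)
    {
        value = calloc (1, sizeof (int));
        pthread_setspecific (key, value);
    }
    return value;
}

#endif

int
main (void)
{
    printf ("per-thread counter: %d\n", ++*get_context ());
    return 0;
}

Compiled as-is the sketch takes the pthread-key branch and needs only the stub (or real) pthread symbols at link time; building with -DTOOLCHAIN_SUPPORTS__THREAD exercises the __thread path instead.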
Diffstat (limited to 'lib/pixman')
-rw-r--r--  lib/pixman/COPYING | 1
-rw-r--r--  lib/pixman/Makefile.bsd-wrapper | 4
-rw-r--r--  lib/pixman/Makefile.in | 17
-rw-r--r--  lib/pixman/README | 12
-rw-r--r--  lib/pixman/TODO | 5
-rw-r--r--  lib/pixman/aclocal.m4 | 21
-rw-r--r--  lib/pixman/config.h.in | 13
-rw-r--r--  lib/pixman/configure | 907
-rw-r--r--  lib/pixman/configure.ac | 231
-rw-r--r--  lib/pixman/pixman-1.pc.in | 1
-rw-r--r--  lib/pixman/pixman/Makefile.am | 29
-rw-r--r--  lib/pixman/pixman/Makefile.in | 312
-rw-r--r--  lib/pixman/pixman/pixman-access.c | 178
-rw-r--r--  lib/pixman/pixman/pixman-arm-common.h | 273
-rw-r--r--  lib/pixman/pixman/pixman-arm-neon-asm.S | 1713
-rw-r--r--  lib/pixman/pixman/pixman-arm-neon-asm.h | 906
-rw-r--r--  lib/pixman/pixman/pixman-arm-neon.c | 2897
-rw-r--r--  lib/pixman/pixman/pixman-arm-simd-asm.S | 330
-rw-r--r--  lib/pixman/pixman/pixman-arm-simd.c | 225
-rw-r--r--  lib/pixman/pixman/pixman-bits-image.c | 409
-rw-r--r--  lib/pixman/pixman/pixman-compiler.h | 132
-rw-r--r--  lib/pixman/pixman/pixman-conical-gradient.c | 6
-rw-r--r--  lib/pixman/pixman/pixman-cpu.c | 12
-rw-r--r--  lib/pixman/pixman/pixman-edge-imp.h | 20
-rw-r--r--  lib/pixman/pixman/pixman-edge.c | 5
-rw-r--r--  lib/pixman/pixman/pixman-fast-path.c | 996
-rw-r--r--  lib/pixman/pixman/pixman-general.c | 52
-rw-r--r--  lib/pixman/pixman/pixman-image.c | 301
-rw-r--r--  lib/pixman/pixman/pixman-implementation.c | 55
-rw-r--r--  lib/pixman/pixman/pixman-mmx.c | 292
-rw-r--r--  lib/pixman/pixman/pixman-private.h | 277
-rw-r--r--  lib/pixman/pixman/pixman-region.c | 367
-rw-r--r--  lib/pixman/pixman/pixman-solid-fill.c | 35
-rw-r--r--  lib/pixman/pixman/pixman-sse2.c | 704
-rw-r--r--  lib/pixman/pixman/pixman-trap.c | 14
-rw-r--r--  lib/pixman/pixman/pixman-utils.c | 621
-rw-r--r--  lib/pixman/pixman/pixman-vmx.c | 7
-rw-r--r--  lib/pixman/pixman/pixman.c | 1012
-rw-r--r--  lib/pixman/pixman/pixman.h | 82
-rw-r--r--  lib/pixman/pixman/solaris-hwcap.mapfile | 36
-rw-r--r--  lib/pixman/test/Makefile.am | 52
-rw-r--r--  lib/pixman/test/Makefile.in | 284
-rw-r--r--  lib/pixman/test/a1-trap-test.c | 50
-rw-r--r--  lib/pixman/test/alpha-test.c | 12
-rw-r--r--  lib/pixman/test/alphamap.c | 49
-rw-r--r--  lib/pixman/test/blitters-test.c | 306
-rw-r--r--  lib/pixman/test/clip-in.c | 2
-rw-r--r--  lib/pixman/test/clip-test.c | 4
-rw-r--r--  lib/pixman/test/composite-test.c | 7
-rw-r--r--  lib/pixman/test/composite.c | 901
-rw-r--r--  lib/pixman/test/convolution-test.c | 2
-rw-r--r--  lib/pixman/test/fetch-test.c | 102
-rw-r--r--  lib/pixman/test/gradient-test.c | 10
-rw-r--r--  lib/pixman/test/gtk-utils.c | 113
-rw-r--r--  lib/pixman/test/gtk-utils.h | 13
-rw-r--r--  lib/pixman/test/region-test.c | 49
-rw-r--r--  lib/pixman/test/scaling-test.c | 207
-rw-r--r--  lib/pixman/test/screen-test.c | 2
-rw-r--r--  lib/pixman/test/trap-test.c | 2
-rw-r--r--  lib/pixman/test/utils.c | 289
-rw-r--r--  lib/pixman/test/utils.h | 47
-rw-r--r--  lib/pixman/test/window-test.c | 6
62 files changed, 10152 insertions, 5867 deletions
diff --git a/lib/pixman/COPYING b/lib/pixman/COPYING
index 286158f2e..b0571e6a6 100644
--- a/lib/pixman/COPYING
+++ b/lib/pixman/COPYING
@@ -18,6 +18,7 @@ Copyright 2008 Rodrigo Kumpera
Copyright 2008 André Tupinambá
Copyright 2008 Mozilla Corporation
Copyright 2008 Frederic Plourde
+Copyright 2009 Sun Microsystems, Inc.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
diff --git a/lib/pixman/Makefile.bsd-wrapper b/lib/pixman/Makefile.bsd-wrapper
index afa52b59d..3927b889a 100644
--- a/lib/pixman/Makefile.bsd-wrapper
+++ b/lib/pixman/Makefile.bsd-wrapper
@@ -1,8 +1,8 @@
-# $OpenBSD: Makefile.bsd-wrapper,v 1.11 2010/08/25 17:44:26 todd Exp $
+# $OpenBSD: Makefile.bsd-wrapper,v 1.12 2010/10/03 18:30:04 matthieu Exp $
.include <bsd.own.mk>
-SHARED_LIBS= pixman-1 16.6
+SHARED_LIBS= pixman-1 18.4
.if ${MACHINE} == amd64 && !${COMPILER_VERSION:L:Mgcc4*}
CONFIGURE_ARGS += --disable-sse2
diff --git a/lib/pixman/Makefile.in b/lib/pixman/Makefile.in
index d51ee56ad..a49f36c9f 100644
--- a/lib/pixman/Makefile.in
+++ b/lib/pixman/Makefile.in
@@ -88,13 +88,13 @@ AMDEP_FALSE = @AMDEP_FALSE@
AMDEP_TRUE = @AMDEP_TRUE@
AMTAR = @AMTAR@
AR = @AR@
-ARM_NEON_CFLAGS = @ARM_NEON_CFLAGS@
-ARM_SIMD_CFLAGS = @ARM_SIMD_CFLAGS@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
+CCAS = @CCAS@
+CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
@@ -122,6 +122,7 @@ GTK_CFLAGS = @GTK_CFLAGS@
GTK_LIBS = @GTK_LIBS@
HAVE_GTK_FALSE = @HAVE_GTK_FALSE@
HAVE_GTK_TRUE = @HAVE_GTK_TRUE@
+HAVE_PTHREAD_SETSPECIFIC = @HAVE_PTHREAD_SETSPECIFIC@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
@@ -151,6 +152,7 @@ PIXMAN_VERSION_MAJOR = @PIXMAN_VERSION_MAJOR@
PIXMAN_VERSION_MICRO = @PIXMAN_VERSION_MICRO@
PIXMAN_VERSION_MINOR = @PIXMAN_VERSION_MINOR@
PKG_CONFIG = @PKG_CONFIG@
+PTHREAD_LDFLAGS = @PTHREAD_LDFLAGS@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
@@ -158,6 +160,9 @@ SHELL = @SHELL@
SSE2_CFLAGS = @SSE2_CFLAGS@
SSE2_LDFLAGS = @SSE2_LDFLAGS@
STRIP = @STRIP@
+STUBS_CFLAGS = @STUBS_CFLAGS@
+STUBS_LIBS = @STUBS_LIBS@
+TOOLCHAIN_SUPPORTS__THREAD = @TOOLCHAIN_SUPPORTS__THREAD@
USE_ARM_NEON_FALSE = @USE_ARM_NEON_FALSE@
USE_ARM_NEON_TRUE = @USE_ARM_NEON_TRUE@
USE_ARM_SIMD_FALSE = @USE_ARM_SIMD_FALSE@
@@ -250,15 +255,15 @@ $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
- echo ' cd $(srcdir) && $(AUTOMAKE) --gnu '; \
- cd $(srcdir) && $(AUTOMAKE) --gnu \
+ echo ' cd $(srcdir) && $(AUTOMAKE) --foreign '; \
+ cd $(srcdir) && $(AUTOMAKE) --foreign \
&& exit 0; \
exit 1;; \
esac; \
done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \
cd $(top_srcdir) && \
- $(AUTOMAKE) --gnu Makefile
+ $(AUTOMAKE) --foreign Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
diff --git a/lib/pixman/README b/lib/pixman/README
index 843b06980..60dff4561 100644
--- a/lib/pixman/README
+++ b/lib/pixman/README
@@ -3,16 +3,12 @@ features such as image compositing and trapezoid rasterization.
Please submit bugs & patches to the libpixman bugzilla:
- https://bugs.freedesktop.org/enter_bug.cgi?product=pixman
+ https://bugs.freedesktop.org/enter_bug.cgi?product=pixman
-All questions regarding this software should be directed to either the
-Xorg mailing list:
+All questions regarding this software should be directed to the pixman
+mailing list:
- http://lists.freedesktop.org/mailman/listinfo/xorg
-
-or the cairo mailing list:
-
- http://lists.freedesktop.org/mailman/listinfo/cairo
+ http://lists.freedesktop.org/mailman/listinfo/pixman
The master development code repository can be found at:
diff --git a/lib/pixman/TODO b/lib/pixman/TODO
index 52d737706..4434ec7cb 100644
--- a/lib/pixman/TODO
+++ b/lib/pixman/TODO
@@ -1,3 +1,8 @@
+ - Testing
+ - Test implementations against each other
+ - Test both with and without the operator strength reduction.
+ They shold be identical.
+
- SSE 2 issues:
- Use MM_HINT_NTA instead of MM_HINT_T0
diff --git a/lib/pixman/aclocal.m4 b/lib/pixman/aclocal.m4
index b7278b304..272fe9080 100644
--- a/lib/pixman/aclocal.m4
+++ b/lib/pixman/aclocal.m4
@@ -6824,6 +6824,27 @@ AC_DEFUN([AM_AUTOMAKE_VERSION], [am__api_version="1.9"])
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
[AM_AUTOMAKE_VERSION([1.9.6])])
+# Figure out how to run the assembler. -*- Autoconf -*-
+
+# Copyright (C) 2001, 2003, 2004, 2005 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# serial 4
+
+# AM_PROG_AS
+# ----------
+AC_DEFUN([AM_PROG_AS],
+[# By default we simply use the C compiler to build assembly code.
+AC_REQUIRE([AC_PROG_CC])
+test "${CCAS+set}" = set || CCAS=$CC
+test "${CCASFLAGS+set}" = set || CCASFLAGS=$CFLAGS
+AC_ARG_VAR([CCAS], [assembler compiler command (defaults to CC)])
+AC_ARG_VAR([CCASFLAGS], [assembler compiler flags (defaults to CFLAGS)])
+])
+
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
diff --git a/lib/pixman/config.h.in b/lib/pixman/config.h.in
index 283eb1a1b..6277b106c 100644
--- a/lib/pixman/config.h.in
+++ b/lib/pixman/config.h.in
@@ -18,6 +18,9 @@
/* Whether we have posix_memalign() */
#undef HAVE_POSIX_MEMALIGN
+/* Whether pthread_setspecific() is supported */
+#undef HAVE_PTHREAD_SETSPECIFIC
+
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
@@ -60,13 +63,19 @@
/* enable TIMER_BEGIN/TIMER_END macros */
#undef PIXMAN_TIMERS
+/* The size of `long', as computed by sizeof. */
+#undef SIZEOF_LONG
+
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
-/* use ARM NEON compiler intrinsics */
+/* Whether the tool chain supports __thread */
+#undef TOOLCHAIN_SUPPORTS__THREAD
+
+/* use ARM NEON assembly optimizations */
#undef USE_ARM_NEON
-/* use ARM SIMD compiler intrinsics */
+/* use ARM SIMD assembly optimizations */
#undef USE_ARM_SIMD
/* use GNU-style inline assembler */
diff --git a/lib/pixman/configure b/lib/pixman/configure
index 262b1f57d..1f9d9d8f3 100644
--- a/lib/pixman/configure
+++ b/lib/pixman/configure
@@ -1,8 +1,8 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.62 for pixman 0.16.6.
+# Generated by GNU Autoconf 2.62 for pixman 0.18.4.
#
-# Report bugs to <"sandmann@daimi.au.dk">.
+# Report bugs to <"pixman@lists.freedesktop.org">.
#
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
# 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
@@ -750,9 +750,9 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='pixman'
PACKAGE_TARNAME='pixman'
-PACKAGE_VERSION='0.16.6'
-PACKAGE_STRING='pixman 0.16.6'
-PACKAGE_BUGREPORT='"sandmann@daimi.au.dk"'
+PACKAGE_VERSION='0.18.4'
+PACKAGE_STRING='pixman 0.18.4'
+PACKAGE_BUGREPORT='"pixman@lists.freedesktop.org"'
# Factoring default headers for most tests.
ac_includes_default="\
@@ -872,6 +872,8 @@ AMDEPBACKSLASH
CCDEPMODE
am__fastdepCC_TRUE
am__fastdepCC_FALSE
+CCAS
+CCASFLAGS
SED
GREP
EGREP
@@ -909,10 +911,8 @@ SSE2_LDFLAGS
VMX_CFLAGS
USE_VMX_TRUE
USE_VMX_FALSE
-ARM_SIMD_CFLAGS
USE_ARM_SIMD_TRUE
USE_ARM_SIMD_FALSE
-ARM_NEON_CFLAGS
USE_ARM_NEON_TRUE
USE_ARM_NEON_FALSE
USE_GCC_INLINE_ASM_TRUE
@@ -925,6 +925,11 @@ HAVE_GTK_TRUE
HAVE_GTK_FALSE
DEP_CFLAGS
DEP_LIBS
+STUBS_CFLAGS
+STUBS_LIBS
+TOOLCHAIN_SUPPORTS__THREAD
+HAVE_PTHREAD_SETSPECIFIC
+PTHREAD_LDFLAGS
LIBOBJS
LTLIBOBJS'
ac_subst_files=''
@@ -955,6 +960,8 @@ CFLAGS
LDFLAGS
LIBS
CPPFLAGS
+CCAS
+CCASFLAGS
CPP
CXX
CXXFLAGS
@@ -964,7 +971,9 @@ F77
FFLAGS
PKG_CONFIG
GTK_CFLAGS
-GTK_LIBS'
+GTK_LIBS
+STUBS_CFLAGS
+STUBS_LIBS'
# Initialize some variables set by options.
@@ -1517,7 +1526,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures pixman 0.16.6 to adapt to many kinds of systems.
+\`configure' configures pixman 0.18.4 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1587,7 +1596,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of pixman 0.16.6:";;
+ short | recursive ) echo "Configuration of pixman 0.18.4:";;
esac
cat <<\_ACEOF
@@ -1628,6 +1637,8 @@ Some influential environment variables:
LIBS libraries to pass to the linker, e.g. -l<library>
CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I<include dir> if
you have headers in a nonstandard directory <include dir>
+ CCAS assembler compiler command (defaults to CC)
+ CCASFLAGS assembler compiler flags (defaults to CFLAGS)
CPP C preprocessor
CXX C++ compiler command
CXXFLAGS C++ compiler flags
@@ -1637,11 +1648,14 @@ Some influential environment variables:
PKG_CONFIG path to pkg-config utility
GTK_CFLAGS C compiler flags for GTK, overriding pkg-config
GTK_LIBS linker flags for GTK, overriding pkg-config
+ STUBS_CFLAGS
+ C compiler flags for STUBS, overriding pkg-config
+ STUBS_LIBS linker flags for STUBS, overriding pkg-config
Use these variables to override the choices made by `configure' or to help
it to find libraries and programs with nonstandard names/locations.
-Report bugs to <"sandmann@daimi.au.dk">.
+Report bugs to <"pixman@lists.freedesktop.org">.
_ACEOF
ac_status=$?
fi
@@ -1704,7 +1718,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-pixman configure 0.16.6
+pixman configure 0.18.4
generated by GNU Autoconf 2.62
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1718,7 +1732,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by pixman $as_me 0.16.6, which was
+It was created by pixman $as_me 0.18.4, which was
generated by GNU Autoconf 2.62. Invocation command line was
$ $0 $@
@@ -2367,7 +2381,7 @@ fi
# Define the identity of the package.
PACKAGE='pixman'
- VERSION='0.16.6'
+ VERSION='0.18.4'
cat >>confdefs.h <<_ACEOF
@@ -2514,6 +2528,9 @@ am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'
+# Suppress verbose compile lines
+
+
ac_config_headers="$ac_config_headers config.h"
@@ -3708,6 +3725,13 @@ else
fi
+# By default we simply use the C compiler to build assembly code.
+
+test "${CCAS+set}" = set || CCAS=$CC
+test "${CCASFLAGS+set}" = set || CCASFLAGS=$CFLAGS
+
+
+
# Check whether --enable-shared was given.
if test "${enable_shared+set}" = set; then
enableval=$enable_shared; p=${PACKAGE-default}
@@ -4405,7 +4429,7 @@ ia64-*-hpux*)
;;
*-*-irix6*)
# Find out which ABI we are using.
- echo '#line 4408 "configure"' > conftest.$ac_ext
+ echo '#line 4432 "configure"' > conftest.$ac_ext
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
(eval $ac_compile) 2>&5
ac_status=$?
@@ -5204,9 +5228,9 @@ $as_echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result
{ $as_echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
$as_echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ------------------------------------- ##
-## Report this to "sandmann@daimi.au.dk" ##
-## ------------------------------------- ##
+## --------------------------------------------- ##
+## Report this to "pixman@lists.freedesktop.org" ##
+## --------------------------------------------- ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -7513,11 +7537,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:7516: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:7540: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:7520: \$? = $ac_status" >&5
+ echo "$as_me:7544: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -7803,11 +7827,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:7806: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:7830: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:7810: \$? = $ac_status" >&5
+ echo "$as_me:7834: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -7907,11 +7931,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:7910: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:7934: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:7914: \$? = $ac_status" >&5
+ echo "$as_me:7938: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -10307,7 +10331,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 10310 "configure"
+#line 10334 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -10407,7 +10431,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 10410 "configure"
+#line 10434 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -12816,11 +12840,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:12819: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:12843: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:12823: \$? = $ac_status" >&5
+ echo "$as_me:12847: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -12920,11 +12944,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:12923: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:12947: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:12927: \$? = $ac_status" >&5
+ echo "$as_me:12951: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -14503,11 +14527,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:14506: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:14530: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:14510: \$? = $ac_status" >&5
+ echo "$as_me:14534: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -14607,11 +14631,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:14610: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:14634: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:14614: \$? = $ac_status" >&5
+ echo "$as_me:14638: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -16822,11 +16846,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:16825: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:16849: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:16829: \$? = $ac_status" >&5
+ echo "$as_me:16853: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -17112,11 +17136,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:17115: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:17139: $lt_compile\"" >&5)
(eval "$lt_compile" 2>conftest.err)
ac_status=$?
cat conftest.err >&5
- echo "$as_me:17119: \$? = $ac_status" >&5
+ echo "$as_me:17143: \$? = $ac_status" >&5
if (exit $ac_status) && test -s "$ac_outfile"; then
# The compiler can only warn and ignore the option if not recognized
# So say no if there are warnings other than the usual output.
@@ -17216,11 +17240,11 @@ else
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
-e 's:$: $lt_compiler_flag:'`
- (eval echo "\"\$as_me:17219: $lt_compile\"" >&5)
+ (eval echo "\"\$as_me:17243: $lt_compile\"" >&5)
(eval "$lt_compile" 2>out/conftest.err)
ac_status=$?
cat out/conftest.err >&5
- echo "$as_me:17223: \$? = $ac_status" >&5
+ echo "$as_me:17247: \$? = $ac_status" >&5
if (exit $ac_status) && test -s out/conftest2.$ac_objext
then
# The compiler can only warn and ignore the option if not recognized
@@ -20478,6 +20502,362 @@ _ACEOF
esac
+# The cast to long int works around a bug in the HP C Compiler
+# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
+# This bug is HP SR number 8606223364.
+{ $as_echo "$as_me:$LINENO: checking size of long" >&5
+$as_echo_n "checking size of long... " >&6; }
+if test "${ac_cv_sizeof_long+set}" = set; then
+ $as_echo_n "(cached) " >&6
+else
+ if test "$cross_compiling" = yes; then
+ # Depending upon the size, compute the lo and hi bounds.
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long int) (sizeof (long))) >= 0)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_lo=0 ac_mid=0
+ while :; do
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long int) (sizeof (long))) <= $ac_mid)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_hi=$ac_mid; break
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_lo=`expr $ac_mid + 1`
+ if test $ac_lo -le $ac_mid; then
+ ac_lo= ac_hi=
+ break
+ fi
+ ac_mid=`expr 2 '*' $ac_mid + 1`
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ done
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long int) (sizeof (long))) < 0)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_hi=-1 ac_mid=-1
+ while :; do
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long int) (sizeof (long))) >= $ac_mid)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_lo=$ac_mid; break
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_hi=`expr '(' $ac_mid ')' - 1`
+ if test $ac_mid -le $ac_hi; then
+ ac_lo= ac_hi=
+ break
+ fi
+ ac_mid=`expr 2 '*' $ac_mid`
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ done
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_lo= ac_hi=
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+# Binary search between lo and hi bounds.
+while test "x$ac_lo" != "x$ac_hi"; do
+ ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo`
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+int
+main ()
+{
+static int test_array [1 - 2 * !(((long int) (sizeof (long))) <= $ac_mid)];
+test_array [0] = 0
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ ac_hi=$ac_mid
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_lo=`expr '(' $ac_mid ')' + 1`
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+done
+case $ac_lo in
+?*) ac_cv_sizeof_long=$ac_lo;;
+'') if test "$ac_cv_type_long" = yes; then
+ { { $as_echo "$as_me:$LINENO: error: cannot compute sizeof (long)
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot compute sizeof (long)
+See \`config.log' for more details." >&2;}
+ { (exit 77); exit 77; }; }
+ else
+ ac_cv_sizeof_long=0
+ fi ;;
+esac
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+static long int longval () { return (long int) (sizeof (long)); }
+static unsigned long int ulongval () { return (long int) (sizeof (long)); }
+#include <stdio.h>
+#include <stdlib.h>
+int
+main ()
+{
+
+ FILE *f = fopen ("conftest.val", "w");
+ if (! f)
+ return 1;
+ if (((long int) (sizeof (long))) < 0)
+ {
+ long int i = longval ();
+ if (i != ((long int) (sizeof (long))))
+ return 1;
+ fprintf (f, "%ld", i);
+ }
+ else
+ {
+ unsigned long int i = ulongval ();
+ if (i != ((long int) (sizeof (long))))
+ return 1;
+ fprintf (f, "%lu", i);
+ }
+ /* Do not output a trailing newline, as this causes \r\n confusion
+ on some platforms. */
+ return ferror (f) || fclose (f) != 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_sizeof_long=`cat conftest.val`
+else
+ $as_echo "$as_me: program exited with status $ac_status" >&5
+$as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+if test "$ac_cv_type_long" = yes; then
+ { { $as_echo "$as_me:$LINENO: error: cannot compute sizeof (long)
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot compute sizeof (long)
+See \`config.log' for more details." >&2;}
+ { (exit 77); exit 77; }; }
+ else
+ ac_cv_sizeof_long=0
+ fi
+fi
+rm -rf conftest.dSYM
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f conftest.val
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_sizeof_long" >&5
+$as_echo "$ac_cv_sizeof_long" >&6; }
+
+
+
+cat >>confdefs.h <<_ACEOF
+#define SIZEOF_LONG $ac_cv_sizeof_long
+_ACEOF
+
+
+
# Checks for Sun Studio compilers
{ $as_echo "$as_me:$LINENO: checking whether __SUNPRO_C is declared" >&5
$as_echo_n "checking whether __SUNPRO_C is declared... " >&6; }
@@ -20618,13 +20998,13 @@ fi
-LT_VERSION_INFO="16:6:16"
+LT_VERSION_INFO="18:4:18"
PIXMAN_VERSION_MAJOR=0
-PIXMAN_VERSION_MINOR=16
+PIXMAN_VERSION_MINOR=18
-PIXMAN_VERSION_MICRO=6
+PIXMAN_VERSION_MICRO=4
@@ -20807,8 +21187,8 @@ xserver_save_CFLAGS=$CFLAGS
CFLAGS="$MMX_CFLAGS $CFLAGS"
cat >conftest.$ac_ext <<_ACEOF
-#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3))
-error "Need GCC >= 3.3 for MMX intrinsics"
+#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
+error "Need GCC >= 3.4 for MMX intrinsics"
#endif
#include <mmintrin.h>
int main () {
@@ -20915,7 +21295,7 @@ cat >conftest.$ac_ext <<_ACEOF
#include <xmmintrin.h>
#include <emmintrin.h>
int main () {
- __m128i a, b, c;
+ __m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c;
c = _mm_xor_si128 (a, b);
return 0;
}
@@ -21149,19 +21529,23 @@ else
fi
-ARM_SIMD_CFLAGS="-mcpu=arm1136j-s"
-
have_arm_simd=no
{ $as_echo "$as_me:$LINENO: checking whether to use ARM SIMD assembler" >&5
$as_echo_n "checking whether to use ARM SIMD assembler... " >&6; }
xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$ARM_SIMD_CFLAGS $CFLAGS"
+CFLAGS="-x assembler-with-cpp $CFLAGS"
cat >conftest.$ac_ext <<_ACEOF
-int main () {
- asm("uqadd8 r1, r1, r2");
- return 0;
-}
+.text
+.arch armv6
+.object_arch armv4
+.arm
+.altmacro
+#ifndef __ARM_EABI__
+#error EABI is required (to be sure that calling conventions are compatible)
+#endif
+pld [r0]
+uqadd8 r0, r0, r0
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
@@ -21210,20 +21594,8 @@ cat >>confdefs.h <<\_ACEOF
#define USE_ARM_SIMD 1
_ACEOF
-else
- ARM_SIMD_CFLAGS=
fi
-{ $as_echo "$as_me:$LINENO: result: $have_arm_simd" >&5
-$as_echo "$have_arm_simd" >&6; }
-if test $enable_arm_simd = yes && test $have_arm_simd = no ; then
- { { $as_echo "$as_me:$LINENO: error: ARM SIMD intrinsics not detected" >&5
-$as_echo "$as_me: error: ARM SIMD intrinsics not detected" >&2;}
- { (exit 1); exit 1; }; }
-fi
-
-
-
if test $have_arm_simd = yes; then
@@ -21235,20 +21607,33 @@ else
fi
-ARM_NEON_CFLAGS="-mfpu=neon -mcpu=cortex-a8"
+{ $as_echo "$as_me:$LINENO: result: $have_arm_simd" >&5
+$as_echo "$have_arm_simd" >&6; }
+if test $enable_arm_simd = yes && test $have_arm_simd = no ; then
+ { { $as_echo "$as_me:$LINENO: error: ARM SIMD intrinsics not detected" >&5
+$as_echo "$as_me: error: ARM SIMD intrinsics not detected" >&2;}
+ { (exit 1); exit 1; }; }
+fi
have_arm_neon=no
-{ $as_echo "$as_me:$LINENO: checking whether to use ARM NEON" >&5
-$as_echo_n "checking whether to use ARM NEON... " >&6; }
+{ $as_echo "$as_me:$LINENO: checking whether to use ARM NEON assembler" >&5
+$as_echo_n "checking whether to use ARM NEON assembler... " >&6; }
xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$ARM_NEON_CFLAGS $CFLAGS"
+CFLAGS="-x assembler-with-cpp $CFLAGS"
cat >conftest.$ac_ext <<_ACEOF
-#include <arm_neon.h>
-int main () {
- uint8x8_t neon_test=vmov_n_u8(0);
- return 0;
-}
+.text
+.fpu neon
+.arch armv7a
+.object_arch armv4
+.eabi_attribute 10, 0
+.arm
+.altmacro
+#ifndef __ARM_EABI__
+#error EABI is required (to be sure that calling conventions are compatible)
+#endif
+pld [r0]
+vmovn.u16 d0, q0
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
@@ -21297,14 +21682,10 @@ cat >>confdefs.h <<\_ACEOF
#define USE_ARM_NEON 1
_ACEOF
-else
- ARM_NEON_CFLAGS=
fi
-
-
if test $have_arm_neon = yes; then
USE_ARM_NEON_TRUE=
USE_ARM_NEON_FALSE='#'
@@ -21790,6 +22171,372 @@ _ACEOF
fi
+
+support_for__thread=no
+
+{ $as_echo "$as_me:$LINENO: checking for __thread" >&5
+$as_echo_n "checking for __thread... " >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+
+#ifdef __MINGW32__
+#error MinGW has broken __thread support
+#endif
+__thread int x ;
+int main () { return 0; }
+
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then
+ support_for__thread=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $support_for__thread = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define TOOLCHAIN_SUPPORTS__THREAD /**/
+_ACEOF
+
+fi
+
+{ $as_echo "$as_me:$LINENO: result: $support_for__thread" >&5
+$as_echo "$support_for__thread" >&6; }
+
+
+if test $support_for__thread = no; then
+
+support_for_pthread_stubs_setspecific=no
+
+{ $as_echo "$as_me:$LINENO: checking for pthread_setspecific in libpthread_stubs" >&5
+$as_echo_n "checking for pthread_setspecific in libpthread_stubs... " >&6; }
+
+save_LDFLAGS=$LDFLAGS
+save_CFLAGS=$CFLAGS
+
+
+pkg_failed=no
+{ $as_echo "$as_me:$LINENO: checking for STUBS" >&5
+$as_echo_n "checking for STUBS... " >&6; }
+
+if test -n "$PKG_CONFIG"; then
+ if test -n "$STUBS_CFLAGS"; then
+ pkg_cv_STUBS_CFLAGS="$STUBS_CFLAGS"
+ else
+ if test -n "$PKG_CONFIG" && \
+ { ($as_echo "$as_me:$LINENO: \$PKG_CONFIG --exists --print-errors \"pthread-stubs\"") >&5
+ ($PKG_CONFIG --exists --print-errors "pthread-stubs") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ pkg_cv_STUBS_CFLAGS=`$PKG_CONFIG --cflags "pthread-stubs" 2>/dev/null`
+else
+ pkg_failed=yes
+fi
+ fi
+else
+ pkg_failed=untried
+fi
+if test -n "$PKG_CONFIG"; then
+ if test -n "$STUBS_LIBS"; then
+ pkg_cv_STUBS_LIBS="$STUBS_LIBS"
+ else
+ if test -n "$PKG_CONFIG" && \
+ { ($as_echo "$as_me:$LINENO: \$PKG_CONFIG --exists --print-errors \"pthread-stubs\"") >&5
+ ($PKG_CONFIG --exists --print-errors "pthread-stubs") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ pkg_cv_STUBS_LIBS=`$PKG_CONFIG --libs "pthread-stubs" 2>/dev/null`
+else
+ pkg_failed=yes
+fi
+ fi
+else
+ pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+ _pkg_short_errors_supported=yes
+else
+ _pkg_short_errors_supported=no
+fi
+ if test $_pkg_short_errors_supported = yes; then
+ STUBS_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "pthread-stubs"`
+ else
+ STUBS_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "pthread-stubs"`
+ fi
+ # Put the nasty error message in config.log where it belongs
+ echo "$STUBS_PKG_ERRORS" >&5
+
+ { { $as_echo "$as_me:$LINENO: error: Package requirements (pthread-stubs) were not met:
+
+$STUBS_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+Alternatively, you may set the environment variables STUBS_CFLAGS
+and STUBS_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+" >&5
+$as_echo "$as_me: error: Package requirements (pthread-stubs) were not met:
+
+$STUBS_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+Alternatively, you may set the environment variables STUBS_CFLAGS
+and STUBS_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+" >&2;}
+ { (exit 1); exit 1; }; }
+elif test $pkg_failed = untried; then
+ { { $as_echo "$as_me:$LINENO: error: The pkg-config script could not be found or is too old. Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+Alternatively, you may set the environment variables STUBS_CFLAGS
+and STUBS_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+
+To get pkg-config, see <http://www.freedesktop.org/software/pkgconfig>.
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: The pkg-config script could not be found or is too old. Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+Alternatively, you may set the environment variables STUBS_CFLAGS
+and STUBS_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+
+To get pkg-config, see <http://www.freedesktop.org/software/pkgconfig>.
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+else
+ STUBS_CFLAGS=$pkg_cv_STUBS_CFLAGS
+ STUBS_LIBS=$pkg_cv_STUBS_LIBS
+ { $as_echo "$as_me:$LINENO: result: yes" >&5
+$as_echo "yes" >&6; }
+ :
+fi
+
+CFLAGS="${STUBS_CFLAGS}"
+LDFLAGS="${STUBS_LIBS}"
+
+cat >conftest.$ac_ext <<_ACEOF
+
+#include <pthread.h>
+
+#include <stdlib.h>
+#include <pthread.h>
+
+static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+static pthread_key_t key;
+
+static void
+make_key (void)
+{
+ pthread_key_create (&key, NULL);
+}
+
+int
+main ()
+{
+ void *value = NULL;
+
+ if (pthread_once (&once_control, make_key) != 0)
+ {
+ value = NULL;
+ }
+ else
+ {
+ value = pthread_getspecific (key);
+ if (!value)
+ {
+ value = malloc (100);
+ pthread_setspecific (key, value);
+ }
+ }
+}
+
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
+ support_for_pthread_stubs_setspecific=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext;
+
+CFLAGS=$save_CFLAGS
+LDFLAGS=$save_LDFLAGS
+
+if test $support_for_pthread_stubs_setspecific = yes; then
+ PTHREAD_LDFLAGS="${STUBS_LIBS}"
+ PTHREAD_CFLAGS="${STUBS_CFLAGS}"
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_PTHREAD_SETSPECIFIC /**/
+_ACEOF
+
+fi
+
+{ $as_echo "$as_me:$LINENO: result: $support_for_pthread_stubs_setspecific" >&5
+$as_echo "$support_for_pthread_stubs_setspecific" >&6; };
+
+fi
+
+if test $support_for_pthread_stubs_setspecific = no; then
+
+{ $as_echo "$as_me:$LINENO: checking for pthread_setspecific" >&5
+$as_echo_n "checking for pthread_setspecific... " >&6; }
+
+save_LDFLAGS=$LDFLAGS
+
+LDFLAGS="-pthread"
+
+cat >conftest.$ac_ext <<_ACEOF
+
+#include <pthread.h>
+
+#include <stdlib.h>
+#include <pthread.h>
+
+static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+static pthread_key_t key;
+
+static void
+make_key (void)
+{
+ pthread_key_create (&key, NULL);
+}
+
+int
+main ()
+{
+ void *value = NULL;
+
+ if (pthread_once (&once_control, make_key) != 0)
+ {
+ value = NULL;
+ }
+ else
+ {
+ value = pthread_getspecific (key);
+ if (!value)
+ {
+ value = malloc (100);
+ pthread_setspecific (key, value);
+ }
+ }
+}
+
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ $as_test_x conftest$ac_exeext
+ }; then
+ support_for_pthread_setspecific=yes
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+ conftest$ac_exeext conftest.$ac_ext;
+
+LDFLAGS=$save_LDFLAGS
+
+if test $support_for_pthread_setspecific = yes; then
+ PTHREAD_LDFLAGS="-pthread"
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_PTHREAD_SETSPECIFIC /**/
+_ACEOF
+
+fi
+
+{ $as_echo "$as_me:$LINENO: result: $support_for_pthread_setspecific" >&5
+$as_echo "$support_for_pthread_setspecific" >&6; };
+
+fi
+
+
+
+
+
ac_config_files="$ac_config_files pixman-1.pc pixman-1-uninstalled.pc Makefile pixman/Makefile pixman/pixman-version.h test/Makefile"
cat >confcache <<\_ACEOF
@@ -22282,7 +23029,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by pixman $as_me 0.16.6, which was
+This file was extended by pixman $as_me 0.18.4, which was
generated by GNU Autoconf 2.62. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -22335,7 +23082,7 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\
-pixman config.status 0.16.6
+pixman config.status 0.18.4
configured by $0, generated by GNU Autoconf 2.62,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/lib/pixman/configure.ac b/lib/pixman/configure.ac
index 8fa959ae4..8cfaca439 100644
--- a/lib/pixman/configure.ac
+++ b/lib/pixman/configure.ac
@@ -53,13 +53,16 @@ AC_PREREQ([2.57])
#
m4_define([pixman_major], 0)
-m4_define([pixman_minor], 16)
-m4_define([pixman_micro], 6)
+m4_define([pixman_minor], 18)
+m4_define([pixman_micro], 4)
m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
-AC_INIT(pixman, pixman_version, "sandmann@daimi.au.dk", pixman)
-AM_INIT_AUTOMAKE([dist-bzip2])
+AC_INIT(pixman, pixman_version, "pixman@lists.freedesktop.org", pixman)
+AM_INIT_AUTOMAKE([foreign dist-bzip2])
+
+# Suppress verbose compile lines
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
AM_CONFIG_HEADER(config.h)
@@ -68,11 +71,14 @@ AC_CANONICAL_HOST
test_CFLAGS=${CFLAGS+set} # We may override autoconf default CFLAGS.
AC_PROG_CC
+AM_PROG_AS
AC_PROG_LIBTOOL
AC_CHECK_FUNCS([getisax])
AC_C_BIGENDIAN
AC_C_INLINE
+AC_CHECK_SIZEOF(long)
+
# Checks for Sun Studio compilers
AC_CHECK_DECL([__SUNPRO_C], [SUNCC="yes"], [SUNCC="no"])
AC_CHECK_DECL([__amd64], [AMD64_ABI="yes"], [AMD64_ABI="no"])
@@ -186,8 +192,8 @@ AC_MSG_CHECKING(whether to use MMX intrinsics)
xserver_save_CFLAGS=$CFLAGS
CFLAGS="$MMX_CFLAGS $CFLAGS"
AC_COMPILE_IFELSE([
-#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3))
-error "Need GCC >= 3.3 for MMX intrinsics"
+#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
+error "Need GCC >= 3.4 for MMX intrinsics"
#endif
#include <mmintrin.h>
int main () {
@@ -247,7 +253,7 @@ AC_COMPILE_IFELSE([
#include <xmmintrin.h>
#include <emmintrin.h>
int main () {
- __m128i a, b, c;
+ __m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c;
c = _mm_xor_si128 (a, b);
return 0;
}], have_sse2_intrinsics=yes)
@@ -355,19 +361,23 @@ AC_SUBST(VMX_CFLAGS)
AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
-dnl ===========================================================================
-dnl Check for ARM SIMD instructions
-ARM_SIMD_CFLAGS="-mcpu=arm1136j-s"
-
+dnl ==========================================================================
+dnl Check if assembler is gas compatible and supports ARM SIMD instructions
have_arm_simd=no
AC_MSG_CHECKING(whether to use ARM SIMD assembler)
xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$ARM_SIMD_CFLAGS $CFLAGS"
-AC_COMPILE_IFELSE([
-int main () {
- asm("uqadd8 r1, r1, r2");
- return 0;
-}], have_arm_simd=yes)
+CFLAGS="-x assembler-with-cpp $CFLAGS"
+AC_COMPILE_IFELSE([[
+.text
+.arch armv6
+.object_arch armv4
+.arm
+.altmacro
+#ifndef __ARM_EABI__
+#error EABI is required (to be sure that calling conventions are compatible)
+#endif
+pld [r0]
+uqadd8 r0, r0, r0]], have_arm_simd=yes)
CFLAGS=$xserver_save_CFLAGS
AC_ARG_ENABLE(arm-simd,
@@ -380,34 +390,35 @@ if test $enable_arm_simd = no ; then
fi
if test $have_arm_simd = yes ; then
- AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD compiler intrinsics])
-else
- ARM_SIMD_CFLAGS=
+ AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD assembly optimizations])
fi
+AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
+
AC_MSG_RESULT($have_arm_simd)
if test $enable_arm_simd = yes && test $have_arm_simd = no ; then
AC_MSG_ERROR([ARM SIMD intrinsics not detected])
fi
-AC_SUBST(ARM_SIMD_CFLAGS)
-
-AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
-
dnl ==========================================================================
-dnl Check for ARM NEON instructions
-ARM_NEON_CFLAGS="-mfpu=neon -mcpu=cortex-a8"
-
+dnl Check if assembler is gas compatible and supports NEON instructions
have_arm_neon=no
-AC_MSG_CHECKING(whether to use ARM NEON)
+AC_MSG_CHECKING(whether to use ARM NEON assembler)
xserver_save_CFLAGS=$CFLAGS
-CFLAGS="$ARM_NEON_CFLAGS $CFLAGS"
-AC_COMPILE_IFELSE([
-#include <arm_neon.h>
-int main () {
- uint8x8_t neon_test=vmov_n_u8(0);
- return 0;
-}], have_arm_neon=yes)
+CFLAGS="-x assembler-with-cpp $CFLAGS"
+AC_COMPILE_IFELSE([[
+.text
+.fpu neon
+.arch armv7a
+.object_arch armv4
+.eabi_attribute 10, 0
+.arm
+.altmacro
+#ifndef __ARM_EABI__
+#error EABI is required (to be sure that calling conventions are compatible)
+#endif
+pld [r0]
+vmovn.u16 d0, q0]], have_arm_neon=yes)
CFLAGS=$xserver_save_CFLAGS
AC_ARG_ENABLE(arm-neon,
@@ -420,13 +431,9 @@ if test $enable_arm_neon = no ; then
fi
if test $have_arm_neon = yes ; then
- AC_DEFINE(USE_ARM_NEON, 1, [use ARM NEON compiler intrinsics])
-else
- ARM_NEON_CFLAGS=
+ AC_DEFINE(USE_ARM_NEON, 1, [use ARM NEON assembly optimizations])
fi
-AC_SUBST(ARM_NEON_CFLAGS)
-
AM_CONDITIONAL(USE_ARM_NEON, test $have_arm_neon = yes)
AC_MSG_RESULT($have_arm_neon)
@@ -510,6 +517,150 @@ if test x$have_posix_memalign = xyes; then
AC_DEFINE(HAVE_POSIX_MEMALIGN, 1, [Whether we have posix_memalign()])
fi
+dnl =====================================
+dnl Thread local storage
+
+support_for__thread=no
+
+AC_MSG_CHECKING(for __thread)
+AC_COMPILE_IFELSE([
+#ifdef __MINGW32__
+#error MinGW has broken __thread support
+#endif
+__thread int x ;
+int main () { return 0; }
+], support_for__thread=yes)
+
+if test $support_for__thread = yes; then
+ AC_DEFINE([TOOLCHAIN_SUPPORTS__THREAD],[],[Whether the tool chain supports __thread])
+fi
+
+AC_MSG_RESULT($support_for__thread)
+
+dnl posix tls
+
+if test $support_for__thread = no; then
+
+support_for_pthread_stubs_setspecific=no
+
+AC_MSG_CHECKING(for pthread_setspecific in libpthread_stubs)
+
+save_LDFLAGS=$LDFLAGS
+save_CFLAGS=$CFLAGS
+
+PKG_CHECK_MODULES(STUBS, pthread-stubs)
+
+CFLAGS="${STUBS_CFLAGS}"
+LDFLAGS="${STUBS_LIBS}"
+
+AC_LINK_IFELSE([
+#include <pthread.h>
+
+#include <stdlib.h>
+#include <pthread.h>
+
+static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+static pthread_key_t key;
+
+static void
+make_key (void)
+{
+ pthread_key_create (&key, NULL);
+}
+
+int
+main ()
+{
+ void *value = NULL;
+
+ if (pthread_once (&once_control, make_key) != 0)
+ {
+ value = NULL;
+ }
+ else
+ {
+ value = pthread_getspecific (key);
+ if (!value)
+ {
+ value = malloc (100);
+ pthread_setspecific (key, value);
+ }
+ }
+}
+], support_for_pthread_stubs_setspecific=yes);
+
+CFLAGS=$save_CFLAGS
+LDFLAGS=$save_LDFLAGS
+
+if test $support_for_pthread_stubs_setspecific = yes; then
+ PTHREAD_LDFLAGS="${STUBS_LIBS}"
+ PTHREAD_CFLAGS="${STUBS_CFLAGS}"
+ AC_DEFINE([HAVE_PTHREAD_SETSPECIFIC], [], [Whether pthread_setspecific() is supported])
+fi
+
+AC_MSG_RESULT($support_for_pthread_stubs_setspecific);
+
+fi
+
+if test $support_for_pthread_stubs_setspecific = no; then
+
+AC_MSG_CHECKING(for pthread_setspecific)
+
+save_LDFLAGS=$LDFLAGS
+
+LDFLAGS="-pthread"
+
+AC_LINK_IFELSE([
+#include <pthread.h>
+
+#include <stdlib.h>
+#include <pthread.h>
+
+static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+static pthread_key_t key;
+
+static void
+make_key (void)
+{
+ pthread_key_create (&key, NULL);
+}
+
+int
+main ()
+{
+ void *value = NULL;
+
+ if (pthread_once (&once_control, make_key) != 0)
+ {
+ value = NULL;
+ }
+ else
+ {
+ value = pthread_getspecific (key);
+ if (!value)
+ {
+ value = malloc (100);
+ pthread_setspecific (key, value);
+ }
+ }
+}
+], support_for_pthread_setspecific=yes);
+
+LDFLAGS=$save_LDFLAGS
+
+if test $support_for_pthread_setspecific = yes; then
+ PTHREAD_LDFLAGS="-pthread"
+ AC_DEFINE([HAVE_PTHREAD_SETSPECIFIC], [], [Whether pthread_setspecific() is supported])
+fi
+
+AC_MSG_RESULT($support_for_pthread_setspecific);
+
+fi
+
+AC_SUBST(TOOLCHAIN_SUPPORTS__THREAD)
+AC_SUBST(HAVE_PTHREAD_SETSPECIFIC)
+AC_SUBST(PTHREAD_LDFLAGS)
+
AC_OUTPUT([pixman-1.pc
pixman-1-uninstalled.pc
Makefile
diff --git a/lib/pixman/pixman-1.pc.in b/lib/pixman/pixman-1.pc.in
index 936d95db0..14bfe1d38 100644
--- a/lib/pixman/pixman-1.pc.in
+++ b/lib/pixman/pixman-1.pc.in
@@ -6,6 +6,7 @@ includedir=@includedir@
Name: Pixman
Description: The pixman library (version 1)
Version: @PACKAGE_VERSION@
+Requires: pthread-stubs >= 0.3
Cflags: -I${includedir}/pixman-1 @DEP_CFLAGS@
Libs: -L${libdir} -lpixman-1 @DEP_LIBS@
diff --git a/lib/pixman/pixman/Makefile.am b/lib/pixman/pixman/Makefile.am
index e19fa6e7f..66ad7f005 100644
--- a/lib/pixman/pixman/Makefile.am
+++ b/lib/pixman/pixman/Makefile.am
@@ -1,7 +1,6 @@
lib_LTLIBRARIES = libpixman-1.la
-libpixman_1_la_LDFLAGS = -version-info $(LT_VERSION_INFO) -no-undefined
+libpixman_1_la_LDFLAGS = -version-info $(LT_VERSION_INFO) -no-undefined @PTHREAD_LDFLAGS@
libpixman_1_la_LIBADD = @DEP_LIBS@ -lm
-libpixman_1_la_CFLAGS = -DPIXMAN_DISABLE_DEPRECATED
libpixman_1_la_SOURCES = \
pixman.h \
pixman-accessor.h \
@@ -64,6 +63,8 @@ libpixman_mmx_la_CFLAGS = $(DEP_CFLAGS) $(MMX_CFLAGS)
libpixman_mmx_la_LIBADD = $(DEP_LIBS)
libpixman_1_la_LDFLAGS += $(MMX_LDFLAGS)
libpixman_1_la_LIBADD += libpixman-mmx.la
+
+ASM_CFLAGS_mmx=$(MMX_CFLAGS)
endif
# vmx code
@@ -75,6 +76,8 @@ libpixman_vmx_la_SOURCES = \
libpixman_vmx_la_CFLAGS = $(DEP_CFLAGS) $(VMX_CFLAGS)
libpixman_vmx_la_LIBADD = $(DEP_LIBS)
libpixman_1_la_LIBADD += libpixman-vmx.la
+
+ASM_CFLAGS_vmx=$(VMX_CFLAGS)
endif
# sse2 code
@@ -86,26 +89,38 @@ libpixman_sse2_la_CFLAGS = $(DEP_CFLAGS) $(SSE2_CFLAGS)
libpixman_sse2_la_LIBADD = $(DEP_LIBS)
libpixman_1_la_LDFLAGS += $(SSE2_LDFLAGS)
libpixman_1_la_LIBADD += libpixman-sse2.la
+
+ASM_CFLAGS_sse2=$(SSE2_CFLAGS)
endif
# arm simd code
if USE_ARM_SIMD
noinst_LTLIBRARIES += libpixman-arm-simd.la
libpixman_arm_simd_la_SOURCES = \
- pixman-arm-simd.c
-libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS) $(ARM_SIMD_CFLAGS)
+ pixman-arm-simd.c \
+ pixman-arm-common.h \
+ pixman-arm-simd-asm.S
+libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS)
libpixman_arm_simd_la_LIBADD = $(DEP_LIBS)
libpixman_1_la_LIBADD += libpixman-arm-simd.la
+
+ASM_CFLAGS_arm_simd=
endif
# arm neon code
if USE_ARM_NEON
noinst_LTLIBRARIES += libpixman-arm-neon.la
libpixman_arm_neon_la_SOURCES = \
- pixman-arm-neon.c
-libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS) $(ARM_NEON_CFLAGS)
+ pixman-arm-neon.c \
+ pixman-arm-common.h \
+ pixman-arm-neon-asm.S \
+ pixman-arm-neon-asm.h
+libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS)
libpixman_arm_neon_la_LIBADD = $(DEP_LIBS)
libpixman_1_la_LIBADD += libpixman-arm-neon.la
-endif
+ASM_CFLAGS_arm_neon=
+endif
+.c.s : $(libpixmaninclude_HEADERS) $(BUILT_SOURCES)
+ $(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $<
diff --git a/lib/pixman/pixman/Makefile.in b/lib/pixman/pixman/Makefile.in
index 51c282071..dbd77f5f0 100644
--- a/lib/pixman/pixman/Makefile.in
+++ b/lib/pixman/pixman/Makefile.in
@@ -87,41 +87,35 @@ LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES)
libpixman_1_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \
$(am__DEPENDENCIES_2) $(am__DEPENDENCIES_3) \
$(am__DEPENDENCIES_4) $(am__DEPENDENCIES_5)
-am_libpixman_1_la_OBJECTS = libpixman_1_la-pixman-access.lo \
- libpixman_1_la-pixman-access-accessors.lo \
- libpixman_1_la-pixman-cpu.lo \
- libpixman_1_la-pixman-gradient-walker.lo \
- libpixman_1_la-pixman-region16.lo \
- libpixman_1_la-pixman-region32.lo \
- libpixman_1_la-pixman-image.lo \
- libpixman_1_la-pixman-implementation.lo \
- libpixman_1_la-pixman-combine32.lo \
- libpixman_1_la-pixman-combine64.lo \
- libpixman_1_la-pixman-general.lo libpixman_1_la-pixman.lo \
- libpixman_1_la-pixman-fast-path.lo \
- libpixman_1_la-pixman-solid-fill.lo \
- libpixman_1_la-pixman-conical-gradient.lo \
- libpixman_1_la-pixman-linear-gradient.lo \
- libpixman_1_la-pixman-radial-gradient.lo \
- libpixman_1_la-pixman-bits-image.lo \
- libpixman_1_la-pixman-utils.lo libpixman_1_la-pixman-edge.lo \
- libpixman_1_la-pixman-edge-accessors.lo \
- libpixman_1_la-pixman-trap.lo libpixman_1_la-pixman-timer.lo \
- libpixman_1_la-pixman-matrix.lo
+am_libpixman_1_la_OBJECTS = pixman-access.lo \
+ pixman-access-accessors.lo pixman-cpu.lo \
+ pixman-gradient-walker.lo pixman-region16.lo \
+ pixman-region32.lo pixman-image.lo pixman-implementation.lo \
+ pixman-combine32.lo pixman-combine64.lo pixman-general.lo \
+ pixman.lo pixman-fast-path.lo pixman-solid-fill.lo \
+ pixman-conical-gradient.lo pixman-linear-gradient.lo \
+ pixman-radial-gradient.lo pixman-bits-image.lo pixman-utils.lo \
+ pixman-edge.lo pixman-edge-accessors.lo pixman-trap.lo \
+ pixman-timer.lo pixman-matrix.lo
libpixman_1_la_OBJECTS = $(am_libpixman_1_la_OBJECTS)
am__DEPENDENCIES_6 =
@USE_ARM_NEON_TRUE@libpixman_arm_neon_la_DEPENDENCIES = \
@USE_ARM_NEON_TRUE@ $(am__DEPENDENCIES_6)
-am__libpixman_arm_neon_la_SOURCES_DIST = pixman-arm-neon.c
+am__libpixman_arm_neon_la_SOURCES_DIST = pixman-arm-neon.c \
+ pixman-arm-common.h pixman-arm-neon-asm.S \
+ pixman-arm-neon-asm.h
@USE_ARM_NEON_TRUE@am_libpixman_arm_neon_la_OBJECTS = \
-@USE_ARM_NEON_TRUE@ libpixman_arm_neon_la-pixman-arm-neon.lo
+@USE_ARM_NEON_TRUE@ libpixman_arm_neon_la-pixman-arm-neon.lo \
+@USE_ARM_NEON_TRUE@ pixman-arm-neon-asm.lo
libpixman_arm_neon_la_OBJECTS = $(am_libpixman_arm_neon_la_OBJECTS)
@USE_ARM_NEON_TRUE@am_libpixman_arm_neon_la_rpath =
@USE_ARM_SIMD_TRUE@libpixman_arm_simd_la_DEPENDENCIES = \
@USE_ARM_SIMD_TRUE@ $(am__DEPENDENCIES_6)
-am__libpixman_arm_simd_la_SOURCES_DIST = pixman-arm-simd.c
+am__libpixman_arm_simd_la_SOURCES_DIST = pixman-arm-simd.c \
+ pixman-arm-common.h pixman-arm-simd-asm.S
@USE_ARM_SIMD_TRUE@am_libpixman_arm_simd_la_OBJECTS = \
-@USE_ARM_SIMD_TRUE@ libpixman_arm_simd_la-pixman-arm-simd.lo
+@USE_ARM_SIMD_TRUE@ libpixman_arm_simd_la-pixman-arm-simd.lo \
+@USE_ARM_SIMD_TRUE@ pixman-arm-simd-asm.lo
libpixman_arm_simd_la_OBJECTS = $(am_libpixman_arm_simd_la_OBJECTS)
@USE_ARM_SIMD_TRUE@am_libpixman_arm_simd_la_rpath =
@USE_MMX_TRUE@libpixman_mmx_la_DEPENDENCIES = $(am__DEPENDENCIES_6)
@@ -145,6 +139,9 @@ libpixman_vmx_la_OBJECTS = $(am_libpixman_vmx_la_OBJECTS)
DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
+CCASCOMPILE = $(CCAS) $(AM_CCASFLAGS) $(CCASFLAGS)
+LTCCASCOMPILE = $(LIBTOOL) --mode=compile $(CCAS) $(AM_CCASFLAGS) \
+ $(CCASFLAGS)
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) \
@@ -172,13 +169,13 @@ AMDEP_FALSE = @AMDEP_FALSE@
AMDEP_TRUE = @AMDEP_TRUE@
AMTAR = @AMTAR@
AR = @AR@
-ARM_NEON_CFLAGS = @ARM_NEON_CFLAGS@
-ARM_SIMD_CFLAGS = @ARM_SIMD_CFLAGS@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
+CCAS = @CCAS@
+CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
@@ -206,6 +203,7 @@ GTK_CFLAGS = @GTK_CFLAGS@
GTK_LIBS = @GTK_LIBS@
HAVE_GTK_FALSE = @HAVE_GTK_FALSE@
HAVE_GTK_TRUE = @HAVE_GTK_TRUE@
+HAVE_PTHREAD_SETSPECIFIC = @HAVE_PTHREAD_SETSPECIFIC@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
@@ -235,6 +233,7 @@ PIXMAN_VERSION_MAJOR = @PIXMAN_VERSION_MAJOR@
PIXMAN_VERSION_MICRO = @PIXMAN_VERSION_MICRO@
PIXMAN_VERSION_MINOR = @PIXMAN_VERSION_MINOR@
PKG_CONFIG = @PKG_CONFIG@
+PTHREAD_LDFLAGS = @PTHREAD_LDFLAGS@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
@@ -242,6 +241,9 @@ SHELL = @SHELL@
SSE2_CFLAGS = @SSE2_CFLAGS@
SSE2_LDFLAGS = @SSE2_LDFLAGS@
STRIP = @STRIP@
+STUBS_CFLAGS = @STUBS_CFLAGS@
+STUBS_LIBS = @STUBS_LIBS@
+TOOLCHAIN_SUPPORTS__THREAD = @TOOLCHAIN_SUPPORTS__THREAD@
USE_ARM_NEON_FALSE = @USE_ARM_NEON_FALSE@
USE_ARM_NEON_TRUE = @USE_ARM_NEON_TRUE@
USE_ARM_SIMD_FALSE = @USE_ARM_SIMD_FALSE@
@@ -305,10 +307,10 @@ sysconfdir = @sysconfdir@
target_alias = @target_alias@
lib_LTLIBRARIES = libpixman-1.la
libpixman_1_la_LDFLAGS = -version-info $(LT_VERSION_INFO) \
- -no-undefined $(am__append_2) $(am__append_7)
+ -no-undefined @PTHREAD_LDFLAGS@ $(am__append_2) \
+ $(am__append_7)
libpixman_1_la_LIBADD = @DEP_LIBS@ -lm $(am__append_3) $(am__append_5) \
$(am__append_8) $(am__append_10) $(am__append_12)
-libpixman_1_la_CFLAGS = -DPIXMAN_DISABLE_DEPRECATED
libpixman_1_la_SOURCES = \
pixman.h \
pixman-accessor.h \
@@ -356,32 +358,42 @@ CLEANFILES = pixman-combine32.c pixman-combine64.c pixman-combine32.h pixman-com
@USE_MMX_TRUE@libpixman_mmx_la_CFLAGS = $(DEP_CFLAGS) $(MMX_CFLAGS)
@USE_MMX_TRUE@libpixman_mmx_la_LIBADD = $(DEP_LIBS)
+@USE_MMX_TRUE@ASM_CFLAGS_mmx = $(MMX_CFLAGS)
@USE_VMX_TRUE@libpixman_vmx_la_SOURCES = \
@USE_VMX_TRUE@ pixman-vmx.c \
@USE_VMX_TRUE@ pixman-combine32.h
@USE_VMX_TRUE@libpixman_vmx_la_CFLAGS = $(DEP_CFLAGS) $(VMX_CFLAGS)
@USE_VMX_TRUE@libpixman_vmx_la_LIBADD = $(DEP_LIBS)
+@USE_VMX_TRUE@ASM_CFLAGS_vmx = $(VMX_CFLAGS)
@USE_SSE2_TRUE@libpixman_sse2_la_SOURCES = \
@USE_SSE2_TRUE@ pixman-sse2.c
@USE_SSE2_TRUE@libpixman_sse2_la_CFLAGS = $(DEP_CFLAGS) $(SSE2_CFLAGS)
@USE_SSE2_TRUE@libpixman_sse2_la_LIBADD = $(DEP_LIBS)
+@USE_SSE2_TRUE@ASM_CFLAGS_sse2 = $(SSE2_CFLAGS)
@USE_ARM_SIMD_TRUE@libpixman_arm_simd_la_SOURCES = \
-@USE_ARM_SIMD_TRUE@ pixman-arm-simd.c
+@USE_ARM_SIMD_TRUE@ pixman-arm-simd.c \
+@USE_ARM_SIMD_TRUE@ pixman-arm-common.h \
+@USE_ARM_SIMD_TRUE@ pixman-arm-simd-asm.S
-@USE_ARM_SIMD_TRUE@libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS) $(ARM_SIMD_CFLAGS)
+@USE_ARM_SIMD_TRUE@libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS)
@USE_ARM_SIMD_TRUE@libpixman_arm_simd_la_LIBADD = $(DEP_LIBS)
+@USE_ARM_SIMD_TRUE@ASM_CFLAGS_arm_simd =
@USE_ARM_NEON_TRUE@libpixman_arm_neon_la_SOURCES = \
-@USE_ARM_NEON_TRUE@ pixman-arm-neon.c
+@USE_ARM_NEON_TRUE@ pixman-arm-neon.c \
+@USE_ARM_NEON_TRUE@ pixman-arm-common.h \
+@USE_ARM_NEON_TRUE@ pixman-arm-neon-asm.S \
+@USE_ARM_NEON_TRUE@ pixman-arm-neon-asm.h
-@USE_ARM_NEON_TRUE@libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS) $(ARM_NEON_CFLAGS)
+@USE_ARM_NEON_TRUE@libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS)
@USE_ARM_NEON_TRUE@libpixman_arm_neon_la_LIBADD = $(DEP_LIBS)
+@USE_ARM_NEON_TRUE@ASM_CFLAGS_arm_neon =
all: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) all-am
.SUFFIXES:
-.SUFFIXES: .c .lo .o .obj
+.SUFFIXES: .S .c .lo .o .obj .s
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
@@ -391,9 +403,9 @@ $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
exit 1;; \
esac; \
done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu pixman/Makefile'; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign pixman/Makefile'; \
cd $(top_srcdir) && \
- $(AUTOMAKE) --gnu pixman/Makefile
+ $(AUTOMAKE) --foreign pixman/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
@@ -468,35 +480,44 @@ mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-access-accessors.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-access.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-bits-image.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-combine32.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-combine64.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-conical-gradient.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-cpu.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-edge-accessors.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-edge.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-fast-path.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-general.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-gradient-walker.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-image.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-implementation.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-linear-gradient.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-matrix.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-radial-gradient.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-region16.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-region32.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-solid-fill.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-timer.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-trap.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman-utils.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_1_la-pixman.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_arm_neon_la-pixman-arm-neon.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_arm_simd_la-pixman-arm-simd.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_mmx_la-pixman-mmx.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_sse2_la-pixman-sse2.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libpixman_vmx_la-pixman-vmx.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-access-accessors.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-access.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-bits-image.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-combine32.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-combine64.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-conical-gradient.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-cpu.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-edge-accessors.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-edge.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-fast-path.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-general.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-gradient-walker.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-image.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-implementation.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-linear-gradient.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-matrix.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-radial-gradient.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-region16.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-region32.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-solid-fill.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-timer.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-trap.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-utils.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman.Plo@am__quote@
+
+.S.o:
+ $(CCASCOMPILE) -c $<
+
+.S.obj:
+ $(CCASCOMPILE) -c `$(CYGPATH_W) '$<'`
+
+.S.lo:
+ $(LTCCASCOMPILE) -c -o $@ $<
.c.o:
@am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \
@@ -519,174 +540,6 @@ distclean-compile:
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
-libpixman_1_la-pixman-access.lo: pixman-access.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-access.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-access.Tpo" -c -o libpixman_1_la-pixman-access.lo `test -f 'pixman-access.c' || echo '$(srcdir)/'`pixman-access.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-access.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-access.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-access.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-access.c' object='libpixman_1_la-pixman-access.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-access.lo `test -f 'pixman-access.c' || echo '$(srcdir)/'`pixman-access.c
-
-libpixman_1_la-pixman-access-accessors.lo: pixman-access-accessors.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-access-accessors.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-access-accessors.Tpo" -c -o libpixman_1_la-pixman-access-accessors.lo `test -f 'pixman-access-accessors.c' || echo '$(srcdir)/'`pixman-access-accessors.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-access-accessors.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-access-accessors.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-access-accessors.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-access-accessors.c' object='libpixman_1_la-pixman-access-accessors.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-access-accessors.lo `test -f 'pixman-access-accessors.c' || echo '$(srcdir)/'`pixman-access-accessors.c
-
-libpixman_1_la-pixman-cpu.lo: pixman-cpu.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-cpu.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-cpu.Tpo" -c -o libpixman_1_la-pixman-cpu.lo `test -f 'pixman-cpu.c' || echo '$(srcdir)/'`pixman-cpu.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-cpu.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-cpu.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-cpu.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-cpu.c' object='libpixman_1_la-pixman-cpu.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-cpu.lo `test -f 'pixman-cpu.c' || echo '$(srcdir)/'`pixman-cpu.c
-
-libpixman_1_la-pixman-gradient-walker.lo: pixman-gradient-walker.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-gradient-walker.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-gradient-walker.Tpo" -c -o libpixman_1_la-pixman-gradient-walker.lo `test -f 'pixman-gradient-walker.c' || echo '$(srcdir)/'`pixman-gradient-walker.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-gradient-walker.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-gradient-walker.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-gradient-walker.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-gradient-walker.c' object='libpixman_1_la-pixman-gradient-walker.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-gradient-walker.lo `test -f 'pixman-gradient-walker.c' || echo '$(srcdir)/'`pixman-gradient-walker.c
-
-libpixman_1_la-pixman-region16.lo: pixman-region16.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-region16.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-region16.Tpo" -c -o libpixman_1_la-pixman-region16.lo `test -f 'pixman-region16.c' || echo '$(srcdir)/'`pixman-region16.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-region16.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-region16.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-region16.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-region16.c' object='libpixman_1_la-pixman-region16.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-region16.lo `test -f 'pixman-region16.c' || echo '$(srcdir)/'`pixman-region16.c
-
-libpixman_1_la-pixman-region32.lo: pixman-region32.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-region32.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-region32.Tpo" -c -o libpixman_1_la-pixman-region32.lo `test -f 'pixman-region32.c' || echo '$(srcdir)/'`pixman-region32.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-region32.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-region32.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-region32.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-region32.c' object='libpixman_1_la-pixman-region32.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-region32.lo `test -f 'pixman-region32.c' || echo '$(srcdir)/'`pixman-region32.c
-
-libpixman_1_la-pixman-image.lo: pixman-image.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-image.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-image.Tpo" -c -o libpixman_1_la-pixman-image.lo `test -f 'pixman-image.c' || echo '$(srcdir)/'`pixman-image.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-image.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-image.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-image.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-image.c' object='libpixman_1_la-pixman-image.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-image.lo `test -f 'pixman-image.c' || echo '$(srcdir)/'`pixman-image.c
-
-libpixman_1_la-pixman-implementation.lo: pixman-implementation.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-implementation.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-implementation.Tpo" -c -o libpixman_1_la-pixman-implementation.lo `test -f 'pixman-implementation.c' || echo '$(srcdir)/'`pixman-implementation.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-implementation.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-implementation.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-implementation.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-implementation.c' object='libpixman_1_la-pixman-implementation.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-implementation.lo `test -f 'pixman-implementation.c' || echo '$(srcdir)/'`pixman-implementation.c
-
-libpixman_1_la-pixman-combine32.lo: pixman-combine32.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-combine32.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-combine32.Tpo" -c -o libpixman_1_la-pixman-combine32.lo `test -f 'pixman-combine32.c' || echo '$(srcdir)/'`pixman-combine32.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-combine32.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-combine32.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-combine32.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-combine32.c' object='libpixman_1_la-pixman-combine32.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-combine32.lo `test -f 'pixman-combine32.c' || echo '$(srcdir)/'`pixman-combine32.c
-
-libpixman_1_la-pixman-combine64.lo: pixman-combine64.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-combine64.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-combine64.Tpo" -c -o libpixman_1_la-pixman-combine64.lo `test -f 'pixman-combine64.c' || echo '$(srcdir)/'`pixman-combine64.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-combine64.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-combine64.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-combine64.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-combine64.c' object='libpixman_1_la-pixman-combine64.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-combine64.lo `test -f 'pixman-combine64.c' || echo '$(srcdir)/'`pixman-combine64.c
-
-libpixman_1_la-pixman-general.lo: pixman-general.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-general.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-general.Tpo" -c -o libpixman_1_la-pixman-general.lo `test -f 'pixman-general.c' || echo '$(srcdir)/'`pixman-general.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-general.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-general.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-general.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-general.c' object='libpixman_1_la-pixman-general.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-general.lo `test -f 'pixman-general.c' || echo '$(srcdir)/'`pixman-general.c
-
-libpixman_1_la-pixman.lo: pixman.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman.Tpo" -c -o libpixman_1_la-pixman.lo `test -f 'pixman.c' || echo '$(srcdir)/'`pixman.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman.Tpo" "$(DEPDIR)/libpixman_1_la-pixman.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman.c' object='libpixman_1_la-pixman.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman.lo `test -f 'pixman.c' || echo '$(srcdir)/'`pixman.c
-
-libpixman_1_la-pixman-fast-path.lo: pixman-fast-path.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-fast-path.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-fast-path.Tpo" -c -o libpixman_1_la-pixman-fast-path.lo `test -f 'pixman-fast-path.c' || echo '$(srcdir)/'`pixman-fast-path.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-fast-path.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-fast-path.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-fast-path.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-fast-path.c' object='libpixman_1_la-pixman-fast-path.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-fast-path.lo `test -f 'pixman-fast-path.c' || echo '$(srcdir)/'`pixman-fast-path.c
-
-libpixman_1_la-pixman-solid-fill.lo: pixman-solid-fill.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-solid-fill.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-solid-fill.Tpo" -c -o libpixman_1_la-pixman-solid-fill.lo `test -f 'pixman-solid-fill.c' || echo '$(srcdir)/'`pixman-solid-fill.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-solid-fill.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-solid-fill.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-solid-fill.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-solid-fill.c' object='libpixman_1_la-pixman-solid-fill.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-solid-fill.lo `test -f 'pixman-solid-fill.c' || echo '$(srcdir)/'`pixman-solid-fill.c
-
-libpixman_1_la-pixman-conical-gradient.lo: pixman-conical-gradient.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-conical-gradient.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-conical-gradient.Tpo" -c -o libpixman_1_la-pixman-conical-gradient.lo `test -f 'pixman-conical-gradient.c' || echo '$(srcdir)/'`pixman-conical-gradient.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-conical-gradient.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-conical-gradient.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-conical-gradient.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-conical-gradient.c' object='libpixman_1_la-pixman-conical-gradient.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-conical-gradient.lo `test -f 'pixman-conical-gradient.c' || echo '$(srcdir)/'`pixman-conical-gradient.c
-
-libpixman_1_la-pixman-linear-gradient.lo: pixman-linear-gradient.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-linear-gradient.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-linear-gradient.Tpo" -c -o libpixman_1_la-pixman-linear-gradient.lo `test -f 'pixman-linear-gradient.c' || echo '$(srcdir)/'`pixman-linear-gradient.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-linear-gradient.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-linear-gradient.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-linear-gradient.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-linear-gradient.c' object='libpixman_1_la-pixman-linear-gradient.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-linear-gradient.lo `test -f 'pixman-linear-gradient.c' || echo '$(srcdir)/'`pixman-linear-gradient.c
-
-libpixman_1_la-pixman-radial-gradient.lo: pixman-radial-gradient.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-radial-gradient.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-radial-gradient.Tpo" -c -o libpixman_1_la-pixman-radial-gradient.lo `test -f 'pixman-radial-gradient.c' || echo '$(srcdir)/'`pixman-radial-gradient.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-radial-gradient.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-radial-gradient.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-radial-gradient.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-radial-gradient.c' object='libpixman_1_la-pixman-radial-gradient.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-radial-gradient.lo `test -f 'pixman-radial-gradient.c' || echo '$(srcdir)/'`pixman-radial-gradient.c
-
-libpixman_1_la-pixman-bits-image.lo: pixman-bits-image.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-bits-image.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-bits-image.Tpo" -c -o libpixman_1_la-pixman-bits-image.lo `test -f 'pixman-bits-image.c' || echo '$(srcdir)/'`pixman-bits-image.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-bits-image.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-bits-image.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-bits-image.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-bits-image.c' object='libpixman_1_la-pixman-bits-image.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-bits-image.lo `test -f 'pixman-bits-image.c' || echo '$(srcdir)/'`pixman-bits-image.c
-
-libpixman_1_la-pixman-utils.lo: pixman-utils.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-utils.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-utils.Tpo" -c -o libpixman_1_la-pixman-utils.lo `test -f 'pixman-utils.c' || echo '$(srcdir)/'`pixman-utils.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-utils.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-utils.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-utils.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-utils.c' object='libpixman_1_la-pixman-utils.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-utils.lo `test -f 'pixman-utils.c' || echo '$(srcdir)/'`pixman-utils.c
-
-libpixman_1_la-pixman-edge.lo: pixman-edge.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-edge.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-edge.Tpo" -c -o libpixman_1_la-pixman-edge.lo `test -f 'pixman-edge.c' || echo '$(srcdir)/'`pixman-edge.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-edge.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-edge.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-edge.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-edge.c' object='libpixman_1_la-pixman-edge.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-edge.lo `test -f 'pixman-edge.c' || echo '$(srcdir)/'`pixman-edge.c
-
-libpixman_1_la-pixman-edge-accessors.lo: pixman-edge-accessors.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-edge-accessors.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-edge-accessors.Tpo" -c -o libpixman_1_la-pixman-edge-accessors.lo `test -f 'pixman-edge-accessors.c' || echo '$(srcdir)/'`pixman-edge-accessors.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-edge-accessors.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-edge-accessors.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-edge-accessors.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-edge-accessors.c' object='libpixman_1_la-pixman-edge-accessors.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-edge-accessors.lo `test -f 'pixman-edge-accessors.c' || echo '$(srcdir)/'`pixman-edge-accessors.c
-
-libpixman_1_la-pixman-trap.lo: pixman-trap.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-trap.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-trap.Tpo" -c -o libpixman_1_la-pixman-trap.lo `test -f 'pixman-trap.c' || echo '$(srcdir)/'`pixman-trap.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-trap.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-trap.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-trap.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-trap.c' object='libpixman_1_la-pixman-trap.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-trap.lo `test -f 'pixman-trap.c' || echo '$(srcdir)/'`pixman-trap.c
-
-libpixman_1_la-pixman-timer.lo: pixman-timer.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-timer.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-timer.Tpo" -c -o libpixman_1_la-pixman-timer.lo `test -f 'pixman-timer.c' || echo '$(srcdir)/'`pixman-timer.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-timer.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-timer.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-timer.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-timer.c' object='libpixman_1_la-pixman-timer.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-timer.lo `test -f 'pixman-timer.c' || echo '$(srcdir)/'`pixman-timer.c
-
-libpixman_1_la-pixman-matrix.lo: pixman-matrix.c
-@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -MT libpixman_1_la-pixman-matrix.lo -MD -MP -MF "$(DEPDIR)/libpixman_1_la-pixman-matrix.Tpo" -c -o libpixman_1_la-pixman-matrix.lo `test -f 'pixman-matrix.c' || echo '$(srcdir)/'`pixman-matrix.c; \
-@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_1_la-pixman-matrix.Tpo" "$(DEPDIR)/libpixman_1_la-pixman-matrix.Plo"; else rm -f "$(DEPDIR)/libpixman_1_la-pixman-matrix.Tpo"; exit 1; fi
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pixman-matrix.c' object='libpixman_1_la-pixman-matrix.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_1_la_CFLAGS) $(CFLAGS) -c -o libpixman_1_la-pixman-matrix.lo `test -f 'pixman-matrix.c' || echo '$(srcdir)/'`pixman-matrix.c
-
libpixman_arm_neon_la-pixman-arm-neon.lo: pixman-arm-neon.c
@am__fastdepCC_TRUE@ if $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpixman_arm_neon_la_CFLAGS) $(CFLAGS) -MT libpixman_arm_neon_la-pixman-arm-neon.lo -MD -MP -MF "$(DEPDIR)/libpixman_arm_neon_la-pixman-arm-neon.Tpo" -c -o libpixman_arm_neon_la-pixman-arm-neon.lo `test -f 'pixman-arm-neon.c' || echo '$(srcdir)/'`pixman-arm-neon.c; \
@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/libpixman_arm_neon_la-pixman-arm-neon.Tpo" "$(DEPDIR)/libpixman_arm_neon_la-pixman-arm-neon.Plo"; else rm -f "$(DEPDIR)/libpixman_arm_neon_la-pixman-arm-neon.Tpo"; exit 1; fi
@@ -935,6 +788,9 @@ pixman-combine64.c : pixman-combine.c.template pixman-combine64.h make-combine.p
$(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.c.template > $@ || ($(RM) $@; exit 1)
pixman-combine64.h : pixman-combine.h.template make-combine.pl
$(PERL) $(srcdir)/make-combine.pl 16 < $(srcdir)/pixman-combine.h.template > $@ || ($(RM) $@; exit 1)
+
+.c.s : $(libpixmaninclude_HEADERS) $(BUILT_SOURCES)
+ $(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $<
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
diff --git a/lib/pixman/pixman/pixman-access.c b/lib/pixman/pixman/pixman-access.c
index d9fd38c15..b65ef661d 100644
--- a/lib/pixman/pixman/pixman-access.c
+++ b/lib/pixman/pixman/pixman-access.c
@@ -180,11 +180,11 @@ fetch_scanline_b8g8r8a8 (pixman_image_t *image,
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
-
+
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
-
+
*buffer++ = (((p & 0xff000000) >> 24) |
((p & 0x00ff0000) >> 8) |
((p & 0x0000ff00) << 8) |
@@ -731,23 +731,27 @@ fetch_scanline_b2g3r3 (pixman_image_t *image,
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint8_t *pixel = (const uint8_t *)bits + x;
const uint8_t *end = pixel + width;
-
+
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint32_t r, g, b;
-
- b = (((p & 0xc0) ) |
- ((p & 0xc0) >> 2) |
- ((p & 0xc0) >> 4) |
- ((p & 0xc0) >> 6));
-
- g = ((p & 0x38) | ((p & 0x38) >> 3) | ((p & 0x30) << 2)) << 8;
-
- r = (((p & 0x07) ) |
- ((p & 0x07) << 3) |
- ((p & 0x06) << 6)) << 16;
-
+
+ b = p & 0xc0;
+ b |= b >> 2;
+ b |= b >> 4;
+ b &= 0xff;
+
+ g = (p & 0x38) << 10;
+ g |= g >> 3;
+ g |= g >> 6;
+ g &= 0xff00;
+
+ r = (p & 0x7) << 21;
+ r |= r >> 3;
+ r |= r >> 6;
+ r &= 0xff0000;
+
*buffer++ = 0xff000000 | r | g | b;
}
}
@@ -798,7 +802,7 @@ fetch_scanline_a2b2g2r2 (pixman_image_t *image,
uint32_t a, r, g, b;
a = ((p & 0xc0) * 0x55) << 18;
- b = ((p & 0x30) * 0x55) >> 6;
+ b = ((p & 0x30) * 0x55) >> 4;
g = ((p & 0x0c) * 0x55) << 6;
r = ((p & 0x03) * 0x55) << 16;
@@ -840,20 +844,22 @@ fetch_scanline_x4a4 (pixman_image_t *image,
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint8_t *pixel = (const uint8_t *)bits + x;
const uint8_t *end = pixel + width;
-
+
while (pixel < end)
{
uint8_t p = READ (image, pixel++) & 0xf;
-
+
*buffer++ = (p | (p << 4)) << 24;
}
}
-#define FETCH_8(img,l,o) (READ (img, (uint8_t *)(l) + ((o) >> 2)))
+#define FETCH_8(img,l,o) (READ (img, (((uint8_t *)(l)) + ((o) >> 3))))
#ifdef WORDS_BIGENDIAN
-#define FETCH_4(img,l,o) ((o) & 2 ? FETCH_8 (img,l,o) & 0xf : FETCH_8 (img,l,o) >> 4)
+#define FETCH_4(img,l,o) \
+ (((4 * (o)) & 4) ? (FETCH_8 (img,l, 4 * (o)) & 0xf) : (FETCH_8 (img,l,(4 * (o))) >> 4))
#else
-#define FETCH_4(img,l,o) ((o) & 2 ? FETCH_8 (img,l,o) >> 4 : FETCH_8 (img,l,o) & 0xf)
+#define FETCH_4(img,l,o) \
+ (((4 * (o)) & 4) ? (FETCH_8 (img, l, 4 * (o)) >> 4) : (FETCH_8 (img, l, (4 * (o))) & 0xf))
#endif
static void
@@ -867,13 +873,13 @@ fetch_scanline_a4 (pixman_image_t *image,
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
int i;
-
+
for (i = 0; i < width; ++i)
{
uint32_t p = FETCH_4 (image, bits, i + x);
-
+
p |= p << 4;
-
+
*buffer++ = p << 24;
}
}
@@ -923,7 +929,7 @@ fetch_scanline_b1g2r1 (pixman_image_t *image,
b = ((p & 0x8) * 0xff) >> 3;
g = ((p & 0x6) * 0x55) << 7;
r = ((p & 0x1) * 0xff) << 16;
-
+
*buffer++ = 0xff000000 | r | g | b;
}
}
@@ -940,16 +946,16 @@ fetch_scanline_a1r1g1b1 (pixman_image_t *image,
uint32_t a, r, g, b;
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
int i;
-
+
for (i = 0; i < width; ++i)
{
uint32_t p = FETCH_4 (image, bits, i + x);
-
+
a = ((p & 0x8) * 0xff) << 21;
r = ((p & 0x4) * 0xff) << 14;
g = ((p & 0x2) * 0xff) << 7;
b = ((p & 0x1) * 0xff);
-
+
*buffer++ = a | r | g | b;
}
}
@@ -965,17 +971,17 @@ fetch_scanline_a1b1g1r1 (pixman_image_t *image,
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
int i;
-
+
for (i = 0; i < width; ++i)
{
uint32_t p = FETCH_4 (image, bits, i + x);
uint32_t a, r, g, b;
-
+
a = ((p & 0x8) * 0xff) << 21;
- r = ((p & 0x4) * 0xff) >> 3;
+ b = ((p & 0x4) * 0xff) >> 2;
g = ((p & 0x2) * 0xff) << 7;
- b = ((p & 0x1) * 0xff) << 16;
-
+ r = ((p & 0x1) * 0xff) << 16;
+
*buffer++ = a | r | g | b;
}
}
@@ -1546,23 +1552,25 @@ fetch_pixel_b2g3r3 (bits_image_t *image,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
- uint32_t pixel = READ (image, (uint8_t *) bits + offset);
+ uint32_t p = READ (image, (uint8_t *) bits + offset);
uint32_t r, g, b;
-
- b = ((pixel & 0xc0) |
- ((pixel & 0xc0) >> 2) |
- ((pixel & 0xc0) >> 4) |
- ((pixel & 0xc0) >> 6));
-
- g = ((pixel & 0x38) |
- ((pixel & 0x38) >> 3) |
- ((pixel & 0x30) << 2)) << 8;
-
- r = ((pixel & 0x07) |
- ((pixel & 0x07) << 3) |
- ((pixel & 0x06) << 6)) << 16;
-
- return (0xff000000 | r | g | b);
+
+ b = p & 0xc0;
+ b |= b >> 2;
+ b |= b >> 4;
+ b &= 0xff;
+
+ g = (p & 0x38) << 10;
+ g |= g >> 3;
+ g |= g >> 6;
+ g &= 0xff00;
+
+ r = (p & 0x7) << 21;
+ r |= r >> 3;
+ r |= r >> 6;
+ r &= 0xff0000;
+
+ return 0xff000000 | r | g | b;
}
static uint32_t
@@ -1592,7 +1600,7 @@ fetch_pixel_a2b2g2r2 (bits_image_t *image,
uint32_t a, r, g, b;
a = ((pixel & 0xc0) * 0x55) << 18;
- b = ((pixel & 0x30) * 0x55) >> 6;
+ b = ((pixel & 0x30) * 0x55) >> 4;
g = ((pixel & 0x0c) * 0x55) << 6;
r = ((pixel & 0x03) * 0x55) << 16;
@@ -1674,12 +1682,12 @@ fetch_pixel_a1r1g1b1 (bits_image_t *image,
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = FETCH_4 (image, bits, offset);
uint32_t a, r, g, b;
-
+
a = ((pixel & 0x8) * 0xff) << 21;
r = ((pixel & 0x4) * 0xff) << 14;
g = ((pixel & 0x2) * 0xff) << 7;
b = ((pixel & 0x1) * 0xff);
-
+
return a | r | g | b;
}
@@ -1691,12 +1699,12 @@ fetch_pixel_a1b1g1r1 (bits_image_t *image,
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = FETCH_4 (image, bits, offset);
uint32_t a, r, g, b;
-
+
a = ((pixel & 0x8) * 0xff) << 21;
- r = ((pixel & 0x4) * 0xff) >> 3;
+ b = ((pixel & 0x4) * 0xff) >> 2;
g = ((pixel & 0x2) * 0xff) << 7;
- b = ((pixel & 0x1) * 0xff) << 16;
-
+ r = ((pixel & 0x1) * 0xff) << 16;
+
return a | r | g | b;
}
@@ -1708,7 +1716,7 @@ fetch_pixel_c4 (bits_image_t *image,
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = FETCH_4 (image, bits, offset);
const pixman_indexed_t * indexed = image->indexed;
-
+
return indexed->rgba[pixel];
}
@@ -2425,22 +2433,38 @@ store_scanline_x4a4 (bits_image_t * image,
uint32_t *bits = image->bits + image->rowstride * y;
uint8_t *pixel = ((uint8_t *) bits) + x;
int i;
-
+
for (i = 0; i < width; ++i)
WRITE (image, pixel++, values[i] >> 28);
}
#define STORE_8(img,l,o,v) (WRITE (img, (uint8_t *)(l) + ((o) >> 3), (v)))
#ifdef WORDS_BIGENDIAN
-#define STORE_4(img,l,o,v) \
- STORE_8 (img,l,o,((o) & 4 ? \
- (FETCH_8 (img,l,o) & 0xf0) | (v) : \
- (FETCH_8 (img,l,o) & 0x0f) | ((v) << 4)))
+
+#define STORE_4(img,l,o,v) \
+ do \
+ { \
+ int bo = 4 * (o); \
+ int v4 = (v) & 0x0f; \
+ \
+ STORE_8 (img, l, bo, ( \
+ bo & 4 ? \
+ (FETCH_8 (img, l, bo) & 0xf0) | (v4) : \
+ (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4))); \
+ } while (0)
#else
-#define STORE_4(img,l,o,v) \
- STORE_8 (img,l,o,((o) & 4 ? \
- (FETCH_8 (img,l,o) & 0x0f) | ((v) << 4) : \
- (FETCH_8 (img,l,o) & 0xf0) | (v)))
+
+#define STORE_4(img,l,o,v) \
+ do \
+ { \
+ int bo = 4 * (o); \
+ int v4 = (v) & 0x0f; \
+ \
+ STORE_8 (img, l, bo, ( \
+ bo & 4 ? \
+ (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4) : \
+ (FETCH_8 (img, l, bo) & 0xf0) | (v4))); \
+ } while (0)
#endif
static void
@@ -2452,7 +2476,7 @@ store_scanline_a4 (bits_image_t * image,
{
uint32_t *bits = image->bits + image->rowstride * y;
int i;
-
+
for (i = 0; i < width; ++i)
STORE_4 (image, bits, i + x, values[i] >> 28);
}
@@ -2466,11 +2490,11 @@ store_scanline_r1g2b1 (bits_image_t * image,
{
uint32_t *bits = image->bits + image->rowstride * y;
int i;
-
+
for (i = 0; i < width; ++i)
{
uint32_t pixel;
-
+
SPLIT (values[i]);
pixel = (((r >> 4) & 0x8) |
((g >> 5) & 0x6) |
@@ -2488,11 +2512,11 @@ store_scanline_b1g2r1 (bits_image_t * image,
{
uint32_t *bits = image->bits + image->rowstride * y;
int i;
-
+
for (i = 0; i < width; ++i)
{
uint32_t pixel;
-
+
SPLIT (values[i]);
pixel = (((b >> 4) & 0x8) |
((g >> 5) & 0x6) |
@@ -2510,16 +2534,17 @@ store_scanline_a1r1g1b1 (bits_image_t * image,
{
uint32_t *bits = image->bits + image->rowstride * y;
int i;
-
+
for (i = 0; i < width; ++i)
{
uint32_t pixel;
-
+
SPLIT_A (values[i]);
pixel = (((a >> 4) & 0x8) |
((r >> 5) & 0x4) |
((g >> 6) & 0x2) |
((b >> 7) ));
+
STORE_4 (image, bits, i + x, pixel);
}
}
@@ -2533,16 +2558,17 @@ store_scanline_a1b1g1r1 (bits_image_t * image,
{
uint32_t *bits = image->bits + image->rowstride * y;
int i;
-
+
for (i = 0; i < width; ++i)
{
uint32_t pixel;
-
+
SPLIT_A (values[i]);
pixel = (((a >> 4) & 0x8) |
((b >> 5) & 0x4) |
((g >> 6) & 0x2) |
((r >> 7) ));
+
STORE_4 (image, bits, i + x, pixel);
}
}
@@ -2614,7 +2640,7 @@ store_scanline_g1 (bits_image_t * image,
#else
mask = 1 << ((i + x) & 0x1f);
#endif
- v = RGB24_TO_ENTRY_Y (indexed, values[i]) ? mask : 0;
+ v = RGB24_TO_ENTRY_Y (indexed, values[i]) & 0x1 ? mask : 0;
WRITE (image, pixel, (READ (image, pixel) & ~mask) | v);
}
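(Aside on the pixman-access.c hunks above: the b2g3r3 fetch paths now widen each packed channel by shift-and-OR bit replication instead of the old one-shot mask merges. The stand-alone C sketch below reproduces that conversion outside of pixman; the function and variable names are illustrative only and are not part of the patch.)

#include <stdint.h>
#include <stdio.h>

/* Expand one b2g3r3 byte (2 blue bits, 3 green bits, 3 red bits) to an
 * opaque a8r8g8b8 pixel, using the same bit-replication scheme as the
 * rewritten fetch_scanline_b2g3r3 / fetch_pixel_b2g3r3 hunks above. */
static uint32_t
expand_b2g3r3 (uint8_t p)
{
    uint32_t r, g, b;

    b = p & 0xc0;          /* blue: top two bits, replicated downwards */
    b |= b >> 2;
    b |= b >> 4;
    b &= 0xff;

    g = (p & 0x38) << 10;  /* green: three bits shifted into 0x0000ff00 */
    g |= g >> 3;
    g |= g >> 6;
    g &= 0xff00;

    r = (p & 0x07) << 21;  /* red: three bits shifted into 0x00ff0000 */
    r |= r >> 3;
    r |= r >> 6;
    r &= 0xff0000;

    return 0xff000000 | r | g | b;
}

int
main (void)
{
    printf ("%08x\n", (unsigned) expand_b2g3r3 (0xff)); /* ffffffff: all channels saturate */
    printf ("%08x\n", (unsigned) expand_b2g3r3 (0x00)); /* ff000000: opaque black */
    return 0;
}

(The replication keeps full-scale values exact, e.g. 0xff expands to opaque white, which is why the hunks can drop the longer OR-of-shifts expressions.)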
diff --git a/lib/pixman/pixman/pixman-arm-common.h b/lib/pixman/pixman/pixman-arm-common.h
new file mode 100644
index 000000000..58ee4e1c4
--- /dev/null
+++ b/lib/pixman/pixman/pixman-arm-common.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright © 2010 Nokia Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
+ */
+
+#ifndef PIXMAN_ARM_COMMON_H
+#define PIXMAN_ARM_COMMON_H
+
+/* Define some macros which can expand into proxy functions between
+ * ARM assembly optimized functions and the rest of pixman fast path API.
+ *
+ * All the low level ARM assembly functions have to use ARM EABI
+ * calling convention and take up to 8 arguments:
+ * width, height, dst, dst_stride, src, src_stride, mask, mask_stride
+ *
+ * The arguments are ordered with the most important coming first (the
+ * first 4 arguments are passed to the function in registers, the rest are
+ * on the stack). The last arguments are optional; for example, if the
+ * function does not use a mask, then 'mask' and 'mask_stride' can be
+ * omitted when doing a function call.
+ *
+ * Arguments 'src' and 'mask' contain either a pointer to the top left
+ * pixel of the composited rectangle or a pixel color value depending
+ * on the function type. In the case of just a color value (solid source
+ * or mask), the corresponding stride argument is unused.
+ */
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name, \
+ src_type, src_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ src_type *src, \
+ int32_t src_stride); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ src_type *src_line; \
+ int32_t dst_stride, src_stride; \
+ \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
+ src_stride, src_line, src_cnt); \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src_line, src_stride); \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(cputype, name, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ uint32_t src); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ int32_t dst_stride; \
+ uint32_t src; \
+ \
+ src = _pixman_image_get_solid (src_image, dst_image->bits.format); \
+ \
+ if (src == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src); \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(cputype, name, \
+ mask_type, mask_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ uint32_t src, \
+ int32_t unused, \
+ mask_type *mask, \
+ int32_t mask_stride); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ mask_type *mask_line; \
+ int32_t dst_stride, mask_stride; \
+ uint32_t src; \
+ \
+ src = _pixman_image_get_solid (src_image, dst_image->bits.format); \
+ \
+ if (src == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
+ mask_stride, mask_line, mask_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src, 0, \
+ mask_line, mask_stride); \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(cputype, name, \
+ src_type, src_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ src_type *src, \
+ int32_t src_stride, \
+ uint32_t mask); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ src_type *src_line; \
+ int32_t dst_stride, src_stride; \
+ uint32_t mask; \
+ \
+ mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);\
+ \
+ if (mask == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
+ src_stride, src_line, src_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src_line, src_stride, \
+ mask); \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name, \
+ src_type, src_cnt, \
+ mask_type, mask_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ src_type *src, \
+ int32_t src_stride, \
+ mask_type *mask, \
+ int32_t mask_stride); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ src_type *src_line; \
+ mask_type *mask_line; \
+ int32_t dst_stride, src_stride, mask_stride; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
+ src_stride, src_line, src_cnt); \
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
+ mask_stride, mask_line, mask_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src_line, src_stride, \
+ mask_line, mask_stride); \
+}
+
+#endif
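(As a rough guide to the binding macros above: each expands into a thin C proxy that resolves the source/mask/destination line pointers and strides for the composite rectangle and then calls the matching ARM assembly entry point, width and height first. The stand-alone sketch below shows the shape of one SRC_DST-style binding in plain C; the fake_bits_image_t type, the *_core function standing in for the NEON routine, and the 8888-to-8888 copy it performs are illustrative assumptions, not pixman's real internals.)

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef struct {
    uint32_t *bits;       /* pixel storage                */
    int32_t   rowstride;  /* row stride in uint32_t units */
} fake_bits_image_t;

/* Stand-in for an entry point such as pixman_composite_src_8888_8888_asm_neon:
 * takes (width, height, dst, dst_stride, src, src_stride), most important first. */
static void
composite_src_8888_8888_core (int32_t w, int32_t h,
                              uint32_t *dst, int32_t dst_stride,
                              uint32_t *src, int32_t src_stride)
{
    while (h--)
    {
        memcpy (dst, src, w * sizeof (uint32_t));
        dst += dst_stride;
        src += src_stride;
    }
}

/* Roughly what one PIXMAN_ARM_BIND_FAST_PATH_SRC_DST expansion boils down to:
 * locate the top-left pixel of the composite rectangle in src and dst, then
 * hand line pointers and strides to the low-level routine. */
static void
composite_src_8888_8888 (fake_bits_image_t *src_image, int32_t src_x, int32_t src_y,
                         fake_bits_image_t *dst_image, int32_t dst_x, int32_t dst_y,
                         int32_t width, int32_t height)
{
    uint32_t *src_line = src_image->bits + src_y * src_image->rowstride + src_x;
    uint32_t *dst_line = dst_image->bits + dst_y * dst_image->rowstride + dst_x;

    composite_src_8888_8888_core (width, height,
                                  dst_line, dst_image->rowstride,
                                  src_line, src_image->rowstride);
}

int
main (void)
{
    uint32_t src_bits[16], dst_bits[16] = { 0 };
    fake_bits_image_t src = { src_bits, 4 };
    fake_bits_image_t dst = { dst_bits, 4 };
    int i;

    for (i = 0; i < 16; i++)
        src_bits[i] = 0xff000000u | (uint32_t) i;

    /* copy a 2x2 block starting at (1,1) in src to (0,0) in dst */
    composite_src_8888_8888 (&src, 1, 1, &dst, 0, 0, 2, 2);

    printf ("%08x %08x\n", (unsigned) dst_bits[0], (unsigned) dst_bits[1]); /* ff000005 ff000006 */
    return 0;
}

(The N_DST, N_MASK_DST, SRC_N_DST and SRC_MASK_DST variants in the header follow the same pattern, but additionally resolve a solid color via _pixman_image_get_solid and/or a mask line pointer before making the call.)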
diff --git a/lib/pixman/pixman/pixman-arm-neon-asm.S b/lib/pixman/pixman/pixman-arm-neon-asm.S
new file mode 100644
index 000000000..9ee3ab308
--- /dev/null
+++ b/lib/pixman/pixman/pixman-arm-neon-asm.S
@@ -0,0 +1,1713 @@
+/*
+ * Copyright © 2009 Nokia Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
+ */
+
+/*
+ * This file contains implementations of NEON optimized pixel processing
+ * functions. There is no full and detailed tutorial, but some functions
+ * (those which expose some new or interesting features) are
+ * extensively commented and can be used as examples.
+ *
+ * You may want to have a look at the comments for following functions:
+ * - pixman_composite_over_8888_0565_asm_neon
+ * - pixman_composite_over_n_8_0565_asm_neon
+ */
+
+/* Prevent the stack from becoming executable for no reason... */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+ .text
+ .fpu neon
+ .arch armv7a
+ .object_arch armv4
+ .eabi_attribute 10, 0 /* suppress Tag_FP_arch */
+ .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */
+ .arm
+ .altmacro
+
+#include "pixman-arm-neon-asm.h"
+
+/* Global configuration options and preferences */
+
+/*
+ * The code can optionally make use of unaligned memory accesses to improve
+ * performance of handling leading/trailing pixels for each scanline.
+ * The configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0,
+ * for example on Linux, if unaligned memory accesses are not configured
+ * to generate exceptions.
+ */
+.set RESPECT_STRICT_ALIGNMENT, 1
+
+/*
+ * Set default prefetch type. There is a choice between the following options:
+ *
+ * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work
+ * as a NOP to work around some HW bugs or for whatever other reason)
+ *
+ * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where
+ * advanced prefetch introduces heavy overhead)
+ *
+ * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8
+ * which can run ARM and NEON instructions simultaneously so that extra ARM
+ * instructions do not add (many) extra cycles, but improve prefetch efficiency)
+ *
+ * Note: some types of function can't support advanced prefetch and fall back
+ * to the simple one (those which handle 24bpp pixels)
+ */
+.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED
+
+/* Prefetch distance in pixels for simple prefetch */
+.set PREFETCH_DISTANCE_SIMPLE, 64
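+
+/*
+ * For example (illustrative only; the values below are not tuned for any
+ * particular core), a build targeting a simple single-issue CPU could use:
+ *
+ *     .set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_SIMPLE
+ *     .set PREFETCH_DISTANCE_SIMPLE, 32
+ *
+ * The best settings are found by benchmarking on the target hardware.
+ */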
+
+/*
+ * Implementation of pixman_composite_over_8888_0565_asm_neon
+ *
+ * This function takes a8r8g8b8 source buffer, r5g6b5 destination buffer and
+ * performs OVER compositing operation. Function fast_composite_over_8888_0565
+ * from pixman-fast-path.c does the same in C and can be used as a reference.
+ *
+ * First we need to have some NEON assembly code which can do the actual
+ * operation on the pixels and provide it to the template macro.
+ *
+ * Template macro quite conveniently takes care of emitting all the necessary
+ * code for memory reading and writing (including quite tricky cases of
+ * handling unaligned leading/trailing pixels), so we only need to deal with
+ * the data in NEON registers.
+ *
+ * The recommended general NEON register allocation is the following:
+ * d0, d1, d2, d3 - contain loaded source pixel data
+ * d4, d5, d6, d7 - contain loaded destination pixels (if they are needed)
+ * d24, d25, d26, d27 - contain loaded mask pixel data (if mask is used)
+ * d28, d29, d30, d31 - place for storing the result (destination pixels)
+ *
+ * As can be seen above, four 64-bit NEON registers are used for keeping
+ * intermediate pixel data and up to 8 pixels can be processed in one step
+ * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp).
+ *
+ * This particular function uses the following register allocation:
+ * d0, d1, d2, d3 - contain loaded source pixel data
+ * d4, d5 - contain loaded destination pixels (they are needed)
+ * d28, d29 - place for storing the result (destination pixels)
+ */
+
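+/*
+ * As a rough scalar sketch (for illustration only, not the actual code
+ * path), the macros below compute, per color channel c:
+ *
+ *    dst[c] = saturate_add (src[c], DIV_255 ((255 - src_alpha) * dst[c]))
+ *
+ * i.e. the OVER operator on premultiplied pixels, where DIV_255(x) is
+ * rounded division by 255 (implemented in NEON with the vmull/vrshr/vraddhn
+ * pattern), and the result is finally packed back to r5g6b5.
+ */
+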
+/*
+ * Step one. We need to have some code to do the arithmetic on pixel data.
+ * This is implemented as a pair of macros: '*_head' and '*_tail'. When used
+ * back-to-back, they take pixel data from {d0, d1, d2, d3} and {d4, d5},
+ * perform all the needed calculations and write the result to {d28, d29}.
+ * The rationale for having two macros and not just one will be explained
+ * later. In practice, any single monolithic function which does the work can
+ * be split into two parts in any arbitrary way without affecting correctness.
+ *
+ * There is one special trick here too. The common template macro can
+ * optionally make our life a bit easier by deinterleaving the R, G, B, A
+ * color components for 32bpp pixel formats (this feature is used in the
+ * 'pixman_composite_over_8888_0565_asm_neon' function). It means that
+ * instead of having 8 packed pixels in the {d0, d1, d2, d3} registers, we
+ * actually use the d0 register for the blue channel (a vector of eight
+ * 8-bit values), d1 for green, d2 for red and d3 for alpha. This simple
+ * conversion can also be done with a few NEON instructions:
+ *
+ * Packed to planar conversion:
+ * vuzp.8 d0, d1
+ * vuzp.8 d2, d3
+ * vuzp.8 d1, d3
+ * vuzp.8 d0, d2
+ *
+ * Planar to packed conversion:
+ * vzip.8 d0, d2
+ * vzip.8 d1, d3
+ * vzip.8 d2, d3
+ * vzip.8 d0, d1
+ *
+ * But pixels can also be loaded directly in planar format using the VLD4.8
+ * NEON instruction. It is 1 cycle slower than VLD1.32, so this is not always
+ * desirable; that's why deinterleaving is optional.
+ *
+ * But anyway, here is the code:
+ */
+.macro pixman_composite_over_8888_0565_process_pixblock_head
+ /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
+ and put data into d6 - red, d7 - green, d30 - blue */
+ vshrn.u16 d6, q2, #8
+ vshrn.u16 d7, q2, #3
+ vsli.u16 q2, q2, #5
+ vsri.u8 d6, d6, #5
+ vmvn.8 d3, d3 /* invert source alpha */
+ vsri.u8 d7, d7, #6
+ vshrn.u16 d30, q2, #2
+ /* now do alpha blending, storing results in 8-bit planar format
+ into d16 - red, d19 - green, d18 - blue */
+ vmull.u8 q10, d3, d6
+ vmull.u8 q11, d3, d7
+ vmull.u8 q12, d3, d30
+ vrshr.u16 q13, q10, #8
+ vrshr.u16 q3, q11, #8
+ vrshr.u16 q15, q12, #8
+ vraddhn.u16 d20, q10, q13
+ vraddhn.u16 d23, q11, q3
+ vraddhn.u16 d22, q12, q15
+.endm
+
+.macro pixman_composite_over_8888_0565_process_pixblock_tail
+ /* ... continue alpha blending */
+ vqadd.u8 d16, d2, d20
+ vqadd.u8 q9, q0, q11
+ /* convert the result to r5g6b5 and store it into {d28, d29} */
+ vshll.u8 q14, d16, #8
+ vshll.u8 q8, d19, #8
+ vshll.u8 q9, d18, #8
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+.endm
+
+/*
+ * OK, now we have almost everything that we need. Using the above two
+ * macros, the work can be done correctly. But now we want to optimize
+ * it a bit. ARM Cortex-A8 is an in-order core, and benefits a lot
+ * from good code scheduling and software pipelining.
+ *
+ * Let's construct some code, which will run in the core main loop.
+ * Some pseudo-code of the main loop will look like this:
+ * head
+ * while (...) {
+ * tail
+ * head
+ * }
+ * tail
+ *
+ * It may look a bit weird, but this setup allows hiding instruction
+ * latencies better and also utilizing the dual-issue capability more
+ * efficiently (pairing load-store and ALU instructions).
+ *
+ * So what we need now is a '*_tail_head' macro, which will be used
+ * in the core main loop. A trivial straightforward implementation
+ * of this macro would look like this:
+ *
+ * pixman_composite_over_8888_0565_process_pixblock_tail
+ * vst1.16 {d28, d29}, [DST_W, :128]!
+ * vld1.16 {d4, d5}, [DST_R, :128]!
+ * vld4.32 {d0, d1, d2, d3}, [SRC]!
+ * pixman_composite_over_8888_0565_process_pixblock_head
+ * cache_preload 8, 8
+ *
+ * Now it also contains some VLD/VST instructions. We simply can't move from
+ * processing one block of pixels to the next one with just arithmetic.
+ * The previously processed data needs to be written to memory and new
+ * data needs to be fetched. Fortunately, this main loop does not deal
+ * with partial leading/trailing pixels and can load/store full blocks
+ * of pixels in bulk. Additionally, the destination buffer is already
+ * 16-byte aligned here (which is good for performance).
+ *
+ * New things here are DST_R, DST_W, SRC and MASK identifiers. These
+ * are the aliases for ARM registers which are used as pointers for
+ * accessing data. We maintain separate pointers for reading and writing
+ * destination buffer (DST_R and DST_W).
+ *
+ * Another new thing is the 'cache_preload' macro. It is used for prefetching
+ * data into the CPU L2 cache and improves performance when dealing with
+ * images which are far larger than the cache size. It takes one argument
+ * (actually two, but they need to be the same here) - the number of pixels
+ * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some
+ * details about this macro. Moreover, if good performance is needed,
+ * the code from this macro needs to be copied into the '*_tail_head' macro
+ * and mixed with the rest of the code for optimal instruction scheduling.
+ * We are actually doing that below.
+ *
+ * Now after all the explanations, here is the optimized code.
+ * Different instruction streams (originating from '*_head', '*_tail'
+ * and 'cache_preload' macro) use different indentation levels for
+ * better readability. Actually taking the code from one of these
+ * indentation levels and ignoring a few VLD/VST instructions would
+ * result in exactly the code from '*_head', '*_tail' or 'cache_preload'
+ * macro!
+ */
+
+#if 1
+
+.macro pixman_composite_over_8888_0565_process_pixblock_tail_head
+ vqadd.u8 d16, d2, d20
+ vld1.16 {d4, d5}, [DST_R, :128]!
+ vqadd.u8 q9, q0, q11
+ vshrn.u16 d6, q2, #8
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ vshrn.u16 d7, q2, #3
+ vsli.u16 q2, q2, #5
+ vshll.u8 q14, d16, #8
+ PF add PF_X, PF_X, #8
+ vshll.u8 q8, d19, #8
+ PF tst PF_CTL, #0xF
+ vsri.u8 d6, d6, #5
+ PF addne PF_X, PF_X, #8
+ vmvn.8 d3, d3
+ PF subne PF_CTL, PF_CTL, #1
+ vsri.u8 d7, d7, #6
+ vshrn.u16 d30, q2, #2
+ vmull.u8 q10, d3, d6
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ vmull.u8 q11, d3, d7
+ vmull.u8 q12, d3, d30
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ vsri.u16 q14, q8, #5
+ PF cmp PF_X, ORIG_W
+ vshll.u8 q9, d18, #8
+ vrshr.u16 q13, q10, #8
+ PF subge PF_X, PF_X, ORIG_W
+ vrshr.u16 q3, q11, #8
+ vrshr.u16 q15, q12, #8
+ PF subges PF_CTL, PF_CTL, #0x10
+ vsri.u16 q14, q9, #11
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ vraddhn.u16 d20, q10, q13
+ vraddhn.u16 d23, q11, q3
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vraddhn.u16 d22, q12, q15
+ vst1.16 {d28, d29}, [DST_W, :128]!
+.endm
+
+#else
+
+/* If we did not care much about the performance, we would just use this... */
+.macro pixman_composite_over_8888_0565_process_pixblock_tail_head
+ pixman_composite_over_8888_0565_process_pixblock_tail
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ vld1.16 {d4, d5}, [DST_R, :128]!
+ vld4.32 {d0, d1, d2, d3}, [SRC]!
+ pixman_composite_over_8888_0565_process_pixblock_head
+ cache_preload 8, 8
+.endm
+
+#endif
+
+/*
+ * And now the final part. We are using 'generate_composite_function' macro
+ * to put all the stuff together. We are specifying the name of the function
+ * which we want to get, number of bits per pixel for the source, mask and
+ * destination (0 if unused, like mask in this case). Next come some bit
+ * flags:
+ * FLAG_DST_READWRITE - tells that the destination buffer is both read
+ * and written, for write-only buffer we would use
+ * FLAG_DST_WRITEONLY flag instead
+ * FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data
+ * and separate color channels for 32bpp format.
+ * The next things are:
+ * - the number of pixels processed per iteration (8 in this case, because
+ *   that's the maximum that can fit into four 64-bit NEON registers).
+ * - prefetch distance, measured in pixel blocks. In this case it is 5 blocks
+ *   of 8 pixels each, i.e. 40 pixels, or up to 160 bytes. The optimal
+ * prefetch distance can be selected by running some benchmarks.
+ *
+ * After that we specify some macros: here these are 'default_init' and
+ * 'default_cleanup', which are empty (but it is possible to provide custom
+ * init/cleanup macros, e.g. to save/restore some extra NEON registers
+ * like d8-d15 or do anything else), followed by
+ * 'pixman_composite_over_8888_0565_process_pixblock_head',
+ * 'pixman_composite_over_8888_0565_process_pixblock_tail' and
+ * 'pixman_composite_over_8888_0565_process_pixblock_tail_head'
+ * which we got implemented above.
+ *
+ * The last part is the NEON registers allocation scheme.
+ */
+generate_composite_function \
+ pixman_composite_over_8888_0565_asm_neon, 32, 0, 16, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_0565_process_pixblock_head, \
+ pixman_composite_over_8888_0565_process_pixblock_tail, \
+ pixman_composite_over_8888_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 24 /* mask_basereg */
+
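+/*
+ * The generated function is callable from C with a prototype along these
+ * lines (a sketch matching the SRC_DST binding macro in
+ * pixman-arm-common.h):
+ *
+ *    void
+ *    pixman_composite_over_8888_0565_asm_neon (int32_t   w,
+ *                                              int32_t   h,
+ *                                              uint16_t *dst,
+ *                                              int32_t   dst_stride,
+ *                                              uint32_t *src,
+ *                                              int32_t   src_stride);
+ */
+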
+/******************************************************************************/
+
+.macro pixman_composite_over_n_0565_process_pixblock_head
+ /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
+ and put data into d6 - red, d7 - green, d30 - blue */
+ vshrn.u16 d6, q2, #8
+ vshrn.u16 d7, q2, #3
+ vsli.u16 q2, q2, #5
+ vsri.u8 d6, d6, #5
+ vsri.u8 d7, d7, #6
+ vshrn.u16 d30, q2, #2
+ /* now do alpha blending, storing results in 8-bit planar format
+ into d16 - red, d19 - green, d18 - blue */
+ vmull.u8 q10, d3, d6
+ vmull.u8 q11, d3, d7
+ vmull.u8 q12, d3, d30
+ vrshr.u16 q13, q10, #8
+ vrshr.u16 q3, q11, #8
+ vrshr.u16 q15, q12, #8
+ vraddhn.u16 d20, q10, q13
+ vraddhn.u16 d23, q11, q3
+ vraddhn.u16 d22, q12, q15
+.endm
+
+.macro pixman_composite_over_n_0565_process_pixblock_tail
+ /* ... continue alpha blending */
+ vqadd.u8 d16, d2, d20
+ vqadd.u8 q9, q0, q11
+ /* convert the result to r5g6b5 and store it into {d28, d29} */
+ vshll.u8 q14, d16, #8
+ vshll.u8 q8, d19, #8
+ vshll.u8 q9, d18, #8
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_0565_process_pixblock_tail_head
+ pixman_composite_over_n_0565_process_pixblock_tail
+ vld1.16 {d4, d5}, [DST_R, :128]!
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ pixman_composite_over_n_0565_process_pixblock_head
+.endm
+
+.macro pixman_composite_over_n_0565_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d3[0]}, [DUMMY]
+ vdup.8 d0, d3[0]
+ vdup.8 d1, d3[1]
+ vdup.8 d2, d3[2]
+ vdup.8 d3, d3[3]
+ vmvn.8 d3, d3 /* invert source alpha */
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_0565_init, \
+ default_cleanup, \
+ pixman_composite_over_n_0565_process_pixblock_head, \
+ pixman_composite_over_n_0565_process_pixblock_tail, \
+ pixman_composite_over_n_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 24 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_8888_0565_process_pixblock_head
+ vshll.u8 q8, d1, #8
+ vshll.u8 q14, d2, #8
+ vshll.u8 q9, d0, #8
+.endm
+
+.macro pixman_composite_src_8888_0565_process_pixblock_tail
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+.endm
+
+.macro pixman_composite_src_8888_0565_process_pixblock_tail_head
+ vsri.u16 q14, q8, #5
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vsri.u16 q14, q9, #11
+ PF cmp PF_X, ORIG_W
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ vshll.u8 q8, d1, #8
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+ vshll.u8 q14, d2, #8
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ vshll.u8 q9, d0, #8
+.endm
+
+generate_composite_function \
+ pixman_composite_src_8888_0565_asm_neon, 32, 0, 16, \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_8888_0565_process_pixblock_head, \
+ pixman_composite_src_8888_0565_process_pixblock_tail, \
+ pixman_composite_src_8888_0565_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0565_8888_process_pixblock_head
+ vshrn.u16 d30, q0, #8
+ vshrn.u16 d29, q0, #3
+ vsli.u16 q0, q0, #5
+ vmov.u8 d31, #255
+ vsri.u8 d30, d30, #5
+ vsri.u8 d29, d29, #6
+ vshrn.u16 d28, q0, #2
+.endm
+
+.macro pixman_composite_src_0565_8888_process_pixblock_tail
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_src_0565_8888_process_pixblock_tail_head
+ pixman_composite_src_0565_8888_process_pixblock_tail
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vld1.16 {d0, d1}, [SRC]!
+ pixman_composite_src_0565_8888_process_pixblock_head
+ cache_preload 8, 8
+.endm
+
+generate_composite_function \
+ pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_0565_8888_process_pixblock_head, \
+ pixman_composite_src_0565_8888_process_pixblock_tail, \
+ pixman_composite_src_0565_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_add_8000_8000_process_pixblock_head
+ vqadd.u8 q14, q0, q2
+ vqadd.u8 q15, q1, q3
+.endm
+
+.macro pixman_composite_add_8000_8000_process_pixblock_tail
+.endm
+
+.macro pixman_composite_add_8000_8000_process_pixblock_tail_head
+ vld1.8 {d0, d1, d2, d3}, [SRC]!
+ PF add PF_X, PF_X, #32
+ PF tst PF_CTL, #0xF
+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ PF addne PF_X, PF_X, #32
+ PF subne PF_CTL, PF_CTL, #1
+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF cmp PF_X, ORIG_W
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+ vqadd.u8 q14, q0, q2
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vqadd.u8 q15, q1, q3
+.endm
+
+generate_composite_function \
+ pixman_composite_add_8000_8000_asm_neon, 8, 0, 8, \
+ FLAG_DST_READWRITE, \
+ 32, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8000_8000_process_pixblock_head, \
+ pixman_composite_add_8000_8000_process_pixblock_tail, \
+ pixman_composite_add_8000_8000_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_add_8888_8888_process_pixblock_tail_head
+ vld1.8 {d0, d1, d2, d3}, [SRC]!
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF cmp PF_X, ORIG_W
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+ vqadd.u8 q14, q0, q2
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vqadd.u8 q15, q1, q3
+.endm
+
+generate_composite_function \
+ pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8000_8000_process_pixblock_head, \
+ pixman_composite_add_8000_8000_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_process_pixblock_tail_head
+
+generate_composite_function_single_scanline \
+ pixman_composite_scanline_add_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8000_8000_process_pixblock_head, \
+ pixman_composite_add_8000_8000_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_over_8888_8888_process_pixblock_head
+ vmvn.8 d24, d3 /* get inverted alpha */
+ /* do alpha blending */
+ vmull.u8 q8, d24, d4
+ vmull.u8 q9, d24, d5
+ vmull.u8 q10, d24, d6
+ vmull.u8 q11, d24, d7
+.endm
+
+.macro pixman_composite_over_8888_8888_process_pixblock_tail
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+.endm
+
+.macro pixman_composite_over_8888_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vrshr.u16 q14, q8, #8
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ PF cmp PF_X, ORIG_W
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ vmvn.8 d22, d3
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF subge PF_X, PF_X, ORIG_W
+ vmull.u8 q8, d22, d4
+ PF subges PF_CTL, PF_CTL, #0x10
+ vmull.u8 q9, d22, d5
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ vmull.u8 q10, d22, d6
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vmull.u8 q11, d22, d7
+.endm
+
+generate_composite_function \
+ pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_8888_process_pixblock_head, \
+ pixman_composite_over_8888_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8888_process_pixblock_tail_head
+
+generate_composite_function_single_scanline \
+ pixman_composite_scanline_over_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_8888_process_pixblock_head, \
+ pixman_composite_over_8888_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_8888_process_pixblock_tail_head
+ pixman_composite_over_8888_8888_process_pixblock_tail
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ pixman_composite_over_8888_8888_process_pixblock_head
+.endm
+
+.macro pixman_composite_over_n_8888_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d3[0]}, [DUMMY]
+ vdup.8 d0, d3[0]
+ vdup.8 d1, d3[1]
+ vdup.8 d2, d3[2]
+ vdup.8 d3, d3[3]
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_8888_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_8888_process_pixblock_head, \
+ pixman_composite_over_8888_8888_process_pixblock_tail, \
+ pixman_composite_over_n_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head
+ vrshr.u16 q14, q8, #8
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ PF cmp PF_X, ORIG_W
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+ vld4.8 {d0, d1, d2, d3}, [DST_R, :128]!
+ vmvn.8 d22, d3
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF subge PF_X, PF_X, ORIG_W
+ vmull.u8 q8, d22, d4
+ PF subges PF_CTL, PF_CTL, #0x10
+ vmull.u8 q9, d22, d5
+ vmull.u8 q10, d22, d6
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vmull.u8 q11, d22, d7
+.endm
+
+.macro pixman_composite_over_reverse_n_8888_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d7[0]}, [DUMMY]
+ vdup.8 d4, d7[0]
+ vdup.8 d5, d7[1]
+ vdup.8 d6, d7[2]
+ vdup.8 d7, d7[3]
+.endm
+
+generate_composite_function \
+ pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_reverse_n_8888_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_8888_process_pixblock_head, \
+ pixman_composite_over_8888_8888_process_pixblock_tail, \
+ pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 4, /* src_basereg */ \
+ 24 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_over_n_8_0565_process_pixblock_head
+ /* in */
+ vmull.u8 q0, d24, d8
+ vmull.u8 q1, d24, d9
+ vmull.u8 q6, d24, d10
+ vmull.u8 q7, d24, d11
+ vrshr.u16 q10, q0, #8
+ vrshr.u16 q11, q1, #8
+ vrshr.u16 q12, q6, #8
+ vrshr.u16 q13, q7, #8
+ vraddhn.u16 d0, q0, q10
+ vraddhn.u16 d1, q1, q11
+ vraddhn.u16 d2, q6, q12
+ vraddhn.u16 d3, q7, q13
+
+ vshrn.u16 d6, q2, #8
+ vshrn.u16 d7, q2, #3
+ vsli.u16 q2, q2, #5
+ vsri.u8 d6, d6, #5
+ vmvn.8 d3, d3
+ vsri.u8 d7, d7, #6
+ vshrn.u16 d30, q2, #2
+ /* now do alpha blending */
+ vmull.u8 q10, d3, d6
+ vmull.u8 q11, d3, d7
+ vmull.u8 q12, d3, d30
+ vrshr.u16 q13, q10, #8
+ vrshr.u16 q3, q11, #8
+ vrshr.u16 q15, q12, #8
+ vraddhn.u16 d20, q10, q13
+ vraddhn.u16 d23, q11, q3
+ vraddhn.u16 d22, q12, q15
+.endm
+
+.macro pixman_composite_over_n_8_0565_process_pixblock_tail
+ vqadd.u8 d16, d2, d20
+ vqadd.u8 q9, q0, q11
+ /* convert to r5g6b5 */
+ vshll.u8 q14, d16, #8
+ vshll.u8 q8, d19, #8
+ vshll.u8 q9, d18, #8
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_8_0565_process_pixblock_tail_head
+ pixman_composite_over_n_8_0565_process_pixblock_tail
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ vld1.16 {d4, d5}, [DST_R, :128]!
+ vld1.8 {d24}, [MASK]!
+ cache_preload 8, 8
+ pixman_composite_over_n_8_0565_process_pixblock_head
+.endm
+
+/*
+ * This function needs a special initialization of solid mask.
+ * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET
+ * offset, split into color components and replicated in d8-d11
+ * registers. Additionally, this function needs all the NEON registers,
+ * so it has to save the d8-d15 registers, which are callee-saved according
+ * to the ABI. These registers are restored in the 'cleanup' macro. All the
+ * other NEON registers are caller saved, so can be clobbered freely
+ * without introducing any problems.
+ */
+.macro pixman_composite_over_n_8_0565_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vpush {d8-d15}
+ vld1.32 {d11[0]}, [DUMMY]
+ vdup.8 d8, d11[0]
+ vdup.8 d9, d11[1]
+ vdup.8 d10, d11[2]
+ vdup.8 d11, d11[3]
+.endm
+
+.macro pixman_composite_over_n_8_0565_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8_0565_asm_neon, 0, 8, 16, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_8_0565_init, \
+ pixman_composite_over_n_8_0565_cleanup, \
+ pixman_composite_over_n_8_0565_process_pixblock_head, \
+ pixman_composite_over_n_8_0565_process_pixblock_tail, \
+ pixman_composite_over_n_8_0565_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0565_0565_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_0565_0565_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_0565_0565_process_pixblock_tail_head
+ vst1.16 {d0, d1, d2, d3}, [DST_W, :128]!
+ vld1.16 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 16, 16
+.endm
+
+generate_composite_function \
+ pixman_composite_src_0565_0565_asm_neon, 16, 0, 16, \
+ FLAG_DST_WRITEONLY, \
+ 16, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_0565_0565_process_pixblock_head, \
+ pixman_composite_src_0565_0565_process_pixblock_tail, \
+ pixman_composite_src_0565_0565_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_n_8_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_n_8_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_n_8_process_pixblock_tail_head
+ vst1.8 {d0, d1, d2, d3}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_src_n_8_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d0[0]}, [DUMMY]
+ vsli.u64 d0, d0, #8
+ vsli.u64 d0, d0, #16
+ vsli.u64 d0, d0, #32
+ vorr d1, d0, d0
+ vorr q1, q0, q0
+.endm
+
+.macro pixman_composite_src_n_8_cleanup
+.endm
+
+generate_composite_function \
+ pixman_composite_src_n_8_asm_neon, 0, 0, 8, \
+ FLAG_DST_WRITEONLY, \
+ 32, /* number of pixels, processed in a single block */ \
+ 0, /* prefetch distance */ \
+ pixman_composite_src_n_8_init, \
+ pixman_composite_src_n_8_cleanup, \
+ pixman_composite_src_n_8_process_pixblock_head, \
+ pixman_composite_src_n_8_process_pixblock_tail, \
+ pixman_composite_src_n_8_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_n_0565_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_n_0565_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_n_0565_process_pixblock_tail_head
+ vst1.16 {d0, d1, d2, d3}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_src_n_0565_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d0[0]}, [DUMMY]
+ vsli.u64 d0, d0, #16
+ vsli.u64 d0, d0, #32
+ vorr d1, d0, d0
+ vorr q1, q0, q0
+.endm
+
+.macro pixman_composite_src_n_0565_cleanup
+.endm
+
+generate_composite_function \
+ pixman_composite_src_n_0565_asm_neon, 0, 0, 16, \
+ FLAG_DST_WRITEONLY, \
+ 16, /* number of pixels, processed in a single block */ \
+ 0, /* prefetch distance */ \
+ pixman_composite_src_n_0565_init, \
+ pixman_composite_src_n_0565_cleanup, \
+ pixman_composite_src_n_0565_process_pixblock_head, \
+ pixman_composite_src_n_0565_process_pixblock_tail, \
+ pixman_composite_src_n_0565_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_n_8888_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_n_8888_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_n_8888_process_pixblock_tail_head
+ vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_src_n_8888_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d0[0]}, [DUMMY]
+ vsli.u64 d0, d0, #32
+ vorr d1, d0, d0
+ vorr q1, q0, q0
+.endm
+
+.macro pixman_composite_src_n_8888_cleanup
+.endm
+
+generate_composite_function \
+ pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \
+ FLAG_DST_WRITEONLY, \
+ 8, /* number of pixels, processed in a single block */ \
+ 0, /* prefetch distance */ \
+ pixman_composite_src_n_8888_init, \
+ pixman_composite_src_n_8888_cleanup, \
+ pixman_composite_src_n_8888_process_pixblock_head, \
+ pixman_composite_src_n_8888_process_pixblock_tail, \
+ pixman_composite_src_n_8888_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_8888_8888_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_8888_8888_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_8888_8888_process_pixblock_tail_head
+ vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
+ vld1.32 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 8, 8
+.endm
+
+generate_composite_function \
+ pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_WRITEONLY, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_8888_8888_process_pixblock_head, \
+ pixman_composite_src_8888_8888_process_pixblock_tail, \
+ pixman_composite_src_8888_8888_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_x888_8888_process_pixblock_head
+ vorr q0, q0, q2
+ vorr q1, q1, q2
+.endm
+
+.macro pixman_composite_src_x888_8888_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_x888_8888_process_pixblock_tail_head
+ vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
+ vld1.32 {d0, d1, d2, d3}, [SRC]!
+ vorr q0, q0, q2
+ vorr q1, q1, q2
+ cache_preload 8, 8
+.endm
+
+.macro pixman_composite_src_x888_8888_init
+ vmov.u8 q2, #0xFF
+ vshl.u32 q2, q2, #24
+.endm
+
+generate_composite_function \
+ pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_WRITEONLY, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ pixman_composite_src_x888_8888_init, \
+ default_cleanup, \
+ pixman_composite_src_x888_8888_process_pixblock_head, \
+ pixman_composite_src_x888_8888_process_pixblock_tail, \
+ pixman_composite_src_x888_8888_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_over_n_8_8888_process_pixblock_head
+ /* expecting deinterleaved source data in {d8, d9, d10, d11} */
+ /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
+ /* and destination data in {d4, d5, d6, d7} */
+ /* mask is in d24 (d25, d26, d27 are unused) */
+
+ /* in */
+ vmull.u8 q0, d24, d8
+ vmull.u8 q1, d24, d9
+ vmull.u8 q6, d24, d10
+ vmull.u8 q7, d24, d11
+ vrshr.u16 q10, q0, #8
+ vrshr.u16 q11, q1, #8
+ vrshr.u16 q12, q6, #8
+ vrshr.u16 q13, q7, #8
+ vraddhn.u16 d0, q0, q10
+ vraddhn.u16 d1, q1, q11
+ vraddhn.u16 d2, q6, q12
+ vraddhn.u16 d3, q7, q13
+ vmvn.8 d24, d3 /* get inverted alpha */
+ /* source: d0 - blue, d1 - green, d2 - red, d3 - alpha */
+ /* destination: d4 - blue, d5 - green, d6 - red, d7 - alpha */
+ /* now do alpha blending */
+ vmull.u8 q8, d24, d4
+ vmull.u8 q9, d24, d5
+ vmull.u8 q10, d24, d6
+ vmull.u8 q11, d24, d7
+.endm
+
+.macro pixman_composite_over_n_8_8888_process_pixblock_tail
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_8_8888_process_pixblock_tail_head
+ pixman_composite_over_n_8_8888_process_pixblock_tail
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vld1.8 {d24}, [MASK]!
+ cache_preload 8, 8
+ pixman_composite_over_n_8_8888_process_pixblock_head
+.endm
+
+.macro pixman_composite_over_n_8_8888_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vpush {d8-d15}
+ vld1.32 {d11[0]}, [DUMMY]
+ vdup.8 d8, d11[0]
+ vdup.8 d9, d11[1]
+ vdup.8 d10, d11[2]
+ vdup.8 d11, d11[3]
+.endm
+
+.macro pixman_composite_over_n_8_8888_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8_8888_asm_neon, 0, 8, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_8_8888_init, \
+ pixman_composite_over_n_8_8888_cleanup, \
+ pixman_composite_over_n_8_8888_process_pixblock_head, \
+ pixman_composite_over_n_8_8888_process_pixblock_tail, \
+ pixman_composite_over_n_8_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head
+ /*
+ * 'combine_mask_ca' replacement
+ *
+ * input: solid src (n) in {d8, d9, d10, d11}
+ * dest in {d4, d5, d6, d7 }
+ * mask in {d24, d25, d26, d27}
+ * output: updated src in {d0, d1, d2, d3 }
+ * updated mask in {d24, d25, d26, d3 }
+ */
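+ /*
+ * Scalar sketch of the steps above and below (illustration only):
+ *    src'[c]  = DIV_255 (src[c]  * mask[c])      per color component c
+ *    mask'[c] = DIV_255 (mask[c] * src_alpha)
+ *    dst[c]   = saturate_add (src'[c], DIV_255 ((255 - mask'[c]) * dst[c]))
+ */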
+ vmull.u8 q0, d24, d8
+ vmull.u8 q1, d25, d9
+ vmull.u8 q6, d26, d10
+ vmull.u8 q7, d27, d11
+ vmull.u8 q9, d11, d25
+ vmull.u8 q12, d11, d24
+ vmull.u8 q13, d11, d26
+ vrshr.u16 q8, q0, #8
+ vrshr.u16 q10, q1, #8
+ vrshr.u16 q11, q6, #8
+ vraddhn.u16 d0, q0, q8
+ vraddhn.u16 d1, q1, q10
+ vraddhn.u16 d2, q6, q11
+ vrshr.u16 q11, q12, #8
+ vrshr.u16 q8, q9, #8
+ vrshr.u16 q6, q13, #8
+ vrshr.u16 q10, q7, #8
+ vraddhn.u16 d24, q12, q11
+ vraddhn.u16 d25, q9, q8
+ vraddhn.u16 d26, q13, q6
+ vraddhn.u16 d3, q7, q10
+ /*
+ * 'combine_over_ca' replacement
+ *
+ * output: updated dest in {d28, d29, d30, d31}
+ */
+ vmvn.8 d24, d24
+ vmvn.8 d25, d25
+ vmull.u8 q8, d24, d4
+ vmull.u8 q9, d25, d5
+ vmvn.8 d26, d26
+ vmvn.8 d27, d3
+ vmull.u8 q10, d26, d6
+ vmull.u8 q11, d27, d7
+.endm
+
+.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail
+ /* ... continue 'combine_over_ca' replacement */
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q6, q10, #8
+ vrshr.u16 q7, q11, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q6, q10
+ vraddhn.u16 d31, q7, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+.endm
+
+.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vrshr.u16 q6, q10, #8
+ vrshr.u16 q7, q11, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q6, q10
+ vraddhn.u16 d31, q7, q11
+ vld4.8 {d24, d25, d26, d27}, [MASK]!
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+ cache_preload 8, 8
+ pixman_composite_over_n_8888_8888_ca_process_pixblock_head
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_over_n_8888_8888_ca_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vpush {d8-d15}
+ vld1.32 {d11[0]}, [DUMMY]
+ vdup.8 d8, d11[0]
+ vdup.8 d9, d11[1]
+ vdup.8 d10, d11[2]
+ vdup.8 d11, d11[3]
+.endm
+
+.macro pixman_composite_over_n_8888_8888_ca_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_8888_8888_ca_init, \
+ pixman_composite_over_n_8888_8888_ca_cleanup, \
+ pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \
+ pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \
+ pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_add_n_8_8_process_pixblock_head
+ /* expecting source data in {d8, d9, d10, d11} */
+ /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
+ /* and destination data in {d4, d5, d6, d7} */
+ /* mask is in d24, d25, d26, d27 */
+ vmull.u8 q0, d24, d11
+ vmull.u8 q1, d25, d11
+ vmull.u8 q6, d26, d11
+ vmull.u8 q7, d27, d11
+ vrshr.u16 q10, q0, #8
+ vrshr.u16 q11, q1, #8
+ vrshr.u16 q12, q6, #8
+ vrshr.u16 q13, q7, #8
+ vraddhn.u16 d0, q0, q10
+ vraddhn.u16 d1, q1, q11
+ vraddhn.u16 d2, q6, q12
+ vraddhn.u16 d3, q7, q13
+ vqadd.u8 q14, q0, q2
+ vqadd.u8 q15, q1, q3
+.endm
+
+.macro pixman_composite_add_n_8_8_process_pixblock_tail
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_add_n_8_8_process_pixblock_tail_head
+ pixman_composite_add_n_8_8_process_pixblock_tail
+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vld1.8 {d24, d25, d26, d27}, [MASK]!
+ cache_preload 32, 32
+ pixman_composite_add_n_8_8_process_pixblock_head
+.endm
+
+.macro pixman_composite_add_n_8_8_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vpush {d8-d15}
+ vld1.32 {d11[0]}, [DUMMY]
+ vdup.8 d11, d11[3]
+.endm
+
+.macro pixman_composite_add_n_8_8_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_add_n_8_8_asm_neon, 0, 8, 8, \
+ FLAG_DST_READWRITE, \
+ 32, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_add_n_8_8_init, \
+ pixman_composite_add_n_8_8_cleanup, \
+ pixman_composite_add_n_8_8_process_pixblock_head, \
+ pixman_composite_add_n_8_8_process_pixblock_tail, \
+ pixman_composite_add_n_8_8_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_add_8_8_8_process_pixblock_head
+ /* expecting source data in {d0, d1, d2, d3} */
+ /* destination data in {d4, d5, d6, d7} */
+ /* mask in {d24, d25, d26, d27} */
+ vmull.u8 q8, d24, d0
+ vmull.u8 q9, d25, d1
+ vmull.u8 q10, d26, d2
+ vmull.u8 q11, d27, d3
+ vrshr.u16 q0, q8, #8
+ vrshr.u16 q1, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d0, q0, q8
+ vraddhn.u16 d1, q1, q9
+ vraddhn.u16 d2, q12, q10
+ vraddhn.u16 d3, q13, q11
+ vqadd.u8 q14, q0, q2
+ vqadd.u8 q15, q1, q3
+.endm
+
+.macro pixman_composite_add_8_8_8_process_pixblock_tail
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_add_8_8_8_process_pixblock_tail_head
+ pixman_composite_add_8_8_8_process_pixblock_tail
+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vld1.8 {d24, d25, d26, d27}, [MASK]!
+ vld1.8 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 32, 32
+ pixman_composite_add_8_8_8_process_pixblock_head
+.endm
+
+.macro pixman_composite_add_8_8_8_init
+.endm
+
+.macro pixman_composite_add_8_8_8_cleanup
+.endm
+
+generate_composite_function \
+ pixman_composite_add_8_8_8_asm_neon, 8, 8, 8, \
+ FLAG_DST_READWRITE, \
+ 32, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_add_8_8_8_init, \
+ pixman_composite_add_8_8_8_cleanup, \
+ pixman_composite_add_8_8_8_process_pixblock_head, \
+ pixman_composite_add_8_8_8_process_pixblock_tail, \
+ pixman_composite_add_8_8_8_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_add_8888_8888_8888_process_pixblock_head
+ /* expecting source data in {d0, d1, d2, d3} */
+ /* destination data in {d4, d5, d6, d7} */
+ /* mask in {d24, d25, d26, d27} */
+ vmull.u8 q8, d27, d0
+ vmull.u8 q9, d27, d1
+ vmull.u8 q10, d27, d2
+ vmull.u8 q11, d27, d3
+ vrshr.u16 q0, q8, #8
+ vrshr.u16 q1, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d0, q0, q8
+ vraddhn.u16 d1, q1, q9
+ vraddhn.u16 d2, q12, q10
+ vraddhn.u16 d3, q13, q11
+ vqadd.u8 q14, q0, q2
+ vqadd.u8 q15, q1, q3
+.endm
+
+.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vld4.8 {d24, d25, d26, d27}, [MASK]!
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 8, 8
+ pixman_composite_add_8888_8888_8888_process_pixblock_head
+.endm
+
+generate_composite_function \
+ pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_head, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
+
+generate_composite_function_single_scanline \
+ pixman_composite_scanline_add_mask_asm_neon, 32, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_head, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_over_8888_n_8888_process_pixblock_head
+ /* expecting source data in {d0, d1, d2, d3} */
+ /* destination data in {d4, d5, d6, d7} */
+ /* solid mask is in d15 */
+
+ /* 'in' */
+ vmull.u8 q8, d15, d3
+ vmull.u8 q6, d15, d2
+ vmull.u8 q5, d15, d1
+ vmull.u8 q4, d15, d0
+ vrshr.u16 q13, q8, #8
+ vrshr.u16 q12, q6, #8
+ vrshr.u16 q11, q5, #8
+ vrshr.u16 q10, q4, #8
+ vraddhn.u16 d3, q8, q13
+ vraddhn.u16 d2, q6, q12
+ vraddhn.u16 d1, q5, q11
+ vraddhn.u16 d0, q4, q10
+ vmvn.8 d24, d3 /* get inverted alpha */
+ /* now do alpha blending */
+ vmull.u8 q8, d24, d4
+ vmull.u8 q9, d24, d5
+ vmull.u8 q10, d24, d6
+ vmull.u8 q11, d24, d7
+.endm
+
+.macro pixman_composite_over_8888_n_8888_process_pixblock_tail
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ pixman_composite_over_8888_n_8888_process_pixblock_tail
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 8, 8
+ pixman_composite_over_8888_n_8888_process_pixblock_head
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_over_8888_n_8888_init
+ add DUMMY, sp, #48
+ vpush {d8-d15}
+ vld1.32 {d15[0]}, [DUMMY]
+ vdup.8 d15, d15[3]
+.endm
+
+.macro pixman_composite_over_8888_n_8888_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_8888_n_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_8888_n_8888_init, \
+ pixman_composite_over_8888_n_8888_cleanup, \
+ pixman_composite_over_8888_n_8888_process_pixblock_head, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ pixman_composite_over_8888_n_8888_process_pixblock_tail
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 8, 8
+ vld4.8 {d12, d13, d14, d15}, [MASK]!
+ pixman_composite_over_8888_n_8888_process_pixblock_head
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_over_8888_8888_8888_init
+ vpush {d8-d15}
+.endm
+
+.macro pixman_composite_over_8888_8888_8888_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_8888_8888_8888_init, \
+ pixman_composite_over_8888_8888_8888_cleanup, \
+ pixman_composite_over_8888_n_8888_process_pixblock_head, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 12 /* mask_basereg */
+
+generate_composite_function_single_scanline \
+ pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ pixman_composite_over_8888_8888_8888_init, \
+ pixman_composite_over_8888_8888_8888_cleanup, \
+ pixman_composite_over_8888_n_8888_process_pixblock_head, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 12 /* mask_basereg */
+
+/******************************************************************************/
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ pixman_composite_over_8888_n_8888_process_pixblock_tail
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 8, 8
+ vld1.8 {d15}, [MASK]!
+ pixman_composite_over_8888_n_8888_process_pixblock_head
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_over_8888_8_8888_init
+ vpush {d8-d15}
+.endm
+
+.macro pixman_composite_over_8888_8_8888_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_8888_8_8888_init, \
+ pixman_composite_over_8888_8_8888_cleanup, \
+ pixman_composite_over_8888_n_8888_process_pixblock_head, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8_8888_process_pixblock_tail_head \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 15 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0888_0888_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_0888_0888_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_0888_0888_process_pixblock_tail_head
+ vst3.8 {d0, d1, d2}, [DST_W]!
+ vld3.8 {d0, d1, d2}, [SRC]!
+ cache_preload 8, 8
+.endm
+
+generate_composite_function \
+ pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \
+ FLAG_DST_WRITEONLY, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_0888_0888_process_pixblock_head, \
+ pixman_composite_src_0888_0888_process_pixblock_tail, \
+ pixman_composite_src_0888_0888_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0888_8888_rev_process_pixblock_head
+ vswp d0, d2
+.endm
+
+.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head
+ vst4.8 {d0, d1, d2, d3}, [DST_W]!
+ vld3.8 {d0, d1, d2}, [SRC]!
+ vswp d0, d2
+ cache_preload 8, 8
+.endm
+
+.macro pixman_composite_src_0888_8888_rev_init
+ veor d3, d3, d3
+.endm
+
+generate_composite_function \
+ pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ pixman_composite_src_0888_8888_rev_init, \
+ default_cleanup, \
+ pixman_composite_src_0888_8888_rev_process_pixblock_head, \
+ pixman_composite_src_0888_8888_rev_process_pixblock_tail, \
+ pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_head
+ vshll.u8 q8, d1, #8
+ vshll.u8 q9, d2, #8
+.endm
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail
+ vshll.u8 q14, d0, #8
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+.endm
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head
+ vshll.u8 q14, d0, #8
+ vld3.8 {d0, d1, d2}, [SRC]!
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+ vshll.u8 q8, d1, #8
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ vshll.u8 q9, d2, #8
+.endm
+
+generate_composite_function \
+ pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \
+ FLAG_DST_WRITEONLY, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_0888_0565_rev_process_pixblock_head, \
+ pixman_composite_src_0888_0565_rev_process_pixblock_tail, \
+ pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_pixbuf_8888_process_pixblock_head
+ vmull.u8 q8, d3, d0
+ vmull.u8 q9, d3, d1
+ vmull.u8 q10, d3, d2
+.endm
+
+.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail
+ vrshr.u16 q11, q8, #8
+ vswp d3, d31
+ vrshr.u16 q12, q9, #8
+ vrshr.u16 q13, q10, #8
+ vraddhn.u16 d30, q11, q8
+ vraddhn.u16 d29, q12, q9
+ vraddhn.u16 d28, q13, q10
+.endm
+
+.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head
+ vrshr.u16 q11, q8, #8
+ vswp d3, d31
+ vrshr.u16 q12, q9, #8
+ vrshr.u16 q13, q10, #8
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ vraddhn.u16 d30, q11, q8
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vraddhn.u16 d29, q12, q9
+ vraddhn.u16 d28, q13, q10
+ vmull.u8 q8, d3, d0
+ vmull.u8 q9, d3, d1
+ vmull.u8 q10, d3, d2
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF cmp PF_X, ORIG_W
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+.endm
+
+generate_composite_function \
+ pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_pixbuf_8888_process_pixblock_head, \
+ pixman_composite_src_pixbuf_8888_process_pixblock_tail, \
+ pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
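The vmull + vrshr #8 + vraddhn sequence in the pixbuf head and tail macros
above multiplies each color channel by the alpha channel with correct
rounding, i.e. it computes round(x * a / 255). A scalar sketch of the same
idiom (the helper name mul_255 is illustrative only):

    #include <stdint.h>

    /* (x * a + 128 + ((x * a + 128) >> 8)) >> 8 is the exact, rounded
     * division by 255 that vrshr/vraddhn implement in the code above. */
    static inline uint8_t
    mul_255 (uint8_t x, uint8_t a)
    {
        uint32_t t = (uint32_t) x * a + 128;
        return (uint8_t) ((t + (t >> 8)) >> 8);
    }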
diff --git a/lib/pixman/pixman/pixman-arm-neon-asm.h b/lib/pixman/pixman/pixman-arm-neon-asm.h
new file mode 100644
index 000000000..583b96567
--- /dev/null
+++ b/lib/pixman/pixman/pixman-arm-neon-asm.h
@@ -0,0 +1,906 @@
+/*
+ * Copyright © 2009 Nokia Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
+ */
+
+/*
+ * This file contains a macro ('generate_composite_function') which can
+ * construct 2D image processing functions based on a common template.
+ * Any combination of source, destination and mask images with 8bpp,
+ * 16bpp, 24bpp or 32bpp color formats is supported.
+ *
+ * This macro takes care of:
+ * - handling of leading and trailing unaligned pixels
+ * - doing most of the work related to L2 cache preload
+ * - encouraging the use of software pipelining for better instruction
+ *   scheduling
+ *
+ * The user of this macro has to provide some configuration parameters
+ * (bit depths for the images, prefetch distance, etc.) and a set of
+ * macros that implement the basic code chunks responsible for pixel
+ * processing. See the 'pixman-arm-neon-asm.S' file for usage
+ * examples.
+ *
+ * TODO:
+ * - try overlapped pixel method (from Ian Rickards) when processing
+ * exactly two blocks of pixels
+ * - maybe add an option to do reverse scanline processing
+ */
+
+/*
+ * Bit flags for the 'generate_composite_function' macro, used to tune
+ * the behavior of the generated functions.
+ */
+.set FLAG_DST_WRITEONLY, 0
+.set FLAG_DST_READWRITE, 1
+.set FLAG_DEINTERLEAVE_32BPP, 2
+
+/*
+ * Offset into the stack where the mask and source pointer/stride can be
+ * accessed from the 'init' macro. This is useful for special handling of a
+ * solid mask.
+ */
+.set ARGS_STACK_OFFSET, 40
+
+/*
+ * Constants for selecting the preferred prefetch type.
+ */
+.set PREFETCH_TYPE_NONE, 0 /* No prefetch at all */
+.set PREFETCH_TYPE_SIMPLE, 1 /* A simple, fixed-distance-ahead prefetch */
+.set PREFETCH_TYPE_ADVANCED, 2 /* Advanced fine-grained prefetch */
+
+/*
+ * Definitions of supplementary pixld/pixst macros (for partial load/store of
+ * pixel data).
+ */
+
+.macro pixldst1 op, elem_size, reg1, mem_operand, abits
+.if abits > 0
+ op&.&elem_size {d&reg1}, [&mem_operand&, :&abits&]!
+.else
+ op&.&elem_size {d&reg1}, [&mem_operand&]!
+.endif
+.endm
+
+.macro pixldst2 op, elem_size, reg1, reg2, mem_operand, abits
+.if abits > 0
+ op&.&elem_size {d&reg1, d&reg2}, [&mem_operand&, :&abits&]!
+.else
+ op&.&elem_size {d&reg1, d&reg2}, [&mem_operand&]!
+.endif
+.endm
+
+.macro pixldst4 op, elem_size, reg1, reg2, reg3, reg4, mem_operand, abits
+.if abits > 0
+ op&.&elem_size {d&reg1, d&reg2, d&reg3, d&reg4}, [&mem_operand&, :&abits&]!
+.else
+ op&.&elem_size {d&reg1, d&reg2, d&reg3, d&reg4}, [&mem_operand&]!
+.endif
+.endm
+
+.macro pixldst0 op, elem_size, reg1, idx, mem_operand, abits
+ op&.&elem_size {d&reg1[idx]}, [&mem_operand&]!
+.endm
+
+.macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand
+ op&.&elem_size {d&reg1, d&reg2, d&reg3}, [&mem_operand&]!
+.endm
+
+.macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand
+ op&.&elem_size {d&reg1[idx], d&reg2[idx], d&reg3[idx]}, [&mem_operand&]!
+.endm
+
+.macro pixldst numbytes, op, elem_size, basereg, mem_operand, abits
+.if numbytes == 32
+ pixldst4 op, elem_size, %(basereg+4), %(basereg+5), \
+ %(basereg+6), %(basereg+7), mem_operand, abits
+.elseif numbytes == 16
+ pixldst2 op, elem_size, %(basereg+2), %(basereg+3), mem_operand, abits
+.elseif numbytes == 8
+ pixldst1 op, elem_size, %(basereg+1), mem_operand, abits
+.elseif numbytes == 4
+ .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 32)
+ pixldst0 op, 32, %(basereg+0), 1, mem_operand, abits
+ .elseif elem_size == 16
+ pixldst0 op, 16, %(basereg+0), 2, mem_operand, abits
+ pixldst0 op, 16, %(basereg+0), 3, mem_operand, abits
+ .else
+ pixldst0 op, 8, %(basereg+0), 4, mem_operand, abits
+ pixldst0 op, 8, %(basereg+0), 5, mem_operand, abits
+ pixldst0 op, 8, %(basereg+0), 6, mem_operand, abits
+ pixldst0 op, 8, %(basereg+0), 7, mem_operand, abits
+ .endif
+.elseif numbytes == 2
+ .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 16)
+ pixldst0 op, 16, %(basereg+0), 1, mem_operand, abits
+ .else
+ pixldst0 op, 8, %(basereg+0), 2, mem_operand, abits
+ pixldst0 op, 8, %(basereg+0), 3, mem_operand, abits
+ .endif
+.elseif numbytes == 1
+ pixldst0 op, 8, %(basereg+0), 1, mem_operand, abits
+.else
+ .error "unsupported size: numbytes"
+.endif
+.endm
+
+.macro pixld numpix, bpp, basereg, mem_operand, abits=0
+.if bpp > 0
+.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
+ pixldst4 vld4, 8, %(basereg+4), %(basereg+5), \
+ %(basereg+6), %(basereg+7), mem_operand, abits
+.elseif (bpp == 24) && (numpix == 8)
+ pixldst3 vld3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
+.elseif (bpp == 24) && (numpix == 4)
+ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
+ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
+ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
+ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
+.elseif (bpp == 24) && (numpix == 2)
+ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
+ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
+.elseif (bpp == 24) && (numpix == 1)
+ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
+.else
+ pixldst %(numpix * bpp / 8), vld1, %(bpp), basereg, mem_operand, abits
+.endif
+.endif
+.endm
+
+.macro pixst numpix, bpp, basereg, mem_operand, abits=0
+.if bpp > 0
+.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
+ pixldst4 vst4, 8, %(basereg+4), %(basereg+5), \
+ %(basereg+6), %(basereg+7), mem_operand, abits
+.elseif (bpp == 24) && (numpix == 8)
+ pixldst3 vst3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
+.elseif (bpp == 24) && (numpix == 4)
+ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
+ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
+ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
+ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
+.elseif (bpp == 24) && (numpix == 2)
+ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
+ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
+.elseif (bpp == 24) && (numpix == 1)
+ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
+.else
+ pixldst %(numpix * bpp / 8), vst1, %(bpp), basereg, mem_operand, abits
+.endif
+.endif
+.endm
+
+.macro pixld_a numpix, bpp, basereg, mem_operand
+.if (bpp * numpix) <= 128
+ pixld numpix, bpp, basereg, mem_operand, %(bpp * numpix)
+.else
+ pixld numpix, bpp, basereg, mem_operand, 128
+.endif
+.endm
+
+.macro pixst_a numpix, bpp, basereg, mem_operand
+.if (bpp * numpix) <= 128
+ pixst numpix, bpp, basereg, mem_operand, %(bpp * numpix)
+.else
+ pixst numpix, bpp, basereg, mem_operand, 128
+.endif
+.endm
+
+.macro vuzp8 reg1, reg2
+ vuzp.8 d&reg1, d&reg2
+.endm
+
+.macro vzip8 reg1, reg2
+ vzip.8 d&reg1, d&reg2
+.endm
+
+/* deinterleave B, G, R, A channels for eight 32bpp pixels in 4 registers */
+.macro pixdeinterleave bpp, basereg
+.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
+ vuzp8 %(basereg+0), %(basereg+1)
+ vuzp8 %(basereg+2), %(basereg+3)
+ vuzp8 %(basereg+1), %(basereg+3)
+ vuzp8 %(basereg+0), %(basereg+2)
+.endif
+.endm
+
+/* interleave B, G, R, A channels for eight 32bpp pixels in 4 registers */
+.macro pixinterleave bpp, basereg
+.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
+ vzip8 %(basereg+0), %(basereg+2)
+ vzip8 %(basereg+1), %(basereg+3)
+ vzip8 %(basereg+2), %(basereg+3)
+ vzip8 %(basereg+0), %(basereg+1)
+.endif
+.endm
+
+/*
+ * This is a macro for implementing cache preload. The main idea is that
+ * the cache preload logic is mostly independent of the rest of the pixel
+ * processing code. It starts at the top left pixel and moves forward
+ * across pixels and can jump across scanlines. Prefetch distance is
+ * handled in an 'incremental' way: it starts from 0 and advances to the
+ * optimal distance over time. After reaching the optimal prefetch distance,
+ * it is kept constant. There are some checks which prevent prefetching
+ * unneeded pixel lines below the image (but it still can prefetch a bit
+ * more data on the right side of the image - not a big issue and may
+ * actually be helpful when rendering text glyphs). An additional trick is
+ * the use of an LDR instruction for prefetch instead of PLD when moving to
+ * the next line: we have a high chance of getting a TLB miss in that case,
+ * and PLD would be useless.
+ *
+ * This sounds like it may introduce a noticeable overhead (when working with
+ * fully cached data). But in reality, due to having a separate pipeline and
+ * instruction queue for the NEON unit in ARM Cortex-A8, normal ARM code can
+ * execute simultaneously with NEON and be completely shadowed by it. Thus
+ * we get no performance overhead at all (*). This looks like a very nice
+ * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher,
+ * but we can still implement some rather advanced prefetch logic in software
+ * for almost zero cost!
+ *
+ * (*) The overhead of the prefetcher is visible when running some trivial
+ * pixel processing such as a simple copy. Anyway, having prefetch is a must
+ * when working with graphics data.
+ */
+.macro PF a, x:vararg
+.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_ADVANCED)
+ a x
+.endif
+.endm
+
+.macro cache_preload std_increment, boost_increment
+.if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0)
+.if regs_shortage
+ PF ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */
+.endif
+.if std_increment != 0
+ PF add PF_X, PF_X, #std_increment
+.endif
+ PF tst PF_CTL, #0xF
+ PF addne PF_X, PF_X, #boost_increment
+ PF subne PF_CTL, PF_CTL, #1
+ PF cmp PF_X, ORIG_W
+.if src_bpp_shift >= 0
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+.endif
+.if dst_r_bpp != 0
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+.endif
+.if mask_bpp_shift >= 0
+ PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift]
+.endif
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+.if src_bpp_shift >= 0
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+.endif
+.if dst_r_bpp != 0
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+.endif
+.if mask_bpp_shift >= 0
+ PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
+.endif
+.endif
+.endm
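A scalar model of the advanced prefetch implemented by the PF and
cache_preload macros may help when reading the assembly: PF_X walks ahead of
the current pixel, the low nibble of PF_CTL counts how many times the
prefetch distance is still allowed to grow, and the high bits count the
scanlines left to prefetch. The sketch below is an approximation under those
assumptions (hypothetical helper, source stream only, callbacks stand in for
PLD and the line-advancing LDR), not pixman code:

    /* Scalar model of PF / cache_preload; 'prefetch' plays the role of PLD,
     * 'touch_next_line' the role of the LDR that forces a TLB fill when the
     * prefetch position wraps onto the next scanline. */
    static void
    model_cache_preload (int width, int height, int pixblock_size,
                         int prefetch_distance,
                         void (*prefetch) (int line, int x),
                         void (*touch_next_line) (int line))
    {
        int pf_x    = 0;                                       /* PF_X   */
        int pf_ctl  = prefetch_distance | ((height - 1) << 4); /* PF_CTL */
        int pf_line = 0;

        for (int y = 0; y < height; y++)
        {
            for (int x = 0; x < width; x += pixblock_size)
            {
                pf_x += pixblock_size;         /* std_increment           */
                if (pf_ctl & 0xf)
                {
                    pf_x += pixblock_size;     /* boost_increment         */
                    pf_ctl--;                  /* distance grew one step  */
                }
                prefetch (pf_line, pf_x);      /* PLD on the current line */
                if (pf_x >= width)             /* wrapped past the line?  */
                {
                    pf_x -= width;
                    pf_ctl -= 0x10;            /* one scanline consumed   */
                    if (pf_ctl >= 0)           /* lines left to prefetch  */
                        touch_next_line (++pf_line);
                }
            }
        }
    }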
+
+.macro cache_preload_simple
+.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_SIMPLE)
+.if src_bpp > 0
+ pld [SRC, #(PREFETCH_DISTANCE_SIMPLE * src_bpp / 8)]
+.endif
+.if dst_r_bpp > 0
+ pld [DST_R, #(PREFETCH_DISTANCE_SIMPLE * dst_r_bpp / 8)]
+.endif
+.if mask_bpp > 0
+ pld [MASK, #(PREFETCH_DISTANCE_SIMPLE * mask_bpp / 8)]
+.endif
+.endif
+.endm
+
+/*
+ * Macro which is used to process leading pixels until the destination
+ * pointer is properly aligned (at a 16-byte boundary). Destination buffers
+ * in a 24bpp format are not aligned here (note the 'dst_w_bpp != 24' check).
+ */
+.macro ensure_destination_ptr_alignment process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+.if dst_w_bpp != 24
+ tst DST_R, #0xF
+ beq 2f
+
+.irp lowbit, 1, 2, 4, 8, 16
+local skip1
+.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
+.if lowbit < 16 /* we don't need more than 16-byte alignment */
+ tst DST_R, #lowbit
+ beq 1f
+.endif
+ pixld (lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC
+ pixld (lowbit * 8 / dst_w_bpp), mask_bpp, mask_basereg, MASK
+.if dst_r_bpp > 0
+ pixld_a (lowbit * 8 / dst_r_bpp), dst_r_bpp, dst_r_basereg, DST_R
+.else
+ add DST_R, DST_R, #lowbit
+.endif
+ PF add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp)
+ sub W, W, #(lowbit * 8 / dst_w_bpp)
+1:
+.endif
+.endr
+ pixdeinterleave src_bpp, src_basereg
+ pixdeinterleave mask_bpp, mask_basereg
+ pixdeinterleave dst_r_bpp, dst_r_basereg
+
+ process_pixblock_head
+ cache_preload 0, pixblock_size
+ cache_preload_simple
+ process_pixblock_tail
+
+ pixinterleave dst_w_bpp, dst_w_basereg
+.irp lowbit, 1, 2, 4, 8, 16
+.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
+.if lowbit < 16 /* we don't need more than 16-byte alignment */
+ tst DST_W, #lowbit
+ beq 1f
+.endif
+ pixst_a (lowbit * 8 / dst_w_bpp), dst_w_bpp, dst_w_basereg, DST_W
+1:
+.endif
+.endr
+.endif
+2:
+.endm
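As a concrete illustration of the alignment pass above: for a non-24bpp
destination, the number of leading pixels consumed before the 16-byte
boundary is just the byte gap divided by the pixel size. The helper below is
hypothetical, for illustration only:

    #include <stdint.h>

    /* e.g. a 32bpp destination ending in 0x4 needs (16 - 4) / 4 = 3 leading
     * pixels, which the .irp above covers as a 4-byte step plus an 8-byte
     * step. */
    static inline int
    leading_pixels (uintptr_t dst, int dst_bpp)
    {
        return (int) (((0 - dst) & 15) / (unsigned) (dst_bpp / 8));
    }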
+
+/*
+ * Special code for processing up to (pixblock_size - 1) remaining
+ * trailing pixels. As SIMD processing performs operations on
+ * pixblock_size pixels, anything smaller than this has to be loaded
+ * and stored in a special way. Loading and storing of pixel data is
+ * performed in such a way that we fill some 'slots' in the NEON
+ * registers (some slots naturally are unused), then perform compositing
+ * operation as usual. In the end, the data is taken from these 'slots'
+ * and saved to memory.
+ *
+ * cache_preload_flag - prefetch is suppressed when
+ *                      set to 0
+ * dst_aligned_flag   - selects whether the destination
+ *                      buffer is aligned
+ */
+.macro process_trailing_pixels cache_preload_flag, \
+ dst_aligned_flag, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+ tst W, #(pixblock_size - 1)
+ beq 2f
+.irp chunk_size, 16, 8, 4, 2, 1
+.if pixblock_size > chunk_size
+ tst W, #chunk_size
+ beq 1f
+ pixld chunk_size, src_bpp, src_basereg, SRC
+ pixld chunk_size, mask_bpp, mask_basereg, MASK
+.if dst_aligned_flag != 0
+ pixld_a chunk_size, dst_r_bpp, dst_r_basereg, DST_R
+.else
+ pixld chunk_size, dst_r_bpp, dst_r_basereg, DST_R
+.endif
+.if cache_preload_flag != 0
+ PF add PF_X, PF_X, #chunk_size
+.endif
+1:
+.endif
+.endr
+ pixdeinterleave src_bpp, src_basereg
+ pixdeinterleave mask_bpp, mask_basereg
+ pixdeinterleave dst_r_bpp, dst_r_basereg
+
+ process_pixblock_head
+.if cache_preload_flag != 0
+ cache_preload 0, pixblock_size
+ cache_preload_simple
+.endif
+ process_pixblock_tail
+ pixinterleave dst_w_bpp, dst_w_basereg
+.irp chunk_size, 16, 8, 4, 2, 1
+.if pixblock_size > chunk_size
+ tst W, #chunk_size
+ beq 1f
+.if dst_aligned_flag != 0
+ pixst_a chunk_size, dst_w_bpp, dst_w_basereg, DST_W
+.else
+ pixst chunk_size, dst_w_bpp, dst_w_basereg, DST_W
+.endif
+1:
+.endif
+.endr
+2:
+.endm
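Seen from C, the .irp loops above simply split the leftover w mod
pixblock_size pixels into power-of-two chunks, largest first; with
pixblock_size == 8, for example, 7 trailing pixels are handled as 4 + 2 + 1.
A rough sketch (hypothetical helper, not pixman code):

    /* Visit the trailing-pixel chunks exactly in the order the
     * '.irp chunk_size, 16, 8, 4, 2, 1' loops above do. */
    static void
    trailing_chunks (int w, int pixblock_size, void (*handle_chunk) (int size))
    {
        int remaining = w & (pixblock_size - 1);
        for (int size = 16; size >= 1; size /= 2)
            if (pixblock_size > size && (remaining & size))
                handle_chunk (size);
    }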
+
+/*
+ * Macro which performs all the operations needed to switch to the next
+ * scanline and start the next loop iteration, unless all the scanlines
+ * have already been processed.
+ */
+.macro advance_to_next_scanline start_of_loop_label
+.if regs_shortage
+ ldrd W, [sp] /* load W and H (width and height) from stack */
+.else
+ mov W, ORIG_W
+.endif
+ add DST_W, DST_W, DST_STRIDE, lsl #dst_bpp_shift
+.if src_bpp != 0
+ add SRC, SRC, SRC_STRIDE, lsl #src_bpp_shift
+.endif
+.if mask_bpp != 0
+ add MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift
+.endif
+.if (dst_w_bpp != 24)
+ sub DST_W, DST_W, W, lsl #dst_bpp_shift
+.endif
+.if (src_bpp != 24) && (src_bpp != 0)
+ sub SRC, SRC, W, lsl #src_bpp_shift
+.endif
+.if (mask_bpp != 24) && (mask_bpp != 0)
+ sub MASK, MASK, W, lsl #mask_bpp_shift
+.endif
+ subs H, H, #1
+ mov DST_R, DST_W
+.if regs_shortage
+ str H, [sp, #4] /* save updated height to stack */
+.endif
+ bge start_of_loop_label
+.endm
+
+/*
+ * Registers are allocated in the following way by default:
+ * d0, d1, d2, d3 - reserved for loading source pixel data
+ * d4, d5, d6, d7 - reserved for loading destination pixel data
+ * d24, d25, d26, d27 - reserved for loading mask pixel data
+ * d28, d29, d30, d31 - final destination pixel data for writeback to memory
+ */
+.macro generate_composite_function fname, \
+ src_bpp_, \
+ mask_bpp_, \
+ dst_w_bpp_, \
+ flags, \
+ pixblock_size_, \
+ prefetch_distance, \
+ init, \
+ cleanup, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head, \
+ dst_w_basereg_ = 28, \
+ dst_r_basereg_ = 4, \
+ src_basereg_ = 0, \
+ mask_basereg_ = 24
+
+ .func fname
+ .global fname
+ /* For ELF format also set function visibility to hidden */
+#ifdef __ELF__
+ .hidden fname
+ .type fname, %function
+#endif
+fname:
+ push {r4-r12, lr} /* save all registers */
+
+/*
+ * Select prefetch type for this function. If prefetch distance is
+ * set to 0 or one of the color formats is 24bpp, SIMPLE prefetch
+ * has to be used instead of ADVANCED.
+ */
+ .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT
+.if prefetch_distance == 0
+ .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
+.elseif (PREFETCH_TYPE_CURRENT > PREFETCH_TYPE_SIMPLE) && \
+ ((src_bpp_ == 24) || (mask_bpp_ == 24) || (dst_w_bpp_ == 24))
+ .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_SIMPLE
+.endif
+
+/*
+ * Make some macro arguments globally visible and accessible
+ * from other macros
+ */
+ .set src_bpp, src_bpp_
+ .set mask_bpp, mask_bpp_
+ .set dst_w_bpp, dst_w_bpp_
+ .set pixblock_size, pixblock_size_
+ .set dst_w_basereg, dst_w_basereg_
+ .set dst_r_basereg, dst_r_basereg_
+ .set src_basereg, src_basereg_
+ .set mask_basereg, mask_basereg_
+
+/*
+ * Assign symbolic names to registers
+ */
+ W .req r0 /* width (is updated during processing) */
+ H .req r1 /* height (is updated during processing) */
+ DST_W .req r2 /* destination buffer pointer for writes */
+ DST_STRIDE .req r3 /* destination image stride */
+ SRC .req r4 /* source buffer pointer */
+ SRC_STRIDE .req r5 /* source image stride */
+ DST_R .req r6 /* destination buffer pointer for reads */
+
+ MASK .req r7 /* mask pointer */
+ MASK_STRIDE .req r8 /* mask stride */
+
+ PF_CTL .req r9 /* combined lines counter and prefetch */
+ /* distance increment counter */
+ PF_X .req r10 /* pixel index in a scanline for current */
+                                /* prefetch position */
+ PF_SRC .req r11 /* pointer to source scanline start */
+ /* for prefetch purposes */
+ PF_DST .req r12 /* pointer to destination scanline start */
+ /* for prefetch purposes */
+ PF_MASK .req r14 /* pointer to mask scanline start */
+ /* for prefetch purposes */
+/*
+ * Check whether we have enough registers for all the local variables.
+ * If we don't have enough registers, the original width and height are
+ * kept on top of the stack (and the 'regs_shortage' variable is set to
+ * indicate this for the rest of the code). Even if there are enough
+ * registers, the allocation scheme may differ a bit depending on whether
+ * the source or the mask is unused.
+ */
+.if (PREFETCH_TYPE_CURRENT < PREFETCH_TYPE_ADVANCED)
+ ORIG_W .req r10 /* saved original width */
+ DUMMY .req r12 /* temporary register */
+ .set regs_shortage, 0
+.elseif mask_bpp == 0
+ ORIG_W .req r7 /* saved original width */
+ DUMMY .req r8 /* temporary register */
+ .set regs_shortage, 0
+.elseif src_bpp == 0
+ ORIG_W .req r4 /* saved original width */
+ DUMMY .req r5 /* temporary register */
+ .set regs_shortage, 0
+.else
+ ORIG_W .req r1 /* saved original width */
+ DUMMY .req r1 /* temporary register */
+ .set regs_shortage, 1
+.endif
+
+ .set mask_bpp_shift, -1
+.if src_bpp == 32
+ .set src_bpp_shift, 2
+.elseif src_bpp == 24
+ .set src_bpp_shift, 0
+.elseif src_bpp == 16
+ .set src_bpp_shift, 1
+.elseif src_bpp == 8
+ .set src_bpp_shift, 0
+.elseif src_bpp == 0
+ .set src_bpp_shift, -1
+.else
+ .error "requested src bpp (src_bpp) is not supported"
+.endif
+.if mask_bpp == 32
+ .set mask_bpp_shift, 2
+.elseif mask_bpp == 24
+ .set mask_bpp_shift, 0
+.elseif mask_bpp == 8
+ .set mask_bpp_shift, 0
+.elseif mask_bpp == 0
+ .set mask_bpp_shift, -1
+.else
+ .error "requested mask bpp (mask_bpp) is not supported"
+.endif
+.if dst_w_bpp == 32
+ .set dst_bpp_shift, 2
+.elseif dst_w_bpp == 24
+ .set dst_bpp_shift, 0
+.elseif dst_w_bpp == 16
+ .set dst_bpp_shift, 1
+.elseif dst_w_bpp == 8
+ .set dst_bpp_shift, 0
+.else
+ .error "requested dst bpp (dst_w_bpp) is not supported"
+.endif
+
+.if (((flags) & FLAG_DST_READWRITE) != 0)
+ .set dst_r_bpp, dst_w_bpp
+.else
+ .set dst_r_bpp, 0
+.endif
+.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
+ .set DEINTERLEAVE_32BPP_ENABLED, 1
+.else
+ .set DEINTERLEAVE_32BPP_ENABLED, 0
+.endif
+
+.if prefetch_distance < 0 || prefetch_distance > 15
+ .error "invalid prefetch distance (prefetch_distance)"
+.endif
+
+.if src_bpp > 0
+ ldr SRC, [sp, #40]
+.endif
+.if mask_bpp > 0
+ ldr MASK, [sp, #48]
+.endif
+ PF mov PF_X, #0
+.if src_bpp > 0
+ ldr SRC_STRIDE, [sp, #44]
+.endif
+.if mask_bpp > 0
+ ldr MASK_STRIDE, [sp, #52]
+.endif
+ mov DST_R, DST_W
+
+.if src_bpp == 24
+ sub SRC_STRIDE, SRC_STRIDE, W
+ sub SRC_STRIDE, SRC_STRIDE, W, lsl #1
+.endif
+.if mask_bpp == 24
+ sub MASK_STRIDE, MASK_STRIDE, W
+ sub MASK_STRIDE, MASK_STRIDE, W, lsl #1
+.endif
+.if dst_w_bpp == 24
+ sub DST_STRIDE, DST_STRIDE, W
+ sub DST_STRIDE, DST_STRIDE, W, lsl #1
+.endif
+
+/*
+ * Set up the advanced prefetcher's initial state
+ */
+ PF mov PF_SRC, SRC
+ PF mov PF_DST, DST_R
+ PF mov PF_MASK, MASK
+ /* PF_CTL = prefetch_distance | ((h - 1) << 4) */
+ PF mov PF_CTL, H, lsl #4
+ PF add PF_CTL, #(prefetch_distance - 0x10)
+
+ init
+.if regs_shortage
+ push {r0, r1}
+.endif
+ subs H, H, #1
+.if regs_shortage
+ str H, [sp, #4] /* save updated height to stack */
+.else
+ mov ORIG_W, W
+.endif
+ blt 9f
+ cmp W, #(pixblock_size * 2)
+ blt 8f
+/*
+ * This is the start of the pipelined loop, which is optimized for
+ * long scanlines
+ */
+0:
+ ensure_destination_ptr_alignment process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+
+ /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
+ pixld_a pixblock_size, dst_r_bpp, \
+ (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
+ pixld pixblock_size, src_bpp, \
+ (src_basereg - pixblock_size * src_bpp / 64), SRC
+ pixld pixblock_size, mask_bpp, \
+ (mask_basereg - pixblock_size * mask_bpp / 64), MASK
+ PF add PF_X, PF_X, #pixblock_size
+ process_pixblock_head
+ cache_preload 0, pixblock_size
+ cache_preload_simple
+ subs W, W, #(pixblock_size * 2)
+ blt 2f
+1:
+ process_pixblock_tail_head
+ cache_preload_simple
+ subs W, W, #pixblock_size
+ bge 1b
+2:
+ process_pixblock_tail
+ pixst_a pixblock_size, dst_w_bpp, \
+ (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
+
+ /* Process the remaining trailing pixels in the scanline */
+ process_trailing_pixels 1, 1, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+ advance_to_next_scanline 0b
+
+.if regs_shortage
+ pop {r0, r1}
+.endif
+ cleanup
+ pop {r4-r12, pc} /* exit */
+/*
+ * This is the start of the loop, designed to process images with small width
+ * (less than pixblock_size * 2 pixels). In this case neither pipelining
+ * nor prefetch is used.
+ */
+8:
+ /* Process exactly pixblock_size pixels if needed */
+ tst W, #pixblock_size
+ beq 1f
+ pixld pixblock_size, dst_r_bpp, \
+ (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
+ pixld pixblock_size, src_bpp, \
+ (src_basereg - pixblock_size * src_bpp / 64), SRC
+ pixld pixblock_size, mask_bpp, \
+ (mask_basereg - pixblock_size * mask_bpp / 64), MASK
+ process_pixblock_head
+ process_pixblock_tail
+ pixst pixblock_size, dst_w_bpp, \
+ (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
+1:
+ /* Process the remaining trailing pixels in the scanline */
+ process_trailing_pixels 0, 0, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+ advance_to_next_scanline 8b
+9:
+.if regs_shortage
+ pop {r0, r1}
+.endif
+ cleanup
+ pop {r4-r12, pc} /* exit */
+
+ .unreq SRC
+ .unreq MASK
+ .unreq DST_R
+ .unreq DST_W
+ .unreq ORIG_W
+ .unreq W
+ .unreq H
+ .unreq SRC_STRIDE
+ .unreq DST_STRIDE
+ .unreq MASK_STRIDE
+ .unreq PF_CTL
+ .unreq PF_X
+ .unreq PF_SRC
+ .unreq PF_DST
+ .unreq PF_MASK
+ .unreq DUMMY
+ .endfunc
+.endm
+
+/*
+ * A simplified variant of the function generation template for single
+ * scanline processing (used to implement the pixman combine functions)
+ */
+.macro generate_composite_function_single_scanline fname, \
+ src_bpp_, \
+ mask_bpp_, \
+ dst_w_bpp_, \
+ flags, \
+ pixblock_size_, \
+ init, \
+ cleanup, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head, \
+ dst_w_basereg_ = 28, \
+ dst_r_basereg_ = 4, \
+ src_basereg_ = 0, \
+ mask_basereg_ = 24
+
+ .func fname
+ .global fname
+ /* For ELF format also set function visibility to hidden */
+#ifdef __ELF__
+ .hidden fname
+ .type fname, %function
+#endif
+fname:
+ .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
+/*
+ * Make some macro arguments globally visible and accessible
+ * from other macros
+ */
+ .set src_bpp, src_bpp_
+ .set mask_bpp, mask_bpp_
+ .set dst_w_bpp, dst_w_bpp_
+ .set pixblock_size, pixblock_size_
+ .set dst_w_basereg, dst_w_basereg_
+ .set dst_r_basereg, dst_r_basereg_
+ .set src_basereg, src_basereg_
+ .set mask_basereg, mask_basereg_
+/*
+ * Assign symbolic names to registers
+ */
+ W .req r0 /* width (is updated during processing) */
+ DST_W .req r1 /* destination buffer pointer for writes */
+ SRC .req r2 /* source buffer pointer */
+ DST_R .req ip /* destination buffer pointer for reads */
+ MASK .req r3 /* mask pointer */
+
+.if (((flags) & FLAG_DST_READWRITE) != 0)
+ .set dst_r_bpp, dst_w_bpp
+.else
+ .set dst_r_bpp, 0
+.endif
+.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
+ .set DEINTERLEAVE_32BPP_ENABLED, 1
+.else
+ .set DEINTERLEAVE_32BPP_ENABLED, 0
+.endif
+
+ init
+ mov DST_R, DST_W
+
+ cmp W, #pixblock_size
+ blt 8f
+
+ ensure_destination_ptr_alignment process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+
+ subs W, W, #pixblock_size
+ blt 7f
+
+ /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
+ pixld_a pixblock_size, dst_r_bpp, \
+ (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
+ pixld pixblock_size, src_bpp, \
+ (src_basereg - pixblock_size * src_bpp / 64), SRC
+ pixld pixblock_size, mask_bpp, \
+ (mask_basereg - pixblock_size * mask_bpp / 64), MASK
+ process_pixblock_head
+ subs W, W, #pixblock_size
+ blt 2f
+1:
+ process_pixblock_tail_head
+ subs W, W, #pixblock_size
+ bge 1b
+2:
+ process_pixblock_tail
+ pixst_a pixblock_size, dst_w_bpp, \
+ (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
+7:
+ /* Process the remaining trailing pixels in the scanline (dst aligned) */
+ process_trailing_pixels 0, 1, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+
+ cleanup
+ bx lr /* exit */
+8:
+ /* Process the remaining trailing pixels in the scanline (dst unaligned) */
+ process_trailing_pixels 0, 0, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+
+ cleanup
+ bx lr /* exit */
+
+ .unreq SRC
+ .unreq MASK
+ .unreq DST_R
+ .unreq DST_W
+ .unreq W
+ .endfunc
+.endm
+
+.macro default_init
+.endm
+
+.macro default_cleanup
+.endm
diff --git a/lib/pixman/pixman/pixman-arm-neon.c b/lib/pixman/pixman/pixman-arm-neon.c
index 8a2d72ea3..6808b3658 100644
--- a/lib/pixman/pixman/pixman-arm-neon.c
+++ b/lib/pixman/pixman/pixman-arm-neon.c
@@ -30,1670 +30,85 @@
#include <config.h>
#endif
-#include <arm_neon.h>
#include <string.h>
#include "pixman-private.h"
-
-/* Deal with an intrinsic that is defined differently in GCC */
-#if !defined(__ARMCC_VERSION) && !defined(__pld)
-#define __pld(_x) __builtin_prefetch (_x)
-#endif
-
-static force_inline uint8x8x4_t
-unpack0565 (uint16x8_t rgb)
-{
- uint16x8_t gb, b;
- uint8x8x4_t res;
-
- res.val[3] = vdup_n_u8 (0);
- gb = vshrq_n_u16 (rgb, 5);
- b = vshrq_n_u16 (rgb, 5 + 6);
-
- res.val[0] = vmovn_u16 (rgb); /* get low 5 bits */
- res.val[1] = vmovn_u16 (gb); /* get mid 6 bits */
- res.val[2] = vmovn_u16 (b); /* get top 5 bits */
-
- res.val[0] = vshl_n_u8 (res.val[0], 3); /* shift to top */
- res.val[1] = vshl_n_u8 (res.val[1], 2); /* shift to top */
- res.val[2] = vshl_n_u8 (res.val[2], 3); /* shift to top */
-
- res.val[0] = vsri_n_u8 (res.val[0], res.val[0], 5);
- res.val[1] = vsri_n_u8 (res.val[1], res.val[1], 6);
- res.val[2] = vsri_n_u8 (res.val[2], res.val[2], 5);
-
- return res;
-}
-
-#ifdef USE_GCC_INLINE_ASM
-/* Some versions of gcc have problems with vshll_n_u8 intrinsic (Bug 23576) */
-#define vshll_n_u8(a, n) ({ uint16x8_t r; \
- asm ("vshll.u8 %q0, %P1, %2\n" : "=w" (r) : "w" (a), "i" (n)); r; })
-#endif
-
-static force_inline uint16x8_t
-pack0565 (uint8x8x4_t s)
-{
- uint16x8_t rgb, val_g, val_r;
-
- rgb = vshll_n_u8 (s.val[2], 8);
- val_g = vshll_n_u8 (s.val[1], 8);
- val_r = vshll_n_u8 (s.val[0], 8);
- rgb = vsriq_n_u16 (rgb, val_g, 5);
- rgb = vsriq_n_u16 (rgb, val_r, 5 + 6);
-
- return rgb;
-}
-
-static force_inline uint8x8_t
-neon2mul (uint8x8_t x,
- uint8x8_t alpha)
-{
- uint16x8_t tmp, tmp2;
- uint8x8_t res;
-
- tmp = vmull_u8 (x, alpha);
- tmp2 = vrshrq_n_u16 (tmp, 8);
- res = vraddhn_u16 (tmp, tmp2);
-
- return res;
-}
-
-static force_inline uint8x8x4_t
-neon8mul (uint8x8x4_t x,
- uint8x8_t alpha)
-{
- uint16x8x4_t tmp;
- uint8x8x4_t res;
- uint16x8_t qtmp1, qtmp2;
-
- tmp.val[0] = vmull_u8 (x.val[0], alpha);
- tmp.val[1] = vmull_u8 (x.val[1], alpha);
- tmp.val[2] = vmull_u8 (x.val[2], alpha);
- tmp.val[3] = vmull_u8 (x.val[3], alpha);
-
- qtmp1 = vrshrq_n_u16 (tmp.val[0], 8);
- qtmp2 = vrshrq_n_u16 (tmp.val[1], 8);
- res.val[0] = vraddhn_u16 (tmp.val[0], qtmp1);
- qtmp1 = vrshrq_n_u16 (tmp.val[2], 8);
- res.val[1] = vraddhn_u16 (tmp.val[1], qtmp2);
- qtmp2 = vrshrq_n_u16 (tmp.val[3], 8);
- res.val[2] = vraddhn_u16 (tmp.val[2], qtmp1);
- res.val[3] = vraddhn_u16 (tmp.val[3], qtmp2);
-
- return res;
-}
-
-static force_inline uint8x8x4_t
-neon8qadd (uint8x8x4_t x,
- uint8x8x4_t y)
-{
- uint8x8x4_t res;
-
- res.val[0] = vqadd_u8 (x.val[0], y.val[0]);
- res.val[1] = vqadd_u8 (x.val[1], y.val[1]);
- res.val[2] = vqadd_u8 (x.val[2], y.val[2]);
- res.val[3] = vqadd_u8 (x.val[3], y.val[3]);
-
- return res;
-}
-
-static void
-neon_composite_add_8000_8000 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *src_line, *src;
- int dst_stride, src_stride;
- uint16_t w;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
- if (width >= 8)
- {
- /* Use overlapping 8-pixel method */
- while (height--)
- {
- uint8_t *keep_dst = 0;
- uint8x8_t sval, dval, temp;
-
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
-#ifndef USE_GCC_INLINE_ASM
- sval = vld1_u8 ((void *)src);
- dval = vld1_u8 ((void *)dst);
- keep_dst = dst;
-
- temp = vqadd_u8 (dval, sval);
-
- src += (w & 7);
- dst += (w & 7);
- w -= (w & 7);
-
- while (w)
- {
- sval = vld1_u8 ((void *)src);
- dval = vld1_u8 ((void *)dst);
-
- vst1_u8 ((void *)keep_dst, temp);
- keep_dst = dst;
-
- temp = vqadd_u8 (dval, sval);
-
- src += 8;
- dst += 8;
- w -= 8;
- }
-
- vst1_u8 ((void *)keep_dst, temp);
-#else
- asm volatile (
-/* avoid using d8-d15 (q4-q7) aapcs callee-save registers */
- "vld1.8 {d0}, [%[src]]\n\t"
- "vld1.8 {d4}, [%[dst]]\n\t"
- "mov %[keep_dst], %[dst]\n\t"
-
- "and ip, %[w], #7\n\t"
- "add %[src], %[src], ip\n\t"
- "add %[dst], %[dst], ip\n\t"
- "subs %[w], %[w], ip\n\t"
- "b 9f\n\t"
-/* LOOP */
- "2:\n\t"
- "vld1.8 {d0}, [%[src]]!\n\t"
- "vld1.8 {d4}, [%[dst]]!\n\t"
- "vst1.8 {d20}, [%[keep_dst]]\n\t"
- "sub %[keep_dst], %[dst], #8\n\t"
- "subs %[w], %[w], #8\n\t"
- "9:\n\t"
- "vqadd.u8 d20, d0, d4\n\t"
-
- "bne 2b\n\t"
-
- "1:\n\t"
- "vst1.8 {d20}, [%[keep_dst]]\n\t"
-
- : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "=r" (keep_dst)
- :
- : "ip", "cc", "memory", "d0", "d4",
- "d20"
- );
-#endif
- }
- }
- else
- {
- const uint8_t nil = 0;
- const uint8x8_t vnil = vld1_dup_u8 (&nil);
-
- while (height--)
- {
- uint8x8_t sval = vnil, dval = vnil;
- uint8_t *dst4 = 0, *dst2 = 0;
-
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- if (w & 4)
- {
- sval = vreinterpret_u8_u32 (
- vld1_lane_u32 ((void *)src, vreinterpret_u32_u8 (sval), 1));
- dval = vreinterpret_u8_u32 (
- vld1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (dval), 1));
-
- dst4 = dst;
- src += 4;
- dst += 4;
- }
-
- if (w & 2)
- {
- sval = vreinterpret_u8_u16 (
- vld1_lane_u16 ((void *)src, vreinterpret_u16_u8 (sval), 1));
- dval = vreinterpret_u8_u16 (
- vld1_lane_u16 ((void *)dst, vreinterpret_u16_u8 (dval), 1));
-
- dst2 = dst;
- src += 2;
- dst += 2;
- }
-
- if (w & 1)
- {
- sval = vld1_lane_u8 (src, sval, 1);
- dval = vld1_lane_u8 (dst, dval, 1);
- }
-
- dval = vqadd_u8 (dval, sval);
-
- if (w & 1)
- vst1_lane_u8 (dst, dval, 1);
-
- if (w & 2)
- vst1_lane_u16 ((void *)dst2, vreinterpret_u16_u8 (dval), 1);
-
- if (w & 4)
- vst1_lane_u32 ((void *)dst4, vreinterpret_u32_u8 (dval), 1);
- }
- }
-}
-
-static void
-neon_composite_over_8888_8888 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
- uint32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- if (width >= 8)
- {
- /* Use overlapping 8-pixel method */
- while (height--)
- {
- uint32_t *keep_dst = 0;
- uint8x8x4_t sval, dval, temp;
-
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
-#ifndef USE_GCC_INLINE_ASM
- sval = vld4_u8 ((void *)src);
- dval = vld4_u8 ((void *)dst);
- keep_dst = dst;
-
- temp = neon8mul (dval, vmvn_u8 (sval.val[3]));
- temp = neon8qadd (sval, temp);
-
- src += (w & 7);
- dst += (w & 7);
- w -= (w & 7);
-
- while (w)
- {
- sval = vld4_u8 ((void *)src);
- dval = vld4_u8 ((void *)dst);
-
- vst4_u8 ((void *)keep_dst, temp);
- keep_dst = dst;
-
- temp = neon8mul (dval, vmvn_u8 (sval.val[3]));
- temp = neon8qadd (sval, temp);
-
- src += 8;
- dst += 8;
- w -= 8;
- }
-
- vst4_u8 ((void *)keep_dst, temp);
-#else
- asm volatile (
-/* avoid using d8-d15 (q4-q7) aapcs callee-save registers */
- "vld4.8 {d0-d3}, [%[src]]\n\t"
- "vld4.8 {d4-d7}, [%[dst]]\n\t"
- "mov %[keep_dst], %[dst]\n\t"
-
- "and ip, %[w], #7\n\t"
- "add %[src], %[src], ip, LSL#2\n\t"
- "add %[dst], %[dst], ip, LSL#2\n\t"
- "subs %[w], %[w], ip\n\t"
- "b 9f\n\t"
-/* LOOP */
- "2:\n\t"
- "vld4.8 {d0-d3}, [%[src]]!\n\t"
- "vld4.8 {d4-d7}, [%[dst]]!\n\t"
- "vst4.8 {d20-d23}, [%[keep_dst]]\n\t"
- "sub %[keep_dst], %[dst], #8*4\n\t"
- "subs %[w], %[w], #8\n\t"
- "9:\n\t"
- "vmvn.8 d31, d3\n\t"
- "vmull.u8 q10, d31, d4\n\t"
- "vmull.u8 q11, d31, d5\n\t"
- "vmull.u8 q12, d31, d6\n\t"
- "vmull.u8 q13, d31, d7\n\t"
- "vrshr.u16 q8, q10, #8\n\t"
- "vrshr.u16 q9, q11, #8\n\t"
- "vraddhn.u16 d20, q10, q8\n\t"
- "vraddhn.u16 d21, q11, q9\n\t"
- "vrshr.u16 q8, q12, #8\n\t"
- "vrshr.u16 q9, q13, #8\n\t"
- "vraddhn.u16 d22, q12, q8\n\t"
- "vraddhn.u16 d23, q13, q9\n\t"
-/* result in d20-d23 */
- "vqadd.u8 d20, d0, d20\n\t"
- "vqadd.u8 d21, d1, d21\n\t"
- "vqadd.u8 d22, d2, d22\n\t"
- "vqadd.u8 d23, d3, d23\n\t"
-
- "bne 2b\n\t"
-
- "1:\n\t"
- "vst4.8 {d20-d23}, [%[keep_dst]]\n\t"
-
- : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "=r" (keep_dst)
- :
- : "ip", "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
- "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23"
- );
-#endif
- }
- }
- else
- {
- uint8x8_t alpha_selector = vreinterpret_u8_u64 (
- vcreate_u64 (0x0707070703030303ULL));
-
- /* Handle width < 8 */
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w >= 2)
- {
- uint8x8_t sval, dval;
-
- /* two 32-bit pixels packed into D-reg; ad-hoc vectorization */
- sval = vreinterpret_u8_u32 (vld1_u32 ((void *)src));
- dval = vreinterpret_u8_u32 (vld1_u32 ((void *)dst));
- dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
- vst1_u8 ((void *)dst, vqadd_u8 (sval, dval));
-
- src += 2;
- dst += 2;
- w -= 2;
- }
-
- if (w)
- {
- uint8x8_t sval, dval;
-
- /* single 32-bit pixel in lane 0 */
- sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)src)); /* only interested in lane 0 */
- dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst)); /* only interested in lane 0 */
- dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
- vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0);
- }
- }
- }
-}
-
-static void
-neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- uint32_t mask;
- int dst_stride, src_stride;
- uint32_t w;
- uint8x8_t mask_alpha;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
- mask_alpha = vdup_n_u8 ((mask) >> 24);
-
- if (width >= 8)
- {
- /* Use overlapping 8-pixel method */
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- uint32_t *keep_dst = 0;
-
-#ifndef USE_GCC_INLINE_ASM
- uint8x8x4_t sval, dval, temp;
-
- sval = vld4_u8 ((void *)src);
- dval = vld4_u8 ((void *)dst);
- keep_dst = dst;
-
- sval = neon8mul (sval, mask_alpha);
- temp = neon8mul (dval, vmvn_u8 (sval.val[3]));
- temp = neon8qadd (sval, temp);
-
- src += (w & 7);
- dst += (w & 7);
- w -= (w & 7);
-
- while (w)
- {
- sval = vld4_u8 ((void *)src);
- dval = vld4_u8 ((void *)dst);
-
- vst4_u8 ((void *)keep_dst, temp);
- keep_dst = dst;
-
- sval = neon8mul (sval, mask_alpha);
- temp = neon8mul (dval, vmvn_u8 (sval.val[3]));
- temp = neon8qadd (sval, temp);
-
- src += 8;
- dst += 8;
- w -= 8;
- }
- vst4_u8 ((void *)keep_dst, temp);
-#else
- asm volatile (
-/* avoid using d8-d15 (q4-q7) aapcs callee-save registers */
- "vdup.32 d30, %[mask]\n\t"
- "vdup.8 d30, d30[3]\n\t"
-
- "vld4.8 {d0-d3}, [%[src]]\n\t"
- "vld4.8 {d4-d7}, [%[dst]]\n\t"
- "mov %[keep_dst], %[dst]\n\t"
-
- "and ip, %[w], #7\n\t"
- "add %[src], %[src], ip, LSL#2\n\t"
- "add %[dst], %[dst], ip, LSL#2\n\t"
- "subs %[w], %[w], ip\n\t"
- "b 9f\n\t"
-/* LOOP */
- "2:\n\t"
- "vld4.8 {d0-d3}, [%[src]]!\n\t"
- "vld4.8 {d4-d7}, [%[dst]]!\n\t"
- "vst4.8 {d20-d23}, [%[keep_dst]]\n\t"
- "sub %[keep_dst], %[dst], #8*4\n\t"
- "subs %[w], %[w], #8\n\t"
-
- "9:\n\t"
- "vmull.u8 q10, d30, d0\n\t"
- "vmull.u8 q11, d30, d1\n\t"
- "vmull.u8 q12, d30, d2\n\t"
- "vmull.u8 q13, d30, d3\n\t"
- "vrshr.u16 q8, q10, #8\n\t"
- "vrshr.u16 q9, q11, #8\n\t"
- "vraddhn.u16 d0, q10, q8\n\t"
- "vraddhn.u16 d1, q11, q9\n\t"
- "vrshr.u16 q9, q13, #8\n\t"
- "vrshr.u16 q8, q12, #8\n\t"
- "vraddhn.u16 d3, q13, q9\n\t"
- "vraddhn.u16 d2, q12, q8\n\t"
-
- "vmvn.8 d31, d3\n\t"
- "vmull.u8 q10, d31, d4\n\t"
- "vmull.u8 q11, d31, d5\n\t"
- "vmull.u8 q12, d31, d6\n\t"
- "vmull.u8 q13, d31, d7\n\t"
- "vrshr.u16 q8, q10, #8\n\t"
- "vrshr.u16 q9, q11, #8\n\t"
- "vraddhn.u16 d20, q10, q8\n\t"
- "vrshr.u16 q8, q12, #8\n\t"
- "vraddhn.u16 d21, q11, q9\n\t"
- "vrshr.u16 q9, q13, #8\n\t"
- "vraddhn.u16 d22, q12, q8\n\t"
- "vraddhn.u16 d23, q13, q9\n\t"
-
-/* result in d20-d23 */
- "vqadd.u8 d20, d0, d20\n\t"
- "vqadd.u8 d21, d1, d21\n\t"
- "vqadd.u8 d22, d2, d22\n\t"
- "vqadd.u8 d23, d3, d23\n\t"
-
- "bne 2b\n\t"
-
- "1:\n\t"
- "vst4.8 {d20-d23}, [%[keep_dst]]\n\t"
-
- : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "=r" (keep_dst)
- : [mask] "r" (mask)
- : "ip", "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
- "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27",
- "d30", "d31"
- );
-#endif
- }
- }
- else
- {
- uint8x8_t alpha_selector = vreinterpret_u8_u64 (vcreate_u64 (0x0707070703030303ULL));
-
- /* Handle width < 8 */
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w >= 2)
- {
- uint8x8_t sval, dval;
-
- sval = vreinterpret_u8_u32 (vld1_u32 ((void *)src));
- dval = vreinterpret_u8_u32 (vld1_u32 ((void *)dst));
-
- /* sval * const alpha_mul */
- sval = neon2mul (sval, mask_alpha);
-
- /* dval * 255-(src alpha) */
- dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
-
- vst1_u8 ((void *)dst, vqadd_u8 (sval, dval));
-
- src += 2;
- dst += 2;
- w -= 2;
- }
-
- if (w)
- {
- uint8x8_t sval, dval;
-
- sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)src));
- dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst));
-
- /* sval * const alpha_mul */
- sval = neon2mul (sval, mask_alpha);
-
- /* dval * 255-(src alpha) */
- dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
-
- vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0);
- }
- }
- }
-}
-
-static void
-neon_composite_over_n_8_0565 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint16_t *dst_line, *dst;
- uint8_t *mask_line, *mask;
- int dst_stride, mask_stride;
- uint32_t w;
- uint8x8_t sval2;
- uint8x8x4_t sval8;
-
- src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- sval2=vreinterpret_u8_u32 (vdup_n_u32 (src));
- sval8.val[0]=vdup_lane_u8 (sval2,0);
- sval8.val[1]=vdup_lane_u8 (sval2,1);
- sval8.val[2]=vdup_lane_u8 (sval2,2);
- sval8.val[3]=vdup_lane_u8 (sval2,3);
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- if (width>=8)
- {
- /* Use overlapping 8-pixel method, modified to avoid rewritten dest being reused */
- while (height--)
- {
- uint16_t *keep_dst=0;
-
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
-#ifndef USE_GCC_INLINE_ASM
- uint8x8_t alpha;
- uint16x8_t dval, temp;
- uint8x8x4_t sval8temp;
-
- alpha = vld1_u8 ((void *)mask);
- dval = vld1q_u16 ((void *)dst);
- keep_dst = dst;
-
- sval8temp = neon8mul (sval8, alpha);
- temp = pack0565 (neon8qadd (sval8temp, neon8mul (unpack0565 (dval), vmvn_u8 (sval8temp.val[3]))));
-
- mask += (w & 7);
- dst += (w & 7);
- w -= (w & 7);
-
- while (w)
- {
- dval = vld1q_u16 ((void *)dst);
- alpha = vld1_u8 ((void *)mask);
-
- vst1q_u16 ((void *)keep_dst, temp);
- keep_dst = dst;
-
- sval8temp = neon8mul (sval8, alpha);
- temp = pack0565 (neon8qadd (sval8temp, neon8mul (unpack0565 (dval), vmvn_u8 (sval8temp.val[3]))));
-
- mask+=8;
- dst+=8;
- w-=8;
- }
- vst1q_u16 ((void *)keep_dst, temp);
-#else
- asm volatile (
- "vdup.32 d0, %[src]\n\t"
- "vdup.8 d1, d0[1]\n\t"
- "vdup.8 d2, d0[2]\n\t"
- "vdup.8 d3, d0[3]\n\t"
- "vdup.8 d0, d0[0]\n\t"
-
- "vld1.8 {q12}, [%[dst]]\n\t"
- "vld1.8 {d31}, [%[mask]]\n\t"
- "mov %[keep_dst], %[dst]\n\t"
-
- "and ip, %[w], #7\n\t"
- "add %[mask], %[mask], ip\n\t"
- "add %[dst], %[dst], ip, LSL#1\n\t"
- "subs %[w], %[w], ip\n\t"
- "b 9f\n\t"
-/* LOOP */
- "2:\n\t"
-
- "vld1.16 {q12}, [%[dst]]!\n\t"
- "vld1.8 {d31}, [%[mask]]!\n\t"
- "vst1.16 {q10}, [%[keep_dst]]\n\t"
- "sub %[keep_dst], %[dst], #8*2\n\t"
- "subs %[w], %[w], #8\n\t"
- "9:\n\t"
-/* expand 0565 q12 to 8888 {d4-d7} */
- "vmovn.u16 d4, q12\t\n"
- "vshr.u16 q11, q12, #5\t\n"
- "vshr.u16 q10, q12, #6+5\t\n"
- "vmovn.u16 d5, q11\t\n"
- "vmovn.u16 d6, q10\t\n"
- "vshl.u8 d4, d4, #3\t\n"
- "vshl.u8 d5, d5, #2\t\n"
- "vshl.u8 d6, d6, #3\t\n"
- "vsri.u8 d4, d4, #5\t\n"
- "vsri.u8 d5, d5, #6\t\n"
- "vsri.u8 d6, d6, #5\t\n"
-
- "vmull.u8 q10, d31, d0\n\t"
- "vmull.u8 q11, d31, d1\n\t"
- "vmull.u8 q12, d31, d2\n\t"
- "vmull.u8 q13, d31, d3\n\t"
- "vrshr.u16 q8, q10, #8\n\t"
- "vrshr.u16 q9, q11, #8\n\t"
- "vraddhn.u16 d20, q10, q8\n\t"
- "vraddhn.u16 d21, q11, q9\n\t"
- "vrshr.u16 q9, q13, #8\n\t"
- "vrshr.u16 q8, q12, #8\n\t"
- "vraddhn.u16 d23, q13, q9\n\t"
- "vraddhn.u16 d22, q12, q8\n\t"
-
-/* duplicate in 4/2/1 & 8pix vsns */
- "vmvn.8 d30, d23\n\t"
- "vmull.u8 q14, d30, d6\n\t"
- "vmull.u8 q13, d30, d5\n\t"
- "vmull.u8 q12, d30, d4\n\t"
- "vrshr.u16 q8, q14, #8\n\t"
- "vrshr.u16 q9, q13, #8\n\t"
- "vraddhn.u16 d6, q14, q8\n\t"
- "vrshr.u16 q8, q12, #8\n\t"
- "vraddhn.u16 d5, q13, q9\n\t"
- "vqadd.u8 d6, d6, d22\n\t" /* moved up */
- "vraddhn.u16 d4, q12, q8\n\t"
-/* intentionally don't calculate alpha */
-/* result in d4-d6 */
-
-/* "vqadd.u8 d6, d6, d22\n\t" ** moved up */
- "vqadd.u8 d5, d5, d21\n\t"
- "vqadd.u8 d4, d4, d20\n\t"
-
-/* pack 8888 {d20-d23} to 0565 q10 */
- "vshll.u8 q10, d6, #8\n\t"
- "vshll.u8 q3, d5, #8\n\t"
- "vshll.u8 q2, d4, #8\n\t"
- "vsri.u16 q10, q3, #5\t\n"
- "vsri.u16 q10, q2, #11\t\n"
-
- "bne 2b\n\t"
-
- "1:\n\t"
- "vst1.16 {q10}, [%[keep_dst]]\n\t"
-
- : [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [keep_dst] "=r" (keep_dst)
- : [src] "r" (src)
- : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
- "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29",
- "d30","d31"
- );
-#endif
- }
- }
- else
- {
- while (height--)
- {
- void *dst4=0, *dst2=0;
-
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
-
-#if 1 /* #ifndef USE_GCC_INLINE_ASM */
- uint8x8_t alpha;
- uint16x8_t dval, temp;
- uint8x8x4_t sval8temp;
-
- if (w&4)
- {
- alpha = vreinterpret_u8_u32 (vld1_lane_u32 ((void *)mask, vreinterpret_u32_u8 (alpha),1));
- dval = vreinterpretq_u16_u64 (vld1q_lane_u64 ((void *)dst, vreinterpretq_u64_u16 (dval),1));
- dst4=dst;
- mask+=4;
- dst+=4;
- }
- if (w&2)
- {
- alpha = vreinterpret_u8_u16 (vld1_lane_u16 ((void *)mask, vreinterpret_u16_u8 (alpha),1));
- dval = vreinterpretq_u16_u32 (vld1q_lane_u32 ((void *)dst, vreinterpretq_u32_u16 (dval),1));
- dst2=dst;
- mask+=2;
- dst+=2;
- }
- if (w&1)
- {
- alpha = vld1_lane_u8 ((void *)mask, alpha,1);
- dval = vld1q_lane_u16 ((void *)dst, dval,1);
- }
-
- sval8temp = neon8mul (sval8, alpha);
- temp = pack0565 (neon8qadd (sval8temp, neon8mul (unpack0565 (dval), vmvn_u8 (sval8temp.val[3]))));
-
- if (w&1)
- vst1q_lane_u16 ((void *)dst, temp,1);
- if (w&2)
- vst1q_lane_u32 ((void *)dst2, vreinterpretq_u32_u16 (temp),1);
- if (w&4)
- vst1q_lane_u64 ((void *)dst4, vreinterpretq_u64_u16 (temp),1);
-#else
- /* this code has some bug (does not pass blitters-test) */
- asm volatile (
- "vdup.32 d0, %[src]\n\t"
- "vdup.8 d1, d0[1]\n\t"
- "vdup.8 d2, d0[2]\n\t"
- "vdup.8 d3, d0[3]\n\t"
- "vdup.8 d0, d0[0]\n\t"
-
- "tst %[w], #4\t\n"
- "beq skip_load4\t\n"
-
- "vld1.64 {d25}, [%[dst]]\n\t"
- "vld1.32 {d31[1]}, [%[mask]]\n\t"
- "mov %[dst4], %[dst]\t\n"
- "add %[mask], %[mask], #4\t\n"
- "add %[dst], %[dst], #4*2\t\n"
-
- "skip_load4:\t\n"
- "tst %[w], #2\t\n"
- "beq skip_load2\t\n"
- "vld1.32 {d24[1]}, [%[dst]]\n\t"
- "vld1.16 {d31[1]}, [%[mask]]\n\t"
- "mov %[dst2], %[dst]\t\n"
- "add %[mask], %[mask], #2\t\n"
- "add %[dst], %[dst], #2*2\t\n"
-
- "skip_load2:\t\n"
- "tst %[w], #1\t\n"
- "beq skip_load1\t\n"
- "vld1.16 {d24[1]}, [%[dst]]\n\t"
- "vld1.8 {d31[1]}, [%[mask]]\n\t"
-
- "skip_load1:\t\n"
-/* expand 0565 q12 to 8888 {d4-d7} */
- "vmovn.u16 d4, q12\t\n"
- "vshr.u16 q11, q12, #5\t\n"
- "vshr.u16 q10, q12, #6+5\t\n"
- "vmovn.u16 d5, q11\t\n"
- "vmovn.u16 d6, q10\t\n"
- "vshl.u8 d4, d4, #3\t\n"
- "vshl.u8 d5, d5, #2\t\n"
- "vshl.u8 d6, d6, #3\t\n"
- "vsri.u8 d4, d4, #5\t\n"
- "vsri.u8 d5, d5, #6\t\n"
- "vsri.u8 d6, d6, #5\t\n"
-
- "vmull.u8 q10, d31, d0\n\t"
- "vmull.u8 q11, d31, d1\n\t"
- "vmull.u8 q12, d31, d2\n\t"
- "vmull.u8 q13, d31, d3\n\t"
- "vrshr.u16 q8, q10, #8\n\t"
- "vrshr.u16 q9, q11, #8\n\t"
- "vraddhn.u16 d20, q10, q8\n\t"
- "vraddhn.u16 d21, q11, q9\n\t"
- "vrshr.u16 q9, q13, #8\n\t"
- "vrshr.u16 q8, q12, #8\n\t"
- "vraddhn.u16 d23, q13, q9\n\t"
- "vraddhn.u16 d22, q12, q8\n\t"
-
-/* duplicate in 4/2/1 & 8pix vsns */
- "vmvn.8 d30, d23\n\t"
- "vmull.u8 q14, d30, d6\n\t"
- "vmull.u8 q13, d30, d5\n\t"
- "vmull.u8 q12, d30, d4\n\t"
- "vrshr.u16 q8, q14, #8\n\t"
- "vrshr.u16 q9, q13, #8\n\t"
- "vraddhn.u16 d6, q14, q8\n\t"
- "vrshr.u16 q8, q12, #8\n\t"
- "vraddhn.u16 d5, q13, q9\n\t"
- "vqadd.u8 d6, d6, d22\n\t" /* moved up */
- "vraddhn.u16 d4, q12, q8\n\t"
-/* intentionally don't calculate alpha */
-/* result in d4-d6 */
-
-/* "vqadd.u8 d6, d6, d22\n\t" ** moved up */
- "vqadd.u8 d5, d5, d21\n\t"
- "vqadd.u8 d4, d4, d20\n\t"
-
-/* pack 8888 {d20-d23} to 0565 q10 */
- "vshll.u8 q10, d6, #8\n\t"
- "vshll.u8 q3, d5, #8\n\t"
- "vshll.u8 q2, d4, #8\n\t"
- "vsri.u16 q10, q3, #5\t\n"
- "vsri.u16 q10, q2, #11\t\n"
-
- "tst %[w], #1\n\t"
- "beq skip_store1\t\n"
- "vst1.16 {d20[1]}, [%[dst]]\t\n"
- "skip_store1:\t\n"
- "tst %[w], #2\n\t"
- "beq skip_store2\t\n"
- "vst1.32 {d20[1]}, [%[dst2]]\t\n"
- "skip_store2:\t\n"
- "tst %[w], #4\n\t"
- "beq skip_store4\t\n"
- "vst1.16 {d21}, [%[dst4]]\t\n"
- "skip_store4:\t\n"
-
- : [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [dst4] "+r" (dst4), [dst2] "+r" (dst2)
- : [src] "r" (src)
- : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
- "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29",
- "d30","d31"
- );
-#endif
- }
- }
-}
-
-static void
-neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint32_t *dst_line, *dst;
- uint8_t *mask_line, *mask;
- int dst_stride, mask_stride;
- uint32_t w;
- uint8x8_t sval2;
- uint8x8x4_t sval8;
- uint8x8_t mask_selector = vreinterpret_u8_u64 (vcreate_u64 (0x0101010100000000ULL));
- uint8x8_t alpha_selector = vreinterpret_u8_u64 (vcreate_u64 (0x0707070703030303ULL));
-
- src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
- /* bail out if fully transparent */
- srca = src >> 24;
- if (src == 0)
- return;
-
- sval2 = vreinterpret_u8_u32 (vdup_n_u32 (src));
- sval8.val[0] = vdup_lane_u8 (sval2, 0);
- sval8.val[1] = vdup_lane_u8 (sval2, 1);
- sval8.val[2] = vdup_lane_u8 (sval2, 2);
- sval8.val[3] = vdup_lane_u8 (sval2, 3);
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- if (width >= 8)
- {
- /* Use overlapping 8-pixel method, modified to avoid
- * rewritten dest being reused
- */
- while (height--)
- {
- uint32_t *keep_dst = 0;
-
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
-#ifndef USE_GCC_INLINE_ASM
- uint8x8_t alpha;
- uint8x8x4_t dval, temp;
-
- alpha = vld1_u8 ((void *)mask);
- dval = vld4_u8 ((void *)dst);
- keep_dst = dst;
-
- temp = neon8mul (sval8, alpha);
- dval = neon8mul (dval, vmvn_u8 (temp.val[3]));
- temp = neon8qadd (temp, dval);
-
- mask += (w & 7);
- dst += (w & 7);
- w -= (w & 7);
-
- while (w)
- {
- alpha = vld1_u8 ((void *)mask);
- dval = vld4_u8 ((void *)dst);
-
- vst4_u8 ((void *)keep_dst, temp);
- keep_dst = dst;
-
- temp = neon8mul (sval8, alpha);
- dval = neon8mul (dval, vmvn_u8 (temp.val[3]));
- temp = neon8qadd (temp, dval);
-
- mask += 8;
- dst += 8;
- w -= 8;
- }
- vst4_u8 ((void *)keep_dst, temp);
-#else
- asm volatile (
- "vdup.32 d0, %[src]\n\t"
- "vdup.8 d1, d0[1]\n\t"
- "vdup.8 d2, d0[2]\n\t"
- "vdup.8 d3, d0[3]\n\t"
- "vdup.8 d0, d0[0]\n\t"
-
- "vld4.8 {d4-d7}, [%[dst]]\n\t"
- "vld1.8 {d31}, [%[mask]]\n\t"
- "mov %[keep_dst], %[dst]\n\t"
-
- "and ip, %[w], #7\n\t"
- "add %[mask], %[mask], ip\n\t"
- "add %[dst], %[dst], ip, LSL#2\n\t"
- "subs %[w], %[w], ip\n\t"
- "b 9f\n\t"
-/* LOOP */
- "2:\n\t"
- "vld4.8 {d4-d7}, [%[dst]]!\n\t"
- "vld1.8 {d31}, [%[mask]]!\n\t"
- "vst4.8 {d20-d23}, [%[keep_dst]]\n\t"
- "sub %[keep_dst], %[dst], #8*4\n\t"
- "subs %[w], %[w], #8\n\t"
- "9:\n\t"
-
- "vmull.u8 q10, d31, d0\n\t"
- "vmull.u8 q11, d31, d1\n\t"
- "vmull.u8 q12, d31, d2\n\t"
- "vmull.u8 q13, d31, d3\n\t"
- "vrshr.u16 q8, q10, #8\n\t"
- "vrshr.u16 q9, q11, #8\n\t"
- "vraddhn.u16 d20, q10, q8\n\t"
- "vraddhn.u16 d21, q11, q9\n\t"
- "vrshr.u16 q9, q13, #8\n\t"
- "vrshr.u16 q8, q12, #8\n\t"
- "vraddhn.u16 d23, q13, q9\n\t"
- "vraddhn.u16 d22, q12, q8\n\t"
-
- "vmvn.8 d30, d23\n\t"
- "vmull.u8 q12, d30, d4\n\t"
- "vmull.u8 q13, d30, d5\n\t"
- "vmull.u8 q14, d30, d6\n\t"
- "vmull.u8 q15, d30, d7\n\t"
-
- "vrshr.u16 q8, q12, #8\n\t"
- "vrshr.u16 q9, q13, #8\n\t"
- "vraddhn.u16 d4, q12, q8\n\t"
- "vrshr.u16 q8, q14, #8\n\t"
- "vraddhn.u16 d5, q13, q9\n\t"
- "vrshr.u16 q9, q15, #8\n\t"
- "vraddhn.u16 d6, q14, q8\n\t"
- "vraddhn.u16 d7, q15, q9\n\t"
-/* result in d4-d7 */
-
- "vqadd.u8 d20, d4, d20\n\t"
- "vqadd.u8 d21, d5, d21\n\t"
- "vqadd.u8 d22, d6, d22\n\t"
- "vqadd.u8 d23, d7, d23\n\t"
-
- "bne 2b\n\t"
-
- "1:\n\t"
- "vst4.8 {d20-d23}, [%[keep_dst]]\n\t"
-
- : [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [keep_dst] "=r" (keep_dst)
- : [src] "r" (src)
- : "ip", "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
- "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29",
- "d30", "d31"
- );
-#endif
- }
- }
- else
- {
- while (height--)
- {
- uint8x8_t alpha;
-
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w >= 2)
- {
- uint8x8_t dval, temp, res;
-
- alpha = vtbl1_u8 (
- vreinterpret_u8_u16 (vld1_dup_u16 ((void *)mask)), mask_selector);
- dval = vld1_u8 ((void *)dst);
-
- temp = neon2mul (sval2, alpha);
- res = vqadd_u8 (
- temp, neon2mul (dval, vtbl1_u8 (vmvn_u8 (temp), alpha_selector)));
-
- vst1_u8 ((void *)dst, res);
-
- mask += 2;
- dst += 2;
- w -= 2;
- }
-
- if (w)
- {
- uint8x8_t dval, temp, res;
-
- alpha = vtbl1_u8 (vld1_dup_u8 ((void *)mask), mask_selector);
- dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void *)dst));
-
- temp = neon2mul (sval2, alpha);
- res = vqadd_u8 (
- temp, neon2mul (dval, vtbl1_u8 (vmvn_u8 (temp), alpha_selector)));
-
- vst1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (res), 0);
- }
- }
- }
-}
-
-static void
-neon_composite_add_8888_8_8 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *mask_line, *mask;
- int dst_stride, mask_stride;
- uint32_t w;
- uint32_t src;
- uint8x8_t sa;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- src = _pixman_image_get_solid (src_image, dst_image->bits.format);
- sa = vdup_n_u8 ((src) >> 24);
-
- if (width >= 8)
- {
- /* Use overlapping 8-pixel method, modified to avoid rewritten dest being reused */
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- uint8x8_t mval, dval, res;
- uint8_t *keep_dst;
-
- mval = vld1_u8 ((void *)mask);
- dval = vld1_u8 ((void *)dst);
- keep_dst = dst;
-
- res = vqadd_u8 (neon2mul (mval, sa), dval);
-
- mask += (w & 7);
- dst += (w & 7);
- w -= w & 7;
-
- while (w)
- {
- mval = vld1_u8 ((void *)mask);
- dval = vld1_u8 ((void *)dst);
- vst1_u8 ((void *)keep_dst, res);
- keep_dst = dst;
-
- res = vqadd_u8 (neon2mul (mval, sa), dval);
-
- mask += 8;
- dst += 8;
- w -= 8;
- }
- vst1_u8 ((void *)keep_dst, res);
- }
- }
- else
- {
- /* Use 4/2/1 load/store method to handle 1-7 pixels */
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- uint8x8_t mval = sa, dval = sa, res;
- uint8_t *dst4 = 0, *dst2 = 0;
-
- if (w & 4)
- {
- mval = vreinterpret_u8_u32 (
- vld1_lane_u32 ((void *)mask, vreinterpret_u32_u8 (mval), 1));
- dval = vreinterpret_u8_u32 (
- vld1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (dval), 1));
-
- dst4 = dst;
- mask += 4;
- dst += 4;
- }
-
- if (w & 2)
- {
- mval = vreinterpret_u8_u16 (
- vld1_lane_u16 ((void *)mask, vreinterpret_u16_u8 (mval), 1));
- dval = vreinterpret_u8_u16 (
- vld1_lane_u16 ((void *)dst, vreinterpret_u16_u8 (dval), 1));
- dst2 = dst;
- mask += 2;
- dst += 2;
- }
-
- if (w & 1)
- {
- mval = vld1_lane_u8 (mask, mval, 1);
- dval = vld1_lane_u8 (dst, dval, 1);
- }
-
- res = vqadd_u8 (neon2mul (mval, sa), dval);
-
- if (w & 1)
- vst1_lane_u8 (dst, res, 1);
- if (w & 2)
- vst1_lane_u16 ((void *)dst2, vreinterpret_u16_u8 (res), 1);
- if (w & 4)
- vst1_lane_u32 ((void *)dst4, vreinterpret_u32_u8 (res), 1);
- }
- }
-}
-
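The removed neon_composite_add_8888_8_8 path above implements the ADD operator with a solid source, an a8 mask and an a8 destination: every destination byte becomes saturate(dst + mask * src_alpha / 255). A minimal scalar sketch of that per-pixel step, using the rounded divide-by-255 idiom that the NEON vmull/vrshr/vraddhn sequence approximates (the helper name is illustrative, not part of pixman):

static inline uint8_t
add_n_8_8_pixel (uint8_t dst, uint8_t mask, uint8_t src_alpha)
{
    uint16_t t   = (uint16_t) mask * src_alpha + 0x80;  /* mask * alpha        */
    uint16_t m   = (t + (t >> 8)) >> 8;                 /* rounded divide by 255 */
    uint16_t sum = (uint16_t) dst + m;

    return sum > 0xff ? 0xff : (uint8_t) sum;           /* saturating add      */
}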
-#ifdef USE_GCC_INLINE_ASM
-
-static void
-neon_composite_src_16_16 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint16_t *dst_line, *src_line;
- uint32_t dst_stride, src_stride;
-
- if (!height || !width)
- return;
-
- /* We simply copy 16-bit-aligned pixels from one place to another. */
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
- /* Preload the first input scanline */
- {
- uint16_t *src_ptr = src_line;
- uint32_t count = width;
-
- asm volatile (
- "0: @ loop \n"
- " subs %[count], %[count], #32 \n"
- " pld [%[src]] \n"
- " add %[src], %[src], #64 \n"
- " bgt 0b \n"
-
- /* Clobbered input registers marked as input/outputs */
- : [src] "+r" (src_ptr), [count] "+r" (count)
- : /* no unclobbered inputs */
- : "cc"
- );
- }
-
- while (height--)
- {
- uint16_t *dst_ptr = dst_line;
- uint16_t *src_ptr = src_line;
- uint32_t count = width;
- uint32_t tmp = 0;
-
- /* Uses multi-register access and preloading to maximise bandwidth.
- * Each pixel is one halfword, so a quadword contains 8px.
-     * Preload spacing assumes a 64-byte cacheline.
- */
- asm volatile (
- " cmp %[count], #64 \n"
- " blt 1f @ skip oversized fragments \n"
- "0: @ start with eight quadwords at a time \n"
- /* preload from next scanline */
- " pld [%[src], %[src_stride], LSL #1] \n"
- " sub %[count], %[count], #64 \n"
- " vld1.16 {d16, d17, d18, d19}, [%[src]]! \n"
- " vld1.16 {d20, d21, d22, d23}, [%[src]]! \n"
- /* preload from next scanline */
- " pld [%[src], %[src_stride], LSL #1] \n"
- " vld1.16 {d24, d25, d26, d27}, [%[src]]! \n"
- " vld1.16 {d28, d29, d30, d31}, [%[src]]! \n"
- " cmp %[count], #64 \n"
- " vst1.16 {d16, d17, d18, d19}, [%[dst]]! \n"
- " vst1.16 {d20, d21, d22, d23}, [%[dst]]! \n"
- " vst1.16 {d24, d25, d26, d27}, [%[dst]]! \n"
- " vst1.16 {d28, d29, d30, d31}, [%[dst]]! \n"
- " bge 0b \n"
- " cmp %[count], #0 \n"
- " beq 7f @ aligned fastpath \n"
- "1: @ four quadwords \n"
- " tst %[count], #32 \n"
- " beq 2f @ skip oversized fragment \n"
- /* preload from next scanline */
- " pld [%[src], %[src_stride], LSL #1] \n"
- " vld1.16 {d16, d17, d18, d19}, [%[src]]! \n"
- " vld1.16 {d20, d21, d22, d23}, [%[src]]! \n"
- " vst1.16 {d16, d17, d18, d19}, [%[dst]]! \n"
- " vst1.16 {d20, d21, d22, d23}, [%[dst]]! \n"
- "2: @ two quadwords \n"
- " tst %[count], #16 \n"
- " beq 3f @ skip oversized fragment \n"
- /* preload from next scanline */
- " pld [%[src], %[src_stride], LSL #1] \n"
- " vld1.16 {d16, d17, d18, d19}, [%[src]]! \n"
- " vst1.16 {d16, d17, d18, d19}, [%[dst]]! \n"
- "3: @ one quadword \n"
- " tst %[count], #8 \n"
- " beq 4f @ skip oversized fragment \n"
- " vld1.16 {d16, d17}, [%[src]]! \n"
- " vst1.16 {d16, d17}, [%[dst]]! \n"
- "4: @ one doubleword \n"
- " tst %[count], #4 \n"
- " beq 5f @ skip oversized fragment \n"
- " vld1.16 {d16}, [%[src]]! \n"
- " vst1.16 {d16}, [%[dst]]! \n"
- "5: @ one word \n"
- " tst %[count], #2 \n"
- " beq 6f @ skip oversized fragment \n"
- " ldr %[tmp], [%[src]], #4 \n"
- " str %[tmp], [%[dst]], #4 \n"
- "6: @ one halfword \n"
- " tst %[count], #1 \n"
- " beq 7f @ skip oversized fragment \n"
- " ldrh %[tmp], [%[src]] \n"
- " strh %[tmp], [%[dst]] \n"
- "7: @ end \n"
-
- /* Clobbered input registers marked as input/outputs */
- : [dst] "+r" (dst_ptr), [src] "+r" (src_ptr),
- [count] "+r" (count), [tmp] "+r" (tmp)
-
- /* Unclobbered input */
- : [src_stride] "r" (src_stride)
-
- /* Clobbered vector registers */
- : "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
- "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "cc", "memory"
- );
-
- src_line += src_stride;
- dst_line += dst_stride;
- }
-}
-
-#endif /* USE_GCC_INLINE_ASM */
-
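The preload fragment being removed above walks the first source scanline in 64-byte steps, issuing one PLD per cache line before the copy loop starts. In plain C the same idea looks roughly like the sketch below (__builtin_prefetch is the GCC builtin; the removed code issues PLD directly from inline asm, or uses the compiler-specific __pld helper):

static inline void
preload_scanline (const uint8_t *src, int bytes)
{
    int i;

    /* touch one 64-byte cache line per iteration */
    for (i = 0; i < bytes; i += 64)
        __builtin_prefetch (src + i);
}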
-static void
-neon_composite_src_24_16 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint16_t *dst_line;
- uint32_t *src_line;
- uint32_t dst_stride, src_stride;
-
- if (!width || !height)
- return;
-
- /* We simply copy pixels from one place to another,
- * assuming that the source's alpha is opaque.
- */
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
- /* Preload the first input scanline */
- {
- uint8_t *src_ptr = (uint8_t*) src_line;
- uint32_t count = (width + 15) / 16;
-
-#ifdef USE_GCC_INLINE_ASM
- asm volatile (
- "0: @ loop \n"
- " subs %[count], %[count], #1 \n"
- " pld [%[src]] \n"
- " add %[src], %[src], #64 \n"
- " bgt 0b \n"
-
- /* Clobbered input registers marked as input/outputs */
- : [src] "+r" (src_ptr), [count] "+r" (count)
- : /* no unclobbered inputs */
- : "cc"
- );
-#else
- do
- {
- __pld (src_ptr);
- src_ptr += 64;
- }
- while (--count);
-#endif
- }
-
- while (height--)
- {
- uint16_t *dst_ptr = dst_line;
- uint32_t *src_ptr = src_line;
- uint32_t count = width;
- const uint32_t rb_mask = 0x1F;
- const uint32_t g_mask = 0x3F;
-
- /* If you're going to complain about a goto, take a long hard look
- * at the massive blocks of assembler this skips over. ;-)
- */
- if (count < 8)
- goto small_stuff;
-
-#ifdef USE_GCC_INLINE_ASM
-
- /* This is not as aggressive as the RGB565-source case.
- * Generally the source is in cached RAM when the formats are
- * different, so we use preload.
- *
- * We don't need to blend, so we are not reading from the
- * uncached framebuffer.
- */
- asm volatile (
- " cmp %[count], #16 \n"
- " blt 1f @ skip oversized fragments \n"
- "0: @ start with sixteen pixels at a time \n"
- " sub %[count], %[count], #16 \n"
- " pld [%[src], %[src_stride], lsl #2] @ preload from next scanline \n"
- " vld4.8 {d0, d1, d2, d3}, [%[src]]! @ d3 is alpha and ignored, d2-0 are rgb. \n"
- " vld4.8 {d4, d5, d6, d7}, [%[src]]! @ d7 is alpha and ignored, d6-4 are rgb. \n"
- " vshll.u8 q8, d2, #8 @ expand first red for repacking \n"
- " vshll.u8 q10, d1, #8 @ expand first green for repacking \n"
- " vshll.u8 q11, d0, #8 @ expand first blue for repacking \n"
- " vshll.u8 q9, d6, #8 @ expand second red for repacking \n"
- " vsri.u16 q8, q10, #5 @ insert first green after red \n"
- " vshll.u8 q10, d5, #8 @ expand second green for repacking \n"
- " vsri.u16 q8, q11, #11 @ insert first blue after green \n"
- " vshll.u8 q11, d4, #8 @ expand second blue for repacking \n"
- " vsri.u16 q9, q10, #5 @ insert second green after red \n"
- " vsri.u16 q9, q11, #11 @ insert second blue after green \n"
- " cmp %[count], #16 \n"
- " vst1.16 {d16, d17, d18, d19}, [%[dst]]! @ store 16 pixels \n"
- " bge 0b \n"
- "1: @ end of main loop \n"
- " cmp %[count], #8 @ can we still do an 8-pixel block? \n"
- " blt 2f \n"
- " sub %[count], %[count], #8 \n"
- " pld [%[src], %[src_stride], lsl #2] @ preload from next scanline \n"
- " vld4.8 {d0, d1, d2, d3}, [%[src]]! @ d3 is alpha and ignored, d2-0 are rgb. \n"
- " vshll.u8 q8, d2, #8 @ expand first red for repacking \n"
- " vshll.u8 q10, d1, #8 @ expand first green for repacking \n"
- " vshll.u8 q11, d0, #8 @ expand first blue for repacking \n"
- " vsri.u16 q8, q10, #5 @ insert first green after red \n"
- " vsri.u16 q8, q11, #11 @ insert first blue after green \n"
- " vst1.16 {d16, d17}, [%[dst]]! @ store 8 pixels \n"
- "2: @ end \n"
-
- /* Clobbered input and working registers marked as input/outputs */
- : [dst] "+r" (dst_ptr), [src] "+r" (src_ptr), [count] "+r" (count)
-
- /* Unclobbered input */
- : [src_stride] "r" (src_stride)
-
- /* Clobbered vector registers */
-
- /* NB: these are the quad aliases of the
- * double registers used in the asm
- */
- : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16", "d17",
- "d18", "d19", "d20", "d21", "d22", "d23", "cc", "memory"
- );
-#else
- /* A copy of the above code, in intrinsics-form. */
- while (count >= 16)
- {
- uint8x8x4_t pixel_set_a, pixel_set_b;
- uint16x8_t red_a, green_a, blue_a;
- uint16x8_t red_b, green_b, blue_b;
- uint16x8_t dest_pixels_a, dest_pixels_b;
-
- count -= 16;
- __pld (src_ptr + src_stride);
- pixel_set_a = vld4_u8 ((uint8_t*)(src_ptr));
- pixel_set_b = vld4_u8 ((uint8_t*)(src_ptr + 8));
- src_ptr += 16;
-
- red_a = vshll_n_u8 (pixel_set_a.val[2], 8);
- green_a = vshll_n_u8 (pixel_set_a.val[1], 8);
- blue_a = vshll_n_u8 (pixel_set_a.val[0], 8);
-
- red_b = vshll_n_u8 (pixel_set_b.val[2], 8);
- green_b = vshll_n_u8 (pixel_set_b.val[1], 8);
- blue_b = vshll_n_u8 (pixel_set_b.val[0], 8);
-
- dest_pixels_a = vsriq_n_u16 (red_a, green_a, 5);
- dest_pixels_b = vsriq_n_u16 (red_b, green_b, 5);
-
- dest_pixels_a = vsriq_n_u16 (dest_pixels_a, blue_a, 11);
- dest_pixels_b = vsriq_n_u16 (dest_pixels_b, blue_b, 11);
-
- /* There doesn't seem to be an intrinsic for the
- * double-quadword variant
- */
- vst1q_u16 (dst_ptr, dest_pixels_a);
- vst1q_u16 (dst_ptr + 8, dest_pixels_b);
- dst_ptr += 16;
- }
-
- /* 8-pixel loop */
- if (count >= 8)
- {
- uint8x8x4_t pixel_set_a;
- uint16x8_t red_a, green_a, blue_a;
- uint16x8_t dest_pixels_a;
-
- __pld (src_ptr + src_stride);
- count -= 8;
- pixel_set_a = vld4_u8 ((uint8_t*)(src_ptr));
- src_ptr += 8;
-
- red_a = vshll_n_u8 (pixel_set_a.val[2], 8);
- green_a = vshll_n_u8 (pixel_set_a.val[1], 8);
- blue_a = vshll_n_u8 (pixel_set_a.val[0], 8);
-
- dest_pixels_a = vsriq_n_u16 (red_a, green_a, 5);
- dest_pixels_a = vsriq_n_u16 (dest_pixels_a, blue_a, 11);
-
- vst1q_u16 (dst_ptr, dest_pixels_a);
- dst_ptr += 8;
- }
-
-#endif /* USE_GCC_INLINE_ASM */
-
- small_stuff:
- if (count)
- __pld (src_ptr + src_stride);
-
- while (count >= 2)
- {
- uint32_t src_pixel_a = *src_ptr++;
- uint32_t src_pixel_b = *src_ptr++;
-
- /* ARM is really good at shift-then-ALU ops. */
- /* This should be a total of six shift-ANDs and five shift-ORs. */
- uint32_t dst_pixels_a;
- uint32_t dst_pixels_b;
-
- dst_pixels_a = ((src_pixel_a >> 3) & rb_mask);
- dst_pixels_a |= ((src_pixel_a >> 10) & g_mask) << 5;
- dst_pixels_a |= ((src_pixel_a >> 19) & rb_mask) << 11;
-
- dst_pixels_b = ((src_pixel_b >> 3) & rb_mask);
- dst_pixels_b |= ((src_pixel_b >> 10) & g_mask) << 5;
- dst_pixels_b |= ((src_pixel_b >> 19) & rb_mask) << 11;
-
- /* little-endian mode only */
- *((uint32_t*) dst_ptr) = dst_pixels_a | (dst_pixels_b << 16);
- dst_ptr += 2;
- count -= 2;
- }
-
- if (count)
- {
- uint32_t src_pixel = *src_ptr++;
-
- /* ARM is really good at shift-then-ALU ops.
- * This block should end up as three shift-ANDs
- * and two shift-ORs.
- */
- uint32_t tmp_blue = (src_pixel >> 3) & rb_mask;
- uint32_t tmp_green = (src_pixel >> 10) & g_mask;
- uint32_t tmp_red = (src_pixel >> 19) & rb_mask;
- uint16_t dst_pixel = (tmp_red << 11) | (tmp_green << 5) | tmp_blue;
-
- *dst_ptr++ = dst_pixel;
- count--;
- }
-
- src_line += src_stride;
- dst_line += dst_stride;
- }
-}
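For reference, the per-pixel repack this removed routine performs (and which the generated NEON code in pixman-arm-neon-asm.S now handles) keeps the top 5/6/5 bits of red, green and blue from an x8r8g8b8 word and packs them into one r5g6b5 halfword. A scalar sketch, equivalent to the shift-and-mask tail loop above:

static inline uint16_t
convert_8888_to_0565 (uint32_t s)
{
    return (uint16_t) (((s >> 8) & 0xf800) |   /* top 5 bits of red   -> bits 11-15 */
                       ((s >> 5) & 0x07e0) |   /* top 6 bits of green -> bits 5-10  */
                       ((s >> 3) & 0x001f));   /* top 5 bits of blue  -> bits 0-4   */
}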
+#include "pixman-arm-common.h"
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565,
+ uint16_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888,
+ uint8_t, 3, uint8_t, 3)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565,
+ uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888,
+ uint16_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev,
+ uint8_t, 3, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev,
+ uint8_t, 3, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8000_8000,
+ uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
+ uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
+ uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_0565,
+ uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_8888,
+ uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_reverse_n_8888,
+ uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_0565,
+ uint8_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8888,
+ uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8888_8888_ca,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8,
+ uint8_t, 1, uint8_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
+ uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
+ uint8_t, 1, uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
+ uint32_t, 1, uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
+ uint32_t, 1, uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
+ uint32_t, 1, uint32_t, 1, uint32_t, 1)
+
+void
+pixman_composite_src_n_8_asm_neon (int32_t w,
+ int32_t h,
+ uint8_t *dst,
+ int32_t dst_stride,
+ uint8_t src);
+
+void
+pixman_composite_src_n_0565_asm_neon (int32_t w,
+ int32_t h,
+ uint16_t *dst,
+ int32_t dst_stride,
+ uint16_t src);
+
+void
+pixman_composite_src_n_8888_asm_neon (int32_t w,
+ int32_t h,
+ uint32_t *dst,
+ int32_t dst_stride,
+ uint32_t src);
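Each PIXMAN_ARM_BIND_* invocation above generates both the prototype of a hand-written assembly routine (defined in pixman-arm-neon-asm.S) and a small C wrapper that extracts scanline pointers and strides from the pixman images before calling it. Roughly, the SRC_DST flavour expands to something of this shape (a sketch of the pattern only; see pixman-arm-common.h for the real macro):

void
pixman_composite_src_8888_8888_asm_neon (int32_t w, int32_t h,
                                         uint32_t *dst, int32_t dst_stride,
                                         uint32_t *src, int32_t src_stride);

static void
neon_composite_src_8888_8888 (pixman_implementation_t *imp,
                              pixman_op_t op,
                              pixman_image_t *src_image,
                              pixman_image_t *mask_image,
                              pixman_image_t *dst_image,
                              int32_t src_x, int32_t src_y,
                              int32_t mask_x, int32_t mask_y,
                              int32_t dest_x, int32_t dest_y,
                              int32_t width, int32_t height)
{
    uint32_t *dst_line, *src_line;
    int dst_stride, src_stride;

    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t,
                           dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t,
                           src_stride, src_line, 1);

    pixman_composite_src_8888_8888_asm_neon (width, height,
                                             dst_line, dst_stride,
                                             src_line, src_stride);
}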
static pixman_bool_t
pixman_fill_neon (uint32_t *bits,
@@ -1705,1019 +120,144 @@ pixman_fill_neon (uint32_t *bits,
int height,
uint32_t _xor)
{
- uint32_t byte_stride, color;
- char *dst;
-
/* stride is always multiple of 32bit units in pixman */
- byte_stride = stride * sizeof(uint32_t);
+ uint32_t byte_stride = stride * sizeof(uint32_t);
switch (bpp)
{
case 8:
- dst = ((char *) bits) + y * byte_stride + x;
- _xor &= 0xff;
- color = _xor << 24 | _xor << 16 | _xor << 8 | _xor;
- break;
-
+ pixman_composite_src_n_8_asm_neon (
+ width,
+ height,
+ (uint8_t *)(((char *) bits) + y * byte_stride + x),
+ byte_stride,
+ _xor & 0xff);
+ return TRUE;
case 16:
- dst = ((char *) bits) + y * byte_stride + x * 2;
- _xor &= 0xffff;
- color = _xor << 16 | _xor;
- width *= 2; /* width to bytes */
- break;
-
+ pixman_composite_src_n_0565_asm_neon (
+ width,
+ height,
+ (uint16_t *)(((char *) bits) + y * byte_stride + x * 2),
+ byte_stride / 2,
+ _xor & 0xffff);
+ return TRUE;
case 32:
- dst = ((char *) bits) + y * byte_stride + x * 4;
- color = _xor;
- width *= 4; /* width to bytes */
- break;
-
+ pixman_composite_src_n_8888_asm_neon (
+ width,
+ height,
+ (uint32_t *)(((char *) bits) + y * byte_stride + x * 4),
+ byte_stride / 4,
+ _xor);
+ return TRUE;
default:
return FALSE;
}
-
-#ifdef USE_GCC_INLINE_ASM
- if (width < 16)
- {
- /* We have a special case for such small widths that don't allow
- * us to use wide 128-bit stores anyway. We don't waste time
- * trying to align writes, since there are only very few of them anyway
- */
- asm volatile (
- "cmp %[height], #0\n"/* Check if empty fill */
- "beq 3f\n"
- "vdup.32 d0, %[color]\n"/* Fill the color to neon req */
-
- /* Check if we have a such width that can easily be handled by single
- * operation for each scanline. This significantly reduces the number
- * of test/branch instructions for each scanline
- */
- "cmp %[width], #8\n"
- "beq 4f\n"
- "cmp %[width], #4\n"
- "beq 5f\n"
- "cmp %[width], #2\n"
- "beq 6f\n"
-
- /* Loop starts here for each scanline */
- "1:\n"
- "mov r4, %[dst]\n" /* Starting address of the current line */
- "tst %[width], #8\n"
- "beq 2f\n"
- "vst1.8 {d0}, [r4]!\n"
- "2:\n"
- "tst %[width], #4\n"
- "beq 2f\n"
- "str %[color], [r4], #4\n"
- "2:\n"
- "tst %[width], #2\n"
- "beq 2f\n"
- "strh %[color], [r4], #2\n"
- "2:\n"
- "tst %[width], #1\n"
- "beq 2f\n"
- "strb %[color], [r4], #1\n"
- "2:\n"
-
- "subs %[height], %[height], #1\n"
- "add %[dst], %[dst], %[byte_stride]\n"
- "bne 1b\n"
- "b 3f\n"
-
- /* Special fillers for those widths that we can do with single operation */
- "4:\n"
- "subs %[height], %[height], #1\n"
- "vst1.8 {d0}, [%[dst]]\n"
- "add %[dst], %[dst], %[byte_stride]\n"
- "bne 4b\n"
- "b 3f\n"
-
- "5:\n"
- "subs %[height], %[height], #1\n"
- "str %[color], [%[dst]]\n"
- "add %[dst], %[dst], %[byte_stride]\n"
- "bne 5b\n"
- "b 3f\n"
-
- "6:\n"
- "subs %[height], %[height], #1\n"
- "strh %[color], [%[dst]]\n"
- "add %[dst], %[dst], %[byte_stride]\n"
- "bne 6b\n"
-
- "3:\n"
- : [height] "+r" (height), [dst] "+r" (dst)
- : [color] "r" (color), [width] "r" (width),
- [byte_stride] "r" (byte_stride)
- : "memory", "cc", "d0", "r4");
- }
- else
- {
- asm volatile (
- "cmp %[height], #0\n"/* Check if empty fill */
- "beq 5f\n"
- "vdup.32 q0, %[color]\n"/* Fill the color to neon req */
-
- /* Loop starts here for each scanline */
- "1:\n"
- "mov r4, %[dst]\n"/* Starting address of the current line */
- "mov r5, %[width]\n"/* We're going to write this many bytes */
- "ands r6, r4, #15\n"/* Are we at the 128-bit aligned address? */
- "beq 2f\n"/* Jump to the best case */
-
- /* We're not 128-bit aligned: However, we know that we can get to the
- next aligned location, since the fill is at least 16 bytes wide */
- "rsb r6, r6, #16\n" /* We would need to go forward this much */
- "sub r5, r5, r6\n"/* Update bytes left */
- "tst r6, #1\n"
- "beq 6f\n"
- "vst1.8 {d0[0]}, [r4]!\n"/* Store byte, now we are word aligned */
- "6:\n"
- "tst r6, #2\n"
- "beq 6f\n"
- "vst1.16 {d0[0]}, [r4, :16]!\n"/* Store half word, now we are 16-bit aligned */
- "6:\n"
- "tst r6, #4\n"
- "beq 6f\n"
- "vst1.32 {d0[0]}, [r4, :32]!\n"/* Store word, now we're 32-bit aligned */
- "6:\n"
- "tst r6, #8\n"
- "beq 2f\n"
- "vst1.64 {d0}, [r4, :64]!\n"/* Store qword now we're 64-bit aligned */
-
- /* The good case: We're 128-bit aligned for this scanline */
- "2:\n"
-	    "and	r6, r5, #15\n"/* Number of trailing bytes */
- "cmp r5, r6\n"/* Do we have at least one qword to write? */
- "beq 6f\n"/* No, we just write the tail */
- "lsr r5, r5, #4\n"/* This many full qwords to write */
-
- /* The main block: Do 128-bit aligned writes */
- "3:\n"
- "subs r5, r5, #1\n"
- "vst1.64 {d0, d1}, [r4, :128]!\n"
- "bne 3b\n"
-
-	/* Handle the trailing bytes: Do 64, 32, 16 and 8-bit aligned writes as needed.
- We know that we're currently at 128-bit aligned address, so we can just
- pick the biggest operations that the remaining write width allows */
- "6:\n"
- "cmp r6, #0\n"
- "beq 4f\n"
- "tst r6, #8\n"
- "beq 6f\n"
- "vst1.64 {d0}, [r4, :64]!\n"
- "6:\n"
- "tst r6, #4\n"
- "beq 6f\n"
- "vst1.32 {d0[0]}, [r4, :32]!\n"
- "6:\n"
- "tst r6, #2\n"
- "beq 6f\n"
- "vst1.16 {d0[0]}, [r4, :16]!\n"
- "6:\n"
- "tst r6, #1\n"
- "beq 4f\n"
- "vst1.8 {d0[0]}, [r4]!\n"
- "4:\n"
-
- /* Handle the next scanline */
- "subs %[height], %[height], #1\n"
- "add %[dst], %[dst], %[byte_stride]\n"
- "bne 1b\n"
- "5:\n"
- : [height] "+r" (height), [dst] "+r" (dst)
- : [color] "r" (color), [width] "r" (width),
- [byte_stride] "r" (byte_stride)
- : "memory", "cc", "d0", "d1", "r4", "r5", "r6");
- }
- return TRUE;
-
-#else
-
- /* TODO: intrinsic version for armcc */
- return FALSE;
-
-#endif
-}
-
-/* TODO: is there a more generic way of doing this being introduced? */
-#define NEON_SCANLINE_BUFFER_PIXELS (1024)
-
-static inline void
-neon_quadword_copy (void * dst,
- void * src,
- uint32_t count, /* of quadwords */
- uint32_t trailer_count /* of bytes */)
-{
- uint8_t *t_dst = dst, *t_src = src;
-
- /* Uses aligned multi-register loads to maximise read bandwidth
- * on uncached memory such as framebuffers
- * The accesses do not have the aligned qualifiers, so that the copy
- * may convert between aligned-uncached and unaligned-cached memory.
- * It is assumed that the CPU can infer alignedness from the address.
- */
-
-#ifdef USE_GCC_INLINE_ASM
-
- asm volatile (
- " cmp %[count], #8 \n"
- " blt 1f @ skip oversized fragments \n"
- "0: @ start with eight quadwords at a time \n"
- " sub %[count], %[count], #8 \n"
- " vld1.8 {d16, d17, d18, d19}, [%[src]]! \n"
- " vld1.8 {d20, d21, d22, d23}, [%[src]]! \n"
- " vld1.8 {d24, d25, d26, d27}, [%[src]]! \n"
- " vld1.8 {d28, d29, d30, d31}, [%[src]]! \n"
- " cmp %[count], #8 \n"
- " vst1.8 {d16, d17, d18, d19}, [%[dst]]! \n"
- " vst1.8 {d20, d21, d22, d23}, [%[dst]]! \n"
- " vst1.8 {d24, d25, d26, d27}, [%[dst]]! \n"
- " vst1.8 {d28, d29, d30, d31}, [%[dst]]! \n"
- " bge 0b \n"
- "1: @ four quadwords \n"
- " tst %[count], #4 \n"
- " beq 2f @ skip oversized fragment \n"
- " vld1.8 {d16, d17, d18, d19}, [%[src]]! \n"
- " vld1.8 {d20, d21, d22, d23}, [%[src]]! \n"
- " vst1.8 {d16, d17, d18, d19}, [%[dst]]! \n"
- " vst1.8 {d20, d21, d22, d23}, [%[dst]]! \n"
- "2: @ two quadwords \n"
- " tst %[count], #2 \n"
- " beq 3f @ skip oversized fragment \n"
- " vld1.8 {d16, d17, d18, d19}, [%[src]]! \n"
- " vst1.8 {d16, d17, d18, d19}, [%[dst]]! \n"
- "3: @ one quadword \n"
- " tst %[count], #1 \n"
- " beq 4f @ skip oversized fragment \n"
- " vld1.8 {d16, d17}, [%[src]]! \n"
- " vst1.8 {d16, d17}, [%[dst]]! \n"
- "4: @ end \n"
-
- /* Clobbered input registers marked as input/outputs */
- : [dst] "+r" (t_dst), [src] "+r" (t_src), [count] "+r" (count)
-
- /* No unclobbered inputs */
- :
-
- /* Clobbered vector registers */
- : "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25",
- "d26", "d27", "d28", "d29", "d30", "d31", "cc", "memory");
-
-#else
-
- while (count >= 8)
- {
- uint8x16x4_t t1 = vld4q_u8 (t_src);
- uint8x16x4_t t2 = vld4q_u8 (t_src + sizeof(uint8x16x4_t));
-
- t_src += sizeof(uint8x16x4_t) * 2;
- vst4q_u8 (t_dst, t1);
- vst4q_u8 (t_dst + sizeof(uint8x16x4_t), t2);
- t_dst += sizeof(uint8x16x4_t) * 2;
- count -= 8;
- }
-
- if (count & 4)
- {
- uint8x16x4_t t1 = vld4q_u8 (t_src);
-
- t_src += sizeof(uint8x16x4_t);
- vst4q_u8 (t_dst, t1);
- t_dst += sizeof(uint8x16x4_t);
- }
-
- if (count & 2)
- {
- uint8x8x4_t t1 = vld4_u8 (t_src);
-
- t_src += sizeof(uint8x8x4_t);
- vst4_u8 (t_dst, t1);
- t_dst += sizeof(uint8x8x4_t);
- }
-
- if (count & 1)
- {
- uint8x16_t t1 = vld1q_u8 (t_src);
-
- t_src += sizeof(uint8x16_t);
- vst1q_u8 (t_dst, t1);
- t_dst += sizeof(uint8x16_t);
- }
-
-#endif /* !USE_GCC_INLINE_ASM */
-
- if (trailer_count)
- {
- if (trailer_count & 8)
- {
- uint8x8_t t1 = vld1_u8 (t_src);
-
- t_src += sizeof(uint8x8_t);
- vst1_u8 (t_dst, t1);
- t_dst += sizeof(uint8x8_t);
- }
-
- if (trailer_count & 4)
- {
- *((uint32_t*) t_dst) = *((uint32_t*) t_src);
-
- t_dst += 4;
- t_src += 4;
- }
-
- if (trailer_count & 2)
- {
- *((uint16_t*) t_dst) = *((uint16_t*) t_src);
-
- t_dst += 2;
- t_src += 2;
- }
-
- if (trailer_count & 1)
- {
- *t_dst++ = *t_src++;
- }
- }
-}
-
-static inline void
-solid_over_565_8_pix_neon (uint32_t glyph_colour,
- uint16_t *dest,
- uint8_t * in_mask,
- uint32_t dest_stride, /* bytes, not elements */
- uint32_t mask_stride,
- uint32_t count /* 8-pixel groups */)
-{
- /* Inner loop of glyph blitter (solid colour, alpha mask) */
-
-#ifdef USE_GCC_INLINE_ASM
-
- asm volatile (
- " vld4.8 {d20[], d21[], d22[], d23[]}, [%[glyph_colour]] @ splat solid colour components \n"
- "0: @ loop \n"
- " vld1.16 {d0, d1}, [%[dest]] @ load first pixels from framebuffer \n"
- " vld1.8 {d17}, [%[in_mask]] @ load alpha mask of glyph \n"
- " vmull.u8 q9, d17, d23 @ apply glyph colour alpha to mask \n"
- " vshrn.u16 d17, q9, #8 @ reformat it to match original mask \n"
- " vmvn d18, d17 @ we need the inverse mask for the background \n"
- " vsli.u16 q3, q0, #5 @ duplicate framebuffer blue bits \n"
- " vshrn.u16 d2, q0, #8 @ unpack red from framebuffer pixels \n"
- " vshrn.u16 d4, q0, #3 @ unpack green \n"
- " vsri.u8 d2, d2, #5 @ duplicate red bits (extend 5 to 8) \n"
- " vshrn.u16 d6, q3, #2 @ unpack extended blue (truncate 10 to 8) \n"
- " vsri.u8 d4, d4, #6 @ duplicate green bits (extend 6 to 8) \n"
- " vmull.u8 q1, d2, d18 @ apply inverse mask to background red... \n"
- " vmull.u8 q2, d4, d18 @ ...green... \n"
- " vmull.u8 q3, d6, d18 @ ...blue \n"
- " subs %[count], %[count], #1 @ decrement/test loop counter \n"
- " vmlal.u8 q1, d17, d22 @ add masked foreground red... \n"
- " vmlal.u8 q2, d17, d21 @ ...green... \n"
- " vmlal.u8 q3, d17, d20 @ ...blue \n"
- " add %[in_mask], %[in_mask], %[mask_stride] @ advance mask pointer, while we wait \n"
- " vsri.16 q1, q2, #5 @ pack green behind red \n"
- " vsri.16 q1, q3, #11 @ pack blue into pixels \n"
- " vst1.16 {d2, d3}, [%[dest]] @ store composited pixels \n"
- " add %[dest], %[dest], %[dest_stride] @ advance framebuffer pointer \n"
- " bne 0b @ next please \n"
-
- /* Clobbered registers marked as input/outputs */
- : [dest] "+r" (dest), [in_mask] "+r" (in_mask), [count] "+r" (count)
-
- /* Inputs */
- : [dest_stride] "r" (dest_stride), [mask_stride] "r" (mask_stride), [glyph_colour] "r" (&glyph_colour)
-
- /* Clobbers, including the inputs we modify, and potentially lots of memory */
- : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d17", "d18", "d19",
- "d20", "d21", "d22", "d23", "d24", "d25", "cc", "memory"
- );
-
-#else
-
- uint8x8x4_t solid_colour = vld4_dup_u8 ((uint8_t*) &glyph_colour);
-
- while (count--)
- {
- uint16x8_t pixels = vld1q_u16 (dest);
- uint8x8_t mask = vshrn_n_u16 (vmull_u8 (solid_colour.val[3], vld1_u8 (in_mask)), 8);
- uint8x8_t mask_image = vmvn_u8 (mask);
-
- uint8x8_t t_red = vshrn_n_u16 (pixels, 8);
- uint8x8_t t_green = vshrn_n_u16 (pixels, 3);
- uint8x8_t t_blue = vshrn_n_u16 (vsli_n_u8 (pixels, pixels, 5), 2);
-
- uint16x8_t s_red = vmull_u8 (vsri_n_u8 (t_red, t_red, 5), mask_image);
- uint16x8_t s_green = vmull_u8 (vsri_n_u8 (t_green, t_green, 6), mask_image);
- uint16x8_t s_blue = vmull_u8 (t_blue, mask_image);
-
- s_red = vmlal (s_red, mask, solid_colour.val[2]);
- s_green = vmlal (s_green, mask, solid_colour.val[1]);
- s_blue = vmlal (s_blue, mask, solid_colour.val[0]);
-
- pixels = vsri_n_u16 (s_red, s_green, 5);
- pixels = vsri_n_u16 (pixels, s_blue, 11);
- vst1q_u16 (dest, pixels);
-
- dest += dest_stride;
- mask += mask_stride;
- }
-
-#endif
}
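The rewritten pixman_fill_neon above is now only a dispatcher: it computes the start address of the rectangle and hands width, height, the stride in destination-sized elements and the (masked) solid value to the generated src_n_* assembly. In scalar terms those routines do no more than the following (32 bpp case shown, illustrative only):

static void
fill_rect_32 (uint32_t *dst, int32_t stride, /* stride in uint32_t units */
              int width, int height, uint32_t color)
{
    while (height--)
    {
        int i;

        for (i = 0; i < width; i++)
            dst[i] = color;

        dst += stride;
    }
}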
-#if 0 /* this is broken currently */
-static void
-neon_composite_over_n_8_0565 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint16_t *dst_line, *aligned_line;
- uint8_t *mask_line;
- uint32_t dst_stride, mask_stride;
- uint32_t kernel_count, copy_count, copy_tail;
- uint8_t kernel_offset, copy_offset;
-
- src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
- /* bail out if fully transparent or degenerate */
- srca = src >> 24;
- if (src == 0)
- return;
-
- if (width == 0 || height == 0)
- return;
-
- if (width > NEON_SCANLINE_BUFFER_PIXELS)
- {
- /* split the blit, so we can use a fixed-size scanline buffer
- * TODO: there must be a more elegant way of doing this.
- */
- int x;
- for (x = 0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS)
- {
- neon_composite_over_n_8_0565 (
- impl, op,
- src_image, mask_image, dst_image,
- src_x + x, src_y, mask_x + x, mask_y, dest_x + x, dest_y,
- (x + NEON_SCANLINE_BUFFER_PIXELS > width) ? width - x : NEON_SCANLINE_BUFFER_PIXELS, height);
- }
-
- return;
- }
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- /* keep within minimum number of aligned quadwords on width
- * while also keeping the minimum number of columns to process
- */
- {
- unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF;
- unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF;
- unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF;
-
- /* the fast copy should be quadword aligned */
- copy_offset = dst_line - ((uint16_t*) aligned_left);
- aligned_line = dst_line - copy_offset;
- copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4);
- copy_tail = 0;
-
- if (aligned_right - aligned_left > ceiling_length)
- {
- /* unaligned routine is tightest */
- kernel_count = (uint32_t) (ceiling_length >> 4);
- kernel_offset = copy_offset;
- }
- else
- {
- /* aligned routine is equally tight, so it is safer to align */
- kernel_count = copy_count;
- kernel_offset = 0;
- }
-
- /* We should avoid reading beyond scanline ends for safety */
- if (aligned_line < (dst_line - dest_x) ||
- (aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width))
- {
- /* switch to precise read */
- copy_offset = kernel_offset = 0;
- aligned_line = dst_line;
- kernel_count = (uint32_t) (ceiling_length >> 4);
- copy_count = (width * sizeof(*dst_line)) >> 4;
- copy_tail = (width * sizeof(*dst_line)) & 0xF;
- }
- }
-
- {
- uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8]; /* deliberately not initialised */
- uint8_t glyph_line[NEON_SCANLINE_BUFFER_PIXELS + 8];
- int y = height;
-
- /* row-major order */
- /* left edge, middle block, right edge */
- for ( ; y--; mask_line += mask_stride, aligned_line += dst_stride, dst_line += dst_stride)
- {
- /* We don't want to overrun the edges of the glyph,
- * so realign the edge data into known buffers
- */
- neon_quadword_copy (glyph_line + copy_offset, mask_line, width >> 4, width & 0xF);
-
- /* Uncached framebuffer access is really, really slow
- * if we do it piecemeal. It should be much faster if we
- * grab it all at once. One scanline should easily fit in
- * L1 cache, so this should not waste RAM bandwidth.
- */
- neon_quadword_copy (scan_line, aligned_line, copy_count, copy_tail);
-
- /* Apply the actual filter */
- solid_over_565_8_pix_neon (
- src, scan_line + kernel_offset,
- glyph_line + kernel_offset, 8 * sizeof(*dst_line),
- 8, kernel_count);
-
- /* Copy the modified scanline back */
- neon_quadword_copy (dst_line, scan_line + copy_offset,
- width >> 3, (width & 7) * 2);
- }
- }
-}
-#endif
-
-#ifdef USE_GCC_INLINE_ASM
-
-static inline void
-plain_over_565_8_pix_neon (uint32_t colour,
- uint16_t *dest,
- uint32_t dest_stride, /* bytes, not elements */
- uint32_t count /* 8-pixel groups */)
-{
- /* Inner loop for plain translucent rects
- * (solid colour without alpha mask)
- */
- asm volatile (
- " vld4.8 {d20[], d21[], d22[], d23[]}, [%[colour]] @ solid colour load/splat \n"
- " vmull.u8 q12, d23, d22 @ premultiply alpha red \n"
- " vmull.u8 q13, d23, d21 @ premultiply alpha green \n"
- " vmull.u8 q14, d23, d20 @ premultiply alpha blue \n"
- " vmvn d18, d23 @ inverse alpha for background \n"
- "0: @ loop\n"
- " vld1.16 {d0, d1}, [%[dest]] @ load first pixels from framebuffer \n"
- " vshrn.u16 d2, q0, #8 @ unpack red from framebuffer pixels \n"
- " vshrn.u16 d4, q0, #3 @ unpack green \n"
- " vsli.u16 q3, q0, #5 @ duplicate framebuffer blue bits \n"
- " vsri.u8 d2, d2, #5 @ duplicate red bits (extend 5 to 8) \n"
- " vsri.u8 d4, d4, #6 @ duplicate green bits (extend 6 to 8) \n"
- " vshrn.u16 d6, q3, #2 @ unpack extended blue (truncate 10 to 8) \n"
- " vmov q0, q12 @ retrieve foreground red \n"
- " vmlal.u8 q0, d2, d18 @ blend red - my kingdom for a four-operand MLA \n"
- " vmov q1, q13 @ retrieve foreground green \n"
- " vmlal.u8 q1, d4, d18 @ blend green \n"
- " vmov q2, q14 @ retrieve foreground blue \n"
- " vmlal.u8 q2, d6, d18 @ blend blue \n"
- " subs %[count], %[count], #1 @ decrement/test loop counter \n"
- " vsri.16 q0, q1, #5 @ pack green behind red \n"
- " vsri.16 q0, q2, #11 @ pack blue into pixels \n"
- " vst1.16 {d0, d1}, [%[dest]] @ store composited pixels \n"
- " add %[dest], %[dest], %[dest_stride] @ advance framebuffer pointer \n"
- " bne 0b @ next please \n"
-
- /* Clobbered registers marked as input/outputs */
- : [dest] "+r" (dest), [count] "+r" (count)
-
- /* Inputs */
- : [dest_stride] "r" (dest_stride), [colour] "r" (&colour)
-
- /* Clobbers, including the inputs we modify, and
- * potentially lots of memory
- */
- : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d18", "d19",
- "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29",
- "cc", "memory"
- );
-}
-
-static void
-neon_composite_over_n_0565 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+static pixman_bool_t
+pixman_blt_neon (uint32_t *src_bits,
+ uint32_t *dst_bits,
+ int src_stride,
+ int dst_stride,
+ int src_bpp,
+ int dst_bpp,
+ int src_x,
+ int src_y,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height)
{
- uint32_t src, srca;
- uint16_t *dst_line, *aligned_line;
- uint32_t dst_stride;
- uint32_t kernel_count, copy_count, copy_tail;
- uint8_t kernel_offset, copy_offset;
-
- src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
- /* bail out if fully transparent */
- srca = src >> 24;
- if (src == 0)
- return;
-
- if (width == 0 || height == 0)
- return;
-
- if (width > NEON_SCANLINE_BUFFER_PIXELS)
- {
- /* split the blit, so we can use a fixed-size scanline buffer *
- * TODO: there must be a more elegant way of doing this.
- */
- int x;
-
- for (x = 0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS)
- {
- neon_composite_over_n_0565 (
- impl, op,
- src_image, mask_image, dst_image,
- src_x + x, src_y, mask_x + x, mask_y, dest_x + x, dest_y,
- (x + NEON_SCANLINE_BUFFER_PIXELS > width) ? width - x : NEON_SCANLINE_BUFFER_PIXELS, height);
- }
- return;
- }
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
- /* keep within minimum number of aligned quadwords on width
- * while also keeping the minimum number of columns to process
- */
- {
- unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF;
- unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF;
- unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF;
-
- /* the fast copy should be quadword aligned */
- copy_offset = dst_line - ((uint16_t*) aligned_left);
- aligned_line = dst_line - copy_offset;
- copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4);
- copy_tail = 0;
-
- if (aligned_right - aligned_left > ceiling_length)
- {
- /* unaligned routine is tightest */
- kernel_count = (uint32_t) (ceiling_length >> 4);
- kernel_offset = copy_offset;
- }
- else
- {
- /* aligned routine is equally tight, so it is safer to align */
- kernel_count = copy_count;
- kernel_offset = 0;
- }
-
- /* We should avoid reading beyond scanline ends for safety */
- if (aligned_line < (dst_line - dest_x) ||
- (aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width))
- {
- /* switch to precise read */
- copy_offset = kernel_offset = 0;
- aligned_line = dst_line;
- kernel_count = (uint32_t) (ceiling_length >> 4);
- copy_count = (width * sizeof(*dst_line)) >> 4;
- copy_tail = (width * sizeof(*dst_line)) & 0xF;
- }
- }
+ if (src_bpp != dst_bpp)
+ return FALSE;
+ switch (src_bpp)
{
- uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8]; /* deliberately not initialised */
-
- /* row-major order */
- /* left edge, middle block, right edge */
- for ( ; height--; aligned_line += dst_stride, dst_line += dst_stride)
- {
- /* Uncached framebuffer access is really, really slow if we do it piecemeal.
- * It should be much faster if we grab it all at once.
- * One scanline should easily fit in L1 cache, so this should
- * not waste RAM bandwidth.
- */
- neon_quadword_copy (scan_line, aligned_line, copy_count, copy_tail);
-
- /* Apply the actual filter */
- plain_over_565_8_pix_neon (
- src, scan_line + kernel_offset, 8 * sizeof(*dst_line), kernel_count);
-
- /* Copy the modified scanline back */
- neon_quadword_copy (
- dst_line, scan_line + copy_offset, width >> 3, (width & 7) * 2);
- }
+ case 16:
+ pixman_composite_src_0565_0565_asm_neon (
+ width, height,
+ (uint16_t *)(((char *) dst_bits) +
+ dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
+ (uint16_t *)(((char *) src_bits) +
+ src_y * src_stride * 4 + src_x * 2), src_stride * 2);
+ return TRUE;
+ case 32:
+ pixman_composite_src_8888_8888_asm_neon (
+ width, height,
+ (uint32_t *)(((char *) dst_bits) +
+ dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
+ (uint32_t *)(((char *) src_bits) +
+ src_y * src_stride * 4 + src_x * 4), src_stride);
+ return TRUE;
+ default:
+ return FALSE;
}
}
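As a cross-check of the arithmetic above: pixman strides are counted in 32-bit words, so the byte offset of pixel (x, y) is y * stride * 4 + x * bpp / 8, while the assembly routines take strides in destination-sized elements (stride * 2 halfwords at 16 bpp, the word stride unchanged at 32 bpp). A plain-C equivalent of the same blit, for reference (a sketch, not part of pixman; assumes <string.h>):

#include <string.h>

static void
blt_c_reference (uint32_t *src_bits, uint32_t *dst_bits,
                 int src_stride, int dst_stride, /* in 32-bit words */
                 int bpp,
                 int src_x, int src_y, int dst_x, int dst_y,
                 int width, int height)
{
    char *src = (char *) src_bits + src_y * src_stride * 4 + src_x * bpp / 8;
    char *dst = (char *) dst_bits + dst_y * dst_stride * 4 + dst_x * bpp / 8;

    while (height--)
    {
        memcpy (dst, src, width * bpp / 8);
        src += src_stride * 4;
        dst += dst_stride * 4;
    }
}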
-static inline void
-ARGB8_over_565_8_pix_neon (uint32_t *src,
- uint16_t *dest,
- uint32_t src_stride, /* bytes, not elements */
- uint32_t count /* 8-pixel groups */)
-{
- asm volatile (
- "0: @ loop\n"
- " pld [%[src], %[src_stride]] @ preload from next scanline \n"
- " vld1.16 {d0, d1}, [%[dest]] @ load pixels from framebuffer \n"
- " vld4.8 {d20, d21, d22, d23},[%[src]]! @ load source image pixels \n"
- " vsli.u16 q3, q0, #5 @ duplicate framebuffer blue bits \n"
- " vshrn.u16 d2, q0, #8 @ unpack red from framebuffer pixels \n"
- " vshrn.u16 d4, q0, #3 @ unpack green \n"
- " vmvn d18, d23 @ we need the inverse alpha for the background \n"
- " vsri.u8 d2, d2, #5 @ duplicate red bits (extend 5 to 8) \n"
- " vshrn.u16 d6, q3, #2 @ unpack extended blue (truncate 10 to 8) \n"
- " vsri.u8 d4, d4, #6 @ duplicate green bits (extend 6 to 8) \n"
- " vmull.u8 q1, d2, d18 @ apply inverse alpha to background red... \n"
- " vmull.u8 q2, d4, d18 @ ...green... \n"
- " vmull.u8 q3, d6, d18 @ ...blue \n"
- " subs %[count], %[count], #1 @ decrement/test loop counter \n"
- " vmlal.u8 q1, d23, d22 @ add blended foreground red... \n"
- " vmlal.u8 q2, d23, d21 @ ...green... \n"
- " vmlal.u8 q3, d23, d20 @ ...blue \n"
- " vsri.16 q1, q2, #5 @ pack green behind red \n"
- " vsri.16 q1, q3, #11 @ pack blue into pixels \n"
- " vst1.16 {d2, d3}, [%[dest]]! @ store composited pixels \n"
- " bne 0b @ next please \n"
-
- /* Clobbered registers marked as input/outputs */
- : [dest] "+r" (dest), [src] "+r" (src), [count] "+r" (count)
-
- /* Inputs */
- : [src_stride] "r" (src_stride)
-
- /* Clobbers, including the inputs we modify, and potentially lots of memory */
- : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d17", "d18", "d20",
- "d21", "d22", "d23", "cc", "memory"
- );
-}
-
-static void
-neon_composite_over_8888_0565 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+static const pixman_fast_path_t arm_neon_fast_paths[] =
{
- uint32_t *src_line;
- uint16_t *dst_line, *aligned_line;
- uint32_t dst_stride, src_stride;
- uint32_t kernel_count, copy_count, copy_tail;
- uint8_t kernel_offset, copy_offset;
-
- /* we assume mask is opaque
- * so the only alpha to deal with is embedded in src
- */
- if (width > NEON_SCANLINE_BUFFER_PIXELS)
- {
- /* split the blit, so we can use a fixed-size scanline buffer */
- int x;
- for (x = 0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS)
- {
- neon_composite_over_8888_0565 (
- impl, op,
- src_image, mask_image, dst_image,
- src_x + x, src_y, mask_x + x, mask_y, dest_x + x, dest_y,
- (x + NEON_SCANLINE_BUFFER_PIXELS > width) ? width - x : NEON_SCANLINE_BUFFER_PIXELS, height);
- }
- return;
- }
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- /* keep within minimum number of aligned quadwords on width
- * while also keeping the minimum number of columns to process
- */
- {
- unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF;
- unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF;
- unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF;
-
- /* the fast copy should be quadword aligned */
- copy_offset = dst_line - ((uint16_t*) aligned_left);
- aligned_line = dst_line - copy_offset;
- copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4);
- copy_tail = 0;
-
- if (aligned_right - aligned_left > ceiling_length)
- {
- /* unaligned routine is tightest */
- kernel_count = (uint32_t) (ceiling_length >> 4);
- kernel_offset = copy_offset;
- }
- else
- {
- /* aligned routine is equally tight, so it is safer to align */
- kernel_count = copy_count;
- kernel_offset = 0;
- }
-
- /* We should avoid reading beyond scanline ends for safety */
- if (aligned_line < (dst_line - dest_x) ||
- (aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width))
- {
- /* switch to precise read */
- copy_offset = kernel_offset = 0;
- aligned_line = dst_line;
- kernel_count = (uint32_t) (ceiling_length >> 4);
- copy_count = (width * sizeof(*dst_line)) >> 4;
- copy_tail = (width * sizeof(*dst_line)) & 0xF;
- }
- }
-
- /* Preload the first input scanline */
- {
- uint8_t *src_ptr = (uint8_t*) src_line;
- uint32_t count = (width + 15) / 16;
-
-#ifdef USE_GCC_INLINE_ASM
- asm volatile (
- "0: @ loop \n"
- " subs %[count], %[count], #1 \n"
- " pld [%[src]] \n"
- " add %[src], %[src], #64 \n"
- " bgt 0b \n"
-
- /* Clobbered input registers marked as input/outputs */
- : [src] "+r" (src_ptr), [count] "+r" (count)
- : /* no unclobbered inputs */
- : "cc"
- );
-#else
- do
- {
- __pld (src_ptr);
- src_ptr += 64;
- }
- while (--count);
-#endif
- }
-
- {
- uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8]; /* deliberately not initialised */
-
- /* row-major order */
- /* left edge, middle block, right edge */
- for ( ; height--; src_line += src_stride, aligned_line += dst_stride)
- {
- /* Uncached framebuffer access is really, really slow if we do
- * it piecemeal. It should be much faster if we grab it all at
- * once. One scanline should easily fit in L1 cache, so this
- * should not waste RAM bandwidth.
- */
- neon_quadword_copy (scan_line, aligned_line, copy_count, copy_tail);
-
- /* Apply the actual filter */
- ARGB8_over_565_8_pix_neon (
- src_line, scan_line + kernel_offset,
- src_stride * sizeof(*src_line), kernel_count);
-
- /* Copy the modified scanline back */
- neon_quadword_copy (dst_line,
- scan_line + copy_offset,
- width >> 3, (width & 7) * 2);
- }
- }
-}
+ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, neon_composite_src_0565_8888),
+ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, neon_composite_src_0565_8888),
+ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, neon_composite_src_0565_8888),
+ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, neon_composite_src_0565_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, neon_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888),
+ PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev),
+ PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev),
+ PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, neon_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8),
+ PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8),
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8000_8000),
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
-#endif /* USE_GCC_INLINE_ASM */
-
-static const pixman_fast_path_t arm_neon_fast_path_array[] =
-{
- { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, neon_composite_add_8888_8_8, 0 },
- { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, neon_composite_add_8000_8000, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, neon_composite_over_n_8_0565, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, neon_composite_over_n_8_0565, 0 },
- { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_src_24_16, 0 },
- { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_src_24_16, 0 },
- { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_src_24_16, 0 },
- { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_src_24_16, 0 },
-#ifdef USE_GCC_INLINE_ASM
- { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_src_16_16, 0 },
- { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_src_16_16, 0 },
-#if 0 /* this code has some bugs */
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_over_n_0565, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_over_n_0565, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_over_8888_0565, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_over_8888_0565, 0 },
-#endif
-#endif
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, neon_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, neon_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, neon_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, neon_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, neon_composite_over_8888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, neon_composite_over_8888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, neon_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, neon_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, neon_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, neon_composite_over_n_8_8888, 0 },
{ PIXMAN_OP_NONE },
};
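Each PIXMAN_STD_FAST_PATH entry above is matched at composite time against the operator and the source/mask/destination formats of the request; the first matching entry wins and its function runs instead of the general path. For example, an OVER of an a8r8g8b8 image onto an r5g6b5 framebuffer issued through the public API would normally be served by the neon_composite_over_8888_0565 entry, assuming NEON is detected and no transform, filter or unusual clip forces the general path (client-side example, illustrative only; strides are in bytes and must be multiples of 4):

#include <pixman.h>

static void
blend_argb_onto_565 (uint32_t *argb, int argb_stride_bytes,
                     uint16_t *fb, int fb_stride_bytes,
                     int width, int height)
{
    pixman_image_t *src = pixman_image_create_bits (PIXMAN_a8r8g8b8,
                                                    width, height,
                                                    argb, argb_stride_bytes);
    pixman_image_t *dst = pixman_image_create_bits (PIXMAN_r5g6b5,
                                                    width, height,
                                                    (uint32_t *) fb,
                                                    fb_stride_bytes);

    pixman_image_composite (PIXMAN_OP_OVER, src, NULL, dst,
                            0, 0, 0, 0, 0, 0, width, height);

    pixman_image_unref (src);
    pixman_image_unref (dst);
}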
-const pixman_fast_path_t *const arm_neon_fast_paths = arm_neon_fast_path_array;
-
-static void
-arm_neon_composite (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * mask,
- pixman_image_t * dest,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- if (_pixman_run_fast_path (arm_neon_fast_paths, imp,
- op, src, mask, dest,
- src_x, src_y,
- mask_x, mask_y,
- dest_x, dest_y,
- width, height))
- {
- return;
- }
-
- _pixman_implementation_composite (imp->delegate, op,
- src, mask, dest,
- src_x, src_y,
- mask_x, mask_y,
- dest_x, dest_y,
- width, height);
-}
-
-static pixman_bool_t
-pixman_blt_neon (void *src_bits,
- void *dst_bits,
- int src_stride,
- int dst_stride,
- int src_bpp,
- int dst_bpp,
- int src_x,
- int src_y,
- int dst_x,
- int dst_y,
- int width,
- int height)
-{
- if (!width || !height)
- return TRUE;
-
- /* accelerate only straight copies involving complete bytes */
- if (src_bpp != dst_bpp || (src_bpp & 7))
- return FALSE;
-
- {
- uint32_t bytes_per_pixel = src_bpp >> 3;
- uint32_t byte_width = width * bytes_per_pixel;
- /* parameter is in words for some reason */
- int32_t src_stride_bytes = src_stride * 4;
- int32_t dst_stride_bytes = dst_stride * 4;
- uint8_t *src_bytes = ((uint8_t*) src_bits) +
- src_y * src_stride_bytes + src_x * bytes_per_pixel;
- uint8_t *dst_bytes = ((uint8_t*) dst_bits) +
- dst_y * dst_stride_bytes + dst_x * bytes_per_pixel;
- uint32_t quadword_count = byte_width / 16;
- uint32_t offset = byte_width % 16;
-
- while (height--)
- {
- neon_quadword_copy (dst_bytes, src_bytes, quadword_count, offset);
- src_bytes += src_stride_bytes;
- dst_bytes += dst_stride_bytes;
- }
- }
-
- return TRUE;
-}
-
static pixman_bool_t
arm_neon_blt (pixman_implementation_t *imp,
uint32_t * src_bits,
@@ -2733,17 +273,18 @@ arm_neon_blt (pixman_implementation_t *imp,
int width,
int height)
{
- if (pixman_blt_neon (
+ if (!pixman_blt_neon (
src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
src_x, src_y, dst_x, dst_y, width, height))
+
{
- return TRUE;
+ return _pixman_implementation_blt (
+ imp->delegate,
+ src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+ src_x, src_y, dst_x, dst_y, width, height);
}
- return _pixman_implementation_blt (
- imp->delegate,
- src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
- src_x, src_y, dst_x, dst_y, width, height);
+ return TRUE;
}
static pixman_bool_t
@@ -2764,18 +305,48 @@ arm_neon_fill (pixman_implementation_t *imp,
imp->delegate, bits, stride, bpp, x, y, width, height, xor);
}
+#define BIND_COMBINE_U(name) \
+void \
+pixman_composite_scanline_##name##_mask_asm_neon (int32_t w, \
+ const uint32_t *dst, \
+ const uint32_t *src, \
+ const uint32_t *mask); \
+ \
+void \
+pixman_composite_scanline_##name##_asm_neon (int32_t w, \
+ const uint32_t *dst, \
+ const uint32_t *src); \
+ \
+static void \
+neon_combine_##name##_u (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ uint32_t * dest, \
+ const uint32_t * src, \
+ const uint32_t * mask, \
+ int width) \
+{ \
+ if (mask) \
+ pixman_composite_scanline_##name##_mask_asm_neon (width, dest, \
+ src, mask); \
+ else \
+ pixman_composite_scanline_##name##_asm_neon (width, dest, src); \
+}
+
+BIND_COMBINE_U (over)
+BIND_COMBINE_U (add)
+
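The combiners bound here operate on one scanline of premultiplied a8r8g8b8 pixels; for PIXMAN_OP_OVER the per-pixel result is dest = src + dest * (255 - src_alpha) / 255 on every component. A scalar reference for that step (a sketch only; pixman's generic C combiners remain the authoritative definition):

static uint32_t
over_u_pixel (uint32_t src, uint32_t dst)
{
    uint32_t ia = 255 - (src >> 24);   /* inverse source alpha */
    uint32_t result = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8)
    {
        uint32_t s = (src >> shift) & 0xff;
        uint32_t d = (dst >> shift) & 0xff;
        uint32_t t = d * ia + 0x80;

        d = s + ((t + (t >> 8)) >> 8); /* d * ia / 255, rounded */
        if (d > 0xff)
            d = 0xff;

        result |= d << shift;
    }

    return result;
}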
pixman_implementation_t *
_pixman_implementation_create_arm_neon (void)
{
pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
- pixman_implementation_t *imp = _pixman_implementation_create (general);
+ pixman_implementation_t *imp =
+ _pixman_implementation_create (general, arm_neon_fast_paths);
+
+ imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
+ imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
- imp->composite = arm_neon_composite;
-#if 0 /* this code has some bugs */
imp->blt = arm_neon_blt;
-#endif
imp->fill = arm_neon_fill;
return imp;
}
-
diff --git a/lib/pixman/pixman/pixman-arm-simd-asm.S b/lib/pixman/pixman/pixman-arm-simd-asm.S
new file mode 100644
index 000000000..a82e05de2
--- /dev/null
+++ b/lib/pixman/pixman/pixman-arm-simd-asm.S
@@ -0,0 +1,330 @@
+/*
+ * Copyright © 2008 Mozilla Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Mozilla Corporation not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. Mozilla Corporation makes no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author: Jeff Muizelaar (jeff@infidigm.net)
+ *
+ */
+
+/* Prevent the stack from becoming executable */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+ .text
+ .arch armv6
+ .object_arch armv4
+ .arm
+ .altmacro
+
+/* Supplementary macro for setting function attributes */
+.macro pixman_asm_function fname
+ .func fname
+ .global fname
+#ifdef __ELF__
+ .hidden fname
+ .type fname, %function
+#endif
+fname:
+.endm
+
+/*
+ * The code below was generated by gcc 4.3.4 from the commented out
+ * functions in 'pixman-arm-simd.c' file with the following optimization
+ * options: "-O3 -mcpu=arm1136jf-s -fomit-frame-pointer"
+ *
+ * TODO: replace gcc generated code with hand tuned versions because
+ * the code quality is not very good, introduce symbolic register
+ * aliases for better readability and maintainability.
+ */
+
+pixman_asm_function pixman_composite_add_8000_8000_asm_armv6
+ push {r4, r5, r6, r7, r8, r9, r10, r11}
+ mov r10, r1
+ sub sp, sp, #4
+ subs r10, r10, #1
+ mov r11, r0
+ mov r8, r2
+ str r3, [sp]
+ ldr r7, [sp, #36]
+ bcc 0f
+6: cmp r11, #0
+ beq 1f
+ orr r3, r8, r7
+ tst r3, #3
+ beq 2f
+ mov r1, r8
+ mov r0, r7
+ mov r12, r11
+ b 3f
+5: tst r3, #3
+ beq 4f
+3: ldrb r2, [r0], #1
+ subs r12, r12, #1
+ ldrb r3, [r1]
+ uqadd8 r3, r2, r3
+ strb r3, [r1], #1
+ orr r3, r1, r0
+ bne 5b
+1: ldr r3, [sp]
+ add r8, r8, r3
+ ldr r3, [sp, #40]
+ add r7, r7, r3
+10: subs r10, r10, #1
+ bcs 6b
+0: add sp, sp, #4
+ pop {r4, r5, r6, r7, r8, r9, r10, r11}
+ bx lr
+2: mov r12, r11
+ mov r1, r8
+ mov r0, r7
+4: cmp r12, #3
+ subgt r6, r12, #4
+ movgt r9, r12
+ lsrgt r5, r6, #2
+ addgt r3, r5, #1
+ movgt r12, #0
+ lslgt r4, r3, #2
+ ble 7f
+8: ldr r3, [r0, r12]
+ ldr r2, [r1, r12]
+ uqadd8 r3, r3, r2
+ str r3, [r1, r12]
+ add r12, r12, #4
+ cmp r12, r4
+ bne 8b
+ sub r3, r9, #4
+ bic r3, r3, #3
+ add r3, r3, #4
+ subs r12, r6, r5, lsl #2
+ add r1, r1, r3
+ add r0, r0, r3
+ beq 1b
+7: mov r4, #0
+9: ldrb r3, [r1, r4]
+ ldrb r2, [r0, r4]
+ uqadd8 r3, r2, r3
+ strb r3, [r1, r4]
+ add r4, r4, #1
+ cmp r4, r12
+ bne 9b
+ ldr r3, [sp]
+ add r8, r8, r3
+ ldr r3, [sp, #40]
+ add r7, r7, r3
+ b 10b
+.endfunc
+
+pixman_asm_function pixman_composite_over_8888_8888_asm_armv6
+ push {r4, r5, r6, r7, r8, r9, r10, r11}
+ sub sp, sp, #20
+ cmp r1, #0
+ mov r12, r2
+ str r1, [sp, #12]
+ str r0, [sp, #16]
+ ldr r2, [sp, #52]
+ beq 0f
+ lsl r3, r3, #2
+ str r3, [sp]
+ ldr r3, [sp, #56]
+ mov r10, #0
+ lsl r3, r3, #2
+ str r3, [sp, #8]
+ mov r11, r3
+ b 1f
+6: ldr r11, [sp, #8]
+1: ldr r9, [sp]
+ mov r0, r12
+ add r12, r12, r9
+ mov r1, r2
+ str r12, [sp, #4]
+ add r2, r2, r11
+ ldr r12, [sp, #16]
+ ldr r3, =0x00800080
+ ldr r9, =0xff00ff00
+ mov r11, #255
+ cmp r12, #0
+ beq 4f
+5: ldr r5, [r1], #4
+ ldr r4, [r0]
+ sub r8, r11, r5, lsr #24
+ uxtb16 r6, r4
+ uxtb16 r7, r4, ror #8
+ mla r6, r6, r8, r3
+ mla r7, r7, r8, r3
+ uxtab16 r6, r6, r6, ror #8
+ uxtab16 r7, r7, r7, ror #8
+ and r7, r7, r9
+ uxtab16 r6, r7, r6, ror #8
+ uqadd8 r5, r6, r5
+ str r5, [r0], #4
+ subs r12, r12, #1
+ bne 5b
+4: ldr r3, [sp, #12]
+ add r10, r10, #1
+ cmp r10, r3
+ ldr r12, [sp, #4]
+ bne 6b
+0: add sp, sp, #20
+ pop {r4, r5, r6, r7, r8, r9, r10, r11}
+ bx lr
+.endfunc
+
+pixman_asm_function pixman_composite_over_8888_n_8888_asm_armv6
+ push {r4, r5, r6, r7, r8, r9, r10, r11}
+ sub sp, sp, #28
+ cmp r1, #0
+ str r1, [sp, #12]
+ ldrb r1, [sp, #71]
+ mov r12, r2
+ str r0, [sp, #16]
+ ldr r2, [sp, #60]
+ str r1, [sp, #24]
+ beq 0f
+ lsl r3, r3, #2
+ str r3, [sp, #20]
+ ldr r3, [sp, #64]
+ mov r10, #0
+ lsl r3, r3, #2
+ str r3, [sp, #8]
+ mov r11, r3
+ b 1f
+5: ldr r11, [sp, #8]
+1: ldr r4, [sp, #20]
+ mov r0, r12
+ mov r1, r2
+ add r12, r12, r4
+ add r2, r2, r11
+ str r12, [sp]
+ str r2, [sp, #4]
+ ldr r12, [sp, #16]
+ ldr r2, =0x00800080
+ ldr r3, [sp, #24]
+ mov r11, #255
+ cmp r12, #0
+ beq 3f
+4: ldr r5, [r1], #4
+ ldr r4, [r0]
+ uxtb16 r6, r5
+ uxtb16 r7, r5, ror #8
+ mla r6, r6, r3, r2
+ mla r7, r7, r3, r2
+ uxtab16 r6, r6, r6, ror #8
+ uxtab16 r7, r7, r7, ror #8
+ uxtb16 r6, r6, ror #8
+ uxtb16 r7, r7, ror #8
+ orr r5, r6, r7, lsl #8
+ uxtb16 r6, r4
+ uxtb16 r7, r4, ror #8
+ sub r8, r11, r5, lsr #24
+ mla r6, r6, r8, r2
+ mla r7, r7, r8, r2
+ uxtab16 r6, r6, r6, ror #8
+ uxtab16 r7, r7, r7, ror #8
+ uxtb16 r6, r6, ror #8
+ uxtb16 r7, r7, ror #8
+ orr r6, r6, r7, lsl #8
+ uqadd8 r5, r6, r5
+ str r5, [r0], #4
+ subs r12, r12, #1
+ bne 4b
+3: ldr r1, [sp, #12]
+ add r10, r10, #1
+ cmp r10, r1
+ ldr r12, [sp]
+ ldr r2, [sp, #4]
+ bne 5b
+0: add sp, sp, #28
+ pop {r4, r5, r6, r7, r8, r9, r10, r11}
+ bx lr
+.endfunc
+
+pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
+ push {r4, r5, r6, r7, r8, r9, r10, r11}
+ sub sp, sp, #28
+ cmp r1, #0
+ ldr r9, [sp, #60]
+ str r1, [sp, #12]
+ bic r1, r9, #-16777216
+ str r1, [sp, #20]
+ mov r12, r2
+ lsr r1, r9, #8
+ ldr r2, [sp, #20]
+ bic r1, r1, #-16777216
+ bic r2, r2, #65280
+ bic r1, r1, #65280
+ str r2, [sp, #20]
+ str r0, [sp, #16]
+ str r1, [sp, #4]
+ ldr r2, [sp, #68]
+ beq 0f
+ lsl r3, r3, #2
+ str r3, [sp, #24]
+ mov r0, #0
+ b 1f
+5: ldr r3, [sp, #24]
+1: ldr r4, [sp, #72]
+ mov r10, r12
+ mov r1, r2
+ add r12, r12, r3
+ add r2, r2, r4
+ str r12, [sp, #8]
+ str r2, [sp]
+ ldr r12, [sp, #16]
+ ldr r11, =0x00800080
+ ldr r2, [sp, #4]
+ ldr r3, [sp, #20]
+ cmp r12, #0
+ beq 3f
+4: ldrb r5, [r1], #1
+ ldr r4, [r10]
+ mla r6, r3, r5, r11
+ mla r7, r2, r5, r11
+ uxtab16 r6, r6, r6, ror #8
+ uxtab16 r7, r7, r7, ror #8
+ uxtb16 r6, r6, ror #8
+ uxtb16 r7, r7, ror #8
+ orr r5, r6, r7, lsl #8
+ uxtb16 r6, r4
+ uxtb16 r7, r4, ror #8
+ mvn r8, r5
+ lsr r8, r8, #24
+ mla r6, r6, r8, r11
+ mla r7, r7, r8, r11
+ uxtab16 r6, r6, r6, ror #8
+ uxtab16 r7, r7, r7, ror #8
+ uxtb16 r6, r6, ror #8
+ uxtb16 r7, r7, ror #8
+ orr r6, r6, r7, lsl #8
+ uqadd8 r5, r6, r5
+ str r5, [r10], #4
+ subs r12, r12, #1
+ bne 4b
+3: ldr r4, [sp, #12]
+ add r0, r0, #1
+ cmp r0, r4
+ ldr r12, [sp, #8]
+ ldr r2, [sp]
+ bne 5b
+0: add sp, sp, #28
+ pop {r4, r5, r6, r7, r8, r9, r10, r11}
+ bx lr
+.endfunc
diff --git a/lib/pixman/pixman/pixman-arm-simd.c b/lib/pixman/pixman/pixman-arm-simd.c
index fb7bf3da8..389c9e01a 100644
--- a/lib/pixman/pixman/pixman-arm-simd.c
+++ b/lib/pixman/pixman/pixman-arm-simd.c
@@ -28,31 +28,22 @@
#endif
#include "pixman-private.h"
+#include "pixman-arm-common.h"
-static void
-arm_composite_add_8000_8000 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+#if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */
+
+void
+pixman_composite_add_8000_8000_asm_armv6 (int32_t width,
+ int32_t height,
+ uint8_t *dst_line,
+ int32_t dst_stride,
+ uint8_t *src_line,
+ int32_t src_stride)
{
- uint8_t *dst_line, *dst;
- uint8_t *src_line, *src;
- int dst_stride, src_stride;
- uint16_t w;
+ uint8_t *dst, *src;
+ int32_t w;
uint8_t s, d;
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
while (height--)
{
dst = dst_line;
@@ -101,32 +92,21 @@ arm_composite_add_8000_8000 (pixman_implementation_t * impl,
}
-static void
-arm_composite_over_8888_8888 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+void
+pixman_composite_over_8888_8888_asm_armv6 (int32_t width,
+ int32_t height,
+ uint32_t *dst_line,
+ int32_t dst_stride,
+ uint32_t *src_line,
+ int32_t src_stride)
{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
- uint16_t w;
+ uint32_t *dst;
+ uint32_t *src;
+ int32_t w;
uint32_t component_half = 0x800080;
uint32_t upper_component_mask = 0xff00ff00;
uint32_t alpha_mask = 0xff;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
while (height--)
{
dst = dst_line;
@@ -188,40 +168,27 @@ arm_composite_over_8888_8888 (pixman_implementation_t * impl,
"2:\n\t"
: [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
: [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask),
- [alpha_mask] "r" (alpha_mask)
+ [alpha_mask] "r" (alpha_mask)
: "r4", "r5", "r6", "r7", "r8", "cc", "memory"
);
}
}
-static void
-arm_composite_over_8888_n_8888 (
- pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+void
+pixman_composite_over_8888_n_8888_asm_armv6 (int32_t width,
+ int32_t height,
+ uint32_t *dst_line,
+ int32_t dst_stride,
+ uint32_t *src_line,
+ int32_t src_stride,
+ uint32_t mask)
{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- uint32_t mask;
- int dst_stride, src_stride;
- uint16_t w;
+ uint32_t *dst;
+ uint32_t *src;
+ int32_t w;
uint32_t component_half = 0x800080;
uint32_t alpha_mask = 0xff;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
mask = (mask) >> 24;
while (height--)
@@ -298,39 +265,28 @@ arm_composite_over_8888_n_8888 (
"2:\n\t"
: [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
: [component_half] "r" (component_half), [mask_alpha] "r" (mask),
- [alpha_mask] "r" (alpha_mask)
+ [alpha_mask] "r" (alpha_mask)
: "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory"
);
}
}
-static void
-arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+void
+pixman_composite_over_n_8_8888_asm_armv6 (int32_t width,
+ int32_t height,
+ uint32_t *dst_line,
+ int32_t dst_stride,
+ uint32_t src,
+ int32_t unused,
+ uint8_t *mask_line,
+ int32_t mask_stride)
{
- uint32_t src, srca;
- uint32_t *dst_line, *dst;
- uint8_t *mask_line, *mask;
- int dst_stride, mask_stride;
- uint16_t w;
-
- src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+ uint32_t srca;
+ uint32_t *dst;
+ uint8_t *mask;
+ int32_t w;
- /* bail out if fully transparent */
srca = src >> 24;
- if (src == 0)
- return;
uint32_t component_mask = 0xff00ff;
uint32_t component_half = 0x800080;
@@ -338,9 +294,6 @@ arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
uint32_t src_hi = (src >> 8) & component_mask;
uint32_t src_lo = src & component_mask;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
while (height--)
{
dst = dst_line;
@@ -384,7 +337,8 @@ arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
"uxtb16 r7, r4, ror #8\n\t"
/* we could simplify this to use 'sub' if we were
- * willing to give up a register for alpha_mask */
+ * willing to give up a register for alpha_mask
+ */
"mvn r8, r5\n\t"
"mov r8, r8, lsr #24\n\t"
@@ -419,68 +373,45 @@ arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
}
}
-static const pixman_fast_path_t arm_simd_fast_path_array[] =
-{
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, arm_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, arm_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, arm_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, arm_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, arm_composite_over_8888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, arm_composite_over_8888_n_8888, NEED_SOLID_MASK },
+#endif
- { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, arm_composite_add_8000_8000, 0 },
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8000_8000,
+ uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
+ uint32_t, 1, uint32_t, 1)
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, arm_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, arm_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, arm_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, arm_composite_over_n_8_8888, 0 },
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (armv6, over_8888_n_8888,
+ uint32_t, 1, uint32_t, 1)
- { PIXMAN_OP_NONE },
-};
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (armv6, over_n_8_8888,
+ uint8_t, 1, uint32_t, 1)
-const pixman_fast_path_t *const arm_simd_fast_paths = arm_simd_fast_path_array;
-
-static void
-arm_simd_composite (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * mask,
- pixman_image_t * dest,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+static const pixman_fast_path_t arm_simd_fast_paths[] =
{
- if (_pixman_run_fast_path (arm_simd_fast_paths, imp,
- op, src, mask, dest,
- src_x, src_y,
- mask_x, mask_y,
- dest_x, dest_y,
- width, height))
- {
- return;
- }
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, armv6_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, armv6_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, armv6_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
+
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8000_8000),
+
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888),
- _pixman_implementation_composite (imp->delegate, op,
- src, mask, dest,
- src_x, src_y,
- mask_x, mask_y,
- dest_x, dest_y,
- width, height);
-}
+ { PIXMAN_OP_NONE },
+};
pixman_implementation_t *
_pixman_implementation_create_arm_simd (void)
{
pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
- pixman_implementation_t *imp = _pixman_implementation_create (general);
-
- imp->composite = arm_simd_composite;
+ pixman_implementation_t *imp = _pixman_implementation_create (general, arm_simd_fast_paths);
return imp;
}
-
diff --git a/lib/pixman/pixman/pixman-bits-image.c b/lib/pixman/pixman/pixman-bits-image.c
index 7a1910935..0225ae5aa 100644
--- a/lib/pixman/pixman/pixman-bits-image.c
+++ b/lib/pixman/pixman/pixman-bits-image.c
@@ -4,6 +4,7 @@
* 2008 Aaron Plattner, NVIDIA Corporation
* Copyright © 2000 SuSE, Inc.
* Copyright © 2007, 2009 Red Hat, Inc.
+ * Copyright © 2008 André Tupinambá <andrelrt@gmail.com>
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
@@ -28,6 +29,7 @@
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "pixman-private.h"
@@ -158,6 +160,9 @@ repeat (pixman_repeat_t repeat, int size, int *coord)
case PIXMAN_REPEAT_NONE:
break;
+
+ default:
+ break;
}
}
@@ -182,6 +187,97 @@ bits_image_fetch_pixel_nearest (bits_image_t *image,
}
}
+#if SIZEOF_LONG > 4
+
+static force_inline uint32_t
+bilinear_interpolation (uint32_t tl, uint32_t tr,
+ uint32_t bl, uint32_t br,
+ int distx, int disty)
+{
+ uint64_t distxy, distxiy, distixy, distixiy;
+ uint64_t tl64, tr64, bl64, br64;
+ uint64_t f, r;
+
+ distxy = distx * disty;
+ distxiy = distx * (256 - disty);
+ distixy = (256 - distx) * disty;
+ distixiy = (256 - distx) * (256 - disty);
+
+ /* Alpha and Blue */
+ tl64 = tl & 0xff0000ff;
+ tr64 = tr & 0xff0000ff;
+ bl64 = bl & 0xff0000ff;
+ br64 = br & 0xff0000ff;
+
+ f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
+ r = f & 0x0000ff0000ff0000ull;
+
+ /* Red and Green */
+ tl64 = tl;
+ tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
+
+ tr64 = tr;
+ tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
+
+ bl64 = bl;
+ bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
+
+ br64 = br;
+ br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
+
+ f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
+ r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
+
+ return (uint32_t)(r >> 16);
+}
+
+#else
+
+static force_inline uint32_t
+bilinear_interpolation (uint32_t tl, uint32_t tr,
+ uint32_t bl, uint32_t br,
+ int distx, int disty)
+{
+ int distxy, distxiy, distixy, distixiy;
+ uint32_t f, r;
+
+ distxy = distx * disty;
+ distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */
+ distixy = (disty << 8) - distxy; /* disty * (256 - distx) */
+ distixiy =
+ 256 * 256 - (disty << 8) -
+ (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */
+
+ /* Blue */
+ r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
+ + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
+
+ /* Green */
+ f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
+ + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
+ r |= f & 0xff000000;
+
+ tl >>= 16;
+ tr >>= 16;
+ bl >>= 16;
+ br >>= 16;
+ r >>= 16;
+
+ /* Red */
+ f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
+ + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
+ r |= f & 0x00ff0000;
+
+ /* Alpha */
+ f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
+ + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
+ r |= f & 0xff000000;
+
+ return r;
+}
+
+#endif
+
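/*
 * A small sanity check of the contract shared by both
 * bilinear_interpolation() variants above: the 8-bit weights distx and
 * disty blend the four corner pixels, so (0, 0) must return the
 * top-left value and (128, 128) the average of all four.  The
 * per-channel reference below is illustration only and is not part of
 * the patch.
 */
static uint8_t
bilerp_channel_reference (uint8_t tl, uint8_t tr, uint8_t bl, uint8_t br,
                          int distx, int disty)
{
    /* weights sum to 256 * 256, so shifting right by 16 normalizes */
    uint32_t r = tl * (256 - distx) * (256 - disty) +
                 tr * distx         * (256 - disty) +
                 bl * (256 - distx) * disty +
                 br * distx         * disty;

    return (uint8_t) (r >> 16);
}

static pixman_bool_t
check_bilinear_weights (void)
{
    /* (0, 0) picks the top-left corner; (128, 128) averages all four:
     * (0x10 + 0x20 + 0x30 + 0x40) / 4 == 0x28
     */
    return bilerp_channel_reference (0x10, 0x20, 0x30, 0x40, 0, 0) == 0x10 &&
           bilerp_channel_reference (0x10, 0x20, 0x30, 0x40, 128, 128) == 0x28;
}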
static force_inline uint32_t
bits_image_fetch_pixel_bilinear (bits_image_t *image,
pixman_fixed_t x,
@@ -191,9 +287,8 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image,
int width = image->width;
int height = image->height;
int x1, y1, x2, y2;
- uint32_t tl, tr, bl, br, r;
- int32_t distx, disty, idistx, idisty;
- uint32_t ft, fb;
+ uint32_t tl, tr, bl, br;
+ int32_t distx, disty;
x1 = x - pixman_fixed_1 / 2;
y1 = y - pixman_fixed_1 / 2;
@@ -212,7 +307,7 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image,
repeat (repeat_mode, height, &y1);
repeat (repeat_mode, width, &x2);
repeat (repeat_mode, height, &y2);
-
+
tl = get_pixel (image, x1, y1, FALSE);
bl = get_pixel (image, x1, y2, FALSE);
tr = get_pixel (image, x2, y1, FALSE);
@@ -226,24 +321,218 @@ bits_image_fetch_pixel_bilinear (bits_image_t *image,
br = get_pixel (image, x2, y2, TRUE);
}
- idistx = 256 - distx;
- idisty = 256 - disty;
+ return bilinear_interpolation (tl, tr, bl, br, distx, disty);
+}
-#define GET8(v, i) ((uint16_t) (uint8_t) ((v) >> i))
- ft = GET8 (tl, 0) * idistx + GET8 (tr, 0) * distx;
- fb = GET8 (bl, 0) * idistx + GET8 (br, 0) * distx;
- r = (((ft * idisty + fb * disty) >> 16) & 0xff);
- ft = GET8 (tl, 8) * idistx + GET8 (tr, 8) * distx;
- fb = GET8 (bl, 8) * idistx + GET8 (br, 8) * distx;
- r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
- ft = GET8 (tl, 16) * idistx + GET8 (tr, 16) * distx;
- fb = GET8 (bl, 16) * idistx + GET8 (br, 16) * distx;
- r |= (((ft * idisty + fb * disty)) & 0xff0000);
- ft = GET8 (tl, 24) * idistx + GET8 (tr, 24) * distx;
- fb = GET8 (bl, 24) * idistx + GET8 (br, 24) * distx;
- r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
+static void
+bits_image_fetch_bilinear_no_repeat_8888 (pixman_image_t * ima,
+ int offset,
+ int line,
+ int width,
+ uint32_t * buffer,
+ const uint32_t * mask,
+ uint32_t mask_bits)
+{
+ bits_image_t *bits = &ima->bits;
+ pixman_fixed_t x_top, x_bottom, x;
+ pixman_fixed_t ux_top, ux_bottom, ux;
+ pixman_vector_t v;
+ uint32_t top_mask, bottom_mask;
+ uint32_t *top_row;
+ uint32_t *bottom_row;
+ uint32_t *end;
+ uint32_t zero[2] = { 0, 0 };
+ int y, y1, y2;
+ int disty;
+ int mask_inc;
+ int w;
- return r;
+ /* reference point is the center of the pixel */
+ v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
+ v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
+ v.vector[2] = pixman_fixed_1;
+
+ if (!pixman_transform_point_3d (bits->common.transform, &v))
+ return;
+
+ ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0];
+ x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2;
+
+ y = v.vector[1] - pixman_fixed_1/2;
+ disty = (y >> 8) & 0xff;
+
+ /* Load the pointers to the first and second lines from the source
+ * image that the bilinear code must read.
+ *
+ * The main trick in this code is checking whether either line lies
+ * outside of the image.
+ *
+ * When a line turns out to be outside, its pointer is redirected to a
+ * dummy area filled with zeros.  To keep that pointer from moving,
+ * the per-line increment variables used inside the loop are set to
+ * zero as well.
+ */
+ y1 = pixman_fixed_to_int (y);
+ y2 = y1 + 1;
+
+ if (y1 < 0 || y1 >= bits->height)
+ {
+ top_row = zero;
+ x_top = 0;
+ ux_top = 0;
+ }
+ else
+ {
+ top_row = bits->bits + y1 * bits->rowstride;
+ x_top = x;
+ ux_top = ux;
+ }
+
+ if (y2 < 0 || y2 >= bits->height)
+ {
+ bottom_row = zero;
+ x_bottom = 0;
+ ux_bottom = 0;
+ }
+ else
+ {
+ bottom_row = bits->bits + y2 * bits->rowstride;
+ x_bottom = x;
+ ux_bottom = ux;
+ }
+
+ /* Instead of checking whether the operation uses the mask in
+ * each loop iteration, verify this only once and prepare the
+ * variables to make the code smaller inside the loop.
+ */
+ if (!mask)
+ {
+ mask_inc = 0;
+ mask_bits = 1;
+ mask = &mask_bits;
+ }
+ else
+ {
+ /* If we have a mask, prepare the variables to check it */
+ mask_inc = 1;
+ }
+
+ /* If both are zero, then the whole thing is zero */
+ if (top_row == zero && bottom_row == zero)
+ {
+ memset (buffer, 0, width * sizeof (uint32_t));
+ return;
+ }
+ else if (bits->format == PIXMAN_x8r8g8b8)
+ {
+ if (top_row == zero)
+ {
+ top_mask = 0;
+ bottom_mask = 0xff000000;
+ }
+ else if (bottom_row == zero)
+ {
+ top_mask = 0xff000000;
+ bottom_mask = 0;
+ }
+ else
+ {
+ top_mask = 0xff000000;
+ bottom_mask = 0xff000000;
+ }
+ }
+ else
+ {
+ top_mask = 0;
+ bottom_mask = 0;
+ }
+
+ end = buffer + width;
+
+ /* Zero fill to the left of the image */
+ while (buffer < end && x < pixman_fixed_minus_1)
+ {
+ *buffer++ = 0;
+ x += ux;
+ x_top += ux_top;
+ x_bottom += ux_bottom;
+ mask += mask_inc;
+ }
+
+ /* Left edge
+ */
+ while (buffer < end && x < 0)
+ {
+ uint32_t tr, br;
+ int32_t distx;
+
+ tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask;
+ br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;
+
+ distx = (x >> 8) & 0xff;
+
+ *buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty);
+
+ x += ux;
+ x_top += ux_top;
+ x_bottom += ux_bottom;
+ mask += mask_inc;
+ }
+
+ /* Main part */
+ w = pixman_int_to_fixed (bits->width - 1);
+
+ while (buffer < end && x < w)
+ {
+ if (*mask)
+ {
+ uint32_t tl, tr, bl, br;
+ int32_t distx;
+
+ tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
+ tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask;
+ bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
+ br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;
+
+ distx = (x >> 8) & 0xff;
+
+ *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty);
+ }
+
+ buffer++;
+ x += ux;
+ x_top += ux_top;
+ x_bottom += ux_bottom;
+ mask += mask_inc;
+ }
+
+ /* Right Edge */
+ w = pixman_int_to_fixed (bits->width);
+ while (buffer < end && x < w)
+ {
+ if (*mask)
+ {
+ uint32_t tl, bl;
+ int32_t distx;
+
+ tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
+ bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
+
+ distx = (x >> 8) & 0xff;
+
+ *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty);
+ }
+
+ buffer++;
+ x += ux;
+ x_top += ux_top;
+ x_bottom += ux_bottom;
+ mask += mask_inc;
+ }
+
+ /* Zero fill to the right of the image */
+ while (buffer < end)
+ *buffer++ = 0;
}
static force_inline uint32_t
@@ -340,6 +629,9 @@ bits_image_fetch_pixel_filtered (bits_image_t *image,
case PIXMAN_FILTER_CONVOLUTION:
return bits_image_fetch_pixel_convolution (image, x, y);
break;
+
+ default:
+ break;
}
return 0;
@@ -583,55 +875,6 @@ bits_image_fetch_untransformed_64 (pixman_image_t * image,
}
}
-static pixman_bool_t out_of_bounds_workaround = TRUE;
-
-/* Old X servers rely on out-of-bounds accesses when they are asked
- * to composite with a window as the source. They create a pixman image
- * pointing to some bogus position in memory, but then they set a clip
- * region to the position where the actual bits are.
- *
- * Due to a bug in old versions of pixman, where it would not clip
- * against the image bounds when a clip region was set, this would
- * actually work. So by default we allow certain out-of-bound access
- * to happen unless explicitly disabled.
- *
- * Fixed X servers should call this function to disable the workaround.
- */
-PIXMAN_EXPORT void
-pixman_disable_out_of_bounds_workaround (void)
-{
- out_of_bounds_workaround = FALSE;
-}
-
-static pixman_bool_t
-source_image_needs_out_of_bounds_workaround (bits_image_t *image)
-{
- if (image->common.clip_sources &&
- image->common.repeat == PIXMAN_REPEAT_NONE &&
- image->common.have_clip_region &&
- out_of_bounds_workaround)
- {
- if (!image->common.client_clip)
- {
- /* There is no client clip, so if the clip region extends beyond the
- * drawable geometry, it must be because the X server generated the
- * bogus clip region.
- */
- const pixman_box32_t *extents = pixman_region32_extents (&image->common.clip_region);
-
- if (extents->x1 >= 0 && extents->x2 <= image->width &&
- extents->y1 >= 0 && extents->y2 <= image->height)
- {
- return FALSE;
- }
- }
-
- return TRUE;
- }
-
- return FALSE;
-}
-
static void
bits_image_property_changed (pixman_image_t *image)
{
@@ -665,6 +908,25 @@ bits_image_property_changed (pixman_image_t *image)
image->common.get_scanline_64 = bits_image_fetch_untransformed_64;
image->common.get_scanline_32 = bits_image_fetch_untransformed_32;
}
+ else if (bits->common.transform &&
+ bits->common.transform->matrix[2][0] == 0 &&
+ bits->common.transform->matrix[2][1] == 0 &&
+ bits->common.transform->matrix[2][2] == pixman_fixed_1 &&
+ bits->common.transform->matrix[0][0] > 0 &&
+ bits->common.transform->matrix[1][0] == 0 &&
+ !bits->read_func &&
+ (bits->common.filter == PIXMAN_FILTER_BILINEAR ||
+ bits->common.filter == PIXMAN_FILTER_GOOD ||
+ bits->common.filter == PIXMAN_FILTER_BEST) &&
+ bits->common.repeat == PIXMAN_REPEAT_NONE &&
+ (bits->format == PIXMAN_a8r8g8b8 ||
+ bits->format == PIXMAN_x8r8g8b8))
+ {
+ image->common.get_scanline_64 =
+ _pixman_image_get_scanline_generic_64;
+ image->common.get_scanline_32 =
+ bits_image_fetch_bilinear_no_repeat_8888;
+ }
else
{
image->common.get_scanline_64 =
@@ -675,9 +937,6 @@ bits_image_property_changed (pixman_image_t *image)
bits->store_scanline_64 = bits_image_store_scanline_64;
bits->store_scanline_32 = bits_image_store_scanline_32;
-
- bits->common.need_workaround =
- source_image_needs_out_of_bounds_workaround (bits);
}
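/*
 * For illustration only (not part of the patch): the new branch in
 * bits_image_property_changed() above selects the fast bilinear
 * fetcher only for a non-projective transform with a positive x scale,
 * no rotation into x, an (a|x)8r8g8b8 source, a bilinear-class filter
 * and PIXMAN_REPEAT_NONE.  A hypothetical setup that satisfies those
 * conditions:
 */
static void
configure_for_fast_bilinear (pixman_image_t *image)
{
    pixman_transform_t scale;

    /* x scale of 2 in the matrix, identity in y; the bottom row stays (0, 0, 1) */
    pixman_transform_init_scale (&scale, pixman_double_to_fixed (2.0),
                                 pixman_fixed_1);

    pixman_image_set_transform (image, &scale);
    pixman_image_set_filter (image, PIXMAN_FILTER_BILINEAR, NULL, 0);
    pixman_image_set_repeat (image, PIXMAN_REPEAT_NONE);
}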
static uint32_t *
@@ -731,8 +990,10 @@ pixman_image_create_bits (pixman_format_code_t format,
/* must be a whole number of uint32_t's
*/
- return_val_if_fail (bits == NULL ||
- (rowstride_bytes % sizeof (uint32_t)) == 0, NULL);
+ return_val_if_fail (
+ bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0, NULL);
+
+ return_val_if_fail (PIXMAN_FORMAT_BPP (format) >= PIXMAN_FORMAT_DEPTH (format), NULL);
if (!bits && width && height)
{
diff --git a/lib/pixman/pixman/pixman-compiler.h b/lib/pixman/pixman/pixman-compiler.h
index 9647dbb48..26f7071c9 100644
--- a/lib/pixman/pixman/pixman-compiler.h
+++ b/lib/pixman/pixman/pixman-compiler.h
@@ -69,3 +69,135 @@
# define PIXMAN_EXPORT
#endif
+/* TLS */
+#if defined(TOOLCHAIN_SUPPORTS__THREAD)
+
+# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
+ static __thread type name
+# define PIXMAN_GET_THREAD_LOCAL(name) \
+ (&name)
+
+#elif defined(__MINGW32__) && !defined(__WIN64)
+
+/* We can't include <windows.h> as it causes various clashes with
+ * identifiers in pixman, sigh. So just declare the functions we need
+ * here.
+ */
+extern __stdcall long InterlockedCompareExchange(long volatile *, long, long);
+#define InterlockedCompareExchangePointer(d,e,c) \
+ (void *)InterlockedCompareExchange((long volatile *)(d),(long)(e),(long)(c))
+extern __stdcall int TlsAlloc (void);
+extern __stdcall void *TlsGetValue (unsigned);
+extern __stdcall int TlsSetValue (unsigned, void *);
+extern __stdcall void *CreateMutexA(void *, int, char *);
+extern __stdcall int CloseHandle(void *);
+extern __stdcall unsigned WaitForSingleObject (void *, unsigned);
+extern __stdcall int ReleaseMutex (void *);
+
+# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
+ static volatile int tls_ ## name ## _initialized = 0; \
+ static void *tls_ ## name ## _mutex = NULL; \
+ static unsigned tls_ ## name ## _index; \
+ \
+ static type * \
+ tls_ ## name ## _alloc (void) \
+ { \
+ type *value = calloc (1, sizeof (type)); \
+ if (value) \
+ TlsSetValue (tls_ ## name ## _index, value); \
+ return value; \
+ } \
+ \
+ static force_inline type * \
+ tls_ ## name ## _get (void) \
+ { \
+ type *value; \
+ if (!tls_ ## name ## _initialized) \
+ { \
+ if (!tls_ ## name ## _mutex) \
+ { \
+ void *mutex = CreateMutexA (NULL, 0, NULL); \
+ if (InterlockedCompareExchangePointer ( \
+ &tls_ ## name ## _mutex, mutex, NULL) != NULL) \
+ { \
+ CloseHandle (mutex); \
+ } \
+ } \
+ WaitForSingleObject (tls_ ## name ## _mutex, 0xFFFFFFFF); \
+ if (!tls_ ## name ## _initialized) \
+ { \
+ tls_ ## name ## _index = TlsAlloc (); \
+ tls_ ## name ## _initialized = 1; \
+ } \
+ ReleaseMutex (tls_ ## name ## _mutex); \
+ } \
+ if (tls_ ## name ## _index == 0xFFFFFFFF) \
+ return NULL; \
+ value = TlsGetValue (tls_ ## name ## _index); \
+ if (!value) \
+ value = tls_ ## name ## _alloc (); \
+ return value; \
+ }
+
+# define PIXMAN_GET_THREAD_LOCAL(name) \
+ tls_ ## name ## _get ()
+
+#elif defined(_MSC_VER)
+
+# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
+ static __declspec(thread) type name
+# define PIXMAN_GET_THREAD_LOCAL(name) \
+ (&name)
+
+#elif defined(HAVE_PTHREAD_SETSPECIFIC)
+
+#include <pthread.h>
+
+# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
+ static pthread_once_t tls_ ## name ## _once_control = PTHREAD_ONCE_INIT; \
+ static pthread_key_t tls_ ## name ## _key; \
+ \
+ static void \
+ tls_ ## name ## _destroy_value (void *value) \
+ { \
+ free (value); \
+ } \
+ \
+ static void \
+ tls_ ## name ## _make_key (void) \
+ { \
+ pthread_key_create (&tls_ ## name ## _key, \
+ tls_ ## name ## _destroy_value); \
+ } \
+ \
+ static type * \
+ tls_ ## name ## _alloc (void) \
+ { \
+ type *value = calloc (1, sizeof (type)); \
+ if (value) \
+ pthread_setspecific (tls_ ## name ## _key, value); \
+ return value; \
+ } \
+ \
+ static force_inline type * \
+ tls_ ## name ## _get (void) \
+ { \
+ type *value = NULL; \
+ if (pthread_once (&tls_ ## name ## _once_control, \
+ tls_ ## name ## _make_key) == 0) \
+ { \
+ value = pthread_getspecific (tls_ ## name ## _key); \
+ if (!value) \
+ value = tls_ ## name ## _alloc (); \
+ } \
+ return value; \
+ }
+
+# define PIXMAN_GET_THREAD_LOCAL(name) \
+ tls_ ## name ## _get ()
+
+#else
+
+# error "Unknown thread local support for this system"
+
+#endif
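/*
 * Whichever branch above is taken, the two macros expose the same
 * contract: define a per-thread object once at file scope, then fetch
 * a pointer to the calling thread's instance.  A minimal usage sketch;
 * the scratch_cache_t struct and remember_size() are hypothetical and
 * only for illustration.
 */
typedef struct { int last_width, last_height; } scratch_cache_t;

PIXMAN_DEFINE_THREAD_LOCAL (scratch_cache_t, scratch_cache);

static void
remember_size (int width, int height)
{
    scratch_cache_t *cache = PIXMAN_GET_THREAD_LOCAL (scratch_cache);

    /* the emulated (TlsAlloc / pthread_key) paths can fail to allocate */
    if (cache)
    {
	cache->last_width  = width;
	cache->last_height = height;
    }
}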
diff --git a/lib/pixman/pixman/pixman-conical-gradient.c b/lib/pixman/pixman/pixman-conical-gradient.c
index d720db3d4..0341a8ebf 100644
--- a/lib/pixman/pixman/pixman-conical-gradient.c
+++ b/lib/pixman/pixman/pixman-conical-gradient.c
@@ -23,7 +23,11 @@
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*/
+
+#ifdef HAVE_CONFIG_H
#include <config.h>
+#endif
+
#include <stdlib.h>
#include <math.h>
#include "pixman-private.h"
@@ -49,7 +53,7 @@ conical_gradient_get_scanline_32 (pixman_image_t *image,
double rx = x + 0.5;
double ry = y + 0.5;
double rz = 1.;
- double a = conical->angle / (180. * 65536);
+ double a = (conical->angle * M_PI) / (180. * 65536);
_pixman_gradient_walker_init (&walker, gradient, source->common.repeat);
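/*
 * Worked example of the corrected conversion above (illustration only):
 * conical->angle holds degrees in 16.16 fixed point, so a quarter turn
 * is
 *
 *     angle = 90 << 16 = 5898240
 *     a     = (5898240 * M_PI) / (180.0 * 65536) = M_PI / 2
 *
 * The previous expression omitted the M_PI factor and thus produced an
 * angle that was too small by a factor of pi.
 */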
diff --git a/lib/pixman/pixman/pixman-cpu.c b/lib/pixman/pixman/pixman-cpu.c
index 5d5469bb8..e96b140bd 100644
--- a/lib/pixman/pixman/pixman-cpu.c
+++ b/lib/pixman/pixman/pixman-cpu.c
@@ -253,8 +253,6 @@ pixman_arm_read_auxv ()
if (aux.a_type == AT_HWCAP)
{
uint32_t hwcap = aux.a_un.a_val;
- if (getenv ("ARM_FORCE_HWCAP"))
- hwcap = strtoul (getenv ("ARM_FORCE_HWCAP"), NULL, 0);
/* hardcode these values to avoid depending on specific
* versions of the hwcap header, e.g. HWCAP_NEON
*/
@@ -266,8 +264,6 @@ pixman_arm_read_auxv ()
else if (aux.a_type == AT_PLATFORM)
{
const char *plat = (const char*) aux.a_un.a_val;
- if (getenv ("ARM_FORCE_PLATFORM"))
- plat = getenv ("ARM_FORCE_PLATFORM");
if (strncmp (plat, "v7l", 3) == 0)
{
arm_has_v7 = TRUE;
@@ -280,12 +276,6 @@ pixman_arm_read_auxv ()
}
}
close (fd);
-
- /* if we don't have 2.6.29, we have to do this hack; set
- * the env var to trust HWCAP.
- */
- if (!getenv ("ARM_TRUST_HWCAP") && arm_has_v7)
- arm_has_neon = TRUE;
}
arm_tests_initialized = TRUE;
@@ -319,7 +309,7 @@ pixman_have_arm_neon (void)
#endif /* USE_ARM_SIMD || USE_ARM_NEON */
-#ifdef USE_MMX
+#if defined(USE_MMX) || defined(USE_SSE2)
/* The CPU detection code needs to be in a file not compiled with
* "-mmmx -msse", as gcc would generate CMOV instructions otherwise
* that would lead to SIGILL instructions on old CPUs that don't have
diff --git a/lib/pixman/pixman/pixman-edge-imp.h b/lib/pixman/pixman/pixman-edge-imp.h
index a30f82108..a4698eddb 100644
--- a/lib/pixman/pixman/pixman-edge-imp.h
+++ b/lib/pixman/pixman/pixman-edge-imp.h
@@ -49,10 +49,14 @@ RASTERIZE_EDGES (pixman_image_t *image,
rx = r->x;
#if N_BITS == 1
/* For the non-antialiased case, round the coordinates up, in effect
- * sampling the center of the pixel. (The AA case does a similar
- * adjustment in RENDER_SAMPLES_X) */
- lx += X_FRAC_FIRST(1);
- rx += X_FRAC_FIRST(1);
+ * sampling just slightly to the left of the pixel. This is so that
+ * when the sample point lies exactly on the line, we round towards
+ * north-west.
+ *
+ * (The AA case does a similar adjustment in RENDER_SAMPLES_X)
+ */
+ lx += X_FRAC_FIRST(1) - pixman_fixed_e;
+ rx += X_FRAC_FIRST(1) - pixman_fixed_e;
#endif
/* clip X */
if (lx < 0)
@@ -79,14 +83,6 @@ RASTERIZE_EDGES (pixman_image_t *image,
#if N_BITS == 1
{
-#ifdef WORDS_BIGENDIAN
-# define SCREEN_SHIFT_LEFT(x,n) ((x) << (n))
-# define SCREEN_SHIFT_RIGHT(x,n) ((x) >> (n))
-#else
-# define SCREEN_SHIFT_LEFT(x,n) ((x) >> (n))
-# define SCREEN_SHIFT_RIGHT(x,n) ((x) << (n))
-#endif
-
#define LEFT_MASK(x) \
(((x) & 0x1f) ? \
SCREEN_SHIFT_RIGHT (0xffffffff, (x) & 0x1f) : 0)
diff --git a/lib/pixman/pixman/pixman-edge.c b/lib/pixman/pixman/pixman-edge.c
index 81a2e960a..8d498ab44 100644
--- a/lib/pixman/pixman/pixman-edge.c
+++ b/lib/pixman/pixman/pixman-edge.c
@@ -70,7 +70,7 @@
#define N_BITS 4
#define RASTERIZE_EDGES rasterize_edges_4
-#ifndef WORDS_BIG_ENDIAN
+#ifndef WORDS_BIGENDIAN
#define SHIFT_4(o) ((o) << 2)
#else
#define SHIFT_4(o) ((1 - (o)) << 2)
@@ -358,6 +358,9 @@ PIXMAN_RASTERIZE_EDGES (pixman_image_t *image,
case 8:
rasterize_edges_8 (image, l, r, t, b);
break;
+
+ default:
+ break;
}
}
diff --git a/lib/pixman/pixman/pixman-fast-path.c b/lib/pixman/pixman/pixman-fast-path.c
index 5ab8d8c99..bf5b298c8 100644
--- a/lib/pixman/pixman/pixman-fast-path.c
+++ b/lib/pixman/pixman/pixman-fast-path.c
@@ -27,6 +27,7 @@
#include <config.h>
#endif
#include <string.h>
+#include <stdlib.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
@@ -125,7 +126,7 @@ fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
int src_stride, mask_stride, dst_stride;
uint8_t m;
uint32_t s, d;
- uint16_t w;
+ int32_t w;
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
@@ -183,7 +184,7 @@ fast_composite_in_n_8_8 (pixman_implementation_t *imp,
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask, m;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
uint16_t t;
src = _pixman_image_get_solid (src_image, dest_image->bits.format);
@@ -260,7 +261,7 @@ fast_composite_in_8_8 (pixman_implementation_t *imp,
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
uint8_t s;
uint16_t t;
@@ -308,7 +309,7 @@ fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
uint32_t *dst_line, *dst, d;
uint8_t *mask_line, *mask, m;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
@@ -366,7 +367,7 @@ fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
uint32_t *dst_line, *dst, d;
uint32_t *mask_line, *mask, ma;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
@@ -423,7 +424,7 @@ fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
uint32_t *dst_line, *dst, d;
uint32_t *mask_line, *mask, ma;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
@@ -490,7 +491,7 @@ fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
uint32_t d;
uint8_t *mask_line, *mask, m;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
@@ -555,7 +556,7 @@ fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
uint32_t d;
uint8_t *mask_line, *mask, m;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
@@ -622,7 +623,7 @@ fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
uint32_t d;
uint32_t *mask_line, *mask, ma;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
@@ -697,7 +698,7 @@ fast_composite_over_8888_8888 (pixman_implementation_t *imp,
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
uint8_t a;
- uint16_t w;
+ int32_t w;
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
@@ -723,6 +724,7 @@ fast_composite_over_8888_8888 (pixman_implementation_t *imp,
}
}
+#if 0
static void
fast_composite_over_8888_0888 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -743,7 +745,7 @@ fast_composite_over_8888_0888 (pixman_implementation_t *imp,
uint32_t *src_line, *src, s;
uint8_t a;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
@@ -773,6 +775,7 @@ fast_composite_over_8888_0888 (pixman_implementation_t *imp,
}
}
}
+#endif
static void
fast_composite_over_8888_0565 (pixman_implementation_t *imp,
@@ -794,7 +797,7 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp,
uint32_t *src_line, *src, s;
uint8_t a;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
@@ -847,7 +850,7 @@ fast_composite_src_x888_0565 (pixman_implementation_t *imp,
uint16_t *dst_line, *dst;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
@@ -887,7 +890,7 @@ fast_composite_add_8000_8000 (pixman_implementation_t *imp,
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
uint8_t s, d;
uint16_t t;
@@ -938,7 +941,7 @@ fast_composite_add_8888_8888 (pixman_implementation_t *imp,
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
uint32_t s, d;
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
@@ -971,24 +974,24 @@ fast_composite_add_8888_8888 (pixman_implementation_t *imp,
}
static void
-fast_composite_add_8888_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+fast_composite_add_n_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
{
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
uint32_t src;
uint8_t sa;
@@ -1023,6 +1026,254 @@ fast_composite_add_8888_8_8 (pixman_implementation_t *imp,
}
}
+#ifdef WORDS_BIGENDIAN
+#define CREATE_BITMASK(n) (0x80000000 >> (n))
+#define UPDATE_BITMASK(n) ((n) >> 1)
+#else
+#define CREATE_BITMASK(n) (1 << (n))
+#define UPDATE_BITMASK(n) ((n) << 1)
+#endif
+
+#define TEST_BIT(p, n) \
+ (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
+#define SET_BIT(p, n) \
+ do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0);
+
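/*
 * Quick illustration (not part of the patch) of the a1 bit-access
 * macros above: bit n of a packed 1 bpp scanline lives in word n >> 5
 * at bit position n & 31, mirrored on big-endian hosts so that bit 0
 * is always the leftmost pixel.
 */
static void
check_a1_bit_access (void)
{
    uint32_t row[2] = { 0, 0 };

    SET_BIT (row, 3);	/* pixel 3 stays in row[0]  */
    SET_BIT (row, 33);	/* pixel 33 lands in row[1] */

    if (TEST_BIT (row, 3) && TEST_BIT (row, 33) && !TEST_BIT (row, 4))
    {
	/* on little-endian: row[0] == 0x8, row[1] == 0x2 */
    }
}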
+static void
+fast_composite_add_1000_1000 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src;
+ int dst_stride, src_stride;
+ int32_t w;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
+ src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dst_image, 0, dest_y, uint32_t,
+ dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ /*
+ * TODO: improve performance by processing uint32_t data instead
+ * of individual bits
+ */
+ if (TEST_BIT (src, src_x + w))
+ SET_BIT (dst, dest_x + w);
+ }
+ }
+}
+
+static void
+fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint32_t *dst, *dst_line;
+ uint32_t *mask, *mask_line;
+ int mask_stride, dst_stride;
+ uint32_t bitcache, bitmask;
+ int32_t w;
+
+ if (width <= 0)
+ return;
+
+ src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t,
+ dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
+ mask_stride, mask_line, 1);
+ mask_line += mask_x >> 5;
+
+ if (srca == 0xff)
+ {
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (mask_x & 31);
+
+ while (w--)
+ {
+ if (bitmask == 0)
+ {
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (0);
+ }
+ if (bitcache & bitmask)
+ *dst = src;
+ bitmask = UPDATE_BITMASK (bitmask);
+ dst++;
+ }
+ }
+ }
+ else
+ {
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (mask_x & 31);
+
+ while (w--)
+ {
+ if (bitmask == 0)
+ {
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (0);
+ }
+ if (bitcache & bitmask)
+ *dst = over (src, *dst);
+ bitmask = UPDATE_BITMASK (bitmask);
+ dst++;
+ }
+ }
+ }
+}
+
+static void
+fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint16_t *dst, *dst_line;
+ uint32_t *mask, *mask_line;
+ int mask_stride, dst_stride;
+ uint32_t bitcache, bitmask;
+ int32_t w;
+ uint32_t d;
+ uint16_t src565;
+
+ if (width <= 0)
+ return;
+
+ src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t,
+ dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
+ mask_stride, mask_line, 1);
+ mask_line += mask_x >> 5;
+
+ if (srca == 0xff)
+ {
+ src565 = CONVERT_8888_TO_0565 (src);
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (mask_x & 31);
+
+ while (w--)
+ {
+ if (bitmask == 0)
+ {
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (0);
+ }
+ if (bitcache & bitmask)
+ *dst = src565;
+ bitmask = UPDATE_BITMASK (bitmask);
+ dst++;
+ }
+ }
+ }
+ else
+ {
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (mask_x & 31);
+
+ while (w--)
+ {
+ if (bitmask == 0)
+ {
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (0);
+ }
+ if (bitcache & bitmask)
+ {
+ d = over (src, CONVERT_0565_TO_0888 (*dst));
+ *dst = CONVERT_8888_TO_0565 (d);
+ }
+ bitmask = UPDATE_BITMASK (bitmask);
+ dst++;
+ }
+ }
+ }
+}
+
/*
* Simple bitblt
*/
@@ -1095,81 +1346,316 @@ fast_composite_src_8888_x888 (pixman_implementation_t *imp,
}
}
-static const pixman_fast_path_t c_fast_paths[] =
+static force_inline pixman_bool_t
+repeat (pixman_repeat_t repeat, int *c, int size)
{
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, fast_composite_over_n_8_0565, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, fast_composite_over_n_8_0565, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r8g8b8, fast_composite_over_n_8_0888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b8g8r8, fast_composite_over_n_8_0888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fast_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fast_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fast_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fast_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fast_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, fast_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, fast_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, fast_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, fast_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, fast_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fast_composite_over_x888_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fast_composite_over_x888_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, fast_composite_over_x888_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, fast_composite_over_x888_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fast_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fast_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fast_composite_over_8888_0565, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fast_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fast_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fast_composite_over_8888_0565, 0 },
- { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fast_composite_add_8888_8888, 0 },
- { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fast_composite_add_8888_8888, 0 },
- { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fast_composite_add_8000_8000, 0 },
- { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fast_composite_add_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fast_composite_add_8888_8_8, 0 },
- { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, fast_composite_solid_fill, 0 },
- { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_x8r8g8b8, fast_composite_solid_fill, 0 },
- { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_a8b8g8r8, fast_composite_solid_fill, 0 },
- { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_x8b8g8r8, fast_composite_solid_fill, 0 },
- { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_a8, fast_composite_solid_fill, 0 },
- { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, fast_composite_solid_fill, 0 },
- { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fast_composite_src_8888_x888, 0 },
- { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fast_composite_src_8888_x888, 0 },
- { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fast_composite_src_8888_x888, 0 },
- { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fast_composite_src_8888_x888, 0 },
- { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fast_composite_src_x888_0565, 0 },
- { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fast_composite_src_x888_0565, 0 },
- { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fast_composite_src_x888_0565, 0 },
- { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fast_composite_src_x888_0565, 0 },
- { PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fast_composite_in_8_8, 0 },
- { PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fast_composite_in_n_8_8, 0 },
- { PIXMAN_OP_NONE },
-};
+ if (repeat == PIXMAN_REPEAT_NONE)
+ {
+ if (*c < 0 || *c >= size)
+ return FALSE;
+ }
+ else if (repeat == PIXMAN_REPEAT_NORMAL)
+ {
+ while (*c >= size)
+ *c -= size;
+ while (*c < 0)
+ *c += size;
+ }
+ else if (repeat == PIXMAN_REPEAT_PAD)
+ {
+ *c = CLIP (*c, 0, size - 1);
+ }
+ else /* REFLECT */
+ {
+ *c = MOD (*c, size * 2);
+ if (*c >= size)
+ *c = size * 2 - *c - 1;
+ }
+ return TRUE;
+}
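/*
 * Expected behaviour of the repeat() helper above for a coordinate of
 * -3 against an 8 pixel wide image (illustration only):
 *
 *     NONE    -> FALSE, sample treated as transparent black
 *     NORMAL  -> TRUE,  c == 5 (wraps around)
 *     PAD     -> TRUE,  c == 0 (clamped to the nearest edge)
 *     REFLECT -> TRUE,  c == 2 (mirrored across the edge)
 */
static void
check_repeat_modes (void)
{
    int c;

    c = -3; repeat (PIXMAN_REPEAT_NORMAL,  &c, 8);	/* c == 5 */
    c = -3; repeat (PIXMAN_REPEAT_PAD,     &c, 8);	/* c == 0 */
    c = -3; repeat (PIXMAN_REPEAT_REFLECT, &c, 8);	/* c == 2 */

    c = -3;
    if (!repeat (PIXMAN_REPEAT_NONE, &c, 8))
    {
	/* the caller substitutes transparent black for this sample */
    }
}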
+
+/* A macroified version of specialized nearest scalers for some
+ * common 8888 and 565 formats. It supports SRC and OVER ops.
+ *
+ * There are two repeat versions, one that handles repeat normal,
+ * and one without repeat handling that only works if the src region
+ * used is completely covered by the pre-repeated source samples.
+ *
+ * The loops are unrolled to process two pixels per iteration for better
+ * performance on most CPU architectures (superscalar processors
+ * can issue several operations simultaneously, while other processors
+ * can hide instruction latencies by pipelining operations). Unrolling more
+ * does not make much sense because the compiler will start running out
+ * of spare registers soon.
+ */
+
+#define GET_8888_ALPHA(s) ((s) >> 24)
+ /* This is not actually used since we don't have an OVER with
+ 565 source, but it is needed to build. */
+#define GET_0565_ALPHA(s) 0xff
+
+#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
+ src_type_t, dst_type_t, OP, do_repeat) \
+static void \
+fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dst_x, \
+ int32_t dst_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type_t *dst_line; \
+ src_type_t *src_first_line; \
+ uint32_t d; \
+ src_type_t s1, s2; \
+ uint8_t a1, a2; \
+ int w; \
+ int x1, x2, y; \
+ pixman_fixed_t orig_vx; \
+ pixman_fixed_t max_vx, max_vy; \
+ pixman_vector_t v; \
+ pixman_fixed_t vx, vy; \
+ pixman_fixed_t unit_x, unit_y; \
+ \
+ src_type_t *src; \
+ dst_type_t *dst; \
+ int src_stride, dst_stride; \
+ \
+ if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
+ abort(); \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
+ /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
+ * transformed from destination space to source space */ \
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
+ \
+ /* reference point is the center of the pixel */ \
+ v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
+ v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
+ v.vector[2] = pixman_fixed_1; \
+ \
+ if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
+ return; \
+ \
+ unit_x = src_image->common.transform->matrix[0][0]; \
+ unit_y = src_image->common.transform->matrix[1][1]; \
+ \
+ /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
+ v.vector[0] -= pixman_fixed_e; \
+ v.vector[1] -= pixman_fixed_e; \
+ \
+ vx = v.vector[0]; \
+ vy = v.vector[1]; \
+ \
+ if (do_repeat) \
+ { \
+ /* Clamp repeating positions inside the actual samples */ \
+ max_vx = src_image->bits.width << 16; \
+ max_vy = src_image->bits.height << 16; \
+ \
+ repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
+ repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
+ } \
+ \
+ orig_vx = vx; \
+ \
+ while (--height >= 0) \
+ { \
+ dst = dst_line; \
+ dst_line += dst_stride; \
+ \
+ y = vy >> 16; \
+ vy += unit_y; \
+ if (do_repeat) \
+ repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
+ \
+ src = src_first_line + src_stride * y; \
+ \
+ w = width; \
+ vx = orig_vx; \
+ while ((w -= 2) >= 0) \
+ { \
+ x1 = vx >> 16; \
+ vx += unit_x; \
+ if (do_repeat) \
+ { \
+ /* This works because we know that unit_x is positive */ \
+ while (vx >= max_vx) \
+ vx -= max_vx; \
+ } \
+ s1 = src[x1]; \
+ \
+ x2 = vx >> 16; \
+ vx += unit_x; \
+ if (do_repeat) \
+ { \
+ /* This works because we know that unit_x is positive */ \
+ while (vx >= max_vx) \
+ vx -= max_vx; \
+ } \
+ s2 = src[x2]; \
+ \
+ if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
+ { \
+ a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
+ a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
+ \
+ if (a1 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ else if (s1) \
+ { \
+ d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
+ a1 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ \
+ if (a2 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
+ } \
+ else if (s2) \
+ { \
+ d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
+ a2 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ } \
+ else /* PIXMAN_OP_SRC */ \
+ { \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
+ } \
+ } \
+ \
+ if (w & 1) \
+ { \
+ x1 = vx >> 16; \
+ vx += unit_x; \
+ if (do_repeat) \
+ { \
+ /* This works because we know that unit_x is positive */ \
+ while (vx >= max_vx) \
+ vx -= max_vx; \
+ } \
+ s1 = src[x1]; \
+ \
+ if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
+ { \
+ a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
+ \
+ if (a1 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ else if (s1) \
+ { \
+ d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
+ a1 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ } \
+ else /* PIXMAN_OP_SRC */ \
+ { \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ } \
+ } \
+}
+
+FAST_NEAREST(x888_x888_none, 8888, 8888, uint32_t, uint32_t, SRC, /*repeat: */ 0);
+FAST_NEAREST(x888_x888_normal, 8888, 8888, uint32_t, uint32_t, SRC, /*repeat: */ 1);
+FAST_NEAREST(x888_x888_none, 8888, 8888, uint32_t, uint32_t, OVER, /*repeat: */ 0);
+FAST_NEAREST(x888_x888_normal, 8888, 8888, uint32_t, uint32_t, OVER, /*repeat: */ 1);
+FAST_NEAREST(x888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, /*repeat: */ 0);
+FAST_NEAREST(x888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, /*repeat: */ 1);
+FAST_NEAREST(565_565_none, 0565, 0565, uint16_t, uint16_t, SRC, /*repeat: */ 0);
+FAST_NEAREST(565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, /*repeat: */ 1);
+FAST_NEAREST(8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, /*repeat: */ 0);
+FAST_NEAREST(8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, /*repeat: */ 1);
+
+static force_inline uint32_t
+fetch_nearest (pixman_repeat_t src_repeat,
+ pixman_format_code_t format,
+ uint32_t *src, int x, int src_width)
+{
+ if (repeat (src_repeat, &x, src_width))
+ {
+ if (format == PIXMAN_x8r8g8b8)
+ return *(src + x) | 0xff000000;
+ else
+ return *(src + x);
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+static force_inline void
+combine_over (uint32_t s, uint32_t *dst)
+{
+ if (s)
+ {
+ uint8_t ia = 0xff - (s >> 24);
+
+ if (ia)
+ UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
+ else
+ *dst = s;
+ }
+}
+
+static force_inline void
+combine_src (uint32_t s, uint32_t *dst)
+{
+ *dst = s;
+}
static void
-fast_composite_src_scale_nearest (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+fast_composite_scaled_nearest (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
{
- uint32_t *dst;
- uint32_t *src;
- int dst_stride, src_stride;
- int i, j;
+ uint32_t *dst_line;
+ uint32_t *src_line;
+ int dst_stride, src_stride;
+ int src_width, src_height;
+ pixman_repeat_t src_repeat;
+ pixman_fixed_t unit_x, unit_y;
+ pixman_format_code_t src_format;
pixman_vector_t v;
+ pixman_fixed_t vy;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst, 1);
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
/* pass in 0 instead of src_x and src_y because src_x and src_y need to be
- * transformed from destination space to source space */
- PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src, 1);
+ * transformed from destination space to source space
+ */
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
@@ -1179,145 +1665,221 @@ fast_composite_src_scale_nearest (pixman_implementation_t *imp,
if (!pixman_transform_point_3d (src_image->common.transform, &v))
return;
+ unit_x = src_image->common.transform->matrix[0][0];
+ unit_y = src_image->common.transform->matrix[1][1];
+
/* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
v.vector[0] -= pixman_fixed_e;
v.vector[1] -= pixman_fixed_e;
- for (j = 0; j < height; j++)
+ src_height = src_image->bits.height;
+ src_width = src_image->bits.width;
+ src_repeat = src_image->common.repeat;
+ src_format = src_image->bits.format;
+
+ vy = v.vector[1];
+ while (height--)
{
- pixman_fixed_t vx = v.vector[0];
- pixman_fixed_t vy = v.vector[1];
+ pixman_fixed_t vx = v.vector[0];
+ int y = pixman_fixed_to_int (vy);
+ uint32_t *dst = dst_line;
- for (i = 0; i < width; ++i)
+ dst_line += dst_stride;
+
+ /* adjust the y location by a unit vector in the y direction;
+ * this is equivalent to transforming y+1 of the destination point to source space */
+ vy += unit_y;
+
+ if (!repeat (src_repeat, &y, src_height))
{
- pixman_bool_t inside_bounds;
- uint32_t result;
- int x, y;
- x = vx >> 16;
- y = vy >> 16;
-
- /* apply the repeat function */
- switch (src_image->common.repeat)
- {
- case PIXMAN_REPEAT_NORMAL:
- x = MOD (x, src_image->bits.width);
- y = MOD (y, src_image->bits.height);
- inside_bounds = TRUE;
- break;
-
- case PIXMAN_REPEAT_PAD:
- x = CLIP (x, 0, src_image->bits.width - 1);
- y = CLIP (y, 0, src_image->bits.height - 1);
- inside_bounds = TRUE;
- break;
-
- case PIXMAN_REPEAT_REFLECT:
- x = MOD (x, src_image->bits.width * 2);
- if (x >= src_image->bits.width)
- x = src_image->bits.width * 2 - x - 1;
- y = MOD (y, src_image->bits.height * 2);
- if (y >= src_image->bits.height)
- y = src_image->bits.height * 2 - y - 1;
- inside_bounds = TRUE;
- break;
-
- case PIXMAN_REPEAT_NONE:
- default:
- inside_bounds =
- (x >= 0 &&
- x < src_image->bits.width &&
- y >= 0 &&
- y < src_image->bits.height);
- break;
- }
+ if (op == PIXMAN_OP_SRC)
+ memset (dst, 0, sizeof (*dst) * width);
+ }
+ else
+ {
+ int w = width;
+
+ uint32_t *src = src_line + y * src_stride;
- if (inside_bounds)
+ while (w >= 2)
{
- /* XXX: we should move this multiplication out of the loop */
- result = *(src + y * src_stride + x);
+ uint32_t s1, s2;
+ int x1, x2;
+
+ x1 = pixman_fixed_to_int (vx);
+ vx += unit_x;
+
+ x2 = pixman_fixed_to_int (vx);
+ vx += unit_x;
+
+ w -= 2;
+
+ s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
+ s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
+
+ if (op == PIXMAN_OP_OVER)
+ {
+ combine_over (s1, dst++);
+ combine_over (s2, dst++);
+ }
+ else
+ {
+ combine_src (s1, dst++);
+ combine_src (s2, dst++);
+ }
}
- else
+
+ while (w--)
{
- result = 0;
- }
- *(dst + i) = result;
+ uint32_t s;
+ int x;
- /* adjust the x location by a unit vector in the x direction:
- * this is equivalent to transforming x+1 of the destination
- * point to source space
- */
- vx += src_image->common.transform->matrix[0][0];
+ x = pixman_fixed_to_int (vx);
+ vx += unit_x;
+
+ s = fetch_nearest (src_repeat, src_format, src, x, src_width);
+
+ if (op == PIXMAN_OP_OVER)
+ combine_over (s, dst++);
+ else
+ combine_src (s, dst++);
+ }
}
- /* adjust the y location by a unit vector in the y direction
- * this is equivalent to transforming y+1 of the destination point
- * to source space
- */
- v.vector[1] += src_image->common.transform->matrix[1][1];
- dst += dst_stride;
}
}
-static void
-fast_path_composite (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * mask,
- pixman_image_t * dest,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+static const pixman_fast_path_t c_fast_paths[] =
{
- if (src->type == BITS
- && src->common.transform
- && !mask
- && op == PIXMAN_OP_SRC
- && !src->common.alpha_map && !dest->common.alpha_map
- && (src->common.filter == PIXMAN_FILTER_NEAREST)
- && PIXMAN_FORMAT_BPP (dest->bits.format) == 32
- && src->bits.format == dest->bits.format
- && !src->bits.read_func && !src->bits.write_func
- && !dest->bits.read_func && !dest->bits.write_func)
- {
- /* ensure that the transform matrix only has a scale */
- if (src->common.transform->matrix[0][1] == 0 &&
- src->common.transform->matrix[1][0] == 0 &&
- src->common.transform->matrix[2][0] == 0 &&
- src->common.transform->matrix[2][1] == 0 &&
- src->common.transform->matrix[2][2] == pixman_fixed_1)
- {
- _pixman_walk_composite_region (imp, op,
- src, mask, dest,
- src_x, src_y,
- mask_x, mask_y,
- dest_x, dest_y,
- width, height,
- fast_composite_src_scale_nearest);
- return;
- }
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8000_8000),
+ PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000),
+ PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_8888_x888),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_8888_x888),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_8888_x888),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_8888_x888),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
+ PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
+ PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
+
+#define SCALED_NEAREST_FLAGS \
+ (FAST_PATH_SCALE_TRANSFORM | \
+ FAST_PATH_NO_ALPHA_MAP | \
+ FAST_PATH_NEAREST_FILTER | \
+ FAST_PATH_NO_ACCESSORS | \
+ FAST_PATH_NO_WIDE_FORMAT)
+
+#define HAS_NORMAL_REPEAT_FLAGS \
+ (FAST_PATH_NO_REFLECT_REPEAT | \
+ FAST_PATH_NO_PAD_REPEAT | \
+ FAST_PATH_NO_NONE_REPEAT)
+
+#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_NEAREST_FLAGS | HAS_NORMAL_REPEAT_FLAGS | FAST_PATH_16BIT_SAFE | FAST_PATH_X_UNIT_POSITIVE, \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
+ }, \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
}
+ SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, x888_x888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, x888_x888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, x888_x888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, x888_x888),
- if (_pixman_run_fast_path (c_fast_paths, imp,
- op, src, mask, dest,
- src_x, src_y,
- mask_x, mask_y,
- dest_x, dest_y,
- width, height))
- {
- return;
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, x888_x888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, x888_x888),
+
+ SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, x888_565),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, x888_565),
+
+ SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
+
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, x888_x888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, x888_x888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, x888_x888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, x888_x888),
+
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
+
+#define NEAREST_FAST_PATH(op,s,d) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest, \
}
- _pixman_implementation_composite (imp->delegate, op,
- src, mask, dest,
- src_x, src_y,
- mask_x, mask_y,
- dest_x, dest_y,
- width, height);
-}
+ NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
+ NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
+ NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
+ NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),
+
+ NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
+ NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
+ NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
+ NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),
+
+ NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
+ NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
+ NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
+ NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),
+
+ NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
+ NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
+ NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
+ NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
+
+ { PIXMAN_OP_NONE },
+};
static void
pixman_fill8 (uint32_t *bits,
@@ -1430,11 +1992,9 @@ pixman_implementation_t *
_pixman_implementation_create_fast_path (void)
{
pixman_implementation_t *general = _pixman_implementation_create_general ();
- pixman_implementation_t *imp = _pixman_implementation_create (general);
+ pixman_implementation_t *imp = _pixman_implementation_create (general, c_fast_paths);
- imp->composite = fast_path_composite;
imp->fill = fast_path_fill;
return imp;
}
-
diff --git a/lib/pixman/pixman/pixman-general.c b/lib/pixman/pixman/pixman-general.c
index 3ead3dac7..bddf79aae 100644
--- a/lib/pixman/pixman/pixman-general.c
+++ b/lib/pixman/pixman/pixman-general.c
@@ -133,15 +133,27 @@ general_composite_rect (pixman_implementation_t *imp,
/* Skip the store step and composite directly into the
* destination if the output format of the compose func matches
* the destination format.
+ *
+ * If the destination format is a8r8g8b8 then we can always do
+ * this. If it is x8r8g8b8, then we can only do it if the
+ * operator doesn't make use of destination alpha.
*/
- if (!wide &&
- !dest->common.alpha_map &&
- !dest->bits.write_func &&
- (op == PIXMAN_OP_ADD || op == PIXMAN_OP_OVER) &&
- (dest->bits.format == PIXMAN_a8r8g8b8 ||
- dest->bits.format == PIXMAN_x8r8g8b8))
+ if ((dest->bits.format == PIXMAN_a8r8g8b8) ||
+ (dest->bits.format == PIXMAN_x8r8g8b8 &&
+ (op == PIXMAN_OP_OVER ||
+ op == PIXMAN_OP_ADD ||
+ op == PIXMAN_OP_SRC ||
+ op == PIXMAN_OP_CLEAR ||
+ op == PIXMAN_OP_IN_REVERSE ||
+ op == PIXMAN_OP_OUT_REVERSE ||
+ op == PIXMAN_OP_DST)))
{
- store = NULL;
+ if (!wide &&
+ !dest->common.alpha_map &&
+ !dest->bits.write_func)
+ {
+ store = NULL;
+ }
}
if (!store)
@@ -252,26 +264,11 @@ general_composite_rect (pixman_implementation_t *imp,
free (scanline_buffer);
}
-static void
-general_composite (pixman_implementation_t * imp,
- pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * mask,
- pixman_image_t * dest,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+static const pixman_fast_path_t general_fast_path[] =
{
- _pixman_walk_composite_region (imp, op, src, mask, dest, src_x, src_y,
- mask_x, mask_y, dest_x, dest_y,
- width, height,
- general_composite_rect);
-}
+ { PIXMAN_OP_any, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, general_composite_rect },
+ { PIXMAN_OP_NONE }
+};
static pixman_bool_t
general_blt (pixman_implementation_t *imp,
@@ -310,12 +307,11 @@ general_fill (pixman_implementation_t *imp,
pixman_implementation_t *
_pixman_implementation_create_general (void)
{
- pixman_implementation_t *imp = _pixman_implementation_create (NULL);
+ pixman_implementation_t *imp = _pixman_implementation_create (NULL, general_fast_path);
_pixman_setup_combiner_functions_32 (imp);
_pixman_setup_combiner_functions_64 (imp);
- imp->composite = general_composite;
imp->blt = general_blt;
imp->fill = general_fill;
diff --git a/lib/pixman/pixman/pixman-image.c b/lib/pixman/pixman/pixman-image.c
index fff0497f1..03a39db87 100644
--- a/lib/pixman/pixman/pixman-image.c
+++ b/lib/pixman/pixman/pixman-image.c
@@ -48,8 +48,6 @@ _pixman_init_gradient (gradient_t * gradient,
gradient->n_stops = n_stops;
gradient->stop_range = 0xffff;
- gradient->color_table = NULL;
- gradient->color_table_size = 0;
gradient->common.class = SOURCE_IMAGE_CLASS_UNKNOWN;
return TRUE;
@@ -119,7 +117,6 @@ _pixman_image_allocate (void)
common->client_clip = FALSE;
common->destroy_func = NULL;
common->destroy_data = NULL;
- common->need_workaround = FALSE;
common->dirty = TRUE;
}
@@ -233,23 +230,249 @@ pixman_image_set_destroy_function (pixman_image_t * image,
image->common.destroy_data = data;
}
+PIXMAN_EXPORT void *
+pixman_image_get_destroy_data (pixman_image_t *image)
+{
+ return image->common.destroy_data;
+}
+
void
_pixman_image_reset_clip_region (pixman_image_t *image)
{
image->common.have_clip_region = FALSE;
}
+static pixman_bool_t out_of_bounds_workaround = TRUE;
+
+/* Old X servers rely on out-of-bounds accesses when they are asked
+ * to composite with a window as the source. They create a pixman image
+ * pointing to some bogus position in memory, but then they set a clip
+ * region to the position where the actual bits are.
+ *
+ * Due to a bug in old versions of pixman, which did not clip
+ * against the image bounds when a clip region was set, this
+ * actually worked. So by default we allow certain out-of-bounds
+ * accesses unless they are explicitly disabled.
+ *
+ * Fixed X servers should call this function to disable the workaround.
+ */
+PIXMAN_EXPORT void
+pixman_disable_out_of_bounds_workaround (void)
+{
+ out_of_bounds_workaround = FALSE;
+}
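A usage sketch for the new entry point (the renderer_init wrapper is hypothetical; pixman_disable_out_of_bounds_workaround() itself is the exported function added above):

    #include <pixman.h>

    /* A fixed X server (or any client that never depends on the old
     * out-of-bounds behaviour) calls this once, before compositing.
     */
    static void
    renderer_init (void)
    {
        pixman_disable_out_of_bounds_workaround ();
    }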
+
+static pixman_bool_t
+source_image_needs_out_of_bounds_workaround (bits_image_t *image)
+{
+ if (image->common.clip_sources &&
+ image->common.repeat == PIXMAN_REPEAT_NONE &&
+ image->common.have_clip_region &&
+ out_of_bounds_workaround)
+ {
+ if (!image->common.client_clip)
+ {
+ /* There is no client clip, so if the clip region extends beyond the
+ * drawable geometry, it must be because the X server generated the
+ * bogus clip region.
+ */
+ const pixman_box32_t *extents =
+ pixman_region32_extents (&image->common.clip_region);
+
+ if (extents->x1 >= 0 && extents->x2 <= image->width &&
+ extents->y1 >= 0 && extents->y2 <= image->height)
+ {
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static void
+compute_image_info (pixman_image_t *image)
+{
+ pixman_format_code_t code;
+ uint32_t flags = 0;
+
+ /* Transform */
+ if (!image->common.transform)
+ {
+ flags |= (FAST_PATH_ID_TRANSFORM | FAST_PATH_X_UNIT_POSITIVE);
+ }
+ else
+ {
+ if (image->common.transform->matrix[0][1] == 0 &&
+ image->common.transform->matrix[1][0] == 0 &&
+ image->common.transform->matrix[2][0] == 0 &&
+ image->common.transform->matrix[2][1] == 0 &&
+ image->common.transform->matrix[2][2] == pixman_fixed_1)
+ {
+ flags |= FAST_PATH_SCALE_TRANSFORM;
+ }
+
+ if (image->common.transform->matrix[0][0] > 0)
+ flags |= FAST_PATH_X_UNIT_POSITIVE;
+ }
+
+ /* Alpha map */
+ if (!image->common.alpha_map)
+ flags |= FAST_PATH_NO_ALPHA_MAP;
+
+ /* Filter */
+ switch (image->common.filter)
+ {
+ case PIXMAN_FILTER_NEAREST:
+ case PIXMAN_FILTER_FAST:
+ flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
+ break;
+
+ case PIXMAN_FILTER_CONVOLUTION:
+ break;
+
+ default:
+ flags |= FAST_PATH_NO_CONVOLUTION_FILTER;
+ break;
+ }
+
+ /* Repeat mode */
+ switch (image->common.repeat)
+ {
+ case PIXMAN_REPEAT_NONE:
+ flags |= FAST_PATH_NO_REFLECT_REPEAT | FAST_PATH_NO_PAD_REPEAT;
+ break;
+
+ case PIXMAN_REPEAT_REFLECT:
+ flags |= FAST_PATH_NO_PAD_REPEAT | FAST_PATH_NO_NONE_REPEAT;
+ break;
+
+ case PIXMAN_REPEAT_PAD:
+ flags |= FAST_PATH_NO_REFLECT_REPEAT | FAST_PATH_NO_NONE_REPEAT;
+ break;
+
+ default:
+ flags |= FAST_PATH_NO_REFLECT_REPEAT | FAST_PATH_NO_PAD_REPEAT | FAST_PATH_NO_NONE_REPEAT;
+ break;
+ }
+
+ /* Component alpha */
+ if (image->common.component_alpha)
+ flags |= FAST_PATH_COMPONENT_ALPHA;
+ else
+ flags |= FAST_PATH_UNIFIED_ALPHA;
+
+ flags |= (FAST_PATH_NO_ACCESSORS | FAST_PATH_NO_WIDE_FORMAT);
+
+ /* Type specific checks */
+ switch (image->type)
+ {
+ case SOLID:
+ code = PIXMAN_solid;
+
+ if (image->solid.color.alpha == 0xffff)
+ flags |= FAST_PATH_IS_OPAQUE;
+ break;
+
+ case BITS:
+ if (image->bits.width == 1 &&
+ image->bits.height == 1 &&
+ image->common.repeat != PIXMAN_REPEAT_NONE)
+ {
+ code = PIXMAN_solid;
+ }
+ else
+ {
+ code = image->bits.format;
+
+ if (!image->common.transform &&
+ image->common.repeat == PIXMAN_REPEAT_NORMAL)
+ {
+ flags |= FAST_PATH_SIMPLE_REPEAT;
+ }
+ }
+
+ if (image->common.repeat != PIXMAN_REPEAT_NONE &&
+ !PIXMAN_FORMAT_A (image->bits.format) &&
+ PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_GRAY &&
+ PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_COLOR)
+ {
+ flags |= FAST_PATH_IS_OPAQUE;
+ }
+
+ if (source_image_needs_out_of_bounds_workaround (&image->bits))
+ flags |= FAST_PATH_NEEDS_WORKAROUND;
+
+ if (image->bits.read_func || image->bits.write_func)
+ flags &= ~FAST_PATH_NO_ACCESSORS;
+
+ if (PIXMAN_FORMAT_IS_WIDE (image->bits.format))
+ flags &= ~FAST_PATH_NO_WIDE_FORMAT;
+ break;
+
+ case LINEAR:
+ case RADIAL:
+ code = PIXMAN_unknown;
+
+ if (image->common.repeat != PIXMAN_REPEAT_NONE)
+ {
+ int i;
+
+ flags |= FAST_PATH_IS_OPAQUE;
+ for (i = 0; i < image->gradient.n_stops; ++i)
+ {
+ if (image->gradient.stops[i].color.alpha != 0xffff)
+ {
+ flags &= ~FAST_PATH_IS_OPAQUE;
+ break;
+ }
+ }
+ }
+ break;
+
+ default:
+ code = PIXMAN_unknown;
+ break;
+ }
+
+ /* Both alpha maps and convolution filters can introduce
+ * non-opaqueness in otherwise opaque images. Also, an image
+ * with component alpha turned on is only opaque if all of its
+ * channels are opaque, so we simply clear the opaque flag
+ * unconditionally for those images.
+ */
+ if (image->common.alpha_map ||
+ image->common.filter == PIXMAN_FILTER_CONVOLUTION ||
+ image->common.component_alpha)
+ {
+ flags &= ~FAST_PATH_IS_OPAQUE;
+ }
+
+ image->common.flags = flags;
+ image->common.extended_format_code = code;
+}
+
void
_pixman_image_validate (pixman_image_t *image)
{
if (image->common.dirty)
{
+ compute_image_info (image);
+
+ /* It is important that property_changed is
+ * called *after* compute_image_info() because
+ * property_changed() can make use of the flags
+ * to set up accessors etc.
+ */
image->common.property_changed (image);
+
image->common.dirty = FALSE;
}
if (image->common.alpha_map)
- _pixman_image_validate (image->common.alpha_map);
+ _pixman_image_validate ((pixman_image_t *)image->common.alpha_map);
}
PIXMAN_EXPORT pixman_bool_t
@@ -518,25 +741,6 @@ pixman_image_get_depth (pixman_image_t *image)
return 0;
}
-pixman_bool_t
-_pixman_image_is_solid (pixman_image_t *image)
-{
- if (image->type == SOLID)
- return TRUE;
-
- if (image->type != BITS ||
- image->bits.width != 1 ||
- image->bits.height != 1)
- {
- return FALSE;
- }
-
- if (image->common.repeat == PIXMAN_REPEAT_NONE)
- return FALSE;
-
- return TRUE;
-}
-
uint32_t
_pixman_image_get_solid (pixman_image_t * image,
pixman_format_code_t format)
@@ -556,54 +760,3 @@ _pixman_image_get_solid (pixman_image_t * image,
return result;
}
-
-pixman_bool_t
-_pixman_image_is_opaque (pixman_image_t *image)
-{
- int i;
-
- if (image->common.alpha_map)
- return FALSE;
-
- switch (image->type)
- {
- case BITS:
- if (image->common.repeat == PIXMAN_REPEAT_NONE)
- return FALSE;
-
- if (PIXMAN_FORMAT_A (image->bits.format))
- return FALSE;
- break;
-
- case LINEAR:
- case RADIAL:
- if (image->common.repeat == PIXMAN_REPEAT_NONE)
- return FALSE;
-
- for (i = 0; i < image->gradient.n_stops; ++i)
- {
- if (image->gradient.stops[i].color.alpha != 0xffff)
- return FALSE;
- }
- break;
-
- case CONICAL:
- /* Conical gradients always have a transparent border */
- return FALSE;
- break;
-
- case SOLID:
- if (ALPHA_8 (image->solid.color) != 0xff)
- return FALSE;
- break;
- }
-
- /* Convolution filters can introduce translucency if the sum of the
- * weights is lower than 1.
- */
- if (image->common.filter == PIXMAN_FILTER_CONVOLUTION)
- return FALSE;
-
- return TRUE;
-}
-
diff --git a/lib/pixman/pixman/pixman-implementation.c b/lib/pixman/pixman/pixman-implementation.c
index bcda9fe85..bc3749ef5 100644
--- a/lib/pixman/pixman/pixman-implementation.c
+++ b/lib/pixman/pixman/pixman-implementation.c
@@ -28,30 +28,6 @@
#include "pixman-private.h"
static void
-delegate_composite (pixman_implementation_t * imp,
- pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * mask,
- pixman_image_t * dest,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- _pixman_implementation_composite (imp->delegate,
- op,
- src, mask, dest,
- src_x, src_y,
- mask_x, mask_y,
- dest_x, dest_y,
- width, height);
-}
-
-static void
delegate_combine_32 (pixman_implementation_t * imp,
pixman_op_t op,
uint32_t * dest,
@@ -136,7 +112,8 @@ delegate_fill (pixman_implementation_t *imp,
}
pixman_implementation_t *
-_pixman_implementation_create (pixman_implementation_t *delegate)
+_pixman_implementation_create (pixman_implementation_t *delegate,
+ const pixman_fast_path_t *fast_paths)
{
pixman_implementation_t *imp = malloc (sizeof (pixman_implementation_t));
pixman_implementation_t *d;
@@ -145,6 +122,8 @@ _pixman_implementation_create (pixman_implementation_t *delegate)
if (!imp)
return NULL;
+ assert (fast_paths);
+
/* Make sure the whole delegate chain has the right toplevel */
imp->delegate = delegate;
for (d = imp; d != NULL; d = d->delegate)
@@ -152,11 +131,10 @@ _pixman_implementation_create (pixman_implementation_t *delegate)
/* Fill out function pointers with ones that just delegate
*/
- imp->composite = delegate_composite;
imp->blt = delegate_blt;
imp->fill = delegate_fill;
- for (i = 0; i < PIXMAN_OP_LAST; ++i)
+ for (i = 0; i < PIXMAN_N_OPERATORS; ++i)
{
imp->combine_32[i] = delegate_combine_32;
imp->combine_64[i] = delegate_combine_64;
@@ -164,6 +142,8 @@ _pixman_implementation_create (pixman_implementation_t *delegate)
imp->combine_64_ca[i] = delegate_combine_64_ca;
}
+ imp->fast_paths = fast_paths;
+
return imp;
}
@@ -211,27 +191,6 @@ _pixman_implementation_combine_64_ca (pixman_implementation_t * imp,
(*imp->combine_64_ca[op]) (imp, op, dest, src, mask, width);
}
-void
-_pixman_implementation_composite (pixman_implementation_t * imp,
- pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * mask,
- pixman_image_t * dest,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- (*imp->composite) (imp, op,
- src, mask, dest,
- src_x, src_y, mask_x, mask_y, dest_x, dest_y,
- width, height);
-}
-
pixman_bool_t
_pixman_implementation_blt (pixman_implementation_t * imp,
uint32_t * src_bits,
diff --git a/lib/pixman/pixman/pixman-mmx.c b/lib/pixman/pixman/pixman-mmx.c
index 7dcc1dc96..d51b40cc1 100644
--- a/lib/pixman/pixman/pixman-mmx.c
+++ b/lib/pixman/pixman/pixman-mmx.c
@@ -485,7 +485,7 @@ mmx_combine_over_reverse_u (pixman_implementation_t *imp,
{
__m64 d, da;
uint32_t s = combine (src, mask);
-
+
d = load8888 (*dest);
da = expand_alpha (d);
*dest = store8888 (over (d, da, load8888 (s)));
@@ -511,12 +511,12 @@ mmx_combine_in_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 x, a;
-
+
x = load8888 (combine (src, mask));
a = load8888 (*dest);
a = expand_alpha (a);
x = pix_multiply (x, a);
-
+
*dest = store8888 (x);
++dest;
@@ -540,7 +540,7 @@ mmx_combine_in_reverse_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 x, a;
-
+
x = load8888 (*dest);
a = load8888 (combine (src, mask));
a = expand_alpha (a);
@@ -568,7 +568,7 @@ mmx_combine_out_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 x, a;
-
+
x = load8888 (combine (src, mask));
a = load8888 (*dest);
a = expand_alpha (a);
@@ -597,7 +597,7 @@ mmx_combine_out_reverse_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 x, a;
-
+
x = load8888 (*dest);
a = load8888 (combine (src, mask));
a = expand_alpha (a);
@@ -627,7 +627,7 @@ mmx_combine_atop_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 s, da, d, sia;
-
+
s = load8888 (combine (src, mask));
d = load8888 (*dest);
sia = expand_alpha (s);
@@ -659,7 +659,7 @@ mmx_combine_atop_reverse_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 s, dia, d, sa;
-
+
s = load8888 (combine (src, mask));
d = load8888 (*dest);
sa = expand_alpha (s);
@@ -689,7 +689,7 @@ mmx_combine_xor_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 s, dia, d, sia;
-
+
s = load8888 (combine (src, mask));
d = load8888 (*dest);
sia = expand_alpha (s);
@@ -720,7 +720,7 @@ mmx_combine_add_u (pixman_implementation_t *imp,
while (dest < end)
{
__m64 s, d;
-
+
s = load8888 (combine (src, mask));
d = load8888 (*dest);
s = pix_add (s, d);
@@ -785,7 +785,7 @@ mmx_combine_src_ca (pixman_implementation_t *imp,
{
__m64 a = load8888 (*mask);
__m64 s = load8888 (*src);
-
+
s = pix_multiply (s, a);
*dest = store8888 (s);
@@ -864,7 +864,7 @@ mmx_combine_in_ca (pixman_implementation_t *imp,
__m64 s = load8888 (*src);
__m64 d = load8888 (*dest);
__m64 da = expand_alpha (d);
-
+
s = pix_multiply (s, a);
s = pix_multiply (s, da);
*dest = store8888 (s);
@@ -892,7 +892,7 @@ mmx_combine_in_reverse_ca (pixman_implementation_t *imp,
__m64 s = load8888 (*src);
__m64 d = load8888 (*dest);
__m64 sa = expand_alpha (s);
-
+
a = pix_multiply (a, sa);
d = pix_multiply (d, a);
*dest = store8888 (d);
@@ -920,7 +920,7 @@ mmx_combine_out_ca (pixman_implementation_t *imp,
__m64 s = load8888 (*src);
__m64 d = load8888 (*dest);
__m64 da = expand_alpha (d);
-
+
da = negate (da);
s = pix_multiply (s, a);
s = pix_multiply (s, da);
@@ -1102,7 +1102,7 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp,
{
uint32_t src;
uint32_t *dst_line, *dst;
- uint16_t w;
+ int32_t w;
int dst_stride;
__m64 vsrc, vsrca;
@@ -1181,7 +1181,7 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp,
{
uint32_t src;
uint16_t *dst_line, *dst;
- uint16_t w;
+ int32_t w;
int dst_stride;
__m64 vsrc, vsrca;
@@ -1209,7 +1209,7 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp,
{
uint64_t d = *dst;
__m64 vdest = expand565 (M64 (d), 0);
-
+
vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0);
*dst = UINT64 (vdest);
@@ -1240,7 +1240,7 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp,
{
uint64_t d = *dst;
__m64 vdest = expand565 (M64 (d), 0);
-
+
vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0);
*dst = UINT64 (vdest);
@@ -1376,7 +1376,7 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
uint32_t mask;
__m64 vmask;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
__m64 srca;
CHECKPOINT ();
@@ -1385,6 +1385,7 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
+ mask &= 0xff000000;
mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
vmask = load8888 (mask);
srca = MC (4x00ff);
@@ -1461,7 +1462,7 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
uint32_t mask;
__m64 vmask;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
__m64 srca;
CHECKPOINT ();
@@ -1470,6 +1471,7 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
+ mask &= 0xff000000;
mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
vmask = load8888 (mask);
srca = MC (4x00ff);
@@ -1596,7 +1598,7 @@ mmx_composite_over_8888_8888 (pixman_implementation_t *imp,
uint32_t s;
int dst_stride, src_stride;
uint8_t a;
- uint16_t w;
+ int32_t w;
CHECKPOINT ();
@@ -1615,7 +1617,7 @@ mmx_composite_over_8888_8888 (pixman_implementation_t *imp,
{
s = *src++;
a = s >> 24;
-
+
if (a == 0xff)
{
*dst = s;
@@ -1627,7 +1629,7 @@ mmx_composite_over_8888_8888 (pixman_implementation_t *imp,
sa = expand_alpha (ms);
*dst = store8888 (over (ms, sa, load8888 (*dst)));
}
-
+
dst++;
}
}
@@ -1652,7 +1654,7 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
uint16_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
CHECKPOINT ();
@@ -1756,7 +1758,7 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
__m64 vsrc, vsrca;
uint64_t srcsrc;
@@ -1795,7 +1797,7 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
__m64 vdest = in_over (vsrc, vsrca,
expand_alpha_rev (M64 (m)),
load8888 (*dst));
-
+
*dst = store8888 (vdest);
}
@@ -1809,7 +1811,7 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
while (w >= 2)
{
uint64_t m0, m1;
-
+
m0 = *mask;
m1 = *(mask + 1);
@@ -1883,22 +1885,13 @@ pixman_fill_mmx (uint32_t *bits,
if (bpp != 16 && bpp != 32 && bpp != 8)
return FALSE;
- if (bpp == 16 && (xor >> 16 != (xor & 0xffff)))
- return FALSE;
-
- if (bpp == 8 &&
- ((xor >> 16 != (xor & 0xffff)) ||
- (xor >> 24 != (xor & 0x00ff) >> 16)))
- {
- return FALSE;
- }
-
if (bpp == 8)
{
stride = stride * (int) sizeof (uint32_t) / 1;
byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
byte_width = width;
stride *= 1;
+ xor = (xor & 0xff) * 0x01010101;
}
else if (bpp == 16)
{
@@ -1906,6 +1899,7 @@ pixman_fill_mmx (uint32_t *bits,
byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
byte_width = 2 * width;
stride *= 2;
+ xor = (xor & 0xffff) * 0x00010001;
}
else
{
@@ -1928,7 +1922,7 @@ pixman_fill_mmx (uint32_t *bits,
"movq %7, %5\n"
"movq %7, %6\n"
: "=y" (v1), "=y" (v2), "=y" (v3),
- "=y" (v4), "=y" (v5), "=y" (v6), "=y" (v7)
+ "=y" (v4), "=y" (v5), "=y" (v6), "=y" (v7)
: "y" (vfill));
#endif
@@ -1936,7 +1930,7 @@ pixman_fill_mmx (uint32_t *bits,
{
int w;
uint8_t *d = byte_line;
-
+
byte_line += stride;
w = byte_width;
@@ -1976,8 +1970,8 @@ pixman_fill_mmx (uint32_t *bits,
"movq %8, 56(%0)\n"
:
: "r" (d),
- "y" (vfill), "y" (v1), "y" (v2), "y" (v3),
- "y" (v4), "y" (v5), "y" (v6), "y" (v7)
+ "y" (vfill), "y" (v1), "y" (v2), "y" (v3),
+ "y" (v4), "y" (v5), "y" (v6), "y" (v7)
: "memory");
#else
*(__m64*) (d + 0) = vfill;
@@ -2038,7 +2032,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
__m64 vsrc, vsrca;
uint64_t srcsrc;
@@ -2080,7 +2074,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
if (m)
{
__m64 vdest = in (vsrc, expand_alpha_rev (M64 (m)));
-
+
*dst = store8888 (vdest);
}
else
@@ -2136,7 +2130,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
if (m)
{
__m64 vdest = load8888 (*dst);
-
+
vdest = in (vsrc, expand_alpha_rev (M64 (m)));
*dst = store8888 (vdest);
}
@@ -2173,7 +2167,7 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
uint16_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
__m64 vsrc, vsrca, tmp;
uint64_t srcsrcsrcsrc, src16;
@@ -2218,7 +2212,7 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
__m64 vd = M64 (d);
__m64 vdest = in_over (
vsrc, vsrca, expand_alpha_rev (M64 (m)), expand565 (vd, 0));
-
+
vd = pack_565 (vdest, _mm_setzero_si64 (), 0);
*dst = UINT64 (vd);
}
@@ -2313,7 +2307,7 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
uint16_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
CHECKPOINT ();
@@ -2433,7 +2427,7 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
CHECKPOINT ();
@@ -2641,7 +2635,7 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
uint32_t src;
uint8_t sa;
__m64 vsrc, vsrca;
@@ -2723,7 +2717,7 @@ mmx_composite_in_8_8 (pixman_implementation_t *imp,
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int src_stride, dst_stride;
- uint16_t w;
+ int32_t w;
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
@@ -2788,7 +2782,7 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp,
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
uint32_t src;
uint8_t sa;
__m64 vsrc, vsrca;
@@ -2868,7 +2862,7 @@ mmx_composite_add_8000_8000 (pixman_implementation_t *imp,
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
uint8_t s, d;
uint16_t t;
@@ -2942,7 +2936,7 @@ mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
CHECKPOINT ();
@@ -3082,8 +3076,8 @@ pixman_blt_mmx (uint32_t *src_bits,
:
: "r" (d), "r" (s)
: "memory",
- "%mm0", "%mm1", "%mm2", "%mm3",
- "%mm4", "%mm5", "%mm6", "%mm7");
+ "%mm0", "%mm1", "%mm2", "%mm3",
+ "%mm4", "%mm5", "%mm6", "%mm7");
#else
__m64 v0 = *(__m64 *)(s + 0);
__m64 v1 = *(__m64 *)(s + 8);
@@ -3153,6 +3147,7 @@ mmx_composite_copy_area (pixman_implementation_t *imp,
src_x, src_y, dest_x, dest_y, width, height);
}
+#if 0
static void
mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -3168,11 +3163,11 @@ mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
int32_t width,
int32_t height)
{
- uint32_t *src, *src_line;
- uint32_t *dst, *dst_line;
- uint8_t *mask, *mask_line;
+ uint32_t *src, *src_line;
+ uint32_t *dst, *dst_line;
+ uint8_t *mask, *mask_line;
int src_stride, mask_stride, dst_stride;
- uint16_t w;
+ int32_t w;
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
@@ -3219,116 +3214,82 @@ mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
_mm_empty ();
}
+#endif
static const pixman_fast_path_t mmx_fast_paths[] =
{
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, mmx_composite_over_n_8_0565, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, mmx_composite_over_n_8_0565, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, mmx_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, mmx_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, mmx_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, mmx_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, mmx_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, mmx_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, mmx_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, mmx_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, mmx_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, mmx_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, mmx_composite_over_pixbuf_8888, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_a8r8g8b8, mmx_composite_over_pixbuf_8888, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, mmx_composite_over_pixbuf_8888, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_x8r8g8b8, mmx_composite_over_pixbuf_8888, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, mmx_composite_over_pixbuf_0565, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_r5g6b5, mmx_composite_over_pixbuf_0565, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8b8g8r8, mmx_composite_over_pixbuf_8888, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, mmx_composite_over_pixbuf_8888, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_x8b8g8r8, mmx_composite_over_pixbuf_8888, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, mmx_composite_over_pixbuf_8888, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_b5g6r5, mmx_composite_over_pixbuf_0565, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, mmx_composite_over_pixbuf_0565, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, mmx_composite_over_x888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, mmx_composite_over_x888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, mmx_composite_over_x888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, mmx_composite_over_x888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, mmx_composite_over_8888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, mmx_composite_over_8888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, mmx_composite_over_8888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, mmx_composite_over_8888_n_8888, NEED_SOLID_MASK },
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mmx_composite_over_n_8_0565 ),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mmx_composite_over_n_8_0565 ),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, mmx_composite_over_n_8_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, mmx_composite_over_n_8_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, mmx_composite_over_n_8_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, mmx_composite_over_n_8_8888 ),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, mmx_composite_over_n_8888_8888_ca ),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, mmx_composite_over_n_8888_8888_ca ),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, mmx_composite_over_n_8888_0565_ca ),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, mmx_composite_over_n_8888_8888_ca ),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mmx_composite_over_n_8888_8888_ca ),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, mmx_composite_over_n_8888_0565_ca ),
+ PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, mmx_composite_over_pixbuf_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, mmx_composite_over_pixbuf_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, mmx_composite_over_pixbuf_0565 ),
+ PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, mmx_composite_over_pixbuf_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, mmx_composite_over_pixbuf_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, mmx_composite_over_pixbuf_0565 ),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, mmx_composite_over_x888_n_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, mmx_composite_over_x888_n_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, mmx_composite_over_x888_n_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, mmx_composite_over_x888_n_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, mmx_composite_over_8888_n_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, mmx_composite_over_8888_n_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, mmx_composite_over_8888_n_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, mmx_composite_over_8888_n_8888 ),
#if 0
- /* FIXME: This code is commented out since it's apparently not actually faster than the generic code. */
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, mmx_composite_over_x888_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, mmx_composite_over_x888_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_x8b8r8g8, PIXMAN_a8, PIXMAN_x8b8g8r8, mmx_composite_over_x888_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_x8b8r8g8, PIXMAN_a8, PIXMAN_a8r8g8b8, mmx_composite_over_x888_8_8888, 0 },
+ /* FIXME: This code is commented out since it's apparently
+ * not actually faster than the generic code.
+ */
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, mmx_composite_over_x888_8_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, mmx_composite_over_x888_8_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8r8g8, a8, x8b8g8r8, mmx_composite_over_x888_8_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8r8g8, a8, a8r8g8b8, mmx_composite_over_x888_8_8888 ),
#endif
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, mmx_composite_over_n_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_x8r8g8b8, mmx_composite_over_n_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, mmx_composite_over_n_0565, 0 },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, mmx_composite_copy_area, 0 },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, mmx_composite_copy_area, 0 },
-
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, mmx_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, mmx_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, mmx_composite_over_8888_0565, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, mmx_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, mmx_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, mmx_composite_over_8888_0565, 0 },
-
- { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, mmx_composite_add_8888_8888, 0 },
- { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, mmx_composite_add_8888_8888, 0 },
- { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, mmx_composite_add_8000_8000, 0 },
- { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, mmx_composite_add_n_8_8, 0 },
-
- { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, mmx_composite_src_n_8_8888, 0 },
- { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, mmx_composite_src_n_8_8888, 0 },
- { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, mmx_composite_src_n_8_8888, 0 },
- { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, mmx_composite_src_n_8_8888, 0 },
- { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, mmx_composite_copy_area, 0 },
- { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, mmx_composite_copy_area, 0 },
- { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, mmx_composite_copy_area, 0 },
- { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, mmx_composite_copy_area, 0 },
- { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, mmx_composite_copy_area, 0 },
- { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, mmx_composite_copy_area, 0 },
- { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, mmx_composite_copy_area, 0 },
- { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, mmx_composite_copy_area, 0 },
-
- { PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, mmx_composite_in_8_8, 0 },
- { PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, mmx_composite_in_n_8_8, 0 },
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, mmx_composite_over_n_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, mmx_composite_over_n_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, mmx_composite_over_n_0565 ),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, mmx_composite_copy_area ),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, mmx_composite_copy_area ),
+
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, mmx_composite_over_8888_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, mmx_composite_over_8888_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, mmx_composite_over_8888_0565 ),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, mmx_composite_over_8888_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, mmx_composite_over_8888_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, mmx_composite_over_8888_0565 ),
+
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, mmx_composite_add_8888_8888 ),
+ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, mmx_composite_add_8888_8888 ),
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, mmx_composite_add_8000_8000 ),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, mmx_composite_add_n_8_8 ),
+
+ PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mmx_composite_src_n_8_8888 ),
+ PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, mmx_composite_src_n_8_8888 ),
+ PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, mmx_composite_src_n_8_8888 ),
+ PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, mmx_composite_src_n_8_8888 ),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, mmx_composite_copy_area ),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, mmx_composite_copy_area ),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, mmx_composite_copy_area ),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, mmx_composite_copy_area ),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, mmx_composite_copy_area ),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, mmx_composite_copy_area ),
+ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, mmx_composite_copy_area ),
+ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, mmx_composite_copy_area ),
+
+ PIXMAN_STD_FAST_PATH (IN, a8, null, a8, mmx_composite_in_8_8 ),
+ PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, mmx_composite_in_n_8_8 ),
{ PIXMAN_OP_NONE },
};
-static void
-mmx_composite (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * mask,
- pixman_image_t * dest,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- if (_pixman_run_fast_path (mmx_fast_paths, imp,
- op, src, mask, dest,
- src_x, src_y,
- mask_x, mask_y,
- dest_x, dest_y,
- width, height))
- {
- return;
- }
-
- _pixman_implementation_composite (imp->delegate,
- op, src, mask, dest, src_x, src_y,
- mask_x, mask_y, dest_x, dest_y,
- width, height);
-}
-
static pixman_bool_t
mmx_blt (pixman_implementation_t *imp,
uint32_t * src_bits,
@@ -3350,9 +3311,9 @@ mmx_blt (pixman_implementation_t *imp,
{
return _pixman_implementation_blt (
- imp->delegate,
- src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
- src_x, src_y, dst_x, dst_y, width, height);
+ imp->delegate,
+ src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+ src_x, src_y, dst_x, dst_y, width, height);
}
return TRUE;
@@ -3372,7 +3333,7 @@ mmx_fill (pixman_implementation_t *imp,
if (!pixman_fill_mmx (bits, stride, bpp, x, y, width, height, xor))
{
return _pixman_implementation_fill (
- imp->delegate, bits, stride, bpp, x, y, width, height, xor);
+ imp->delegate, bits, stride, bpp, x, y, width, height, xor);
}
return TRUE;
@@ -3382,7 +3343,7 @@ pixman_implementation_t *
_pixman_implementation_create_mmx (void)
{
pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
- pixman_implementation_t *imp = _pixman_implementation_create (general);
+ pixman_implementation_t *imp = _pixman_implementation_create (general, mmx_fast_paths);
imp->combine_32[PIXMAN_OP_OVER] = mmx_combine_over_u;
imp->combine_32[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_u;
@@ -3408,7 +3369,6 @@ _pixman_implementation_create_mmx (void)
imp->combine_32_ca[PIXMAN_OP_XOR] = mmx_combine_xor_ca;
imp->combine_32_ca[PIXMAN_OP_ADD] = mmx_combine_add_ca;
- imp->composite = mmx_composite;
imp->blt = mmx_blt;
imp->fill = mmx_fill;
diff --git a/lib/pixman/pixman/pixman-private.h b/lib/pixman/pixman/pixman-private.h
index ff7a65f88..d5767af48 100644
--- a/lib/pixman/pixman/pixman-private.h
+++ b/lib/pixman/pixman/pixman-private.h
@@ -5,9 +5,14 @@
#ifndef PIXMAN_PRIVATE_H
#define PIXMAN_PRIVATE_H
+#define PIXMAN_DISABLE_DEPRECATED
+#define PIXMAN_USE_INTERNAL_API
+
#include "pixman.h"
#include <time.h>
#include <assert.h>
+#include <stdio.h>
+#include <string.h>
#include "pixman-compiler.h"
@@ -83,7 +88,6 @@ struct image_common
* the image is used as a source
*/
pixman_bool_t dirty;
- pixman_bool_t need_workaround;
pixman_transform_t * transform;
pixman_repeat_t repeat;
pixman_filter_t filter;
@@ -100,6 +104,9 @@ struct image_common
pixman_image_destroy_func_t destroy_func;
void * destroy_data;
+
+ uint32_t flags;
+ pixman_format_code_t extended_format_code;
};
struct source_image
@@ -111,7 +118,10 @@ struct source_image
struct solid_fill
{
source_image_t common;
- uint32_t color; /* FIXME: shouldn't this be a pixman_color_t? */
+ pixman_color_t color;
+
+ uint32_t color_32;
+ uint64_t color_64;
};
struct gradient
@@ -120,8 +130,6 @@ struct gradient
int n_stops;
pixman_gradient_stop_t *stops;
int stop_range;
- uint32_t * color_table;
- int color_table_size;
};
struct linear_gradient
@@ -253,10 +261,6 @@ _pixman_image_store_scanline_32 (bits_image_t * image,
int y,
int width,
const uint32_t *buffer);
-void
-_pixman_image_fetch_pixels (bits_image_t *image,
- uint32_t * buffer,
- int n_pixels);
/* Even though the type of buffer is uint32_t *, the function
* actually expects a uint64_t *buffer.
@@ -281,12 +285,6 @@ _pixman_image_reset_clip_region (pixman_image_t *image);
void
_pixman_image_validate (pixman_image_t *image);
-pixman_bool_t
-_pixman_image_is_opaque (pixman_image_t *image);
-
-pixman_bool_t
-_pixman_image_is_solid (pixman_image_t *image);
-
uint32_t
_pixman_image_get_solid (pixman_image_t * image,
pixman_format_code_t format);
@@ -349,13 +347,13 @@ _pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
#define STEP_Y_SMALL(n) (pixman_fixed_1 / N_Y_FRAC (n))
#define STEP_Y_BIG(n) (pixman_fixed_1 - (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n))
-#define Y_FRAC_FIRST(n) (STEP_Y_SMALL (n) / 2)
+#define Y_FRAC_FIRST(n) (STEP_Y_BIG (n) / 2)
#define Y_FRAC_LAST(n) (Y_FRAC_FIRST (n) + (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n))
#define STEP_X_SMALL(n) (pixman_fixed_1 / N_X_FRAC (n))
#define STEP_X_BIG(n) (pixman_fixed_1 - (N_X_FRAC (n) - 1) * STEP_X_SMALL (n))
-#define X_FRAC_FIRST(n) (STEP_X_SMALL (n) / 2)
+#define X_FRAC_FIRST(n) (STEP_X_BIG (n) / 2)
#define X_FRAC_LAST(n) (X_FRAC_FIRST (n) + (N_X_FRAC (n) - 1) * STEP_X_SMALL (n))
#define RENDER_SAMPLES_X(x, n) \
@@ -372,7 +370,6 @@ pixman_rasterize_edges_accessors (pixman_image_t *image,
/*
* Implementations
*/
-
typedef struct pixman_implementation_t pixman_implementation_t;
typedef void (*pixman_combine_32_func_t) (pixman_implementation_t *imp,
@@ -428,23 +425,36 @@ typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp,
void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp);
void _pixman_setup_combiner_functions_64 (pixman_implementation_t *imp);
-struct pixman_implementation_t
+typedef struct
{
- pixman_implementation_t *toplevel;
- pixman_implementation_t *delegate;
-
- pixman_composite_func_t composite;
- pixman_blt_func_t blt;
- pixman_fill_func_t fill;
+ pixman_op_t op;
+ pixman_format_code_t src_format;
+ uint32_t src_flags;
+ pixman_format_code_t mask_format;
+ uint32_t mask_flags;
+ pixman_format_code_t dest_format;
+ uint32_t dest_flags;
+ pixman_composite_func_t func;
+} pixman_fast_path_t;
- pixman_combine_32_func_t combine_32[PIXMAN_OP_LAST];
- pixman_combine_32_func_t combine_32_ca[PIXMAN_OP_LAST];
- pixman_combine_64_func_t combine_64[PIXMAN_OP_LAST];
- pixman_combine_64_func_t combine_64_ca[PIXMAN_OP_LAST];
+struct pixman_implementation_t
+{
+ pixman_implementation_t * toplevel;
+ pixman_implementation_t * delegate;
+ const pixman_fast_path_t * fast_paths;
+
+ pixman_blt_func_t blt;
+ pixman_fill_func_t fill;
+
+ pixman_combine_32_func_t combine_32[PIXMAN_N_OPERATORS];
+ pixman_combine_32_func_t combine_32_ca[PIXMAN_N_OPERATORS];
+ pixman_combine_64_func_t combine_64[PIXMAN_N_OPERATORS];
+ pixman_combine_64_func_t combine_64_ca[PIXMAN_N_OPERATORS];
};
pixman_implementation_t *
-_pixman_implementation_create (pixman_implementation_t *delegate);
+_pixman_implementation_create (pixman_implementation_t *delegate,
+ const pixman_fast_path_t *fast_paths);
void
_pixman_implementation_combine_32 (pixman_implementation_t *imp,
@@ -474,20 +484,6 @@ _pixman_implementation_combine_64_ca (pixman_implementation_t *imp,
const uint64_t * src,
const uint64_t * mask,
int width);
-void
-_pixman_implementation_composite (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * mask,
- pixman_image_t * dest,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height);
pixman_bool_t
_pixman_implementation_blt (pixman_implementation_t *imp,
@@ -556,25 +552,85 @@ _pixman_choose_implementation (void);
* Utilities
*/
-/* These "formats" both have depth 0, so they
+/* These "formats" all have depth 0, so they
* will never clash with any real ones
*/
#define PIXMAN_null PIXMAN_FORMAT (0, 0, 0, 0, 0, 0)
#define PIXMAN_solid PIXMAN_FORMAT (0, 1, 0, 0, 0, 0)
-
-#define NEED_COMPONENT_ALPHA (1 << 0)
-#define NEED_PIXBUF (1 << 1)
-#define NEED_SOLID_MASK (1 << 2)
-
-typedef struct
-{
- pixman_op_t op;
- pixman_format_code_t src_format;
- pixman_format_code_t mask_format;
- pixman_format_code_t dest_format;
- pixman_composite_func_t func;
- uint32_t flags;
-} pixman_fast_path_t;
+#define PIXMAN_pixbuf PIXMAN_FORMAT (0, 2, 0, 0, 0, 0)
+#define PIXMAN_rpixbuf PIXMAN_FORMAT (0, 3, 0, 0, 0, 0)
+#define PIXMAN_unknown PIXMAN_FORMAT (0, 4, 0, 0, 0, 0)
+#define PIXMAN_any PIXMAN_FORMAT (0, 5, 0, 0, 0, 0)
+
+#define PIXMAN_OP_any (PIXMAN_N_OPERATORS + 1)
+
+#define FAST_PATH_ID_TRANSFORM (1 << 0)
+#define FAST_PATH_NO_ALPHA_MAP (1 << 1)
+#define FAST_PATH_NO_CONVOLUTION_FILTER (1 << 2)
+#define FAST_PATH_NO_PAD_REPEAT (1 << 3)
+#define FAST_PATH_NO_REFLECT_REPEAT (1 << 4)
+#define FAST_PATH_NO_ACCESSORS (1 << 5)
+#define FAST_PATH_NO_WIDE_FORMAT (1 << 6)
+#define FAST_PATH_COVERS_CLIP (1 << 7)
+#define FAST_PATH_COMPONENT_ALPHA (1 << 8)
+#define FAST_PATH_UNIFIED_ALPHA (1 << 9)
+#define FAST_PATH_SCALE_TRANSFORM (1 << 10)
+#define FAST_PATH_NEAREST_FILTER (1 << 11)
+#define FAST_PATH_SIMPLE_REPEAT (1 << 12)
+#define FAST_PATH_IS_OPAQUE (1 << 13)
+#define FAST_PATH_NEEDS_WORKAROUND (1 << 14)
+#define FAST_PATH_NO_NONE_REPEAT (1 << 15)
+#define FAST_PATH_SAMPLES_COVER_CLIP (1 << 16)
+#define FAST_PATH_16BIT_SAFE (1 << 17)
+#define FAST_PATH_X_UNIT_POSITIVE (1 << 18)
+
+#define _FAST_PATH_STANDARD_FLAGS \
+ (FAST_PATH_ID_TRANSFORM | \
+ FAST_PATH_NO_ALPHA_MAP | \
+ FAST_PATH_NO_CONVOLUTION_FILTER | \
+ FAST_PATH_NO_PAD_REPEAT | \
+ FAST_PATH_NO_REFLECT_REPEAT | \
+ FAST_PATH_NO_ACCESSORS | \
+ FAST_PATH_NO_WIDE_FORMAT | \
+ FAST_PATH_COVERS_CLIP)
+
+#define FAST_PATH_STD_SRC_FLAGS \
+ _FAST_PATH_STANDARD_FLAGS
+#define FAST_PATH_STD_MASK_U_FLAGS \
+ (_FAST_PATH_STANDARD_FLAGS | \
+ FAST_PATH_UNIFIED_ALPHA)
+#define FAST_PATH_STD_MASK_CA_FLAGS \
+ (_FAST_PATH_STANDARD_FLAGS | \
+ FAST_PATH_COMPONENT_ALPHA)
+#define FAST_PATH_STD_DEST_FLAGS \
+ (FAST_PATH_NO_ACCESSORS | \
+ FAST_PATH_NO_WIDE_FORMAT)
+
+#define FAST_PATH(op, src, src_flags, mask, mask_flags, dest, dest_flags, func) \
+ PIXMAN_OP_ ## op, \
+ PIXMAN_ ## src, \
+ src_flags, \
+ PIXMAN_ ## mask, \
+ mask_flags, \
+ PIXMAN_ ## dest, \
+ dest_flags, \
+ func
+
+#define PIXMAN_STD_FAST_PATH(op, src, mask, dest, func) \
+ { FAST_PATH ( \
+ op, \
+ src, FAST_PATH_STD_SRC_FLAGS, \
+ mask, (PIXMAN_ ## mask) ? FAST_PATH_STD_MASK_U_FLAGS : 0, \
+ dest, FAST_PATH_STD_DEST_FLAGS, \
+ func) }
+
+#define PIXMAN_STD_FAST_PATH_CA(op, src, mask, dest, func) \
+ { FAST_PATH ( \
+ op, \
+ src, FAST_PATH_STD_SRC_FLAGS, \
+ mask, FAST_PATH_STD_MASK_CA_FLAGS, \
+ dest, FAST_PATH_STD_DEST_FLAGS, \
+ func) }
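
[To make the fast-path tables above easier to read, here is the mechanical expansion of one entry through the macros just defined -- not new code, just the preprocessor result:

    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565)

expands via FAST_PATH to the initializer

    { PIXMAN_OP_OVER,
      PIXMAN_solid,   FAST_PATH_STD_SRC_FLAGS,
      PIXMAN_a8,      FAST_PATH_STD_MASK_U_FLAGS,   /* PIXMAN_a8 != 0, so the ternary picks the unified-alpha flags */
      PIXMAN_r5g6b5,  FAST_PATH_STD_DEST_FLAGS,
      sse2_composite_over_n_8_0565 },

With a null mask the ternary yields 0, because PIXMAN_null is the all-zero depth-0 format defined above, and the _CA variant substitutes FAST_PATH_STD_MASK_CA_FLAGS unconditionally.]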
/* Memory allocation helpers */
void *
@@ -590,38 +646,6 @@ pixman_bool_t
pixman_addition_overflows_int (unsigned int a, unsigned int b);
/* Compositing utilities */
-pixman_bool_t
-_pixman_run_fast_path (const pixman_fast_path_t *paths,
- pixman_implementation_t * imp,
- pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * mask,
- pixman_image_t * dest,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height);
-
-void
-_pixman_walk_composite_region (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int16_t src_x,
- int16_t src_y,
- int16_t mask_x,
- int16_t mask_y,
- int16_t dest_x,
- int16_t dest_y,
- uint16_t width,
- uint16_t height,
- pixman_composite_func_t composite_rect);
-
void
pixman_expand (uint64_t * dst,
const uint32_t * src,
@@ -684,29 +708,62 @@ pixman_region16_copy_from_region32 (pixman_region16_t *dst,
((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) | \
((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000)))
+#define CONVERT_0565_TO_8888(s) (CONVERT_0565_TO_0888(s) | 0xff000000)
+
+/* Trivial versions that are useful in macros */
+#define CONVERT_8888_TO_8888(s) (s)
+#define CONVERT_0565_TO_0565(s) (s)
+
#define PIXMAN_FORMAT_IS_WIDE(f) \
(PIXMAN_FORMAT_A (f) > 8 || \
PIXMAN_FORMAT_R (f) > 8 || \
PIXMAN_FORMAT_G (f) > 8 || \
PIXMAN_FORMAT_B (f) > 8)
+#ifdef WORDS_BIGENDIAN
+# define SCREEN_SHIFT_LEFT(x,n) ((x) << (n))
+# define SCREEN_SHIFT_RIGHT(x,n) ((x) >> (n))
+#else
+# define SCREEN_SHIFT_LEFT(x,n) ((x) >> (n))
+# define SCREEN_SHIFT_RIGHT(x,n) ((x) << (n))
+#endif
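
[The SCREEN_SHIFT macros above encode that in an a1 bitmap word the visually leftmost pixel is the least significant bit on little-endian hosts and the most significant bit on big-endian ones, so a "visual" left shift is the opposite machine shift on the two byte orders. A small standalone check of the little-endian case, matching the non-WORDS_BIGENDIAN branch:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
    uint32_t mask0 = 0xffffffff & ~(0xffffffffU << 1);  /* ~SCREEN_SHIFT_RIGHT(~0,1) == 0x00000001 */
    uint32_t w = 0x00000006;                            /* pixels 1 and 2 set, pixel 0 clear */

    assert (!(w & mask0));      /* pixel 0 is clear                       */
    w >>= 1;                    /* SCREEN_SHIFT_LEFT (w, 1), little endian */
    assert (w & mask0);         /* pixel 1 is now under mask0              */
    return 0;
}
]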
+
/*
* Various debugging code
*/
#undef DEBUG
-#define DEBUG 0
-#if DEBUG
+/* Turn on debugging depending on what type of release this is
+ */
+#if (((PIXMAN_VERSION_MICRO % 2) == 0) && ((PIXMAN_VERSION_MINOR % 2) == 1))
+
+/* Debugging gets turned on for development releases because these
+ * are the things that end up in bleeding edge distributions such
+ * as Rawhide etc.
+ *
+ * For performance reasons we don't turn it on for stable releases or
+ * random git checkouts. (Random git checkouts are often used for
+ * performance work).
+ */
+
+# define DEBUG
+
+#endif
+
+#ifdef DEBUG
+
+void
+_pixman_log_error (const char *function, const char *message);
#define return_if_fail(expr) \
do \
{ \
- if (!(expr)) \
- { \
- fprintf (stderr, "In %s: %s failed\n", FUNC, # expr); \
- return; \
- } \
+ if (!(expr)) \
+ { \
+ _pixman_log_error (FUNC, "The expression " # expr " was false"); \
+ return; \
+ } \
} \
while (0)
@@ -714,16 +771,27 @@ pixman_region16_copy_from_region32 (pixman_region16_t *dst,
do \
{ \
if (!(expr)) \
- { \
- fprintf (stderr, "In %s: %s failed\n", FUNC, # expr); \
- return (retval); \
- } \
+ { \
+ _pixman_log_error (FUNC, "The expression " # expr " was false"); \
+ return (retval); \
+ } \
} \
while (0)
+#define critical_if_fail(expr) \
+ do \
+ { \
+ if (!(expr)) \
+ _pixman_log_error (FUNC, "The expression " # expr " was false"); \
+ } \
+ while (0)
+
+
#else
-#define return_if_fail(expr) \
+#define _pixman_log_error(f,m) do { } while (0) \
+
+#define return_if_fail(expr) \
do \
{ \
if (!(expr)) \
@@ -739,6 +807,11 @@ pixman_region16_copy_from_region32 (pixman_region16_t *dst,
} \
while (0)
+#define critical_if_fail(expr) \
+ do \
+ { \
+ } \
+ while (0)
#endif
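
[A quick illustration of how the three checking macros above differ: return_if_fail / return_val_if_fail bail out of the caller in both configurations and additionally log through _pixman_log_error when DEBUG is defined, while critical_if_fail only logs under DEBUG and compiles to nothing otherwise. A hypothetical helper (not part of the patch) showing typical use inside pixman:

static void
example_fill_box (pixman_box32_t *box, uint32_t pixel)
{
    return_if_fail (box != NULL);            /* bails out; logs only when DEBUG is set    */
    critical_if_fail (box->x1 <= box->x2);   /* logs when DEBUG is set, otherwise a no-op */

    /* ... fill *box with 'pixel' ... */
}
]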
/*
diff --git a/lib/pixman/pixman/pixman-region.c b/lib/pixman/pixman/pixman-region.c
index 8ce5deb77..a9b835488 100644
--- a/lib/pixman/pixman/pixman-region.c
+++ b/lib/pixman/pixman/pixman-region.c
@@ -42,6 +42,25 @@
* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
+ * Copyright © 1998 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. Keith Packard makes no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdlib.h>
@@ -66,74 +85,17 @@
#define GOOD_RECT(rect) ((rect)->x1 < (rect)->x2 && (rect)->y1 < (rect)->y2)
#define BAD_RECT(rect) ((rect)->x1 > (rect)->x2 || (rect)->y1 > (rect)->y2)
-/* Turn on debugging depending on what type of release this is
- */
-
-#if ((PIXMAN_VERSION_MICRO % 2) == 1)
-/* Random git checkout.
- *
- * Those are often used for performance work, so we don't turn on the
- * full self-checking, but we do turn on the asserts.
- */
-# define FATAL_BUGS
-# define noSELF_CHECKS
-#elif ((PIXMAN_VERSION_MINOR % 2) == 0)
-/* Stable release.
- *
- * We don't want assertions because the X server should stay alive
- * if possible. We also don't want self-checks for performance-reasons.
- */
-# define noFATAL_BUGS
-# define noSELF_CHECKS
-#else
-/* Development snapshot.
- *
- * These are the things that get shipped in development distributions
- * such as Rawhide. We want both self-checking and fatal assertions
- * to catch as many bugs as possible.
- */
-# define FATAL_BUGS
-# define SELF_CHECKS
-#endif
-
-#ifndef FATAL_BUGS
-# undef assert
-# undef abort
-# define assert(expr)
-# define abort()
-#endif
-
-#ifdef SELF_CHECKS
-
-static void
-log_region_error (const char *function, const char *message)
-{
- static int n_messages = 0;
-
- if (n_messages < 50)
- {
- fprintf (stderr,
- "*** BUG ***\n"
- "%s: %s\n"
- "Set a breakpoint on 'log_region_error' to debug\n\n",
- function, message);
-
- abort (); /* This is #defined away unless FATAL_BUGS is defined */
-
- n_messages++;
- }
-}
+#ifdef DEBUG
#define GOOD(reg) \
do \
{ \
if (!PREFIX (_selfcheck (reg))) \
- log_region_error (FUNC, "Malformed region " # reg); \
+ _pixman_log_error (FUNC, "Malformed region " # reg); \
} while (0)
#else
-#define log_region_error(function, name)
#define GOOD(reg)
#endif
@@ -295,7 +257,7 @@ alloc_data (size_t n)
} \
ADDRECT (next_rect, nx1, ny1, nx2, ny2); \
region->data->numRects++; \
- assert (region->data->numRects <= region->data->size); \
+ critical_if_fail (region->data->numRects <= region->data->size); \
} while (0)
#define DOWNSIZE(reg, numRects) \
@@ -420,7 +382,7 @@ PREFIX (_init_rect) (region_type_t * region,
if (!GOOD_RECT (&region->extents))
{
if (BAD_RECT (&region->extents))
- log_region_error (FUNC, "Invalid rectangle passed");
+ _pixman_log_error (FUNC, "Invalid rectangle passed");
PREFIX (_init) (region);
return;
}
@@ -434,7 +396,7 @@ PREFIX (_init_with_extents) (region_type_t *region, box_type_t *extents)
if (!GOOD_RECT (extents))
{
if (BAD_RECT (extents))
- log_region_error (FUNC, "Invalid rectangle passed");
+ _pixman_log_error (FUNC, "Invalid rectangle passed");
PREFIX (_init) (region);
return;
}
@@ -612,7 +574,7 @@ pixman_coalesce (region_type_t * region, /* Region to coalesce */
* Figure out how many rectangles are in the band.
*/
numRects = cur_start - prev_start;
- assert (numRects == region->data->numRects - cur_start);
+ critical_if_fail (numRects == region->data->numRects - cur_start);
if (!numRects) return cur_start;
@@ -700,8 +662,8 @@ pixman_region_append_non_o (region_type_t * region,
new_rects = r_end - r;
- assert (y1 < y2);
- assert (new_rects != 0);
+ critical_if_fail (y1 < y2);
+ critical_if_fail (new_rects != 0);
/* Make sure we have enough space for all rectangles to be added */
RECTALLOC (region, new_rects);
@@ -710,7 +672,7 @@ pixman_region_append_non_o (region_type_t * region,
do
{
- assert (r->x1 < r->x2);
+ critical_if_fail (r->x1 < r->x2);
ADDRECT (next_rect, r->x1, y1, r->x2, y2);
r++;
}
@@ -835,8 +797,8 @@ pixman_op (region_type_t * new_reg, /* Place to store result
r2 = PIXREGION_RECTS (reg2);
r2_end = r2 + numRects;
- assert (r1 != r1_end);
- assert (r2 != r2_end);
+ critical_if_fail (r1 != r1_end);
+ critical_if_fail (r2 != r2_end);
old_data = (region_data_type_t *)NULL;
@@ -904,8 +866,8 @@ pixman_op (region_type_t * new_reg, /* Place to store result
* rectangle after the last one in the current band for their
* respective regions.
*/
- assert (r1 != r1_end);
- assert (r2 != r2_end);
+ critical_if_fail (r1 != r1_end);
+ critical_if_fail (r2 != r2_end);
FIND_BAND (r1, r1_band_end, r1_end, r1y1);
FIND_BAND (r2, r2_band_end, r2_end, r2y1);
@@ -1112,7 +1074,7 @@ pixman_set_extents (region_type_t *region)
region->extents.x2 = box_end->x2;
region->extents.y2 = box_end->y2;
- assert (region->extents.y1 < region->extents.y2);
+ critical_if_fail (region->extents.y1 < region->extents.y2);
while (box <= box_end)
{
@@ -1123,7 +1085,7 @@ pixman_set_extents (region_type_t *region)
box++;
}
- assert (region->extents.x1 < region->extents.x2);
+ critical_if_fail (region->extents.x1 < region->extents.x2);
}
/*======================================================================
@@ -1159,8 +1121,8 @@ pixman_region_intersect_o (region_type_t *region,
next_rect = PIXREGION_TOP (region);
- assert (y1 < y2);
- assert (r1 != r1_end && r2 != r2_end);
+ critical_if_fail (y1 < y2);
+ critical_if_fail (r1 != r1_end && r2 != r2_end);
do
{
@@ -1317,8 +1279,8 @@ pixman_region_union_o (region_type_t *region,
int x1; /* left and right side of current union */
int x2;
- assert (y1 < y2);
- assert (r1 != r1_end && r2 != r2_end);
+ critical_if_fail (y1 < y2);
+ critical_if_fail (r1 != r1_end && r2 != r2_end);
next_rect = PIXREGION_TOP (region);
@@ -1388,10 +1350,10 @@ PREFIX (_union_rect) (region_type_t *dest,
if (!GOOD_RECT (&region.extents))
{
if (BAD_RECT (&region.extents))
- log_region_error (FUNC, "Invalid rectangle passed");
+ _pixman_log_error (FUNC, "Invalid rectangle passed");
return PREFIX (_copy) (dest, source);
}
-
+
region.data = NULL;
return PREFIX (_union) (dest, source, &region);
@@ -1881,8 +1843,8 @@ pixman_region_subtract_o (region_type_t * region,
x1 = r1->x1;
- assert (y1 < y2);
- assert (r1 != r1_end && r2 != r2_end);
+ critical_if_fail (y1 < y2);
+ critical_if_fail (r1 != r1_end && r2 != r2_end);
next_rect = PIXREGION_TOP (region);
@@ -1926,7 +1888,7 @@ pixman_region_subtract_o (region_type_t * region,
* Left part of subtrahend covers part of minuend: add uncovered
* part of minuend to region and skip to next subtrahend.
*/
- assert (x1 < r2->x1);
+ critical_if_fail (x1 < r2->x1);
NEWRECT (region, next_rect, x1, y1, r2->x1, y2);
x1 = r2->x2;
@@ -1968,7 +1930,7 @@ pixman_region_subtract_o (region_type_t * region,
*/
while (r1 != r1_end)
{
- assert (x1 < r1->x2);
+ critical_if_fail (x1 < r1->x2);
NEWRECT (region, next_rect, x1, y1, r1->x2, y2);
@@ -2330,7 +2292,7 @@ PREFIX (_reset) (region_type_t *region, box_type_t *box)
{
GOOD (region);
- assert (GOOD_RECT (box));
+ critical_if_fail (GOOD_RECT (box));
region->extents = *box;
@@ -2470,7 +2432,7 @@ PREFIX (_selfcheck) (region_type_t *reg)
PIXMAN_EXPORT pixman_bool_t
PREFIX (_init_rects) (region_type_t *region,
- box_type_t *boxes, int count)
+ const box_type_t *boxes, int count)
{
box_type_t *rects;
int displacement;
@@ -2550,3 +2512,240 @@ PREFIX (_init_rects) (region_type_t *region,
return validate (region, &i);
}
+
+#define READ(_ptr) (*(_ptr))
+
+static inline box_type_t *
+bitmap_addrect (region_type_t *reg,
+ box_type_t *r,
+ box_type_t **first_rect,
+ int rx1, int ry1,
+ int rx2, int ry2)
+{
+ if ((rx1 < rx2) && (ry1 < ry2) &&
+ (!(reg->data->numRects &&
+ ((r-1)->y1 == ry1) && ((r-1)->y2 == ry2) &&
+ ((r-1)->x1 <= rx1) && ((r-1)->x2 >= rx2))))
+ {
+ if (!reg->data ||
+ reg->data->numRects == reg->data->size)
+ {
+ if (!pixman_rect_alloc (reg, 1))
+ return NULL;
+ *first_rect = PIXREGION_BOXPTR(reg);
+ r = *first_rect + reg->data->numRects;
+ }
+ r->x1 = rx1;
+ r->y1 = ry1;
+ r->x2 = rx2;
+ r->y2 = ry2;
+ reg->data->numRects++;
+ if (r->x1 < reg->extents.x1)
+ reg->extents.x1 = r->x1;
+ if (r->x2 > reg->extents.x2)
+ reg->extents.x2 = r->x2;
+ r++;
+ }
+ return r;
+}
+
+/* Convert bitmap clip mask into clipping region.
+ * First, goes through each line and makes boxes by noting the transitions
+ * from 0 to 1 and 1 to 0.
+ * Then it coalesces the current line with the previous if they have boxes
+ * at the same X coordinates.
+ * Stride is in number of uint32_t per line.
+ */
+PIXMAN_EXPORT void
+PREFIX (_init_from_image) (region_type_t *region,
+ pixman_image_t *image)
+{
+ uint32_t mask0 = 0xffffffff & ~SCREEN_SHIFT_RIGHT(0xffffffff, 1);
+ box_type_t *first_rect, *rects, *prect_line_start;
+ box_type_t *old_rect, *new_rect;
+ uint32_t *pw, w, *pw_line, *pw_line_end;
+ int irect_prev_start, irect_line_start;
+ int h, base, rx1 = 0, crects;
+ int ib;
+ pixman_bool_t in_box, same;
+ int width, height, stride;
+
+ PREFIX(_init) (region);
+
+ return_if_fail (image->type == BITS);
+ return_if_fail (image->bits.format == PIXMAN_a1);
+
+ pw_line = pixman_image_get_data (image);
+ width = pixman_image_get_width (image);
+ height = pixman_image_get_height (image);
+ stride = pixman_image_get_stride (image) / 4;
+
+ first_rect = PIXREGION_BOXPTR(region);
+ rects = first_rect;
+
+ region->extents.x1 = width - 1;
+ region->extents.x2 = 0;
+ irect_prev_start = -1;
+ for (h = 0; h < height; h++)
+ {
+ pw = pw_line;
+ pw_line += stride;
+ irect_line_start = rects - first_rect;
+
+ /* If the Screen left most bit of the word is set, we're starting in
+ * a box */
+ if (READ(pw) & mask0)
+ {
+ in_box = TRUE;
+ rx1 = 0;
+ }
+ else
+ {
+ in_box = FALSE;
+ }
+
+ /* Process all words which are fully in the pixmap */
+ pw_line_end = pw + (width >> 5);
+ for (base = 0; pw < pw_line_end; base += 32)
+ {
+ w = READ(pw++);
+ if (in_box)
+ {
+ if (!~w)
+ continue;
+ }
+ else
+ {
+ if (!w)
+ continue;
+ }
+ for (ib = 0; ib < 32; ib++)
+ {
+ /* If the Screen left most bit of the word is set, we're
+ * starting a box */
+ if (w & mask0)
+ {
+ if (!in_box)
+ {
+ rx1 = base + ib;
+ /* start new box */
+ in_box = TRUE;
+ }
+ }
+ else
+ {
+ if (in_box)
+ {
+ /* end box */
+ rects = bitmap_addrect (region, rects, &first_rect,
+ rx1, h, base + ib, h + 1);
+ if (rects == NULL)
+ goto error;
+ in_box = FALSE;
+ }
+ }
+ /* Shift the word VISUALLY left one. */
+ w = SCREEN_SHIFT_LEFT(w, 1);
+ }
+ }
+
+ if (width & 31)
+ {
+ /* Process final partial word on line */
+ w = READ(pw++);
+ for (ib = 0; ib < (width & 31); ib++)
+ {
+ /* If the Screen left most bit of the word is set, we're
+ * starting a box */
+ if (w & mask0)
+ {
+ if (!in_box)
+ {
+ rx1 = base + ib;
+ /* start new box */
+ in_box = TRUE;
+ }
+ }
+ else
+ {
+ if (in_box)
+ {
+ /* end box */
+ rects = bitmap_addrect(region, rects, &first_rect,
+ rx1, h, base + ib, h + 1);
+ if (rects == NULL)
+ goto error;
+ in_box = FALSE;
+ }
+ }
+ /* Shift the word VISUALLY left one. */
+ w = SCREEN_SHIFT_LEFT(w, 1);
+ }
+ }
+ /* If scanline ended with last bit set, end the box */
+ if (in_box)
+ {
+ rects = bitmap_addrect(region, rects, &first_rect,
+ rx1, h, base + (width & 31), h + 1);
+ if (rects == NULL)
+ goto error;
+ }
+ /* if all rectangles on this line have the same x-coords as
+ * those on the previous line, then add 1 to all the previous y2s and
+ * throw away all the rectangles from this line
+ */
+ same = FALSE;
+ if (irect_prev_start != -1)
+ {
+ crects = irect_line_start - irect_prev_start;
+ if (crects != 0 &&
+ crects == ((rects - first_rect) - irect_line_start))
+ {
+ old_rect = first_rect + irect_prev_start;
+ new_rect = prect_line_start = first_rect + irect_line_start;
+ same = TRUE;
+ while (old_rect < prect_line_start)
+ {
+ if ((old_rect->x1 != new_rect->x1) ||
+ (old_rect->x2 != new_rect->x2))
+ {
+ same = FALSE;
+ break;
+ }
+ old_rect++;
+ new_rect++;
+ }
+ if (same)
+ {
+ old_rect = first_rect + irect_prev_start;
+ while (old_rect < prect_line_start)
+ {
+ old_rect->y2 += 1;
+ old_rect++;
+ }
+ rects -= crects;
+ region->data->numRects -= crects;
+ }
+ }
+ }
+ if(!same)
+ irect_prev_start = irect_line_start;
+ }
+ if (!region->data->numRects)
+ {
+ region->extents.x1 = region->extents.x2 = 0;
+ }
+ else
+ {
+ region->extents.y1 = PIXREGION_BOXPTR(region)->y1;
+ region->extents.y2 = PIXREGION_END(region)->y2;
+ if (region->data->numRects == 1)
+ {
+ free (region->data);
+ region->data = NULL;
+ }
+ }
+
+ error:
+ return;
+}
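
[The scanning logic in _init_from_image above is easier to follow in scalar form: walk each row visually left to right, open a box on a 0-to-1 transition, close it on a 1-to-0 transition (or at end of row), then merge the row with the previous one when the boxes line up. A standalone sketch of just the per-row transition scan, assuming LSB-first bit order (the little-endian case of the SCREEN_SHIFT macros); scan_row and emit_span are hypothetical names standing in for the loop body and bitmap_addrect():

#include <stdint.h>
#include <stdio.h>

/* Hypothetical span callback standing in for bitmap_addrect(). */
static void
emit_span (int y, int x1, int x2)
{
    printf ("box: y=[%d,%d) x=[%d,%d)\n", y, y + 1, x1, x2);
}

/* Scan one row of an a1 bitmap (LSB-first words) and emit [x1,x2) spans. */
static void
scan_row (const uint32_t *row, int width, int y)
{
    int in_box = 0, x1 = 0, x;

    for (x = 0; x < width; x++)
    {
        int bit = (row[x >> 5] >> (x & 31)) & 1;

        if (bit && !in_box)
        {
            x1 = x;                 /* 0 -> 1: open a box  */
            in_box = 1;
        }
        else if (!bit && in_box)
        {
            emit_span (y, x1, x);   /* 1 -> 0: close the box */
            in_box = 0;
        }
    }

    if (in_box)                     /* row ended while inside a box */
        emit_span (y, x1, width);
}

int
main (void)
{
    /* Pixels 2..5 and 30..33 set in a 40-pixel row. */
    uint32_t row[2] = { 0xc000003c, 0x00000003 };

    scan_row (row, 40, 0);          /* prints spans [2,6) and [30,34) */
    return 0;
}
]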
diff --git a/lib/pixman/pixman/pixman-solid-fill.c b/lib/pixman/pixman/pixman-solid-fill.c
index 38675dca8..48c999a0e 100644
--- a/lib/pixman/pixman/pixman-solid-fill.c
+++ b/lib/pixman/pixman/pixman-solid-fill.c
@@ -36,7 +36,7 @@ solid_fill_get_scanline_32 (pixman_image_t *image,
uint32_t mask_bits)
{
uint32_t *end = buffer + width;
- register uint32_t color = ((solid_fill_t *)image)->color;
+ uint32_t color = image->solid.color_32;
while (buffer < end)
*(buffer++) = color;
@@ -44,6 +44,23 @@ solid_fill_get_scanline_32 (pixman_image_t *image,
return;
}
+static void
+solid_fill_get_scanline_64 (pixman_image_t *image,
+ int x,
+ int y,
+ int width,
+ uint32_t * buffer,
+ const uint32_t *mask,
+ uint32_t mask_bits)
+{
+ uint64_t *b = (uint64_t *)buffer;
+ uint64_t *e = b + width;
+ uint64_t color = image->solid.color_64;
+
+ while (b < e)
+ *(b++) = color;
+}
+
static source_image_class_t
solid_fill_classify (pixman_image_t *image,
int x,
@@ -58,7 +75,7 @@ static void
solid_fill_property_changed (pixman_image_t *image)
{
image->common.get_scanline_32 = solid_fill_get_scanline_32;
- image->common.get_scanline_64 = _pixman_image_get_scanline_generic_64;
+ image->common.get_scanline_64 = solid_fill_get_scanline_64;
}
static uint32_t
@@ -71,6 +88,16 @@ color_to_uint32 (const pixman_color_t *color)
(color->blue >> 8);
}
+static uint64_t
+color_to_uint64 (const pixman_color_t *color)
+{
+ return
+ ((uint64_t)color->alpha << 48) |
+ ((uint64_t)color->red << 32) |
+ ((uint64_t)color->green << 16) |
+ ((uint64_t)color->blue);
+}
+
PIXMAN_EXPORT pixman_image_t *
pixman_image_create_solid_fill (pixman_color_t *color)
{
@@ -80,7 +107,9 @@ pixman_image_create_solid_fill (pixman_color_t *color)
return NULL;
img->type = SOLID;
- img->solid.color = color_to_uint32 (color);
+ img->solid.color = *color;
+ img->solid.color_32 = color_to_uint32 (color);
+ img->solid.color_64 = color_to_uint64 (color);
img->source.class = SOURCE_IMAGE_CLASS_UNKNOWN;
img->common.classify = solid_fill_classify;
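
[Since pixman_color_t carries 16 bits per channel, the two helpers above simply keep the high byte of each channel for the cached 32-bit pixel and the full 16-bit channels for the 64-bit one. A small worked example for opaque red; the 32-bit shifts follow the standard a8r8g8b8 layout (the full body of color_to_uint32 is truncated in the hunk above), and the field order red, green, blue, alpha is assumed:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
    uint16_t red = 0xffff, green = 0x0000, blue = 0x0000, alpha = 0xffff;

    /* color_to_uint32-style packing: high byte of each channel (a8r8g8b8). */
    uint32_t color_32 = ((uint32_t) (alpha >> 8) << 24) |
                        ((uint32_t) (red   >> 8) << 16) |
                        ((uint32_t) (green >> 8) <<  8) |
                         (uint32_t) (blue  >> 8);

    /* color_to_uint64 packing as defined above: full 16-bit channels. */
    uint64_t color_64 = ((uint64_t) alpha << 48) |
                        ((uint64_t) red   << 32) |
                        ((uint64_t) green << 16) |
                         (uint64_t) blue;

    assert (color_32 == 0xffff0000);            /* opaque red, 8 bits/channel  */
    assert (color_64 == 0xffffffff00000000ULL); /* same color, 16 bits/channel */
    return 0;
}
]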
diff --git a/lib/pixman/pixman/pixman-sse2.c b/lib/pixman/pixman/pixman-sse2.c
index bb74882b2..946e7ba37 100644
--- a/lib/pixman/pixman/pixman-sse2.c
+++ b/lib/pixman/pixman/pixman-sse2.c
@@ -368,6 +368,22 @@ cache_prefetch_next (__m128i* addr)
_mm_prefetch ((void const *)(addr + 4), _MM_HINT_T0); /* 64 bytes ahead */
}
+/* prefetching NULL is very slow on some systems. don't do that. */
+
+static force_inline void
+maybe_prefetch (__m128i* addr)
+{
+ if (addr)
+ cache_prefetch (addr);
+}
+
+static force_inline void
+maybe_prefetch_next (__m128i* addr)
+{
+ if (addr)
+ cache_prefetch_next (addr);
+}
+
/* load 4 pixels from a 16-byte boundary aligned address */
static force_inline __m128i
load_128_aligned (__m128i* src)
@@ -413,9 +429,15 @@ save_128_unaligned (__m128i* dst,
*/
static force_inline __m64
+load_32_1x64 (uint32_t data)
+{
+ return _mm_cvtsi32_si64 (data);
+}
+
+static force_inline __m64
unpack_32_1x64 (uint32_t data)
{
- return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (data), _mm_setzero_si64 ());
+ return _mm_unpacklo_pi8 (load_32_1x64 (data), _mm_setzero_si64 ());
}
static force_inline __m64
@@ -629,7 +651,7 @@ core_combine_over_u_sse2 (uint32_t* pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
/* Align dst on a 16-byte boundary */
while (w && ((unsigned long)pd & 15))
@@ -647,14 +669,14 @@ core_combine_over_u_sse2 (uint32_t* pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w >= 4)
{
/* fill cache line with next memory */
cache_prefetch_next ((__m128i*)ps);
cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
+ maybe_prefetch_next ((__m128i*)pm);
/* I'm loading unaligned because I'm not sure about
* the address alignment.
@@ -720,7 +742,7 @@ core_combine_over_reverse_u_sse2 (uint32_t* pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
/* Align dst on a 16-byte boundary */
while (w &&
@@ -739,14 +761,14 @@ core_combine_over_reverse_u_sse2 (uint32_t* pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w >= 4)
{
/* fill cache line with next memory */
cache_prefetch_next ((__m128i*)ps);
cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
+ maybe_prefetch_next ((__m128i*)pm);
/* I'm loading unaligned because I'm not sure
* about the address alignment.
@@ -822,7 +844,7 @@ core_combine_in_u_sse2 (uint32_t* pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w && ((unsigned long) pd & 15))
{
@@ -839,14 +861,14 @@ core_combine_in_u_sse2 (uint32_t* pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w >= 4)
{
/* fill cache line with next memory */
cache_prefetch_next ((__m128i*)ps);
cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
+ maybe_prefetch_next ((__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm);
@@ -896,7 +918,7 @@ core_combine_reverse_in_u_sse2 (uint32_t* pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w && ((unsigned long) pd & 15))
{
@@ -913,14 +935,14 @@ core_combine_reverse_in_u_sse2 (uint32_t* pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w >= 4)
{
/* fill cache line with next memory */
cache_prefetch_next ((__m128i*)ps);
cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
+ maybe_prefetch_next ((__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
@@ -965,7 +987,7 @@ core_combine_reverse_out_u_sse2 (uint32_t* pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w && ((unsigned long) pd & 15))
{
@@ -976,7 +998,7 @@ core_combine_reverse_out_u_sse2 (uint32_t* pd,
pix_multiply_1x64 (
unpack_32_1x64 (d), negate_1x64 (
expand_alpha_1x64 (unpack_32_1x64 (s)))));
-
+
if (pm)
pm++;
ps++;
@@ -986,7 +1008,7 @@ core_combine_reverse_out_u_sse2 (uint32_t* pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w >= 4)
{
@@ -996,7 +1018,7 @@ core_combine_reverse_out_u_sse2 (uint32_t* pd,
/* fill cache line with next memory */
cache_prefetch_next ((__m128i*)ps);
cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
+ maybe_prefetch_next ((__m128i*)pm);
xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
@@ -1047,7 +1069,7 @@ core_combine_out_u_sse2 (uint32_t* pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w && ((unsigned long) pd & 15))
{
@@ -1067,7 +1089,7 @@ core_combine_out_u_sse2 (uint32_t* pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w >= 4)
{
@@ -1077,7 +1099,7 @@ core_combine_out_u_sse2 (uint32_t* pd,
/* fill cache line with next memory */
cache_prefetch_next ((__m128i*)ps);
cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
+ maybe_prefetch_next ((__m128i*)pm);
xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
@@ -1147,7 +1169,7 @@ core_combine_atop_u_sse2 (uint32_t* pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w && ((unsigned long) pd & 15))
{
@@ -1164,14 +1186,14 @@ core_combine_atop_u_sse2 (uint32_t* pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w >= 4)
{
/* fill cache line with next memory */
cache_prefetch_next ((__m128i*)ps);
cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
+ maybe_prefetch_next ((__m128i*)pm);
xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
@@ -1244,7 +1266,7 @@ core_combine_reverse_atop_u_sse2 (uint32_t* pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w && ((unsigned long) pd & 15))
{
@@ -1261,14 +1283,14 @@ core_combine_reverse_atop_u_sse2 (uint32_t* pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w >= 4)
{
/* fill cache line with next memory */
cache_prefetch_next ((__m128i*)ps);
cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
+ maybe_prefetch_next ((__m128i*)pm);
xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
@@ -1345,7 +1367,7 @@ core_combine_xor_u_sse2 (uint32_t* dst,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w && ((unsigned long) pd & 15))
{
@@ -1362,14 +1384,14 @@ core_combine_xor_u_sse2 (uint32_t* dst,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w >= 4)
{
/* fill cache line with next memory */
cache_prefetch_next ((__m128i*)ps);
cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
+ maybe_prefetch_next ((__m128i*)pm);
xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm);
xmm_dst = load_128_aligned ((__m128i*) pd);
@@ -1430,7 +1452,7 @@ core_combine_add_u_sse2 (uint32_t* dst,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w && (unsigned long)pd & 15)
{
@@ -1448,7 +1470,7 @@ core_combine_add_u_sse2 (uint32_t* dst,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w >= 4)
{
@@ -1457,7 +1479,7 @@ core_combine_add_u_sse2 (uint32_t* dst,
/* fill cache line with next memory */
cache_prefetch_next ((__m128i*)ps);
cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
+ maybe_prefetch_next ((__m128i*)pm);
s = combine4 ((__m128i*)ps, (__m128i*)pm);
@@ -1516,7 +1538,7 @@ core_combine_saturate_u_sse2 (uint32_t * pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w && (unsigned long)pd & 15)
{
@@ -1533,14 +1555,14 @@ core_combine_saturate_u_sse2 (uint32_t * pd,
/* call prefetch hint to optimize cache load*/
cache_prefetch ((__m128i*)ps);
cache_prefetch ((__m128i*)pd);
- cache_prefetch ((__m128i*)pm);
+ maybe_prefetch ((__m128i*)pm);
while (w >= 4)
{
/* fill cache line with next memory */
cache_prefetch_next ((__m128i*)ps);
cache_prefetch_next ((__m128i*)pd);
- cache_prefetch_next ((__m128i*)pm);
+ maybe_prefetch_next ((__m128i*)pm);
xmm_dst = load_128_aligned ((__m128i*)pd);
xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm);
@@ -2630,8 +2652,8 @@ create_mask_2x32_64 (uint32_t mask0,
/* Work around a code generation bug in Sun Studio 12. */
#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
-# define create_mask_2x32_128(mask0, mask1) \
- (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1)))
+# define create_mask_2x32_128(mask0, mask1) \
+ (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1)))
#else
static force_inline __m128i
create_mask_2x32_128 (uint32_t mask0,
@@ -2928,7 +2950,7 @@ sse2_composite_over_n_8888 (pixman_implementation_t *imp,
{
uint32_t src;
uint32_t *dst_line, *dst, d;
- uint16_t w;
+ int32_t w;
int dst_stride;
__m128i xmm_src, xmm_alpha;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
@@ -3019,7 +3041,7 @@ sse2_composite_over_n_0565 (pixman_implementation_t *imp,
{
uint32_t src;
uint16_t *dst_line, *dst, d;
- uint16_t w;
+ int32_t w;
int dst_stride;
__m128i xmm_src, xmm_alpha;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
@@ -3130,7 +3152,7 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
srca = src >> 24;
-
+
if (src == 0)
return;
@@ -3165,7 +3187,7 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
if (m)
{
d = *pd;
-
+
mmx_mask = unpack_32_1x64 (m);
mmx_dest = unpack_32_1x64 (d);
@@ -3204,7 +3226,7 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
xmm_mask_hi = pack_2x128_128 (xmm_mask_lo, xmm_mask_hi);
-
+
save_128_aligned (
(__m128i*)pd, _mm_adds_epu8 (xmm_mask_hi, xmm_dst));
}
@@ -3221,7 +3243,7 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
if (m)
{
d = *pd;
-
+
mmx_mask = unpack_32_1x64 (m);
mmx_dest = unpack_32_1x64 (d);
@@ -3399,7 +3421,7 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
uint32_t mask;
- uint16_t w;
+ int32_t w;
int dst_stride, src_stride;
__m128i xmm_mask;
@@ -3412,7 +3434,7 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
+ mask = _pixman_image_get_solid (mask_image, PIXMAN_a8r8g8b8);
xmm_mask = create_mask_16_128 (mask >> 24);
@@ -3517,7 +3539,7 @@ sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
uint32_t *src_line, *src;
uint32_t mask;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
__m128i xmm_mask, xmm_alpha;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
@@ -3528,7 +3550,7 @@ sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
+ mask = _pixman_image_get_solid (mask_image, PIXMAN_a8r8g8b8);
xmm_mask = create_mask_16_128 (mask >> 24);
xmm_alpha = mask_00ff;
@@ -3685,7 +3707,7 @@ sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
uint16_t *dst_line, *dst, d;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
__m128i xmm_alpha_lo, xmm_alpha_hi;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
@@ -3815,7 +3837,7 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
uint32_t m, d;
__m128i xmm_src, xmm_alpha, xmm_def;
@@ -3959,9 +3981,6 @@ pixman_fill_sse2 (uint32_t *bits,
__m128i xmm_def;
- if (bpp == 16 && (data >> 16 != (data & 0xffff)))
- return FALSE;
-
if (bpp != 16 && bpp != 32)
return FALSE;
@@ -3971,6 +3990,7 @@ pixman_fill_sse2 (uint32_t *bits,
byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
byte_width = 2 * width;
stride *= 2;
+ data = (data & 0xffff) * 0x00010001;
}
else
{
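
[The two hunks above change pixman_fill_sse2 so a 16 bpp fill no longer bails out when the two halves of data differ: the low 16 bits are replicated into both halves before the SSE2 fill runs. A tiny check of the replication trick:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
    uint32_t data = 0x1234abcd;               /* caller-supplied fill value, high half arbitrary */

    data = (data & 0xffff) * 0x00010001;      /* replicate the 16-bit pixel into both halves */

    assert (data == 0xabcdabcd);
    return 0;
}
]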
@@ -4100,7 +4120,7 @@ sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
uint32_t m;
__m128i xmm_src, xmm_def;
@@ -4246,7 +4266,7 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
uint16_t *dst_line, *dst, d;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
uint32_t m;
__m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
@@ -4409,7 +4429,7 @@ sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
uint16_t *dst_line, *dst, d;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
uint32_t opaque, zero;
__m64 ms;
@@ -4555,7 +4575,7 @@ sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
uint32_t *dst_line, *dst, d;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
uint32_t opaque, zero;
__m128i xmm_src_lo, xmm_src_hi;
@@ -4841,9 +4861,10 @@ sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
- uint16_t w, d, m;
+ uint32_t d, m;
uint32_t src;
uint8_t sa;
+ int32_t w;
__m128i xmm_alpha;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
@@ -4956,7 +4977,7 @@ sse2_composite_in_8_8 (pixman_implementation_t *imp,
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int src_stride, dst_stride;
- uint16_t w;
+ int32_t w;
uint32_t s, d;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
@@ -5033,28 +5054,28 @@ sse2_composite_in_8_8 (pixman_implementation_t *imp,
}
/* -------------------------------------------------------------------------
- * composite_add_8888_8_8
+ * composite_add_n_8_8
*/
static void
-sse2_composite_add_8888_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
{
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
- uint16_t w;
+ int32_t w;
uint32_t src;
uint8_t sa;
uint32_t m, d;
@@ -5170,7 +5191,7 @@ sse2_composite_add_8000_8000 (pixman_implementation_t *imp,
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int dst_stride, src_stride;
- uint16_t w;
+ int32_t w;
uint16_t t;
PIXMAN_IMAGE_GET_LINE (
@@ -5428,9 +5449,7 @@ sse2_composite_copy_area (pixman_implementation_t *imp,
src_x, src_y, dest_x, dest_y, width, height);
}
-#if 0
-/* This code are buggy in MMX version, now the bug was translated to SSE2 version */
-void
+static void
sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
@@ -5450,7 +5469,8 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
uint8_t *mask, *mask_line;
uint32_t m;
int src_stride, mask_stride, dst_stride;
- uint16_t w;
+ int32_t w;
+ __m64 ms;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
@@ -5465,258 +5485,363 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
while (height--)
{
- src = src_line;
- src_line += src_stride;
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
+ src = src_line;
+ src_line += src_stride;
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+
+ w = width;
+
+ /* call prefetch hint to optimize cache load*/
+ cache_prefetch ((__m128i*)src);
+ cache_prefetch ((__m128i*)dst);
+ cache_prefetch ((__m128i*)mask);
+
+ while (w && (unsigned long)dst & 15)
+ {
+ s = 0xff000000 | *src++;
+ m = (uint32_t) *mask++;
+ d = *dst;
+ ms = unpack_32_1x64 (s);
+
+ if (m != 0xff)
+ {
+ __m64 ma = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
+ __m64 md = unpack_32_1x64 (d);
+
+ ms = in_over_1x64 (&ms, &mask_x00ff, &ma, &md);
+ }
+
+ *dst++ = pack_1x64_32 (ms);
+ w--;
+ }
+
+ /* call prefetch hint to optimize cache load*/
+ cache_prefetch ((__m128i*)src);
+ cache_prefetch ((__m128i*)dst);
+ cache_prefetch ((__m128i*)mask);
+
+ while (w >= 4)
+ {
+ /* fill cache line with next memory */
+ cache_prefetch_next ((__m128i*)src);
+ cache_prefetch_next ((__m128i*)dst);
+ cache_prefetch_next ((__m128i*)mask);
+
+ m = *(uint32_t*) mask;
+ xmm_src = _mm_or_si128 (load_128_unaligned ((__m128i*)src), mask_ff000000);
+
+ if (m == 0xffffffff)
+ {
+ save_128_aligned ((__m128i*)dst, xmm_src);
+ }
+ else
+ {
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+
+ src += 4;
+ dst += 4;
+ mask += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ m = (uint32_t) *mask++;
+
+ if (m)
+ {
+ s = 0xff000000 | *src;
+
+ if (m == 0xff)
+ {
+ *dst = s;
+ }
+ else
+ {
+ __m64 ma, md, ms;
+
+ d = *dst;
+
+ ma = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
+ md = unpack_32_1x64 (d);
+ ms = unpack_32_1x64 (s);
+
+ *dst = pack_1x64_32 (in_over_1x64 (&ms, &mask_x00ff, &ma, &md));
+ }
+
+ }
+
+ src++;
+ dst++;
+ w--;
+ }
+ }
- w = width;
+ _mm_empty ();
+}
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)src);
- cache_prefetch ((__m128i*)dst);
- cache_prefetch ((__m128i*)mask);
+static void
+sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *src, *src_line, s;
+ uint32_t *dst, *dst_line, d;
+ uint8_t *mask, *mask_line;
+ uint32_t m;
+ int src_stride, mask_stride, dst_stride;
+ int32_t w;
- while (w && (unsigned long)dst & 15)
- {
- s = 0xff000000 | *src++;
- m = (uint32_t) *mask++;
- d = *dst;
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
- __m64 ms = unpack_32_1x64 (s);
+ PIXMAN_IMAGE_GET_LINE (
+ dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+ PIXMAN_IMAGE_GET_LINE (
+ src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- if (m != 0xff)
- {
- ms = in_over_1x64 (ms,
- mask_x00ff,
- expand_alpha_rev_1x64 (unpack_32_1x64 (m)),
- unpack_32_1x64 (d));
- }
+ while (height--)
+ {
+ src = src_line;
+ src_line += src_stride;
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
- *dst++ = pack_1x64_32 (ms);
- w--;
- }
+ w = width;
- /* call prefetch hint to optimize cache load*/
- cache_prefetch ((__m128i*)src);
- cache_prefetch ((__m128i*)dst);
- cache_prefetch ((__m128i*)mask);
+ /* call prefetch hint to optimize cache load*/
+ cache_prefetch ((__m128i *)src);
+ cache_prefetch ((__m128i *)dst);
+ cache_prefetch ((__m128i *)mask);
- while (w >= 4)
- {
- /* fill cache line with next memory */
- cache_prefetch_next ((__m128i*)src);
- cache_prefetch_next ((__m128i*)dst);
- cache_prefetch_next ((__m128i*)mask);
+ while (w && (unsigned long)dst & 15)
+ {
+ uint32_t sa;
- m = *(uint32_t*) mask;
- xmm_src = _mm_or_si128 (load_128_unaligned ((__m128i*)src), mask_ff000000);
+ s = *src++;
+ m = (uint32_t) *mask++;
+ d = *dst;
- if (m == 0xffffffff)
+ sa = s >> 24;
+
+ if (m)
{
- save_128_aligned ((__m128i*)dst, xmm_src);
+ if (sa == 0xff && m == 0xff)
+ {
+ *dst = s;
+ }
+ else
+ {
+ __m64 ms, md, ma, msa;
+
+ ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
+ ms = unpack_32_1x64 (s);
+ md = unpack_32_1x64 (d);
+
+ msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
+
+ *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
+ }
}
- else
+
+ dst++;
+ w--;
+ }
+
+ /* call prefetch hint to optimize cache load*/
+ cache_prefetch ((__m128i *)src);
+ cache_prefetch ((__m128i *)dst);
+ cache_prefetch ((__m128i *)mask);
+
+ while (w >= 4)
+ {
+ /* fill cache line with next memory */
+ cache_prefetch_next ((__m128i *)src);
+ cache_prefetch_next ((__m128i *)dst);
+ cache_prefetch_next ((__m128i *)mask);
+
+ m = *(uint32_t *) mask;
+
+ if (m)
{
- xmm_dst = load_128_aligned ((__m128i*)dst);
+ xmm_src = load_128_unaligned ((__m128i*)src);
+
+ if (m == 0xffffffff && is_opaque (xmm_src))
+ {
+ save_128_aligned ((__m128i *)dst, xmm_src);
+ }
+ else
+ {
+ xmm_dst = load_128_aligned ((__m128i *)dst);
- xmm_mask = _mm_unpacklo_epi16 (
- unpack_32_1x128 (m), _mm_setzero_si128 ());
+ xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
- unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
- unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
- unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
- expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
- &xmm_mask_lo, &xmm_mask_hi);
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
+ expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
- in_over_2x128 (xmm_src_lo, xmm_src_hi,
- mask_00ff, mask_00ff,
- xmm_mask_lo, xmm_mask_hi,
- &xmm_dst_lo, &xmm_dst_hi);
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
+ &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
- save_128_aligned (
- (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
}
- src += 4;
- dst += 4;
- mask += 4;
- w -= 4;
- }
+ src += 4;
+ dst += 4;
+ mask += 4;
+ w -= 4;
+ }
- while (w)
- {
- m = (uint32_t) *mask++;
+ while (w)
+ {
+ uint32_t sa;
+
+ s = *src++;
+ m = (uint32_t) *mask++;
+ d = *dst;
+
+ sa = s >> 24;
if (m)
{
- s = 0xff000000 | *src;
-
- if (m == 0xff)
+ if (sa == 0xff && m == 0xff)
{
*dst = s;
}
else
{
- d = *dst;
+ __m64 ms, md, ma, msa;
- *dst = pack_1x64_32 (
- in_over_1x64 (
- unpack_32_1x64 (s),
- mask_x00ff,
- expand_alpha_rev_1x64 (unpack_32_1x64 (m)),
- unpack_32_1x64 (d)));
- }
+ ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
+ ms = unpack_32_1x64 (s);
+ md = unpack_32_1x64 (d);
+
+ msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
+ *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
+ }
}
- src++;
dst++;
- w--;
- }
+ w--;
+ }
}
_mm_empty ();
}
-#endif
-
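
[For reference, sse2_composite_over_8888_8_8888 above applies the usual "(source IN mask) OVER dest" math with a unified a8 mask; per channel that is dest = src*m + dest*(255 - srca*m), with rounded 8-bit multiplies, and the sa == 0xff && m == 0xff shortcut is just the case where this reduces to a plain copy. A scalar reference sketch of that per-pixel step -- in_over_channel and mul_un8 are illustrative stand-ins, not the SIMD code above, and the arithmetic is an interpretation of the in_over helpers rather than something spelled out in these hunks:

#include <stdint.h>

/* Rounded "multiply two 8-bit values and divide by 255" helper. */
static uint8_t
mul_un8 (uint8_t a, uint8_t b)
{
    uint16_t t = (uint16_t) a * b + 0x80;

    return (uint8_t) ((t + (t >> 8)) >> 8);
}

/* One channel of "(src IN mask) OVER dest" with a unified a8 mask.
 * 'srca' is the source alpha channel; the SIMD code performs the final
 * addition with a saturating byte add, mirrored here by a clamp. */
static uint8_t
in_over_channel (uint8_t src, uint8_t srca, uint8_t m, uint8_t dest)
{
    uint16_t r = mul_un8 (src, m) + (uint16_t) mul_un8 (dest, 255 - mul_un8 (srca, m));

    return (uint8_t) (r > 255 ? 255 : r);
}

int
main (void)
{
    /* m == 0xff and srca == 0xff reduce to a plain copy, matching the
     * shortcut taken in the loops above. */
    return in_over_channel (0x80, 0xff, 0xff, 0x40) == 0x80 ? 0 : 1;
}
]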
static const pixman_fast_path_t sse2_fast_paths[] =
{
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, sse2_composite_over_n_8_0565, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, sse2_composite_over_n_8_0565, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, sse2_composite_over_n_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_over_n_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, sse2_composite_over_n_0565, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, sse2_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, sse2_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, sse2_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, sse2_composite_over_8888_0565, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, sse2_composite_over_8888_0565, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_over_n_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_over_n_8_8888, 0 },
-#if 0
- /* FIXME: This code are buggy in MMX version, now the bug was translated to SSE2 version */
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_over_x888_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_over_x888_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_over_x888_8_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_over_x888_8_8888, 0 },
-#endif
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_over_x888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_over_x888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_over_x888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_over_x888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_over_8888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_over_8888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_over_8888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_over_8888_n_8888, NEED_SOLID_MASK },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, sse2_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, sse2_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, sse2_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, sse2_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, sse2_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, sse2_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_a8r8g8b8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_x8r8g8b8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8b8g8r8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_x8b8g8r8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, sse2_composite_over_pixbuf_8888, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, sse2_composite_over_pixbuf_0565, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_r5g6b5, sse2_composite_over_pixbuf_0565, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_b5g6r5, sse2_composite_over_pixbuf_0565, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, sse2_composite_over_pixbuf_0565, NEED_PIXBUF },
- { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_copy_area, 0 },
- { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, sse2_composite_copy_area, 0 },
-
- { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, sse2_composite_add_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
- { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, sse2_composite_add_8000_8000, 0 },
- { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, sse2_composite_add_8888_8888, 0 },
- { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, sse2_composite_add_8888_8888, 0 },
- { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, sse2_composite_add_8888_8_8, 0 },
-
- { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, sse2_composite_src_n_8_8888, 0 },
- { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, sse2_composite_src_n_8_8888, 0 },
- { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, sse2_composite_src_n_8_8888, 0 },
- { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, sse2_composite_src_n_8_8888, 0 },
- { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, sse2_composite_copy_area, 0 },
- { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, sse2_composite_copy_area, 0 },
- { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_copy_area, 0 },
- { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, sse2_composite_copy_area, 0 },
- { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, sse2_composite_copy_area, 0 },
- { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, sse2_composite_copy_area, 0 },
- { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, sse2_composite_copy_area, 0 },
- { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, sse2_composite_copy_area, 0 },
-
- { PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, sse2_composite_in_8_8, 0 },
- { PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, sse2_composite_in_n_8_8, 0 },
+ /* PIXMAN_OP_OVER */
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, sse2_composite_over_n_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, sse2_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, sse2_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, sse2_composite_over_n_0565),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, sse2_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, sse2_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, sse2_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, sse2_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, sse2_composite_over_8888_0565),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, sse2_composite_over_8888_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, sse2_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, sse2_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, sse2_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, sse2_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, sse2_composite_over_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, sse2_composite_over_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, sse2_composite_over_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, sse2_composite_over_8888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, sse2_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, sse2_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, sse2_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, sse2_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, sse2_composite_over_x888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, sse2_composite_over_x888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, sse2_composite_over_x888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, sse2_composite_over_x888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, sse2_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, sse2_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, sse2_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, sse2_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, sse2_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, sse2_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, sse2_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, sse2_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, sse2_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, sse2_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, sse2_composite_over_pixbuf_8888),
+ PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, sse2_composite_over_pixbuf_8888),
+ PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, sse2_composite_over_pixbuf_8888),
+ PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, sse2_composite_over_pixbuf_8888),
+ PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, sse2_composite_over_pixbuf_0565),
+ PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, sse2_composite_over_pixbuf_0565),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
+
+ /* PIXMAN_OP_ADD */
+ PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, sse2_composite_add_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8000_8000),
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8),
+
+ /* PIXMAN_OP_SRC */
+ PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888),
+ PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, sse2_composite_src_n_8_8888),
+ PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, sse2_composite_src_n_8_8888),
+ PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, sse2_composite_src_n_8_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, sse2_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, sse2_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, sse2_composite_copy_area),
+ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, sse2_composite_copy_area),
+
+ /* PIXMAN_OP_IN */
+ PIXMAN_STD_FAST_PATH (IN, a8, null, a8, sse2_composite_in_8_8),
+ PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8),
{ PIXMAN_OP_NONE },
};
-/*
- * Work around GCC bug causing crashes in Mozilla with SSE2
- *
- * When using -msse, gcc generates movdqa instructions assuming that
- * the stack is 16 byte aligned. Unfortunately some applications, such
- * as Mozilla and Mono, end up aligning the stack to 4 bytes, which
- * causes the movdqa instructions to fail.
- *
- * The __force_align_arg_pointer__ makes gcc generate a prologue that
- * realigns the stack pointer to 16 bytes.
- *
- * On x86-64 this is not necessary because the standard ABI already
- * calls for a 16 byte aligned stack.
- *
- * See https://bugs.freedesktop.org/show_bug.cgi?id=15693
- */
-#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
-__attribute__((__force_align_arg_pointer__))
-#endif
-static void
-sse2_composite (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * mask,
- pixman_image_t * dest,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- if (_pixman_run_fast_path (sse2_fast_paths, imp,
- op, src, mask, dest,
- src_x, src_y,
- mask_x, mask_y,
- dest_x, dest_y,
- width, height))
- {
- return;
- }
-
- _pixman_implementation_composite (imp->delegate, op,
- src, mask, dest,
- src_x, src_y,
- mask_x, mask_y,
- dest_x, dest_y,
- width, height);
-}
-
-#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
-__attribute__((__force_align_arg_pointer__))
-#endif
static pixman_bool_t
sse2_blt (pixman_implementation_t *imp,
uint32_t * src_bits,
@@ -5775,8 +5900,12 @@ __attribute__((__force_align_arg_pointer__))
pixman_implementation_t *
_pixman_implementation_create_sse2 (void)
{
- pixman_implementation_t *mmx = _pixman_implementation_create_mmx ();
- pixman_implementation_t *imp = _pixman_implementation_create (mmx);
+#ifdef USE_MMX
+ pixman_implementation_t *fallback = _pixman_implementation_create_mmx ();
+#else
+ pixman_implementation_t *fallback = _pixman_implementation_create_fast_path ();
+#endif
+ pixman_implementation_t *imp = _pixman_implementation_create (fallback, sse2_fast_paths);
/* SSE2 constants */
mask_565_r = create_mask_2x32_128 (0x00f80000, 0x00f80000);
@@ -5834,7 +5963,6 @@ _pixman_implementation_create_sse2 (void)
imp->combine_32_ca[PIXMAN_OP_XOR] = sse2_combine_xor_ca;
imp->combine_32_ca[PIXMAN_OP_ADD] = sse2_combine_add_ca;
- imp->composite = sse2_composite;
imp->blt = sse2_blt;
imp->fill = sse2_fill;
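
The net effect of the pixman-sse2.c hunks above: the backend no longer installs its own composite wrapper that calls _pixman_run_fast_path; it just hands sse2_fast_paths to _pixman_implementation_create and lets the common code do the lookup, falling back through the delegate chain (SSE2 to MMX or the generic fast paths). A minimal sketch of that dispatch idea, using simplified stand-in types rather than the real pixman_implementation_t / pixman_fast_path_t definitions:

#include <stddef.h>

typedef void (*composite_fn) (void);

typedef struct
{
    int          op;                /* 0 plays the role of PIXMAN_OP_NONE */
    int          src_format;
    int          mask_format;
    int          dest_format;
    composite_fn func;
} fast_path_t;

typedef struct impl impl_t;

struct impl
{
    impl_t            *delegate;    /* e.g. SSE2 -> MMX -> fast -> general */
    const fast_path_t *fast_paths;  /* sentinel-terminated table */
};

static composite_fn
lookup_fast_path (impl_t *imp, int op, int src, int mask, int dest)
{
    while (imp)
    {
        const fast_path_t *info;

        /* Walk this implementation's table until the sentinel entry. */
        for (info = imp->fast_paths; info->op != 0; ++info)
        {
            if (info->op == op &&
                info->src_format == src &&
                info->mask_format == mask &&
                info->dest_format == dest)
            {
                return info->func;
            }
        }

        /* Nothing matched here; try the next implementation down. */
        imp = imp->delegate;
    }

    return NULL;
}

The trailing { PIXMAN_OP_NONE } entry in sse2_fast_paths is exactly the sentinel this sketch relies on; the real matcher in do_composite below additionally compares flag bits and PIXMAN_any wildcards.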
diff --git a/lib/pixman/pixman/pixman-trap.c b/lib/pixman/pixman/pixman-trap.c
index 962cbb39e..8353992c5 100644
--- a/lib/pixman/pixman/pixman-trap.c
+++ b/lib/pixman/pixman/pixman-trap.c
@@ -28,8 +28,8 @@
#include "pixman-private.h"
/*
- * Compute the smallest value no less than y which is on a
- * grid row
+ * Compute the smallest value greater than or equal to y which is on a
+ * grid row.
*/
PIXMAN_EXPORT pixman_fixed_t
@@ -38,7 +38,7 @@ pixman_sample_ceil_y (pixman_fixed_t y, int n)
pixman_fixed_t f = pixman_fixed_frac (y);
pixman_fixed_t i = pixman_fixed_floor (y);
- f = ((f + Y_FRAC_FIRST (n)) / STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
+ f = DIV (f - Y_FRAC_FIRST (n) + (STEP_Y_SMALL (n) - pixman_fixed_e), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
Y_FRAC_FIRST (n);
if (f > Y_FRAC_LAST (n))
@@ -57,8 +57,8 @@ pixman_sample_ceil_y (pixman_fixed_t y, int n)
}
/*
- * Compute the largest value no greater than y which is on a
- * grid row
+ * Compute the largest value strictly less than y which is on a
+ * grid row.
*/
PIXMAN_EXPORT pixman_fixed_t
pixman_sample_floor_y (pixman_fixed_t y,
@@ -67,7 +67,7 @@ pixman_sample_floor_y (pixman_fixed_t y,
pixman_fixed_t f = pixman_fixed_frac (y);
pixman_fixed_t i = pixman_fixed_floor (y);
- f = DIV (f - Y_FRAC_FIRST (n), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
+ f = DIV (f - pixman_fixed_e - Y_FRAC_FIRST (n), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
Y_FRAC_FIRST (n);
if (f < Y_FRAC_FIRST (n))
@@ -380,7 +380,7 @@ pixman_rasterize_trapezoid (pixman_image_t * image,
if (pixman_fixed_to_int (b) >= height)
b = pixman_int_to_fixed (height) - 1;
b = pixman_sample_floor_y (b, bpp);
-
+
if (b >= t)
{
/* initialize edge walkers */
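
The pixman-trap.c hunks tighten the sampling comments and the DIV-based rounding: pixman_sample_ceil_y now returns the smallest grid-row value greater than or equal to y, and pixman_sample_floor_y the largest one strictly below y. A simplified sketch of that rounding, assuming non-negative values on a grid "first + k * step" (the real code works on the fractional part of a 16.16 fixed-point y, uses the pixman_fixed_e bias, and steps into the neighbouring pixel row instead of clamping):

#include <stdint.h>

/* Smallest grid value >= y. */
static int32_t
sample_ceil (int32_t y, int32_t first, int32_t step)
{
    if (y <= first)
        return first;

    return first + ((y - first + step - 1) / step) * step;
}

/* Largest grid value strictly less than y, clamped to the first sample. */
static int32_t
sample_floor (int32_t y, int32_t first, int32_t step)
{
    if (y <= first)
        return first;

    return first + ((y - first - 1) / step) * step;
}

For example, with first = 2 and step = 4, sample_ceil (6, 2, 4) is 6 while sample_floor (6, 2, 4) is 2, matching the "greater than or equal" versus "strictly less than" wording in the updated comments.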
diff --git a/lib/pixman/pixman/pixman-utils.c b/lib/pixman/pixman/pixman-utils.c
index 71282062c..3ef88b753 100644
--- a/lib/pixman/pixman/pixman-utils.c
+++ b/lib/pixman/pixman/pixman-utils.c
@@ -30,211 +30,6 @@
#include "pixman-private.h"
-/*
- * Computing composite region
- */
-#define BOUND(v) (int16_t) ((v) < INT16_MIN ? INT16_MIN : (v) > INT16_MAX ? INT16_MAX : (v))
-
-static inline pixman_bool_t
-clip_general_image (pixman_region32_t * region,
- pixman_region32_t * clip,
- int dx,
- int dy)
-{
- if (pixman_region32_n_rects (region) == 1 &&
- pixman_region32_n_rects (clip) == 1)
- {
- pixman_box32_t * rbox = pixman_region32_rectangles (region, NULL);
- pixman_box32_t * cbox = pixman_region32_rectangles (clip, NULL);
- int v;
-
- if (rbox->x1 < (v = cbox->x1 + dx))
- rbox->x1 = BOUND (v);
- if (rbox->x2 > (v = cbox->x2 + dx))
- rbox->x2 = BOUND (v);
- if (rbox->y1 < (v = cbox->y1 + dy))
- rbox->y1 = BOUND (v);
- if (rbox->y2 > (v = cbox->y2 + dy))
- rbox->y2 = BOUND (v);
- if (rbox->x1 >= rbox->x2 ||
- rbox->y1 >= rbox->y2)
- {
- pixman_region32_init (region);
- }
- }
- else if (!pixman_region32_not_empty (clip))
- {
- return FALSE;
- }
- else
- {
- if (dx || dy)
- pixman_region32_translate (region, -dx, -dy);
- if (!pixman_region32_intersect (region, region, clip))
- return FALSE;
- if (dx || dy)
- pixman_region32_translate (region, dx, dy);
- }
- return pixman_region32_not_empty (region);
-}
-
-static inline pixman_bool_t
-clip_source_image (pixman_region32_t * region,
- pixman_image_t * image,
- int dx,
- int dy)
-{
- /* Source clips are ignored, unless they are explicitly turned on
- * and the clip in question was set by an X client. (Because if
- * the clip was not set by a client, then it is a hierarchy
- * clip and those should always be ignored for sources).
- */
- if (!image->common.clip_sources || !image->common.client_clip)
- return TRUE;
-
- return clip_general_image (region,
- &image->common.clip_region,
- dx, dy);
-}
-
-/*
- * returns FALSE if the final region is empty. Indistinguishable from
- * an allocation failure, but rendering ignores those anyways.
- */
-static pixman_bool_t
-pixman_compute_composite_region32 (pixman_region32_t * region,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int16_t src_x,
- int16_t src_y,
- int16_t mask_x,
- int16_t mask_y,
- int16_t dest_x,
- int16_t dest_y,
- uint16_t width,
- uint16_t height)
-{
- int v;
-
- region->extents.x1 = dest_x;
- v = dest_x + width;
- region->extents.x2 = BOUND (v);
- region->extents.y1 = dest_y;
- v = dest_y + height;
- region->extents.y2 = BOUND (v);
-
- region->extents.x1 = MAX (region->extents.x1, 0);
- region->extents.y1 = MAX (region->extents.y1, 0);
- region->extents.x2 = MIN (region->extents.x2, dst_image->bits.width);
- region->extents.y2 = MIN (region->extents.y2, dst_image->bits.height);
-
- region->data = 0;
-
- /* Check for empty operation */
- if (region->extents.x1 >= region->extents.x2 ||
- region->extents.y1 >= region->extents.y2)
- {
- pixman_region32_init (region);
- return FALSE;
- }
-
- if (dst_image->common.have_clip_region)
- {
- if (!clip_general_image (region, &dst_image->common.clip_region, 0, 0))
- {
- pixman_region32_fini (region);
- return FALSE;
- }
- }
-
- if (dst_image->common.alpha_map && dst_image->common.alpha_map->common.have_clip_region)
- {
- if (!clip_general_image (region, &dst_image->common.alpha_map->common.clip_region,
- -dst_image->common.alpha_origin_x,
- -dst_image->common.alpha_origin_y))
- {
- pixman_region32_fini (region);
- return FALSE;
- }
- }
-
- /* clip against src */
- if (src_image->common.have_clip_region)
- {
- if (!clip_source_image (region, src_image, dest_x - src_x, dest_y - src_y))
- {
- pixman_region32_fini (region);
- return FALSE;
- }
- }
- if (src_image->common.alpha_map && src_image->common.alpha_map->common.have_clip_region)
- {
- if (!clip_source_image (region, (pixman_image_t *)src_image->common.alpha_map,
- dest_x - (src_x - src_image->common.alpha_origin_x),
- dest_y - (src_y - src_image->common.alpha_origin_y)))
- {
- pixman_region32_fini (region);
- return FALSE;
- }
- }
- /* clip against mask */
- if (mask_image && mask_image->common.have_clip_region)
- {
- if (!clip_source_image (region, mask_image, dest_x - mask_x, dest_y - mask_y))
- {
- pixman_region32_fini (region);
- return FALSE;
- }
- if (mask_image->common.alpha_map && mask_image->common.alpha_map->common.have_clip_region)
- {
- if (!clip_source_image (region, (pixman_image_t *)mask_image->common.alpha_map,
- dest_x - (mask_x - mask_image->common.alpha_origin_x),
- dest_y - (mask_y - mask_image->common.alpha_origin_y)))
- {
- pixman_region32_fini (region);
- return FALSE;
- }
- }
- }
-
- return TRUE;
-}
-
-PIXMAN_EXPORT pixman_bool_t
-pixman_compute_composite_region (pixman_region16_t * region,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int16_t src_x,
- int16_t src_y,
- int16_t mask_x,
- int16_t mask_y,
- int16_t dest_x,
- int16_t dest_y,
- uint16_t width,
- uint16_t height)
-{
- pixman_region32_t r32;
- pixman_bool_t retval;
-
- pixman_region32_init (&r32);
-
- retval = pixman_compute_composite_region32 (
- &r32, src_image, mask_image, dst_image,
- src_x, src_y, mask_x, mask_y, dest_x, dest_y,
- width, height);
-
- if (retval)
- {
- if (!pixman_region16_copy_from_region32 (region, &r32))
- retval = FALSE;
- }
-
- pixman_region32_fini (&r32);
- return retval;
-}
-
pixman_bool_t
pixman_multiply_overflows_int (unsigned int a,
unsigned int b)
@@ -372,401 +167,6 @@ pixman_contract (uint32_t * dst,
}
}
-static void
-walk_region_internal (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int16_t src_x,
- int16_t src_y,
- int16_t mask_x,
- int16_t mask_y,
- int16_t dest_x,
- int16_t dest_y,
- uint16_t width,
- uint16_t height,
- pixman_bool_t src_repeat,
- pixman_bool_t mask_repeat,
- pixman_region32_t * region,
- pixman_composite_func_t composite_rect)
-{
- int n;
- const pixman_box32_t *pbox;
- int w, h, w_this, h_this;
- int x_msk, y_msk, x_src, y_src, x_dst, y_dst;
-
- pbox = pixman_region32_rectangles (region, &n);
- while (n--)
- {
- h = pbox->y2 - pbox->y1;
- y_src = pbox->y1 - dest_y + src_y;
- y_msk = pbox->y1 - dest_y + mask_y;
- y_dst = pbox->y1;
-
- while (h)
- {
- h_this = h;
- w = pbox->x2 - pbox->x1;
- x_src = pbox->x1 - dest_x + src_x;
- x_msk = pbox->x1 - dest_x + mask_x;
- x_dst = pbox->x1;
-
- if (mask_repeat)
- {
- y_msk = MOD (y_msk, mask_image->bits.height);
- if (h_this > mask_image->bits.height - y_msk)
- h_this = mask_image->bits.height - y_msk;
- }
-
- if (src_repeat)
- {
- y_src = MOD (y_src, src_image->bits.height);
- if (h_this > src_image->bits.height - y_src)
- h_this = src_image->bits.height - y_src;
- }
-
- while (w)
- {
- w_this = w;
-
- if (mask_repeat)
- {
- x_msk = MOD (x_msk, mask_image->bits.width);
- if (w_this > mask_image->bits.width - x_msk)
- w_this = mask_image->bits.width - x_msk;
- }
-
- if (src_repeat)
- {
- x_src = MOD (x_src, src_image->bits.width);
- if (w_this > src_image->bits.width - x_src)
- w_this = src_image->bits.width - x_src;
- }
-
- (*composite_rect) (imp, op,
- src_image, mask_image, dst_image,
- x_src, y_src, x_msk, y_msk, x_dst, y_dst,
- w_this, h_this);
- w -= w_this;
-
- x_src += w_this;
- x_msk += w_this;
- x_dst += w_this;
- }
-
- h -= h_this;
- y_src += h_this;
- y_msk += h_this;
- y_dst += h_this;
- }
-
- pbox++;
- }
-}
-
-void
-_pixman_walk_composite_region (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int16_t src_x,
- int16_t src_y,
- int16_t mask_x,
- int16_t mask_y,
- int16_t dest_x,
- int16_t dest_y,
- uint16_t width,
- uint16_t height,
- pixman_composite_func_t composite_rect)
-{
- pixman_region32_t region;
-
- pixman_region32_init (&region);
-
- if (pixman_compute_composite_region32 (
- &region, src_image, mask_image, dst_image,
- src_x, src_y, mask_x, mask_y, dest_x, dest_y,
- width, height))
- {
- walk_region_internal (imp, op,
- src_image, mask_image, dst_image,
- src_x, src_y, mask_x, mask_y, dest_x, dest_y,
- width, height, FALSE, FALSE,
- &region,
- composite_rect);
-
- pixman_region32_fini (&region);
- }
-}
-
-static pixman_bool_t
-mask_is_solid (pixman_image_t *mask)
-{
- if (mask->type == SOLID)
- return TRUE;
-
- if (mask->type == BITS &&
- mask->common.repeat == PIXMAN_REPEAT_NORMAL &&
- mask->bits.width == 1 &&
- mask->bits.height == 1)
- {
- return TRUE;
- }
-
- return FALSE;
-}
-
-static const pixman_fast_path_t *
-get_fast_path (const pixman_fast_path_t *fast_paths,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- pixman_bool_t is_pixbuf)
-{
- const pixman_fast_path_t *info;
-
- for (info = fast_paths; info->op != PIXMAN_OP_NONE; info++)
- {
- pixman_bool_t valid_src = FALSE;
- pixman_bool_t valid_mask = FALSE;
-
- if (info->op != op)
- continue;
-
- if ((info->src_format == PIXMAN_solid &&
- _pixman_image_is_solid (src_image)) ||
- (src_image->type == BITS &&
- info->src_format == src_image->bits.format))
- {
- valid_src = TRUE;
- }
-
- if (!valid_src)
- continue;
-
- if ((info->mask_format == PIXMAN_null && !mask_image) ||
- (mask_image && mask_image->type == BITS &&
- info->mask_format == mask_image->bits.format))
- {
- valid_mask = TRUE;
-
- if (info->flags & NEED_SOLID_MASK)
- {
- if (!mask_image || !mask_is_solid (mask_image))
- valid_mask = FALSE;
- }
-
- if (info->flags & NEED_COMPONENT_ALPHA)
- {
- if (!mask_image || !mask_image->common.component_alpha)
- valid_mask = FALSE;
- }
- }
-
- if (!valid_mask)
- continue;
-
- if (info->dest_format != dst_image->bits.format)
- continue;
-
- if ((info->flags & NEED_PIXBUF) && !is_pixbuf)
- continue;
-
- return info;
- }
-
- return NULL;
-}
-
-static force_inline pixman_bool_t
-image_covers (pixman_image_t *image,
- pixman_box32_t *extents,
- int x,
- int y)
-{
- if (image->common.type == BITS &&
- image->common.repeat == PIXMAN_REPEAT_NONE)
- {
- if (x > extents->x1 || y > extents->y1 ||
- x + image->bits.width < extents->x2 ||
- y + image->bits.height < extents->y2)
- {
- return FALSE;
- }
- }
-
- return TRUE;
-}
-
-static force_inline pixman_bool_t
-sources_cover (pixman_image_t *src,
- pixman_image_t *mask,
- pixman_box32_t *extents,
- int src_x,
- int src_y,
- int mask_x,
- int mask_y,
- int dest_x,
- int dest_y)
-{
- if (!image_covers (src, extents, dest_x - src_x, dest_y - src_y))
- return FALSE;
-
- if (!mask)
- return TRUE;
-
- if (!image_covers (mask, extents, dest_x - mask_x, dest_y - mask_y))
- return FALSE;
-
- return TRUE;
-}
-
-pixman_bool_t
-_pixman_run_fast_path (const pixman_fast_path_t *paths,
- pixman_implementation_t * imp,
- pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * mask,
- pixman_image_t * dest,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- pixman_composite_func_t func = NULL;
- pixman_bool_t src_repeat =
- src->common.repeat == PIXMAN_REPEAT_NORMAL;
- pixman_bool_t mask_repeat =
- mask && mask->common.repeat == PIXMAN_REPEAT_NORMAL;
- pixman_bool_t result;
- pixman_bool_t has_fast_path;
-
- has_fast_path = !dest->common.alpha_map &&
- !dest->bits.read_func &&
- !dest->bits.write_func;
-
- if (has_fast_path)
- {
- has_fast_path = (src->type == BITS || _pixman_image_is_solid (src)) &&
- !src->common.transform &&
- !src->common.alpha_map &&
- src->common.filter != PIXMAN_FILTER_CONVOLUTION &&
- src->common.repeat != PIXMAN_REPEAT_PAD &&
- src->common.repeat != PIXMAN_REPEAT_REFLECT;
- if (has_fast_path && src->type == BITS)
- {
- has_fast_path = !src->bits.read_func &&
- !src->bits.write_func &&
- !PIXMAN_FORMAT_IS_WIDE (src->bits.format);
- }
- }
-
- if (mask && has_fast_path)
- {
- has_fast_path =
- mask->type == BITS &&
- !mask->common.transform &&
- !mask->common.alpha_map &&
- !mask->bits.read_func &&
- !mask->bits.write_func &&
- mask->common.filter != PIXMAN_FILTER_CONVOLUTION &&
- mask->common.repeat != PIXMAN_REPEAT_PAD &&
- mask->common.repeat != PIXMAN_REPEAT_REFLECT &&
- !PIXMAN_FORMAT_IS_WIDE (mask->bits.format);
- }
-
- if (has_fast_path)
- {
- const pixman_fast_path_t *info;
- pixman_bool_t pixbuf;
-
- pixbuf =
- src && src->type == BITS &&
- mask && mask->type == BITS &&
- src->bits.bits == mask->bits.bits &&
- src_x == mask_x &&
- src_y == mask_y &&
- !mask->common.component_alpha &&
- !mask_repeat;
-
- info = get_fast_path (paths, op, src, mask, dest, pixbuf);
-
- if (info)
- {
- func = info->func;
-
- if (info->src_format == PIXMAN_solid)
- src_repeat = FALSE;
-
- if (info->mask_format == PIXMAN_solid ||
- info->flags & NEED_SOLID_MASK)
- {
- mask_repeat = FALSE;
- }
-
- if ((src_repeat &&
- src->bits.width == 1 &&
- src->bits.height == 1) ||
- (mask_repeat &&
- mask->bits.width == 1 &&
- mask->bits.height == 1))
- {
- /* If src or mask are repeating 1x1 images and src_repeat or
- * mask_repeat are still TRUE, it means the fast path we
- * selected does not actually handle repeating images.
- *
- * So rather than call the "fast path" with a zillion
- * 1x1 requests, we just fall back to the general code (which
- * does do something sensible with 1x1 repeating images).
- */
- func = NULL;
- }
- }
- }
-
- result = FALSE;
-
- if (func)
- {
- pixman_region32_t region;
- pixman_region32_init (&region);
-
- if (pixman_compute_composite_region32 (
- &region, src, mask, dest,
- src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height))
- {
- pixman_box32_t *extents = pixman_region32_extents (&region);
-
- if (sources_cover (
- src, mask, extents,
- src_x, src_y, mask_x, mask_y, dest_x, dest_y))
- {
- walk_region_internal (imp, op,
- src, mask, dest,
- src_x, src_y, mask_x, mask_y,
- dest_x, dest_y,
- width, height,
- src_repeat, mask_repeat,
- &region,
- func);
-
- result = TRUE;
- }
-
- pixman_region32_fini (&region);
- }
- }
-
- return result;
-}
-
#define N_TMP_BOXES (16)
pixman_bool_t
@@ -835,3 +235,24 @@ pixman_region32_copy_from_region16 (pixman_region32_t *dst,
return retval;
}
+
+#ifdef DEBUG
+
+void
+_pixman_log_error (const char *function, const char *message)
+{
+ static int n_messages = 0;
+
+ if (n_messages < 10)
+ {
+ fprintf (stderr,
+ "*** BUG ***\n"
+ "In %s: %s\n"
+ "Set a breakpoint on '_pixman_log_error' to debug\n\n",
+ function, message);
+
+ n_messages++;
+ }
+}
+
+#endif
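
The new DEBUG-only _pixman_log_error prints at most ten messages per process, so a misbehaving caller cannot flood stderr. Callers would typically reach it through a guard macro; the following is a hypothetical wrapper built only on the signature shown above (the actual macro in pixman-private.h may be named and defined differently):

/* Hypothetical guard macro around the rate-limited logger. */
#define return_val_if_fail(expr, retval)                                      \
    do                                                                        \
    {                                                                         \
        if (!(expr))                                                          \
        {                                                                     \
            _pixman_log_error (__func__, "The expression " #expr " was false"); \
            return (retval);                                                  \
        }                                                                     \
    }                                                                         \
    while (0)

/* Example use:
 *     return_val_if_fail (image != NULL, FALSE);
 */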
diff --git a/lib/pixman/pixman/pixman-vmx.c b/lib/pixman/pixman/pixman-vmx.c
index 06325a7c0..e811cf733 100644
--- a/lib/pixman/pixman/pixman-vmx.c
+++ b/lib/pixman/pixman/pixman-vmx.c
@@ -1607,11 +1607,16 @@ vmx_combine_add_ca (pixman_implementation_t *imp,
}
}
+static const pixman_fast_path_t vmx_fast_paths[] =
+{
+ { PIXMAN_OP_NONE },
+};
+
pixman_implementation_t *
_pixman_implementation_create_vmx (void)
{
pixman_implementation_t *fast = _pixman_implementation_create_fast_path ();
- pixman_implementation_t *imp = _pixman_implementation_create (fast);
+ pixman_implementation_t *imp = _pixman_implementation_create (fast, vmx_fast_paths);
/* Set up function pointers */
diff --git a/lib/pixman/pixman/pixman.c b/lib/pixman/pixman/pixman.c
index 0edd967cf..548242ba0 100644
--- a/lib/pixman/pixman/pixman.c
+++ b/lib/pixman/pixman/pixman.c
@@ -28,192 +28,800 @@
#endif
#include "pixman-private.h"
+#include <stdlib.h>
+
+static pixman_implementation_t *imp;
+
+typedef struct operator_info_t operator_info_t;
+
+struct operator_info_t
+{
+ uint8_t opaque_info[4];
+};
+
+#define PACK(neither, src, dest, both) \
+ {{ (uint8_t)PIXMAN_OP_ ## neither, \
+ (uint8_t)PIXMAN_OP_ ## src, \
+ (uint8_t)PIXMAN_OP_ ## dest, \
+ (uint8_t)PIXMAN_OP_ ## both }}
+
+static const operator_info_t operator_table[] =
+{
+ /* Neither Opaque Src Opaque Dst Opaque Both Opaque */
+ PACK (CLEAR, CLEAR, CLEAR, CLEAR),
+ PACK (SRC, SRC, SRC, SRC),
+ PACK (DST, DST, DST, DST),
+ PACK (OVER, SRC, OVER, SRC),
+ PACK (OVER_REVERSE, OVER_REVERSE, DST, DST),
+ PACK (IN, IN, SRC, SRC),
+ PACK (IN_REVERSE, DST, IN_REVERSE, DST),
+ PACK (OUT, OUT, CLEAR, CLEAR),
+ PACK (OUT_REVERSE, CLEAR, OUT_REVERSE, CLEAR),
+ PACK (ATOP, IN, OVER, SRC),
+ PACK (ATOP_REVERSE, OVER_REVERSE, IN_REVERSE, DST),
+ PACK (XOR, OUT, OUT_REVERSE, CLEAR),
+ PACK (ADD, ADD, ADD, ADD),
+ PACK (SATURATE, OVER_REVERSE, DST, DST),
+
+ {{ 0 /* 0x0e */ }},
+ {{ 0 /* 0x0f */ }},
+
+ PACK (CLEAR, CLEAR, CLEAR, CLEAR),
+ PACK (SRC, SRC, SRC, SRC),
+ PACK (DST, DST, DST, DST),
+ PACK (DISJOINT_OVER, DISJOINT_OVER, DISJOINT_OVER, DISJOINT_OVER),
+ PACK (DISJOINT_OVER_REVERSE, DISJOINT_OVER_REVERSE, DISJOINT_OVER_REVERSE, DISJOINT_OVER_REVERSE),
+ PACK (DISJOINT_IN, DISJOINT_IN, DISJOINT_IN, DISJOINT_IN),
+ PACK (DISJOINT_IN_REVERSE, DISJOINT_IN_REVERSE, DISJOINT_IN_REVERSE, DISJOINT_IN_REVERSE),
+ PACK (DISJOINT_OUT, DISJOINT_OUT, DISJOINT_OUT, DISJOINT_OUT),
+ PACK (DISJOINT_OUT_REVERSE, DISJOINT_OUT_REVERSE, DISJOINT_OUT_REVERSE, DISJOINT_OUT_REVERSE),
+ PACK (DISJOINT_ATOP, DISJOINT_ATOP, DISJOINT_ATOP, DISJOINT_ATOP),
+ PACK (DISJOINT_ATOP_REVERSE, DISJOINT_ATOP_REVERSE, DISJOINT_ATOP_REVERSE, DISJOINT_ATOP_REVERSE),
+ PACK (DISJOINT_XOR, DISJOINT_XOR, DISJOINT_XOR, DISJOINT_XOR),
+
+ {{ 0 /* 0x1c */ }},
+ {{ 0 /* 0x1d */ }},
+ {{ 0 /* 0x1e */ }},
+ {{ 0 /* 0x1f */ }},
+
+ PACK (CLEAR, CLEAR, CLEAR, CLEAR),
+ PACK (SRC, SRC, SRC, SRC),
+ PACK (DST, DST, DST, DST),
+ PACK (CONJOINT_OVER, CONJOINT_OVER, CONJOINT_OVER, CONJOINT_OVER),
+ PACK (CONJOINT_OVER_REVERSE, CONJOINT_OVER_REVERSE, CONJOINT_OVER_REVERSE, CONJOINT_OVER_REVERSE),
+ PACK (CONJOINT_IN, CONJOINT_IN, CONJOINT_IN, CONJOINT_IN),
+ PACK (CONJOINT_IN_REVERSE, CONJOINT_IN_REVERSE, CONJOINT_IN_REVERSE, CONJOINT_IN_REVERSE),
+ PACK (CONJOINT_OUT, CONJOINT_OUT, CONJOINT_OUT, CONJOINT_OUT),
+ PACK (CONJOINT_OUT_REVERSE, CONJOINT_OUT_REVERSE, CONJOINT_OUT_REVERSE, CONJOINT_OUT_REVERSE),
+ PACK (CONJOINT_ATOP, CONJOINT_ATOP, CONJOINT_ATOP, CONJOINT_ATOP),
+ PACK (CONJOINT_ATOP_REVERSE, CONJOINT_ATOP_REVERSE, CONJOINT_ATOP_REVERSE, CONJOINT_ATOP_REVERSE),
+ PACK (CONJOINT_XOR, CONJOINT_XOR, CONJOINT_XOR, CONJOINT_XOR),
+
+ {{ 0 /* 0x2c */ }},
+ {{ 0 /* 0x2d */ }},
+ {{ 0 /* 0x2e */ }},
+ {{ 0 /* 0x2f */ }},
+
+ PACK (MULTIPLY, MULTIPLY, MULTIPLY, MULTIPLY),
+ PACK (SCREEN, SCREEN, SCREEN, SCREEN),
+ PACK (OVERLAY, OVERLAY, OVERLAY, OVERLAY),
+ PACK (DARKEN, DARKEN, DARKEN, DARKEN),
+ PACK (LIGHTEN, LIGHTEN, LIGHTEN, LIGHTEN),
+ PACK (COLOR_DODGE, COLOR_DODGE, COLOR_DODGE, COLOR_DODGE),
+ PACK (COLOR_BURN, COLOR_BURN, COLOR_BURN, COLOR_BURN),
+ PACK (HARD_LIGHT, HARD_LIGHT, HARD_LIGHT, HARD_LIGHT),
+ PACK (SOFT_LIGHT, SOFT_LIGHT, SOFT_LIGHT, SOFT_LIGHT),
+ PACK (DIFFERENCE, DIFFERENCE, DIFFERENCE, DIFFERENCE),
+ PACK (EXCLUSION, EXCLUSION, EXCLUSION, EXCLUSION),
+ PACK (HSL_HUE, HSL_HUE, HSL_HUE, HSL_HUE),
+ PACK (HSL_SATURATION, HSL_SATURATION, HSL_SATURATION, HSL_SATURATION),
+ PACK (HSL_COLOR, HSL_COLOR, HSL_COLOR, HSL_COLOR),
+ PACK (HSL_LUMINOSITY, HSL_LUMINOSITY, HSL_LUMINOSITY, HSL_LUMINOSITY),
+};
+
/*
- * Operator optimizations based on source or destination opacity
+ * Optimize the current operator based on opacity of source or destination
+ * The output operator should be mathematically equivalent to the source.
*/
-typedef struct
+static pixman_op_t
+optimize_operator (pixman_op_t op,
+ uint32_t src_flags,
+ uint32_t mask_flags,
+ uint32_t dst_flags)
{
- pixman_op_t op;
- pixman_op_t op_src_dst_opaque;
- pixman_op_t op_src_opaque;
- pixman_op_t op_dst_opaque;
-} optimized_operator_info_t;
+ pixman_bool_t is_source_opaque, is_dest_opaque;
+ int opaqueness;
+
+ is_source_opaque = ((src_flags & mask_flags) & FAST_PATH_IS_OPAQUE) != 0;
+ is_dest_opaque = (dst_flags & FAST_PATH_IS_OPAQUE) != 0;
-static const optimized_operator_info_t optimized_operators[] =
+ opaqueness = ((is_dest_opaque << 1) | is_source_opaque);
+
+ return operator_table[op].opaque_info[opaqueness];
+}
+
+static void
+apply_workaround (pixman_image_t *image,
+ int32_t * x,
+ int32_t * y,
+ uint32_t ** save_bits,
+ int * save_dx,
+ int * save_dy)
{
- /* Input Operator SRC&DST Opaque SRC Opaque DST Opaque */
- { PIXMAN_OP_OVER, PIXMAN_OP_SRC, PIXMAN_OP_SRC, PIXMAN_OP_OVER },
- { PIXMAN_OP_OVER_REVERSE, PIXMAN_OP_DST, PIXMAN_OP_OVER_REVERSE, PIXMAN_OP_DST },
- { PIXMAN_OP_IN, PIXMAN_OP_SRC, PIXMAN_OP_IN, PIXMAN_OP_SRC },
- { PIXMAN_OP_IN_REVERSE, PIXMAN_OP_DST, PIXMAN_OP_DST, PIXMAN_OP_IN_REVERSE },
- { PIXMAN_OP_OUT, PIXMAN_OP_CLEAR, PIXMAN_OP_OUT, PIXMAN_OP_CLEAR },
- { PIXMAN_OP_OUT_REVERSE, PIXMAN_OP_CLEAR, PIXMAN_OP_CLEAR, PIXMAN_OP_OUT_REVERSE },
- { PIXMAN_OP_ATOP, PIXMAN_OP_SRC, PIXMAN_OP_IN, PIXMAN_OP_OVER },
- { PIXMAN_OP_ATOP_REVERSE, PIXMAN_OP_DST, PIXMAN_OP_OVER_REVERSE, PIXMAN_OP_IN_REVERSE },
- { PIXMAN_OP_XOR, PIXMAN_OP_CLEAR, PIXMAN_OP_OUT, PIXMAN_OP_OUT_REVERSE },
- { PIXMAN_OP_SATURATE, PIXMAN_OP_DST, PIXMAN_OP_OVER_REVERSE, PIXMAN_OP_DST },
- { PIXMAN_OP_NONE }
-};
+ if (image && (image->common.flags & FAST_PATH_NEEDS_WORKAROUND))
+ {
+ /* Some X servers generate images that point to the
+ * wrong place in memory, but then set the clip region
+ * to point to the right place. Because of an old bug
+ * in pixman, this would actually work.
+ *
+ * Here we try and undo the damage
+ */
+ int bpp = PIXMAN_FORMAT_BPP (image->bits.format) / 8;
+ pixman_box32_t *extents;
+ uint8_t *t;
+ int dx, dy;
+
+ extents = pixman_region32_extents (&(image->common.clip_region));
+ dx = extents->x1;
+ dy = extents->y1;
+
+ *save_bits = image->bits.bits;
+
+ *x -= dx;
+ *y -= dy;
+ pixman_region32_translate (&(image->common.clip_region), -dx, -dy);
+
+ t = (uint8_t *)image->bits.bits;
+ t += dy * image->bits.rowstride * 4 + dx * bpp;
+ image->bits.bits = (uint32_t *)t;
+
+ *save_dx = dx;
+ *save_dy = dy;
+ }
+}
-static pixman_implementation_t *imp;
+static void
+unapply_workaround (pixman_image_t *image, uint32_t *bits, int dx, int dy)
+{
+ if (image && (image->common.flags & FAST_PATH_NEEDS_WORKAROUND))
+ {
+ image->bits.bits = bits;
+ pixman_region32_translate (&image->common.clip_region, dx, dy);
+ }
+}
/*
- * Check if the current operator could be optimized
+ * Computing composite region
*/
-static const optimized_operator_info_t*
-pixman_operator_can_be_optimized (pixman_op_t op)
+static inline pixman_bool_t
+clip_general_image (pixman_region32_t * region,
+ pixman_region32_t * clip,
+ int dx,
+ int dy)
{
- const optimized_operator_info_t *info;
+ if (pixman_region32_n_rects (region) == 1 &&
+ pixman_region32_n_rects (clip) == 1)
+ {
+ pixman_box32_t * rbox = pixman_region32_rectangles (region, NULL);
+ pixman_box32_t * cbox = pixman_region32_rectangles (clip, NULL);
+ int v;
- for (info = optimized_operators; info->op != PIXMAN_OP_NONE; info++)
+ if (rbox->x1 < (v = cbox->x1 + dx))
+ rbox->x1 = v;
+ if (rbox->x2 > (v = cbox->x2 + dx))
+ rbox->x2 = v;
+ if (rbox->y1 < (v = cbox->y1 + dy))
+ rbox->y1 = v;
+ if (rbox->y2 > (v = cbox->y2 + dy))
+ rbox->y2 = v;
+ if (rbox->x1 >= rbox->x2 || rbox->y1 >= rbox->y2)
+ {
+ pixman_region32_init (region);
+ return FALSE;
+ }
+ }
+ else if (!pixman_region32_not_empty (clip))
+ {
+ return FALSE;
+ }
+ else
{
- if (info->op == op)
- return info;
+ if (dx || dy)
+ pixman_region32_translate (region, -dx, -dy);
+
+ if (!pixman_region32_intersect (region, region, clip))
+ return FALSE;
+
+ if (dx || dy)
+ pixman_region32_translate (region, dx, dy);
}
- return NULL;
+
+ return pixman_region32_not_empty (region);
+}
+
+static inline pixman_bool_t
+clip_source_image (pixman_region32_t * region,
+ pixman_image_t * image,
+ int dx,
+ int dy)
+{
+ /* Source clips are ignored, unless they are explicitly turned on
+ * and the clip in question was set by an X client. (Because if
+ * the clip was not set by a client, then it is a hierarchy
+ * clip and those should always be ignored for sources).
+ */
+ if (!image->common.clip_sources || !image->common.client_clip)
+ return TRUE;
+
+ return clip_general_image (region,
+ &image->common.clip_region,
+ dx, dy);
}
/*
- * Optimize the current operator based on opacity of source or destination
- * The output operator should be mathematically equivalent to the source.
+ * returns FALSE if the final region is empty. Indistinguishable from
+ * an allocation failure, but rendering ignores those anyways.
*/
-static pixman_op_t
-pixman_optimize_operator (pixman_op_t op,
- pixman_image_t *src_image,
- pixman_image_t *mask_image,
- pixman_image_t *dst_image)
+static pixman_bool_t
+pixman_compute_composite_region32 (pixman_region32_t * region,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
{
- pixman_bool_t is_source_opaque;
- pixman_bool_t is_dest_opaque;
- const optimized_operator_info_t *info = pixman_operator_can_be_optimized (op);
+ region->extents.x1 = dest_x;
+ region->extents.x2 = dest_x + width;
+ region->extents.y1 = dest_y;
+ region->extents.y2 = dest_y + height;
+
+ region->extents.x1 = MAX (region->extents.x1, 0);
+ region->extents.y1 = MAX (region->extents.y1, 0);
+ region->extents.x2 = MIN (region->extents.x2, dst_image->bits.width);
+ region->extents.y2 = MIN (region->extents.y2, dst_image->bits.height);
- if (!info || mask_image)
- return op;
+ region->data = 0;
- is_source_opaque = _pixman_image_is_opaque (src_image);
- is_dest_opaque = _pixman_image_is_opaque (dst_image);
+ /* Check for empty operation */
+ if (region->extents.x1 >= region->extents.x2 ||
+ region->extents.y1 >= region->extents.y2)
+ {
+ pixman_region32_init (region);
+ return FALSE;
+ }
- if (is_source_opaque == FALSE && is_dest_opaque == FALSE)
- return op;
+ if (dst_image->common.have_clip_region)
+ {
+ if (!clip_general_image (region, &dst_image->common.clip_region, 0, 0))
+ {
+ pixman_region32_fini (region);
+ return FALSE;
+ }
+ }
- if (is_source_opaque && is_dest_opaque)
- return info->op_src_dst_opaque;
- else if (is_source_opaque)
- return info->op_src_opaque;
- else if (is_dest_opaque)
- return info->op_dst_opaque;
+ if (dst_image->common.alpha_map && dst_image->common.alpha_map->common.have_clip_region)
+ {
+ if (!clip_general_image (region, &dst_image->common.alpha_map->common.clip_region,
+ -dst_image->common.alpha_origin_x,
+ -dst_image->common.alpha_origin_y))
+ {
+ pixman_region32_fini (region);
+ return FALSE;
+ }
+ }
- return op;
+ /* clip against src */
+ if (src_image->common.have_clip_region)
+ {
+ if (!clip_source_image (region, src_image, dest_x - src_x, dest_y - src_y))
+ {
+ pixman_region32_fini (region);
+ return FALSE;
+ }
+ }
+ if (src_image->common.alpha_map && src_image->common.alpha_map->common.have_clip_region)
+ {
+ if (!clip_source_image (region, (pixman_image_t *)src_image->common.alpha_map,
+ dest_x - (src_x - src_image->common.alpha_origin_x),
+ dest_y - (src_y - src_image->common.alpha_origin_y)))
+ {
+ pixman_region32_fini (region);
+ return FALSE;
+ }
+ }
+ /* clip against mask */
+ if (mask_image && mask_image->common.have_clip_region)
+ {
+ if (!clip_source_image (region, mask_image, dest_x - mask_x, dest_y - mask_y))
+ {
+ pixman_region32_fini (region);
+ return FALSE;
+ }
+ if (mask_image->common.alpha_map && mask_image->common.alpha_map->common.have_clip_region)
+ {
+ if (!clip_source_image (region, (pixman_image_t *)mask_image->common.alpha_map,
+ dest_x - (mask_x - mask_image->common.alpha_origin_x),
+ dest_y - (mask_y - mask_image->common.alpha_origin_y)))
+ {
+ pixman_region32_fini (region);
+ return FALSE;
+ }
+ }
+ }
+ return TRUE;
}
static void
-apply_workaround (pixman_image_t *image,
- int16_t * x,
- int16_t * y,
- uint32_t ** save_bits,
- int * save_dx,
- int * save_dy)
+walk_region_internal (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height,
+ pixman_bool_t src_repeat,
+ pixman_bool_t mask_repeat,
+ pixman_region32_t * region,
+ pixman_composite_func_t composite_rect)
{
- /* Some X servers generate images that point to the
- * wrong place in memory, but then set the clip region
- * to point to the right place. Because of an old bug
- * in pixman, this would actually work.
- *
- * Here we try and undo the damage
- */
- int bpp = PIXMAN_FORMAT_BPP (image->bits.format) / 8;
- pixman_box32_t *extents;
- uint8_t *t;
- int dx, dy;
+ int w, h, w_this, h_this;
+ int x_msk, y_msk, x_src, y_src, x_dst, y_dst;
+ int src_dy = src_y - dest_y;
+ int src_dx = src_x - dest_x;
+ int mask_dy = mask_y - dest_y;
+ int mask_dx = mask_x - dest_x;
+ const pixman_box32_t *pbox;
+ int n;
+
+ pbox = pixman_region32_rectangles (region, &n);
+
+ /* Fast path for non-repeating sources */
+ if (!src_repeat && !mask_repeat)
+ {
+ while (n--)
+ {
+ (*composite_rect) (imp, op,
+ src_image, mask_image, dst_image,
+ pbox->x1 + src_dx,
+ pbox->y1 + src_dy,
+ pbox->x1 + mask_dx,
+ pbox->y1 + mask_dy,
+ pbox->x1,
+ pbox->y1,
+ pbox->x2 - pbox->x1,
+ pbox->y2 - pbox->y1);
+
+ pbox++;
+ }
+
+ return;
+ }
+
+ while (n--)
+ {
+ h = pbox->y2 - pbox->y1;
+ y_src = pbox->y1 + src_dy;
+ y_msk = pbox->y1 + mask_dy;
+ y_dst = pbox->y1;
- extents = pixman_region32_extents (&(image->common.clip_region));
- dx = extents->x1;
- dy = extents->y1;
+ while (h)
+ {
+ h_this = h;
+ w = pbox->x2 - pbox->x1;
+ x_src = pbox->x1 + src_dx;
+ x_msk = pbox->x1 + mask_dx;
+ x_dst = pbox->x1;
- *save_bits = image->bits.bits;
+ if (mask_repeat)
+ {
+ y_msk = MOD (y_msk, mask_image->bits.height);
+ if (h_this > mask_image->bits.height - y_msk)
+ h_this = mask_image->bits.height - y_msk;
+ }
- *x -= dx;
- *y -= dy;
- pixman_region32_translate (&(image->common.clip_region), -dx, -dy);
+ if (src_repeat)
+ {
+ y_src = MOD (y_src, src_image->bits.height);
+ if (h_this > src_image->bits.height - y_src)
+ h_this = src_image->bits.height - y_src;
+ }
- t = (uint8_t *)image->bits.bits;
- t += dy * image->bits.rowstride * 4 + dx * bpp;
- image->bits.bits = (uint32_t *)t;
+ while (w)
+ {
+ w_this = w;
- *save_dx = dx;
- *save_dy = dy;
+ if (mask_repeat)
+ {
+ x_msk = MOD (x_msk, mask_image->bits.width);
+ if (w_this > mask_image->bits.width - x_msk)
+ w_this = mask_image->bits.width - x_msk;
+ }
+
+ if (src_repeat)
+ {
+ x_src = MOD (x_src, src_image->bits.width);
+ if (w_this > src_image->bits.width - x_src)
+ w_this = src_image->bits.width - x_src;
+ }
+
+ (*composite_rect) (imp, op,
+ src_image, mask_image, dst_image,
+ x_src, y_src, x_msk, y_msk, x_dst, y_dst,
+ w_this, h_this);
+ w -= w_this;
+
+ x_src += w_this;
+ x_msk += w_this;
+ x_dst += w_this;
+ }
+
+ h -= h_this;
+ y_src += h_this;
+ y_msk += h_this;
+ y_dst += h_this;
+ }
+
+ pbox++;
+ }
}
-static void
-unapply_workaround (pixman_image_t *image, uint32_t *bits, int dx, int dy)
+#define IS_16BIT(x) (((x) >= INT16_MIN) && ((x) <= INT16_MAX))
+
+static force_inline uint32_t
+compute_src_extents_flags (pixman_image_t *image,
+ pixman_box32_t *extents,
+ int x,
+ int y)
{
- image->bits.bits = bits;
- pixman_region32_translate (&image->common.clip_region, dx, dy);
+ pixman_box16_t extents16;
+ uint32_t flags;
+
+ flags = FAST_PATH_COVERS_CLIP;
+
+ if (image->common.type != BITS)
+ return flags;
+
+ if (image->common.repeat == PIXMAN_REPEAT_NONE &&
+ (x > extents->x1 || y > extents->y1 ||
+ x + image->bits.width < extents->x2 ||
+ y + image->bits.height < extents->y2))
+ {
+ flags &= ~FAST_PATH_COVERS_CLIP;
+ }
+
+ if (IS_16BIT (extents->x1 - x) &&
+ IS_16BIT (extents->y1 - y) &&
+ IS_16BIT (extents->x2 - x) &&
+ IS_16BIT (extents->y2 - y))
+ {
+ extents16.x1 = extents->x1 - x;
+ extents16.y1 = extents->y1 - y;
+ extents16.x2 = extents->x2 - x;
+ extents16.y2 = extents->y2 - y;
+
+ if (!image->common.transform ||
+ pixman_transform_bounds (image->common.transform, &extents16))
+ {
+ if (extents16.x1 >= 0 && extents16.y1 >= 0 &&
+ extents16.x2 <= image->bits.width &&
+ extents16.y2 <= image->bits.height)
+ {
+ flags |= FAST_PATH_SAMPLES_COVER_CLIP;
+ }
+ }
+ }
+
+ if (IS_16BIT (extents->x1 - x - 1) &&
+ IS_16BIT (extents->y1 - y - 1) &&
+ IS_16BIT (extents->x2 - x + 1) &&
+ IS_16BIT (extents->y2 - y + 1))
+ {
+ extents16.x1 = extents->x1 - x - 1;
+ extents16.y1 = extents->y1 - y - 1;
+ extents16.x2 = extents->x2 - x + 1;
+ extents16.y2 = extents->y2 - y + 1;
+
+ if (/* src space expanded by one in dest space fits in 16 bit */
+ (!image->common.transform ||
+ pixman_transform_bounds (image->common.transform, &extents16)) &&
+ /* And src image size can be used as 16.16 fixed point */
+ image->bits.width < 0x7fff &&
+ image->bits.height < 0x7fff)
+ {
+ /* Then we're "16bit safe" */
+ flags |= FAST_PATH_16BIT_SAFE;
+ }
+ }
+
+ return flags;
}
-PIXMAN_EXPORT void
-pixman_image_composite (pixman_op_t op,
- pixman_image_t * src,
- pixman_image_t * mask,
- pixman_image_t * dest,
- int16_t src_x,
- int16_t src_y,
- int16_t mask_x,
- int16_t mask_y,
- int16_t dest_x,
- int16_t dest_y,
- uint16_t width,
- uint16_t height)
+#define N_CACHED_FAST_PATHS 8
+
+typedef struct
{
+ pixman_fast_path_t cache [N_CACHED_FAST_PATHS];
+} cache_t;
+
+PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache);
+
+static void
+do_composite (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t *src,
+ pixman_image_t *mask,
+ pixman_image_t *dest,
+ int src_x,
+ int src_y,
+ int mask_x,
+ int mask_y,
+ int dest_x,
+ int dest_y,
+ int width,
+ int height)
+{
+ pixman_format_code_t src_format, mask_format, dest_format;
+ uint32_t src_flags, mask_flags, dest_flags;
+ pixman_region32_t region;
+ pixman_box32_t *extents;
uint32_t *src_bits;
int src_dx, src_dy;
uint32_t *mask_bits;
int mask_dx, mask_dy;
uint32_t *dest_bits;
int dest_dx, dest_dy;
+ pixman_bool_t need_workaround;
+ const pixman_fast_path_t *info;
+ cache_t *cache;
+ int i;
+
+ src_format = src->common.extended_format_code;
+ src_flags = src->common.flags;
- _pixman_image_validate (src);
if (mask)
- _pixman_image_validate (mask);
- _pixman_image_validate (dest);
+ {
+ mask_format = mask->common.extended_format_code;
+ mask_flags = mask->common.flags;
+ }
+ else
+ {
+ mask_format = PIXMAN_null;
+ mask_flags = FAST_PATH_IS_OPAQUE;
+ }
+
+ dest_format = dest->common.extended_format_code;
+ dest_flags = dest->common.flags;
+
+ /* Check for pixbufs */
+ if ((mask_format == PIXMAN_a8r8g8b8 || mask_format == PIXMAN_a8b8g8r8) &&
+ (src->type == BITS && src->bits.bits == mask->bits.bits) &&
+ (src->common.repeat == mask->common.repeat) &&
+ (src_x == mask_x && src_y == mask_y))
+ {
+ if (src_format == PIXMAN_x8b8g8r8)
+ src_format = mask_format = PIXMAN_pixbuf;
+ else if (src_format == PIXMAN_x8r8g8b8)
+ src_format = mask_format = PIXMAN_rpixbuf;
+ }
+
+ /* Check for workaround */
+ need_workaround = (src_flags | mask_flags | dest_flags) & FAST_PATH_NEEDS_WORKAROUND;
+
+ if (need_workaround)
+ {
+ apply_workaround (src, &src_x, &src_y, &src_bits, &src_dx, &src_dy);
+ apply_workaround (mask, &mask_x, &mask_y, &mask_bits, &mask_dx, &mask_dy);
+ apply_workaround (dest, &dest_x, &dest_y, &dest_bits, &dest_dx, &dest_dy);
+ }
+
+ pixman_region32_init (&region);
+
+ if (!pixman_compute_composite_region32 (
+ &region, src, mask, dest,
+ src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height))
+ {
+ goto out;
+ }
+ extents = pixman_region32_extents (&region);
+
+ src_flags |= compute_src_extents_flags (src, extents, dest_x - src_x, dest_y - src_y);
+
+ if (mask)
+ mask_flags |= compute_src_extents_flags (mask, extents, dest_x - mask_x, dest_y - mask_y);
+
/*
* Check if we can replace our operator by a simpler one
* if the src or dest are opaque. The output operator should be
* mathematically equivalent to the source.
*/
- op = pixman_optimize_operator(op, src, mask, dest);
- if (op == PIXMAN_OP_DST ||
- op == PIXMAN_OP_CONJOINT_DST ||
- op == PIXMAN_OP_DISJOINT_DST)
+ op = optimize_operator (op, src_flags, mask_flags, dest_flags);
+ if (op == PIXMAN_OP_DST)
+ goto out;
+
+ /* Check cache for fast paths */
+ cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache);
+
+ for (i = 0; i < N_CACHED_FAST_PATHS; ++i)
{
- return;
+ info = &(cache->cache[i]);
+
+ /* Note that we check for equality here, not whether
+ * the cached fast path matches. This is to prevent
+ * us from selecting an overly general fast path
+ * when a more specific one would work.
+ */
+ if (info->op == op &&
+ info->src_format == src_format &&
+ info->mask_format == mask_format &&
+ info->dest_format == dest_format &&
+ info->src_flags == src_flags &&
+ info->mask_flags == mask_flags &&
+ info->dest_flags == dest_flags &&
+ info->func)
+ {
+ goto found;
+ }
}
- if (!imp)
- imp = _pixman_choose_implementation ();
+ while (imp)
+ {
+ info = imp->fast_paths;
- if (src->common.need_workaround)
- apply_workaround (src, &src_x, &src_y, &src_bits, &src_dx, &src_dy);
- if (mask && mask->common.need_workaround)
- apply_workaround (mask, &mask_x, &mask_y, &mask_bits, &mask_dx, &mask_dy);
- if (dest->common.need_workaround)
- apply_workaround (dest, &dest_x, &dest_y, &dest_bits, &dest_dx, &dest_dy);
+ while (info->op != PIXMAN_OP_NONE)
+ {
+ if ((info->op == op || info->op == PIXMAN_OP_any) &&
+ /* Formats */
+ ((info->src_format == src_format) ||
+ (info->src_format == PIXMAN_any)) &&
+ ((info->mask_format == mask_format) ||
+ (info->mask_format == PIXMAN_any)) &&
+ ((info->dest_format == dest_format) ||
+ (info->dest_format == PIXMAN_any)) &&
+ /* Flags */
+ (info->src_flags & src_flags) == info->src_flags &&
+ (info->mask_flags & mask_flags) == info->mask_flags &&
+ (info->dest_flags & dest_flags) == info->dest_flags)
+ {
+ /* Set i to the last spot in the cache so that the
+ * move-to-front code below will work
+ */
+ i = N_CACHED_FAST_PATHS - 1;
+
+ goto found;
+ }
+
+ ++info;
+ }
+
+ imp = imp->delegate;
+ }
+
+ /* We didn't find a compositing routine. This should not happen, but if
+ * it somehow does, just exit rather than crash.
+ */
+ goto out;
- _pixman_implementation_composite (imp, op,
- src, mask, dest,
- src_x, src_y,
- mask_x, mask_y,
- dest_x, dest_y,
- width, height);
+found:
+ walk_region_internal (imp, op,
+ src, mask, dest,
+ src_x, src_y, mask_x, mask_y,
+ dest_x, dest_y,
+ width, height,
+ (src_flags & FAST_PATH_SIMPLE_REPEAT),
+ (mask_flags & FAST_PATH_SIMPLE_REPEAT),
+ &region, info->func);
+
+ if (i)
+ {
+ /* Make a copy of info->func, because info->func may change when
+ * we update the cache.
+ */
+ pixman_composite_func_t func = info->func;
+
+ while (i--)
+ cache->cache[i + 1] = cache->cache[i];
- if (src->common.need_workaround)
+ cache->cache[0].op = op;
+ cache->cache[0].src_format = src_format;
+ cache->cache[0].src_flags = src_flags;
+ cache->cache[0].mask_format = mask_format;
+ cache->cache[0].mask_flags = mask_flags;
+ cache->cache[0].dest_format = dest_format;
+ cache->cache[0].dest_flags = dest_flags;
+ cache->cache[0].func = func;
+ }
+
+out:
+ if (need_workaround)
+ {
unapply_workaround (src, src_bits, src_dx, src_dy);
- if (mask && mask->common.need_workaround)
unapply_workaround (mask, mask_bits, mask_dx, mask_dy);
- if (dest->common.need_workaround)
unapply_workaround (dest, dest_bits, dest_dx, dest_dy);
+ }
+
+ pixman_region32_fini (&region);
+}
+
+PIXMAN_EXPORT void
+pixman_image_composite (pixman_op_t op,
+ pixman_image_t * src,
+ pixman_image_t * mask,
+ pixman_image_t * dest,
+ int16_t src_x,
+ int16_t src_y,
+ int16_t mask_x,
+ int16_t mask_y,
+ int16_t dest_x,
+ int16_t dest_y,
+ uint16_t width,
+ uint16_t height)
+{
+ pixman_image_composite32 (op, src, mask, dest, src_x, src_y,
+ mask_x, mask_y, dest_x, dest_y, width, height);
+}
+
+/*
+ * Work around GCC bug causing crashes in Mozilla with SSE2
+ *
+ * When using -msse, gcc generates movdqa instructions assuming that
+ * the stack is 16 byte aligned. Unfortunately some applications, such
+ * as Mozilla and Mono, end up aligning the stack to 4 bytes, which
+ * causes the movdqa instructions to fail.
+ *
+ * The __force_align_arg_pointer__ makes gcc generate a prologue that
+ * realigns the stack pointer to 16 bytes.
+ *
+ * On x86-64 this is not necessary because the standard ABI already
+ * calls for a 16 byte aligned stack.
+ *
+ * See https://bugs.freedesktop.org/show_bug.cgi?id=15693
+ */
+#if defined (USE_SSE2) && defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+__attribute__((__force_align_arg_pointer__))
+#endif
+PIXMAN_EXPORT void
+pixman_image_composite32 (pixman_op_t op,
+ pixman_image_t * src,
+ pixman_image_t * mask,
+ pixman_image_t * dest,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ _pixman_image_validate (src);
+ if (mask)
+ _pixman_image_validate (mask);
+ _pixman_image_validate (dest);
+
+ if (!imp)
+ imp = _pixman_choose_implementation ();
+
+ do_composite (imp, op,
+ src, mask, dest,
+ src_x, src_y,
+ mask_x, mask_y,
+ dest_x, dest_y,
+ width, height);
}
PIXMAN_EXPORT pixman_bool_t
@@ -323,6 +931,45 @@ pixman_image_fill_rectangles (pixman_op_t op,
int n_rects,
const pixman_rectangle16_t *rects)
{
+ pixman_box32_t stack_boxes[6];
+ pixman_box32_t *boxes;
+ pixman_bool_t result;
+ int i;
+
+ if (n_rects > 6)
+ {
+ boxes = pixman_malloc_ab (sizeof (pixman_box32_t), n_rects);
+ if (boxes == NULL)
+ return FALSE;
+ }
+ else
+ {
+ boxes = stack_boxes;
+ }
+
+ for (i = 0; i < n_rects; ++i)
+ {
+ boxes[i].x1 = rects[i].x;
+ boxes[i].y1 = rects[i].y;
+ boxes[i].x2 = boxes[i].x1 + rects[i].width;
+ boxes[i].y2 = boxes[i].y1 + rects[i].height;
+ }
+
+ result = pixman_image_fill_boxes (op, dest, color, n_rects, boxes);
+
+ if (boxes != stack_boxes)
+ free (boxes);
+
+ return result;
+}
+
+PIXMAN_EXPORT pixman_bool_t
+pixman_image_fill_boxes (pixman_op_t op,
+ pixman_image_t * dest,
+ pixman_color_t * color,
+ int n_boxes,
+ const pixman_box32_t *boxes)
+{
pixman_image_t *solid;
pixman_color_t c;
int i;
@@ -331,71 +978,69 @@ pixman_image_fill_rectangles (pixman_op_t op,
if (color->alpha == 0xffff)
{
- if (op == PIXMAN_OP_OVER)
- op = PIXMAN_OP_SRC;
+ if (op == PIXMAN_OP_OVER)
+ op = PIXMAN_OP_SRC;
}
if (op == PIXMAN_OP_CLEAR)
{
- c.red = 0;
- c.green = 0;
- c.blue = 0;
- c.alpha = 0;
+ c.red = 0;
+ c.green = 0;
+ c.blue = 0;
+ c.alpha = 0;
- color = &c;
+ color = &c;
- op = PIXMAN_OP_SRC;
+ op = PIXMAN_OP_SRC;
}
if (op == PIXMAN_OP_SRC)
{
- uint32_t pixel;
+ uint32_t pixel;
- if (color_to_pixel (color, &pixel, dest->bits.format))
- {
- for (i = 0; i < n_rects; ++i)
- {
- pixman_region32_t fill_region;
- int n_boxes, j;
- pixman_box32_t *boxes;
+ if (color_to_pixel (color, &pixel, dest->bits.format))
+ {
+ pixman_region32_t fill_region;
+ int n_rects, j;
+ pixman_box32_t *rects;
- pixman_region32_init_rect (&fill_region, rects[i].x, rects[i].y, rects[i].width, rects[i].height);
+ if (!pixman_region32_init_rects (&fill_region, boxes, n_boxes))
+ return FALSE;
- if (dest->common.have_clip_region)
- {
- if (!pixman_region32_intersect (&fill_region,
- &fill_region,
- &dest->common.clip_region))
- return FALSE;
- }
+ if (dest->common.have_clip_region)
+ {
+ if (!pixman_region32_intersect (&fill_region,
+ &fill_region,
+ &dest->common.clip_region))
+ return FALSE;
+ }
- boxes = pixman_region32_rectangles (&fill_region, &n_boxes);
- for (j = 0; j < n_boxes; ++j)
- {
- const pixman_box32_t *box = &(boxes[j]);
- pixman_fill (dest->bits.bits, dest->bits.rowstride, PIXMAN_FORMAT_BPP (dest->bits.format),
- box->x1, box->y1, box->x2 - box->x1, box->y2 - box->y1,
- pixel);
- }
+ rects = pixman_region32_rectangles (&fill_region, &n_rects);
+ for (j = 0; j < n_rects; ++j)
+ {
+ const pixman_box32_t *rect = &(rects[j]);
+ pixman_fill (dest->bits.bits, dest->bits.rowstride, PIXMAN_FORMAT_BPP (dest->bits.format),
+ rect->x1, rect->y1, rect->x2 - rect->x1, rect->y2 - rect->y1,
+ pixel);
+ }
- pixman_region32_fini (&fill_region);
- }
- return TRUE;
- }
+ pixman_region32_fini (&fill_region);
+ return TRUE;
+ }
}
solid = pixman_image_create_solid_fill (color);
if (!solid)
- return FALSE;
+ return FALSE;
- for (i = 0; i < n_rects; ++i)
+ for (i = 0; i < n_boxes; ++i)
{
- const pixman_rectangle16_t *rect = &(rects[i]);
+ const pixman_box32_t *box = &(boxes[i]);
- pixman_image_composite (op, solid, NULL, dest,
- 0, 0, 0, 0,
- rect->x, rect->y,
- rect->width, rect->height);
+ pixman_image_composite32 (op, solid, NULL, dest,
+ 0, 0, 0, 0,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1);
}
pixman_image_unref (solid);
@@ -541,3 +1186,36 @@ pixman_format_supported_destination (pixman_format_code_t format)
return pixman_format_supported_source (format);
}
+PIXMAN_EXPORT pixman_bool_t
+pixman_compute_composite_region (pixman_region16_t * region,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int16_t src_x,
+ int16_t src_y,
+ int16_t mask_x,
+ int16_t mask_y,
+ int16_t dest_x,
+ int16_t dest_y,
+ uint16_t width,
+ uint16_t height)
+{
+ pixman_region32_t r32;
+ pixman_bool_t retval;
+
+ pixman_region32_init (&r32);
+
+ retval = pixman_compute_composite_region32 (
+ &r32, src_image, mask_image, dst_image,
+ src_x, src_y, mask_x, mask_y, dest_x, dest_y,
+ width, height);
+
+ if (retval)
+ {
+ if (!pixman_region16_copy_from_region32 (region, &r32))
+ retval = FALSE;
+ }
+
+ pixman_region32_fini (&r32);
+ return retval;
+}
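
The pixman.c hunks above turn pixman_image_fill_rectangles() into a thin wrapper: the pixman_rectangle16_t list is converted to pixman_box32_t (heap-allocated only when more than six rectangles are passed) and handed to the new pixman_image_fill_boxes(), which performs the OVER-to-SRC reduction, the clip intersection and the pixman_fill()/composite fallback. A minimal caller sketch follows; the image size, buffer and box coordinates are invented for illustration.

    /* Sketch: fill two boxes with opaque red through the new
     * pixman_image_fill_boxes() entry point.  All sizes here are
     * arbitrary example values, not taken from the patch. */
    #include <pixman.h>
    #include <stdlib.h>

    static void
    fill_two_boxes (void)
    {
        uint32_t *bits = calloc (64 * 64, 4);
        pixman_image_t *dest =
            pixman_image_create_bits (PIXMAN_a8r8g8b8, 64, 64, bits, 64 * 4);
        pixman_color_t red = { 0xffff, 0x0000, 0x0000, 0xffff };
        pixman_box32_t boxes[2] = {
            {  0,  0, 16, 16 },     /* x1, y1, x2, y2 */
            { 32, 32, 64, 64 }
        };

        /* OVER with an opaque color is reduced to SRC internally,
         * as the code above shows. */
        pixman_image_fill_boxes (PIXMAN_OP_OVER, dest, &red, 2, boxes);

        pixman_image_unref (dest);
        free (bits);
    }
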
diff --git a/lib/pixman/pixman/pixman.h b/lib/pixman/pixman/pixman.h
index 5b90a0c8d..964d04ab9 100644
--- a/lib/pixman/pixman/pixman.h
+++ b/lib/pixman/pixman/pixman.h
@@ -71,12 +71,26 @@ SOFTWARE.
#include <pixman-version.h>
+#ifdef __cplusplus
+#define PIXMAN_BEGIN_DECLS extern "C" {
+#define PIXMAN_END_DECLS }
+#else
+#define PIXMAN_BEGIN_DECLS
+#define PIXMAN_END_DECLS
+#endif
+
+PIXMAN_BEGIN_DECLS
+
/*
* Standard integers
*/
-#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun) || defined (__digital__)
+
+#if !defined (PIXMAN_DONT_DEFINE_STDINT)
+
+#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun) || defined (__digital__) || defined (__HP_cc)
# include <inttypes.h>
-#elif defined (_MSC_VER)
+/* VS 2010 (_MSC_VER 1600) has stdint.h */
+#elif defined (_MSC_VER) && _MSC_VER < 1600
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
@@ -91,6 +105,8 @@ typedef unsigned __int64 uint64_t;
# include <stdint.h>
#endif
+#endif
+
/*
* Boolean
*/
@@ -109,6 +125,7 @@ typedef pixman_fixed_16_16_t pixman_fixed_t;
#define pixman_fixed_e ((pixman_fixed_t) 1)
#define pixman_fixed_1 (pixman_int_to_fixed(1))
#define pixman_fixed_1_minus_e (pixman_fixed_1 - pixman_fixed_e)
+#define pixman_fixed_minus_1 (pixman_int_to_fixed(-1))
#define pixman_fixed_to_int(f) ((int) ((f) >> 16))
#define pixman_int_to_fixed(i) ((pixman_fixed_t) ((i) << 16))
#define pixman_fixed_to_double(f) (double) ((f) / (double) pixman_fixed_1)
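
pixman_fixed_t is a 16.16 fixed-point type, and the hunk above adds pixman_fixed_minus_1 next to the existing helpers. A few concrete values, written out here only as a reminder and not part of the patch:

    /* Spot checks of the 16.16 fixed-point macros; plain arithmetic,
     * assuming the usual 32-bit two's-complement pixman_fixed_t. */
    #include <assert.h>
    #include <pixman.h>

    static void
    fixed_point_examples (void)
    {
        assert (pixman_int_to_fixed (1) == 0x10000);          /* 1.0            */
        assert (pixman_double_to_fixed (1.5) == 0x18000);     /* 1.5            */
        assert (pixman_fixed_1_minus_e == 0xffff);            /* just below 1.0 */
        assert (pixman_fixed_minus_1 == (pixman_fixed_t) 0xffff0000);
        assert (pixman_fixed_to_int (pixman_double_to_fixed (1.9)) == 1);
    }
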
@@ -165,6 +182,7 @@ struct pixman_transform
/* forward declaration (sorry) */
struct pixman_box16;
+typedef union pixman_image pixman_image_t;
void pixman_transform_init_identity (struct pixman_transform *matrix);
pixman_bool_t pixman_transform_point_3d (const struct pixman_transform *transform,
@@ -331,10 +349,13 @@ typedef enum
PIXMAN_OP_HSL_HUE = 0x3b,
PIXMAN_OP_HSL_SATURATION = 0x3c,
PIXMAN_OP_HSL_COLOR = 0x3d,
- PIXMAN_OP_HSL_LUMINOSITY = 0x3e,
+ PIXMAN_OP_HSL_LUMINOSITY = 0x3e
- PIXMAN_OP_NONE,
- PIXMAN_OP_LAST = PIXMAN_OP_NONE
+#ifdef PIXMAN_USE_INTERNAL_API
+ ,
+ PIXMAN_N_OPERATORS,
+ PIXMAN_OP_NONE = PIXMAN_N_OPERATORS
+#endif
} pixman_op_t;
/*
@@ -390,10 +411,12 @@ void pixman_region_init_rect (pixman_region16_t *reg
unsigned int width,
unsigned int height);
pixman_bool_t pixman_region_init_rects (pixman_region16_t *region,
- pixman_box16_t *boxes,
+ const pixman_box16_t *boxes,
int count);
void pixman_region_init_with_extents (pixman_region16_t *region,
pixman_box16_t *extents);
+void pixman_region_init_from_image (pixman_region16_t *region,
+ pixman_image_t *image);
void pixman_region_fini (pixman_region16_t *region);
@@ -426,7 +449,7 @@ pixman_bool_t pixman_region_contains_point (pixman_region16_t *reg
int x,
int y,
pixman_box16_t *box);
-pixman_region_overlap_t pixman_region_contains_rectangle (pixman_region16_t *pixman_region16_t,
+pixman_region_overlap_t pixman_region_contains_rectangle (pixman_region16_t *region,
pixman_box16_t *prect);
pixman_bool_t pixman_region_not_empty (pixman_region16_t *region);
pixman_box16_t * pixman_region_extents (pixman_region16_t *region);
@@ -477,10 +500,12 @@ void pixman_region32_init_rect (pixman_region32_t *r
unsigned int width,
unsigned int height);
pixman_bool_t pixman_region32_init_rects (pixman_region32_t *region,
- pixman_box32_t *boxes,
+ const pixman_box32_t *boxes,
int count);
void pixman_region32_init_with_extents (pixman_region32_t *region,
pixman_box32_t *extents);
+void pixman_region32_init_from_image (pixman_region32_t *region,
+ pixman_image_t *image);
void pixman_region32_fini (pixman_region32_t *region);
@@ -554,7 +579,6 @@ const char* pixman_version_string (void);
/*
* Images
*/
-typedef union pixman_image pixman_image_t;
typedef struct pixman_indexed pixman_indexed_t;
typedef struct pixman_gradient_stop pixman_gradient_stop_t;
@@ -637,11 +661,11 @@ typedef enum {
/* 24bpp formats */
PIXMAN_r8g8b8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8),
PIXMAN_b8g8r8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8),
-
+
/* 16bpp formats */
PIXMAN_r5g6b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,6,5),
PIXMAN_b5g6r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,6,5),
-
+
PIXMAN_a1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,1,5,5,5),
PIXMAN_x1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,5,5),
PIXMAN_a1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,1,5,5,5),
@@ -650,35 +674,35 @@ typedef enum {
PIXMAN_x4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,4,4,4),
PIXMAN_a4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,4,4,4,4),
PIXMAN_x4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,4,4,4),
-
+
/* 8bpp formats */
PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0),
PIXMAN_r3g3b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,0,3,3,2),
PIXMAN_b2g3r3 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,0,3,3,2),
PIXMAN_a2r2g2b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,2,2,2,2),
PIXMAN_a2b2g2r2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,2,2,2,2),
-
+
PIXMAN_c8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_g8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),
-
+
PIXMAN_x4a4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,4,0,0,0),
-
+
PIXMAN_x4c4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_x4g4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),
-
+
/* 4bpp formats */
PIXMAN_a4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_A,4,0,0,0),
PIXMAN_r1g2b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,0,1,2,1),
PIXMAN_b1g2r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,0,1,2,1),
PIXMAN_a1r1g1b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,1,1,1,1),
PIXMAN_a1b1g1r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,1,1,1,1),
-
+
PIXMAN_c4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_g4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_GRAY,0,0,0,0),
-
+
/* 1bpp formats */
PIXMAN_a1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0),
-
+
PIXMAN_g1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0),
/* YUV formats */
@@ -719,6 +743,7 @@ pixman_bool_t pixman_image_unref (pixman_image_t
void pixman_image_set_destroy_function (pixman_image_t *image,
pixman_image_destroy_func_t function,
void *data);
+void * pixman_image_get_destroy_data (pixman_image_t *image);
/* Set properties */
pixman_bool_t pixman_image_set_clip_region (pixman_image_t *image,
@@ -758,6 +783,11 @@ pixman_bool_t pixman_image_fill_rectangles (pixman_op_t op,
pixman_color_t *color,
int n_rects,
const pixman_rectangle16_t *rects);
+pixman_bool_t pixman_image_fill_boxes (pixman_op_t op,
+ pixman_image_t *dest,
+ pixman_color_t *color,
+ int n_boxes,
+ const pixman_box32_t *boxes);
/* Composite */
pixman_bool_t pixman_compute_composite_region (pixman_region16_t *region,
@@ -784,6 +814,18 @@ void pixman_image_composite (pixman_op_t op,
int16_t dest_y,
uint16_t width,
uint16_t height);
+void pixman_image_composite32 (pixman_op_t op,
+ pixman_image_t *src,
+ pixman_image_t *mask,
+ pixman_image_t *dest,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height);
/* Old X servers rely on out-of-bounds accesses when they are asked
* to composite with a window as the source. They create a pixman image
@@ -889,4 +931,6 @@ void pixman_rasterize_trapezoid (pixman_image_t *image,
int x_off,
int y_off);
+PIXMAN_END_DECLS
+
#endif /* PIXMAN_H__ */
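
Besides pixman_fixed_minus_1 and pixman_image_fill_boxes(), the header now wraps all declarations in extern "C" guards (PIXMAN_BEGIN_DECLS/PIXMAN_END_DECLS), lets embedders opt out of the stdint fallbacks with PIXMAN_DONT_DEFINE_STDINT, and declares pixman_image_composite32(), whose coordinates and extents are int32_t rather than the int16_t/uint16_t of the older pixman_image_composite(). A minimal sketch of the 32-bit entry point; the images are assumed to exist already:

    /* Sketch only: same call shape as pixman_image_composite(), but the
     * ten position/size arguments are 32-bit. */
    #include <pixman.h>

    static void
    composite32_example (pixman_image_t *src, pixman_image_t *dest)
    {
        pixman_image_composite32 (PIXMAN_OP_OVER, src, NULL, dest,
                                  0, 0,    /* src  x, y */
                                  0, 0,    /* mask x, y */
                                  0, 0,    /* dest x, y */
                                  pixman_image_get_width (dest),
                                  pixman_image_get_height (dest));
    }
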
diff --git a/lib/pixman/pixman/solaris-hwcap.mapfile b/lib/pixman/pixman/solaris-hwcap.mapfile
index 7f439a95a..3605ca79f 100644
--- a/lib/pixman/pixman/solaris-hwcap.mapfile
+++ b/lib/pixman/pixman/solaris-hwcap.mapfile
@@ -3,29 +3,23 @@
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, and/or sell copies of the Software, and to permit persons
-# to whom the Software is furnished to do so, provided that the above
-# copyright notice(s) and this permission notice appear in all copies of
-# the Software and that both the above copyright notice(s) and this
-# permission notice appear in supporting documentation.
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
-# OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
-# HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
-# INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
-# FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
-# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
-# WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
#
-# Except as contained in this notice, the name of a copyright holder
-# shall not be used in advertising or otherwise to promote the sale, use
-# or other dealings in this Software without prior written authorization
-# of the copyright holder.
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
#
###############################################################################
#
diff --git a/lib/pixman/test/Makefile.am b/lib/pixman/test/Makefile.am
index c56f62de7..841ff8d7d 100644
--- a/lib/pixman/test/Makefile.am
+++ b/lib/pixman/test/Makefile.am
@@ -2,29 +2,44 @@ TEST_LDADD = $(top_builddir)/pixman/libpixman-1.la
INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman
TESTPROGRAMS = \
+ a1-trap-test \
region-test \
- scaling-test \
- blitters-test \
fetch-test \
oob-test \
window-test \
- trap-crasher
+ trap-crasher \
+ alphamap \
+ blitters-test \
+ scaling-test \
+ composite
+a1_trap_test_LDADD = $(TEST_LDADD)
fetch_test_LDADD = $(TEST_LDADD)
-region_test_LDADD = $(TEST_LDADD)
-scaling_test_LDADD = $(TEST_LDADD)
-blitters_test_LDADD = $(TEST_LDADD)
+composite_LDADD = $(TEST_LDADD)
trap_crasher_LDADD = $(TEST_LDADD)
oob_test_LDADD = $(TEST_LDADD)
window_test_LDADD = $(TEST_LDADD)
+region_test_LDADD = $(TEST_LDADD)
+region_test_SOURCES = region-test.c utils.c utils.h
+
+blitters_test_LDADD = $(TEST_LDADD)
+blitters_test_SOURCES = blitters-test.c utils.c utils.h
+
+scaling_test_LDADD = $(TEST_LDADD)
+scaling_test_SOURCES = scaling-test.c utils.c utils.h
+
+alphamap_LDADD = $(TEST_LDADD)
+alphamap_SOURCES = alphamap.c utils.c utils.h
+
# GTK using test programs
if HAVE_GTK
GTK_LDADD = $(TEST_LDADD) $(GTK_LIBS)
+GTK_UTILS = gtk-utils.c gtk-utils.h
-TESTPROGRAMS += \
+TESTPROGRAMS_GTK = \
clip-test \
clip-in \
composite-test \
@@ -32,35 +47,38 @@ TESTPROGRAMS += \
alpha-test \
screen-test \
convolution-test \
- trap-test
+ trap-test \
+ alphamap
INCLUDES += $(GTK_CFLAGS)
gradient_test_LDADD = $(GTK_LDADD)
-gradient_test_SOURCES = gradient-test.c utils.c utils.h
+gradient_test_SOURCES = gradient-test.c $(GTK_UTILS)
alpha_test_LDADD = $(GTK_LDADD)
-alpha_test_SOURCES = alpha-test.c utils.c utils.h
+alpha_test_SOURCES = alpha-test.c $(GTK_UTILS)
composite_test_LDADD = $(GTK_LDADD)
-composite_test_SOURCES = composite-test.c utils.c utils.h
+composite_test_SOURCES = composite-test.c $(GTK_UTILS)
clip_test_LDADD = $(GTK_LDADD)
-clip_test_SOURCES = clip-test.c utils.c utils.h
+clip_test_SOURCES = clip-test.c $(GTK_UTILS)
clip_in_LDADD = $(GTK_LDADD)
-clip_in_SOURCES = clip-in.c utils.c utils.h
+clip_in_SOURCES = clip-in.c $(GTK_UTILS)
trap_test_LDADD = $(GTK_LDADD)
-trap_test_SOURCES = trap-test.c utils.c utils.h
+trap_test_SOURCES = trap-test.c $(GTK_UTILS)
screen_test_LDADD = $(GTK_LDADD)
-screen_test_SOURCES = screen-test.c utils.c utils.h
+screen_test_SOURCES = screen-test.c $(GTK_UTILS)
convolution_test_LDADD = $(GTK_LDADD)
-convolution_test_SOURCES = convolution-test.c utils.c utils.h
+convolution_test_SOURCES = convolution-test.c $(GTK_UTILS)
endif
-noinst_PROGRAMS = $(TESTPROGRAMS)
+noinst_PROGRAMS = $(TESTPROGRAMS) $(TESTPROGRAMS_GTK)
+
+TESTS = $(TESTPROGRAMS)
diff --git a/lib/pixman/test/Makefile.in b/lib/pixman/test/Makefile.in
index f270165db..3991ea13d 100644
--- a/lib/pixman/test/Makefile.in
+++ b/lib/pixman/test/Makefile.in
@@ -36,18 +36,8 @@ PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-@HAVE_GTK_TRUE@am__append_1 = \
-@HAVE_GTK_TRUE@ clip-test \
-@HAVE_GTK_TRUE@ clip-in \
-@HAVE_GTK_TRUE@ composite-test \
-@HAVE_GTK_TRUE@ gradient-test \
-@HAVE_GTK_TRUE@ alpha-test \
-@HAVE_GTK_TRUE@ screen-test \
-@HAVE_GTK_TRUE@ convolution-test \
-@HAVE_GTK_TRUE@ trap-test
-
-@HAVE_GTK_TRUE@am__append_2 = $(GTK_CFLAGS)
-noinst_PROGRAMS = $(am__EXEEXT_2)
+@HAVE_GTK_TRUE@am__append_1 = $(GTK_CFLAGS)
+noinst_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2)
subdir = test
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
@@ -57,73 +47,88 @@ am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
mkinstalldirs = $(SHELL) $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
-@HAVE_GTK_TRUE@am__EXEEXT_1 = clip-test$(EXEEXT) clip-in$(EXEEXT) \
+am__EXEEXT_1 = a1-trap-test$(EXEEXT) region-test$(EXEEXT) \
+ fetch-test$(EXEEXT) oob-test$(EXEEXT) window-test$(EXEEXT) \
+ trap-crasher$(EXEEXT) alphamap$(EXEEXT) blitters-test$(EXEEXT) \
+ scaling-test$(EXEEXT) composite$(EXEEXT)
+@HAVE_GTK_TRUE@am__EXEEXT_2 = clip-test$(EXEEXT) clip-in$(EXEEXT) \
@HAVE_GTK_TRUE@ composite-test$(EXEEXT) gradient-test$(EXEEXT) \
@HAVE_GTK_TRUE@ alpha-test$(EXEEXT) screen-test$(EXEEXT) \
-@HAVE_GTK_TRUE@ convolution-test$(EXEEXT) trap-test$(EXEEXT)
-am__EXEEXT_2 = region-test$(EXEEXT) scaling-test$(EXEEXT) \
- blitters-test$(EXEEXT) fetch-test$(EXEEXT) oob-test$(EXEEXT) \
- window-test$(EXEEXT) trap-crasher$(EXEEXT) $(am__EXEEXT_1)
+@HAVE_GTK_TRUE@ convolution-test$(EXEEXT) trap-test$(EXEEXT) \
+@HAVE_GTK_TRUE@ alphamap$(EXEEXT)
PROGRAMS = $(noinst_PROGRAMS)
-am__alpha_test_SOURCES_DIST = alpha-test.c utils.c utils.h
+a1_trap_test_SOURCES = a1-trap-test.c
+a1_trap_test_OBJECTS = a1-trap-test.$(OBJEXT)
+am__DEPENDENCIES_1 = $(top_builddir)/pixman/libpixman-1.la
+a1_trap_test_DEPENDENCIES = $(am__DEPENDENCIES_1)
+am__alpha_test_SOURCES_DIST = alpha-test.c gtk-utils.c gtk-utils.h
+@HAVE_GTK_TRUE@am__objects_1 = gtk-utils.$(OBJEXT)
@HAVE_GTK_TRUE@am_alpha_test_OBJECTS = alpha-test.$(OBJEXT) \
-@HAVE_GTK_TRUE@ utils.$(OBJEXT)
+@HAVE_GTK_TRUE@ $(am__objects_1)
alpha_test_OBJECTS = $(am_alpha_test_OBJECTS)
-am__DEPENDENCIES_1 = $(top_builddir)/pixman/libpixman-1.la
am__DEPENDENCIES_2 =
@HAVE_GTK_TRUE@am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1) \
@HAVE_GTK_TRUE@ $(am__DEPENDENCIES_2)
@HAVE_GTK_TRUE@alpha_test_DEPENDENCIES = $(am__DEPENDENCIES_3)
-blitters_test_SOURCES = blitters-test.c
-blitters_test_OBJECTS = blitters-test.$(OBJEXT)
+am_alphamap_OBJECTS = alphamap.$(OBJEXT) utils.$(OBJEXT)
+alphamap_OBJECTS = $(am_alphamap_OBJECTS)
+alphamap_DEPENDENCIES = $(am__DEPENDENCIES_1)
+am_blitters_test_OBJECTS = blitters-test.$(OBJEXT) utils.$(OBJEXT)
+blitters_test_OBJECTS = $(am_blitters_test_OBJECTS)
blitters_test_DEPENDENCIES = $(am__DEPENDENCIES_1)
-am__clip_in_SOURCES_DIST = clip-in.c utils.c utils.h
-@HAVE_GTK_TRUE@am_clip_in_OBJECTS = clip-in.$(OBJEXT) utils.$(OBJEXT)
+am__clip_in_SOURCES_DIST = clip-in.c gtk-utils.c gtk-utils.h
+@HAVE_GTK_TRUE@am_clip_in_OBJECTS = clip-in.$(OBJEXT) $(am__objects_1)
clip_in_OBJECTS = $(am_clip_in_OBJECTS)
@HAVE_GTK_TRUE@clip_in_DEPENDENCIES = $(am__DEPENDENCIES_3)
-am__clip_test_SOURCES_DIST = clip-test.c utils.c utils.h
+am__clip_test_SOURCES_DIST = clip-test.c gtk-utils.c gtk-utils.h
@HAVE_GTK_TRUE@am_clip_test_OBJECTS = clip-test.$(OBJEXT) \
-@HAVE_GTK_TRUE@ utils.$(OBJEXT)
+@HAVE_GTK_TRUE@ $(am__objects_1)
clip_test_OBJECTS = $(am_clip_test_OBJECTS)
@HAVE_GTK_TRUE@clip_test_DEPENDENCIES = $(am__DEPENDENCIES_3)
-am__composite_test_SOURCES_DIST = composite-test.c utils.c utils.h
+composite_SOURCES = composite.c
+composite_OBJECTS = composite.$(OBJEXT)
+composite_DEPENDENCIES = $(am__DEPENDENCIES_1)
+am__composite_test_SOURCES_DIST = composite-test.c gtk-utils.c \
+ gtk-utils.h
@HAVE_GTK_TRUE@am_composite_test_OBJECTS = composite-test.$(OBJEXT) \
-@HAVE_GTK_TRUE@ utils.$(OBJEXT)
+@HAVE_GTK_TRUE@ $(am__objects_1)
composite_test_OBJECTS = $(am_composite_test_OBJECTS)
@HAVE_GTK_TRUE@composite_test_DEPENDENCIES = $(am__DEPENDENCIES_3)
-am__convolution_test_SOURCES_DIST = convolution-test.c utils.c utils.h
+am__convolution_test_SOURCES_DIST = convolution-test.c gtk-utils.c \
+ gtk-utils.h
@HAVE_GTK_TRUE@am_convolution_test_OBJECTS = \
-@HAVE_GTK_TRUE@ convolution-test.$(OBJEXT) utils.$(OBJEXT)
+@HAVE_GTK_TRUE@ convolution-test.$(OBJEXT) $(am__objects_1)
convolution_test_OBJECTS = $(am_convolution_test_OBJECTS)
@HAVE_GTK_TRUE@convolution_test_DEPENDENCIES = $(am__DEPENDENCIES_3)
fetch_test_SOURCES = fetch-test.c
fetch_test_OBJECTS = fetch-test.$(OBJEXT)
fetch_test_DEPENDENCIES = $(am__DEPENDENCIES_1)
-am__gradient_test_SOURCES_DIST = gradient-test.c utils.c utils.h
+am__gradient_test_SOURCES_DIST = gradient-test.c gtk-utils.c \
+ gtk-utils.h
@HAVE_GTK_TRUE@am_gradient_test_OBJECTS = gradient-test.$(OBJEXT) \
-@HAVE_GTK_TRUE@ utils.$(OBJEXT)
+@HAVE_GTK_TRUE@ $(am__objects_1)
gradient_test_OBJECTS = $(am_gradient_test_OBJECTS)
@HAVE_GTK_TRUE@gradient_test_DEPENDENCIES = $(am__DEPENDENCIES_3)
oob_test_SOURCES = oob-test.c
oob_test_OBJECTS = oob-test.$(OBJEXT)
oob_test_DEPENDENCIES = $(am__DEPENDENCIES_1)
-region_test_SOURCES = region-test.c
-region_test_OBJECTS = region-test.$(OBJEXT)
+am_region_test_OBJECTS = region-test.$(OBJEXT) utils.$(OBJEXT)
+region_test_OBJECTS = $(am_region_test_OBJECTS)
region_test_DEPENDENCIES = $(am__DEPENDENCIES_1)
-scaling_test_SOURCES = scaling-test.c
-scaling_test_OBJECTS = scaling-test.$(OBJEXT)
+am_scaling_test_OBJECTS = scaling-test.$(OBJEXT) utils.$(OBJEXT)
+scaling_test_OBJECTS = $(am_scaling_test_OBJECTS)
scaling_test_DEPENDENCIES = $(am__DEPENDENCIES_1)
-am__screen_test_SOURCES_DIST = screen-test.c utils.c utils.h
+am__screen_test_SOURCES_DIST = screen-test.c gtk-utils.c gtk-utils.h
@HAVE_GTK_TRUE@am_screen_test_OBJECTS = screen-test.$(OBJEXT) \
-@HAVE_GTK_TRUE@ utils.$(OBJEXT)
+@HAVE_GTK_TRUE@ $(am__objects_1)
screen_test_OBJECTS = $(am_screen_test_OBJECTS)
@HAVE_GTK_TRUE@screen_test_DEPENDENCIES = $(am__DEPENDENCIES_3)
trap_crasher_SOURCES = trap-crasher.c
trap_crasher_OBJECTS = trap-crasher.$(OBJEXT)
trap_crasher_DEPENDENCIES = $(am__DEPENDENCIES_1)
-am__trap_test_SOURCES_DIST = trap-test.c utils.c utils.h
+am__trap_test_SOURCES_DIST = trap-test.c gtk-utils.c gtk-utils.h
@HAVE_GTK_TRUE@am_trap_test_OBJECTS = trap-test.$(OBJEXT) \
-@HAVE_GTK_TRUE@ utils.$(OBJEXT)
+@HAVE_GTK_TRUE@ $(am__objects_1)
trap_test_OBJECTS = $(am_trap_test_OBJECTS)
@HAVE_GTK_TRUE@trap_test_DEPENDENCIES = $(am__DEPENDENCIES_3)
window_test_SOURCES = window-test.c
@@ -140,18 +145,21 @@ LTCOMPILE = $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) \
CCLD = $(CC)
LINK = $(LIBTOOL) --tag=CC --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
-SOURCES = $(alpha_test_SOURCES) blitters-test.c $(clip_in_SOURCES) \
- $(clip_test_SOURCES) $(composite_test_SOURCES) \
+SOURCES = a1-trap-test.c $(alpha_test_SOURCES) $(alphamap_SOURCES) \
+ $(blitters_test_SOURCES) $(clip_in_SOURCES) \
+ $(clip_test_SOURCES) composite.c $(composite_test_SOURCES) \
$(convolution_test_SOURCES) fetch-test.c \
- $(gradient_test_SOURCES) oob-test.c region-test.c \
- scaling-test.c $(screen_test_SOURCES) trap-crasher.c \
+ $(gradient_test_SOURCES) oob-test.c $(region_test_SOURCES) \
+ $(scaling_test_SOURCES) $(screen_test_SOURCES) trap-crasher.c \
$(trap_test_SOURCES) window-test.c
-DIST_SOURCES = $(am__alpha_test_SOURCES_DIST) blitters-test.c \
+DIST_SOURCES = a1-trap-test.c $(am__alpha_test_SOURCES_DIST) \
+ $(alphamap_SOURCES) $(blitters_test_SOURCES) \
$(am__clip_in_SOURCES_DIST) $(am__clip_test_SOURCES_DIST) \
- $(am__composite_test_SOURCES_DIST) \
+ composite.c $(am__composite_test_SOURCES_DIST) \
$(am__convolution_test_SOURCES_DIST) fetch-test.c \
- $(am__gradient_test_SOURCES_DIST) oob-test.c region-test.c \
- scaling-test.c $(am__screen_test_SOURCES_DIST) trap-crasher.c \
+ $(am__gradient_test_SOURCES_DIST) oob-test.c \
+ $(region_test_SOURCES) $(scaling_test_SOURCES) \
+ $(am__screen_test_SOURCES_DIST) trap-crasher.c \
$(am__trap_test_SOURCES_DIST) window-test.c
ETAGS = etags
CTAGS = ctags
@@ -161,13 +169,13 @@ AMDEP_FALSE = @AMDEP_FALSE@
AMDEP_TRUE = @AMDEP_TRUE@
AMTAR = @AMTAR@
AR = @AR@
-ARM_NEON_CFLAGS = @ARM_NEON_CFLAGS@
-ARM_SIMD_CFLAGS = @ARM_SIMD_CFLAGS@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
+CCAS = @CCAS@
+CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
@@ -195,6 +203,7 @@ GTK_CFLAGS = @GTK_CFLAGS@
GTK_LIBS = @GTK_LIBS@
HAVE_GTK_FALSE = @HAVE_GTK_FALSE@
HAVE_GTK_TRUE = @HAVE_GTK_TRUE@
+HAVE_PTHREAD_SETSPECIFIC = @HAVE_PTHREAD_SETSPECIFIC@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
@@ -224,6 +233,7 @@ PIXMAN_VERSION_MAJOR = @PIXMAN_VERSION_MAJOR@
PIXMAN_VERSION_MICRO = @PIXMAN_VERSION_MICRO@
PIXMAN_VERSION_MINOR = @PIXMAN_VERSION_MINOR@
PKG_CONFIG = @PKG_CONFIG@
+PTHREAD_LDFLAGS = @PTHREAD_LDFLAGS@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
@@ -231,6 +241,9 @@ SHELL = @SHELL@
SSE2_CFLAGS = @SSE2_CFLAGS@
SSE2_LDFLAGS = @SSE2_LDFLAGS@
STRIP = @STRIP@
+STUBS_CFLAGS = @STUBS_CFLAGS@
+STUBS_LIBS = @STUBS_LIBS@
+TOOLCHAIN_SUPPORTS__THREAD = @TOOLCHAIN_SUPPORTS__THREAD@
USE_ARM_NEON_FALSE = @USE_ARM_NEON_FALSE@
USE_ARM_NEON_TRUE = @USE_ARM_NEON_TRUE@
USE_ARM_SIMD_FALSE = @USE_ARM_SIMD_FALSE@
@@ -294,35 +307,65 @@ sysconfdir = @sysconfdir@
target_alias = @target_alias@
TEST_LDADD = $(top_builddir)/pixman/libpixman-1.la
INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman \
- $(am__append_2)
-TESTPROGRAMS = region-test scaling-test blitters-test fetch-test \
- oob-test window-test trap-crasher $(am__append_1)
+ $(am__append_1)
+TESTPROGRAMS = \
+ a1-trap-test \
+ region-test \
+ fetch-test \
+ oob-test \
+ window-test \
+ trap-crasher \
+ alphamap \
+ blitters-test \
+ scaling-test \
+ composite
+
+a1_trap_test_LDADD = $(TEST_LDADD)
fetch_test_LDADD = $(TEST_LDADD)
-region_test_LDADD = $(TEST_LDADD)
-scaling_test_LDADD = $(TEST_LDADD)
-blitters_test_LDADD = $(TEST_LDADD)
+composite_LDADD = $(TEST_LDADD)
trap_crasher_LDADD = $(TEST_LDADD)
oob_test_LDADD = $(TEST_LDADD)
window_test_LDADD = $(TEST_LDADD)
+region_test_LDADD = $(TEST_LDADD)
+region_test_SOURCES = region-test.c utils.c utils.h
+blitters_test_LDADD = $(TEST_LDADD)
+blitters_test_SOURCES = blitters-test.c utils.c utils.h
+scaling_test_LDADD = $(TEST_LDADD)
+scaling_test_SOURCES = scaling-test.c utils.c utils.h
+alphamap_LDADD = $(TEST_LDADD)
+alphamap_SOURCES = alphamap.c utils.c utils.h
# GTK using test programs
@HAVE_GTK_TRUE@GTK_LDADD = $(TEST_LDADD) $(GTK_LIBS)
+@HAVE_GTK_TRUE@GTK_UTILS = gtk-utils.c gtk-utils.h
+@HAVE_GTK_TRUE@TESTPROGRAMS_GTK = \
+@HAVE_GTK_TRUE@ clip-test \
+@HAVE_GTK_TRUE@ clip-in \
+@HAVE_GTK_TRUE@ composite-test \
+@HAVE_GTK_TRUE@ gradient-test \
+@HAVE_GTK_TRUE@ alpha-test \
+@HAVE_GTK_TRUE@ screen-test \
+@HAVE_GTK_TRUE@ convolution-test \
+@HAVE_GTK_TRUE@ trap-test \
+@HAVE_GTK_TRUE@ alphamap
+
@HAVE_GTK_TRUE@gradient_test_LDADD = $(GTK_LDADD)
-@HAVE_GTK_TRUE@gradient_test_SOURCES = gradient-test.c utils.c utils.h
+@HAVE_GTK_TRUE@gradient_test_SOURCES = gradient-test.c $(GTK_UTILS)
@HAVE_GTK_TRUE@alpha_test_LDADD = $(GTK_LDADD)
-@HAVE_GTK_TRUE@alpha_test_SOURCES = alpha-test.c utils.c utils.h
+@HAVE_GTK_TRUE@alpha_test_SOURCES = alpha-test.c $(GTK_UTILS)
@HAVE_GTK_TRUE@composite_test_LDADD = $(GTK_LDADD)
-@HAVE_GTK_TRUE@composite_test_SOURCES = composite-test.c utils.c utils.h
+@HAVE_GTK_TRUE@composite_test_SOURCES = composite-test.c $(GTK_UTILS)
@HAVE_GTK_TRUE@clip_test_LDADD = $(GTK_LDADD)
-@HAVE_GTK_TRUE@clip_test_SOURCES = clip-test.c utils.c utils.h
+@HAVE_GTK_TRUE@clip_test_SOURCES = clip-test.c $(GTK_UTILS)
@HAVE_GTK_TRUE@clip_in_LDADD = $(GTK_LDADD)
-@HAVE_GTK_TRUE@clip_in_SOURCES = clip-in.c utils.c utils.h
+@HAVE_GTK_TRUE@clip_in_SOURCES = clip-in.c $(GTK_UTILS)
@HAVE_GTK_TRUE@trap_test_LDADD = $(GTK_LDADD)
-@HAVE_GTK_TRUE@trap_test_SOURCES = trap-test.c utils.c utils.h
+@HAVE_GTK_TRUE@trap_test_SOURCES = trap-test.c $(GTK_UTILS)
@HAVE_GTK_TRUE@screen_test_LDADD = $(GTK_LDADD)
-@HAVE_GTK_TRUE@screen_test_SOURCES = screen-test.c utils.c utils.h
+@HAVE_GTK_TRUE@screen_test_SOURCES = screen-test.c $(GTK_UTILS)
@HAVE_GTK_TRUE@convolution_test_LDADD = $(GTK_LDADD)
-@HAVE_GTK_TRUE@convolution_test_SOURCES = convolution-test.c utils.c utils.h
+@HAVE_GTK_TRUE@convolution_test_SOURCES = convolution-test.c $(GTK_UTILS)
+TESTS = $(TESTPROGRAMS)
all: all-am
.SUFFIXES:
@@ -336,9 +379,9 @@ $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
exit 1;; \
esac; \
done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu test/Makefile'; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign test/Makefile'; \
cd $(top_srcdir) && \
- $(AUTOMAKE) --gnu test/Makefile
+ $(AUTOMAKE) --foreign test/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
@@ -363,9 +406,15 @@ clean-noinstPROGRAMS:
echo " rm -f $$p $$f"; \
rm -f $$p $$f ; \
done
+a1-trap-test$(EXEEXT): $(a1_trap_test_OBJECTS) $(a1_trap_test_DEPENDENCIES)
+ @rm -f a1-trap-test$(EXEEXT)
+ $(LINK) $(a1_trap_test_LDFLAGS) $(a1_trap_test_OBJECTS) $(a1_trap_test_LDADD) $(LIBS)
alpha-test$(EXEEXT): $(alpha_test_OBJECTS) $(alpha_test_DEPENDENCIES)
@rm -f alpha-test$(EXEEXT)
$(LINK) $(alpha_test_LDFLAGS) $(alpha_test_OBJECTS) $(alpha_test_LDADD) $(LIBS)
+alphamap$(EXEEXT): $(alphamap_OBJECTS) $(alphamap_DEPENDENCIES)
+ @rm -f alphamap$(EXEEXT)
+ $(LINK) $(alphamap_LDFLAGS) $(alphamap_OBJECTS) $(alphamap_LDADD) $(LIBS)
blitters-test$(EXEEXT): $(blitters_test_OBJECTS) $(blitters_test_DEPENDENCIES)
@rm -f blitters-test$(EXEEXT)
$(LINK) $(blitters_test_LDFLAGS) $(blitters_test_OBJECTS) $(blitters_test_LDADD) $(LIBS)
@@ -375,6 +424,9 @@ clip-in$(EXEEXT): $(clip_in_OBJECTS) $(clip_in_DEPENDENCIES)
clip-test$(EXEEXT): $(clip_test_OBJECTS) $(clip_test_DEPENDENCIES)
@rm -f clip-test$(EXEEXT)
$(LINK) $(clip_test_LDFLAGS) $(clip_test_OBJECTS) $(clip_test_LDADD) $(LIBS)
+composite$(EXEEXT): $(composite_OBJECTS) $(composite_DEPENDENCIES)
+ @rm -f composite$(EXEEXT)
+ $(LINK) $(composite_LDFLAGS) $(composite_OBJECTS) $(composite_LDADD) $(LIBS)
composite-test$(EXEEXT): $(composite_test_OBJECTS) $(composite_test_DEPENDENCIES)
@rm -f composite-test$(EXEEXT)
$(LINK) $(composite_test_LDFLAGS) $(composite_test_OBJECTS) $(composite_test_LDADD) $(LIBS)
@@ -415,14 +467,18 @@ mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/a1-trap-test.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alpha-test.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alphamap.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blitters-test.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clip-in.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clip-test.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/composite-test.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/composite.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/convolution-test.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fetch-test.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gradient-test.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gtk-utils.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oob-test.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/region-test.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scaling-test.Po@am__quote@
@@ -511,6 +567,79 @@ GTAGS:
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+check-TESTS: $(TESTS)
+ @failed=0; all=0; xfail=0; xpass=0; skip=0; \
+ srcdir=$(srcdir); export srcdir; \
+ list='$(TESTS)'; \
+ if test -n "$$list"; then \
+ for tst in $$list; do \
+ if test -f ./$$tst; then dir=./; \
+ elif test -f $$tst; then dir=; \
+ else dir="$(srcdir)/"; fi; \
+ if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
+ all=`expr $$all + 1`; \
+ case " $(XFAIL_TESTS) " in \
+ *" $$tst "*) \
+ xpass=`expr $$xpass + 1`; \
+ failed=`expr $$failed + 1`; \
+ echo "XPASS: $$tst"; \
+ ;; \
+ *) \
+ echo "PASS: $$tst"; \
+ ;; \
+ esac; \
+ elif test $$? -ne 77; then \
+ all=`expr $$all + 1`; \
+ case " $(XFAIL_TESTS) " in \
+ *" $$tst "*) \
+ xfail=`expr $$xfail + 1`; \
+ echo "XFAIL: $$tst"; \
+ ;; \
+ *) \
+ failed=`expr $$failed + 1`; \
+ echo "FAIL: $$tst"; \
+ ;; \
+ esac; \
+ else \
+ skip=`expr $$skip + 1`; \
+ echo "SKIP: $$tst"; \
+ fi; \
+ done; \
+ if test "$$failed" -eq 0; then \
+ if test "$$xfail" -eq 0; then \
+ banner="All $$all tests passed"; \
+ else \
+ banner="All $$all tests behaved as expected ($$xfail expected failures)"; \
+ fi; \
+ else \
+ if test "$$xpass" -eq 0; then \
+ banner="$$failed of $$all tests failed"; \
+ else \
+ banner="$$failed of $$all tests did not behave as expected ($$xpass unexpected passes)"; \
+ fi; \
+ fi; \
+ dashes="$$banner"; \
+ skipped=""; \
+ if test "$$skip" -ne 0; then \
+ skipped="($$skip tests were not run)"; \
+ test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
+ dashes="$$skipped"; \
+ fi; \
+ report=""; \
+ if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \
+ report="Please report to $(PACKAGE_BUGREPORT)"; \
+ test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \
+ dashes="$$report"; \
+ fi; \
+ dashes=`echo "$$dashes" | sed s/./=/g`; \
+ echo "$$dashes"; \
+ echo "$$banner"; \
+ test -z "$$skipped" || echo "$$skipped"; \
+ test -z "$$report" || echo "$$report"; \
+ echo "$$dashes"; \
+ test "$$failed" -eq 0; \
+ else :; fi
+
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
@@ -539,6 +668,7 @@ distdir: $(DISTFILES)
fi; \
done
check-am: all-am
+ $(MAKE) $(AM_MAKEFLAGS) check-TESTS
check: check-am
all-am: Makefile $(PROGRAMS)
installdirs:
@@ -617,17 +747,17 @@ ps-am:
uninstall-am: uninstall-info-am
-.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
- clean-libtool clean-noinstPROGRAMS ctags distclean \
- distclean-compile distclean-generic distclean-libtool \
- distclean-tags distdir dvi dvi-am html html-am info info-am \
- install install-am install-data install-data-am install-exec \
- install-exec-am install-info install-info-am install-man \
- install-strip installcheck installcheck-am installdirs \
- maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
- pdf pdf-am ps ps-am tags uninstall uninstall-am \
- uninstall-info-am
+.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
+ clean-generic clean-libtool clean-noinstPROGRAMS ctags \
+ distclean distclean-compile distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-exec install-exec-am install-info \
+ install-info-am install-man install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am uninstall-info-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
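
The regenerated Makefile.in picks up the TESTS = $(TESTPROGRAMS) line from Makefile.am: the new check-TESTS rule runs every non-GTK test program during "make check", counts PASS/FAIL (plus XFAIL/XPASS against XFAIL_TESTS), and treats an exit status of 77 as SKIP. A sketch of a test honoring that convention; the feature probe is hypothetical:

    /* Exit-status convention used by the check-TESTS rule above:
     * 0 = pass, 77 = skipped, anything else = failure. */
    #include <stdio.h>

    int
    main (void)
    {
        int have_feature = 0;   /* hypothetical runtime probe */

        if (!have_feature)
        {
            printf ("prerequisite missing, skipping\n");
            return 77;          /* reported as SKIP */
        }

        /* ... real checks would go here ... */
        return 0;               /* reported as PASS */
    }
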
diff --git a/lib/pixman/test/a1-trap-test.c b/lib/pixman/test/a1-trap-test.c
new file mode 100644
index 000000000..6163e7c61
--- /dev/null
+++ b/lib/pixman/test/a1-trap-test.c
@@ -0,0 +1,50 @@
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "pixman.h"
+
+int
+main (int argc, char **argv)
+{
+#define WIDTH 20
+#define HEIGHT 20
+
+ pixman_image_t *src_img;
+ pixman_image_t *mask_img;
+ pixman_image_t *dest_img;
+ pixman_trap_t trap;
+ pixman_color_t red = { 0xffff, 0x0000, 0x0000, 0xffff };
+ uint32_t *bits = malloc (WIDTH * HEIGHT * 4);
+ uint32_t *mbits = malloc (WIDTH * HEIGHT);
+
+ memset (mbits, 0, WIDTH * HEIGHT);
+ memset (bits, 0xff, WIDTH * HEIGHT * 4);
+
+ trap.top.l = pixman_double_to_fixed (0.5);
+ trap.top.r = pixman_double_to_fixed (1.5);
+ trap.top.y = pixman_double_to_fixed (0.5);
+
+ trap.bot.l = pixman_double_to_fixed (0.5);
+ trap.bot.r = pixman_double_to_fixed (1.5);
+ trap.bot.y = pixman_double_to_fixed (1.5);
+
+ mask_img = pixman_image_create_bits (
+ PIXMAN_a1, WIDTH, HEIGHT, mbits, WIDTH);
+ src_img = pixman_image_create_solid_fill (&red);
+ dest_img = pixman_image_create_bits (
+ PIXMAN_a8r8g8b8, WIDTH, HEIGHT, bits, WIDTH * 4);
+
+ pixman_add_traps (mask_img, 0, 0, 1, &trap);
+
+ pixman_image_composite (PIXMAN_OP_OVER,
+ src_img, mask_img, dest_img,
+ 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT);
+
+ assert (bits[0] == 0xffff0000);
+ assert (bits[1] == 0xffffffff);
+ assert (bits[1 * WIDTH + 0] == 0xffffffff);
+ assert (bits[1 * WIDTH + 1] == 0xffffffff);
+
+ return 0;
+}
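
The new a1-trap-test adds a single trapezoid whose edges sit at pixman_double_to_fixed(0.5) == 0x8000 and pixman_double_to_fixed(1.5) == 0x18000 in both directions, i.e. a 1x1 square offset by half a pixel, rasterized into a 1-bit (a1) mask. The assertions pin that behaviour down: only the destination pixel at (0, 0) picks up the red source, while its right and lower neighbours stay white. (Worked fixed-point values added here for clarity; they are not part of the file.)
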
diff --git a/lib/pixman/test/alpha-test.c b/lib/pixman/test/alpha-test.c
index e2b97c789..92c208142 100644
--- a/lib/pixman/test/alpha-test.c
+++ b/lib/pixman/test/alpha-test.c
@@ -1,7 +1,7 @@
#include <stdio.h>
#include <stdlib.h>
#include "pixman.h"
-#include "utils.h"
+#include "gtk-utils.h"
int
main (int argc, char **argv)
@@ -14,7 +14,6 @@ main (int argc, char **argv)
uint32_t *src = malloc (WIDTH * HEIGHT * 4);
pixman_image_t *grad_img;
pixman_image_t *alpha_img;
- pixman_image_t *solid_img;
pixman_image_t *dest_img;
pixman_image_t *src_img;
int i;
@@ -26,24 +25,25 @@ main (int argc, char **argv)
pixman_point_fixed_t p1 = { pixman_double_to_fixed (0), 0 };
pixman_point_fixed_t p2 = { pixman_double_to_fixed (WIDTH),
pixman_int_to_fixed (0) };
+#if 0
pixman_transform_t trans = {
{ { pixman_double_to_fixed (2), pixman_double_to_fixed (0.5), pixman_double_to_fixed (-100), },
{ pixman_double_to_fixed (0), pixman_double_to_fixed (3), pixman_double_to_fixed (0), },
{ pixman_double_to_fixed (0), pixman_double_to_fixed (0.000), pixman_double_to_fixed (1.0) }
}
};
-
- pixman_transform_t id = {
+#else
+ pixman_transform_t trans = {
{ { pixman_fixed_1, 0, 0 },
{ 0, pixman_fixed_1, 0 },
{ 0, 0, pixman_fixed_1 } }
};
+#endif
pixman_point_fixed_t c_inner;
pixman_point_fixed_t c_outer;
pixman_fixed_t r_inner;
pixman_fixed_t r_outer;
- pixman_color_t red = { 0xffff, 0x0000, 0x0000, 0xffff };
for (i = 0; i < WIDTH * HEIGHT; ++i)
alpha[i] = 0x4f00004f; /* pale blue */
@@ -91,7 +91,7 @@ main (int argc, char **argv)
grad_img = pixman_image_create_linear_gradient (&p1, &p2,
stops, 2);
- pixman_image_set_transform (grad_img, &id);
+ pixman_image_set_transform (grad_img, &trans);
pixman_image_set_repeat (grad_img, PIXMAN_REPEAT_PAD);
pixman_image_composite (PIXMAN_OP_OVER, grad_img, NULL, alpha_img,
diff --git a/lib/pixman/test/alphamap.c b/lib/pixman/test/alphamap.c
new file mode 100644
index 000000000..e6a25efcb
--- /dev/null
+++ b/lib/pixman/test/alphamap.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "utils.h"
+
+#define WIDTH 400
+#define HEIGHT 200
+
+int
+main (int argc, char **argv)
+{
+ uint8_t *alpha = make_random_bytes (WIDTH * HEIGHT);
+ uint32_t *src = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * 4);
+ uint32_t *dest = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * 4);
+ int i;
+
+ pixman_image_t *a = pixman_image_create_bits (PIXMAN_a8, WIDTH, HEIGHT, (uint32_t *)alpha, WIDTH);
+ pixman_image_t *d = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, dest, WIDTH * 4);
+
+ for (i = 0; i < 2; ++i)
+ {
+ pixman_format_code_t sformat = (i == 0)? PIXMAN_a8r8g8b8 : PIXMAN_a2r10g10b10;
+ pixman_image_t *s = pixman_image_create_bits (sformat, WIDTH, HEIGHT, src, WIDTH * 4);
+ int j, k;
+
+ pixman_image_set_alpha_map (s, a, 0, 0);
+
+ pixman_image_composite (PIXMAN_OP_SRC, s, NULL, d, 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT);
+
+ for (j = 0; j < HEIGHT; ++j)
+ {
+ for (k = 0; k < WIDTH; ++k)
+ {
+ uint8_t ap = ((uint8_t *)alpha)[j * WIDTH + k];
+ uint32_t dap = (dest[j * WIDTH + k] >> 24);
+ uint32_t sap = (src[j * WIDTH + k] >> 24);
+
+ if (ap != dap)
+ {
+ printf ("Wrong alpha value at (%d, %d). Should be %d; got %d (src was %d)\n", k, j, ap, dap, sap);
+ return 1;
+ }
+ }
+ }
+
+ pixman_image_unref (s);
+ }
+
+ return 0;
+}
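
The new alphamap test attaches an a8 image to the source with pixman_image_set_alpha_map() and checks, for both a8r8g8b8 and a2r10g10b10 sources, that a SRC composite writes the map's byte, not the source's own alpha, into the destination's alpha channel. A minimal sketch of the attachment step, with invented sizes:

    /* Sketch, assuming 16x16 buffers supplied by the caller. */
    #include <pixman.h>

    static void
    attach_alpha_map (uint32_t *src_bits, uint32_t *map_bits)
    {
        pixman_image_t *src = pixman_image_create_bits (PIXMAN_a8r8g8b8,
                                                        16, 16, src_bits, 16 * 4);
        pixman_image_t *map = pixman_image_create_bits (PIXMAN_a8,
                                                        16, 16, map_bits, 16);

        /* From now on, compositing from 'src' reads per-pixel alpha
         * from 'map' instead of the source's own alpha channel. */
        pixman_image_set_alpha_map (src, map, 0, 0);

        /* ... composite with 'src' as usual, then drop the references. */
        pixman_image_unref (src);
        pixman_image_unref (map);
    }
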
diff --git a/lib/pixman/test/blitters-test.c b/lib/pixman/test/blitters-test.c
index d5201e541..1ebf6d9ca 100644
--- a/lib/pixman/test/blitters-test.c
+++ b/lib/pixman/test/blitters-test.c
@@ -25,30 +25,9 @@
#include <stdlib.h>
#include <stdio.h>
#include <config.h>
-#include "pixman.h"
+#include "utils.h"
-/* A primitive pseudorandom number generator, taken from POSIX.1-2001 example */
-
-static uint32_t lcg_seed;
-
-static inline uint32_t
-lcg_rand (void)
-{
- lcg_seed = lcg_seed * 1103515245 + 12345;
- return ((uint32_t)(lcg_seed / 65536) % 32768);
-}
-
-static inline void
-lcg_srand (uint32_t seed)
-{
- lcg_seed = seed;
-}
-
-static inline uint32_t
-lcg_rand_n (int max)
-{
- return lcg_rand () % max;
-}
+static pixman_indexed_t palette;
static void *
aligned_malloc (size_t align, size_t size)
@@ -56,7 +35,8 @@ aligned_malloc (size_t align, size_t size)
void *result;
#ifdef HAVE_POSIX_MEMALIGN
- posix_memalign (&result, align, size);
+ if (posix_memalign (&result, align, size) != 0)
+ result = NULL;
#else
result = malloc (size);
#endif
@@ -64,192 +44,6 @@ aligned_malloc (size_t align, size_t size)
return result;
}
-/*----------------------------------------------------------------------------*\
- * CRC-32 version 2.0.0 by Craig Bruce, 2006-04-29.
- *
- * This program generates the CRC-32 values for the files named in the
- * command-line arguments. These are the same CRC-32 values used by GZIP,
- * PKZIP, and ZMODEM. The Crc32_ComputeBuf () can also be detached and
- * used independently.
- *
- * THIS PROGRAM IS PUBLIC-DOMAIN SOFTWARE.
- *
- * Based on the byte-oriented implementation "File Verification Using CRC"
- * by Mark R. Nelson in Dr. Dobb's Journal, May 1992, pp. 64-67.
- *
- * v1.0.0: original release.
- * v1.0.1: fixed printf formats.
- * v1.0.2: fixed something else.
- * v1.0.3: replaced CRC constant table by generator function.
- * v1.0.4: reformatted code, made ANSI C. 1994-12-05.
- * v2.0.0: rewrote to use memory buffer & static table, 2006-04-29.
-\*----------------------------------------------------------------------------*/
-
-/*----------------------------------------------------------------------------*\
- * NAME:
- * Crc32_ComputeBuf () - computes the CRC-32 value of a memory buffer
- * DESCRIPTION:
- * Computes or accumulates the CRC-32 value for a memory buffer.
- * The 'inCrc32' gives a previously accumulated CRC-32 value to allow
- * a CRC to be generated for multiple sequential buffer-fuls of data.
- * The 'inCrc32' for the first buffer must be zero.
- * ARGUMENTS:
- * inCrc32 - accumulated CRC-32 value, must be 0 on first call
- * buf - buffer to compute CRC-32 value for
- * bufLen - number of bytes in buffer
- * RETURNS:
- * crc32 - computed CRC-32 value
- * ERRORS:
- * (no errors are possible)
-\*----------------------------------------------------------------------------*/
-
-static uint32_t
-compute_crc32 (uint32_t in_crc32,
- const void *buf,
- size_t buf_len)
-{
- static const uint32_t crc_table[256] = {
- 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F,
- 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
- 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2,
- 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
- 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9,
- 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
- 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C,
- 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
- 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423,
- 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
- 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106,
- 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
- 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D,
- 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
- 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950,
- 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
- 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7,
- 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
- 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA,
- 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
- 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81,
- 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
- 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84,
- 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
- 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB,
- 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
- 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E,
- 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
- 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55,
- 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
- 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28,
- 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
- 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F,
- 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
- 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242,
- 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
- 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69,
- 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
- 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC,
- 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
- 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693,
- 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
- 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
- };
-
- uint32_t crc32;
- unsigned char * byte_buf;
- size_t i;
-
- /* accumulate crc32 for buffer */
- crc32 = in_crc32 ^ 0xFFFFFFFF;
- byte_buf = (unsigned char*) buf;
-
- for (i = 0; i < buf_len; i++)
- crc32 = (crc32 >> 8) ^ crc_table[(crc32 ^ byte_buf[i]) & 0xFF];
-
- return (crc32 ^ 0xFFFFFFFF);
-}
-
-/* perform endian conversion of pixel data */
-static void
-image_endian_swap (pixman_image_t *img, int bpp)
-{
- int stride = pixman_image_get_stride (img);
- uint32_t *data = pixman_image_get_data (img);
- int height = pixman_image_get_height (img);
- int i, j;
-
- /* swap bytes only on big endian systems */
- volatile uint16_t endian_check_var = 0x1234;
- if (*(volatile uint8_t *)&endian_check_var != 0x12)
- return;
-
- for (i = 0; i < height; i++)
- {
- uint8_t *line_data = (uint8_t *)data + stride * i;
- /* swap bytes only for 16, 24 and 32 bpp for now */
- switch (bpp)
- {
- case 1:
- for (j = 0; j < stride; j++)
- {
- line_data[j] =
- ((line_data[j] & 0x80) >> 7) |
- ((line_data[j] & 0x40) >> 5) |
- ((line_data[j] & 0x20) >> 3) |
- ((line_data[j] & 0x10) >> 1) |
- ((line_data[j] & 0x08) << 1) |
- ((line_data[j] & 0x04) << 3) |
- ((line_data[j] & 0x02) << 5) |
- ((line_data[j] & 0x01) << 7);
- }
- break;
- case 4:
- for (j = 0; j < stride; j++)
- {
- line_data[j] = (line_data[j] >> 4) | (line_data[j] << 4);
- }
- break;
- case 16:
- for (j = 0; j + 2 <= stride; j += 2)
- {
- char t1 = line_data[j + 0];
- char t2 = line_data[j + 1];
-
- line_data[j + 1] = t1;
- line_data[j + 0] = t2;
- }
- break;
- case 24:
- for (j = 0; j + 3 <= stride; j += 3)
- {
- char t1 = line_data[j + 0];
- char t2 = line_data[j + 1];
- char t3 = line_data[j + 2];
-
- line_data[j + 2] = t1;
- line_data[j + 1] = t2;
- line_data[j + 0] = t3;
- }
- break;
- case 32:
- for (j = 0; j + 4 <= stride; j += 4)
- {
- char t1 = line_data[j + 0];
- char t2 = line_data[j + 1];
- char t3 = line_data[j + 2];
- char t4 = line_data[j + 3];
-
- line_data[j + 3] = t1;
- line_data[j + 2] = t2;
- line_data[j + 1] = t3;
- line_data[j + 0] = t4;
- }
- break;
- default:
- break;
- }
- }
-}
-
/* Create random image for testing purposes */
static pixman_image_t *
create_random_image (pixman_format_code_t *allowed_formats,
@@ -266,6 +60,7 @@ create_random_image (pixman_format_code_t *allowed_formats,
while (allowed_formats[n] != -1)
n++;
fmt = allowed_formats[lcg_rand_n (n)];
+
width = lcg_rand_n (max_width) + 1;
height = lcg_rand_n (max_height) + 1;
stride = (width * PIXMAN_FORMAT_BPP (fmt) + 7) / 8 +
@@ -287,6 +82,12 @@ create_random_image (pixman_format_code_t *allowed_formats,
img = pixman_image_create_bits (fmt, width, height, buf, stride);
+ if (PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_COLOR ||
+ PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_GRAY)
+ {
+ pixman_image_set_indexed (img, &palette);
+ }
+
image_endian_swap (img, PIXMAN_FORMAT_BPP (fmt));
if (used_fmt) *used_fmt = fmt;
@@ -302,7 +103,7 @@ free_random_image (uint32_t initcrc,
uint32_t crc32 = 0;
int stride = pixman_image_get_stride (img);
uint32_t *data = pixman_image_get_data (img);
- int height = pixman_image_get_height (img);;
+ int height = pixman_image_get_height (img);
if (fmt != -1)
{
@@ -429,7 +230,6 @@ static pixman_format_code_t img_fmt_list[] = {
PIXMAN_b2g3r3,
PIXMAN_a2r2g2b2,
PIXMAN_a2b2g2r2,
-#if 0 /* using these crashes the test */
PIXMAN_c8,
PIXMAN_g8,
PIXMAN_x4c4,
@@ -437,7 +237,6 @@ static pixman_format_code_t img_fmt_list[] = {
PIXMAN_c4,
PIXMAN_g4,
PIXMAN_g1,
-#endif
PIXMAN_x4a4,
PIXMAN_a4,
PIXMAN_r1g2b1,
@@ -472,10 +271,11 @@ test_composite (uint32_t initcrc, int testnum, int verbose)
int src_stride, dst_stride;
int src_x, src_y;
int dst_x, dst_y;
+ int mask_x, mask_y;
int w, h;
int op;
pixman_format_code_t src_fmt, dst_fmt, mask_fmt;
- uint32_t *dstbuf;
+ uint32_t *dstbuf, *srcbuf, *maskbuf;
uint32_t crc32;
int max_width, max_height, max_extra_stride;
@@ -513,10 +313,43 @@ test_composite (uint32_t initcrc, int testnum, int verbose)
dst_img = create_random_image (img_fmt_list, max_width, max_height,
max_extra_stride, &dst_fmt);
+ src_width = pixman_image_get_width (src_img);
+ src_height = pixman_image_get_height (src_img);
+ src_stride = pixman_image_get_stride (src_img);
+
+ dst_width = pixman_image_get_width (dst_img);
+ dst_height = pixman_image_get_height (dst_img);
+ dst_stride = pixman_image_get_stride (dst_img);
+
+ dstbuf = pixman_image_get_data (dst_img);
+ srcbuf = pixman_image_get_data (src_img);
+
+ src_x = lcg_rand_n (src_width);
+ src_y = lcg_rand_n (src_height);
+ dst_x = lcg_rand_n (dst_width);
+ dst_y = lcg_rand_n (dst_height);
+
mask_img = NULL;
mask_fmt = -1;
+ mask_x = 0;
+ mask_y = 0;
+ maskbuf = NULL;
- if (lcg_rand_n (2))
+ if ((src_fmt == PIXMAN_x8r8g8b8 || src_fmt == PIXMAN_x8b8g8r8) &&
+ (lcg_rand_n (4) == 0))
+ {
+ /* PIXBUF */
+ mask_fmt = lcg_rand_n (2) ? PIXMAN_a8r8g8b8 : PIXMAN_a8b8g8r8;
+ mask_img = pixman_image_create_bits (mask_fmt,
+ src_width,
+ src_height,
+ srcbuf,
+ src_stride);
+ mask_x = src_x;
+ mask_y = src_y;
+ maskbuf = srcbuf;
+ }
+ else if (lcg_rand_n (2))
{
if (lcg_rand_n (2))
{
@@ -533,22 +366,11 @@ test_composite (uint32_t initcrc, int testnum, int verbose)
if (lcg_rand_n (2))
pixman_image_set_component_alpha (mask_img, 1);
- }
- src_width = pixman_image_get_width (src_img);
- src_height = pixman_image_get_height (src_img);
- src_stride = pixman_image_get_stride (src_img);
-
- dst_width = pixman_image_get_width (dst_img);
- dst_height = pixman_image_get_height (dst_img);
- dst_stride = pixman_image_get_stride (dst_img);
-
- dstbuf = pixman_image_get_data (dst_img);
+ mask_x = lcg_rand_n (pixman_image_get_width (mask_img));
+ mask_y = lcg_rand_n (pixman_image_get_height (mask_img));
+ }
- src_x = lcg_rand_n (src_width);
- src_y = lcg_rand_n (src_height);
- dst_x = lcg_rand_n (dst_width);
- dst_y = lcg_rand_n (dst_height);
w = lcg_rand_n (dst_width - dst_x + 1);
h = lcg_rand_n (dst_height - dst_y + 1);
@@ -567,7 +389,7 @@ test_composite (uint32_t initcrc, int testnum, int verbose)
}
pixman_image_composite (op, src_img, mask_img, dst_img,
- src_x, src_y, src_x, src_y, dst_x, dst_y, w, h);
+ src_x, src_y, mask_x, mask_y, dst_x, dst_y, w, h);
if (verbose)
{
@@ -592,11 +414,29 @@ test_composite (uint32_t initcrc, int testnum, int verbose)
crc32 = free_random_image (initcrc, dst_img, dst_fmt);
if (mask_img)
- free_random_image (initcrc, mask_img, -1);
+ {
+ if (srcbuf == maskbuf)
+ pixman_image_unref(mask_img);
+ else
+ free_random_image (initcrc, mask_img, -1);
+ }
+
return crc32;
}
+static void
+initialize_palette (void)
+{
+ int i;
+
+ for (i = 0; i < PIXMAN_MAX_INDEXED; ++i)
+ palette.rgba[i] = lcg_rand ();
+
+ for (i = 0; i < 32768; ++i)
+ palette.ent[i] = lcg_rand() & 0xff;
+}
+
int
main (int argc, char *argv[])
{
@@ -604,6 +444,8 @@ main (int argc, char *argv[])
uint32_t crc = 0;
int verbose = getenv ("VERBOSE") != NULL;
+ initialize_palette();
+
if (argc >= 3)
{
n1 = atoi (argv[1]);
@@ -640,7 +482,7 @@ main (int argc, char *argv[])
/* Predefined value for running with all the fastpath functions
disabled. It needs to be updated every time when changes are
introduced to this program or behavior of pixman changes! */
- if (crc == 0x06D8EDB6)
+ if (crc == 0xBBACC28D)
{
printf ("blitters test passed\n");
}
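
blitters-test now shares its helpers with the other checksum tests: the LCG pseudo-random generator, the CRC-32 routine and image_endian_swap() move to test/utils.c, indexed and gray formats get a real random palette, pixbuf-style cases (an x8r8g8b8/x8b8g8r8 source buffer reused as an a8r8g8b8/a8b8g8r8 mask) are exercised, and the reference checksum therefore changes to 0xBBACC28D. The generator itself is the POSIX.1-2001 example LCG shown in the removed hunk above; seeding it per test case is what makes every case reproducible. A sketch of that idea, assuming the lcg_* helpers are now provided by test/utils.h:

    /* Sketch only: deterministic per-case randomness via the shared LCG. */
    #include "utils.h"

    static void
    run_case (int testnum)
    {
        int width;

        lcg_srand (testnum);            /* same N always yields the same case */
        width = lcg_rand_n (48) + 1;    /* e.g. a random width in 1..48       */

        /* ... build images from further lcg_rand_n() draws, composite,
         * and fold the destination into the running CRC-32 ... */
        (void) width;
    }
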
diff --git a/lib/pixman/test/clip-in.c b/lib/pixman/test/clip-in.c
index 55459b204..51579811f 100644
--- a/lib/pixman/test/clip-in.c
+++ b/lib/pixman/test/clip-in.c
@@ -2,7 +2,7 @@
#include <stdlib.h>
#include <string.h>
#include "pixman.h"
-#include "utils.h"
+#include "gtk-utils.h"
/* This test demonstrates that clipping is done totally different depending
* on whether the source is transformed or not.
diff --git a/lib/pixman/test/clip-test.c b/lib/pixman/test/clip-test.c
index 900013718..aa0df4482 100644
--- a/lib/pixman/test/clip-test.c
+++ b/lib/pixman/test/clip-test.c
@@ -1,7 +1,7 @@
#include <stdio.h>
#include <stdlib.h>
#include "pixman.h"
-#include "utils.h"
+#include "gtk-utils.h"
#define WIDTH 200
#define HEIGHT 200
@@ -31,9 +31,11 @@ main (int argc, char **argv)
{ pixman_int_to_fixed (0), { 0xffff, 0x0000, 0x0000, 0xffff } },
{ pixman_int_to_fixed (1), { 0xffff, 0xffff, 0x0000, 0xffff } }
};
+#if 0
pixman_point_fixed_t p1 = { 0, 0 };
pixman_point_fixed_t p2 = { pixman_int_to_fixed (WIDTH),
pixman_int_to_fixed (HEIGHT) };
+#endif
pixman_point_fixed_t c_inner;
pixman_point_fixed_t c_outer;
pixman_fixed_t r_inner;
diff --git a/lib/pixman/test/composite-test.c b/lib/pixman/test/composite-test.c
index 49e0220a4..5401abfdf 100644
--- a/lib/pixman/test/composite-test.c
+++ b/lib/pixman/test/composite-test.c
@@ -2,7 +2,7 @@
#include <stdlib.h>
#include <stdio.h>
#include "pixman.h"
-#include "utils.h"
+#include "gtk-utils.h"
#define WIDTH 60
#define HEIGHT 60
@@ -77,6 +77,9 @@ writer (void *src, uint32_t value, int size)
case 4:
*(uint32_t *)src = value;
break;
+
+ default:
+ break;
}
}
@@ -113,7 +116,7 @@ main (int argc, char **argv)
window = gtk_window_new (GTK_WINDOW_TOPLEVEL);
- gtk_window_set_default_size (window, 800, 600);
+ gtk_window_set_default_size (GTK_WINDOW (window), 800, 600);
g_signal_connect (window, "delete-event",
G_CALLBACK (gtk_main_quit),
diff --git a/lib/pixman/test/composite.c b/lib/pixman/test/composite.c
new file mode 100644
index 000000000..9e8c0fbd4
--- /dev/null
+++ b/lib/pixman/test/composite.c
@@ -0,0 +1,901 @@
+/*
+ * Copyright © 2005 Eric Anholt
+ * Copyright © 2009 Chris Wilson
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Eric Anholt not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. Eric Anholt makes no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * ERIC ANHOLT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL ERIC ANHOLT BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <pixman.h>
+#include <stdio.h>
+#include <stdlib.h> /* abort() */
+#include <math.h>
+#include <config.h>
+
+#define FALSE 0
+#define TRUE !FALSE
+
+#define ARRAY_LENGTH(A) ((int) (sizeof (A) / sizeof ((A) [0])))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+typedef struct color_t color_t;
+typedef struct format_t format_t;
+typedef struct image_t image_t;
+typedef struct operator_t operator_t;
+
+struct color_t
+{
+ double r, g, b, a;
+};
+
+struct format_t
+{
+ pixman_format_code_t format;
+ const char *name;
+};
+
+static color_t colors[] =
+{
+ /* these are premultiplied in main() */
+ { 1.0, 1.0, 1.0, 1.0 },
+ { 1.0, 0.0, 0.0, 1.0 },
+ { 0.0, 1.0, 0.0, 1.0 },
+ { 0.0, 0.0, 1.0, 1.0 },
+ { 0.0, 0.0, 0.0, 1.0 },
+ { 0.5, 0.0, 0.0, 0.5 },
+};
+
+static uint16_t
+_color_double_to_short (double d)
+{
+ uint32_t i;
+
+ i = (uint32_t) (d * 65536);
+ i -= (i >> 16);
+
+ return i;
+}
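+
+/* Worked numbers (added here for clarity, not part of the file):
+ * _color_double_to_short() maps [0.0, 1.0] onto the full 16-bit channel
+ * range without overflowing uint16_t.  For d = 1.0 the intermediate i is
+ * 65536 and subtracting i >> 16 (= 1) yields 65535; for d = 0.5 it gives
+ * 32768 - 0 = 32768. */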
+
+static void
+compute_pixman_color (const color_t *color,
+ pixman_color_t *out)
+{
+ out->red = _color_double_to_short (color->r);
+ out->green = _color_double_to_short (color->g);
+ out->blue = _color_double_to_short (color->b);
+ out->alpha = _color_double_to_short (color->a);
+}
+
+static const format_t formats[] =
+{
+#define P(x) { PIXMAN_##x, #x }
+ P(a8),
+
+ /* 32bpp formats */
+ P(a8r8g8b8),
+ P(x8r8g8b8),
+ P(a8b8g8r8),
+ P(x8b8g8r8),
+ P(b8g8r8a8),
+ P(b8g8r8x8),
+
+ /* XXX: and here the errors begin! */
+#if 0
+ P(x2r10g10b10),
+ P(a2r10g10b10),
+ P(x2b10g10r10),
+ P(a2b10g10r10),
+
+ /* 24bpp formats */
+ P(r8g8b8),
+ P(b8g8r8),
+
+ /* 16bpp formats */
+ P(r5g6b5),
+ P(b5g6r5),
+
+ P(a1r5g5b5),
+ P(x1r5g5b5),
+ P(a1b5g5r5),
+ P(x1b5g5r5),
+ P(a4r4g4b4),
+ P(x4r4g4b4),
+ P(a4b4g4r4),
+ P(x4b4g4r4),
+
+ /* 8bpp formats */
+ P(a8),
+ P(r3g3b2),
+ P(b2g3r3),
+ P(a2r2g2b2),
+ P(a2b2g2r2),
+
+ P(x4a4),
+
+ /* 4bpp formats */
+ P(a4),
+ P(r1g2b1),
+ P(b1g2r1),
+ P(a1r1g1b1),
+ P(a1b1g1r1),
+
+ /* 1bpp formats */
+ P(a1)
+#endif
+#undef P
+};
+
+struct image_t
+{
+ pixman_image_t *image;
+ const format_t *format;
+ const color_t *color;
+ pixman_repeat_t repeat;
+ int size;
+};
+
+struct operator_t
+{
+ pixman_op_t op;
+ const char *name;
+};
+
+static const operator_t operators[] =
+{
+#define P(x) { PIXMAN_OP_##x, #x }
+ P(CLEAR),
+ P(SRC),
+ P(DST),
+ P(OVER),
+ P(OVER_REVERSE),
+ P(IN),
+ P(IN_REVERSE),
+ P(OUT),
+ P(OUT_REVERSE),
+ P(ATOP),
+ P(ATOP_REVERSE),
+ P(XOR),
+ P(ADD),
+ P(SATURATE),
+
+ P(DISJOINT_CLEAR),
+ P(DISJOINT_SRC),
+ P(DISJOINT_DST),
+ P(DISJOINT_OVER),
+ P(DISJOINT_OVER_REVERSE),
+ P(DISJOINT_IN),
+ P(DISJOINT_IN_REVERSE),
+ P(DISJOINT_OUT),
+ P(DISJOINT_OUT_REVERSE),
+ P(DISJOINT_ATOP),
+ P(DISJOINT_ATOP_REVERSE),
+ P(DISJOINT_XOR),
+
+ P(CONJOINT_CLEAR),
+ P(CONJOINT_SRC),
+ P(CONJOINT_DST),
+ P(CONJOINT_OVER),
+ P(CONJOINT_OVER_REVERSE),
+ P(CONJOINT_IN),
+ P(CONJOINT_IN_REVERSE),
+ P(CONJOINT_OUT),
+ P(CONJOINT_OUT_REVERSE),
+ P(CONJOINT_ATOP),
+ P(CONJOINT_ATOP_REVERSE),
+ P(CONJOINT_XOR),
+#undef P
+};
+
+static double
+calc_op (pixman_op_t op, double src, double dst, double srca, double dsta)
+{
+#define mult_chan(src, dst, Fa, Fb) min ((src) * (Fa) + (dst) * (Fb), 1.0)
+
+ double Fa, Fb;
+
+ switch (op)
+ {
+ case PIXMAN_OP_CLEAR:
+ case PIXMAN_OP_DISJOINT_CLEAR:
+ case PIXMAN_OP_CONJOINT_CLEAR:
+ return mult_chan (src, dst, 0.0, 0.0);
+
+ case PIXMAN_OP_SRC:
+ case PIXMAN_OP_DISJOINT_SRC:
+ case PIXMAN_OP_CONJOINT_SRC:
+ return mult_chan (src, dst, 1.0, 0.0);
+
+ case PIXMAN_OP_DST:
+ case PIXMAN_OP_DISJOINT_DST:
+ case PIXMAN_OP_CONJOINT_DST:
+ return mult_chan (src, dst, 0.0, 1.0);
+
+ case PIXMAN_OP_OVER:
+ return mult_chan (src, dst, 1.0, 1.0 - srca);
+
+ case PIXMAN_OP_OVER_REVERSE:
+ return mult_chan (src, dst, 1.0 - dsta, 1.0);
+
+ case PIXMAN_OP_IN:
+ return mult_chan (src, dst, dsta, 0.0);
+
+ case PIXMAN_OP_IN_REVERSE:
+ return mult_chan (src, dst, 0.0, srca);
+
+ case PIXMAN_OP_OUT:
+ return mult_chan (src, dst, 1.0 - dsta, 0.0);
+
+ case PIXMAN_OP_OUT_REVERSE:
+ return mult_chan (src, dst, 0.0, 1.0 - srca);
+
+ case PIXMAN_OP_ATOP:
+ return mult_chan (src, dst, dsta, 1.0 - srca);
+
+ case PIXMAN_OP_ATOP_REVERSE:
+ return mult_chan (src, dst, 1.0 - dsta, srca);
+
+ case PIXMAN_OP_XOR:
+ return mult_chan (src, dst, 1.0 - dsta, 1.0 - srca);
+
+ case PIXMAN_OP_ADD:
+ return mult_chan (src, dst, 1.0, 1.0);
+
+ case PIXMAN_OP_SATURATE:
+ case PIXMAN_OP_DISJOINT_OVER_REVERSE:
+ if (srca == 0.0)
+ Fa = 1.0;
+ else
+ Fa = min (1.0, (1.0 - dsta) / srca);
+ return mult_chan (src, dst, Fa, 1.0);
+
+ case PIXMAN_OP_DISJOINT_OVER:
+ if (dsta == 0.0)
+ Fb = 1.0;
+ else
+ Fb = min (1.0, (1.0 - srca) / dsta);
+ return mult_chan (src, dst, 1.0, Fb);
+
+ case PIXMAN_OP_DISJOINT_IN:
+ if (srca == 0.0)
+ Fa = 0.0;
+ else
+ Fa = max (0.0, 1.0 - (1.0 - dsta) / srca);
+ return mult_chan (src, dst, Fa, 0.0);
+
+ case PIXMAN_OP_DISJOINT_IN_REVERSE:
+ if (dsta == 0.0)
+ Fb = 0.0;
+ else
+ Fb = max (0.0, 1.0 - (1.0 - srca) / dsta);
+ return mult_chan (src, dst, 0.0, Fb);
+
+ case PIXMAN_OP_DISJOINT_OUT:
+ if (srca == 0.0)
+ Fa = 1.0;
+ else
+ Fa = min (1.0, (1.0 - dsta) / srca);
+ return mult_chan (src, dst, Fa, 0.0);
+
+ case PIXMAN_OP_DISJOINT_OUT_REVERSE:
+ if (dsta == 0.0)
+ Fb = 1.0;
+ else
+ Fb = min (1.0, (1.0 - srca) / dsta);
+ return mult_chan (src, dst, 0.0, Fb);
+
+ case PIXMAN_OP_DISJOINT_ATOP:
+ if (srca == 0.0)
+ Fa = 0.0;
+ else
+ Fa = max (0.0, 1.0 - (1.0 - dsta) / srca);
+ if (dsta == 0.0)
+ Fb = 1.0;
+ else
+ Fb = min (1.0, (1.0 - srca) / dsta);
+ return mult_chan (src, dst, Fa, Fb);
+
+ case PIXMAN_OP_DISJOINT_ATOP_REVERSE:
+ if (srca == 0.0)
+ Fa = 1.0;
+ else
+ Fa = min (1.0, (1.0 - dsta) / srca);
+ if (dsta == 0.0)
+ Fb = 0.0;
+ else
+ Fb = max (0.0, 1.0 - (1.0 - srca) / dsta);
+ return mult_chan (src, dst, Fa, Fb);
+
+ case PIXMAN_OP_DISJOINT_XOR:
+ if (srca == 0.0)
+ Fa = 1.0;
+ else
+ Fa = min (1.0, (1.0 - dsta) / srca);
+ if (dsta == 0.0)
+ Fb = 1.0;
+ else
+ Fb = min (1.0, (1.0 - srca) / dsta);
+ return mult_chan (src, dst, Fa, Fb);
+
+ case PIXMAN_OP_CONJOINT_OVER:
+ if (dsta == 0.0)
+ Fb = 0.0;
+ else
+ Fb = max (0.0, 1.0 - srca / dsta);
+ return mult_chan (src, dst, 1.0, Fb);
+
+ case PIXMAN_OP_CONJOINT_OVER_REVERSE:
+ if (srca == 0.0)
+ Fa = 0.0;
+ else
+ Fa = max (0.0, 1.0 - dsta / srca);
+ return mult_chan (src, dst, Fa, 1.0);
+
+ case PIXMAN_OP_CONJOINT_IN:
+ if (srca == 0.0)
+ Fa = 1.0;
+ else
+ Fa = min (1.0, dsta / srca);
+ return mult_chan (src, dst, Fa, 0.0);
+
+ case PIXMAN_OP_CONJOINT_IN_REVERSE:
+ if (dsta == 0.0)
+ Fb = 1.0;
+ else
+ Fb = min (1.0, srca / dsta);
+ return mult_chan (src, dst, 0.0, Fb);
+
+ case PIXMAN_OP_CONJOINT_OUT:
+ if (srca == 0.0)
+ Fa = 0.0;
+ else
+ Fa = max (0.0, 1.0 - dsta / srca);
+ return mult_chan (src, dst, Fa, 0.0);
+
+ case PIXMAN_OP_CONJOINT_OUT_REVERSE:
+ if (dsta == 0.0)
+ Fb = 0.0;
+ else
+ Fb = max (0.0, 1.0 - srca / dsta);
+ return mult_chan (src, dst, 0.0, Fb);
+
+ case PIXMAN_OP_CONJOINT_ATOP:
+ if (srca == 0.0)
+ Fa = 1.0;
+ else
+ Fa = min (1.0, dsta / srca);
+ if (dsta == 0.0)
+ Fb = 0.0;
+ else
+ Fb = max (0.0, 1.0 - srca / dsta);
+ return mult_chan (src, dst, Fa, Fb);
+
+ case PIXMAN_OP_CONJOINT_ATOP_REVERSE:
+ if (srca == 0.0)
+ Fa = 0.0;
+ else
+ Fa = max (0.0, 1.0 - dsta / srca);
+ if (dsta == 0.0)
+ Fb = 1.0;
+ else
+ Fb = min (1.0, srca / dsta);
+ return mult_chan (src, dst, Fa, Fb);
+
+ case PIXMAN_OP_CONJOINT_XOR:
+ if (srca == 0.0)
+ Fa = 0.0;
+ else
+ Fa = max (0.0, 1.0 - dsta / srca);
+ if (dsta == 0.0)
+ Fb = 0.0;
+ else
+ Fb = max (0.0, 1.0 - srca / dsta);
+ return mult_chan (src, dst, Fa, Fb);
+
+ case PIXMAN_OP_MULTIPLY:
+ case PIXMAN_OP_SCREEN:
+ case PIXMAN_OP_OVERLAY:
+ case PIXMAN_OP_DARKEN:
+ case PIXMAN_OP_LIGHTEN:
+ case PIXMAN_OP_COLOR_DODGE:
+ case PIXMAN_OP_COLOR_BURN:
+ case PIXMAN_OP_HARD_LIGHT:
+ case PIXMAN_OP_SOFT_LIGHT:
+ case PIXMAN_OP_DIFFERENCE:
+ case PIXMAN_OP_EXCLUSION:
+ case PIXMAN_OP_HSL_HUE:
+ case PIXMAN_OP_HSL_SATURATION:
+ case PIXMAN_OP_HSL_COLOR:
+ case PIXMAN_OP_HSL_LUMINOSITY:
+ default:
+ abort();
+ }
+#undef mult_chan
+}
+
+static void
+do_composite (pixman_op_t op,
+ const color_t *src,
+ const color_t *mask,
+ const color_t *dst,
+ color_t *result,
+ pixman_bool_t component_alpha)
+{
+ color_t srcval, srcalpha;
+
+ if (mask == NULL)
+ {
+ srcval = *src;
+
+ srcalpha.r = src->a;
+ srcalpha.g = src->a;
+ srcalpha.b = src->a;
+ srcalpha.a = src->a;
+ }
+ else if (component_alpha)
+ {
+ srcval.r = src->r * mask->r;
+ srcval.g = src->g * mask->g;
+ srcval.b = src->b * mask->b;
+ srcval.a = src->a * mask->a;
+
+ srcalpha.r = src->a * mask->r;
+ srcalpha.g = src->a * mask->g;
+ srcalpha.b = src->a * mask->b;
+ srcalpha.a = src->a * mask->a;
+ }
+ else
+ {
+ srcval.r = src->r * mask->a;
+ srcval.g = src->g * mask->a;
+ srcval.b = src->b * mask->a;
+ srcval.a = src->a * mask->a;
+
+ srcalpha.r = src->a * mask->a;
+ srcalpha.g = src->a * mask->a;
+ srcalpha.b = src->a * mask->a;
+ srcalpha.a = src->a * mask->a;
+ }
+
+ result->r = calc_op (op, srcval.r, dst->r, srcalpha.r, dst->a);
+ result->g = calc_op (op, srcval.g, dst->g, srcalpha.g, dst->a);
+ result->b = calc_op (op, srcval.b, dst->b, srcalpha.b, dst->a);
+ result->a = calc_op (op, srcval.a, dst->a, srcalpha.a, dst->a);
+}
+
+static void
+color_correct (pixman_format_code_t format,
+ color_t *color)
+{
+#define round_pix(pix, mask) \
+ ((int)((pix) * (mask) + .5) / (double) (mask))
+
+ if (PIXMAN_FORMAT_R (format) == 0)
+ {
+ color->r = 0.0;
+ color->g = 0.0;
+ color->b = 0.0;
+ }
+ else
+ {
+ color->r = round_pix (color->r, PIXMAN_FORMAT_R (format));
+ color->g = round_pix (color->g, PIXMAN_FORMAT_G (format));
+ color->b = round_pix (color->b, PIXMAN_FORMAT_B (format));
+ }
+
+ if (PIXMAN_FORMAT_A (format) == 0)
+ color->a = 1.0;
+ else
+ color->a = round_pix (color->a, PIXMAN_FORMAT_A (format));
+
+#undef round_pix
+}
+
+static void
+get_pixel (pixman_image_t *image,
+ pixman_format_code_t format,
+ color_t *color)
+{
+#define MASK(N) ((1UL << (N))-1)
+
+ unsigned long rs, gs, bs, as;
+ int a, r, g, b;
+ unsigned long val;
+
+ val = *(unsigned long *) pixman_image_get_data (image);
+#ifdef WORDS_BIGENDIAN
+ val >>= 8 * sizeof(val) - PIXMAN_FORMAT_BPP (format);
+#endif
+
+ /* Number of bits in each channel */
+ a = PIXMAN_FORMAT_A (format);
+ r = PIXMAN_FORMAT_R (format);
+ g = PIXMAN_FORMAT_G (format);
+ b = PIXMAN_FORMAT_B (format);
+
+ switch (PIXMAN_FORMAT_TYPE (format))
+ {
+ case PIXMAN_TYPE_ARGB:
+ bs = 0;
+ gs = b + bs;
+ rs = g + gs;
+ as = r + rs;
+ break;
+
+ case PIXMAN_TYPE_ABGR:
+ rs = 0;
+ gs = r + rs;
+ bs = g + gs;
+ as = b + bs;
+ break;
+
+ case PIXMAN_TYPE_BGRA:
+ as = 0;
+ rs = PIXMAN_FORMAT_BPP (format) - (b + g + r);
+ gs = r + rs;
+ bs = g + gs;
+ break;
+
+ case PIXMAN_TYPE_A:
+ as = 0;
+ rs = 0;
+ gs = 0;
+ bs = 0;
+ break;
+
+ case PIXMAN_TYPE_OTHER:
+ case PIXMAN_TYPE_COLOR:
+ case PIXMAN_TYPE_GRAY:
+ case PIXMAN_TYPE_YUY2:
+ case PIXMAN_TYPE_YV12:
+ default:
+ abort ();
+ as = 0;
+ rs = 0;
+ gs = 0;
+ bs = 0;
+ break;
+ }
+
+ if (MASK (a) != 0)
+ color->a = ((val >> as) & MASK (a)) / (double) MASK (a);
+ else
+ color->a = 1.0;
+
+ if (MASK (r) != 0)
+ {
+ color->r = ((val >> rs) & MASK (r)) / (double) MASK (r);
+ color->g = ((val >> gs) & MASK (g)) / (double) MASK (g);
+ color->b = ((val >> bs) & MASK (b)) / (double) MASK (b);
+ }
+ else
+ {
+ color->r = 0.0;
+ color->g = 0.0;
+ color->b = 0.0;
+ }
+
+#undef MASK
+}
+
+static double
+eval_diff (color_t *expected, color_t *test)
+{
+ double rscale, gscale, bscale, ascale;
+ double rdiff, gdiff, bdiff, adiff;
+
+ /* XXX: Need to be provided mask shifts so we can produce useful error
+ * values.
+ */
+ rscale = 1.0 * (1 << 5);
+ gscale = 1.0 * (1 << 6);
+ bscale = 1.0 * (1 << 5);
+ ascale = 1.0 * 32;
+
+ rdiff = fabs (test->r - expected->r) * rscale;
+    gdiff = fabs (test->g - expected->g) * gscale;
+    bdiff = fabs (test->b - expected->b) * bscale;
+ adiff = fabs (test->a - expected->a) * ascale;
+
+ return max (max (max (rdiff, gdiff), bdiff), adiff);
+}
+
+static char *
+describe_image (image_t *info, char *buf, int buflen)
+{
+ if (info->size)
+ {
+ snprintf (buf, buflen, "%s %dx%d%s",
+ info->format->name,
+ info->size, info->size,
+ info->repeat ? "R" :"");
+ }
+ else
+ {
+ snprintf (buf, buflen, "solid");
+ }
+
+ return buf;
+}
+
+/* Test a composite of a given operation, source, mask, and destination
+ * picture.
+ * Fills the destination image, and samples from the 0,0 pixel corner.
+ */
+static pixman_bool_t
+composite_test (image_t *dst,
+ const operator_t *op,
+ image_t *src,
+ image_t *mask,
+ pixman_bool_t component_alpha)
+{
+ pixman_color_t fill;
+ pixman_rectangle16_t rect;
+ color_t expected, result, tdst, tsrc, tmsk;
+ double diff;
+ pixman_bool_t success = TRUE;
+
+ compute_pixman_color (dst->color, &fill);
+ rect.x = rect.y = 0;
+ rect.width = rect.height = dst->size;
+ pixman_image_fill_rectangles (PIXMAN_OP_SRC, dst->image,
+ &fill, 1, &rect);
+
+ if (mask != NULL)
+ {
+ pixman_image_set_component_alpha (mask->image, component_alpha);
+ pixman_image_composite (op->op, src->image, mask->image, dst->image,
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ dst->size, dst->size);
+
+ tmsk = *mask->color;
+ if (mask->size)
+ {
+ color_correct (mask->format->format, &tmsk);
+
+ if (component_alpha &&
+ PIXMAN_FORMAT_R (mask->format->format) == 0)
+ {
+ /* Ax component-alpha masks expand alpha into
+ * all color channels.
+ */
+ tmsk.r = tmsk.g = tmsk.b = tmsk.a;
+ }
+ }
+ }
+ else
+ {
+ pixman_image_composite (op->op, src->image, NULL, dst->image,
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ dst->size, dst->size);
+ }
+ get_pixel (dst->image, dst->format->format, &result);
+
+ tdst = *dst->color;
+ color_correct (dst->format->format, &tdst);
+ tsrc = *src->color;
+ if (src->size)
+ color_correct (src->format->format, &tsrc);
+ do_composite (op->op, &tsrc, mask ? &tmsk : NULL, &tdst,
+ &expected, component_alpha);
+ color_correct (dst->format->format, &expected);
+
+ diff = eval_diff (&expected, &result);
+ if (diff > 3.0)
+ {
+ char buf[40];
+
+ snprintf (buf, sizeof (buf),
+ "%s %scomposite",
+ op->name,
+ component_alpha ? "CA " : "");
+
+ printf ("%s test error of %.4f --\n"
+ " R G B A\n"
+ "got: %.2f %.2f %.2f %.2f [%08lx]\n"
+ "expected: %.2f %.2f %.2f %.2f\n",
+ buf, diff,
+ result.r, result.g, result.b, result.a,
+ *(unsigned long *) pixman_image_get_data (dst->image),
+ expected.r, expected.g, expected.b, expected.a);
+
+ if (mask != NULL)
+ {
+ printf ("src color: %.2f %.2f %.2f %.2f\n"
+ "msk color: %.2f %.2f %.2f %.2f\n"
+ "dst color: %.2f %.2f %.2f %.2f\n",
+ src->color->r, src->color->g,
+ src->color->b, src->color->a,
+ mask->color->r, mask->color->g,
+ mask->color->b, mask->color->a,
+ dst->color->r, dst->color->g,
+ dst->color->b, dst->color->a);
+ printf ("src: %s, ", describe_image (src, buf, sizeof (buf)));
+ printf ("mask: %s, ", describe_image (mask, buf, sizeof (buf)));
+ printf ("dst: %s\n\n", describe_image (dst, buf, sizeof (buf)));
+ }
+ else
+ {
+ printf ("src color: %.2f %.2f %.2f %.2f\n"
+ "dst color: %.2f %.2f %.2f %.2f\n",
+ src->color->r, src->color->g,
+ src->color->b, src->color->a,
+ dst->color->r, dst->color->g,
+ dst->color->b, dst->color->a);
+ printf ("src: %s, ", describe_image (src, buf, sizeof (buf)));
+ printf ("dst: %s\n\n", describe_image (dst, buf, sizeof (buf)));
+ }
+
+ success = FALSE;
+ }
+
+ return success;
+}
+
+#define REPEAT 0x01000000
+#define FLAGS 0xff000000
+
+static void
+image_init (image_t *info,
+ int color,
+ int format,
+ int size)
+{
+ pixman_color_t fill;
+
+ info->color = &colors[color];
+ compute_pixman_color (info->color, &fill);
+
+ info->format = &formats[format];
+ info->size = size & ~FLAGS;
+ info->repeat = PIXMAN_REPEAT_NONE;
+
+ if (info->size)
+ {
+ pixman_rectangle16_t rect;
+
+ info->image = pixman_image_create_bits (info->format->format,
+ info->size, info->size,
+ NULL, 0);
+
+ rect.x = rect.y = 0;
+ rect.width = rect.height = info->size;
+ pixman_image_fill_rectangles (PIXMAN_OP_SRC, info->image, &fill,
+ 1, &rect);
+
+ if (size & REPEAT)
+ {
+ pixman_image_set_repeat (info->image, PIXMAN_REPEAT_NORMAL);
+ info->repeat = PIXMAN_REPEAT_NORMAL;
+ }
+ }
+ else
+ {
+ info->image = pixman_image_create_solid_fill (&fill);
+ }
+}
+
+static void
+image_fini (image_t *info)
+{
+ pixman_image_unref (info->image);
+}
+
+int
+main (void)
+{
+ pixman_bool_t ok, group_ok = TRUE, ca;
+ int i, d, m, s;
+ int tests_passed = 0, tests_total = 0;
+ int sizes[] = { 1, 1 | REPEAT, 10 };
+ int num_tests;
+
+ for (i = 0; i < ARRAY_LENGTH (colors); i++)
+ {
+ colors[i].r *= colors[i].a;
+ colors[i].g *= colors[i].a;
+ colors[i].b *= colors[i].a;
+ }
+
+ num_tests = ARRAY_LENGTH (colors) * ARRAY_LENGTH (formats);
+
+ for (d = 0; d < num_tests; d++)
+ {
+ image_t dst;
+
+ image_init (
+ &dst, d / ARRAY_LENGTH (formats), d % ARRAY_LENGTH (formats), 1);
+
+
+ for (s = -ARRAY_LENGTH (colors);
+ s < ARRAY_LENGTH (sizes) * num_tests;
+ s++)
+ {
+ image_t src;
+
+ if (s < 0)
+ {
+ image_init (&src, -s - 1, 0, 0);
+ }
+ else
+ {
+ image_init (&src,
+ s / ARRAY_LENGTH (sizes) / ARRAY_LENGTH (formats),
+ s / ARRAY_LENGTH (sizes) % ARRAY_LENGTH (formats),
+ sizes[s % ARRAY_LENGTH (sizes)]);
+ }
+
+ for (m = -ARRAY_LENGTH (colors);
+ m < ARRAY_LENGTH (sizes) * num_tests;
+ m++)
+ {
+ image_t mask;
+
+ if (m < 0)
+ {
+ image_init (&mask, -m - 1, 0, 0);
+ }
+ else
+ {
+ image_init (
+ &mask,
+ m / ARRAY_LENGTH (sizes) / ARRAY_LENGTH (formats),
+ m / ARRAY_LENGTH (sizes) % ARRAY_LENGTH (formats),
+ sizes[m % ARRAY_LENGTH (sizes)]);
+ }
+
+ for (ca = -1; ca <= 1; ca++)
+ {
+ for (i = 0; i < ARRAY_LENGTH (operators); i++)
+ {
+ const operator_t *op = &operators[i];
+
+ switch (ca)
+ {
+ case -1:
+ ok = composite_test (&dst, op, &src, NULL, FALSE);
+ break;
+ case 0:
+ ok = composite_test (&dst, op, &src, &mask, FALSE);
+ break;
+ case 1:
+ ok = composite_test (&dst, op, &src, &mask,
+ mask.size? TRUE : FALSE);
+ break;
+ default:
+ break;
+ }
+ group_ok = group_ok && ok;
+ tests_passed += ok;
+ tests_total++;
+ }
+ }
+
+ image_fini (&mask);
+ }
+ image_fini (&src);
+ }
+ image_fini (&dst);
+ }
+
+ return group_ok == FALSE;
+}
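
The reference results in this new test come from calc_op () above: every Porter-Duff operator reduces to mult_chan (src, dst, Fa, Fb) = min (src * Fa + dst * Fb, 1.0) on premultiplied channels, and only the Fa/Fb pair differs per operator. A minimal standalone sketch of that arithmetic for OVER and IN, for illustration only (not part of the patch):

    #include <stdio.h>

    #define CLAMP1(x) ((x) < 1.0 ? (x) : 1.0)

    /* OVER: Fa = 1.0, Fb = 1.0 - srca */
    static double
    op_over (double src, double dst, double srca)
    {
        return CLAMP1 (src + dst * (1.0 - srca));
    }

    /* IN: Fa = dsta, Fb = 0.0 */
    static double
    op_in (double src, double dsta)
    {
        return CLAMP1 (src * dsta);
    }

    int
    main (void)
    {
        /* 50% translucent red over opaque white, red channel */
        printf ("OVER: %.2f\n", op_over (0.5, 1.0, 0.5)); /* 1.00 */
        printf ("IN:   %.2f\n", op_in (0.5, 1.0));        /* 0.50 */
        return 0;
    }
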
diff --git a/lib/pixman/test/convolution-test.c b/lib/pixman/test/convolution-test.c
index 8609d38a0..da284af7b 100644
--- a/lib/pixman/test/convolution-test.c
+++ b/lib/pixman/test/convolution-test.c
@@ -1,7 +1,7 @@
#include <stdio.h>
#include <stdlib.h>
#include "pixman.h"
-#include "utils.h"
+#include "gtk-utils.h"
int
main (int argc, char **argv)
diff --git a/lib/pixman/test/fetch-test.c b/lib/pixman/test/fetch-test.c
index 6306a4c42..2ca16ddbf 100644
--- a/lib/pixman/test/fetch-test.c
+++ b/lib/pixman/test/fetch-test.c
@@ -6,7 +6,8 @@
#define SIZE 1024
-pixman_indexed_t mono_pallete = {
+static pixman_indexed_t mono_palette =
+{
.rgba = { 0x00000000, 0x00ffffff },
};
@@ -20,14 +21,15 @@ typedef struct {
pixman_indexed_t *indexed;
} testcase_t;
-testcase_t testcases[] = {
+static testcase_t testcases[] =
+{
{
.format = PIXMAN_a8r8g8b8,
.width = 2, .height = 2,
.stride = 8,
- .src = { 0x00112233, 0x44556677,
+ .src = { 0x00112233, 0x44556677,
0x8899aabb, 0xccddeeff },
- .dst = { 0x00112233, 0x44556677,
+ .dst = { 0x00112233, 0x44556677,
0x8899aabb, 0xccddeeff },
.indexed = NULL,
},
@@ -36,24 +38,33 @@ testcase_t testcases[] = {
.width = 8, .height = 2,
.stride = 4,
#ifdef WORDS_BIGENDIAN
- .src = { 0xaa000000,
- 0x55000000 },
+ .src =
+ {
+ 0xaa000000,
+ 0x55000000
+ },
#else
- .src = { 0x00000055,
- 0x000000aa },
+ .src =
+ {
+ 0x00000055,
+ 0x000000aa
+ },
#endif
- .dst = { 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000,
- 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff },
- .indexed = &mono_pallete,
+ .dst =
+ {
+ 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000,
+ 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff
+ },
+ .indexed = &mono_palette,
},
#if 0
{
.format = PIXMAN_g8,
.width = 4, .height = 2,
.stride = 4,
- .src = { 0x01234567,
+ .src = { 0x01234567,
0x89abcdef },
- .dst = { 0x00010101, 0x00232323, 0x00454545, 0x00676767,
+ .dst = { 0x00010101, 0x00232323, 0x00454545, 0x00676767,
0x00898989, 0x00ababab, 0x00cdcdcd, 0x00efefef, },
},
#endif
@@ -63,28 +74,33 @@ testcase_t testcases[] = {
.width = 8, .height = 2,
.stride = 8,
#ifdef WORDS_BIGENDIAN
- .src = { 0x00ff00ff, 0x00ff00ff,
- 0xff00ff00, 0xff00ff00,
- 0x80ff8000,
- 0x800080ff
+ .src =
+ {
+ 0x00ff00ff, 0x00ff00ff,
+ 0xff00ff00, 0xff00ff00,
+ 0x80ff8000,
+ 0x800080ff
},
#else
- .src = { 0xff00ff00, 0xff00ff00,
- 0x00ff00ff, 0x00ff00ff,
- 0x0080ff80,
- 0xff800080
- },
+ .src =
+ {
+ 0xff00ff00, 0xff00ff00,
+ 0x00ff00ff, 0x00ff00ff,
+ 0x0080ff80,
+ 0xff800080
+ },
#endif
- .dst = {
- 0xff000000, 0xffffffff, 0xffb80000, 0xffffe113,
- 0xff000000, 0xffffffff, 0xff0023ee, 0xff4affff,
- 0xffffffff, 0xff000000, 0xffffe113, 0xffb80000,
- 0xffffffff, 0xff000000, 0xff4affff, 0xff0023ee,
+ .dst =
+ {
+ 0xff000000, 0xffffffff, 0xffb80000, 0xffffe113,
+ 0xff000000, 0xffffffff, 0xff0023ee, 0xff4affff,
+ 0xffffffff, 0xff000000, 0xffffe113, 0xffb80000,
+ 0xffffffff, 0xff000000, 0xff4affff, 0xff0023ee,
},
},
};
-const int ntestcases = sizeof(testcases)/sizeof(testcases[0]);
+int n_test_cases = sizeof(testcases)/sizeof(testcases[0]);
static uint32_t
@@ -133,26 +149,29 @@ main (int argc, char **argv)
int i, j, x, y;
int ret = 0;
- for (i = 0; i < ntestcases; ++i) {
- for (j = 0; j < 2; ++j) {
+ for (i = 0; i < n_test_cases; ++i)
+ {
+ for (j = 0; j < 2; ++j)
+ {
src_img = pixman_image_create_bits (testcases[i].format,
- testcases[i].width,
+ testcases[i].width,
testcases[i].height,
testcases[i].src,
testcases[i].stride);
pixman_image_set_indexed(src_img, testcases[i].indexed);
dst_img = pixman_image_create_bits (PIXMAN_a8r8g8b8,
- testcases[i].width,
+ testcases[i].width,
testcases[i].height,
dst,
testcases[i].width*4);
- if (j) {
+ if (j)
+ {
pixman_image_set_accessors (src_img, reader, writer);
pixman_image_set_accessors (dst_img, reader, writer);
}
-
+
pixman_image_composite (PIXMAN_OP_SRC, src_img, NULL, dst_img,
0, 0, 0, 0, 0, 0, testcases[i].width, testcases[i].height);
@@ -160,18 +179,23 @@ main (int argc, char **argv)
pixman_image_unref (dst_img);
for (y = 0; y < testcases[i].height; ++y)
- for (x = 0; x < testcases[i].width; ++x) {
- int offset = y*testcases[i].width + x;
- if (dst[offset] != testcases[i].dst[offset]) {
+ {
+ for (x = 0; x < testcases[i].width; ++x)
+ {
+ int offset = y * testcases[i].width + x;
+
+ if (dst[offset] != testcases[i].dst[offset])
+ {
printf ("test %i%c: pixel mismatch at (x=%d,y=%d): %08x expected, %08x obtained\n",
i + 1, 'a' + j,
- x, y,
+ x, y,
testcases[i].dst[offset], dst[offset]);
ret = 1;
}
}
+ }
}
}
-
+
return ret;
}
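
For the a1 test case above, the expected .dst words can be checked by hand: in the little-endian branch pixel x of an a1 scanline sits in bit x of the 32-bit word, and mono_palette maps a set bit to 0x00ffffff. A throwaway sketch of that decoding, for illustration only (the bit order is an assumption read off the #else branch and the matching .dst row):

    #include <stdio.h>
    #include <stdint.h>

    int
    main (void)
    {
        uint32_t src = 0x00000055;                        /* first a1 scanline  */
        uint32_t palette[2] = { 0x00000000, 0x00ffffff }; /* mono_palette above */
        int x;

        /* prints ffffff, 000000, ffffff, ... matching the first .dst row */
        for (x = 0; x < 8; x++)
            printf ("pixel %d -> %08x\n", x, (unsigned) palette[(src >> x) & 1]);

        return 0;
    }
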
diff --git a/lib/pixman/test/gradient-test.c b/lib/pixman/test/gradient-test.c
index 2593ee38a..fc84844b0 100644
--- a/lib/pixman/test/gradient-test.c
+++ b/lib/pixman/test/gradient-test.c
@@ -1,7 +1,7 @@
#include <stdio.h>
#include <stdlib.h>
#include "pixman.h"
-#include "utils.h"
+#include "gtk-utils.h"
int
main (int argc, char **argv)
@@ -21,18 +21,20 @@ main (int argc, char **argv)
pixman_point_fixed_t p1 = { pixman_double_to_fixed (0), 0 };
pixman_point_fixed_t p2 = { pixman_double_to_fixed (WIDTH / 8.),
pixman_int_to_fixed (0) };
+#if 0
pixman_transform_t trans = {
{ { pixman_double_to_fixed (2), pixman_double_to_fixed (0.5), pixman_double_to_fixed (-100), },
{ pixman_double_to_fixed (0), pixman_double_to_fixed (3), pixman_double_to_fixed (0), },
{ pixman_double_to_fixed (0), pixman_double_to_fixed (0.000), pixman_double_to_fixed (1.0) }
}
};
-
- pixman_transform_t id = {
+#else
+ pixman_transform_t trans = {
{ { pixman_fixed_1, 0, 0 },
{ 0, pixman_fixed_1, 0 },
{ 0, 0, pixman_fixed_1 } }
};
+#endif
pixman_point_fixed_t c_inner;
pixman_point_fixed_t c_outer;
@@ -67,7 +69,7 @@ main (int argc, char **argv)
src_img = pixman_image_create_linear_gradient (&p1, &p2,
stops, 2);
- pixman_image_set_transform (src_img, &id);
+ pixman_image_set_transform (src_img, &trans);
pixman_image_set_repeat (src_img, PIXMAN_REPEAT_PAD);
pixman_image_composite (PIXMAN_OP_OVER, src_img, NULL, dest_img,
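
The replacement transform above is simply the identity in pixman's 16.16 fixed-point representation: pixman_fixed_1 is 1 << 16 and pixman_double_to_fixed () scales by 65536. A quick sketch of those conversions, for illustration only:

    #include <stdio.h>
    #include <pixman.h>

    int
    main (void)
    {
        printf ("pixman_fixed_1               = 0x%08x\n", (unsigned) pixman_fixed_1);
        printf ("pixman_int_to_fixed (2)      = 0x%08x\n", (unsigned) pixman_int_to_fixed (2));
        printf ("pixman_double_to_fixed (0.5) = 0x%08x\n", (unsigned) pixman_double_to_fixed (0.5));
        return 0;
    }
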
diff --git a/lib/pixman/test/gtk-utils.c b/lib/pixman/test/gtk-utils.c
new file mode 100644
index 000000000..751a164c0
--- /dev/null
+++ b/lib/pixman/test/gtk-utils.c
@@ -0,0 +1,113 @@
+#include <gtk/gtk.h>
+#include <config.h>
+#include "pixman-private.h" /* For image->bits.format
+ * FIXME: there should probably be public API for this
+ */
+#include "gtk-utils.h"
+
+GdkPixbuf *
+pixbuf_from_argb32 (uint32_t *bits,
+ gboolean has_alpha,
+ int width,
+ int height,
+ int stride)
+{
+ GdkPixbuf *pixbuf = gdk_pixbuf_new (GDK_COLORSPACE_RGB, TRUE,
+ 8, width, height);
+ int p_stride = gdk_pixbuf_get_rowstride (pixbuf);
+ guint32 *p_bits = (guint32 *)gdk_pixbuf_get_pixels (pixbuf);
+ int w, h;
+
+ for (h = 0; h < height; ++h)
+ {
+ for (w = 0; w < width; ++w)
+ {
+ uint32_t argb = bits[h * (stride / 4) + w];
+ guint r, g, b, a;
+ char *pb = (char *)p_bits;
+
+ pb += h * p_stride + w * 4;
+
+ r = (argb & 0x00ff0000) >> 16;
+ g = (argb & 0x0000ff00) >> 8;
+ b = (argb & 0x000000ff) >> 0;
+ a = has_alpha? (argb & 0xff000000) >> 24 : 0xff;
+
+ if (a)
+ {
+ r = (r * 255) / a;
+ g = (g * 255) / a;
+ b = (b * 255) / a;
+ }
+
+ if (r > 255) r = 255;
+ if (g > 255) g = 255;
+ if (b > 255) b = 255;
+
+ pb[0] = r;
+ pb[1] = g;
+ pb[2] = b;
+ pb[3] = a;
+ }
+ }
+
+ return pixbuf;
+}
+
+
+static gboolean
+on_expose (GtkWidget *widget, GdkEventExpose *expose, gpointer data)
+{
+ GdkPixbuf *pixbuf = data;
+
+ gdk_draw_pixbuf (widget->window, NULL,
+ pixbuf, 0, 0, 0, 0,
+ gdk_pixbuf_get_width (pixbuf),
+ gdk_pixbuf_get_height (pixbuf),
+ GDK_RGB_DITHER_NONE,
+ 0, 0);
+
+ return TRUE;
+}
+
+void
+show_image (pixman_image_t *image)
+{
+ GtkWidget *window;
+ GdkPixbuf *pixbuf;
+ int width, height, stride;
+ int argc;
+ char **argv;
+ char *arg0 = g_strdup ("pixman-test-program");
+ gboolean has_alpha;
+ pixman_format_code_t format;
+
+ argc = 1;
+ argv = (char **)&arg0;
+
+ gtk_init (&argc, &argv);
+
+ window = gtk_window_new (GTK_WINDOW_TOPLEVEL);
+ width = pixman_image_get_width (image);
+ height = pixman_image_get_height (image);
+ stride = pixman_image_get_stride (image);
+
+ format = image->bits.format;
+
+ if (format == PIXMAN_a8r8g8b8)
+ has_alpha = TRUE;
+ else if (format == PIXMAN_x8r8g8b8)
+ has_alpha = FALSE;
+ else
+ g_error ("Can't deal with this format: %x\n", format);
+
+ pixbuf = pixbuf_from_argb32 (pixman_image_get_data (image), has_alpha,
+ width, height, stride);
+
+ g_signal_connect (window, "expose_event", G_CALLBACK (on_expose), pixbuf);
+ g_signal_connect (window, "delete_event", G_CALLBACK (gtk_main_quit), NULL);
+
+ gtk_widget_show (window);
+
+ gtk_main ();
+}
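
pixbuf_from_argb32 () above has to undo pixman's premultiplied alpha before handing pixels to GdkPixbuf, which stores straight (non-premultiplied) RGBA; that is what the division by a does, and rounding is why the clamps to 255 follow it. The per-pixel conversion in isolation, for illustration only (not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    int
    main (void)
    {
        uint32_t argb = 0x80400000;        /* premultiplied: a = 0x80, r = 0x40 */
        unsigned a = (argb >> 24) & 0xff;
        unsigned r = (argb >> 16) & 0xff;
        unsigned g = (argb >>  8) & 0xff;
        unsigned b =  argb        & 0xff;

        if (a)
        {
            r = r * 255 / a;   /* 0x40 * 255 / 0x80 = 0x7f */
            g = g * 255 / a;
            b = b * 255 / a;
        }

        printf ("straight RGBA: %02x %02x %02x %02x\n", r, g, b, a);
        return 0;
    }
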
diff --git a/lib/pixman/test/gtk-utils.h b/lib/pixman/test/gtk-utils.h
new file mode 100644
index 000000000..2cb13bcf0
--- /dev/null
+++ b/lib/pixman/test/gtk-utils.h
@@ -0,0 +1,13 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <glib.h>
+#include <gtk/gtk.h>
+#include "pixman.h"
+
+void show_image (pixman_image_t *image);
+
+GdkPixbuf *pixbuf_from_argb32 (uint32_t *bits,
+ gboolean has_alpha,
+ int width,
+ int height,
+ int stride);
diff --git a/lib/pixman/test/region-test.c b/lib/pixman/test/region-test.c
index 3568969f1..9d5a41eb9 100644
--- a/lib/pixman/test/region-test.c
+++ b/lib/pixman/test/region-test.c
@@ -1,7 +1,7 @@
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
-#include "pixman.h"
+#include "utils.h"
int
main ()
@@ -22,8 +22,15 @@ main ()
{ 2, 6, 7, 6 },
{ 4, 1, 6, 1 },
};
- int i;
+ int i, j;
pixman_box32_t *b;
+ pixman_image_t *image, *fill;
+ pixman_color_t white = {
+ 0xffff,
+ 0xffff,
+ 0xffff,
+ 0xffff
+ };
/* This used to go into an infinite loop before pixman-region.c
* was fixed to not use explict "short" variables
@@ -74,5 +81,43 @@ main ()
assert (i == 0);
+ fill = pixman_image_create_solid_fill (&white);
+ for (i = 0; i < 100; i++)
+ {
+ int image_size = 128;
+
+ pixman_region32_init (&r1);
+
+ /* Add some random rectangles */
+ for (j = 0; j < 64; j++)
+ pixman_region32_union_rect (&r1, &r1,
+ lcg_rand_n (image_size),
+ lcg_rand_n (image_size),
+ lcg_rand_n (25),
+ lcg_rand_n (25));
+
+ /* Clip to image size */
+ pixman_region32_init_rect (&r2, 0, 0, image_size, image_size);
+ pixman_region32_intersect (&r1, &r1, &r2);
+ pixman_region32_fini (&r2);
+
+ /* render region to a1 mask */
+ image = pixman_image_create_bits (PIXMAN_a1, image_size, image_size, NULL, 0);
+ pixman_image_set_clip_region32 (image, &r1);
+ pixman_image_composite32 (PIXMAN_OP_SRC,
+ fill, NULL, image,
+ 0, 0, 0, 0, 0, 0,
+ image_size, image_size);
+ pixman_region32_init_from_image (&r2, image);
+
+ pixman_image_unref (image);
+
+ assert (pixman_region32_equal (&r1, &r2));
+ pixman_region32_fini (&r1);
+ pixman_region32_fini (&r2);
+
+ }
+ pixman_image_unref (fill);
+
return 0;
}
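
The new loop above checks that rasterizing a region into an a1 mask and reading it back with pixman_region32_init_from_image () is lossless. The same round trip reduced to a single rectangle, for illustration only (not part of the patch):

    #include <assert.h>
    #include <pixman.h>

    int
    main (void)
    {
        pixman_color_t white = { 0xffff, 0xffff, 0xffff, 0xffff };
        pixman_image_t *fill = pixman_image_create_solid_fill (&white);
        pixman_image_t *mask = pixman_image_create_bits (PIXMAN_a1, 64, 64, NULL, 0);
        pixman_region32_t in, out;

        pixman_region32_init_rect (&in, 10, 10, 20, 20);
        pixman_image_set_clip_region32 (mask, &in);
        pixman_image_composite32 (PIXMAN_OP_SRC, fill, NULL, mask,
                                  0, 0, 0, 0, 0, 0, 64, 64);

        pixman_region32_init_from_image (&out, mask);
        assert (pixman_region32_equal (&in, &out));

        pixman_region32_fini (&in);
        pixman_region32_fini (&out);
        pixman_image_unref (mask);
        pixman_image_unref (fill);
        return 0;
    }
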
diff --git a/lib/pixman/test/scaling-test.c b/lib/pixman/test/scaling-test.c
index 8899c594f..29772906d 100644
--- a/lib/pixman/test/scaling-test.c
+++ b/lib/pixman/test/scaling-test.c
@@ -23,198 +23,7 @@
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
-#include "pixman.h"
-
-/* A primitive pseudorandom number generator, taken from POSIX.1-2001 example */
-
-static uint32_t lcg_seed;
-
-uint32_t
-lcg_rand (void)
-{
- lcg_seed = lcg_seed * 1103515245 + 12345;
- return ((uint32_t)(lcg_seed / 65536) % 32768);
-}
-
-void
-lcg_srand (uint32_t seed)
-{
- lcg_seed = seed;
-}
-
-uint32_t
-lcg_rand_n (int max)
-{
- return lcg_rand () % max;
-}
-
-/*----------------------------------------------------------------------------*\
-* CRC-32 version 2.0.0 by Craig Bruce, 2006-04-29.
-*
-* This program generates the CRC-32 values for the files named in the
-* command-line arguments. These are the same CRC-32 values used by GZIP,
-* PKZIP, and ZMODEM. The compute_crc32() can also be detached and
-* used independently.
-*
-* THIS PROGRAM IS PUBLIC-DOMAIN SOFTWARE.
-*
-* Based on the byte-oriented implementation "File Verification Using CRC"
-* by Mark R. Nelson in Dr. Dobb's Journal, May 1992, pp. 64-67.
-*
-* v1.0.0: original release.
-* v1.0.1: fixed printf formats.
-* v1.0.2: fixed something else.
-* v1.0.3: replaced CRC constant table by generator function.
-* v1.0.4: reformatted code, made ANSI C. 1994-12-05.
-* v2.0.0: rewrote to use memory buffer & static table, 2006-04-29.
-\*----------------------------------------------------------------------------*/
-
-/*----------------------------------------------------------------------------*\
-* NAME:
-* compute_crc32() - computes the CRC-32 value of a memory buffer
-* DESCRIPTION:
-* Computes or accumulates the CRC-32 value for a memory buffer.
-* The 'in_crc32' gives a previously accumulated CRC-32 value to allow
-* a CRC to be generated for multiple sequential buffer-fuls of data.
-* The 'in_crc32' for the first buffer must be zero.
-* ARGUMENTS:
-* in_crc32 - accumulated CRC-32 value, must be 0 on first call
-* buf - buffer to compute CRC-32 value for
-* buf_len - number of bytes in buffer
-* RETURNS:
-* crc32 - computed CRC-32 value
-* ERRORS:
-* (no errors are possible)
-\*----------------------------------------------------------------------------*/
-
-static uint32_t
-compute_crc32 (uint32_t in_crc32,
- const void *buf,
- size_t buf_len)
-{
- static const uint32_t crc_table[256] = {
- 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F,
- 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
- 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2,
- 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
- 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9,
- 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
- 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C,
- 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
- 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423,
- 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
- 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106,
- 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
- 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D,
- 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
- 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950,
- 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
- 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7,
- 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
- 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA,
- 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
- 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81,
- 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
- 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84,
- 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
- 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB,
- 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
- 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E,
- 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
- 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55,
- 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
- 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28,
- 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
- 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F,
- 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
- 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242,
- 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
- 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69,
- 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
- 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC,
- 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
- 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693,
- 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
- 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
- };
-
- uint32_t crc32;
- unsigned char * byte_buf;
- size_t i;
-
- /** accumulate crc32 for buffer **/
- crc32 = in_crc32 ^ 0xFFFFFFFF;
- byte_buf = (unsigned char*) buf;
-
- for (i = 0; i < buf_len; i++)
- crc32 = (crc32 >> 8) ^ crc_table[(crc32 ^ byte_buf[i]) & 0xFF];
-
- return (crc32 ^ 0xFFFFFFFF);
-}
-
-/* perform endian conversion of pixel data */
-static void
-image_endian_swap (pixman_image_t *img,
- int bpp)
-{
- int stride = pixman_image_get_stride (img);
- uint32_t *data = pixman_image_get_data (img);
- int height = pixman_image_get_height (img);
- int i, j;
-
- /* swap bytes only on big endian systems */
- volatile uint16_t endian_check_var = 0x1234;
- if (*(volatile uint8_t *)&endian_check_var != 0x12)
- return;
-
- for (i = 0; i < height; i++)
- {
- char *line_data = (char *)data + stride * i;
-
- /* swap bytes only for 16, 24 and 32 bpp for now */
- switch (bpp)
- {
- case 16:
- for (j = 0; j + 2 <= stride; j += 2)
- {
- char t1 = line_data[j + 0];
- char t2 = line_data[j + 1];
- line_data[j + 1] = t1;
- line_data[j + 0] = t2;
- }
- break;
-
- case 24:
- for (j = 0; j + 3 <= stride; j += 3)
- {
- char t1 = line_data[j + 0];
- char t2 = line_data[j + 1];
- char t3 = line_data[j + 2];
- line_data[j + 2] = t1;
- line_data[j + 1] = t2;
- line_data[j + 0] = t3;
- }
- break;
-
- case 32:
- for (j = 0; j + 4 <= stride; j += 4)
- {
- char t1 = line_data[j + 0];
- char t2 = line_data[j + 1];
- char t3 = line_data[j + 2];
- char t4 = line_data[j + 3];
- line_data[j + 3] = t1;
- line_data[j + 2] = t2;
- line_data[j + 1] = t3;
- line_data[j + 0] = t4;
- }
- break;
-
- default:
- break;
- }
- }
-}
+#include "utils.h"
#define MAX_SRC_WIDTH 10
#define MAX_SRC_HEIGHT 10
@@ -266,7 +75,7 @@ test_composite (uint32_t initcrc,
if (src_stride & 3)
src_stride += 2;
-
+
if (dst_stride & 3)
dst_stride += 2;
@@ -326,9 +135,17 @@ test_composite (uint32_t initcrc,
case 3:
repeat = PIXMAN_REPEAT_REFLECT;
break;
+
+ default:
+ break;
}
pixman_image_set_repeat (src_img, repeat);
+ if (lcg_rand_n (2))
+ pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0);
+ else
+ pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
+
if (verbose)
{
printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt);
@@ -362,7 +179,7 @@ test_composite (uint32_t initcrc,
clip_boxes[i].x2, clip_boxes[i].y2);
}
}
-
+
pixman_region_init_rects (&clip, clip_boxes, n);
pixman_image_set_clip_region (src_img, &clip);
pixman_image_set_source_clipping (src_img, 1);
@@ -458,7 +275,7 @@ main (int argc, char *argv[])
/* predefined value for running with all the fastpath functions disabled */
/* it needs to be updated every time changes are introduced to this program! */
- if (crc == 0x0B633CF4)
+ if (crc == 0x2168ACD1)
{
printf ("scaling test passed\n");
}
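
The expected-CRC bump at the end follows from how the test works: each iteration composites with pseudo-random parameters and folds the destination bytes into one running CRC-32, so any behavioural change, such as the newly randomized NEAREST/BILINEAR filter choice above, shifts the final constant. The accumulation pattern in isolation, for illustration only (assumes it is built together with test/utils.c, which now provides compute_crc32 ()):

    #include <stdio.h>
    #include <stdint.h>
    #include "utils.h"

    int
    main (void)
    {
        uint8_t frame1[16] = { 0x00 };
        uint8_t frame2[16] = { 0xff };   /* stand-ins for composited images */
        uint32_t crc = 0;

        crc = compute_crc32 (crc, frame1, sizeof (frame1));
        crc = compute_crc32 (crc, frame2, sizeof (frame2));

        /* changing a single byte in either buffer changes the aggregate */
        printf ("aggregate crc: %08X\n", (unsigned) crc);
        return 0;
    }
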
diff --git a/lib/pixman/test/screen-test.c b/lib/pixman/test/screen-test.c
index 5e02eee08..e69dba3de 100644
--- a/lib/pixman/test/screen-test.c
+++ b/lib/pixman/test/screen-test.c
@@ -1,7 +1,7 @@
#include <stdio.h>
#include <stdlib.h>
#include "pixman.h"
-#include "utils.h"
+#include "gtk-utils.h"
int
main (int argc, char **argv)
diff --git a/lib/pixman/test/trap-test.c b/lib/pixman/test/trap-test.c
index 1da439bd6..19295e7a5 100644
--- a/lib/pixman/test/trap-test.c
+++ b/lib/pixman/test/trap-test.c
@@ -2,7 +2,7 @@
#include <stdlib.h>
#include <string.h>
#include "pixman.h"
-#include "utils.h"
+#include "gtk-utils.h"
int
main (int argc, char **argv)
diff --git a/lib/pixman/test/utils.c b/lib/pixman/test/utils.c
index a609315c5..58cd100e2 100644
--- a/lib/pixman/test/utils.c
+++ b/lib/pixman/test/utils.c
@@ -1,113 +1,208 @@
-#include <gtk/gtk.h>
-#include <config.h>
-#include "pixman-private.h" /* For image->bits.format
- * FIXME: there should probably be public API for this
- */
#include "utils.h"
-GdkPixbuf *
-pixbuf_from_argb32 (uint32_t *bits,
- gboolean has_alpha,
- int width,
- int height,
- int stride)
+/* Random number seed
+ */
+
+uint32_t lcg_seed;
+
+/*----------------------------------------------------------------------------*\
+ * CRC-32 version 2.0.0 by Craig Bruce, 2006-04-29.
+ *
+ * This program generates the CRC-32 values for the files named in the
+ * command-line arguments. These are the same CRC-32 values used by GZIP,
+ * PKZIP, and ZMODEM. The Crc32_ComputeBuf () can also be detached and
+ * used independently.
+ *
+ * THIS PROGRAM IS PUBLIC-DOMAIN SOFTWARE.
+ *
+ * Based on the byte-oriented implementation "File Verification Using CRC"
+ * by Mark R. Nelson in Dr. Dobb's Journal, May 1992, pp. 64-67.
+ *
+ * v1.0.0: original release.
+ * v1.0.1: fixed printf formats.
+ * v1.0.2: fixed something else.
+ * v1.0.3: replaced CRC constant table by generator function.
+ * v1.0.4: reformatted code, made ANSI C. 1994-12-05.
+ * v2.0.0: rewrote to use memory buffer & static table, 2006-04-29.
+\*----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------*\
+ * NAME:
+ * Crc32_ComputeBuf () - computes the CRC-32 value of a memory buffer
+ * DESCRIPTION:
+ * Computes or accumulates the CRC-32 value for a memory buffer.
+ * The 'inCrc32' gives a previously accumulated CRC-32 value to allow
+ * a CRC to be generated for multiple sequential buffer-fuls of data.
+ * The 'inCrc32' for the first buffer must be zero.
+ * ARGUMENTS:
+ * inCrc32 - accumulated CRC-32 value, must be 0 on first call
+ * buf - buffer to compute CRC-32 value for
+ * bufLen - number of bytes in buffer
+ * RETURNS:
+ * crc32 - computed CRC-32 value
+ * ERRORS:
+ * (no errors are possible)
+\*----------------------------------------------------------------------------*/
+
+uint32_t
+compute_crc32 (uint32_t in_crc32,
+ const void *buf,
+ size_t buf_len)
{
- GdkPixbuf *pixbuf = gdk_pixbuf_new (GDK_COLORSPACE_RGB, TRUE,
- 8, width, height);
- int p_stride = gdk_pixbuf_get_rowstride (pixbuf);
- guint32 *p_bits = (guint32 *)gdk_pixbuf_get_pixels (pixbuf);
- int w, h;
-
- for (h = 0; h < height; ++h)
- {
- for (w = 0; w < width; ++w)
- {
- uint32_t argb = bits[h * (stride / 4) + w];
- guint r, g, b, a;
- char *pb = (char *)p_bits;
+ static const uint32_t crc_table[256] = {
+ 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F,
+ 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
+ 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2,
+ 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
+ 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9,
+ 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
+ 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C,
+ 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
+ 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423,
+ 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
+ 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106,
+ 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
+ 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D,
+ 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
+ 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950,
+ 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
+ 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7,
+ 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
+ 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA,
+ 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
+ 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81,
+ 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
+ 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84,
+ 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
+ 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB,
+ 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
+ 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E,
+ 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
+ 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55,
+ 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
+ 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28,
+ 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
+ 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F,
+ 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
+ 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242,
+ 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
+ 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69,
+ 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
+ 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC,
+ 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
+ 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693,
+ 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
+ 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
+ };
+
+ uint32_t crc32;
+ unsigned char * byte_buf;
+ size_t i;
+
+ /* accumulate crc32 for buffer */
+ crc32 = in_crc32 ^ 0xFFFFFFFF;
+ byte_buf = (unsigned char*) buf;
- pb += h * p_stride + w * 4;
+ for (i = 0; i < buf_len; i++)
+ crc32 = (crc32 >> 8) ^ crc_table[(crc32 ^ byte_buf[i]) & 0xFF];
- r = (argb & 0x00ff0000) >> 16;
- g = (argb & 0x0000ff00) >> 8;
- b = (argb & 0x000000ff) >> 0;
- a = has_alpha? (argb & 0xff000000) >> 24 : 0xff;
+ return (crc32 ^ 0xFFFFFFFF);
+}
+
+/* perform endian conversion of pixel data
+ */
+void
+image_endian_swap (pixman_image_t *img, int bpp)
+{
+ int stride = pixman_image_get_stride (img);
+ uint32_t *data = pixman_image_get_data (img);
+ int height = pixman_image_get_height (img);
+ int i, j;
- if (a)
+ /* swap bytes only on big endian systems */
+ volatile uint16_t endian_check_var = 0x1234;
+ if (*(volatile uint8_t *)&endian_check_var != 0x12)
+ return;
+
+ for (i = 0; i < height; i++)
+ {
+ uint8_t *line_data = (uint8_t *)data + stride * i;
+ /* swap bytes only for 16, 24 and 32 bpp for now */
+ switch (bpp)
+ {
+ case 1:
+ for (j = 0; j < stride; j++)
{
- r = (r * 255) / a;
- g = (g * 255) / a;
- b = (b * 255) / a;
+ line_data[j] =
+ ((line_data[j] & 0x80) >> 7) |
+ ((line_data[j] & 0x40) >> 5) |
+ ((line_data[j] & 0x20) >> 3) |
+ ((line_data[j] & 0x10) >> 1) |
+ ((line_data[j] & 0x08) << 1) |
+ ((line_data[j] & 0x04) << 3) |
+ ((line_data[j] & 0x02) << 5) |
+ ((line_data[j] & 0x01) << 7);
}
+ break;
+ case 4:
+ for (j = 0; j < stride; j++)
+ {
+ line_data[j] = (line_data[j] >> 4) | (line_data[j] << 4);
+ }
+ break;
+ case 16:
+ for (j = 0; j + 2 <= stride; j += 2)
+ {
+ char t1 = line_data[j + 0];
+ char t2 = line_data[j + 1];
+
+ line_data[j + 1] = t1;
+ line_data[j + 0] = t2;
+ }
+ break;
+ case 24:
+ for (j = 0; j + 3 <= stride; j += 3)
+ {
+ char t1 = line_data[j + 0];
+ char t2 = line_data[j + 1];
+ char t3 = line_data[j + 2];
+
+ line_data[j + 2] = t1;
+ line_data[j + 1] = t2;
+ line_data[j + 0] = t3;
+ }
+ break;
+ case 32:
+ for (j = 0; j + 4 <= stride; j += 4)
+ {
+ char t1 = line_data[j + 0];
+ char t2 = line_data[j + 1];
+ char t3 = line_data[j + 2];
+ char t4 = line_data[j + 3];
- if (r > 255) r = 255;
- if (g > 255) g = 255;
- if (b > 255) b = 255;
-
- pb[0] = r;
- pb[1] = g;
- pb[2] = b;
- pb[3] = a;
+ line_data[j + 3] = t1;
+ line_data[j + 2] = t2;
+ line_data[j + 1] = t3;
+ line_data[j + 0] = t4;
+ }
+ break;
+ default:
+ break;
}
}
-
- return pixbuf;
}
-
-static gboolean
-on_expose (GtkWidget *widget, GdkEventExpose *expose, gpointer data)
+uint8_t *
+make_random_bytes (int n_bytes)
{
- GdkPixbuf *pixbuf = data;
-
- gdk_draw_pixbuf (widget->window, NULL,
- pixbuf, 0, 0, 0, 0,
- gdk_pixbuf_get_width (pixbuf),
- gdk_pixbuf_get_height (pixbuf),
- GDK_RGB_DITHER_NONE,
- 0, 0);
-
- return TRUE;
-}
+ uint8_t *bytes = malloc (n_bytes);
+ int i;
-void
-show_image (pixman_image_t *image)
-{
- GtkWidget *window;
- GdkPixbuf *pixbuf;
- int width, height, stride;
- int argc;
- char **argv;
- char *arg0 = g_strdup ("pixman-test-program");
- gboolean has_alpha;
- pixman_format_code_t format;
-
- argc = 1;
- argv = (char **)&arg0;
-
- gtk_init (&argc, &argv);
-
- window = gtk_window_new (GTK_WINDOW_TOPLEVEL);
- width = pixman_image_get_width (image);
- height = pixman_image_get_height (image);
- stride = pixman_image_get_stride (image);
-
- format = image->bits.format;
-
- if (format == PIXMAN_a8r8g8b8)
- has_alpha = TRUE;
- else if (format == PIXMAN_x8r8g8b8)
- has_alpha = FALSE;
- else
- g_error ("Can't deal with this format: %x\n", format);
-
- pixbuf = pixbuf_from_argb32 (pixman_image_get_data (image), has_alpha,
- width, height, stride);
-
- g_signal_connect (window, "expose_event", G_CALLBACK (on_expose), pixbuf);
- g_signal_connect (window, "delete_event", G_CALLBACK (gtk_main_quit), NULL);
-
- gtk_widget_show (window);
-
- gtk_main ();
+ if (!bytes)
+ return NULL;
+
+ for (i = 0; i < n_bytes; ++i)
+ bytes[i] = lcg_rand () & 0xff;
+
+ return bytes;
}
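
The point of keeping this LCG rather than rand () is reproducibility: the same seed yields the same byte stream on every platform, which is what makes CRC comparisons against fixed constants meaningful. A small usage sketch of the new make_random_bytes () helper, for illustration only (assumes it is built together with test/utils.c):

    #include <stdio.h>
    #include "utils.h"

    int
    main (void)
    {
        uint8_t *bytes;
        int i;

        lcg_srand (42);
        bytes = make_random_bytes (8);
        if (!bytes)
            return 1;

        /* prints the same eight bytes on every platform */
        for (i = 0; i < 8; i++)
            printf ("%02x ", bytes[i]);
        printf ("\n");

        free (bytes);
        return 0;
    }
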
diff --git a/lib/pixman/test/utils.h b/lib/pixman/test/utils.h
index bc110d847..fb1ccec48 100644
--- a/lib/pixman/test/utils.h
+++ b/lib/pixman/test/utils.h
@@ -1,6 +1,45 @@
-#include <stdio.h>
#include <stdlib.h>
-#include <glib.h>
-#include "pixman.h"
+#include <config.h>
+#include "pixman-private.h" /* For 'inline' definition */
-void show_image (pixman_image_t *image);
+/* A primitive pseudorandom number generator,
+ * taken from POSIX.1-2001 example
+ */
+
+extern uint32_t lcg_seed;
+
+static inline uint32_t
+lcg_rand (void)
+{
+ lcg_seed = lcg_seed * 1103515245 + 12345;
+ return ((uint32_t)(lcg_seed / 65536) % 32768);
+}
+
+static inline void
+lcg_srand (uint32_t seed)
+{
+ lcg_seed = seed;
+}
+
+static inline uint32_t
+lcg_rand_n (int max)
+{
+ return lcg_rand () % max;
+}
+
+
+/* CRC 32 computation
+ */
+uint32_t
+compute_crc32 (uint32_t in_crc32,
+ const void *buf,
+ size_t buf_len);
+
+/* perform endian conversion of pixel data
+ */
+void
+image_endian_swap (pixman_image_t *img, int bpp);
+
+/* Generate n_bytes random bytes in malloced memory */
+uint8_t *
+make_random_bytes (int n_bytes);
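
lcg_rand () above is the POSIX.1-2001 example generator: it keeps 32 bits of state but returns only 15 bits (0..32767), so lcg_rand_n (max) is meant for small ranges such as sizes and indices. One step worked by hand, for illustration only:

    #include <stdio.h>
    #include <stdint.h>

    int
    main (void)
    {
        uint32_t seed = 1;

        seed = seed * 1103515245 + 12345;

        /* the well-known first value for seed 1 is 16838 */
        printf ("%u\n", (unsigned) ((seed / 65536) % 32768));
        return 0;
    }
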
diff --git a/lib/pixman/test/window-test.c b/lib/pixman/test/window-test.c
index bbaa3e211..919fc16ed 100644
--- a/lib/pixman/test/window-test.c
+++ b/lib/pixman/test/window-test.c
@@ -1,8 +1,8 @@
#include <stdio.h>
#include <stdlib.h>
#include <config.h>
-#include "pixman.h"
#include "pixman-private.h"
+#include "pixman.h"
#define FALSE 0
#define TRUE 1
@@ -137,8 +137,8 @@ main ()
pixman_image_t *src, *dest;
int src_x, src_y, dest_x, dest_y;
int i, j;
- int width = get_rand (500);
- int height = get_rand (500);
+ int width = get_rand (499) + 1;
+ int height = get_rand (499) + 1;
src = make_image (width, height, TRUE, &src_x, &src_y);
dest = make_image (width, height, FALSE, &dest_x, &dest_y);