Update to pixman 0.30.0. Tested by several people during t2k13. Thanks.

author: Matthieu Herrb <matthieu@cvs.openbsd.org> 2013-06-07 17:18:02 +0000
committer: Matthieu Herrb <matthieu@cvs.openbsd.org> 2013-06-07 17:18:02 +0000
commit: 05f5801ea23297bb114b9f00d5f4c7d23743b121 (patch)
tree: 14dbb55d6b817ce49d2798c9cf00c42bc4011a50
parent: 6babe96864db98aee21458f0a62425b19818a203 (diff)
90 files changed, 30199 insertions, 2733 deletions
diff --git a/lib/pixman/ChangeLog b/lib/pixman/ChangeLog
index a9bfdeeb2..562bcc066 100644
--- a/lib/pixman/ChangeLog
+++ b/lib/pixman/ChangeLog
@@ -1,3 +1,19500 @@
+commit 41daf50aaeca71e70bc27aa4bf27ae4812c6eecf
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed May 8 19:31:22 2013 -0400
+
+    Pre-release version bump to 0.30.0
+
+commit 5a7179191dba6c592a63cad8aa8bc8af7ab5e586
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Apr 30 18:57:43 2013 -0400
+
+    Post-release version bump to 0.29.5
+
+commit 2714b5d201525e176429c0c030b8376a32b4f6c7
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Apr 30 18:50:04 2013 -0400
+
+    Pre-release version bump to 0.29.4
+
+commit 7fc2654a1fdd6d6c41eddaac50b3668433873679
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Apr 27 04:27:39 2013 -0400
+
+    pixman/refactor: Delete this file
+    
+    Essentially all of it is obsolete by now.
+
+commit cb928a77c05a9c581e596b8eb24962d47fc39e9f
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Mon Apr 15 19:33:02 2013 +0200
+
+    MIPS: DSPr2: Added rpixbuf fast path.
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+           rpixbuf =  L1:  14.63  L2:  13.55  M:  9.91 ( 79.53%)  HT:  8.47  VT:  8.32  R:  8.17  RT:  4.90 (  33Kops/s)
+    
+    Optimized:
+           rpixbuf =  L1:  45.69  L2:  37.30  M: 17.24 (138.31%)  HT: 15.66  VT: 14.88  R: 13.97  RT:  8.38 (  44Kops/s)
+
+commit c6a6fbdcd3ef18f733ff7ad11d5fafac384744cd
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Mon Apr 15 19:33:01 2013 +0200
+
+    MIPS: DSPr2: Added pixbuf fast path.
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+            pixbuf =  L1:  18.18  L2:  16.47  M: 13.36 (107.27%)  HT: 10.16  VT: 10.07  R:  9.84  RT:  5.54 (  35Kops/s)
+    
+    Optimized:
+            pixbuf =  L1:  43.54  L2:  36.02  M: 17.08 (137.09%)  HT: 15.58  VT: 14.85  R: 13.87  RT:  8.38 (  44Kops/s)
+
+commit f69335d5292310dc18f2f84d462430137a771976
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Mon Apr 15 19:33:00 2013 +0200
+
+    test: add "pixbuf" and "rpixbuf" to lowlevel-blt-bench
+    
+    Add necessary support to lowlevel-blt benchmark for benchmarking pixbuf and
+    rpixbuf fast paths. bench_composite function now checks for pixbuf string in
+    testname, and if that is detected, use same bits for src and mask images.
+
+commit 3dc9e3827e342b415c519da1039b9a2e4fb293ec
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Mon Apr 15 19:32:59 2013 +0200
+
+    test: add "src_0888_8888_rev" and "src_0888_0565_rev" to lowlevel-blt-bench
+
+commit 44174ce51d1ed5a1bf988b9dd9218d8cbd379de3
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Mon Apr 15 19:32:58 2013 +0200
+
+    MIPS: DSPr2: Fix for bug in in_n_8 routine.
+    
+    Rounding logic was not implemented right.
+    Instead of using rounding version of the 8-bit shift, logical shifts were used.
+    Also, code used unnecessary multiplications, which could be avoided by packing
+    4 destination (a8) pixel into one 32bit register. There were also unnecessary
+    spills on stack. Code is rewritten to address mentioned issues.
+    
+    The bug was revealed by increasing number of the iterations in blitters-test.
+    
+    Performance numbers on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+                       in_n_8 =  L1:  21.20  L2:  22.86  M: 21.42 ( 14.21%)  HT: 15.97  VT: 15.69  R: 15.47  RT:  8.00 (  48Kops/s)
+    Optimized (first implementation, with bug):
+                       in_n_8 =  L1:  89.38  L2:  86.07  M: 65.48 ( 43.44%)  HT: 44.64  VT: 41.50  R: 40.77  RT: 16.94 (  66Kops/s)
+    Optimized (with bug fix, and code revisited):
+                       in_n_8 =  L1: 102.33  L2:  95.65  M: 70.54 ( 46.84%)  HT: 48.35  VT: 45.06  R: 43.20  RT: 17.60 (  66Kops/s)
+
+commit 5858f09d264ef762ddcf7ede324bfce9f5991d29
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Mon Apr 15 19:32:57 2013 +0200
+
+    MIPS: DSPr2: Added src_0565_8888 nearest neighbor fast path.
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+             src_0565_8888 =  L1:  20.70  L2:  19.22  M: 12.50 ( 49.79%)  HT: 10.45  VT: 10.18  R:  9.99  RT:  5.31 (  31Kops/s)
+    
+    Optimized:
+             src_0565_8888 =  L1:  62.98  L2:  53.44  M: 23.07 ( 91.87%)  HT: 19.85  VT: 19.15  R: 17.70  RT:  9.68 (  43Kops/s)
+
+commit 311d55b6d8e1ac3acaa12d1d7c3eefdcfdc70718
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Mon Apr 15 19:32:56 2013 +0200
+
+    MIPS: DSPr2: Added over_8888_0565 nearest neighbor fast path.
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+            over_8888_0565 =  L1:  13.22  L2:  12.02  M:  9.77 ( 38.92%)  HT:  8.58  VT:  8.35  R:  8.38  RT:  5.78 (  35Kops/s)
+    
+    Optimized:
+            over_8888_0565 =  L1:  26.20  L2:  22.97  M: 15.92 ( 63.40%)  HT: 13.33  VT: 13.13  R: 12.72  RT:  7.65 (  39Kops/s)
+
+commit bd487ee34c343142cbe451a2e04541d8aba0eaa7
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Mon Apr 15 19:32:55 2013 +0200
+
+    MIPS: DSPr2: Added over_8888_8888 nearest neighbor fast path.
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+            over_8888_8888 =  L1:  19.47  L2:  16.30  M: 11.24 ( 59.69%)  HT:  9.54  VT:  9.29  R:  9.47  RT:  6.24 (  37Kops/s)
+    
+    Optimized:
+            over_8888_8888 =  L1:  43.67  L2:  33.30  M: 16.32 ( 86.65%)  HT: 14.10  VT: 13.78  R: 12.96  RT:  7.85 (  39Kops/s)
+
+commit 66def909ad82ed4ccb49380031cb828655c9a47f
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Mon Apr 15 19:32:54 2013 +0200
+
+    MIPS: DSPr2: Fix bug in over_n_8888_8888_ca/over_n_8888_0565_ca routines
+    
+    After introducing new PRNG (pseudorandom number generator) a bug in two DSPr2
+    routines was revealed. Bug manifested by wrong calculation in composite and
+    glyph tests, which caused make check to fail for MIPS DSPr2 optimizations.
+    
+    Bug was in the calculation of the:
+    *dst = over (src, *dst) when ma == 0xffffffff
+    
+    In this case src was not negated and shifted right by 24 bits, it was only
+    negated. When implementing this routine in the first place, I misplaced those
+    shifts, which allowed me to combine code for over operation and:
+        UN8x4_MUL_UN8x4 (s, ma);
+        UN8x4_MUL_UN8 (ma, srca);
+        ma = ~ma;
+        UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
+    So I decided to rewrite that piece of code from scratch. I changed logic, so
+    now assembly code mimics code from pixman-fast-path.c but processes two pixels
+    at a time. This code should be easier to debug and maintain.
+    
+    The bug was revealed in commit b31a6962. Errors were detected by composite
+    and glyph tests.
+
+commit d768558ce195caa208262866f9262b29efff22dc
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Mon Jan 28 07:00:12 2013 +0200
+
+    sse2: faster bilinear interpolation (get rid of XOR instruction)
+    
+    The old code was calculating horizontal weights for right pixels
+    in the following way (for simplicity assume 8-bit interpolation
+    precision):
+    
+      Start with "x = vx" and do increment "x += ux" after each pixel.
+      In this case right pixel weight for interpolation can be calculated
+      as "((x >> 8) ^ 0xFF) + 1", which is the same as "256 - (x >> 8)".
+    
+    The new code instead:
+    
+      Starts with "x = -(vx + 1)", performs increment "x += -ux" after
+      each pixel and calculates right weights as just "(x >> 8) + 1",
+      eliminating the need for XOR operation in the inner loop.
+    
+    So we have one instruction less on the critical path. Benchmarks
+    with "lowlevel-blt-bench -b src_8888_8888" using GCC 4.7.2 on
+    x86-64 system and default optimizations:
+    
+    Intel Core i7 860 (2.8GHz):
+        before: src_8888_8888 =  L1: 291.37  L2: 288.58  M:285.38
+        after:  src_8888_8888 =  L1: 319.66  L2: 316.47  M:312.06
+    
+    Intel Core2 T7300 (2GHz):
+        before: src_8888_8888 =  L1: 121.95  L2: 118.38  M:118.52
+        after:  src_8888_8888 =  L1: 128.82  L2: 125.12  M:124.88
+    
+    Intel Atom N450 (1.67GHz):
+        before: src_8888_8888 =  L1:  64.25  L2:  62.37  M: 61.80
+        after:  src_8888_8888 =  L1:  64.23  L2:  62.37  M: 61.82
+    
+    Inspired by the "sse2_bilinear_interpolation" function (single
+    pixel interpolation) from:
+        http://lists.freedesktop.org/archives/pixman/2013-January/002575.html
+
+commit 59109f32930a0c163628f8087cbb0a15b19cb96b
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Tue Mar 5 00:59:13 2013 +0200
+
+    test: larger 0xFF/0x00 filled clusters in random images for blitters-test
+    
+    Current blitters-test program had difficulties detecting a bug in
+    over_n_8888_8888_ca implementation for MIPS DSPr2:
+    
+        http://lists.freedesktop.org/archives/pixman/2013-March/002645.html
+    
+    In order to hit the buggy code path, two consecutive mask values had
+    to be equal to 0xFFFFFFFF because of loop unrolling. The current
+    blitters-test generates random images in such a way that each byte
+    has 25% probability for having 0xFF value. Hence each 32-bit mask
+    value has ~0.4% probability for 0xFFFFFFFF. Because we are testing
+    many compositing operations with many pixels, encountering at least
+    one 0xFFFFFFFF mask value reasonably fast is not a problem. If a
+    bug related to 0xFFFFFFFF mask value is artificially introduced into
+    over_n_8888_8888_ca generic C function, it gets detected on 675591
+    iteration in blitters-test (out of 2000000).
+    
+    However two consecutive 0xFFFFFFFF mask values are much less likely
+    to be generated, so the bug was missed by blitters-test.
+    
+    This patch addresses the problem by also randomly setting the 32-bit
+    values in images to either 0xFFFFFFFF or 0x00000000 (also with 25%
+    probability). It allows to have larger clusters of consecutive 0x00
+    or 0xFF bytes in images which may have special shortcuts for handling
+    them in unrolled or SIMD optimized code.
+
+commit a99147d1ea0d67f635f9284c242485fb5621cab3
+Author: Stefan Weil <sw@weilnetz.de>
+Date:   Sat Apr 27 08:00:38 2013 +0200
+
+    Trivial spelling fixes in comments
+    
+    They were found by codespell.
+    
+    Signed-off-by: Stefan Weil <sw@weilnetz.de>
+
+commit 9d0bb10312e5de0653c9e28df79ce8a5e8cec97a
+Author: Peter Breitenlohner <peb@mppmu.mpg.de>
+Date:   Mon Apr 8 13:13:05 2013 +0200
+
+    Check for missing sqrtf() as, e.g., for Solaris 9
+    
+    Signed-off-by: Peter Breitenlohner <peb@mppmu.mpg.de>
+
+commit d8ac35af1208a4fa4d67f03fee10b5449fb8495a
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Feb 14 08:06:19 2013 -0500
+
+    Improve precision of calculations in pixman-gradient-walker.c
+    
+    The computations in pixman-gradient-walker.c currently take place at
+    very limited 8 bit precision which results in quite visible artefacts
+    in gradients. An example is the one produced by demos/linear-gradient
+    which currently looks like this:
+    
+        http://i.imgur.com/kQbX8nd.png
+    
+    With the changes in this commit, the gradient looks like this:
+    
+        http://i.imgur.com/nUlyuKI.png
+    
+    The images are also available here:
+    
+        http://people.freedesktop.org/~sandmann/gradients/before.png
+        http://people.freedesktop.org/~sandmann/gradients/after.png
+    
+    This patch computes pixels using floating point, but uses a faster
+    algorithm, which makes up for the loss of performance.
+    
+    == Theory:
+    
+    In both the new and the old algorithm, the various gradient
+    implementations compute a parameter x that indicates how far along the
+    gradient the current scanline is. The current algorithm has a cache of
+    the two color stops surrounding the last parameter; those are used in
+    a SIMD-within-register fashion in this way:
+    
+        t1 = walker->left_rb * idist + walker->right_rb * dist;
+    
+    where dist and idist are the distances to the left and right color
+    stops respectively normalized to the distance between the left and
+    right stops. The normalization (which involves a division) is captured
+    in another cached variable "stepper". The cached values are recomputed
+    whenever the parameter moves in between two different stops (called
+    "reset" in the implementation).
+    
+    Because idist and dist are computed in 8 bits only, a lot of
+    information is lost, which is quite visible as the image linked above
+    shows.
+    
+    The new algorithm caches more information in the following way. When
+    interpolating between stops, the formula to be used is this:
+    
+         t = ((x - left) / (right - left));
+    
+         result = lc * (1 - t) + rc * t;
+    
+    where
+    
+        - x is the parameter as computed by the main gradient code,
+        - left is the position of the left color stop,
+        - right is the position of the right color stop
+        - lc is the color of the left color stop
+        - rc is the color of the right color stop
+    
+    That formula can also be written like this:
+    
+        result
+          = lc * (1 - t) + rc * t;
+          = lc + (rc - lc) * t
+          = lc + (rc - lc) * ((x - left) / (right - left))
+          = (rc - lc) / (right - left) * x +
+          	       lc - (left * (rc - lc)) / (right - left)
+          = s * x + b
+    
+    where
+    
+        s = (rc - lc) / (right - left)
+    
+    and
+    
+        b = lc - left * (rc - lc) / (right - left)
+          = (lc * (right - left) - left * (rc - lc)) / (right - left)
+          = (lc * right - rc * left) / (right - left)
+    
+    To summarize, setting w = (right - left):
+    
+        s = (rc - lc) / w
+        b = (lc * right - rc * left) / w
+    
+        r = s * x + b
+    
+    Since s and b only depend on the two active stops, both can be cached
+    so that the computation only needs to do one multiplication and one
+    addition per pixel (followed by premultiplication of the alpha
+    channel). That is, seven multiplications in total, which is the same
+    number as the old SIMD-within-register implementation had.
+    
+    == Implementation notes:
+    
+    The new formula described above is implemented in single precision
+    floating point, and the eight divisions necessary to compute the
+    cached values are done by multiplication with the reciprocal of the
+    distance between the color stops.
+    
+    The alpha values used in the cached computation are scaled by 255.0,
+    whereas the RGB values are kept in the [0, 1] interval. This ensures
+    that after premultiplication, all values will be in the [0, 255]
+    interval.
+    
+    This scaling is done by first dividing all the channels by
+    257, and then later on dividing the r, g, b channels by 255. It would
+    be more natural to do all this scaling in only one place, but
+    inexplicably, that results in a (substantial) slowdown on Sandy Bridge
+    with GCC v 4.7.
+    
+    == Performance impact (median of three runs of radial-perf-test):
+    
+       == Intel Sandy Bridge, Core i3 @ 1.2GHz
+    
+       Before: 0.014553
+       After:  0.014410
+       Change: 1.0% faster
+    
+       == AMD Barcelona @ 1.2 GHz
+    
+       Before: 0.021735
+       After:  0.021328
+       Change: 1.9% faster
+    
+    Ie., slightly faster, though conceivably there could be a negative
+    impact on machines with a bigger difference between integer and
+    floating point performance.
+    
+    V2:
+    
+    - Use 's' and 'b' in the variable names instead of 'm' and 'd'. This
+      way they match the explanation above
+    
+    - Move variable declarations to the top of the function
+    
+    - Remove unused stepper field
+    
+    - Some formatting fixes
+    
+    - Don't pointlessly include pixman-combine32.h
+    
+    - Don't offset x for each pixel; go back to offsetting left_x and
+      right_x at reset time. The offsets cancel out in the formula above,
+      so there is no impact on the calculations.
+
+commit a1c2331e0eb35d87cf295518838debe1217ca9df
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Mar 8 14:05:50 2013 -0500
+
+    Move the IS_ZERO() to pixman-private.h and rename to FLOAT_IS_ZERO()
+    
+    Some upcoming changes to pixman-gradient-walker.c will need this
+    macro.
+
+commit 2c953e572f6c3c18046e768dd07d12150b1f2e94
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Feb 24 21:49:06 2013 -0500
+
+    test: Add radial-perf-test, a microbenchmark for radial gradients
+    
+    This benchmark renders one of the radial gradients used in the
+    swfdec-youtube cairo trace 500 times and reports the average time it
+    took.
+    
+    V2: Update .gitignore
+
+commit 460faaa41105c2939d041506f6ff08e2b12e7596
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Feb 14 20:32:31 2013 -0500
+
+    demos: Add linear-gradient demo program
+    
+    This program displays a linear gradient from blue to yellow. Due to
+    limited precision in pixman-gradient-walker.c, it currently has some
+    ugly artefacts that give it a 'brushed metal' appearance.
+    
+    V2: Update .gitignore
+
+commit aaae3d8eefa069098e9014822817ca1429fdea46
+Author: Behdad Esfahbod <behdad@behdad.org>
+Date:   Fri Mar 8 06:00:00 2013 -0500
+
+    Remove unused macro
+
+commit 5feda20fc39407879993ed4a6d861ef7f78d9432
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Wed Feb 27 14:40:51 2013 +0100
+
+    MIPS: DSPr2: Added more fast-paths for SRC operation:
+     - src_0888_8888_rev
+     - src_0888_0565_rev
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+            src_0888_8888_rev =  L1:  51.88  L2:  42.00  M: 19.04 ( 88.50%)  HT: 15.27  VT: 14.62  R: 14.13  RT:  7.12 (  45Kops/s)
+            src_0888_0565_rev =  L1:  31.96  L2:  30.90  M: 22.60 ( 75.03%)  HT: 15.32  VT: 15.11  R: 14.49  RT:  6.64 (  43Kops/s)
+    
+    Optimized:
+            src_0888_8888_rev =  L1: 222.73  L2: 113.70  M: 20.97 ( 97.35%)  HT: 18.31  VT: 17.14  R: 16.71  RT:  9.74 (  54Kops/s)
+            src_0888_0565_rev =  L1: 100.37  L2:  74.27  M: 29.43 ( 97.63%)  HT: 22.92  VT: 21.59  R: 20.52  RT: 10.56 (  56Kops/s)
+
+commit 43914d68d1c87a9da6f53e6b0a12941c97bb0e5d
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Wed Feb 27 14:39:45 2013 +0100
+
+    MIPS: DSPr2: Added more fast-paths for OVER operation:
+     - over_8888_0565
+     - over_n_8_8
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+            over_8888_0565 =  L1:  14.30  L2:  13.22  M: 10.43 ( 41.56%)  HT: 12.51  VT: 12.95  R: 11.82  RT:  7.34 (  49Kops/s)
+                over_n_8_8 =  L1:  12.77  L2:  16.93  M: 15.03 ( 29.94%)  HT: 10.78  VT: 10.72  R: 10.29  RT:  4.92 (  33Kops/s)
+    
+    Optimized:
+            over_8888_0565 =  L1:  26.03  L2:  22.92  M: 15.68 ( 62.43%)  HT: 16.19  VT: 16.27  R: 14.93  RT:  8.60 (  52Kops/s)
+                over_n_8_8 =  L1:  62.00  L2:  55.17  M: 40.29 ( 80.23%)  HT: 26.77  VT: 25.64  R: 24.13  RT: 10.01 (  47Kops/s)
+
+commit 2156fb51b353867d5a18b734690ca551f74d4fb1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Feb 15 18:34:46 2013 -0500
+
+    gtk-utils.c: Use cairo in show_image() rather than GdkPixbuf
+    
+    GdkPixbufs are not premultiplied, so when using them to display pixman
+    images, there are some unnecessary conversions going on: First the image
+    is converted to non-premultiplied, and then GdkPixbuf premultiplies
+    before sending the result to the X server. These conversions may cause
+    the displayed image to not be exactly identical to the original.
+    
+    This patch just uses a cairo image surface instead, which avoids these
+    conversions.
+    
+    Also make the comment about sRGB a little more concise.
+
+commit 5e207f825bd1ed3142a623bcbceca00508907c5e
+Author: Ben Avison <bavison@riscosopen.org>
+Date:   Wed Feb 6 00:39:12 2013 +0000
+
+    Fix to lowlevel-blt-bench
+    
+    The source, mask and destination buffers are initialised to 0xCC just after
+    they are allocated. Between each benchmark, there are a pair of memcpys,
+    from the destination buffer to the source buffer and back again (there are
+    no explanatory comments, but presumably this is an effort to flush the
+    caches). However, it has an unintended consequence, which is to change the
+    contents of the buffers on entry to subsequent benchmarks. This means it is
+    not a fair test: for example, with over_n_8888 (featured in the following
+    patches) it reports L2 and even M tests as being faster than the L1 test,
+    because after the L1 test, the source buffer is filled with fully opaque
+    pixels, for which over_n_8888 has a shortcut.
+    
+    The fix here is simply to reverse the order of the memcpys, so src and
+    destination are both filled with 0xCC on entry to all tests.
+
+commit d26f922dc1a605dae00fa0540198707485ba1f08
+Author: Stefan Weil <sw@weilnetz.de>
+Date:   Sat Feb 9 12:40:16 2013 +0100
+
+    sse2: Use uintptr_t in type casts from pointer to integral value
+    
+    Some recent code added new type casts from pointer to unsigned long.
+    These type casts result in compiler warnings for systems like
+    MinGW-w64 (64 bit Windows) where sizeof(unsigned long) != sizeof(void *).
+    
+    Signed-off-by: Stefan Weil <sw@weilnetz.de>
+    Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit dc80eb09e2831d5ad3bfe638462f80921357952b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Jan 31 14:54:49 2013 -0500
+
+    lookup_composite: Don't update cache in case of error
+    
+    If we fail to find a composite function, don't update the fast path
+    cache with the dummy compositing function.
+    
+    Also make the error message state that the bug is likely caused by
+    issues with thread local storage.
+
+commit 4dced81c917c753a4e699e3793efa15a39361cf0
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Jan 31 14:36:38 2013 -0500
+
+    Turn on error logging at all times
+    
+    While releasing 0.29.2 the distcheck run produced a number of error
+    messages that had to be fixed in 349015e1fc5d912ba4253133b90e751d0b.
+    These were not caught before so nobody had actually run pixman with
+    debugging turned on. It's not the first time this has happened, see
+    5b0563f39eb29e4ae431717696174da5 for example.
+    
+    So this patch makes the return_if_fail() macros use unlikely() around
+    the expressions and then turns on error logging at all times. The
+    performance hit should be negligible since we were already evaluating the
+    expressions.
+    
+    The place where DEBUG actually does cause a performance hit is in the
+    region selfcheck code, and that will still only be enabled in
+    development snapshots.
+
+commit f4c9492c12d98f76d99b4dbdca56d517e1ffdb19
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Jan 31 14:31:26 2013 -0500
+
+    pixman-compiler.h: Add unlikely() macro
+    
+    When compiling with GCC this macro expands to __builtin_expect((expr), 0).
+    On other compilers, it just expands to (expr).
+
+commit 5ebb5ac3807cdc7bb76358041a15cc5adca2ef23
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jan 22 08:29:57 2013 -0500
+
+    utils.c: Increase acceptable deviation to 0.0064 in pixel_checker_t
+    
+    The check-formats program reveals that the 8 bit pipeline cannot meet
+    the current 0.004 acceptable deviation specified in utils.c, so we
+    have to increase it. Some of the failing pixels were captured in
+    pixel-test, which with this commit now passes.
+    
+    == a4r4g4b4 DISJOINT_XOR a8r8g8b8 ==
+    
+    The DISJOINT_XOR operator applied to an a4r4g4b4 source pixel of
+    0xd0c0 and a destination pixel of 0x5300ea00 results in the exact
+    value:
+    
+        fa = (1 - da) / sa = (1 - 0x53 / 255.0) / (0xd / 15.0) = 0.7782
+        fb = (1 - sa) / da = (1 - 0xd / 15.0) / (0x53 / 255.0) = 0.4096
+    
+        r = fa * (0xc / 15.0) + fb * (0xea / 255.0) = 0.99853
+    
+    But when computing in 8 bits, we get:
+    
+        fa8 = ((255 - 0x53) * 255 + 0xdd / 2) / 0xdd = 0xc6
+        fb8 = ((255 - 0xdd) * 255 + 0x53 / 2) / 0x53 = 0x68
+    
+        r8 = (fa8 * 0xcc + 127) / 255 + (fb8 * 0xea + 127) / 255 = 0xfd
+    
+    and
+    
+        0xfd / 255.0 = 0.9921568627450981
+    
+    for a deviation of 0.00637118610187, which we then have to consider
+    acceptable given the current implementation.
+    
+    By switching to computing the result with
+    
+       r = (fa * s + fb * d + 127) / 255
+    
+    rather than
+    
+       r = (fa * s + 127) / 255 + (fb * d + 127) / 255
+    
+    the deviation would be only 0.00244961747442, so at some point it may
+    be worth doing either this, or switching to floating point for
+    operators that involve divisions.
+    
+    Note that the conversion from 4 bits to 8 bits does not cause any
+    error in this case because both rounding and bit replication produces
+    an exact result when the number of from-bits divides the number of
+    to-bits.
+    
+    == a8r8g8b8 OVER r5g6b5 ==
+    
+    When OVER compositing the a8r8g8b8 pixel 0x0f00c300 with the x14r6g6b6
+    pixel 0x03c0, the true floating point value of the resulting green
+    channel is:
+    
+       0xc3 / 255.0 + (1.0 - 0x0f / 255.0) * (0x0f / 63.0) = 0.9887955
+    
+    but when compositing 8 bit values, where the 6-bit green channel is
+    converted to 8 bit through bit replication, the 8-bit result is:
+    
+       0xc3 + ((255 - 0x0f) * 0x3c + 127) / 255 = 251
+    
+    which corresponds to a real value of 0.984314. The difference from the
+    true value is 0.004482 which is bigger than the acceptable deviation
+    of 0.004. So, if we were to compute all the CONJOINT/DISJOINT
+    operators in floating point, or otherwise make them more accurate, the
+    acceptable deviation could be set at 0.0045.
+    
+    If we were doing the 6-bit conversion with rounding:
+    
+       (x / 63.0 * 255.0 + 0.5)
+    
+    instead of bit replication, the deviation in this particular case
+    would be only 0.0005, so we may want to consider this at some
+    point.
+
+commit f2ba7fe1d812a30004b734e398f45b586833d43f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jan 19 16:32:15 2013 -0500
+
+    test: Add new pixel-test regression test
+    
+    This test program contains a table of individual operator/pixel
+    combinations. For each pixel combination, images of various sizes are
+    filled with the pixels and then composited. The result is then
+    verified against the output of do_composite(). If the result doesn't
+    match, detailed error information is printed.
+    
+    The initial 14 pixel combinations currently all fail.
+
+commit 6781636740099633b9a8f7e0cc8e7828770f2fc3
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jan 21 15:02:53 2013 -0500
+
+    a1-trap-test: Add tests for operator_name and format_name()
+    
+    The check-formats.c test depends on the exact format of the strings
+    returned from these functions, so add a test here.
+    
+    a1-trap-test isn't the ideal place, but it seems like overkill to add
+    a new test just for these trivial checks.
+
+commit d1434d112ca5cd325e4fb85fc60afd1b9e902786
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jan 21 15:54:05 2013 -0500
+
+    test: Add new check-formats utility
+    
+    Given an operator and two formats, this program will composite and
+    check all pixels where the red and blue channels are 0. That is, if
+    the two formats are a8r8g8b8 and a4r4g4b4, all source pixels matching
+    the mask
+    
+        0xff00ff00
+    
+    are composited with the given operator against all destination pixels
+    matching the mask
+    
+        0xf0f0
+    
+    and the result is then verified against the do_composite() function
+    that was moved to utils.c earlier.
+    
+    This program reveals that a number of operators and format
+    combinations are not computed to within the precision currently
+    accepted by pixel_checker_t. For example:
+    
+        check-formats over a8r8g8b8 r5g6b5 | grep failed | wc -l
+        30
+    
+    reveals that there are 30 pixel combinations where OVER produces
+    insufficiently precise results for the a8r8g8b8 and r5g6b5 formats.
+
+commit 1820131fe6674d46b9876965b30b331d593124a8
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jan 22 07:36:19 2013 -0500
+
+    utils.[ch]: Add pixel_checker_get_masks()
+    
+    This function returns the a, r, g, and b masks corresponding to the
+    pixel checker's format.
+
+commit 5eb61f72ea50e02eb185c746108909945b589e65
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jan 22 11:57:53 2013 -0500
+
+    test/utils.[ch]: Add pixel_checker_convert_pixel_to_color()
+    
+    This function takes a pixel in the format corresponding to the pixel
+    checker, and converts to a color_t.
+
+commit 3ae717f71a31620a5cb28792b9effd0c69ffb822
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jan 19 12:14:24 2013 -0500
+
+    test: Move do_composite() function from composite.c to utils.c
+    
+    So that it can be used in other tests.
+
+commit 958bd334b3c17f529c80f2eeef4224f45c62f292
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jan 29 21:42:02 2013 -0500
+
+    Post-release version bump to 0.29.3
+
+commit a56707e23bf2d3ef7c2ff9f66f214716791a424d
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jan 29 20:23:39 2013 -0500
+
+    Pre-release version bump to 0.29.2
+
+commit 349015e1fc5d912ba4253133b90e751d0b6ca7f2
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jan 29 20:23:31 2013 -0500
+
+    stresstest: Ensure that the rasterizer is only given alpha formats
+    
+    In c2cb303d33ec11390b93cabd90f0f9, return_if_fail()s were added to
+    prevent the trapezoid rasterizers from being called with non-alpha
+    formats. However, stress-test actually does call the rasterizers with
+    non-alpha formats, but because _pixman_log_error() is disabled in
+    versions with an odd minor number, the errors never materialized.
+    
+    Fix this by changing the argument to random format to an enum of three
+    values DONT_CARE, PREFER_ALPHA, or REQUIRE_ALPHA, and then in the
+    switch that calls the trapezoid rasterizers, pass the appropriate
+    value for the function in question.
+
+commit afde862928da7ac927cf4b60a022fafe5f060d26
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Jan 27 20:08:06 2013 -0500
+
+    Change default GPGKEY to 3892336E, which is soren.sandmann@gmail.com
+    
+    The old one belongs to the email address sandmann@daimi.au.dk, which
+    doesn't work anymore.
+    
+    Also use gpg to get the name and address for the "(Signed by ...)"
+    line since that works more reliably for me than using git.
+
+commit 69a7a9b6b6dc5b769888c469de3435059318f7cc
+Author: Ben Avison <bavison@riscosopen.org>
+Date:   Thu Jan 24 18:19:48 2013 +0000
+
+    Improve L1 and L2 benchmark tests for caches that don't use allocate-on-write
+    
+    In particular this affects single-core ARMs (e.g. ARM11, Cortex-A8), which
+    are usually configured this way. For other CPUs, this should only add a
+    constant time, which will be cancelled out by the EXCLUDE_OVERHEAD runs.
+    
+    The problems were caused by cachelines becoming permanently evicted from
+    the cache, because the code that was intended to pull them back in again on
+    each iteration assumed too long a cache line (for the L1 test) or failed to
+    read memory beyond the first pixel row (for the L2 test). Also, the reloading
+    of the source buffer was unnecessary.
+    
+    These issues were identified by Siarhei in this post:
+    http://lists.freedesktop.org/archives/pixman/2013-January/002543.html
+
+commit 1fa67f499d3826fad8783684bb90c8aadd9f682f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Jan 18 14:13:21 2013 -0500
+
+    pixman-combine-float.c: Use IS_ZERO() in clip_color() and set_sat()
+    
+    The clip_color() function has some checks to avoid division by zero,
+    but they are done by comparing the value to 4 * FLT_EPSILON, where a
+    better choice is the IS_ZERO() macro that compares to +/- FLT_MIN.
+    
+    In set_sat(), the check is that *max > *min before dividing by *max -
+    *min, but that has the potential problem that interactions between GCC
+    optimizations and 80 bit x87 registers could mean that (*max > *min) is
+    true in 80 bits, but (*max - *min) is 0 in 32 bits, so that the
+    division by zero is not prevented. Using IS_ZERO() here as well
+    prevents this.
+
+commit 7e53e5866458fe592fc109cb1455c21c4b61dee9
+Author: Ben Avison <bavison@riscosopen.org>
+Date:   Sat Jan 19 16:16:53 2013 +0000
+
+    ARMv6: Replacement add_8_8, over_8888_8888, over_8888_n_8888 and over_n_8_8888 routines
+    
+    Improved by adding preloads, combining writes and using the SEL
+    instruction.
+    
+    add_8_8
+    
+        Before          After
+        Mean   StdDev   Mean   StdDev  Confidence  Change
+    L1  62.1   0.2      543.4  12.4    100.0%      +774.9%
+    L2  38.7   0.4      116.8  1.7     100.0%      +201.8%
+    M   40.0   0.1      110.1  0.5     100.0%      +175.3%
+    HT  30.9   0.2      43.4   0.5     100.0%      +40.4%
+    VT  30.6   0.3      39.2   0.5     100.0%      +28.0%
+    R   21.3   0.2      35.4   0.4     100.0%      +66.6%
+    RT  8.6    0.2      10.2   0.3     100.0%      +19.4%
+    
+    over_8888_8888
+    
+        Before          After
+        Mean   StdDev   Mean   StdDev  Confidence  Change
+    L1  32.3   0.1      38.0   0.2     100.0%      +17.7%
+    L2  15.9   0.4      30.6   0.5     100.0%      +92.8%
+    M   13.3   0.0      25.6   0.0     100.0%      +92.9%
+    HT  10.5   0.1      15.5   0.1     100.0%      +47.1%
+    VT  10.4   0.1      14.6   0.1     100.0%      +40.8%
+    R   10.3   0.1      15.8   0.1     100.0%      +53.3%
+    RT  6.0    0.1      7.6    0.1     100.0%      +25.9%
+    
+    over_8888_n_8888
+    
+        Before          After
+        Mean   StdDev   Mean   StdDev  Confidence  Change
+    L1  17.6   0.1      21.0   0.1     100.0%      +19.2%
+    L2  11.2   0.2      19.2   0.1     100.0%      +71.2%
+    M   10.2   0.0      19.6   0.0     100.0%      +92.6%
+    HT  8.4    0.0      11.9   0.1     100.0%      +41.7%
+    VT  8.3    0.0      11.3   0.1     100.0%      +36.4%
+    R   8.3    0.0      11.8   0.1     100.0%      +43.1%
+    RT  5.1    0.1      6.2    0.1     100.0%      +21.3%
+    
+    over_n_8_8888
+    
+        Before          After
+        Mean   StdDev   Mean   StdDev  Confidence  Change
+    L1  17.5   0.1      22.8   0.8     100.0%      +30.1%
+    L2  14.2   0.3      21.7   0.2     100.0%      +52.6%
+    M   12.0   0.0      22.3   0.0     100.0%      +84.8%
+    HT  10.5   0.1      14.1   0.1     100.0%      +34.5%
+    VT  10.0   0.1      13.5   0.1     100.0%      +35.3%
+    R   9.4    0.0      12.9   0.2     100.0%      +37.7%
+    RT  5.5    0.1      6.5    0.2     100.0%      +19.2%
+
+commit f87dfd6f37a29c69320edd92f28aed5334b09366
+Author: Ben Avison <bavison@riscosopen.org>
+Date:   Sat Jan 19 16:16:52 2013 +0000
+
+    ARMv6: New conversion routines
+    
+    There was no previous attempt at accelerating these specifically for
+    ARMv6.
+    
+    src_x888_8888
+    
+        Before          After
+        Mean   StdDev   Mean   StdDev  Confidence  Change
+    L1  96.7   0.5      270.4  2.6     100.0%      +179.5%
+    L2  44.6   2.7      110.6  9.7     100.0%      +148.0%
+    M   26.9   0.1      87.6   0.5     100.0%      +226.1%
+    HT  19.3   0.2      37.5   0.4     100.0%      +93.7%
+    VT  18.6   0.1      33.7   0.4     100.0%      +81.6%
+    R   18.4   0.1      32.2   0.3     100.0%      +75.2%
+    RT  9.2    0.2      12.1   0.3     100.0%      +31.4%
+    
+    src_0565_8888
+    
+        Before          After
+        Mean   StdDev   Mean   StdDev  Confidence  Change
+    L1  37.0   0.3      66.9   0.2     100.0%      +80.8%
+    L2  30.3   0.2      55.9   0.3     100.0%      +84.4%
+    M   25.9   0.0      62.3   0.2     100.0%      +140.3%
+    HT  15.2   0.1      33.1   0.3     100.0%      +116.9%
+    VT  15.1   0.1      30.7   0.3     100.0%      +103.6%
+    R   14.2   0.1      27.6   0.3     100.0%      +94.0%
+    RT  6.0    0.1      11.2   0.3     100.0%      +87.2%
+
+commit a0f59f3b2884b056428229363576666f158a9bb4
+Author: Ben Avison <bavison@riscosopen.org>
+Date:   Sat Jan 19 16:16:51 2013 +0000
+
+    ARMv6: New blit routines
+    
+    These are usable either as various composite operations, or via the
+    top-level function pixman_blt() which now does some blitting for the
+    first time on an ARMv6 platform (previously it just returned FALSE).
+    
+    src_8888_8888
+    
+        Before          After
+        Mean   StdDev   Mean   StdDev  Confidence  Change
+    L1  414.5  9.4      445.8  3.6     100.0%      +7.6%
+    L2  93.3   20.7     114.5  12.9    100.0%      +22.7%
+    M   57.0   0.2      89.2   0.5     100.0%      +56.4%
+    HT  28.7   0.3      39.6   0.4     100.0%      +37.9%
+    VT  25.5   0.2      35.3   0.4     100.0%      +38.4%
+    R   20.1   0.1      33.8   0.3     100.0%      +67.8%
+    RT  7.8    0.2      12.7   0.4     100.0%      +62.7%
+    
+    src_0565_0565
+    
+        Before          After
+        Mean   StdDev   Mean   StdDev  Confidence  Change
+    L1  397.4  6.1      412.5  5.2     100.0%      +3.8%
+    L2  143.2  10.9     141.9  6.5     68.9%       -0.9%  (insignificant)
+    M   90.7   0.4      133.5  0.7     100.0%      +47.1%
+    HT  38.6   0.3      53.7   0.7     100.0%      +39.0%
+    VT  33.0   0.3      47.3   0.6     100.0%      +43.3%
+    R   25.7   0.2      42.1   0.5     100.0%      +64.1%
+    RT  8.0    0.2      13.3   0.3     100.0%      +65.6%
+    
+    src_8_8
+    
+        Before          After
+        Mean   StdDev   Mean   StdDev  Confidence  Change
+    L1  716.5  9.8      768.2  20.4    100.0%      +7.2%
+    L2  246.2  12.7     260.5  8.8     100.0%      +5.8%
+    M   146.8  0.7      227.9  0.7     100.0%      +55.2%
+    HT  44.9   0.6      62.1   1.0     100.0%      +38.2%
+    VT  35.6   0.4      53.4   0.7     100.0%      +50.0%
+    R   29.7   0.3      48.2   0.6     100.0%      +62.2%
+    RT  8.6    0.2      12.9   0.4     100.0%      +49.3%
+
+commit 3cff56c5b091d2e584503e7887414e224876de37
+Author: Ben Avison <bavison@riscosopen.org>
+Date:   Sat Jan 19 16:16:50 2013 +0000
+
+    ARMv6: New fill routines
+    
+    Note that this also effectively accelerates src_n_8888, src_n_0565 and
+    src_n_8 composite types, because of the fast paths in
+    pixman-fast-path.c implemented by fast_composite_solid_fill(), which
+    end up dispatching these platform-specific fill routines.
+    
+    src_n_8888
+    
+        Before          After
+        Mean   StdDev   Mean   StdDev  Confidence  Change
+    L1  157.3  1.1      574.2  8.7     100.0%      +265.0%
+    L2  94.2   0.5      364.8  4.2     100.0%      +287.3%
+    M   92.7   0.4      358.7  1.1     100.0%      +287.1%
+    HT  68.5   0.9      133.6  4.0     100.0%      +95.2%
+    VT  61.3   0.8      111.8  2.6     100.0%      +82.4%
+    R   61.1   0.9      108.7  2.8     100.0%      +78.1%
+    RT  24.6   1.0      28.6   1.6     100.0%      +16.0%
+    
+    src_n_0565
+    
+        Before          After
+        Mean   StdDev   Mean   StdDev  Confidence  Change
+    L1  157.4  1.0      983.1  38.5    100.0%      +524.6%
+    L2  93.6   0.5      696.0  14.3    100.0%      +643.4%
+    M   92.7   0.4      680.5  1.0     100.0%      +634.0%
+    HT  68.3   0.9      160.3  6.6     100.0%      +134.6%
+    VT  61.1   0.8      130.1  3.4     100.0%      +112.9%
+    R   61.0   0.8      125.4  4.1     100.0%      +105.7%
+    RT  24.9   1.3      29.5   1.5     100.0%      +18.2%
+    
+    src_n_8
+    
+        Before          After
+        Mean   StdDev   Mean   StdDev  Confidence  Change
+    L1  154.7  1.0      1324.4 48.5    100.0%      +756.3%
+    L2  92.4   0.4      1178.4 10.9    100.0%      +1175.6%
+    M   92.9   0.4      1275.7 2.1     100.0%      +1273.5%
+    HT  68.2   1.0      169.8  5.5     100.0%      +149.0%
+    VT  61.2   1.0      138.5  3.6     100.0%      +126.3%
+    R   61.3   0.9      130.1  3.8     100.0%      +112.4%
+    RT  25.5   1.3      29.2   1.9     100.0%      +14.6%
+
+commit 2e173326aaf232d84ed71faf3517bd7989680e27
+Author: Ben Avison <bavison@riscosopen.org>
+Date:   Mon Jan 28 17:03:50 2013 +0000
+
+    ARMv6: Lay the groundwork for later patches in the series
+    
+    Move the entire contents of pixman-arm-simd-asm.S to a new file;
+    ultimately this will only retain the scaled operations, so it is
+    named pixman-arm-simd-asm-scaled.S. Added new header file
+    pixman-arm-simd-asm.h, containing the macros which are the basis of
+    all the new ARMv6 implementations, although at this point in the
+    series, nothing uses them and the library should be binary-identical.
+
+commit 65fc1adb6545737058e938105ae948a3607c277c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jan 26 00:34:53 2013 -0500
+
+    demo/scale: Add a spin button to set the number of subsample bits
+    
+    For large upscalings the level of subsampling for the filter has a
+    quite visible effect, so make it settable in the UI so that people can
+    experiment with various values.
+
+commit ed39992564beefe6b12f81e842caba11aff98a9c
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Sat Dec 15 07:18:53 2012 +0200
+
+    Use pixman_transform_point_31_16() from pixman_transform_point()
+    
+    Old functions pixman_transform_point() and pixman_transform_point_3d()
+    now become just wrappers for pixman_transform_point_31_16() and
+    pixman_transform_point_31_16_3d(). Eventually their uses should be
+    completely eliminated in the pixman code and replaced with their
+    extended range counterparts. This is needed in order to be able
+    to correctly handle any matrices and parameters that may come
+    to pixman from the code responsible for XRender implementation.
+
+commit 5a78d74ccccba2aeb473f04ade44512d2f6c0613
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Sat Dec 15 06:19:21 2012 +0200
+
+    test: Added matrix-test for testing projective transform accuracy
+    
+    This test uses __float128 data type when it is available
+    for implementing a "perfect" reference implementation. The
+    output from pixman_transform_point_31_16() and
+    pixman_transform_point_31_16_affine() is compared with the
+    reference implementation to make sure that the rounding
+    errors may only show up in a single least significant bit.
+    
+    The platforms and compilers, which do not support __float128
+    data type, can rely on crc32 checksum for the pseudorandom
+    transform results.
+
+commit 09600ae7e34eb777471c931cd4c3a8cdbda6e84a
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Wed Dec 12 02:41:55 2012 +0200
+
+    configure.ac: Added detection for __float128 support
+    
+    GCC supports 128-bit floating point data type on some platforms (including
+    but not limited to x86 and x86-64). This may be useful for tests, which
+    need perfectly accurate reference implementations of certain algorithms.
+
+commit c3deb8334a71998b986a7b8d5b74bedf26cc23aa
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Fri Dec 14 18:43:57 2012 +0200
+
+    Add higher precision "pixman_transform_point_*" functions
+    
+    The following new functions are added:
+    
+    pixman_transform_point_31_16_3d() -
+        Calculates the product of a matrix and a vector multiplication.
+    
+    pixman_transform_point_31_16() -
+        Calculates the product of a matrix and a vector multiplication.
+        Then converts the homogeneous resulting vector [x, y, z] to
+        cartesian [x', y', 1] variant, where x' = x / z, and y' = y / z.
+    
+    pixman_transform_point_31_16_affine() -
+        A faster sibling of the other two functions, which assumes affine
+        transformation, where the bottom row of the matrix is [0, 0, 1] and
+        the last element of the input vector is set to 1.
+    
+    These functions transform a point with 31.16 fixed point coordinates from
+    the destination space to a point with 48.16 fixed point coordinates in
+    the source space.
+    
+    The results are accurate and the rounding errors may only show up in
+    the least significant bit. No overflows are possible for the affine
+    transformations as long as the input data is provided in 31.16 format.
+    In the case of projective transformations, some output values may be not
+    representable using 48.16 fixed point format. In this case the results
+    are clamped to return maximum or minimum 48.16 values (so that the caller
+    can at least handle NONE and PAD repeats correctly).
+
+commit a47ed2c31180e6c3b332747a1721731e0649b10f
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Mon Dec 3 17:42:21 2012 +0200
+
+    Faster fetch for the C variant of r5g6b5 src/dest iterator
+    
+    Processing two pixels at once is used to reduce the number of
+    arithmetic operations.
+    
+    The speedup relative to the generic fetch_scanline_r5g6b5() from
+    "pixman-access.c" (pixman was compiled with gcc 4.7.2):
+    
+        MIPS 74K        480MHz  :  20.32 MPix/s ->  26.47 MPix/s
+        ARM11           700MHz  :  34.95 MPix/s ->  38.22 MPix/s
+        ARM Cortex-A8  1000MHz  :  87.44 MPix/s -> 100.92 MPix/s
+        ARM Cortex-A9  1700MHz  : 150.95 MPix/s -> 158.13 MPix/s
+        ARM Cortex-A15 1700MHz  : 148.91 MPix/s -> 155.42 MPix/s
+        IBM Cell PPU   3200MHz  :  75.29 MPix/s ->  98.33 MPix/s
+        Intel Core i7  2800MHz  : 257.02 MPix/s -> 376.93 MPix/s
+    
+    That's the performance for C code (SIMD and assembly optimizations
+    are disabled via PIXMAN_DISABLE environment variable).
+
+commit e66fd5ccb6b69dfa1acde36220dc3c3c44026890
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Mon Dec 3 17:07:31 2012 +0200
+
+    Faster write-back for the C variant of r5g6b5 dest iterator
+    
+    Unrolling loops improves performance, so just use it here.
+    
+    Also GCC can't properly optimize this code for RISC processors and
+    allocate 0x1F001F constant in a register. Because this constant is
+    too large to be represented as an immediate operand in instructions,
+    GCC inserts some redundant arithmetic. This problem can be worked around
+    by explicitly using a variable for 0x1F001F constant and also initializing
+    it by a read from another volatile variable. In this case GCC is forced
+    to allocate a register for it, because it is not seen as a constant anymore.
+    
+    The speedup relative to the generic store_scanline_r5g6b5() from
+    "pixman-access.c" (pixman was compiled with gcc 4.7.2):
+    
+        MIPS 74K        480MHz  :  33.22 MPix/s ->  43.42 MPix/s
+        ARM11           700MHz  :  50.16 MPix/s ->  78.23 MPix/s
+        ARM Cortex-A8  1000MHz  : 117.75 MPix/s -> 196.34 MPix/s
+        ARM Cortex-A9  1700MHz  : 177.04 MPix/s -> 320.32 MPix/s
+        ARM Cortex-A15 1700MHz  : 231.44 MPix/s -> 261.64 MPix/s
+        IBM Cell PPU   3200MHz  : 130.25 MPix/s -> 145.61 MPix/s
+        Intel Core i7  2800MHz  : 502.21 MPix/s -> 721.73 MPix/s
+    
+    That's the performance for C code (SIMD and assembly optimizations
+    are disabled via PIXMAN_DISABLE environment variable).
+
+commit a9f66694163da9e8e41a69497acbadd630e0cb51
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Mon Dec 3 06:32:46 2012 +0200
+
+    Added C variants of r5g6b5 fetch/write-back iterators
+    
+    Adding specialized iterators for r5g6b5 color format allows us to work
+    on fine tuning performance of r5g6b5 fetch/write-back operations in the
+    pixman general "fetch -> combine -> store" pipeline.
+    
+    These iterators also make "src_x888_0565" fast path redundant, so it can
+    be removed.
+
+commit 794033ed43ed74ad66075a4d0c83fd36565da876
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jan 23 10:27:22 2013 +0000
+
+    Eliminate duplicate copies of channel flags for pixman_image_composite32()
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit a59f081df45ec5c15b295bb31b22dbe787e2f2b1
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sat Jan 12 16:52:47 2013 +0000
+
+    Always return a valid function from lookup_combiner()
+    
+    We should always have at least a C combiner available, so we never
+    expect the search to fail. If it does, emit an error and return a
+    dummy function.
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 520230914bbb56473b872f2ef7dc59092f426415
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sat Jan 12 08:28:32 2013 +0000
+
+    Always return a valid function from lookup_composite()
+    
+    We never expect to fail to find the appropriate function as the
+    general_composite_rect should always match. So if somehow we fall through
+    the search, emit a _pixman_log_error() and return a dummy function.
+    
+    Note that we remove some conditionals and a level of indentation hence a
+    large amount of code movement. This also reveals that in a few places we
+    are duplicating stack variables that can be eliminated later.
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b283c864a3de039f9213adaf402c6597db12d0c4
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jan 8 18:39:03 2013 +0000
+
+    sse2: Add fast paths for bilinear source with a solid mask
+    
+    Based on the existing sse2_8888_n_8888 nearest scaling routines.
+    
+    fishbowl on an i5-2500: 60.9s -> 56.9s
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit d00ce4091215e8a648c6f1912829b35c02b06add
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jan 1 19:41:54 2013 +0000
+
+    sse2: Add a fast path for add_n_8_8888
+    
+    This path is being exercised by compositing of trapezoids for clipmasks, for
+    instance as used in the firefox-asteroids cairo-trace.
+    
+    IVB i7-3720qm ./tests/lowlevel-blt-bench add_n_8_8888:
+    
+    reference memcpy speed = 14846.7MB/s (3711.7MP/s for 32bpp fills)
+    
+    before: L1: 681.10  L2: 735.14  M:701.44 ( 28.35%)  HT:283.32  VT:213.23  R:208.93  RT: 77.89 ( 793Kops/s)
+    
+    after:  L1: 992.91  L2:1017.33  M:982.58 ( 39.88%)  HT:458.93  VT:332.32  R:326.13  RT:136.66 (1287Kops/s)
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 7ced3beec99e9965717f76cc822d0702383a1fce
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jan 1 19:41:54 2013 +0000
+
+    sse2: Add a fast path for add_n_8888
+    
+    This path is being exercised by inplace compositing of trapezoids, for
+    instance as used in the firefox-asteroids cairo-trace.
+    
+    IVB i7-3720qm ./tests/lowlevel-blt-bench add_n_8888:
+    
+    reference memcpy speed = 14918.3MB/s (3729.6MP/s for 32bpp fills)
+    
+    before: L1:1752.44  L2:2259.48  M:2215.73 ( 58.80%)  HT:589.49   VT:404.04   R:424.69  RT:134.68 (1182Kops/s)
+    
+    after:  L1:3931.21  L2:6132.78  M:3440.17 ( 92.24%)  HT:1337.70  VT:1357.64  R:1270.27  RT:359.78 (2161Kops/s)
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit b7f523e3bcbef1f08bf9b374f2704723d5298c1f
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Thu Jan 24 14:49:41 2013 -0500
+
+    Add a version of bilinear_interpolation for precision <=4
+    
+    Having 4 or fewer bits means we can do two components at
+    a time in a single 32 bit register.
+    
+    Here are the results for firefox-fishtank on a Pandaboard with
+    4.6.3 and PIXMAN_DISABLE="arm-neon"
+    
+    Before:
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image           t-firefox-fishtank    7.841    7.910   0.70%    6/6
+    
+    After:
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image           t-firefox-fishtank    6.951    6.995   1.11%    6/6
+
+commit 24e83cae64eaa238a7bf67488917b0f8cac89114
+Author: Ben Avison <bavison@riscosopen.org>
+Date:   Sat Jan 19 16:36:22 2013 +0000
+
+    Tweaks to lowlevel-blt-bench
+    
+    This adds two extra tests, src_n_8 and src_8_8, which I have been
+    using to benchmark my ARMv6 changes.
+    
+    I'd also like to propose that it requires an exact test name as the
+    executable's argument, as achieved by this strstr to strcmp change.
+    Without this, it is impossible to only benchmark (for example)
+    add_8_8, add_n_8 or src_n_8, due to those also being substrings of
+    many other test names.
+
+commit b527a0e615a726aa6a7d18f0ea0b38564b153afa
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jan 19 12:29:48 2013 -0500
+
+    test: Use operator_name() and format_name() in composite.c
+    
+    With the operator_name() and format_name() functions there is no
+    longer any reason for composite.c to have its own table of format and
+    operator names.
+
+commit 4eb9a24abae6cee7562c3ec8965dc4eaaba0e8ab
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jan 19 09:36:50 2013 -0500
+
+    utils.[ch]: Add new format_name() function
+    
+    This function returns the name of the given format code, which is
+    useful for printing out debug information. The function is written as
+    a switch without a default value so that the compiler will warn if new
+    formats are added in the future. The fake formats used in the fast
+    path tables are also recognized.
+    
+    The function is used in alpha_map.c, where it replaces an existing
+    format_name() function, and in blitters-test.c, affine-test.c, and
+    scaling-test.c.
+
+commit 1676b4938912bd140791c347aa4d08db255dd60f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jan 19 08:55:27 2013 -0500
+
+    test/utils.[ch]: Add new function operator_name()
+    
+    This function returns the name of the given operator, which is useful
+    for printing out debug information. The function is done as a switch
+    without a default value so that the compiler will warn if new
+    operators are added in the future.
+    
+    The function is used in affine-test.c, scaling-test.c, and
+    blitters-test.c.
+
+commit 8d85311143b0bc30d3490c0ca2ddbe927a1f9ac8
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jan 12 08:03:35 2013 -0500
+
+    README: Add guidelines on how to contribute patches
+    
+    Ben Avison pointed out here:
+    
+       http://lists.freedesktop.org/archives/pixman/2013-January/002485.html
+    
+    that there isn't really any documentation about how to submit patches
+    to pixman. This patch adds some information to the README file.
+    
+    v2: Incorporate some comments from Ben Avison
+    v3: Change gitweb URL to cgit
+
+commit 61dacffaf47e6b631a2c67230f8f111038d1de09
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Jan 18 16:53:32 2013 -0800
+
+    Convert INCLUDES to AM_CPPFLAGS
+    
+    INCLUDES has been deprecated starting with automake 1.13. Convert all
+    occurrences with the recommended AM_CPPFLAGS replacement.
+
+commit c7c28f440db083d69ca930b44fc6280bb558e098
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Jan 18 16:49:00 2013 -0800
+
+    Add new demos and tests to .gitignore
+
+commit 2c6577476e5b18e17904ae8af244a39c352e2e33
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Tue Jan 22 03:01:05 2013 +0100
+
+    MIPS: DSPr2: Added more fast-paths:
+     - over_reverse_n_8888
+     - in_n_8_8
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+            over_reverse_n_8888 =  L1:  19.42  L2:  19.07  M: 15.38 ( 40.80%)  HT: 13.35  VT: 13.10  R: 12.92  RT:  8.27 (  49Kops/s)
+                       in_n_8_8 =  L1:  21.20  L2:  22.86  M: 21.42 ( 14.21%)  HT: 15.97  VT: 15.69  R: 15.47  RT:  8.00 (  48Kops/s)
+    
+    Optimized:
+            over_reverse_n_8888 =  L1:  60.09  L2:  47.87  M: 28.65 ( 76.02%)  HT: 23.58  VT: 22.51  R: 21.99  RT: 12.28 (  60Kops/s)
+                       in_n_8_8 =  L1:  89.38  L2:  86.07  M: 65.48 ( 43.44%)  HT: 44.64  VT: 41.50  R: 40.77  RT: 16.94 (  66Kops/s)
+
+commit a67b0e24d7eaba3b9525eeb8bf357ded95cc6b7c
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Tue Jan 22 02:59:44 2013 +0100
+
+    MIPS: DSPr2: Added more fast-paths for REVERSE operation:
+     - out_reverse_8_0565
+     - out_reverse_8_8888
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+            out_reverse_8_0565 =  L1:  14.29  L2:  13.58  M: 12.14 ( 24.16%)  HT:  9.23  VT:  9.12  R:  8.84  RT:  4.75 (  36Kops/s)
+            out_reverse_8_8888 =  L1:  27.46  L2:  23.24  M: 17.41 ( 57.73%)  HT: 12.61  VT: 12.47  R: 11.79  RT:  5.86 (  41Kops/s)
+    
+    Optimized:
+            out_reverse_8_0565 =  L1:  28.24  L2:  25.64  M: 20.63 ( 41.05%)  HT: 16.69  VT: 16.14  R: 15.50  RT:  8.69 (  52Kops/s)
+            out_reverse_8_8888 =  L1:  52.78  L2:  41.44  M: 23.50 ( 77.94%)  HT: 18.79  VT: 18.16  R: 16.90  RT:  9.11 (  53Kops/s)
+
+commit 35cc965514ca6e665c18411fcf66db826d559c2a
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Dec 20 11:28:25 2012 -0500
+
+    pixman-filter.c: Cope with NULL returns from malloc()
+    
+    v2: Don't return a pointer to uninitialized memory when the allocation
+    of horz and vert fails, but allocation of params doesn't.
+
+commit 58526cfc7290a740f61e288f09fe721c4e6511bd
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Aug 26 22:06:27 2012 -0400
+
+    Handle solid images in the noop iterator
+    
+    The noop src iterator already has code to handle solid images, but
+    that code never actually runs currently because it is not possible for
+    an image to have both a format code of PIXMAN_solid and a flag of
+    FAST_PATH_BITS_IMAGE.
+    
+    If these two were to be set at the same time, the
+    fast_composite_tiled_repeat() fast path would trigger for solid images
+    (because it triggers for PIXMAN_any formats, which includes
+    PIXMAN_solid), but for solid images we can usually do better than that
+    fast path.
+    
+    So this patch removes _pixman_solid_fill_iter_init() and instead
+    handles such images (along with repeating 1x1 bits images without an
+    alpha map) in pixman-noop.c.
+    
+    When a 1x1R image is involved in the general composite path, before
+    this patch, it would hit this code in repeat() in pixman-inlines.h:
+    
+            while (*c >= size)
+                *c -= size;
+            while (*c < 0)
+                *c += size;
+    
+    and those loops could run for a huge number of iterations (proportional
+    to the composite width). For such cases, the performance improvement
+    is really big:
+    
+    ./test/lowlevel-blt-bench -n add_n_8888:
+    
+    Before:
+    
+        add_n_8888 =  L1:   3.86  L2:   3.78  M:  1.40 (  0.06%)  HT:  1.43  VT:  1.41  R:  1.41  RT:  1.38 (  19Kops/s)
+    
+    After:
+    
+        add_n_8888 =  L1:1236.86  L2:2468.49  M:1097.88 ( 49.04%)  HT:476.49  VT:429.05  R:417.04  RT:155.12 ( 817Kops/s)
+
+commit 480dd38fd190fb7ca4ff172a31a4a6ef2944f20c
+Author: Marko Lindqvist <cazfi74@gmail.com>
+Date:   Thu Jan 3 06:38:01 2013 +0200
+
+    Fix build with automake-1.13
+    
+    Automake-1.13 has removed long obsolete AM_CONFIG_HEADER macro (
+    http://lists.gnu.org/archive/html/automake/2012-12/msg00038.html )
+    and autoreconf errors out upon seeing it.
+    
+    Attached patch replaces obsolete AM_CONFIG_HEADER with now proper
+    AC_CONFIG_HEADERS.
+
+commit 1abde88ae60ae0877073d85cbf5b39013337f5da
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Thu Dec 20 05:14:39 2012 +0200
+
+    Use more appropriate types and remove a magic constant
+
+commit c1fd5a42439b21872170979d8c400cbb374e1f9d
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Thu Dec 20 05:00:46 2012 +0200
+
+    Define SIZE_MAX if it is not provided by the standard C headers
+    
+    C++ compilers do not define SIZE_MAX. It is also not available
+    if the code is compiled by some C compilers:
+        http://lists.freedesktop.org/archives/pixman/2012-August/002196.html
+
+commit 66c429282282176cdb5913b7396116c28725363e
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Sun Dec 16 04:03:58 2012 +0200
+
+    Rename 'xor' variable to 'filler' (because 'xor' is a C++ keyword)
+
+commit 4dfda2adfe2eb1130fc27b1da35df778284afd91
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 14 21:53:34 2012 -0500
+
+    float-combiner.c: Change tests for x == 0.0 to -FLT_MIN < x < FLT_MIN
+    
+    pixman-float-combiner.c currently uses checks like these:
+    
+        if (x == 0.0f)
+            ...
+        else
+            ... / x;
+    
+    to prevent division by 0. In theory this is correct: a division-by-zero
+    exception is only supposed to happen when the floating point denominator is
+    exactly equal to a positive or negative zero.
+    
+    However, in practice, the combination of x87 and gcc optimizations
+    causes issues. The x87 registers are 80 bits wide, which means the
+    initial test:
+    
+    	if (x == 0.0f)
+    
+    may be false when x is an 80 bit floating point number, but when x is
+    rounded to a 32 bit single precision number, it becomes equal to
+    0.0. In principle, gcc should compensate for this quirk of x87, and
+    there are some options such as -ffloat-store, -fexcess-precision=standard,
+    and -std=c99 that will make it do so, but these all have a performance
+    cost.  It is also possible to set the FPU to a mode that makes it do
+    all computation with single or double precision, but that would
+    require pixman to save the existing mode before doing anything with
+    floating point and restore it afterwards.
+    
+    Instead, this patch side-steps the issue by replacing exact checks for
+    equality with zero with a new macro that checks whether the value is
+    between -FLT_MIN and FLT_MIN.
+    
+    There is extensive reading material about this issue linked off the
+    infamous gcc bug 323:
+    
+        http://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
+
+commit 2734071d7bee699401dc8c98d5c2ef0e2dbb0c91
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Thu Dec 6 17:13:16 2012 +0200
+
+    ARM: make use of UQADD8 instruction even in generic C code paths
+    
+    ARMv6 has UQADD8 instruction, which implements unsigned saturated
+    addition for 8-bit values packed in 32-bit registers. It is very useful
+    for UN8x4_ADD_UN8x4, UN8_rb_ADD_UN8_rb and ADD_UN8 macros (which would
+    otherwise need a lot of arithmetic operations to simulate this operation).
+    Since most of the major ARM linux distros are built for ARMv7, we are
+    much less dependent on runtime CPU detection and can get practical
+    benefits from conditional compilation here for a lot of users.
+    
+    The results of cairo-perf-trace benchmark on ARM Cortex-A15 with pixman
+    compiled by gcc 4.7.2 and PIXMAN_DISABLE set to "arm-simd arm-neon":
+    
+    Speedups
+    ========
+    image    firefox-talos-gfx  (29938.22 0.12%) ->  (27814.76 0.51%) : 1.08x speedup
+    image    firefox-asteroids  (23241.11 0.07%) ->  (21795.19 0.07%) : 1.07x speedup
+    image firefox-canvas-alpha (174519.85 0.08%) -> (164788.64 0.20%) : 1.06x speedup
+    image              poppler   (9464.46 1.61%) ->   (8991.53 0.14%) : 1.05x speedup
+
+commit f9a41703b2d46c988b9e4e378d27396f718006ae
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Mon Dec 3 03:01:21 2012 +0200
+
+    Faster conversion from a8r8g8b8 to r5g6b5 in C code
+    
+    This change reduces 3 shifts, 3 ANDs and 2 ORs (total 8 arithmetic
+    operations) to 3 shifts, 2 ANDs and 2 ORs (total 7 arithmetic
+    operations).
+    
+    We get garbage in the high 16 bits of the result, which might need
+    to be cleared when casting to uint16_t (it would bring us back to
+    total 8 arithmetic operations). However, if the result
+    of a8r8g8b8->r5g6b5 conversion is immediately stored to memory, no
+    extra instructions for clearing these garbage bits are needed.
+    
+    This allows the a8r8g8b8->r5g6b5 conversion code to be compiled
+    into 4 instructions for ARM instead of 5 (assuming a good optimizing
+    compiler), which has no pipeline stalls on ARM11 as an additional
+    bonus.
+    
+    The change in benchmark results for 'lowlevel-blt-bench src_8888_0565'
+    with PIXMAN_DISABLE="arm-simd arm-neon mips-dspr2 mmx sse2" and pixman
+    compiled by gcc-4.7.2:
+    
+        MIPS 74K        480MHz  :  40.44 MPix/s ->  40.13 MPix/s
+        ARM11           700MHz  :  50.28 MPix/s ->  62.85 MPix/s
+        ARM Cortex-A8  1000MHz  : 124.38 MPix/s -> 141.85 MPix/s
+        ARM Cortex-A15 1700MHz  : 281.07 MPix/s -> 303.29 MPix/s
+        Intel Core i7  2800MHz  : 515.92 MPix/s -> 531.16 MPix/s
+    
+    The same trick was used in xomap (X server for Nokia N800/N810):
+        http://repository.maemo.org/pool/diablo/free/x/xorg-server/
+        xorg-server_1.3.99.0~git20070321-0osso20083801.tar.gz
+
+commit 3922e90c400fca3ac43dc77b8dd0c0591e7e4fbc
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Mon Dec 3 02:50:20 2012 +0200
+
+    Change CONVERT_XXXX_TO_YYYY macros into inline functions
+    
+    It is easier and safer to modify their code in case the
+    calculations need some temporary variables. And the temporary
+    variables will be needed soon.
+
+commit e4519360c15772ac51038b9f86e3f730f06cfb65
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Mon Dec 3 05:44:36 2012 +0200
+
+    test: add "src_0565_8888" to lowlevel-blt-bench
+
+commit 6a6c8c51ed9e7272e624b3c99187ddf71d19a0fd
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Dec 13 15:37:40 2012 -0500
+
+    pixman_composite_trapezoids(): Check for NULL return from create_bits()
+    
+    A check is needed that the creation of the temporary image in
+    pixman_composite_trapezoids() succeeds.
+    
+    Fixes crash in stress-test -s 0x313c on my system.
+
+commit c2cb303d33ec11390b93cabd90f0f95bc9264113
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Dec 13 15:26:17 2012 -0500
+
+    pixman_composite_trapezoids: Return early if mask_format is not of TYPE_ALPHA
+    
+    stress-test -s 0x17ee crashes because pixman_composite_trapezoids() is
+    given a mask_format of PIXMAN_c8, which causes it to create a
+    temporary image with that format but without a palette. This causes
+    crashes later.
+    
+    The only mask_formats that we actually support are those of TYPE_ALPHA,
+    so this patch adds a return_if_fail() to ensure this.
+    
+    Similarly, although currently it won't crash if given an invalid
+    format, alpha-only formats have always been the only thing that made
+    sense for the pixman_rasterize_edges() functions, so add a
+    return_if_fail() ensuring that the destination format is of type
+    PIXMAN_TYPE_ALPHA.
+
+commit 1f0c02811ea71b36380b9d4029a248659bd9af50
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Dec 13 11:21:16 2012 -0500
+
+    Add testing of trapezoids to stress-test
+    
+    The entry points add_trapezoids(), rasterize_trapezoid() and
+    composite_trapezoid() are exercised with random trapezoids.
+    
+    This uncovers crashes with stress-test seeds 0x17ee and 0x313c.
+
+commit 526dc06e5694172abf979c03a5cf530207fe2d27
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Dec 8 06:06:34 2012 -0500
+
+    demos/radial-test: Add checkerboard to display the alpha channel
+
+commit 6402b2aa0c2215a5add233b3c1bc2ae634d43aaf
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Dec 8 06:46:38 2012 -0500
+
+    demos/conical-test: Use the draw_checkerboard() utility function
+    
+    Instead of having its own copy.
+
+commit e382e52d675a4ae86ed94ab1124ea7d98c3db75a
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Dec 8 06:44:24 2012 -0500
+
+    test/utils.[ch]: Add utility function to draw a checkerboard
+    
+    This is useful in demo programs to display the alpha channel.
+
+commit b0a6504122ba4f585fb60626ec71bf613fc64fae
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 7 19:51:19 2012 -0500
+
+    radial: When comparing t to mindr, use >= rather than >
+    
+    Radial gradients are conceptually rendered as a sequence of circles
+    generated by linearly extrapolating from the two circles given by the
+    gradient specification. Any circles in that sequence that would end up
+    with a negative radius are not drawn, a condition that is enforced by
+    checking that t * dr is bigger than mindr:
+    
+         if (t * dr > mindr)
+    
+    However, it is legitimate for a circle to have radius exactly 0, so
+    the test should use >= rather than >.
+    
+    This gets rid of the dots in demos/radial-test except for when the c2
+    circle has radius 0 and a repeat mode of either NONE or NORMAL. Both
+    those dots correspond to a t value of 1.0, which is outside the
+    defined interval of [0.0, 1.0) and therefore subject to the repeat
+    algorithm. As a result, in the NONE case, a value of 1.0 turns into
+    transparent black. In the NORMAL case, 1.0 wraps around and becomes
+    0.0 which is red, unlike 0.99 which is blue.
+    
+    Cc: ranma42@gmail.com
+
+commit 54aca22058e8f4daf999b37e5c5e6ddd8e67f811
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 7 19:43:53 2012 -0500
+
+    demos/radial-test: Add zero-radius circles to demonstrate rendering bugs
+    
+    Add two new gradient columns, one where the start circle has radius
+    0 and one where the end circle has radius 0. All the new gradients
+    except for one are rendered with a bright dot in the middle. In most
+    but not all cases this is incorrect.
+    
+    Cc: ranma42@gmail.com
+
+commit fdab3c1b6cd9c5e197ec3f6bc0a03da32880e317
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Sat Dec 8 15:16:51 2012 +0200
+
+    test: Workaround unaligned MOVDQA bug (http://gcc.gnu.org/PR55614)
+    
+    Just use SSE2 intrinsics to do unaligned memory accesses as
+    a workaround for this gcc bug related to vector extensions.
+
+commit 2bc59006d7fe91abf68a2061ad86c06e1b2964ab
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Fri Nov 30 12:00:47 2012 +0200
+
+    Improve performance of combine_over_u
+    
+    The generic C over_u combiner can be a lot faster with the
+    addition of special shortcuts for 0xFF and 0x00 alpha/mask
+    values. This is already implemented in C and SSE2 fast paths.
+    
+    Profiling the run of cairo-perf-trace benchmarks with PIXMAN_DISABLE
+    environment variable set to "fast mmx sse2" on Intel Core i7:
+    
+    === before ===
+    
+    37.32%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] combine_over_u
+    21.37%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_no_repeat_8888
+    13.51%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_affine_none_a8r8g8b8
+     2.96%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] radial_compute_color
+     2.74%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] fetch_scanline_a8
+     2.71%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] fetch_scanline_x8r8g8b8
+     2.17%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] _pixman_gradient_walker_pixel
+     1.86%  cairo-perf-trac  libcairo.so.2.11200.0 [.] _cairo_tor_scan_converter_generate
+     1.57%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_affine_pad_a8r8g8b8
+     0.97%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] combine_in_reverse_u
+     0.96%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] combine_over_ca
+    
+    === after ===
+    
+    28.79%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_no_repeat_8888
+    18.44%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_affine_none_a8r8g8b8
+    15.54%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] combine_over_u
+     3.94%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] radial_compute_color
+     3.69%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] fetch_scanline_a8
+     3.69%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] fetch_scanline_x8r8g8b8
+     2.94%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] _pixman_gradient_walker_pixel
+     2.52%  cairo-perf-trac  libcairo.so.2.11200.0 [.] _cairo_tor_scan_converter_generate
+     2.08%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_affine_pad_a8r8g8b8
+     1.31%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] combine_in_reverse_u
+     1.29%  cairo-perf-trac  libpixman-1.so.0.29.1 [.] combine_over_ca
+
+commit 8ca4e144724ba2041bc5ef077ccf6d24e7cf4d1f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Nov 26 14:27:34 2012 -0500
+
+    Add fast paths for separable convolution
+    
+    Similar to the fast paths for general affine access, add some fast
+    paths for the separable filter for all combinations of formats
+    x8r8g8b8, a8r8g8b8, r5g6b5, a8 with the four repeat modes.
+    
+    It is easy to see the speedup in the demos/scale program.
+
+commit 4f18ba30cea56331e30992242201b20954c8f7f2
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Dec 4 13:17:49 2012 -0500
+
+    Add demo program for conical gradients
+    
+    This new test is derived from radial-test.c and displays conical
+    gradients at various angles.
+    
+    It also demonstrates how PIXMAN_REPEAT_NORMAL is supposed to work when
+    used with a gradient specification where the first stop is not at 0.0:
+    In this case the gradient is supposed to have a smooth transition from
+    the last stop back to the first stop with no sharp transitions. It
+    also shows that the repeat mode is not ignored for conical gradients
+    as one might be tempted to think.
+
+commit 3a98787bddeb007a1cd2b86235205774c15250f2
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Nov 12 12:27:39 2012 -0500
+
+    Add demos/zone_plate.png
+    
+    The zone plate image is a useful test case for image scalers because
+    it contains all representable frequencies, so any imperfection in
+    resampling filters will show up as Moire patterns.
+    
+    This version is symmetric around the midpoint of the image, so since
+    rotating it is supposed to be a noop, it can also be used to verify
+    that the resampling filters don't shift the image.
+    
+    V2: Run the file through OptiPNG to cut the size in half, as suggested
+    by Siarhei.
+
+commit 97491ed26cfd4bad9cceffa789bfcbef77421d38
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Nov 22 10:18:26 2012 -0500
+
+    demos: Add new demo program, "scale"
+    
+    This program allows interactively scaling and rotating images
+    using various filters and repeat modes. It uses
+    pixman_filter_create_separable_convolution() to generate the filters.
+
+commit 7f5bb22d17f17c2032914163a318f4ec438ba280
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Nov 22 10:16:16 2012 -0500
+
+    demos/gtk-utils.[ch]: Add pixman_image_from_file()
+    
+    This function uses GdkPixbuf to load various common formats such as
+    .png and .jpg into a pixman image.
+
+commit 6915f3e24f4169260a8ad6ab7ff3087388dbe5db
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Nov 22 10:15:06 2012 -0500
+
+    Add new pixman_filter_create_separable_convolution() API
+    
+    This new API is a helper function to create filter parameters suitable
+    for use with PIXMAN_FILTER_SEPARABLE_CONVOLUTION.
+    
+    For each dimension, given a scale factor, reconstruction and sample
+    filter kernels, and a subsampling resolution, this function will
+    compute a convolution of the two kernels scaled appropriately, then
+    sample that convolution and return the resulting vectors in a form
+    suitable for being used as parameters to
+    PIXMAN_FILTER_SEPARABLE_CONVOLUTION.
+    
+    The filter kernels offered are the following:
+    
+      - IMPULSE:            Dirac delta function, i.e., point sampling
+      - BOX:                Box filter
+      - LINEAR:             Linear filter, aka. "Tent" filter
+      - CUBIC:              Cubic filter, currently Mitchell-Netravali
+      - GAUSSIAN:           Gaussian function, sigma=1, support=3*sigma
+      - LANCZOS2:           Two-lobed Lanczos filter
+      - LANCZOS3:           Three-lobed Lanczos filter
+      - LANCZOS3_STRETCHED: Three-lobed Lanczos filter, stretched by 4/3.0.
+                            This is the "Nice" filter from Dirty Pixels by
+                            Jim Blinn.
+    
+    The intended way to use this function is to extract scaling factors
+    from the transformation and then pass those to this function to get a
+    filter suitable for compositing with that transformation. The filter
+    kernels can be chosen according to quality and performance tradeoffs.
+    
+    To get equivalent quality to GdkPixbuf for downscalings, use BOX for
+    both reconstruction and sampling. For upscalings, use LINEAR for
+    reconstruction and IMPULSE for sampling (though note that for
+    upscaling in both X and Y directions, simply using
+    PIXMAN_FILTER_BILINEAR will likely be a better choice).
+
+commit 68760d3fe1351cb745aedcada7d765edc08bbe8b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Nov 22 10:17:56 2012 -0500
+
+    rounding.txt: Describe how SEPARABLE_CONVOLUTION filter works
+    
+    Add some notes on how to compute the convolution matrices to be used
+    with the SEPARABLE_CONVOLUTION filter.
+
+commit 6fd480b17c8398c217e4c11e826c82dbb8288006
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Nov 22 10:14:06 2012 -0500
+
+    Add new filter PIXMAN_FILTER_SEPARABLE_CONVOLUTION
+    
+    This filter is a new way to use a convolution matrix for filtering. In
+    contrast to the existing CONVOLUTION filter, this new variant is
+    different in two respects:
+    
+    - It is subsampled: Instead of just one convolution matrix, this
+      filter chooses between a number of matrices based on the subpixel
+      sample location, allowing the convolution kernel to be sampled at a
+      higher resolution.
+    
+    - It is separable: Each matrix is specified as the tensor product of
+      two vectors. This has the advantages that many fewer values have to
+      be stored, and that the filtering can be done separately in the x
+      and y dimensions (although the initial implementation doesn't
+      actually do that).
+    
+    The motivation for this new filter is to improve image downsampling
+    quality. Currently, the best pixman can do is the regular convolution
+    filter which is limited to coarsely sampled convolution kernels.
+    
+    With this new feature, any separable filter can be used at any desired
+    resolution.
+
+commit 7e39861da3655779ce76a72592feed3c1dd90017
+Author: Benjamin Gilbert <bgilbert@backtick.net>
+Date:   Sat Dec 1 23:55:31 2012 -0500
+
+    Fix thread safety on mingw-w64 and clang
+    
+    After finding a working TLS storage class specifier, configure was
+    continuing to test other candidates.  This caused it to prefer
+    __declspec(thread) over __thread.  However, __declspec(thread) is
+    ignored with a warning by mingw-w64 [1] and silently ignored by clang [2].
+    The resulting binary behaved as if PIXMAN_NO_TLS was defined.
+    
+    Bug introduced by a069da6c.
+    
+    [1] https://bugs.freedesktop.org/show_bug.cgi?id=57591
+    [2] http://lists.freedesktop.org/archives/pixman/2012-October/002320.html
+
+commit ebedd9a2ad8e841cd8323838b5136657d9ebb988
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Sun Nov 25 02:59:25 2012 +0200
+
+    test: Get rid of the obsolete 'prng_rand_N' and 'prng_rand_u32'
+    
+    They are the same as 'prng_rand_n' and 'prng_rand'
+
+commit b31a696263f1ae9aebb9bb21b93a0c15453bf611
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Sun Nov 25 02:50:35 2012 +0200
+
+    test: Switch to the new PRNG instead of old LCG
+    
+    Wallclock time for running pixman "make check" (compile time not included):
+    
+    ----------------------------+----------------+-----------------------------+
+                                | old PRNG (LCG) |   new PRNG (Bob Jenkins)    |
+           Processor type       +----------------+------------+----------------+
+                                |    gcc 4.5     |  gcc 4.5   | gcc 4.7 (simd) |
+    ----------------------------+----------------+------------+----------------+
+    quad Intel Core i7  @2.8GHz |    0m49.494s   |  0m43.722s |    0m37.560s   |
+    dual ARM Cortex-A15 @1.7GHz |     5m8.465s   |  4m37.375s |    3m45.819s   |
+         IBM Cell PPU   @3.2GHz |    23m0.821s   | 20m38.316s |   16m37.513s   |
+    ----------------------------+----------------+------------+----------------+
+    
+    But some tests got a particularly large boost. For example benchmarking and
+    profiling blitters-test on Core i7:
+    
+    === before ===
+    
+    $ time ./blitters-test
+    
+    real    0m10.907s
+    user    0m55.650s
+    sys     0m0.000s
+    
+      70.45%  blitters-test  blitters-test       [.] create_random_image
+      15.81%  blitters-test  blitters-test       [.] compute_crc32_for_image_internal
+       2.26%  blitters-test  blitters-test       [.] _pixman_implementation_lookup_composite
+       1.07%  blitters-test  libc-2.15.so        [.] _int_free
+       0.89%  blitters-test  libc-2.15.so        [.] malloc_consolidate
+       0.87%  blitters-test  libc-2.15.so        [.] _int_malloc
+       0.75%  blitters-test  blitters-test       [.] combine_conjoint_general_u
+       0.61%  blitters-test  blitters-test       [.] combine_disjoint_general_u
+       0.40%  blitters-test  blitters-test       [.] test_composite
+       0.31%  blitters-test  libc-2.15.so        [.] _int_memalign
+       0.31%  blitters-test  blitters-test       [.] _pixman_bits_image_setup_accessors
+       0.28%  blitters-test  libc-2.15.so        [.] malloc
+    
+    === after ===
+    
+    $ time ./blitters-test
+    
+    real    0m3.655s
+    user    0m20.550s
+    sys     0m0.000s
+    
+      41.77%  blitters-test.n  blitters-test.new  [.] compute_crc32_for_image_internal
+      15.77%  blitters-test.n  blitters-test.new  [.] prng_randmemset_r
+       6.15%  blitters-test.n  blitters-test.new  [.] _pixman_implementation_lookup_composite
+       3.09%  blitters-test.n  libc-2.15.so       [.] _int_free
+       2.68%  blitters-test.n  libc-2.15.so       [.] malloc_consolidate
+       2.39%  blitters-test.n  libc-2.15.so       [.] _int_malloc
+       2.27%  blitters-test.n  blitters-test.new  [.] create_random_image
+       2.22%  blitters-test.n  blitters-test.new  [.] combine_conjoint_general_u
+       1.52%  blitters-test.n  blitters-test.new  [.] combine_disjoint_general_u
+       1.40%  blitters-test.n  blitters-test.new  [.] test_composite
+       1.02%  blitters-test.n  blitters-test.new  [.] prng_srand_r
+       1.00%  blitters-test.n  blitters-test.new  [.] _pixman_image_validate
+       0.96%  blitters-test.n  blitters-test.new  [.] _pixman_bits_image_setup_accessors
+       0.90%  blitters-test.n  libc-2.15.so       [.] malloc
+
+commit 309e66f047cab0951d8e42628dcd181e2d14c58d
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Sat Nov 24 23:22:48 2012 +0200
+
+    test: Search/replace 'lcg_*' -> 'prng_*'
+    
+    The 'lcg' prefix is going to be misleading if we replace
+    the PRNG algorithm.
+
+commit d6545a2fc6f65c4959c6f85a15e95675347c0940
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Sat Nov 24 19:43:41 2012 +0200
+
+    test: Added a better PRNG (pseudorandom number generator)
+    
+    This adds a fast SIMD-optimized variant of a small noncryptographic
+    PRNG originally developed by Bob Jenkins:
+        http://www.burtleburtle.net/bob/rand/smallprng.html
+    
+    The generated pseudorandom data is good enough to pass "Big Crush"
+    tests from TestU01 (http://en.wikipedia.org/wiki/TestU01).
+    
+    SIMD code uses http://gcc.gnu.org/onlinedocs/gcc/Vector-Extensions.html
+    which is a GCC specific extension. There is also a slower alternative
+    code path, which should work with any C compiler.
+    
+    The performance of filling buffer with random data:
+       Intel Core i7  @2.8GHz (SSE2)     : ~5.9 GB/s
+       ARM Cortex-A15 @1.7GHz (NEON)     : ~2.2 GB/s
+       IBM Cell PPU   @3.2GHz (Altivec)  : ~1.7 GB/s
+
+commit 41f98a07fc3235b64713a39238238801304ac346
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Fri Nov 23 09:07:23 2012 +0200
+
+    test: Change is_little_endian() into inline function
+    
+    Also dropped redundant volatile keyword because any object
+    can be accessed via char* pointer without breaking aliasing
+    rules. The compilers are able to optimize this function to either
+    constant 0 or 1.
+
+commit 978bab253d1d061b00b5e80aa45ab6986aac466f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Nov 21 11:43:31 2012 -0500
+
+    Add text file rounding.txt describing how rounding works
+    
+    It is not entirely obvious how pixman gets from "location in the
+    source image" to "pixel value stored in the destination". This file
+    describes how the filters work, and in particular how positions are
+    rounded to samples.
+
+commit 74319e9d39f5d7f85cb75fcb91343f298b0e62e2
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Nov 20 23:28:43 2012 -0500
+
+    Convolution filter: round color values instead of truncating
+    
+    The pixel computed by the convolution filter should be rounded off,
+    not truncated. As a simple example consider a convolution matrix
+    consisting of five times 0x3333. If all five input pixels are
+    0xff, then the result of truncating will be
+    
+        (5 * 0x3333 * 255) >> 16 = 254
+    
+    But the real value of the computation is (5 * 0x3333 / 65536.0) * 255
+    = 254.9961, so the error is almost 1. If the user isn't very careful
+    about normalizing the convolution kernel so that it sums to one in
+    fixed point, such error might cause solid images to change color, or
+    opaque images to become translucent.
+    
+    The fix is simply to round instead of truncate.
+
+commit f0816ddaf4e61d9295de5b1cbe51f956db7fbd16
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Nov 20 03:23:51 2012 -0500
+
+    Round fixed-point multiplication
+    
+    After two fixed-point numbers are multiplied, the result is shifted
+    into place, but up until now pixman has simply discarded the low-order
+    bits instead of rounding to the closest number.
+    
+    Fix that by adding 0x8000 (or 0x2 in one place) before shifting and
+    update the test checksums to match.
+
+commit 44dd746bb68625b2f6be77c3f80292b45defe9d7
+Author: Stefan Weil <sw@weilnetz.de>
+Date:   Tue Nov 13 19:44:44 2012 +0100
+
+    test: Fix compiler warnings caused by unused code
+    
+    Signed-off-by: Stefan Weil <sw@weilnetz.de>
+
+commit 5f96022d3bca15050958512f1c15a0067d2225af
+Author: Stefan Weil <sw@weilnetz.de>
+Date:   Tue Nov 13 19:38:32 2012 +0100
+
+    pixman: Use uintptr_t in type casts from pointer to integral value
+    
+    These modifications fix lots of compiler warnings for systems where
+    sizeof(unsigned long) != sizeof(void *).
+    This is especially true for MinGW-w64 (64 bit Windows).
+    
+    Signed-off-by: Stefan Weil <sw@weilnetz.de>
+
+commit a96efd02d68b726d6d140d0bd211bc7cc1be127a
+Author: Stefan Weil <sw@weilnetz.de>
+Date:   Tue Nov 13 19:44:15 2012 +0100
+
+    Always use xmmintrin.h for 64 bit Windows
+    
+    MinGW-w64 uses the GNU compiler and does not define _MSC_VER.
+    Nevertheless, it provides xmmintrin.h and must be handled
+    here like the MS compiler. Otherwise compilation fails due to
+    conflicting declarations.
+    
+    Signed-off-by: Stefan Weil <sw@weilnetz.de>
+
+commit 899e0d60524bcd2cff6cad6acb310181fb96b39a
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Mon Nov 12 22:48:51 2012 +0100
+
+    MIPS: DSPr2: Added several nearest neighbor fast paths with a8 mask:
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench -n
+    
+    Referent (before):
+            over_8888_8_0565 =  L1:   9.62  L2:   8.85  M:  7.40 ( 39.27%)  HT:  5.67  VT:  5.61  R:  5.45  RT:  2.98 (  22Kops/s)
+            over_0565_8_0565 =  L1:   7.90  L2:   7.49  M:  6.72 ( 26.75%)  HT:  5.24  VT:  5.20  R:  5.06  RT:  2.90 (  22Kops/s)
+    
+    Optimized:
+            over_8888_8_0565 =  L1:  18.51  L2:  16.82  M: 12.13 ( 64.43%)  HT: 10.06  VT:  9.88  R:  9.54  RT:  5.63 (  31Kops/s)
+            over_0565_8_0565 =  L1:  14.82  L2:  13.94  M: 11.34 ( 45.20%)  HT:  9.45  VT:  9.35  R:  9.03  RT:  5.50 (  31Kops/s)
+
+commit a432bdce6637aa96060b9f1e25aae51c6fb95670
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Mon Nov 12 22:48:53 2012 +0100
+
+    MIPS: DSPr2: Added more fast-paths for OVER operation:
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+            over_n_0565 =  L1:  14.48  L2:  21.36  M: 17.57 ( 23.30%)  HT:  6.95  VT:  6.44  R:  6.39  RT:  2.16 (  22Kops/s)
+            over_n_8888 =  L1:  92.60  L2:  86.13  M: 24.41 ( 64.74%)  HT:  8.94  VT:  8.06  R:  8.00  RT:  2.53 (  25Kops/s)
+    
+    Optimized:
+            over_n_0565 =  L1:  27.65  L2: 189.22  M: 58.19 ( 77.12%)  HT: 52.80  VT: 49.88  R: 47.53  RT: 23.67 (  72Kops/s)
+            over_n_8888 =  L1: 235.99  L2: 230.86  M: 29.09 ( 77.11%)  HT: 27.95  VT: 27.24  R: 26.58  RT: 18.10 (  67Kops/s)
+
+commit e33e9d3f55590c369c532b0305f928045e0a46cb
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Mon Nov 12 22:48:52 2012 +0100
+
+    MIPS: DSPr2: Added more fast-paths for SRC operation:
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+            src_n_8_8888 =  L1:  13.79  L2:  22.47  M: 17.55 ( 58.28%)  HT:  6.95  VT:  6.46  R:  6.34  RT:  2.07 (  20Kops/s)
+               src_n_8_8 =  L1:  20.22  L2:  20.21  M: 18.20 ( 24.17%)  HT:  6.65  VT:  6.22  R:  6.11  RT:  2.03 (  20Kops/s)
+    
+    Optimized:
+            src_n_8_8888 =  L1:  58.31  L2:  53.34  M: 25.69 ( 85.29%)  HT: 22.55  VT: 21.44  R: 19.91  RT: 10.34 (  48Kops/s)
+               src_n_8_8 =  L1: 102.60  L2:  89.43  M: 65.01 ( 86.32%)  HT: 37.87  VT: 37.02  R: 32.43  RT: 12.41 (  51Kops/s)
+
+commit d881e1f5801ca0aefecccb43db05db539b3080d5
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Nov 11 14:05:54 2012 -0500
+
+    Allow src and dst to be identical in pixman_f_transform_invert()
+    
+    It is useful to be able to invert a matrix in place, but currently
+    pixman_f_transform_invert() will produce wrong results if you pass the
+    same matrix as both source and destination.
+    
+    Fix that by inverting into a temporary matrix and then copying that to
+    the destination.
+
+commit 614e7aaf14652c726b067bbc7562ef237dcd50de
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Nov 8 03:11:51 2012 -0500
+
+    pixman.h: Add typedefs for pixman_f_transform and pixman_f_vector
+
+commit b2e0e240fec4a8eaa7fe8da3a6807bcb8ac97edf
+Author: Joshua Root <jmr@macports.org>
+Date:   Fri Nov 9 14:39:14 2012 +1100
+
+    Fix undeclared variable use and sysctlbyname error handling on ppc
+    
+    Fixes bug 56889.
+
+commit 400436dc52450359de35cac9efa6aea631cf34e9
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Oct 31 13:14:07 2012 -0400
+
+    pixman_image_composite: Reduce opaque masks to NULL
+    
+    When the mask is known to be opaque, we might as well reduce it to
+    NULL to take advantage of the various fast paths that operate on NULL
+    masks.
+
+commit f2ada9e63fdd1034766e86d71008e0d819074f27
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Nov 7 13:45:09 2012 -0500
+
+    Post-release version bump to 0.29.1
+
+commit 8a2ff3e0ef0449921d962f8b9c093c2353ffd945
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Nov 7 13:40:34 2012 -0500
+
+    Pre-release version bump to 0.28.0
+
+commit 4b91f6ca72db3e8cbd7e97e9ef44be2f8994040d
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Oct 25 10:42:26 2012 -0400
+
+    Post-release version bump to 0.27.5
+
+commit 0de3f3344908757b61f9f51b59d4a39f7447451b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Oct 25 10:35:27 2012 -0400
+
+    Pre-release version bump to 0.27.4
+
+commit f0750258459580bbc9f136710f8e5c551bd01a0f
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Sun Oct 14 11:58:52 2012 +0200
+
+    MIPS: DSPr2: Added more fast-paths for ADD operation: - add_8888_8888_8888 - add_8_8 - add_8888_8888
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+            add_8888_8888_8888 =  L1:  17.55  L2:  13.35  M:  8.13 ( 93.95%)  HT:  6.60  VT:  6.64  R:  6.45  RT:  3.47 (  26Kops/s)
+            add_8_8            =  L1:  86.07  L2:  84.89  M: 62.36 ( 90.11%)  HT: 36.36  VT: 34.74  R: 29.56  RT: 11.56 (  52Kops/s)
+            add_8888_8888      =  L1:  95.59  L2:  73.05  M: 17.62 (101.84%)  HT: 15.46  VT: 15.01  R: 13.94  RT:  6.71 (  42Kops/s)
+    
+    Optimized:
+            add_8888_8888_8888 =  L1:  41.52  L2:  33.21  M: 11.97 (138.45%)  HT: 10.47  VT: 10.19  R:  9.42  RT:  4.86 (  32Kops/s)
+            add_8_8            =  L1: 135.06  L2: 104.82  M: 57.13 ( 82.58%)  HT: 34.79  VT: 36.60  R: 28.28  RT: 10.54 (  51Kops/s)
+            add_8888_8888      =  L1: 176.36  L2:  67.82  M: 17.48 (101.06%)  HT: 15.16  VT: 14.62  R: 13.88  RT:  8.05 (  45Kops/s)
+
+commit ca83717c63813b6f53f89dd94b5771bd32382a18
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Sun Oct 14 11:58:51 2012 +0200
+
+    MIPS: DSPr2: Added more fast-paths for ADD operation: - add_0565_8_0565 - add_8888_8_8888 - add_8888_n_8888
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+            add_0565_8_0565 =  L1:   8.89  L2:   8.37  M:  7.35 ( 29.22%)  HT:  5.90  VT:  5.85  R:  5.67  RT:  3.31 (  26Kops/s)
+            add_8888_8_8888 =  L1:  17.22  L2:  14.17  M:  9.89 ( 65.56%)  HT:  7.57  VT:  7.50  R:  7.36  RT:  4.10 (  30Kops/s)
+            add_8888_n_8888 =  L1:  17.79  L2:  14.87  M: 10.35 ( 54.89%)  HT:  5.19  VT:  4.93  R:  4.92  RT:  1.90 (  19Kops/s)
+    
+    Optimized:
+            add_0565_8_0565 =  L1:  21.72  L2:  20.01  M: 14.96 ( 59.54%)  HT: 12.03  VT: 11.81  R: 11.26  RT:  6.33 (  37Kops/s)
+            add_8888_8_8888 =  L1:  47.42  L2:  38.64  M: 15.90 (105.48%)  HT: 13.34  VT: 13.03  R: 11.84  RT:  6.63 (  38Kops/s)
+            add_8888_n_8888 =  L1:  54.83  L2:  42.66  M: 17.36 ( 92.11%)  HT: 15.20  VT: 14.82  R: 13.66  RT:  7.83 (  41Kops/s)
+
+commit 52d20e692ebc605077448ab6f52fd257f83481b2
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Sun Oct 14 11:58:50 2012 +0200
+
+    MIPS: DSPr2: Added fast-paths for ADD operation: - add_n_8_8 - add_n_8_8888 - add_8_8_8
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+            add_n_8_8    =  L1:  41.37  L2:  37.83  M: 30.38 ( 60.45%)  HT: 23.70  VT: 22.85  R: 21.51  RT: 10.32 (  45Kops/s)
+            add_n_8_8888 =  L1:  16.01  L2:  14.46  M: 11.64 ( 46.32%)  HT:  5.50  VT:  5.18  R:  5.06  RT:  1.89 (  18Kops/s)
+            add_8_8_8    =  L1:  13.26  L2:  12.47  M: 11.16 ( 29.61%)  HT:  8.09  VT:  8.04  R:  7.68  RT:  3.90 (  29Kops/s)
+    
+    Optimized:
+            add_n_8_8    =  L1:  96.03  L2:  79.37  M: 51.89 (103.31%)  HT: 32.59  VT: 31.29  R: 28.52  RT: 11.08 (  46Kops/s)
+            add_n_8_8888 =  L1:  53.61  L2:  46.92  M: 23.78 ( 94.70%)  HT: 19.06  VT: 18.64  R: 17.30  RT:  9.15 (  43Kops/s)
+            add_8_8_8    =  L1:  89.65  L2:  66.82  M: 37.10 ( 98.48%)  HT: 22.10  VT: 21.74  R: 20.12  RT:  8.12 (  41Kops/s)
+
+commit 9df645dfb04b5a790faabe1e9a84fc37287d91b0
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Fri Oct 19 01:59:16 2012 +0300
+
+    Workaround for FTBFS with gcc 4.6 (http://gcc.gnu.org/PR54965)
+    
+    GCC 4.6 has problems with force_inline, so just use normal inline instead.
+    Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=55630
+
+commit 31e5a0a393defb8e0534ab1bde29ab23fc04795d
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Oct 12 18:34:33 2012 -0400
+
+    pixman_composite_trapezoids(): don't clip to extents for some operators
+    
+    pixman_composite_trapezoids() is supposed to composite across the
+    entire destination, but it actually only composites across the extent
+    of the trapezoids. For operators such as ADD or OVER this doesn't
+    matter since a zero source has no effect on the destination. But for
+    operators such as SRC or IN, it does matter.
+    
+    So for such operators where a zero source has an effect, don't clip to
+    the trap extents.
+
+commit 65db2362e2793a527c2e831cceb81d8d6ad51b8f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Oct 12 18:29:56 2012 -0400
+
+    pixman_composite_trapezoids(): Factor out extents computation
+    
+    The computation of the extents rectangle is moved to its own
+    function.
+
+commit 2d9cb563b415e90cef898de03de7ed9c6f091db1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Oct 12 18:07:29 2012 -0400
+
+    Add new pixman_image_create_bits_no_clear() API
+    
+    When pixman_image_create_bits() function is given NULL for bits, it
+    will allocate a new buffer and initialize it to zero. However, in some
+    cases, only a small region of the image is actually used; in that case
+    it is wasteful to touch all of the memory.
+    
+    The new pixman_image_create_bits_no_clear() works exactly like
+    _create_bits() except that it doesn't initialize any newly allocated
+    memory.
+
+commit af803be17b4ea5f53db9af57b6c6ef06db99ebbd
+Author: Benny Siegert <bsiegert@gmail.com>
+Date:   Sun Oct 14 16:28:48 2012 +0200
+
+    configure.ac: PIXMAN_LINK_WITH_ENV fix
+    
+    (fixes bug #52101)
+    
+    On MirBSD, the compiler produces a (harmless) warning when the compiler
+    is called without the standard CFLAGS:
+    
+    foo.c:0: note: someone does not honour COPTS correctly, passed 0 times
+    
+    However, PIXMAN_LINK_WITH_ENV considers _any_ output on stderr as an
+    error, even if the exit status of the compiler is 0. Furthermore, it
+    resets CFLAGS and LDFLAGS at the start. On MirBSD, this will lead to a
+    warning in each test, making all such tests fail. In particular, the
+    pthread_setspecific test fails, thus pixman is compiled without thread
+    support. This leads to compile errors later on, or at least it did when
+    I tried this on pkgsrc. Re-adding the saved CFLAGS, LDFLAGS and LIBS
+    before the test makes it work.
+    
+    The second hunk inverts the order of the pthread flag checks. On BSD
+    systems (this is true at least on OpenBSD and MirBSD), both -lpthread
+    and -pthread work but the latter is "preferred", whatever this means.
+
+commit 6e56098c0338ce74228187e4c96fed1a66cb0956
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Sat Sep 29 02:29:22 2012 +0300
+
+    Add missing force_inline to in() function used for C fast paths
+
+commit 90bcafa495c1074b0ea1d35f99aa4837917494bd
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Sun Jul 8 23:10:00 2012 +0300
+
+    MIPS: skip runtime detection for DSPr2 if -mdspr2 option is in CFLAGS
+    
+    This provides a way to enable MIPS DSP ASE optimizations if running
+    under qemu-user (where /proc/cpuinfo contains information about the
+    host processor instead of the emulated one). Can be used for running
+    pixman test suite in qemu-user when having no access to real MIPS
+    hardware.
+
+commit d5f2f39319fc358cccda60abe0bc927bd27131c1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Oct 11 04:04:04 2012 -0400
+
+    region: Remove overlap argument from pixman_op()
+    
+    This is used to compute whether the regions in question overlap, but
+    nothing makes use of this information, so it can be removed.
+
+commit cb4f325ec0e844008075fe89ceb9f634ae41e7c9
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Oct 11 04:07:00 2012 -0400
+
+    region: Formatting fix
+    
+    The while part of a do/while loop was formatted as if it were a while
+    loop with an empty body. Probably some indent tool misinterpreted the
+    code at some point.
+
+commit 15b153d633fcfce886c30fee98599fddbf019ee8
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Oct 7 17:58:32 2012 -0400
+
+    Only regard images as pixbufs if they have identity transformations
+    
+    In order for a src/mask pair to be considered a pixbuf, they have to
+    have identical transformations, but we don't check for that. Since the
+    only fast paths we have for pixbufs require identity transformations,
+    it suffices to check that both source and mask are
+    untransformed.
+    
+    This is also the reason that this bug can't be triggered by any test
+    code - if the source and mask had different transformations, we would
+    consider them a pixbuf, but then wouldn't take the fast path because
+    at least one of the transformations would be different from the
+    identity.
+
+commit 3d81d89c292058522cce91338028d9b4c4a23c24
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Oct 4 12:41:08 2012 -0400
+
+    Remove BUILT_SOURCES
+    
+    pixman-combine32.[ch] were the only built sources, so BUILT_SOURCES
+    can now be removed.
+
+commit ec7aa11a6e4d0d02df9b339dfce9460dce954602
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Sep 23 03:52:34 2012 -0400
+
+    Speed up pixman_expand_to_float()
+    
+    GCC doesn't move the divisions out of the loop, so do it manually by
+    looking up the four (1.0f / mask) values in a table. Table lookups are
+    used under the theory that one L2 hit plus three L1 hits is preferable
+    to four floating point divisions.
+
+commit 8ccda2be30adf9dfcc3087b38a5062258324dcce
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Sep 21 18:36:16 2012 -0400
+
+    Don't auto-generate pixman-combine32.[ch] anymore
+    
+    Since pixman-combine64.[ch] are not used anymore, there is no point
+    generating these files from pixman-combine.[ch].template.
+    
+    Also get rid of dependency on perl in configure.ac.
+
+commit 4afd20cc71ba75190ebcead774b946157d0995a6
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Aug 18 14:39:29 2012 -0400
+
+    Remove 64 bit pipeline
+    
+    The 64 bit pipeline is not used anymore, so it can now be removed.
+    
+    Don't generate pixman-combine64.[ch] anymore. Don't generate the
+    pixman-srgb.c anymore. Delete all the 64 bit fetchers in
+    pixman-access.c, all the 64 bit iterator functions in
+    pixman-bits-image.c and all the functions that expand from 8 to 16
+    bits.
+
+commit 5ff0bbd9721bb216a8332cbde18adc458af3cdec
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Aug 18 14:11:07 2012 -0400
+
+    Switch the wide pipeline over to using floating point
+    
+    In pixman-bits-image.c, remove bits_image_fetch_untransformed_64() and
+    add bits_image_fetch_untransformed_float(); change
+    dest_get_scanline_wide() to produce a floating point buffer.
+    
+    In the gradients, change *_get_scanline_wide() to call
+    pixman_expand_to_float() instead of pixman_expand().
+    
+    In pixman-general.c change the wide Bpp to 16 instead of 8, and
+    initialize the buffers to 0 to prevent NaNs from causing trouble.
+    
+    In pixman-noop.c make the wide solid iterator generate floating point
+    pixels.
+    
+    In pixman-solid-fill.c, cache a floating point pixel, and make the
+    wide iterator generate floating point pixels.
+    
+    Bug fix in bits_image_fetch_untransformed_repeat_normal
+
+commit e75bacc5f9196c3980ce331c7d53de5b7e92d699
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Aug 18 13:22:15 2012 -0400
+
+    pixman-access.c: Add floating point accessor functions
+    
+    Three new function pointer fields are added to bits_image_t:
+    
+          fetch_scanline_float
+          fetch_pixel_float
+          store_scanline_float
+    
+    similar to the existing 32 and 64 bit accessors. The fetcher_info_t
+    struct in pixman_access similarly gets a new get_scanline_float field.
+    
+    For most formats, the new get_scanline_float field is set to a new
+    function fetch_scanline_generic_float() that first uses the 32 bit
+    scanline fetcher and then expands these pixels to floating point.
+    
+    For the 10 bpc formats, new floating point accessors are added that
+    use pixman_unorm_to_float() and pixman_float_to_unorm() to convert
+    back and forth.
+    
+    The PIXMAN_a8r8g8b8_sRGB format is handled with a 256-entry table that
+    maps 8 bit sRGB channels to linear single precision floating point
+    numbers. The sRGB->linear direction can then be done with a simple
+    table lookup.
+    
+    The other direction is currently done with a 4096-entry table which
+    works fine for 16 bit integers, but not so great for floating
+    point. So instead this patch uses a binary search in the sRGB->linear
+    table. The existing 32 bit accessors for the sRGB format are also
+    converted to use this method.
+
+commit 23252393a2dcae4dc5a7d03727dd66cdd81286ba
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Aug 18 13:02:31 2012 -0400
+
+    pixman-utils.c, pixman-private.h: Add floating point conversion routines
+    
+    A new struct argb_t containing a floating point pixel is added to
+    pixman-private.h and conversion routines are added to pixman-utils.c
+    to convert normalized integers to and from that struct.
+    
+    New functions:
+    
+      - pixman_expand_to_float()
+        Expands a buffer of integer pixels to a buffer of argb_t pixels
+    
+      - pixman_contract_from_float()
+        Converts a buffer of argb_t pixels to a buffer of integer pixels
+    
+      - pixman_float_to_unorm()
+        Converts a floating point number to an unsigned normalized integer
+    
+      - pixman_unorm_to_float()
+        Converts an unsigned normalized integer to a floating point number
+
+commit 4760599ff3008ab0f1e36a7d4d362362817fd930
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Sep 9 17:56:53 2012 -0400
+
+    Add combiner test
+    
+    This test runs the new floating point combiners on random input with
+    divide-by-zero exceptions turned on.
+    
+    With the floating point combiners the only thing we guarantee is that
+    divide-by-zero exceptions are not generated, so change
+    enable_fp_exceptions() to only enable those, and rename accordingly.
+
+commit a5b459114e35c7a946362f1e5857e8a87a403ec3
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri May 14 00:42:04 2010 -0400
+
+    Add pixman-combine-float.c
+    
+    This file contains floating point implementations of combiners for all
+    pixman operators. These combiners operate on buffers containing single
+    precision floating point pixels stored in (a, r, g, b) order.
+    
+    The combiners are added to the pixman_implementation_t struct, but
+    nothing uses them yet.
+    
+    This commit incorporates a number of bug fixes contributed by Andrea
+    Canciani.
+    
+    Some notes:
+    
+    - The combiners are making sure to never divide by zero regardless of
+      input, so an application could enable divide-by-zero exceptions and
+      pixman wouldn't generate any.
+    
+    - The operators are implemented according to the Render spec. Ie.,
+    
+        - If the input pixels are between 0 and 1, then so is the output.
+    
+        - The source and destination coefficients for the conjoint and
+          disjoint operators are clamped to [0, 1].
+    
+    - The PDF operators are not described in the render spec, and the
+      implementation here doesn't do any clamping except in the final
+      conversion from floating point to destination format.
+    
+    All of the above will need to be rethought if we add support for pixel
+    formats that can support negative and greater-than-one pixels. It is
+    in fact already the case in principle that convolution filters can
+    produce pixels with negative values, but since these go through the
+    broken "wide" path that narrows everything to 32 bits, these negative
+    values don't currently survive to the combiners.
+
+commit 7a9c2d586b2349b5e17966a96d7fe8c390abb75a
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jun 2 00:15:54 2012 -0400
+
+    blitters-test: Prepare for floating point
+    
+    Comment out some formats in blitters-test that are going to rely on
+    floating point in some upcoming patches.
+
+commit 600a06c81d3999bc6551c7e889726ed7b8bec84d
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Jul 11 03:27:49 2012 -0400
+
+    glyph-test: Prepare for floating point
+    
+    In preparation for an upcoming change of the wide pipe to use floating
+    point, comment out some formats in glyph-test that are going to be
+    using floating point and update the CRC32 value to match.
+
+commit 2e17b6dd4ee7c32684fb7ffc70d3ad3ebf7cb2ef
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Sep 29 10:34:17 2012 -0400
+
+    Make pixman.h more const-correct
+    
+    Add const to pointer arguments when the function doesn't change the
+    pointed-to data.
+    
+    Also, in add_glyphs() in pixman-glyph.c, make 'white' static and
+    const.
+
+commit 183afcf1d95625a1f237ef349a1c8931d94d000d
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Sep 30 11:59:23 2012 -0700
+
+    iwmmxt: Don't define dummy _mm_empty for >=gcc-4.8
+    
+    Definition was not present in <4.8.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=55451
+
+commit d4b72eb6ccc1f004efedbc6552ee22499350be4d
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Sep 29 18:15:54 2012 -0400
+
+    rotate-test: Call image_endian_swap() in make_image()
+    
+    Otherwise the test fails on big-endian.
+    
+    Tested-by: Matt Turner <mattst88@gmail.com>
+
+commit aff796d6cee4cb81f0352c2f7d0c994229bd5ca1
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Mon Jun 25 22:36:52 2012 -0400
+
+    Add scaled nearest repeat fast paths
+    
+    Before this patch it was often faster to scale and repeat
+    in two passes because each pass used a fast path vs.
+    the slow path that the single pass approach takes. This
+    makes it so that the single pass approach has competitive
+    performance.
+
+commit 05560828c495ed9226b43b30e1824447e3d8eff3
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Sep 21 16:34:24 2012 -0700
+
+    sse2: mark pack_565_2x128_128 as static force_inline
+
+commit de60e2e0e3eb6084f8f14b63f25b3cbfb012943f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Sep 15 03:13:09 2012 -0400
+
+    Fix for infinite-loop test
+    
+    The infinite loop detected by "affine-test 212944861" is caused by an
+    overflow in this expression:
+    
+        max_x = pixman_fixed_to_int (vx + (width - 1) * unit_x) + 1;
+    
+    where (width - 1) * unit_x doesn't fit in a signed int. This causes
+    max_x to be too small so that this:
+    
+        src_width = 0
+    
+        while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x)
+            src_width += src_image->bits.width;
+    
+    results in src_width being 0. Later on when src_width is used for
+    repeat calculations, we get the infinite loop.
+    
+    By casting unit_x to int64_t, the expression no longer overflows and
+    affine-test 212944861 and infinite-loop no longer loop forever.
+
+commit aa311a4641b79eac39fe602b75d7bee3de9b1dce
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Sep 13 19:31:29 2012 -0400
+
+    test: Add infinite-loop test
+    
+    This test demonstrates a bug where a certain transformation matrix can
+    result in an infinite loop. It was extracted as a standalone version
+    of "affine-test 212944861".
+    
+    If given the option -nf, the test program will not call fail_after()
+    and therefore potentially run forever.
+
+commit d5c721768c9811ce22bc0cd50bdf1c7bccc264e0
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Sep 13 19:29:19 2012 -0400
+
+    affine-test: Print out the transformation matrix when verbose
+    
+    Printing out the translation and scale is a bit misleading because the
+    actual transformation matrix can be modified in various other ways.
+    
+    Instead simply print the whole transformation matrix that is actually
+    used.
+
+commit 292fce7a230dd253fff71bd1bb2fbf9b7996a892
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Fri Sep 14 09:31:26 2012 +0200
+
+    MIPS: DSPr2: Added OVER combiner and two new fast paths: - over_8888_8888 - over_8888_8888_8888
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+              over_8888_8888 =  L1:  19.61  L2:  17.10  M: 11.16 ( 59.20%)  HT: 16.47  VT: 15.81  R: 14.82  RT:  8.90 (  50Kops/s)
+         over_8888_8888_8888 =  L1:  13.56  L2:  11.22  M:  7.46 ( 79.18%)  HT:  6.24  VT:  6.20  R:  6.11  RT:  3.95 (  29Kops/s)
+    
+    Optimized:
+              over_8888_8888 =  L1:  46.42  L2:  36.70  M: 16.69 ( 88.57%)  HT: 17.11  VT: 16.55  R: 15.31  RT:  9.48 (  52Kops/s)
+         over_8888_8888_8888 =  L1:  26.06  L2:  22.53  M: 11.49 (121.91%)  HT:  9.93  VT:  9.62  R:  9.19  RT:  5.75 (  36Kops/s)
+
+commit 28c9bd4866088a017a0cdf3f0fb47467b97bbc29
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Fri Sep 14 09:31:25 2012 +0200
+
+    MIPS: DSPr2: Added fast-paths for OVER operation: - over_0565_n_0565 - over_0565_8_0565
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+            over_0565_n_0565 =  L1:   7.56  L2:   7.24  M:  6.16 ( 16.38%)  HT:  4.01  VT:  3.84  R:  3.79  RT:  1.66 (  18Kops/s)
+            over_0565_8_0565 =  L1:   7.43  L2:   7.05  M:  5.98 ( 23.85%)  HT:  5.27  VT:  5.23  R:  5.09  RT:  3.14 (  28Kops/s)
+    
+    Optimized:
+            over_0565_n_0565 =  L1:  15.47  L2:  14.52  M: 12.30 ( 32.65%)  HT: 10.76  VT: 10.57  R: 10.27  RT:  6.63 (  46Kops/s)
+            over_0565_8_0565 =  L1:  15.47  L2:  14.61  M: 11.78 ( 46.92%)  HT: 10.00  VT:  9.84  R:  9.40  RT:  5.81 (  43Kops/s)
+
+commit b660eb30b4e5f690d191b26a500a6ba224986b3a
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Fri Sep 14 09:31:24 2012 +0200
+
+    MIPS: DSPr2: Added fast-paths for OVER operation: - over_8888_n_0565 - over_8888_8_0565
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+            over_8888_n_0565 =  L1:   8.95  L2:   8.33  M:  6.95 ( 27.74%)  HT:  4.27  VT:  4.07  R:  4.01  RT:  1.74 (  19Kops/s)
+            over_8888_8_0565 =  L1:   8.86  L2:   8.11  M:  6.72 ( 35.71%)  HT:  5.68  VT:  5.62  R:  5.47  RT:  3.35 (  30Kops/s)
+    
+    Optimized:
+            over_8888_n_0565 =  L1:  18.76  L2:  17.55  M: 13.11 ( 52.19%)  HT: 11.35  VT: 11.10  R: 10.88  RT:  6.94 (  47Kops/s)
+            over_8888_8_0565 =  L1:  18.14  L2:  16.79  M: 12.10 ( 64.25%)  HT: 10.24  VT:  9.98  R:  9.63  RT:  5.89 (  43Kops/s)
+
+commit 37e3368e20cee42f1e1039bb112ed9a09d21156f
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Fri Sep 14 09:31:23 2012 +0200
+
+    MIPS: DSPr2: Added fast-paths for OVER operation: - over_8888_n_8888 - over_8888_8_8888
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench results
+    
+    Referent (before):
+            over_8888_n_8888 =  L1:   9.92  L2:  11.27  M:  8.50 ( 45.23%)  HT:  4.70  VT:  4.45  R:  4.49  RT:  1.85 (  20Kops/s)
+            over_8888_8_8888 =  L1:  12.54  L2:  10.86  M:  8.18 ( 54.36%)  HT:  6.53  VT:  6.45  R:  6.41  RT:  3.83 (  33Kops/s)
+    
+    Optimized:
+            over_8888_n_8888 =  L1:  28.02  L2:  24.92  M: 14.72 ( 78.15%)  HT: 13.03  VT: 12.65  R: 12.00  RT:  7.49 (  49Kops/s)
+            over_8888_8_8888 =  L1:  26.92  L2:  23.93  M: 13.65 ( 90.58%)  HT: 11.68  VT: 11.29  R: 10.56  RT:  6.37 (  45Kops/s)
+
+commit f580c4c5b2a435ebe2751ce0dace6c42568557f8
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Sep 21 16:52:16 2012 -0400
+
+    pixman-combine.c.template: Formatting clean-ups
+    
+    Various formatting fixes, and removal of some obsolete comments about
+    strength reduction of operators.
+
+commit 58f8704664d1f8c812a85b929a50818f213a8438
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Sep 20 21:43:24 2012 -0400
+
+    Fix bugs in pixman-image.c
+    
+    In the checks for whether the transforms are rotation matrices "-1"
+    and "1" were used instead of the correct -pixman_fixed_1 and
+    pixman_fixed_1.
+    
+    Fixes test suite failure for rotate-test.
+
+commit 550dfc5e7ecd5b099c1009d77c56cb91a62caeb1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Sep 20 18:41:33 2012 -0400
+
+    Add rotate-test.c test program
+    
+    This program exercises a bug in pixman-image.c where "-1" and "1" were
+    used instead of the correct "- pixman_fixed_1" and "pixman_fixed_1".
+    
+    With the fast implementation enabled:
+    
+         % ./rotate-test
+         rotate test failed! (checksum=35A01AAB, expected 03A24D51)
+    
+    Without it:
+    
+         % env PIXMAN_DISABLE=fast ./rotate-test
+         pixman: Disabled fast implementation
+         rotate test passed (checksum=03A24D51)
+    
+    V2: The first version didn't have lcg_srand (testnum) in test_transform().
+
+commit 2ab77c97a5a3a816d6383bdc3b6c8bdceb0383b7
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Sep 19 12:04:11 2012 -0400
+
+    Fix bugs in component alpha combiners for separable PDF operators
+    
+    In general, the component alpha version of an operator is supposed to
+    do this:
+    
+           - multiply source with mask in all channels
+           - multiply mask with source alpha in all channels
+           - compute the regular operator in all channels using the
+             mask value whenever source alpha is called for
+    
+    The first two steps are usually accomplished with the function
+    combine_mask_ca(), but for operators where source alpha is not used,
+    such as SRC, ADD and OUT, the simpler function
+    combine_mask_value_ca(), which doesn't compute the new mask values,
+    can be used.
+    
+    However, the PDF blend modes generally *do* make use of source alpha,
+    so they can't use combine_mask_value_ca() as they do now. They have to
+    use combine_mask_ca().
+    
+    This patch fixes this in combine_multiply_ca() and the CA combiners
+    generated by PDF_SEPARABLE_BLEND_MODE.
+
+commit c4b69e706e63e01fbc70e0026c2079007c89de14
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Sep 19 19:46:13 2012 -0400
+
+    Fix bug in fast_composite_scaled_nearest()
+    
+    The fast_composite_scaled_nearest() function can be called when the
+    format is x8b8g8r8. In that case pixels fetched in fetch_nearest()
+    need to have their alpha channel set to 0xff.
+    
+    Fixes test suite failure in scaling-test.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 35be7acb660228d4e350b5806c81e55606352e0d
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Sep 19 19:26:49 2012 -0400
+
+    Add PIXMAN_x8b8g8r8 and PIXMAN_a8b8g8r8 formats to scaling-test
+    
+    Update the CRC values based on what the general implementation
+    reports. This reveals a bug in the fast implementation:
+    
+        % env PIXMAN_DISABLE="mmx sse2" ./test/scaling-test
+        pixman: Disabled mmx implementation
+        pixman: Disabled sse2 implementation
+        scaling test failed! (checksum=AA722B06, expected 03A23E0C)
+    
+    vs.
+    
+        % env PIXMAN_DISABLE="mmx sse2 fast" ./test/scaling-test
+        pixman: Disabled fast implementation
+        pixman: Disabled mmx implementation
+        pixman: Disabled sse2 implementation
+        scaling test passed (checksum=03A23E0C)
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 9decb9a97975ae6bf25a42c0fd2eaa21b166c36d
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Sep 15 19:10:56 2012 -0400
+
+    implementation: Rename delegate to fallback
+    
+    At this point the chain of implementations has nothing to do with the
+    delegation design pattern anymore, so rename the delegate pointer to
+    'fallback'.
+
+commit b96599ccf353e89f95aa106853fcf310203c5874
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Sep 15 13:58:45 2012 -0400
+
+    _pixman_implementation_create(): Initialize implementation with memset()
+    
+    All the function pointers are NULL by default now, so we can just zero
+    the struct. Also write the function a little more compactly.
+
+commit 9539a18832c278ca0f6f572d8765932be6c9ad65
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Sep 15 13:53:17 2012 -0400
+
+    Rename _pixman_lookup_composite_function() to _pixman_implementation_lookup_composite()
+    
+    And move it into pixman-implementation.c which is where it belongs
+    logically.
+
+commit ee6af72dadaf9eb049bfeb35dc9ff57c3902403a
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Sep 15 13:20:52 2012 -0400
+
+    Move delegation of src/dest iter init into pixman-implementation.c
+    
+    Instead of relying on each implementation to delegate when an iterator
+    can't be initialized, change the type of iterator initializers to
+    boolean and make pixman-implementation.c do the delegation whenever an
+    iterator initializer returns FALSE.
+
+commit c710d0fae2a9dc7d20913e5e39a1bb53f7c942db
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Sep 15 13:08:51 2012 -0400
+
+    Move fill delegation into pixman-implementation.c
+    
+    As in the blt commit, do the delegation in pixman-implementation.c
+    whenever the implementation fill returns FALSE instead of relying on
+    each implementation to do it by itself.
+    
+    With this change there is no longer any reason for the implementations
+    to have one fill function that delegates and one that actually blits,
+    so consolidate those in the NEON, DSPr2, SSE2, and MMX
+    implementations.
+
+commit 534507ba3b00b9aaadc9f181c282b01e4e2fe415
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Sep 15 13:03:10 2012 -0400
+
+    Move blt delegation into pixman-implementation.c
+    
+    Rather than require each individual implementation to do the
+    delegation for blt, just do it in pixman-implementation.c whenever the
+    implementation blt returns FALSE.
+    
+    With this change, there is no longer any reason for the
+    implementations to have one blt function that delegates and one that
+    actually blits, so consolidate those in the NEON, DSPr2, SSE2, and MMX
+    implementations.
+
+commit 7ef4436abbdb898dc656ebb5832ed5d6fd764bba
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Sep 15 12:48:42 2012 -0400
+
+    implementation: Write lookup_combiner() in a less convoluted way.
+    
+    Instead of initializing an array on the stack, just use a simple
+    switch to select which set of combiners to look up in.
+
+commit 3124a51abb89475b8c5045bc96e04c5852694a16
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Sep 16 00:25:38 2012 -0400
+
+    build: Remove useless DEP_CFLAGS/DEP_LIBS variables
+
+commit 46e4faf8ef34d49f15e1946d105289fb06365553
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Thu Jun 21 06:07:07 2012 +0200
+
+    build: Improve win32 build system
+    
+    Handle cross-directory dependencies using PHONY targets and clean up
+    some redundancies.
+
+commit c89efdd211cf7cd3dc69a4140045ceab6f445730
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Tue Jul 17 16:14:20 2012 +0200
+
+    mmx: Fix x86 build on MSVC
+    
+    The MSVC compiler is very strict about variable declarations after
+    statements.
+    
+    Move all the declarations of each block before any statement in
+    the same block to fix multiple instances of:
+    
+    pixman-mmx.c(xxxx) : error C2275: '__m64' : illegal use of this type
+    as an expression
+
+commit 1e3e569b04f45592ce2174e48df0fcb333ce0ad3
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Aug 26 18:23:53 2012 -0400
+
+    test/utils.c: Use pow(), not powf() in sRGB conversion routines
+    
+    These functions are operating on double precision values, so use pow()
+    instead of powf().
+
+commit 8577daba04e60c1b4c44ce01c6874a573952913a
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Aug 26 18:13:47 2012 -0400
+
+    pixel_checker: Move sRGB conversion into get_limits()
+    
+    The sRGB conversion has to be done every time the limits are being
+    computed. Without this fix, pixel_checker_get_min/max() will produce
+    the wrong results when called from somewhere other than
+    pixel_checker_check().
+
+commit 62eb6e5e054da498e38da80ba8143f0a069b0c17
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Aug 25 17:14:50 2012 -0400
+
+    Remove obsolete TODO file
+
+commit 384846b38cfb5e1895ae49c40adbf72a85b63d95
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Aug 19 13:45:21 2012 -0400
+
+    Remove pointless declaration of _pixman_image_get_scanline_generic_64()
+    
+    This declaration used to be necessary when
+    _pixman_image_get_scanline_generic_64() referred to a structure that
+    itself referred back to _pixman_image_get_scanline_generic_64().
+
+commit 09cb1ae10b1976970233c934d27c36e0a4203e1c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jun 9 09:15:53 2012 -0400
+
+    demos: Add srgb_trap_test.c
+    
+    This demo program composites a bunch of trapezoids side by side with
+    and without gamma aware compositing.
+
+commit 04e878c231ad3624c57e51a5fcdc55a177d4dc0f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jun 9 09:42:56 2012 -0400
+
+    Make show_image() cope with more formats
+    
+    This makes show_image() deal with more formats than just a8r8g8b8, in
+    particular, a8r8g8b8_sRGB can now be handled.
+    
+    Images that are passed to show_image with a format of a8r8g8b8_sRGB
+    are displayed without modification under the assumption that the
+    monitor is approximately sRGB.
+    
+    Images with a format of a8r8g8b8 are also displayed without
+    modification since many other users of show_image() have been
+    generating essentially sRGB data with this format. Other formats are
+    also assumed to be gamma compressed; these are converted to a8r8g8b8
+    before being displayed.
+    
+    With these changes, srgb-test.c doesn't need to do its own conversion
+    anymore.
+
+commit 8db9ec9814a3dcd8211ec60cd4fd3c9ae9d77924
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jul 31 15:01:16 2012 -0400
+
+    Define TIMER_BEGIN and TIMER_END even when timers are not enabled
+    
+    This allows code that uses these macros to build when timers are
+    disabled.
+
+commit da5268cc19e03b24737dec3e2c51296156b869a8
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Aug 1 15:56:13 2012 -0400
+
+    Post-release version bump to 0.27.3
+
+commit e8ddef78b67a0699a990f3c785396d4b1955f972
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Aug 1 15:22:57 2012 -0400
+
+    Pre-release version bump to 0.27.2
+
+commit c214ca51a00fdd0e773ace32076c3ed8a5d0d482
+Author: Sebastian Bauer <mail@sebastianbauer.info>
+Date:   Tue Jul 31 07:30:32 2012 +0200
+
+    Use angle brackets form of including config.h
+
+commit 98617b3796d12c18d8306cca590160caa3c95454
+Author: Sebastian Bauer <mail@sebastianbauer.info>
+Date:   Tue Jul 31 07:30:31 2012 +0200
+
+    Added HAVE_CONFIG_H check before including config.h
+
+commit 5b0563f39eb29e4ae431717696174da5e282c346
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jul 30 16:21:39 2012 -0400
+
+    glyph-test: Avoid setting solid images as alpha maps.
+    
+    glyph-test would sometimes set a solid image as an alpha map, which is
+    not allowed. When this happened and the debug spew was enabled,
+    messages like this one would be generated:
+    
+        *** BUG ***
+        In pixman_image_set_alpha_map: The expression
+                !alpha_map || alpha_map->type == BITS was false
+        Set a breakpoint on '_pixman_log_error' to debug
+    
+    Fix this by not passing the ALLOW_SOLID flag to create_image() when
+    the resulting image is to be used as an alpha map.
+
+commit 38fe7cd7be388aae6dff7d9b9979eb4ffa5fa175
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jul 30 16:10:05 2012 -0400
+
+    stress-test: Avoid overflows in clip rectangles
+    
+    The rectangles in the clip region set in set_general_properties()
+    would sometimes overflow, which would lead to messages like these:
+    
+          *** BUG ***
+          In pixman_region32_union_rect: Invalid rectangle passed
+          Set a breakpoint on '_pixman_log_error' to debug
+    
+    when the micro version number of pixman is even.
+    
+    Fix this by detecting the overflow and clamping such that the x2/y2
+    coordinates are less than INT32_MAX.
+
+commit 24d83cbf3df06505fa4cf827271aa2985414cfdd
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jul 30 15:54:27 2012 -0400
+
+    Add make-srgb.pl to EXTRA_DIST
+    
+    Otherwise make distcheck doesn't pass.
+
+commit 72ba0b955504ecdc69f4cbf96a677b82be18b0cb
+Author: Antti S. Lankila <alankila@bel.fi>
+Date:   Sun Jul 29 22:14:34 2012 +0300
+
+    Add tests to validate new sRGB behavior
+    
+    Composite checks random combinations of operations that now also have
+    sRGB sources, masks and destinations, and stress-test validates the
+    read/write primitives.
+
+commit a161a6ba2394aed68148304de83b8f2c185f4c32
+Author: Antti S. Lankila <alankila@bel.fi>
+Date:   Sun Jul 29 21:56:18 2012 +0300
+
+    Add sRGB blending demo program
+    
+    Simple sRGB color blender test can be used to determine if the sRGB processing
+    works as expected. It blends alpha ramps of purple and green together such that
+    at midpoint of image, 50 % blend of both is realized. At that point, sRGB-aware
+    processing yields a result close to #bbb rather than #888, which is the linear
+    light blending result.
+    
+    The demo also contains the sample computation for sRGB premultiplied alpha.
+
+commit 7460457f80b1482338318f0ddcdf5311659fae7b
+Author: Antti S. Lankila <alankila@bel.fi>
+Date:   Sun Jul 29 21:46:58 2012 +0300
+
+    Add support for sRGB surfaces
+    
+    sRGB format is defined as a new format type, PIXMAN_TYPE_ARGB_SRGB. One form of
+    this type is provided, PIXMAN_a8r8g8b8_sRGB. Use of an sRGB format triggers
+    wide processing, and the pixel fetch/store functions handle the relevant
+    conversion between color spaces. Pixman itself is thought to compose in the
+    linearized sRGB color space.
+    
+    sRGB conversion is tabularized. For sRGB to linear, we are using only 256
+    values because the current source format uses 8 bits per component precision.
+    For linear to sRGB, it turns out that only 4096 brightness levels are required
+    to generate all of the 256 sRGB color values, and therefore only 12 bits per
+    component are considered during store. As a special case, a no-op
+    sRGB->linear->sRGB conversion is constructed to be lossless by adjusting the
+    sRGB->linear conversion table where necessary.
+
+commit 1dcca0f7ae64e9a96f2feba85dd728c636744009
+Author: Antti S. Lankila <alankila@bel.fi>
+Date:   Sat Jul 28 14:02:42 2012 +0300
+
+    Remove unnecessary dst initialization
+    
+    The initialization work is already performed correctly in image_init().
+
+commit 56321eff65832791252c7c324930d14c44d4d5f7
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jul 9 06:58:59 2012 -0400
+
+    Make pixman-mmx.c compile on x86-32 without optimization
+    
+    When not optimizing, write _mm_shuffle_pi16() as a statement
+    expression with inline assembly. That way we avoid
+    __builtin_ia32_pshufw(), which is only available when compiling with
+    -msse, while still allowing the non-optimizing gcc to understand that
+    the second argument is a compile time constant.
+    
+    Tested-by: Knut Petersen <knut_petersen@t-online.de>
+
+commit 0c81957e9b4f83944075167ae27a955bb253e267
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Jun 28 15:53:14 2012 -0400
+
+    Cleanups and simplifications in x86 CPU feature detection
+    
+    A new function pixman_cpuid() is added that runs the cpuid instruction
+    and returns the results. On GCC this function uses inline assembly; on
+    MSVC, the function calls the __cpuid intrinsic.
+    
+    There is also a new function called have_cpuid() which detects whether
+    cpuid is available. On x86-64 and MSVC, it simply returns TRUE; on
+    x86-32 bit, it checks whether the 22nd bit of eflags can be
+    modified. On MSVC this does have the consequence that pixman will no
+    longer work on CPUs without cpuid (i.e., older than 486 and some 486
+    models).
+    
+    These two functions together make it possible to write a generic
+    detect_cpu_features() in plain C. This function is then used in a new
+    have_feature() function that checks whether a specific set of feature
+    bits is available.
+    
+    Aside from the cleanups and simplifications, the main benefit from
+    this patch is that pixman now can do feature detection on x86-64, so
+    that newer instruction sets such as SSSE3 and SSE4.1 can be used. (And
+    apparently the assumption that x86-64 CPUs always have MMX and SSE2 is
+    no longer correct: Knight's Corner is x86-64, but doesn't have them).
+    
+    V2: Rename the constants in the getisax() code, as pointed out by Alan
+    Coopersmith. Also reinstate the result variable and initialize
+    features to 0.
+    
+    V3: Fixes for the fact that the upper 32 bits of a 64 bit register are
+    zeroed whenever the corresponding 32 bit register is written to.
+    
+    V4: Fixes for the fact that in 32 bit mode, when gcc is not optimizing
+    there were not enough registers available. The new code uses the "a",
+    "b", "c", and "d" constraints instead, and has two separate versions
+    for 32 and 64 bit modes.
+
+commit 4d641c3803d508ba1eb40e61257949422ae2b90d
+Author: Sebastian Bauer <mail@sebastianbauer.info>
+Date:   Sun Jul 8 18:48:45 2012 -0400
+
+    Changed the style of two function headers
+    
+    Declare functions *_inverse() and *_contains_rectangle() in the same
+    way as the other functions are declared. This doesn't imply any semantic
+    changes. It's just a unification of coding styles.
+
+commit 86ad09b548b45a5a5074d9d83970d5e7e7f89d31
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Mon Jul 2 20:54:20 2012 +0200
+
+    MIPS: DSPr2: Added more bilinear fast paths (without mask)
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench -b
+    
+    Referent (before):
+      src_8888_8888 =  L1:   8.18  L2:   7.79  M:  6.32 ( 33.51%)  HT:  5.78  VT:  5.70  R:  5.61  RT:  3.79 (  29Kops/s)
+      src_8888_0565 =  L1:   6.90  L2:   7.14  M:  6.47 ( 25.75%)  HT:  5.54  VT:  5.51  R:  5.46  RT:  3.53 (  28Kops/s)
+      src_0565_x888 =  L1:   3.76  L2:   3.71  M:  3.37 ( 13.41%)  HT:  3.26  VT:  3.22  R:  3.20  RT:  2.58 (  23Kops/s)
+      src_0565_0565 =  L1:   3.59  L2:   3.56  M:  3.47 (  9.19%)  HT:  3.19  VT:  3.18  R:  3.16  RT:  2.46 (  22Kops/s)
+     over_8888_8888 =  L1:   5.99  L2:   5.66  M:  4.95 ( 26.28%)  HT:  4.40  VT:  4.38  R:  4.31  RT:  3.02 (  26Kops/s)
+      add_8888_8888 =  L1:   6.84  L2:   6.39  M:  5.48 ( 29.09%)  HT:  4.80  VT:  4.79  R:  4.70  RT:  3.20 (  27Kops/s)
+    
+    Optimized:
+      src_8888_8888 =  L1:  18.27  L2:  16.69  M: 12.87 ( 68.25%)  HT: 11.80  VT: 11.61  R: 10.60  RT:  7.05 (  41Kops/s)
+      src_8888_0565 =  L1:  15.18  L2:  14.10  M: 11.75 ( 46.71%)  HT: 10.64  VT: 10.50  R: 10.03  RT:  7.15 (  41Kops/s)
+      src_0565_x888 =  L1:  10.45  L2:   9.96  M:  9.23 ( 36.72%)  HT:  8.39  VT:  8.29  R:  8.02  RT:  5.75 (  37Kops/s)
+      src_0565_0565 =  L1:   9.37  L2:   8.98  M:  8.50 ( 22.53%)  HT:  7.71  VT:  7.66  R:  7.52  RT:  5.59 (  37Kops/s)
+     over_8888_8888 =  L1:  12.21  L2:  11.01  M:  8.56 ( 45.36%)  HT:  7.71  VT:  7.64  R:  7.43  RT:  5.51 (  36Kops/s)
+      add_8888_8888 =  L1:  17.72  L2:  15.16  M: 10.78 ( 57.13%)  HT:  9.46  VT:  9.30  R:  9.00  RT:  6.03 (  38Kops/s)
+
+commit 707a8be11280c4d395e662e869d4a98d75bb5571
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Mon Jul 2 20:54:19 2012 +0200
+
+    MIPS: DSPr2: Added several bilinear fast paths with a8 mask
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz:
+    
+    lowlevel-blt-bench -b
+    
+    Referent (before):
+    
+      src_8888_8_8888 =  L1:   6.37  L2:   6.08  M:  5.46 ( 32.57%)  HT:  4.64  VT:  4.61  R:  4.52  RT:  2.85 (  23Kops/s)
+      src_8888_8_0565 =  L1:   5.89  L2:   5.66  M:  5.11 ( 23.71%)  HT:  4.36  VT:  4.34  R:  4.26  RT:  2.71 (  22Kops/s)
+      src_0565_8_x888 =  L1:   3.32  L2:   3.27  M:  3.17 ( 14.71%)  HT:  2.86  VT:  2.84  R:  2.81  RT:  2.07 (  19Kops/s)
+      src_0565_8_0565 =  L1:   3.19  L2:   3.15  M:  3.05 ( 10.11%)  HT:  2.75  VT:  2.74  R:  2.71  RT:  2.00 (  18Kops/s)
+     over_8888_8_8888 =  L1:   4.99  L2:   4.71  M:  4.11 ( 27.22%)  HT:  3.59  VT:  3.58  R:  3.50  RT:  2.36 (  21Kops/s)
+      add_8888_8_8888 =  L1:   5.60  L2:   5.26  M:  4.52 ( 29.95%)  HT:  3.92  VT:  3.89  R:  3.80  RT:  2.49 (  21Kops/s)
+    
+    Optimized:
+    
+      src_8888_8_8888 =  L1:  13.19  L2:  12.13  M:  9.75 ( 58.22%)  HT:  8.60  VT:  8.44  R:  7.90  RT:  5.06 (  33Kops/s)
+      src_8888_8_0565 =  L1:  11.64  L2:  10.81  M:  9.18 ( 42.63%)  HT:  8.04  VT:  7.90  R:  7.57  RT:  5.02 (  32Kops/s)
+      src_0565_8_x888 =  L1:   8.34  L2:   7.95  M:  7.29 ( 33.85%)  HT:  6.55  VT:  6.48  R:  6.25  RT:  4.35 (  30Kops/s)
+      src_0565_8_0565 =  L1:   7.71  L2:   7.35  M:  6.90 ( 22.90%)  HT:  6.14  VT:  6.10  R:  5.94  RT:  4.07 (  29Kops/s)
+     over_8888_8_8888 =  L1:   9.73  L2:   8.99  M:  7.15 ( 47.41%)  HT:  6.40  VT:  6.30  R:  6.11  RT:  4.28 (  30Kops/s)
+      add_8888_8_8888 =  L1:  13.01  L2:  11.72  M:  8.70 ( 57.68%)  HT:  7.59  VT:  7.46  R:  7.20  RT:  4.74 (  32Kops/s)
+
+commit 6aac8e85701be418e1ce13debc1bc8a30687f66b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Jun 27 22:11:29 2012 -0400
+
+    Simplify CPU detection on PPC.
+    
+    Get rid of the initialized and have_vmx static variables in
+    pixman-ppc.c. There is no point to them since CPU detection only
+    happens once per process.
+    
+    On Linux, just read /proc/self/auxv instead of generating the filename
+    with getpid() and don't bother with the stack buffer. Instead just
+    read the aux entries one by one.
+
+commit 4b78d785373c1d02abe695267379674776b3da3d
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Jun 27 22:05:18 2012 -0400
+
+    Simplifications to ARM CPU detection
+    
+    Organize pixman-arm.c such that each operating system/compiler exports
+    a detect_cpu_features() function that returns a bitmask with the
+    various features that we are interested in. A new function
+    have_feature() then calls this function, caches the result, and returns
+    whether the given feature is available.
+    
+    The result is that all the pixman_have_arm_<feature> functions become
+    redundant and can be deleted.
+
+commit 8b795a9c17aa25328b9c76b949d319bb578d5f1e
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Jun 27 14:14:54 2012 -0400
+
+    Simplify MIPS CPU detection
+    
+    There is no reason to have pixman_have_<feature> functions when all
+    they do is call pixman_have_mips_feature().
+    
+    Instead rename pixman_have_mips_feature() to have_feature() and call
+    it directly from _pixman_mips_get_implementations(). Also on
+    non-Linux, just make have_feature() return FALSE.
+
+commit 16502dd3ae3bf1d49faf1de533bd58013e168e64
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Jun 27 23:04:24 2012 -0400
+
+    Move the remaining bits of pixman-cpu into pixman-implementation.c
+
+commit 5813bb96aec1c48636db621558534561fef67b68
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jun 26 17:26:34 2012 -0400
+
+    Move MIPS specific CPU detection to its own file, pixman-mips.c
+
+commit 4ac0a1d60fccf4f9a782747ce61fd15825eddb5a
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jun 26 17:30:22 2012 -0400
+
+    Move PowerPC specific CPU detection to its own file pixman-ppc.c
+
+commit 8590415f0e54520a176ff0fb53deb82be16873dd
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jun 26 17:02:24 2012 -0400
+
+    Move ARM specific CPU detection to a new file pixman-arm.c
+    
+    Similar to the x86 commit, this moves the ARM specific CPU detection
+    to its own file which exports a pixman_arm_get_implementations()
+    function that is supposed to be a noop on non-ARM.
+
+commit 39ac18570a70674897aa7085406d9a4f6069feb4
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jun 26 12:44:32 2012 -0400
+
+    Move x86 specific CPU detection to a new file pixman-x86.c
+    
+    Extract the x86 specific parts of pixman-cpu.c and put them in their
+    own file called pixman-x86.c which exports one function
+    pixman_x86_get_implementations() that creates the MMX and SSE2
+    implementations. This file is supposed to be compiled on all
+    architectures, but pixman_x86_get_implementations() should be a noop
+    on non-x86.
+
+commit 1a3b7614a9808f8af15204d0751a6820bf67059c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jun 26 18:07:39 2012 -0400
+
+    pixman-cpu.c: Rename disabled to _pixman_disabled() and export it
+
+commit d4aa82fb9148862904bb7ca33655ce8d571643b0
+Author: Sebastian Bauer <mail@sebastianbauer.info>
+Date:   Tue Jul 3 05:55:14 2012 -0400
+
+    Qualify the static variables in pixman_f_transform_invert() with the const keyword.
+    
+    Their contents are not overwritten.
+
+commit f9c91ee2f27eaea68d8c3a130bf7d4bc0c860834
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Jul 1 16:59:53 2012 -0400
+
+    Use a compile-time constant for the "K" constraint in the MMX detection.
+    
+    When compiling with -O0, gcc doesn't understand that in
+    
+         signed char x = 0;
+    
+         ...
+    
+         asm ("...",
+         	  : "K" (x));
+    
+    x is constant. Fix this by using an immediate constant instead of a
+    variable.
+
+commit cd7ecf548a9e8115226bf0fec174f3abc54becb5
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Jul 1 06:54:06 2012 -0400
+
+    In fast_composite_tiled_repeat() don't clone images with a palette
+    
+    In fast_composite_tiled_repeat() if the source image is less than a
+    certain constant width, a clone is created which is then
+    pre-repeated. However, the source image's palette, if it has one, is
+    not cloned, so for indexed images, the pre-repeating would crash.
+    
+    Fix this by not doing any pre-repeating for images with a palette set.
+
+commit 7b20ad39f778d765566d3f2c5f7c50964100efc1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Jul 1 06:53:18 2012 -0400
+
+    test: Make stress-test more likely to actually composite something
+    
+    stress-test currently almost never composites anything because the clip
+    rectangles and transformations are such that either
+    _pixman_compute_composite_region32() or analyze_extent() will return
+    FALSE.
+    
+    Fix this by:
+    
+    - making log_rand() return smaller numbers so that the clip rectangles
+      are more likely to be within the destination image
+    
+    - adding rand_x() and rand_y() functions that pick positions within an
+      image and using them for positioning alpha maps and source/mask
+      positions.
+    
+    - making it less likely that clip regions are used in general
+    
+    These changes make the test take longer, so speed it up a little by
+    making most images smaller and by reducing the maximum convolution
+    filter from 17x19 to 3x4.
+    
+    With these changes, stress-test reveals a crash in iteration 0xd39
+    where fast_composite_tiled_repeat() creates an indexed image without a
+    palette.
+
+commit 4cdf8e9f3aca1925aeca25debb9268877ba3cd3d
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Jul 1 16:35:46 2012 -0400
+
+    sse2: add missing ABGR entries for bilinear src_8888_8888
+
+commit ef99f9e97260cc55678385a6d691c195f57bd6b1
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon May 21 05:56:58 2012 -0400
+
+    loongson: optimize _mm_set_pi* functions with shuffle instructions
+
+commit 9aa8e3a26071739d160496ef9f6126f296c500eb
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Jun 27 13:00:36 2012 -0400
+
+    mmx: optimize bilinear function when using 7-bit precision
+    
+    Loongson:
+    image             firefox-fishtank 1037.738 1040.218   0.19%    3/3
+    image             firefox-fishtank 1056.611 1057.581   0.20%    3/3
+    
+    ARM/iwMMXt:
+    image             firefox-fishtank 1487.282 1492.640   0.17%    3/3
+    image             firefox-fishtank 1363.913 1364.366   0.11%    3/3
+
+commit 1ad6ae6ee8a350f6fe4f30ba928aacf44d04f86e
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun May 20 20:51:08 2012 -0400
+
+    mmx: add scaled bilinear over_8888_8_8888
+    
+    Loongson:
+    image             firefox-fishtank 1665.163 1670.370   0.17%    3/3
+    image             firefox-fishtank 1037.738 1040.218   0.19%    3/3
+    
+    ARM/iwMMXt:
+    image             firefox-fishtank 2042.723 2045.308   0.10%    3/3
+    image             firefox-fishtank 1487.282 1492.640   0.17%    3/3
+
+commit c43de364cbcd195f7d1d6881a6109cbb3d6b73b8
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Jun 27 12:57:45 2012 -0400
+
+    mmx: add scaled bilinear over_8888_8888
+    
+    Loongson:
+    image         firefox-planet-gnome  157.012  158.087   0.30%    6/6
+    image         firefox-planet-gnome  156.617  157.109   0.15%    5/6
+    
+    ARM/iwMMXt:
+    image         firefox-planet-gnome  148.086  149.339   0.76%    6/6
+    image         firefox-planet-gnome  144.939  146.123   0.61%    6/6
+
+commit 9209cd746b7a81d0536df6dadd6a0b0b983291cb
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Tue Jun 19 00:30:51 2012 -0400
+
+    mmx: add scaled bilinear src_8888_8888
+    
+    Loongson:
+    image         firefox-planet-gnome  170.025  170.229   0.09%    3/4
+    image         firefox-planet-gnome  157.012  158.087   0.30%    6/6
+    
+    ARM/iwMMXt:
+    image         firefox-planet-gnome  164.192  164.875   0.34%    3/4
+    image         firefox-planet-gnome  148.086  149.339   0.76%    6/6
+
+commit 51f27d7364d66e47d882ee531b6655368159231a
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu Jun 28 12:17:16 2012 -0400
+
+    mmx: Use expand_alpha instead of mask/shift
+
+commit b0855f095aba8e0c98d1fd1863b278fc72a4dd2c
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Sun Jul 1 23:00:34 2012 +0300
+
+    Change default bilinear interpolation precision to 7 bits
+    
+    This improves performance for the current SSE2 code. Further
+    reduction to 4 bits may be considered later if it proves
+    to allow additional speedup.
+
+commit c430b1dba7bfea0031227dd4b976da3dd7c4ac02
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Tue Jun 26 01:47:18 2012 +0300
+
+    sse2: _mm_madd_epi16 for faster bilinear scaling with 7-bit precision
+    
+    Reducing interpolation precision allows the use of PMADDWD instruction.
+    This makes bilinear scaling much faster (on Intel Core i7):
+    
+    8-bit: image             firefox-fishtank   57.584   58.349   0.74%    3/3
+    7-bit: image             firefox-fishtank   51.139   51.229   0.30%    3/3
+    
+    8-bit: src_8888_8888 =  L1: 228.71  L2: 226.52  M:224.82 ( 14.95%)  HT:183.22  VT:154.02  R:171.72  RT:109.36
+    7-bit: src_8888_8888 =  L1: 320.45  L2: 317.43  M:314.38 ( 20.77%)  HT:215.13  VT:177.35  R:204.46  RT:121.93
+
+commit ccd31896bc2f1f323b3be9e8b1447cab892ee62d
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Tue Jun 26 01:06:10 2012 +0300
+
+    Bilinear interpolation precision is now configurable at compile time
+    
+    Macro BILINEAR_INTERPOLATION_BITS in pixman-private.h selects
+    the number of fractional bits used for bilinear interpolation.
+    
+    scaling-test and affine-test have checksums for 4-bit, 7-bit
+    and 8-bit configurations.
+
+commit ad9f1d020188fe90ae742041195baebdfbe6fe27
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Jun 29 14:24:30 2012 -0400
+
+    Fix distcheck due to custom iwMMXt rules
+
+commit ff5d041b88c667141b891909acd3085c3ed54994
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Mon Jun 25 07:24:27 2012 +0300
+
+    sse2: faster bilinear scaling (use _mm_loadl_epi64)
+    
+    Using _mm_loadl_epi64() to load two pixels at once (pairs of top
+    and bottom pixels) is faster than loading each pixel separately
+    and combining them with _mm_set_epi32().
+    
+    === cairo-perf-trace ===
+    
+    before: image             firefox-fishtank   66.912   66.931   0.13%    3/3
+    after:  image             firefox-fishtank   57.584   58.349   0.74%    3/3
+    
+    === lowlevel-blt-bench ===
+    
+    before: src_8888_8888 =  L1: 181.10  L2: 179.14  M:178.08 ( 11.02%)  HT:153.22  VT:133.45  R:142.24  RT: 95.32
+    after:  src_8888_8888 =  L1: 228.68  L2: 225.75  M:223.98 ( 14.23%)  HT:185.32  VT:155.06  R:162.73  RT:102.52
+    
+    This improvement was suggested by Matt Turner on irc.
+
+commit fc162bad561a516f648daf07e9d22d427fe60e74
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Mon Jun 25 07:11:59 2012 +0300
+
+    test: support nearest/bilinear scaling in lowlevel-blt-bench
+    
+    Scale factor is selected to be nearly 1x, so that the MPix/s results
+    can be directly compared with the results of non-scaled compositing
+    operations.
+
+commit 387e9bcddb90bd2c7d1dfb81c073196f9f81042d
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Sat Jun 23 04:08:28 2012 +0300
+
+    test: Fix for strict aliasing issue in 'get_random_seed'
+    
+    Gets rid of gcc warning when compiled with -fstrict-aliasing option in CFLAGS
+
+commit 4cbeb0aedccde5d2eb87daec08040a8bf161f6d7
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Wed Jun 20 17:13:33 2012 +0200
+
+    build: Fix compilation on win32
+    
+    When compiling using the win32 build system, config.h is not
+    available nor needed.
+    
+    Fixes:
+    
+    pixman-glyph.c(26) : fatal error C1083: Cannot open include file:
+    'config.h': No such file or directory
+
+commit 21077e1b83912b5e895b160bbbcd9b4664191506
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed May 2 23:13:43 2012 -0400
+
+    sse2: add src_x888_0565
+    
+    Port of 2ddd1c498b to SSE2.
+    
+    Uses the pmadd technique described in
+    http://software.intel.com/sites/landingpage/legacy/mmx/MMX_App_24-16_Bit_Conversion.pdf
+    
+    Works around lack of packusdw instruction by first sign extending the
+    values.
+    
+    fast:	src_8888_0565 =  L1: 681.40  L2: 689.20  M: 644.76 ( 25.51%)  HT:404.42  VT:288.04  R:306.07  RT:150.80 (1619Kops/s)
+    mmx:	src_8888_0565 =  L1:2056.03  L2:1985.44  M:1574.91 ( 61.87%)  HT:533.10  VT:376.35  R:416.10  RT:178.79 (1833Kops/s)
+    sse2:	src_8888_0565 =  L1:3793.42  L2:3653.44  M:1878.83 ( 73.94%)  HT:535.03  VT:407.96  R:421.46  RT:163.31 (1727Kops/s)
+    
+    and for reference, using packusdw
+    sse4:	src_8888_0565 =  L1:4396.18  L2:4229.25  M:1904.04 ( 75.18%)  HT:559.79  VT:427.96  R:440.06  RT:165.71 (1744Kops/s)
+    
+    Notice that MMX is faster in the RT case because it can operate on
+    8-bytes instead of the current 16-bytes for SSE2.
+
+commit 7db07cb731e3689328d9ecbdafffe99d7d38388e
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Jun 13 13:18:49 2012 -0400
+
+    sse2: enable over_n_0565 for b5g6r5
+    
+    Same as b950bb12 for MMX.
+
+commit 45946c5fa1760ad185ae20e8797635b0a256ea08
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Jun 13 16:37:48 2012 -0400
+
+    .gitignore: add test/glyph-test
+
+commit eadb442b5c825679016de7e7acb837e58f92bfc4
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jun 12 22:04:29 2012 -0400
+
+    test: Add missing break in stress-test.c
+    
+    Found by coverity:
+    
+    https://bugzilla.redhat.com/show_bug.cgi?id=756069
+
+commit 492dac7593075e622cfeddc73298df29d50b76bc
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Wed Jun 6 23:54:20 2012 +0300
+
+    test: fix bisecting issue in fuzzer-find-diff.pl
+    
+    Before bisecting to find the exact test which has failed, we
+    first need to make sure that the first test is fine (the first
+    test is "good" and the whole range is "bad"). Otherwise
+    test 2 gets incorrectly flagged as problematic in the case
+    where we already got a failure on test 1 right from the start.
+
+commit 40a0d10eeaedb879bbffe41f0537e8468c563df7
+Author: Siarhei Siamashka <siarhei.siamashka@gmail.com>
+Date:   Wed Jun 6 22:21:32 2012 +0300
+
+    test: OpenMP 2.5 requires signed loop iteration variables
+    
+    Unsigned loop variables are only supported since version 3.0
+    of OpenMP specification. Changing loop variables to use int32_t
+    type fixes pixman build problems with path64 compiler.
+
+commit 619a60d201bfdfe2f15fca75f6e686fc7c275b5b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jun 11 19:13:45 2012 -0400
+
+    test: Make glyph test pass on big endian
+    
+    The destination buffer was initialized with random uint32_t values, so
+    it started out different on big endian vs. little endian. Fix that by
+    initializing the buffer with random uint8_t values instead.
+
+commit f80e7ad3cbf46218bb3b4247e7b9e5d448670ad8
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Jan 8 13:21:11 2012 -0500
+
+    bits-image: Turn all the fetchers into iterator getters
+    
+    Instead of caching these fetchers in the image structure, and then
+    have the iterator getter call them from there, simply change them to
+    be iterator getters themselves.
+    
+    This avoids an extra indirect function call and lets us get rid of the
+    get_scanline_32/64 fields in pixman_image_t.
+
+commit fd175f9d02f36b1e91973e4264519228547f5dc7
+Author: Antti S. Lankila <alankila@bel.fi>
+Date:   Sun Jun 10 19:22:56 2012 +0300
+
+    Faster unorm_to_unorm for wide processing.
+    
+    Optimizing the unorm_to_unorm functions allows a speedup from:
+    
+    src_8888_2x10 =  L1:  62.08  L2:  60.73  M: 59.61 (  4.30%)  HT: 46.81
+    	VT: 42.17  R: 43.18  RT: 26.01 (325Kops/s)
+    
+    to:
+    
+    src_8888_2x10 =  L1:  76.94  L2:  78.43  M: 75.87 (  5.59%)  HT: 56.73
+    	VT: 52.39  R: 53.00  RT: 29.29 (363Kops/s)
+    
+    on an i7 Q720-based laptop.
+    
+    The key of the patch is the observation that unorm_to_unorm's work can
+    more easily be done with a simple multiplication and shift, when the
+    function is applied repeatedly and the parameters are not compile-time
+    constants. For instance, converting from 0xfe to 0xfefe (expanding
+    from 8 bits to 16 bits) can be done by calculating
+    
+    c = c * 0x101
+    
+    However, sometimes the result is not a neat replication of all the
+    bits. For instance, going from 10 bits to 16 bits can be done by
+    calculating
+    
+    c = c * 0x401UL >> 4
+    
+    where the intermediate result is a 20-bit wide repetition of the 10-bit
+    pattern followed by shifting off the unnecessary lowest bits.
+    
+    The patch has the algorithm to calculate the factor and the shift, and
+    converts the code to use it.
+
+commit 367b78fd5c57ee05298eb11370b68d01613961e5
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed May 30 16:44:04 2012 -0400
+
+    configure.ac: add iwmmxt2 configure flag
+    
+    The flag allows the user to select whether pixman-mmx.c is compiled with
+    -march=iwmmxt or -march=iwmmxt2.
+    
+    gcc has scheduling support for the Marvell CPU in the XO 1.75 when
+    building with -march=iwmmxt2.
+
+commit 31a6563ec5167d6b15fdb8c158a71ab4f97015ab
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed May 30 16:26:32 2012 -0400
+
+    autotools: use custom build rule to build iwMMXt code
+    
+    gcc has no sane way of enabling iwmmxt code generation, like -msse for
+    SSE, so you have to use -march=iwmmxt{,2}. User CFLAGS are placed after
+    -march=iwmmxt and override the march value, so we have to use a custom
+    build rule to order the CFLAGS such that pixman-mmx.c will be built with
+    the necessary CFLAGS.
+
+commit 706bf8264cb48aac36e36ff5e23f0ad8a47ff73c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue May 3 07:25:50 2011 -0400
+
+    Speed up _pixman_image_get_solid() in common cases
+    
+    Make _pixman_image_get_solid() faster by special-casing the common
+    cases where the image is SOLID or a repeating a8r8g8b8 image.
+    
+    This optimization together with the previous one results in a small
+    but reproducible performance improvement on the xfce4-terminal-a1
+    cairo trace:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    Before:
+    [  0]    image            xfce4-terminal-a1    1.221    1.239   1.21%  100/100
+    After:
+    [  0]    image            xfce4-terminal-a1    1.170    1.199   1.26%  100/100
+    
+    Either optimization by itself is difficult to separate from noise.
+
+commit 934c9d8546b71ddea91ac16b0928101903e2608e
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon May 28 02:36:22 2012 -0400
+
+    Speed up _pixman_composite_glyphs_no_mask()
+    
+    Bypass much of the overhead of pixman_image_composite32() by only
+    computing the composite region once instead of once per glyph, and by
+    only looking up the composite function whenever the glyph format or
+    flags change.
+    
+    As part of this, the pixman_compute_composite_region32() was renamed
+    to _pixman_compute_composite_region32() and exported in
+    pixman-private.h.
+    
+    I couldn't find a trace that would reliably demonstrate that this is
+    actually an improvement by itself (since _pixman_composite_glyphs_no_mask()
+    is called so rarely), but together with the following optimization for
+    solid sources, there is a small but reliable improvement to the
+    xfce4-terminal-a1 cairo trace.
+
+commit a162189dc0fa9978a3b5b6dd24f9bc12995805ed
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon May 28 01:22:26 2012 -0400
+
+    Speed up pixman_composite_glyphs()
+    
+    When adding glyphs to the mask, bypass most of the overhead of
+    pixman_image_composite32() by:
+    
+    - Only looking up the composite function when the glyph changes either
+      format or flags.
+    
+    - Only using a white source when the glyph format is different from
+      the mask format.
+    
+    - Simply intersecting the glyph rectangle with the destination
+      rectangle instead of doing the full _pixman_composite_region32().
+    
+    Performance results:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    Before:
+    [  0]    image            firefox-talos-gfx    6.570    6.577   0.13%    8/10
+    After:
+    [  0]    image            firefox-talos-gfx    4.272    4.289   0.28%   10/10
+    
+    V2: Changes to deal with white sources
+
+commit d9710442b4b2294e1ccd1594c54ca8a4feda2ac5
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun May 27 18:23:20 2012 -0400
+
+    test: Add glyph-test
+    
+    This test tests the new glyph cache and compositing API. Much of this
+    test is intended to make sure that clipping and alpha map handling
+    survive any optimizations that may be added to the glyph compositing.
+    
+    V2: Evaluating lcg_rand_n() multiple times in an argument list led
+        to undefined behavior.
+
+commit dc9237472789b0b45393f6f7eeafa057a86280c4
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon May 28 16:14:12 2012 -0400
+
+    Add support for alpha maps to compute_crc32_for_image().
+    
+    When a destination image I has an alpha map A, the following rules apply:
+    
+       - If I has an alpha channel itself, the content of that channel is
+         undefined
+    
+       - If A has RGB channels, the content of those channels is
+         undefined.
+    
+    Hence in order to compute the CRC32 for such an image, we have to mask
+    off the alpha channel of the image, and the RGB channels of the alpha
+    map.
+    
+    V2: Shifting by 32 is undefined in C
+
+commit 43e029d525c191a771e5b964fccff09b6d341bb2
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun May 27 13:38:14 2012 -0400
+
+    Move CRC32 computation from blitters-test.c into utils.c
+    
+    This way it can be used in other tests.
+
+commit fce31a5ef8c915ee6b7aee4c6b57bee245185c1f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue May 29 04:14:38 2012 -0400
+
+    Add pixman_glyph_cache_t API
+    
+    This new API allows entire glyph strings to be composited in one go
+    which reduces overhead compared to multiple calls to
+    pixman_image_composite32().
+    
+    The pixman_glyph_cache_t is a hash table that maps two keys (a "font"
+    and a "glyph" key, but they are just keys; there is no distinction
+    between them as far as pixman is concerned) to a glyph. Glyphs in the
+    cache can be composited through two new entry points
+    pixman_glyph_cache_composite_glyphs() and
+    pixman_glyph_cache_composite_glyphs_no_mask().
+    
+    A glyph cache may only be inserted into when it is "frozen", which is
+    achieved by calling pixman_glyph_cache_freeze(). When
+    pixman_glyph_cache_thaw() is later called, if the cache has become too
+    crowded, some glyphs (currently the least-recently-used) will
+    automatically be evicted. This means that a user must ensure that all
+    the required glyphs are present in the cache before compositing a
+    string. The intended way to use the cache is like this:
+    
+            pixman_glyph_t glyphs[MAX_GLYPHS];
+    
+            pixman_glyph_cache_freeze (cache);
+    
+            for (i = 0; i < n_glyphs; ++i)
+            {
+                const void *g;
+    
+                if (!(g = pixman_glyph_cache_lookup (cache, font_key, glyph_key)))
+                {
+                    img = <rasterize glyph as a pixman_image_t>;
+    
+                    g = pixman_glyph_cache_insert (cache, font_key, glyph_key,
+                                                   glyph_origin_x, glyph_origin_y,
+                                                   img);
+    
+                    if (!g)
+                    {
+                        /* Clean up out-of-memory condition */
+                        goto oom;
+                    }
+    
+                    glyphs[i].pos_x = glyph_x_pos;
+                    glyphs[i].pos_y = glyph_y_pos;
+                    glyphs[i].glyph = g;
+                }
+            }
+    
+            pixman_composite_glyphs (op, src, dest, ..., cache, n_glyphs, glyphs);
+    
+            pixman_glyph_cache_thaw (cache);
+    
+    V2:
+    - Move glyphs to front of the MRU list when they are used. Pointed
+      out by Behdad Esfahbod.
+    - Composite glyphs with (white IN glyph) ADD mask in order to support
+      mixed a8 and a8r8g8b8 glyphs. Also pointed out by Behdad.
+    - Add pixman_glyph_get_mask_format
+
+commit a3ae88b71b9d2dfc53303963157ecce4b29f0486
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Apr 27 12:07:16 2011 -0400
+
+    Add doubly linked lists
+    
+    This commit adds some new inline functions to maintain a doubly linked
+    list.
+    
+    The way to use them is to embed a pixman_link_t into the structures
+    that should be linked, and use a pixman_list_t as the head of the
+    list.
+    
+    The new functions are
+    
+        pixman_list_init (pixman_list_t *list);
+        pixman_list_prepend (pixman_list_t *list, pixman_link_t *link);
+        pixman_list_move_to_front (pixman_list_t *list, pixman_link_t *link);
+    
+    There is also a new macro:
+    
+        CONTAINER_OF(type, member, data);
+    
+    that can be used to get from a pointer to a member to the containing
+    structure.
+    
+    V2: Use the C89 macro offsetof() instead of rolling our own -
+    suggested by Alan Coopersmith.
+
+commit c2230fe2aff709de21cc2ee3fa27c3f7578e7f9d
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu May 24 03:10:34 2012 -0400
+
+    Make use of image flags in mmx and sse2 iterators
+    
+    Now that we have the full image flags available, the SSE2 and MMX
+    iterators can simply check against SAMPLES_COVER_CLIP_NEAREST (which
+    is computed in pixman_image_composite32()) instead of comparing all
+    the x/y/width/height parameters.
+
+commit c1065a9cb4ab1f5847b2373847c65d8ea68975f1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu May 24 03:00:38 2012 -0400
+
+    Pass the full image flags to iterators
+    
+    When pixman_image_composite32() is called some flags are computed that
+    indicate various things about the composite operation that can't be
+    deduced from the image flags themselves. These additional flags are
+    not currently available to iterators. All they can do is read the
+    image flags in image->common.flags.
+    
+    Fix that by passing the info->{src, mask, dest}_flags on to the
+    iterator initialization and store the flags in the iter struct as
+    "image_flags". At the same time rename the *iterator* flags variable
+    to "iter_flags" to avoid confusion.
+
+commit da6193b1fcc1dfab27f4c36917864f2f2c41cf3e
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun May 27 13:01:57 2012 -0400
+
+    mmx: add missing _mm_empty calls
+    
+    Fixes spurious test failures on x86-32.
+
+commit 62c4bdc94f82d1e4c5dc0e58b5903382d74f3883
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri May 18 01:37:07 2012 -0400
+
+    mmx: add over_reverse_n_8888
+    
+    Loongson:
+    over_reverse_n_8888 =  L1:  16.04  L2:  15.35  M: 10.20 ( 27.96%)  HT: 10.95  VT: 10.45  R:  9.18  RT:  6.99 (  76Kops/s)
+    over_reverse_n_8888 =  L1:  27.40  L2:  26.67  M: 16.97 ( 45.78%)  HT: 16.66  VT: 15.38  R: 14.15  RT:  9.44 (  97Kops/s)
+    
+    image                      poppler   34.106   35.500   1.48%    6/6
+    image                      poppler   29.598   30.835   1.70%    6/6
+    
+    ARM/iwMMXt:
+    over_reverse_n_8888 =  L1:  15.63  L2:  14.33  M: 10.83 ( 27.55%)  HT:  9.78  VT:  9.91  R:  9.49  RT:  6.96 (  69Kops/s)
+    over_reverse_n_8888 =  L1:  22.79  L2:  19.40  M: 13.76 ( 34.19%)  HT: 11.66  VT: 11.86  R: 11.17  RT:  7.85 (  75Kops/s)
+    
+    image                      poppler   38.040   38.606   1.10%    6/6
+    image                      poppler   31.686   32.278   0.80%    5/6
+
+commit 17acc7a4c707db4804b6bf47db30883745049fdb
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu May 17 23:27:59 2012 -0400
+
+    mmx: add add_0565_0565
+    
+    Loongson:
+    add_0565_0565 =  L1:  15.37  L2:  14.91  M: 11.83 ( 16.06%)  HT: 10.53  VT: 10.15  R:  9.74  RT:  6.19 (  68Kops/s)
+    add_0565_0565 =  L1:  45.06  L2:  46.71  M: 27.45 ( 38.00%)  HT: 23.76  VT: 22.84  R: 18.96  RT:  9.79 ( 104Kops/s)
+    
+    ARM/iwMMXt:
+    add_0565_0565 =  L1:  12.87  L2:  11.58  M: 10.11 ( 12.50%)  HT:  9.06  VT:  8.66  R:  7.70  RT:  5.62 (  58Kops/s)
+    add_0565_0565 =  L1:  31.14  L2:  28.87  M: 22.46 ( 28.60%)  HT: 18.61  VT: 17.04  R: 15.21  RT:  9.35 (  90Kops/s)
+
+commit d551dc049498d17ab879fd67d47508cafaaede06
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu May 17 23:29:51 2012 -0400
+
+    fast: add add_0565_0565 function
+    
+    I'll need this code for header and tail alignment loops in MMX, so I
+    might as well implement a fast path here.
+
+commit f8dc0e98343c7936a37a3624721c5782e7ac309c
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu May 17 13:22:18 2012 -0400
+
+    mmx: implement expand_4x565 in terms of expand_4xpacked565
+    
+    Loongson:
+            over_n_0565 =  L1:  38.57  L2:  38.88  M: 30.01 ( 20.97%)  HT: 23.60  VT: 23.88  R: 21.95  RT: 11.65 ( 113Kops/s)
+            over_n_0565 =  L1:  56.28  L2:  55.90  M: 34.20 ( 23.82%)  HT: 25.66  VT: 26.60  R: 23.78  RT: 11.80 ( 115Kops/s)
+    
+         over_8888_0565 =  L1:  35.89  L2:  36.11  M: 21.56 ( 45.47%)  HT: 18.33  VT: 17.90  R: 16.27  RT:  9.07 (  98Kops/s)
+         over_8888_0565 =  L1:  40.91  L2:  41.06  M: 23.13 ( 48.46%)  HT: 19.24  VT: 18.71  R: 16.82  RT:  9.18 (  99Kops/s)
+    
+          over_n_8_0565 =  L1:  28.92  L2:  29.12  M: 21.42 ( 30.00%)  HT: 18.37  VT: 17.75  R: 16.15  RT:  8.79 (  91Kops/s)
+          over_n_8_0565 =  L1:  32.32  L2:  32.13  M: 22.44 ( 31.27%)  HT: 19.15  VT: 18.66  R: 16.62  RT:  8.86 (  92Kops/s)
+    
+    over_n_8888_0565_ca =  L1:  29.33  L2:  29.22  M: 18.99 ( 66.69%)  HT: 16.69  VT: 16.22  R: 14.63  RT:  8.42 (  88Kops/s)
+    over_n_8888_0565_ca =  L1:  34.97  L2:  34.14  M: 20.32 ( 71.73%)  HT: 17.67  VT: 17.19  R: 15.23  RT:  8.50 (  89Kops/s)
+    
+    ARM/iwMMXt:
+            over_n_0565 =  L1:  29.70  L2:  30.53  M: 24.47 ( 14.84%)  HT: 22.28  VT: 21.72  R: 21.13  RT: 12.58 ( 105Kops/s)
+            over_n_0565 =  L1:  41.42  L2:  40.00  M: 30.95 ( 19.13%)  HT: 27.06  VT: 27.28  R: 23.43  RT: 14.44 ( 114Kops/s)
+    
+         over_8888_0565 =  L1:  12.73  L2:  11.53  M:  9.07 ( 16.47%)  HT:  9.00  VT:  9.25  R:  8.44  RT:  7.27 (  76Kops/s)
+         over_8888_0565 =  L1:  23.72  L2:  21.76  M: 15.89 ( 29.51%)  HT: 14.36  VT: 14.05  R: 12.44  RT:  8.94 (  86Kops/s)
+    
+          over_n_8_0565 =  L1:   6.80  L2:   7.15  M:  6.37 (  7.90%)  HT:  6.58  VT:  6.24  R:  6.49  RT:  5.94 (  59Kops/s)
+          over_n_8_0565 =  L1:  12.06  L2:  11.02  M: 10.16 ( 13.43%)  HT:  9.57  VT:  8.49  R:  9.10  RT:  6.86 (  69Kops/s)
+    
+    over_n_8888_0565_ca =  L1:   7.62  L2:   7.01  M:  6.27 ( 20.52%)  HT:  6.00  VT:  6.07  R:  5.68  RT:  5.53 (  57Kops/s)
+    over_n_8888_0565_ca =  L1:  13.54  L2:  11.96  M:  9.76 ( 30.66%)  HT:  9.72  VT:  8.45  R:  9.37  RT:  6.85 (  67Kops/s)
+
+commit 51681a052f9e1d0970a79187974da77d9bf69450
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun May 13 20:39:05 2012 -0400
+
+    mmx: add and use expand_4xpacked565 function
+    
+    Loongson:
+    add_0565_0565 =  L1:  14.39  L2:  13.98  M: 11.28 ( 15.22%)  HT: 10.11  VT:  9.74  R:  9.39  RT:  6.05 (  67Kops/s)
+    add_0565_0565 =  L1:  15.37  L2:  14.91  M: 11.83 ( 16.06%)  HT: 10.53  VT: 10.15  R:  9.74  RT:  6.19 (  68Kops/s)
+    
+    ARM/iwMMXt:
+    add_0565_0565 =  L1:  11.12  L2:  10.40  M:  8.82 ( 10.65%)  HT:  7.98  VT:  7.41  R:  7.57  RT:  5.21 (  54Kops/s)
+    add_0565_0565 =  L1:  12.87  L2:  11.58  M: 10.11 ( 12.50%)  HT:  9.06  VT:  8.66  R:  7.70  RT:  5.62 (  58Kops/s)
+
+commit 6491c70e3a2a2e10e99c84024895f346f2300f63
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat May 26 16:34:13 2012 -0400
+
+    Post-release version bump to 0.27.1
+
+commit b1a401e6c9f204d33a55eee41897d66ab81d6117
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat May 26 16:17:14 2012 -0400
+
+    Pre-release version bump to 0.26.0
+
+commit f71e3dba979fcfc1cf87d01137e1e32451a173b1
+Author: Ingmar Runge <ingmar@irsoft.de>
+Date:   Sat May 19 15:45:18 2012 +0200
+
+    Fix MSVC compilation
+    
+    Only up to three SSE intrinsics supported in function declaration.
+
+commit 1e59e18d73a6e45729a99fe6ccc74d61631ff5f0
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu May 24 15:30:41 2012 -0400
+
+    test: Composite with solid images instead of using pixman_image_fill_*
+    
+    There are a couple of places where the test suite uses the
+    pixman_image_fill_* functions to initialize images. These functions
+    can fail, and will do so if the "fast" implementation is disabled.
+    
+    So to make sure the test suite passes even using
+    PIXMAN_DISABLE="fast", use pixman_image_composite32() with a solid
+    image instead of pixman_image_fill_*.
+
+commit 30816e3068bccf7c78c78f916b54971d24873bdc
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Thu May 3 00:03:43 2012 +0200
+
+    MIPS: DSPr2: Added bilinear over_8888_8_8888 fast path.
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz
+    
+    Referent (before):
+    
+    cairo-perf-trace:
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [ # ]    image: pixman 0.25.3
+    [  0]    image             firefox-fishtank 2289.180 2290.567   0.05%    5/6
+    
+    Optimized:
+    
+    cairo-perf-trace:
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [ # ]    image: pixman 0.25.3
+    [  0]    image             firefox-fishtank 1700.925 1708.314   0.22%    5/6
+
+commit aea0522f6f1a51b97a673cfe4dc157e501008580
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Wed May 23 18:53:43 2012 +0200
+
+    MIPS: DSPr2: Fix bug in over_n_8888_8888_ca/over_n_8888_0565_ca routines
+    
+    In the main loop (unrolled by a factor of 2), instead of negating the
+    mask values multiplied by srca, the value of srca was negated and
+    passed as the alpha argument for the
+    
+        UN8x4_MUL_UN8x4_ADD_UN8x4 macro.
+    
+    Instead of:
+    
+        ma = ~ma;
+        UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
+    
+    Code was doing this:
+    
+        ma = ~srca;
+        UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
+    
+    The key is substituting registers s0/s1 (containing the srca value) with
+    t0/t1, which contain the mask values multiplied by srca. Register usage
+    is also improved (fewer registers are saved on the stack for the
+    over_n_8888_8888_ca routine).
+    
+    The bug was introduced in commit d2ee5631 and revealed by composite test.
+
+commit 74bf5dc2f99245e7b486203b6ba074fb629eb5f3
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun May 20 13:09:16 2012 -0400
+
+    demos: Add parrot.jpg to EXTRA_DIST
+    
+    Pointed out by Cyril Brulebois.
+
+commit 55698584be93706794b181cbf595846da578e103
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Tue May 15 16:32:08 2012 -0400
+
+    configure.ac: Fail the ARM/iwMMXt test if not compiling with -march=iwmmxt
+    
+    If not compiling with -march=iwmmxt, the configure test will still pass,
+    thinking that the __builtin_arm_* intrinsic is a function instead of
+    generating a single instruction. Since no linking is done, the configure
+    test doesn't catch this, and we get linking errors in the build.
+
+commit 3682b615154338f9754e7c1e046b42bb8ad584fa
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue May 15 13:38:44 2012 -0400
+
+    Post-release version bump to 0.25.7
+
+commit 1e1a00e964a1d8ef43d6d75c1c3a0b5d518d1979
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue May 15 13:20:09 2012 -0400
+
+    Pre-release version bump to 0.25.6
+    
+    Note that 0.25.4 was a botched release that doesn't have a tag and
+    doesn't correspond to any commit ID. It was however uploaded and
+    announced, so I'll just use the 0.25.6 version number.
+
+commit b2c16aaadfae64d2573abb537bfedd92c13b8d06
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue May 15 13:19:19 2012 -0400
+
+    demos/Makefile.am: Add parrot.c to EXTRA_DIST
+    
+    To get 'make distcheck' to pass.
+
+commit 50d3088d7882e1054a35e917becb7752662da6f0
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri May 11 21:59:13 2012 -0400
+
+    configure.ac: Rename loongson -> loongson-mmi
+    
+    Make it match with the other fast paths, and the PIXMAN_DISABLE value is
+    already loongson-mmi.
+
+commit a0a40cb822bec52494c64e6750be50b734dc29df
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri May 11 21:49:42 2012 -0400
+
+    configure.ac: Fix loongson-mmi out-of-tree builds
+    
+    When building out-of-tree, gcc wasn't able to find loongson-mmintrin.h
+    to compile the test program. Add -I$srcdir to CFLAGS to point gcc to it.
+
+commit 618a08e6aa03b38e8dc71ac610f7fdd55e8a8558
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Thu May 3 00:03:42 2012 +0200
+
+    MIPS: DSPr2: Added over_n_8_8888 and over_n_8_0565 fast paths.
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz
+    
+    Referent (before):
+    
+    lowlevel-blt-bench:
+         over_n_8_8888 =  L1:  10.40  L2:   9.79  M:  8.47 ( 33.62%)  HT:  7.64  VT:  7.59  R:  7.48  RT:  5.30 (  40Kops/s)
+         over_n_8_0565 =  L1:   7.40  L2:   7.23  M:  6.78 ( 17.94%)  HT:  6.23  VT:  6.17  R:  6.14  RT:  4.62 (  37Kops/s)
+    
+    Optimized:
+    
+    lowlevel-blt-bench:
+         over_n_8_8888 =  L1:  27.25  L2:  26.24  M: 18.15 ( 72.12%)  HT: 14.52  VT: 14.31  R: 13.83  RT:  7.57 (  48Kops/s)
+         over_n_8_0565 =  L1:  18.91  L2:  17.59  M: 15.06 ( 39.90%)  HT: 12.18  VT: 11.98  R: 11.83  RT:  6.80 (  46Kops/s)
+
+commit 7d4beedc612a32b73d7673bbf6447de0f3fca298
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed May 9 19:20:55 2012 -0400
+
+    mmx: add and use pack_4x565 function
+    
+    The pack_4x565 makes use of the pack_4xpacked565 function which uses pmadd.
+    
+    Some of the speed up is probably attributable to removing the artificial
+    serialization imposed by the
+    	vdest = pack_565 (..., vdest, 0);
+    	vdest = pack_565 (..., vdest, 1);
+    	...
+    pattern.
+    
+    Loongson:
+            over_n_0565 =  L1:  16.44  L2:  16.42  M: 13.83 (  9.85%)  HT: 12.83  VT: 12.61  R: 12.34  RT:  8.90 (  93Kops/s)
+            over_n_0565 =  L1:  42.48  L2:  42.53  M: 29.83 ( 21.20%)  HT: 23.39  VT: 23.72  R: 21.80  RT: 11.60 ( 113Kops/s)
+    
+         over_8888_0565 =  L1:  15.61  L2:  15.42  M: 12.11 ( 25.79%)  HT: 11.07  VT: 10.70  R: 10.37  RT:  7.25 (  82Kops/s)
+         over_8888_0565 =  L1:  35.01  L2:  35.20  M: 21.42 ( 45.57%)  HT: 18.12  VT: 17.61  R: 16.09  RT:  9.01 (  97Kops/s)
+    
+          over_n_8_0565 =  L1:  15.17  L2:  14.94  M: 12.57 ( 17.86%)  HT: 11.96  VT: 11.52  R: 10.79  RT:  7.31 (  79Kops/s)
+          over_n_8_0565 =  L1:  29.83  L2:  29.79  M: 21.85 ( 30.94%)  HT: 18.82  VT: 18.25  R: 16.15  RT:  8.72 (  91Kops/s)
+    
+    over_n_8888_0565_ca =  L1:  15.25  L2:  15.02  M: 11.64 ( 41.39%)  HT: 11.08  VT: 10.72  R: 10.02  RT:  7.00 (  77Kops/s)
+    over_n_8888_0565_ca =  L1:  30.12  L2:  29.99  M: 19.47 ( 68.99%)  HT: 17.05  VT: 16.55  R: 14.67  RT:  8.38 (  88Kops/s)
+    
+    ARM/iwMMXt:
+            over_n_0565 =  L1:  19.29  L2:  19.88  M: 17.38 ( 10.54%)  HT: 15.53  VT: 16.11  R: 13.69  RT: 11.00 (  96Kops/s)
+            over_n_0565 =  L1:  36.02  L2:  34.85  M: 28.04 ( 16.97%)  HT: 22.12  VT: 24.21  R: 22.36  RT: 12.22 ( 103Kops/s)
+    
+         over_8888_0565 =  L1:  18.38  L2:  16.59  M: 12.34 ( 22.29%)  HT: 11.67  VT: 11.71  R: 11.02  RT:  6.89 (  72Kops/s)
+         over_8888_0565 =  L1:  24.96  L2:  22.17  M: 15.11 ( 26.81%)  HT: 14.14  VT: 13.71  R: 13.18  RT:  8.13 (  78Kops/s)
+    
+          over_n_8_0565 =  L1:  14.65  L2:  12.44  M: 11.56 ( 14.50%)  HT: 10.93  VT: 10.39  R: 10.06  RT:  7.05 (  70Kops/s)
+          over_n_8_0565 =  L1:  18.37  L2:  14.98  M: 13.97 ( 16.51%)  HT: 12.67  VT: 10.35  R: 11.80  RT:  8.14 (  74Kops/s)
+    
+    over_n_8888_0565_ca =  L1:  14.27  L2:  12.93  M: 10.52 ( 33.23%)  HT:  9.70  VT:  9.90  R:  9.31  RT:  6.34 (  65Kops/s)
+    over_n_8888_0565_ca =  L1:  19.69  L2:  17.58  M: 13.40 ( 42.35%)  HT: 11.75  VT: 11.33  R: 11.17  RT:  7.49 (  73Kops/s)
+
+commit 2beabd9fed76de0023eb36b0c938b8803aa8d129
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu May 10 16:15:34 2012 -0400
+
+    configure.ac: make -march=loongson2f come before CFLAGS
+    
+    Otherwise we'd have -march=loongson2f being overridden by automake's
+    CFLAGS ordering which causes build failures when -march=<not loongson2f>
+    is specified by the user.
+
+commit dadb9a318b8ca10c65e31e7278f4335a6968d246
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue May 8 10:05:18 2012 -0400
+
+    Add Makefile.win32 and Makefile.win32.common to EXTRA_DIST
+    
+    https://bugs.freedesktop.org/show_bug.cgi?id=46905
+
+commit 3c57ec471e1aacc863747b82bbe0a84c6d776ab7
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed May 9 22:50:50 2012 -0400
+
+    .gitignore: add demos/checkerboard and demos/quad2quad
+
+commit 2d431b53d3cdbf1997e2d3b8e17408c12220c3a1
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Apr 27 14:12:56 2012 -0400
+
+    mmx: Use wpackhus in src_x888_0565 on iwMMXt
+    
+    iwMMXt has an unsigned saturation pack instruction, while MMX/EXT
+    and Loongson don't.
+    
+    ARM/iwMMXt:
+    src_8888_0565 =  L1: 110.38  L2:  82.33  M: 40.92 ( 73.22%)  HT: 35.63  VT: 32.22  R: 30.07  RT: 18.40 ( 132Kops/s)
+    src_8888_0565 =  L1: 117.91  L2:  83.05  M: 41.52 ( 75.58%)  HT: 37.63  VT: 35.40  R: 29.37  RT: 19.39 ( 134Kops/s)
+
+commit 2ddd1c498b723e8e48a38eef01d5befba30b5259
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu Apr 19 17:33:27 2012 -0400
+
+    mmx: add src_8888_0565
+    
+    Uses the pmadd technique described in
+    http://software.intel.com/sites/landingpage/legacy/mmx/MMX_App_24-16_Bit_Conversion.pdf
+    
+    The technique uses the packssdw instruction which uses signed
+    saturation. This works in their example because they pack 888 to 555
+    leaving the high bit as zero. For packing to 565, it is unsuitable, so
+    we replace it with an or+shuffle.
+    
+    Loongson:
+    src_8888_0565 =  L1: 106.13  L2:  83.57  M: 33.46 ( 68.90%)  HT: 30.29  VT: 27.67  R: 26.11  RT: 15.06 ( 135Kops/s)
+    src_8888_0565 =  L1: 122.10  L2: 117.53  M: 37.97 ( 78.58%)  HT: 33.14  VT: 30.09  R: 29.01  RT: 15.76 ( 139Kops/s)
+    
+    ARM/iwMMXt:
+    src_8888_0565 =  L1:  67.88  L2:  56.61  M: 31.20 ( 56.74%)  HT: 29.22  VT: 27.01  R: 25.39  RT: 19.29 ( 130Kops/s)
+    src_8888_0565 =  L1: 110.38  L2:  82.33  M: 40.92 ( 73.22%)  HT: 35.63  VT: 32.22  R: 30.07  RT: 18.40 ( 132Kops/s)
+
+commit 3e8fe65a0893fcd82bdea205de49f53be32bb074
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Apr 18 16:24:28 2012 -0400
+
+    mmx: add x8r8g8b8 fetcher
+    
+    Loongson:
+       add_x888_x888 =  L1:  29.36  L2:  27.81  M: 14.05 ( 38.74%)  HT: 12.45  VT: 11.78  R: 11.52  RT:  7.23 (  75Kops/s)
+       add_x888_x888 =  L1:  36.06  L2:  34.55  M: 14.81 ( 41.03%)  HT: 14.01  VT: 13.41  R: 13.06  RT:  9.06 (  90Kops/s)
+    
+     src_x888_8_x888 =  L1:  21.92  L2:  20.15  M: 13.35 ( 41.42%)  HT: 11.70  VT: 10.95  R: 10.53  RT:  6.18 (  65Kops/s)
+     src_x888_8_x888 =  L1:  25.43  L2:  23.51  M: 14.12 ( 44.00%)  HT: 13.14  VT: 12.50  R: 11.86  RT:  7.49 (  76Kops/s)
+    
+    over_x888_8_0565 =  L1:  10.64  L2:  10.17  M:  7.74 ( 21.35%)  HT:  6.83  VT:  6.55  R:  6.34  RT:  4.03 (  46Kops/s)
+    over_x888_8_0565 =  L1:  11.41  L2:  10.97  M:  8.07 ( 22.36%)  HT:  7.42  VT:  7.18  R:  6.92  RT:  4.62 (  52Kops/s)
+    
+    ARM/iwMMXt:
+       add_x888_x888 =  L1:  22.10  L2:  18.93  M: 13.48 ( 32.29%)  HT: 11.32  VT: 10.64  R: 10.36  RT:  6.51 (  61Kops/s)
+       add_x888_x888 =  L1:  24.26  L2:  20.83  M: 14.52 ( 35.64%)  HT: 12.66  VT: 12.98  R: 11.34  RT:  7.69 (  72Kops/s)
+    
+     src_x888_8_x888 =  L1:  19.33  L2:  17.66  M: 14.26 ( 38.43%)  HT: 11.53  VT: 10.83  R: 10.57  RT:  6.12 (  58Kops/s)
+     src_x888_8_x888 =  L1:  21.23  L2:  19.60  M: 15.41 ( 42.55%)  HT: 12.66  VT: 13.30  R: 11.55  RT:  7.32 (  67Kops/s)
+    
+    over_x888_8_0565 =  L1:   8.15  L2:   7.56  M:  6.50 ( 15.58%)  HT:  5.73  VT:  5.49  R:  5.50  RT:  3.53 (  38Kops/s)
+    over_x888_8_0565 =  L1:   8.35  L2:   7.85  M:  6.68 ( 16.40%)  HT:  6.12  VT:  5.97  R:  5.78  RT:  4.03 (  43Kops/s)
+
+commit c2b1630d9603f80c2636e8a8bfebca87707d4235
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Apr 18 16:14:08 2012 -0400
+
+    mmx: add a8 fetcher
+    
+    oprofile of xfce4-terminal-a1
+    210535    9.0407  libpixman-1.so.0.25.3    fetch_scanline_a8
+    144802    6.0054  libpixman-1.so.0.25.3    mmx_fetch_a8
+    
+    Loongson:
+           add_8_8_8 =  L1:  17.98  L2:  17.28  M: 14.28 ( 19.79%)  HT: 11.11  VT: 10.38  R:  9.97  RT:  5.14 (  55Kops/s)
+           add_8_8_8 =  L1:  20.44  L2:  19.65  M: 15.62 ( 21.53%)  HT: 12.86  VT: 11.98  R: 11.32  RT:  6.13 (  64Kops/s)
+    
+     src_8888_8_0565 =  L1:  19.97  L2:  18.59  M: 13.42 ( 32.55%)  HT: 11.46  VT: 10.78  R: 10.33  RT:  5.87 (  61Kops/s)
+     src_8888_8_0565 =  L1:  21.16  L2:  19.68  M: 13.94 ( 33.64%)  HT: 12.31  VT: 11.52  R: 11.02  RT:  6.54 (  68Kops/s)
+    
+     src_x888_8_x888 =  L1:  20.54  L2:  18.88  M: 13.07 ( 40.74%)  HT: 11.05  VT: 10.36  R: 10.02  RT:  5.68 (  60Kops/s)
+     src_x888_8_x888 =  L1:  21.92  L2:  20.15  M: 13.35 ( 41.42%)  HT: 11.70  VT: 10.95  R: 10.53  RT:  6.18 (  65Kops/s)
+    
+    over_x888_8_0565 =  L1:  10.32  L2:   9.85  M:  7.63 ( 21.13%)  HT:  6.56  VT:  6.30  R:  6.12  RT:  3.80 (  43Kops/s)
+    over_x888_8_0565 =  L1:  10.64  L2:  10.17  M:  7.74 ( 21.35%)  HT:  6.83  VT:  6.55  R:  6.34  RT:  4.03 (  46Kops/s)
+    
+    ARM/iwMMXt:
+           add_8_8_8 =  L1:  13.10  L2:  11.67  M: 10.74 ( 13.46%)  HT:  8.62  VT:  8.15  R:  7.94  RT:  4.39 (  44Kops/s)
+           add_8_8_8 =  L1:  13.81  L2:  12.79  M: 11.63 ( 13.93%)  HT:  9.33  VT:  9.20  R:  9.04  RT:  5.43 (  52Kops/s)
+    
+     src_8888_8_0565 =  L1:  16.62  L2:  15.07  M: 12.52 ( 27.46%)  HT: 10.07  VT: 10.17  R:  9.95  RT:  5.64 (  54Kops/s)
+     src_8888_8_0565 =  L1:  16.84  L2:  16.11  M: 13.22 ( 27.71%)  HT: 11.74  VT: 10.90  R: 10.80  RT:  6.66 (  62Kops/s)
+    
+     src_x888_8_x888 =  L1:  17.49  L2:  16.22  M: 13.73 ( 38.73%)  HT: 10.10  VT: 10.33  R:  9.55  RT:  5.21 (  52Kops/s)
+     src_x888_8_x888 =  L1:  19.33  L2:  17.66  M: 14.26 ( 38.43%)  HT: 11.53  VT: 10.83  R: 10.57  RT:  6.12 (  58Kops/s)
+    
+    over_x888_8_0565 =  L1:   7.57  L2:   7.29  M:  6.37 ( 15.97%)  HT:  5.53  VT:  5.33  R:  5.21  RT:  3.22 (  35Kops/s)
+    over_x888_8_0565 =  L1:   8.15  L2:   7.56  M:  6.50 ( 15.58%)  HT:  5.73  VT:  5.49  R:  5.50  RT:  3.53 (  38Kops/s)
+
+commit 20bad64d9a7ff5c2662f12a87f66fcf77c1f3f2c
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Apr 18 16:08:57 2012 -0400
+
+    mmx: add r5g6b5 fetcher
+    
+    Loongson:
+    add_0565_0565 =  L1:  12.73  L2:  12.26  M: 10.05 ( 13.87%)  HT:  8.77  VT:  8.50  R:  8.25  RT:  5.28 (  58Kops/s)
+    add_0565_0565 =  L1:  14.04  L2:  13.63  M: 10.96 ( 15.19%)  HT:  9.73  VT:  9.43  R:  9.11  RT:  5.93 (  64Kops/s)
+    
+    ARM/iwMMXt:
+    add_0565_0565 =  L1:  10.36  L2:  10.03  M:  9.04 ( 10.88%)  HT:  3.11  VT:  7.16  R:  7.72  RT:  5.12 (  51Kops/s)
+    add_0565_0565 =  L1:  10.84  L2:  10.20  M:  9.15 ( 11.46%)  HT:  7.60  VT:  7.82  R:  7.70  RT:  5.41 (  53Kops/s)
+
+commit c136e535adf33069cbf229b8773934d78099af85
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Tue Apr 17 12:16:55 2012 -0400
+
+    mmx: Use Loongson pextrh instruction in expand565
+    
+    Same story as pinsrh in the previous commit.
+    
+     text	data	bss	dec	hex filename
+    25336	1952	  0   27288    6a98 .libs/libpixman_loongson_mmi_la-pixman-mmx.o
+    25072	1952	  0   27024    6990 .libs/libpixman_loongson_mmi_la-pixman-mmx.o
+    
+    -dsll: 95
+    +dsll: 70
+    -dsrl: 135
+    +dsrl: 105
+    -ldc1: 462
+    +ldc1: 445
+    -lw: 721
+    +lw: 700
+    +pextrh: 30
+
+commit facceb4a1fbba476ad98e76d15868bf7eecd3a30
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Tue Apr 17 11:28:33 2012 -0400
+
+    mmx: Use Loongson pinsrh instruction in pack_565
+    
+    The pinsrh instruction is analogous to MMX EXT's pinsrw, except like
+    other Loongson vector instructions it cannot access the general purpose
+    registers. In the cases of other Loongson vector instructions, this is a
+    headache, but it is actually a good thing here. Since the instruction is
+    different from MMX, I've named the intrinsic loongson_insert_pi16.
+    
+     text	data	bss	dec	 hex filename
+    25976	1952	  0   27928	6d18 .libs/libpixman_loongson_mmi_la-pixman-mmx.o
+    25336	1952	  0   27288	6a98 .libs/libpixman_loongson_mmi_la-pixman-mmx.o
+    
+    -and: 181
+    +and: 147
+    -dsll: 143
+    +dsll: 95
+    -dsrl: 87
+    +dsrl: 135
+    -ldc1: 523
+    +ldc1: 462
+    -lw: 767
+    +lw: 721
+    +pinsrh: 35
+
+commit 6d29b7d7557ccb657054e867f4e27f4aa89cb25e
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Feb 24 15:23:09 2012 -0500
+
+    mmx: don't pack and unpack src unnecessarily
+    
+    The combine function was store8888'ing the result, and all consumers
+    were immediately load8888'ing it, causing lots of unnecessary pack and
+    unpack instructions.
+    
+    It's a very straightforward conversion, except for mmx_combine_over_u
+    and mmx_combine_saturate_u. mmx_combine_over_u was testing the integer
+    result to skip pixels, so we use the is_* functions to test the __m64
+    data directly without loading it into an integer register.
+    
+    For mmx_combine_saturate_u there's not a lot we can do, since it uses
+    DIV_UN8.
+
+commit ee750034252fb8f44c871e84a5057bc114699ae7
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Feb 24 17:39:39 2012 -0500
+
+    mmx: introduce is_equal, is_opaque, and is_zero functions
+    
+    To be used by the next commit.
+
+commit 10c77b339f40fc027b682ef16edec234508d327b
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu Feb 23 16:25:11 2012 -0500
+
+    mmx: simplify srcsrcsrcsrc calculation in over_n_8_0565
+
+commit e06947d1010ffec4903493df4979119b0ac080d3
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu Feb 23 16:15:56 2012 -0500
+
+    mmx: remove unnecessary uint64_t<->__m64 conversions
+    
+    Loongson:
+    add_8888_8888 =  L1:  68.73  L2:  55.09  M: 25.39 ( 68.18%)  HT: 25.28 VT: 22.42  R: 20.74  RT: 13.26 ( 131Kops/s)
+    add_8888_8888 =  L1: 159.19  L2: 114.10  M: 30.74 ( 77.91%)  HT: 27.63 VT: 24.99  R: 24.61  RT: 14.49 ( 141Kops/s)
+
+commit c78e986085b3993f1b4355151820228c53d54cad
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Feb 24 12:43:43 2012 -0500
+
+    mmx: compile on MIPS for Loongson MMI optimizations
+    
+                                 image               image16
+               evolution   32.985 ->  29.667    27.314 ->  23.870
+    firefox-planet-gnome  197.982 -> 180.437   220.986 -> 205.057
+    gnome-system-monitor   48.482 ->  49.752    52.820 ->  49.528
+      gnome-terminal-vim   60.799 ->  50.528    51.655 ->  44.131
+          grads-heat-map    3.167 ->   3.181     3.328 ->   3.321
+                    gvim   38.646 ->  32.552    38.126 ->  34.453
+           midori-zoomed   44.371 ->  43.338    28.860 ->  28.865
+               ocitysmap   23.065 ->  18.057    23.046 ->  18.055
+                 poppler   43.676 ->  36.077    43.065 ->  36.090
+      swfdec-giant-steps   20.166 ->  20.365    22.354 ->  16.578
+          swfdec-youtube   31.502 ->  28.118    44.052 ->  41.771
+       xfce4-terminal-a1   69.517 ->  51.288    62.225 ->  53.309
+
+commit 4e0c7902b2c8e517d102a8fccb9cf7da9725f59f
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Feb 15 01:19:07 2012 -0500
+
+    mmx: make ldq_u take __m64* directly
+    
+    Before, if __m64 is allocated in vector or floating-point registers,
+    
+    	__m64 vs = ldq_u((uint64_t *)src);
+    
+    would cause src to be loaded into an integer register and then
+    transferred to an __m64 register. By switching ldq_u's argument type to
+    __m64 we give the compiler enough information to recognize that it can
+    load to the vector register directly.
+    
+    This patch is necessary for the Loongson optimizations when __m64 is
+    typedef'd as double.
+
+commit 2e54b76a2d2203b6a70190f488d76d6d409e879a
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Feb 24 12:34:41 2012 -0500
+
+    mmx: add load function and use it in add_8888_8888
+
+commit 084e3f2f4be900041cc35830359606addc1fc3be
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Feb 24 12:32:03 2012 -0500
+
+    mmx: add store function and use it in add_8888_8888
+
+commit e24c1c849d29f43dc6e50e1f15102709059b40f8
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Apr 5 00:52:21 2012 -0400
+
+    bits_image_fetch_pixel_convolution(): Make sure channels are signed
+    
+    In the computation:
+    
+        srtot += RED_8 (pixel) * f
+    
+    RED_8 (pixel) is an unsigned quantity, which means the signed filter
+    coefficient f gets converted to an unsigned integer before the
+    multiplication. We get away with this because when the 32 bit unsigned
+    result is converted to int32_t, the correct sign is produced. But if
+    srtot had been an int64_t, the result would have been a very large
+    positive number.
+    
+    Fix this by explicitly casting the channels to int.
+
+commit 4d2fee14063b960c6b81b55dd3aa94b956d23eeb
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Apr 5 00:42:55 2012 -0400
+
+    test/utils.c: Clip values to the [0, 255] interval
+    
+    Unpremultiplying a superluminescent pixel can result in values greater
+    than 255.
+
+commit e2917645846b64fdc7f2190806c97b0ef4b0fd5b
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Apr 18 18:14:13 2012 -0400
+
+    configure.ac: fix iwMMXt/gcc version error message
+
+commit b87cd1f6059789cb154677d8432045a5ca8e16c1
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Apr 15 14:03:08 2012 -0400
+
+    mmx: fix _mm_shuffle_pi16 function when compiling without optimization
+    
+    The last argument must be an immediate value, and when compiling without
+    optimization the compiler might not recognize this. So use a macro if
+    not optimizing.
+
+commit e927d2397141f80aecd2702ce5f38349c41aebe5
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Apr 15 14:00:17 2012 -0400
+
+    configure.ac: require >= gcc-4.5 for ARM iwMMXt
+    
+    We're using a patched gcc-4.5, and having to modify configure.ac and
+    autoreconf between changes is annoying. And besides, 4.5, 4.6, and 4.7's
+    iwMMXt intrinsic support is equally broken, and we test a known broken
+    intrinsic in the configure test program, so the version check is rather
+    meaningless.
+
+commit 0531170436a2a10a995c7487b396f1378affdb98
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu Apr 5 17:36:05 2012 -0400
+
+    mmx: Use force_inline instead of __inline__ (bug 46906)
+    
+    Fixes the build on MSVC.
+
+commit b950bb12dc2baaee441b875bd81b67e48947d2f6
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu Mar 15 19:16:20 2012 -0400
+
+    mmx: enable over_n_0565 for b5g6r5
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 87ecec8d72be4106358e843a1e7a907b0e814f7f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Apr 2 15:16:18 2012 -0400
+
+    gtk-utils.c: In pixbuf_from_argb32() use a8r8g8b8_to_rgba_np()
+    
+    Instead of inlining a copy of that functionality.
+
+commit d1ec1467f607c21a4d8b445eab5465ca60a12a97
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Apr 2 15:09:16 2012 -0400
+
+    test/utils.c: Rename and export the pngify_pixels() function.
+    
+    This function converts from a8r8g8b8 to non-premultiplied RGBA (the
+    PNG or GdkPixbuf format that has the channels in this order: R, G, B,
+    A in memory regardless of the computer's endianness). The function's
+    new name is a8r8g8b8_to_rgba_np().
+
+commit b16ddf17829633ec6eb54656924b7e841c6c69a4
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Apr 2 14:59:02 2012 -0400
+
+    gtk-utils.c: Don't include pixman-private.h
+    
+    Use pixman_image_get_format() instead of image->bits.format.
+
+commit b9ca23a9c711280a706eb1df30a0cfaf3b2d8e27
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Mar 25 12:14:54 2012 -0400
+
+    Rename fast_composite_add_1000_1000 to _add_1_1()
+    
+    The 1000_1000 name is a relic from before the refactoring.
+
+commit 746291a19ed29e2da6de57b382a1dfaa900d067b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Jan 16 06:46:52 2011 -0500
+
+    Add the original parrot image.
+    
+    This is the Parrot image that was downscaled and cropped before being
+    used in the composite-test.c demo.
+
+commit 451b25ae90ea077a78d2606ce107b826043a252b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Oct 6 06:06:59 2010 -0400
+
+    composite-test.c: Add a parrot image
+    
+    Instead of the yellow square, use a parrot as the source image. This
+    demonstrates the various blend modes much better.
+    
+    The parrot is a cropped version of finger painting by Rubens LP:
+    
+        http://www.flickr.com/photos/dorubens/4030604504/in/set-72157622586088192/
+    
+    where the background has been removed. Used here under Creative
+    Commons Attribution. The artist's web site:
+    
+         http://www.rubenslp.com.br/
+
+commit 3aa45d62e45f40faa98f9bb47046578bf19e7574
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Oct 6 03:56:55 2010 -0400
+
+    composite-test.c: Use similar gradient to the one in the PDF spec.
+
+commit e1b8969e78eecf9abaaf2b317c10fddf64b02799
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Oct 12 04:49:27 2011 -0400
+
+    demos: Add checkerboard demo
+    
+    This is a simple demo that displays a checkerboard with a projective
+    transformation.
+
+commit 41863fbabb6dd08871aed47beba5d08ae2ae3cf6
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Oct 12 04:48:33 2011 -0400
+
+    demos: Add quad2quad program
+    
+    This program can compute the projective transformation that transforms
+    one quadrilateral into another. The code is basically maxima[1] output
+    translated into C.
+    
+    [1] http://maxima.sourceforge.net/
+
+commit cf0d0d63645bcb6425a1e2d7b5d9f1e26e205247
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Mar 14 17:11:14 2012 -0400
+
+    Use "=a" and "=d" constraints for rdtsc inline assembly
+    
+    In 32 bit mode the "=A" constraint refers to the register pair
+    edx:eax, but according to GCC developers this is not the case in 64
+    bit mode, where it refers to "rax".
+    
+    Hence, using "=A" for rdtsc is incorrect in 64 bit mode.
+    
+    See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21249
+
+commit 8a8aabf05c8e6c7b68b68c80e4e73877fd35ce78
+Author: Jeremy Huddleston <jeremyhu@apple.com>
+Date:   Fri Mar 16 11:37:23 2012 -0700
+
+    configure.ac: Fix a copy-paste-o in TLS detection
+    
+    Regression from: a069da6c66da407cc52e1e92321d69c68fd6beb5
+    
+    Signed-off-by: Jeremy Huddleston <jeremyhu@apple.com>
+    Tested-by: Matt Turner <mattst88@gmail.com>
+
+commit ee6bac11c28b350c183f190b6c648c316ee1109d
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Mar 14 16:48:00 2012 -0400
+
+    Use AC_LANG_SOURCE for DSPr2 configure program
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 21eeecffa93ba5912487d88851b1a5c60fc37768
+Author: Chun-wei Fan <fanchunwei@src.gnome.org>
+Date:   Fri Mar 9 15:54:06 2012 +0800
+
+    Just include xmmintrin.h on MSVC as well
+    
+    The xmmintrin.h as shipped with recent Visual C++ (2003+) provides
+    _mm_shuffle_pi16 and _mm_mulhi_pu16, so including that header
+    will do for using these functions, and MSVC does not like the GCC-specific
+    implementations of _mm_shuffle_pi16 and _mm_mulhi_pu16 that are
+    currently in the code.
+    
+    _MM_SHUFFLE is declared in the same way in MSVC's xmmintrin.h, so don't
+    re-define it here to avoid a compilation warning.
+
+commit 94aea2e868ae02235785f31b275f89b9661bca0e
+Author: Jeremy Huddleston <jeremyhu@apple.com>
+Date:   Wed Mar 14 10:26:18 2012 -0700
+
+    Fix a false-negative in MMX check
+    
+    Silence warnings that could make -Werror give a false negative
+    Use signed char to avoid cases where int8_t isn't declared
+    
+    Reported-by: Mike Lothian <mike@fireburn.co.uk>
+    Tested-by: Mike Lothian <mike@fireburn.co.uk>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Signed-off-by: Jeremy Huddleston <jeremyhu@apple.com>
+
+commit d2ee5631ae42d031289ae80352e02bafa3f06ed4
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Sun Mar 11 18:52:25 2012 +0100
+
+    MIPS: DSPr2: Added over_n_8888_8888_ca and over_n_8888_0565_ca fast paths.
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz
+    
+    Referent (before):
+    
+    lowlevel-blt-bench:
+         over_n_8888_8888_ca =  L1:   8.32  L2:   7.65  M:  6.38 ( 51.08%)  HT:  5.78  VT:  5.74  R:  5.84  RT:  4.39 (  37Kops/s)
+         over_n_8888_0565_ca =  L1:   7.40  L2:   6.95  M:  6.16 ( 41.06%)  HT:  5.72  VT:  5.52  R:  5.63  RT:  4.28 (  36Kops/s)
+    cairo-perf-trace:
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [ # ]    image: pixman 0.25.3
+    [  0]    image            xfce4-terminal-a1  138.223  139.070   0.33%    6/6
+    [ # ]  image16: pixman 0.25.3
+    [  0]  image16            xfce4-terminal-a1  132.763  132.939   0.06%    5/6
+    
+    Optimized:
+    
+    lowlevel-blt-bench:
+         over_n_8888_8888_ca =  L1:  19.35  L2:  23.84  M: 13.68 (109.39%)  HT: 11.39  VT: 11.19  R: 11.27  RT:  6.90 (  47Kops/s)
+         over_n_8888_0565_ca =  L1:  18.68  L2:  17.00  M: 12.56 ( 83.70%)  HT: 10.72  VT: 10.45  R: 10.43  RT:  5.79 (  43Kops/s)
+    cairo-perf-trace:
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [ # ]    image: pixman 0.25.3
+    [  0]    image            xfce4-terminal-a1  130.400  131.720   0.46%    6/6
+    [ # ]  image16: pixman 0.25.3
+    [  0]  image16            xfce4-terminal-a1  125.830  126.604   0.34%    6/6
+
+commit a069da6c66da407cc52e1e92321d69c68fd6beb5
+Author: Jeremy Huddleston <jeremyhu@apple.com>
+Date:   Thu Mar 8 09:41:34 2012 -0800
+
+    Expand TLS support beyond __thread to __declspec(thread)
+    
+    This code was pretty much copied from a similar commit that I made to
+    xorg-server in April.
+    
+    cf: xorg/xserver: bb4d145bd25e2aee988b100ecf1105ea3b6a40b8
+    
+    Signed-off-by: Jeremy Huddleston <jeremyhu@apple.com>
+
+commit 61d999b9101c76bd463101923d2143e31857e7f8
+Author: Jeremy Huddleston <jeremyhu@apple.com>
+Date:   Thu Mar 8 09:41:32 2012 -0800
+
+    Disable MMX when incompatible clang is being used.
+    
+    Signed-off-by: Jeremy Huddleston <jeremyhu@apple.com>
+
+commit ad4b6922f23e91b291c794b9fde5ee81941adb64
+Author: Jeremy Huddleston <jeremyhu@apple.com>
+Date:   Thu Mar 8 09:41:33 2012 -0800
+
+    Silence a warning about unused pixman_have_mmx
+    
+    Signed-off-by: Jeremy Huddleston <jeremyhu@apple.com>
+
+commit bb5ff2687815eff20757612de965663ef3a2525b
+Author: Jeremy Huddleston <jeremyhu@apple.com>
+Date:   Thu Mar 8 09:41:31 2012 -0800
+
+    Revert "Disable MMX when Clang is being used."
+    
+    This reverts commit 5eb4c12a79b3017ec6cc22ab756f53f225731533.
+
+commit a6ad5120f7bd4add3b2e9c03c9fd769d5bbfd191
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Mar 8 10:11:20 2012 -0500
+
+    Post-release version bump to 0.25.3
+
+commit f73f7985318bf0e7446941d9bea9a94b35580342
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Mar 8 09:33:16 2012 -0500
+
+    Pre-release version bump to 0.25.2
+
+commit 62df04eb257d16fbb4449855a48f6fdaf567e201
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Mar 8 09:29:46 2012 -0500
+
+    mmx: Squash a warning by making the argument to ldl_u() const
+
+commit 85943733cbd7b62991ee962aa22f28bc5d1be353
+Author: Alan Coopersmith <alan.coopersmith@oracle.com>
+Date:   Fri Feb 24 18:02:56 2012 -0800
+
+    Just use xmmintrin.h when building with Solaris Studio compilers
+    
+    Since the Solaris Studio compilers don't have a mode where MMX
+    instructions are available and SSE instructions are not, we can
+    just use the <xmmintrin.h> header directly.
+    
+    Fixes build failure due to Studio not supporting the __gnu_inline__
+    or __artificial__ attributes.
+    
+    Signed-off-by: Alan Coopersmith <alan.coopersmith@oracle.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit 304f57644ac6a991c6e538675de935356252c0a5
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Wed Feb 29 12:04:33 2012 +0100
+
+    MIPS: DSPr2: Added mips_dspr2_blt and mips_dspr2_fill routines.
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz
+    
+    Referent (before):
+    
+    lowlevel-blt-bench:
+                  src_n_0565 =  L1: 238.14  L2: 233.15  M: 57.88 ( 77.23%)  HT: 53.22  VT: 49.99  R: 47.73  RT: 24.79 (  91Kops/s)
+                  src_n_8888 =  L1: 190.19  L2: 187.57  M: 28.94 ( 77.23%)  HT: 27.91  VT: 27.33  R: 26.64  RT: 14.68 (  77Kops/s)
+    cairo-perf-trace:
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [ # ]    image: pixman 0.25.1
+    [  0]    image         gnome-system-monitor  268.460  269.712   0.22%    6/6
+    
+    Optimized:
+    
+    lowlevel-blt-bench:
+                  src_n_0565 =  L1:1081.39  L2: 258.22  M:189.59 (252.91%)  HT: 60.23  VT: 55.01  R: 53.44  RT: 23.68 (  89Kops/s)
+                  src_n_8888 =  L1: 653.46  L2: 113.55  M:135.26 (360.86%)  HT: 38.99  VT: 37.38  R: 34.95  RT: 18.67 (  84Kops/s)
+    cairo-perf-trace:
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [ # ]    image: pixman 0.25.1
+    [  0]    image         gnome-system-monitor  246.565  246.706   0.04%    6/6
+
+commit 999e72b80bd5e3fab5f45b6ad19511389b58d9ab
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Mar 1 02:24:54 2012 -0500
+
+    pixman-access.c: Remove some unused macros
+    
+    The macros related to palette entries:
+    
+    RGB15_TO_ENTRY,
+    RGB24_TO_ENTRY,
+    RGB24_TO_ENTRY_Y
+    
+    are not used anywhere.
+
+commit c0cb48aae0d09200a187965094138fbf488498cd
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Feb 29 04:44:46 2012 -0500
+
+    pixman-accessors.h: Delete unused macros
+    
+    The MEMCPY_WRAPPED and ACCESS macros are not used anymore.
+
+commit 5adf569317f923cd5eb547209a8d927be0d81049
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Feb 26 17:35:20 2012 -0500
+
+    Move fetching for solid bits images to pixman-noop.c
+    
+    This should be a bit faster because it can reuse the scanline on each iteration.
+
+commit 3c3c70fa0b524569df0ec20c50d481626e518462
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Feb 24 20:11:11 2012 -0500
+
+    lowlevel-blt-bench: add in_8_8 and in_n_8_8
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit fcea053561893d116a79f41a113993f1f61b58cf
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Jan 26 13:16:09 2011 -0500
+
+    Disable implementations mentioned in the PIXMAN_DISABLE environment variable.
+    
+    With this, it becomes possible to do
+    
+         PIXMAN_DISABLE="sse2 mmx" some_app
+    
+    which will run some_app without SSE2 and MMX enabled. This is useful
+    for benchmarking, testing and narrowing down bugs.
+    
+    The current list of implementations that can be disabled:
+    
+        fast
+        mmx
+        sse2
+        arm-simd
+        arm-iwmmxt
+        arm-neon
+        mips-dspr2
+        vmx
+    
+    The general and noop implementations can't be disabled because pixman
+    depends on those being available for correct operation.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit e7574d336b7c812a888fac22f99f1b0e9a3518b0
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Wed Feb 22 14:23:48 2012 +0100
+
+    MIPS: DSPr2: Added fast-paths for SRC operation.
+    
+    Following fast-path functions are implemented (routines 4, 5 and 6 utilize
+    same fast-memcpy routine):
+        1. src_x888_8888
+        2. src_8888_0565
+        3. src_0565_8888
+        4. src_0565_0565
+        5. src_8888_8888
+        6. src_0888_0888
+    
+    Performance numbers before/after on MIPS-74kc @ 1GHz
+    
+    Referent (before):
+    
+    lowlevel-blt-bench:
+            src_x888_8888 =  L1: 199.35  L2:  96.54  M: 18.87 (100.68%)  HT: 17.12  VT: 16.24  R: 15.43  RT:  9.33 (  61Kops/s)
+            src_8888_0565 =  L1:  71.22  L2:  51.95  M: 24.19 ( 96.17%)  HT: 20.71  VT: 19.92  R: 18.15  RT:  9.92 (  63Kops/s)
+            src_0565_8888 =  L1:  38.82  L2:  36.22  M: 18.60 ( 73.95%)  HT: 14.47  VT: 13.19  R: 12.97  RT:  6.61 (  49Kops/s)
+            src_0565_0565 =  L1: 286.05  L2: 155.02  M: 37.68 (100.54%)  HT: 31.08  VT: 28.07  R: 26.26  RT: 11.93 (  68Kops/s)
+            src_8888_8888 =  L1: 454.32  L2: 139.15  M: 19.30 (102.98%)  HT: 17.73  VT: 16.08  R: 16.62  RT: 10.45 (  64Kops/s)
+            src_0888_0888 =  L1: 190.47  L2: 106.14  M: 25.26 (101.08%)  HT: 21.88  VT: 20.32  R: 18.83  RT: 10.10 (  63Kops/s)
+    cairo-perf-trace:
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [ # ]    image: pixman 0.25.1
+    [  0]    image            firefox-asteroids  421.215  421.325   0.01%    4/6
+    [  1]    image         firefox-planet-gnome  647.708  648.486   0.13%    6/6
+    [  2]    image         gnome-system-monitor  276.073  277.506   0.38%    6/6
+    [  3]    image           gnome-terminal-vim  263.866  265.229   0.39%    6/6
+    [  4]    image                      poppler  123.576  124.003   0.15%    6/6
+    
+    Optimized (with these optimizations):
+    
+    lowlevel-blt-bench:
+            src_x888_8888 =  L1: 369.50  L2:  99.37  M: 27.19 (145.07%)  HT: 20.24  VT: 19.48  R: 19.00  RT: 10.22 (  63Kops/s)
+            src_8888_0565 =  L1: 105.65  L2:  67.87  M: 25.41 (101.00%)  HT: 20.78  VT: 19.84  R: 18.52  RT:  9.81 (  63Kops/s)
+            src_0565_8888 =  L1:  77.10  L2:  63.04  M: 23.37 ( 92.90%)  HT: 20.29  VT: 19.37  R: 18.14  RT: 10.02 (  63Kops/s)
+            src_0565_0565 =  L1: 519.02  L2: 241.32  M: 62.35 (166.34%)  HT: 33.74  VT: 27.63  R: 26.12  RT: 11.70 (  67Kops/s)
+            src_8888_8888 =  L1: 390.48  L2: 113.99  M: 30.32 (161.77%)  HT: 19.55  VT: 17.05  R: 17.13  RT: 10.19 (  63Kops/s)
+            src_0888_0888 =  L1: 349.74  L2: 156.68  M: 40.68 (162.78%)  HT: 25.58  VT: 20.57  R: 20.20  RT:  9.96 (  63Kops/s)
+    cairo-perf-trace:
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [ # ]    image: pixman 0.25.1
+    [  0]    image            firefox-asteroids  400.050  400.308   0.04%    6/6
+    [  1]    image         firefox-planet-gnome  628.978  629.364   0.07%    6/6
+    [  2]    image         gnome-system-monitor  270.247  270.313   0.03%    6/6
+    [  3]    image           gnome-terminal-vim  256.413  257.641   0.21%    6/6
+    [  4]    image                      poppler  119.540  120.023   0.21%    6/6
+
+commit 1364c91bd106f8b67c9cd1bda2fdd9d46ac40363
+Author: Nemanja Lukic <nemanja.lukic@rt-rk.com>
+Date:   Wed Feb 22 14:23:47 2012 +0100
+
+    MIPS: DSPr2: Basic infrastructure for MIPS architecture
+    
+    MIPS DSP instruction set extensions
+
+commit e43d65d49da2c3e929cf20e82a2f7ed1fa0d0167
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Feb 24 20:02:55 2012 -0500
+
+    lowlevel-blt: add over_x888_n_8888
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 9f607049956c6858706c7ca45829c5ad19f18191
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Feb 24 19:58:09 2012 -0500
+
+    lowlevel-blt: add over_8888_8888
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 5eb4c12a79b3017ec6cc22ab756f53f225731533
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Feb 23 18:36:04 2012 -0500
+
+    Disable MMX when Clang is being used.
+    
+    There are several issues with the Clang compiler and pixman-mmx.c:
+    
+    - When not optimizing, it doesn't seem to recognize that an argument
+      to an __always_inline__ function is compile-time constant. This
+      results in this error being produced:
+    
+          fatal error: error in backend: Invalid operand for inline asm
+                  constraint 'K'!
+    
+    - This inline assembly:
+    
+          asm ("pmulhuw %1, %0\n\t"
+              : "+y" (__A)
+              : "y" (__B)
+          );
+    
+      results in
+    
+          fatal error: error in backend: Unsupported asm: input constraint
+                  with a matching output constraint of incompatible type!
+    
+    So disable MMX when the compiler is Clang.
+
+commit 350e231b3f01d6f82a2fdc7d9a9945234c404d0a
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Tue Feb 21 23:33:02 2012 -0500
+
+    mmx: make load8888 take a pointer to data instead of the data itself
+    
+    Allows us to tune how we load data into the vector registers.
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+    
+    And squashed in:
+    
+    mmx: define and use load8888u function
+    
+    For unaligned loads.
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit ab68316eda91bbf6bb41158c622347723e1fa8c4
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Tue Feb 21 19:29:59 2012 -0500
+
+    mmx: make store8888 take uint32_t *dest as argument
+    
+    Allows us to tune how we store data from the vector registers.
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 57a245a6e00987191faad9a34bef9f4524a6848c
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Feb 22 16:32:21 2012 -0500
+
+    Update .gitignore with more demos and tests
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 51ae3f2d7f25daebbc767f161f0097b581d1554b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Feb 21 19:30:04 2012 -0500
+
+    mmx: Delete unused function in_over_full_src_alpha()
+    
+    Also a few minor formatting fixes.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit bbd1e6941b39adcdb64c77670889314fa8461c0b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Feb 21 19:23:33 2012 -0500
+
+    mmx: Enable over_x888_8_8888() for x86 as well
+    
+    It used to be slower than the generic code (with the gcc that was
+    current in 2007), but that doesn't seem to be the case anymore:
+    
+    over_x888_8_8888 =  L1:  22.97  L2:  22.88  M: 22.27 (  5.29%)  HT: 18.30  VT: 15.81  R: 15.54  RT: 10.35 ( 131Kops/s)
+    over_x888_8_8888 =  L1:  53.56  L2:  53.20  M: 50.50 ( 11.99%)  HT: 38.60  VT: 31.19  R: 29.00  RT: 17.37 ( 208Kops/s)
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 4fc586c3df9a53cc1406891e751a6eed3d7da400
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Tue Feb 21 16:28:37 2012 -0500
+
+    mmx: fix typo in pix_add_mul on MSVC
+    
+    Typo introduced in commit a075a870.
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 84221f4c1687b8ea14e9cbdc78b2ba7258e62c9e
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Feb 19 18:10:03 2012 -0500
+
+    mmx: Use _mm_shuffle_pi16
+    
+    The pshufw x86 instruction is part of Extended 3DNow! and SSE1. The
+    equivalent ARM wshufh instruction was available from the first iwMMXt
+    instruction set.
+    
+    This instruction is already used in the SSE2 code.
+    
+    Reduces code size by ~9%.
+    
+    amd64
+      text    data     bss     dec     hex filename
+     29925    2240       0   32165    7da5 .libs/libpixman_mmx_la-pixman-mmx.o
+     27237    2240       0   29477    7325 .libs/libpixman_mmx_la-pixman-mmx.o
+    
+    x86
+      text    data     bss     dec     hex filename
+     27677    1792       0   29469    731d .libs/libpixman_mmx_la-pixman-mmx.o
+     24959    1792       0   26751    687f .libs/libpixman_mmx_la-pixman-mmx.o
+    
+    arm
+      text    data     bss     dec     hex filename
+     30176    1792       0   31968    7ce0 .libs/libpixman_iwmmxt_la-pixman-mmx.o
+     27384    1792       0   29176    71f8 .libs/libpixman_iwmmxt_la-pixman-mmx.o
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 14208344964f341a7b4a704b05cf4804c23792e9
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Feb 19 01:32:31 2012 -0500
+
+    mmx: Use _mm_mulhi_pu16
+    
+    The pmulhuw x86 instruction is part of Extended 3DNow! and SSE1. The
+    equivalent ARM wmuluh instruction was available from the first iwMMXt
+    instruction set.
+    
+    This instruction is already used in the SSE2 code.
+    
+    Reduces code size by ~5%.
+    
+    amd64
+      text    data     bss     dec     hex filename
+     31325    2240       0   33565    831d .libs/libpixman_mmx_la-pixman-mmx.o
+     29925    2240       0   32165    7da5 .libs/libpixman_mmx_la-pixman-mmx.o
+    
+    x86
+      text    data     bss     dec     hex filename
+     29165    1792       0   30957    78ed .libs/libpixman_mmx_la-pixman-mmx.o
+     27677    1792       0   29469    731d .libs/libpixman_mmx_la-pixman-mmx.o
+    
+    arm
+      text    data     bss     dec     hex filename
+     31632    1792       0   33424    8290 .libs/libpixman_iwmmxt_la-pixman-mmx.o
+     30176    1792       0   31968    7ce0 .libs/libpixman_iwmmxt_la-pixman-mmx.o
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 69ed71fad11d541f89eee1238c587a03a9cf59cb
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Tue Feb 21 00:05:45 2012 +0000
+
+    mmx: enable over_x888_8_8888 on ARM/iwMMXt
+    
+    before: over_x888_8_8888 =  L1:   7.63  L2:   7.72  M:  6.44 ( 19.17%)  HT: 6.24  VT:  6.11  R:  5.87  RT:  4.61 (  51Kops/s)
+    after : over_x888_8_8888 =  L1:  11.88  L2:  11.11  M:  8.70 ( 26.01%)  HT: 8.15  VT:  8.07  R:  7.76  RT:  5.62 (  61Kops/s)
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit a14f0f66bba987d5cdcb4a3e0f3e9f7c35d3f6f0
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon Feb 20 18:36:24 2012 -0500
+
+    autoconf: use #error instead of error
+    
+    We'd rather see the actual #error message than a syntax error in
+    config.log.
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit fced5c82c2f0d6d00cb8d0a30ce6a04ec196d274
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Feb 17 18:17:49 2012 -0500
+
+    Convert while (w) to if (w) when possible
+    
+    Missed in commit 57fd8c37.
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit e27bdcd968e786079353432d14816600bf813d76
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Feb 15 18:16:42 2012 -0500
+
+    Make sure to run AC_SUBST IWMMXT_CFLAGS
+    
+    Allows you to compile without -flax-vector-conversions in your CFLAGS,
+    though -march=iwmmxt2 is still necessary since specifying some other
+    -march= value will override it, and disable iwmmxt.
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 82a3980701c68949ed762b1e93dc81698db1613e
+Author: Jeremy Huddleston <jeremyhu@apple.com>
+Date:   Sat Feb 11 01:04:13 2012 -0800
+
+    configure.ac: Add an --enable-libpng option
+    
+    Now there is a way to not link against libpng even if it's available.
+    
+    Signed-off-by: Jeremy Huddleston <jeremyhu@apple.com>
+
+commit 46fc4eb234f5c4f281c2901ea7514ff69e8670a8
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sat Feb 11 23:21:45 2012 -0500
+
+    Use AC_LANG_SOURCE for iwMMXt configure program
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit e5555d7a749c90288463ed1c294f58963c607e52
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Jan 25 14:03:05 2012 -0500
+
+    Revert "Reject trapezoids where top (botttom) is above (below) the edges"
+    
+    Cairo 1.10 will sometimes generate trapezoids like this, so we can't
+    consider them invalid. Fixes bug 45009, reported by Michael Biebl.
+    
+    This reverts commit 2437ae80e5066dec9fe52f56b016bf136d7cea06.
+
+commit 35577876978e86783d49c500b4bb7ea1fc7fa89c
+Author: Bobby Salazar <bobby8934@gmail.com>
+Date:   Thu Jan 26 13:19:18 2012 -0500
+
+    iOS Runtime Detection Support For ARM NEON
+    
+    This patch adds runtime detection support for the ARM NEON fast paths
+    for code compiled with the iOS SDK.
+
+commit 86ce1808829e3fa024acb0ebaa93ef9737ba51af
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Dec 19 19:31:25 2011 -0500
+
+    test: Port composite test over to use new pixel_checker_t object.
+    
+    Also make some tweaks to the way the errors are printed.
+
+commit f57034f678b419c3737b888f643e5bdfcaf727f9
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Dec 19 17:31:06 2011 -0500
+
+    test: Add a new "pixel_checker_t" object.
+    
+    Add a new pixel_checker_t object to test/utils.[ch]. This object
+    should be initialized with a format and can then be used to check
+    whether a given "real" pixel in that format is close enough to a
+    "perfect" pixel given as a double precision ARGB struct.
+    
+    The acceptable deviation is calculated as follows. Each channel of the
+    perfect pixel has 0.004 subtracted from it and is then converted to
+    the format. The resulting value is the minimum value that will be
+    accepted. Similarly, to compute the maximum value, the channel has
+    0.004 added to it and is then converted to the given format. Checking
+    a pixel is then a matter of splitting it into channels and checking
+    that each is within the computed bounds.
+    
+    The value of 0.004 was chosen because it is the minimum one that will
+    make the existing composite test pass (see next commit). A problem
+    with this value is that it causes 0xFE to be acceptable when the
+    correct value is 1.0, and 0x01 to be acceptable when the correct value
+    is 0. It would be better if, when the result is exactly 0 or exactly
+    1, an a8r8g8b8 pixel were required to produce exactly 0x00 or 0xff to
+    preserve full black and full white. A deviation value of 0.003 would
+    produce this, but currently this would cause tests with operators that
+    involve divisions to fail.
+
+commit 0053a9f8694c837388b78ae26fe81979d0327d28
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Dec 19 19:53:28 2011 -0500
+
+    Rename color_correct() to round_color()
+    
+    And do the rounding from float to int in the same way cairo does: by
+    multiplying with (1 << width), then subtracting one when the input was 1.0.
+
+commit 55a010bf31d2eaf71126bdf93eca99fc02037535
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Dec 22 18:15:02 2011 -0500
+
+    Move the color_correct() function from composite.c to utils.c
+
+commit 065666f33c414582425e4ac0ec9f694e93c2baf1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Jan 8 10:32:47 2012 -0500
+
+    Get rid of delegates for combiners
+    
+    Add a new function _pixman_implementation_lookup_combiner() that will
+    find a usable combiner given an operator and information about whether
+    the combiner should apply component alpha and whether it should be 64
+    bit.
+    
+    In pixman-general.c use this function to look up a combiner up front
+    instead of walking the delegate chain for every scanline.
+
+commit ab584ab500b4e7011a5b82051a90e2eea6744270
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jan 7 17:11:45 2012 -0500
+
+    test/alphamap.c: Make dst and orig_dst more independent of each other
+    
+    When making the copy of the destination, do so separately for the
+    image and the alpha map. This ensures that the alpha channel of the
+    alpha map will be different from the alpha channel of the actual
+    image.
+    
+    Previously, orig_dst would be copied onto dst along with its alpha
+    map, which meant that the alpha map of orig_dst would become the new
+    alpha channel of *both* dst and dst's alpha map. This meant that the test
+    didn't actually test that the alpha map's alpha channel was actually
+    fetched.
+
+commit 4613f2caac595b3fa1298ac49f9c9fdcd907f14a
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jan 7 16:48:00 2012 -0500
+
+    Fix bugs with alpha maps
+    
+    The alpha channel from the alpha map must be inserted as the new alpha
+    channel when a scanline is fetched from an image. Previously the alpha
+    map would overwrite the buffer instead. This wasn't caught by the
+    alpha map test because it would only verify that the resulting alpha
+    channel was correct, and not pay attention to incorrect color
+    channels.
+
+commit 8bd63634cd2b2d92145b1d52b54b91ebcb9fb1b4
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jan 7 14:32:08 2012 -0500
+
+    test: In the alphamap test, also test that we get the right red value
+    
+    There is a bug where the red channel of the alpha map of the
+    destination image is used instead of the red channel of the
+    destination image itself.
+
+commit 007d8b1813e34a7f881d9b241806f8323e9667cd
+Author: Alan Coopersmith <alan.coopersmith@oracle.com>
+Date:   Fri Dec 23 16:32:57 2011 -0800
+
+    Make mmx code compatible with Solaris Studio 12.3 compilers
+    
+    Rearranged some of the existing gcc & Intel compiler checks to allow
+    easier sharing of common cases among the compilers.
+    
+    Signed-off-by: Alan Coopersmith <alan.coopersmith@oracle.com>
+
+commit 37572455866114cbb8bb1bf3acfb1c61d200f98c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Dec 20 06:32:26 2011 -0500
+
+    Fix rounding for DIV_UNc()
+    
+    We need to compute floor (a/b * 255 + 0.5), not floor (a / b * 255),
+    so add b/2 to the numerator in the DIV_UNc() macro.
+
+commit 2437ae80e5066dec9fe52f56b016bf136d7cea06
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Dec 22 11:37:26 2011 -0500
+
+    Reject trapezoids where top (botttom) is above (below) the edges
+    
+    When a trapezoid has a top/bottom that is above/below the left/right
+    edges, degenerate trapezoids become possible. For example the edge
+    could be very short and close to horizontal. If the bottom edge is far
+    below the bottom point of such a short edge, the result is that the
+    lower right corner of the trapezoid will be extremely far to the left.
+    
+    This kind of trapezoid causes overflows in the rasterization code, so
+    change pixman_trapezoid_valid() to reject them.
+
+commit 6a8192b6dd88b833bb918de28331d3a85c84a4f7
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Dec 20 06:34:41 2011 -0500
+
+    In MUL_UNc() cast to comp2_t
+    
+    Otherwise, when comp1_t is 16 bits wide, we can end up with a signed
+    integer overflow.
+
+commit 33ac0a9084aabd0e47fb1c9e5638eafc809c52cb
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Dec 21 08:19:05 2011 -0500
+
+    Fix a bunch of signed overflow issues
+    
+    In pixman-fast-path.c: (1 << 31) - 1 causes a signed overflow, so
+    change to (1U << n) - 1.
+    
+    In pixman-image.c: The check for whether m10 == -m01 will overflow
+    when -m01 == INT_MIN. Instead just check whether the variables are 1
+    and -1.
+    
+    In pixman-utils.c: When the depth of the topmost channel is 0, we can
+    end up shifting by 32.
+    
+    In blitters-test.c: Replicating the mask would end up shifting more
+    than 32.
+    
+    In region-contains-test.c: Computing the average of two large integers
+    could overflow. Instead add half the difference between them to the
+    first integer.
+    
+    In stress-test.c: Masking the value in fake_reader() would sometimes
+    shift by 32. Instead just use the most significant bits instead of
+    the least significant.
+    
+    All these issues were found by the IOC tool:
+    
+        http://embed.cs.utah.edu/ioc/
+
+commit d788f762788c2178970ff0ff2cb6e0097171cc3c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Dec 18 09:54:47 2011 -0500
+
+    Add missing cast in _pixman_edge_multi_init()
+    
+    nx and e->dy are both 32 bit quantities, so a cast is needed to make
+    sure their product is 64 bit before subtracting it from a 64 bit
+    quantity.
+
+commit 4f3fe9c9096b2261217c2d4beb7d5eb8e578ed76
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Dec 18 08:16:45 2011 -0500
+
+    Fix some signed overflow bugs
+    
+    In the macros for the PDF blend modes, two comp1_t variables are
+    multiplied together and then used as if the result were a
+    comp4_t. When comp1_t is a uint8_t, this is fine because they are
+    promoted to int, and the product of two uint8_ts fits in an
+    int. However, when comp1_t is uint16, the product does not necessarily
+    fit in an int, so casts are necessary.
+    
+    Fix for bug 43906, reported by Siarhei Siamashka.
+
+commit 3e93bba3b04b42c2ab99d828dae12c18f29bcf7d
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Jan 5 10:37:51 2012 -0500
+
+    pixman-image.c: Fix typo in pixman_image_set_transform()
+    
+    A parenthesis was misplaced so that the size argument to memcmp() was
+    always 0. The bug is harmless except that the flags might be
+    unnecessarily recomputed in some cases.
+    
+    A bug reporting this in Mozilla's fork was discovered here:
+    
+        https://bugzilla.mozilla.org/show_bug.cgi?id=710992
+
+commit ae651e7e739253f79f345f9fcbacad8627da0d85
+Author: Colin Walters <walters@verbum.org>
+Date:   Wed Jan 4 08:06:05 2012 -0500
+
+    autogen.sh: Support GNOME Build API
+    
+    http://people.gnome.org/~walters/docs/build-api.txt
+
+commit 89498a1178bc173857f3d1ee1f889afcc58b21b6
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Dec 18 07:29:59 2011 -0500
+
+    gradient-walker: For NONE repeats, when x < 0 or x > 1, set both colors to 0
+    
+    ec7c9c2b6865b48b8bd14e4 introduced a bug where NONE gradients would be
+    misrendered, causing the area outside the gradient to be treated as a
+    (very) long fade to transparent. The problem was that a check for
+    positions outside the gradients was dropped in favor of relying on
+    the sentinels.
+    
+    Aside from misrendering, this also caused a signed integer overflow
+    when the code would compute a stepper size based on MIN_INT32.
+    
+    This patch fixes the issue by reinstating a check for these cases
+    and setting both the right and left colors to transparent black.
+
+commit d0091a33fcdb49b65a6f20f775cfde520380b1fa
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Dec 21 05:19:00 2011 -0500
+
+    Modify gradient-test to show a bug in NONE processing
+    
+    This patch modifies demos/gradient-test to display a bug in gradients
+    with a repeat mode of NONE. With the current gradient code, the left
+    side will be a solid red (actually an extremely long fade from solid
+    red to transparent) instead of a sharp transition from red to green.
+
+commit 9db980551518a09069b8ade34743238329a36661
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 9 03:59:04 2011 -0500
+
+    region: Add pixman_region{,32}_clear() functions.
+    
+    These functions simply reset the region to empty. They are equivalent
+    to
+    
+          pixman_region_fini (&region);
+          pixman_region_init (&region);
+
+commit 6b9d6a91ed4a85f27d7e5824ce2a63f37876e937
+Author: Bobby Salazar <bobby8934@gmail.com>
+Date:   Tue Dec 13 02:03:16 2011 -0500
+
+    Android Runtime Detection Support For ARM NEON
+    
+    This patch adds runtime detection support for the ARM NEON fast paths
+    for code compiled with the Android NDK. This is the only code change
+    needed to enable the ARM NEON pixman fast paths for the ever growing
+    Android platform (200 million+ smartphones, tablets, etc.). Just make
+    sure to #define USE_ARM_NEON in your makefile.
+
+commit 84450c411cc93309bb1d1b1f555640b3ad105500
+Author: Naohiro Aota <naota@gentoo.org>
+Date:   Thu Nov 24 13:12:15 2011 +0100
+
+    Don't use non-POSIX test
+    
+    test "$test_CFLAGS" == "" &&         \
+    
+    may cause an error on some POSIX shells and uses a style which is not
+    consistent with the other tests in configure.ac
+    
+    Fixes https://bugs.freedesktop.org/show_bug.cgi?id=42588 and
+    https://bugs.gentoo.org/show_bug.cgi?id=387087
+
+commit 9985febd7847b7a9c09d6395db7f89490c83be30
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Tue Nov 8 22:00:46 2011 +0100
+
+    test: Produce autotools-looking report in the win32 build system
+    
+    Tweak the commands used to run the tests on win32 to make the output
+    look mostly like that produced by the autotools test system.
+    
+    In addition to this, make sure that the exit status of the test target
+    is success (0) if and only if no failure occurred.
+
+commit b31da39f6f65d1784fc2f6915c30eb011cc2893b
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Thu Nov 3 11:07:25 2011 +0100
+
+    demos: Consistently use G_N_ELEMENTS()
+    
+    Instead of open-coding G_N_ELEMENTS(), just use it.
+
+commit 1662c94348eda19ec35db2625749febd1dceb35e
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Thu Nov 3 10:53:10 2011 +0100
+
+    test: Reuse the ARRAY_LENGTH() macro
+    
+    It is provided by utils.h, there is no need to redefine it.
+
+commit 97b9fa090c54f6feab54bde272df374a13c0c84d
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Thu Nov 3 10:51:27 2011 +0100
+
+    Use the ARRAY_LENGTH() macro when possible
+    
+    This patch has been generated by the following Coccinelle semantic patch:
+    
+    // Use the ARRAY_LENGTH() macro when possible
+    //
+    // Replace open-coded array length computations with the
+    // ARRAY_LENGTH() macro
+    
+    @@
+    type T;
+    T[] E;
+    @@
+    - (sizeof(E)/sizeof(T))
+    + ARRAY_LENGTH (E)
+
+commit 06760f5cb069bdc041af7a0e73c9d5fc08741f28
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Thu Nov 3 10:40:24 2011 +0100
+
+    test: Cleanup includes
+    
+    All the tests are linked to libutil, hence it makes sense to always
+    include utils.h and reuse what it provides (config.h inclusion, access
+    to private pixman APIs, ARRAY_LENGTH, ...).
+
+commit cbd88a9416d9b33e6589e3f857ee839559a7e4de
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Thu Nov 3 10:21:41 2011 +0100
+
+    Remove useless checks for NULL before freeing
+    
+    This patch has been generated by the following Coccinelle semantic patch:
+    
+    // Remove useless checks for NULL before freeing
+    //
+    // free (NULL) is a no-op, so there is no need to avoid it
+    
+    @@
+    expression E;
+    @@
+    + free (E);
+    + E = NULL;
+    - if (unlikely (E != NULL)) {
+    -   free(E);
+    (
+    -   E = NULL;
+    |
+    -   E = 0;
+    )
+       ...
+    - }
+    
+    @@
+    expression E;
+    @@
+    + free (E);
+    - if (unlikely (E != NULL)) {
+    -   free (E);
+    - }
+
+commit 8d72d35b29b0fe0345e21525db9e5f25876364be
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Nov 6 16:36:01 2011 -0500
+
+    Post-release version bump to 0.25.1
+
+commit 973dc7d319f373fc1bbb91ea54e8a7116cfaa932
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Nov 6 16:10:33 2011 -0500
+
+    Pre-release version bump to 0.24.0
+
+commit 6bf590f38577b4c3c6f4876291360ef95086fb37
+Author: Alan Coopersmith <alan.coopersmith@oracle.com>
+Date:   Sun Oct 30 09:12:06 2011 -0700
+
+    Change MMX ldq_u to return _m64 instead of forcing all callers to cast
+    
+    Sun/Oracle Studio compilers allow the pointers to be cast, but not the
+    non-pointer forms, causing pixman compiles to fail with many errors of:
+    "pixman-mmx.c", line 1411: invalid cast expression
+    
+    Signed-off-by: Alan Coopersmith <alan.coopersmith@oracle.com>
+
+commit 5d7f5bc8ee178588194cb6acc2e0ceb6ff926d72
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Wed Nov 2 18:49:58 2011 -0400
+
+    Add definitions of INT64_MIN and INT64_MAX
+
+commit 697cfe15377a8c420764ff824c0a8c2c8ff2148c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Oct 29 05:51:44 2011 -0400
+
+    Post-release version bump to 0.23.9
+
+commit a0f1b565811388b0567c845b9b7063d5b93d325e
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Oct 29 05:33:44 2011 -0400
+
+    Pre-release version bump to 0.23.8
+
+commit 498138c293a2abce44ce122114852f4e6c5b87fe
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Oct 25 08:45:34 2011 -0400
+
+    Fix use of uninitialized fields reported by valgrind
+    
+    In pixman-noop.c and pixman-sse2.c, we are accessing
+    image->bits.width/height without first making sure the image is a bits
+    image. The warning is harmless because we never act on this
+    information without checking that the image is a8r8g8b8, but valgrind
+    does warn about it.
+    
+    In pixman-noop.c, just reorder the clauses in the if statement; in
+    pixman-sse2.c require images to have the FAST_PATH_BITS_IMAGE flag
+    set.
+
+commit 6131707e8fc39187d1d358481f7c57c57cfab206
+Merge: 3d4d705 ec7c9c2
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Oct 20 09:13:12 2011 -0400
+
+    Merge branch 'gradients'
+
+commit 3d4d705d2ffa4aeab3dc02a23c2aadbea1374a3f
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Tue Oct 18 21:50:18 2011 +0900
+
+    ARM: NEON: Fix assembly typo error in src_n_8_8888
+    
+    Binutils 2.21 does not complain about missing comma between ARM
+    register and alignment specifier in vld/vst instructions which
+    causes build error on binutils 2.20.
+
+commit 19f118f41f8725f22395d31eac5670cb350b55ec
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Mon Sep 26 18:33:27 2011 +0900
+
+    ARM: NEON: Standard fast path src_n_8_8
+    
+    Performance numbers of before/after on cortex-a8 @ 1GHz
+    
+    - before
+    L1:  28.05  L2:  28.26  M: 26.97 (  4.48%)  HT: 19.79  VT: 19.14  R: 17.61  RT:  9.88 ( 101Kops/s)
+    
+    - after
+    L1:1430.28  L2:1252.10  M:421.93 ( 75.48%)  HT:170.16  VT:138.03  R:145.86  RT: 35.51 ( 255Kops/s)
+
+commit 4db9e2bc13d3ed26416f249e57acec4b41f58b7f
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Mon Sep 26 17:03:54 2011 +0900
+
+    ARM: NEON: Standard fast path src_n_8_8888
+    
+    Performance numbers of before/after on cortex-a8 @ 1GHz
+    
+    - before
+    L1:  32.39  L2:  31.79  M: 30.84 ( 13.77%)  HT: 21.58  VT: 19.75  R: 18.83  RT: 10.46 ( 106Kops/s)
+    
+    - after
+    L1: 516.25  L2: 372.00  M:193.49 ( 85.59%)  HT:136.93  VT:109.10  R:104.48  RT: 34.77 ( 253Kops/s)
+
+commit 26659de6cd2775c83a9a6e6660324d5baacf61f9
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Mon Sep 26 19:04:53 2011 +0900
+
+    ARM: NEON: Instruction scheduling of bilinear over_8888_8_8888
+    
+    Instructions are reordered to eliminate pipeline stalls and get
+    better memory access.
+    
+    Performance of before/after on cortex-a8 @ 1GHz
+    
+    << 2000 x 2000 with scale factor close to 1.x >>
+    before : 40.53 Mpix/s
+    after  : 50.76 Mpix/s
+
+commit 4481920f405e47b3a92811a8cb06afbd37dee01b
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Wed Sep 21 15:52:13 2011 +0900
+
+    ARM: NEON: Instruction scheduling of bilinear over_8888_8888
+    
+    Instructions are reordered to eliminate pipeline stalls and get
+    better memory access.
+    
+    Performance of before/after on cortex-a8 @ 1GHz
+    
+    << 2000 x 2000 with scale factor close to 1.x >>
+    before : 50.43 Mpix/s
+    after  : 61.09 Mpix/s
+
+commit 1cd916f3a5ebeb943f66eecf0b8ce99af0b95d11
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Fri Sep 23 00:03:22 2011 +0900
+
+    ARM: NEON: Replace old bilinear scanline generator with new template
+    
+    Bilinear scanline functions in pixman-arm-neon-asm-bilinear.S can
+    be replaced with new template just by wrapping existing macros.
+
+commit 6682b2b3597c9f431900bfe7b1b42dfbe006bae5
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Tue Sep 20 21:32:35 2011 +0900
+
+    ARM: NEON: Bilinear macro template for instruction scheduling
+    
+    This macro template takes 6 code blocks.
+    
+    1. process_last_pixel
+    2. process_two_pixels
+    3. process_four_pixels
+    4. process_pixblock_head
+    5. process_pixblock_tail
+    6. process_pixblock_tail_head
+    
+    process_last_pixel does not need to update horizontal weight. This
+    is done by the template. The two and four code blocks should update
+    horizontal weight inside of them. The head/tail/tail_head blocks
+    make up the unrolled core loop. You can apply instruction scheduling
+    to the tail_head blocks.
+    
+    You can also specify size of the pixel block. Supported size is 4
+    and 8. If you want to use mask, give BILINEAR_FLAG_USE_MASK flags
+    to the template, then you can use register MASK. When using d8~d15
+    registers, give BILINEAR_FLAG_USE_ALL_NEON_REGS to make sure
+    registers are properly saved on the stack and later restored.
+
+commit b5e4355fa4973e3edd4abeb11bdc47c42371cc76
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Tue Sep 20 19:46:25 2011 +0900
+
+    ARM: NEON: Some cleanup of bilinear scanline functions
+    
+    Use STRIDE and initial horizontal weight update is done before
+    entering interpolation loop. Cache preload for mask and dst.
+
+commit ec7c9c2b6865b48b8bd14e4509538f8fcbe93463
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Oct 14 09:04:48 2011 -0400
+
+    Simplify gradient_walker_reset()
+    
+    The code that searches for the closest color stop to the given
+    position is duplicated across the various repeat modes. Replace the
+    switch with two if/else constructions, and put the search code between
+    them.
+
+commit 2d0da8ab8d8fef60ed1bbb9d6b75f66577c3f85d
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Oct 14 09:02:14 2011 -0400
+
+    Use sentinels instead of special casing first and last stops
+    
+    When storing the gradient stops internally, allocate two more stops,
+    one before the beginning of the stop list and one after the
+    end. Initialize those stops based on the repeat property of the
+    gradient.
+    
+    This allows gradient_walker_reset() to be simplified because it can
+    now simply pick the two closest stops to the position without special
+    casing the first and last stops.
+
+commit 84d6ca7c891601b019d4862a556ed98b7e6fe525
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Oct 14 07:42:00 2011 -0400
+
+    gradient walker: Correct types and fix formatting
+    
+    The type of pos in gradient_walker_reset() and gradient_walker_pixel()
+    is pixman_fixed_48_16_t and not pixman_fixed_32_32. The types of the
+    positions in the walker struct are pixman_fixed_t and not int32_t, and
+    need_reset is a boolean, not an integer. The spread field should be
+    called repeat and have the type pixman_repeat_t.
+    
+    Also fix some formatting issues, make gradient_walker_reset() static,
+    and delete the pointless PIXMAN_GRADIENT_WALKER_NEED_RESET() macro.
+
+commit ace225b53dee88d134753ac901f26ba3db6781da
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Oct 11 16:12:24 2011 -0400
+
+    Add stable release / development snapshot to draft release notes
+    
+    This will hopefully serve as a reminder to me that I should put this
+    information in the release notes.
+
+commit bb7142d361d56d66ac40debb60a7c4d099764ba8
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Oct 11 06:10:39 2011 -0400
+
+    Post-release version bump to 0.23.7
+
+commit e20ac40bd30484f0f711b52d0c1993ef08760284
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Oct 11 06:00:51 2011 -0400
+
+    Pre-release version bump to 0.23.6
+
+commit a43946a51fbbdc76be1af9bc25fe7c5c2a6314bb
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Thu Sep 22 18:42:38 2011 +0900
+
+    Simple repeat: Extend too short source scanlines into temporary buffer
+    
+    Too short scanlines can cause repeat handling overhead, and since
+    optimized pixman composite functions usually process a bunch of
+    pixels in a single loop iteration, it might be beneficial to
+    pre-extend source scanlines. The temporary buffers will usually
+    reside in cache, so accessing them should be quite efficient.
+
+commit eaff774a3f8af6651a245711b9ab8af3211eeb10
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Mon Aug 29 21:44:36 2011 +0900
+
+    Simple repeat fast path
+    
+    We can implement simple repeat by stitching existing fast path
+    functions. First lookup COVER_CLIP function for given input and
+    then stitch horizontally using the function.
+
+commit a258e33fcb6cf08a2ef76e374cb92a12c9adb171
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Thu Sep 22 16:33:02 2011 +0900
+
+    Move _pixman_lookup_composite_function() to pixman-utils.c
+
+commit fc62785aabbe890b656c9cbaa57f2e65e74bbcc2
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jun 27 21:17:04 2011 +0000
+
+    Add src, mask, and dest flags to the composite args struct.
+    
+    These flags are useful in the various compositing routines, and the
+    flags stored in the image structs are missing some bits of information
+    that can only be computed when pixman_image_composite() is called.
+
+commit fa6523d13ae9b7986bb890df5ad66e8599bc3ed8
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Thu Sep 22 16:26:55 2011 +0900
+
+    Add new fast path flag FAST_PATH_BITS_IMAGE
+    
+    This fast path flag indicates that the type of the image is a bits image.
+
+commit 7272e2fcd2ff8e546cef19929cd370ae2f946135
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Thu Sep 22 16:20:03 2011 +0900
+
+    init/fini functions for pixman_image_t
+    
+    pixman_image_t itself can be on stack or heap. So segregating
+    init/fini from create/unref can be useful when we want to use
+    pixman_image_t on stack or other memory.
+
+commit 4dcf1b0107334857e1f0bb203c34efed1146535c
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Wed Sep 7 23:00:29 2011 +0900
+
+    sse2: Bilinear scaled over_8888_8_8888
+
+commit 81050f2784407b260a1897efa921631a19eeec6b
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Wed Sep 7 22:57:29 2011 +0900
+
+    sse2: Bilinear scaled over_8888_8888
+
+commit d67c0b883daeeaacf3f21f1ddbdcf9ecf94fac43
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Wed Sep 7 22:51:46 2011 +0900
+
+    sse2: Macros for assembling bilinear interpolation code fractions
+    
+    Primitive bilinear interpolation code is reusable to implement other
+    bilinear functions.
+    
+    BILINEAR_DECLARE_VARIABLES
+    - Declare variables needed to interpolate src pixels.
+    
+    BILINEAR_INTERPOLATE_ONE_PIXEL
+    - Interpolate one pixel and advance to next pixel
+    
+    BILINEAR_SKIP_ONE_PIXEL
+    - Skip interpolation and just advance to next pixel
+      This is useful for skipping zero mask
+
+commit 741eb8462c3ff72cbf2d9acfeb1e97208a414fcd
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu Oct 6 17:56:09 2011 -0400
+
+    Correct the minimum gcc version needed for iwmmxt
+    
+    Spotted by Søren Sandmann.
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 0a34277180d29f471a2554afc2e2b682fee33c79
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Oct 5 22:54:36 2011 -0400
+
+    Make sure iwMMXt is only detected on ARM
+    
+    iwMMXt is incorrectly detected on x86 and amd64. This happens because
+    the test uses standard _mm_* intrinsic functions which it compiles with
+    -march=iwmmxt, but when the user has set CFLAGS=-march=k8 for instance,
+    no error is generated from -march=iwmmxt, even though it's not a valid
+    flag on x86/amd64. Passing CFLAGS=-march=native does not override the
+    -march=iwmmxt flag though, which is why it wasn't noticed before.
+    
+    So, just #error out in the test if the __arm__ preprocessor directive
+    isn't defined.
+    
+    Fixes https://bugs.gentoo.org/show_bug.cgi?id=385179
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 879b7c21e45b092272e689e05dc867f6260e258f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Sep 27 11:32:13 2011 -0400
+
+    Don't include stdint.h in scaling-helpers-test.
+    
+    Fixes bug 41257.
+
+commit 01c2dcbe6936a868651160d2617d783d5b8d3d7d
+Author: Benjamin Otte <otte@redhat.com>
+Date:   Wed Sep 14 17:52:03 2011 +0200
+
+    build: replace @VAR@ with $(VAR) in makefiles
+
+commit 100f16eae94a54fbb9ee1f44fa3c34602ba25c4d
+Author: Benjamin Otte <otte@redhat.com>
+Date:   Wed Sep 14 17:01:51 2011 +0200
+
+    tests: Add PNG_CFLAGS/LIBS to tests
+    
+    PNG flags were accidentally included by gdk-pixbuf. This has been fixed
+    recently, so we need to make sure to include it ourselves.
+
+commit d1313febbec2124ee175cd323a338caa3c1a8fc2
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu Sep 22 15:28:00 2011 -0400
+
+    mmx: optimize unaligned 64-bit ARM/iwmmxt loads
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 7ab94c5f99cc524ddfbbcedca4304ec7943f74e1
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Jul 31 22:42:24 2011 -0400
+
+    mmx: compile on ARM for iwmmxt optimizations
+    
+    Check in configure for at least gcc-4.6, since gcc-4.7 (and hopefully
+    4.6) will be the earliest version capable of compiling the _mm_*
+    intrinsics on ARM/iwmmxt. Even for suitable compiler versions I use
+    _mm_srli_si64 which is known to cause unpatched compilers to fail.
+    
+    Select iwmmxt at runtime only after NEON, since we expect the NEON
+    optimizations to be more capable and faster than iwmmxt.
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit f66887d9eae9646c838d518020168b1403705b1e
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Sep 4 14:11:46 2011 -0400
+
+    mmx: prepare pixman-mmx.c to be compiled for ARM/iwmmxt
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 7c6d5d1999989187b60f1e0e82e55ed55238eb27
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu Sep 8 20:33:45 2011 +0200
+
+    mmx: fix unaligned accesses
+    
+    Simply return *p in the unaligned access functions, since alignment
+    constraints are very relaxed on x86 and this allows us to generate
+    identical code as before.
+    
+    Tested with the test suite, lowlevel-blit-test, and cairo-perf-trace on
+    ARM and Alpha with no unaligned accesses found.
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 5d98abb14ca9042af6d0ec7c14c8398cf4046b80
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu Sep 22 15:39:53 2011 -0400
+
+    mmx: wrap x86/MMX inline assembly in ifdef USE_X86_MMX
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 02c1f1a022e86ced69fc91376232d75d5d6583c5
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Jul 31 20:20:12 2011 +0000
+
+    mmx: rename USE_MMX to USE_X86_MMX
+    
+    This will make upcoming ARM usage of pixman-mmx.c unambiguous.
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 57fd8c37aa3148b1d70bad65e1a49721e9a47d7e
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Sep 23 14:10:52 2011 -0400
+
+    mmx: convert while (w) to if (w) when possible
+    
+    gcc isn't able to see that w is no greater than 1, so it generates
+    unnecessary loop instructions with while (w).
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 38a7aae1d9c8e1e41de22f9c3846dfc975af6838
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Sep 9 15:33:14 2011 +0200
+
+    mmx: fix formats in commented code
+    
+    b8r8g8 is apparently no longer supported sometime since this code was
+    commented.
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit b6b77488a0259da3662edf68568e78806ca97444
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Sep 9 15:34:04 2011 +0200
+
+    lowlevel-blt: add over_x888_8_8888
+    
+    Signed-off-by: Matt Turner <mattst88@gmail.com>
+
+commit 9126f36b964c71b83c69235df4c3a46ab81ab5d5
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sun May 22 22:51:00 2011 +0300
+
+    BILINEAR->NEAREST filter optimization for simple rotation and translation
+    
+    Simple rotation and translation are the additional cases when BILINEAR
+    filter can be safely reduced to NEAREST.
+
+commit ad5c6bbb36c1c5e72313f7c7bc7c6e6b7e79daba
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Sep 4 02:53:39 2011 -0400
+
+    Strength-reduce BILINEAR filter to NEAREST filter for identity transforms
+    
+    An image with a bilinear filter and an identity transform is
+    equivalent to one with a nearest filter, so there is no reason the
+    standard fast paths shouldn't be usable.
+    
+    But because a BILINEAR filter samples a 2x2 pixel block in the source
+    image, FAST_PATH_SAMPLES_COVER_CLIP can't be set in the case where the
+    source area is the entire image, because some compositing operations
+    might then read pixels outside the image.
+    
+    This patch fixes the problem by splitting the
+    FAST_PATH_SAMPLES_COVER_CLIP flag into two separate flags
+    FAST_PATH_SAMPLES_COVER_CLIP_NEAREST and
+    FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR that indicate that the clip
+    covers the samples taking into account NEAREST/BILINEAR filters
+    respectively.
+    
+    All the existing compositing operations that require
+    FAST_PATH_SAMPLES_COVER_CLIP then have their flags modified to pick
+    either COVER_CLIP_NEAREST or COVER_CLIP_BILINEAR depending on which
+    filter they depend on.
+    
+    In compute_image_info() both COVER_CLIP_NEAREST and
+    COVER_CLIP_BILINEAR can be set depending on how much room there is
+    around the clip rectangle.
+    
+    Finally, images with an identity transform and a bilinear filter get
+    FAST_PATH_NEAREST_FILTER set as well as FAST_PATH_BILINEAR_FILTER.
+    
+    Performance measurements with render_bench against Xephyr:
+    
+    Before
+    
+    *** ROUND 1 ***
+    ---------------------------------------------------------------
+    Test: Test Xrender doing non-scaled Over blends
+    Time: 5.720 sec.
+    ---------------------------------------------------------------
+    Test: Test Xrender (offscreen) doing non-scaled Over blends
+    Time: 5.149 sec.
+    ---------------------------------------------------------------
+    Test: Test Imlib2 doing non-scaled Over blends
+    Time: 6.237 sec.
+    
+    After:
+    
+    *** ROUND 1 ***
+    ---------------------------------------------------------------
+    Test: Test Xrender doing non-scaled Over blends
+    Time: 4.947 sec.
+    ---------------------------------------------------------------
+    Test: Test Xrender (offscreen) doing non-scaled Over blends
+    Time: 4.487 sec.
+    ---------------------------------------------------------------
+    Test: Test Imlib2 doing non-scaled Over blends
+    Time: 6.235 sec.
+
+commit eb2e7ed81b324af730c1a7639c9ca9ed60152875
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Sep 5 14:43:25 2011 -0400
+
+    test: Occasionally use a BILINEAR filter in blitters-test
+    
+    To test that reductions of BILINEAR->NEAREST for identity
+    transformations happen correctly, occasionally use a bilinear filter
+    in blitters test.
+
+commit 2a9f88430e7088eccfbbbd6c6b4f4e534126b1e1
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sun May 22 22:16:38 2011 +0300
+
+    test: better coverage for BILINEAR->NEAREST filter optimization
+    
+    The upcoming optimization which is going to be able to replace BILINEAR filter
+    with NEAREST where appropriate needs to analyze the transformation matrix
+    and not to make any mistakes.
+    
+    The changes to affine-test include:
+    1. Higher chance of using the same scale factor for x and y axes. This can help
+       to stress some special cases (for example the case when both x and y scale
+       factors are integer). The same applies to x/y translation.
+    2. Introduced a small chance for "corrupting" transformation matrix by flipping
+       random bits. This supposedly can help to identify the cases when some of the
+       fast paths or other code logic is wrongly activated due to insufficient checks.
+
+commit 054922e2fce1f8d9db4b9b756e54b0fa5655956d
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Sep 5 00:19:51 2011 -0400
+
+    Eliminate compute_sample_extents() function
+    
+    In analyze_extents(), instead of calling compute_sample_extents() call
+    compute_transformed_extents() and inline the remaining part of
+    compute_sample_extents(). The upcoming bilinear->nearest optimization
+    will do something different with these two pieces of code.
+
+commit 577b6c46fd39c43c2c328fed48854b50b9e85e5b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Sep 4 17:43:29 2011 -0400
+
+    Split computation of sample area into own function
+    
+    compute_sample_extents() has two parts: one that computes the
+    transformed extents, and one that checks whether the computed extents
+    fit within the 16.16 coordinate space.
+    
+    Split the first part into its own function
+    compute_transformed_extents().
+
+commit 5064f1803136cbc28d1e9f636feb2ff8ccfbdded
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Sep 4 17:17:53 2011 -0400
+
+    Remove x and y coordinates from analyze_extents() and compute_sample_extents()
+    
+    These coordinates were only ever used for subtracting from the extents
+    box to put it into the coordinate space of the image, so we might as
+    well do this coordinate translation only once before entering the
+    functions.
+
+commit dbcb4af60d8c688eaaa027c3c5bce9527a410465
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Aug 16 06:13:59 2011 -0400
+
+    Use MAKE_ACCESSORS() to generate accessors for paletted formats
+    
+    Add support in convert_pixel_from_a8r8g8b8() and
+    convert_pixel_to_a8r8g8b8() for conversion to/from paletted formats,
+    then use MAKE_ACCESSORS() to generate accessors for the indexed
+    formats: c8, g8, g4, c4, g1
+
+commit c82c2c38538f5c3f25cf81ad697040d2332d64de
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun May 30 12:36:58 2010 -0400
+
+    Use MAKE_ACCESSORS() to generate accessors for the a1 format.
+    
+    Add FETCH_1 and STORE_1 macros and use them to add support for 1bpp
+    pixels to fetch_and_convert_pixel() and convert_and_store_pixel(),
+    then use MAKE_ACCESSORS() to generate the accessors for the a1
+    format. (Not the g1 format as it is indexed).
+
+commit 2114dd8aa1f292541e55b6b84152732b37c1c1eb
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Aug 16 14:38:44 2011 -0400
+
+    Use MAKE_ACCESSORS() to generate accessors for 24bpp formats
+    
+    Add FETCH_24 and STORE_24 macros and use them to add support for 24bpp
+    pixels in fetch_and_convert_pixel() and
+    convert_and_store_pixel(). Then use MAKE_ACCESSORS() to generate
+    accessors for the 24 bpp formats:
+    
+        r8g8b8
+        b8g8r8
+
+commit f19f5daa1b111368bcf75435dce12483e08756f2
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Aug 18 05:09:07 2011 -0400
+
+    Use MAKE_ACCESSORS() to generate accessors for 4 bpp RGB formats
+    
+    Use FETCH_4 and STORE_4 macros to add support for 4bpp pixels to
+    fetch_and_convert_pixel() and convert_and_store_pixel(), then use
+    MAKE_ACCESSORS() to generate accessors for 4 bpp formats, except g4 and
+    c4 which are indexed:
+    
+        a4
+        r1g2b1
+        b1g2r1
+        a1r1g1b1
+        a1b1g1r1
+
+commit af78fe24e41c30b5c9b3da4449813c75f760c845
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Aug 18 08:13:58 2011 -0400
+
+    Use MAKE_ACCESSORS() to generate accessors for 8bpp RGB formats
+    
+    Add support for 8 bpp formats to fetch_and_convert_pixel() and
+    convert_and_store_pixel(), then use MAKE_ACCESSORS() to generate the
+    accessors for all the 8 bpp formats, except g8 and c8, which are
+    indexed:
+    
+        a8
+        r3g3b2
+        b2g3r3
+        a2r2g2b2
+        a2b2g2r2
+        x4a4
+
+commit 5e1b9f897532ac0fa220880bf94dd660c837afe9
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Aug 18 08:13:44 2011 -0400
+
+    Use MAKE_ACCESSORS() to generate accessors for all the 16bpp formats
+    
+    Add support for 16bpp pixels to fetch_and_convert_pixel() and
+    convert_and_store_pixel(), then use MAKE_ACCESSORS() to generate
+    accessors for all the 16bpp formats:
+    
+        r5g6b5
+        b5g6r5
+        a1r5g5b5
+        x1r5g5b5
+        a1b5g5r5
+        x1b5g5r5
+        a4r4g4b4
+        x4r4g4b4
+        a4b4g4r4
+        x4b4g4r4
+
+commit a77597bcb8d10afd66980b8db8839049181b3743
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Aug 18 08:13:30 2011 -0400
+
+    Use MAKE_ACCESSORS() to generate all the 32 bit accessors
+    
+    Add support for 32bpp formats in fetch_and_convert_pixel() and
+    convert_and_store_pixel(), then use MAKE_ACCESSORS() to generate
+    accessors for all the 32 bpp formats:
+    
+        a8r8g8b8
+        x8r8g8b8
+        a8b8g8r8
+        x8b8g8r8
+        x14r6g6b6
+        b8g8r8a8
+        b8g8r8x8
+        r8g8b8x8
+        r8g8b8a8
+
+commit 814af33df3e9892e4fc790c7ccd2702ce2b8ea97
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Aug 17 17:27:58 2011 -0400
+
+    Add initial version of the MAKE_ACCESSORS() macro
+    
+    This macro will eventually allow the fetchers and storers to be
+    generated automatically. For now, it's just a skeleton that doesn't
+    actually do anything.
+
+commit 5cae7a3fe6e148d2bb42b86efb7daf27dbf12ee0
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Aug 15 18:42:38 2011 -0400
+
+    Add general pixel converter
+    
+    This function can convert between any <= 32 bpp formats. Nothing uses
+    it yet.
+
+commit 22f54dde6bbf87251a0b4ad93bacbdaa7cb508d8
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Aug 15 10:22:05 2011 -0400
+
+    Add a generic unorm_to_unorm() conversion utility
+    
+    This function can convert between normalized numbers of different
+    depths. When converting to higher bit depths, it will replicate the
+    existing bits, when converting to lower bit depths, it will simply
+    truncate.
+    
+    This function replaces the expand16() function in pixman-utils.c
+
+commit d842669a467be490bb0a40000d0c0fccea0b1b85
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Sep 19 09:08:33 2011 -0400
+
+    A few tweaks to a comment in pixman-combine.c.template
+    
+    Include a link to
+    
+    	http://marc.info/?l=xfree-render&m=99792000027857&w=2
+    
+    where Keith explains how the disjoint/conjoint operators work.
+
+commit 3432e1a3444a55f71e294da7d350957a8e1232c3
+Author: Jon TURNEY <jon.turney@dronecode.org.uk>
+Date:   Mon Sep 19 06:17:58 2011 -0400
+
+    Fix build on cygwin after commit efdf65c0c4fff551fb3cd9104deda9adb6261e22
+    
+    libutils depends on pixman and so needs to precede it in the link order
+    
+    Found by tinderbox, see [1]
+    
+    [1] http://tinderbox.freedesktop.org/builds/2011-09-15-0005/logs/pixman/#build
+    
+    Signed-off-by: Jon TURNEY <jon.turney at dronecode.org.uk>
+
+commit f9faf4df440366ed36b197dc09b1c2b51af3387b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Sep 12 23:17:39 2011 -0400
+
+    test: Use smaller boxes in region_contains_test()
+    
+    The boxes used in region_contains_test() sometimes overflow causing
+    
+        *** BUG ***
+        In pixman_region32_union_rect: Invalid rectangle passed
+        Set a breakpoint on '_pixman_log_error' to debug
+    
+    messages to be printed when pixman is compiled with DEBUG. Fix this by
+    dividing the x, y, w, h coordinates by 4 to prevent overflows.
+
+commit 9623b478f7e872af36ca77af5cc9e85f5ea132cf
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Sun Sep 4 21:33:05 2011 +0200
+
+    build-win32: Add 'check' target
+    
+    On win32 the tests are built but they are not run automatically by the
+    build system.
+    
+    A minimal 'check' target (depending on the tests being built) can
+    simply run them and log to the console their success/failure.
+
+commit 479d0944851fffda7ed860523feb388fec028545
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Sun Sep 4 13:52:53 2011 -0700
+
+    test: Do not include config.h unless HAVE_CONFIG_H is defined
+    
+    The win32 build system does not generate config.h and correctly runs
+    the compiler without defining HAVE_CONFIG_H. Nevertheless some files
+    include config.h without checking for its availability, breaking the
+    build from a clean directory:
+    
+    test\utils.h(2) : fatal error C1083: Cannot open include file:
+    'config.h': No such file or directory
+    ...
+
+commit d46a9f3acef21b50639c64f190a0de7eca21747c
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Sun Sep 4 21:56:20 2011 +0200
+
+    build-win32: Add root Makefile.win32
+    
+    Add Makefile.win32 to the pixman root. This makefile can recursively
+    run the other ones to compile the library or the test suite.
+
+commit a76b78c2daa61900572014070d3e856a460fd554
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Sun Sep 4 18:00:38 2011 +0200
+
+    build-win32: Share targets and variables across win32 makefiles
+    
+    The win32 build system repeatedly defines some basic variables
+    (notably program names and flags) and C sources compilation rules.
+    
+    They can be factored out to a common Makefile, to be included in every
+    other Makefile.win32.
+
+commit efdf65c0c4fff551fb3cd9104deda9adb6261e22
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Sun Sep 4 20:07:42 2011 +0200
+
+    build: Reuse test sources
+    
+    Makefile.am and Makefile.win32 should not duplicate content, as this
+    leads to breaking the build when they are not kept in sync.
+    
+    This can be avoided by listing sources, headers and common build
+    variables/rules in a Makefile.sources file.
+    
+    In order to further simplify the test makefiles, the utility functions
+    are now in a static library, which gets linked to all the tests and
+    benchmarks.
+
+commit a4f95d083b1aa644923d79e7b61df6c2eacca7c2
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Sun Sep 4 09:41:41 2011 -0700
+
+    build: Reuse sources and pixman-combine build rules
+    
+    Makefile.am and Makefile.win32 should not duplicate content, as this
+    leads to breaking the build when they are not kept in sync.
+    
+    This can be avoided by listing sources, headers and common build
+    variables/rules in a Makefile.sources file.
+
+commit 25bd96a3d0e935646d54c938bf065696d3a79e07
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Sun Sep 4 20:07:57 2011 +0200
+
+    test: Fix compilation on win32
+    
+    Adding scaling-helpers-test to the testsuite on win32 makes MSVC
+    complain about int64_t being used as an expression:
+    
+    scaling-helpers-test.c(27) : error C2275: 'int64_t' : illegal use of
+    this type as an expression
+
+commit 9882d832f60419094c0b379b88fa344490ea36eb
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Sep 11 19:44:06 2011 -0400
+
+    Use pkg-config to determine the flags to use with libpng
+    
+    Previously we would unconditionally link with -lpng leading to build
+    failures on systems without libpng.
+
+commit 99a53667da3014a463b8a0e2b6c317efe0ebb220
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Feb 22 05:20:36 2011 -0500
+
+    test: New function to save a pixman image to .png
+    
+    When debugging it is often very useful to be able to save an image as
+    a png file. This commit adds a function "write_png()" that does that.
+    
+    If libpng is not available, then the function becomes a noop.
+
+commit 1e1ae0bf6e9dd2189133539b9c34a0f6826b7393
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Sep 9 23:59:20 2011 -0400
+
+    Post-release version bump to 0.23.5
+
+commit f901e3b58b5d710cf136af89fc7395942bea9dfb
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Sep 9 23:51:11 2011 -0400
+
+    Pre-release version bump to 0.23.4
+
+commit f5da52b6774bdefdfa88a28fdc3904797adb7e26
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Mon Aug 22 15:29:25 2011 +0100
+
+    bits: optimise fetching width==1 repeats
+    
+    Profiling ign.com, 20% of the entire render time was absorbed in this
+    single operation:
+    
+    << /content //COLOR_ALPHA /width 480 /height 800 >> surface context
+    << /width 1 /height 677 /format //ARGB32 /source <|!!!@jGb!m5gD']#$jFHGWtZcK&2i)Up=!TuR9`G<8;ZQp[FQk;emL9ibhbEL&NTh-j63LhHo$E=mSG,0p71`cRJHcget4%<S\X+~> >> image pattern
+      //EXTEND_REPEAT set-extend
+      set-source
+    n 0 0 480 677 rectangle
+    fill+
+    pop
+    
+    which is a simple composition of a single pixel wide image. Sadly this
+    is a workaround for lack of independent repeat-x/y handling in cairo and
+    pixman. Worse still is that the worst-case behaviour of the general repeat
+    path is for width 1 images...
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 7ef44cae6ba6d1c2aae94cdc10851fa8d14821f7
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Fri Aug 19 21:20:08 2011 +0900
+
+    ARM: NEON better instruction scheduling of over_n_8888
+    
+    New head, tail, tail/head blocks are added and instructions
+    are reordered to eliminate pipeline stalls
+    
+    Performance numbers of before/after
+    
+    - cortex a8 -
+    before : L1: 375.39  L2: 391.93  M:114.39 ( 40.99%)  HT: 99.37  VT: 98.20  R: 90.24  RT: 32.87 ( 240Kops/s)
+    after  : L1: 481.90  L2: 483.46  M:114.29 ( 40.69%)  HT:106.91  VT: 93.38  R: 90.74  RT: 29.51 ( 236Kops/s)
+    
+    - cortex a9 -
+    before : L1: 324.50  L2: 332.79  M:155.55 ( 47.51%)  HT:111.93  VT: 93.58  R: 71.92  RT: 28.21 ( 233Kops/s)
+    after  : L1: 355.87  L2: 364.49  M:156.90 ( 47.59%)  HT:111.52  VT: 91.76  R: 72.16  RT: 28.22 ( 234Kops/s)
+
+commit 6aa82b7a729ae7f0a26ae5a7c08ac74ebd5051cd
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Tue Aug 23 15:00:11 2011 +0900
+
+    ARM: NEON better instruction scheduling of over_n_8_8888
+    
+    tail/head block is expanded and reordered to eliminate stalls
+    
+    Performance numbers of before/after
+    
+    - cortex a8 -
+    before : L1: 201.35  L2: 190.48  M:101.94 ( 54.85%)  HT: 78.41  VT: 63.83  R: 58.25  RT: 21.74 ( 191Kops/s)
+    after  : L1: 257.65  L2: 255.49  M:102.04 ( 55.33%)  HT: 79.19  VT: 65.46  R: 59.23  RT: 21.12 ( 189Kops/s)
+    
+    - cortex a9 -
+    before : L1: 157.35  L2: 159.81  M:133.00 ( 60.94%)  HT: 82.44  VT: 63.64  R: 51.66  RT: 19.15 ( 179Kops/s)
+    after  : L1: 216.83  L2: 219.40  M:135.83 ( 61.80%)  HT: 85.60  VT: 64.80  R: 52.23  RT: 19.16 ( 179Kops/s)
+
+commit 4ffa077487cb71ab17d12c37d298ca8a17e5bf35
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Sat Aug 13 16:18:17 2011 +0200
+
+    Workaround bug in llvm-gcc
+    
+    llvm-gcc (shipped in Apple XCode 4.1.1 as the default compiler or in
+    the 2.9 release of LLVM) performs an invalid optimization which
+    unifies the empty_region and the bad_region structures because they
+    have the same content.
+    
+    A bugreport has been filed against Apple Developers Tool for this
+    issue. This commit works around this bug by making one of the two
+    structures volatile, so that it cannot be merged.
+    
+    Fixes region-contains-test.
+
+commit a1ebff0dcbb52cd9eba2bf953b3ba251df6dd787
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Wed Jun 29 14:14:38 2011 +0200
+
+    win32: Build benchmarks
+    
+    Add the makefile rules needed to compile lowlevel-blt-bench on win32
+    and fix the compilation errors.
+
+commit 2644d5a947ad82a82e914acf72bbb411097a4bae
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Mar 11 17:09:34 2011 -0500
+
+    Move bilinear interpolation to pixman-inlines.h
+
+commit 12ad42dd32240f08708eddb157a6b23904ae39a7
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Mar 11 16:09:21 2011 -0500
+
+    Use repeat() function from pixman-inlines.h in pixman-bits-image.c
+    
+    The repeat() functionality was duplicated between pixman-bits-image.c
+    and pixman-inlines.h
+
+commit 2f443466bb9b8901f658e30c606ddacc4fed8535
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Mar 11 16:07:24 2011 -0500
+
+    Rename pixman-fast-path.h to pixman-inlines.h
+    
+    It is not really specific to pixman-fast-path.c.
+
+commit e58b208958900803f74d5e20c855bcb14752d976
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Aug 11 06:30:43 2011 -0400
+
+    In pixman_image_create_bits() allow images larger than 2GB
+    
+    There is no reason for pixman_image_create_bits() to check that the
+    image size fits in int32_t. The correct check is against size_t since
+    that is what the argument to calloc() is.
+    
+    This patch fixes this by adding a new _pixman_multiply_overflows_size()
+    and using it in create_bits(). Also prepend an underscore to the names
+    of other similar functions since they are internal to pixman.
+    
+    V2: Use int, not ssize_t for the arguments in create_bits() since
+    width/height are still limited to 32 bits, as pointed out by Chris
+    Wilson.
+
+commit bdfb5944ffd460631c082e560c89a6c9830b37de
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Aug 8 10:18:07 2011 -0400
+
+    Don't include stdint.h in lowlevel-blt-bench.c
+    
+    Some systems don't have the file, and the types are already defined in
+    pixman.h.
+    
+    https://bugs.freedesktop.org//show_bug.cgi?id=37422
+
+commit e5d85ce6629c84b9dad5a9c76bd9f895157c5a74
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Aug 2 03:03:48 2011 -0400
+
+    Use find_box_for_y() in pixman_region_contains_point() too
+    
+    The same binary search from the previous commit can be used in this
+    function too.
+    
+    V2: Remove check from loop that is not needed anymore, pointed out by
+    Andrea Canciani.
+
+commit 04bd4bdca622f060d7d39caddeaa495d3e6eb0cb
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Aug 1 22:32:09 2011 -0400
+
+    Speed up pixman_region{,32}_contains_rectangle()
+    
+    When someone selects some text in Firefox under a non-composited X
+    server and initiates a drag, a shaped window is created with a complex
+    shape corresponding to the outline of the text. Then, on every mouse
+    movement pixman_region_contains_rectangle() is called many times on
+    that complicated region. And pixman_region_contains_rectangle() is
+    doing a linear scan through the rectangles in the region, although the
+    scan does exit when it finds the first box that can't possibly
+    intersect the passed-in rectangle.
+    
+    This patch changes the loop so that it uses a binary search to skip
+    boxes that don't overlap the current y position.  The performance
+    improvement for the text dragging case is easily noticeable.
+    
+    V2: Use the binary search for the "getting up to speed or skipping
+    remainder of band" as well.
+
+commit 795ec5af2fc86fb0ebeca9ce82913d6002267a12
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Aug 2 01:32:15 2011 -0400
+
+    New test of pixman_region_contains_{rectangle,point}
+    
+    This test generates random regions and checks whether random boxes and
+    points are contained within them. The results are combined and a CRC32
+    value is computed and compared to a known-correct one.
+
+commit 842591d9d12a24a9a06308ae03996153c5a99e64
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Aug 3 18:38:20 2011 -0400
+
+    Fix lcg_rand_u32() to return 32 random bits.
+    
+    The lcg_rand() function only returns 15 random bits, so lcg_rand_u32()
+    would always have 0 in bit 31 and bit 15. Fix that by calling
+    lcg_rand() three times, to generate 15, 15, and 2 random bits
+    respectively.
+    
+    V2: Use the 10/11 most significant bits from the 3 lcg results and mix
+    them with the low ones from the adjacent one, as suggested by Andrea
+    Canciani.
+
+commit 12da53f81c4a507a963641796132bbafe0cd6224
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Thu Aug 4 22:21:04 2011 +0900
+
+    ARM NEON: Standard fast path out_reverse_8_8888
+    
+    This fast path is frequently used by cairo to do polygon rendering.
+    Existing NEON code generation framework is used.
+
+commit b395c3c5a28570ceac7cea55cb66a94096559897
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Mon Jul 18 08:15:23 2011 +0200
+
+    radial: Fix typos and trailing whitespace
+    
+    Correct a typo reported by James Cloos and some reported by automatic
+    spellchecking.
+    
+    Remove trailing whitespace.
+
+commit b8d6babc91459a9f854695b56f0265298a3c6427
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sat Jul 23 00:27:34 2011 +0300
+
+    ARM: workaround binutils bug #12931 (code sections alignment)
+    
+    More details in binutils bugtracker:
+      http://sourceware.org/bugzilla/show_bug.cgi?id=12931
+    
+    The problem was encountered in the wild by Mozilla:
+      https://bugzilla.mozilla.org/show_bug.cgi?id=672787
+
+commit 5754e5689d4cac8868d6416dffa4a7d0c2d15423
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Jul 15 23:35:21 2011 +0300
+
+    C fast path for scaled src_x888_8888 with nearest filter
+    
+    The necessity is justified by a message in the pixman mailing list:
+      http://lists.freedesktop.org/archives/pixman/2011-July/001330.html
+    
+    NONE repeat is not supported, but could be added by tweaking
+    the interpretation and making use of 'fully_transparent_src'
+    scanline function argument.
+
+commit c06af104546868ed748c8f771817f5e9ae9a6a2d
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Fri Jul 15 22:02:01 2011 +0200
+
+    radial: Improve documentation and naming
+    
+    Add a comment to explain why the tests guarantee that the code always
+    computes the greatest valid root.
+    
+    Rename "det" as "discr" to make it match the mathematical name
+    "discriminant".
+    
+    Based on a patch by Jeff Muizelaar <jmuizelaar@mozilla.com>.
+
+commit e814b50877bf313619fbf777dcab98d39874f8a4
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jul 4 15:55:52 2011 -0400
+
+    Makefile.am: Add pixman@lists.freedesktop.org to RELEASE_ANNOUNCE_LIST
+
+commit ed6d2f1cecb2f6d0cfe92bf493fde1abb4004856
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jul 4 15:35:17 2011 -0400
+
+    Post-release version bump to 0.23.3
+
+commit 6c4001a0e1cc0350147638ba941d23e129d00e0d
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jul 4 08:13:19 2011 -0400
+
+    Pre-release version bump to 0.23.2
+
+commit eff7c8efabe2da33edbf0bdc06e101352981286b
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Mon Jun 13 19:53:49 2011 +0900
+
+    Bilinear REPEAT_NORMAL source line extension for too short src_width
+    
+    To avoid function call and other calculation overhead, extend source
+    scanline into temporary buffer when source width is too small.
+    Temporary buffer will be repeatedly accessed, so extension cost is
+    very small due to cache effect.
+
+commit 828794d328e7ad1efc860baee8d6e72450b486b9
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Wed Jun 8 17:17:42 2011 +0900
+
+    Enable REPEAT_NORMAL bilinear fast path entries
+
+commit 1161b3f9edb6f5c396438b79f2df3218ea8d194e
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Wed Jun 8 17:14:29 2011 +0900
+
+    ARM: Add REPEAT_NORMAL functions to bilinear BIND macros
+    
+    Now the bilinear template supports REPEAT_NORMAL, so functions for that
+    are added to PIXMAN_ARM_BIND_SCALED_BILINEAR_ macros. Fast path
+    entries are not enabled yet.
+
+commit ebd2f06d96ee91f9f7f13b906ae328862da7dde8
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Wed Jun 8 17:11:24 2011 +0900
+
+    sse2: Declare bilinear src_8888_8888 REPEAT_NORMAL composite function
+    
+    Now the bilinear template supports REPEAT_NORMAL, so declare composite
+    functions using it. The function is just declared, not used yet.
+
+commit 7e22b2f7824f844076e1bb1fb26a6ec5e5d029cd
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Wed Jun 8 15:58:01 2011 +0900
+
+    REPEAT_NORMAL support for bilinear fast path template
+    
+    The basic idea is to break down normal repeat into a set of
+    non-repeat scanline compositions and stitching them together.
+    
+    Bilinear may interpolate last and first pixels of source scanline.
+    In this case, we can use temporary wrap around buffer.
+
+commit 2f025bad436982a2b1c54d7cb49b426ebf198350
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Wed Jun 8 15:37:31 2011 +0900
+
+    Replace boolean arguments with flags for bilinear fast path template
+    
+    By replacing boolean arguments with flags, the code can be more
+    readable and flags can be extended to do some more things later.
+    
+    Currently following flags are defined.
+    
+    FLAG_NONE
+        - No flags are turned on.
+    
+    FLAG_HAVE_SOLID_MASK
+        - Template will generate solid mask composite functions.
+    
+    FLAG_HAVE_NON_SOLID_MASK
+        - Template will generate bits mask composite functions.
+    
+    FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK should be mutually
+    exclusive.
+
+commit 4d4d1760e8118aaea06783079a3b87f83deb4907
+Author: Søren Sandmann <ssp@redhat.com>
+Date:   Sat Jun 25 10:16:25 2011 -0400
+
+    test: Make fuzzer-find-diff.pl executable
+
+commit ece8d13bf77d050662bb9db9716576dabff37554
+Author: Søren Sandmann <sandmann@cs.au.dk>
+Date:   Sun Jun 19 20:29:08 2011 -0400
+
+    ARM: Fix two bugs in neon_composite_over_n_8888_0565_ca().
+    
+    The first bug is that a vmull.u8 instruction would store its result in
+    the q1 register, clobbering the d2 register used later on. The second
+    is that a vraddhn instruction would overwrite d25, corrupting the q12
+    register used later.
+    
+    Fixing the second bug caused a pipeline bubble where the d18 register
+    would be unavailable for a clock cycle. This is fixed by swapping the
+    instruction with its successor.
+
+commit 5715a394c41b2fd259ce7bf07b859d2a4eb2ec09
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Jun 19 19:10:45 2011 -0400
+
+    blitters-test: Make common formats more likely to be tested.
+    
+    Move the eight most common formats to the top of the list of image
+    formats and make create_random_image() much more likely to select one
+    of those eight formats.
+    
+    This should help catch more bugs in SIMD optimized operations.
+
+commit d815a1c54ae6ea6ccd16dedb7f83db0d2526d637
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Fri Jun 10 08:56:10 2011 +0200
+
+    Silence autoconf warnings
+    
+    Autoconf 2.68 reports:
+    
+    warning: AC_LANG_CONFTEST: no AC_LANG_SOURCE call detected in body
+    
+    Every code fragment must be wrapped in [AC_LANG_SOURCE([...])]
+
+commit a89f8cfaf11d0149b73ce40eca6e8a7f262f305a
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Mar 25 15:09:17 2011 -0400
+
+    Replace arguments to composite functions with a pointer to a struct
+    
+    This allows more information, such as flags or the composite region,
+    to be passed to the composite functions.
+
+commit 99e7d8fab546257ef729ea6db6e9beede984cec1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Mar 25 14:20:43 2011 -0400
+
+    In pixman-general.c rename image_parameters to {src, mask, dest}_image
+    
+    All the fast paths generally use these names as well.
+
+commit 4d713e3120909d82e37b0405d035e85bbc8a61a9
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Mar 25 14:17:08 2011 -0400
+
+    Replace instances of "dst_*" with "dest_*"
+    
+    The variables in question were dst_x, dst_y, dst_image. The majority
+    of _x and _y uses were already dest_x and dest_y, while the majority
+    of _image uses were dst_image.
+
+commit 6aceb767aa6eea38ec3021263ca1d83aa9e0a3df
+Author: Søren Sandmann <ssp@redhat.com>
+Date:   Sat May 28 12:32:35 2011 -0400
+
+    demos: Comment out some unused variables
+
+commit 4abe76432a59dec2e7978bfa4a01ad032178da0a
+Author: Søren Sandmann <ssp@redhat.com>
+Date:   Sat May 28 11:56:32 2011 -0400
+
+    sse2: Delete some unused variables
+
+commit 5c60e1855b082b1a323319e1d0ba2d6f916fb3d5
+Author: Søren Sandmann <ssp@redhat.com>
+Date:   Sat May 28 11:51:31 2011 -0400
+
+    mmx: Delete some unused variables
+
+commit 827e61333865dc94851eb79c8e640b103e3fd629
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Mon May 23 12:08:54 2011 +0200
+
+    Include noop in win32 builds
+
+commit 65b63728cc8ada802c9798e11e6fa92d21f2fbf8
+Author: Nis Martensen <nis.martensen@web.de>
+Date:   Mon May 2 21:43:58 2011 +0200
+
+    Fix a few typos in pixman-combine.c.template
+    
+    Some equations have too much multiplication with alpha.
+
+commit dd449a2a8ee1381fdc5297257917bc0786bf0ac4
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Apr 23 10:26:49 2011 -0400
+
+    Move NOP src iterator into noop implementation.
+    
+    The iterator for sources where neither RGB nor ALPHA is needed, really
+    belongs in the noop implementation.
+
+commit ba480882aa465d8414dc8a4472d89d94911af60a
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Apr 23 10:24:41 2011 -0400
+
+    Move NULL iterator into pixman-noop.c
+    
+    Iterating a NULL image returns NULL for all scanlines. We may as well
+    do this in the noop iterator.
+
+commit a4e984de19f7f2ca30b1d736cdd2dded91a75907
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Feb 8 23:42:36 2011 -0500
+
+    Add a noop src iterator
+    
+    When the image is a8r8g8b8 and not transformed, and the fetched
+    rectangle is within the image bounds, scanlines can be fetched by
+    simply returning a pointer instead of copying the bits.
+
+commit d4fff4a95921f734f26ef51953f4dddfcf423eab
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jan 24 12:16:03 2011 -0500
+
+    Move noop dest fetching to noop implementation
+    
+    It will at some point become useful to have CPU specific destination
+    iterators. However, a problem with that, is that such iterators should
+    not be used if we can composite directly in the destination image.
+    
+    By moving the noop destination iterator to the noop implementation, we
+    can ensure that it will be chosen before any CPU specific iterator.
+
+commit 13ce88f80095d0fa18330a23e03819368987e277
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jan 24 11:35:27 2011 -0500
+
+    Add a noop composite function for the DST operator
+    
+    The DST operator doesn't actually do anything, so add a noop "fast
+    path" for it, instead of checking in pixman_image_composite32().
+    
+    The performance tradeoff here is that we get rid of a test for DST in
+    the common case where the operator is not DST, in return for an extra
+    walk over the clip rectangles in the uncommon case where the operator
+    actually is DST.
+
+commit 8c76235f41b2ac70ce6e652dcd1cab975e1283a4
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jan 24 11:31:49 2011 -0500
+
+    Add a "noop" implementation.
+    
+    This new implementation is ahead of all other implementations in the
+    fallback chain and is supposed to contain operations that are "noops",
+    ie., they don't require any work. For example, it might contain a
+    "fast path" for the DST operator that doesn't actually do anything or
+    an iterator for a8r8g8b8 that just returns a pointer into the image.
+
+commit 0f6a4d45886d64b244d57403609f0377b58cc7fb
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Thu May 5 10:17:08 2011 +0200
+
+    test: Fix compilation on win32
+    
+    MSVC complains about uint32_t being used as an expression:
+    
+    composite.c(902) : error C2275: 'uint32_t' : illegal use of this type
+    as an expression
+
+commit 838c2b593ec5ebbbf82de5b7790f5b68fd86bbc1
+Author: Dave Yeo <dave.r.yeo@gmail.com>
+Date:   Mon May 9 12:38:44 2011 +0200
+
+    Check for working mmap()
+    
+    OS/2 doesn't have a working mmap().
+
+commit c53625a36e28883684c3a6e640aa3656ecca7615
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon May 2 05:11:49 2011 -0400
+
+    Post-release version bump to 0.23.1
+
+commit 918a544406df8f428056daff8a634ad6eadf67c9
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon May 2 05:06:33 2011 -0400
+
+    Pre-release version bump to 0.22.0
+
+commit 71b2e2745be31e7d18a11f8c2cba8f6031ace17c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Apr 19 00:22:29 2011 -0400
+
+    Post-release version bump to 0.21.9
+
+commit 89868e93bd8d66f0fac0f0b42cf7718756992e4e
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Apr 19 00:00:37 2011 -0400
+
+    Pre-release version bump to 0.21.8
+
+commit 33f1652b953467f3910605b3be723e21b3ebe078
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Wed Apr 13 11:57:35 2011 +0900
+
+    ARM: Enable bilinear fast paths using scanline functions in pixman-arm-neon-asm-bilinear.S
+    
+    Enable fast paths which are supported by scanline functions in
+    pixman-arm-neon-asm-bilinear.S
+
+commit e8185f1cb43417d9f7b1d2856bb899f1b84fde81
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Wed Apr 13 11:48:40 2011 +0900
+
+    ARM: NEON scanline functions for bilinear scaling
+    
+    General fetch->combine->store based bilinear scanline functions.
+    Need further optimizations and eventually will be replaced with optimal
+    functions one by one.
+    General functions should be located in pixman-arm-neon-asm-bilinear.S and
+    optimal functions in pixman-arm-neon-asm.S
+    
+    Following general bilinear scanline functions are implemented
+        over_8888_8888
+        add_8888_8888
+        src_8888_8_8888
+        src_8888_8_0565
+        src_0565_8_x888
+        src_0565_8_0565
+        over_8888_8_8888
+        add_8888_8_8888
+
+commit 00939d35628e733fab63606cfb1d7fcb667860d3
+Author: Taekyun Kim <tkq.kim@samsung.com>
+Date:   Wed Apr 13 11:43:44 2011 +0900
+
+    ARM: Common macro for scaled bilinear scanline function with A8 mask
+    
+    Defining PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST macro for declaration of
+    scaled bilinear scanline functions in common header.
+
+commit b455496890f7f941d561c284aca14783300bedd6
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Mar 11 07:52:57 2011 -0500
+
+    Offset rendering in pixman_composite_trapezoids() by (x_dst, y_dst)
+    
+    Previously, this function would do coordinate calculations in such a
+    way that (x_dst, y_dst) would only affect the alignment of the source
+    image, but not of the traps, which would always be considered to be in
+    absolute destination coordinates. This is unlike the
+    pixman_image_composite() function which also registers the mask to the
+    destination.
+    
+    This patch makes it so that traps are also offset by (x_dst, y_dst).
+    
+    Also add a comment explaining how this function is supposed to
+    operate, and update tri-test.c and composite-trap-test.c to deal with
+    the new semantics.
+
+commit e75e6a4ef5c5a8ac8b0e8464f08f83fd2b6e86ed
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Apr 2 23:24:48 2011 -0400
+
+    ARM: Add 'neon_composite_over_n_8888_0565_ca' fast path
+    
+    This improves the performance of the firefox-talos-gfx benchmark with
+    the image16 backend. Benchmark on an 800 MHz ARM Cortex A8:
+    
+    Before:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]  image16            firefox-talos-gfx  121.773  122.218   0.15%    6/6
+    
+    After:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]  image16            firefox-talos-gfx   85.247   85.563   0.22%    6/6
+    
+    V2: Slightly better instruction scheduling based on comments from Taekyun Kim.
+    V3: Eliminate all stalls from the inner loop. Also based on comments from Taekyun Kim.
+
+commit 1670b952143284f480c39ff087b5694a64eb7db3
+Author: Gilles Espinasse <g.esp@free.fr>
+Date:   Tue Apr 12 22:44:56 2011 +0200
+
+    Fix OpenMP not supported case
+    
+    PIXMAN_LINK_WITH_ENV did not fail unless -Wall -Werror is used.
+    So even when the compiler did not support OpenMP, USE_OPENMP was defined.
+    Fix that by running the second OpenMP test only when the first AC_OPENMP check finds support
+    
+    configure tested in the cases :
+    gcc without libgomp support, no openmp option, --enable-openmp and --disable-openmp
+    gcc with libgomp support, no openmp option, --enable-openmp and --disable-openmp
+    
+    Not tested with autoconf version not knowing openmp (<2.62)
+    
+    Warn when --enable-openmp is requested but no support is found
+    
+    Signed-off-by: Gilles Espinasse <g.esp@free.fr>
+
+commit b9e8f7fb7494e4ee4be56d1555632233a494b28e
+Author: Gilles Espinasse <g.esp@free.fr>
+Date:   Tue Apr 12 22:44:25 2011 +0200
+
+    Fix missing AC_MSG_RESULT value from Werror test
+    
+    Use the correct variable name
+    
+    Signed-off-by: Gilles Espinasse <g.esp@free.fr>
+
+commit caae4e82ffdeebfb9aa98a6c49dd563e065c0959
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Mar 21 20:25:27 2011 +0200
+
+    ARM: pipelined NEON implementation of bilinear scaled 'src_8888_0565'
+    
+    Benchmark on ARM Cortex-A8 r1p3 @600MHz, 32-bit LPDDR @166MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=10020565, speed=33.59 MPix/s
+      after:  op=1, src=20028888, dst=10020565, speed=46.25 MPix/s
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=10020565, speed=63.86 MPix/s
+      after:  op=1, src=20028888, dst=10020565, speed=84.22 MPix/s
+
+commit d080d59b802c351daed84b92bd4eb20c775b81c7
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 16 17:24:49 2011 +0200
+
+    ARM: pipelined NEON implementation of bilinear scaled 'src_8888_8888'
+    
+    Performance of the inner loop when working with the data in L1 cache:
+        ARM Cortex-A8: 41 cycles per 4 pixels (no stalls and partial dual issue)
+        ARM Cortex-A9: 48 cycles per 4 pixels (no stalls)
+    
+    It might be still possible to improve performance even more on ARM Cortex-A8
+    with a better use of dual issue.
+    
+    Benchmark on ARM Cortex-A8 r1p3 @600MHz, 32-bit LPDDR @166MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=20028888, speed=40.38 MPix/s
+      after:  op=1, src=20028888, dst=20028888, speed=48.47 MPix/s
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=20028888, speed=79.68 MPix/s
+      after:  op=1, src=20028888, dst=20028888, speed=93.11 MPix/s
+
+commit b496a8b279baebb8b9ab4fbcb2101583be08fe3b
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Mar 17 19:42:01 2011 +0200
+
+    ARM: support different levels of loop unrolling in bilinear scaler
+    
+    Now an extra 'flag' parameter is supported in bilinear scanline scaling
+    function generation macro. It can be used to enable 4 or 8 pixels per
+    loop iteration unrolling and provide save/restore code for d8-d15
+    registers.
+
+commit 34ca9cf03fa897cd377cdb19acc22e876b2f4b0e
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Mar 21 18:41:53 2011 +0200
+
+    ARM: use less ARM instructions in NEON bilinear scaling code
+    
+    This reduces code size and also puts less pressure on the
+    instruction decoder.
+
+commit 0f7be9f72ef6bfe2555b7f2cc29297c4f4762740
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 16 16:33:41 2011 +0200
+
+    ARM: support for software pipelining in bilinear macros
+    
+    Now it's possible to override the main loop of bilinear scaling code
+    with optimized pipelined implementation.
+
+commit 9638af95832563040d6bd861cf4c20ab632058df
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Mar 10 16:12:23 2011 +0200
+
+    ARM: use aligned memory writes in NEON bilinear scaling code
+
+commit 8bba3a0e1e54f03ea78fb44314f3bfa57ec8da31
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Mar 10 15:34:10 2011 +0200
+
+    ARM: tweaked horizontal weights update in NEON bilinear scaling code
+    
+    Moving horizontal interpolation weights update instructions from the
+    beginning of loop to its end makes it possible to hide some pipeline
+    stalls and improve performance.
+
+commit a2153222677327be43251012f462d19a7e98ce14
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Apr 3 20:32:30 2011 -0400
+
+    ARM: Tiny improvement in over_n_8888_8888_ca_process_pixblock_head
+    
+    Instead of two
+    
+    	mvn d24, d24
+    	mvn d25, d25
+    
+    use just one
+    
+    	mvn q12, q12
+    
+    Also move another vmvn instruction into the created pipeline bubble,
+    as pointed out by Siarhei.
+
+commit 44f99735d9c6a897078db12172d9d2d07b204f37
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Apr 2 14:12:12 2011 -0400
+
+    Makefile.am: Put development releases in "snapshots" directory
+    
+    Up until now, all pixman releases, both snapshots and releases, were
+    uploaded to the "releases" directory on www.cairographics.org, but
+    it's better to put development snapshots in the "snapshots" directory.
+    
+    This patch changes Makefile.am to do that.
+
+commit ad3cbfb073fc325e1b3152898ca71b8255675957
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Mar 22 13:42:05 2011 -0400
+
+    test: Fix infinite loop in composite
+    
+    When run in PIXMAN_RANDOMIZE_TESTS mode, this test would go into an
+    infinite loop because the loop started at 'seed' but the stop
+    condition was still N_TESTS.
+
+commit b514e63cfc58af21f7097db5a1b04292a758782a
+Author: Alexandros Frantzis <alexandros.frantzis@linaro.org>
+Date:   Fri Mar 18 14:37:27 2011 +0200
+
+    Add support for the r8g8b8a8 and r8g8b8x8 formats to the tests.
+
+commit f05a90e5f8d1d0af60e2c684cbe9f1327c33135a
+Author: Alexandros Frantzis <alexandros.frantzis@linaro.org>
+Date:   Fri Mar 18 14:36:15 2011 +0200
+
+    Add simple support for the r8g8b8a8 and r8g8b8x8 formats.
+    
+    This format is particularly useful on big-endian architectures, where RGBA in
+    memory/file order corresponds to r8g8b8a8 as an uint32_t. This is important
+    because RGBA is in some cases the only available choice (for example as a pixel
+    format in OpenGL ES 2.0).
+
+commit 7eb0abb5e819046537b9f809c7ec332c6679c557
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Mar 14 14:56:22 2011 -0400
+
+    test: Randomize some tests if PIXMAN_RANDOMIZE_TESTS is set
+    
+    This patch makes it so that composite and stress-test will start from a
+    random seed if the PIXMAN_RANDOMIZE_TESTS environment variable is
+    set. Running the test suite in this mode is useful to get more test
+    coverage.
+    
+    Also, in stress-test.c make it so that setting the initial seed causes
+    threads to be turned off. This makes it much easier to see when
+    something fails.
+
+commit 6b27768d81c254a4f1d05473157328d5a5d99b9c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Mar 12 19:42:58 2011 -0500
+
+    Simplify the prototype for iterator initializers.
+    
+    All of the information previously passed to the iterator initializers
+    is now available in the iterator itself, so there is no need to pass
+    it as arguments anymore.
+
+commit 74d0f44b6d6d613d24541b849835da0464cc6fd0
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Mar 12 19:12:35 2011 -0500
+
+    Fill out parts of iters in _pixman_implementation_{src,dest}_iter_init()
+    
+    This makes _pixman_implementation_{src,dest}_iter_init() responsible
+    for filling parts of the information in the iterators. Specifically,
+    the information passed as arguments is stored in the iterator.
+    
+    Also add a height field to pixman_iter_t().
+
+commit be4eaa0e4f79af38b7b89c5b09ca88d3a88d9396
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Mar 12 19:06:02 2011 -0500
+
+    In delegate_{src,dest}_iter_init() call delegate directly.
+    
+    There is no reason to go through
+    _pixman_implementation_{src,dest}_iter_init(), especially since
+    _pixman_implementation_src_iter_init() is doing various other checks
+    that only need to be done once.
+    
+    Also call delegate->src_iter_init() directly in pixman-sse2.c
+
+commit 70a923882ca24664344ba91a649e7aa12c3063f7
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 9 13:55:48 2011 +0200
+
+    ARM: a bit faster NEON bilinear scaling for r5g6b5 source images
+    
+    Instructions scheduling improved in the code responsible for fetching r5g6b5
+    pixels and converting them to the intermediate x8r8g8b8 color format used in
+    the interpolation part of code. Still a lot of NEON stalls are remaining,
+    which can be resolved later by the use of pipelining.
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=10020565, dst=10020565, speed=32.29 MPix/s
+              op=1, src=10020565, dst=20020888, speed=36.82 MPix/s
+      after:  op=1, src=10020565, dst=10020565, speed=41.35 MPix/s
+              op=1, src=10020565, dst=20020888, speed=49.16 MPix/s
+
+commit fe99673719091d4a880d031add1369332a75731b
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 9 13:27:41 2011 +0200
+
+    ARM: NEON optimization for bilinear scaled 'src_0565_0565'
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=10020565, dst=10020565, speed=3.30 MPix/s
+      after:  op=1, src=10020565, dst=10020565, speed=32.29 MPix/s
+
+commit 29003c3befe2159396d181ef9ac1caaadcabf382
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 9 13:21:53 2011 +0200
+
+    ARM: NEON optimization for bilinear scaled 'src_0565_x888'
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=10020565, dst=20020888, speed=3.39 MPix/s
+      after:  op=1, src=10020565, dst=20020888, speed=36.82 MPix/s
+
+commit 2ee27e7d79637da9173ee1bf3423e5a81534ccb4
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 9 11:53:04 2011 +0200
+
+    ARM: NEON optimization for bilinear scaled 'src_8888_0565'
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=10020565, speed=6.56 MPix/s
+      after:  op=1, src=20028888, dst=10020565, speed=61.65 MPix/s
+
+commit 11a0c5badbc59ce967707ef836313cc98f8aec4e
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 9 11:46:48 2011 +0200
+
+    ARM: use common macro template for bilinear scaled 'src_8888_8888'
+    
+    This is a cleanup for old and now duplicated code. The performance improvement
+    is mostly coming from the enabled use of software prefetch, but instructions
+    scheduling is also slightly better.
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=20028888, speed=53.24 MPix/s
+      after:  op=1, src=20028888, dst=20028888, speed=74.36 MPix/s
+
+commit 34098dba6763afd3636a14f9c2a079ab08f23b2d
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 9 11:34:15 2011 +0200
+
+    ARM: NEON: common macro template for bilinear scanline scalers
+    
+    This allows generating bilinear scanline scaling functions targeting
+    various source and destination color formats. Right now a8r8g8b8/x8r8g8b8
+    and r5g6b5 color formats are supported. More formats can be added if needed.
+
+commit 66f4ee1b3bccf4516433d61dbf2035551a712fa2
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Mar 9 10:59:46 2011 +0200
+
+    ARM: new bilinear fast path template macro in 'pixman-arm-common.h'
+    
+    It can be reused in different ARM NEON bilinear scaling fast path functions.
+
+commit 5921c17639fe5fdc595c850e3347281c1c8746ba
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sun Mar 6 22:16:32 2011 +0200
+
+    ARM: assembly optimized nearest scaled 'src_8888_8888'
+    
+    Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=20028888, speed=44.36 MPix/s
+      after:  op=1, src=20028888, dst=20028888, speed=39.79 MPix/s
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=20028888, speed=102.36 MPix/s
+      after:  op=1, src=20028888, dst=20028888, speed=163.12 MPix/s
+
+commit f3e17872f5522e25da8e32de83e62bee8cc198d7
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Mar 7 03:10:43 2011 +0200
+
+    ARM: common macro for nearest scaling fast paths
+    
+    The code of nearest scaled 'src_0565_0565' function was generalized
+    and moved to a common macro, so that it can be reused for other
+    fast paths.
+
+commit bb3d1b67fd0f42ae00af811c624ea1c44541034d
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sun Mar 6 16:17:12 2011 +0200
+
+    ARM: use prefetch in nearest scaled 'src_0565_0565'
+    
+    Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=10020565, dst=10020565, speed=75.02 MPix/s
+      after:  op=1, src=10020565, dst=10020565, speed=73.63 MPix/s
+    
+    Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=10020565, dst=10020565, speed=176.12 MPix/s
+      after:  op=1, src=10020565, dst=10020565, speed=267.50 MPix/s
+
+commit 84e361c8e357e26f299213fbeefe64c73447b116
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Mar 4 15:51:18 2011 -0500
+
+    test: Do endian swapping of the source and destination images.
+    
+    Otherwise the test fails on big endian. Fix for bug 34767, reported by
+    Siarhei Siamashka.
+
+commit 84f3c5a71a2de1a96dcf0c7f9ab0a8ee1b1b158f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Mar 7 13:45:54 2011 -0500
+
+    test: In image_endian_swap() use pixman_image_get_format() to get the bpp.
+    
+    There is no reason to pass in the bpp as an argument; it can be gotten
+    directly from the image.
+
+commit 17feaa9c50bb8521b0366345efe181bd99754957
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Feb 22 18:45:03 2011 +0200
+
+    ARM: NEON optimization for bilinear scaled 'src_8888_8888'
+    
+    Initial NEON optimization for bilinear scaling. Can be probably
+    improved more.
+    
+    Benchmark on ARM Cortex-A8:
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=20028888, speed=6.70 MPix/s
+      after:  op=1, src=20028888, dst=20028888, speed=44.27 MPix/s
+
+commit 350029396d911941591149cc82b5e68a78ad6747
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Feb 21 20:18:02 2011 +0200
+
+    SSE2 optimization for bilinear scaled 'src_8888_8888'
+    
+    A primitive naive implementation of bilinear scaling using SSE2 intrinsics,
+    which only handles one pixel at a time. It is approximately 2x faster than
+    pixman general compositing path. Single pass processing without intermediate
+    temporary buffer contributes to ~15% and loop unrolling contributes to ~20%
+    of this speedup.
+    
+    Benchmark on Intel Core i7 (x86-64):
+     Using cairo-perf-trace:
+      before: image        firefox-planet-gnome   12.566   12.610   0.23%    6/6
+      after:  image        firefox-planet-gnome   10.961   11.013   0.19%    5/6
+    
+     Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+      before: op=1, src=20028888, dst=20028888, speed=70.48 MPix/s
+      after:  op=1, src=20028888, dst=20028888, speed=165.38 MPix/s
+
+commit 0df43b8ae5031dd83775d00b57b6bed809db0e89
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Feb 21 02:07:09 2011 +0200
+
+    test: check correctness of 'bilinear_pad_repeat_get_scanline_bounds'
+    
+    Individual correctness check for the new bilinear scaling related
+    supplementary function. This test program uses a bit wider range
+    of input arguments, not covered by other tests.
+
+commit d506bf68fd0e9a1c5dd484daee70631699918387
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Feb 21 01:29:02 2011 +0200
+
+    Main loop template for fast single pass bilinear scaling
+    
+    Can be used for implementing SIMD optimized fast path
+    functions which work with bilinear scaled source images.
+    
+    Similar to the template for nearest scaling main loop, the
+    following types of mask are supported:
+    1. no mask
+    2. non-scaled a8 mask with SAMPLES_COVER_CLIP flag
+    3. solid mask
+    
+    PAD repeat is fully supported. NONE repeat is partially
+    supported (right now only works if source image has alpha
+    channel or when alpha channel of the source image does not
+    have any effect on the compositing operation).
+
+commit 9ebde285fa990bfa1524f166fbfb1368c346b14a
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Thu Feb 24 12:53:39 2011 +0100
+
+    test: Silence MSVC warnings
+    
+    MSVC does not notice non-returning functions (abort() / assert(0))
+    and warns about paths which end with them in non-void functions:
+    
+    c:\cygwin\home\ranma42\code\fdo\pixman\test\fetch-test.c(114) :
+    warning C4715: 'reader' : not all control paths return a value
+    c:\cygwin\home\ranma42\code\fdo\pixman\test\stress-test.c(133) :
+    warning C4715: 'real_reader' : not all control paths return a value
+    c:\cygwin\home\ranma42\code\fdo\pixman\test\composite.c(431) :
+    warning C4715: 'calc_op' : not all control paths return a value
+    
+    These warnings can be silenced by adding a return after the
+    termination call.
+
+commit 8868778ea1fdc8e70da76b3b00ea78106c5840d8
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Tue Feb 22 22:43:48 2011 +0100
+
+    Do not include unused headers
+    
+    pixman-combine32.h is included without being used both in
+    pixman-image.c and in pixman-general.c.
+
+commit 72f5e5f608506c18c484bc5bc3e58bd83aeb7691
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Tue Feb 22 22:04:49 2011 +0100
+
+    test: Add Makefile for Win32
+
+commit 11305b4ecdd36a17592c5c75de9157874853ab20
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Tue Feb 22 21:46:37 2011 +0100
+
+    test: Fix tests for compilation on Windows
+    
+    The Microsoft C compiler cannot handle subobject initialization and
+    Win32 does not provide snprintf.
+    
+    Work around these limitations by using normal struct initialization
+    and using sprintf (a manual check shows that the buffer size is
+    sufficient).
+
+commit 20ed723a5a42fb8636bc9a5f32974dec1b66a785
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Thu Feb 24 10:44:04 2011 +0100
+
+    Fix compilation on Win32
+    
+    Makefile.win32 contained a typo and was missing the dependency from
+    the built sources.
+
+commit 48e951000c7ff14f40c671f3efb6abb18162c840
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Feb 22 16:13:32 2011 -0500
+
+    Post-release version bump to 0.21.7
+
+commit 8b3332166094db657e96c365a524b2cd7513359b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Feb 22 15:43:41 2011 -0500
+
+    Pre-release version bump to 0.21.6
+
+commit 2cb67d2a0b6bed4490a41c34a185cc54a445559a
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Feb 22 15:40:34 2011 -0500
+
+    Minor fix to the RELEASING file
+
+commit 3cdf74257bdb9d054637252f4fa7503abf580db9
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Feb 22 15:28:17 2011 -0500
+
+    Delete pixman-x64-mmx-emulation.h from pixman/Makefile.am
+
+commit 65919ad17fd7b4c6f963690fc78155c7cfe1a51a
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Feb 22 19:28:08 2011 +0200
+
+    Ensure that tests run as the last step of a build for 'make check'
+    
+    Previously 'make check' would compile and run tests first, and only
+    then proceed to compiling demos. Which is not very convenient
+    because of the need to scroll back console output to see the
+    tests verdict. Swapping order of SUBDIRS variable entries in
+    Makefile.am resolves this.
+
+commit 34a7ac047411d6c1f1708cb8dd4469cd1aa40b31
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Feb 18 07:38:49 2011 -0500
+
+    sse2: Minor coding style cleanups.
+    
+    Also make pixman_fill_sse2() static.
+
+commit 10f69e5ec844e2630f8e5b21fd5392719d34d060
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Feb 18 07:40:02 2011 -0500
+
+    sse2: Remove pixman-x64-mmx-emulation.h
+    
+    Also stop including mmintrin.h
+
+commit 984be4def2e62a05e9a91e77ac8c703fed30718b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Feb 18 07:38:03 2011 -0500
+
+    sse2: Delete obsolete or redundant comments
+
+commit 33d98902261ad73c1b6b1366968e49a1cb2bf68b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Feb 18 07:07:45 2011 -0500
+
+    sse2: Remove all the core_combine_* functions
+    
+    Now that _mm_empty() is not used anymore, they are no longer different
+    from the sse2_combine_* functions, so they can be consolidated.
+
+commit 87cd6b8056bbacb835eeb991f03b9135dcd58334
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Feb 18 05:15:50 2011 -0500
+
+    sse2: Don't compile pixman-sse2.c with -mmmx anymore
+    
+    It's not necessary now that the file doesn't use MMX instructions.
+
+commit e7fe5e35e9640c6d6bb08c24b96ce882434a7f9f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Feb 18 05:07:08 2011 -0500
+
+    sse2: Delete unused MMX functions and constants and all _mm_empty()s
+    
+    These are not needed because the SSE2 implementation doesn't use MMX
+    anymore.
+
+commit f88ae14c15040345a12ff0488c7b23d25639e49b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Feb 18 03:56:20 2011 -0500
+
+    sse2: Convert all uses of MMX registers to use SSE2 registers instead.
+    
+    By avoiding use of MMX registers we won't need to call emms all over
+    the place, which avoids various miscompilation issues.
+
+commit 7fb75bb3e6c3e004374d186ea2d6f02d1caccba4
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Feb 18 03:57:55 2011 -0500
+
+    Coding style:  core_combine_in_u_pixelsse2 -> core_combine_in_u_pixel_sse2
+
+commit 510c0d088a975efe75cc2b796547f3aaed1c18e6
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Feb 15 09:11:44 2011 -0500
+
+    In pixman_image_set_transform() allow NULL for transform
+    
+    Previously, this would crash unless the existing transform were also
+    NULL.
+
+commit 7feb710e60cdab5c448a396537a8de16e72091e2
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Feb 15 04:55:02 2011 -0500
+
+    Avoid marking images dirty when properties are reset
+    
+    When an image property is set to the same value that it already is,
+    there is no reason to mark the image dirty and incur a recomputation
+    of the flags.
+
+commit 3598ec26ecf761488e2ac1536553eaf3bb361e72
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Feb 11 08:57:42 2011 -0500
+
+    Add new public function pixman_add_triangles()
+    
+    This allows some more code to be deleted from the X server. The
+    implementation consists of converting to trapezoids, and is shared
+    with pixman_composite_triangles().
+
+commit 964c7e7cd20a6ed414fdf92b71fdc83db91d7578
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Jan 14 06:19:08 2011 -0500
+
+    Optimize adding opaque trapezoids onto a8 destination.
+    
+    When the source is opaque and the destination is alpha only, we can
+    avoid the temporary mask and just add the trapezoids directly.
+
+commit 0bc03482f10d7bfe64a4199e9cd484ff1129d709
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Jan 12 03:02:59 2011 -0500
+
+    Add a test program, tri-test
+    
+    This program tests whether the new triangle support works.
+
+commit 79e69aac8cfe7d45707098735376a6e6c2dcfa06
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jan 11 10:15:21 2011 -0500
+
+    Add support for triangles to pixman.
+    
+    The Render X extension can draw triangles as well as trapezoids, but
+    the implementation has always converted them to trapezoids. This patch
+    moves the X server's triangle conversion code into pixman, where we
+    can reuse the pixman_composite_trapezoid() code.
+
+commit 4e6dd4928d817338ae406a620f5658bbddb66df1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Feb 10 10:37:08 2011 -0500
+
+    Add a test program for pixman_composite_trapezoids().
+    
+    A CRC32 based test program to check that pixman_composite_trapezoids()
+    actually works.
+
+commit 803272e38c5b9b9abe347390c2ecd2ac4be7b9be
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jan 11 09:23:43 2011 -0500
+
+    Add pixman_composite_trapezoids().
+    
+    This function is an implementation of the X server request
+    Trapezoids. That request is what the X backend of cairo is using all
+    the time; by moving it into pixman we can hopefully make it faster.
+
+commit 1feaf6bea707a97db44643c5bfa6218afea9b6be
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jan 18 19:40:53 2011 -0500
+
+    test/Makefile.am: Move all the TEST_LDADD into a new global LDADD.
+    
+    This gets rid of a bunch of replicated *_LDADD clauses
+
+commit 1237fd9bc84a27f232ceddf1c7b72645fcc99aec
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jan 18 19:20:18 2011 -0500
+
+    Add @TESTPROGS_EXTRA_LDFLAGS@ to AM_LDFLAGS
+    
+    Instead of explicitly adding it to each test program.
+
+commit 7dfe845786920d50c6f93165ef6f539e6f4d1b53
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jan 18 19:16:39 2011 -0500
+
+    Move all the GTK+ based test programs to a new subdir, "demos"
+    
+    This separates the test suite from the random gtk+ using test
+    programs. "demos" is somewhat misleading because the programs there
+    are not particularly exciting (with the possible exception of
+    composite-test which shows off all the compositing operators).
+
+commit 8e4100260bbdb827abc45a2a5e352a53246fe614
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Feb 4 00:47:36 2011 +0200
+
+    SSE2 optimization for nearest scaled over_8888_n_8888
+    
+    This operation shows up a little bit in some of the html5 based
+    games from http://www.kesiev.com/akihabara/
+    
+    === Cairo trace of the game intro animation for 'Legend of Sadness' ===
+    
+    before:
+    [  0]    image    firefox-legend-of-sadness   46.286   46.298   0.01%    5/6
+    
+    after:
+    [  0]    image    firefox-legend-of-sadness   45.088   45.102   0.04%    6/6
+    
+    === Microbenchmark (scaling ~2000x~2000 -> ~2000x~2000) ===
+    
+    before:
+        translucent: op=3, src=8888, mask=s dst=8888, speed=131.30 MPix/s
+        transparent: op=3, src=8888, mask=s dst=8888, speed=132.38 MPix/s
+        opaque:      op=3, src=8888, mask=s dst=8888, speed=167.90 MPix/s
+    after:
+        translucent: op=3, src=8888, mask=s dst=8888, speed=301.93 MPix/s
+        transparent: op=3, src=8888, mask=s dst=8888, speed=770.70 MPix/s
+        opaque:      op=3, src=8888, mask=s dst=8888, speed=301.80 MPix/s
+
+commit 39b86b032d1b81958d4dfc880ba7f129aecb1de0
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 3 15:22:28 2010 +0200
+
+    ARM: NEON optimization for nearest scaled over_0565_8_0565
+    
+    In some cases may be used for html5 video when hardware acceleration
+    is not available.
+
+commit 9a90c1c90f1d128de68b3ed855a2ea1c3bed20c3
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 3 15:16:28 2010 +0200
+
+    ARM: NEON optimization for nearest scaled over_8888_8_0565
+    
+    In some cases may be used for html5 video when hardware acceleration
+    is not available.
+
+commit cd1062ded44978fa97aa3d3295af016c80c6e2eb
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 3 15:15:15 2010 +0200
+
+    ARM: new macro template for using scaled fast paths with a8 mask
+
+commit b099957887ef69b795d542f8f2980b5a94fb823f
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Feb 2 18:14:56 2011 +0200
+
+    Better support for NONE repeat in nearest scaling main loop template
+    
+    Scaling function now gets an extra boolean argument, which is set
+    to TRUE when we are fetching padding pixels for NONE repeat. This
+    allows to make a decision whether to interpret alpha as 0xFF or 0x00
+    for such pixels when working with formats which don't have alpha
+    channel (for example x8r8g8b8 and r5g6b5).
+
+commit 14f82083a12be07f340fdea491759b3bb77b4e66
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Oct 22 17:54:41 2010 +0300
+
+    Support for a8 and solid mask in nearest scaling main loop template
+    
+    In addition to the most common case of not having any mask at all, two
+    variants of scaling with mask show up in cairo traces:
+    1. non-scaled a8 mask with SAMPLES_COVER_CLIP flag
+    2. solid mask
+    
+    This patch extends the nearest scaling main loop template to also
+    support these cases.
+
+commit e83cee5aac26522f31a7e81ea3f972ae2248f6b0
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Oct 22 16:29:01 2010 +0300
+
+    test: Extend scaling-test to support a8/solid mask and ADD operation
+    
+    Image width also has been increased because SIMD optimizations typically
+    do more unrolling in the inner loops, and this needs to be tested.
+
+commit 97447f440fec9889bba6cc21c6d9366183c47e7e
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Jan 17 02:29:43 2011 +0200
+
+    Use const modifiers for source buffers in nearest scaling fast paths
+
+commit 8d359b00c5bb9960c3c584a7f77a943c0ce61368
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Jul 30 18:37:51 2010 +0300
+
+    C fast paths for a simple 90/270 degrees rotation
+    
+    Depending on CPU architecture, performance is in the range of 1.5 to 4 times
+    slower than simple nonrotated copy (which would be an ideal case, perfectly
+    utilizing memory bandwidth), but still is more than 7 times faster if
+    compared to general path.
+    
+    This implementation sets a performance baseline for rotation. The use
+    of SIMD instructions may further improve memory bandwidth utilization.
+
+commit e0c7948c970b816f323a6402241ca70fa855c12c
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Jul 29 17:58:13 2010 +0300
+
+    New flags for 90/180/270 rotation
+    
+    These flags are set when the transform is a simple nonscaled 90/180/270
+    degrees rotation.
+
+commit 3b68c295fd45297a631569b35608364dbcb6d452
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Oct 26 15:40:01 2010 +0300
+
+    test: affine-test updated to stress 90/180/270 degrees rotation more
+
+commit 56f173f0af5a59a12596cf1ed9d6fb7c8ebe6318
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Feb 10 05:21:42 2011 -0500
+
+    Add pixman-conical-gradient.c to Makefile.win32.
+    
+    Pointed out by Kirill Tishin.
+
+commit 7fd4897730412977f730b850e6e697156fb3734b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Jan 23 16:53:26 2011 -0500
+
+    Add SSE2 fetcher for 0565
+    
+    Before:
+    
+    add_0565_0565 = L1:  61.08  L2:  61.03  M: 60.57 ( 10.95%)  HT: 46.85  VT: 45.25  R: 39.99  RT: 20.41 ( 233Kops/s)
+    
+    After:
+    
+    add_0565_0565 = L1:  77.84  L2:  76.25  M: 75.38 ( 13.71%)  HT: 55.99  VT: 54.56  R: 45.41  RT: 21.95 ( 255Kops/s)
+
+commit 8414aa76c20732a6ed29a2d80175936570c5e592
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 31 00:57:46 2010 -0500
+
+    Improve performance of sse2_combine_over_u()
+    
+    Split this function into two, one that has a mask, and one that
+    doesn't. This is a fairly substantial speed-up in many cases.
+    
+    New output of lowlevel-blt-bench over_x888_8_0565:
+    
+    over_x888_8_0565 =  L1:  63.76  L2:  62.75  M: 59.37 ( 21.55%)  HT: 45.89  VT: 43.55  R: 34.51  RT: 16.80 ( 201Kops/s)
+
+commit 08e855f15cba24aac83145b994069d0bb50be5a1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Jan 23 16:17:17 2011 -0500
+
+    Add SSE2 fetcher for a8
+    
+    New output of lowlevel-blt-bench over_x888_8_0565:
+    
+    over_x888_8_0565 =  L1:  57.85  L2:  56.80  M: 54.14 ( 19.50%)  HT: 42.64  VT: 40.56  R: 32.67  RT: 16.22 ( 195Kops/s)
+    
+    Based in part on code by Steve Snyder from
+    
+        https://bugs.freedesktop.org/show_bug.cgi?id=21173
+
+commit 2b6b0cf3591ce4438f7e0571c7a762972a999cd8
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Jan 12 06:38:54 2011 -0500
+
+    Add SSE2 fetcher for x8r8g8b8
+    
+    New output of lowlevel-blt-bench over_x888_8_0565:
+    
+    over_x888_8_0565 =  L1:  55.68  L2:  55.11  M: 52.83 ( 19.04%)  HT: 39.62  VT: 37.70  R: 30.88  RT: 14.62 ( 174Kops/s)
+    
+    The fetcher is looked up in a table, so that other fetchers can easily
+    be added.
+    
+    See also https://bugs.freedesktop.org/show_bug.cgi?id=20709
+
+commit 13aed37758d1af5b5bc2a80d886b764d4c45827e
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jan 22 17:13:19 2011 -0500
+
+    Add a test for over_x888_8_0565 in lowlevel_blt_bench().
+    
+    The next few commits will speed this up quite a bit.
+    
+    Current output:
+    
+    ---
+    reference memcpy speed = 2217.5MB/s (554.4MP/s for 32bpp fills)
+    ---
+    over_x888_8_0565 =  L1:  54.67  L2:  54.01  M: 52.33 ( 18.88%)  HT: 37.19  VT: 35.54  R: 29.40  RT: 13.63 ( 162Kops/s)
+
+commit 2de397c272fd60d6ce4311b411ad37a8e39daff6
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jan 24 12:24:42 2011 -0500
+
+    Move fallback decisions from implementations into pixman-cpu.c.
+    
+    Instead of having each individual implementation decide which fallback
+    to use, move it into pixman-cpu.c, where a more global decision can be
+    made.
+    
+    This is accomplished by adding a "fallback" argument to all the
+    pixman_implementation_create_*() implementations, and then in
+    _pixman_choose_implementation() pass in the desired fallback.
+
+commit ed781df1cc30748c8193be9b9a497def0b768b6b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Jan 21 14:47:33 2011 -0500
+
+    Print a warning when a development snapshot is being configured.
+    
+    It seems to be relatively common for people to use development
+    snapshots of pixman thinking they are ordinary releases. This patch
+    makes it such that if the current minor version is odd, configure will
+    print a banner explaining the version number scheme plus information
+    about where to report bugs.
+
+commit fead9eb82a7fc78a4927fff960d4cacea799bd9b
+Author: Rolland Dudemaine <rolland@ghs.com>
+Date:   Tue Jan 25 15:08:26 2011 +0200
+
+    Fix "variable was set but never used" warnings
+    
+    Removes useless variable declarations. This can only result in more
+    efficient code, as these variables were sometimes assigned, but
+    their values were never used.
+
+commit 32e556df33b3cd3b31de8184f144b3065206406b
+Author: Rolland Dudemaine <rolland@ghs.com>
+Date:   Tue Jan 25 14:14:57 2011 +0200
+
+    test: Use the right enum types instead of int to fix warnings
+    
+    Green Hills Software MULTI compiler was producing a number
+    of warnings due to incorrect uses of int instead of the correct
+    corresponding pixman_*_t type.
+
+commit b61ec0a6862ba101fff0afa082fb7490a0c44785
+Author: Rolland Dudemaine <rolland@ghs.com>
+Date:   Tue Jan 25 14:52:49 2011 +0200
+
+    Correct the initialization of 'max_vx'
+    
+    http://lists.freedesktop.org/archives/pixman/2011-January/000937.html
+
+commit e8a1b1c4e502ecbb70028bd5a86034bfe1b16997
+Author: Rolland Dudemaine <rolland@ghs.com>
+Date:   Tue Jan 25 13:55:28 2011 +0200
+
+    test: Fix for mismatched 'fence_malloc' prototype/implementation
+    
+    Solves compilation problem when 'mprotect' is not available. For
+    example, when using Green Hills Software MULTI compiler or mingw:
+    http://lists.freedesktop.org/archives/pixman/2011-January/000939.html
+
+commit a8e4677ecc2fcbf16a53902e26fc82d0860e9a21
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Jan 10 21:01:16 2011 +0200
+
+    The code in 'bitmap_addrect' already assumes non-null 'reg->data'
+    
+    So the check of 'reg->data' pointer can be safely removed.
+
+commit a6a04c07c354e10d787193af8d2f6a6d27f374ad
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Jan 19 07:47:52 2011 -0500
+
+    Post-release version bump to 0.21.5
+
+commit 4e56cec5649b7e122ccfc815b4ff45611953afce
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Jan 19 07:38:24 2011 -0500
+
+    Pre-release version bump to 0.21.4
+
+commit 1d7195dd6c68eab73d063f37de3a9331446111d4
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jan 17 14:12:20 2011 -0500
+
+    Fix dangling-pointer bug in bits_image_fetch_bilinear_no_repeat_8888().
+    
+    The mask_bits variable is only declared in a limited scope, so the
+    pointer to it becomes invalid instantly. Somehow this didn't actually
+    trigger any bugs, but Brent Fulgham reported that Bounds Checker was
+    complaining about it.
+    
+    Fix the bug by moving mask_bits to the function scope.
+
+commit 2ac4ae1ae253f7c2efedab036a677dac2f9c9eed
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Wed Jan 12 17:43:40 2011 +0100
+
+    Add a test for radial gradients
+    
+    radial-test is a port of the radial-gradient test from the cairo test
+    suite. It has been modified so that some pixels have 0 in both the a
+    and b coefficients of the quadratic equation solved by the rasterizer,
+    to expose a division by zero in the original implementation.
+
+commit 7f4eabbeec92e55fd8f812c0e5d8568eacbb633d
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Dec 12 07:34:42 2010 -0500
+
+    Fix destination fetching
+    
+    When fetching from destinations, we need to ignore transformations,
+    repeat and filtering. Currently we don't ignore them, which means all
+    kinds of bad things can happen.
+    
+    This patch fixes this problem by directly calling the scanline fetchers
+    for destinations instead of going through the full
+    get_scanline_32/64().
+
+commit 9489c2e04a5361fe19a89a0da9d7be28436c0a4b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Dec 12 09:19:13 2010 -0500
+
+    Turn on testing for destination transformation
+
+commit fffeda703e40ced90ec5ad6d6cd37a44294d3fe4
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Dec 11 08:10:04 2010 -0500
+
+    Skip fetching pixels when possible
+    
+    Add two new iterator flags, ITER_IGNORE_ALPHA and ITER_IGNORE_RGB that
+    are set when the alpha and rgb values are not needed. If both are set,
+    then we can skip fetching entirely and just use
+    _pixman_iter_get_scanline_noop.
+
+commit 3e635d6491d883304662aff3c72558dc9065f1f1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 10 16:55:55 2010 -0500
+
+    Add direct-write optimization back
+    
+    Introduce a new ITER_LOCALIZED_ALPHA flag that indicates that the
+    alpha value computed is used only for the alpha channel of the output;
+    it doesn't affect the RGB channels.
+    
+    Then in pixman-bits-image.c, if a destination is either a8r8g8b8 or
+    x8r8g8b8 with localized alpha, the iterator will return a pointer
+    directly into the image.
+
+commit 0f1a5c4a27d34dcf4525dc38fcb48c14f653e828
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 10 15:18:48 2010 -0500
+
+    Get rid of the classify methods
+    
+    They are not used anymore, and the linear gradient is now doing the
+    optimization in a different way.
+
+commit b66cabb88488413c4787845c7da67901dc988ee6
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 10 15:14:24 2010 -0500
+
+    Linear: Optimize for horizontal gradients
+    
+    If the gradient is horizontal, we can reuse the same scanline over and
+    over. Add support for this optimization to
+    _pixman_linear_gradient_iter_init().
+
+commit cf14189c6993e42ae71977a4a4061417941ffee8
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 10 14:59:20 2010 -0500
+
+    Consolidate the various get_scanline_32() into get_scanline_narrow()
+    
+    The separate get_scanline_32() functions in solid, linear, radial and
+    conical images are no longer necessary because all access to these
+    images now goes through iterators.
+
+commit 0a6360a7ee0983dd52d368f5352d8c313fb0570b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 10 14:44:22 2010 -0500
+
+    Allow NULL property_changed function
+    
+    Initialize the field to NULL, and then delete the empty functions from
+    the solid, linear, radial, and conical images.
+
+commit 34b5633105e5e2838ac8deb32d26e3bbe73a3d1a
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 10 14:39:01 2010 -0500
+
+    Move get_scanline_32/64 to the bits part of the image struct
+    
+    At this point these functions are basically a cache that the bits
+    image uses for its fetchers, so they can be moved to the bits image.
+    
+    With the scanline getters only being initialized in the bits image,
+    the _pixman_image_get_scanline_generic_64 can be moved to
+    pixman-bits-image.c. That gets rid of the final user of
+    _pixman_image_get_scanline_32/64, so these can be deleted.
+
+commit d6b13f99b41eac535d961b89d4b53f616c910c1e
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 10 10:53:02 2010 -0500
+
+    Use an iterator in pixman_image_get_solid()
+    
+    This is a step towards getting rid of the
+    _pixman_image_get_scanline_32/64() functions.
+
+commit 51a5e949f394560b057911d46aab768f8e07bd54
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 10 13:26:53 2010 -0500
+
+    Virtualize iterator initialization
+    
+    Make src_iter_init() and dest_iter_init() virtual methods in the
+    implementation struct. This allows individual implementations to plug
+    in their own CPU specific scanline fetchers.
+
+commit 6503c6edccbc6b08ea8efe398da3265126efa896
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 10 12:40:26 2010 -0500
+
+    Move iterator initialization to the respective image files
+    
+    Instead of calling _pixman_image_get_scanline_32/64(), move the
+    iterator initialization into the respective image implementations and
+    call the scanline generators directly.
+
+commit 23c6e1d2c007cc661b31e1bcdfd84604d7a9a560
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 10 12:31:29 2010 -0500
+
+    Eliminate the _pixman_image_store_scanline_32/64 functions
+    
+    They were only called from next_line_write_narrow/wide, so they could
+    simply be absorbed into those functions.
+
+commit b2c9eaa5020d08cfaac6c2296895e5a65c971ffd
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 10 12:19:50 2010 -0500
+
+    Move initialization of iterators for bits images to pixman-bits-image.c
+    
+    pixman_iter_t is now defined in pixman-private.h, and iterators for
+    bits images are being initialized in pixman-bits-image.c
+
+commit 15b1645c7b96498788c9376e3bb7d8a5e7b4e584
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Dec 10 11:30:27 2010 -0500
+
+    Add iterators in the general implementation
+    
+    We add a new structure called a pixman_iter_t that encapsulates the
+    information required to read scanlines from an image. It contains two
+    functions, get_scanline() and write_back(). The get_scanline()
+    function will generate pixels for the current scanline. For iterators
+    for source images, it will also advance to the next scanline. The
+    write_back() function is only called for destination images. Its
+    function is to write back the modified pixels to the image and then
+    advance to the next scanline.
+    
+    When an iterator is initialized, it is passed this information:
+    
+       - The image to iterate
+    
+       - The rectangle to be iterated
+    
+       - A buffer that the iterator may (but is not required to) use. This
+         buffer is guaranteed to have space for at least width pixels.
+    
+       - A flag indicating whether a8r8g8b8 or a16r16g16b16 pixels should
+         be fetched
+    
+    There are a number of (eventual) benefits to the iterators:
+    
+       - The initialization of the iterator can be virtualized such that
+         implementations can plug in their own CPU specific get_scanline()
+         and write_back() functions.
+    
+       - If an image is horizontal, it can simply plug in an appropriate
+         get_scanline(). This way we can get rid of the annoying
+         classify() virtual function.
+    
+       - In general, iterators can remember what they did on the last
+         scanline, so for example a REPEAT_NONE image might reuse the same
+         data for all the empty scanlines generated by the zero-extension.
+    
+       - More detailed information can be passed to iterator, allowing
+         more specialized fetchers to be used.
+    
+       - We can fix the bug where destination filters and transformations
+         are not currently being ignored as they should be.
+    
+    However, this initial implementation is not optimized at all. We lose
+    several existing optimizations:
+    
+       - The ability to composite directly in the destination
+       - The ability to only fetch one scanline for horizontal images
+       - The ability to avoid fetching the src and mask for the CLEAR
+         operator
+    
+    Later patches will re-introduce these optimizations.
+
+commit 255d624e508e29b452e567c249ac75ae8d8e2abe
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Jan 11 14:36:24 2011 +0200
+
+    ARM: do /proc/self/auxv based cpu features detection only in linux
+    
+    This method is linux specific, but earlier it was tried for any platform
+    that did not have _MSC_VER macro defined.
+
+commit 2bbd553bd21dcc1b199eb11ec6cb78a5b9769d49
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Sep 13 04:21:33 2010 +0300
+
+    A new configure option --enable-static-testprogs
+    
+    This option can be used for building fully static binaries of the test
+    programs so that they can be easily run using qemu-user. With binfmt-misc
+    configured, 'make check' works fine for crosscompiled pixman builds.
+
+commit 55bbccf84e475b2e3c4536606cd08c946c041fd0
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Jan 10 18:29:33 2011 +0200
+
+    Make 'fast_composite_scaled_nearest_*' less suspicious
+    
+    Taking address of a variable and then using it as an array looks suspicious
+    to static code analyzers. So change it into an array with 1 element to make
+    them happy. Both old and new variants of this code are correct because 'vx'
+    and 'unit_x' arguments are set to 0 and it means that the called scanline
+    function can only access a single element of 'zero' buffer.
+
+commit ae70b38d40a587e29dc5e0dfe6250693598beca7
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Jan 10 18:09:16 2011 +0200
+
+    Bugfix for a corner case in 'pixman_transform_is_inverse'
+    
+    When 'pixman_transform_multiply' fails, the result of multiplication just
+    could not have been identity matrix (one of the values in the resulting
+    matrix can't be represented as 16.16 fixed point value). So it is safe
+    to return FALSE.
+
+commit ab3809f4da0d833944363c5c039c3a2e6a8389c5
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Jan 4 13:42:29 2011 +0200
+
+    Workaround for a preprocessor issue in old Sun Studio
+    
+    Patch from Peter O'Gorman with some modifications
+    
+    https://bugs.freedesktop.org//show_bug.cgi?id=32764
+
+commit f5c0a60ac8c32ac37aaf58f67048585af58f3141
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Jan 4 08:41:02 2011 +0200
+
+    Fix for "syntax error: empty declaration" Solaris Studio warnings
+
+commit c71e24c9fc312cf0b8ec56d2e657efe79d062d2f
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Jan 4 08:18:38 2011 +0200
+
+    Revert "Fix "syntax error: empty declaration" warnings."
+    
+    This reverts commit b924bb1f8191cc7c386d8211d9822aeeaadcab44.
+    
+    There is a better fix for these Solaris Studio warnings.
+
+commit 29439bd7724031504e965ffe5b366baaeeae07d8
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Tue Nov 23 11:37:54 2010 +0100
+
+    Improve handling of tangent circles
+    
+    When b is 0, avoid the division by zero and just return transparent
+    black.
+    
+    When the solution t would have an invalid radius (negative or outside
+    [0,1] for none-extended gradients), return transparent black.
+
+commit a484a9c49c98dfad0d74af4440039f61bef24d48
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Dec 20 16:11:48 2010 -0500
+
+    sse2: Skip src pixels that are zero in sse2_composite_over_8888_n_8888()
+    
+    This is a big speed-up in the SVG helicopter game:
+    
+       http://ie.microsoft.com/testdrive/Performance/Helicopter/Default.xhtml
+    
+    when rendered by Firefox 4 since it is compositing big images
+    consisting almost entirely of zeros.
+
+commit 2610323545cb5ee3dff0b7d7da505a1cd1e01b73
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Dec 18 06:06:39 2010 -0500
+
+    Fix divide-by-zero in set_lum().
+    
+    When (l - min) or (max - l) are zero, simply set all the channels to
+    the limit, 0 in the case of (l - min), and a in the case of (max - l).
+
+commit 3479050216a65e3ef6e966a8e801415145261216
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Dec 18 06:05:52 2010 -0500
+
+    Add a test compositing with the various PDF operators.
+    
+    The test has floating point exceptions enabled, and currently fails
+    with a divide-by-zero.
+
+commit 45a2d010773d05666e87b7a6502e8fcb25add4eb
+Author: Cyril Brulebois <kibi@debian.org>
+Date:   Sun Dec 19 19:37:26 2010 +0100
+
+    Fix linking issues when HAVE_FEENABLEEXCEPT is set.
+    
+    All objects using test/utils.c fail to link:
+    |   CCLD   region-test
+    | /usr/bin/ld: utils.o: in function enable_fp_exceptions:utils.c(.text+0x939): error: undefined reference to 'feenableexcept'
+    
+    There's indeed no explicit dependency on -lm, and if HAVE_FEENABLEEXCEPT
+    happens to be set, test/utils.c uses feenableexcept(), which is nowhere
+    to be found while linking.
+    
+    Fix this by adding -lm to TEST_LDADD, although two alternatives could be
+    thought of:
+     - Only specifying -lm for objects using utils.c.
+     - Introducing a conditional to add -lm only when configure detects
+       have_feenableexcept=yes.
+    
+    Signed-off-by: Cyril Brulebois <kibi@debian.org>
+
+commit 303de045ff21bd5c9cb756d50a41fe4cb8bc97b8
+Author: Jon TURNEY <jon.turney@dronecode.org.uk>
+Date:   Sat Dec 18 18:32:39 2010 +0000
+
+    Remove stray #include <fenv.h>
+    
+    Remove a stray #include <fenv.h> added in commit 2444b2265abeaf6dcf3df1763bc2711684e63bb8
+    to fix compilation on platforms which don't have fenv.h
+    
+    Signed-off-by: Jon TURNEY <jon.turney@dronecode.org.uk>
+
+commit f914cf448630d4ba4af6603b827c621ae6705387
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Aug 23 21:55:02 2010 -0400
+
+    Add a stress-test program.
+    
+    This test program tries to use as many rarely-used features as
+    possible, including alpha maps, accessor functions, oddly-sized
+    images, strange transformations, conical gradients, etc.
+    
+    The hope is to provoke crashes or irregular behavior in pixman.
+
+commit 7d7b03c0911584f687a7fd57a3f5d5eed21080e0
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Oct 12 10:56:26 2010 -0400
+
+    Make the argument to fence_malloc() an int64_t
+    
+    That way we can detect if someone attempts to allocate a negative size
+    and abort instead of just returning NULL and segfaulting later.
+
+commit d41522113ec84e74f7915599fd7624f842be8862
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Aug 29 18:02:02 2010 -0400
+
+    test/utils.c: Initialize palette->rgba to 0.
+    
+    That way it can be used with palettes that are not statically
+    allocated, without causing valgrind issues.
+
+commit 337f0bff0d8965cb702175e0eedbf418b1e7f0b5
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Aug 23 21:02:02 2010 -0400
+
+    test: Move palette initialization to utils.[ch]
+
+commit 2444b2265abeaf6dcf3df1763bc2711684e63bb8
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Oct 20 13:12:37 2010 -0400
+
+    Extend gradient-crash-test
+    
+    Test the gradients with various transformations, and test cases where
+    the gradients are specified with two identical points.
+
+commit de2e51dacb1ccd312c0461088b942ef4e93e2731
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Oct 20 13:53:07 2010 -0400
+
+    Add enable_fp_exceptions() function in utils.[ch]
+    
+    This function enables floating point traps if possible.
+
+commit a2afcc9ba4ed5a2843fd133ca23704960846185b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Aug 23 20:56:11 2010 -0400
+
+    test: Make composite test use some existing macros instead of defining its own
+    
+    Also move the ARRAY_LENGTH macro into utils.h so it can be used elsewhere.
+
+commit 4d8d2fa47e457e3c8a5ab956b52cff4785aa45c3
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Dec 17 15:29:58 2010 +0200
+
+    COPYING: added Nokia to the list of copyright holders
+
+commit 3d094997b1820719d15cec7dc633ed37e1912bfc
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Nov 30 00:31:06 2010 +0200
+
+    Fix for potential unaligned memory accesses
+    
+    The temporary scanline buffer allocated on stack was declared
+    as uint8_t array. As a result, the compiler was free to select
+    any arbitrary alignment for it (even though there is typically
+    no reason to use really weird alignments here and the stack is
+    normally at least 4 bytes aligned on most platforms). Having
+    improper alignment is non-portable and can impact performance
+    or even make the code misbehave depending on the target platform.
+    
+    Using uint64_t type for this array should ensure that any possible
+    memory accesses done by pixman code are going to be handled correctly
+    (pixman-combine64.c can access this buffer via uint64_t * pointer).
+    
+    Some alignment related problem was reported in:
+    http://lists.freedesktop.org/archives/pixman/2010-November/000747.html
+
+commit 985e59a82fa5e644cb6516dc174ab3f79f1448df
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Nov 25 02:28:29 2010 +0200
+
+    ARM: added 'neon_src_rpixbuf_8888' fast path
+    
+    With this optimization added, pixman assisted conversion from
+    non-premultiplied to premultiplied alpha format is now fully
+    NEON optimized (both with and without R/B color components
+    swapping in the process).
+
+commit 733f68912f4a44c24ad3973049a7e1d98f4c6ea8
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Nov 29 09:11:29 2010 +0200
+
+    ARM: added 'neon_composite_in_n_8' fast path
+
+commit af7a69d90ea2b43a4e850870727723d719f09a1c
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Nov 29 09:00:46 2010 +0200
+
+    ARM: added flags parameter to some asm fast path wrapper macros
+    
+    Not all types of operations can be skipped when having transparent
+    solid source or transparent solid mask. Add an extra flags parameter
+    for providing this information to the wrappers.
+
+commit f6843e3797eea7e4aed7614b1086f5cefc06c0f9
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Nov 29 03:31:32 2010 +0200
+
+    ARM: added 'neon_composite_add_8888_n_8888' fast path
+
+commit b066b520dfaf0a9f4d1bc9a73c789091e9ce7cc8
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Nov 29 02:38:52 2010 +0200
+
+    ARM: added 'neon_composite_add_n_8_8888' fast path
+
+commit 1fba7790367d7b726d05a33bbbcebe10b9280a31
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Nov 29 02:10:22 2010 +0200
+
+    ARM: better NEON instructions scheduling for add_8888_8888_8888
+    
+    Provides a minor performance improvement by using pipelining and hiding
+    instructions latencies. Also do not clobber d0-d3 registers (source
+    image pixels) while doing calculations in order to allow the use of
+    the same macro for add_n_8_8888 fast path later.
+    
+    Benchmark from ARM Cortex-A8 @500MHz:
+    
+    == before ==
+    
+      add_8888_8888_8888 = L1:  95.94  L2:  42.27  M: 25.60 (121.09%)
+                           HT:  14.54  VT:  13.13  R: 12.77  RT:  4.49 (48Kops/s)
+         add_8888_8_8888 = L1: 104.51  L2:  57.81  M: 36.06 (106.62%)
+                           HT:  19.24  VT:  16.45  R: 14.71  RT:  4.80 (51Kops/s)
+    
+    == after ==
+    
+      add_8888_8888_8888 = L1: 106.66  L2:  47.82  M: 27.32 (129.30%)
+                           HT:  15.44  VT:  13.96  R: 12.86  RT:  4.48 (48Kops/s)
+         add_8888_8_8888 = L1: 107.72  L2:  61.02  M: 38.26 (113.16%)
+                           HT:  19.48  VT:  16.72  R: 14.82  RT:  4.80 (51Kops/s)
+
+commit c3f48b6aa2f9354af02ffc8c938ec6753fdcbde3
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sun Nov 28 22:05:53 2010 +0200
+
+    ARM: added 'neon_composite_add_8888_8_8888' fast path
+
+commit 6d2f7f981b52b41f4321071c325babcf792bd666
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sat Nov 27 15:53:54 2010 +0200
+
+    ARM: added 'neon_composite_over_0565_n_0565' fast path
+
+commit 3990931bf6197eff1cec06cf24bce53ddf9a539a
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sat Nov 27 04:47:39 2010 +0200
+
+    ARM: reuse common NEON code for over_{n_8|8888_n|8888_8}_0565
+    
+    Renamed supplementary macros from 'over_n_8_0565' to 'over_8888_8_0565',
+    because they can actually support all variants of this operation:
+    over_8888_8_0565/over_n_8_0565/over_8888_n_0565.
+    
+    Also 'over_8888_8_0565' now uses more optimized common code instead of its
+    own variant, improving performance a bit. Even though this operation is
+    still memory bandwidth limited, scaled variants of these fast paths may
+    put more stress on CPU later.
+    
+    Benchmarked on ARM Cortex-A8 @500MHz:
+    
+    == before ==
+    
+        over_8888_8_0565 =  L1:  67.10  L2:  53.82  M: 44.70 (105.17%)
+                            HT:  18.73  VT:  16.91  R: 14.25  RT:  4.80 (52Kops/s)
+    
+    == after ==
+    
+        over_8888_8_0565 =  L1:  77.83  L2:  58.14  M: 44.82 (105.52%)
+                            HT:  20.58  VT:  17.44  R: 15.05  RT:  4.88 (52Kops/s)
+
+commit a7c36681c0c1955ff9110b81f1789e56abb10a95
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sat Nov 27 03:53:12 2010 +0200
+
+    ARM: added 'neon_composite_over_8888_n_0565' fast path
+
+commit e6814837a6ccd3e4db329e0131eaf2055d2c864b
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Nov 26 17:06:58 2010 +0200
+
+    ARM: better NEON instructions scheduling for over_n_8_0565
+    
+    Code rearranged to get better instructions scheduling for ARM Cortex-A8/A9.
+    Now it is ~30% faster for the pixel data in L1 cache and makes better use
+    of memory bandwidth when running at lower clock frequencies (ex. 500MHz).
+    Also register d24 (pixels from the mask image) is now not clobbered by
+    supplementary macros, which allows to reuse them for the other variants
+    of compositing operations later.
+    
+    Benchmark from ARM Cortex-A8 @500MHz:
+    
+    == before ==
+    
+        over_n_8_0565 =  L1:  63.90  L2:  63.15  M: 60.97 ( 73.53%)
+                         HT:  28.89  VT:  24.14  R: 21.33  RT:  6.78 (  67Kops/s)
+    
+    == after ==
+    
+        over_n_8_0565 =  L1:  82.64  L2:  75.19  M: 71.52 ( 84.14%)
+                         HT:  30.49  VT:  25.56  R: 22.36  RT:  6.89 (  68Kops/s)
+
+commit 3be86a92ccab240859062a541cdb871d81c9501a
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sun Nov 28 21:45:06 2010 +0200
+
+    ARM: introduced 'fetch_mask_pixblock' macro to simplify code
+    
+    This macro hides the implementation details of pixels fetching
+    for the mask image just like 'fetch_src_pixblock' does for the
+    source image. This provides more possibilities for reusing the
+    same code blocks in different compositing functions.
+    
+    This patch does not introduce any functional changes and the
+    resulting code in the compiled object file is exactly the same.
+
+commit 98d08b37f17a3379d0ceff8bb7de8f943873fbd8
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Nov 26 08:55:49 2010 +0200
+
+    ARM: added 'neon_composite_over_n_8_8' fast path
+
+commit 4b5b5a2a832cd67f2a0ec231f75a2825b45571fa
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Nov 15 18:26:43 2010 +0200
+
+    C fast path for a1 fill operation
+    
+    Can be used as one of the solutions to fix bug
+    https://bugs.freedesktop.org/show_bug.cgi?id=31604
+
+commit 654961efe405ad1a7e54a77548ca8af322ecc1f8
+Author: Alan Coopersmith <alan.coopersmith@oracle.com>
+Date:   Sun Nov 21 11:42:22 2010 -0800
+
+    Sun's copyrights belong to Oracle now
+    
+    Signed-off-by: Alan Coopersmith <alan.coopersmith@oracle.com>
+
+commit e7ee43c39d2370716a4d011afa8f5067eced9899
+Author: Cyril Brulebois <kibi@debian.org>
+Date:   Wed Nov 17 16:16:56 2010 +0100
+
+    Fix argument quoting for AC_INIT.
+    
+    One gets rid of this accordingly:
+    | autoreconf -vfi
+    | autoreconf: Entering directory `.'
+    | autoreconf: configure.ac: not using Gettext
+    | autoreconf: running: aclocal --force
+    | configure.ac:61: warning: AC_INIT: not a literal: "pixman@lists.freedesktop.org"
+    | autoreconf: configure.ac: tracing
+    | configure.ac:61: warning: AC_INIT: not a literal: "pixman@lists.freedesktop.org"
+    
+    Signed-off-by: Cyril Brulebois <kibi@debian.org>
+
+commit c59db8af66510e8e0a852e5775cff46f7476c71c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Nov 16 17:14:47 2010 -0500
+
+    Post-release version bump to 0.21.3
+
+commit 4646c238589986499834b28ed903c366b5ba15ed
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Nov 16 16:43:26 2010 -0500
+
+    Pre-release version bump
+
+commit 536cf4dd3bd144ad1c65fc05f4883a31247a0f5d
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Nov 2 23:38:10 2010 -0400
+
+    Generate {a,x}8r8g8b8, a8, 565 fetchers for nearest/affine images
+    
+    There are versions for all combinations of x8r8g8b8/a8r8g8b8 and
+    pad/repeat/none/normal repeat modes. The bulk of each function is an
+    inline function that takes a format and a repeat mode as parameters.
+
+commit da0176e8534e5b027818f6b695343d3e04130a93
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Tue Nov 2 17:04:35 2010 +0100
+
+    Improve conical gradients opacity check
+    
+    Conical gradients are completely opaque if all of their stops are
+    opaque and the repeat mode is not 'none'.
+
+commit 151f2554fc9c098ff86b0fdc0d785aa3ff496328
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Tue Nov 2 17:02:01 2010 +0100
+
+    Fix opacity check
+    
+    Radial gradients are "conical", thus they can have some non-opaque
+    parts even if all of their stops are completely opaque.
+    
+    To guarantee that a radial gradient is actually opaque, it needs to
+    also have one of the two circles containing the other one. In this
+    case when extrapolating, the whole plane is completely covered (as
+    explained in the comment in pixman-radial-gradient.c).
+
+commit 19ed415b74521ad5dcc7b6e3ed4bb644711c7bef
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Sun Oct 31 16:59:45 2010 +0100
+
+    Remove unused stop_range field
+
+commit d8fe87a6262ee661af8fb0d46bab223e4ab3d88e
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Oct 4 01:56:59 2010 +0300
+
+    ARM: optimization for scaled src_0565_0565 with nearest filter
+    
+    The performance improvement is only in the ballpark of 5% when
+    compared against C code built with a reasonably good compiler
+    (gcc 4.5.1). But gcc 4.4 produces approximately 30% slower code
+    here, so assembly optimization makes sense to avoid dependency
+    on the compiler quality and/or optimization options.
+    
+    Benchmark from ARM11:
+        == before ==
+        op=1, src_fmt=10020565, dst_fmt=10020565, speed=34.86 MPix/s
+    
+        == after ==
+        op=1, src_fmt=10020565, dst_fmt=10020565, speed=36.62 MPix/s
+    
+    Benchmark from ARM Cortex-A8:
+        == before ==
+        op=1, src_fmt=10020565, dst_fmt=10020565, speed=89.55 MPix/s
+    
+        == after ==
+        op=1, src_fmt=10020565, dst_fmt=10020565, speed=94.91 MPix/s
+
+commit b8007d042354fd9bd15711d9921e6f1ebb1c3c22
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Nov 2 16:12:42 2010 +0200
+
+    ARM: NEON optimization for scaled src_0565_8888 with nearest filter
+    
+    Benchmark from ARM Cortex-A8 @720MHz:
+        == before ==
+        op=1, src_fmt=10020565, dst_fmt=20028888, speed=8.99 MPix/s
+    
+        == after ==
+        op=1, src_fmt=10020565, dst_fmt=20028888, speed=76.98 MPix/s
+    
+        == unscaled ==
+        op=1, src_fmt=10020565, dst_fmt=20028888, speed=137.78 MPix/s
+
+commit 2e855a2b4a2bb7b3d2ed1826cb4426d14080ca67
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Nov 2 15:25:51 2010 +0200
+
+    ARM: NEON optimization for scaled src_8888_0565 with nearest filter
+    
+    Benchmark from ARM Cortex-A8 @720MHz:
+        == before ==
+        op=1, src_fmt=20028888, dst_fmt=10020565, speed=42.51 MPix/s
+    
+        == after ==
+        op=1, src_fmt=20028888, dst_fmt=10020565, speed=55.61 MPix/s
+    
+        == unscaled ==
+        op=1, src_fmt=20028888, dst_fmt=10020565, speed=117.99 MPix/s
+
+commit 4a09e472b8fbfae3e67d05a26ecc9c8a17225053
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Nov 2 14:39:02 2010 +0200
+
+    ARM: NEON optimization for scaled over_8888_0565 with nearest filter
+    
+    Benchmark from ARM Cortex-A8 @720MHz:
+        == before ==
+        op=3, src_fmt=20028888, dst_fmt=10020565, speed=10.29 MPix/s
+    
+        == after ==
+        op=3, src_fmt=20028888, dst_fmt=10020565, speed=36.36 MPix/s
+    
+        == unscaled ==
+        op=3, src_fmt=20028888, dst_fmt=10020565, speed=79.40 MPix/s
+
+commit 67a4991f3341d38bc3477c8f99f2ef581cd609e3
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Nov 2 14:29:57 2010 +0200
+
+    ARM: NEON optimization for scaled over_8888_8888 with nearest filter
+    
+    Benchmark from ARM Cortex-A8 @720MHz:
+        == before ==
+        op=3, src_fmt=20028888, dst_fmt=20028888, speed=12.73 MPix/s
+    
+        == after ==
+        op=3, src_fmt=20028888, dst_fmt=20028888, speed=28.75 MPix/s
+    
+        == unscaled ==
+        op=3, src_fmt=20028888, dst_fmt=20028888, speed=53.03 MPix/s
+
+commit 0b56244ac81f2bb2402629f8720c7e22893a24df
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Nov 2 19:16:46 2010 +0200
+
+    ARM: performance tuning of NEON nearest scaled pixel fetcher
+    
+    Interleaving the use of NEON registers helps to avoid some stalls
+    in NEON pipeline and provides a small performance improvement.
+
+commit 6e76af0d4b60ab74b309994926f28c532c5af155
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Nov 2 14:26:13 2010 +0200
+
+    ARM: macro template in C code to simplify using scaled fast paths
+    
+    This template can be used to instantiate scaled fast path functions
+    by providing main loop code and calling NEON assembly optimized
+    scanline processing functions from it. Another macro can be used
+    to simplify adding entries to fast path tables.
+
+commit 88014a0e6ffaa22b3ac363c2c73b72530cdba0cc
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Nov 1 10:03:59 2010 +0200
+
+    ARM: nearest scaling support for NEON scanline compositing functions
+    
+    Now it is possible to generate scanline processing functions
+    for the case when the source image is scaled with NEAREST filter.
+    
+    Only 16bpp and 32bpp pixel formats are supported for now. But the
+    others can be also added later when needed. All the existing NEON
+    fast path functions should be quite easy to reuse for implementing
+    fast paths which can work with scaled source images.
+
+commit 324712e48cf04df3cfcfc463fb221fcdf96e020a
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Nov 1 05:10:34 2010 +0200
+
+    ARM: NEON: source image pixel fetcher can be overridden now
+    
+    Added a special macro 'pixld_src' which is now responsible for fetching
+    pixels from the source image. Right now it just passes all its arguments
+    directly to 'pixld' macro, but it can be used in the future to provide
+    a special pixel fetcher for implementing nearest scaling.
+    
+    The 'pixld_src' has a lot of arguments which define its behavior. But
+    for each particular fast path implementation, we already know NEON
+    registers allocation and how many pixels are processed in a single block.
+    That's why a higher level macro 'fetch_src_pixblock' is also introduced
+    (it's easier to use because it has no arguments) and used everywhere
+    in 'pixman-arm-neon-asm.S' instead of VLD instructions.
+    
+    This patch does not introduce any functional changes and the resulting code
+    in the compiled object file is exactly the same.
+
+commit cb3f1830257a56f56abf7d50a8b34e215c616aec
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Nov 2 22:53:55 2010 +0200
+
+    ARM: fix 'vld1.8'->'vld1.32' typo in add_8888_8888 NEON fast path
+    
+    This was mostly harmless and had no effect on little endian systems.
+    But wrong vector element size is at least inconsistent and also
+    can theoretically cause problems on big endian ARM systems.
+
+commit fed4a2fde540916fc182917762b85b38052c04de
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Sep 24 16:36:16 2010 +0300
+
+    Do CPU features detection from 'constructor' function when compiled with gcc
+    
+    There is attribute 'constructor' supported since gcc 2.7 which allows
+    to have a constructor function for library initialization. This eliminates
+    an extra branch for each composite operation and also helps to avoid
+    complaints from race condition detection tools like helgrind.
+    
+    The other compilers may or may not support this attribute properly.
+    Ideally, the compilers should fail to compile the code with unknown
+    attribute, so the configure check should do the right job. But in
+    reality the problems are surely possible. Fortunately such problems
+    should be quite easy to find because NULL pointer dereference should
+    happen almost immediately if the constructor fails to run.
+    
+    clang 2.7:
+      supports __attribute__((constructor)) properly and pretends to be gcc
+    
+    tcc 0.9.25:
+      ignores __attribute__((constructor)), but does not pretend to be gcc
+
+commit 99699771cd82e108fbace655bf44013bdccde3bf
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Oct 31 01:40:57 2010 -0400
+
+    Delete the source_image_t struct.
+    
+    It serves no purpose anymore now that the source_class_t field is gone.
+
+commit f405b4079872ecc312f9514fdadc5287e8f20b08
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Oct 30 17:20:22 2010 -0400
+
+    [mmx] Mark some of the output variables as early-clobber.
+    
+    GCC assumes that input variables in inline assembly are fully consumed
+    before any output variable is written. This means it may allocate the
+    variables in the same register unless the output variables are marked
+    as early-clobber.
+    
+    From Jeremy Huddleston:
+    
+        I noticed a problem building pixman with clang and reported it to
+        the clang developers.  They responded back with a comment about
+        the inline asm in pixman-mmx.c and suggested a fix:
+    
+        """
+        Incidentally, Jeremy, in the asm that reads
+        __asm__ (
+        "movq %7, %0\n"
+        "movq %7, %1\n"
+        "movq %7, %2\n"
+        "movq %7, %3\n"
+        "movq %7, %4\n"
+        "movq %7, %5\n"
+        "movq %7, %6\n"
+        : "=y" (v1), "=y" (v2), "=y" (v3),
+          "=y" (v4), "=y" (v5), "=y" (v6), "=y" (v7)
+        : "y" (vfill));
+    
+        all the output operands except the last one should be marked as
+        earlyclobber ("=&y"). This is working by accident with gcc.
+        """
+    
+    Cc: jeremyhu@apple.com
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 9c19a85b0037d48fdd180a2c59ef05bdc4f46680
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Oct 28 20:14:03 2010 -0400
+
+    Remove workaround for a bug in the 1.6 X server.
+    
+    There used to be a bug in the X server where it would rely on
+    out-of-bounds accesses when it was asked to composite with a
+    window as the source. It would create a pixman image pointing
+    to some bogus position in memory, but then set a clip region
+    to the position where the actual bits were.
+    
+    Due to a bug in old versions of pixman, where it would not clip
+    against the image bounds when a clip region was set, this would
+    actually work. So when the pixman bug was fixed, a workaround was
+    added to allow certain out-of-bound accesses.
+    
+    However, the 1.6 X server is so old now that we can remove this
+    workaround. This does mean that if you update pixman to 0.22 or later,
+    you will need to use a 1.7 X server or later.
+
+commit 56748ea9a698daec8f445d2bebbbaed5515380af
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sat Oct 30 15:51:30 2010 +0300
+
+    Fixed broken configure check for __thread support
+    
+    Somehow the patch from [1] was not applied correctly, fixing that.
+    
+    1. http://lists.cairographics.org/archives/cairo/2010-September/020826.html
+
+commit ecc3612995d5d699a3dd49016a7e9ed40f0a4564
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Nov 1 17:52:29 2010 -0400
+
+    COPYING: Stop saying that a modification is currently under discussion.
+    
+    Also put the copyright text into a C comment for easier cut and paste.
+
+commit c993cd9614a47657228e3125bdcedc0bd0e34164
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Oct 27 17:21:06 2010 -0400
+
+    Version bump 0.21.1.
+    
+    The previous bump to 0.20.1 was a mistake; it belongs on the 0.20 branch.
+
+commit d890b684f68905ea5e242360f20e2a70251c89fd
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Oct 27 16:58:29 2010 -0400
+
+    Post-release version bump to 0.20.1
+
+commit c5e048d46c32c43172fb8d1c067e82587f916953
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Oct 27 16:51:40 2010 -0400
+
+    Pre-release version bump to 0.20.0
+
+commit 6a6d9758af478e9f5eae48ccf15f1cbea2cf30ed
+Author: Scott McCreary <scottmc2@gmail.com>
+Date:   Wed Oct 27 12:31:27 2010 -0700
+
+    Added check to find pthread on Haiku.
+
+commit 00fdb3d8e8d5c04d01c352315b6a8e2e2dfe53ae
+Author: Jon TURNEY <jon.turney@dronecode.org.uk>
+Date:   Sun Oct 24 15:58:39 2010 +0100
+
+    Plug another leak in alphamap test
+    
+    Even after commit e46be417cebac984a858da05e61d924889695c9e alphamap
+    test is still leaking the alphamap pixmap, leading to mmap() failures
+    on cygwin
+    
+    Signed-off-by: Jon TURNEY <jon.turney@dronecode.org.uk>
+
+commit 1c23142efa056124c594c72022e7f383e839d3b1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Oct 20 16:31:57 2010 -0400
+
+    Post-release version bump to 0.19.7
+
+commit d1051340155a099a523e71377b1d889eec8b972e
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Oct 20 16:25:55 2010 -0400
+
+    Pre-release version bump to 0.19.6
+
+commit a966cd04c16ad0c34b0f17e9021a4f3532575ca4
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Tue Oct 12 15:38:20 2010 +0200
+
+    Fix an overflow in the new radial gradient code
+    
+    huge-radial in the cairo test suite pointed out an undocumented
+    overflow in the radial gradient code.
+    By casting to pixman_fixed_48_16_t before doing the operations,
+    the overflow can be avoided.
+
+commit 70658f0a6bd451a21fbb43df7865a7dac95abe24
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Oct 20 16:09:44 2010 -0400
+
+    Remove the class field from source_image_t
+    
+    The linear gradient was the only image type that relied on the class
+    being stored in the image struct itself. With the previous changes, it
+    doesn't need that anymore, so we can delete the field.
+
+commit 741c30d9d9cf445fa2e3a2c43d37c221d49831b4
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Wed Oct 20 21:24:32 2010 +0200
+
+    Remove unused enum value
+    
+    The new linear gradient code doesn't use SOURCE_IMAGE_CLASS_VERTICAL
+    anymore and it was not used anywhere else.
+
+commit 9b72fd1b857494ea928795c89a4f827e56fe26d3
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Mon Oct 18 22:21:52 2010 +0200
+
+    Make classification consistent with rasterization
+    
+    Use the same computations to classify the gradient and to
+    rasterize it.
+    This improves the correctness of the classification by
+    avoiding integer division.
+
+commit 1d4f2d71facd5f2bbce74fbe3407ccea6cf4bea1
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Wed Aug 11 09:58:05 2010 +0200
+
+    Improve precision of linear gradients
+    
+    Integer division (without keeping the remainder) can discard a lot
+    of information. Doing the division maths in floating point (and
+    paying attention to error propagation) allows to greatly improve
+    the precision of linear gradients.
+
+commit f6ab20ca6604739b82311fc078d6ce850f43adc0
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Tue Oct 12 09:52:53 2010 +0200
+
+    Add comments about errors
+    
+    Explain how errors are introduced in the computation performed for
+    radial gradients.
+
+commit 1ca715ed1e6914e9bd9f050065e827d7a9e2efc9
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Sun Aug 15 09:07:33 2010 +0200
+
+    Draw radial gradients with PDF semantics
+    
+    Change radial gradient computations and definition to reflect the
+    radial gradients in PDF specifications (see section 8.7.4.5.4,
+    Type 3 (Radial) Shadings of the PDF Reference Manual).
+    
+    Instead of having a valid interpolation parameter value for every
+    point of the plane, define it only for points within the area
+    covered by the family of circles generated by interpolating or
+    extrapolating the start and end circles.
+    
+    Points outside this area are now transparent black (rgba 0 0 0 0).
+    Points within this area have the color associated with the maximum
+    value of the interpolation parameter in that point (if multiple
+    solutions exist within the range specified by the extend mode).
+
+commit e46be417cebac984a858da05e61d924889695c9e
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Oct 8 07:44:20 2010 -0400
+
+    Plug leak in the alphamap test.
+    
+    The images are being created with non-NULL data, so we have to free it
+    ourselves. This is important because the Cygwin tinderbox is running
+    out of memory and produces this:
+    
+        mmap failed on 20000 1507328
+        mmap failed on 40000 1507328
+        mmap failed on 20000 1507328
+        mmap failed on 40000 1507328
+        mmap failed on 40000 1507328
+        mmap failed on 40000 1507328
+    
+    http://tinderbox.x.org/builds/2010-10-05-0014/logs/pixman/#check
+
+commit 6ed7164de5f74b752d85834b53e89810f1d0a560
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Oct 6 02:40:39 2010 -0400
+
+    Add no-op combiners for DST and the CA versions of the HSL operators.
+    
+    We already exit early for DST, but for the HSL operators with
+    component alpha, we crash at the moment. Fix that by adding a dummy
+    combine_dst() function.
+
+commit 233b27257b63ecd502c6392e5ef3a7f736f14365
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Oct 5 11:05:25 2010 -0400
+
+    test: Add some more colors to the color table in composite.c
+    
+    Specifically, add transparent black and superluminescent white with
+    alpha = 0.
+
+commit 3f7da59352b604bd6974230d0b149e8e7da77b5c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Oct 5 09:49:45 2010 -0400
+
+    test: Parallelize composite.c with OpenMP
+    
+    Each test uses the test number as the random number seed; if it
+    didn't, all the threads would run the same tests since they would all
+    start from the same seed.
+
+commit a10ccc9f303ca6b4577afe68cc6b2d8840de5a27
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Mar 7 11:26:16 2010 -0500
+
+    test: Change composite so that it tests randomly generated images
+    
+    Previously this test would try to exhaustively test all combinations
+    of formats and operators, which meant that it would take hours to run.
+    Instead, generate images randomly and test compositing those.
+    
+    Cc: chris@chris-wilson.co.uk
+
+commit 55e4065cbbc5ffe2ce1986b51ef63e8a0b50fccb
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Mar 7 11:24:30 2010 -0500
+
+    test: Fix eval_diff() so that it provides useful error values.
+    
+    Previously, this function would evaluate the error under the
+    assumption that the format was 565 or wider. This patch changes it to
+    take the actual format into account.
+    
+    With that fixed, we can turn on testing for the rest of the formats.
+    
+    Cc: chris@chris-wilson.co.uk
+
+commit fe411cf2ac4d5b26a319b906dee87e0cc69d2ad6
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Mar 7 10:31:04 2010 -0500
+
+    test: Fix bug in color_correct() in composite.c
+    
+    This function was using the number of bits in a channel as if it were
+    a mask, which led to many spurious errors. With that fixed, we can
+    turn on testing for all formats where all channels have 5 or more
+    bits.
+    
+    Cc: chris@chris-wilson.co.uk
+
+commit 4e89a5b7f3b039fcc86dff7fb8bec79884c913e8
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Oct 5 11:08:42 2010 -0400
+
+    Remove broken optimizations in combine_disjoint_over_u()
+    
+    The first broken optimization is that it checks "a != 0x00" where it
+    should check "s != 0x00". The other is that it skips the computation
+    when alpha is 0xff. That is wrong because in the formula:
+    
+         min (1, (1 - Aa)/Ab)
+    
+    the render specification states that if Ab is 0, the quotient is
+    defined to be positive infinity. That is the case even if (1 - Aa) is 0.
+
+commit 8d76c1b3391e1165aaf9e0f331749aee1394f62c
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Oct 4 04:49:08 2010 +0300
+
+    ARM: restore fallback to ARMv6 implementation from NEON in the delegate chain
+    
+    After fast path cache introduction, the overhead of having this fallback is
+    insignificant. On the other hand, some of the ARM assembly optimizations (for
+    example nearest neighbor scaling) do not need NEON.
+
+commit c748650d700c2f18f1587f06ada3b58d6ddc18d3
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Sep 8 09:30:23 2010 +0300
+
+    Use more unrolling for scaled src_0565_0565 with nearest filter
+    
+    Benchmark from Intel Core i7 860:
+    
+        == before ==
+        op=1, src_fmt=10020565, dst_fmt=10020565, speed=1335.29 MPix/s
+    
+        == after ==
+        op=1, src_fmt=10020565, dst_fmt=10020565, speed=1550.96 MPix/s
+    
+        == performance of nonscaled src_0565_0565 operation as a reference ==
+        op=1, src_fmt=10020565, dst_fmt=10020565, speed=2401.31 MPix/s
+    
+    Benchmark from ARM Cortex-A8:
+    
+        == before ==
+        op=1, src_fmt=10020565, dst_fmt=10020565, speed=81.79 MPix/s
+    
+        == after ==
+        op=1, src_fmt=10020565, dst_fmt=10020565, speed=89.55 MPix/s
+    
+        == performance of nonscaled src_0565_0565 operation as a reference ==
+        op=1, src_fmt=10020565, dst_fmt=10020565, speed=197.44 MPix/s
+
+commit a520c15e1134d9e801bc2ab461a3c5ade60544f2
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Sep 23 23:41:50 2010 +0300
+
+    ARM: added 'neon_composite_out_reverse_8_0565' fast path
+    
+    == before ==
+    
+        outrev_8_0565 =  L1:  22.91  L2:  22.40  M: 18.75 ( 10.47%)
+                         HT: 12.62   VT: 12.22   R: 11.32  RT:  5.30 (  58Kops/s)
+    
+    == after ==
+    
+        outrev_8_0565 =  L1: 176.27  L2: 151.70  M:108.79 ( 60.81%)
+                         HT: 50.43   VT: 37.16   R: 32.26  RT:  9.62 (  97Kops/s)
+
+commit d8820360f79d07e03c91ecd201880bc6b1706f19
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Sep 23 22:28:55 2010 +0300
+
+    ARM: added 'neon_composite_add_0565_8_0565' fast path
+    
+    == before ==
+    
+        add_0565_8_0565 =  L1:  14.05  L2:  14.03  M: 11.57 ( 12.94%)
+                           HT:  8.31   VT:  8.10   R:  7.47  RT:  3.64 (  42Kops/s)
+    
+    == after ==
+    
+        add_0565_8_0565 =  L1: 123.36  L2:  94.70  M: 74.36 ( 83.15%)
+                           HT: 31.17   VT:  23.97  R: 21.06  RT:  6.42 (  70Kops/s)
+
+commit 2f6c7b4f9d36261d2efe494a925faf063376ba30
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri May 21 16:31:03 2010 +0300
+
+    ARM: NEON: added forgotten cache preload for over_n_8888/over_n_0565
+    
+    Prefetch provides up to 40-50% better performance when working
+    with large images and/or when having lots of L2 cache misses
+    on ARM Cortex-A8 @ 720MHz:
+    
+    == before ==
+    
+        over_n_8888 =  L1: 225.83  L2: 181.02  M: 55.57 ( 41.41%)
+                       HT: 38.96   VT: 36.92   R: 32.84  RT: 14.15 ( 123Kops/s)
+    
+        over_n_0565 =  L1: 153.91  L2: 149.69  M: 83.17 ( 30.95%)
+                       HT: 50.41   VT: 49.15   R: 40.56  RT: 15.45 ( 131Kops/s)
+    
+    == after ==
+    
+        over_n_8888 =  L1: 222.39  L2: 170.95  M: 76.86 ( 57.27%)
+                       HT: 58.80   VT: 53.03   R: 45.51  RT: 14.13 ( 124Kops/s)
+    
+        over_n_0565 =  L1: 151.87  L2: 149.54  M:125.63 ( 46.80%)
+                       HT: 67.85   VT: 57.54   R: 50.21  RT: 15.32 ( 130Kops/s)
+
+commit b924bb1f8191cc7c386d8211d9822aeeaadcab44
+Author: Mika Yrjola <mika.yrjola@movial.com>
+Date:   Fri Oct 1 16:17:50 2010 +0300
+
+    Fix "syntax error: empty declaration" warnings.
+    
+    These minor changes should fix a large number of
+    macro declaration - related "syntax error:  empty declaration" warnings
+    which are seen while compiling the code with the Solaris Studio
+    compiler.
+
+commit 73c1fefa1b99efa36b74599f455df9426209378e
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Sep 28 00:51:07 2010 -0400
+
+    Delete simple repeat code
+    
+    This was supposedly an optimization, but it has pathological cases
+    where it definitely isn't. For example a 1 x n image will cause it to
+    have terrible memory access patterns and to generate a ton of modulus
+    operations.
+    
+    Since no one has ever measured whether it actually is an improvement,
+    and since it is doing the repeating at the wrong stage in the
+    pipeline, and since with the previous commit it can't be triggered
+    anymore because we now require SAMPLES_COVER_CLIP for regular fast
+    paths, just delete it.
+
+commit a4d1c9d3831751008db61a48d6a6cb12ed33f314
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Sep 28 00:42:25 2010 -0400
+
+    Fix bug in FAST_PATH_STD_FAST_PATH
+    
+    The standard fast paths deal with two kinds of images: solids and
+    bits. These two image types require different flags, but
+    PIXMAN_STD_FAST_PATH uses the same ones for both.
+    
+    This patch makes it so that solid images just get the standard flags,
+    while bits images must be untransformed and contain the destination clip
+    within the sample grid.
+    
+    This means that the old FAST_PATH_COVERS_CLIP flag is now not used
+    anymore, so it can be deleted.
+
+commit 10e13135c3538f0909f27eaacc17e9e13f199a7c
+Author: Dmitri Vorobiev <dmitri.vorobiev@movial.com>
+Date:   Tue Sep 28 14:42:02 2010 +0300
+
+    Some clean-ups in fence_malloc() and fence_free()
+    
+    This patch removes an unnecessary typecast of MAP_FAILED,
+    replaces an erroneous free() by the correct munmap() in the
+    error path for a failing mprotect(), and, finally, removes
+    redundant calls to mprotect() that aren't necessary, because
+    munmap() doesn't call for any specific memory protection.
+
+commit ba693d2e88b6f4c871d804fb62d7435915c85dfc
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Sep 28 02:52:02 2010 -0400
+
+    Fix search-and-replace issue in lowlevel-blt-bench.c
+
+commit 77d3e5f6ff719f53398b5675e5219d0e3b9746c1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Sep 17 09:21:09 2010 -0400
+
+    Rename all the fast paths with _8000 in their names to _8
+    
+    This inconsistent naming somehow survived the refactoring from a while
+    back.
+
+commit ba69989374fe9cbe5151c5aac7b824da0806f94a
+Author: Liu Xinyun <xinyun.liu@intel.com>
+Date:   Sat Sep 25 14:56:38 2010 +0800
+
+    Remove cache prefetch code.
+    
+    The performance is decreased with cache prefetch, especially for
+    ATOM. So remove this code. Following is the experiment.
+    
+    old: 0.19.5-with-cache-prefetch
+    new: 0.19.5-without-cache-prefetch
+    
+    CPU: Intel Atom N270@1.6GHz
+    OS: MeeGo (32 bits)
+    Speedups
+    ========
+    image-rgba                    poppler-0    17125.68 (17279.58 0.92%) -> 14765.36 (15926.49 3.54%):  1.16x speedup
+    image-rgba                  ocitysmap-0    9008.25 (9040.41 7.50%) -> 8277.94 (8343.09 5.44%):  1.09x speedup
+    image-rgba          xfce4-terminal-a1-0    18020.76 (18230.68 0.97%) -> 16703.77 (16712.42 1.22%):  1.08x speedup
+    image-rgba         gnome-terminal-vim-0    25081.38 (25133.38 0.24%) -> 23407.47 (23652.98 0.54%):  1.07x speedup
+    image-rgba          firefox-talos-gfx-0    57916.97 (57973.20 0.11%) -> 54556.64 (54624.55 0.39%):  1.06x speedup
+    image-rgba       firefox-planet-gnome-0    102377.47 (103496.63 0.70%) -> 96816.65 (97075.54 0.15%):  1.06x speedup
+    image-rgba         swfdec-giant-steps-0    12376.24 (12616.84 1.02%) -> 11705.30 (11825.20 1.06%):  1.06x speedup
+    
+    CPU: Intel Core(TM)2 Duo CPU T9600@2.80GHz
+    OS: Ubuntu 10.04 (64bits)
+    Speedups
+    ========
+    image-rgba                  ocitysmap-0    2671.46 (2691.82 8.55%) -> 2296.20 (2307.26 5.77%):  1.16x speedup
+    image-rgba         swfdec-giant-steps-0    1614.55 (1615.18 1.68%) -> 1532.84 (1538.52 0.72%):  1.05x speedup
+    
+    Signed-off-by: Liu Xinyun <xinyun.liu@intel.com>
+    Signed-off-by: Chen Miaobo <miaobo.chen@intel.com>
+
+commit 56777f3f675869806cd30bcd21a5b39d788507cb
+Author: Dmitri Vorobiev <dmitri.vorobiev@movial.com>
+Date:   Wed Sep 22 12:34:57 2010 +0300
+
+    Use <sys/mman.h> macros only when they are available
+    
+    Not all systems are regular Unices, so let's be careful with the
+    mmap()-related stuff, which might be unavailable. This patch makes
+    sure that mmap() and friends are used only when the <sys/mman.h>
+    header is found.
+
+commit 39524a4687391c68f4177e8671f4b2bd39e05850
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Sep 21 14:20:43 2010 -0400
+
+    Revert "add enable-cache-prefetch option"
+    
+    Revert this accidentally committed patch.
+    
+    This reverts commit 19ea0e16b958e5abe491365c203293ab372f3586.
+
+commit e97da2104967f4c99aed40e89f3e0141ceed7040
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Sep 21 14:12:00 2010 -0400
+
+    If MAP_ANONYMOUS is not defined, define it to MAP_ANON.
+    
+    This hopefully fixes the build failure on OS X.
+
+commit 19ea0e16b958e5abe491365c203293ab372f3586
+Author: Liu Xinyun <xinyun.liu@intel.com>
+Date:   Wed Sep 22 00:15:10 2010 +0800
+
+    add enable-cache-prefetch option
+    
+    OK. here is the work to clear all cache prefetch. Please review it. 3x
+    
+    On Tue, Sep 21, 2010 at 11:36:30PM +0800, Soeren Sandmann wrote:
+    > Liu Xinyun <xinyun.liu@intel.com> writes:
+    >
+    > >    This patch is to add a new configuration option: enable-cache-prefetch,
+    > > which is default yes.
+    > >
+    > >    Here is a link which talks on cache issue.
+    > >    http://lists.freedesktop.org/archives/pixman/2010-June/000218.html
+    > >
+    > >    When disable it on Atom CPU(configured with --enable-cache-prefetch=no),
+    > > it will have a little performance gain. Here is the patch.
+    >
+    > I think the cache prefetch code should just be deleted outright. No
+    > benchmarks that I'm aware of show it to be an improvement.
+    >
+    >
+    > Thanks,
+    > Soren
+    
+    >From bca2192ef524bcae4eea84d0ffed9e8c4855675f Mon Sep 17 00:00:00 2001
+    From: Liu Xinyun <xinyun.liu@intel.com>
+    Date: Wed, 22 Sep 2010 00:11:56 +0800
+    Subject: [PATCH] remove cache prefetch
+
+commit edd173396604b052fd76971d0efa0c8db40cf1f3
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Sep 21 10:18:44 2010 -0400
+
+    Post-release version bump to 0.19.5
+
+commit e5b3a6e7105af590d72e2ae986f9985f71cc88f5
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Sep 21 10:11:34 2010 -0400
+
+    Pre-release version bump to 0.19.4
+
+commit 0742ba41646853a5edf90c2f3102f49b248321ee
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Sep 21 10:05:52 2010 -0400
+
+    compute_composite_region32: Zero extents before returning FALSE.
+    
+    If the extents of the composite region are broken such that x2 <= x1
+    or y2 <= y1, then we need to zero the extents before returning so that
+    the region won't be completely broken when calling
+    pixman_region32_fini().
+
+commit 7cd4f2fa201c4dc846153c022423e3dced2cfb13
+Author: Jonathan Morton <jonathan.morton@movial.com>
+Date:   Fri Sep 17 17:52:23 2010 +0300
+
+    Add a lowlevel blitter benchmark
+    
+    This test is a modified version of Siarhei's compositor throughput
+    benchmark.  It's expanded with explicit reporting of memory bandwidth
+    consumption for the M-test, and with an additional 8x8-random test
+    intended to determine peak ops/sec capability.  There are also quite a
+    lot more operations tested for.
+
+commit eab3a77877b0e850c46f95dacffb31994e6a7e41
+Author: Dmitri Vorobiev <dmitri.vorobiev@movial.com>
+Date:   Fri Sep 17 17:52:22 2010 +0300
+
+    Add noinline macro
+    
+    This patch adds a noinline macro, which expands to compiler-dependent
+    keywords that tell the compiler to never inline a function.
+
+commit cab3261c0da6e833d803a7f3ccab600adca7abe1
+Author: Dmitri Vorobiev <dmitri.vorobiev@movial.com>
+Date:   Fri Sep 17 17:52:21 2010 +0300
+
+    Add gettime() routine to test utils
+    
+    Impending benchmark code will need a function to get current time
+    in seconds, and this patch introduces such a routine. We try to use
+    the POSIX gettimeofday() function when available, and fall back to
+    clock() when not.
+
+commit fd3c87d460a6d1803880d17af416cce344a086c4
+Author: Dmitri Vorobiev <dmitri.vorobiev@movial.com>
+Date:   Fri Sep 17 17:52:20 2010 +0300
+
+    Move aligned_malloc() to utils
+    
+    The aligned_malloc() routine will be used in more than one test utility.
+    At least, a low-level blitter benchmark needs it. Therefore, let's make
+    this function a part of common test utilities code.
+
+commit f474783607e51183d31814972d0f055907876079
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Sep 16 10:33:23 2010 -0400
+
+    Enable bits_image_fetch_bilinear_affine_normal_r5g6b5
+
+commit 91521d30ab9b033a35fb7797e4566d575ad1c1dc
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Sep 16 10:33:10 2010 -0400
+
+    Enable bits_image_fetch_bilinear_affine_reflect_r5g6b5
+
+commit 372d7b954aee4f3a2ad94ed8484a2b4084db0c7c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Sep 16 10:33:00 2010 -0400
+
+    Enable bits_image_fetch_bilinear_affine_none_r5g6b5
+
+commit a826ae0e3a0279557e892856ef1333971b105d01
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Sep 16 10:32:44 2010 -0400
+
+    Enable bits_image_fetch_bilinear_affine_pad_r5g6b5
+
+commit c5238bd1809433af5b0efc3add23c1ccb4da884c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Sep 16 10:32:27 2010 -0400
+
+    Enable bits_image_fetch_bilinear_affine_normal_a8
+
+commit d12daefcdb8845e539309df46b08916829a86d9c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Sep 16 10:32:12 2010 -0400
+
+    Enable bits_image_fetch_bilinear_affine_reflect_a8
+
+commit 9388be32932898ed424c8916a57a6201f995416b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Sep 16 10:31:57 2010 -0400
+
+    Enable bits_image_fetch_bilinear_affine_none_a8
+
+commit 8e4d4e8d110c379cb85f53752660c6b2fab33d5e
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Sep 16 10:31:45 2010 -0400
+
+    Enable bits_image_fetch_bilinear_affine_pad_a8
+
+commit ce1f6c50b4ddf8f7c48a3b272c19d281beca4b34
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Aug 28 02:41:20 2010 -0400
+
+    Enable bits_image_fetch_bilinear_affine_normal_x8r8g8b8
+
+commit 83f2ee3e958a02fc85a2dc6eddc048b63d74cd5c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Aug 28 02:41:08 2010 -0400
+
+    Enable bits_image_fetch_bilinear_affine_reflect_x8r8g8b8
+
+commit be37ae331c6e5e9539b0c1eac6e196366532df29
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Aug 28 02:40:56 2010 -0400
+
+    Enable bits_image_fetch_bilinear_affine_none_x8r8g8b8
+
+commit 5f8a9bebc04deb55de79e7443578779a93b8cfa6
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Aug 28 02:40:46 2010 -0400
+
+    Enable bits_image_fetch_bilinear_affine_pad_x8r8g8b8
+
+commit c59584cb862ef8774a2ef1eabb87fef18506d10f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Aug 28 02:40:16 2010 -0400
+
+    Enable bits_image_fetch_bilinear_affine_normal_a8r8g8b8
+
+commit 2292cff304fd5aad6dbcc86342a57ea523136de6
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Aug 28 02:40:03 2010 -0400
+
+    Enable bits_image_fetch_bilinear_affine_reflect_a8r8g8b8
+
+commit 8b29162693adc30dbb5c0f60098d2853c3942c36
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Aug 28 02:39:51 2010 -0400
+
+    Enable bits_image_fetch_bilinear_affine_none_a8r8g8b8
+
+commit e8555874e122f6e113f85e37059932457ee509cb
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Aug 28 02:39:37 2010 -0400
+
+    Enable bits_image_fetch_bilinear_affine_pad_a8r8g8b8
+
+commit f9778c15e9c01c02e0002edfc4d4a1d517d14d87
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun May 23 04:44:33 2010 -0400
+
+    Use a macro to generate some {a,x}8r8g8b8, a8, and r5g6b5 bilinear fetchers.
+    
+    There are versions for all combinations of x8r8g8b8/a8r8g8b8 and
+    pad/reflect/none/normal repeat modes. The bulk of each scaler is an
+    inline function that takes a format and a repeat mode as parameters.
+    
+    The new scalers are all commented out, but the next commits will
+    enable them one at a time to facilitate bisecting.
+
+commit 6d1e10a8b5c456ee501a309f5cf2f801efcf63b0
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Jul 14 16:27:27 2010 -0400
+
+    test: Add affine-test
+    
+    This test tests compositing with various affine transformations. It is
+    almost identical to scaling-test, except that it also applies a random
+    rotation in addition to the random scaling and translation.
+
+commit 4fa33537d7093ac759b7ded1718a628dacd2aff4
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Sep 12 06:07:41 2010 -0400
+
+    analyze_extents: Fast path for non-transformed BITS images
+    
+    Profiling various cairo traces showed that we were spending a lot of
+    time in analyze_extents and compute_sample_extents(). This was
+    especially bad for glyphs where all this computation was completely
+    unnecessary.
+    
+    This patch adds a fast path for the case of non-transformed BITS
+    images. The result is approximately a 6% improvement on the
+    firefox-talos-gfx benchmark:
+    
+    Before:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image            firefox-talos-gfx   13.797   13.848   0.20%    6/6
+    
+    After:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image            firefox-talos-gfx   12.946   13.018   0.39%    6/6
+
+commit c97881fe3c3a0af78cf5953d2c135654440b0269
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Sep 16 08:35:05 2010 -0400
+
+    Move some of the FAST_PATH_COVERS_CLIP computation to pixman-image.c
+    
+    When an image is solid or repeating, the FAST_PATH_COVERS_CLIP flag
+    can be set in compute_image_info().
+    
+    Also the code that turned this flag off in pixman.c was not correct;
+    it didn't take transformations into account. With this patch, pixman.c
+    doesn't set the flag by default, but instead relies on the call to
+    compute_samples_extents() to set it when possible.
+
+commit 3411f9399c3ab6d642f350ea8e4c355f719d01d9
+Author: Tor Lillqvist <tml@iki.fi>
+Date:   Wed Sep 15 11:53:47 2010 -0400
+
+    Support __thread on MINGW 4.5
+    
+    By the way, it seems that with gcc 4.5.0 from mingw.org, __thread, sse
+    and mmx work fine.
+    
+    I added the below to pixman 0.18 and as far as I can see, it works.
+    make check reports no problems. (Earlier I had to use --disable-mmx
+    and --disable-sse2.) Also gtk-demo and gimp run fine.
+    
+    (Also a change to get rid of the warnings about -fvisibility being ignored.)
+
+commit add0fd1bac84a5b6dddf7632b4100d6b3f2ebc18
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Aug 29 22:46:09 2010 -0400
+
+    Clip composite region against the destination alpha map extents.
+    
+    Otherwise we can end up writing outside the alpha map.
+
+commit af2f0080feada1abe569e2031acacf51be7f8f68
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Aug 29 17:07:40 2010 -0400
+
+    Remove FAST_PATH_NARROW_FORMAT flag if there is a wide alpha map
+    
+    If an image has an alpha map that has wide components, then we need to
+    use 64 bit processing for that image. We detect this situation in
+    pixman-image.c and remove the FAST_PATH_NARROW_FORMAT flag.
+    
+    In pixman-general, the wide/narrow decision is now based on the flags
+    instead of on the formats.
+
+commit 0afc61341526887c59d6dd9e43073f73451a74c6
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Aug 29 17:03:01 2010 -0400
+
+    Rename FAST_PATH_NO_WIDE_FORMAT to FAST_PATH_NARROW_FORMAT
+    
+    This avoids a negative in the name. Also, by renaming the "wide"
+    variable in pixman-general.c to "narrow" and fixing up the logic
+    correspondingly, the code there reads a lot more straightforwardly.
+
+commit ae77548f0d9ca95a86a466fc4ff099e000716067
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Aug 29 16:59:02 2010 -0400
+
+    Update and extend the alphamap test
+    
+    - Test many more combinations of formats
+    
+    - Test destination alpha maps
+    
+    - Test various different alpha origins
+    
+    Also add a transformation to the destination, but comment it out
+    because it is actually broken at the moment (and pretty difficult to
+    fix).
+
+commit dc9fe269ea2a1a0b8334d0936e2541af48b81bc7
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Sep 13 14:34:34 2010 -0400
+
+    Add fence_malloc() and fence_free().
+    
+    These variants of malloc() and free() try to surround the allocated
+    memory with protected pages so that out-of-bounds accesses will cause
+    a segmentation fault.
+    
+    If mprotect() and getpagesize() are not available, these functions are
+    simply equivalent to malloc() and free().
+
+commit f4dc73bad4f662bdc3c94cb1e224f9a1989beba5
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Sep 12 04:35:08 2010 -0400
+
+    Do opacity computation with shifts instead of comparing with 0
+    
+    Also add a COMPILE_TIME_ASSERT() macro and use it to assert that the
+    shift is correct.
+
+commit 517a77a992255cb6dae7e74bc6f6b9ac21003ac1
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Sep 8 09:16:12 2010 +0300
+
+    SSE2 optimization for scaled over_8888_8888 operation with nearest filter
+    
+    This is the first demo implementation, it should be possible to
+    generalize it later to cover more operations with fewer lines of code.
+    
+    It should be also possible to introduce the use of '__builtin_constant_p'
+    gcc builtin function for an efficient way of checking if 'unit_x' is known
+    to be zero at compile time (when processing padding pixels for NONE, or
+    PAD repeat).
+    
+    Benchmarks from Intel Core i7 860:
+    
+    == before (nearest OVER) ==
+    op=3, src_fmt=20028888, dst_fmt=20028888, speed=142.01 MPix/s
+    
+    == after (nearest OVER) ==
+    op=3, src_fmt=20028888, dst_fmt=20028888, speed=314.99 MPix/s
+    
+    == performance of nonscaled operation as a reference ==
+    op=3, src_fmt=20028888, dst_fmt=20028888, speed=652.09 MPix/s
+
+commit abc90dad57f03bf9293fc825835c6f0fddc6771b
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Sep 16 18:25:40 2010 +0300
+
+    NONE repeat support for fast scaling with nearest filter
+    
+    Implemented very similar to PAD repeat.
+    
+    And gcc also seems to be able to completely eliminate the
+    code responsible for left and right padding pixels for OVER
+    operation with NONE repeat.
+
+commit 45833d5b198507e9e69b918459eaaf6088e5de00
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Sep 16 17:10:40 2010 +0300
+
+    PAD repeat support for fast scaling with nearest filter
+    
+    When processing pixels from the left and right padding, the same
+    scanline function is used with 'unit_x' set to 0.
+    
+    It actually appears that gcc can handle this quite efficiently. When
+    using 'restrict' keyword, it is able to optimize the whole operation
+    performed on left or right padding pixels to a small unrolled loop
+    (the code is reduced to a simple fill implementation):
+    
+        9b30:       89 08                   mov    %ecx,(%rax)
+        9b32:       89 48 04                mov    %ecx,0x4(%rax)
+        9b35:       48 83 c0 08             add    $0x8,%rax
+        9b39:       49 39 c0                cmp    %rax,%r8
+        9b3c:       75 f2                   jne    9b30
+    
+    Without 'restrict' keyword, there is one instruction more: reloading
+    source pixel data from memory in the beginning of each iteration. That
+    is slower, but also acceptable.
+
+commit 3db0cc5c75a4a764726059511fa6d67082fbeb64
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Sep 17 16:22:25 2010 +0300
+
+    Introduce a fake PIXMAN_REPEAT_COVER constant
+    
+    We need to implement a true PIXMAN_REPEAT_NONE support later (padding
+    the source with zero pixels). So it's better not to use PIXMAN_REPEAT_NONE
+    for handling FAST_PATH_SAMPLES_COVER_CLIP special case.
+
+commit e9b0740af76853f58df72cd40cd7cb4e2ac7261b
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Sep 16 13:02:18 2010 +0300
+
+    Nearest scaling fast path macro split into two parts
+    
+    Scanline processing is now split into a separate function. This provides
+    an easy way of overriding it with a platform specific implementation,
+    which may use SIMD optimizations. Only basic C data types are used as
+    the arguments for this function, so it may be implemented entirely in
+    assembly or be generated by some JIT engine.
+    
+    Also as a result of this split, the complexity of code is reduced a
+    bit and now it should be easier to introduce support for the currently
+    missing NONE, PAD and REFLECT repeat types.
+
+commit 066ce191a6d3bb970b5024c070193cac4c130418
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Sep 16 12:31:27 2010 +0300
+
+    Nearest scaling fast path macros moved to 'pixman-fast-path.h'
+    
+    These macros with some modifications can be reused later by
+    various platform specific implementations, introducing SIMD
+    optimizations for nearest scaling fast paths.
+
+commit fb819c0e93b301757f8549cf7738c2b8c356ee7e
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Aug 29 16:26:45 2010 -0400
+
+    Add FAST_PATH_NO_ALPHA_MAP to the standard destination flags.
+    
+    We can't in general take a fast path if the destination has an alpha
+    map.
+
+commit ba6c98fc4b8f0ee02b846fd31c7e93e18e92d0af
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Sep 9 12:02:59 2010 +0300
+
+    test: detection of possible floating point registers corruption
+    
+    Added a pair of macros which can help to detect corruption
+    of floating point registers after a function call. This may
+    happen if _mm_empty() call is forgotten in MMX/SSE2 fast
+    path code, or ARM NEON assembly optimized function
+    forgets to save/restore d8-d15 registers before use.
+
+commit e470c0dc5bcbf1e153bf035a823a7bdf629e6e25
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Sep 7 01:15:57 2010 +0300
+
+    ARM: added 'neon_composite_over_0565_8_0565' fast path
+
+commit a5bf7c3b1a103c6b676c864df009b1f0ad3f8195
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Sep 7 01:10:43 2010 +0300
+
+    ARM: helper macros for conversion between 8888/x888/0565 formats
+
+commit 8e299702f315fc1f0f97ab93d905ed5d9c41410e
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Sep 7 01:05:44 2010 +0300
+
+    ARM: common init/cleanup macro for saving/restoring NEON registers
+    
+    This is a typical prologue/epilogue for many NEON fast path functions, so
+    it makes sense to provide common reusable macros for it in the header file.
+
+commit e29d9dfcb5935777333f6239b95c18c3da697ab2
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Sep 2 19:43:08 2010 -0400
+
+    Silence some warnings about uninitialized variables
+    
+    Neither were real problems, but GCC was complaining about them.
+
+commit 27f7852b5ac8d137c917e653fb7113f419a4c77a
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Aug 31 00:30:54 2010 -0400
+
+    When pixman_compute_composite_region32() returns FALSE, don't fini the region.
+    
+    The rule is that the region passed in must be initialized and that the
+    region returned will still be valid. Ie., the lifecycle is the
+    responsibility of the caller, regardless of what the function returns.
+    
+    Previously, compute_composite_region32() would finalize the region and
+    then return FALSE, and then the caller would finalize the region
+    again, leading to memory corruption in some cases.
+
+commit df6dbc90248a41b5b8362010e5b8d34358688786
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Aug 30 00:16:07 2010 -0400
+
+    Store a2b2g2r2 pixel through the WRITE macro
+    
+    Otherwise, accessor functions won't work.
+
+commit f42419a3e493bb325163a711fe50296c4c948edd
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Aug 23 18:24:32 2010 +0300
+
+    ARM: added 'neon_composite_over_8888_8_0565' fast path
+
+commit 765bde32e0a2e81fbbe15acc0f491695ba2726e8
+Author: Maarten Bosmans <mkbosmans@gmail.com>
+Date:   Mon Aug 30 08:55:00 2010 +0200
+
+    Add *.exe to .gitignore
+
+commit 85964082618fc5350cafcd22b48ba1e02cbc4276
+Author: Maarten Bosmans <mkbosmans@gmail.com>
+Date:   Sun Aug 29 06:28:42 2010 +0200
+
+    Use windows.h directly for mingw32 build
+    
+    This patch addresses the issue discussed in
+    http://lists.freedesktop.org/archives/pixman/2010-April/000163.html
+    
+    There were only two clashing identifiers.  The first one is IN, which
+    obviously causes problems in Pixman for lines like
+    
+        PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
+    
+    Fortunately the mingw headers provide a solution: by defining
+    _NO_W32_PSEUDO_MODIFIERS, these stupid symbols are skipped.
+    
+    The other name is UINT64, used in pixman-mmx.c. I renamed that
+    function to to_uint64, but maybe another name is more appropriate.
+
+commit 5b99710042e812d294f571ad6d86fb003a8071e3
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Aug 23 09:27:38 2010 -0400
+
+    Be more paranoid about checking for GTK+
+    
+    From time to time people run into issues where the configure script
+    detects GTK+ when it is either not installed, or not functional due to
+    a missing pixman. Most recently:
+    
+      https://bugs.freedesktop.org/show_bug.cgi?id=29736
+    
+    This patch makes the configure script more paranoid by
+    
+    - always using PKG_CHECK_MODULES and not PKG_CHECK_EXISTS, since it
+    seems PKG_CHECK_EXISTS will sometimes return true even if a dependency
+    of GTK+, such as pixman-1, is missing.
+    
+    - explicitly checking that pixman-1 is installed before enabling GTK+.
+    
+    Cc: my.somewhat.lengthy.loginname@gmail.com
+
+commit 5530bcab26508f38a25d2afffa7fef20f35a68e1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Aug 22 11:09:45 2010 -0400
+
+    Merge pixman_image_composite32() and do_composite().
+    
+    There is not much point having a separate function that just validates
+    the images. Also add a boolean return to lookup_composite_function()
+    so that we can return if no composite function is found.
+
+commit a8ea889e5e3029c2aad0e54e849783242daca274
+Author: Benjamin Otte <otte@redhat.com>
+Date:   Mon Aug 23 18:20:09 2010 +0200
+
+    region: Fix pixman_region_translate() clipping bug
+    
+    Fixes the region-translate test case by clipping region translations to
+    the newly defined PIXMAN_REGION_MIN/MAX and using the newly introduced
+    type overflow_int_t to check for the overflow.
+    Also uses INT16_MAX or INT32_MAX for these values instead of relying on
+    the size of short and int types.
+
+commit 4d8fb1bc01654ba0d331e6aea8127920e8cdf0b8
+Author: Benjamin Otte <otte@redhat.com>
+Date:   Tue Aug 24 12:17:18 2010 +0200
+
+    region: Add a new test region-translate
+    
+    This test exercises a bug in pixman_region32_translate(). The function
+    clips the region to int16 coordinates SHRT_MIN/SHRT_MAX.
+
+commit 5ff359b8a0a4573722b1cba141b8f00cf24b6f09
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Aug 21 06:39:44 2010 -0400
+
+    Post-release version bump to 0.19.3
+
+commit 39308ed3b07afb92140770007124b7e544b83090
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Aug 21 06:33:19 2010 -0400
+
+    Pre-release version bump to 0.19.2
+
+commit 393ccab74e9aa466e2fdd91319012e2c18f4ef84
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Aug 16 07:24:48 2010 -0400
+
+    Only try to compute the FAST_SAMPLES_COVER_CLIP for bits images
+    
+    It doesn't make sense in other cases, and the computation would make
+    use of image->bits.{width,height} which led to uninitialized memory
+    accesses when the image wasn't of type BITS.
+
+commit da6f33a798bf2ea10df610ccf1d9506d63d1a28c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Aug 9 20:54:49 2010 -0400
+
+    Introduce new FAST_PATH_SAMPLES_OPAQUE flag
+    
+    This flag is set whenever the pixels of a bits image don't have an
+    alpha channel. Together with FAST_PATH_SAMPLES_COVER_CLIP it implies
+    that the image effectively is opaque, so we can do operator reductions
+    such as OVER->SRC.
+
+commit 4e5d6f00bf409259ff6f5d5c3ef4b016146bcbb3
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Aug 4 17:51:49 2010 -0400
+
+    pixman_image_set_alpha_map(): Disallow alpha map cycles
+    
+    If someone tries to set an alpha map that itself has an alpha map,
+    simply return. Also, if someone tries to add an alpha map to an image
+    that is being _used_ as an alpha map, simply return.
+    
+    This ensures that an alpha map can never have an alpha map.
+
+commit 9fe7d32c4b704a10e780444530eaea28b4351110
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Aug 4 17:55:14 2010 -0400
+
+    Add alpha-loop test program
+    
+    This tests what happens if you attempt to make an image with an alpha
+    map that has the image as its alpha map. This results in an infinite
+    loop in _pixman_image_validate(), so the test sets up a SIGALRM to
+    exit if it runs for more than five seconds.
+
+commit 8a5d1be1dab799ed23239f3471b4a351d8356368
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon May 31 19:24:43 2010 +0300
+
+    ARM: 'neon_combine_out_reverse_u' combiner
+    
+    This operation was seen in mozilla browser profiling logs.
+    Implemented so that 'over' and 'out_reverse' operations
+    now reuse common parts of code.
+
+commit 731e9feaa6988f99e1e38e1b92ed1f15ba706da5
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Mar 19 12:21:32 2010 +0200
+
+    Code simplification (no need advancing 'vx' at the end of scanline)
+
+commit 41584f8fe140b7374a5ef5d437b070c1f32763bb
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Jul 2 14:14:21 2010 -0400
+
+    Store the various bits image fetchers in a table with formats and flags.
+    
+    Similarly to how the fast paths are done, put the various bits_image
+    fetchers in a table, so that we can quickly find the best one based on
+    the image's flags and format.
+
+commit 8e33643f44c397a37b822a95e071880d9a8e792a
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Jul 2 12:53:56 2010 -0400
+
+    Add some new FAST_PATH flags
+    
+    The flags are:
+    
+     *  AFFINE_TRANSFORM, for affine transforms
+    
+     *  Y_UNIT_ZERO, for when the 10 entry in the transformation is zero
+    
+     *  FILTER_BILINEAR, for when the image has a bilinear filter
+    
+     *  NO_NORMAL_REPEAT, for when the repeat mode is not NORMAL
+    
+     *  HAS_TRANSFORM, for when the transform is not NULL
+    
+    Also add some new FAST_PATH_REPEAT_* macros. These are just shorthands
+    for the image not having any of the other repeat modes. For example
+    REPEAT_NORMAL is (NO_NONE | NO_PAD | NO_REFLECT).
+
+commit 6f62231d1580f5b67f36ec81b6c59a7e2f4978cb
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Jul 2 12:45:44 2010 -0400
+
+    Remove "_raw_" from all the accessors.
+    
+    There are no non-raw accessors anymore.
+
+commit 807fd3c08491c8baffaad993d8b867141fa55319
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Jul 2 12:34:42 2010 -0400
+
+    Eliminate the store_scanline_{32,64} function pointers.
+    
+    Now that we can't recurse on alpha maps, they are not needed anymore.
+
+commit e213d5fd6207873638a86d908d06d7597cb88422
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Jul 2 12:31:50 2010 -0400
+
+    Split bits_image_fetch_transformed() into two functions.
+    
+    One function deals with the common affine, no-alpha-map case. The
+    other deals with perspective transformations and alpha maps.
+
+commit cbb2a0d7929ec27e0a135d7fa11e1acf3942bce2
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Jul 2 12:11:44 2010 -0400
+
+    Eliminate get_pixel_32() and get_pixel_64() from bits_image.
+    
+    These functions can simply be passed as arguments to the various pixel
+    fetchers. We don't need to store them. Since they are known at compile
+    time and the pixel fetchers are force_inline, this is not a
+    performance issue.
+    
+    Also temporarily make all pixel access go through the alpha path.
+
+commit 6480c92312e1fb6662ad0d10940660a9439667ea
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Jul 2 11:58:23 2010 -0400
+
+    Eliminate recursion from alpha map code
+    
+    Alpha maps with alpha maps are no longer supported. It's not a useful
+    feature and it could lead to infinite recursion.
+
+commit 1cc750ed92a936d84b47cac696aaffd226e1c02e
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Jul 22 04:27:45 2010 -0400
+
+    Replace compute_src_extent_flags() with analyze_extents()
+    
+    This commit fixes two separate problems: 1. Incorrect computation of
+    the FAST_PATH_SAMPLES_COVER_CLIP flag, and 2. FAST_PATH_16BIT_SAFE is
+    a nonsensical thing to compute.
+    
+    == 1. Incorrect computation of SAMPLES_COVER_CLIP:
+    
+    Previously we were using pixman_transform_bounds() to compute which
+    source samples would be used for a composite operation. This is
+    incorrect for several reasons:
+    
+    (a) pixman_transform_bounds() is transforming the integer bounding box
+    of the destination samples, where it should be transforming the
+    bounding box of the samples themselves. In other words, it is too
+    pessimistic in some cases.
+    
+    (b) pixman_transform_bounds() is not rounding the same way as we do
+    during sampling. For example, for a NEAREST filter we subtract
+    pixman_fixed_e before rounding off to the nearest sample so that a
+    transformed value of 1 will round to the sample at 0.5 and not to the
+    one at 1.5. However, pixman_transform_bounds() would simply truncate
+    to 1 which would imply that the first sample to be used was the one at
+    1.5. In other words, it is too optimistic in some cases.
+    
+    (c) The result of pixman_transform_bounds() does not account for the
+    interpolation filter applied to the source.
+    
+    == 2. FAST_PATH_16BIT_SAFE is nonsensical
+    
+    The FAST_PATH_16BIT_SAFE is a flag that indicates that various
+    computations can be safely done within a 16.16 fixed-point
+    variable. It was used by certain fast paths that relied on those
+    computations succeeding. The problem is that many other compositing
+    functions were making similar assumptions but not actually requiring
+    the flag to be set. Notably, all the general compositing functions
+    simply walk the source region using 16.16 variables. If the
+    transformation happens to overflow, strange things will happen.
+    
+    So instead of computing this flag in certain cases, it is better to
+    simply detect that overflows will happen and not try to composite at
+    all in that case. This has the advantage that most compositing
+    functions can be written in a natural way.
+    
+    It does have the disadvantage that we are giving up on some cases that
+    previously worked, but those are all corner cases where the areas
+    involved were very close to the limits of the coordinate
+    system. Relying on these working reliably was always a somewhat
+    dubious proposition. The most important case that might have worked
+    previously was untransformed compositing involving images larger than
+    32 bits. But even in those cases, if you had REPEAT_PAD or
+    REPEAT_REFLECT turned on, you would hit bits_image_fetch_transformed()
+    which has the 16 bit limitations.
+    
+    == Fixes
+    
+    This patch fixes both problems by introducing a new function called
+    analyze_extents() that has the responsibility to reject corner cases,
+    and to compute flags based on the extents.
+    
+    It does this through a new compute_sample_extents() function that will
+    compute a conservative (but tight) approximation to the bounding box
+    of the samples that will actually be needed. By basing the computation
+    on the positions of the _sample_ locations in the destination, and by
+    taking the interpolation filter into account, it fixes problem one.
+    
+    The same function is also used with a one-pixel expanded version of
+    the destination extents. By checking if the transformed bounding box
+    will overflow 16.16 fixed point, it fixes problem two.
+
+commit 5b289d39cfd5e5cd8b1e0a7b654574ed3e7e90ac
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Jul 28 02:11:08 2010 -0400
+
+    Extend scaling-crash-test in various ways
+    
+    This extends scaling-crash-test to test some more things:
+    
+    - All combinations of NEAREST/BILINEAR/CONVOLUTION filters and
+      NORMAL/PAD/REFLECT repeat modes.
+    
+    - Tests various scale factors very close to 1/7th such that the source
+      area is very close to edge of the source image.
+    
+    - The same things, only with scale factors very close to 1/32767th.
+    
+    - Enables the commented-out tests for accessing memory outside the
+      source buffer.
+    
+    Also there is now a border around the source buffer which has a
+    different color than the source buffer itself so that if we sample
+    outside, it will show up.
+    
+    Finally, the test now allows the destination buffer to not be changed
+    at all. This allows pixman to simply bail out in cases where the
+    transformation is too strange.
+
+commit 71ff55a3e567ace21e9120f826270253e7ec5edd
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Aug 5 19:00:56 2010 -0400
+
+    Fix Altivec/OpenBSD patch
+    
+    As Brad pointed out, I pushed the wrong version of this patch.
+
+commit cb50e9cc95a780a5e60d557f2aa23d82d2280b73
+Author: Brad Smith <brad@comstyle.com>
+Date:   Sat Jul 31 05:07:02 2010 -0400
+
+    Add support for AltiVec detection for OpenBSD/PowerPC.
+    
+    Bug 29331.
+
+commit 664132128ec430e28dad9f8088a3f6f2a1903f8e
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Aug 4 09:50:30 2010 -0400
+
+    CODING_STYLE: Delete the stuff about trailing spaces
+    
+    Also fix various other minor issues.
+
+commit cc9221ce96c23f6d6f1a17d98e5221e3aeff6567
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Jul 28 03:17:35 2010 -0400
+
+    If we bail out of do_composite, make sure to undo any workarounds.
+    
+    The workaround for an old X bug has to be undone if we bail from
+    do_composite, so we can't just return.
+
+commit b243a66041456dba278b04f813deac4f99bbe621
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Aug 4 08:58:51 2010 -0400
+
+    Add x14r6g6b6 format to blitters-test
+
+commit d6a7b1542448e7ee41f2c2a129bd0af2668185bb
+Author: Marek Vasut <marek.vasut@gmail.com>
+Date:   Sun Aug 1 02:18:52 2010 +0200
+
+    Add support for 32bpp X14R6G6B6 format.
+    
+    This format is used on PXA framebuffer with some boards. It uses only 18 bits
+    from the 32 bit framebuffer to interpret color.
+    
+    Signed-off-by: Marek Vasut <marek.vasut@gmail.com>
+
+commit 226a6df4f947f718d82e85ca53561a968ec0c0a1
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Jul 14 16:43:16 2010 +0300
+
+    test: 'scaling-test' updated to provide better coverage
+    
+    Negative scale factors are now also tested. A small additional
+    translate transform helps to stress the use of fractional
+    coordinates better.
+    
+    Also the number of iterations to run by default increased in order
+    to compensate for the increased variety of operations to be tested.
+
+commit af3eeaeb1352148ca671a45768d11160fcfd8567
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Jul 19 20:25:05 2010 +0300
+
+    test: 'scaling-crash-test' added
+    
+    This test tries to exploit some corner cases and previously known
+    bugs in nearest neighbor scaling fast path code, attempting to
+    crash pixman or cause some other nasty effect.
+
+commit 90483fcabbd19b35ded094a6a592ee224029fd07
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Jul 15 23:40:28 2010 -0400
+
+    bits: Fix potential divide-by-zero in projective code
+    
+    If the homogeneous coordinate is 0, just set the coordinates to 0.
+
+commit bf125fbbb701788d5d9ed9ff368bb4fe9d9c895e
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Apr 25 20:25:50 2010 -0400
+
+    [sse2] Add sse2_composite_add_n_8()
+    
+    This shows up when epiphany displays the "ImageTest" on
+    glimr.rubyforge.org/cake/canvas.html
+
+commit 16ae3285e6601ea177637dddd20d2857d13decac
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Apr 25 19:54:28 2010 -0400
+
+    [sse2] Add sse2_composite_in_n_8()
+    
+    This shows up when epiphany displays the "ImageTest" on
+    glimr.rubyforge.org/cake/canvas.html
+
+commit e0b430a13ee4619bd6d82c4ebff8a401a254e9bc
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jul 13 00:31:35 2010 -0400
+
+    [sse2] Add sse2_composite_src_x888_8888()
+    
+    This operation shows up when Firefox displays
+    http://dougx.net/plunder/plunder.html
+
+commit 16bae8347529c1c976e6d7af90e0fb5811605af1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jul 13 00:08:10 2010 -0400
+
+    [fast] Add fast_composite_src_x888_8888()
+    
+    This shows up when Firefox displays http://dougx.net/plunder/plunder.html
+
+commit 9399b1a5af69cc9890aa7918cd09318ddeefc05d
+Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi>
+Date:   Wed Jul 14 09:51:27 2010 +0300
+
+    Fix thinko in configure.ac's macro to test linking.
+    
+    Copy-paste carnage.  Renames save_{cflags,libs,ldflags} to
+    save_{CFLAGS,LIBS,LDFLAGS}.
+
+commit 5537e51cd0ffda53cc392a4bafe05070954fc36d
+Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi>
+Date:   Sun Jul 11 19:59:01 2010 +0300
+
+    Avoid trailing slashes on automake install dirs.
+    
+    The install-sh on a Solaris box couldn't cope with
+    trailing slashes.
+
+commit 1d9c6fa62385c42d67926982704c398d8b495d47
+Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi>
+Date:   Sat Jul 10 15:36:41 2010 +0300
+
+    Check for specific flags by actually trying to compile and link.
+    
+    Instead of relying on preprocessor version checks to see if
+    some compiler flags are supported, actually try to compile and
+    link a test program with the flags.
+
+commit d95ae7060442712315d29c8b307df131ba9ffce6
+Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi>
+Date:   Sat Jul 10 02:41:01 2010 +0100
+
+    Check that the OpenMP pragmas don't cause link errors.
+    
+    This patch adds extra guards around our use of
+    OpenMP pragmas and checks that the pragmas won't
+    cause link errors.  This fixes the build on
+    Tru64 and Solaris with the native compilers and clang.
+
+commit eb247ac377623d2a722aab1e6eae7adab5f7ebea
+Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi>
+Date:   Fri Jul 9 12:09:07 2010 +0300
+
+    Don't trust OpenBSD's gcc to produce working code for __thread.
+    
+    The gcc on OpenBSD 4.5 to 4.7 at least produces bad code for __thread,
+    without as much as a warning.
+    
+    See PR #6410 "Using __thread TLS variables compiles ok but segfault at runtime."
+    
+    http://cvs.openbsd.org/cgi-bin/query-pr-wrapper?full=yes&numbers=6410
+
+commit dbf35f1f276a673bc4a1eb932dd5cf9266f948da
+Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi>
+Date:   Fri Jul 9 12:07:35 2010 +0300
+
+    Try harder to find suitable flags for pthreads.
+    
+    The flags -D_REENTRANT -lpthread work on more systems than
+    does -pthread unfortunately, so give that a go too.
+
+commit 9897bb4eeed165b76001dfefd3a89bcb96d38a72
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jul 12 15:13:49 2010 -0400
+
+    Check for read accessors before taking the bilinear fast path
+    
+    The bilinear fast path accesses pixels directly, so if the image has a
+    read accessor, then it can't be used.
+
+commit ce3d9fca73bb8abe4d5b1023cfdb06ca53b6161c
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Jul 11 19:58:49 2010 -0400
+
+    fast-path: Some formatting fixes
+    
+    Add spaces before parentheses; fix indentation in the macro.
+
+commit 839326e471a8a6c96dea1693501550d79043bb81
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Jul 11 19:57:29 2010 -0400
+
+    In the FAST_NEAREST macro call the function 8888_8888 and not x888_x888
+    
+    The x888 suggests that they have something to do with the x8r8g8b8
+    formats, but that's not the case; they are assuming a8r8g8b8
+    formats. (Although in some cases they also work for x8r8g8b8 type
+    formats).
+
+commit e13d9f9684a47a6e0be4f8ae1a39cce8b1334238
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Jul 11 19:45:22 2010 -0400
+
+    Make the repeat mode explicit in the FAST_NEAREST macro.
+    
+    Before, it was 0 or 1 meaning 'no repeat' and 'normal repeat'
+    respectively. Now we explicitly pass in either NONE or NORMAL.
+
+commit 2e7fb6655334789f8a5e290245d47c8d6b221c24
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jul 10 20:47:01 2010 -0400
+
+    When converting indexed formats to 64 bits, don't correct for channel widths
+    
+    Indexed formats are mapped to a8r8g8b8 with full precision, so when
+    expanding we shouldn't correct for the width of the channels
+
+commit 2df6dac0be678e1683223faeddadb35b1d2dbe36
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jul 10 18:40:06 2010 -0400
+
+    test: Make sure the palettes for indexed format roundtrip properly
+    
+    The palettes for indexed formats must satisfy the condition that if
+    some index maps to a color C, then the 15 bit version of that color
+    must map back to the index. This ensures that the destination operator
+    is always a no-op, which seems like a reasonable assumption to make.
+
+commit 5dd59c8b7cf1543605713a2ac30f31d8726f5444
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jul 10 16:49:51 2010 -0400
+
+    Split the fast path caching into its own force_inline function
+    
+    The do_composite() function is a lot more readable this way.
+
+commit 98d19d9abd9d62b8d2871871b0be74e022f1f89f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jul 10 16:08:51 2010 -0400
+
+    Cache the implementation along with the fast paths.
+    
+    When calling a fast path, we need to pass the corresponding
+    implementation since it might contain information necessary to run the
+    fast path.
+
+commit f18bcf1f6e984c33dca30ad1ce03c58628fe39df
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jul 10 15:47:12 2010 -0400
+
+    Hide the global implementation variable behind a force_inline function.
+    
+    Previously the global variable was called 'imp' which was confusing
+    with the argument to various other functions also being called imp.
+
+commit 5c935473d8a193b3510f8605a6658ea6ac998fd1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Jun 30 02:31:10 2010 -0400
+
+    Fix memory leak in the pthreads thread local storage code
+    
+    When a thread exits, we leak whatever is stored in thread local
+    variables, so install a destructor to free it.
+
+commit 7114b2d63bd5702c94cb8aa9401c023e550c77bc
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Jul 1 16:54:30 2010 -0400
+
+    Make the combiner macros less likely to cause name collisions.
+    
+    Protect the arguments to the combiner macros with parentheses, and
+    postfix their temporary variables with underscores to avoid name space
+    collisions with the surrounding code.
+    
+    Alexander Shulgin pointed out that underscore-prefixed identifiers are
+    reserved for the C implementation, so we use postfix underscores
+    instead.
+
+commit a92e4a6a9475e07435efb60aa2fde5fa04592d89
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Jun 21 15:30:46 2010 -0400
+
+    Minor tweaks to README
+
+commit ca846806cbc4e11cd134e464c7740c1cde19422b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Jun 20 13:12:27 2010 -0400
+
+    Store the conical angle in floating point radians, not fixed point degrees
+    
+    This is a slight simplification.
+
+commit 3074d57b560d5ec9be2a0e1a6846012698f51208
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jun 19 18:57:45 2010 -0400
+
+    Fix conical gradients to match QConicalGradient from Qt
+    
+    Under the assumption that pixman gradients are supposed to match
+    QConicalGradient, described here:
+    
+            http://doc.trolltech.com/4.4/qconicalgradient.html
+    
+    this patch fixes two separate bugs in pixman-conical-gradient.c.
+    
+    The first bug is that the output of atan2() is in the range of [-pi,
+    pi], which means the parameter into the gradient can be negative. This
+    is wrong since a QConicalGradient always interpolates around the
+    center from 0 to 1. The fix for that is to (a) make sure the given
+    angle is between 0 and 360, and (b) add or subtract 2 * M_PI if the
+    computed angle ends up outside [0, 2 * pi].
+    
+    The other bug is that we were interpolating clockwise, whereas
+    QConicalGradient calls for a counter-clockwise interpolation. This is
+    easily fixed by subtracting the parameter from 1.
+    
+    Finally, this patch encapsulates the computation in a new force-inline
+    function so that it can be reused in both the affine and non-affine
+    case.
+
+commit 66365b5ef1bb85863669227ae6e31134d8d57013
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun May 30 18:26:28 2010 -0400
+
+    Make separate gray scanline storers.
+    
+    For gray formats the palettes are indexed by luminance, not RGB, so we
+    can't use the color storers for gray too.
+
+commit 4e1d4847c9199f96b73376bef061c6728742e621
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun May 30 16:52:09 2010 -0400
+
+    When storing a g1 pixel, store the lowest bit, rather than comparing with 0.
+
+commit 445eb6385f60d09058826b44894e17165c91381c
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Wed Jun 9 16:35:37 2010 +0200
+
+    test: verify that gradients do not crash pixman
+    
+    Test gradients under particular conditions (no stops, all the stops
+    at the same offset) to check that pixman does not misbehave.
+
+commit de0320258167c24fc652d28f4aeca8713243323e
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Tue Jun 8 20:36:15 2010 +0200
+
+    support single-stop gradients
+    
+    Just like conical gradients, linear and radial gradients can now
+    have a single stop.
+
+commit 32bd31d677ab018849af5e0165d1dfacb1e01ed0
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue May 18 22:27:46 2010 -0400
+
+    Eliminate mask_bits from all the scanline fetchers.
+    
+    Back in the day, the mask_bits argument was used to distinguish
+    between masks used for component alpha (where it was 0xffffffff) and
+    masks for unified alpha (where it was 0xff000000). In this way, the
+    fetchers could check if just the alpha channel was 0 and in that case
+    avoid fetching the source.
+    
+    However, we haven't actually used it like that for a long time; it is
+    currently always either 0xffffffff or 0 (if the mask is NULL). It also
+    doesn't seem worthwhile resurrecting it because for premultiplied
+    buffers, if alpha is 0, then so are the color channels
+    normally.
+    
+    This patch eliminates the mask_bits and changes the fetchers to just
+    assume it is 0xffffffff if mask is non-NULL.
+
+commit 78778e5963c948de5ce5f7c5a2a3bb9f279a8eda
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Mon Mar 15 14:56:38 2010 +0200
+
+    create getter for component alpha
+    
+    This patch comes from the mozilla central tree. See
+    http://hg.mozilla.org/mozilla-central/rev/89338a224278 for the
+    original changeset.
+    
+    Signed-off-by: Jeff Muizelaar <jmuizelaar@mozilla.com>
+    Signed-off-by: Egor Starkov <egor.starkov@nokia.com>
+    Signed-off-by: Rami Ylimaki <ext-rami.ylimaki@nokia.com>
+    Signed-off-by: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+
+commit cfc4e38852dc244198a9bfcab07d9014bba21d53
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed May 12 01:34:57 2010 +0300
+
+    test: added OpenMP support for better utilization of multiple CPU cores
+    
+    Some of the tests are quite heavy CPU users and may benefit from
+    using multiple CPU cores, so the programs from 'test' directory
+    are now built with OpenMP support. OpenMP is easy to use, portable
+    and also takes care of making a decision about how many threads
+    to spawn.
+
+commit f905ebb03d8ed8a3ceb76c84a10735aa209168d3
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed May 12 00:10:04 2010 +0300
+
+    test: scaling-test updated to use new fuzzer_test_main() function
+
+commit be387701a5b44e68110d5c9df07924d1029e87ac
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue May 11 23:21:05 2010 +0300
+
+    test: blitters-test updated to use new fuzzer_test_main() function
+
+commit 9ed9abd1541a0353ba4234dc77dd46d6b8771d88
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue May 11 22:57:48 2010 +0300
+
+    test: blitters-test-bisect.rb converted to perl
+    
+    This new script can be used to run continuously to compare two test
+    programs based on fuzzer_test_main() function from 'util.c' and
+    narrow down to a single problematic test from the batch which results
+    in different behavior.
+
+commit 30c3e91c3f97cf3d5932ba639d8ac126b83efb70
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue May 11 22:46:47 2010 +0300
+
+    test: main loop from blitters-test added as a new function to utils.c
+    
+    This new generalized function can be reused in both blitters-test
+    and scaling-test. Final checksum calculation changed in order to make
+    it parallelizable (it is a sum of individual 32-bit values returned
+    by a callback function, which is now responsible for running test-specific
+    code). Return values may be crc32, some other hash or even just zero on
+    success and non-zero on error (in this case, the expected result of the
+    whole test run should be 0).
+
+commit 164fe215f2c904cf74537caf9d76b7f9ce2667ec
+Merge: e1594f2 5158d67
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun May 9 14:24:24 2010 -0400
+
+    Merge branch 'for-master'
+
+commit e1594f204d3a3c2d2083793c8830f0ebf390ed66
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu May 6 01:05:40 2010 +0300
+
+    test/gtk-utils: Set the size of the window to the size of the image
+
+commit 2f4f2fb4859931bf6dc5632d8c919e7296736427
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Tue May 4 11:55:30 2010 -0400
+
+    Add support for compiling pixman without thread/tls support
+
+commit 5158d6740c8e2643611a623a0caa649f4b0bc5bd
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Apr 24 18:43:38 2010 -0400
+
+    Add macros for thread local storage on MinGW 32
+    
+    These macros are identical to the ones that Tor Lillqvist posted here:
+    
+        http://lists.freedesktop.org/archives/pixman/2010-April/000160.html
+    
+    with one exception: the variable is allocated with calloc() and not
+    malloc().
+    
+    Cc: tml@iki.fi
+
+commit 582fa58bba7008c2b852ba56557612866f7522d5
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Apr 23 12:34:19 2010 -0400
+
+    Don't use __thread on MinGW.
+    
+    It is apparently broken. See this:
+    
+    http://mingw-users.1079350.n2.nabble.com/gcc-4-4-multi-threaded-exception-handling-thread-specifier-not-working-td3440749.html
+    
+    We'll need to support thread local storage on MinGW32 some other way.
+    
+    Cc: tml@iki.fi
+
+commit 95d4026866b4655c88de75c9756e9f18881b7c29
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Mar 28 23:02:43 2010 -0400
+
+    Add support for 8bpp to pixman_fill_sse2()
+
+commit d539e0c661e2ec8e8405c0110469e117c5c70526
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Apr 24 13:11:50 2010 -0400
+
+    sse2: Add sse2_composite_over_reverse_n_8888
+    
+    This is a small speed-up for the poppler benchmark:
+    
+    Before:
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image                      poppler    4.443    4.474   0.31%    6/6
+    
+    After:
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image                      poppler    4.224    4.248   0.42%    6/6
+
+commit 2d65fb033b57c701e2c16747470f86bda1d861e0
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Apr 24 15:15:05 2010 -0400
+
+    Don't consider indexed formats opaque.
+    
+    The indexed formats have 0 bits of alpha, but can't be considered
+    opaque because there may be non-opaque colors in the palette.
+
+commit 19459672ce68b6ad6a4d376cb2d7c9a6d889ae01
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Feb 24 19:21:50 2010 -0500
+
+    Add an over_8888_8888_8888 sse2 fast path.
+
+commit a3d29157b4a33162cabbda616c34c00d9a13f7a8
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Feb 17 23:03:25 2009 -0500
+
+    Add pixman_region{,32}_intersect_rect()
+
+commit c0d0d20bd282c1d049b5bce4d23e10ab5b28751e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 21 20:52:26 2009 -0400
+
+    Rename fast_composite_src_8888_x888 to fast_composite_src_memcpy()
+    
+    Then generalize it and use it for SRC copying between various
+    identical formats.
+
+commit 1f0cba3bdcc5d9a48c9189e8110c90d79260888a
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Tue Apr 27 15:23:20 2010 -0400
+
+    Add missing HAVE_CONFIG_H guards for config.h inclusion
+
+commit 526132fa652a42d94826760aa9c72537e3ecaf35
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Apr 22 12:14:23 2010 -0400
+
+    Remove alphamap from the GTK+ part of tests/Makefile.am
+    
+    It doesn't use GTK+ and it was already listed in the non-GTK+ part.
+
+commit 8f7cc5e4388e83eb1b77aea978f3c58338232320
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Apr 21 09:59:29 2010 -0400
+
+    Add pixman_image_get_format() accessor
+
+commit 2b1cae1ef62289288ef00ea7cc1dfef8e01750e6
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Apr 21 09:55:35 2010 -0400
+
+    Some minor updates to README
+
+commit 15f5868f6301a51d46cdb0833bc538f2fc68e3df
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Apr 18 16:24:39 2010 -0400
+
+    Update README to mention the pixman mailing list
+
+commit a652d5c15476cb60e1ca96ac115df625f8a1b76f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Apr 7 19:34:41 2010 -0400
+
+    [mmx] Fix mask creation bugs
+    
+    This line:
+    
+        mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
+    
+    only works when mask has 0s in the lower 24 bits, so add
+    
+         mask &= 0xff000000;
+    
+    before.
+    
+    Reported by Todd Rinaldo on the #cairo IRC channel.
+
+commit 714559dccda3165a72f0a9935c1edc3aef535f30
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Apr 7 01:44:12 2010 -0400
+
+    Fixes for pthread thread local storage.
+    
+    The tls_name_key variable is passed to tls_name_get(), and the first
+    time this happens it isn't initialized. tls_name_get() then passes it
+    on to tls_name_alloc() which passes it on to pthread_setspecific()
+    leading to undefined behavior.
+    
+    None of this is actually necessary at all because there is only one
+    such variable per thread local variable, so it doesn't need to be passed
+    as a parameter at all.
+    
+    All of this was pointed out by Tor Lillqvist on the cairo mailing
+    list.
+
+commit 634ba33b5b1fcfd5a0e7910f9991b4ed4f674549
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Apr 7 01:39:14 2010 -0400
+
+    Fix uninitialized cache when pthreads are used
+    
+    The thread local cache is allocated with malloc(), but we rely on it
+    being initialized to zero, so allocate it with calloc() instead.
+
+commit bc11545a1b5c22fe74fc954e26e8a8e9d7cfa39e
+Author: Siddharth Agarwal <sid.bugzilla@gmail.com>
+Date:   Tue Apr 13 10:15:29 2010 -0400
+
+    Visual Studio 2010 includes stdint.h
+    
+    Use the builtin version instead of defining the types ourselves.
+
+commit 0345c343e55ec19ae3c8c8ed598eab7e1c1e12f3
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Apr 1 06:21:21 2010 -0400
+
+    Post-release version bump to 0.19.1
+
+commit e9dc568d6f585a153c47e970168a9c71d3e45fde
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Apr 1 05:23:31 2010 -0400
+
+    Pre-release version bump to 0.18.0
+
+commit efd41c62875d97c5127233cb6a4c353b4d495531
+Author: Matthias Hopf <mhopf@suse.de>
+Date:   Wed Mar 24 18:54:29 2010 +0100
+
+    Revert "Improve PIXREGION_NIL to return true on degenerated regions."
+    
+    This reverts commit ebba1493136a5a0dd7667073165b2115de203eda.
+    Scheduled for re-discussion after stable 0.18 has been released.
+
+commit ebba1493136a5a0dd7667073165b2115de203eda
+Author: Matthias Hopf <mhopf@suse.de>
+Date:   Wed Mar 24 12:00:21 2010 +0100
+
+    Improve PIXREGION_NIL to return true on degenerated regions.
+    
+    Fixes Novell bug 568811.
+
+commit c0f8d417b512b7d526fb6127954a50d14214f420
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Mar 23 17:25:54 2010 -0400
+
+    Post-release version bump to 0.17.15
+
+commit b35f0b0158cd7aac388ba4c72c6c8aada77d2e22
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Mar 23 16:52:02 2010 -0400
+
+    Pre-release version bump to 0.17.14
+
+commit 27a9f0468bdfa257e70270bf9addd5ad064f918b
+Merge: 69f1ec9 3ef2033
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Mar 23 11:00:04 2010 -0400
+
+    Merge remote branch 'ssvb/arm-fixes'
+
+commit 3ef203331f124bf137c6e0c8d5516b1209c92dd9
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Mar 22 21:56:17 2010 +0200
+
+    ARM: SIMD optimizations moved to a separate .S file
+    
+    This should be the last step in providing full armv4t compatibility
+    with CPU features runtime autodetection in pixman.
+
+commit 0a0591c2f7abde8880f4aebd510c27517a414450
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Mar 22 19:51:00 2010 +0200
+
+    ARM: SIMD optimizations updated to use common assembly calling conventions
+
+commit c1e8d4533aea3aa10c49465cf5e9a44d946f70bb
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Mar 22 18:51:54 2010 +0200
+
+    ARM: Helper ARM NEON assembly binding macros moved into a separate header
+    
+    This is needed for future reuse of the same macros for the other
+    ARM assembly optimizations (armv4t, armv6)
+
+commit 5791026e45f79d8f5168e302a498455870363ac6
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sun Dec 27 00:27:53 2009 +0200
+
+    ARM: Workaround for a NEON bug in assembler from binutils 2.18
+    
+    The problem was reported as bug 25534 against pixman in
+    freedesktop.org bugzilla. Link to a patch for binutils:
+    http://sourceware.org/ml/binutils/2008-03/msg00260.html
+    
+    For pixman the impact is a build failure when using
+    binutils 2.18. Versions 2.19 and higher are fine. Still
+    some distros may be using older versions of binutils and
+    this is causing problems.
+    
+    This patch works around the problem by replacing a problematic
+    "vmov a, b" instruction with equivalent "vorr a, b, b". Actually
+    they even map to the same instruction opcode in the generated
+    code, so the resulting binary is identical with and without patch.
+
+commit 68d8d83223b5a35e25d379c2ee9e2e3a1d242323
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Mar 22 11:54:51 2010 +0200
+
+    ARM: Use '.object_arch' directive in NEON assembly file
+    
+    This can be used to override the architecture recorded in the EABI object
+    attribute section. We set a minimum arch to 'armv4'. Binutils documentation
+    recommends to use this directive with the code performing runtime detection
+    of CPU features.
+    
+    Additionally NEON/VFP EABI attributes are suppressed. And the instruction
+    set to use is explicitly set to '.arm'.
+    
+    Configure test for NEON support is also updated to include a bunch of
+    these new directives (if any of these is unsupported by the assembler,
+    it is better to fail configure test than to fail library build).
+    
+    All these changes are required to fix SIGILL problem on armv4t, reported in
+    http://lists.freedesktop.org/archives/pixman/2010-March/000123.html
+
+commit 69f1ec9a7827aeb522fcae99846237ef0f896e7b
+Author: Jon TURNEY <jon.turney@dronecode.org.uk>
+Date:   Wed Mar 17 21:07:06 2010 +0000
+
+    Avoid a potential division-by-zero exception in window-test
+    
+    Avoid a division-by-zero exception if the first number returned by
+    rand() is a multiple of 500, causing us to create a zero width pixmap,
+    and then attempt to use get_rand(0) when generating a random stride...
+    
+    Fixes https://bugs.freedesktop.org/attachment.cgi?id=34162
+
+commit 50713d9d0d9241597724551315f05d958ce7a283
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Mar 17 15:12:06 2010 -0400
+
+    Post-release version bump to 0.17.13
+
+commit fb68d6c14dd76121af009213df46e37ee17e38d7
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Mar 17 13:46:44 2010 -0400
+
+    Pre-release version bump to 0.17.12
+
+commit 265ea1fb4d05a920323f23a02f9dc379312bbdae
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Mar 17 10:50:42 2010 -0400
+
+    Specialize the fast_composite_scaled_nearest_* scalers to positive x units
+    
+    This avoids a test in the inner loop, which improves performance
+    especially for tiled sources.
+    
+    On x86-32, I get these results:
+    
+    Before:
+    op=1, src_fmt=20028888, dst_fmt=20028888, speed=306.96 MPix/s (73.18 FPS)
+    op=1, src_fmt=20028888, dst_fmt=10020565, speed=102.67 MPix/s (24.48 FPS)
+    op=1, src_fmt=10020565, dst_fmt=10020565, speed=324.85 MPix/s (77.45 FPS)
+    
+    After:
+    op=1, src_fmt=20028888, dst_fmt=20028888, speed=332.19 MPix/s (79.20 FPS)
+    op=1, src_fmt=20028888, dst_fmt=10020565, speed=110.41 MPix/s (26.32 FPS)
+    op=1, src_fmt=10020565, dst_fmt=10020565, speed=363.28 MPix/s (86.61 FPS)
+
+commit 9cd1051523493e0926b146f05cdde34158391602
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Mar 17 10:35:34 2010 -0400
+
+    Add a FAST_PATH_X_UNIT_POSITIVE flag
+    
+    This is the common case for a lot of transformed images. If the unit
+    were negative, the transformation would be a reflection which is
+    fairly rare.
+
+commit a5b51bb03c5c1258d7558efa13eca6c570e34ce6
+Author: Alexander Larsson <alexl@redhat.com>
+Date:   Wed Mar 17 11:58:05 2010 +0100
+
+    Use the right format for the OVER_8888_565 fast path
+
+commit 3b92b711d031a7752e06d0a5f688f4c54f50a1e6
+Author: Alexander Larsson <alexl@redhat.com>
+Date:   Fri Mar 12 15:45:04 2010 +0100
+
+    Add specialized fast nearest scalers
+    
+    This is a macroized version of SRC/OVER repeat normal/unneeded nearest
+    neighbour scaling instantiated for some common 8888 and 565 formats.
+    
+    Based on work by Siarhei Siamashka
+
+commit 5750408e48259f42373a5233231104d9bd3eb35a
+Author: Alexander Larsson <alexl@redhat.com>
+Date:   Fri Mar 12 15:41:01 2010 +0100
+
+    Add FAST_PATH_SAMPLES_COVER_CLIP and FAST_PATH_16BIT_SAFE
+    
+    FAST_PATH_SAMPLES_COVER_CLIP:
+    
+    This is set if the source sample grid, unrepeated but transformed,
+    completely covers the clip destination. If this is set
+    you can use a simple scaler that doesn't have to care about the repeat
+    mode.
+    
+    FAST_PATH_16BIT_SAFE:
+    
+    This signifies two things:
+    1) The size of the src/mask fits in a 16.16 fixed point, so something like:
+    
+        max_vx = src_image->bits.width << 16;
+    
+        Is allowed and is guaranteed to not overflow max_vx
+    
+    2) When stepping the source space we're guaranteed to never overflow
+       a 16.16 bit fixed point variable, even if we step one extra step
+       in the destination space. This means that a loop doing:
+    
+       x = vx >> 16;
+       vx += unit_x;
+       d = src_row[x];
+    
+       will never overflow vx causing x to be negative.
+    
+       And additionally, if you track vx like above and apply NORMAL repeat
+       after the vx addition with something like:
+    
+       while (vx >= max_vx) vx -= max_vx;
+    
+       This will never overflow the vx even on the final increment that
+       takes vx one past the end of where we will read, which makes the
+       repeat loop safe.
+
+commit cba6fbbddce5edfd8e28ef570c493b044761f870
+Author: Alexander Larsson <alexl@redhat.com>
+Date:   Fri Mar 12 15:40:07 2010 +0100
+
+    Add FAST_PATH_NO_NONE_REPEAT flag
+
+commit 7ec023ede155b9dacf574c4323740ef981802aa9
+Author: Alexander Larsson <alexl@redhat.com>
+Date:   Tue Mar 16 14:18:29 2010 +0100
+
+    Add CONVERT_8888_TO_8888 and CONVERT_0565_TO_0565 macros
+    
+    These are useful for macroization
+
+commit c903d03052e1c34478556964338959b34928a388
+Author: Alexander Larsson <alexl@redhat.com>
+Date:   Fri Mar 12 16:23:42 2010 +0100
+
+    Add CONVERT_0565_TO_8888 macro
+    
+    This lets us simplify some fast paths since we get a consistent
+    naming that always has 8888 and gets some value for alpha.
+
+commit de27f45ddd46fc48ec9598f2f177155328d55580
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Mar 15 11:51:09 2010 -0400
+
+    Ensure that only the low 4 bit of 4 bit pixels are stored.
+    
+    In some cases we end up trying to use the STORE_4 macro with an 8 bit
+    value, which resulted in other pixels getting overwritten. Fix this
+    by always masking off the low 4 bits.
+    
+    This fixes blitters-test on big-endian machines.
+
+commit 6532f8488abffb89501cb76de7d80b8ab2d49aed
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Mar 16 08:17:10 2010 -0400
+
+    Fix contact address in configure.ac
+
+commit 7c9f121efe7ee6afafad8b294974f5498054559b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Mar 16 12:23:50 2010 -0400
+
+    Add PIXMAN_DEFINE_THREAD_LOCAL() and PIXMAN_GET_THREAD_LOCAL() macros
+    
+    These macros hide the various types of thread local support. On Linux
+    and Unix, they expand to just __thread. On Microsoft Visual C++, they
+    expand to __declspec(thread).
+    
+    On OS X and other systems that don't have __thread, they expand to a
+    complicated concoction that uses pthread_once() and
+    pthread_get/set_specific() to get thread local variables.
+
+commit 6b9c54820015f69e667ed54441e83042c9a84cc1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Mar 16 11:01:08 2010 -0400
+
+    Add checks for various types of thread local storage.
+    
+    OS X does not support __thread, so we have to check for it before
+    using it.  It does however support pthread_get/setspecific(), so if we
+    don't have __thread, check if those are available.
+
+commit 313353f1fb9d40d0c3aaf7cfb99ca978b29003a4
+Author: Alan Coopersmith <alan.coopersmith@sun.com>
+Date:   Mon Mar 15 15:20:05 2010 -0700
+
+    Add Sun cc to thread-local support checks in pixman-compiler.h
+    
+    Clears '#warning: "unknown compiler"' messages when building
+    
+    Signed-off-by: Alan Coopersmith <alan.coopersmith@sun.com>
+
+commit b67f784a5dc51f41e40bb1a165411f5259ea0ee4
+Author: Alan Coopersmith <alan.coopersmith@sun.com>
+Date:   Mon Mar 15 10:52:17 2010 -0700
+
+    Make .s target asm flag selection more portable
+    
+    The previous code worked in GNU make, but caused a syntax error in Solaris
+    make ( https://bugs.freedesktop.org/show_bug.cgi?id=27062 ) - this seems to
+    work in both, and should hopefully not cause syntax errors in any versions
+    of make not supporting the macro-substitution-in-macro-name feature, just
+    cause the macro to expand to nothing.
+    
+    Signed-off-by: Alan Coopersmith <alan.coopersmith@sun.com>
+
+commit 7a5dc747852d46fa382ef885bb6299723ef6ed00
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Mar 15 07:40:46 2010 -0400
+
+    Fix typo: WORDS_BIG_ENDIAN => WORDS_BIGENDIAN in pixman-edge.c
+    
+    Pointed out by Andreas Falkenhahn on the cairo mailing list.
+
+commit ff30a5cbb941a9559082c6a6052ef761c7de949c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Mar 3 13:24:13 2010 -0500
+
+    test: Add support for indexed formats to blitters-test
+    
+    These formats work fine, they just need to have a palette set.
+
+commit 2b5f7be6c05ce3643b7d29e7237f91bfaedd80e5
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Mar 1 10:32:39 2010 -0500
+
+    pixman.h: Only define stdint types when PIXMAN_DONT_DEFINE_STDINT is undefined
+    
+    In SPICE, with Microsoft Visual C++, pixman.h is included after
+    another file that defines these types, which causes warnings and
+    errors.
+    
+    This patch allows such code to just define PIXMAN_DONT_DEFINE_STDINT
+    to use its own version of those types.
+
+commit f4da05c9f988133079cac2b8d54589386f46398f
+Merge: a12d868 f534509
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Mar 14 12:12:05 2010 -0400
+
+    Merge branch 'operator-table'
+
+commit a12d868df8b673df2b563f309563954e2b3f977d
+Merge: 18f0de4 54e39e0
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Mar 14 12:12:00 2010 -0400
+
+    Merge branch 'fast-path-cache'
+
+commit f534509d007de40592dedc574e7eb78445453ec0
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sat Jan 30 11:37:25 2010 -0500
+
+    Change operator table to be an array of arrays of four bytes.
+    
+    This makes gcc generate slightly better code for optimize_operator.
+
+commit 94d75ebd2167b44c142a6202b2d7bbe238dfd830
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Sep 19 02:40:56 2009 -0400
+
+    Strength reduce certain conjoint/disjoint to their normal counterparts.
+    
+    This allows us to not test for them later on.
+
+commit 58be9c71d2b1d0ed9d8feed1db0581b250d0a7d2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Sep 19 02:32:28 2009 -0400
+
+    Store the operator table more compactly.
+    
+    The four cases for each operator:
+    
+        none-are-opaque, src-is-opaque, dest-is-opaque, both-are-opaque
+    
+    are packed into one uint32_t per operator. The relevant strength
+    reduced operator can then be found by packing the source-is-opaque and
+    dest-is-opaque into two bits and shifting that number of bytes.
+    
+    Chris Wilson pointed out a bug in the original version of this commit:
+    dest_is_opaque and source_is_opaque were used as booleans, but their
+    actual values were the results of a logical AND with the
+    FAST_PATH_OPAQUE flag, so the shift value was wildly wrong.
+    
+    The only reason it actually passed the test suite (on x86) was that
+    the compiler computed the shift amount in the cl register, and the low
+    byte of FAST_PATH_OPAQUE happens to be 0, so no shifting actually took
+    place, and the original operator was returned.
+
+commit 7fe35f0e6b660f5667ff653f3b753bc3e5d07901
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Sep 18 14:10:31 2009 -0400
+
+    Make the operator strength reduction constant time.
+    
+    By extending the operator information table to cover all operators we
+    can replace the loop with a table look-up. At the same time, base the
+    operator optimization on the computed flags rather than the ones in
+    the image struct.
+    
+    Finally, as an extra optimization, we no longer ignore the case where
+    there is a mask. Instead we consider the source opaque if both source
+    and mask are opaque, or if the source is opaque and the mask is
+    missing.
+
+commit 18f0de452dc7e12e4cb544d761a626d5c6031663
+Author: Loïc Minier <loic.minier@ubuntu.com>
+Date:   Tue Mar 9 20:57:34 2010 +0100
+
+    ARM: SIMD: Try without any CFLAGS before forcing -mcpu=
+    
+    http://bugs.launchpad.net/bugs/535183
+
+commit 933540861383da27402680593edefe8d61e6fb02
+Author: Egor Starkov <starkov.egor@gmail.com>
+Date:   Fri Mar 12 09:47:59 2010 -0500
+
+    Eliminate trailing comma in enum
+    
+    https://bugs.freedesktop.org/show_bug.cgi?id=27050
+    
+    Pixman is not compiling with c++ compiler. During compilation it gives
+    the following error:
+    
+    /usr/include/pixman-1/pixman.h:335: error: comma at end of enumerator list
+    
+    Signed-off-by: Søren Sandmann Pedersen <ssp@redhat.com>
+
+commit 54e39e00386fd2fd0eb76ead6396ddb93f1cf6c2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Sep 17 03:16:27 2009 -0400
+
+    Add a fast path cache
+    
+    This patch adds a cache in front of the fast path tables to reduce the
+    overhead of pixman_composite(). It is fixed size with move-to-front to
+    make sure the most popular fast paths are at the beginning of the cache.
+    
+    The cache is thread local to avoid locking.
+
+commit 84b009ae9f128c838d0e046e07947f8f9b2ce879
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Mar 5 20:40:41 2010 -0500
+
+    Post-release version bump to 0.17.11
+
+commit 14fd287efb63c1f31d37053ebbbf500d0841c053
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Fri Mar 5 20:06:08 2010 -0500
+
+    Pre-release version bump to 0.17.10
+
+commit bd9934551f72f4993016cab1d7be3b1e545629b1
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Feb 26 14:15:22 2010 -0500
+
+    Move __force_align_arg_pointer workaround before composite32()
+    
+    Since otherwise the workaround won't take effect when you call
+    pixman_image_composite32() directly.
+
+commit 14bb054d9695abb284e22a1de31337e0e41bb4e3
+Merge: 9a8e404 3db76b9
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Mar 4 02:30:22 2010 -0500
+
+    Merge branch 'more-flags'
+
+commit 9a8e404d44b6ed9817d088966cec324a38e60897
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Feb 27 00:37:19 2010 -0500
+
+    test: Remove obsolete comment
+
+commit 182e4c2635fdb90c50b2e86253738b7e9c8ea282
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Feb 24 04:14:45 2010 +0200
+
+    ARM: added 'neon_composite_over_reverse_n_8888' fast path
+    
+    This fast path function improves performance of 'poppler' cairo-perf trace.
+    
+    Benchmark from ARM Cortex-A8 @720MHz
+    
+    before:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image                      poppler   38.986   39.158   0.23%    6/6
+    
+    after:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image                      poppler   24.981   25.136   0.28%    6/6
+
+commit 072a7d31a8c872666787b69a6bd1b537565c5b96
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Feb 24 02:26:57 2010 +0200
+
+    ARM: added 'neon_composite_src_x888_8888' fast path
+    
+    This fast path function improves performance of 'gnome-system-monitor'
+    cairo-perf trace.
+    
+    Benchmark from ARM Cortex-A8 @720MHz
+    
+    before:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image         gnome-system-monitor   68.838   68.899   0.05%    5/6
+    
+    after:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image         gnome-system-monitor   53.336   53.384   0.09%    6/6
+
+commit 2ed7c13922f83404bd9976c00d00738d0314693f
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Feb 24 01:44:00 2010 +0200
+
+    ARM: added 'neon_composite_over_n_8888_8888_ca' fast path
+    
+    This fast path function improves performance of 'firefox-talos-gfx'
+    cairo-perf trace.
+    
+    Benchmark from ARM Cortex-A8 @720MHz
+    
+    before:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image            firefox-talos-gfx  139.969  141.176   0.35%    6/6
+    
+    after:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image            firefox-talos-gfx  111.810  112.196   0.23%    6/6
+
+commit 3db76b90049f23723a0519d572b9cda7c672f7d5
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Feb 14 19:18:35 2010 -0500
+
+    Restructure the flags computation in compute_image_info().
+    
+    Restructure the code to use switches instead of ifs. This saves a few
+    comparisons and makes the code slightly easier to follow. Also add some
+    comments.
+
+commit ac44db334066f68a837914a52d8d1368c85161ad
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Feb 14 19:14:44 2010 -0500
+
+    Move workaround code to pixman-image.c
+    
+    It is more natural to put it where all the other flags are computed.
+
+commit 35af45d5e3d3f893ccaa4ab2f947100eb9d840ac
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Feb 22 06:06:22 2010 -0500
+
+    Turn need_workaround into another flag.
+    
+    Instead of storing it as a boolean in the image struct, just use
+    another flag for it.
+
+commit f27f17ce22b6d0ac587600930c3657180066aac8
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Sep 18 11:54:21 2009 -0400
+
+    Eliminate _pixman_image_is_opaque() in favor of a new FAST_PATH_IS_OPAQUE flag
+    
+    The new FAST_PATH_IS_OPAQUE flag is computed along with the others in
+    _pixman_image_validate().
+
+commit 2a6ba862abd8859014d11a742247fa1f1225729b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Sep 18 04:17:57 2009 -0400
+
+    Eliminate _pixman_image_is_solid()
+    
+    Instead of calling this function in compute_image_info(), just do the
+    relevant checks when the extended format is computed.
+    
+    Move computation of solidness to validate
+
+commit 45006e5e648b85df65b922f893c9802c9ecce38e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Sep 18 04:06:30 2009 -0400
+
+    Move computation of extended format code to validate.
+    
+    Instead of computing the extended format on every composite, just
+    compute it once and store it in the image.
+
+commit fb0096a282c5b6e7ca9eb59a05d9ff738dccfd4b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Mon Feb 22 05:16:27 2010 -0500
+
+    Add new FAST_PATH_SIMPLE_REPEAT flag
+    
+    This flag indicates that the image is untransformed and
+    repeating. Such images can be composited quickly by simply repeating
+    the composite operation.
+
+commit a7ad9c7c9dcb78e0c5ad00145b119dfe929eb307
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Sep 18 03:54:36 2009 -0400
+
+    Compute the image flags at validation time instead of composite time
+    
+    Instead of computing all the image flags at composite time, we compute
+    them once in _pixman_image_validate() and cache them in the image.
+
+commit 7bc4cd42c3549f3f2354f50a7cf21ce9ccc8de7b
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Feb 24 22:09:41 2010 -0500
+
+    RELEASING: Update the release instructions.
+
+commit 7392a350f2808146842be0924ca289c5df6c8922
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Feb 24 22:02:13 2010 -0500
+
+    Post-release version bump
+
+commit 4d1c216af3d6fc58829c2f5ea434e97ff8012493
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Feb 24 21:52:30 2010 -0500
+
+    Pre-release version bump
+
+commit e0f1d8410715083498a35284ea7e5bb71fabe090
+Merge: 16ef3ab 282f5cf
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Feb 24 21:01:29 2010 -0500
+
+    Merge branch 'trap-fixes'
+
+commit 16ef3ab230047221f813905d390bf762a3d8508a
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Feb 24 20:51:25 2010 -0500
+
+    Add a1-trap-test
+    
+    When a trapezoid sample point is exactly on a polygon edge, the rule
+    is that it is considered inside the trapezoid if the edge is a top or
+    left edge, but outside for bottom and right edges.
+    
+    This program tests that for a1 trapezoids.
+
+commit ad5cbba4c05f8521004c6aa1afd5aa74040afad0
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Sun Feb 21 02:01:16 2010 -0500
+
+    Hide the C++ extern "C" declarations behind macros.
+    
+    That way they don't confuse the indenting algorithm in editors such as
+    Emacs.
+
+commit 14f201dc47ba76fcf677936f4f809249054fd6ad
+Merge: 94f5859 6b2da68
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Feb 20 13:09:01 2010 -0500
+
+    Merge branch 'eliminate-composite'
+    
+    Conflicts:
+    	pixman/pixman-sse2.c
+
+commit 94f585916a2385146ee5c803f7850b21149d728b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Feb 13 20:08:13 2010 -0500
+
+    Move all code to do debugging spew into pixman-private.
+    
+    Rather than the region code having its own little debug system, move
+    all of it into pixman-private where there are already return_if_fail()
+    macros etc. These macros are now enabled in development snapshots and
+    nowhere else. Previously they were never enabled unless you modified
+    the code.
+    
+    At the same time, remove all the asserts from the region code since we
+    can never turn them on anyway, and replace them with
+    critical_if_fail() macros that will print spew to standard error when
+    DEBUG is defined.
+    
+    Finally, also change the debugging spew in pixman-bits-image.c to use
+    return_val_if_fail() instead of its own fprintf().
+
+commit f32d585069e77f09f84de42eda8ed8f6849aab57
+Author: Alexander Larsson <alexl@redhat.com>
+Date:   Fri Feb 19 11:22:52 2010 +0100
+
+    Test pixman_region32_init_from_image in region-test
+
+commit 48ef4befd88e06e83a583a70f0172f1a08a65cda
+Author: Alexander Larsson <alexl@redhat.com>
+Date:   Mon Feb 15 09:40:50 2010 +0100
+
+    Add pixman_region{32}_init_from_image
+    
+    This creates a region from an image in PIXMAN_a1 format.
+
+commit 5dee05fcab16dbd3e2c5e4d85b9edf26cf523e3f
+Author: Alexander Larsson <alexl@redhat.com>
+Date:   Mon Feb 15 09:39:59 2010 +0100
+
+    Move SCREEN_SHIFT_LEFT/RIGHT to pixman-private.h
+    
+    This is needed for later use in other code.
+
+commit 61f4ed9c7a3ff6afbbb42d3f3b8dc3b9331bdcbd
+Author: Makoto Kato <m_kato@ga2.so-net.ne.jp>
+Date:   Thu Feb 18 14:30:01 2010 +0900
+
+    Compile by USE_SSE2 only without USE_MMX
+    
+    Although we added MMX emulation for Microsoft Visual C++ compiler for x64,
+    USE_SSE2 still requires USE_MMX.  So we remove dependency of USE_MMX
+    for Windows x64.
+    
+    Signed-off-by: Makoto Kato <m_kato@ga2.so-net.ne.jp>
+
+commit 6b2da683debd1b3fc1862752cb26e6799b644e05
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Sep 16 07:29:08 2009 -0400
+
+    Move NULL check out of get_image_info()
+    
+    The NULL check is only necessary for masks, so there is no reason to
+    do it for destinations and sources.
+
+commit 1dd8744f40f4b754fb3aa26a3c7f4fbe54c27155
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Sep 16 06:54:43 2009 -0400
+
+    Add a fast path for non-repeating sources in walk_region_internal().
+    
+    In the common case where there is no repeating, the loop in
+    walk_region_internal() reduces to just walking of the boxes involved
+    and calling the composite function.
+
+commit 362a9f564a9a58c48ab0129ca3ac997d0cb84bab
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Sep 15 01:34:33 2009 -0400
+
+    Move more things out of the inner loop in do_composite().
+    
+    Specifically,
+    
+    - the src_ and mask_repeat computations
+    
+    - the check for whether the involved images cover the composite
+      region.
+
+commit 129d9c187146a060863598d154e6770394547afd
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Sep 15 01:16:40 2009 -0400
+
+    Move region computation out of the loop in do_composite()
+    
+    We only need to compute the composite region once, not on every
+    iteration.
+
+commit 4c185503d26374915942d1f64c02134e4a2c5a99
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Sep 15 01:14:41 2009 -0400
+
+    Move get_image_info() out of the loop in do_composite
+    
+    The computation of image formats and flags is invariant to the loop,
+    so it can all be moved out.
+
+commit 81b7d7b18050c770f272fd10aa7f5cf85ddc25cc
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Sep 15 01:13:36 2009 -0400
+
+    Manually inline _pixman_run_fast_path()
+    
+    Move all of the code into do_composite().
+
+commit e914cccb24c1391aa25eca8df87a08bd935cd870
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Sep 15 01:11:04 2009 -0400
+
+    Move compositing functionality from pixman-utils.c into pixman.c
+    
+    _pixman_run_fast_path() and pixman_compute_composite_region() are both
+    moved to pixman-image, since at this point that's the only place they
+    are being called from.
+
+commit 0eeb197599cca78a645f8a5498b0124ef170c523
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Nov 7 15:13:03 2009 -0500
+
+    Move compositing to its own function, do_composite()
+
+commit f831552bce70f2619ea8db00983d80dd1abd0003
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Sep 15 00:58:51 2009 -0400
+
+    Optimize for the common case wrt. the workaround.
+    
+    In the common case no images need the workaround, so we check for that
+    first, and only if an image does need a workaround do we check which
+    one of the images actually need it.
+
+commit fa4df6225d4fa0b740c0ce69361e2f7cee1686f9
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Sep 15 00:48:12 2009 -0400
+
+    Eliminate all the composite methods.
+    
+    They are no longer necessary because we will just walk the fast path
+    tables, and the general composite path is treated as another fast
+    path.
+    
+    This unfortunately means that sse2_composite() can no longer be
+    responsible for realigning the stack to 16 bytes, so we have to move
+    that to pixman_image_composite().
+
+commit c3d7b5125585a7d974ccd904100777a0e18d425f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Sep 15 00:23:11 2009 -0400
+
+    Delete unused _pixman_walk_composite_region() function
+
+commit 488480301c7ca9cb4e41c8d0f489fb56e5d9efdd
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Nov 7 15:28:57 2009 -0500
+
+    Don't call _pixman_implementation_composite() anymore.
+    
+    Instead just call _pixman_run_fast_path(). Since we view
+    general_composite() as a fast path now, we know that it will find
+    *some* compositing routine.
+
+commit 06ae5ed5971fe616b96bb97a63abf6cc27c5b669
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Sep 14 23:58:40 2009 -0400
+
+    Delete unused sources_cover() function
+
+commit 543a04a3bbd2c622842739ab923ff8761c05ed83
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Sep 14 23:47:39 2009 -0400
+
+    Store a pointer to the array of fast paths in the implementation struct.
+    
+    Also add an empty fast path table to the vmx implementation, so that
+    we can be sure the pointer is never NULL.
+
+commit 376f2a3f853f829c78983a51bffc1bacb9bec9a3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Sep 15 00:20:22 2009 -0400
+
+    Make fast_composite_scaled_nearest() another fast path.
+    
+    This requires another couple of flags
+    
+         FAST_PATH_SCALE_TRANSFORM
+         FAST_PATH_NEAREST_FILTER
+
+commit 87430cfc35c6e51bb1a947795e0ddb198c460253
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Sep 13 05:29:48 2009 -0400
+
+    Make general_composite_rect() just another fast path.
+    
+    We introduce a new PIXMAN_OP_any fake operator and a PIXMAN_any fake
+    format that match anything. Then general_composite_rect() can be used
+    as another fast path.
+    
+    Because general_composite_rect() does not require the sources to cover
+    the clip region, we add a new flag FAST_PATH_COVERS_CLIP which is part
+    of the set of standard flags for fast paths.
+    
+    Because this flag cannot be computed until after the clip region is
+    available, we have to call pixman_compute_composite_region32() before
+    checking for fast paths. This will resolve itself when we get to the
+    point where _pixman_run_fast_path() is only called once per composite
+    operation.
+
+commit d7e281e0a1f7b1aecd245070736e03d2953b0911
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Feb 13 18:23:34 2010 -0500
+
+    Post-release version bump
+
+commit 9bcadc340866c49dab1cb40ff79c683972e8a37d
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Feb 13 18:12:32 2010 -0500
+
+    Pre-release version bump
+
+commit 97a12457394b36b5b052927af65ac3944ceccf09
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun May 31 15:06:33 2009 -0400
+
+    Once unrolled version of fast_path_composite_nearest_scaled()
+    
+    Separate out the fetching and combining code in two inline
+    functions. Then do two pixels per iteration.
+
+commit e5972110750b32929a474c35362f4639dbbd1222
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 30 02:50:38 2009 -0400
+
+    Generalize and optimize fast_composite_src_scaled_nearest()
+    
+    - Make it work for PIXMAN_OP_OVER
+    
+    - Split repeat computation for x and y, and only do the x part in the
+      inner loop.
+    
+    - Move stride multiplication outside of inner loop
+
+commit 337e916473069a76a44757b3664f8d49da350773
+Merge: bdc4a6a 8e85059
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Feb 13 12:26:09 2010 -0500
+
+    Merge branch 'bitmasks'
+
+commit bdc4a6afe0bcea6dfb0df221006f4fe188324678
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Feb 13 11:18:13 2010 -0500
+
+    Makefile.am: Remove 'check' from release-check
+    
+    It's already included in distcheck.
+
+commit edee4be052cf0d466922759efd2613e5a2be9e2b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Feb 13 09:40:33 2010 -0500
+
+    Turn off asserts in development snapshots (bug 26314).
+    
+    There is not much real benefit in having asserts turned on in
+    snapshots because it doesn't lead to any new bug reports, just to
+    people not installing development snapshots since they cause X server
+    crashes. So just turn them off.
+    
+    While we are at it, limit the number of messages to stderr to 5
+    instead of 50.
+
+commit cf1f034fef34478c528bedf1e59be443fa72429c
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sun Feb 7 07:33:52 2010 +0200
+
+    ARM: Remove any use of environment variables for cpu features detection
+    
+    Old code assumed that all ARMv7 processors support NEON instructions
+    unless overridden by environment variable ARM_TRUST_HWCAP. This causes
+    X server to die with SIGILL if NEON support is disabled in the kernel
+    configuration. Additionally, ARMv7 processors lacking NEON unit are
+    going to become available eventually.
+    
+    The problem was reported by user bearsh at irc.freenode.net #gentoo-embedded
+
+commit 865c37d57421f6888151486ae1a2ec986a7cd9d2
+Author: Alexander Larsson <alexl@redhat.com>
+Date:   Tue Feb 9 15:57:18 2010 +0100
+
+    Add pixman_image_get_destroy_data()
+    
+    This way you can get back user data that was set using
+    pixman_image_set_destroy_function().
+
+commit cca1cef3f29d244f0a57bd3ed8b09e2892f8934a
+Author: Alexander Larsson <alexl@redhat.com>
+Date:   Tue Feb 9 13:22:38 2010 +0100
+
+    Add extern "C" guards for c++
+
+commit 8e8505943651ac46e0ad5a2dd0b9e85704095cc1
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Sep 13 04:28:20 2009 -0400
+
+    Move checks for src/mask repeat right before walking the region.
+    
+    Also add a couple of additional checks to the src/mask repeat check.
+
+commit eea58eab93aefd4430544754f8a0f5460b4a30aa
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Sep 13 03:43:16 2009 -0400
+
+    Compute src, mask, dest flags and base fast path decisions on them.
+    
+    This sets the stage for caching the information by image instead
+    of computing it on each composite invocation.
+    
+    This patch also computes format codes for images such as PIXMAN_solid,
+    so that we can no longer end up in the situation that a fast path is
+    selected for a 1x1 solid image, when that fast path doesn't actually
+    understand repeating.
+
+commit 6197db91a32da7ea281fd87b59f5bb74b989361b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Sep 13 02:34:32 2009 -0400
+
+    Add src_, mask_, and dest_flags fields to fast path arrays
+    
+    Update all the fast path tables to match using a new
+    PIXMAN_STD_FAST_PATH macro.
+    
+    For now, use 0 for the flags fields.
+
+commit ff6eaac50eaa8778ba15fd0f796e94cc751dea0a
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Sep 12 05:26:50 2009 -0400
+
+    Move calls to source_is_fastpathable() into get_source_format()
+
+commit 171dc4875644f72d65ff2e31533edacc781069ec
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Sep 12 05:09:53 2009 -0400
+
+    Fold get_fast_path() into _pixman_run_fast_path()
+    
+    Also factor out the source format code computation to its own
+    function.
+
+commit 459c7a52f67c9628e94107599e3abbc6463cbd0f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Sep 12 04:30:22 2009 -0400
+
+    Consolidate the source and mask sanity checks in a function
+
+commit 27a4fb4747426ee935d2149cca2197a369c4556d
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Sep 12 04:10:30 2009 -0400
+
+    Move pixbuf checks after src_format and mask_format have been computed.
+
+commit 2def1a8867a1ab0ccab720d1cc3f3c7b61c74619
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Sep 12 04:03:25 2009 -0400
+
+    Move the sanity checks for src, mask and destination into get_fast_path()
+
+commit d76aab4d03d9e700c3c431b077a4b506a5e53df1
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Wed Jan 27 07:11:11 2010 -0500
+
+    Turn some uint16_t variables to int32_t in the fast paths.
+    
+    This is necessary now that we have a 32 bit version of
+    pixman_image_composite().
+
+commit 15d07d6c2ac4ed5d41dc80b476e09e8d7dd6a84a
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Thu Jan 21 13:43:53 2010 -0500
+
+    Implement get_scanline_64() correctly for solid fill images.
+    
+    Previously they would be evaluated at 8 bits and then expanded.
+
+commit 0e8550798f69ef69dbde59eda6341ab4e0801069
+Author: Benjamin Otte <otte@redhat.com>
+Date:   Tue Jan 26 19:37:34 2010 +0100
+
+    Make pixman_image_fill_rectangles() call pixman_image_fill_boxes()
+    
+    Avoids duplication of code
+
+commit d0d284da0a8810e7435b8e932ac5de352793a39a
+Author: Benjamin Otte <otte@redhat.com>
+Date:   Tue Jan 26 19:03:38 2010 +0100
+
+    Add pixman_image_fill_boxes() API
+    
+    It's basically the 32bit version of pixman_image_fill_rectangles(), just
+    with a saner data type.
+
+commit e841c556d59ca0aa6d86eaf6dbf061ae0f4287de
+Author: Benjamin Otte <otte@redhat.com>
+Date:   Tue Jan 26 18:52:27 2010 +0100
+
+    Add pixman_image_composite32()
+    
+    This is equal to pixman_image_composite(), just with 32bit parameters.
+    pixman_image_composite() now just calls pixman_image_composite32()
+
+commit 78b6c470789eb226708a5d98bb06a962d2ae0b0d
+Author: Benjamin Otte <otte@redhat.com>
+Date:   Tue Jan 26 19:09:56 2010 +0100
+
+    Make region argument to pixman_region(32)_init_rects() const
+    
+    No indenting of the header to keep git blame working
+
+commit b194bb78c8a32b7252cccaebdc085cd8e759427d
+Author: Benjamin Otte <otte@redhat.com>
+Date:   Tue Jan 26 19:08:29 2010 +0100
+
+    Fix typo
+
+commit c066c347aeaa779b7a3c9e3cde45413277370f0f
+Author: Søren Sandmann Pedersen <ssp@redhat.com>
+Date:   Tue Jan 19 14:23:57 2010 -0500
+
+    Fix some warnings
+
+commit 8fce7b18f3033aa1423b96b9847f6ae3133fad7a
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jan 17 19:34:27 2010 -0500
+
+    Post-release version bump
+
+commit 23e1ba3c062711fe256612ca7f39478e048a6708
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jan 17 18:56:11 2010 -0500
+
+    Pre-release version bump
+
+commit 8dabd1fdd8f0030086cfe70f0baba7c502a0e1b8
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jan 17 16:45:23 2010 -0500
+
+    bits: Print an error if someone tries to create an image with bpp < depth
+    
+    Something in the X server apparently does this.
+
+commit 2c3cbc83c4018173d9deae3f24c457b3ca16dbcd
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Jan 16 10:31:22 2010 -0500
+
+    When fetching from an alpha map, replace the alpha channel of the image
+    
+    Previously it would be multiplied onto the image pixel, but the Render
+    specification is pretty clear that the alpha map should be used
+    *instead* of any alpha channel within the image.
+    
+    This makes the assumption that the pixels in the image are already
+    premultiplied with the alpha channel from the alpha map. If we don't
+    make this assumption and the image has an alpha channel of its own, we
+    would have to first unpremultiply that pixel, and then premultiply the
+    alpha value onto the color channels, and then replace the alpha
+    channel.
+
+commit 0df6098f3d941608f945d02e2af65b70ac499e0a
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Jan 16 10:09:25 2010 -0500
+
+    pixman_image_validate() needs to also validate the alpha map.
+    
+    This is the other half of bug 25950.
+
+commit 7f00dc62e4aa4b2b417ca1c86813a6b4c7f78673
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Jan 16 10:07:48 2010 -0500
+
+    When fetching from an alpha map, use the alpha map's fetch function.
+    
+    Don't use the one from the image. This is the first half of bug 25950.
+
+commit 042f978b04aefe56ec912c88ec879e668153a287
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Jan 16 09:48:45 2010 -0500
+
+    test: Add new alphamap test program.
+    
+    This program demonstrates three bugs relating to alpha maps:
+    
+    - When fetching from an alpha map into 32 bit intermediates, we use
+      the fetcher from the image, and not the one from the alpha map.
+    
+    - For 64 bit intermediates we call fetch_pixel_generic_lossy_32()
+      which then calls fetch_pixel_raw_64, which is NULL because alpha
+      images are never validated.
+    
+    - The alpha map should be used *in place* of any existing alpha
+      channel, but we are actually multiplying it onto the image.
+
+commit 05c38141b4861348bf61235341d634019e39e8a9
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Jan 16 07:40:07 2010 -0500
+
+    fetch-test: Fix spelling error (pallete -> palette)
+
+commit c46a87e45afc6eb53ae93f9ca3c1545bd26d18f5
+Author: Alan Coopersmith <alan.coopersmith@sun.com>
+Date:   Thu Jan 14 09:42:34 2010 -0800
+
+    Update Sun license notices to current X.Org standard form
+    
+    Signed-off-by: Alan Coopersmith <alan.coopersmith@sun.com>
+
+commit 3df6cb34315ebaeb2ce3f341160355650d856518
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jan 10 09:15:24 2010 -0500
+
+    fetch-test: Various formatting fixes
+
+commit 7862f9b96e8e8456cc60852790c7f244a5e3425e
+Author: Pierre-Loup A. Griffais <pgriffais@nvidia.com>
+Date:   Wed Jan 6 01:26:07 2010 +0200
+
+    Interpret the angle of a conical gradient in degrees.
+    
+    The conical gradient angle's fixed point degrees to
+    radians conversion code is missing a factor of pi.
+
+commit 54f51c4a7595d685397838006ba67473eee47e7d
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Dec 11 11:14:19 2009 -0500
+
+    region: Enable or disable fatal errors and selfchecks based on version number
+    
+    There are a couple of bugs in bugzilla where bugs in the X server
+    triggered asserts in the pixman region code. It is probably better to
+    let the X server survive this. (In fact, I thought I had disabled them
+    for 0.16.0, but apparently not).
+    
+    The patch below uses these rules:
+    
+        - In _stable_ pixman releases, assertions and selfchecks are turned
+          off. Assertions, so that the X server doesn't die. Selfchecks,
+          for performance reasons.
+    
+        - In _unstable_ pixman releases, both assertions and selfcheck are
+          turned on. These releases are what get added to development
+          distributions such as rawhide, so we want as much self-checking
+          as possible.
+    
+        - In _random git checkouts_, assertions are enabled, so that bugs
+          are caught, but selfchecks are disabled so that you can use them
+          for performance work without having to fiddle with turning
+          selfchecks off.
+
+commit 91ec7fecc9b16ec1d18c46d08698e6128baaba1d
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Sep 12 03:27:44 2009 -0400
+
+    Some minor formatting fixes.
+
+commit 97cf4d494cb6fc0a75eec7d6f06f81ba7644d820
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Dec 16 17:54:41 2009 -0500
+
+    arm-simd: Whitespace fixes
+
+commit 28778c997e60af35d26df61fd82860748deb6fab
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Dec 16 17:49:44 2009 -0500
+
+    mmx: Eliminate trailing whitespace.
+
+commit c6c43c65f76aa84c57a94155117487199bc0323e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Dec 16 15:23:50 2009 -0500
+
+    Add 'check' to release-check make target
+
+commit b3afacf9c970fa7de5ffdebebbd8526a55d3e7d8
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Dec 16 15:15:17 2009 -0500
+
+    Reorder tests so that the fastest ones run first.
+
+commit bbc5108bf8cfcb1f2334e51a8e904b5be48376e1
+Author: Marvin Schmidt <marv@exherbo.org>
+Date:   Sun Nov 15 16:04:09 2009 +0100
+
+    Build tests and run non-GTK+ ones on `make check`
+    
+    Setting TESTS will run the tests on `make check`
+    
+    Bug 25131
+
+commit 44768320709183a341d219f97c03c5b592a69355
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Dec 10 00:25:58 2009 +0200
+
+    ARM: added 'neon_combine_add_u' function
+
+commit f2c7a04c41440b15a5ce1db7ab87dd5bd8c088da
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Dec 10 00:22:12 2009 +0200
+
+    ARM: added 'neon_combine_over_u' function
+
+commit 24cd286af6f4507eb9937ced6d9998d296c77a0a
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Dec 9 23:49:04 2009 +0200
+
+    ARM: macro template for single scanline compositing functions
+    
+    Existing template already supports 2D images processing,
+    but pixman also needs some NEON optimized functions for
+    improving performance when compositing is decoupled
+    into "fetch -> process -> store" stages and done via
+    temporary scanline buffer. That's why a new simplified
+    template which deals only with the generation of single
+    scanline processing functions is handy.
+
+commit ae8d9df6248445170702c244cd60f894aa761267
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Dec 14 19:14:36 2009 +0200
+
+    Use canonical pixman license notice for recently added ARM NEON assembly files
+
+commit ce78288d7783a27700223c39e23880f4f425f70b
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Nov 6 02:25:47 2009 +0200
+
+    ARM: added 'neon_composite_src_pixbuf_8888' fast path
+    
+    This is ARM NEON optimized conversion of native RGBA format used by
+    GTK/GDK into native 32bpp RGBA format used by cairo/pixman.
+
+commit a732d3baeb0697b91a713fd6b51b68ee7ca68e03
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Nov 5 20:27:38 2009 +0200
+
+    ARM: added 'neon_composite_src_0888_0565_rev' fast path
+    
+    This is ARM NEON optimized conversion of native RGB format used by
+    GTK/GDK into r5g6b5 format.
+
+commit a1386a1ceb0c50d2e23cf30be30ea165d2d2ea7c
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Nov 5 19:43:09 2009 +0200
+
+    ARM: added 'neon_src_0888_8888_rev' fast path
+    
+    This is ARM NEON optimized conversion of native RGB format used by
+    GTK/GDK into native 32bpp RGB format used by cairo/pixman.
+
+commit 78a60047ac0f85423e0474ef54930e1f537f646b
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Dec 9 11:29:13 2009 +0200
+
+    ARM: added 'neon_composite_over_n_8888' fast path
+
+commit 96fd17488f0966d2df53623195810dc640bf5ca6
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Dec 9 11:02:04 2009 +0200
+
+    ARM: added 'neon_composite_over_n_0565' fast path
+
+commit 2d332c7a569803107e11b41c7b2c020b4050e26e
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Dec 9 10:33:01 2009 +0200
+
+    ARM: added 'neon_composite_src_0565_8888' fast path
+
+commit 062da411d81c7d970a302dd2c283ef5327b867da
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Dec 8 15:04:41 2009 +0200
+
+    ARM: added 'neon_composite_add_8888_8888_8888' fast path
+
+commit 3d0eedb5d9af97fed68e2da03d6aee40197e2a76
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Dec 8 14:39:41 2009 +0200
+
+    ARM: added 'neon_composite_add_8888_8888' fast path
+
+commit 86b54c6701666d087f0234047128fbf0fd6468b6
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Dec 7 22:53:30 2009 +0200
+
+    ARM: added 'neon_composite_over_8888_8_8888' fast path
+
+commit aec1524e773758369ab627553dc5c23d18619a85
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Dec 7 22:42:17 2009 +0200
+
+    ARM: added 'neon_composite_over_8888_8888_8888' fast path
+
+commit ba59d53d0b61effc422c4004a9f0e6cf848598d8
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Dec 8 14:13:12 2009 +0200
+
+    ARM: minor source formatting changes
+    
+    Now it's a bit harder to exceed 80 characters line limit
+    when binding assembly functions.
+
+commit a47b5167c4c1b55b2f51e29ab0782c2659bec312
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Dec 8 08:52:34 2009 +0200
+
+    ARM: added '.arch armv7a' directive to NEON assembly file
+    
+    This fix prevents build failure due to not accepting PLD instruction when
+    compiling for armv4 cpu with the relevant -mcpu/-march options set in CFLAGS.
+
+commit 3fba7dc6fa52bbf01cfc5c4aab1ab06d49a117b2
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Sat Nov 28 11:38:43 2009 +0100
+
+    Make test program not throw warnings about undefined variables
+
+commit 10ab592d57bbeefb3e3297c4a905e5cec233a006
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Fri Nov 27 22:02:54 2009 +0100
+
+    Fix bug that prevented pixman_fill MMX and SSE paths for 16 and 8bpp
+
+commit 7c7b6f5de75a998deaab5d00baf69a895ceba795
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 18 04:26:18 2009 +0200
+
+    ARM: NEON optimized pixman_blt
+    
+    NEON unit has fast access to L1/L2 caches and even simple
+    copy of memory buffers using NEON provides more than 1.5x
+    performance improvement on ARM Cortex-A8.
+
+commit dce6e1bd6840ce0646d8738aaa0927c003dbb361
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Thu Nov 5 23:15:27 2009 +0200
+
+    test: support for testing pixbuf fast path functions in blitters-test
+
+commit 0901ef41fbca2b8fb504c64a2b694bd764770292
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Sun Nov 22 04:34:01 2009 +0100
+
+    Remove nonexistent function from header
+
+commit c97b1e803fc214e9880eaeff98410c8fa37f9ddc
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Nov 20 12:02:50 2009 +0100
+
+    Post-release version bump
+
+commit 5a7597f81862ecf7b098ed254fb8e4197ccae3a8
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Nov 20 11:55:40 2009 +0100
+
+    Pre-release version bump
+
+commit 95a08dece37080e199e436fa6f2dc02e60d346dd
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Nov 20 09:35:48 2009 +0100
+
+    Remove stray semicolon from blitters-test.c
+    
+    Pointed out by scottmc2@gmail.com in bug 25137.
+
+commit 6e2c7d54c6786b52ae7dc683d2dbb4c7c033bb09
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Nov 9 14:10:00 2009 +0200
+
+    C fast path function for 'over_n_1_0565'
+    
+    This function is needed to improve performance of xfce4 terminal when
+    using bitmap fonts and running with 16bpp desktop. Some other applications
+    may potentially benefit too.
+    
+    After applying this patch, top functions from Xorg process in
+    oprofile log change from
+    
+    samples  %        image name               symbol name
+    13296    29.1528  libpixman-1.so.0.17.1    combine_over_u
+    6452     14.1466  libpixman-1.so.0.17.1    fetch_scanline_r5g6b5
+    5516     12.0944  libpixman-1.so.0.17.1    fetch_scanline_a1
+    2273      4.9838  libpixman-1.so.0.17.1    store_scanline_r5g6b5
+    1741      3.8173  libpixman-1.so.0.17.1    fast_composite_add_1000_1000
+    1718      3.7669  libc-2.9.so              memcpy
+    
+    to
+    
+    samples  %        image name               symbol name
+    5594     14.7033  libpixman-1.so.0.17.1    fast_composite_over_n_1_0565
+    4323     11.3626  libc-2.9.so              memcpy
+    3695      9.7119  libpixman-1.so.0.17.1    fast_composite_add_1000_1000
+    
+    when scrolling text in terminal (reading man page).
+
+commit 282f5cf8b821a34bab1e32957913ef8d9f9ee43c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Nov 12 17:54:40 2009 -0500
+
+    Round horizontal sampling points towards northwest.
+    
+    This is a similar change as the top/bottom one, but in this case the
+    rounding is simpler because it's just always rounding down.
+    
+    Based on a patch by M Joonas Pihlaja.
+
+commit f44431986f667eb49571e9365960524361f833c5
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Nov 12 17:20:32 2009 -0500
+
+    Fix rounding of top and bottom coordinates.
+    
+    The rule for trap rasterization is that coordinates are rounded
+    towards north-west.
+    
+    The pixman_sample_ceil() function is used to compute the first
+    (top-most) sample row included in the trap, so when the input
+    coordinate is already exactly on a sample row, no rounding should take
+    place.
+    
+    On the other hand, pixman_sample_floor() is used to compute the final
+    (bottom-most) sample row, so if the input is precisely on a sample
+    row, it needs to be rounded down to the previous row.
+    
+    This commit fixes the rounding computation. The idea of the
+    computation is like this:
+    
+    Floor operation that rounds exact matches down: First subtract
+    pixman_fixed_e to make sure input already on a sample row gets rounded
+    down. Then find out how many small steps are between the input and the
+    first fraction. Then add those small steps to the first fraction.
+    
+    The ceil operation first adds (small_step + pixman_e), then runs a
+    floor. This ensures that exact matches are not rounded off.
+    
+    Based on a patch by M Joonas Pihlaja.
+
+commit 3bea18e3ea587c84423e9f7bafff21150c37d287
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Nov 12 17:03:53 2009 -0500
+
+    Fix slightly skewed sampling grid for antialiased traps
+    
+    The sampling grid is slightly skewed in the antialiased case. Consider
+    the case where we have n = 8 bits of alpha.
+    
+    The small step is
+    
+         small_step = fixed_1 / 15 = 65536 / 15 = 4369
+    
+    The first fraction is then
+    
+         frac_first = (small_step / 2) = (65536 / 15) / 2 = 2184
+    
+    and the last fraction becomes
+    
+         frac_last
+              = frac_first + (15 - 1) * small_step = 2184 + 14 * 4369 = 63350
+    
+    which means the size of the last bit of the pixel is
+    
+         65536 - 63350 = 2186
+    
+    which is 2 bigger than the first fraction. This is not the end of the
+    world, but it would be more correct to have 2185 and 2185, and we can
+    accomplish that simply by making the first fraction half the *big*
+    step instead of half the small step.
+    
+    If we ever move to coordinates with 8 fractional bits, the
+    corresponding values become 8 and 10 out of 256, where 9 and 9 would
+    be better.
+    
+    Similarly in the X direction.
+
+commit 98bb0a509f401563b8e6e15f4ee26947e9c3d419
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Sep 12 04:24:04 2009 -0400
+
+    Delete the flags field from fast_path_info_t
+
+commit b7fb7e6c700891a12300aaf5c8a4c4b2584a194f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Sep 12 02:47:39 2009 -0400
+
+    Eliminate NEED_PIXBUF flag.
+    
+    Instead introduce two new fake formats
+    
+    	PIXMAN_pixbuf
+    	PIXMAN_rpixbuf
+    
+    and compute whether the source and mask have them in
+    find_fast_path(). This led to some duplicate entries in the fast path
+    tables that could then be removed.
+
+commit 542b79c30d88788028d391285aa8cd038e96f2b3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Sep 12 02:11:12 2009 -0400
+
+    Compute src_format outside the fast path loop.
+    
+    Inside the loop all we have to do is check that the formats match.
+
+commit 12108ecbe488d1b65e6787585e1caa57af17a008
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Sep 12 01:53:54 2009 -0400
+
+    Eliminate the NEED_COMPONENT_ALPHA flag.
+    
+    Instead introduce two new fake formats
+    
+    	PIXMAN_a8r8g8b8_ca
+    	PIXMAN_a8b8g8r8_ca
+    
+    that are used in the fast path tables for this case.
+
+commit 4686d1f53b09b5dd12df6f10f8c0403b2a1e2427
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Sep 12 01:35:56 2009 -0400
+
+    Eliminate the NEED_SOLID_MASK flag
+    
+    This flag was used to indicate that the mask was solid while still
+    allowing a specific format to be required. However, there is not
+    actually any need for this because the fast paths all used
+    _pixman_image_get_solid() which already allowed arbitrary formats.
+    
+    The one thing that had to be dealt with was component alpha. In
+    addition to interpreting the presence of the NEED_COMPONENT_ALPHA
+    flag, we now also interpret the *absence* of this flag as a
+    requirement that the mask does *not* have component alpha.
+    
+    Siarhei Siamashka pointed out that the first version of this commit
+    had a bug, in which a NEED_SOLID_MASK was accidentally not turned into
+    a PIXMAN_solid in the ARM NEON implementation.
+
+commit 2ef8b394d72d6c13f96347626b09613f805d9f8c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Sep 19 06:14:38 2009 -0400
+
+    Use the destination buffer directly in more cases instead of fetching.
+    
+    When the destination buffer is either a8r8g8b8 or x8r8g8b8, we can use
+    it directly instead of fetching into a temporary buffer. When the
+    format is x8r8g8b8, we require the operator to not make use of
+    destination alpha, but when it is a8r8g8b8, there are no restrictions.
+    
+    This is approximately a 5% speedup on the poppler cairo benchmark:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    
+    Before:
+    [  0]    image                      poppler    6.661    6.709   0.59%    6/6
+    
+    After:
+    [  0]    image                      poppler    6.307    6.320   0.12%    5/6
+
+commit 13f4e02b1429d62b08487beebd8697887a5a9608
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Nov 10 15:48:36 2009 -0500
+
+    test: Move image_endian_swap() from blitters-test.c to utils.[ch]
+
+commit 24e203a8a8394edb3a89f3d6be1bdcab41fbe7f9
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Nov 10 15:45:17 2009 -0500
+
+    test: Move random number generator from blitters/scaling-test to utils.[ch]
+
+commit cc34554652bf9a402127fa06a03105b49a425895
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Nov 10 15:32:12 2009 -0500
+
+    test: In scaling-test use the crc32 from utils.c
+
+commit b465b8b79dc008f4f4dcddf45754fef260e51619
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Nov 10 15:29:20 2009 -0500
+
+    test: Move CRC32 code from blitters-test to new files utils.[ch]
+
+commit 56bd91340102e915a239d2afa1db223109cf6639
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Nov 10 14:58:19 2009 -0500
+
+    test: Rename utils.[ch] to gtk-utils.[ch]
+
+commit 7be529f3bd6455259e24163a27a0a5a761ee0cc3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Sep 20 17:37:36 2009 -0400
+
+    sse2: Add a fast path for OVER 8888 x 8 x 8888
+    
+    This is a small speedup on the swfdec-youtube benchmark:
+    
+    Before:
+    [  0]    image               swfdec-youtube    5.789    5.806   0.20%    6/6
+    
+    After:
+    [  0]    image               swfdec-youtube    5.489    5.524   0.27%    6/6
+    
+    I.e., approximately 5% faster.
+
+commit abefe68ae2a422fecf315f17430c0cda5561be66
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 4 17:14:14 2009 +0200
+
+    ARM: enabled 'neon_composite_add_8000_8000' fast path
+
+commit 635f389ff477a0afe82c6038a835e262d5034d99
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 4 17:13:31 2009 +0200
+
+    ARM: enabled 'neon_composite_add_8_8_8' fast path
+
+commit 7e1bfed6767774a43c288ab780f62a20eccff805
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 4 17:12:56 2009 +0200
+
+    ARM: enabled 'neon_composite_add_n_8_8' fast path
+
+commit deeb67b13a0f9267b59d9755e7a0102da29a6747
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 4 17:12:14 2009 +0200
+
+    ARM: enabled 'neon_composite_over_8888_8888' fast path
+
+commit f449364849b2cc75a48cc3b35d2a373d38b71c09
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 4 17:11:32 2009 +0200
+
+    ARM: enabled 'neon_composite_over_8888_0565' fast path
+
+commit 2dfbf6c4a520da4647bb480a124dfe5cbece225b
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 4 17:10:55 2009 +0200
+
+    ARM: enabled 'neon_composite_over_8888_n_8888' fast path
+
+commit 43824f98f1fc41d923dd8ddd97e74942c01aadf8
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 4 17:10:09 2009 +0200
+
+    ARM: enabled 'neon_composite_over_n_8_8888' fast path
+
+commit 189d0d783cc62aa3b739218689042c9235c04fa1
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 4 17:09:31 2009 +0200
+
+    ARM: enabled 'neon_composite_over_n_8_0565' fast path
+
+commit cccfc87f4f597f99b74691af172126a2346f9239
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 4 17:08:48 2009 +0200
+
+    ARM: enabled 'neon_composite_src_0888_0888' fast path
+
+commit e89b4f8105beaa27b6098a5dc7dfec62879ebd1d
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 4 17:08:09 2009 +0200
+
+    ARM: enabled 'neon_composite_src_8888_0565' fast path
+
+commit 2d54ed46fb7428aa1d9f114450554fc33acff2c4
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 4 17:07:36 2009 +0200
+
+    ARM: enabled 'neon_composite_src_0565_0565' fast path
+
+commit 5d695cb86eaad151c9402ead5dfb7e867ff58d29
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 4 17:05:46 2009 +0200
+
+    ARM: added 'bindings' for NEON assembly optimized functions
+    
+    These functions serve as 'adaptors', converting standard internal
+    pixman fast path function arguments into arguments expected
+    by assembly functions.
+
+commit dcfade3df96559ce942df5d16b7915c94f7d9e57
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 4 15:29:27 2009 +0200
+
+    ARM: enabled new implementation for pixman_fill_neon
+
+commit bcb4bc79321659635d706bade25851cddf563856
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 4 15:18:38 2009 +0200
+
+    ARM: introduction of the new framework for NEON fast path optimizations
+    
+    GNU assembler and its macro preprocessor are now used to generate
+    NEON optimized functions from a common template. This automatically
+    takes care of nuisances like ensuring optimal alignment, dealing with
+    leading/trailing pixels, doing prefetch, etc.
+    
+    Implementations for a lot of compositing functions are also added,
+    but not enabled.
+
+commit 1eff0ab487efe4720451b8bd92c8423b9772a69a
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Nov 4 14:25:27 2009 +0200
+
+    ARM: removed old ARM NEON optimizations
+
+commit b8898d77d0e7cc1c50321fcb216af3ba6c634959
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Nov 7 14:47:22 2009 -0500
+
+    Define PIXMAN_USE_INTERNAL_API in pixman-private.h
+    
+    Instead of mucking around with CFLAGS in configure.ac, preventing
+    users from setting their own CFLAGS, just define the
+    PIXMAN_USE_INTERNAL_API and PIXMAN_DISABLE_DEPRECATED in
+    pixman-private.h
+
+commit 67bf739187cd43b5fff754b25693f76bb788d1fa
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Oct 27 09:11:28 2009 -0400
+
+    Include <inttypes.h> when compiled with HP's C compiler.
+    
+    Fixes bug 23169.
+
+commit 384fb88b905823e62c1f1733a20073bfea15d411
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Oct 27 12:25:13 2009 +0200
+
+    C fast path function for 'over_n_1_8888'
+    
+    This function is needed to improve performance of xfce4 terminal.
+    Some other applications may potentially benefit too.
+
+commit a2985da94704af050b4422dca70fd2dd770faba4
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Oct 27 12:11:05 2009 +0200
+
+    C fast path function for 'add_1000_1000'
+    
+    This function is needed to improve performance of xfce4 terminal.
+    Some other applications may potentially benefit too.
+
+commit 5f429e45106d79c48ee102987ef84be54fd421d8
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Oct 23 20:56:30 2009 +0300
+
+    blitters-test updated to also randomly generate mask_x/mask_y
+
+commit 0d5562747ce25ecac06f4c44e935662eb6ee328a
+Author: André Tupinambá <andrelrt@gmail.com>
+Date:   Sat Sep 19 23:01:50 2009 -0400
+
+    Add fast path scaled, bilinear fetcher.
+    
+    This adds a bilinear fetcher for the case where the image has a scaled
+    transformation, does not repeat, and the format {ax}8r8g8b8.
+    
+    Results for the swfdec-youtube benchmark
+    
+    Before:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image               swfdec-youtube    7.841    7.915   0.72%    6/6
+    
+    After:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image               swfdec-youtube    6.677    6.780   0.94%    6/6
+    
+    These results were measured on a faster machine than the ones in the
+    previous commit, so the numbers are not comparable.
+    
+    Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>
+
+commit 88323c5abe68906472049537b54b0e7eea343f43
+Author: André Tupinambá <andrelrt@gmail.com>
+Date:   Sat Sep 19 09:32:37 2009 -0400
+
+    Speed up bilinear interpolation.
+    
+    Speed up bilinear interpolation by processing more than one component
+    at a time on 64 bit architectures, and by precomputing the dist{ixiy}
+    products on 32 bit architectures.
+    
+    Previously bilinear interpolation for one pixel would take 24
+    multiplications. With this improvement it takes 12 on 64 bit, and 20
+    on 32 bit.
+    
+    This is a small but consistent speedup on the swfdec-youtube
+    benchmark:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    Before:
+    [  0]    image               swfdec-youtube   18.010   18.020   0.09%    4/5
+    
+    After:
+    [  0]    image               swfdec-youtube   17.488   17.584   0.22%    5/6
+    
+    Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>
+
+commit f0c157f888185279681bad305973f246dca2e535
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Sep 27 09:41:25 2009 -0400
+
+    Extend scaling-test to also test bilinear filtering.
+
+commit eab882ef38509bfc9719fcee6020d882ee030694
+Author: Jeremy Huddleston <jeremyhu@freedesktop.org>
+Date:   Wed Oct 21 12:47:27 2009 -0700
+
+    This is not a GNU project, so declare it foreign.
+    
+    On Wed, 2009-10-21 at 13:36 +1000, Peter Hutterer wrote:
+    > On Tue, Oct 20, 2009 at 08:23:55PM -0700, Jeremy Huddleston wrote:
+    > > I noticed an INSTALL file in xlsclients and libXvMC today, and it
+    > > was quite annoying to work around since 'autoreconf -fvi' replaces
+    > > it and git wants to commit it.  Should these files even be in git?
+    > > Can I nuke them for the betterment of humanity and since they get
+    > > created by autoreconf anyways?
+    >
+    > See https://bugs.freedesktop.org/show_bug.cgi?id=24206
+    
+    As an interim measure, replace AM_INIT_AUTOMAKE([dist-bzip2]) with
+    AM_INIT_AUTOMAKE([foreign dist-bzip2]). This will prevent the generation
+    of the INSTALL file. It is also part of the 24206 solution.
+    
+    Signed-off-by: Jeremy Huddleston <jeremyhu@freedesktop.org>
+
+commit dc46ad274a47d351bacf3c2167c359d23dbaf8b3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Oct 19 20:32:37 2009 -0400
+
+    Make walk_region_internal() use 32 bit dimensions
+
+commit bb3698d47925db77925810c3128be1641f455c60
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Oct 19 20:31:54 2009 -0400
+
+    Make pixman_compute_composite_region32() use 32 bit dimensions
+
+commit 895c281c4094844f9f955621e4ac1e4394d865f0
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Oct 19 20:30:22 2009 -0400
+
+    Change prototype of _pixman_walk_composite_region from int16_t to int32_t
+
+commit 9cd470665b1bb7e0cb810f2457d3788f7c981072
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Oct 19 20:27:36 2009 -0400
+
+    Remove unused color_table and color_table_size fields
+
+commit 8186937637f25088e61c22a3ce1740a56f5d6e13
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Oct 18 03:02:28 2009 -0400
+
+    Remove BOUNDS() macro.
+    
+    It was bounding the clip region to INT16_MIN, INT16_MAX, but this was
+    a relic from the X server. We don't need it since we are already
+    restricting the clip region to the geometry of the destination.
+
+commit 9bcfc0ac547277d3a3f4e5ff0922450566ad8be8
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Wed Sep 30 08:02:39 2009 +0200
+
+    --enable-maintainer-mode is gone from configure, so remove it
+
+commit fa49ef81f7b39d32b626ed235958448835e2d2c2
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Thu Sep 17 13:19:04 2009 +0200
+
+    Add default cases for all switch statements
+    
+    Fixes compilation with -Wswitch-default. Compilation with -Wswitch-enum
+    works fine as is.
+
+commit 5c3ef4e9798f3395c55fe7b57df32f77c0de2c71
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Thu Sep 17 13:18:22 2009 +0200
+
+    Fix compile warnings
+
+commit ad484078854572cf640d7ffbb66f1e99328e79b8
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Jul 27 01:21:26 2009 +0300
+
+    ARM: Removal of unused/broken NEON code
+
+commit 358f96d20219b4460bfd8ecf88e69ff10044b577
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Oct 8 13:01:27 2009 -0400
+
+    Fix double semicolon; pointed out by Travis Griggs
+
+commit 93acc10617c88fbf933120c6980ae8ef80cf94f0
+Author: Gerdus van Zyl <gerdusvanzyl@gmail.com>
+Date:   Tue Sep 29 12:28:03 2009 +0200
+
+    Fix build with Visual Studio 2008
+    
+    moved __m64 ms declaration in sse2_composite_over_x888_8_8888 to top
+    of function so it compiles with visual studio 2008
+
+commit f135f74ff3a4f55262b611b452566daff5e936ce
+Author: Andrea Canciani <ranma42@gmail.com>
+Date:   Sun Sep 27 11:40:52 2009 +0200
+
+    Fix composite on big-endian systems.
+    
+    Data narrower than 32bpp is padded to an unsigned long and on
+    big-endian systems this shifts the value by the padding bits.
+
+commit 15c14691a71daa29c86cce40ac0b4a14acf8f2fc
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Sep 26 13:12:14 2009 -0400
+
+    Fix fetch-test for big-endian systems.
+    
+    Data narrower than 32bpp should be stored in the correct
+    endian. Reported by Andrea Canciani.
+
+commit 02d70998885065bcea55cb2a8bfa75473083bc17
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Sep 24 08:57:26 2009 -0400
+
+    Add missing break in composite.c
+
+commit 8ce004af3670a183c78fc7c61fbfcfd8c7f17e54
+Author: Guillem Jover <guillem@hadrons.org>
+Date:   Tue Sep 22 19:51:13 2009 +0200
+
+    pixman: Update .gitignore
+    
+    Generalize to catch all .pc files. Add more tests.
+    
+    Signed-off-by: Guillem Jover <guillem@hadrons.org>
+
+commit 59e877cffe6497d865031d79e9a742414407d544
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Sep 24 08:10:00 2009 -0400
+
+    In the compositing test, don't try to use component alpha with solid fills.
+    
+    It's not supported yet.
+
+commit 16adb09c8a003936a1ef17042776a725c9aa6813
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Sep 18 11:33:18 2009 -0400
+
+    Update CRC value in blitters-test for the new bug fixes
+
+commit e156964d3e005be3dbc9ff80580d98c6dd617afd
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Sep 18 08:16:56 2009 -0400
+
+    Fix bug in blitters-test with BGRA formats.
+    
+    When masking out the x bits, blitter-test would make the incorrect
+    assumption that they were always in the topmost position. This is
+    not correct for formats of type PIXMAN_TYPE_BGRA.
+
+commit eb72bfb97d10283964c070f0a0e26f0520a22ff3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Sep 18 09:43:14 2009 -0400
+
+    Fix bugs in fetch_*_b2g3r3().
+    
+    The red channel should only be shifted five positions, not six.
+
+commit b4f6113cb975110c33f607aa39d19290f58be398
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Sep 24 07:48:46 2009 -0400
+
+    Fix bugs in a1b2g1r1.
+    
+    The first bug is that it is treating the input as if it were a1r1g1b1;
+    the second one is that the red channel should only be shifted two
+    bits, not three.
+
+commit efdf15e677d506c2049a34e92eb2172712101afa
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Sep 18 08:48:04 2009 -0400
+
+    Fix shift bug in fetch_scanline/pixel_a2b2g2r2()
+    
+    0x30 * 0x55 is 0xff0, so the red channel should be shifted four bits,
+    not six.
+
+commit 679c2dabda094491599ce770ddba11611d08efc8
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Sep 18 08:13:46 2009 -0400
+
+    Fix four bit formats.
+    
+    The original Render code used to index pixels with their position in
+    bits in the image. When the scanline code was introduced pixels were
+    indexed in bytes, but the FETCH/STORE_4/8 macros still assumed bits.
+    
+    This commit fixes that by making the FETCH/STORE_4 macros first
+    convert the index to bit position.
+
+commit 3d1714cd1f8ae7d47ad5f01a1140133ae71a00e2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Sep 20 16:50:37 2009 -0400
+
+    Hide PIXMAN_OP_NONE and PIXMAN_N_OPERATORS behind PIXMAN_INTERNAL_API.
+    
+    These cannot sanely be used by applications since they may change in
+    new versions.
+
+commit 0683f34c418bc5fb2fa5e2a41bdc102195edbe67
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Sep 18 08:06:32 2009 -0400
+
+    Add a few notes about testing to TODO
+
+commit 48ba7d946146ea7b0e33e963942bedde22a3b806
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Sep 18 09:11:04 2009 -0400
+
+    Fix alpha handling for 10 bpc formats.
+    
+    These generally extracted the 2 bits of alpha, then shifted them 62
+    bits and replicated across 16 bits. Then they were shifted another 48
+    bits, making the resulting alpha channel 0.
+
+commit c673c83e070ed2392c00716fe20a80a798588b39
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Sep 24 05:22:33 2009 -0400
+
+    Return result from pixman_image_set_transform().
+    
+    Previously it would always return TRUE, even when malloc() had failed.
+
+commit eb16d171882d200a390345ec8a3db1b931e9676a
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Sep 15 07:43:23 2009 -0400
+
+    Revert "Enable component alpha on solid masks."
+    
+    For consistency we will probably want to allow component alpha to be
+    set on all masks at some point, but this commit only enabled it for
+    solid images.
+    
+    This reverts commit 29e22cf38e8abc54b9dddbdeb3909d02866a82a0.
+
+commit b96e37f8d0f5c94f5d117e1622d7cae7246d6345
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Sep 15 13:16:17 2009 +0100
+
+    [Makefile] Set the SIMD specific CFLAGS for inspecting asm.
+
+commit 273e89750b3ce901fa6769a835fa441ee986d508
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Sep 14 18:48:32 2009 -0400
+
+    Remove optimization for 0xffffffff and 0xff in the add_n_8888_8888_ca fast path
+    
+    This is an ADD operation, not an OVER. Fixes bug 23934, reported by
+    Siarhei Siamashka.
+
+commit ec7c1affcc66c12af1fc29fd000f9885a5d48320
+Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi>
+Date:   Mon Sep 14 22:52:29 2009 +0300
+
+    Don't prefetch from NULL in the SSE2 fast paths.
+    
+    On an Athlon64 box prefetch from NULL slows down
+    the rgba OVER rgba fast path for predominantly solid sources
+    by up to 3.5x in the one-rounded-rectangle test case
+    when run using a tiling polygon renderer.  This patch
+    conditionalises the prefetches of the mask everywhere
+    where the mask pointer may be NULL in a fast path.
+
+commit 1b5269a585d8dcdb8f5ff9f71113bcf2d5efab26
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Sep 14 06:58:03 2009 -0400
+
+    Reformat test/composite.c to follow the standard coding style.
+
+commit 0431a0af6c566c8990c88dc22fd0dc76fdd72cf2
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Sep 13 18:02:10 2009 +0100
+
+    [test] Exercise repeating patterns for composite.
+
+commit c28e39f17a87cdaa7ce43ec99f2f764cc935f484
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Sep 13 15:04:30 2009 +0100
+
+    [build] Add rule to generate asm for inspection.
+
+commit 823bb1a9430bc0c4735ffefbbe19efe45887e32c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Sep 13 15:04:54 2009 +0100
+
+    [sse2] Don't emit prefetch 0 for an absent mask
+
+commit 8f2daa7ca25de754522abfb9ed1158d090f00780
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Sep 13 15:07:08 2009 +0100
+
+    [test] Add composite test from rendercheck
+    
+    Iterate over all destination formats for dst, src and composite and
+    compare the result of all operators with a selection of colours.
+
+commit cda0ee5165812b86a052ceb01830a1d42d02a03b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Aug 27 09:19:14 2009 +0100
+
+    build: Suppress verbose compile lines
+    
+    Compile warnings are being lost in the sea of noise. Automake-1.11 finally
+    introduced AM_SILENT_RULES to suppress the echoing of the compile line for
+    every object. Enable this to bring sanity to the pixman build.
+
+commit 56cc06f89b7db733e5036a00df7aea27cf8d0951
+Merge: 8aff99e 8035df8
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Sep 13 16:32:27 2009 +0100
+
+    Merge branch '0.16'
+    
+    Conflicts:
+    	configure.ac
+    	pixman/pixman-sse2.c
+
+commit 8035df8bcb01c2df42b8adf8b96c7ac796f384cc
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Aug 16 12:16:46 2009 +0100
+
+    Remove duplicated declaration
+    
+    The pixman_tranform_pixman_f_transform() declaration is repeated 4 lines
+    down.
+
+commit 29e22cf38e8abc54b9dddbdeb3909d02866a82a0
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Sep 13 16:26:29 2009 +0100
+
+    Enable component alpha on solid masks.
+
+commit 9fe2628702785e8db45593709c0aec54043a50e7
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Sep 13 16:26:52 2009 +0100
+
+    [sse2] Bit-reversing typo: src != dst
+
+commit 8aff99e231dcb83fa6c08e760711b0a1e979d012
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Sep 10 21:33:24 2009 -0400
+
+    Fix off-by-one error in source_image_needs_out_of_bounds_workaround()
+    
+    If extents->x2/y2 are equal to image->width/height, then the clip is
+    still inside the drawable, so no workaround is necessary.
+
+commit fefe2a5d24591846281bb9bf0e85d42822e1716e
+Author: Gaetan Nadon <memsize@videotron.ca>
+Date:   Tue Sep 8 20:06:19 2009 -0400
+
+    Remove unused generated libcomp.pc #23801
+
+commit 2186bc89486f9f11161b0db280a869c6849c867e
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Sep 4 14:14:00 2009 +0300
+
+    Change CFLAGS order for PPC and ARM configure tests
+    
+    CFLAGS are always appended to the end of gcc options when compiling
+    sources in autotools based projects. Configure tests should do the
+    same. Otherwise build fails on PPC when using CFLAGS="-O2 -mno-altivec"
+    for example. Similar problem affects ARM.
+
+commit 15304e3cddd6568ba6d5d1d3030568c3db7b05cc
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Sep 2 19:46:47 2009 +0300
+
+    ARM: Remove fallback to ARMv6 implementation from NEON delegate chain
+    
+    This can help to fix build problems with '-mthumb' gcc option in CFLAGS.
+    ARMv6 optimized code can't be compiled for thumb (because of its inline
+    assembly) and gets automatically disabled in configure. Reference
+    to it from NEON optimized code resulted in linking problems.
+    
+    Every ARMv6 optimized fast path function also has a better NEON
+    counterpart, so there is no need to fall back to ARMv6. Shorter
+    delegate chain should additionally result in a bit better performance.
+
+commit 2679d93e22b4f3922a39bc53680f1aab6ea7c73c
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Sep 4 14:14:00 2009 +0300
+
+    Change CFLAGS order for PPC and ARM configure tests
+    
+    CFLAGS are always appended to the end of gcc options when compiling
+    sources in autotools based projects. Configure tests should do the
+    same. Otherwise build fails on PPC when using CFLAGS="-O2 -mno-altivec"
+    for example. Similar problem affects ARM.
+
+commit 91232ee40d8499cb91ad27717f751f15c805b4a6
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Sep 2 19:46:47 2009 +0300
+
+    ARM: Remove fallback to ARMv6 implementation from NEON delegate chain
+    
+    This can help to fix build problems with '-mthumb' gcc option in CFLAGS.
+    ARMv6 optimized code can't be compiled for thumb (because of its inline
+    assembly) and gets automatically disabled in configure. Reference
+    to it from NEON optimized code resulted in linking problems.
+    
+    Every ARMv6 optimized fast path function also has a better NEON
+    counterpart, so there is no need to fall back to ARMv6. Shorter
+    delegate chain should additionally result in a bit better performance.
+
+commit 61b616067c3e8b2ff84fbf57f479a90cc9fa5344
+Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi>
+Date:   Mon Aug 31 23:02:53 2009 +0100
+
+    Default to optimised builds when using a Sun Studio compiler.
+    
+    Autoconf's AC_PROG_CC sets the default CFLAGS to -O2 -g for
+    gcc and -g for every other compiler.  This patch defaults
+    CFLAGS to the equivalent -O -g when we're using Sun Studio's cc
+    if the user or site admin hasn't already set CFLAGS.
+
+commit 20acda6fde8441e18aab33980a33b099a16063eb
+Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi>
+Date:   Mon Aug 31 20:27:32 2009 +0100
+
+    Work around a Sun Studio 12 code generation bug involving _mm_set_epi32().
+    
+    Calling a static function wrapper around _mm_set_epi32() when not
+    using optimisation causes Sun Studio 12's cc to emit a spurious
+    floating point load which confuses the assembler.  Using a macro wrapper
+    rather than a function steps around the problem.
+
+commit e30c0037d44bf76a26182080be24c7037d7be5b5
+Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi>
+Date:   Mon Aug 31 20:24:04 2009 +0100
+
+    Work around differing _mm_prefetch() prototypes on Solaris.
+    
+    Sun Studio 12 expects the address to prefetch to be
+    a const char pointer rather than a __m128i pointer or
+    void pointer.
+
+commit 29e7d6063f7b93dd4fde3d42a2931ec0f55158c3
+Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi>
+Date:   Mon Aug 31 23:02:53 2009 +0100
+
+    Default to optimised builds when using a Sun Studio compiler.
+    
+    Autoconf's AC_PROG_CC sets the default CFLAGS to -O2 -g for
+    gcc and -g for every other compiler.  This patch defaults
+    CFLAGS to the equivalent -O -g when we're using Sun Studio's cc
+    if the user or site admin hasn't already set CFLAGS.
+
+commit e7018685f0618640221ebc61446ee98ea3056bbb
+Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi>
+Date:   Mon Aug 31 20:27:32 2009 +0100
+
+    Work around a Sun Studio 12 code generation bug involving _mm_set_epi32().
+    
+    Calling a static function wrapper around _mm_set_epi32() when not
+    using optimisation causes Sun Studio 12's cc to emit a spurious
+    floating point load which confuses the assembler.  Using a macro wrapper
+    rather than a function steps around the problem.
+
+commit 04ade7b68c620a62daff6212eee4d1b96bfbc3c9
+Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi>
+Date:   Mon Aug 31 20:24:04 2009 +0100
+
+    Work around differing _mm_prefetch() prototypes on Solaris.
+    
+    Sun Studio 12 expects the address to prefetch to be
+    a const char pointer rather than a __m128i pointer or
+    void pointer.
+
+commit 698b686d58c510e1b8a9183750d00cbd9ed504b2
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Aug 28 22:34:21 2009 +0300
+
+    ARM: workaround for gcc bug in vshll_n_u8 intrinsic
+    
+    Some versions of gcc (cs2009q1, 4.4.1) incorrectly reject
+    shift operand having value >= 8, claiming that it is out of
+    range. So inline assembly is used as a workaround.
+
+commit 3e228377f9e7159a52a3716d8e4930c5a9dbb1af
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Aug 28 22:34:21 2009 +0300
+
+    ARM: workaround for gcc bug in vshll_n_u8 intrinsic
+    
+    Some versions of gcc (cs2009q1, 4.4.1) incorrectly reject
+    shift operand having value >= 8, claiming that it is out of
+    range. So inline assembly is used as a workaround.
+
+commit 632125d4108f9a53d625a6b997832fa45a295807
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 2 08:27:33 2009 -0400
+
+    Enable the x888_8_8888 sse2 fast path.
+
+commit b02b644d7017f794be2296c6354e44fd119d2477
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Sep 2 16:09:32 2009 -0400
+
+    Set version number to 0.16.1
+
+commit d465f854b3d5f2ffcc122aebfbead2d64cca7169
+Author: Makoto Kato <m_kato@ga2.so-net.ne.jp>
+Date:   Tue Sep 1 10:59:05 2009 +0900
+
+    Add CPU detection for VC++ x64
+    
+    VC++ x64 has no inline assembler and x64 mode supports SSE2.
+    So, it is unnecessary to call cpuid.
+
+commit 097342a65d81fb957dfc17486f615f887540e146
+Author: Makoto Kato <m_kato@ga2.so-net.ne.jp>
+Date:   Tue Sep 1 10:59:05 2009 +0900
+
+    Add CPU detection for VC++ x64
+    
+    VC++ x64 has no inline assembler and x64 mode supports SSE2.
+    So, it is unnecessary to call cpuid.
+
+commit 64085c91b6a1deca4007b18d63b707b896653ee9
+Author: Søren Sandmann Pedersen <ssp@dhcp-100-3-19.bos.redhat.com>
+Date:   Tue Sep 1 08:23:23 2009 -0400
+
+    Change names of add_8888_8_8 fast paths to add_n_8_8
+    
+    The source is solid in those.
+
+commit 7af985a69a9147e54dd5946a8062dbc2e534b735
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 28 08:14:04 2009 -0400
+
+    Post-release version bump
+
+commit 57812465bf975e943e78d59dabbd5e6bb2ad87ef
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 28 07:55:30 2009 -0400
+
+    Pre-release version bump
+
+commit 9e1a34a0d177e8c2381f419b0a04310da8cdde2b
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Aug 28 06:31:06 2009 -0400
+
+    _pixman_run_fast_path: typo
+    
+    This is one example of a compiler warning that was lost amid the build
+    noise.
+    
+    The error here is that in a list of required conditions we used ';'
+    instead of '&&' with the result of continuing to use the fast-path
+    even if we had a wide mask.
+    
+    Another error is that it was testing src, not mask as it should.
+
+commit 83d607cbf1d3852f91b52a427bee30fffc3029e7
+Author: Makoto Kato <m_kato@ga2.so-net.ne.jp>
+Date:   Fri Aug 28 04:09:15 2009 -0400
+
+    Remove spurious spaces in pixman-x64-mmx-emulation.h
+
+commit ce966f4d8e8aa9a4465e2ab28666bae891194a72
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Aug 12 14:08:58 2009 -0400
+
+    Check if we have posix_memalign() in configure.ac. [23260, 23261]
+    
+    Fall back to malloc() in blitters-test.c if we don't.
+
+commit 29c2ae4a0cf924cb011467687a4c43237fb2316c
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Aug 12 20:22:24 2009 +0300
+
+    ARM: a fix to pass blitters-test for 'neon_composite_over_n_8_0565'
+    
+    Inline assembly for handling <8 pixels width did not pass blitters-test.
+    Fortunately gcc has no problems compiling alternative implementation
+    which is using RVCT style intrinsics, so it can be used instead.
+
+commit ed4598f08a359346d14fe5b2bbb7b74c40332b18
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Aug 11 14:03:24 2009 -0400
+
+    Post-release version bump
+
+commit bfdae053eaff528aa869bc05a0520ab22d540a08
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Aug 11 13:56:16 2009 -0400
+
+    Pre-release version-bump
+
+commit d6016d406a649f7a95bec2a477dfd89ba280188d
+Merge: 93923c6 e084351
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Aug 11 02:04:40 2009 -0400
+
+    Merge branch 'blitter-test'
+
+commit 93923c626f2df18e29e215410e6d4bb2f5c1616f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Aug 10 20:47:36 2009 -0400
+
+    Fix x/y mixup in bits_image_fetch_pixel_convolution()
+    
+    Bug 23224, reported by Michel Dänzer.
+
+commit e084351b13faad6a3ba67808b5721957b51d16f0
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 00:45:53 2009 -0400
+
+    Update CRC value in blitters-test.
+    
+    At this point, the SIMD, SSE2, MMX and general implementations all
+    agree.
+
+commit ba5c5325e77b36374d3be22bd92816c332a321bb
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 00:25:56 2009 -0400
+
+    Various formatting fixes
+
+commit cc5c59b3f25dc595ce17a876e89ca549bb477e46
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Aug 5 16:28:10 2009 -0400
+
+    Add the ability to print intermediate CRC values
+
+commit 0bc4adae3eb758a2cd9026397c284bd6cc7bcd65
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Aug 5 15:53:33 2009 -0400
+
+    Reenable commented-out tests in blitter-test.
+    
+    The crashes and valgrind issues are all fixed at this point.
+
+commit 9ee18806a944ddde36dc1b045f89f02d025cbe48
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Aug 3 00:01:01 2009 +0300
+
+    One more update to blitters-test - use aligned memory
+    allocations in order to make reproducibility
+    of alignment sensitive bugs more deterministic
+    Also testing of masks is reenabled
+
+commit 4fc0f9dd456bb4ad1f47e1733b02a3b491f425ed
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sat Aug 1 02:20:12 2009 +0300
+
+    HACK: updated test to better cover new neon optimizations
+
+commit 67769ad5bf15450d0fd0d83643e3533a9f563916
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Jul 22 01:29:51 2009 +0300
+
+    Test program for stressing the use of different formats and operators
+    
+    The code and overall method is mostly based on scaling-test. This one
+    focuses on trying to stress as many different color formats and types
+    of composition operations as possible.
+    
+    This is an initial implementation which may need more tuning. Also
+    not all color format and operator combinations are actually used.
+    
+    When cpu specific optimizations are disabled, this test provides
+    identical deterministic results on x86, PPC and ARM.
+    
+    Script blitters-test-bisect.rb now works in non-stop mode, until
+    it finds any problem. This allows to run it for example overnight
+    in order to test a lot more variants of pixman calls and increase
+    chances of detecting problems in pixman. Just like with scaling-test,
+    running blitters-test binary alone with no command line arguments
+    runs a small predefined number of tests and compares checksum
+    with a reference value for quick verification.
+
+commit 51f597ad3258dd85b4620ac2bf0df8ca2e0ed182
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 12:00:07 2009 -0400
+
+    Delete commented out code in pixman-vmx.c
+
+commit a590eabead0a0c405a7293d8689b9992de5a689b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 11:53:50 2009 -0400
+
+    Misc formatting fixes for pixman-vmx.c
+
+commit 0ebb587e2460024fb306597799ae4974441511ec
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 11:48:22 2009 -0400
+
+    In vmx_combine_atop_reverse_ca() extract alpha after inversing
+
+commit 3d2f00783f2972ba5311937057ea8d452f942a36
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 11:46:09 2009 -0400
+
+    Really fix vmx_combine_over_reverse_ca()
+    
+    The inverse destination alpha is just one component, not four.
+
+commit 2f62a4f46c1e99ddb1b7ca6d5db9410d12f32e63
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 11:40:42 2009 -0400
+
+    Fix vmx_combine_out_reverse_ca()
+    
+    The source alpha is just one component, not four.
+
+commit 7e58323385e442fb2cea207780db5e30be88be96
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 11:38:03 2009 -0400
+
+    Fix vmx_over_reverse_ca()
+    
+    Destination alpha must be extracted after inversing, otherwise we end
+    up with 0xFFs in the rgb channels.
+
+commit 2382bd9e2724944a05ce8a581e9ddc31e299a0c6
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 11:35:20 2009 -0400
+
+    Multiply with the alpha of dest, not inverse alpha
+
+commit 498df0f0bf2437130ed305fb757ae0fae90bebb7
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 11:32:31 2009 -0400
+
+    Fix vmx_combine_vmx_atop_ca()
+    
+    It didn't compute the mask correctly before.
+
+commit 9650cd7432ef03c05895df04940e2ab6245f2618
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 11:26:23 2009 -0400
+
+    Fix vmx_combine_over_ca().
+    
+    In the non-vector code, the mask needs to be multiplied with source
+    alpha.
+
+commit 38b9589fe6b14c822a2a4000df364d132e390f7a
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 11:21:43 2009 -0400
+
+    In vmx_combine_out_ca() multiply with the alpha of the negated vdest.
+
+commit de180baba3a3e7eedeb09ff7d5f4d3eff3ffc6f4
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 11:16:31 2009 -0400
+
+    Fix vmx_combine_out_ca()
+    
+    It should multiply with just the destination alpha channel, not all
+    four channels.
+
+commit 5191421d1f143cca76afa1f4fbffa68f89a5d393
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 11:07:16 2009 -0400
+
+    Do the full four-component IN computation in vmx_combine_in_ca().
+
+commit 27fb8378fdae930475cf4528c539a78bfbd751c5
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 10:54:16 2009 -0400
+
+    Fix bug in vmx_combine_xor_ca()
+    
+    The destination needs to be inverted before the alpha channel is
+    extracted; otherwise, the RGB channels of da will be 0xff.
+
+commit c750667d7ac542dfa922a7970961b7095b44b8d3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 01:07:01 2009 -0400
+
+    Make pix_multiply bit-exact
+
+commit 6243a0a015043f39531b98b9e8c4167f8bd47d82
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Aug 6 23:50:32 2009 -0400
+
+    Change the SSE2 versions of pix_add_multiply() to produce bit-exact results.
+
+commit 404f4a6f3e71de5e411cb3bb1107d8ffb7c52e62
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Aug 6 23:52:11 2009 -0400
+
+    Fix a couple of alpha==0 vs src==0 issues in pixman-sse2.c
+
+commit d9f80370a4d2ab54688e75256b3ea4267d8cc602
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Aug 6 23:05:36 2009 -0400
+
+    Rename mmx_composite_add_8888_8_8() to mmx_composite_add_n_8_8().
+
+commit 04619c3636697684fdd9ada9842845f6c8dd3914
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Aug 6 22:46:50 2009 -0400
+
+    Fix a couple more alpha==0 vs src==0 bugs in pixman-mmx.c
+
+commit a075a870fd7e1fa70ae176d5089c695011667388
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Aug 6 22:42:25 2009 -0400
+
+    Make pix_add_mul() in pixman-mmx.c produce exact results.
+    
+    Previously this routine would compute (x * a + y * b) / 255. Now it
+    computes (x * a) / 255 + (y * b) / 255, so that the results are
+    bitwise equivalent to the non-mmx versions.
+
+commit f7463ffafb8876c1f47ed9c527df33d45255e16c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Aug 6 20:29:44 2009 -0400
+
+    Rewrite the two-component arithmetic macros.
+    
+    Previously they were not bit-for-bit equivalent to the one-component
+    versions. The new code is also simpler and easier to read because it
+    factors out some common sub-macros.
+    
+    The x * a + y * b macro now only uses four multiplications - the
+    previous version used eight.
+
+commit 04ae08992f6381a8ffb50d8cba37753fdb58e3bf
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Aug 6 20:41:04 2009 -0400
+
+    Fix a bunch of srca == 0 checks that should be src == 0 in pixman-mmx.c
+
+commit 8bb58a3ce83d6b9c1f6796ce8e62450bdaa52cf0
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Aug 5 21:24:50 2009 -0400
+
+    Don't run fast paths if the format requires wide compositing.
+    
+    This could happen because the wide formats would still be considered
+    solid if the image was 1x1 and repeating.
+
+commit d937b622389797a8c605b2cc50c24ca759dc57d2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Aug 5 21:16:14 2009 -0400
+
+    Fix bug in combine_mask_alpha_ca()
+    
+    If the mask was 0xffffffff, the source would end up being shifted
+    twice by A_SHIFT.
+
+commit 0d576b965c34a6d89b00f7b93dba6a7b8737c731
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Aug 5 20:40:36 2009 -0400
+
+    Fix another case of changing the solid source.
+    
+    This time in fast_path_composite_n_8888_8888().
+
+commit 8b82cbb69197f9c367069a77ba992f3163d40230
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Aug 5 20:31:41 2009 -0400
+
+    Fix incorrect optimization in combine_over_ca().
+    
+    Previously the code assumed that an alpha of 0 meant that no change
+    would take place. This is incorrect because an alpha of 0 can happen
+    as the result of the source having alpha=0, but rgb != 0.
+
+commit ec8b36f01030fd2fa67595f2aef4ca568b060899
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Aug 5 18:18:37 2009 -0400
+
+    Don't change the constant source in fast_composite_over_n_8888_0565.
+
+commit de8fff746bfa80278f85859bef2dc0ab166f7a69
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Aug 5 16:17:52 2009 -0400
+
+    Fix bugs in combine_over_reverse_ca().
+    
+    The computation cannot be optimized away when alpha is 0 because that
+    can happen when the source has alpha zero and rgb non-zero.
+
+commit 7b1df41b6110424b8dca9fa655dbc8dd95a76882
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jul 31 17:27:38 2009 -0400
+
+    Add a dirty bit to the image struct, and validate before using the image.
+    
+    This cuts down the number of property_changed calls significantly.
+
+commit 942c4ac28209381668208a39ccc9aec4f11bf63f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jul 31 10:39:41 2009 -0400
+
+    Add sse2 version of add_n_8888_8888()
+
+commit 23d38201165876c031d314f73e09a75afcac4f00
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jul 31 10:26:10 2009 -0400
+
+    Add a fast path for the add_n_8888_8888() operation.
+    
+    It shows up on gnome-terminal traces.
+
+commit c606a05213d1fe5d73b39454407414a2a245da39
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jul 31 07:29:31 2009 -0400
+
+    Move bounds checks for REPEAT_NONE to get_pixel()
+    
+    On a P4, this is a large speedup for the swfdec-fill-rate-2xaa trace:
+    
+    After:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image        swfdec-fill-rate-2xaa   33.061   33.061   0.00%    1/1
+    
+    Before:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image        swfdec-fill-rate-2xaa   40.342   40.342   0.00%    1/1
+    
+    Pixman 0.14.0 produces this:
+    
+    [ # ]  backend                         test   min(s) median(s) stddev. count
+    [  0]    image        swfdec-fill-rate-2xaa   36.896   36.896   0.00%    1/1
+
+commit 1bec3e8395a307812b25fb195823ac7cf2915340
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jul 30 10:51:38 2009 -0400
+
+    Remove leftover 0xffffffff in repeat()
+
+commit 1b98166b016af5fa374ad534d53b772c7fd2c4a5
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jul 30 10:45:18 2009 -0400
+
+    Remove unused function
+
+commit 06836d35d26941e826e99fe35e06da50756da641
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jul 30 10:03:44 2009 -0400
+
+    Misc formatting
+
+commit 7c8959ea3b2ff3d3abf995b3feccc677e15b4e27
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jul 30 09:58:12 2009 -0400
+
+    Change all the fetch_pixels() functions to only fetch one pixel.
+
+commit 31096446b6866de0a85ca6eb4fb68a45b21c4b49
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 28 09:43:12 2009 -0400
+
+    Add fetch_pixel_raw_32 and fetch_pixel_32 virtual functions.
+    
+    By default both are initialized to bits_image_fetch_pixel_raw(), but if
+    there is an alpha map, then fetch_pixel_32() is set to
+    bits_image_fetch_pixel_alpha().
+
+commit a233b332cd9408d35e57a400874cca6188347cc2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 28 09:12:51 2009 -0400
+
+    Various renamings and clean-ups
+
+commit 073399b09f073c44dd10b027788c09eddfcdf2e0
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 28 08:58:41 2009 -0400
+
+    Change bits_image_fetch_alpha_pixels() to fetch just one pixel.
+
+commit 6d1dfc3945917b507d40f1f3c1b1cf07858d18dd
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 28 08:44:40 2009 -0400
+
+    Change bits_image_fetch_pixels_convolution() to fetch just one pixel.
+
+commit b3f849f74f848c407afda1be15b966e1d6eda745
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 28 08:33:28 2009 -0400
+
+    Change bits_image_fetch_bilinear_pixels() to fetch one pixel at a time.
+
+commit a37383a2c646ee10ebe36d03df6bd1c0f8a75052
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 28 08:03:44 2009 -0400
+
+    Make the repeat routine work on only one coordinate at a time.
+
+commit a4f3fd3b2592b1b4791075187016ad444c2d60d4
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 28 07:55:27 2009 -0400
+
+    Make bits_image_fetch_nearest() return one pixel.
+    
+    Previously it would work on a buffer of coordinates.
+
+commit f382865ebe5e1e8d4b5299b908dab9b719fcb8ec
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 28 07:42:34 2009 -0400
+
+    Change bits_image_fetch_transformed() to work one pixel at a time.
+    
+    Previously, it would generate a buffer of coordinates, then pass that
+    off to a pixel fetcher, but this caused a large performance regression
+    with the swfdec-fill-rate-2xfsaa cairo trace.
+    
+    This is the first step towards fixing that.
+
+commit 4ef2807c3a6697731ada43ddad2fa915ed7cfe11
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Aug 7 00:11:20 2009 -0400
+
+    Only define PIXMAN_TIMERS if timers are actually enabled [bug 23169]
+
+commit 9dec2e352b24bdccaac4f570b8cf12e61a9194ee
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 28 09:58:52 2009 -0400
+
+    Various updates to the CODING_STYLE document
+
+commit 2abd56e9e3d012fcb0b7c6d459ed4831464c0f2f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 28 04:05:26 2009 -0400
+
+    Add a CODING_STYLE document based on the one from cairo.
+
+commit fdd01bcbd473f7a46c66ce8538657e32400974ed
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jul 22 04:51:08 2009 -0400
+
+    Remove a couple of unused variables
+
+commit 845910c200db3f279229da67fbd330d903776777
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jul 22 04:32:07 2009 -0400
+
+    Rename source_pict_class_t to source_image_class_t
+
+commit edd476d5be8f248ea21d9d80cf1986d0ccdecc3b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jul 22 04:28:08 2009 -0400
+
+    Replace a bunch of 'pict's with 'image'
+
+commit 11d888a2837b3fe309348126b4f7c56df559df4e
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Jul 24 09:36:08 2009 +0100
+
+    Explain how we can simplify the radial gradient computation
+    
+    Soeren rightfully complained that I had removed all the comments from
+    André's patch, most importantly those that explain why the transformation is
+    valid. So add a few details to show that B varies linearly across the
+    scanline and how we can therefore reduce the per-pixel cost of evaluating
+    B.
+
+commit 20d2df03059d6a5941464d80e81e8116ebf4dbfe
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Jul 23 19:08:40 2009 +0100
+
+    Fix inversion of radial gradients when r2 > r1
+    
+    Fixes: Bug 22908 -- Invalid output of radial gradient
+    http://bugs.freedesktop.org/show_bug.cgi?id=22908
+    
+    We also include a modified patch by André Tupinambá <andrelrt@gmail.com>,
+    to pull constant expressions out of the inner radial gradient walker.
+
+commit 84b7df09394ac7237fb42fe25c0fbde77e065a16
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Thu Jul 23 09:54:49 2009 +0200
+
+    Don't warn for empty rectangles, only degenerate ones
+
+commit 1435c8aa3db3b6bde26216e260cc94baba225664
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Tue Jul 21 15:00:52 2009 +0200
+
+    Log errors for invalid rectangles passed to region code
+
+commit 1796e6bf17a5d20039e098c4e352cd2765ed444e
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Tue Jul 21 14:57:59 2009 +0200
+
+    Simplify code that logs errors
+
+commit 85d56f3f7cb9c90c5af52b28fb6f7c1b14f09f07
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Tue Jul 21 14:50:30 2009 +0200
+
+    Make the text when reporting a broken region more useful
+
+commit a77d4ffeb661d4d75109fc368bded26843662259
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 21 07:24:40 2009 -0400
+
+    Post-release version bump
+
+commit f3ac1368775542e09f3741d2ad7b72af20bd9663
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 21 07:20:57 2009 -0400
+
+    Pre-release version bump
+
+commit 7c56911e3b5b97b26dceff9b68d9fed32693d57b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 21 07:01:10 2009 -0400
+
+    Don't assert when malformed regions are detected.
+    
+    Instead print a message to stderr so that it will end up in the X log
+    file.
+
+commit f9660ce29ed072c6cbaec711c5d18b9f0ba113ae
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 21 04:23:56 2009 -0400
+
+    Fix another search and replace issue
+
+commit b3196b63274134a594fc091ec2f8be3b44734411
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 21 04:18:35 2009 -0400
+
+    Fix search-and-replace issue pointed out by Koen Kooi.
+
+commit 0ff5733c16804d5b10782556eeeade7061924846
+Author: George Yohng <georgefd@oss3d.com>
+Date:   Tue Jul 21 03:43:42 2009 -0400
+
+    Add implementation of MMX __m64 functions for MSVC x64.
+    
+    Microsoft C++ does not define __m64 and all related MMX functions in
+    x64.  However, it succeeds in generating object files for SSE2 code
+    inside pixman.
+    
+    The real problem happens during linking, when it cannot find MMX functions
+    (which are not defined as intrinsics for AMD64 platform).
+    
+    I have implemented those missing functions using general programming.
+    
+    MMX __m64 is used relatively scarcely within SSE2 implementation, and the
+    performance impact probably is negligible.
+    
+    Bug 22390.
+
+commit 0b95afd259bb839a026955e7fda15b44fa22a805
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Mon Jul 20 14:07:18 2009 +0100
+
+    Fix read of BITS members from a solid image.
+    
+    During the fast-path query, the read_func and write_func from the bits
+    structure are queried for the solid image.
+    
+    ==32723== Conditional jump or move depends on uninitialised value(s)
+    ==32723==    at 0x412AF20: _pixman_run_fast_path (pixman-utils.c:681)
+    ==32723==    by 0x4136319: sse2_composite (pixman-sse2.c:5554)
+    ==32723==    by 0x4100CD2: _pixman_implementation_composite
+    (pixman-implementation.c:227)
+    ==32723==    by 0x412396E: pixman_image_composite (pixman.c:140)
+    ==32723==    by 0x4123D64: pixman_image_fill_rectangles (pixman.c:322)
+    ==32723==    by 0x40482B7: _cairo_image_surface_fill_rectangles
+    (cairo-image-surface.c:1180)
+    ==32723==    by 0x4063BE7: _cairo_surface_fill_rectangles
+    (cairo-surface.c:1883)
+    ==32723==    by 0x4063E38: _cairo_surface_fill_region
+    (cairo-surface.c:1840)
+    ==32723==    by 0x4067FDC: _clip_and_composite_trapezoids
+    (cairo-surface-fallback.c:625)
+    ==32723==    by 0x40689C5: _cairo_surface_fallback_paint
+    (cairo-surface-fallback.c:835)
+    ==32723==    by 0x4065731: _cairo_surface_paint (cairo-surface.c:1923)
+    ==32723==    by 0x4044098: _cairo_gstate_paint (cairo-gstate.c:900)
+    ==32723==  Uninitialised value was created by a heap allocation
+    ==32723==    at 0x402732D: malloc (vg_replace_malloc.c:180)
+    ==32723==    by 0x410099F: _pixman_image_allocate (pixman-image.c:100)
+    ==32723==    by 0x41265B8: pixman_image_create_solid_fill
+    (pixman-solid-fill.c:75)
+    ==32723==    by 0x4123CE1: pixman_image_fill_rectangles (pixman.c:314)
+    ==32723==    by 0x40482B7: _cairo_image_surface_fill_rectangles
+    (cairo-image-surface.c:1180)
+    ==32723==    by 0x4063BE7: _cairo_surface_fill_rectangles
+    (cairo-surface.c:1883)
+    ==32723==    by 0x4063E38: _cairo_surface_fill_region
+    (cairo-surface.c:1840)
+    ==32723==    by 0x4067FDC: _clip_and_composite_trapezoids
+    (cairo-surface-fallback.c:625)
+    ==32723==    by 0x40689C5: _cairo_surface_fallback_paint
+    (cairo-surface-fallback.c:835)
+    ==32723==    by 0x4065731: _cairo_surface_paint (cairo-surface.c:1923)
+    ==32723==    by 0x4044098: _cairo_gstate_paint (cairo-gstate.c:900)
+    ==32723==    by 0x403C10B: cairo_paint (cairo.c:2052)
+
+commit c7b84f8b043018368fade4ad13730cfcaaf5c8cc
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 21 00:17:15 2009 -0400
+
+    Only apply the workaround if the clip region extends beyond the drawable.
+    
+    This works because the X server always attempts to set a clip region
+    within the bounds of the drawable, and it only fails at it when it is
+    computing the wrong translation and therefore needs the workaround.
+
+commit 6bd17f1e9861693262fa88bfeff5d3279b3f6e7d
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 20 23:46:06 2009 -0400
+
+    Rework the workaround for bogus X server images.
+    
+    Bug 22844 demonstrates that it is not sufficient to play tricks with
+    the clip regions to work around the bogus images from the X
+    server. The problem there is that if the operation hits the general
+    path and the destination has a different format than a8r8g8b8, the
+    destination pixels will be fetched into a temporary array. But because
+    those pixels would be outside the clip region, they would be fetched
+    as black. The previous workaround was relying on fast paths fetching
+    those pixels without checking the clip region.
+    
+    In the new scheme we work around the problem at the
+    pixman_image_composite() level. If an image is determined to need a
+    work around, we translate both the bits pointer, the coordinates, and
+    the clip region, thus effectively undoing the X server's broken
+    computation.
+
+commit dfdb8509e2160a0db7d72e775dd348090e6fb968
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 20 22:45:47 2009 -0400
+
+    Add test case for bug 22844.
+
+commit 96340123eba05bff85433bb2db3a7ad80c8e57ba
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 21 01:24:43 2009 -0400
+
+    Fix typo in sse2_combine_over_pixbuf_0565()
+
+commit 767542cfb955ba22dad1259eff8a2fe16e7b8ba4
+Author: Adrian Bunk <adrian.bunk@movial.com>
+Date:   Mon Jul 20 20:16:32 2009 -0400
+
+    Fix NEON build for older ARM CPUs
+    
+    The pld instruction used in the NEON assembler code is only available
+    for ARMv5e and >= ARMv6.
+    
+    Set -mcpu=cortex-a8 when compiling the source file (similar to what is
+    already done for the SIMD build).
+
+commit 184cd80aa46dd9d8bd023d3b70a345330b72d7e7
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 20 19:56:46 2009 -0400
+
+    Some formatting changes to pixman-arm-neon.c
+
+commit 5d2c527a2234d34b6269c561b08ebcaabf0b3ea3
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Jul 17 13:03:21 2009 +0300
+
+    ARM: Fixes for the inline assembly constraints in pixman_fill_neon
+    
+    Some of the variables in the inline assembly arguments list are
+    actually modified by the assembly code, they are now marked
+    appropriately.
+
+commit c27a60f94cea7deb0afb21e734c892d475bfa06d
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Jul 17 12:54:44 2009 +0300
+
+    ARM: Workaround cs2007q3 compiler bug for NEON registers clobber list
+    
+    128-bit registers "qX" are incorrectly handled in inline assembly
+    clobber list for codesourcery cs2007q3 gcc toolchain. Only the
+    first 64-bit half is saved and restored by gcc. Changing clobber
+    list to use only 64-bit register aliases can solve this problem.
+    For example, 128-bit register q0 is mapped to two 64-bit
+    registers d0 and d1, q1 is mapped to d2 and d3, etc.
+
+commit cb4a5fd18f20f49ed2721f04a886c2ffd1645d09
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Jul 17 00:11:14 2009 +0300
+
+    ARM: Commented out the rest of buggy NEON optimizations
+    
+    These functions have problems with invalid memory accesses and often
+    crash X server
+
+commit 1aee6813ac45e6b206522623f58f1110a54186b1
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Jul 17 00:08:42 2009 +0300
+
+    ARM: Use Ian's variant of 'neon_composite_over_n_8_0565' function again
+    
+    This patch effectively reverts the changes done by commit
+    8eeeca993252edc39da9c5c57545b81215fafc81 which was causing
+    severe stability issues, and restores old variant of
+    'neon_composite_over_n_8_0565' function, which used to work
+    correctly.
+
+commit 2356ba38fd0c0002be4484adb8ca51de32b2ff81
+Author: Miha Vrhovnik <miha.vrhovnik@cordia.si>
+Date:   Mon Jul 20 19:30:59 2009 -0400
+
+    Update Makefile.win32 to make it work again.
+
+commit d4b22bd9935662912641abe5fd010d906c839405
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Tue Jul 21 00:49:40 2009 +0200
+
+    Add a lot more sanity checks to region code
+    
+    - Introduce a GOOD_RECT() macro that checks that a pixman_box_t is not
+      empty or degenerate and use it.
+    - Use GOOD_RECT() instead of magic if statements for functions that take
+      x, y, width, height arguments
+    - Use GOOD_RECT() in _reset(). The checks in the previous code seemed to
+      allow an empty box, but then created a broken region from it.
+    - Add GOOD(region) check at the end of _translate()
+
+commit a3ad8bb5412f47776285bfc954d2275f075c8796
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Mon Jul 20 23:38:36 2009 +0200
+
+    Handle degenerate case in pixman_init_extents()
+    
+    Create an empty region instead
+
+commit 854ec2ea4d0fc940e91c4ec1c419fb4e5635dc95
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 19 12:29:42 2009 -0400
+
+    Replace // comments with /* */ comments in various places
+
+commit 54cad29dc55fc0a670bf87abacd5f45e1289db54
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Jul 18 17:59:43 2009 -0400
+
+    Align the stack in _pixman_implementation_create_sse2()
+    
+    When compiled without optimization, GCC will place various temporaries
+    on the stack. Since Firefox sometimes causes the stack to be aligned
+    to four bytes, this causes movdqa to generate faults.
+
+commit 6aa26296f5831bddc9b3f3e3e2ea018fc0cefb75
+Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi>
+Date:   Sun Jul 19 18:20:53 2009 +0300
+
+    Check whether the linker understands the hwcap file before using it.
+    
+    If we're trying to use the GNU linker on Solaris we shouldn't use
+    our solaris-hwcap.mapfile since it doesn't grok the mapfile format.
+
+commit 934f4f4604ccf06db5d5aec07e58f0a0fbe7d283
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jul 17 22:40:41 2009 -0400
+
+    Move read and write functions to the bits_image_t struct.
+    
+    Those fields were duplicated between image_common and bits_image_t
+    before.
+
+commit 737d00063e8b8aaeaab9aecd0fbe731e8ab3f6b3
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Fri Jul 17 16:01:59 2009 +0200
+
+    Handle degenerate case in pixman_init_rect()
+    
+    Create an empty region in that case.
+
+commit e3a6df08a4bedfe82a8d3a7c1143e4db00a18d27
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jul 17 06:43:35 2009 -0400
+
+    Add back check for need_workaround that got removed during reindenting
+
+commit ecc54a7f02dbb6f57043e51173584f96c42fd2cc
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jul 16 10:38:22 2009 -0400
+
+    Fix combine_src_ca() to fill out all of the destination line.
+
+commit 4df925bb28196974617804d680380522c048dedd
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jul 16 09:34:22 2009 -0400
+
+    Change composite test to use a rainbow gradient as source.
+    
+    Also make the destination a yellow patch. This makes the output a bit
+    more comparable to the image in the PDF specification.
+
+commit 4f369faffa7670e0e57c83c298359992223a998b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jul 16 06:23:25 2009 -0400
+
+    Various minor formatting changes
+
+commit 3d3baa3c5e76a4f851614a7794d92d15a56ac04e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jul 16 06:06:17 2009 -0400
+
+    Change name of macro from RGB16_TO_ENTRY to RGB15_TO_ENTRY
+
+commit 12e829a8de1e45708b5dfeaa8137af6eeaf0f662
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Thu Jul 16 15:33:43 2009 +0200
+
+    Clarify color burn code - no semantical changes
+    
+    - Improve documentation to be equal to Proposed ISO32000 Extension
+    - Simplify code in default case
+
+commit acf5738a82afb51a2284f5e61d9fac8dae7a47d5
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Thu Jul 16 15:28:05 2009 +0200
+
+    Clarify color dodge code - no semantical changes
+    
+    - Improve documentation to be equal to Proposed ISO32000 Extension
+    - Simplify code in default case
+
+commit 3dafe926b2405ba3404a41bb5b2842413308c8fa
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Thu Jul 16 15:02:51 2009 +0200
+
+    Fix terms in comments describing the blend modes
+
+commit 90ac94b9cb3defa8bf174af8c7c9fc6c42e8762b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 14 18:49:43 2009 -0400
+
+    Add -fno-strict-aliasing
+
+commit 268561a3c674c5a4da945124b7b8f075b792a170
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 13 22:22:20 2009 -0400
+
+    Post-release version bump
+
+commit 466cf2b4452ec9bf4fa17cbf2186f5c472b66c26
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 13 22:00:52 2009 -0400
+
+    Pre-release version bump
+
+commit 83f6e2eacff826ef9dbdefb95fcb76fa1247ac4e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 13 19:58:04 2009 -0400
+
+    Reindent and rename identifiers in scaling-test.c
+
+commit 9a6ad34810421a30250ef331fb75b2a48ce9e564
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 13 02:37:19 2009 -0400
+
+    Reformat pixman.h
+
+commit 22f322fe246155d40465d4e14d65051a204f27f6
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 13 01:35:15 2009 -0400
+
+    Reindent and reformat pixman-private.h
+
+commit b4d196009881a4121b49996bdc87f7770bfa5c1b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 13 01:17:53 2009 -0400
+
+    Reindent and reformat pixman-combine.h.template
+
+commit f54c776e75a44a095304dd7374384a2a0c96d479
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 13 01:13:47 2009 -0400
+
+    Reindent and reformat pixman-combine.c.template
+
+commit d57b55bb2662837feafb4f9f88d10549164ee142
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 22:05:32 2009 -0400
+
+    Reindent and reformat pixman-vmx.c
+
+commit 01b604c606cd0842c8f4eccc41511a472e4450e9
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 21:57:08 2009 -0400
+
+    Reformat and reindent pixman-utils.c
+
+commit 1d52ecbbe04c810d3f30e7915663b2cd21befcba
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 21:40:41 2009 -0400
+
+    Reformat and reindent pixman-trap.c
+
+commit c1178e49417bbea7f91b23f71c9ba957500da0ff
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 21:37:16 2009 -0400
+
+    Reformat pixman-timer.c
+
+commit 74774bad001504b4b2283689b6b55e21fa943dd8
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 21:36:32 2009 -0400
+
+    Reformat and reindent pixman-sse2.c
+
+commit 7dc3593d8172e292b39a7d6cc7772fcf9a04bb1f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 20:31:26 2009 -0400
+
+    Reformat and reindent pixman-solid-fill.c
+
+commit 3db9f5ff6e32c353cff640d3504eb54bd2a69ed3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 20:30:44 2009 -0400
+
+    Reformat and reindent pixman-region.c
+
+commit 317df68e94498b6a287eb736a6e5991e8b7d2d78
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 19:59:17 2009 -0400
+
+    Reindent and reformat pixman-radial-gradient.c
+
+commit 8820c81b50299f13791594fe6ddd01d536745231
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 19:37:45 2009 -0400
+
+    Reindent and reformat pixman-mmx.c
+
+commit c68283360d2e5917f15bddc0a14aa7a1c1b3852e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 19:24:31 2009 -0400
+
+    Reindent and reformat pixman-matrix.c
+
+commit 19397bc732b30a861416220974edca6404d2890b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 19:19:13 2009 -0400
+
+    Reindent and reformat pixman-linear-gradient.c
+
+commit e8e08b35e7a8d221378e3a411efdfad74b1614e3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 19:15:17 2009 -0400
+
+    Reindent and reformat pixman-implementation.c
+
+commit 2c74165179b07f31b82402d74dc9fbaf8bf52191
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 19:13:17 2009 -0400
+
+    Reindent and reformat pixman-image.c
+
+commit 5aadc28e19328054b15c7ee88996c407a9a7d9b3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 19:10:53 2009 -0400
+
+    Reindent and reformat pixman-gradient-walker.c
+
+commit ac043ac2da643d872f519971a316f8bc6bdca0f8
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 19:09:24 2009 -0400
+
+    Reindent and reformat pixman-general.c
+
+commit 7b3f5fdc571e8d6b4d64f950f2578d47b1056c86
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 19:06:30 2009 -0400
+
+    Reindent and reformat pixman-fastpath.c
+
+commit c332e229bb274447b8b46c8f8ba7bce8cfaa21b2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 19:02:29 2009 -0400
+
+    Reindent and reformat pixman-edge.c
+
+commit 4ba9a44e8f4098fc61bfb62650c521b2e37cf9cb
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 18:59:10 2009 -0400
+
+    Reindent and reformat pixman-cpu.c
+
+commit e01fc6bba7f90c80e86b206f28efa3f1f94a083b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 18:55:45 2009 -0400
+
+    Reindent and reformat pixman-conical-gradient.c
+
+commit 2871add52ece8bc4a02c0f4fae98912a0f4a830c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 18:54:31 2009 -0400
+
+    Reindent and reformat pixman.c
+
+commit 0e6e08d380a16f8804706270e74f66960a681167
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 18:52:33 2009 -0400
+
+    Reindent and reformat pixman-bits-image.c
+
+commit 89eda86549e8cf938556b1a238960f2fbb7b911c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 18:45:56 2009 -0400
+
+    Reindent and reformat pixman-arm-simd.c
+
+commit 9a26a60a233955aadab65fde5bf31fc0199663ea
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 18:42:31 2009 -0400
+
+    Reindent and reformat pixman-arm-neon.c
+
+commit 0af8ef742c6e1aa150b591bc7cdacb8d2293f7af
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 18:04:21 2009 -0400
+
+    Reindent and reformat pixman-access.c.
+
+commit be3a1b04ae9ef52f60fa1c6423d743b628aa57ca
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Mon Jul 13 18:02:09 2009 -0400
+
+    Fix burn and dodge operators to match acroread output
+
+commit 7dc2c48bcab7404ace9b41959e2233d0025801b3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 13 05:58:43 2009 -0400
+
+    Return immediately if the operator is CONJOINT_DST or DISJOINT_DST
+    
+    These are noops just like plain DST is.
+
+commit bb383def00b3d30d991295274b58a841a7162620
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 13 05:39:40 2009 -0400
+
+    Fix bits_image_fetch_{un}transformed() for 64 bit buffers.
+    
+    The buffer pointer has to advance twice as far as in the 32 bit case.
+
+commit c88b75d24c6fc618d638373dce5e5f0281de1f40
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 13 04:43:37 2009 -0400
+
+    Make sure we get all 64 bits in bits_image_fetch_solid_64()
+    
+    Previously we would only store the first 32 bits.
+
+commit f73ecb3f0ff516e4411de9a2738b8851e679a163
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 13 04:04:59 2009 -0400
+
+    Fix typo in bits_image_fetch_solid_64().
+    
+    Found by blitters-test.
+
+commit 03c6b294a4517f5dfbc87504fceb3a88efef6a17
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 12 03:55:23 2009 -0400
+
+    Only destroy the regions when pixman_compute_composite_region() returns TRUE
+
+commit 2af5f64e938f0d3b81f3f014441cdff650fe5457
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Jul 11 03:25:25 2009 -0400
+
+    Move workaround logic to pixman-bits-image.c.
+    
+    Instead of computing whether a workaround is needed on every call to
+    _pixman_run_fast_path(), just cache this information in the image.
+    
+    Also, when workarounds are needed, clip against the source geometry to
+    prevent out of bound reads.
+
+commit 5088ca8d97c9c918746c3e261a31b6edab6c964b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jul 10 18:18:00 2009 -0400
+
+    Only apply the workaround to source images when out_of_bounds_workaround is set.
+    
+    Pointed out by Siarhei Siamashka.
+
+commit 51418786e4cba2e8fbb44052fbed2f107244b733
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Jul 10 13:50:15 2009 -0400
+
+    Return nonzero code from scaling-test program in case of failure.
+    
+    This can potentially help in test automation.
+
+commit 71862fe84e5eeb0b178ed3a7dec8430d506b8515
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Fri Jul 10 11:49:53 2009 +0300
+
+    Fix scaling-test to work on big endian systems
+
+commit 0f8c5d2fd447d2d9a0350c33715f140ab0dac452
+Author: Michel Dänzer <daenzer@vmware.com>
+Date:   Fri Jul 10 11:28:11 2009 +0200
+
+    Convert some leftover instances of Alpha() in pixman-vmx.c.
+    
+    They were probably missed due to the space before the parens.
+
+commit 0fce356762864572ae126733f657600fbb9116ce
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jul 9 01:35:11 2009 -0400
+
+    Add workarounds for X servers doing out-of-bounds accesses.
+    
+    Old X servers rely on out-of-bounds accesses when they are asked
+    to composite with a window as the source. They create a pixman image
+    pointing to some bogus position in memory, but then they set a clip
+    region to the position where the actual bits are.
+    
+    Due to a bug in old versions of pixman, where it would not clip
+    against the image bounds when a clip region was set, this would
+    actually work.
+    
+    The workaround added by this commit is to try and detect whether a
+    source drawable is actually a window without a client clip set. Such a
+    window will generally have a clip region that corresponds exactly to
+    the hierarchy clip in the server, whereas pixmaps will have a clip
+    region that is an exact match to the drawable.
+    
+    When we detect such a window, we allow a fast path to run that would
+    normally be rejected due to the sources not completely subsuming the
+    composite region.
+    
+    Fixed X servers should call the new function
+    pixman_disable_out_of_bounds_workaround() to disable the workaround.
+    
+    This was reported in bug 22484.
+
+commit 61254a3c09497214a9c7ca89e275286533a3be2e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jul 9 23:17:56 2009 -0400
+
+    Add a BUILT_SOURCES with pixman-combine{32,64}.{c,h}
+    
+    Bug 22681.
+
+commit 34ec50a4761cab50c6216b08ef5cfb36bf94209e
+Author: Adrian Bunk <adrian.bunk@movial.com>
+Date:   Fri Jul 10 00:11:15 2009 +0300
+
+    Fix the NEON build after "Convert CamelCase names to underscore_names."
+    
+        This patch fixes the following build error caused
+        by commit a98b71eff4041df58c9dcc2b1e25cefa38f364ff
+        (Convert CamelCase names to underscore_names.):
+    
+        <--   snip  -->
+    
+        ...
+        pixman-arm-neon.c: In function 'neon_composite_over_n_8_0565':
+        pixman-arm-neon.c:1784: error: 'x_dst' undeclared (first use in this function)
+        pixman-arm-neon.c:1784: error: (Each undeclared identifier is reported only once
+        pixman-arm-neon.c:1784: error: for each function it appears in.)
+        pixman-arm-neon.c:1785: error: 'p_dst' undeclared (first use in this function)
+        pixman-arm-neon.c: In function 'neon_composite_over_n_0565':
+        pixman-arm-neon.c:1937: error: 'x_dst' undeclared (first use in this function)
+        pixman-arm-neon.c:1938: error: 'p_dst' undeclared (first use in this function)
+        pixman-arm-neon.c: In function 'neon_composite_over_8888_0565':
+        pixman-arm-neon.c:2074: error: 'x_dst' undeclared (first use in this function)
+        pixman-arm-neon.c:2075: error: 'p_dst' undeclared (first use in this function)
+        make[3]: *** [libpixman_arm_neon_la-pixman-arm-neon.lo] Error 1
+        ...
+    
+        <--  snip  -->
+
+commit 6b34482870fd53a9285f795f47656ac73fd706b0
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jul 9 04:04:00 2009 -0400
+
+    Update the CRC value in scaling-test.c.
+    
+    The changes in pixman behavior justifying this are:
+    
+    - New clipping rules
+    
+    - Bug fixes in region code. In particular, when
+      pixman_region_init_rects() is called on these two boxes:
+    
+    	{ 2, 6, 7, 6 }
+    	{ 4, 1, 6, 7 }
+    
+      it now ignores the first one, which is empty and produces
+    
+    	{ 4, 1, 6, 7 }
+    
+      Previously, it would produce:
+    
+    	{ 2, 1, 7, 7 }
+    
+      for some reason.
+
+commit 4c60ed5ec662e2d7088a7800dd7c71d3926c42a4
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jul 9 04:00:19 2009 -0400
+
+    Fix bug in pixman-region.c where empty regions would not be properly initialized.
+    
+    Also add a couple more tests to region-test.c.
+
+commit bcf01c21d704717264011182e71cfaaf6922a437
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jul 9 01:47:19 2009 -0400
+
+    Fix a couple of problems with the tests when HAVE_GTK is not defined.
+    
+    - Make sure the non-gtk+ test programs are added to noinst_PROGRAMS
+      when HAVE_GTK is not set.
+    
+    - Don't include glib.h in oob-test.c
+
+commit 0db0430d1d410855863e669f0de9e8b5d26db7fd
+Merge: 31a40a1 b3cf3f0
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jul 8 18:59:15 2009 -0400
+
+    Merge branch 'naming'
+
+commit 31a40a172591ab373add9dd41a52881bea9dc6f2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jul 8 18:41:41 2009 -0400
+
+    Eliminate boxes with x1 > x2 or y1 > y2 in pixman_region_init_rects().
+
+commit 706e6594310a490956d21e939c23de2b5dbe1561
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jul 8 18:39:43 2009 -0400
+
+    Add a box with y2 < y1 in region-test.c
+
+commit b3cf3f0c2be462cd61e63e07655d1b45e55f4a7b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 6 20:33:05 2009 -0400
+
+    Fix up some overeager search-and-replace renamings
+
+commit c2e331693d858c01b69135342c139546780b7021
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 6 12:48:59 2009 -0400
+
+    Rename num_rects back to numRects.
+    
+    The name numRects is public API and can't be changed.
+
+commit 8261b4d57cfdf77d7fdd4e4c0fc805ba48f7e0a0
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 5 02:12:21 2009 -0400
+
+    Rename combine_*_c to combine_*_ca
+    
+    s/combine_(.+)_c([^a-z0-9A-Z])/combine_$1_ca$2/g;
+
+commit 3c03990ba214bff000d3494587353b94f9432453
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 5 01:42:28 2009 -0400
+
+    Various sse2 renamings
+
+commit 9d0be1d4c81153ef2407518f605bc55380485955
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 5 01:38:10 2009 -0400
+
+    s/sse2combine/sse2_combine/g
+
+commit a98b71eff4041df58c9dcc2b1e25cefa38f364ff
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 5 01:35:14 2009 -0400
+
+    Convert CamelCase names to underscore_names.
+    
+    s/sizeRI/size_ri/g;
+    s/numRI/num_ri/g;
+    s/RepeatNone/REPEAT_NONE/g;
+    s/fbOver/over/g;
+    s/fbIn/in/g;
+    s/iSrc/src_image/g;
+    s/iMask/mask_image/g;
+    s/iDst/dest_image/g;
+    s/SaDa/Sa.Da/g;
+    s/FbMaskBits/MASK_BITS/g;
+    s/RenderSamplesX/RENDER_SAMPLES_X/g;
+    s/MMXData/mmx_data_t/g;
+    s/RegionInfo/region_info_t/g;
+    
+    s/([^0x])([a-z])([A-Z])/$1$2_\l$3/g;
+    s/([^0x])([A-Z])([A-Z])([a-z])/$1$2_\l$3$4/g;
+    s/([^0x])([A-Z])([a-z]+)_([a-z])/$1\l$2$3_$4/g;
+    s/([a-z])_([A-Z])/$1_\l$2/g;
+    
+    s/su_sE/SuSE/g;
+    s/X_Free86/XFree86/g;
+    s/X_free86/XFree86/g;
+    
+    s/_ULL/ULL/g;
+    s/_uLL/ULL/g;
+    
+    s/U_nc/UNc/g;
+    s/combine ##/combine_ ##/g;
+    s/## U/## _u/g;
+    s/## C/## _c/g;
+    s/UNc_aDD/UNc_ADD/g;
+    
+    s/BLEND_MODE \((.+)\)/BLEND_MODE (\l$1)/g;
+    s/blend_(.+)/blend_\l$1/g;
+    
+    s/AN_ds/ANDs/g;
+    s/O_rs/ORs/g;
+    s/over565/over_565/g;
+    s/8pix/8_pix/g;
+    s/Over565/over_565/g;
+    s/inU/in_u/g;
+    s/inPart/in_part/g;
+    s/inC/in_c/g;
+    s/inreverse/in_reverse/g;
+    s/get_exception_code/GetExceptionCode/g; # GetExceptionCode is WinCE API
+    s/CP_us/CPUs/g;
+    s/authentic_aMD/AuthenticAMD/g;
+    s/op_sR_cx_mAS_kx_dST/op_src_mask_dest/g;
+    s/no_VERBOSE/noVERBOSE/g;
+    s/mc_cormack/McCormack/g;
+    s/r1band/r1_band/g;
+    s/r2band/r2_band/g;
+    s/as GOOD things/as good things/g;
+    s/brokendata/broken_data/g;
+    s/X_render/XRender/g;
+    s/__open_bSD__/__OpenBSD__/g;
+    s/^Quick/quick/g;
+    s/NextRect/next_rect/g;
+    s/RectIn/rect_in/g;
+    s/pboxout/pbox_out/g;
+    s/F_sorted/FSorted/g;
+    s/usse2/u_sse2/g;
+    s/csse2/c_sse2/g;
+    s/cPixelsse2/c_pixel_sse2/g;
+    s/Mask565/mask_565/g;
+    s/565fix_rB/565_fix_rb/g;
+    s/565fix_g/565_fix_g/g;
+    s/565r/565_r/g;
+    s/565g/565_g/g;
+    s/565b/565_b/g;
+    s/uPixelsse2/u_pixel_sse2/g;
+    s/Mask00ff/mask_00ff/g;
+    s/Mask0080/mask_0080/g;
+    s/Mask0101/mask_0101/g;
+    s/Maskffff/mask_ffff/g;
+    s/Maskff000000/mask_ff000000/g;
+    s/load128Aligned/load_128_aligned/g;
+    s/load128Unaligned/load_128_unaligned/g;
+    s/save128Aligned/save_128_aligned/g;
+    s/save128Unaligned/save_128_unaligned/g;
+    s/fillsse2/fill_sse2/g;
+    s/unpack565/unpack_565/g;
+    s/pack565/pack_565/g;
+    s/bltsse2/blt_sse2/g;
+    s/x565Unpack/x565_unpack/g;
+    s/r1End/r1_end/g;
+    s/r2End/r2_end/g;
+    s/argb8Pixels/argb8_pixels/g;
+
+commit 437ab049872063c78ee934766596dc6859749a3d
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 5 01:34:07 2009 -0400
+
+    Remove reference to 8888_RevNP
+
+commit 55e63bd0f09290cf1165030edbb4e92efb09ee6e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 5 00:55:45 2009 -0400
+
+    Remove reference to 8888RevNP
+
+commit 01994a59ca642f4e5ce126d3ad01e864d3daa0bb
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 5 00:41:53 2009 -0400
+
+    NoFeatures => NO_FEATURES
+
+commit 309d358ea673b5d4c163670c3c449fb855df7775
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 5 00:31:07 2009 -0400
+
+    s/FbScrRight/SCREEN_SHIFT_RIGHT/g
+
+commit 71fe4e3e5c64f177a8756e51eddc190b3a08ea40
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 5 00:26:30 2009 -0400
+
+    CPUFeatures => cpu_features
+
+commit 255ddbe5358b0ed4a7a01ef0ab127833dba94b02
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 5 00:19:18 2009 -0400
+
+    Rename FbGet8 to GET8
+
+commit 446276c36fd336531745fc1427c4af2ccdbe9875
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 5 00:11:57 2009 -0400
+
+    Rename RBmask/Gmask => rb_mask/g_mask in pixman-arm-neon.c
+
+commit 412b4b50f7bd8ac29e4c9b20e613154c1b5e371a
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 5 00:06:59 2009 -0400
+
+    Use ALPHA_8 in pixman-image.c instead of Alpha
+
+commit 887383b0adab89bcc131a9a28c4d60af9e4773d1
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 5 00:05:42 2009 -0400
+
+    Uppercase a few more macros in pixman-combine.c.template
+
+commit 4153361c52f332bce9e9cc32adf1e01064014e15
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 5 00:02:45 2009 -0400
+
+    Rename macros for non-separable blend modes
+    
+    Lum => LUM
+    Sat => SAT
+    Min => CH_MIN
+    Max => CH_MAX
+
+commit 68405c326db4cd087bdb6290ae42953a98b81838
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Jul 4 23:45:01 2009 -0400
+
+    Rename some macros in pixman-combine.c.template
+    
+    s/Combine([AB])([a-zA-Z]+)([^a-zA-Z])/COMBINE_$1_\U$2$3/g;
+    s/CombineA/COMBINE_A/g;
+    s/CombineB/COMBINE_B/g;
+    s/CombineXor/COMBINE_XOR/g;
+    s/CombineClear/COMBINE_CLEAR/g;
+
+commit 835520b28ff1412bd9b00460a107e72c9ea21e35
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Jul 4 23:24:27 2009 -0400
+
+    Rename U{no}mask => U_{no_}mask in pixman-vmx.c
+
+commit f9bdd1a82c7629a360109bdf4519c73ba5a99225
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Jul 4 23:13:55 2009 -0400
+
+    Change name fbComposeGetStart to PIXMAN_IMAGE_GET_LINE.
+
+commit e064aa761831296c8570e0fdfaa0c3585c4a3871
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Jul 4 23:12:18 2009 -0400
+
+    Rename fbCombine* to combine*
+    
+        s/fbCombine/combine/g;
+
+commit f61855e186519a490b5d013d2de67dcc8da7a0ac
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 12:51:28 2009 -0400
+
+    Fix overeager search and replace
+
+commit 1de32ae2ef8044b349f3ec87ae339fdcedeb83ef
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 12:07:56 2009 -0400
+
+    Uppercase some more macro names
+
+commit 47296209dae2e3d33426532a3e896e06373fc088
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 11:40:21 2009 -0400
+
+    Consolidate channel macros in pixman-combine.h
+    
+    There are now RED_8/RED_16 etc. macros instead of the old Red/Green/Blue.
+
+commit 2f3e3d62f7727a652090ea003c98218f3b550818
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 11:17:28 2009 -0400
+
+    Change some macro names to be all uppercase
+
+commit 8339a4abc4edcaee6fafbde1a147ba7fcaa9c108
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 09:29:32 2009 -0400
+
+    Change names of the FbByte* macros to be more descriptive.
+    
+    But also more cryptic unfortunately. For example FbByteMul() becomes
+    UN8x4_MUL_UN8() to indicate that it multiplies 4 UN8 numbers with one
+    UN8 number.
+
+commit e7f162a5a81221ca6abca79a9a77924d39bf4e16
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 08:42:19 2009 -0400
+
+    Clarify a couple of comments
+
+commit b02c33e7da3eb733ca4ada66a6c35b293a191144
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 08:30:36 2009 -0400
+
+    Change name of macros that operate on normalized integers.
+    
+    For example IntMul becomes MUL_UN8 to indicate that it multiplies two
+    unsigned normalized 8 bit integers.
+
+commit d4a366193b12cf241980a621a15ec0ee67e8f6bb
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 08:10:20 2009 -0400
+
+    Fix names in the trap rasterizer.
+    
+    s/Shift4/SHIFT_4/g;
+    s/Get4/GET_4/g;
+    s/Put4/PUT_4/g;
+    s/DefineAlpha/DEFINE_ALPHA/g;
+    s/AddAlpha/ADD_ALPHA/g;
+    s/StepAlpha/STEP_ALPHA/g;
+    s/add_saturate_8/ADD_SATURATE_8/g;
+    s/RenderEdgeStepSmall/RENDER_EDGE_STEP_SMALL/g;
+    s/RenderEdgeStepBig/RENDER_EDGE_STEP_BIG/g;
+    s/fbRasterizeEdges/b00_re/g;
+    s/rasterizeEdges/RASTERIZE_EDGES/g;
+    s/b00_re/rasterize_edges_/g;
+
+commit bcdf0861be346a8a4662376f4305474da9236163
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 08:02:45 2009 -0400
+
+    Rename QuadwordCopy_neon to neon_quadword_copy
+    
+        s/QuadwordCopy_neon/neon_quadword_copy/g;
+
+commit a08548bd5275c69c1e7a7fd894a844ad6ad59638
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 08:00:56 2009 -0400
+
+    Fix up the names in pixman_compute_composite_region()
+    
+        s/miClipPictureSrc/clip_source_image/g;
+        s/miClipPictureReg/clip_general_image/g;
+
+commit e27b2a1fcc890d3abf272cc27fa2c0a2e8d7ab09
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 07:59:10 2009 -0400
+
+    Fix some more pFoo names
+    
+    s/([^a-z])pReg/$1region/g;
+    s/([^a-z])pY/$1y_line/g;
+    s/([^a-z])pU/$1u_line/g;
+    s/([^a-z])pV/$1v_line/g;
+    s/([^a-z])p([A-Z])/$1\l$2/g;
+
+commit 006f21b02b23e1865c0e35d0f9b97af63f52a469
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 07:54:10 2009 -0400
+
+    Fix the names of some common parameters
+    
+        s/xDst/dest_x/g;
+        s/yDst/dest_y/g;
+        s/xMask/mask_x/g;
+        s/yMask/mask_y/g;
+        s/xSrc/src_x/g;
+        s/ySrc/src_y/g;
+
+commit d2a4281376786fc7f31f7367807c7caa8a99d414
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 07:46:11 2009 -0400
+
+    Various simple renamings
+    
+        s/CvtR8G8B8toY15/CONVERT_RGB24_TO_Y15/g;
+        s/cvt8888to0565/CONVERT_8888_TO_0565/g;
+        s/cvt0565to0888/CONVERT_0565_TO_0888/g;
+        s/miIndexToEnt15/RGB16_TO_ENTRY/g;
+        s/miIndexToEnt24/RGB24_TO_ENTRY/g;
+        s/miIndexToEntY24/RGB24_TO_ENTRY_Y/g;
+        s/miCvtR8G8B8to15/CONVERT_RGB24_TO_RGB15/g;
+        s/is_same/IS_SAME/g;
+        s/is_zero/IS_ZERO/g;
+        s/is_int([ (])/IS_INT$1/g;
+        s/is_one/IS_ONE/g;
+        s/is_unit/IS_UNIT/g;
+        s/Fetch4/FETCH_4/g;
+        s/Store4/STORE_4/g;
+        s/Fetch8/FETCH_8/g;
+        s/Store8/STORE_8/g;
+        s/Fetch24/fetch_24/g;
+        s/Store24/store_24/g;
+        s/_64_generic/64_generic/g;
+        s/64_generic/_generic_64/g;
+        s/32_generic_lossy/_generic_lossy_32/g;
+        s/PdfSeparableBlendMode/PDF_SEPARABLE_BLEND_MODE/g;
+        s/PdfNonSeparableBlendMode/PDF_NON_SEPARABLE_BLEND_MODE/g;
+        s/([^_])HSL/$1Hsl/g;
+        s/Blend/blend_/g;
+        s/FbScrLeft/SCREEN_SHIFT_LEFT/g;
+        s/FbScrRigth/SCREEN_SHIFT_RIGHT/g;
+        s/FbLeftMask/LEFT_MASK/g;
+        s/FbRightMask/RIGHT_MASK/g;
+        s/Splita/SPLIT_A/g;
+        s/Split/SPLIT/g;
+        s/MMX_Extensions/MMX_EXTENSIONS/g;
+
+commit 1c5774bf6d39e7b349c03866c96811ee1754c9d7
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 07:35:40 2009 -0400
+
+    Get rid of pFoo names.
+    
+        s/([^o])pSrc/$1src_image/g;
+        s/([^o])pDst/$1dst_image/g;
+        s/([^o])pMask/$1mask_image/g;
+        s/pRegion/region/g;
+        s/pNextRect/next_rect/g;
+
+commit e3489730c317061a2cd888b927d36bda0590a3f2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 07:30:47 2009 -0400
+
+    Change the name of some routines that were simply misnamed.
+    
+        s/Src_pixbuf/_over_pixbuf/g;
+        s/Src_x888_n/_over_x888_n/g;
+        s/CompositeSrc_8888_8888/composite_over_8888_8888/g;
+        s/CompositeSrc_8888_0565/composite_over_8888_0565/g;
+        s/CompositeSrc_8888_8_8888/composite_over_8888_n_8888/g;
+
+commit 90cac1115551c0fd70ace419179bcf2a30d6b1c2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 28 21:06:01 2009 -0400
+
+    Fix up names of compositing functions
+    
+        s/SrcAdd/Add/g;
+        s/SolidMaskSrc/Src/g;
+        s/SolidMaskIn/In/g;
+        s/SolidMask/Over/g;
+        s/Solid_n/Over_n/g;
+        s/SrcIn/In/g;
+    
+        s/(fb)(Composite.*)sse2/sse2_$2/g;
+        s/(fb)(Composite.*)mmx/mmx_$2/g;
+        s/(fb)(Composite.*)neon/neon_$2/g;
+        s/(fb)(Composite.*)arm/arm_$2/g;
+        s/(fb)(Composite.*)vmx/vmx_$2/g;
+        s/(fb)(Composite.*)/fast_$2/g;
+    
+        s/b8g8r8x8/f00bar/g;
+        s/8888C/8888_ca/g;
+        s/0565C/0565_ca/g;
+        s/8888RevNPx/pixbuf_x_/g;
+        s/8x0/8_x_0/g;
+        s/00x8/00_x_8/g;
+        s/8x8/8_x_8/g;
+        s/8x8/8_x_8/g;
+        s/nx8/n_x_8/g;
+        s/24x16/24_x_16/g;
+        s/16x16/16_x_16/g;
+        s/8xx8/8_x_x8/g;
+        s/8xn/8_x_n/g;
+        s/nx0/n_x_0/g;
+        s/_x_/_/g;
+        s/f00bar/b8g8r8x8/;
+    
+        # Fix up NEON type names
+        s/uint8_8/uint8x8/g;
+
+commit e987661667ac5c650af1c3a2ba173558ff287e06
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 28 20:45:58 2009 -0400
+
+    Rename fetchers.
+    
+    s/fbFetchPixel/fetch_pixels/g;
+    s/fbFetch/fetch_scanline/g;
+    s/fbStore/store_scanline/g;
+
+commit 2d32d91e5d89bb04fcbaffb23244a9f023d39239
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jul 8 00:38:28 2009 -0400
+
+    Use postfix decrement, not prefix, in region-test.c
+
+commit 4e41905bacbf533740e999ba79e0620f358c0597
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jul 8 00:08:49 2009 -0400
+
+    Eliminate empty rectangles in pixman_region_init_rects().
+    
+    Otherwise they show up in the validated regions.
+
+commit 967ff0bdc7f46806b7a6d16332ad39cf2c1f01c1
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jul 7 22:55:32 2009 -0400
+
+    Add an initialization with an empty rectangle to region-test.c
+    
+    This should produce a valid region without empty rectangles in
+    it. Currently it doesn't.
+
+commit 40fcc14d1cf8cb6b6c71f27b0a3d3ccc9a845949
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jul 6 20:37:14 2009 -0400
+
+    Expand comment in miClipPictureSrc() to explain why a client clip is required.
+
+commit eba3be7b7a2b9a8df235af6255b9d8c70d2b8c93
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jul 5 00:35:31 2009 -0400
+
+    Fix forgotten use of BITMAP_BIT_ORDER to be ifndef WORDS_BIG_ENDIAN
+
+commit 06f5b51fee35727a823bd86294654178cbfac629
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Jul 4 22:49:16 2009 -0400
+
+    Return TRUE for the two new formats in pixman_format_supported_source().
+
+commit b0f220b7f236b5dea30ddc5dec51b73c11120e10
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 11:21:33 2009 -0400
+
+    Include pixman-private.h in pixman-region.c
+    
+    Delete some duplicated macros.
+
+commit f6ef071e2805bcf52473f06cd7171097b4afd926
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 29 11:07:20 2009 -0400
+
+    Rename OptimizedOperatorInfo to optimized_operator_info_t
+
+commit 7b7e4b23cab361b444d0c69a1b9c1678d3c5df2b
+Author: Ben Skeggs <bskeggs@redhat.com>
+Date:   Wed Jul 1 10:18:29 2009 +1000
+
+    Add accessor functions for PIXMAN_x2r10g10b10 and PIXMAN_a2r10g10b10
+
+commit 968f720d0e8b97bbeb2db9edb75ec524d697e1d6
+Author: Jonathan Morton <jmorton@sd070.hel.movial.fi>
+Date:   Wed Jul 1 16:29:48 2009 +0300
+
+    Avoid overrunning scanlines in NEON blitters.
+
+commit 863f9e9b7599b89c9dd42dd9c7913c2513384761
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 28 18:54:32 2009 -0400
+
+    Change comma to semicolon in pixman-combine.c.template
+
+commit 10aa32315529eaff848b8348cad47b2673f853cf
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Sat Jun 27 11:56:38 2009 +0300
+
+    Really fix ARM build.
+    
+    Commit 9d3f71d726c8b959b64c3e6b43ca4d3ccb320c32 broke the build
+    on ARM architectures by just removing custom include files and
+    not providing "pixman-private.h" as a replacement.
+
+commit 996e59f7f81864f7935d6dd58d8efd5a5ea265ea
+Author: Guillaume Letellier <glet.n800@googlemail.com>
+Date:   Fri Jun 26 19:02:08 2009 -0400
+
+    Fix ARM build.
+    
+    Commit 6e20c2574354d1cb071a1201ff166cb5e92c00d2 broke the build on ARM
+    architectures by not updating the use of fbComposeGetSolid()
+    correctly.
+
+commit e8addcc69a36375d1330749e00854d9651c8f8d0
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jun 26 18:58:23 2009 -0400
+
+    Change checks for srca == 0 to src == 0
+    
+    It is not generally correct to bail out just because the source alpha
+    is 0. The color channels still might not be and in that case the correct
+    result is:
+    
+           s + (1 - srca) * d = s + d
+    
+    which is not generally 0.
+
+commit 9a7ce32ef5cf70a17d83154cfd1b96aa54ce9232
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jun 25 12:42:03 2009 -0400
+
+    Make arm compositing functions static
+
+commit 9d3f71d726c8b959b64c3e6b43ca4d3ccb320c32
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jun 25 10:20:44 2009 -0400
+
+    Delete ARM header files
+
+commit 9837465fd9a5d4e7280d4c79c41d2d9a9c8f71c0
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Jun 24 01:30:34 2009 +0300
+
+    Use -mcpu instead of -march for ARM SIMD runtime autodetection
+    
+    Option -mcpu has higher priority than -march with the current versions
+    of gcc and that's why it is better to use. There is no particular
+    reason why 'arm1136j-s' is used in this patch, it could be any armv6
+    compatible core.
+
+commit 6b8251039a905114e4b0776c3f8f58cb0678a532
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 24 20:08:50 2009 -0400
+
+    Add test cases to oob-test using PIXMAN_{a,x}2b10g10r10
+
+commit f94053cd9b1dc8db6c924c8cf50d75ccc1898cce
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 24 13:12:07 2009 -0400
+
+    Post-release version bump
+
+commit f6faa06ef85fc4c9ff38dbc9243c060b4cdacc1a
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 24 12:51:40 2009 -0400
+
+    Pre-release version bump
+
+commit 084392fbd72e55f87e9bc37dd02384fc145f7d36
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 24 05:44:18 2009 -0400
+
+    Delete scanFetchProc type. Use fetch_scanline_t instead.
+    
+    fetch_scanline_t now takes a pixman_image_t argument instead of a
+    bits_image_t, so there is also a bunch of updates in pixman-access.c
+
+commit 588b42dc1e8fe252bde1eb0905bb9fac806e8ca3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 21:28:28 2009 -0400
+
+    Constify the mask argument to scanline fetchers.
+
+commit 5cfdee917d3cac38b103f7453c5a8a0047b95337
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 21:22:06 2009 -0400
+
+    Add a mask and mask_bits argument to the raw scanline fetchers.
+
+commit d3bebaf731b4e1714653b50a4a861171f497b42f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 21:10:52 2009 -0400
+
+    Rename fetchProc32 to fetch_scanline_t and fetch_pixels_32_t to fetch_pixels_t
+
+commit 6af8672c69b770ce229bd1d156f1fe70d74800f9
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 20:58:39 2009 -0400
+
+    Get rid of the 64 bit fetcher types.
+    
+    It's simpler to just declare everything as 32 bit fetchers and do the
+    conversion in the few functions that actually need to know the size of
+    the pixel type.
+
+commit 70cba5cfa8a5d702c32170c511a7318766e45731
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 20:38:58 2009 -0400
+
+    Consolidate the three scanline store types into one.
+    
+    The 64 bit storers do their own type conversion.
+
+commit 973ebf1631de695483fcb4b5e4c2b27e037ca3bf
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 19:03:11 2009 -0400
+
+    Get rid of remaining scanFetchProc casts
+
+commit 24303475c26dada40474f5972b1abee2315ba8f9
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 19:02:10 2009 -0400
+
+    Get rid of scanFetchProc casts in pixman-radial-gradient.c
+
+commit 99780d3b2264f6e2bb210d3fdc1237c8fbfc3f25
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 19:00:52 2009 -0400
+
+    Get rid of scanFetchProc casts in pixman-conical-gradient.c
+
+commit 2d2d3a2625fcc1151f61d0dc1a6ff268d7491be8
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 18:58:39 2009 -0400
+
+    Get rid of casts to scanFetchProc in pixman-bits-image.c
+    
+    Instead just declare the functions with the required type and do any
+    type conversions in the function itself.
+
+commit 4597ad88d9ade51b5a0b4eb87503e1278b29ef56
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 18:44:01 2009 -0400
+
+    Fix bug where 64 bit pixels were fetched as 32 bit ones.
+
+commit aa6adb646a2c61062d867cece2b0669f658abb39
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 18:41:35 2009 -0400
+
+    Delete FbIntMult and FbIntDiv macros, and move FbIntAdd to pixman-combine.h
+
+commit 53ada03119d44984775877f2a2fee5ce442ac1c8
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 24 12:10:48 2009 -0400
+
+    Add a table to oob-test so that it can test more than one setup.
+
+commit 895a8da63370635b05ffb91d3d670c6627d8b2ab
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 24 11:28:03 2009 -0400
+
+    Fix offset bug in pixman_run_fast_path().
+    
+    Fast paths should only run when the source images completely cover the
+    composite region, because otherwise they would be required to sample
+    the border, and fast paths generally don't know how to do that.
+    
+    The check for this did not work right because it didn't take the
+    offset generated by the composite coordinates into account. This
+    commit fixes that by adding (x, y) coordinates to image cover
+    indicating the new position of the source in destination coordinates.
+    
+    Based on this we now compare against the region extents which are
+    already in destination coordinates.
+
+commit fd90429a32927d8aa516a3d26cc309ca7043e4d3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 24 11:23:04 2009 -0400
+
+    Fix typo in oob-test.c
+
+commit bed9c378ff9d01c8e646241dd96a43e2eb870cca
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 24 10:37:07 2009 -0400
+
+    Add test case for out-of-bounds memory access.
+
+commit b6c97ae2c934ca5adade10303d6faa6e827f826d
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 24 09:04:54 2009 -0400
+
+    Fix comment in pixman-utils to have the right sense.
+
+commit c0047fbfd54d519698a0991111f2440dc8e081b9
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 16:55:53 2009 -0400
+
+    Subtract x_off/y_off before conversion to integer.
+    
+    They are fixed-point values, not integers.
+    
+    Bug 22437, reported by Michel Dänzer.
+
+commit 905856f43d38b5f2932d8b459e805e1c86b7a2f3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 16:37:35 2009 -0400
+
+    Add convolution-test.c program
+
+commit 79d397003f56238aa680b0670e1e7d7ba1594dda
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 16:23:26 2009 -0400
+
+    Delete leftover use of PIXMAN_OP_FLASH_SUBTRACT
+
+commit ebc4a4df9c92934891d202ae2603216a046ec939
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 14:55:36 2009 -0400
+
+    Remove support for component alpha with HSL blend modes.
+    
+    It isn't clear that component alpha makes sense with HSL blend modes.
+
+commit ca4ff13027b76d0ac7398f159a731f7606b7bd51
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 14:39:49 2009 -0400
+
+    Remove FLASH_SUBTRACT blend mode.
+    
+    We may resurrect it later, but leave it out for now, as the closest
+    thing we have to a spec:
+    
+    http://www.kaourantin.net/2005/09/some-word-on-blend-modes-in-flash.html
+    
+    claims that alpha values should be subtracted, whereas real-world flash
+    files indicate that they shouldn't.
+
+commit 5dab62a2f922a515634d65b133aeb089e855b399
+Author: Carlos Garcia Campos <carlosgc@gnome.org>
+Date:   Tue Jun 23 17:12:39 2009 +0200
+
+    Fix BlendColorBurn
+    
+    It should return 0 when sa == 0
+
+commit e3a94e892850f91d2cb0463dc2c86f7217deb8a4
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Jun 20 20:19:57 2009 -0400
+
+    Add screen-test.c test program
+
+commit 16873f6d1baa3b0c26b31e71ad6d36d53efaf9e3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 17 11:46:01 2009 -0400
+
+    Make the composite test window bigger by default.
+    
+    Also rearrange the squares to better match typical aspect ratios.
+
+commit eb4fd0477a4f3acd111fc9132f2dec7f1f63f3e1
+Author: Carlos Garcia Campos <carlosgc@gnome.org>
+Date:   Thu Jun 18 15:24:33 2009 +0200
+
+    Use floating point in SetLum
+
+commit 96d5044c0c4a9e34deb97655679f1d688c192c99
+Author: Carlos Garcia Campos <carlosgc@gnome.org>
+Date:   Wed Jun 17 17:40:52 2009 +0200
+
+    Do not use combineMask in component-alpha functions
+
+commit bf356c6d8cdbabf2faf4b6d77f94ccd3bd0459fb
+Author: Carlos Garcia Campos <carlosgc@gnome.org>
+Date:   Wed Jun 17 16:59:45 2009 +0200
+
+    Fix typo
+
+commit cdae71ee85c74f702a8f0b999432e4d6d5caf766
+Author: Carlos Garcia Campos <carlosgc@gnome.org>
+Date:   Wed Jun 17 10:46:44 2009 +0200
+
+    [TEST] Update composite-test to test more operators
+
+commit c35685255f07a5a1f656d3153b5534876481b65b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Nov 29 15:12:45 2008 -0500
+
+    Fix ColorDodge and ColorBurn to conform to the spec
+
+commit 49a4fc09694d241f6b9f725a084c27eba3e31d00
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Nov 29 15:11:07 2008 -0500
+
+    Remove optimizations that I'm not convinced are correct
+
+commit 3fb71f8b41dedd55982eccd16b8518cce10258fa
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Nov 29 15:02:04 2008 -0500
+
+    Fix some more problems in MultiplyC
+
+commit 254e62159b4a8652c1dd9c47d0e5e0d0ff2ced3a
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Nov 29 14:53:57 2008 -0500
+
+    Fix various problems in FbCombineMultiplyC
+    
+    Don't read the destination unless we have to.  fbByteMulAddC()
+    produces its result in its first argument, not its last.
+
+commit a158d7f14f4b987e9e6380ffe896dbcfd41799ec
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Nov 26 13:52:00 2008 -0500
+
+    Add some comments about the linearity of the non-separable blend modes
+
+commit cae5062d3bf16d32cf675ccb0030e29cc940b25e
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Tue Nov 25 22:53:37 2008 +0100
+
+    fix component-alpha versions to set source to 0 when mask is 0
+
+commit 9df72ebdb8280c7ca3b2696c3f7f48b69438502b
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Tue Nov 25 22:50:54 2008 +0100
+
+    remove debugging leftovers
+
+commit 19aae37bfb8fb349258675dd96872c5ba65dcce1
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Tue Nov 25 22:50:17 2008 +0100
+
+    correct subtract implementation
+
+commit f130d99c94edbf5aeebeb317df64dbd7a6d20efd
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Sun Nov 23 18:36:32 2008 +0100
+
+    fix Multiply component-alpha version
+
+commit 4bb1eac4e7c6c785da3c2b2b1836c83446befc80
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Sun Nov 23 18:34:50 2008 +0100
+
+    fix comment
+
+commit 73810b320ec5eab5bcbd9137f012cf0e4bf6867f
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Sun Nov 23 17:03:50 2008 +0100
+
+    fix component-alpha versions for separable blend modes
+
+commit 4b921c1d910a5d78ca4784a6879789a5af6718d3
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Sun Nov 23 16:23:22 2008 +0100
+
+    rename operator SUBTRACT to FLASH_SUBTRACT
+    
+    Also document it and move it out of the PDF blend modes to make clear
+    that it is not in any way related to PDF.
+
+commit 7cbfe3ba214006dda5fa6d21871ef6fc61067005
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Sun Nov 23 15:42:53 2008 +0100
+
+    rework blend-mode documentation to match current code better
+
+commit f26c9ec438c97515ae874711859e012971ea920a
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Wed Nov 19 21:07:13 2008 +0100
+
+    typo fix: Seperable => Separable
+
+commit ea17e2e2e43e578b3799fe9a6f7533569aed880c
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Wed Nov 19 21:06:06 2008 +0100
+
+    remove semicolon at end of macro
+
+commit ecf9f83ac64236b0834d268e6235306ab84fb749
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Wed Nov 19 21:05:16 2008 +0100
+
+    remove a leftover debugging statement
+    
+    That was an assertion check by infinite loop
+
+commit c061b4dd16af52383afae470e845bd43a552d925
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Thu Nov 13 17:40:10 2008 +0100
+
+    invent a Subtract operator for component alpha
+    
+    This seems to make sense, and as I can't test it against Adobe's Flash
+    player as that one can't do component alpha, this one looks best.
+
+commit 93e32235e6a72bfea14d36a0407fbbe6482e20d9
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Thu Nov 13 17:34:19 2008 +0100
+
+    add non-separable versions for component alpha
+
+commit 239cc46aa77b4be71d738c0136a5465796a29886
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Thu Nov 13 17:29:00 2008 +0100
+
+    add component-alpha versions of the separable blend-modes
+
+commit 2f57b6f4e9020654ad175a593b17ff07fc3f5cbd
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Thu Nov 13 16:12:22 2008 +0100
+
+    rewrite nonseparable blend modes the same way as separable ones
+
+commit fd1bec2859f775feaff329315cdf16ad27ec4728
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Thu Nov 13 15:13:17 2008 +0100
+
+    clean up separable blend modes
+    
+    The code is now shorter and faster than before
+
+commit e8b4394a409cda48b6598847292b768ad027dbf0
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Wed Nov 12 19:26:13 2008 +0100
+
+    prefix HSL operators with HSL
+    
+    This is necessary to distinguish SATURATE from PDF's HSL SATURATION
+
+commit f08263a25181a5f18991490629ca2e9582836ac6
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Wed Nov 12 19:12:12 2008 +0100
+
+    remove invert operator
+    
+    src INVERT dest == (white IN src) DIFFERENCE dest
+
+commit 755638d73cfc5879bd440f0148e982e562509fd0
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Thu Oct 23 21:20:23 2008 +0200
+
+    add nonseparable blend modes from PDF spec
+
+commit e3ad87033e3771a3c54b1b8e49813a6959315cd7
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Thu Oct 9 21:46:50 2008 +0200
+
+    fix ColorDodge and ColorBurn to conform to the PDF spec
+
+commit 35bb57e7234994c4169458275e362f02cb5138aa
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Thu Oct 9 18:00:45 2008 +0200
+
+    use PDF algorithm for soft-light
+
+commit 0735aeeaeba04f0c33f22b25a191cfd1f27c271d
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Tue Oct 7 15:13:45 2008 +0200
+
+    Add INVERT and SUBTRACT blend modes used in Flash
+
+commit 740425ab969adda1aaf36d8f52dec6f6e5303ed6
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Wed Aug 29 23:15:33 2007 +0200
+
+    use a pixman_op_t here
+    
+    This improves the readability in gdb when debugging this structure
+
+commit 94e9673eaaf9e22530159f0335a0a30d2f2a0047
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Thu Sep 25 12:53:06 2008 +0200
+
+    Add support for extended blend mode. First pass.
+    
+    This adds support only for FbCombineU function.
+    This work is based on equations provided in SVG 1.2 specification draft.
+    
+    http://www.w3.org/TR/SVG12/
+    
+    Based on a previous patch by Emmanuel Pacaud <emmanuel.pacaud@free.fr>
+
+commit 99108040f03726bf4bddf55baa7ff6acd796fcf0
+Author: Michel Dänzer <michel@daenzer.net>
+Date:   Tue Jun 23 14:02:26 2009 -0400
+
+    Fix the build on big endian machines.
+
+commit bb3b3da18ac6e1f935008fa50cd854b3de19afc3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 13:44:48 2009 -0400
+
+    Rename PIXMAN_FORMAT_16BPC macro to PIXMAN_FORMAT_IS_WIDE
+
+commit 039d4618f79e384d93a7548466f80acae6da738c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 13:41:27 2009 -0400
+
+    Write alpha map fetching with FbByteMul() instead of div_255()
+    
+    Delete the div_255/div_65535 macros.
+
+commit 3e39b566ee2aaa414b95b0dae98cc5971c399359
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 23 07:34:17 2009 -0400
+
+    Remove unused typedefs.
+
+commit 2c70814b6bff2091bcc55ae4252fe82ae53439e4
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 22 20:43:08 2009 -0400
+
+    Delete unused _pixman_image_get_fetcher() function
+
+commit b3bd7394477a64ca0460655ca3a8e5326c402167
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 22 19:51:13 2009 -0400
+
+    Move accessor macros to their own header.
+    
+    Also rearrange some things in pixman-private.h
+
+commit fe8ef09e9835f90b669a2b1ddfda49e839d6de53
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 22 19:38:58 2009 -0400
+
+    Move FbGet8() macro into pixman-bits-image.c
+    
+    It is only used for bilinear filtering now. Also some formatting
+    changes in pixman-private.h
+
+commit 03587764455bd41684bf29bbecb657ba45b0c341
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 22 19:35:11 2009 -0400
+
+    Delete FbInOverC macro
+
+commit 1c429b4fbedc5287659c836c0d30801a6209bf57
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 22 19:25:25 2009 -0400
+
+    Make pixman-mmx.c compile again.
+
+commit 7bb9df038293b591e687cbf3a9830476bef7f9fc
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 22 16:34:15 2009 -0400
+
+    Fix typo in CLIP macro.
+
+commit 63b050de5b2627aee0d75c66244e55757ba007ab
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 22 16:05:27 2009 -0400
+
+    Turn the FbAdd() macro into an FbIntAdd() which doesn't take a channel.
+    
+    The only use of the channel argument could be written better with FbByteAdd().
+
+commit 026ef583288e1e63e5a84ba998aea2e674f02a17
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 22 15:58:28 2009 -0400
+
+    Delete FbInU macro.
+    
+    Replace uses of it with FbIntMult().
+
+commit 5028c1599ad9119dbb7b58d2f93e60c857aec769
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 22 15:21:40 2009 -0400
+
+    Use fbOver() instead of fbOver24.
+    
+    fbOver() is faster anyway, and this lets us get rid of fbOverU.
+    
+    Also use FbByteMul() in fbIn instead of four times FbInU.
+    
+    Finally, delete FbOverC and FbInC since they weren't used.
+
+commit ca4750be0a5ea0a6910ad9f4eed6a9989c91c230
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 22 08:09:11 2009 -0400
+
+    Eliminate Fetch/Store24 macros.
+    
+    Replace them with inline functions in pixman-bits-image.c.
+
+commit e68f8bc1187785309ed3befcda1e1a211fe624e6
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 22 07:05:24 2009 -0400
+
+    Remove unused ACCESS macros in pixman-bits-image.c
+
+commit 233d8907ed02d6624f458dd40c9db46055fc7630
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 22 06:51:04 2009 -0400
+
+    Various minor changes
+    
+    - Add underscores to the pixman_image_get_solid(),
+      pixman_image_is_solid(), and pixman_is_opaque() names.
+    
+    - A number of formatting fixes
+    
+    - Move debug code to the end of pixman-private.h
+    
+    - Collect all prototypes for image methods in one place
+
+commit 950bcd7d4a6226d969b0b69513f6806a2d40e08e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 22:33:31 2009 -0400
+
+    Some formatting changes
+
+commit 8b616c5725891f2f2d21b71796fb9af5644260e4
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 22:16:39 2009 -0400
+
+    Delete struct point
+
+commit 653fe825c92935318e0d2d552c3a0336ef82a1de
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 22:12:25 2009 -0400
+
+    Move region helpers into pixman-utils.c
+
+commit 00d852c96931f4bc27dfec124062e71eb49dc9bc
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 22:11:07 2009 -0400
+
+    Move code around
+
+commit a4ef790faac2c822df8336ee00c6fc5ea84aaa53
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 22:05:49 2009 -0400
+
+    Move pixman_version() to pixman.c
+
+commit f1049c61d6b6b977f56533644bbfa7e2a95ca3a1
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 22:01:58 2009 -0400
+
+    Move pixman_compute_composite_region() into pixman-utils.c
+
+commit 7690af20fcf7f341a5162b77a66660cd05a155b9
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 21:42:14 2009 -0400
+
+    Eliminate pointless Red/Green/Blue macros
+
+commit f6faf538eebed4722c085c2eef7b3ae524e3e00c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jun 4 07:39:13 2009 -0400
+
+    Get rid of indexed argument to store functions
+
+commit fdb25d97477635dafb0f8c328de65727a2d73a48
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 21:36:43 2009 -0400
+
+    Move macros around in pixman-private.h
+
+commit 76bf3073d45e184973cfc992d8f366a4a5ed0127
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 21:32:23 2009 -0400
+
+    Move some macros into pixman-access.c
+
+commit e2b5b05b3818f6a4ecf24dd0030e22784af22e22
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 21:28:38 2009 -0400
+
+    Delete obsolete comment
+
+commit 271a0d34a07ee04d8de0cb435ab9242aeb0a4c5c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 21:28:24 2009 -0400
+
+    Move edge stepper macros into pixman-edge.c
+
+commit 92eca118ad9cdeb61a00a591916f4e34aaaab916
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 19:45:29 2009 -0400
+
+    Delete FB_MASK and FB_ALLONES macros
+
+commit 9541538a97b1101a886a26653a8b416701b2e065
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 19:38:57 2009 -0400
+
+    Implement fbStore_a2g2b2r2
+
+commit 433d94e60b8404df39582b6149e60a5faa965160
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 19:35:20 2009 -0400
+
+    Replace switch functions in pixman-access.c with a table of accessors.
+    
+    Also delete unused orig_data pointer.
+
+commit d78e30b26be15683062a1a3b76fbbe7d3b5abe0f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 19:06:25 2009 -0400
+
+    Rename pixman_image_can_get_solid() to pixman_image_is_solid
+
+commit 6e20c2574354d1cb071a1201ff166cb5e92c00d2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 18:58:53 2009 -0400
+
+    Turn fbComposeGetSolid() macro into a pixman_image_get_solid() function.
+
+commit 76aa72e8cac12400ac8f635b81642335b0d27310
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 18:51:36 2009 -0400
+
+    Delete unused WRITE_ACCESS() macro
+
+commit 3c0ed5b92dc205d4fa6c9fa2f2772022f2404549
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 18:51:06 2009 -0400
+
+    Move pixman_image_fill_rectangles() to pixman.c
+
+commit fb0fe616f2e0ce8f31f88887ca2a7ec394886b90
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 18:43:27 2009 -0400
+
+    Delete unused mod macro
+
+commit bfa6f8c0b0418a3b4337da6c8bd0d4e9eda7e83e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 18:40:25 2009 -0400
+
+    Eliminate bit fiddling macros from pixman-private.h.
+    
+    There was one remaining use of FbMaskBits in the a1 trap rasterizer;
+    just move that macro there.
+
+commit 84886292e2c9be4149a32c7499015960331db426
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 18:29:39 2009 -0400
+
+    Implement fbComposeGetSolid() as a call to pixman_image_get_scanline()
+
+commit 8e40734174e97ff319c31ba49096cc8b7d5117ae
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 16:45:17 2009 -0400
+
+    Eliminate FbStipMask macro.
+    
+    It was only used for storing into a1 images, and that code could be
+    written more clearly by computing the bit index directly.
+
+commit 590d034bb399d28b191ac50c764d03ebd342e149
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 16:07:50 2009 -0400
+
+    Implement pixman_format_supported_destination() in terms of pixman_format_supported_source()
+
+commit 16a87a89e1330c18876aaf17ccc6f07243062ca8
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 15:09:02 2009 -0400
+
+    Move FbGen macro into pixman-combine.c
+
+commit d18722cdb6ddde7abba9cd1492e636f2668fadf1
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 13:18:46 2009 -0400
+
+    Use DIV instead of _div in pixman-trap.c.
+
+commit 81d6725f3106a888fe0fbffba4a0d05d553d0777
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 11:19:00 2009 -0400
+
+    Move edge utilities into pixman-trap.c
+
+commit 793c92dadb6f9c82ace50711c0e8c51e62368f19
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 11:15:30 2009 -0400
+
+    Move compiler dependencies to a new pixman-compiler.h file
+
+commit 5624ca0417bf7a30b5b05235e902b237a77b8543
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 10:12:15 2009 -0400
+
+    Rename FastPathInfo to pixman_fast_path_t
+
+commit afcfc8efc48630f0f349aefc8c86619fc7514647
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 10:09:22 2009 -0400
+
+    Eliminate MSBFirst, LSBFirst, IMAGE_BYTE_ORDER, and BITMAP_BIT_ORDER.
+    
+    Just use WORDS_BIGENDIAN instead.
+
+commit fd83e3594b440ade9acc1263dcd2a0980aa7ebcc
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 09:50:24 2009 -0400
+
+    Rename combine.inc and combine.h.inc to pixman-combine.{c,h}.template.
+
+commit d7234efc9a5d6a371692287555820fcd0f7ba48d
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 21 09:44:22 2009 -0400
+
+    Only use force_align_arg_pointer on gcc/x86-32
+
+commit 093112a1b720c3a74b28b7b4289feb16fbe4afd1
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Jun 20 20:28:36 2009 -0400
+
+    Rename pixman-pict.c to pixman.c
+    
+    There are no traces of fbpict.c in it anymore.
+
+commit b7b6847b6692796a5da8590dd6254add6d566a7a
+Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi>
+Date:   Fri Jun 19 17:29:11 2009 +0300
+
+    Remove redundant NULL checks from general_composite_rect().
+    
+    The general_composite_rect() function has two invocations
+    of the return_if_fail() macro before any of its variable
+    declarations.  Removing them allows for compilation to
+    succeed using a pre-C99 compiler.
+
+commit d4dc812380f937908e466bfab52bfcc3b5334ebe
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jun 19 13:19:02 2009 -0400
+
+    Get rid of pixman_region_internal_set_static_pointers()
+    
+    Instead just define the function in pixman-region16.c
+
+commit 8b344e417e06f80a24bff9b6fadf4d82b54ab911
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jun 4 07:31:39 2009 -0400
+
+    Rename PixmanTimer to pixman_timer_t
+
+commit 2f9787a9cf3fe0783d1b46a01534ba6588b53e3f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jun 4 07:29:14 2009 -0400
+
+    Rename GradientWalker to pixman_gradient_walker_t
+
+commit cacfd7fe33e7e7643199de0dffb8312c0c432ccf
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jun 19 13:14:11 2009 -0400
+
+    Delete unused IS_SOURCE_IMAGE() macro
+
+commit 216f46eb7e3f468f2b64421bdfbcb6e58eafc7e8
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jun 4 07:17:36 2009 -0400
+
+    Remove commented-out fbAddTriangles
+
+commit 43f3825660914aae7786537ad069758a057488ce
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jun 19 13:04:26 2009 -0400
+
+    Remove useless FbBits typedef
+
+commit 8821885207f74bf9a18b374a1ee5de2442f603a3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jun 4 06:52:32 2009 -0400
+
+    Delete unused CombineFunc{32,64} types
+
+commit e063bd5555ed874a351bada2ef2a7082c42cb426
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jun 19 12:57:56 2009 -0400
+
+    Rename bits_image.fetch_pixels_{32,64} to fetch_pixels_raw_{32,64}
+    
+    Also add a couple of comments about what these functions do.
+
+commit ce2944747455265d24bbbd6ab4b843bf974c8126
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jun 19 12:28:59 2009 -0400
+
+    Delete empty FASTCALL macro
+
+commit 207c9480b5fdb30dd5b9bfc37707ff9cbf1d2d8a
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jun 19 12:15:19 2009 -0400
+
+    Delete FbComposeData type
+
+commit 9dfaa6365f247c1fefb84805ecf850deebb05193
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jun 4 06:02:32 2009 -0400
+
+    Remove unused pixmanFetchGradient() declaration
+
+commit da001051d876051763dc0bc1a90d58ec7ca31e96
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 3 22:51:18 2009 -0400
+
+    Remove dstMask from pixman-fast-path.c
+    
+    These were used to zero the x8 channel of PIXMAN_x8r8g8b8
+    destinations. However, we treat this channel as undefined, so there is
+    no need to zero it.
+
+commit 304412752e2cbb7a8d407ca1af45d4ec1508e5b2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 3 22:48:57 2009 -0400
+
+    Eliminate trivial READ and WRITE macros in pixman-fast-path.c
+
+commit ac3fdeb97b21bd03e1902166310533377abd441d
+Author: William Bonnet <william@wbonnet.net>
+Date:   Fri Jun 19 07:27:28 2009 -0400
+
+    Fix build on Sun Studio.
+    
+    Don't use return in a void function.
+
+commit 94964c221fe8141e5177d98f5357dca33fa00544
+Author: Jonathan Morton <jmorton@sd070.hel.movial.fi>
+Date:   Tue Jun 16 12:08:29 2009 -0400
+
+    [NEON] Add ARGB8-over-RGB565 compositing blitter.
+
+commit af660613eefbb474fd62f01b6f073fae389bd6f7
+Author: Jonathan Morton <jmorton@sd070.hel.movial.fi>
+Date:   Tue Jun 16 12:08:29 2009 -0400
+
+    [NEON] Add transparent rect blitter.
+
+commit 8eeeca993252edc39da9c5c57545b81215fafc81
+Author: Jonathan Morton <jmorton@sd070.hel.movial.fi>
+Date:   Tue Jun 16 12:08:29 2009 -0400
+
+    [NEON] Replace Ian's glyph-blitter with a better one.
+    
+    Each scanline of the destination is bulk-loaded into a cached buffer on
+    the stack (using the QuadWordCopy routine) before being processed.  This
+    is the primary benefit on uncached framebuffers, since it is necessary
+    to minimise the number of accesses to such things and avoid
+    write-to-read turnarounds.
+    
+    This also simplifies edge handling, since QuadWordCopy() can do a
+    precise writeback efficiently via the write-combiner, allowing the main
+    routine to "over-read" the scanline edge safely when required.  This is
+    why the glyph's mask data is also copied into a temporary buffer of
+    known size.
+    
+    Each group of 8 pixels is then processed using fewer instructions,
+    taking advantage of the lower precision requirements of the 6-bit
+    destination (so a simpler pixel multiply can be used) and using a more
+    efficient bit-repacking method.
+    
+    (As an aside, this patch removes nearly twice as much code as it
+    introduces.  Most of this is due to duplication of Ian's inner loop,
+    since he has to handle narrow cases separately.  RVCT support is of
+    course preserved.)
+    
+    We measured the doubling of performance by rendering 96-pixel height
+    glyph strings, which are fillrate limited rather than latency/overhead
+    limited.  The performance is also improved, albeit by a smaller amount,
+    on the more usual smaller text, demonstrating that internal overhead is
+    not a problem.
+
+commit 1a7f25946b5b64aa604fab0f6d428bacb5296a4e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 16 11:59:20 2009 -0400
+
+    Post-release version bump
+
+commit 9733b2c4d4ed8fbd3f6e770446b457e0526db152
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 16 11:52:48 2009 -0400
+
+    Pre-release version bump
+
+commit b1cb5922f785310ef790811b52e4e2b0c85dfccc
+Author: Jonathan Morton <jmorton@sd070.hel.movial.fi>
+Date:   Mon Jun 15 16:09:32 2009 +0300
+
+    Add RVCT support for straight blitter.
+
+commit b6a3868ced67eb363273bfbee0d850c4d06cca34
+Author: Jonathan Morton <jmorton@sd070.hel.movial.fi>
+Date:   Mon Jun 15 16:02:04 2009 +0300
+
+    Better CFLAGS handling for recent ARM platforms.
+
+commit 1217c11a02ef60a3955fd98f7cec48de4cb9561b
+Author: Jonathan Morton <jmorton@sd070.hel.movial.fi>
+Date:   Wed May 27 15:31:59 2009 +0300
+
+    Misc warning fixes.
+
+commit 68ec1244cdd4aa2703739a19c7c3917231b7b889
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Jun 13 09:32:59 2009 -0400
+
+    Add API to set a function to be called when the image is destroyed.
+
+commit ebc39ed35a9f79ac9bb329bfc7dc27f290f6e1b0
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 10 08:52:31 2009 -0400
+
+    Work around X server bug.
+    
+    X servers prior to
+    
+    	ebfd6688d1927288155221e7a78fbca9f9293952
+    
+    relied on pixman not clipping to destination geometry whenever an
+    explicit clip region was set. Since only X servers set
+    source_clipping, we can just trigger off of that.
+
+commit 08eb065c568de5c0cb67b7b02ccb17bf72d5059c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 3 05:21:29 2009 -0400
+
+    Move region computation closer to the region walking.
+    
+    Computing the composite region is a bit expensive, so only compute
+    it if we are likely to actually walk it.
+
+commit 78ca4eea6467dbb6b9da1198b9526750a0a8dca3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 2 23:17:34 2009 -0400
+
+    Simplify clipping rule
+    
+    The new rule is:
+    
+    - Output is clipped to the destination clip region.
+    
+    - If a source image has the clip_sources property set, then there
+      is an additional step, after repeating and transforming, but before
+      compositing, where pixels that are not in the source clip are
+      rejected. Rejected means no compositing takes place (not that the
+      pixel is treated as 0). By default source clipping is turned off;
+      when it is turned on, only client-set clips are honored.
+    
+    The old rules were unclear and inconsistently implemented.
+
+commit b9683cb2ae519707e06a0b9302f8a373d336da12
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 3 00:25:54 2009 -0400
+
+    Fix pixman_image_is_opaque()
+    
+    - Don't claim that non-repeating bits images are opaque.
+    
+    - Don't claim that conical gradients are opaque ever.
+
+commit 7aeed3fc08b3359a3e4e6178f569dbb28ffdad08
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 2 22:57:23 2009 -0400
+
+    Only call fast paths when the images cover the composite region
+
+commit e67c7eedf203f4424bdfac7982d2bc7c6e1748d2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 2 22:17:00 2009 -0400
+
+    Pass the region to walk_region_internal()
+
+commit 85a2f55e6b55833cb4092c6e9e58497fbd9e7167
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 2 22:08:02 2009 -0400
+
+    Remove srcRepeat and maskRepeat arguments from _pixman_walk_composite_region()
+
+commit dc0a9dd65ab2622646d1220adf3e5ea70dcae951
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 2 22:04:47 2009 -0400
+
+    Remove all the srcRepeat/srcTransform stuff from the general implementation.
+
+commit f885caad4a709d7d2c4f0bf63d735080bcca3c24
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 2 22:02:52 2009 -0400
+
+    Make _pixman_walk_composite_region() a wrapper around an internal function
+
+commit d5768884a1576e7ad4a9d1e24063d214babb7157
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 2 21:31:58 2009 -0400
+
+    Handle repeat_none/normal for 64 bit fetchers
+
+commit c9ea4a9722bc3c2223e8c8d72aa1b23598db489e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 2 21:20:42 2009 -0400
+
+    Make the untransformed path handle REPEAT_NONE and REPEAT_NORMAL
+
+commit cf7bf4eb57351b44f467eda9f4d9fa8f97754550
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jun 5 01:33:28 2009 -0400
+
+    Post-release version bump
+
+commit b721bc49199a24364bceb6e76ad9c6e6b2996905
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jun 5 01:10:00 2009 -0400
+
+    Pre-release version bump
+    
+    Also squash some warnings and correct the variable name in RELEASING.
+
+commit 5f086792eeaea6b2c401105b8bbf0c92fb7d192e
+Author: Jonathan Morton <jmorton@sd070.hel.movial.fi>
+Date:   Thu Jun 4 11:24:26 2009 -0400
+
+    [NEON] Really fix filler bug.
+    
+    Advance the destination pointer (r4 register) properly.
+    Found by Siarhei Siamashka.
+
+commit 3b12cc7a23f81581b027764be96bf028785b1b5f
+Author: Jonathan Morton <jmorton@sd070.hel.movial.fi>
+Date:   Thu Jun 4 11:24:22 2009 -0400
+
+    [NEON] Fix filler bug.
+    
+    r5 is being sourced explicitly instead of the %[width] reference.
+    It's probably a copy-paste bug, not spotted because I didn't
+    originally write it.
+
+commit 3c570a815afb282df01f41acad385ff0e3e33899
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Jun 4 00:05:06 2009 -0400
+
+    Add an --enable-timers configure option to enable the TIMER_BEGIN/END macros
+
+commit 7077138fb3c633e8791b2a4139ade07dbc677dd3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 3 23:54:57 2009 -0400
+
+    Some cleanups in the configure.ac file
+
+commit 9d442a6bc6f1ae28c3f36247bf3e2ab959fb2712
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 3 22:25:25 2009 -0400
+
+    Correct link to bugzilla in README
+
+commit 4465866cba3700e831101ea429d5de2a95cf7470
+Merge: a673a89 d4d716c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 3 21:49:59 2009 -0400
+
+    Merge branch 'many-pixels'
+
+commit a673a898e1e119836c9c68eff71feaec49f97bf1
+Author: Jonathan Morton <jmorton@sd070.hel.movial.fi>
+Date:   Wed Jun 3 10:43:42 2009 -0400
+
+    Delete now-unused fbCompositeSrc_x888x0565neon().
+
+commit 7b3e90c361602c4599ec592d83b4eb1eb0ea76cf
+Author: Jonathan Morton <jmorton@sd070.hel.movial.fi>
+Date:   Wed Jun 3 10:43:42 2009 -0400
+
+    Replace fbCompositeSrc_x888x0565neon with fbCompositeSrc_24x16neon.
+
+commit 0bfd9904e4adafbaa04ddfe1c0b22df1dac411e5
+Author: Jonathan Morton <jmorton@sd070.hel.movial.fi>
+Date:   Wed Jun 3 10:43:41 2009 -0400
+
+    Enable NEON straight blits.
+
+commit 4da5316285976f43d19231548f79c8b3b02ce060
+Author: Jonathan Morton <jmorton@sd070.hel.movial.fi>
+Date:   Wed Jun 3 10:43:41 2009 -0400
+
+    Enable NEON copies.
+
+commit 15ec3977843029f61f9e869610123977da8b446a
+Author: Jonathan Morton <jmorton@sd070.hel.movial.fi>
+Date:   Wed Jun 3 10:43:41 2009 -0400
+
+    Enable NEON fills.
+
+commit 78faaa58d60f982eb4fdb674b7740315282d4d65
+Author: Jonathan Morton <jmorton@sd070.hel.movial.fi>
+Date:   Wed Jun 3 10:43:41 2009 -0400
+
+    Add more NEON fast paths
+
+commit d4d716cc25536b5a9db3ed216d64f5f9be8b69a2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 2 03:27:46 2009 -0400
+
+    Only advance the Z coordinate for non-affine transformations
+
+commit c2b119492949d89ae1823961438e7086c700ea3f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 2 03:25:46 2009 -0400
+
+    Consistently use 256 pixels as the size of the temp buffers
+
+commit 10bc25b01a00b94eac72d9afe890bd0ff3699951
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Jun 1 19:51:06 2009 -0400
+
+    Inline repeating instead of doing it as a separate pass
+
+commit db4f7fc9df581af54c4ed760dee14ef8a09873d2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 23 21:10:46 2009 -0400
+
+    Move pixman_expand/contract to pixman-utils.c
+
+commit abb60f43f46b10f8057baa0fcc3eb480883ef23b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 23 21:03:39 2009 -0400
+
+    Change pixel wise fetcher to fetch many pixels at a time instead of just one
+
+commit 9a7ded161c014ba51f9d3723f29a32b759717673
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 22 11:37:01 2009 -0400
+
+    Remove unused access macro
+
+commit 2b82a4c14d8e8c8686a8b2d38abdc0df259e087c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 22 09:00:24 2009 -0400
+
+    Add a 64 bit pixel fetcher and use it for solid colors
+
+commit f9fa5bcac04af660a2c873e7cfbc969cb37bee77
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 22 08:47:51 2009 -0400
+
+    Move simple fetchers after transformed fetcher
+
+commit c981eb95bb3f0806ad92a13e45cfff1ad6cf362c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 22 08:39:18 2009 -0400
+
+    Rename some of the fetchers
+
+commit e043530553ebfcebe106b5cc3a35448727e597aa
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 22 08:31:50 2009 -0400
+
+    Split filter switching into its own bits_image_fetch_filtered() function
+
+commit cb04bfd6b4932030b6e9114b926d6c71ce57b97f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 22 08:28:35 2009 -0400
+
+    Move 'adjust' code into the individual filters
+
+commit 94c6abe8fcddce64efa7975e7f71296b1a049b1c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 22 08:18:40 2009 -0400
+
+    Add bits_image prefix to fetchers
+
+commit 5b8304fd17b86639a3adb3eec8ce1116a9d3425e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 22 08:15:35 2009 -0400
+
+    Move some code around - use image->fetch_pixel in FbFetchSolid
+
+commit 48a2d0bba24ac5c9c1426efd3cab08c652ab8952
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 22 08:13:39 2009 -0400
+
+    Rename _pixman_image_fetch_pixels() to bits_image_fetch_alpha_pixels
+
+commit 0486f0f3241225c887549def31e5e246ff16255f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 22 08:04:38 2009 -0400
+
+    Get rid of the StoreExternalAlpha() functions
+
+commit d9b045d18e4723e710dab410fc011d36fc5dd327
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 22 07:49:55 2009 -0400
+
+    Add fetch_scanline_raw{32,64}
+
+commit 67cd7fcbdfe026ddc4967c07939c2a548a80d6b5
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 22 07:13:51 2009 -0400
+
+    Add store_scanline_raw_{32,64} virtual functions to bits image
+
+commit 2434524fd9b8258af88afb1d71fe25813a5def2e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 20 20:32:42 2009 -0400
+
+    Move remaining pixman-transformed.c code into pixman-bits-image.c
+
+commit 4bd73c4d6a1d0489c8eb35a867ef55187ca75650
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 20 20:27:51 2009 -0400
+
+    Fix coordinate computation for perspective transformations
+    
+    - Don't convert to integer
+    
+    - Saturate to INT16_MIN/MAX on overflow
+
+commit 3dd2496890ccf499721df8b86f95c8f5b8ad196a
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 20 18:39:26 2009 -0400
+
+    Get rid of pixman-transformed-accessors
+
+commit df23b360a0330e89c1f38f9d64e8ac150ea740e4
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 20 16:52:31 2009 -0400
+
+    Remove unused code from pixman-transformed.c
+
+commit 2557931bac461d8a0274ad638c12687afbe26145
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 20 12:44:55 2009 -0400
+
+    Handle alpha maps in _pixman_image_fetch_pixels()
+
+commit 72ae714b7400db7282aa0f92cc740bc106685e54
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 20 08:28:54 2009 -0400
+
+    Only do region lookups when the source region is different from the full region
+
+commit ccbe5cf8f39f57a973e5901ad5fe583557947e98
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 20 07:40:29 2009 -0400
+
+    Fix typo in fetch_bilinear_pixels()
+    
+    Change the number of temp pixels in FbFetchTransformed() to something a little more reasonable.
+
+commit 72a3e20c722b16f1b28975451d33e934f54da46f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue May 19 10:04:33 2009 -0400
+
+    Process the correct number of coordinates for the NEAREST filter
+    
+    The number of coordinates to process is twice the number of pixels.
+
+commit e8b4ebf59143a04f5b6f10fb112d39ca50250293
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue May 19 09:23:01 2009 -0400
+
+    Fix a couple of bugs in the bilinear fetcher
+    
+    - The x and y distances are the most significant fractional bits.
+    
+    - We need to fetch four times the number of pixels produced.
+
+commit b24fc024fabed9406958611edc607f2af51e46cd
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue May 19 08:30:01 2009 -0400
+
+    Make fbFetchTransformed() use the new filtered many-pixel fetchers
+
+commit 8e0ad050e7ce7b3200e6126f782bad94d9df97f6
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue May 19 08:09:03 2009 -0400
+
+    Add fetch_convolution_pixels() function
+
+commit 1510ffb750b8b74c32dffd11cc0f20ce091767c1
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue May 19 06:59:13 2009 -0400
+
+    Add fetch_bilinear_pixels() function
+    
+    A bilinear fetcher that fetches many pixels instead of just one.
+
+commit a156e4e097f424722c4f1d03f0cf4bb9370962d4
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue May 19 06:18:00 2009 -0400
+
+    Add fetch_extended() function
+    
+    This function takes a list of coordinates and transforms it into
+    another list of coordinates, according to the repeat method of the
+    picture.
+
+commit d2cbfeca0efbf108c320e38bb39970af57c84438
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue May 19 05:23:29 2009 -0400
+
+    Add _pixman_image_fetch_pixels()
+    
+    Including a virtual fetch_pixel() function in bits_image_t.
+
+commit bd1cc87da39ad2e631bec5fa988a2e03eae0f929
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 2 16:51:28 2009 -0400
+
+    Get rid of toplevel argument to implementation constructors.
+    
+    It was always NULL anyway.
+
+commit 8d523bd9f3eb44b9e9a3c64f153626c39a0fffdc
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Jun 2 07:47:29 2009 -0400
+
+    Make sure the whole delegate chain has the correct toplevel
+
+commit 812a993843542f1ff051c46fe627315fcb73bc56
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 30 22:23:27 2009 -0400
+
+    Post-release version bump
+
+commit 3bad5eefd0d4e6ceb4ea52dd5117bf38649b31aa
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 30 22:09:11 2009 -0400
+
+    Pre-release version bump.
+    
+    Also delete non-existent header files from pixman/Makefile.am
+
+commit e3dba0f61af9583728f94210392cc12b602acc2e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 30 21:44:08 2009 -0400
+
+    Create a vmx pixman_implementation_t
+
+commit 0c92309a8ab887efd73737b627baca36d800c6f8
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Sun May 24 18:41:06 2009 +0200
+
+    Update vmxCombine*U to the new interface
+    
+    Make the functions use different codepaths depending on mask being non
+    NULL. It could be sped up a bit more but should work as before
+    speedwise. Conformance checked with cairo tests.
+
+commit 21034db1daf90ac2b17f6929e72b3a0b953e81c4
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed May 27 22:46:23 2009 +0300
+
+    Scaling test updated to provide better coverage for problematic cases
+    
+    Now scaling test should reliably detect problems in new scaling code.
+    Maximum image size reduced to improve performance (more tests can be
+    run per second) and also simplify detected errors analysis.
+
+commit 53ce8838254d436b6a4d527aacdece7dba7ceacd
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 29 22:21:37 2009 -0400
+
+    In pixman-sse2.c test for non-zero source, not just non-zero source alpha.
+
+commit da9f3266fd00a5634fd2fb8a9cffbf24d668aaab
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 29 21:20:20 2009 -0400
+
+    In the mmx implementation, check for source == 0 rather than alpha == 0.
+    
+    Otherwise we compute the incorrect value when the source has zero in
+    the alpha channel, but non-zero in the color channels.
+
+commit f889ad9f362293f00c142aa14e87fd212aea54c1
+Author: Jonathan Morton <jonathan.morton@movial.com>
+Date:   Fri May 29 13:38:45 2009 -0700
+
+    Fixup the arm-simd and arm-neon implementations.
+
+commit d6dfafd9584c37d0d382c5ca974eab04209ad834
+Author: Magnus Kessler <Magnus.Kessler@gmx.net>
+Date:   Thu May 28 12:09:07 2009 +0100
+
+    pixman: define pixman_have_{mmx,sse2} on 64-bit Linux
+    
+    The refactoring of pixman removed pixman-sse2.h and pixman-mmx.h in commit
+    41a9a17e0308f2075bb1bd59c4411e43a67d49ec
+    (http://cgit.freedesktop.org/pixman/commit/?id=41a9a17e0308f2075bb1bd59c4411e43a67d49ec).
+    On 64-bit Linux this breaks linking of new programs as well as execution of
+    existing programs with the following errors:
+    
+    ../pixman/.libs/libpixman-1.so: undefined reference to `pixman_have_mmx'
+    ../pixman/.libs/libpixman-1.so: undefined reference to `pixman_have_sse2'
+    
+    This patch fixes the issue for me by re-introducing the definitions for these
+    functions. It might be preferable, though, to create proper trivial static
+    inline functions instead.
+    
+    Signed-off-by: Magnus Kessler <Magnus.Kessler@gmx.net>
+
+commit 3d93070db88563b5a8f1e07f53f86c5e8ada8bbf
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 27 21:51:00 2009 -0400
+
+    Really fix PPC build.
+    
+    Add a pixman_composeFunctions variable to pixman-vmx.c.
+
+commit 3f5c2936c67d2b0dcf08b80e11c954ba73602ee2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 27 21:15:18 2009 -0400
+
+    Add back pixman_composeFunctions since vmx is not ported to implementations yet
+
+commit 6f93d36915fe3a8d8c75d26af8d5b9ba58aba4e3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 20 06:04:15 2009 -0400
+
+    In _pixman_implementation_fill() don't call the delegate; call the actual implementation
+
+commit a5a249613ba44ff791a7415f32192b1a0cc717db
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun May 17 04:56:13 2009 -0400
+
+    Call the toplevel implementation for combining
+
+commit e5c367120adaa5ae265866336d097f0435300706
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 16 11:22:54 2009 -0400
+
+    Set up combiner functions for an implementation directly in combine.inc.
+    
+    Previously it would go through two big tables, and the general
+    implementation would select the correct one dynamically.
+
+commit fb272d1464f12bd913d3fdbc4ec512758b5c4c98
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 16 10:24:30 2009 -0400
+
+    Consolidate the general implementation into one function
+
+commit 6a22abd899b2c226c01be055145c6ee3e469ee3c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 16 09:56:31 2009 -0400
+
+    Move the argument struct into pixman_image_composite_rect
+
+commit 4983f6b26cdd36eafbb97c21e5eb8d54ba59fa21
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 11:32:59 2009 -0400
+
+    Make a couple of functions static
+
+commit 41a9a17e0308f2075bb1bd59c4411e43a67d49ec
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 11:31:30 2009 -0400
+
+    Delete pixman-sse2.h and pixman-mmx.h
+
+commit 5dc9671b2588bfe084d69789e5c367474c5efa92
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 11:27:01 2009 -0400
+
+    Make the fast_path implementation run the c_fast_paths
+
+commit 364e218ad6a68028b4c11d051faee33f80513af4
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 11:24:28 2009 -0400
+
+    Split fill implementations out in the implementations
+
+commit 24e73d69ee99c2dc19d474b75f262e6efddfccf1
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 11:04:07 2009 -0400
+
+    Add alignment workaround to sse2
+
+commit 1369b0b9d4ce89c50f56ec1c552b534f96273c1c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 11:03:15 2009 -0400
+
+    Add a general_blt() that just returns FALSE
+
+commit 9955b1516902d7671d41777bf1989f23cb0a87ed
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 10:56:15 2009 -0400
+
+    Move sse2 and mmx blt implementations to their respective files
+
+commit 46f0707481d50950fdb5d4588486affef0baa9ef
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 10:32:36 2009 -0400
+
+    Move gcc alignment workaround to pixman-sse2.c
+
+commit 53150f4fcafba0a5a69fddaee4b2ae687f0a2149
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 10:28:41 2009 -0400
+
+    Set up SSE2 combiners
+
+commit 63c1ab031347dd2f26a25f29589516e1e59ba8db
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 10:23:01 2009 -0400
+
+    Make pixman_implementation call the right combiner
+
+commit c8a2c336a7a90abc094ec57a4ae15ffabf6e1763
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 10:15:58 2009 -0400
+
+    Use the implementation's combiners
+
+commit cb236a85df18f0f5e04698fb63c3895c2a2762dc
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 10:06:29 2009 -0400
+
+    Move SSE2 variable initializations to pixman_implementations_create_sse2
+
+commit 03fa1bcb9af2cf48148b03c9a02cf5b4a7340356
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 09:50:55 2009 -0400
+
+    Move mmx fast path code to pixman-mmx.c
+
+commit 6e13149f99d7922ae84086f7867c9a9b69a49203
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 09:44:45 2009 -0400
+
+    Move sse2 fast path running to the sse2 implementation
+
+commit cb8608bba4f212aceef0cf579c650ee4988f56bd
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 09:42:42 2009 -0400
+
+    Change pixman_lookup_fast_path() to actually run the fast path
+    
+    Then just return in the general implementation if we ran a fast path.
+
+commit bee5549f6b469989a45cb3bcd4a916a6799c182d
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 09:26:42 2009 -0400
+
+    Add _pixman_choose_implementation
+
+commit 713fb295761f13989bc0da31c26b3a1535ab449e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 09:21:37 2009 -0400
+
+    Remove fast path lookup code from pixman-general
+
+commit f5837da6e24cb1adf116f42724b83948c70476f0
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 09:12:04 2009 -0400
+
+    Beginning of sse2 implementation
+
+commit 9a25f0fb672c2b6aee488958cf7f7c6e9ea3a33b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 09:06:19 2009 -0400
+
+    Copy fast path lookup code into pixman-utils.c
+
+commit 248ef3ec24bfcb4759f12e1839456c0c422b994c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 08:51:09 2009 -0400
+
+    Initial fast path implementation
+    
+    Move fbSrcScaleNearest() here, and move
+    _pixman_walk_composite_region() to pixman-utils.c
+
+commit 2c64b2a6487114263be8f26fc9328ddc36c61b9a
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 08:14:01 2009 -0400
+
+    Change prototypes for compositing functions to use 32 bit integers
+
+commit d6345a69fb7179ce6dc71117423e83baef427071
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 05:12:52 2009 -0400
+
+    Add component alpha combiners in pixman-implementation.c
+
+commit 918f763a9111f643d5b8a1460258089e79f68fb0
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 04:52:28 2009 -0400
+
+    Beginning of MMX implementation
+
+commit 4b8f440d494f675c2ae5b9d41d950a6c79d14548
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 04:32:32 2009 -0400
+
+    Move entire C implementation into pixman-general.c
+
+commit 12726de921a621b8147d12d7e0788076bc4cc80d
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 04:04:36 2009 -0400
+
+    Add beginning of general implementation
+
+commit d2faa63aee2179188dba712835c40068729565ff
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 03:40:05 2009 -0400
+
+    Formatting
+
+commit a17e27c2b4afc6118e5aeae380eb96d98f982033
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 03:31:11 2009 -0400
+
+    Beginning of pluggable implementations
+
+commit 25509f4b0b3a6b17810605706e5b93e0b9f4cb08
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 20 18:28:03 2009 -0400
+
+    Move fbStoreExternalAlpha{,64} into pixman-bits-image.c
+
+commit d74ad7c0fe9bd50ae04b59806f6c2bb9e6289634
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu May 21 10:45:51 2009 -0400
+
+    Add new store_scanline_{32,64} in bits_image_t
+    
+    Also move fbStore and fbStore64 into pixman-bits-image.c
+
+commit 74f837b1a2e85f2bfcaaf5c659077e4883fe6ac7
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 23 11:33:21 2009 -0400
+
+    Post-release version bump
+
+commit a282b640becfa1bb4979382f6a49cb59a7f992e2
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Fri May 22 12:13:57 2009 -0700
+
+    NEON: Remove some unneeded casts
+    
+    There are some unnecessary (void*) casts. Eliminate some of them. Doesn't
+    change the generated code.
+
+commit ff866e70e399e655ad9b5a851bb682463fdda5ac
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Fri May 22 11:56:44 2009 -0700
+
+    Fix uses of dst_keep
+
+commit 19d6669aacd8f0ba5246448e423c1cbce9cb4fd3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 22 12:07:55 2009 -0400
+
+    Pre-release version bump
+
+commit 67addb4b3800f754155c8607bca85d23d840e056
+Author: Jonathan Morton <jonathan.morton@movial.com>
+Date:   Fri May 22 12:01:26 2009 -0400
+
+    Initialize the ARM SIMD fast path array.
+
+commit 2f1732359787f946bd1efd92be1f2f86aa91be3c
+Author: Jonathan Morton <jonathan.morton@movial.com>
+Date:   Fri May 22 08:25:26 2009 -0700
+
+    Fix compile error caused by e42fae9e8364f5f0791f9fce749ab18b33acf598
+
+commit 85b390cadf8c60808ed17df95885e72c082ad180
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 20 10:45:29 2009 -0400
+
+    Fix alpha map computation in pixman_compute_composite_region()
+    
+    According to the RENDER spec, the origin of the alpha map is
+    interpreted relative to the origin of the drawable of the image, not
+    the origin of the drawable of the alpha map.
+    
+    This commit fixes that and adds an alpha-test.c test program.
+    
+    The only use of alpha maps I have been able to find is in Qt and they
+    don't use a non-zero alpha origin.
+
+commit cb4085bdb5a40c38209f69c26b3ffe60d08ff4de
+Author: Jonathan Morton <jonathan.morton@movial.com>
+Date:   Thu May 21 07:16:34 2009 -0400
+
+    Avoid malloc() by allocating a fixed set of boxes on the stack
+
+commit 5424d0245b28dff81032341a60dea1dd70c594b7
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu May 21 05:07:19 2009 -0400
+
+    Fix build on ppc. Pointed out by Chris Ball
+
+commit 14cd45dc4a63296a549bcc53453ca40beed67f51
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 05:53:40 2009 -0400
+
+    Make SSE2 fast paths static and remove them from the header file
+
+commit 0f1a212bf24490cbf80d6135bac17c5122d18cd2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 05:49:13 2009 -0400
+
+    Make MMX fast paths static and remove them from the header file
+
+commit 87f18154c1198752f2217241c568c28a103e69f6
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue May 5 09:07:32 2009 -0400
+
+    Notes on component alpha
+
+commit ac2299693f76be9c0d19a015096497d26aaf2c7d
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue May 5 08:49:49 2009 -0400
+
+    Note about glyphs polygons
+
+commit c093ee8a415602d78b53dbe936ca743ed816d393
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue May 5 07:31:52 2009 -0400
+
+    Notes on output kernels
+
+commit 90ae09f2e4826d21ebab21c6538cfa7fe1e0b90b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue May 5 03:10:44 2009 -0400
+
+    Further notes on the rendering pipeline
+
+commit fa274ffc6180fc0d57f11bf7b691fe95f344c5d9
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue May 5 01:58:48 2009 -0400
+
+    Some roadmap notes
+
+commit ba1dcec76ae1033b0cbb3048c3d82450922a02cc
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon May 4 17:39:19 2009 -0400
+
+    Describe alpha map in the pipeline
+
+commit 3fdefd683b5cbaaa4a93f1737197954f1df8bc57
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun May 3 22:46:34 2009 -0400
+
+    Notes on the rendering pipeline
+
+commit e07a4c6e8c1571f762c6f583204f16e3aca42882
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun May 3 21:07:06 2009 -0400
+
+    Move C fast paths to their own file pixman-fast-path.c
+
+commit e42fae9e8364f5f0791f9fce749ab18b33acf598
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun May 3 21:05:45 2009 -0400
+
+    Move the arch specific fast path tables into their arch files
+
+commit 93900a591c530a310542dfcca7e41d3391dc3565
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun May 3 19:12:02 2009 -0400
+
+    Move CPU detection code to its own file
+
+commit e6e6f6350230cc2e10e7dfe0ebd89ec4b587b660
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun May 3 01:18:49 2009 -0400
+
+    Move conical gradient code to pixman-conical-gradient.c and delete pixman-source.c
+
+commit 47abb3c7659a4eb1214c358796965f92f98fc901
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun May 3 01:08:54 2009 -0400
+
+    Move the radial gradient code from pixman-source.c into pixman-radial-gradient.c
+
+commit a10b0e7e136116cea95d6717f119d92599491f27
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun May 3 00:59:36 2009 -0400
+
+    Duplicate some code that was shared between radial and conical gradients.
+    
+    It is going to live in separate files.
+
+commit 9a867fa231e37d945f1dc3d18cb17359b24dbde3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun May 3 00:56:16 2009 -0400
+
+    Move the linear gradient code from pixman-source.c into pixman-linear-gradient.c
+
+commit ade664ced3b9ac64120424f0fc80dc0deef69b00
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun May 3 00:46:30 2009 -0400
+
+    Rename pixmanFetchSourcePict to pixmanFetchGradient
+    
+    Move the solid fill parts into pixman-solid-fill.c
+
+commit 8267d8d38f794c51e09f440c470f1c23c59e11aa
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 2 23:26:30 2009 -0400
+
+    Add a generic 64 bit fetcher and use it for gradients and transformed images
+
+commit ecaaef2f505fb61b383b194236b68ee59d52ecda
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 2 23:08:59 2009 -0400
+
+    Move the gradient walker code to its own file
+
+commit 51d972ecd885b05165a09d19fb3491ecb3ce813a
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 2 22:55:56 2009 -0400
+
+    Replace pixman_image_get_fetchers() with pixman_image_get_scanline_{32,64}
+
+commit b7f113200e285c003b9225de83d8fe83492717ee
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 2 22:46:20 2009 -0400
+
+    Set up scanline getters for bits images
+
+commit b496d566dcc3e277f9ed9a8e93dbb3963a6d14e6
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 2 22:42:59 2009 -0400
+
+    Set up scanline getters for source pictures
+
+commit c62f2a14f433a07c5333cfefeed934214507d63a
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 2 22:26:23 2009 -0400
+
+    Store get_scanline() functions in the image struct
+
+commit 0b497b33fe8bdfc404ed377f3b7525b4e5c11ad5
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 2 22:00:25 2009 -0400
+
+    Add stubs for property_changed virtual functions
+
+commit 7bb615f6baf39e3d7c31a8ce521c0ff0b5172d7e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 2 21:14:36 2009 -0400
+
+    Split pixel images into pixman-bits-image.c
+
+commit 53bae97c7e7bf9b20ddfd400fd0bd11d03431d39
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 2 21:08:12 2009 -0400
+
+    Split conical gradient images into pixman-conical-gradient.c
+
+commit c43c3628935722f489d5e5359413dbb17d4c4a44
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 2 21:06:23 2009 -0400
+
+    Split radial gradient images into pixman-radial-gradient.c
+
+commit 76418e388e1439f8e7f33eb777856c8eb475a2fc
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 2 20:54:17 2009 -0400
+
+    Split linear gradient images into pixman-linear-gradient.c
+
+commit 58de62bfada0d0ca945350fe3da38dee48aac7b4
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 2 20:40:16 2009 -0400
+
+    Split solid fill images into pixman-solid-fill.c
+
+commit aa234489b0653ef63cf1b3d162aa7a339779c4da
+Merge: 7a9bfa1 010e286
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 16 12:51:05 2009 -0400
+
+    Merge branch 'master' of git+ssh://sandmann@git.freedesktop.org/git/pixman
+
+commit 010e28653f95bb78215e3cacb6a4f47d9a289fde
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 16 08:33:35 2009 -0400
+
+    Don't read potentially uninitialized data in pixman_CombineMaskU()
+    
+    This is mainly to quiet valgrind. The data in question would only be
+    uninitialized when the corresponding mask pixel was zero, so the end
+    result is zero in any case.
+
+commit 822cd47562c138002b45b24e6d4e25de3893088d
+Author: Loïc Minier <lool@dooz.org>
+Date:   Fri May 15 16:11:16 2009 +0200
+
+    Fix fd check after auxv file open() [Bug 21749]
+    
+    Signed-off-by: Julien Cristau <jcristau@debian.org>
+
+commit 58e08374e1cd01371786469787b3709eca27f463
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 15 07:47:59 2009 -0400
+
+    Don't call hyphenated git commands as they don't exist anymore
+
+commit b365547e7dd9cb02fb43d85ae4104903083c4ebf
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 15 07:47:22 2009 -0400
+
+    Post-release version bump
+
+commit 564ecfe8e9a1aba41f5a798de461294ae2fe1c6d
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 15 06:57:59 2009 -0400
+
+    Pre-release version bump
+
+commit e483af47db769fcba559dda72699bc80d154b575
+Author: Adam Jackson <ajax@nwnk.net>
+Date:   Fri May 15 06:26:48 2009 -0400
+
+    Fix overflows during trap rasterization. [Bug 16560].
+    
+    Avoid overflows when rasterizing traps that fall entirely in the space
+    between the final sample row and the end of the coordinate system, or
+    in the space between the beginning of the coordinate system and the
+    first sample row. Such traps don't contain any sample points, so the
+    top and bottom of the edges can safely be moved to the beginning/end.
+
+commit 7a9bfa146154e555a9e2e8a807bb7df2b30f860c
+Merge: 81b94d7 e17fc72
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 13:10:01 2009 -0400
+
+    Merge branch 'master' of git+ssh://sandmann@git.freedesktop.org/git/pixman
+
+commit 81b94d725834d03f7856ae0d505a7340f1135326
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 13:09:45 2009 -0400
+
+    Add test that shows difference in clipping with transformed vs untransformed
+
+commit e17fc72e958e1ddee0b24e8a90ae9609e1e44391
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 06:17:55 2009 -0400
+
+    Remove unused CombineMask64 type
+
+commit a1bc6bf15995fae8be2de61f859fcc73d80f7b64
+Merge: ffce146 e74a284
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed May 13 05:58:36 2009 -0400
+
+    Merge branch 'master' into refactor
+
+commit e74a2847ddcb3b4c1675efaaa923e78556277dff
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 9 08:34:12 2009 -0400
+
+    Fix divide-by-zero crash
+
+commit a95c39c7d533ed7d6c8c7708604c5844cdc22dfe
+Author: Jonathan Morton <jmorton@sd070.hel.movial.fi>
+Date:   Thu May 7 11:54:15 2009 +0300
+
+    Test USE_GCC_INLINE_ASM instead of USE_NEON_INLINE_ASM.
+    
+    The former is now Autoconf enabled, and does what it says on the tin.
+
+commit 62af131a5a7222c58ed9aac38b7dddb75c0e87f7
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue May 5 15:25:43 2009 +0300
+
+    Fixed rendering bug for source alpha == 0 in OVER fastpath functions
+    
+    Handling of the case when source alpha is zero was keeping destination
+    pixel unmodified. But this is different from how generic path behaves.
+    For example fbOver(0x00200483, 0x9CAC7E9F) == 0x9CCC82FF and the
+    destination pixel changes from 0x9CAC7E9F to 0x9CCC82FF in spite
+    of having zero alpha.
+    
+    Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>
+
+commit a589afa3a7f3430a5b37bb9efb6574fe6ac8d9af
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Wed May 6 13:30:29 2009 -0400
+
+    Enable NEON assembly when we can build it
+    
+    This adds detection for ARM NEON build support as well as gnu assembler
+    syntax detection from Jonathan Morton.
+
+commit ffce1461502c9cc4dbf613b64eddba4c4de6a4d4
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat May 2 19:55:10 2009 -0400
+
+    Remove unused combineMaskU functions
+
+commit 38e5929400ea8d8bdf0830006f761a5498f558a5
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 14:18:32 2009 -0400
+
+    Optimize source reading in combine4
+
+commit 3d6720a22777523c428914c2f84439d240778484
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 03:04:36 2009 -0400
+
+    Enable mmxCombineSaturateU
+
+commit 742d444f96bf160d2b7707cc894dd9b516f3179c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 03:04:29 2009 -0400
+
+    Enable mmxCombineAddU
+
+commit fd31818cfba0a750672bf50fbe550fa29ec77d99
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 03:04:23 2009 -0400
+
+    Enable mmxCombineXorU
+
+commit b7fe2f3378c6fb0828e863cb1a2df9191fb3e25e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 03:04:16 2009 -0400
+
+    Enable mmxCombineAtopReverseU
+
+commit 55a703f88c60acef5f1053d2d409c6e7048a714c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 03:04:10 2009 -0400
+
+    Enable mmxCombineAtopU
+
+commit f747b4184865c5e8b1c36c7116b6a47560f26e8d
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 03:04:02 2009 -0400
+
+    Enable mmxCombineOutReverseU
+
+commit 3c6fd2699dc2741b6ad121eb441a32b52b169a82
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 03:03:50 2009 -0400
+
+    Enable mmxCombineOutU
+
+commit 9d13da03b7d4525aa8ffbb9b787dee8964323810
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 03:03:39 2009 -0400
+
+    Enable mmxCombineInReverseU
+
+commit 2262f0084722d8548071730f8fcbe318560e9fbf
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 03:03:32 2009 -0400
+
+    Enable mmxCombineInU
+
+commit 5e5c78a6cc962f154b749d954c35ac663f8ac483
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 03:03:24 2009 -0400
+
+    Enable mmxCombineOverReverseU
+
+commit 81342af3bda044c059a13a37a9ede542212dc5a2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 03:03:16 2009 -0400
+
+    Enable mmxCombineOverU
+
+commit 3d684556dbdb087fa6d0631f06ccde38bb02dea5
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 03:02:58 2009 -0400
+
+    Implement the mmx combiners with masks (disabled)
+
+commit cdb6aa49bec3595a00eb203c258111c713de9bbc
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 01:01:09 2009 -0400
+
+    Enable sse2CombineSaturateU
+
+commit 29528b9523e779ff59029f11f79f1e22cbeaf4cd
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 01:00:52 2009 -0400
+
+    Enable sse2CombineAddU
+
+commit 374ad0c363baf93e724409f575e1bbd7cfd8914a
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 01:00:47 2009 -0400
+
+    Enable sse2CombineAtopXorU
+
+commit c1bdbff80ac724cab8213d41f91c525e10ca9ff1
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 01:00:39 2009 -0400
+
+    Enable sse2CombineAtopReverseU
+
+commit 74d79f271c45807bf23b395e7050130f7da1139c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 01:00:33 2009 -0400
+
+    Enable sse2CombineAtopU
+
+commit c3d92fe51869c4e7b4ed83fb3bed5d0e7e651782
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 01:00:26 2009 -0400
+
+    Enable sse2CombineOutReverseU
+
+commit 53809bde5265378c400792bdb0b2639a0cde6c08
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 01:00:20 2009 -0400
+
+    Enable sse2CombineOutU
+
+commit 9293a51323e7e2b4aedb75c3fa55475aa4a269e7
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 01:00:12 2009 -0400
+
+    Enable sse2CombineInReverseU
+
+commit d45c0db9603a84989d59e5e1519b424ab756f221
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 01:00:07 2009 -0400
+
+    Enable sse2CombineInU
+
+commit 92c1199bf7e9379beca52fa880a0a436ffdda7e2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 00:59:59 2009 -0400
+
+    Enable sse2CombineOverReverseU
+
+commit d1879bc048be083198a35bb037273171bc07a211
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 00:59:51 2009 -0400
+
+    Enable sse2CombineOverU
+
+commit 22fda2d1aba7368a7545d1659b737e695a6c5b26
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 00:59:29 2009 -0400
+
+    Implement the sse2 combiners with masks (disabled)
+
+commit 1ddd91bfee87c13ce18d82d9ab9b2fb2de7cca22
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri May 1 00:58:38 2009 -0400
+
+    Use memcpy() in fbCombineSrcU when there is no mask
+
+commit 24012542295f80455c8df01262099b98d2b2de37
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Apr 30 17:59:09 2009 -0400
+
+    Have the generic code go through the component alpha path always
+
+commit 8b2e08d494c6da1512f44d0754b0f52a184cc6f3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Apr 30 17:53:48 2009 -0400
+
+    Get rid of separate combineU and combineC types
+
+commit 6d6e33d33818b56982f15da1943da499db220bc1
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Apr 30 17:14:04 2009 -0400
+
+    Change type of combineU to take a mask
+
+commit fe571035f8889fd12892f2257b64536feced3f4e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Apr 30 19:50:24 2009 -0400
+
+    Make combineC function type take const args
+    
+    Fix type of mmx component alpha combiners
+    
+    Fix type of sse2 component alpha combiners
+    
+    Fix type of vmx component alpha combiners
+
+commit f9a9ce8940c5644f25721abe6af6c72c4eabcfe7
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Apr 30 02:54:32 2009 -0400
+
+    Remove accessor version of pixman-compose.c
+
+commit 0236393b031798a36144820a6254b646f9279580
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Apr 30 02:51:09 2009 -0400
+
+    Remove unused xoff/yoff variables
+
+commit d0a6c1e9a5447e982dc4d544146c1b5234e490cf
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Apr 30 02:50:18 2009 -0400
+
+    Move store logic into pixman-image.c
+
+commit 363be5285950d20cc77cf4a7eb50d5f1f5fea0f7
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Apr 30 02:42:47 2009 -0400
+
+    Move fetch logic to pixman-image.c
+
+commit bf879f1b37cfe5ee2ec921d26bf9d9126ca59b9c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Apr 30 02:25:48 2009 -0400
+
+    Simplify logic to compute store function
+
+commit 20cedd756f54bc735fe25ab29aafd3cdfeddda30
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Apr 30 02:14:13 2009 -0400
+
+    Formatting
+
+commit 4c74f519ca3788fe357caf54e22e6cab609b681e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Apr 30 02:08:44 2009 -0400
+
+    Factor out duplicated fetcher finding code
+
+commit eb5d30a9d3bfb1bddaf9e60e2092353fe6b1dd48
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Apr 30 02:03:03 2009 -0400
+
+    Get rid of SOURCE_IMAGE_CLASS_NEITHER again
+
+commit 87922006e506a252c81d42f0c1bacb59d1c67e60
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Apr 30 01:49:13 2009 -0400
+
+    More refactoring notes
+
+commit 8c646172743568584f7cefd3177b410fd3b22b2d
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Apr 29 23:13:14 2009 -0400
+
+    Add notes on how Render actually works
+
+commit 57a3d09b01834103e61785c6269d152bdfd91a4f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Apr 29 20:15:20 2009 -0400
+
+    Move calls of the classification out of the if statements.
+
+commit cb73922ab9ab7d627f059601a03714cfff07d25b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Apr 29 19:55:19 2009 -0400
+
+    Move SourcePictClassify into pixman-image.c
+    
+    In preparation for making pixman_image_t more of a real object, move
+    SourcePictClassify into pixman-image.c and expose it through a
+    function pointer. Later, this function will be split into smaller
+    functions depending on the exact type of the image.
+
+commit 3d73ce6813743811ff4413df554b438d3790c361
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Apr 29 01:44:56 2009 -0400
+
+    More refactoring notes
+
+commit 4d255141f78451ec5edb27ed29437651d6f64491
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Apr 28 23:02:49 2009 -0400
+
+    Add refactoring notes
+
+commit f98c800fba076197c56df7a990a30a98a115b9e0
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon May 4 18:54:27 2009 -0400
+
+    Mention utils.h in test/Makefile.am so that make dist will pick it up
+    
+    Pointed out by Julien Cristau.
+
+commit e047f605e2e9ef4f23e63c38259c5ceb720060dc
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon May 4 18:34:54 2009 -0400
+
+    Move all the GTK+ test code into its own file, utils.c
+
+commit c882260d4b481283c3d59385bfa31bfeffc0a58f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Apr 28 22:49:31 2009 -0400
+
+    Include string.h and fix warning in trap-test.c
+
+commit c74becfdb939af56d19b1d8cef94f3cfc11f238c
+Author: Alan Coopersmith <alan.coopersmith@sun.com>
+Date:   Tue Apr 28 08:44:47 2009 -0700
+
+    Add solaris-hwcap.mapfile to EXTRA_DIST
+    
+    Signed-off-by: Alan Coopersmith <alan.coopersmith@sun.com>
+
+commit dbb72c02fda0d59d0da4ba100c7120ebc244835f
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Mon Apr 20 16:13:44 2009 +0300
+
+    Updated scaling test to support rgb565, source clipping, etc.
+    
+    Now test provides better coverage for various image scaling
+    cases. Unused byte for x8r8g8b8 format is ignored. Running
+    the test program without any command line options provides
+    a PASS/FAIL verdict based on the precalculated crc32 value
+    for using pixman with all the fastpath functions disabled,
+    which should simplify testing for correctness.
+
+commit 880afeecc0d8bd610733292fd1cb692bba98dd5b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Apr 24 21:35:46 2009 -0400
+
+    Add trap-crasher.c test program
+    
+    Based off of Pavel Kankovsky's test case in bug 16560.
+
+commit eac663a7c8d254842224f1aed992c91691a425b8
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Tue Apr 21 17:32:38 2009 -0400
+
+    Enable fbCompositeSolidMask_nx8x8888neon
+
+commit 99fd917adf9bf649fb94b32feae466250433a6ff
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Tue Apr 21 17:32:38 2009 -0400
+
+    Enable fbCompositeSolidMask_nx8x0565neon
+
+commit c0436ed25598bff4b6c426faf528e56726632ed2
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Tue Apr 21 17:32:38 2009 -0400
+
+    Enable fbCompositeSrc_8888x8x8888neon
+
+commit f2b3dfcce4add4027163289c0a282430418a4857
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Tue Apr 21 17:32:38 2009 -0400
+
+    Enable fbCompositeSrc_8888x8888neon
+
+commit 6da2f2b360225b6f1f20c00734db66499b94bfa0
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Tue Apr 21 17:32:37 2009 -0400
+
+    Enable fbCompositeSrc_x888x0565neon
+
+commit b8625fa2dd0811092f6c96b31596277bb0c61021
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Tue Apr 21 17:32:37 2009 -0400
+
+    Enable fbCompositeSrcAdd_8000x8000neon
+
+commit 452ed13867d4a7b1509030f7f79cbb17b51e7b36
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Tue Apr 21 17:32:37 2009 -0400
+
+    Enable fbCompositeSrcAdd_8888x8x8neon
+
+commit f2af00bf02dcf3e7e27ac3e035d41c387fc9400b
+Author: Ian Rickards <Ian.Rickards@arm.com>
+Date:   Tue Apr 21 17:32:36 2009 -0400
+
+    Add support for ARM NEON fast paths
+    
+    Currently disabled
+
+commit 9fdca26d3087da5a620d720f5a56ccbfdf55587e
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Tue Apr 21 17:32:36 2009 -0400
+
+    Add support for doing ARM simd detection on Windows
+
+commit 2423118e239e9c85dd68403bf8b97b30965df38e
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Wed Apr 15 10:38:37 2009 -0400
+
+    Add support for doing ARM simd detection on Windows
+
+commit 3d9716f44ea799e003c19783f087239fe89c88dc
+Author: Søren Sandmann Pedersen <ssp@dhcp-100-2-40.bos.redhat.com>
+Date:   Thu Apr 9 20:38:53 2009 -0400
+
+    Post-release version bump
+
+commit 0a63858b07e1d7bccf54a69881e50246a530785d
+Author: Søren Sandmann Pedersen <ssp@dhcp-100-2-40.bos.redhat.com>
+Date:   Thu Apr 9 16:38:05 2009 -0400
+
+    Pre-release version bump
+
+commit 86ec5419b238cba841bed303679fdaf8b4299a6f
+Author: Søren Sandmann Pedersen <ssp@dhcp-100-2-40.bos.redhat.com>
+Date:   Mon Apr 6 15:25:03 2009 -0400
+
+    Fix search and replace issue
+
+commit 179750544d911adf5b01749e33b3ef44a62b0b0e
+Author: Alan Coopersmith <alan.coopersmith@sun.com>
+Date:   Fri Apr 3 13:02:37 2009 -0700
+
+    Replace custom type ullong with standard uint64_t in pixman-mmx.c
+    
+    Signed-off-by: Alan Coopersmith <alan.coopersmith@sun.com>
+
+commit fd7449c4855e77d31770f9042ba43e186c541ce5
+Author: Alan Coopersmith <alan.coopersmith@sun.com>
+Date:   Fri Apr 3 12:40:10 2009 -0700
+
+    Fix MMX & SSE intrinsics to work with Sun compilers & Solaris
+    
+    Signed-off-by: Alan Coopersmith <alan.coopersmith@sun.com>
+
+commit 47dabe1d025c420a07ac940ab46e5d00c752d2d8
+Author: Søren Sandmann Pedersen <ssp@dhcp-100-2-40.bos.redhat.com>
+Date:   Sat Apr 4 06:04:42 2009 -0400
+
+    Fix pixbuf_from_argb32() to take premultiplied alpha into account
+
+commit fb8e9b16d5760aa82c1ca4c180faed964a4e7ff5
+Author: Søren Sandmann Pedersen <ssp@dhcp-100-2-40.bos.redhat.com>
+Date:   Sat Apr 4 05:57:20 2009 -0400
+
+    Add a new trap-test test program.
+    
+    Also some tweaks to the build system in the test directory to make it
+    build non-gtk+-using application when use of gtk+ is disabled.
+
+commit cdcbd9be3a2318bf650b8f56fea51acb5991b075
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Wed Apr 1 18:26:24 2009 -0400
+
+    Set srcRepeat = False when using fbCompositeSrcScaleNearest
+    
+    NORMAL repeat was broken (the optimized function can handle repeat operation
+    itself and can be screwed up if 'pixman_walk_composite_region' tries to help it
+    by splitting the work into handling multiple separate areas).
+    
+    Splitting work into handling different areas does not work right for the
+    transform case (and it is never used for generic path).  The point is that this
+    splitting only has full pixel precision at the moment, while correct blitting
+    needs to preserve some fractional part in calculations when moving from one
+    "tile" to another.
+
+commit a9adae3dc38764fe055b66e38175be5220fb3f9a
+Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date:   Tue Mar 31 20:58:56 2009 +0300
+
+    Image scaling regression test script
+    
+    This test script can help in finding regressions in image scaling
+    fastpath implementations. It uses test program compiled with
+    and without fastpath code and can compare results of execution
+    for different pseudorandom compositing operations involving scaling.
+    
+    Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>
+
+commit 29e8556814ddddf269da90989e0de6d406d0afe6
+Author: Mark Kettenis <kettenis@openbsd.org>
+Date:   Tue Mar 24 14:28:38 2009 -0400
+
+    Add support for BGRA and BGRx formats.
+
+commit e92417b7805315ff38a3d5758bd7075418d3ae6d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Mar 17 13:28:31 2009 +0000
+
+    Check for failure when intersecting regions.
+    
+    Need to check and report the failure of intersecting the rectangle with
+    the clip region during pixman_image_fill_rectangles().
+
+commit 9ba3236354deb472edf109b6842a5b8749bd746c
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Mar 17 08:39:48 2009 +0000
+
+    Check for allocation errors during pixman_op()
+    
+    Propagate the error returns from pixman_rect_alloc().
+
+commit 2664c2d57552176052d753def2d307f63c2c9ff4
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Tue Feb 10 19:47:19 2009 -0500
+
+    Remove stale comment
+
+commit ced5a4f356f5f2322a8aeb2876348707cf56dbcd
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Feb 6 19:41:24 2009 -0500
+
+    Add license and copyright holders to COPYING
+
+commit 2d9c7cd84b276ebe2ff72d03c34a2d7f4f98b9f9
+Author: Thomas Jaeger <ThJaeger@gmail.com>
+Date:   Tue Jan 20 18:40:46 2009 -0500
+
+    Implement PIXMAN_REPEAT_REFLECT for images
+
+commit 3d0911dee7f1d00b1e61fb183ab337c693b49adc
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Feb 6 18:03:31 2009 -0500
+
+    Reinstate SrcScaledNearest optimization
+
+commit 6815e754d31d5a431028c8ca62911c07b753edc8
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Feb 6 18:01:57 2009 -0500
+
+    Bump version number post-release
+
+commit 6df6a43dc7119a510cf9db2e62fcc970a539e5a3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Feb 6 17:31:32 2009 -0500
+
+    Bump version number pre release
+
+commit 6e6c7ac5e0bce2e1893675eb45a8d98876085794
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Feb 6 17:30:24 2009 -0500
+
+    Comment out SrcScaledNearest optimization that hasn't been tested much
+
+commit e651118b67111d36193f55a752d13e66df5ca953
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Feb 6 17:29:04 2009 -0500
+
+    Fix release targets to remove all hashfiles before generating tar balls
+
+commit 072d848f592530973f1f0a0066a320ec5965625c
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Fri Jan 9 12:48:22 2009 -0500
+
+    Add pixman-matrix.c to Makefile.win32
+
+commit c55db2905706ae78364bfb63dcfa62c00cc486c7
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Fri Jan 9 12:48:20 2009 -0500
+
+    Conditionally include config.h in pixman-matrix.c to fix win32 build
+
+commit 8f98ffadf58de1e28294b3ab2c09f380ccc535e5
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sat Dec 20 17:18:51 2008 +0000
+
+    Fix pixman-1-uninstalled.pc to point to the libtool library
+    
+    Otherwise we fail to link when compiling cairo against the uninstalled
+    library.
+
+commit 9d726712c22d8555d00b9f1ebacd5425dc9a5b61
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Nov 21 01:20:38 2008 +0000
+
+    Allocate initial array of RegionInfo on the stack.
+    
+    The region validate() code is frequently called by cairo as it is used to
+    extract regions from the trapezoids for fast-paths through the drawing
+    code and also for fast-path clipping and the RegionInfo allocation (as
+    well as the pixman_rect_alloc during the final union) appears as a hot
+    spot on application memory profiles.
+
+commit 08530f5bf23386355a19b83db88173302c7a5300
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Wed Dec 17 10:35:03 2008 -0800
+
+    Don't treat PIXMAN_TYPE_YUY2 and PIXMAN_TYPE_YV12 as PIXMAN_FORMAT_COLOR.
+    
+    Various pieces of code expect PIXMAN_FORMAT_COLOR (and its less cool older
+    brother, PICT_FORMAT_COLOR) formats to have ARGB bits, and the YUV formats do
+    not.
+
+commit 4546234c18f5bb5e2d193d2fa8ff5c3ca78bc716
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Fri Dec 5 12:01:03 2008 -0500
+
+    [arm-simd] Add a comment about aligning source and destination pointers.
+
+commit 985829f26b15aaa3e336127412c771027577313f
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Fri Dec 5 11:45:03 2008 -0500
+
+    Check alignment of 'src' pointer in optimized ARM routines
+    
+    fbCompositeSrcAdd_8000x8000arm() tries to align 'dst' already but must check
+    'src' too.  Otherwise, the next 4-byte copy loop might access an odd 'src' address
+    causing an alignment trap.
+    
+    Patch from Enrico Scholz
+
+commit 4238047c228ca885a24bd341aa48a3ad54590837
+Merge: bfa76d4 d625ca5
+Author: Keith Packard <keithp@keithp.com>
+Date:   Tue Nov 25 22:04:29 2008 -0800
+
+    Merge commit 'origin/master'
+
+commit bfa76d47ac85c88fbb9d7226f09c6c6654b10342
+Author: Keith Packard <keithp@keithp.com>
+Date:   Tue Nov 25 22:03:55 2008 -0800
+
+    Bump to 0.13.3 after 0.13.2 release
+
+commit 0191d1a41ea273e5b1920ed83dfa33820870ebae
+Author: Keith Packard <keithp@keithp.com>
+Date:   Tue Nov 25 21:37:54 2008 -0800
+
+    Bump version to 0.13.2 for release
+
+commit 6002963ea32d05592da05a6eeafd5d8ee9d9d496
+Author: Keith Packard <keithp@keithp.com>
+Date:   Mon Nov 24 11:49:32 2008 -0800
+
+    Move matrix operations from X server to pixman
+    
+    Signed-off-by: Keith Packard <keithp@keithp.com>
+
+commit d625ca5f291c01b3672648e5897f30a17326367f
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Mon Nov 24 15:11:24 2008 -0500
+
+    Optimize rectilinear nearest-neighbour scaling
+    
+    Add a special case for a source transformation that is only a scale and
+    preserves rectangular pixels and doesn't rotate the image. Currently, only
+    SOURCE is special cased, however I plan to do more work in this area as needed.
+    The biggest advantage the specialization currently has is writing directly to
+    the destination surface instead of a temporary scanline buffer. However, it is
+    still pretty unoptimized but I want to keep things simple for now.
+
+commit 0c3dd54f6bf02156e4b94a2b5bfadef148715643
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Fri Nov 21 21:35:30 2008 -0500
+
+    Unify scanline buffer types
+    
+    Change the type of the stack based scanline buffer to uint8_t to match the rest
+    of the variables. Also premultiply the scanline buffer size by sizeof(uint32_t)
+    because the bpp can be either sizeof(uint32_t) or sizeof(uint64_t).
+
+commit e201504da81cd9ceb7d20d12b3a923289f93b108
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Thu Nov 20 10:41:40 2008 -0500
+
+    [arm-simd] Fix typo found by 王新拓
+    
+    'and r7, %[upper_component_mask]' appears to be a shorthand for
+    'and r7, %[upper_component_mask], %[upper_component_mask]'. Use
+    the explicit form to avoid any confusion.
+
+commit cd2a79ab81045aa7e35bc901081e57dea6ac4845
+Author: David Woodhouse <dwmw2@infradead.org>
+Date:   Tue Nov 18 16:01:11 2008 -0500
+
+    Less fragile Linux altivec detection
+    
+    Instead of using really fragile SIGILL trapping, use a more reliable
+    detection method by checking what the CPU really supports.
+    
+    https://bugzilla.redhat.com/show_bug.cgi?id=472000
+    https://bugzilla.redhat.com/show_bug.cgi?id=451831
+
+commit b1b0507c24d7a3afb1ee09fc23783fa22cd0e56e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Nov 10 23:18:09 2008 -0500
+
+    Make comments about PIXMAN_REFLECT more useful
+
+commit 056c6d97db753a928ac2794ec215c86cceffe901
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Nov 5 23:58:56 2008 -0500
+
+    Various formatting fixes, and a simplification of the adjusting code
+
+commit 607562b2a6cc8536350d0a9bcb6fe99224ad4f1f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Nov 5 17:51:55 2008 -0500
+
+    Inline the fetchers
+
+commit 7a1717e605e502b52ebca999991d2e07791e0cd1
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Nov 5 17:23:45 2008 -0500
+
+    Use fetch_nearest() instead of having duplicated code
+
+commit 49647e705438e0827d4a0e955dfaaf01cfae7524
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Nov 5 17:12:22 2008 -0500
+
+    Do the fetch_bilinear inline rather than in separate functions
+
+commit c8b314c43bd70a1c375aef3cacfe717ca9dbc85b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Nov 5 16:59:56 2008 -0500
+
+    Make use of fetch_bilinear() in the various bilinear implementations
+
+commit bad1ee39d3cc27ec07303f6484515a886430cda6
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Nov 5 16:38:33 2008 -0500
+
+    Add fetch_bilinear function
+
+commit 95f2af9584f8f4327ddf6d6948dee17ab48ad8b3
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Mon Nov 3 13:09:02 2008 -0500
+
+    Add missing pixman-arm-simd.[ch] files.
+    
+    Pointed out by Chris Ball and Adrian Bunk.
+
+commit 1d5bb7a3f17fb88cdabee8a27b79fb9fb129e189
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Oct 31 15:08:03 2008 -0400
+
+    Include inttypes.h on digital Tru64
+    
+    Bug 18007, reported by Joonas Pihlaja.
+
+commit f9db3ec7b94db45f388b210d7bed639048f1aa23
+Author: Adrian Bunk <adrian.bunk@movial.fi>
+Date:   Fri Oct 31 14:59:30 2008 -0400
+
+    Rename the current ARM code to ARM SIMD
+    
+    This code is only for CPUs supporting the SIMD instructions, not for all ARM
+    CPUs.
+    
+    I stumbled upon the recent commit with the ARM SIMD code while preparing a
+    patch that models the patch from #13445 after the MMX and SSE2 cases:
+    
+    The ARM SIMD option currently uses --disable-arm, although this code is only
+    for CPUs >= ARMv6. That's as if one would call the option to disable the SSE2
+    code --disable-x86.
+    
+    This patch therefore renames the configure option and the function and file
+    names to arm-simd/arm_simd.
+
+commit b9e2dd783e34969aa7c41877b0aa49920788637b
+Author: Alan Coopersmith <alan.coopersmith@sun.com>
+Date:   Wed Oct 29 18:13:59 2008 -0700
+
+    Define force_inline for compilers other than gcc & MS Visual C
+
+commit 3a775610f22d09fc030804c6dd02208d36920b54
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Wed Oct 29 17:14:41 2008 -0700
+
+    Cleanup inline definitions
+    
+    Replace all inline definitions with a common one in pixman-private.h. Also, add
+    'force_inline' and replace all existing uses of 'inline' as a forced inline
+    with 'force_inline'.
+
+commit d68ebb7701e61d7f19c87d2d3686eb30e85e2ede
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Oct 15 18:46:38 2008 -0400
+
+    Remove unused AddMul_256 macro
+
+commit 8f5cb6916ddea072e453681d80a879a64bd22f8c
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Mon Oct 13 00:05:40 2008 +0200
+
+    unswitch red and blue
+    
+    The previous code assumed a color format of ABGR when naming the
+    variables. The true color format is ARGB. This did not cause any bugs
+    because no functions rely on the order of colors so far. This patch
+    renames the variables just to avoid confusion.
+
+commit f5d4e01c399d6d23fd7e4cfaa26e0b07e2279690
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Wed Oct 8 21:21:58 2008 +0200
+
+    update .gitignore
+
+commit 69dadf231283fadcb117b4d9e799e8633a0e4dab
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Wed Oct 8 21:18:12 2008 +0200
+
+    force alignment of arg pointer in potential SSE users
+    
+    Fix for bug 17824
+
+commit 1c5de7b05831b3d66821707276b71974a232f5c7
+Author: David Müller <dave.mueller@gmx.ch>
+Date:   Tue Sep 23 07:45:51 2008 -0400
+
+    Move _mm_empty() to correct place
+    
+    The "fbComposeSetupSSE2()" function is guarding most of its code
+    depending on the capabilities of the CPU, but unfortunately the call
+    to "_mm_empty()" is not part of this code path but executed
+    unconditionally.  This results in an "illegal instruction" crash on
+    non-MMX / non-SSE capable CPUs caused by the "emms" instruction
+    (embedded in "_mm_empty()").
+    
+    Fix bug 17729.
+
+commit aadcc7f011004794cf88c126641ef8258183878f
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Sun Sep 21 11:01:07 2008 -0400
+
+    Update TODO
+
+commit 9cb60e142bad01fd54cb7e6f3fa2504ddc87a7da
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Sun Sep 21 11:00:33 2008 -0400
+
+    Make use of SSE2 blt/fill in more places
+
+commit d69d2705d16c813756acd8a685dc9a28a178423a
+Merge: b5a9002 412b0d5
+Author: Julien Cristau <jcristau@debian.org>
+Date:   Fri Sep 19 10:29:05 2008 +0200
+
+    Merge tag 'pixman-0.12.0'
+    
+    Conflicts:
+    
+    	configure.ac
+
+commit b5a9002d2e3f0d961bcfe2acee6e9a0f1c583d5d
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Thu Sep 18 19:28:31 2008 -0400
+
+    Inline fetch functions in pixman-transformed.c
+    
+    There was significant performance overhead associated with indirect
+    call to the fetch functions, so inline them.
+    
+    Also, the old code assumed that if the number of rectangles in the
+    source clip was 1, then the clip was identical to the image
+    boundaries. Fix that by running a full region_contains_point()
+    whenever the source clip is different from pict->common.full_region.
+    
+    Based on a patch from Antoine Azar.
+
+commit a57e7bf34a2f312285c8065b8b1328bd5650788a
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Thu Sep 18 11:44:36 2008 -0400
+
+    Fix inner branch code.
+    
+    The entire source must be 0 not just the alpha component.
+    Fix some comments too.
+
+commit 54ee41a6603441bf09bbc95c2fec3d0a41c82d0f
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Wed Sep 17 14:38:47 2008 -0400
+
+    Post release version bump
+
+commit 24de3b74f4db3db569e1edface5e5804b58c02b5
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Wed Sep 17 14:03:23 2008 -0400
+
+    Pre-release version bump
+
+commit 7180230d4d87c55dfef1e17a0cc3b125d45aa3a0
+Author: Vladimir Vukicevic <vladimir@slide.(none)>
+Date:   Wed Sep 17 16:01:31 2008 -0400
+
+    Add SRC x888x0565 C fast path
+
+commit d0b181f347ef4720d130beee3f03196afbd28aba
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Wed Sep 17 15:53:20 2008 -0400
+
+    Add support for ARMv6 SIMD fastpaths.
+
+commit 412b0d5cbc2c0a5200649cbb0b5e26f8b874437d
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Wed Sep 17 14:03:23 2008 -0400
+
+    Pre-release version bump
+
+commit 3f5d6f90b753175a888f36a93d1e79fdc80d95de
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Wed Sep 17 09:50:57 2008 -0400
+
+    Don't include stdio.h
+
+commit eba402092082bf48072671e04e224589af872acd
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Sun Sep 14 14:58:00 2008 -0400
+
+    [sse2] Fix rounding bug in conversion from 565 to 8888
+    
+    When converting from 565 to 8888, replicate the topmost bits instead
+    of appending zeros.
+
+commit 6f00d98f87c019849c611d27e9593c5eecfef4c2
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Tue Sep 9 10:49:56 2008 -0400
+
+    Fix for bug 17477.
+    
+    over_2x128 was changing the alphaLo and alphaHi arguments, causing
+    stripes.
+
+commit bf76505cc6cc5e54c25eb145748e9e364fb367e9
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Sun Sep 7 00:40:09 2008 -0400
+
+    Update TODO
+
+commit da18a5675b3107c9bf99e228d85619d247fa19a6
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Sun Sep 7 00:13:10 2008 -0400
+
+    Extend clip-test to demonstrate a bug in source clipping
+
+commit 00f3d6ef22b5a062323208fd540a17ca65dca42b
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Sat Sep 6 23:49:25 2008 -0400
+
+    Fix bug in pixman_image_is_opaque()
+    
+    Non-repeating gradient images would be reported as opaque. Also add
+    new test program to test source clipping.
+
+commit d5b4fd7e11c2f2b2e8ab3cb95bef252ce142982e
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Sat Sep 6 06:17:32 2008 -0400
+
+    Update RELEASING
+
+commit 35fcdf352a29241f235f2bc7a692c20ad8baf240
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Sat Sep 6 06:15:31 2008 -0400
+
+    Bump release
+
+commit 5e7388540f2cd201331cb3d1f616e3c300dbc45f
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Sat Sep 6 05:14:18 2008 -0400
+
+    Check for __sun || __sun in pixman.h. Update TODO
+    
+    Reported by Bernd Nies.
+
+commit f369d612b3d65529e4b10d8a0b1e015407357d9b
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Sat Sep 6 04:33:16 2008 -0400
+
+    Use error instead of #error in a couple of other places
+
+commit f921c8c57ffdd6d0afd3d41d50e3565084ebd49c
+Author: Peter O'Gorman <pogma@thewrittenword.com>
+Date:   Fri Aug 15 15:00:24 2008 -0500
+
+    Minor portability fixes
+    
+    Use AC_C_INLINE to figure out `inline'.
+    IRIX compiler does not exit with a non-zero exit status when it sees #error
+
+commit e2cbe1a0a4db750ab05d804901f155adb312746b
+Author: Frédéric Plourde <frederic.plourde@polymtl.ca>
+Date:   Thu Sep 4 16:30:21 2008 -0400
+
+    Win32 build system fixes
+    
+    Signed-off-by: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+
+commit ed862f1b2f62ee27884b9b429c54162039f3cb10
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Thu Sep 4 16:21:08 2008 -0400
+
+    Make sure pixman-combine{32,64}.h are disted
+
+commit f9d3f372f907c997abe4c4a65cc4a3dbe0bb41e2
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Aug 24 00:40:16 2008 -0400
+
+    Rename pixman-sse.h pixman-sse2.h
+
+commit fdff58cfa2ed77d2ceb38f48711ac5c91513aab1
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Aug 23 23:59:49 2008 -0400
+
+    A few other renamings of SSE->SSE2
+
+commit 9bfa8aaf17b256d90832802dcd386c05b904b97e
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Aug 23 23:54:24 2008 -0400
+
+    Be consistent in naming SSE2 related things SSE2
+
+commit 00841cb314a3b737dc5f492e113f36c19ba336e1
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sat Aug 23 23:42:36 2008 -0400
+
+    Remove use of MMX extensions.
+    
+    They were never a big improvement anyway, and now that we have SSE2
+    support, they would only be useful on Pentium IIIs, and only if
+    explicitly enabled, which most distributions couldn't do anyway.
+
+commit 3cd6acfe74dabfbc18af49bed077c2a39b9a742d
+Author: Andre Tupinamba <andrelrt@gmail.com>
+Date:   Thu Aug 21 14:43:17 2008 -0700
+
+    [sse2] Change pixman-sse to pass __m128i args as pointers, so as not to confuse MSVC
+
+commit 9b9f7b59e5ce17735157ca9b154e8bc545f5c96b
+Author: Ginn Chen <ginn.chen@sun.com>
+Date:   Thu Aug 21 14:21:01 2008 -0700
+
+    Use hidden attribute for private functions when compiling with Sun Studio
+    
+    https://bugs.freedesktop.org/show_bug.cgi?id=17183
+    
+    Signed-off-by: Alan Coopersmith <alan.coopersmith@sun.com>
+
+commit e78eee87319e0290025c9d87bffe379bec440587
+Author: Vladimir Vukicevic <vladimir@pobox.com>
+Date:   Tue Aug 19 11:57:53 2008 -0700
+
+    Add sys/inttypes.h include for AIX
+
+commit cb9d5750582ea93ef1902a5185164088cdaa0140
+Merge: daf1745 dfe1f63
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Thu Jul 31 13:32:59 2008 -0400
+
+    Merge branch 'master' of sandmann@git.freedesktop.org:/git/pixman
+
+commit daf17450607e533dc590b4673c88241862b6b138
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Thu Jul 31 13:32:38 2008 -0400
+
+    Bug 16921. MMX and SSE2 intrinsics not enabled when compiling with Intel's icc
+    
+    Remove GCC specific inline-growth flags, and use __force_inline__ in
+    pixman-sse2 instead. Based on patch by Matt Turner.
+
+commit dfe1f63f709fc711c15d1be317dc1404d72b3efd
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Fri Jul 25 10:14:29 2008 -0700
+
+    Add depth 30 formats to pixman_format_supported_source.
+    
+    Thanks to Julien Cristau for pointing out that these were missing.
+
+commit db3fb5eb605c4e1a6fcb93902389a22fc496151c
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Mon Jul 21 22:06:02 2008 -0400
+
+    Don't require GCC 4.2 on x86-64
+
+commit 53fa7133fdf7f5879f10847228f478983c480b79
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Mon Jul 21 21:55:55 2008 -0400
+
+    TODO
+
+commit 58ab45b85d1732da7c84a274acdca3bfcf1c36b1
+Author: André Tupinambá <andrelrt@gmail.com>
+Date:   Mon Jul 21 21:53:20 2008 -0400
+
+    Use CopyAreasse2, plus a compatibility fix
+
+commit 51576cda8ba830c0d7dbbeaebb79fbef276f7e2d
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jul 18 14:49:35 2008 -0400
+
+    Fix typo in sse2 configure logic
+
+commit 596218e488fc92c5a03d07892015a106dcd541b4
+Author: Frederic Plourde <frederic.plourde@polymtl.ca>
+Date:   Thu Jul 17 13:51:31 2008 -0700
+
+    Win32 Makefile fix
+
+commit f729457da5de4a96ccd220ce71c583cdec971483
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Thu Jul 17 14:13:34 2008 -0400
+
+    Update RELEASING and release targets in Makefile.am
+
+commit 72045eabd57cf793266424c39246b435b655b19f
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Thu Jul 17 14:07:23 2008 -0400
+
+    Post-release version-bump
+
+commit 2a14ecf1cd07e9a3f367ea66a7b10da929ccc06d
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Thu Jul 17 13:38:57 2008 -0400
+
+    Pre-release version bump
+
+commit 411c0e990f7a96d4e15f2cbbe07d3b50b6a20f95
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Thu Jul 17 13:37:56 2008 -0400
+
+    Make a couple of functions static
+
+commit 96f57c07f24cd6d86c0aad624bd1a16b85f08e04
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Thu Jul 17 13:32:45 2008 -0400
+
+    Speed up fbOver
+    
+    Use FbByteMulAdd to operate on two components at a time and force the function
+    to be inlined.
+
+commit d4855cd9d028c49c2a12f61c6227579fcb8af5b6
+Author: Jeff Muizelaar <jmuizelaar@mozilla.com>
+Date:   Thu Jul 17 13:32:08 2008 -0400
+
+    Comment FbByteMulAdd
+
+commit dcbe4b3f80fb8dc6a83efa7e66a891f8b102608d
+Author: Julien Cristau <jcristau@debian.org>
+Date:   Wed Jul 16 22:03:29 2008 +0200
+
+    fix --enable-{mmx,sse2,vmx}
+    
+    If --enable-foo is used, don't treat it as --disable-foo, and
+    error out if the appropriate compiler support isn't detected.
+
+commit e21f00db50c30e80c8969fbf63982aeb7434dc1d
+Author: Damien Carbery <damien.carbery@sun.com>
+Date:   Wed Jul 16 15:47:38 2008 -0400
+
+    Add pixman-1-uninstalled.pc file
+    
+    Signed-off-by: Søren Sandmann <sandmann@redhat.com>
+
+commit 1570746350baf82d36aeb7ea1702350700b795d7
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Wed Jul 16 15:19:17 2008 -0400
+
+    Use -no-undefined -- fixes bug 15927
+
+commit 99fead412e7602daf193035831b952e252dac0e1
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Wed Jul 16 15:07:30 2008 -0400
+
+    Use <inttypes.h> on most types of Unix.
+    
+    Fix for bug 15560.
+
+commit c5bb4ab8fcb784d479c71996f2ff081374741d43
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Wed Jul 16 14:56:48 2008 -0400
+
+    Update TODO
+
+commit 2070f10db927a97d7d38024e607093f5a6e00291
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Wed Jul 16 14:30:46 2008 -0400
+
+    Remove unused pixman-combine.c
+
+commit 9d54568d5f93701313fdbb49c1ef9dc79fe5850c
+Author: André Tupinambá <andrelrt@gmail.com>
+Date:   Wed Jul 16 14:29:29 2008 -0400
+
+    Fix SSE2 bug where x888 pixels were treated as 8888
+
+commit 7dfd023e94d9ab5940d7062c2e5cde11ab02f43e
+Author: Benjamin Otte <otte@gnome.org>
+Date:   Wed Jul 16 20:41:02 2008 +0200
+
+    fix Altivec detection
+    
+    the old code used to cause infinite looping on G3 machines.
+
+commit 0a92401678286eb438fe24979fd032efba540a0a
+Author: Behdad Esfahbod <behdad@behdad.org>
+Date:   Tue Jul 15 16:49:32 2008 -0700
+
+    Fix fbCompositeSrc_x888xnx8888mmx to properly ignore source alpha
+
+commit 687176023caf53f2fe234827d152f270048b1cd3
+Author: Julien Cristau <jcristau@debian.org>
+Date:   Tue Jul 15 16:59:21 2008 +0200
+
+    make --{en,dis}able-gtk work correctly
+    
+    --enable-gtk had the same effect as --disable-gtk.
+    Now we check for it by default, and error out if we can't
+    find it and it was explicitly enabled.
+
+commit 9d97716d2fd91d5611a5e7a7b29b38887b672e9f
+Author: André Tupinambá <andrelrt@gmail.com>
+Date:   Tue Jul 8 14:15:27 2008 -0400
+
+    Fix bug 16310 in the SSE2 fast path in function fbCompositeSolidMask_nx8888x0565Csse2
+
+commit 47b95be38ae0ebc61963c0b3b4dddb240a59c18a
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Tue Jul 8 14:10:56 2008 -0400
+
+    Update TODO
+
+commit eb53d111bd8651cd00c1b728b09e8be09b8482f7
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Wed Jun 25 17:55:28 2008 -0700
+
+    Add a lossy 32-bit generic pixel fetch function for wide surfaces.
+    
+    The transformed fetch path currently only works at 32-bit precision.  Until a
+    wide version of that function is added, we need to have a path to perform a
+    wide FetchPixel and then contract it down to a8r8g8b8.
+    
+    Also, use the right format when expanding the result in fbFetchTransformed64.
+    fbFetchTransformed returns a8r8g8b8 results.
+
+commit 3c43b869f3821495978c61b9195f0b0e9e9e1245
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Fri Jun 27 01:11:30 2008 -0400
+
+    Unexport or delete various functions
+    
+    - Delete pixman_region_append()
+    - Delete pixman_region_empty()
+    - Make pixman_region_validate() static and don't export it.
+    
+    Reported by Julien Cristau.
+
+commit 36b05f2cac7fa28e31131ca0d6b8b133ccd95ab8
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Fri Jun 27 01:05:30 2008 -0400
+
+    Only export the 16 bit version of pixman_region_set_static_pointers()
+
+commit ab4d45806aacf6208820af07b42852c9e4c89fff
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Wed Jun 25 09:46:12 2008 -0400
+
+    Update versioning documentation
+
+commit a3c12f4ccfda470574bfe8b7796c0b90237dabe0
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Wed Jun 25 09:36:13 2008 -0400
+
+    Update release instructions
+
+commit ca6edbee41e2e4bd18e3615a86bae441822b90e3
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Wed Jun 25 09:24:00 2008 -0400
+
+    Explicitly use my GPG key for signing.
+
+commit c48e3bc5ccf3a2f0aa4cadbaa4a0ae968a5d5a91
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Wed Jun 25 09:16:09 2008 -0400
+
+    Post-release version bump
+
+commit d8e5ff20f12c52a32dcf0543ab436eb7194b794c
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Wed Jun 25 07:58:21 2008 -0400
+
+    Pre-release version bump
+
+commit 4a9df4be7e384cf18e3d7a65d1e0023d2e2a280f
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 22 13:44:23 2008 -0400
+
+    TODO
+
+commit a766b62880108f278478888f5167a5fbf2819a97
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Sun Jun 22 13:42:21 2008 -0400
+
+    Add configure time options to disable mmx/sse2/vmx
+
+commit 02268903e4311709744c11e495f9b17f171ec5e9
+Author: David Sharp <whereami@gmail.com>
+Date:   Thu Jun 19 20:23:33 2008 -0700
+
+    pixman-sse.c: silence pointer-cast compiler warnings.
+    
+    Cast pointers to words of the same size, not 32-bits.
+    
+    Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>
+
+commit 534e65d54831018b47c169932a04224e5ba53cb8
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jun 13 16:16:59 2008 -0400
+
+    Update TODO
+
+commit eb2d95de98683a387153f010077ad9c3c3b1b01d
+Author: Luo Jinghua <sunmoon1997@gmail.com>
+Date:   Sat Jun 14 09:07:22 2008 +0800
+
+    Fix implicit declaration of function 'free'.
+    
+    Otherwise pointer will be truncated on 64bit arch and your programs will crash.
+
+commit 29d144712e558aaeb49f4384028dd669d76a410b
+Author: Maximilian Grothusmann <maxi@own-hero.net>
+Date:   Fri Jun 13 12:44:50 2008 -0700
+
+    Fix memory leak by freeing boxes{16,32}.
+    
+    After calling pixman_region_init_rects() or
+    pixman_region32_init_rects(), boxes{16,32} were not freed before
+    returning. Fixes bug 16312.
+
+commit 5d32519316b40b35113c6df9e15d955a16709ba2
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Fri Jun 13 09:52:53 2008 -0700
+
+    Use pixman_malloc_ab instead of plain malloc for the fbStore64_generic scratch buffer.
+
+commit b1c70c4e6435d7f15751111828c381feb1d139cf
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Jun 13 00:25:45 2008 -0400
+
+    Move PIXMAN_FORMAT_16BPC to pixman-private.h
+
+commit 7fa966df0ee781fa486715710f389b148c11d36e
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Fri Jun 6 23:02:51 2008 -0700
+
+    Decide based on the image formats whether we need wide compositing.
+    
+    Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>
+
+commit 7cb735c9c0fa55ae1f4d8d13da9f33e3da2ae8fe
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Fri Jun 6 19:40:25 2008 -0700
+
+    Take the source format into account in pixman_expand.
+    
+    Extract the original bits of the source image for each component and then
+    replicate up to 16 bits to fill the wide components.  Make sure to hard-code the
+    alpha value to 1 if the source format didn't have alpha.
+    
+    Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>
+
+commit c0d98e96605c6d03f4b02f337f2f5827165bb092
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Fri Jun 6 18:51:48 2008 -0700
+
+    Make expansion and contraction loops clearer.
+    
+    Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>
+
+commit fc0b28bf6af81428b7ac045614eea97fbf9c4a70
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Fri Jun 6 18:05:15 2008 -0700
+
+    Add wide source picture, external alpha, and transformed image routines.
+    
+    The wide external alpha path should work correctly with wide formats.  The wide
+    transformed fetch code for now just does a 32-bit fetch and then expands, which
+    will lose precision.  Source pictures, for now, are evaluated at depth 32 and
+    then are expanded to depth 64.  We could get higher precision by evaluating them
+    directly at depth 64, but this should be good enough for now.
+    
+    Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>
+
+commit 42a3a2d6fc8d3e521c6914ff8fb89f1fbc673e28
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Fri Jun 6 15:45:17 2008 -0700
+
+    Add wide fetch/store functions.
+    
+    Use the narrow ones and expand/contract where necessary.  Still need wide paths
+    for fancy pictures, but this gets the basic stuff working.
+    
+    Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>
+
+commit 7591d3f1d7e0884f9362018edf720724095cf380
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Fri Jun 6 15:30:52 2008 -0700
+
+    Add expand and contract functions to convert between ARGB8 and ARGB16.
+    
+    The expansion function isn't quite correct, but gives reasonable results.
+    
+    Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com>
+
+commit bae5db893f20eaf9c8ec56057617a8ce800cce48
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Mon Jun 9 13:06:50 2008 -0400
+
+    Post-release version bump
+
+commit 88e02d36792d3ad2615a775c9d21136c760087ef
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Mon Jun 9 12:32:56 2008 -0400
+
+    Pre-release version bump
+
+commit fe43a2c1638ee75ca6bbc437a59461cc5c2b6f46
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Mon Jun 9 12:28:05 2008 -0400
+
+    Add back prototypes for pixman_version() and pixman_version_string() to pixman.h
+
+commit fd380c7764c9cbc05f070b178a4b38b342503471
+Author: Julien Cristau <jcristau@debian.org>
+Date:   Mon Jun 9 12:01:37 2008 +0200
+
+    Add a couple more missing PIXMAN_EXPORTs
+
+commit f8f3b454a8dc4f91b4aff0e3c7ab28d65383ec8f
+Author: Julien Cristau <jcristau@debian.org>
+Date:   Mon Jun 9 11:53:23 2008 +0200
+
+    Add PIXMAN_EXPORT for pixman_region_equal
+    
+    This used to be exported, so removing the export broke ABI.
+
+commit 6e8f785ae7b629f5f81627fdb61f07d99df0b9ba
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Mon Jun 9 02:43:37 2008 -0400
+
+    Post-release version bump
+
+commit 26b486b9a9e599a5b45a73f8b3ec72f9a3560cfb
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Mon Jun 9 01:59:56 2008 -0400
+
+    Bump version number
+
+commit 95e749168d08cd35e151e612404c9318dcb0df9e
+Merge: f7f5da7 ae1f016
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Mon Jun 9 01:57:26 2008 -0400
+
+    Merge region32 branch; fix conflicts
+
+commit ae1f016d404d323158072499f83185d4e83eaedb
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Mon Jun 9 01:53:03 2008 -0400
+
+    Unbump version number
+
+commit f7f5da7fc446c3cadb3f82a2ddc6ae57f4c4b795
+Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk>
+Date:   Mon Jun 9 01:52:36 2008 -0400
+
+    Unbump version number; remove file added to EXTRA_DIST
+
+commit e84db894933b25ef9ab83a6c15a0521e15d61bae
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Mon Jun 9 01:29:14 2008 -0400
+
+    Bump version number and make it distcheck
+
+commit 7145d6dbdab79930ed923ae9551501127be28864
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Mon Jun 9 01:29:14 2008 -0400
+
+    Bump version number and make it distcheck
+
+commit a76826f19893a0575c0e16079fc1ec6d33171594
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Mon Jun 9 01:03:15 2008 -0400
+
+    Fix uses of short variables in pixman-region.c
+
+commit 85bad33380b7b78bdd6a7238c1439bcc42e28231
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Mon Jun 9 00:51:28 2008 -0400
+
+    Add infinite loop test
+
+commit 62e9b4d6cfcbc10046539b8e9643691bb02bea39
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Mon Jun 9 00:00:07 2008 -0400
+
+    Export pixman_compute_composite_region32() and use it in walk_region
+
+commit 57819ae3c219252db98df5eefa36499a6b77970c
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 23:53:35 2008 -0400
+
+    Fix bug in pixman_compute_composite_region()
+    
+    It was using the output region as the input. Add and use
+    pixman_region16_copy_from_region32().
+
+commit 664b891aac50642d6d2ab6c482f4765029ae9b91
+Merge: 0b4c6dc 2b91152
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 23:39:29 2008 -0400
+
+    Merge branch 'master' into region32
+
+commit 2b9115293e5fca70ca9ffe44ef74c80885dcedbb
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 23:39:03 2008 -0400
+
+    Call _mm_empty() at the end of fbComposeSetupSSE
+
+commit 0b4c6dcefd63a43aa9bb6556017e259589116522
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 22:48:59 2008 -0400
+
+    Add pixman_image_set_clip_region32
+
+commit 703f82cd02f5224632b4b7f7f3f072067fa4f76d
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 22:20:44 2008 -0400
+
+    Use 32 bit regions internally
+
+commit de150bf82fbe0e346fa38eae10a5bd43538bb3d9
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 22:07:46 2008 -0400
+
+    Add pixman_region32_copy_from_region16
+
+commit e30f7e2eb56b53667ee83e2cad942f171a9486a0
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 21:43:01 2008 -0400
+
+    Some formatting fixing
+
+commit cb7cc369f500a7828dc3c9935d8d82af47573df5
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 21:41:54 2008 -0400
+
+    Move all the PIXMAN_EXPORT into .c files
+
+commit 890f1a4280af4c7b8d8913ba592a9dd617482463
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 21:13:52 2008 -0400
+
+    Move PIXMAN_EXPORT into pixman-region.c
+
+commit 7a32c864e95d35e13d5473f5519639d91f62e20a
+Merge: 81369a4 1248418
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 20:14:52 2008 -0400
+
+    Merge branch 'master' into region32
+
+commit 1248418854b0e6e2f7fa8c2760a05b6604d3ded7
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 20:01:37 2008 -0400
+
+    Fix forgotten use of pixman-combine.h
+
+commit fb8f17fdf1eaec4ab8edba1486bfa83c0965d738
+Merge: 9267b0b 1063933
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 19:55:43 2008 -0400
+
+    Merge branch 'vmx'
+
+commit 1063933bacb8b5d06b42b7b06a116339ce7c1f0c
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 19:55:35 2008 -0400
+
+    Rename pixman-combine.h -> pixman-combine32.h
+
+commit 81369a4c4f09bc6cceef173947df4c9a78952c7a
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 19:32:15 2008 -0400
+
+    Fix up types in pixman32-region.c
+
+commit 4cf2b696266214482dad6e40184b74e1161e8ea7
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 19:31:31 2008 -0400
+
+    Add prototypes for 32 bit region methods
+
+commit 7d0c507b084e78217145ab82c35d579e5c8c26db
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 19:31:04 2008 -0400
+
+    Add pixman-region32.c
+
+commit c22df027d6b9c9957e73ea9e34f14d504deb9d91
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 19:24:28 2008 -0400
+
+    Add prefixes to global primitive regions
+
+commit cab6a175ec4a1b29b51060293aacaf2cf2f987a0
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 19:20:27 2008 -0400
+
+    Delete more unused stuff
+
+commit 9395f08d6e6316ee853a976b806e57821e13991e
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 19:16:55 2008 -0400
+
+    Delete more unused stuff
+
+commit b00fcb6d627370930daec9d1e9601af7ce28944c
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 19:15:41 2008 -0400
+
+    More data types; remove unused stuff
+
+commit 4bdcd3bdb1223d5e611af9721e2eceb7e867b138
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 19:12:29 2008 -0400
+
+    Add point_type_t
+
+commit abf6b6ca6ce8f54cb8ba9d34570d4cdf0537bbd2
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 19:11:24 2008 -0400
+
+    Replace pixman_box16_t with box_type_t
+
+commit 68ccaa06751e76b9d9c70a7c0b8e9b22cf7d6f62
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 19:07:30 2008 -0400
+
+    Add pixman-region16.c; compile that instead of pixman-region.c
+
+commit 149477457c9463e22350c15ccfca5ddf8a78e7af
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sun Jun 8 19:05:43 2008 -0400
+
+    macroize pixman-region.c
+
+commit 567b4c255050ee3cc2dd0c03fb091d1f981332eb
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Sat Jun 7 19:38:01 2008 +0200
+
+    Use sigaction instead of signal to restore the previous handler
+
+commit 7ef19261ee5bb4c78ca55533c67e1f267faed61e
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Sat Jun 7 19:28:10 2008 +0200
+
+    Use combine macros from the generated header
+
+commit 795fd8a4c0f9417fb92beaff8595064c573b7652
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Sat Jun 7 19:25:09 2008 +0200
+
+    Split combine.inc generated files in source and header
+
+commit 9267b0bb549044591f28124a86e169b2235339b6
+Author: Dimitrios Apostolou <jimis@gmx.net>
+Date:   Tue Jun 3 16:33:58 2008 -0400
+
+    Fix compilation on SGI
+    
+    Signed-off-by: Søren Sandmann <sandmann@redhat.com>
+
+commit 8ef3f49a9580fb148c2e5f567c0aafddd4b0f136
+Merge: 27b753c 9a6d3a1
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Sun Jun 1 16:37:52 2008 +0200
+
+    Fixup
+
+commit 9a6d3a1dcf89fc04f71a9dfed1aeeda1e3fb83bc
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Fri May 2 17:33:17 2008 -0700
+
+    Fix wide alpha fetch macro.
+    
+    Signed-off-by: Soren Sandmann Pedersen <sandmann@redhat.com>
+
+commit 86ed05b0f93505c136fb279fa4529596fc7c682a
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Wed Apr 23 17:30:14 2008 -0700
+
+    Use wide compositing functions when wide == 1.
+    
+    Signed-off-by: Soren Sandmann Pedersen <sandmann@redhat.com>
+
+commit 4e2d2546b79354a1accff8614d50eb8f75a15c98
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Tue Apr 15 13:58:27 2008 -0700
+
+    Add infrastructure for allocating wide scanline buffers. Not yet used.
+    
+    Signed-off-by: Soren Sandmann Pedersen <sandmann@redhat.com>
+
+commit 598334a15723dc3857d1e932c17365a1f8c5f094
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Tue Apr 15 13:13:46 2008 -0700
+
+    Split fetch/fetchPixel/store proc types into 32-bit and 64-bit versions.
+    
+    Signed-off-by: Soren Sandmann Pedersen <sandmann@redhat.com>
+
+commit 4a7e1676fd381bda53ece2f13204fbe568e07b0d
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Fri Apr 4 14:43:48 2008 -0700
+
+    Move the scanline buffer allocation logic into pixman_composite_rect_general.
+    
+    Pass the src, mask, and dest buffers into pixman_composite_rect_general_* as
+    void* pointers since those functions should not do pointer arithmetic.
+    
+    Signed-off-by: Soren Sandmann Pedersen <sandmann@redhat.com>
+
+commit 30bdbbed073750efa0a0c1e3f68925196e118953
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Thu May 15 01:07:59 2008 -0400
+
+    Update TODO
+
+commit d71bfc7777a48edfeb0aee0e3da97ac700a3dc90
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Thu May 15 01:00:28 2008 -0400
+
+    Work around GCC bug causing crashes in Mozilla with SSE2
+    
+    When using SSE2 intrinsics, gcc assumes that the stack is 16 byte
+    aligned. Unfortunately some code, such as Mozilla and Mono contain
+    code that aligns the stack to 4 bytes.
+    
+    The __force_align_arg_pointer__ makes gcc generate a prologue that
+    realigns the stack pointer to 16 bytes.  Fix bug 15693.
+
+commit 1f275b4bdc28b121f4afd3cfd2df187bebedf35d
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Wed May 7 22:27:56 2008 -0400
+
+    Add support for 8 bit fills in pixman_fill_mmx()
+
+commit a56df9149d2af0414ba8311b413dbc2f01c94427
+Author: Richard Hult <richard@imendio.com>
+Date:   Sat May 3 15:03:00 2008 -0400
+
+    Add --disable-gtk switch to configure
+    
+    Signed-off-by: Søren Sandmann <sandmann@redhat.com>
+
+commit 9aa1c4e5d2db3793ae6e301496fe39475de84382
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Fri May 2 21:10:07 2008 -0400
+
+    Update TODO
+
+commit 1e1c8a1f52183edecfea346257468681d6a88c36
+Author: Oswald Buddenhagen <ossi@kde.org>
+Date:   Thu May 1 10:50:58 2008 +0200
+
+    fix pixman compile with srcdir != builddir
+    
+    Signed-off-by: Alan Coopersmith <alan.coopersmith@sun.com>
+
+commit 510ee5d8c23e2d9133dc83634e15b7660f411a1f
+Author: Thomas Zimmermann <kuhundbaer@web.de>
+Date:   Fri Apr 25 22:13:24 2008 -0400
+
+    Remove trailing comma from enum (bug 15364)
+
+commit 3cea659d72bc3429a8766e63c66346e76e61c9b5
+Author: Alan Coopersmith <alan.coopersmith@sun.com>
+Date:   Thu Apr 24 18:10:56 2008 -0700
+
+    Fix pixman/Makefile.am to work with Solaris make
+
+commit 27b753c9deabe5ac775021abfae98a6a1830cfc2
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Thu Apr 24 01:08:29 2008 +0200
+
+    Remove unused macro
+
+commit 584118fb6c15d695b6a203c2df51411958957880
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Thu Apr 24 01:06:38 2008 +0200
+
+    Remove VMX from CPUFeatures, ppc isn't using it at all
+
+commit fc96121afd5d8451c9d8ba8a693e589d1999d131
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Thu Apr 24 01:03:08 2008 +0200
+
+    Simplify cpu feature check
+
+commit 08b317a5f519978cfabebd75d5595b19fc1d1425
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Thu Apr 24 00:41:16 2008 +0200
+
+    Refactor path selection
+
+commit 083cadd4c7d1270b0ee9f0365327b872898d1561
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Thu Apr 24 00:36:51 2008 +0200
+
+    Force inlining
+
+commit 92ef26dfed3337831dd5156bfe0d20b132a26a29
+Author: André Tupinambá <andrelrt@gmail.com>
+Date:   Wed Apr 23 00:18:39 2008 -0400
+
+    Add SSE2 implementations of many compositing operations.
+
+commit 8e68544e0d8cc7af24fb8b298fd6afd47c620136
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Sat Apr 12 13:16:46 2008 +0200
+
+    Unbreak vmx pixman
+
+commit 1ec7bd2cb2d02caca06742b0091f293d29d95a44
+Merge: e63bf15 5388222
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Sat Apr 12 09:53:24 2008 +0200
+
+    Merge branch 'master' into vmx
+    
+    Conflicts:
+    
+    	pixman/pixman-pict.c
+
+commit 53882228c9bbd50609e2858502b9bc087ca76903
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Sat Apr 12 00:22:35 2008 -0400
+
+    Simplify compiler handling in MMX code
+    
+    Instead of using the parameters --param inline-unit-growth=10000 and
+    --param large-function-growth=10000, just make the inline functions
+    __always_inline__.
+    
+    Also support Intel's compiler in pixman-mmx.c.
+    
+    Based on a patch by Serhat <ubunturk@gmail.com>
+
+commit 9f76747adec7ea00e31dd817427c1ed1bfa86aa8
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Thu Apr 3 17:27:08 2008 -0700
+
+    Use a macro to append _accessors to things.
+    
+    Signed-off-by: Søren Sandmann <sandmann@redhat.com>
+
+commit a7065162aff9d1de9fc3db9756e4e76f6b8d063b
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Tue Apr 1 17:26:03 2008 -0700
+
+    Generate 64-bit combining functions.
+    
+    Copy some macros from pixman-private.h into combine.inc and update them to
+    generate 64-bit versions as appropriate.  Add a rule to generate
+    pixman-combine64.c and add it to the build.
+    
+    Signed-off-by: Søren Sandmann <sandmann@redhat.com>
+
+commit 48521e6fe4378f9cc49d2dfb8d87490e65267a49
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Tue Apr 1 16:31:26 2008 -0700
+
+    Replace hardcoded numbers and uint*_t in combine.inc.
+    
+    The masks and shifts have been replaced with preprocessor defines generated by
+    combine.pl.  The uint*_t types have been replaced with comp4_t, comp2_t, and
+    comp1_t depending on how many components the value is supposed to hold.
+    
+    Signed-off-by: Søren Sandmann <sandmann@redhat.com>
+
+commit e0e5c4b72937728d0b36b1077d94ce92a2374c9a
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Mon Mar 31 16:42:53 2008 -0700
+
+    Tack 32 onto the ends of the combining function types.
+    
+    Signed-off-by: Søren Sandmann <sandmann@redhat.com>
+
+commit f88519ed3fed42e41aa5623540466d0bee1a887b
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Mon Mar 31 16:33:09 2008 -0700
+
+    Move combining routines into combine.inc and add a Perl rule to generate it.
+    
+    This will eventually be used to search & replace types and mask/shift
+    calculations to generate a wide version of these functions.
+    
+    Signed-off-by: Søren Sandmann <sandmann@redhat.com>
+
+commit 30746b1e1e5101fd1502c676e777e27953772f75
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Mon Mar 31 16:09:44 2008 -0700
+
+    Move combining macros into pixman-combine.c.
+    
+    Signed-off-by: Søren Sandmann <sandmann@redhat.com>
+
+commit 1ae751215e255adecacad960b5bc98ff86039a07
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Wed Apr 9 21:39:29 2008 -0400
+
+    Require GCC 3.4 for SSE and SSE2.
+    
+    Also include <emmintrin.h> in the SSE2 test and use SSE_FLAGS rather than MMX_FLAGS
+    
+    Reported by Dave Yeo.
+
+commit a1f51cb09ead3e0007c12e98ce74948a92e80ec5
+Author: Søren Sandmann <sandmann@redhat.com>
+Date:   Tue Apr 8 21:58:00 2008 -0400
+
+    Fix bug in FbStore_b2g3r3
+
+commit 0b207ae11065c740f2644a89fc13207a5343554e
+Author: Antoine Azar <cairo@antoineazar.com>
+Date:   Sun Apr 6 10:56:53 2008 -0400
+
+    Optimize operators based on source or dest opacity.
+    
+    Check if we can replace our operator by a simpler one if the src or
+    dest are opaque. The output operator should be mathematically
+    equivalent to the source.
+
+commit 2976e690410d10b916014effe5d1842d0c8a7908
+Author: Julien Cristau <jcristau@debian.org>
+Date:   Sun Apr 6 17:12:12 2008 +0200
+
+    Revert "pixman-version.h is generated, don't distribute it"
+    
+    This reverts commit 4fd8910ea499eb484e29c44dbee7dbc029656e9c.
+
+commit 4fd8910ea499eb484e29c44dbee7dbc029656e9c
+Author: Julien Cristau <jcristau@debian.org>
+Date:   Fri Apr 4 21:36:02 2008 +0200
+
+    pixman-version.h is generated, don't distribute it
+
+commit 4cde0886b52c82b792e8fbf2248bf8ff9aa079fa
+Author: Frederic Plourde <frederic.plourde@polymtl.ca>
+Date:   Thu Apr 3 13:52:54 2008 -0700
+
+    Add CopyAreammx fast path for argb32 SRC xrgb32 and abgr32 SRC xbgr32
+
+commit f45b331f7bb7d7effe279159d8c899952b52a270
+Author: Julien Cristau <jcristau@debian.org>
+Date:   Sun Mar 30 20:53:42 2008 +0200
+
+    Remove prototype for nonexistent pixman_image_set_filter_params
+
+commit a331519b865d157ac5fec231fda02ee74ba1ede1
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Fri Mar 28 12:26:18 2008 -0700
+
+    Get rid of pixman_composeFunctions_accessors.
+    
+    The combining functions operate on scratch memory, so they don't need the access
+    wrappers.  There's also no reason not to use the MMX combining functions in the
+    accessor path.
+
+commit 0c3547bad41e1a5b12c0ffaa1c106043399dc3fc
+Author: Aaron Plattner <aplattner@nvidia.com>
+Date:   Fri Mar 28 12:16:07 2008 -0700
+
+    Fix test build when srcdir != builddir.
+    
+    The tests were including pixman.h, but pixman.h couldn't find pixman-version.h
+    because it was in $(top_builddir)/pixman rather than $(top_srcdir)/pixman.
+
+commit cd3799317fced697e4bc729c3ea8d42ed7edf526
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Fri Mar 28 05:23:34 2008 -0400
+
+    Post-release version bump
+
+commit 92b675db8d3ef44c3c87110443b15a68a871cf7b
+Author: Søren Sandmann Pedersen <sandmann@redhat.com>
+Date:   Thu Mar 27 10:08:35 2008 -0400
+
+    Fix log generation
+
 commit 0c33317f59b93f5cab348619b1c38a5dce97de94
 Author: Søren Sandmann Pedersen <sandmann@redhat.com>
 Date:   Thu Mar 27 10:07:11 2008 -0400
@@ -106,6 +19603,25 @@ Date:   Tue Mar 25 11:45:56 2008 -0700
     
     .. otherwise it's already a macro evaluating to FALSE.
 
+commit e63bf1554b4adf9e687ec86213a97caab2218a77
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Sun Mar 23 16:12:31 2008 +0100
+
+    Make configure message alike the mmx/sse/sse2 ones
+
+commit dcc530178050522705e70ff2f09b9da2b358ac01
+Merge: 550e5f5 29a8ae4
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Sun Mar 23 16:04:26 2008 +0100
+
+    Update vmx
+
+commit 550e5f54abe4f3f0b6fcd278c3b4533036276e3f
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Sat Mar 22 11:28:48 2008 +0100
+
+    update patch
+
 commit 4f33f454c6dbaf356d20cee8d35fdf02f9a0317d
 Author: Aaron Plattner <aplattner@nvidia.com>
 Date:   Fri Mar 21 18:59:45 2008 -0700
@@ -208,13 +19724,6 @@ Date:   Mon Mar 10 23:41:52 2008 -0400
     
     Reported by Robert O'Callahan.
 
-commit 52f7f3909f71da1e4331f0333cfed17682984c12
-Merge: b7f65f3... 5bcde57...
-Author: Matthieu Herrb <matthieu@bluenote.herrb.net>
-Date:   Sat Mar 8 21:13:22 2008 +0100
-
-    Merge branch 'master' into obsd
-
 commit 5bcde57da704000f0fbb52291d01c7f3c0dc8655
 Author: Antoine Azar <cairo@antoineazar.com>
 Date:   Wed Mar 5 15:27:11 2008 -0800
@@ -283,7 +19792,7 @@ Date:   Tue Jan 22 16:33:58 2008 -0800
     drop the #else part.
 
 commit bcac3335893a24e1d16790cb38d8a105b1e19951
-Merge: e5ceddf... 054be10...
+Merge: e5ceddf 054be10
 Author: Carl Worth <cworth@cworth.org>
 Date:   Tue Jan 22 16:31:44 2008 -0800
 
@@ -341,7 +19850,7 @@ commit 9a0639650e276c4c0da5dfe37a8d3ba5a6b81712
 Author: Vladimir Vukicevic <vladimir@pobox.com>
 Date:   Tue Nov 6 16:15:01 2007 -0800
 
-    Remove last CVS $Id: ChangeLog,v 1.5 2008/04/10 21:04:35 matthieu Exp $ tags
+    Remove last CVS $Id: ChangeLog,v 1.6 2013/06/07 17:18:00 matthieu Exp $ tags
 
 commit df964790e893a8b511e9322e7161087d8ba182cc
 Author: Søren Sandmann <sandmann@redhat.com>
@@ -350,12 +19859,12 @@ Date:   Thu Dec 20 00:23:18 2007 -0500
     Don't use the pixbuf fast paths when the source picture has alpha. Bug
     13650, reported by Wu Nian.
 
-commit b7f65f3f75e127b1f4ac280f1141801ad547ebd4
-Merge: 5b62915... 72b46bc...
-Author: Matthieu Herrb <matthieu@bluenote.herrb.com>
-Date:   Sun Dec 9 10:20:33 2007 +0100
+commit 49240111dbb31c335856f9653544a039275bf033
+Merge: 808e4f5 72b46bc
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Sun Dec 16 00:38:16 2007 +0100
 
-    Merge branch 'master' into obsd
+    Merge branch 'master' of git://anongit.freedesktop.org/pixman
 
 commit 72b46bcf345db668b3ec00e7f27c5454cf2ad8b5
 Author: Søren Sandmann <sandmann@redhat.com>
@@ -423,47 +19932,14 @@ Date:   Sat Dec 1 19:56:26 2007 -0500
     Add tables with information about the MMX and plain-C fast paths.
     Update TODO.
 
-commit 5b629154a48eded16a562a0ff5661b76e97c5f48
-Author: Matthieu Herrb <matthieu@bluenote.herrb.com>
-Date:   Thu Nov 22 14:31:26 2007 +0100
-
-    pixman 1.9.6
-
-commit 621ec860c023ec6577ef1b6ea063302e26d54823
-Merge: 40a4dd4... f1f52ae...
-Author: Matthieu Herrb <matthieu@bluenote.herrb.com>
-Date:   Thu Nov 22 14:29:40 2007 +0100
-
-    Merge branch 'master' into obsd
-
-commit 40a4dd4c1a2b389a6d4ce704ede7207bb3a5ca76
-Merge: 98f7081... 399ef1f...
-Author: Matthieu Herrb <matthieu@bluenote.herrb.com>
-Date:   Thu Nov 22 14:28:44 2007 +0100
-
-    Merge branch 'obsd' of ssh://xenocara.org/git/xenocara/lib/pixman into obsd
-
-commit 98f7081ae9fd5d5271feaf77cd7d9bd1d1be8367
-Author: Matthieu Herrb <matthieu@bluenote.herrb.com>
-Date:   Thu Nov 22 14:27:30 2007 +0100
-
-    Bump libpixman revision to have it at least equal to the version from fd.o
-
 commit f1f52ae4b1b2cca27104c1210625356084baf2f8
 Author: Carl Worth <cworth@cworth.org>
 Date:   Tue Oct 30 11:48:31 2007 -0700
 
     Track name change from pixman.pc to pixman-1.pc in .gitignore
 
-commit 274a34b84a1df33b233981bdf0a2c49b71e84db3
-Merge: 80c6b00... d4d78c8...
-Author: Matthieu Herrb <matthieu@bluenote.herrb.com>
-Date:   Sun Oct 28 08:07:15 2007 +0100
-
-    Merge branch 'master' into obsd
-
 commit d4d78c8c1a73d1007ebaae3117923bb72d09118f
-Merge: 85bccce... b39ca42...
+Merge: 85bccce b39ca42
 Author: Alan Hourihane <alanh@tungstengraphics.com>
 Date:   Wed Oct 24 21:39:34 2007 +0100
 
@@ -476,7 +19952,7 @@ Date:   Wed Oct 24 15:48:45 2007 -0400
     Bump version number
 
 commit 85bccce4d863b99be4b9ce62a8ac7d95f0acab3d
-Merge: 8aa38d2... 2853243...
+Merge: 8aa38d2 2853243
 Author: Alan Hourihane <alanh@tungstengraphics.com>
 Date:   Thu Oct 18 17:33:43 2007 +0100
 
@@ -513,7 +19989,7 @@ Date:   Wed Oct 17 18:40:28 2007 -0400
     Add README file based on text from Bjorn Lindquist
 
 commit 8aa38d2256c191bf3437034f6176bae30c3c3d19
-Merge: cea752b... 39a67d3...
+Merge: cea752b 39a67d3
 Author: Alan Hourihane <alanh@tungstengraphics.com>
 Date:   Wed Oct 17 19:39:41 2007 +0100
 
@@ -523,18 +19999,12 @@ Date:   Wed Oct 17 19:39:41 2007 +0100
     
     	pixman/pixman-image.c
 
-commit 80c6b00af24c3f997eac30a1618669030edcf7bd
-Author: Matthieu Herrb <matthieu@bluenote.herrb.com>
-Date:   Wed Oct 3 23:00:44 2007 +0200
-
-    library is called libpixman-1 for some reason.
-
-commit f41fd34f65bfb6f13d299466b8e1368384cbd305
-Merge: 340d678... 39a67d3...
-Author: Matthieu Herrb <matthieu@bluenote.herrb.com>
-Date:   Wed Oct 3 22:42:44 2007 +0200
+commit 808e4f541b4cfde40c91e6c6cd942f9074d38e94
+Merge: 33d4028 39a67d3
+Author: Luca Barbato <lu_zero@gentoo.org>
+Date:   Mon Oct 1 22:13:05 2007 +0000
 
-    Merge branch 'master' into obsd
+    Merge branch 'master' of git://anongit.freedesktop.org/pixman
 
 commit 39a67d35f05aa47cf50191e0837a2125593a7bbc
 Author: Tilman Sauerbeck <tilman@code-monkey.de>
@@ -675,18 +20145,6 @@ Date:   Tue Sep 4 17:24:04 2007 -0700
     blends RGB24 over ARGB32 and notices that "alpha" values are
     making it from the source to the destination.
 
-commit 399ef1f44e7c0059f96ce01be83455a2fbe38ec8
-Author: Matthieu Herrb <matthieu.herrb@laas.fr>
-Date:   Sun Sep 2 18:13:15 2007 +0200
-
-    regen
-
-commit 340d678af2ec0876b7c86a6817cd38037f424cec
-Author: Matthieu Herrb <matthieu@bluenote.herrb.com>
-Date:   Sat Sep 1 16:28:05 2007 +0200
-
-    BSD Makefile & rerun automake.
-
 commit 8ff7213f39edc1b2b8b60d6b0cc5d5f14ca1928d
 Author: Vladimir Vukicevic <vladimir@pobox.com>
 Date:   Sat Aug 25 23:30:41 2007 -0700
@@ -724,7 +20182,7 @@ Date:   Fri Aug 24 16:12:30 2007 -0400
     Add conjoint and disjoint operators to pixman.h
 
 commit 245a5e04eb4bf3b973d32ce5f21e6e2eac00b48b
-Merge: 25846ed... 9c09561...
+Merge: 25846ed 9c09561
 Author: Søren Sandmann Pedersen <sandmann@redhat.com>
 Date:   Tue Aug 21 16:31:45 2007 -0400
 
@@ -847,6 +20305,12 @@ Date:   Mon Jul 2 12:18:42 2007 -0400
 
     Port Vlad's fixes for integer overflows with malloc().
 
+commit 33d4028e3fffa231f40d66b5843de589ec2642fe
+Author: root <root@echo.(none)>
+Date:   Sun Jul 1 11:42:49 2007 +0000
+
+    First import of vmx
+
 commit 2e61f30e4c8d0e01e175495e13a5f132521ad6f2
 Author: Søren Sandmann <sandmann@redhat.com>
 Date:   Fri Jun 22 13:37:46 2007 -0400
@@ -982,7 +20446,7 @@ Date:   Sun Jun 17 18:50:00 2007 -0400
     Remove accidentally committed use of TIMER_BEGIN/END
 
 commit e71844095ea75b4f9f66c85c87b4b3b6c287e02f
-Merge: 4f9f7ae... 647852d...
+Merge: 4f9f7ae 647852d
 Author: Søren Sandmann Pedersen <sandmann@redhat.com>
 Date:   Sun Jun 17 18:49:02 2007 -0400
 
diff --git a/lib/pixman/Makefile.am b/lib/pixman/Makefile.am
index ba579e5be..6e56d5140 100644
--- a/lib/pixman/Makefile.am
+++ b/lib/pixman/Makefile.am
@@ -10,7 +10,7 @@ snapshot:
 	test -d "$(srcdir)/.git" && distdir=$$distdir-`cd "$(srcdir)" && git rev-parse HEAD | cut -c 1-6`; \
 	$(MAKE) $(AM_MAKEFLAGS) distdir="$$distdir" dist
 
-GPGKEY=6FF7C1A8
+GPGKEY=3892336E
 USERNAME=$$USER
 RELEASE_OR_SNAPSHOT = $$(if test "x$(PIXMAN_VERSION_MINOR)" = "x$$(echo "$(PIXMAN_VERSION_MINOR)/2*2" | bc)" ; then echo release; else echo snapshot; fi)
 RELEASE_CAIRO_HOST =	$(USERNAME)@cairographics.org
@@ -121,7 +121,7 @@ release-publish-message: $(HASHFILES) ensure-prev
 	@echo ""
 	@echo "GPG signature:"
 	@echo "	$(RELEASE_CAIRO_URL)/$(gpg_file)"
-	@echo "	(signed by `git config --get user.name` <`git config --get user.email`>)"
+	@echo "	(signed by`gpg --list-keys $(GPGKEY) | grep uid | cut -b4- | tr -s " "`)"
 	@echo ""
 	@echo "Git:"
 	@echo "	git://git.freedesktop.org/git/pixman"
diff --git a/lib/pixman/Makefile.bsd-wrapper b/lib/pixman/Makefile.bsd-wrapper
index 8340aaca4..e06c1ce13 100644
--- a/lib/pixman/Makefile.bsd-wrapper
+++ b/lib/pixman/Makefile.bsd-wrapper
@@ -1,8 +1,8 @@
-# $OpenBSD: Makefile.bsd-wrapper,v 1.19 2013/02/19 20:56:17 brad Exp $
+# $OpenBSD: Makefile.bsd-wrapper,v 1.20 2013/06/07 17:18:00 matthieu Exp $
 
 .include <bsd.own.mk>
 
-SHARED_LIBS=	pixman-1 28.0
+SHARED_LIBS=	pixman-1 30.0
 
 .if ${MACHINE_ARCH} == arm
 CONFIGURE_ARGS +=  --disable-arm-simd --disable-arm-neon
diff --git a/lib/pixman/Makefile.in b/lib/pixman/Makefile.in
index 43a0fbc82..6cfa1d14d 100644
--- a/lib/pixman/Makefile.in
+++ b/lib/pixman/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.12.3 from Makefile.am.
+# Makefile.in generated by automake 1.12.6 from Makefile.am.
 # @configure_input@
 
 # Copyright (C) 1994-2012 Free Software Foundation, Inc.
@@ -247,6 +247,8 @@ PIXMAN_VERSION_MAJOR = @PIXMAN_VERSION_MAJOR@
 PIXMAN_VERSION_MICRO = @PIXMAN_VERSION_MICRO@
 PIXMAN_VERSION_MINOR = @PIXMAN_VERSION_MINOR@
 PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
 PNG_CFLAGS = @PNG_CFLAGS@
 PNG_LIBS = @PNG_LIBS@
 PTHREAD_LDFLAGS = @PTHREAD_LDFLAGS@
@@ -318,7 +320,7 @@ top_srcdir = @top_srcdir@
 SUBDIRS = pixman  
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = pixman-1.pc
-GPGKEY = 6FF7C1A8
+GPGKEY = 3892336E
 USERNAME = $$USER
 RELEASE_OR_SNAPSHOT = $$(if test "x$(PIXMAN_VERSION_MINOR)" = "x$$(echo "$(PIXMAN_VERSION_MINOR)/2*2" | bc)" ; then echo release; else echo snapshot; fi)
 RELEASE_CAIRO_HOST = $(USERNAME)@cairographics.org
@@ -964,7 +966,7 @@ release-publish-message: $(HASHFILES) ensure-prev
 	@echo ""
 	@echo "GPG signature:"
 	@echo "	$(RELEASE_CAIRO_URL)/$(gpg_file)"
-	@echo "	(signed by `git config --get user.name` <`git config --get user.email`>)"
+	@echo "	(signed by`gpg --list-keys $(GPGKEY) | grep uid | cut -b4- | tr -s " "`)"
 	@echo ""
 	@echo "Git:"
 	@echo "	git://git.freedesktop.org/git/pixman"
diff --git a/lib/pixman/README b/lib/pixman/README
index 3cfbc5053..6d8cfd8ad 100644
--- a/lib/pixman/README
+++ b/lib/pixman/README
@@ -1,22 +1,116 @@
-pixman is a library that provides low-level pixel manipulation
+Pixman is a library that provides low-level pixel manipulation
 features such as image compositing and trapezoid rasterization.
 
-All questions regarding this software should be directed to the pixman
+Questions, bug reports and patches should be directed to the pixman
 mailing list:
 
         http://lists.freedesktop.org/mailman/listinfo/pixman
 
-Please send patches and bug reports either to the mailing list above,
-or file them at the freedesktop bug tracker:
+You can also file bugs at
 
         https://bugs.freedesktop.org/enter_bug.cgi?product=pixman
 
-The master development code repository can be found at:
+For real time discussions about pixman, feel free to join the IRC
+channels #cairo and #xorg-devel on the FreeNode IRC network.
+
+
+Contributing
+------------
+
+In order to contribute to pixman, you will need a working knowledge of
+the git version control system. For a quick getting started guide,
+there is the "Everyday Git With 20 Commands Or So" guide
+
+        http://www.kernel.org/pub/software/scm/git/docs/everyday.html
+
+from the Git homepage. For more in depth git documentation, see the
+resources on the Git community documentation page:
+
+        http://git-scm.com/documentation
+
+Pixman uses the infrastructure from the freedesktop.org umbrella
+project. For instructions about how to use the git service on
+freedesktop.org, see:
+
+        http://www.freedesktop.org/wiki/Infrastructure/git/Developers
+
+The Pixman master repository can be found at:
 
 	git://anongit.freedesktop.org/git/pixman
 
-	http://gitweb.freedesktop.org/?p=pixman;a=summary
+and browsed on the web here:
+
+	http://cgit.freedesktop.org/pixman/
+
+
+Sending patches
+---------------
+
+The general workflow for sending patches is to first make sure that
+git can send mail on your system. Then, 
+
+ - create a branch off of master in your local git repository
+
+ - make your changes as one or more commits
+
+ - use the 
+
+        git send-email
+
+   command to send the patch series to pixman@lists.freedesktop.org.
+
+In order for your patches to be accepted, please consider the
+following guidelines:
+
+ - This link:
+
+        http://www.kernel.org/pub/software/scm/git/docs/user-manual.html#patch-series
+
+   describes what a good patch series is, and how to create one with
+   git.
+
+ - At each point in the series, pixman should compile and the test
+   suite should pass.
+
+   The exception here is if you are changing the test suite to
+   demonstrate a bug. In this case, make one commit that makes the
+   test suite fail due to the bug, and then another commit that fixes
+   the bug.
+
+   You can run the test suite with 
+
+        make check
+
+   It will take around two minutes to run on a modern PC.
+
+ - Follow the coding style described in the CODING_STYLE file
+
+ - For bug fixes, include an update to the test suite to make sure
+   the bug doesn't reappear.
+
+ - For new features, add tests of the feature to the test
+   suite. Also, add a program demonstrating the new feature to the
+   demos/ directory.
+
+ - Write descriptive commit messages. Useful information to include:
+        - Benchmark results, before and after
+	- Description of the bug that was fixed
+	- Detailed rationale for any new API
+	- Alternative approaches that were rejected (and why they
+          don't work)
+	- If review comments were incorporated, a brief version
+          history describing what those changes were.
+
+ - For big patch series, send an introductory email with an overall
+   description of the patch series, including benchmarks and
+   motivation. Each commit message should still be descriptive and
+   include enough information to understand why this particular commit
+   was necessary.
 
-For more information on the git code manager, see:
+Pixman has high standards for code quality and so almost everybody
+should expect to have the first versions of their patches rejected.
 
-	http://wiki.x.org/wiki/GitPage
+If you think that the reviewers are wrong about something, or that the
+guidelines above are wrong, feel free to discuss the issue on the
+list. The purpose of the guidelines and code review is to ensure high
+code quality; it is not an exercise in compliance.
diff --git a/lib/pixman/aclocal.m4 b/lib/pixman/aclocal.m4
index 04c2ce6a6..ed2570902 100644
--- a/lib/pixman/aclocal.m4
+++ b/lib/pixman/aclocal.m4
@@ -1,4 +1,4 @@
-# generated automatically by aclocal 1.12.3 -*- Autoconf -*-
+# generated automatically by aclocal 1.12.6 -*- Autoconf -*-
 
 # Copyright (C) 1996-2012 Free Software Foundation, Inc.
 
@@ -8607,6 +8607,7 @@ m4_ifndef([_LT_PROG_FC],		[AC_DEFUN([_LT_PROG_FC])])
 m4_ifndef([_LT_PROG_CXX],		[AC_DEFUN([_LT_PROG_CXX])])
 
 # pkg.m4 - Macros to locate and utilise pkg-config.            -*- Autoconf -*-
+# serial 1 (pkg-config-0.24)
 # 
 # Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
 #
@@ -8633,8 +8634,12 @@ m4_ifndef([_LT_PROG_CXX],		[AC_DEFUN([_LT_PROG_CXX])])
 # ----------------------------------
 AC_DEFUN([PKG_PROG_PKG_CONFIG],
 [m4_pattern_forbid([^_?PKG_[A-Z_]+$])
-m4_pattern_allow([^PKG_CONFIG(_PATH)?$])
-AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])dnl
+m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$])
+m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$])
+AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])
+AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path])
+AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path])
+
 if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
 	AC_PATH_TOOL([PKG_CONFIG], [pkg-config])
 fi
@@ -8647,7 +8652,6 @@ if test -n "$PKG_CONFIG"; then
 		AC_MSG_RESULT([no])
 		PKG_CONFIG=""
 	fi
-		
 fi[]dnl
 ])# PKG_PROG_PKG_CONFIG
 
@@ -8656,34 +8660,32 @@ fi[]dnl
 # Check to see whether a particular set of modules exists.  Similar
 # to PKG_CHECK_MODULES(), but does not set variables or print errors.
 #
-#
-# Similar to PKG_CHECK_MODULES, make sure that the first instance of
-# this or PKG_CHECK_MODULES is called, or make sure to call
-# PKG_CHECK_EXISTS manually
+# Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG])
+# only at the first occurrence in configure.ac, so if the first place
+# it's called might be skipped (such as if it is within an "if"), you
+# have to call PKG_CHECK_EXISTS manually
 # --------------------------------------------------------------
 AC_DEFUN([PKG_CHECK_EXISTS],
 [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
 if test -n "$PKG_CONFIG" && \
     AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then
-  m4_ifval([$2], [$2], [:])
+  m4_default([$2], [:])
 m4_ifvaln([$3], [else
   $3])dnl
 fi])
 
-
 # _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
 # ---------------------------------------------
 m4_define([_PKG_CONFIG],
-[if test -n "$PKG_CONFIG"; then
-    if test -n "$$1"; then
-        pkg_cv_[]$1="$$1"
-    else
-        PKG_CHECK_EXISTS([$3],
-                         [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`],
-			 [pkg_failed=yes])
-    fi
-else
-	pkg_failed=untried
+[if test -n "$$1"; then
+    pkg_cv_[]$1="$$1"
+ elif test -n "$PKG_CONFIG"; then
+    PKG_CHECK_EXISTS([$3],
+                     [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`
+		      test "x$?" != "x0" && pkg_failed=yes ],
+		     [pkg_failed=yes])
+ else
+    pkg_failed=untried
 fi[]dnl
 ])# _PKG_CONFIG
 
@@ -8725,16 +8727,17 @@ and $1[]_LIBS to avoid the need to call pkg-config.
 See the pkg-config man page for more details.])
 
 if test $pkg_failed = yes; then
+   	AC_MSG_RESULT([no])
         _PKG_SHORT_ERRORS_SUPPORTED
         if test $_pkg_short_errors_supported = yes; then
-	        $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "$2"`
+	        $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1`
         else 
-	        $1[]_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "$2"`
+	        $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1`
         fi
 	# Put the nasty error message in config.log where it belongs
 	echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
 
-	ifelse([$4], , [AC_MSG_ERROR(dnl
+	m4_default([$4], [AC_MSG_ERROR(
 [Package requirements ($2) were not met:
 
 $$1_PKG_ERRORS
@@ -8742,28 +8745,67 @@ $$1_PKG_ERRORS
 Consider adjusting the PKG_CONFIG_PATH environment variable if you
 installed software in a non-standard prefix.
 
-_PKG_TEXT
-])],
-		[AC_MSG_RESULT([no])
-                $4])
+_PKG_TEXT])[]dnl
+        ])
 elif test $pkg_failed = untried; then
-	ifelse([$4], , [AC_MSG_FAILURE(dnl
+     	AC_MSG_RESULT([no])
+	m4_default([$4], [AC_MSG_FAILURE(
 [The pkg-config script could not be found or is too old.  Make sure it
 is in your PATH or set the PKG_CONFIG environment variable to the full
 path to pkg-config.
 
 _PKG_TEXT
 
-To get pkg-config, see <http://www.freedesktop.org/software/pkgconfig>.])],
-		[$4])
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.])[]dnl
+        ])
 else
 	$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
 	$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
         AC_MSG_RESULT([yes])
-	ifelse([$3], , :, [$3])
+	$3
 fi[]dnl
 ])# PKG_CHECK_MODULES
 
+
+# PKG_INSTALLDIR(DIRECTORY)
+# -------------------------
+# Substitutes the variable pkgconfigdir as the location where a module
+# should install pkg-config .pc files. By default the directory is
+# $libdir/pkgconfig, but the default can be changed by passing
+# DIRECTORY. The user can override through the --with-pkgconfigdir
+# parameter.
+AC_DEFUN([PKG_INSTALLDIR],
+[m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])])
+m4_pushdef([pkg_description],
+    [pkg-config installation directory @<:@]pkg_default[@:>@])
+AC_ARG_WITH([pkgconfigdir],
+    [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],,
+    [with_pkgconfigdir=]pkg_default)
+AC_SUBST([pkgconfigdir], [$with_pkgconfigdir])
+m4_popdef([pkg_default])
+m4_popdef([pkg_description])
+]) dnl PKG_INSTALLDIR
+
+
+# PKG_NOARCH_INSTALLDIR(DIRECTORY)
+# -------------------------
+# Substitutes the variable noarch_pkgconfigdir as the location where a
+# module should install arch-independent pkg-config .pc files. By
+# default the directory is $datadir/pkgconfig, but the default can be
+# changed by passing DIRECTORY. The user can override through the
+# --with-noarch-pkgconfigdir parameter.
+AC_DEFUN([PKG_NOARCH_INSTALLDIR],
+[m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])])
+m4_pushdef([pkg_description],
+    [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@])
+AC_ARG_WITH([noarch-pkgconfigdir],
+    [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],,
+    [with_noarch_pkgconfigdir=]pkg_default)
+AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir])
+m4_popdef([pkg_default])
+m4_popdef([pkg_description])
+]) dnl PKG_NOARCH_INSTALLDIR
+
 # Copyright (C) 2002-2012 Free Software Foundation, Inc.
 #
 # This file is free software; the Free Software Foundation
@@ -8779,7 +8821,7 @@ AC_DEFUN([AM_AUTOMAKE_VERSION],
 [am__api_version='1.12'
 dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
 dnl require some minimum version.  Point them to the right macro.
-m4_if([$1], [1.12.3], [],
+m4_if([$1], [1.12.6], [],
       [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
 ])
 
@@ -8795,7 +8837,7 @@ m4_define([_AM_AUTOCONF_VERSION], [])
 # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
 # This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
 AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
-[AM_AUTOMAKE_VERSION([1.12.3])dnl
+[AM_AUTOMAKE_VERSION([1.12.6])dnl
 m4_ifndef([AC_AUTOCONF_VERSION],
   [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
 _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
@@ -9171,15 +9213,6 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
      [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
 ])
 
-# Copyright (C) 1996-2012 Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# AM_CONFIG_HEADER is obsolete.  It has been replaced by AC_CONFIG_HEADERS.
-AU_DEFUN([AM_CONFIG_HEADER], [AC_CONFIG_HEADERS($@)])
-
 # Do all the work for Automake.                             -*- Autoconf -*-
 
 # Copyright (C) 1996-2012 Free Software Foundation, Inc.
diff --git a/lib/pixman/config.guess b/lib/pixman/config.guess
index aa04f04bd..872b96a16 100644
--- a/lib/pixman/config.guess
+++ b/lib/pixman/config.guess
@@ -4,7 +4,7 @@
 #   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
 #   2011, 2012 Free Software Foundation, Inc.
 
-timestamp='2012-06-17'
+timestamp='2012-09-25'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
@@ -306,7 +306,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
     arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
 	echo arm-acorn-riscix${UNAME_RELEASE}
 	exit ;;
-    arm:riscos:*:*|arm:RISCOS:*:*)
+    arm*:riscos:*:*|arm*:RISCOS:*:*)
 	echo arm-unknown-riscos
 	exit ;;
     SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
@@ -805,6 +805,9 @@ EOF
     i*:CYGWIN*:*)
 	echo ${UNAME_MACHINE}-pc-cygwin
 	exit ;;
+    *:MINGW64*:*)
+	echo ${UNAME_MACHINE}-pc-mingw64
+	exit ;;
     *:MINGW*:*)
 	echo ${UNAME_MACHINE}-pc-mingw32
 	exit ;;
@@ -1205,6 +1208,9 @@ EOF
     BePC:Haiku:*:*)	# Haiku running on Intel PC compatible.
 	echo i586-pc-haiku
 	exit ;;
+    x86_64:Haiku:*:*)
+	echo x86_64-unknown-haiku
+	exit ;;
     SX-4:SUPER-UX:*:*)
 	echo sx4-nec-superux${UNAME_RELEASE}
 	exit ;;
@@ -1334,9 +1340,6 @@ EOF
 	exit ;;
 esac
 
-#echo '(No uname command or uname output not recognized.)' 1>&2
-#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
-
 eval $set_cc_for_build
 cat >$dummy.c <<EOF
 #ifdef _SEQUENT_
diff --git a/lib/pixman/config.h.in b/lib/pixman/config.h.in
index c06fbaa41..d26107ff8 100644
--- a/lib/pixman/config.h.in
+++ b/lib/pixman/config.h.in
@@ -15,6 +15,9 @@
 /* Define to 1 if we have <fenv.h> */
 #undef HAVE_FENV_H
 
+/* Whether the tool chain supports __float128 */
+#undef HAVE_FLOAT128
+
 /* Define to 1 if you have the `getisax' function. */
 #undef HAVE_GETISAX
 
@@ -165,3 +168,6 @@
 #ifndef __cplusplus
 #undef inline
 #endif
+
+/* Define to sqrt if you do not have the `sqrtf' function. */
+#undef sqrtf
diff --git a/lib/pixman/config.sub b/lib/pixman/config.sub
index aa2cf19b8..8df551109 100644
--- a/lib/pixman/config.sub
+++ b/lib/pixman/config.sub
@@ -4,7 +4,7 @@
 #   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
 #   2011, 2012 Free Software Foundation, Inc.
 
-timestamp='2012-06-17'
+timestamp='2012-12-06'
 
 # This file is (in principle) common to ALL GNU software.
 # The presence of a machine in this file suggests that SOME GNU software
@@ -123,7 +123,7 @@ esac
 maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
 case $maybe_os in
   nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
-  linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
+  linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
   knetbsd*-gnu* | netbsd*-gnu* | \
   kopensolaris*-gnu* | \
   storm-chaos* | os2-emx* | rtmk-nova*)
@@ -156,7 +156,7 @@ case $os in
 	-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
 	-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
 	-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
-	-apple | -axis | -knuth | -cray | -microblaze)
+	-apple | -axis | -knuth | -cray | -microblaze*)
 		os=
 		basic_machine=$1
 		;;
@@ -259,8 +259,10 @@ case $basic_machine in
 	| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
 	| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
 	| am33_2.0 \
-	| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
-        | be32 | be64 \
+	| arc \
+	| arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
+	| avr | avr32 \
+	| be32 | be64 \
 	| bfin \
 	| c4x | clipper \
 	| d10v | d30v | dlx | dsp16xx \
@@ -273,7 +275,7 @@ case $basic_machine in
 	| le32 | le64 \
 	| lm32 \
 	| m32c | m32r | m32rle | m68000 | m68k | m88k \
-	| maxq | mb | microblaze | mcore | mep | metag \
+	| maxq | mb | microblaze | microblazeel | mcore | mep | metag \
 	| mips | mipsbe | mipseb | mipsel | mipsle \
 	| mips16 \
 	| mips64 | mips64el \
@@ -389,7 +391,8 @@ case $basic_machine in
 	| lm32-* \
 	| m32c-* | m32r-* | m32rle-* \
 	| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
-	| m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \
+	| m88110-* | m88k-* | maxq-* | mcore-* | metag-* \
+	| microblaze-* | microblazeel-* \
 	| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
 	| mips16-* \
 	| mips64-* | mips64el-* \
@@ -788,9 +791,13 @@ case $basic_machine in
 		basic_machine=ns32k-utek
 		os=-sysv
 		;;
-	microblaze)
+	microblaze*)
 		basic_machine=microblaze-xilinx
 		;;
+	mingw64)
+		basic_machine=x86_64-pc
+		os=-mingw64
+		;;
 	mingw32)
 		basic_machine=i386-pc
 		os=-mingw32
@@ -1019,7 +1026,11 @@ case $basic_machine in
 		basic_machine=i586-unknown
 		os=-pw32
 		;;
-	rdos)
+	rdos | rdos64)
+		basic_machine=x86_64-pc
+		os=-rdos
+		;;
+	rdos32)
 		basic_machine=i386-pc
 		os=-rdos
 		;;
@@ -1359,8 +1370,8 @@ case $os in
 	      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
 	      | -chorusos* | -chorusrdb* | -cegcc* \
 	      | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
-	      | -mingw32* | -linux-gnu* | -linux-android* \
-	      | -linux-newlib* | -linux-uclibc* \
+	      | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
+	      | -linux-newlib* | -linux-musl* | -linux-uclibc* \
 	      | -uxpv* | -beos* | -mpeix* | -udk* \
 	      | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
 	      | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
diff --git a/lib/pixman/configure b/lib/pixman/configure
index fdc5e5289..2858a5212 100644
--- a/lib/pixman/configure
+++ b/lib/pixman/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for pixman 0.28.0.
+# Generated by GNU Autoconf 2.69 for pixman 0.30.0.
 #
 # Report bugs to <pixman@lists.freedesktop.org>.
 #
@@ -590,8 +590,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='pixman'
 PACKAGE_TARNAME='pixman'
-PACKAGE_VERSION='0.28.0'
-PACKAGE_STRING='pixman 0.28.0'
+PACKAGE_VERSION='0.30.0'
+PACKAGE_STRING='pixman 0.30.0'
 PACKAGE_BUGREPORT='pixman@lists.freedesktop.org'
 PACKAGE_URL=''
 
@@ -647,6 +647,8 @@ HAVE_GTK_FALSE
 HAVE_GTK_TRUE
 GTK_LIBS
 GTK_CFLAGS
+PKG_CONFIG_LIBDIR
+PKG_CONFIG_PATH
 PKG_CONFIG
 PIXMAN_TIMERS
 TESTPROGS_EXTRA_LDFLAGS
@@ -837,6 +839,8 @@ CCAS
 CCASFLAGS
 CPP
 PKG_CONFIG
+PKG_CONFIG_PATH
+PKG_CONFIG_LIBDIR
 GTK_CFLAGS
 GTK_LIBS
 PNG_CFLAGS
@@ -1381,7 +1385,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures pixman 0.28.0 to adapt to many kinds of systems.
+\`configure' configures pixman 0.30.0 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1451,7 +1455,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of pixman 0.28.0:";;
+     short | recursive ) echo "Configuration of pixman 0.30.0:";;
    esac
   cat <<\_ACEOF
 
@@ -1510,6 +1514,10 @@ Some influential environment variables:
   CCASFLAGS   assembler compiler flags (defaults to CFLAGS)
   CPP         C preprocessor
   PKG_CONFIG  path to pkg-config utility
+  PKG_CONFIG_PATH
+              directories to add to pkg-config's search path
+  PKG_CONFIG_LIBDIR
+              path overriding pkg-config's built-in search path
   GTK_CFLAGS  C compiler flags for GTK, overriding pkg-config
   GTK_LIBS    linker flags for GTK, overriding pkg-config
   PNG_CFLAGS  C compiler flags for PNG, overriding pkg-config
@@ -1581,7 +1589,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-pixman configure 0.28.0
+pixman configure 0.30.0
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2179,7 +2187,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by pixman $as_me 0.28.0, which was
+It was created by pixman $as_me 0.30.0, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -3003,7 +3011,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='pixman'
- VERSION='0.28.0'
+ VERSION='0.30.0'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -12187,11 +12195,11 @@ fi
 
 
 
-LT_VERSION_INFO="28:0:28"
+LT_VERSION_INFO="30:0:30"
 
 PIXMAN_VERSION_MAJOR=0
 
-PIXMAN_VERSION_MINOR=28
+PIXMAN_VERSION_MINOR=30
 
 PIXMAN_VERSION_MICRO=0
 
@@ -13254,6 +13262,11 @@ fi
 
 
 
+
+
+
+
+
 if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
 	if test -n "$ac_tool_prefix"; then
   # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args.
@@ -13366,7 +13379,6 @@ $as_echo "yes" >&6; }
 $as_echo "no" >&6; }
 		PKG_CONFIG=""
 	fi
-
 fi
 
 if test $enable_gtk = yes ; then
@@ -13420,46 +13432,46 @@ pkg_failed=no
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GTK" >&5
 $as_echo_n "checking for GTK... " >&6; }
 
-if test -n "$PKG_CONFIG"; then
-    if test -n "$GTK_CFLAGS"; then
-        pkg_cv_GTK_CFLAGS="$GTK_CFLAGS"
-    else
-        if test -n "$PKG_CONFIG" && \
+if test -n "$GTK_CFLAGS"; then
+    pkg_cv_GTK_CFLAGS="$GTK_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+    if test -n "$PKG_CONFIG" && \
     { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"gtk+-2.0 pixman-1\""; } >&5
   ($PKG_CONFIG --exists --print-errors "gtk+-2.0 pixman-1") 2>&5
   ac_status=$?
   $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
   test $ac_status = 0; }; then
   pkg_cv_GTK_CFLAGS=`$PKG_CONFIG --cflags "gtk+-2.0 pixman-1" 2>/dev/null`
+		      test "x$?" != "x0" && pkg_failed=yes
 else
   pkg_failed=yes
 fi
-    fi
-else
-	pkg_failed=untried
+ else
+    pkg_failed=untried
 fi
-if test -n "$PKG_CONFIG"; then
-    if test -n "$GTK_LIBS"; then
-        pkg_cv_GTK_LIBS="$GTK_LIBS"
-    else
-        if test -n "$PKG_CONFIG" && \
+if test -n "$GTK_LIBS"; then
+    pkg_cv_GTK_LIBS="$GTK_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+    if test -n "$PKG_CONFIG" && \
     { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"gtk+-2.0 pixman-1\""; } >&5
   ($PKG_CONFIG --exists --print-errors "gtk+-2.0 pixman-1") 2>&5
   ac_status=$?
   $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
   test $ac_status = 0; }; then
   pkg_cv_GTK_LIBS=`$PKG_CONFIG --libs "gtk+-2.0 pixman-1" 2>/dev/null`
+		      test "x$?" != "x0" && pkg_failed=yes
 else
   pkg_failed=yes
 fi
-    fi
-else
-	pkg_failed=untried
+ else
+    pkg_failed=untried
 fi
 
 
 
 if test $pkg_failed = yes; then
+   	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
 
 if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
         _pkg_short_errors_supported=yes
@@ -13467,9 +13479,9 @@ else
         _pkg_short_errors_supported=no
 fi
         if test $_pkg_short_errors_supported = yes; then
-	        GTK_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "gtk+-2.0 pixman-1"`
+	        GTK_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "gtk+-2.0 pixman-1" 2>&1`
         else
-	        GTK_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "gtk+-2.0 pixman-1"`
+	        GTK_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "gtk+-2.0 pixman-1" 2>&1`
         fi
 	# Put the nasty error message in config.log where it belongs
 	echo "$GTK_PKG_ERRORS" >&5
@@ -13483,9 +13495,10 @@ installed software in a non-standard prefix.
 
 Alternatively, you may set the environment variables GTK_CFLAGS
 and GTK_LIBS to avoid the need to call pkg-config.
-See the pkg-config man page for more details.
-" "$LINENO" 5
+See the pkg-config man page for more details." "$LINENO" 5
 elif test $pkg_failed = untried; then
+     	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
 	{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
 as_fn_error $? "The pkg-config script could not be found or is too old.  Make sure it
@@ -13496,14 +13509,14 @@ Alternatively, you may set the environment variables GTK_CFLAGS
 and GTK_LIBS to avoid the need to call pkg-config.
 See the pkg-config man page for more details.
 
-To get pkg-config, see <http://www.freedesktop.org/software/pkgconfig>.
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.
 See \`config.log' for more details" "$LINENO" 5; }
 else
 	GTK_CFLAGS=$pkg_cv_GTK_CFLAGS
 	GTK_LIBS=$pkg_cv_GTK_LIBS
         { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
-	:
+
 fi
 fi
 
@@ -13558,46 +13571,46 @@ pkg_failed=no
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GTK" >&5
 $as_echo_n "checking for GTK... " >&6; }
 
-if test -n "$PKG_CONFIG"; then
-    if test -n "$GTK_CFLAGS"; then
-        pkg_cv_GTK_CFLAGS="$GTK_CFLAGS"
-    else
-        if test -n "$PKG_CONFIG" && \
+if test -n "$GTK_CFLAGS"; then
+    pkg_cv_GTK_CFLAGS="$GTK_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+    if test -n "$PKG_CONFIG" && \
     { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"gtk+-2.0 pixman-1\""; } >&5
   ($PKG_CONFIG --exists --print-errors "gtk+-2.0 pixman-1") 2>&5
   ac_status=$?
   $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
   test $ac_status = 0; }; then
   pkg_cv_GTK_CFLAGS=`$PKG_CONFIG --cflags "gtk+-2.0 pixman-1" 2>/dev/null`
+		      test "x$?" != "x0" && pkg_failed=yes
 else
   pkg_failed=yes
 fi
-    fi
-else
-	pkg_failed=untried
+ else
+    pkg_failed=untried
 fi
-if test -n "$PKG_CONFIG"; then
-    if test -n "$GTK_LIBS"; then
-        pkg_cv_GTK_LIBS="$GTK_LIBS"
-    else
-        if test -n "$PKG_CONFIG" && \
+if test -n "$GTK_LIBS"; then
+    pkg_cv_GTK_LIBS="$GTK_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+    if test -n "$PKG_CONFIG" && \
     { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"gtk+-2.0 pixman-1\""; } >&5
   ($PKG_CONFIG --exists --print-errors "gtk+-2.0 pixman-1") 2>&5
   ac_status=$?
   $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
   test $ac_status = 0; }; then
   pkg_cv_GTK_LIBS=`$PKG_CONFIG --libs "gtk+-2.0 pixman-1" 2>/dev/null`
+		      test "x$?" != "x0" && pkg_failed=yes
 else
   pkg_failed=yes
 fi
-    fi
-else
-	pkg_failed=untried
+ else
+    pkg_failed=untried
 fi
 
 
 
 if test $pkg_failed = yes; then
+   	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
 
 if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
         _pkg_short_errors_supported=yes
@@ -13605,17 +13618,17 @@ else
         _pkg_short_errors_supported=no
 fi
         if test $_pkg_short_errors_supported = yes; then
-	        GTK_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "gtk+-2.0 pixman-1"`
+	        GTK_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "gtk+-2.0 pixman-1" 2>&1`
         else
-	        GTK_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "gtk+-2.0 pixman-1"`
+	        GTK_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "gtk+-2.0 pixman-1" 2>&1`
         fi
 	# Put the nasty error message in config.log where it belongs
 	echo "$GTK_PKG_ERRORS" >&5
 
-	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-                enable_gtk=no
+	enable_gtk=no
 elif test $pkg_failed = untried; then
+     	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
 	enable_gtk=no
 else
 	GTK_CFLAGS=$pkg_cv_GTK_CFLAGS
@@ -13805,6 +13818,68 @@ $as_echo "#define HAVE_GETTIMEOFDAY 1" >>confdefs.h
 fi
 
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing sqrtf" >&5
+$as_echo_n "checking for library containing sqrtf... " >&6; }
+if ${ac_cv_search_sqrtf+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char sqrtf ();
+int
+main ()
+{
+return sqrtf ();
+  ;
+  return 0;
+}
+_ACEOF
+for ac_lib in '' m; do
+  if test -z "$ac_lib"; then
+    ac_res="none required"
+  else
+    ac_res=-l$ac_lib
+    LIBS="-l$ac_lib  $ac_func_search_save_LIBS"
+  fi
+  if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_search_sqrtf=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext
+  if ${ac_cv_search_sqrtf+:} false; then :
+  break
+fi
+done
+if ${ac_cv_search_sqrtf+:} false; then :
+
+else
+  ac_cv_search_sqrtf=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_sqrtf" >&5
+$as_echo "$ac_cv_search_sqrtf" >&6; }
+ac_res=$ac_cv_search_sqrtf
+if test "$ac_res" != no; then :
+  test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+
+$as_echo "#define sqrtf sqrt" >>confdefs.h
+
+fi
+
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for thread local storage (TLS) support" >&5
 $as_echo_n "checking for thread local storage (TLS) support... " >&6; }
 if ${ac_cv_tls+:} false; then :
@@ -13834,7 +13909,7 @@ main ()
 }
 _ACEOF
 if ac_fn_c_try_compile "$LINENO"; then :
-  ac_cv_tls=$kw
+  ac_cv_tls=$kw; break
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
     done
@@ -14217,6 +14292,32 @@ $as_echo "$support_for_attribute_constructor" >&6; }
 
 
 
+support_for_float128=no
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __float128" >&5
+$as_echo_n "checking for __float128... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+__float128 a = 1.0Q, b = 2.0Q; int main (void) { return a + b; }
+
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  support_for_float128=yes
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+
+if test x$support_for_float128 = xyes; then
+
+$as_echo "#define HAVE_FLOAT128 /**/" >>confdefs.h
+
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $support_for_float128" >&5
+$as_echo "$support_for_float128" >&6; }
+
+
 # Check whether --enable-libpng was given.
 if test "${enable_libpng+set}" = set; then :
   enableval=$enable_libpng; have_libpng=$enableval
@@ -14231,46 +14332,46 @@ pkg_failed=no
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for PNG" >&5
 $as_echo_n "checking for PNG... " >&6; }
 
-if test -n "$PKG_CONFIG"; then
-    if test -n "$PNG_CFLAGS"; then
-        pkg_cv_PNG_CFLAGS="$PNG_CFLAGS"
-    else
-        if test -n "$PKG_CONFIG" && \
+if test -n "$PNG_CFLAGS"; then
+    pkg_cv_PNG_CFLAGS="$PNG_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+    if test -n "$PKG_CONFIG" && \
     { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libpng\""; } >&5
   ($PKG_CONFIG --exists --print-errors "libpng") 2>&5
   ac_status=$?
   $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
   test $ac_status = 0; }; then
   pkg_cv_PNG_CFLAGS=`$PKG_CONFIG --cflags "libpng" 2>/dev/null`
+		      test "x$?" != "x0" && pkg_failed=yes
 else
   pkg_failed=yes
 fi
-    fi
-else
-	pkg_failed=untried
+ else
+    pkg_failed=untried
 fi
-if test -n "$PKG_CONFIG"; then
-    if test -n "$PNG_LIBS"; then
-        pkg_cv_PNG_LIBS="$PNG_LIBS"
-    else
-        if test -n "$PKG_CONFIG" && \
+if test -n "$PNG_LIBS"; then
+    pkg_cv_PNG_LIBS="$PNG_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+    if test -n "$PKG_CONFIG" && \
     { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libpng\""; } >&5
   ($PKG_CONFIG --exists --print-errors "libpng") 2>&5
   ac_status=$?
   $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
   test $ac_status = 0; }; then
   pkg_cv_PNG_LIBS=`$PKG_CONFIG --libs "libpng" 2>/dev/null`
+		      test "x$?" != "x0" && pkg_failed=yes
 else
   pkg_failed=yes
 fi
-    fi
-else
-	pkg_failed=untried
+ else
+    pkg_failed=untried
 fi
 
 
 
 if test $pkg_failed = yes; then
+   	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
 
 if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
         _pkg_short_errors_supported=yes
@@ -14278,9 +14379,9 @@ else
         _pkg_short_errors_supported=no
 fi
         if test $_pkg_short_errors_supported = yes; then
-	        PNG_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "libpng"`
+	        PNG_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libpng" 2>&1`
         else
-	        PNG_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "libpng"`
+	        PNG_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libpng" 2>&1`
         fi
 	# Put the nasty error message in config.log where it belongs
 	echo "$PNG_PKG_ERRORS" >&5
@@ -14294,9 +14395,10 @@ installed software in a non-standard prefix.
 
 Alternatively, you may set the environment variables PNG_CFLAGS
 and PNG_LIBS to avoid the need to call pkg-config.
-See the pkg-config man page for more details.
-" "$LINENO" 5
+See the pkg-config man page for more details." "$LINENO" 5
 elif test $pkg_failed = untried; then
+     	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
 	{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
 as_fn_error $? "The pkg-config script could not be found or is too old.  Make sure it
@@ -14307,14 +14409,14 @@ Alternatively, you may set the environment variables PNG_CFLAGS
 and PNG_LIBS to avoid the need to call pkg-config.
 See the pkg-config man page for more details.
 
-To get pkg-config, see <http://www.freedesktop.org/software/pkgconfig>.
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.
 See \`config.log' for more details" "$LINENO" 5; }
 else
 	PNG_CFLAGS=$pkg_cv_PNG_CFLAGS
 	PNG_LIBS=$pkg_cv_PNG_LIBS
         { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
-	:
+
 fi ;;
 	xno) ;;
 	*)
@@ -14322,46 +14424,46 @@ pkg_failed=no
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for PNG" >&5
 $as_echo_n "checking for PNG... " >&6; }
 
-if test -n "$PKG_CONFIG"; then
-    if test -n "$PNG_CFLAGS"; then
-        pkg_cv_PNG_CFLAGS="$PNG_CFLAGS"
-    else
-        if test -n "$PKG_CONFIG" && \
+if test -n "$PNG_CFLAGS"; then
+    pkg_cv_PNG_CFLAGS="$PNG_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+    if test -n "$PKG_CONFIG" && \
     { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libpng\""; } >&5
   ($PKG_CONFIG --exists --print-errors "libpng") 2>&5
   ac_status=$?
   $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
   test $ac_status = 0; }; then
   pkg_cv_PNG_CFLAGS=`$PKG_CONFIG --cflags "libpng" 2>/dev/null`
+		      test "x$?" != "x0" && pkg_failed=yes
 else
   pkg_failed=yes
 fi
-    fi
-else
-	pkg_failed=untried
+ else
+    pkg_failed=untried
 fi
-if test -n "$PKG_CONFIG"; then
-    if test -n "$PNG_LIBS"; then
-        pkg_cv_PNG_LIBS="$PNG_LIBS"
-    else
-        if test -n "$PKG_CONFIG" && \
+if test -n "$PNG_LIBS"; then
+    pkg_cv_PNG_LIBS="$PNG_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+    if test -n "$PKG_CONFIG" && \
     { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libpng\""; } >&5
   ($PKG_CONFIG --exists --print-errors "libpng") 2>&5
   ac_status=$?
   $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
   test $ac_status = 0; }; then
   pkg_cv_PNG_LIBS=`$PKG_CONFIG --libs "libpng" 2>/dev/null`
+		      test "x$?" != "x0" && pkg_failed=yes
 else
   pkg_failed=yes
 fi
-    fi
-else
-	pkg_failed=untried
+ else
+    pkg_failed=untried
 fi
 
 
 
 if test $pkg_failed = yes; then
+   	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
 
 if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
         _pkg_short_errors_supported=yes
@@ -14369,17 +14471,17 @@ else
         _pkg_short_errors_supported=no
 fi
         if test $_pkg_short_errors_supported = yes; then
-	        PNG_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "libpng"`
+	        PNG_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libpng" 2>&1`
         else
-	        PNG_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "libpng"`
+	        PNG_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libpng" 2>&1`
         fi
 	# Put the nasty error message in config.log where it belongs
 	echo "$PNG_PKG_ERRORS" >&5
 
-	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-                have_libpng=no
+	have_libpng=no
 elif test $pkg_failed = untried; then
+     	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
 	have_libpng=no
 else
 	PNG_CFLAGS=$pkg_cv_PNG_CFLAGS
@@ -14975,7 +15077,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by pixman $as_me 0.28.0, which was
+This file was extended by pixman $as_me 0.30.0, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -15041,7 +15143,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-pixman config.status 0.28.0
+pixman config.status 0.30.0
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/lib/pixman/configure.ac b/lib/pixman/configure.ac
index 38e669818..dfd68e4fe 100644
--- a/lib/pixman/configure.ac
+++ b/lib/pixman/configure.ac
@@ -53,7 +53,7 @@ AC_PREREQ([2.57])
 #
 
 m4_define([pixman_major], 0)
-m4_define([pixman_minor], 28)
+m4_define([pixman_minor], 30)
 m4_define([pixman_micro], 0)
 
 m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
@@ -64,7 +64,7 @@ AM_INIT_AUTOMAKE([foreign dist-bzip2])
 # Suppress verbose compile lines
 m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
 
-AM_CONFIG_HEADER(config.h)
+AC_CONFIG_HEADERS(config.h)
 
 AC_CANONICAL_HOST
 
@@ -845,6 +845,13 @@ if test x$have_gettimeofday = xyes && test x$have_sys_time_h = xyes; then
 fi
 
 dnl =====================================
+dnl Check for missing sqrtf() as, e.g., for Solaris 9
+
+AC_SEARCH_LIBS([sqrtf], [m], [],
+               [AC_DEFINE([sqrtf], [sqrt],
+                          [Define to sqrt if you do not have the `sqrtf' function.])])
+
+dnl =====================================
 dnl Thread local storage
 
 AC_MSG_CHECKING(for thread local storage (TLS) support)
@@ -860,7 +867,7 @@ AC_CACHE_VAL(ac_cv_tls, [
 #error OpenBSD has broken __thread support
 #endif
 
-int $kw test;], [], ac_cv_tls=$kw)
+int $kw test;], [], [ac_cv_tls=$kw; break])
     done
 ])
 AC_MSG_RESULT($ac_cv_tls)
@@ -969,6 +976,22 @@ fi
 AC_MSG_RESULT($support_for_attribute_constructor)
 AC_SUBST(TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR)
 
+dnl =====================================
+dnl __float128
+
+support_for_float128=no
+
+AC_MSG_CHECKING(for __float128)
+AC_LINK_IFELSE([AC_LANG_SOURCE([[
+__float128 a = 1.0Q, b = 2.0Q; int main (void) { return a + b; }
+]])], support_for_float128=yes)
+
+if test x$support_for_float128 = xyes; then
+   AC_DEFINE([HAVE_FLOAT128], [], [Whether the tool chain supports __float128])
+fi
+
+AC_MSG_RESULT($support_for_float128)
+
 dnl ==================
 dnl libpng
 
diff --git a/lib/pixman/demos/Makefile.am b/lib/pixman/demos/Makefile.am
index f324f5f5b..9be9ab670 100644
--- a/lib/pixman/demos/Makefile.am
+++ b/lib/pixman/demos/Makefile.am
@@ -4,9 +4,10 @@ AM_CFLAGS = $(OPENMP_CFLAGS)
 AM_LDFLAGS = $(OPENMP_CFLAGS)
 
 LDADD = $(top_builddir)/pixman/libpixman-1.la -lm $(GTK_LIBS) $(PNG_LIBS)
-INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS) $(PNG_CFLAGS)
+AM_CPPFLAGS = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS) $(PNG_CFLAGS)
 
-GTK_UTILS = gtk-utils.c gtk-utils.h ../test/utils.c ../test/utils.h
+GTK_UTILS = gtk-utils.c gtk-utils.h ../test/utils.c ../test/utils.h \
+            ../test/utils-prng.c ../test/utils-prng.h
 
 DEMOS =				\
 	clip-test		\
@@ -14,6 +15,8 @@ DEMOS =				\
 	composite-test		\
 	gradient-test		\
 	radial-test		\
+	linear-gradient		\
+	conical-test		\
 	alpha-test		\
 	screen-test		\
 	convolution-test	\
@@ -22,9 +25,10 @@ DEMOS =				\
 	quad2quad		\
 	checkerboard		\
 	srgb-trap-test		\
-	srgb-test
+	srgb-test		\
+	scale
 
-EXTRA_DIST = parrot.c parrot.jpg
+EXTRA_DIST = parrot.c parrot.jpg scale.ui
 
 gradient_test_SOURCES = gradient-test.c $(GTK_UTILS)
 alpha_test_SOURCES = alpha-test.c $(GTK_UTILS)
@@ -35,10 +39,13 @@ trap_test_SOURCES = trap-test.c $(GTK_UTILS)
 screen_test_SOURCES = screen-test.c $(GTK_UTILS)
 convolution_test_SOURCES = convolution-test.c $(GTK_UTILS)
 radial_test_SOURCES = radial-test.c $(GTK_UTILS)
+linear_gradient_SOURCES = linear-gradient.c $(GTK_UTILS)
+conical_test_SOURCES = conical-test.c $(GTK_UTILS)
 tri_test_SOURCES = tri-test.c $(GTK_UTILS)
 checkerboard_SOURCES = checkerboard.c $(GTK_UTILS)
 srgb_test_SOURCES = srgb-test.c $(GTK_UTILS)
 srgb_trap_test_SOURCES = srgb-trap-test.c $(GTK_UTILS)
+scale_SOURCES = scale.c $(GTK_UTILS)
 
 noinst_PROGRAMS = $(DEMOS)
 
diff --git a/lib/pixman/demos/Makefile.in b/lib/pixman/demos/Makefile.in
index d4aed19b4..277649371 100644
--- a/lib/pixman/demos/Makefile.in
+++ b/lib/pixman/demos/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.12.3 from Makefile.am.
+# Makefile.in generated by automake 1.12.6 from Makefile.am.
 # @configure_input@
 
 # Copyright (C) 1994-2012 Free Software Foundation, Inc.
@@ -64,15 +64,19 @@ CONFIG_CLEAN_FILES =
 CONFIG_CLEAN_VPATH_FILES =
 @HAVE_GTK_TRUE@am__EXEEXT_1 = clip-test$(EXEEXT) clip-in$(EXEEXT) \
 @HAVE_GTK_TRUE@	composite-test$(EXEEXT) gradient-test$(EXEEXT) \
-@HAVE_GTK_TRUE@	radial-test$(EXEEXT) alpha-test$(EXEEXT) \
+@HAVE_GTK_TRUE@	radial-test$(EXEEXT) linear-gradient$(EXEEXT) \
+@HAVE_GTK_TRUE@	conical-test$(EXEEXT) alpha-test$(EXEEXT) \
 @HAVE_GTK_TRUE@	screen-test$(EXEEXT) convolution-test$(EXEEXT) \
 @HAVE_GTK_TRUE@	trap-test$(EXEEXT) tri-test$(EXEEXT) \
 @HAVE_GTK_TRUE@	quad2quad$(EXEEXT) checkerboard$(EXEEXT) \
-@HAVE_GTK_TRUE@	srgb-trap-test$(EXEEXT) srgb-test$(EXEEXT)
+@HAVE_GTK_TRUE@	srgb-trap-test$(EXEEXT) srgb-test$(EXEEXT) \
+@HAVE_GTK_TRUE@	scale$(EXEEXT)
 PROGRAMS = $(noinst_PROGRAMS)
 am__alpha_test_SOURCES_DIST = alpha-test.c gtk-utils.c gtk-utils.h \
-	../test/utils.c ../test/utils.h
-@HAVE_GTK_TRUE@am__objects_1 = gtk-utils.$(OBJEXT) utils.$(OBJEXT)
+	../test/utils.c ../test/utils.h ../test/utils-prng.c \
+	../test/utils-prng.h
+@HAVE_GTK_TRUE@am__objects_1 = gtk-utils.$(OBJEXT) utils.$(OBJEXT) \
+@HAVE_GTK_TRUE@	utils-prng.$(OBJEXT)
 @HAVE_GTK_TRUE@am_alpha_test_OBJECTS = alpha-test.$(OBJEXT) \
 @HAVE_GTK_TRUE@	$(am__objects_1)
 alpha_test_OBJECTS = $(am_alpha_test_OBJECTS)
@@ -86,7 +90,8 @@ am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
 am__v_lt_0 = --silent
 am__v_lt_1 = 
 am__checkerboard_SOURCES_DIST = checkerboard.c gtk-utils.c gtk-utils.h \
-	../test/utils.c ../test/utils.h
+	../test/utils.c ../test/utils.h ../test/utils-prng.c \
+	../test/utils-prng.h
 @HAVE_GTK_TRUE@am_checkerboard_OBJECTS = checkerboard.$(OBJEXT) \
 @HAVE_GTK_TRUE@	$(am__objects_1)
 checkerboard_OBJECTS = $(am_checkerboard_OBJECTS)
@@ -95,7 +100,8 @@ checkerboard_LDADD = $(LDADD)
 @HAVE_GTK_TRUE@	$(top_builddir)/pixman/libpixman-1.la \
 @HAVE_GTK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 am__clip_in_SOURCES_DIST = clip-in.c gtk-utils.c gtk-utils.h \
-	../test/utils.c ../test/utils.h
+	../test/utils.c ../test/utils.h ../test/utils-prng.c \
+	../test/utils-prng.h
 @HAVE_GTK_TRUE@am_clip_in_OBJECTS = clip-in.$(OBJEXT) $(am__objects_1)
 clip_in_OBJECTS = $(am_clip_in_OBJECTS)
 clip_in_LDADD = $(LDADD)
@@ -103,7 +109,8 @@ clip_in_LDADD = $(LDADD)
 @HAVE_GTK_TRUE@	$(top_builddir)/pixman/libpixman-1.la \
 @HAVE_GTK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 am__clip_test_SOURCES_DIST = clip-test.c gtk-utils.c gtk-utils.h \
-	../test/utils.c ../test/utils.h
+	../test/utils.c ../test/utils.h ../test/utils-prng.c \
+	../test/utils-prng.h
 @HAVE_GTK_TRUE@am_clip_test_OBJECTS = clip-test.$(OBJEXT) \
 @HAVE_GTK_TRUE@	$(am__objects_1)
 clip_test_OBJECTS = $(am_clip_test_OBJECTS)
@@ -112,7 +119,8 @@ clip_test_LDADD = $(LDADD)
 @HAVE_GTK_TRUE@	$(top_builddir)/pixman/libpixman-1.la \
 @HAVE_GTK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 am__composite_test_SOURCES_DIST = composite-test.c gtk-utils.c \
-	gtk-utils.h ../test/utils.c ../test/utils.h
+	gtk-utils.h ../test/utils.c ../test/utils.h \
+	../test/utils-prng.c ../test/utils-prng.h
 @HAVE_GTK_TRUE@am_composite_test_OBJECTS = composite-test.$(OBJEXT) \
 @HAVE_GTK_TRUE@	$(am__objects_1)
 composite_test_OBJECTS = $(am_composite_test_OBJECTS)
@@ -120,8 +128,19 @@ composite_test_LDADD = $(LDADD)
 @HAVE_GTK_TRUE@composite_test_DEPENDENCIES =  \
 @HAVE_GTK_TRUE@	$(top_builddir)/pixman/libpixman-1.la \
 @HAVE_GTK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
+am__conical_test_SOURCES_DIST = conical-test.c gtk-utils.c gtk-utils.h \
+	../test/utils.c ../test/utils.h ../test/utils-prng.c \
+	../test/utils-prng.h
+@HAVE_GTK_TRUE@am_conical_test_OBJECTS = conical-test.$(OBJEXT) \
+@HAVE_GTK_TRUE@	$(am__objects_1)
+conical_test_OBJECTS = $(am_conical_test_OBJECTS)
+conical_test_LDADD = $(LDADD)
+@HAVE_GTK_TRUE@conical_test_DEPENDENCIES =  \
+@HAVE_GTK_TRUE@	$(top_builddir)/pixman/libpixman-1.la \
+@HAVE_GTK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 am__convolution_test_SOURCES_DIST = convolution-test.c gtk-utils.c \
-	gtk-utils.h ../test/utils.c ../test/utils.h
+	gtk-utils.h ../test/utils.c ../test/utils.h \
+	../test/utils-prng.c ../test/utils-prng.h
 @HAVE_GTK_TRUE@am_convolution_test_OBJECTS =  \
 @HAVE_GTK_TRUE@	convolution-test.$(OBJEXT) $(am__objects_1)
 convolution_test_OBJECTS = $(am_convolution_test_OBJECTS)
@@ -130,7 +149,8 @@ convolution_test_LDADD = $(LDADD)
 @HAVE_GTK_TRUE@	$(top_builddir)/pixman/libpixman-1.la \
 @HAVE_GTK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 am__gradient_test_SOURCES_DIST = gradient-test.c gtk-utils.c \
-	gtk-utils.h ../test/utils.c ../test/utils.h
+	gtk-utils.h ../test/utils.c ../test/utils.h \
+	../test/utils-prng.c ../test/utils-prng.h
 @HAVE_GTK_TRUE@am_gradient_test_OBJECTS = gradient-test.$(OBJEXT) \
 @HAVE_GTK_TRUE@	$(am__objects_1)
 gradient_test_OBJECTS = $(am_gradient_test_OBJECTS)
@@ -138,6 +158,16 @@ gradient_test_LDADD = $(LDADD)
 @HAVE_GTK_TRUE@gradient_test_DEPENDENCIES =  \
 @HAVE_GTK_TRUE@	$(top_builddir)/pixman/libpixman-1.la \
 @HAVE_GTK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
+am__linear_gradient_SOURCES_DIST = linear-gradient.c gtk-utils.c \
+	gtk-utils.h ../test/utils.c ../test/utils.h \
+	../test/utils-prng.c ../test/utils-prng.h
+@HAVE_GTK_TRUE@am_linear_gradient_OBJECTS = linear-gradient.$(OBJEXT) \
+@HAVE_GTK_TRUE@	$(am__objects_1)
+linear_gradient_OBJECTS = $(am_linear_gradient_OBJECTS)
+linear_gradient_LDADD = $(LDADD)
+@HAVE_GTK_TRUE@linear_gradient_DEPENDENCIES =  \
+@HAVE_GTK_TRUE@	$(top_builddir)/pixman/libpixman-1.la \
+@HAVE_GTK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 quad2quad_SOURCES = quad2quad.c
 quad2quad_OBJECTS = quad2quad.$(OBJEXT)
 quad2quad_LDADD = $(LDADD)
@@ -145,7 +175,8 @@ quad2quad_LDADD = $(LDADD)
 @HAVE_GTK_TRUE@	$(top_builddir)/pixman/libpixman-1.la \
 @HAVE_GTK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 am__radial_test_SOURCES_DIST = radial-test.c gtk-utils.c gtk-utils.h \
-	../test/utils.c ../test/utils.h
+	../test/utils.c ../test/utils.h ../test/utils-prng.c \
+	../test/utils-prng.h
 @HAVE_GTK_TRUE@am_radial_test_OBJECTS = radial-test.$(OBJEXT) \
 @HAVE_GTK_TRUE@	$(am__objects_1)
 radial_test_OBJECTS = $(am_radial_test_OBJECTS)
@@ -153,8 +184,18 @@ radial_test_LDADD = $(LDADD)
 @HAVE_GTK_TRUE@radial_test_DEPENDENCIES =  \
 @HAVE_GTK_TRUE@	$(top_builddir)/pixman/libpixman-1.la \
 @HAVE_GTK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
+am__scale_SOURCES_DIST = scale.c gtk-utils.c gtk-utils.h \
+	../test/utils.c ../test/utils.h ../test/utils-prng.c \
+	../test/utils-prng.h
+@HAVE_GTK_TRUE@am_scale_OBJECTS = scale.$(OBJEXT) $(am__objects_1)
+scale_OBJECTS = $(am_scale_OBJECTS)
+scale_LDADD = $(LDADD)
+@HAVE_GTK_TRUE@scale_DEPENDENCIES =  \
+@HAVE_GTK_TRUE@	$(top_builddir)/pixman/libpixman-1.la \
+@HAVE_GTK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 am__screen_test_SOURCES_DIST = screen-test.c gtk-utils.c gtk-utils.h \
-	../test/utils.c ../test/utils.h
+	../test/utils.c ../test/utils.h ../test/utils-prng.c \
+	../test/utils-prng.h
 @HAVE_GTK_TRUE@am_screen_test_OBJECTS = screen-test.$(OBJEXT) \
 @HAVE_GTK_TRUE@	$(am__objects_1)
 screen_test_OBJECTS = $(am_screen_test_OBJECTS)
@@ -163,7 +204,8 @@ screen_test_LDADD = $(LDADD)
 @HAVE_GTK_TRUE@	$(top_builddir)/pixman/libpixman-1.la \
 @HAVE_GTK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 am__srgb_test_SOURCES_DIST = srgb-test.c gtk-utils.c gtk-utils.h \
-	../test/utils.c ../test/utils.h
+	../test/utils.c ../test/utils.h ../test/utils-prng.c \
+	../test/utils-prng.h
 @HAVE_GTK_TRUE@am_srgb_test_OBJECTS = srgb-test.$(OBJEXT) \
 @HAVE_GTK_TRUE@	$(am__objects_1)
 srgb_test_OBJECTS = $(am_srgb_test_OBJECTS)
@@ -172,7 +214,8 @@ srgb_test_LDADD = $(LDADD)
 @HAVE_GTK_TRUE@	$(top_builddir)/pixman/libpixman-1.la \
 @HAVE_GTK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 am__srgb_trap_test_SOURCES_DIST = srgb-trap-test.c gtk-utils.c \
-	gtk-utils.h ../test/utils.c ../test/utils.h
+	gtk-utils.h ../test/utils.c ../test/utils.h \
+	../test/utils-prng.c ../test/utils-prng.h
 @HAVE_GTK_TRUE@am_srgb_trap_test_OBJECTS = srgb-trap-test.$(OBJEXT) \
 @HAVE_GTK_TRUE@	$(am__objects_1)
 srgb_trap_test_OBJECTS = $(am_srgb_trap_test_OBJECTS)
@@ -181,7 +224,8 @@ srgb_trap_test_LDADD = $(LDADD)
 @HAVE_GTK_TRUE@	$(top_builddir)/pixman/libpixman-1.la \
 @HAVE_GTK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 am__trap_test_SOURCES_DIST = trap-test.c gtk-utils.c gtk-utils.h \
-	../test/utils.c ../test/utils.h
+	../test/utils.c ../test/utils.h ../test/utils-prng.c \
+	../test/utils-prng.h
 @HAVE_GTK_TRUE@am_trap_test_OBJECTS = trap-test.$(OBJEXT) \
 @HAVE_GTK_TRUE@	$(am__objects_1)
 trap_test_OBJECTS = $(am_trap_test_OBJECTS)
@@ -190,7 +234,8 @@ trap_test_LDADD = $(LDADD)
 @HAVE_GTK_TRUE@	$(top_builddir)/pixman/libpixman-1.la \
 @HAVE_GTK_TRUE@	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 am__tri_test_SOURCES_DIST = tri-test.c gtk-utils.c gtk-utils.h \
-	../test/utils.c ../test/utils.h
+	../test/utils.c ../test/utils.h ../test/utils-prng.c \
+	../test/utils-prng.h
 @HAVE_GTK_TRUE@am_tri_test_OBJECTS = tri-test.$(OBJEXT) \
 @HAVE_GTK_TRUE@	$(am__objects_1)
 tri_test_OBJECTS = $(am_tri_test_OBJECTS)
@@ -234,18 +279,21 @@ am__v_CCLD_0 = @echo "  CCLD    " $@;
 am__v_CCLD_1 = 
 SOURCES = $(alpha_test_SOURCES) $(checkerboard_SOURCES) \
 	$(clip_in_SOURCES) $(clip_test_SOURCES) \
-	$(composite_test_SOURCES) $(convolution_test_SOURCES) \
-	$(gradient_test_SOURCES) quad2quad.c $(radial_test_SOURCES) \
-	$(screen_test_SOURCES) $(srgb_test_SOURCES) \
+	$(composite_test_SOURCES) $(conical_test_SOURCES) \
+	$(convolution_test_SOURCES) $(gradient_test_SOURCES) \
+	$(linear_gradient_SOURCES) quad2quad.c $(radial_test_SOURCES) \
+	$(scale_SOURCES) $(screen_test_SOURCES) $(srgb_test_SOURCES) \
 	$(srgb_trap_test_SOURCES) $(trap_test_SOURCES) \
 	$(tri_test_SOURCES)
 DIST_SOURCES = $(am__alpha_test_SOURCES_DIST) \
 	$(am__checkerboard_SOURCES_DIST) $(am__clip_in_SOURCES_DIST) \
 	$(am__clip_test_SOURCES_DIST) \
 	$(am__composite_test_SOURCES_DIST) \
+	$(am__conical_test_SOURCES_DIST) \
 	$(am__convolution_test_SOURCES_DIST) \
-	$(am__gradient_test_SOURCES_DIST) quad2quad.c \
-	$(am__radial_test_SOURCES_DIST) \
+	$(am__gradient_test_SOURCES_DIST) \
+	$(am__linear_gradient_SOURCES_DIST) quad2quad.c \
+	$(am__radial_test_SOURCES_DIST) $(am__scale_SOURCES_DIST) \
 	$(am__screen_test_SOURCES_DIST) $(am__srgb_test_SOURCES_DIST) \
 	$(am__srgb_trap_test_SOURCES_DIST) \
 	$(am__trap_test_SOURCES_DIST) $(am__tri_test_SOURCES_DIST)
@@ -331,6 +379,8 @@ PIXMAN_VERSION_MAJOR = @PIXMAN_VERSION_MAJOR@
 PIXMAN_VERSION_MICRO = @PIXMAN_VERSION_MICRO@
 PIXMAN_VERSION_MINOR = @PIXMAN_VERSION_MINOR@
 PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
 PNG_CFLAGS = @PNG_CFLAGS@
 PNG_LIBS = @PNG_LIBS@
 PTHREAD_LDFLAGS = @PTHREAD_LDFLAGS@
@@ -402,14 +452,18 @@ top_srcdir = @top_srcdir@
 @HAVE_GTK_TRUE@AM_CFLAGS = $(OPENMP_CFLAGS)
 @HAVE_GTK_TRUE@AM_LDFLAGS = $(OPENMP_CFLAGS)
 @HAVE_GTK_TRUE@LDADD = $(top_builddir)/pixman/libpixman-1.la -lm $(GTK_LIBS) $(PNG_LIBS)
-@HAVE_GTK_TRUE@INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS) $(PNG_CFLAGS)
-@HAVE_GTK_TRUE@GTK_UTILS = gtk-utils.c gtk-utils.h ../test/utils.c ../test/utils.h
+@HAVE_GTK_TRUE@AM_CPPFLAGS = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS) $(PNG_CFLAGS)
+@HAVE_GTK_TRUE@GTK_UTILS = gtk-utils.c gtk-utils.h ../test/utils.c ../test/utils.h \
+@HAVE_GTK_TRUE@            ../test/utils-prng.c ../test/utils-prng.h
+
 @HAVE_GTK_TRUE@DEMOS = \
 @HAVE_GTK_TRUE@	clip-test		\
 @HAVE_GTK_TRUE@	clip-in			\
 @HAVE_GTK_TRUE@	composite-test		\
 @HAVE_GTK_TRUE@	gradient-test		\
 @HAVE_GTK_TRUE@	radial-test		\
+@HAVE_GTK_TRUE@	linear-gradient		\
+@HAVE_GTK_TRUE@	conical-test		\
 @HAVE_GTK_TRUE@	alpha-test		\
 @HAVE_GTK_TRUE@	screen-test		\
 @HAVE_GTK_TRUE@	convolution-test	\
@@ -418,9 +472,10 @@ top_srcdir = @top_srcdir@
 @HAVE_GTK_TRUE@	quad2quad		\
 @HAVE_GTK_TRUE@	checkerboard		\
 @HAVE_GTK_TRUE@	srgb-trap-test		\
-@HAVE_GTK_TRUE@	srgb-test
+@HAVE_GTK_TRUE@	srgb-test		\
+@HAVE_GTK_TRUE@	scale
 
-@HAVE_GTK_TRUE@EXTRA_DIST = parrot.c parrot.jpg
+@HAVE_GTK_TRUE@EXTRA_DIST = parrot.c parrot.jpg scale.ui
 @HAVE_GTK_TRUE@gradient_test_SOURCES = gradient-test.c $(GTK_UTILS)
 @HAVE_GTK_TRUE@alpha_test_SOURCES = alpha-test.c $(GTK_UTILS)
 @HAVE_GTK_TRUE@composite_test_SOURCES = composite-test.c $(GTK_UTILS)
@@ -430,10 +485,13 @@ top_srcdir = @top_srcdir@
 @HAVE_GTK_TRUE@screen_test_SOURCES = screen-test.c $(GTK_UTILS)
 @HAVE_GTK_TRUE@convolution_test_SOURCES = convolution-test.c $(GTK_UTILS)
 @HAVE_GTK_TRUE@radial_test_SOURCES = radial-test.c $(GTK_UTILS)
+@HAVE_GTK_TRUE@linear_gradient_SOURCES = linear-gradient.c $(GTK_UTILS)
+@HAVE_GTK_TRUE@conical_test_SOURCES = conical-test.c $(GTK_UTILS)
 @HAVE_GTK_TRUE@tri_test_SOURCES = tri-test.c $(GTK_UTILS)
 @HAVE_GTK_TRUE@checkerboard_SOURCES = checkerboard.c $(GTK_UTILS)
 @HAVE_GTK_TRUE@srgb_test_SOURCES = srgb-test.c $(GTK_UTILS)
 @HAVE_GTK_TRUE@srgb_trap_test_SOURCES = srgb-trap-test.c $(GTK_UTILS)
+@HAVE_GTK_TRUE@scale_SOURCES = scale.c $(GTK_UTILS)
 all: all-am
 
 .SUFFIXES:
@@ -492,18 +550,27 @@ clip-test$(EXEEXT): $(clip_test_OBJECTS) $(clip_test_DEPENDENCIES) $(EXTRA_clip_
 composite-test$(EXEEXT): $(composite_test_OBJECTS) $(composite_test_DEPENDENCIES) $(EXTRA_composite_test_DEPENDENCIES) 
 	@rm -f composite-test$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(composite_test_OBJECTS) $(composite_test_LDADD) $(LIBS)
+conical-test$(EXEEXT): $(conical_test_OBJECTS) $(conical_test_DEPENDENCIES) $(EXTRA_conical_test_DEPENDENCIES) 
+	@rm -f conical-test$(EXEEXT)
+	$(AM_V_CCLD)$(LINK) $(conical_test_OBJECTS) $(conical_test_LDADD) $(LIBS)
 convolution-test$(EXEEXT): $(convolution_test_OBJECTS) $(convolution_test_DEPENDENCIES) $(EXTRA_convolution_test_DEPENDENCIES) 
 	@rm -f convolution-test$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(convolution_test_OBJECTS) $(convolution_test_LDADD) $(LIBS)
 gradient-test$(EXEEXT): $(gradient_test_OBJECTS) $(gradient_test_DEPENDENCIES) $(EXTRA_gradient_test_DEPENDENCIES) 
 	@rm -f gradient-test$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(gradient_test_OBJECTS) $(gradient_test_LDADD) $(LIBS)
+linear-gradient$(EXEEXT): $(linear_gradient_OBJECTS) $(linear_gradient_DEPENDENCIES) $(EXTRA_linear_gradient_DEPENDENCIES) 
+	@rm -f linear-gradient$(EXEEXT)
+	$(AM_V_CCLD)$(LINK) $(linear_gradient_OBJECTS) $(linear_gradient_LDADD) $(LIBS)
 quad2quad$(EXEEXT): $(quad2quad_OBJECTS) $(quad2quad_DEPENDENCIES) $(EXTRA_quad2quad_DEPENDENCIES) 
 	@rm -f quad2quad$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(quad2quad_OBJECTS) $(quad2quad_LDADD) $(LIBS)
 radial-test$(EXEEXT): $(radial_test_OBJECTS) $(radial_test_DEPENDENCIES) $(EXTRA_radial_test_DEPENDENCIES) 
 	@rm -f radial-test$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(radial_test_OBJECTS) $(radial_test_LDADD) $(LIBS)
+scale$(EXEEXT): $(scale_OBJECTS) $(scale_DEPENDENCIES) $(EXTRA_scale_DEPENDENCIES) 
+	@rm -f scale$(EXEEXT)
+	$(AM_V_CCLD)$(LINK) $(scale_OBJECTS) $(scale_LDADD) $(LIBS)
 screen-test$(EXEEXT): $(screen_test_OBJECTS) $(screen_test_DEPENDENCIES) $(EXTRA_screen_test_DEPENDENCIES) 
 	@rm -f screen-test$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(screen_test_OBJECTS) $(screen_test_LDADD) $(LIBS)
@@ -531,16 +598,20 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clip-in.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clip-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/composite-test.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/conical-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/convolution-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gradient-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gtk-utils.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/linear-gradient.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/quad2quad.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radial-test.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scale.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/screen-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/srgb-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/srgb-trap-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/trap-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tri-test.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utils-prng.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utils.Po@am__quote@
 
 .c.o:
@@ -578,6 +649,20 @@ utils.obj: ../test/utils.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o utils.obj `if test -f '../test/utils.c'; then $(CYGPATH_W) '../test/utils.c'; else $(CYGPATH_W) '$(srcdir)/../test/utils.c'; fi`
 
+utils-prng.o: ../test/utils-prng.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT utils-prng.o -MD -MP -MF $(DEPDIR)/utils-prng.Tpo -c -o utils-prng.o `test -f '../test/utils-prng.c' || echo '$(srcdir)/'`../test/utils-prng.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/utils-prng.Tpo $(DEPDIR)/utils-prng.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='../test/utils-prng.c' object='utils-prng.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o utils-prng.o `test -f '../test/utils-prng.c' || echo '$(srcdir)/'`../test/utils-prng.c
+
+utils-prng.obj: ../test/utils-prng.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT utils-prng.obj -MD -MP -MF $(DEPDIR)/utils-prng.Tpo -c -o utils-prng.obj `if test -f '../test/utils-prng.c'; then $(CYGPATH_W) '../test/utils-prng.c'; else $(CYGPATH_W) '$(srcdir)/../test/utils-prng.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/utils-prng.Tpo $(DEPDIR)/utils-prng.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='../test/utils-prng.c' object='utils-prng.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o utils-prng.obj `if test -f '../test/utils-prng.c'; then $(CYGPATH_W) '../test/utils-prng.c'; else $(CYGPATH_W) '$(srcdir)/../test/utils-prng.c'; fi`
+
 mostlyclean-libtool:
 	-rm -f *.lo
 
diff --git a/lib/pixman/demos/conical-test.c b/lib/pixman/demos/conical-test.c
new file mode 100644
index 000000000..6b3243016
--- /dev/null
+++ b/lib/pixman/demos/conical-test.c
@@ -0,0 +1,100 @@
+#include "../test/utils.h"
+#include "gtk-utils.h"
+
+#define SIZE 128
+#define GRADIENTS_PER_ROW 7
+#define NUM_ROWS ((NUM_GRADIENTS + GRADIENTS_PER_ROW - 1) / GRADIENTS_PER_ROW)
+#define WIDTH (SIZE * GRADIENTS_PER_ROW)
+#define HEIGHT (SIZE * NUM_ROWS)
+#define NUM_GRADIENTS 35
+
+#define double_to_color(x)					\
+    (((uint32_t) ((x)*65536)) - (((uint32_t) ((x)*65536)) >> 16))
+
+#define PIXMAN_STOP(offset,r,g,b,a)		\
+    { pixman_double_to_fixed (offset),		\
+	{					\
+	    double_to_color (r),		\
+		double_to_color (g),		\
+		double_to_color (b),		\
+		double_to_color (a)		\
+	}					\
+    }
+
+
+static const pixman_gradient_stop_t stops[] = {
+    PIXMAN_STOP (0.25,       1, 0, 0, 0.7),
+    PIXMAN_STOP (0.5,        1, 1, 0, 0.7),
+    PIXMAN_STOP (0.75,       0, 1, 0, 0.7),
+    PIXMAN_STOP (1.0,        0, 0, 1, 0.7)
+};
+
+#define NUM_STOPS (sizeof (stops) / sizeof (stops[0]))
+
+/* Build conical gradient number `index` (0 .. NUM_GRADIENTS - 1), centred on
+ * the origin; the angle argument is stepped evenly across a 720 unit range. */
+static pixman_image_t *
+create_conical (int index)
+{
+    double start_angle =
+	(0.5 / NUM_GRADIENTS + index / (double)NUM_GRADIENTS) * 720 - 180;
+    pixman_point_fixed_t center;
+
+    center.x = center.y = pixman_double_to_fixed (0);
+
+    return pixman_image_create_conical_gradient (
+	&center, pixman_double_to_fixed (start_angle), stops, NUM_STOPS);
+}
+
+int
+main (int argc, char **argv)
+{
+    pixman_transform_t transform;
+    pixman_image_t *src_img, *dest_img;
+    int i;
+    /* Demo: NUM_GRADIENTS conical gradients laid out in a grid, then shown. */
+    enable_divbyzero_exceptions ();
+
+    dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8,
+					 WIDTH, HEIGHT,
+					 NULL, 0);
+    /* Backdrop that stays visible through the alpha 0.7 gradient stops. */
+    draw_checkerboard (dest_img, 25, 0xffaaaaaa, 0xff888888);
+    /* A single transform, built once, is set on every gradient below. */
+    pixman_transform_init_identity (&transform);
+
+    pixman_transform_translate (NULL, &transform,
+				pixman_double_to_fixed (0.5),
+				pixman_double_to_fixed (0.5));
+
+    pixman_transform_scale (NULL, &transform,
+			    pixman_double_to_fixed (SIZE),
+			    pixman_double_to_fixed (SIZE));
+    pixman_transform_translate (NULL, &transform,
+				pixman_double_to_fixed (0.5),
+				pixman_double_to_fixed (0.5));
+
+    for (i = 0; i < NUM_GRADIENTS; i++)
+    {
+	int column = i % GRADIENTS_PER_ROW;
+	int row = i / GRADIENTS_PER_ROW;
+	/* Gradient i goes to cell (column, row); each cell is SIZE x SIZE. */
+	src_img = create_conical (i); 
+	pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL);
+   
+	pixman_image_set_transform (src_img, &transform);
+	
+	pixman_image_composite32 (
+	    PIXMAN_OP_OVER, src_img, NULL,dest_img,
+	    0, 0, 0, 0, column * SIZE, row * SIZE,
+	    SIZE, SIZE);
+	
+	pixman_image_unref (src_img);
+    }
+
+    show_image (dest_img);
+
+    pixman_image_unref (dest_img);
+
+    return 0;
+}
diff --git a/lib/pixman/demos/gtk-utils.c b/lib/pixman/demos/gtk-utils.c
index 8291a1ed2..32d4aecc7 100644
--- a/lib/pixman/demos/gtk-utils.c
+++ b/lib/pixman/demos/gtk-utils.c
@@ -3,6 +3,72 @@
 #include "../test/utils.h"
 #include "gtk-utils.h"
 
+/* Load FILENAME with gdk-pixbuf into a freshly malloc()ed buffer of
+ * premultiplied 0xAARRGGBB pixels and wrap it in a pixman image of FORMAT.
+ * Returns NULL if the file cannot be read, has fewer than three channels, or
+ * an allocation fails.  NOTE(review): the pixel buffer is not released with
+ * the image as far as this file shows -- confirm before reusing elsewhere. */
+pixman_image_t *
+pixman_image_from_file (const char *filename, pixman_format_code_t format)
+{
+    GdkPixbuf *pixbuf;
+    pixman_image_t *image = NULL;
+    int width, height;
+    uint32_t *data, *d;
+    uint8_t *gdk_data;
+    int n_channels;
+    int j, i;
+    int stride;
+
+    if (!(pixbuf = gdk_pixbuf_new_from_file (filename, NULL)))
+	return NULL;
+
+    width = gdk_pixbuf_get_width (pixbuf);
+    height = gdk_pixbuf_get_height (pixbuf);
+    n_channels = gdk_pixbuf_get_n_channels (pixbuf);
+    gdk_data = gdk_pixbuf_get_pixels (pixbuf);
+    stride = gdk_pixbuf_get_rowstride (pixbuf);
+
+    /* The loop below reads three bytes per pixel unconditionally. */
+    if (n_channels < 3)
+	goto out;
+
+    /* Size the buffer in size_t so width * height cannot overflow int. */
+    if (!(data = malloc ((size_t) width * height * sizeof (uint32_t))))
+	goto out;
+
+    d = data;
+    for (j = 0; j < height; ++j)
+    {
+	uint8_t *gdk_line = gdk_data;
+
+	for (i = 0; i < width; ++i)
+	{
+	    int r, g, b, a;
+
+	    r = gdk_line[0];
+	    g = gdk_line[1];
+	    b = gdk_line[2];
+	    a = (n_channels == 4) ? gdk_line[3] : 0xff;
+
+	    /* Premultiply by alpha, rounding to nearest. */
+	    r = (r * a + 127) / 255;
+	    g = (g * a + 127) / 255;
+	    b = (b * a + 127) / 255;
+
+	    *d++ = ((uint32_t) a << 24) | (r << 16) | (g << 8) | b;
+	    gdk_line += n_channels;
+	}
+	gdk_data += stride;
+    }
+
+    if (!(image = pixman_image_create_bits (format, width, height, data, width * 4)))
+	free (data);	/* nobody else owns the buffer if creation failed */
+out:
+    g_object_unref (pixbuf);
+    return image;
+}
+
 GdkPixbuf *
 pixbuf_from_argb32 (uint32_t *bits,
 		    int width,
@@ -29,14 +95,31 @@ pixbuf_from_argb32 (uint32_t *bits,
 static gboolean
 on_expose (GtkWidget *widget, GdkEventExpose *expose, gpointer data)
 {
-    GdkPixbuf *pixbuf = data;
+    pixman_image_t *pimage = data;
+    int width = pixman_image_get_width (pimage);
+    int height = pixman_image_get_height (pimage);
+    int stride = pixman_image_get_stride (pimage);
+    cairo_surface_t *cimage;
+    cairo_format_t format;
+    cairo_t *cr;
+
+    if (pixman_image_get_format (pimage) == PIXMAN_x8r8g8b8)
+	format = CAIRO_FORMAT_RGB24;
+    else
+	format = CAIRO_FORMAT_ARGB32;
+
+    cimage = cairo_image_surface_create_for_data (
+	(uint8_t *)pixman_image_get_data (pimage),
+	format, width, height, stride);
     
-    gdk_draw_pixbuf (widget->window, NULL,
-		     pixbuf, 0, 0, 0, 0,
-		     gdk_pixbuf_get_width (pixbuf),
-		     gdk_pixbuf_get_height (pixbuf),
-		     GDK_RGB_DITHER_NONE,
-		     0, 0);
+    cr = gdk_cairo_create (widget->window);
+
+    cairo_rectangle (cr, 0, 0, width, height);
+    cairo_set_source_surface (cr, cimage, 0, 0);
+    cairo_fill (cr);
+
+    cairo_destroy (cr);
+    cairo_surface_destroy (cimage);
     
     return TRUE;
 }
@@ -45,7 +128,6 @@ void
 show_image (pixman_image_t *image)
 {
     GtkWidget *window;
-    GdkPixbuf *pixbuf;
     int width, height;
     int argc;
     char **argv;
@@ -66,22 +148,15 @@ show_image (pixman_image_t *image)
 
     format = pixman_image_get_format (image);
 
-    /* Three cases:
-     *
-     *  - image is a8r8g8b8_sRGB: we will display without modification
-     *    under the assumption that the monitor is sRGB
-     *
-     *  - image is a8r8g8b8: we will display without modification
-     *    under the assumption that whoever created the image
-     *    probably did it wrong by using sRGB inputs
-     *
-     *  - other: we will convert to a8r8g8b8 under the assumption that
-     *    whoever created the image probably did it wrong.
+    /* We always display the image as if it contains sRGB data. That
+     * means that no conversion should take place when the image
+     * has the a8r8g8b8_sRGB format.
      */
     switch (format)
     {
     case PIXMAN_a8r8g8b8_sRGB:
     case PIXMAN_a8r8g8b8:
+    case PIXMAN_x8r8g8b8:
 	copy = pixman_image_ref (image);
 	break;
 
@@ -95,11 +170,7 @@ show_image (pixman_image_t *image)
 	break;
     }
 
-    pixbuf = pixbuf_from_argb32 (pixman_image_get_data (copy),
-				 width, height,
-				 pixman_image_get_stride (copy));
-    
-    g_signal_connect (window, "expose_event", G_CALLBACK (on_expose), pixbuf);
+    g_signal_connect (window, "expose_event", G_CALLBACK (on_expose), copy);
     g_signal_connect (window, "delete_event", G_CALLBACK (gtk_main_quit), NULL);
     
     gtk_widget_show (window);
diff --git a/lib/pixman/demos/gtk-utils.h b/lib/pixman/demos/gtk-utils.h
index 55cb7018a..36be4def6 100644
--- a/lib/pixman/demos/gtk-utils.h
+++ b/lib/pixman/demos/gtk-utils.h
@@ -6,6 +6,9 @@
 
 void show_image (pixman_image_t *image);
 
+pixman_image_t *
+pixman_image_from_file (const char *filename, pixman_format_code_t format);
+
 GdkPixbuf *pixbuf_from_argb32 (uint32_t *bits,
                                int width,
                                int height,
diff --git a/lib/pixman/demos/linear-gradient.c b/lib/pixman/demos/linear-gradient.c
new file mode 100644
index 000000000..46433a6e5
--- /dev/null
+++ b/lib/pixman/demos/linear-gradient.c
@@ -0,0 +1,50 @@
+#include "../test/utils.h"
+#include "gtk-utils.h"
+
+#define WIDTH 1024
+#define HEIGHT 640
+
+/* Demo: fill a WIDTH x HEIGHT x8r8g8b8 image with a two-stop linear gradient
+ * running from the top-left to the bottom-right corner, then display it. */
+int
+main (int argc, char **argv)
+{
+    pixman_image_t *src_img, *dest_img;
+    pixman_gradient_stop_t stops[] = {
+        { 0x00000, { 0x0000, 0x0000, 0x4444, 0xdddd } },
+        { 0x10000, { 0xeeee, 0xeeee, 0x8888, 0xdddd } },
+#if 0
+        /* These colors make it very obvious that dithering
+         * is useful even for 8-bit gradients
+         */
+	{ 0x00000, { 0x6666, 0x3333, 0x3333, 0xffff } },
+	{ 0x10000, { 0x3333, 0x6666, 0x6666, 0xffff } },
+#endif
+    };
+    pixman_point_fixed_t p1, p2;
+
+    enable_divbyzero_exceptions ();
+
+    dest_img = pixman_image_create_bits (PIXMAN_x8r8g8b8,
+					 WIDTH, HEIGHT,
+					 NULL, 0);
+
+    /* Gradient end points in 16.16 fixed point. */
+    p1.x = p1.y = 0x0000;
+    p2.x = WIDTH << 16;
+    p2.y = HEIGHT << 16;
+
+    src_img = pixman_image_create_linear_gradient (&p1, &p2, stops, ARRAY_LENGTH (stops));
+
+    pixman_image_composite32 (PIXMAN_OP_OVER, src_img, NULL, dest_img,
+			      0, 0, 0, 0, 0, 0, WIDTH, HEIGHT);
+
+    /* The gradient is only needed for the composite; drop our reference. */
+    pixman_image_unref (src_img);
+
+    show_image (dest_img);
+
+    pixman_image_unref (dest_img);
+
+    return 0;
+}
diff --git a/lib/pixman/demos/radial-test.c b/lib/pixman/demos/radial-test.c
index e64f3577f..08a367cd2 100644
--- a/lib/pixman/demos/radial-test.c
+++ b/lib/pixman/demos/radial-test.c
@@ -1,7 +1,7 @@
 #include "../test/utils.h"
 #include "gtk-utils.h"
 
-#define NUM_GRADIENTS 7
+#define NUM_GRADIENTS 9
 #define NUM_STOPS 3
 #define NUM_REPEAT 4
 #define SIZE 128
@@ -28,6 +28,9 @@
  * centers (0, 0) and (1, 0), but with different radiuses. From left
  * to right:
  *
+ * - Degenerate start circle completely inside the end circle
+ *     0.00 -> 1.75; dr = 1.75 > 0; a = 1 - 1.75^2 < 0
+ *
  * - Small start circle completely inside the end circle
  *     0.25 -> 1.75; dr =  1.5 > 0; a = 1 - 1.50^2 < 0
  *
@@ -49,15 +52,20 @@
  * - Small end circle completely inside the start circle
  *     1.75 -> 0.25; dr = -1.5 > 0; a = 1 - 1.50^2 < 0
  *
+ * - Degenerate end circle completely inside the start circle
+ *     1.75 -> 0.00; dr = -1.75 < 0; a = 1 - 1.75^2 < 0
+ *
  */
 
 const static double radiuses[NUM_GRADIENTS] = {
+    0.00,
     0.25,
     0.50,
     0.50,
     1.00,
     1.00,
     1.50,
+    1.75,
     1.75
 };
 
@@ -139,6 +147,8 @@ main (int argc, char **argv)
 					 WIDTH, HEIGHT,
 					 NULL, 0);
 
+    draw_checkerboard (dest_img, 25, 0xffaaaaaa, 0xffbbbbbb);
+    
     pixman_transform_init_identity (&transform);
 
     /*
diff --git a/lib/pixman/demos/scale.c b/lib/pixman/demos/scale.c
new file mode 100644
index 000000000..869ada12b
--- /dev/null
+++ b/lib/pixman/demos/scale.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright 2012, Red Hat, Inc.
+ * Copyright 2012, Soren Sandmann
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Soren Sandmann <soren.sandmann@gmail.com>
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include <math.h>
+#include <gtk/gtk.h>
+#include <pixman.h>
+#include <stdlib.h>
+#include "gtk-utils.h"
+
+typedef struct
+{
+    GtkBuilder *        builder;	/* objects loaded from scale.ui */
+    pixman_image_t *	original;	/* source image; rescale () sets transform, filter and repeat on it */
+    GtkAdjustment *     scale_x_adjustment;	/* x exponent; factor is to_scale (value) */
+    GtkAdjustment *     scale_y_adjustment;	/* y exponent; factor is to_scale (value) */
+    GtkAdjustment *     rotate_adjustment;	/* rotation in degrees */
+    GtkAdjustment *	subsample_adjustment;	/* used for both subsample arguments of the filter */
+    int                 scaled_width;	/* ceil (image width * x factor) */
+    int                 scaled_height;	/* ceil (image height * y factor) */
+} app_t;
+
+/* Look up NAME in the builder; g_error ()s out if no such object exists. */
+static GtkWidget *
+get_widget (app_t *app, const char *name)
+{
+    GObject *object = gtk_builder_get_object (app->builder, name);
+    GtkWidget *widget = GTK_WIDGET (object);
+    if (widget == NULL)
+        g_error ("Widget %s not found\n", name);
+    return widget;
+}
+
+/* Return the smallest of the four arguments. */
+static double
+min4 (double a, double b, double c, double d)
+{
+    double lo_ab = MIN (a, b);
+    double lo_cd = MIN (c, d);
+
+    return MIN (lo_ab, lo_cd);
+}
+
+/* Return the largest of the four arguments. */
+static double
+max4 (double a, double b, double c, double d)
+{
+    double hi_ab = MAX (a, b);
+    double hi_cd = MAX (c, d);
+
+    return MAX (hi_ab, hi_cd);
+}
+
+/* Transform the corners of the square [-1, 1] x [-1, 1] by TRANS and store
+ * half the width and half the height of their bounding box in *SX and *SY. */
+static void
+compute_extents (pixman_f_transform_t *trans, double *sx, double *sy)
+{
+    pixman_f_vector_t corner[4] = {
+	{ { 1, 1, 1 } },
+	{ { -1, 1, 1 } },
+	{ { -1, -1, 1 } },
+	{ { 1, -1, 1 } },
+    };
+    double left, right, top, bottom;
+    int k;
+
+    for (k = 0; k < 4; k++)
+	pixman_f_transform_point (trans, &corner[k]);
+
+    left = min4 (corner[0].v[0], corner[1].v[0], corner[2].v[0], corner[3].v[0]);
+    right = max4 (corner[0].v[0], corner[1].v[0], corner[2].v[0], corner[3].v[0]);
+    top = min4 (corner[0].v[1], corner[1].v[1], corner[2].v[1], corner[3].v[1]);
+    bottom = max4 (corner[0].v[1], corner[1].v[1], corner[2].v[1], corner[3].v[1]);
+
+    *sx = (right - left) / 2.0;
+    *sy = (bottom - top) / 2.0;
+}
+
+typedef struct
+{
+    char		name [20];	/* label put into the combo box model */
+    pixman_kernel_t	value;	/* enum value; repeats[] stores PIXMAN_REPEAT_* constants here too */
+} named_int_t;
+
+static const named_int_t filters[] =
+{
+    { "Box",			PIXMAN_KERNEL_BOX },
+    { "Impulse",		PIXMAN_KERNEL_IMPULSE },
+    { "Linear",			PIXMAN_KERNEL_LINEAR },
+    { "Cubic",			PIXMAN_KERNEL_CUBIC },
+    { "Lanczos2",		PIXMAN_KERNEL_LANCZOS2 },
+    { "Lanczos3",		PIXMAN_KERNEL_LANCZOS3 },
+    { "Lanczos3 Stretched",	PIXMAN_KERNEL_LANCZOS3_STRETCHED },
+    { "Gaussian",		PIXMAN_KERNEL_GAUSSIAN },
+};
+
+static const named_int_t repeats[] =
+{
+    { "None",                   PIXMAN_REPEAT_NONE },
+    { "Normal",                 PIXMAN_REPEAT_NORMAL },
+    { "Reflect",                PIXMAN_REPEAT_REFLECT },
+    { "Pad",                    PIXMAN_REPEAT_PAD },
+};
+
+static pixman_kernel_t
+get_value (app_t *app, const named_int_t table[], const char *box_name)
+{
+    /* gtk_combo_box_get_active () is -1 with nothing selected; use entry 0 then. */
+    int active = gtk_combo_box_get_active (GTK_COMBO_BOX (get_widget (app, box_name)));
+    return table[active < 0 ? 0 : active].value;
+}
+
+static void
+copy_to_counterpart (app_t *app, GObject *object)
+{
+    static const char *xy_map[] =
+    {
+	"reconstruct_x_combo_box", "reconstruct_y_combo_box",
+	"sample_x_combo_box",      "sample_y_combo_box",
+	"scale_x_adjustment",      "scale_y_adjustment",
+    };
+    GObject *counterpart = NULL;
+    int i;
+    /* xy_map lists builder ids as (x, y) pairs; find OBJECT's partner, if any. */
+    for (i = 0; i < G_N_ELEMENTS (xy_map); i += 2)
+    {
+	GObject *x = gtk_builder_get_object (app->builder, xy_map[i]);
+	GObject *y = gtk_builder_get_object (app->builder, xy_map[i + 1]);
+
+	if (object == x)
+	    counterpart = y;
+	if (object == y)
+	    counterpart = x;
+    }
+
+    if (!counterpart)
+	return;
+    /* Copy OBJECT's current selection or value onto the partner. */
+    if (GTK_IS_COMBO_BOX (counterpart))
+    {
+	gtk_combo_box_set_active (
+	    GTK_COMBO_BOX (counterpart),
+	    gtk_combo_box_get_active (
+		GTK_COMBO_BOX (object)));
+    }
+    else if (GTK_IS_ADJUSTMENT (counterpart))
+    {
+	gtk_adjustment_set_value (
+	    GTK_ADJUSTMENT (counterpart),
+	    gtk_adjustment_get_value (
+		GTK_ADJUSTMENT (object)));
+    }
+}
+
+static double
+to_scale (double v)
+{
+    return pow (1.15, v);	/* exponential mapping: v = 0 gives a factor of 1.0 */
+}
+
+static void
+rescale (GtkWidget *may_be_null, app_t *app)
+{
+    pixman_f_transform_t ftransform;
+    pixman_transform_t transform;
+    double new_width, new_height;
+    double fscale_x, fscale_y;
+    double rotation;
+    pixman_fixed_t *params;
+    int n_params;
+    double sx, sy;
+    /* Push the current UI state (scale, rotation, filters, repeat) onto app->original. */
+    pixman_f_transform_init_identity (&ftransform);
+    /* MAY_BE_NULL is the emitting object, or NULL for the initial call from app_new (). */
+    if (may_be_null && gtk_toggle_button_get_active (
+	    GTK_TOGGLE_BUTTON (get_widget (app, "lock_checkbutton"))))
+    {
+	copy_to_counterpart (app, G_OBJECT (may_be_null));
+    }
+    
+    fscale_x = gtk_adjustment_get_value (app->scale_x_adjustment);
+    fscale_y = gtk_adjustment_get_value (app->scale_y_adjustment);
+    rotation = gtk_adjustment_get_value (app->rotate_adjustment);
+    /* Adjustment values are exponents; to_scale () turns them into factors. */
+    fscale_x = to_scale (fscale_x);
+    fscale_y = to_scale (fscale_y);
+    
+    new_width = pixman_image_get_width (app->original) * fscale_x;
+    new_height = pixman_image_get_height (app->original) * fscale_y;
+
+    pixman_f_transform_scale (&ftransform, NULL, fscale_x, fscale_y);
+
+    pixman_f_transform_translate (&ftransform, NULL, - new_width / 2.0, - new_height / 2.0);
+    /* Degrees to radians. */
+    rotation = (rotation / 360.0) * 2 * M_PI;
+    pixman_f_transform_rotate (&ftransform, NULL, cos (rotation), sin (rotation));
+
+    pixman_f_transform_translate (&ftransform, NULL, new_width / 2.0, new_height / 2.0);
+
+    pixman_f_transform_invert (&ftransform, &ftransform);
+
+    compute_extents (&ftransform, &sx, &sy);
+    
+    pixman_transform_from_pixman_f_transform (&transform, &ftransform);
+    pixman_image_set_transform (app->original, &transform);
+    /* The extents are scaled by 65536 (plus 0.5 for rounding) before being passed on. */
+    params = pixman_filter_create_separable_convolution (
+        &n_params,
+        sx * 65536.0 + 0.5,
+	sy * 65536.0 + 0.5,
+	get_value (app, filters, "reconstruct_x_combo_box"),
+	get_value (app, filters, "reconstruct_y_combo_box"),
+	get_value (app, filters, "sample_x_combo_box"),
+	get_value (app, filters, "sample_y_combo_box"),
+	gtk_adjustment_get_value (app->subsample_adjustment),
+	gtk_adjustment_get_value (app->subsample_adjustment));
+
+    pixman_image_set_filter (app->original, PIXMAN_FILTER_SEPARABLE_CONVOLUTION, params, n_params);
+
+    pixman_image_set_repeat (
+        app->original, get_value (app, repeats, "repeat_combo_box"));
+    
+    free (params);
+
+    app->scaled_width = ceil (new_width);
+    app->scaled_height = ceil (new_height);
+    
+    gtk_widget_set_size_request (
+        get_widget (app, "drawing_area"), new_width + 0.5, new_height + 0.5);
+
+    gtk_widget_queue_draw (
+        get_widget (app, "drawing_area"));
+}
+
+/* Expose handler for the drawing area: composite the transformed, filtered
+ * original into a scratch buffer covering the exposed rectangle and paint it. */
+static gboolean
+on_expose (GtkWidget *da, GdkEvent *event, gpointer data)
+{
+    app_t *app = data;
+    GdkRectangle *area = &event->expose.area;
+    cairo_surface_t *surface;
+    pixman_image_t *tmp = NULL;
+    cairo_t *cr;
+    uint32_t *pixels;
+
+    /* calloc (rows, row bytes) checks the multiplication for overflow. */
+    if ((pixels = calloc (area->height, (size_t) area->width * 4)))
+	tmp = pixman_image_create_bits (
+	    PIXMAN_a8r8g8b8, area->width, area->height, pixels, area->width * 4);
+    if (!tmp)
+	goto out;
+
+    /* The 32-bit entry point: scaled coordinates can exceed 16 bits. */
+    if (area->x < app->scaled_width && area->y < app->scaled_height)
+	pixman_image_composite32 (
+	    PIXMAN_OP_SRC, app->original, NULL, tmp,
+	    area->x, area->y, 0, 0, 0, 0,
+	    app->scaled_width - area->x, app->scaled_height - area->y);
+
+    surface = cairo_image_surface_create_for_data (
+        (uint8_t *)pixels, CAIRO_FORMAT_ARGB32,
+        area->width, area->height, area->width * 4);
+    cr = gdk_cairo_create (da->window);
+    cairo_set_source_surface (cr, surface, area->x, area->y);
+    cairo_paint (cr);
+
+    cairo_destroy (cr);
+    cairo_surface_destroy (surface);
+    pixman_image_unref (tmp);
+out:
+    free (pixels);
+    return TRUE;
+}
+
+static void
+set_up_combo_box (app_t *app, const char *box_name,
+                  int n_entries, const named_int_t table[])
+{
+    GtkWidget *widget = get_widget (app, box_name);
+    GtkListStore *model;
+    GtkCellRenderer *cell;
+    int i;
+    /* One string column holding the first N_ENTRIES names from TABLE. */
+    model = gtk_list_store_new (1, G_TYPE_STRING);
+    
+    cell = gtk_cell_renderer_text_new ();
+    gtk_cell_layout_pack_start (GTK_CELL_LAYOUT (widget), cell, TRUE);
+    gtk_cell_layout_set_attributes (GTK_CELL_LAYOUT (widget), cell,
+				    "text", 0,
+				    NULL);
+
+    gtk_combo_box_set_model (GTK_COMBO_BOX (widget), GTK_TREE_MODEL (model));
+    
+    for (i = 0; i < n_entries; ++i)
+    {
+	const named_int_t *info = &(table[i]);
+	GtkTreeIter iter;
+
+	gtk_list_store_append (model, &iter);
+	gtk_list_store_set (model, &iter, 0, info->name, -1);
+    }
+    /* Select the first entry; the "changed" handler is only connected afterwards. */
+    gtk_combo_box_set_active (GTK_COMBO_BOX (widget), 0);
+
+    g_signal_connect (widget, "changed", G_CALLBACK (rescale), app);
+}
+
+static void
+set_up_filter_box (app_t *app, const char *box_name)
+{
+    set_up_combo_box (app, box_name, G_N_ELEMENTS (filters), filters);	/* every entry of filters[] */
+}
+
+static char *
+format_value (GtkWidget *widget, double value)
+{
+    return g_strdup_printf ("%.4f", to_scale (value));	/* label shows the scale factor, not the raw slider value */
+}
+
+static app_t *
+app_new (pixman_image_t *original)
+{
+    GtkWidget *widget;
+    app_t *app = g_malloc (sizeof *app);
+    GError *err = NULL;
+    /* Load scale.ui (a relative path) and wire every control to rescale (). */
+    app->builder = gtk_builder_new ();
+    app->original = original;
+
+    if (!gtk_builder_add_from_file (app->builder, "scale.ui", &err))
+	g_error ("Could not read file scale.ui: %s", err->message);
+
+    app->scale_x_adjustment =
+        GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "scale_x_adjustment"));
+    app->scale_y_adjustment =
+        GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "scale_y_adjustment"));
+    app->rotate_adjustment =
+        GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "rotate_adjustment"));
+    app->subsample_adjustment =
+	GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "subsample_adjustment"));
+    /* Any change to an adjustment re-runs rescale (). */
+    g_signal_connect (app->scale_x_adjustment, "value_changed", G_CALLBACK (rescale), app);
+    g_signal_connect (app->scale_y_adjustment, "value_changed", G_CALLBACK (rescale), app);
+    g_signal_connect (app->rotate_adjustment, "value_changed", G_CALLBACK (rescale), app);
+    g_signal_connect (app->subsample_adjustment, "value_changed", G_CALLBACK (rescale), app);
+    
+    widget = get_widget (app, "scale_x_scale");
+    gtk_scale_add_mark (GTK_SCALE (widget), 0.0, GTK_POS_LEFT, NULL);
+    g_signal_connect (widget, "format_value", G_CALLBACK (format_value), app);
+    widget = get_widget (app, "scale_y_scale");
+    gtk_scale_add_mark (GTK_SCALE (widget), 0.0, GTK_POS_LEFT, NULL);
+    g_signal_connect (widget, "format_value", G_CALLBACK (format_value), app);
+    widget = get_widget (app, "rotate_scale");
+    gtk_scale_add_mark (GTK_SCALE (widget), 0.0, GTK_POS_LEFT, NULL);
+
+    widget = get_widget (app, "drawing_area");
+    g_signal_connect (widget, "expose_event", G_CALLBACK (on_expose), app);
+
+    set_up_filter_box (app, "reconstruct_x_combo_box");
+    set_up_filter_box (app, "reconstruct_y_combo_box");
+    set_up_filter_box (app, "sample_x_combo_box");
+    set_up_filter_box (app, "sample_y_combo_box");
+
+    set_up_combo_box (
+        app, "repeat_combo_box", G_N_ELEMENTS (repeats), repeats);
+
+    g_signal_connect (
+	gtk_builder_get_object (app->builder, "lock_checkbutton"),
+	"toggled", G_CALLBACK (rescale), app);
+    /* Apply the initial widget state once; NULL means no emitting object. */
+    rescale (NULL, app);
+    
+    return app;
+}
+
+int
+main (int argc, char **argv)
+{
+    GtkWidget *window;
+    pixman_image_t *image;
+    app_t *app;
+    /* Usage: scale <image file>; gtk_init () gets first look at argc/argv. */
+    gtk_init (&argc, &argv);
+
+    if (argc < 2)
+    {
+	printf ("%s <image file>\n", argv[0]);
+	return -1;
+    }
+
+    if (!(image = pixman_image_from_file (argv[1], PIXMAN_a8r8g8b8)))
+    {
+	printf ("Could not load image \"%s\"\n", argv[1]);
+	return -1;
+    }
+
+    app = app_new (image);
+    /* "main" is the id of the top-level window in scale.ui. */
+    window = get_widget (app, "main");
+
+    g_signal_connect (window, "delete_event", G_CALLBACK (gtk_main_quit), NULL);
+    
+    gtk_window_set_default_size (GTK_WINDOW (window), 1024, 768);
+    
+    gtk_widget_show_all (window);
+    
+    gtk_main ();
+
+    return 0;
+}
diff --git a/lib/pixman/demos/scale.ui b/lib/pixman/demos/scale.ui
new file mode 100644
index 000000000..b3450d34d
--- /dev/null
+++ b/lib/pixman/demos/scale.ui
@@ -0,0 +1,332 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<interface>
+  <!-- interface-requires gtk+ 2.12 -->
+  <!-- interface-naming-policy toplevel-contextual -->
+  <object class="GtkAdjustment" id="rotate_adjustment">
+    <property name="lower">-180</property>
+    <property name="upper">190</property>
+    <property name="step_increment">1</property>
+    <property name="page_increment">10</property>
+    <property name="page_size">10</property>
+  </object>
+  <object class="GtkAdjustment" id="scale_y_adjustment">
+    <property name="lower">-32</property>
+    <property name="upper">42</property>
+    <property name="step_increment">1</property>
+    <property name="page_increment">10</property>
+    <property name="page_size">10</property>
+  </object>
+  <object class="GtkAdjustment" id="scale_x_adjustment">
+    <property name="lower">-32</property>
+    <property name="upper">42</property>
+    <property name="step_increment">1</property>
+    <property name="page_increment">10</property>
+    <property name="page_size">10</property>
+  </object>
+  <object class="GtkAdjustment" id="subsample_adjustment">
+    <property name="lower">1</property>
+    <property name="upper">12</property>
+    <property name="step_increment">1</property>
+    <property name="page_increment">1</property>
+    <property name="page_size">0</property>
+    <property name="value">4</property>
+  </object>
+  <object class="GtkWindow" id="main">
+    <child>
+      <object class="GtkHBox" id="u">
+        <property name="visible">True</property>
+        <property name="spacing">12</property>
+        <child>
+          <object class="GtkScrolledWindow" id="scrolledwindow1">
+            <property name="visible">True</property>
+            <property name="can_focus">True</property>
+            <property name="shadow_type">in</property>
+            <child>
+              <object class="GtkViewport" id="viewport1">
+                <property name="visible">True</property>
+                <child>
+                  <object class="GtkDrawingArea" id="drawing_area">
+                    <property name="visible">True</property>
+                  </object>
+                </child>
+              </object>
+            </child>
+          </object>
+          <packing>
+            <property name="position">0</property>
+          </packing>
+        </child>
+        <child>
+          <object class="GtkVBox" id="box1">
+            <property name="visible">True</property>
+	    <property name="spacing">12</property>
+            <child>
+              <object class="GtkHBox" id="box2">
+                <property name="visible">True</property>
+                <property name="homogeneous">True</property>
+                <child>
+                  <object class="GtkVBox" id="box3">
+                    <property name="visible">True</property>
+                    <property name="spacing">6</property>
+                    <child>
+                      <object class="GtkLabel" id="label1">
+                        <property name="visible">True</property>
+                        <property name="label" translatable="yes">&lt;b&gt;Scale X&lt;/b&gt;</property>
+                        <property name="use_markup">True</property>
+                      </object>
+                      <packing>
+                        <property name="expand">False</property>
+                        <property name="position">0</property>
+                      </packing>
+                    </child>
+                    <child>
+                      <object class="GtkVScale" id="scale_x_scale">
+                        <property name="visible">True</property>
+                        <property name="can_focus">True</property>
+                        <property name="adjustment">scale_x_adjustment</property>
+                        <property name="fill_level">32</property>
+                        <property name="value_pos">right</property>
+                      </object>
+                      <packing>
+                        <property name="position">1</property>
+                      </packing>
+                    </child>
+                  </object>
+                  <packing>
+                    <property name="expand">False</property>
+                    <property name="position">0</property>
+                  </packing>
+                </child>
+                <child>
+                  <object class="GtkVBox" id="box4">
+                    <property name="visible">True</property>
+                    <property name="spacing">6</property>
+                    <child>
+                      <object class="GtkLabel" id="label2">
+                        <property name="visible">True</property>
+                        <property name="label" translatable="yes">&lt;b&gt;Scale Y&lt;/b&gt;</property>
+                        <property name="use_markup">True</property>
+                      </object>
+                      <packing>
+                        <property name="expand">False</property>
+                        <property name="position">0</property>
+                      </packing>
+                    </child>
+                    <child>
+                      <object class="GtkVScale" id="scale_y_scale">
+                        <property name="visible">True</property>
+                        <property name="can_focus">True</property>
+                        <property name="adjustment">scale_y_adjustment</property>
+                        <property name="fill_level">32</property>
+                        <property name="value_pos">right</property>
+                      </object>
+                      <packing>
+                        <property name="position">1</property>
+                      </packing>
+                    </child>
+                  </object>
+                  <packing>
+                    <property name="expand">False</property>
+                    <property name="position">1</property>
+                  </packing>
+                </child>
+                <child>
+                  <object class="GtkVBox" id="box5">
+                    <property name="visible">True</property>
+                    <property name="spacing">6</property>
+                    <child>
+                      <object class="GtkLabel" id="label3">
+                        <property name="visible">True</property>
+                        <property name="label" translatable="yes">&lt;b&gt;Rotate&lt;/b&gt;</property>
+                        <property name="use_markup">True</property>
+                      </object>
+                      <packing>
+                        <property name="expand">False</property>
+                        <property name="position">0</property>
+                      </packing>
+                    </child>
+                    <child>
+                      <object class="GtkVScale" id="rotate_scale">
+                        <property name="visible">True</property>
+                        <property name="can_focus">True</property>
+                        <property name="adjustment">rotate_adjustment</property>
+                        <property name="fill_level">180</property>
+                        <property name="value_pos">right</property>
+                      </object>
+                      <packing>
+                        <property name="position">1</property>
+                      </packing>
+                    </child>
+                  </object>
+                  <packing>
+                    <property name="expand">False</property>
+                    <property name="position">2</property>
+                  </packing>
+                </child>
+              </object>
+              <packing>
+                <property name="padding">6</property>
+                <property name="position">0</property>
+              </packing>
+            </child>
+            <child>
+              <object class="GtkVBox" id="box6">
+                <property name="visible">True</property>
+		<child>
+		  <object class="GtkCheckButton"
+			  id="lock_checkbutton">
+		    <property name="label" translatable="yes">Lock X and Y Dimensions</property>
+		    <property name="xalign">0.0</property>
+		  </object>
+                  <packing>
+                    <property name="expand">False</property>
+                    <property name="fill">False</property>
+                    <property name="padding">6</property>
+                    <property name="position">1</property>
+                  </packing>
+		</child>
+                <child>
+                  <object class="GtkTable" id="grid1">
+                    <property name="visible">True</property>
+                    <property name="column_spacing">8</property>
+                    <property name="row_spacing">6</property>
+                    <child>
+                      <object class="GtkLabel" id="label4">
+                        <property name="visible">True</property>
+                        <property name="xalign">1</property>
+                        <property name="label" translatable="yes">&lt;b&gt;Reconstruct X:&lt;/b&gt;</property>
+                        <property name="use_markup">True</property>
+                      </object>
+                    </child>
+                    <child>
+                      <object class="GtkLabel" id="label5">
+                        <property name="visible">True</property>
+                        <property name="xalign">1</property>
+                        <property name="label" translatable="yes">&lt;b&gt;Reconstruct Y:&lt;/b&gt;</property>
+                        <property name="use_markup">True</property>
+                      </object>
+                      <packing>
+                        <property name="top_attach">1</property>
+                      </packing>
+                    </child>
+                    <child>
+                      <object class="GtkLabel" id="label6">
+                        <property name="visible">True</property>
+                        <property name="xalign">1</property>
+                        <property name="label" translatable="yes">&lt;b&gt;Sample X:&lt;/b&gt;</property>
+                        <property name="use_markup">True</property>
+                      </object>
+                      <packing>
+                        <property name="top_attach">2</property>
+                      </packing>
+                    </child>
+                    <child>
+                      <object class="GtkLabel" id="label7">
+                        <property name="visible">True</property>
+                        <property name="xalign">1</property>
+                        <property name="label" translatable="yes">&lt;b&gt;Sample Y:&lt;/b&gt;</property>
+                        <property name="use_markup">True</property>
+                      </object>
+                      <packing>
+                        <property name="top_attach">3</property>
+                      </packing>
+                    </child>
+                    <child>
+                      <object class="GtkLabel" id="label8">
+                        <property name="visible">True</property>
+                        <property name="xalign">1</property>
+                        <property name="label" translatable="yes">&lt;b&gt;Repeat:&lt;/b&gt;</property>
+                        <property name="use_markup">True</property>
+                      </object>
+                      <packing>
+                        <property name="top_attach">4</property>
+                      </packing>
+                    </child>
+                    <child>
+                      <object class="GtkLabel" id="label9">
+                        <property name="visible">True</property>
+                        <property name="xalign">1</property>
+                        <property name="label" translatable="yes">&lt;b&gt;Subsample:&lt;/b&gt;</property>
+                        <property name="use_markup">True</property>
+                      </object>
+                      <packing>
+                        <property name="top_attach">5</property>
+                      </packing>
+                    </child>
+                    <child>
+                      <object class="GtkComboBox" id="reconstruct_x_combo_box">
+                        <property name="visible">True</property>
+                      </object>
+                      <packing>
+                        <property name="left_attach">1</property>
+                      </packing>
+                    </child>
+                    <child>
+                      <object class="GtkComboBox" id="reconstruct_y_combo_box">
+                        <property name="visible">True</property>
+                      </object>
+                      <packing>
+                        <property name="left_attach">1</property>
+                        <property name="top_attach">1</property>
+                      </packing>
+                    </child>
+                    <child>
+                      <object class="GtkComboBox" id="sample_x_combo_box">
+                        <property name="visible">True</property>
+                      </object>
+                      <packing>
+                        <property name="left_attach">1</property>
+                        <property name="top_attach">2</property>
+                      </packing>
+                    </child>
+                    <child>
+                      <object class="GtkComboBox" id="sample_y_combo_box">
+                        <property name="visible">True</property>
+                      </object>
+                      <packing>
+                        <property name="left_attach">1</property>
+                        <property name="top_attach">3</property>
+                      </packing>
+                    </child>
+                    <child>
+                      <object class="GtkComboBox" id="repeat_combo_box">
+                        <property name="visible">True</property>
+                      </object>
+                      <packing>
+                        <property name="left_attach">1</property>
+                        <property name="top_attach">4</property>
+                      </packing>
+                    </child>
+                    <child>
+                      <object class="GtkSpinButton" id="subsample_spin_button">
+                        <property name="visible">True</property>
+			<property name="adjustment">subsample_adjustment</property>
+                      </object>
+                      <packing>
+                        <property name="left_attach">1</property>
+                        <property name="top_attach">5</property>
+                      </packing>
+                    </child>
+                  </object>
+                  <packing>
+                    <property name="expand">False</property>
+                    <property name="padding">6</property>
+                    <property name="position">1</property>
+                  </packing>
+                </child>
+              </object>
+              <packing>
+                <property name="expand">False</property>
+                <property name="position">0</property>
+              </packing>
+            </child>
+          </object>
+          <packing>
+            <property name="expand">False</property>
+            <property name="position">1</property>
+          </packing>
+        </child>
+      </object>
+    </child>
+  </object>
+</interface>
diff --git a/lib/pixman/depcomp b/lib/pixman/depcomp
index 0544c6835..e1f51f482 100644
--- a/lib/pixman/depcomp
+++ b/lib/pixman/depcomp
@@ -74,6 +74,9 @@ tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
 
 rm -f "$tmpdepfile"
 
+# Avoid interferences from the environment.
+gccflag= dashmflag=
+
 # Some modes work just like other modes, but use different flags.  We
 # parameterize here, but still list the modes in the big case below,
 # to make depend.m4 easier to write.  Note that we *cannot* use a case
@@ -108,7 +111,7 @@ if test "$depmode" = msvc7msys; then
 fi
 
 if test "$depmode" = xlc; then
-   # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency informations.
+   # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
    gccflag=-qmakedep=gcc,-MF
    depmode=gcc
 fi
@@ -142,13 +145,17 @@ gcc3)
   ;;
 
 gcc)
+## Note that this doesn't just cater to obsolete pre-3.x GCC compilers,
+## but also to in-use compilers like IBM xlc/xlC and the HP C compiler.
+## (see the conditional assignment to $gccflag above).
 ## There are various ways to get dependency output from gcc.  Here's
 ## why we pick this rather obscure method:
 ## - Don't want to use -MD because we'd like the dependencies to end
 ##   up in a subdir.  Having to rename by hand is ugly.
 ##   (We might end up doing this anyway to support other compilers.)
 ## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
-##   -MM, not -M (despite what the docs say).
+##   -MM, not -M (despite what the docs say).  Also, it might not be
+##   supported by the other compilers which use the 'gcc' depmode.
 ## - Using -M directly means running the compiler twice (even worse
 ##   than renaming).
   if test -z "$gccflag"; then
diff --git a/lib/pixman/pixman/Makefile.am b/lib/pixman/pixman/Makefile.am
index cbed0855a..b9ea75424 100644
--- a/lib/pixman/pixman/Makefile.am
+++ b/lib/pixman/pixman/Makefile.am
@@ -58,7 +58,9 @@ noinst_LTLIBRARIES += libpixman-arm-simd.la
 libpixman_arm_simd_la_SOURCES = \
 	pixman-arm-simd.c	\
 	pixman-arm-common.h	\
-	pixman-arm-simd-asm.S
+	pixman-arm-simd-asm.S   \
+	pixman-arm-simd-asm-scaled.S \
+	pixman-arm-simd-asm.h
 libpixman_1_la_LIBADD += libpixman-arm-simd.la
 
 ASM_CFLAGS_arm_simd=
@@ -85,7 +87,7 @@ noinst_LTLIBRARIES += libpixman-iwmmxt.la
 libpixman_1_la_LIBADD += libpixman-iwmmxt.la
 
 libpixman_iwmmxt_la-pixman-mmx.lo: pixman-mmx.c
-	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $(IWMMXT_CFLAGS) -MT libpixman_iwmmxt_la-pixman-mmx.lo -MD -MP -MF $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo -c -o libpixman_iwmmxt_la-pixman-mmx.lo `test -f 'pixman-mmx.c' || echo '$(srcdir)/'`pixman-mmx.c
+	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $(IWMMXT_CFLAGS) -MT libpixman_iwmmxt_la-pixman-mmx.lo -MD -MP -MF $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo -c -o libpixman_iwmmxt_la-pixman-mmx.lo `test -f 'pixman-mmx.c' || echo '$(srcdir)/'`pixman-mmx.c
 	$(AM_V_at)$(am__mv) $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Plo
 
 libpixman_iwmmxt_la_DEPENDENCIES = $(am__DEPENDENCIES_1)
diff --git a/lib/pixman/pixman/Makefile.in b/lib/pixman/pixman/Makefile.in
index 14747fd53..ec79ebf6d 100644
--- a/lib/pixman/pixman/Makefile.in
+++ b/lib/pixman/pixman/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.12.3 from Makefile.am.
+# Makefile.in generated by automake 1.12.6 from Makefile.am.
 # @configure_input@
 
 # Copyright (C) 1994-2012 Free Software Foundation, Inc.
@@ -132,14 +132,14 @@ libpixman_1_la_DEPENDENCIES = $(am__append_3) $(am__append_5) \
 am__objects_1 = pixman.lo pixman-access.lo pixman-access-accessors.lo \
 	pixman-bits-image.lo pixman-combine32.lo \
 	pixman-combine-float.lo pixman-conical-gradient.lo \
-	pixman-x86.lo pixman-mips.lo pixman-arm.lo pixman-ppc.lo \
-	pixman-edge.lo pixman-edge-accessors.lo pixman-fast-path.lo \
-	pixman-glyph.lo pixman-general.lo pixman-gradient-walker.lo \
-	pixman-image.lo pixman-implementation.lo \
-	pixman-linear-gradient.lo pixman-matrix.lo pixman-noop.lo \
-	pixman-radial-gradient.lo pixman-region16.lo \
-	pixman-region32.lo pixman-solid-fill.lo pixman-timer.lo \
-	pixman-trap.lo pixman-utils.lo
+	pixman-filter.lo pixman-x86.lo pixman-mips.lo pixman-arm.lo \
+	pixman-ppc.lo pixman-edge.lo pixman-edge-accessors.lo \
+	pixman-fast-path.lo pixman-glyph.lo pixman-general.lo \
+	pixman-gradient-walker.lo pixman-image.lo \
+	pixman-implementation.lo pixman-linear-gradient.lo \
+	pixman-matrix.lo pixman-noop.lo pixman-radial-gradient.lo \
+	pixman-region16.lo pixman-region32.lo pixman-solid-fill.lo \
+	pixman-timer.lo pixman-trap.lo pixman-utils.lo
 am__objects_2 =
 am_libpixman_1_la_OBJECTS = $(am__objects_1) $(am__objects_2)
 libpixman_1_la_OBJECTS = $(am_libpixman_1_la_OBJECTS)
@@ -162,9 +162,11 @@ libpixman_arm_neon_la_OBJECTS = $(am_libpixman_arm_neon_la_OBJECTS)
 @USE_ARM_NEON_TRUE@am_libpixman_arm_neon_la_rpath =
 libpixman_arm_simd_la_LIBADD =
 am__libpixman_arm_simd_la_SOURCES_DIST = pixman-arm-simd.c \
-	pixman-arm-common.h pixman-arm-simd-asm.S
+	pixman-arm-common.h pixman-arm-simd-asm.S \
+	pixman-arm-simd-asm-scaled.S pixman-arm-simd-asm.h
 @USE_ARM_SIMD_TRUE@am_libpixman_arm_simd_la_OBJECTS =  \
-@USE_ARM_SIMD_TRUE@	pixman-arm-simd.lo pixman-arm-simd-asm.lo
+@USE_ARM_SIMD_TRUE@	pixman-arm-simd.lo pixman-arm-simd-asm.lo \
+@USE_ARM_SIMD_TRUE@	pixman-arm-simd-asm-scaled.lo
 libpixman_arm_simd_la_OBJECTS = $(am_libpixman_arm_simd_la_OBJECTS)
 @USE_ARM_SIMD_TRUE@am_libpixman_arm_simd_la_rpath =
 libpixman_iwmmxt_la_LIBADD =
@@ -366,6 +368,8 @@ PIXMAN_VERSION_MAJOR = @PIXMAN_VERSION_MAJOR@
 PIXMAN_VERSION_MICRO = @PIXMAN_VERSION_MICRO@
 PIXMAN_VERSION_MINOR = @PIXMAN_VERSION_MINOR@
 PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
 PNG_CFLAGS = @PNG_CFLAGS@
 PNG_LIBS = @PNG_LIBS@
 PTHREAD_LDFLAGS = @PTHREAD_LDFLAGS@
@@ -442,6 +446,7 @@ libpixman_sources = \
 	pixman-combine32.c		\
 	pixman-combine-float.c		\
 	pixman-conical-gradient.c	\
+	pixman-filter.c			\
 	pixman-x86.c			\
 	pixman-mips.c			\
 	pixman-arm.c			\
@@ -515,7 +520,9 @@ EXTRA_DIST = \
 @USE_ARM_SIMD_TRUE@libpixman_arm_simd_la_SOURCES = \
 @USE_ARM_SIMD_TRUE@	pixman-arm-simd.c	\
 @USE_ARM_SIMD_TRUE@	pixman-arm-common.h	\
-@USE_ARM_SIMD_TRUE@	pixman-arm-simd-asm.S
+@USE_ARM_SIMD_TRUE@	pixman-arm-simd-asm.S   \
+@USE_ARM_SIMD_TRUE@	pixman-arm-simd-asm-scaled.S \
+@USE_ARM_SIMD_TRUE@	pixman-arm-simd-asm.h
 
 @USE_ARM_SIMD_TRUE@ASM_CFLAGS_arm_simd = 
 @USE_ARM_NEON_TRUE@libpixman_arm_neon_la_SOURCES = \
@@ -661,6 +668,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-arm-neon-asm-bilinear.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-arm-neon-asm.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-arm-neon.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-arm-simd-asm-scaled.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-arm-simd-asm.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-arm-simd.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-arm.Plo@am__quote@
@@ -671,6 +679,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-edge-accessors.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-edge.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-fast-path.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-filter.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-general.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-glyph.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-gradient-walker.Plo@am__quote@
@@ -1017,7 +1026,7 @@ uninstall-am: uninstall-libLTLIBRARIES \
 
 
 @USE_ARM_IWMMXT_TRUE@libpixman_iwmmxt_la-pixman-mmx.lo: pixman-mmx.c
-@USE_ARM_IWMMXT_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $(IWMMXT_CFLAGS) -MT libpixman_iwmmxt_la-pixman-mmx.lo -MD -MP -MF $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo -c -o libpixman_iwmmxt_la-pixman-mmx.lo `test -f 'pixman-mmx.c' || echo '$(srcdir)/'`pixman-mmx.c
+@USE_ARM_IWMMXT_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $(IWMMXT_CFLAGS) -MT libpixman_iwmmxt_la-pixman-mmx.lo -MD -MP -MF $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo -c -o libpixman_iwmmxt_la-pixman-mmx.lo `test -f 'pixman-mmx.c' || echo '$(srcdir)/'`pixman-mmx.c
 @USE_ARM_IWMMXT_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Plo
 
 @USE_ARM_IWMMXT_TRUE@libpixman-iwmmxt.la: libpixman_iwmmxt_la-pixman-mmx.lo $(libpixman_iwmmxt_la_DEPENDENCIES) 
diff --git a/lib/pixman/pixman/Makefile.sources b/lib/pixman/pixman/Makefile.sources
index 5351fb03d..c624eb9a8 100644
--- a/lib/pixman/pixman/Makefile.sources
+++ b/lib/pixman/pixman/Makefile.sources
@@ -6,6 +6,7 @@ libpixman_sources =			\
 	pixman-combine32.c		\
 	pixman-combine-float.c		\
 	pixman-conical-gradient.c	\
+	pixman-filter.c			\
 	pixman-x86.c			\
 	pixman-mips.c			\
 	pixman-arm.c			\
diff --git a/lib/pixman/pixman/pixman-arm-common.h b/lib/pixman/pixman/pixman-arm-common.h
index fa436ad77..3a7cb2bef 100644
--- a/lib/pixman/pixman/pixman-arm-common.h
+++ b/lib/pixman/pixman/pixman-arm-common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright � 2010 Nokia Corporation
+ * Copyright © 2010 Nokia Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
diff --git a/lib/pixman/pixman/pixman-arm-neon-asm.S b/lib/pixman/pixman/pixman-arm-neon-asm.S
index 42f1f57ac..187197dc3 100644
--- a/lib/pixman/pixman/pixman-arm-neon-asm.S
+++ b/lib/pixman/pixman/pixman-arm-neon-asm.S
@@ -1,5 +1,5 @@
 /*
- * Copyright � 2009 Nokia Corporation
+ * Copyright © 2009 Nokia Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
diff --git a/lib/pixman/pixman/pixman-arm-neon-asm.h b/lib/pixman/pixman/pixman-arm-neon-asm.h
index a4d60b47e..d0d92d74c 100644
--- a/lib/pixman/pixman/pixman-arm-neon-asm.h
+++ b/lib/pixman/pixman/pixman-arm-neon-asm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright � 2009 Nokia Corporation
+ * Copyright © 2009 Nokia Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -385,7 +385,7 @@
  * execute simultaneously with NEON and be completely shadowed by it. Thus
  * we get no performance overhead at all (*). This looks like a very nice
  * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher,
- * but still can implement some rather advanced prefetch logic in sofware
+ * but still can implement some rather advanced prefetch logic in software
  * for almost zero cost!
  *
  * (*) The overhead of the prefetcher is visible when running some trivial
diff --git a/lib/pixman/pixman/pixman-arm-simd-asm-scaled.S b/lib/pixman/pixman/pixman-arm-simd-asm-scaled.S
new file mode 100644
index 000000000..711099548
--- /dev/null
+++ b/lib/pixman/pixman/pixman-arm-simd-asm-scaled.S
@@ -0,0 +1,165 @@
+/*
+ * Copyright © 2008 Mozilla Corporation
+ * Copyright © 2010 Nokia Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Mozilla Corporation not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Mozilla Corporation makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Jeff Muizelaar (jeff@infidigm.net)
+ *
+ */
+
+/* Prevent the stack from becoming executable */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+	.text
+	.arch armv6
+	.object_arch armv4
+	.arm
+	.altmacro
+	.p2align 2
+
+/* Supplementary macro: opens function fname (.func, .global, label) and, on ELF, gives it hidden visibility and function type */
+.macro pixman_asm_function fname
+	.func fname	/* the code that uses this macro supplies the matching .endfunc */
+	.global fname
+#ifdef __ELF__
+	.hidden fname	/* ELF hidden visibility */
+	.type fname, %function
+#endif
+fname:
+.endm
+
+/*
+ * Note: This code is only using armv5te instructions (not even armv6),
+ *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
+ *       be split into a few variants, tuned for each microarchitecture.
+ *
+ * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
+ * have efficient write combining), it needs to be changed to use 16-byte
+ * aligned writes using STM instruction.
+ *
+ * Nearest scanline scaler macro template uses the following arguments:
+ *  fname                     - name of the function to generate
+ *  bpp_shift                 - (1 << bpp_shift) is the size of pixel in bytes
+ *  t                         - type suffix for LDR/STR instructions
+ *  prefetch_distance         - prefetch in the source image by that many
+ *                              pixels ahead
+ *  prefetch_braking_distance - stop prefetching when that many pixels are
+ *                              remaining before the end of scanline
+ */
+
+.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
+                                      prefetch_distance,        \
+                                      prefetch_braking_distance
+/* Emits function fname: writes W pixels to DST, each fetched from SRC at the position given by VX, which advances by UNIT_X per pixel */
+pixman_asm_function fname
+	W		.req	r0	/* number of pixels still to write */
+	DST		.req	r1	/* output pointer, post-incremented by every store */
+	SRC		.req	r2	/* base address for all source loads */
+	VX		.req	r3	/* source position with 16 fractional bits; the wrap loops keep it negative */
+	UNIT_X		.req	ip	/* step added to VX per pixel, loaded from [sp] on entry */
+	TMP1		.req	r4	/* byte offset of a pixel, then overwritten with the pixel itself */
+	TMP2		.req	r5	/* same role as TMP1 for the other pixel of a pair */
+	VXMASK		.req	r6	/* mask that clears the low bpp_shift bits of a byte offset */
+	PF_OFFS		.req	r7	/* position used for the pld prefetch, same format as VX */
+	SRC_WIDTH_FIXED	.req	r8	/* subtracted from VX while VX is non-negative; presumably the source width in VX units */
+
+	ldr	UNIT_X, [sp]	/* must be read before the push below moves sp */
+	push	{r4, r5, r6, r7, r8, r10}	/* 6 registers = 24 bytes; r10 is not used below (presumably saved to keep sp 8-byte aligned) */
+	mvn	VXMASK, #((1 << bpp_shift) - 1)	/* all ones except the low bpp_shift bits */
+	ldr	SRC_WIDTH_FIXED, [sp, #28]	/* 24 bytes were pushed, so this is the word at (entry sp + 4) */
+
+	/* define helper macro: copies two pixels; expects TMP1 to hold the first pixel's byte offset and leaves the next offset in TMP1 */
+	.macro	scale_2_pixels
+		ldr&t	TMP1, [SRC, TMP1]	/* fetch pixel 0 from the offset computed earlier */
+		and	TMP2, VXMASK, VX, asr #(16 - bpp_shift)	/* byte offset of pixel 1: (VX >> 16) << bpp_shift */
+		adds	VX, VX, UNIT_X	/* advance the source position; the flags drive the wrap loop */
+		str&t	TMP1, [DST], #(1 << bpp_shift)	/* store pixel 0, advance DST by one pixel */
+9:		subpls	VX, VX, SRC_WIDTH_FIXED	/* while VX >= 0, pull it back by SRC_WIDTH_FIXED */
+		bpl	9b
+
+		ldr&t	TMP2, [SRC, TMP2]	/* fetch pixel 1 */
+		and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)	/* byte offset of the pixel after this pair */
+		adds	VX, VX, UNIT_X
+		str&t	TMP2, [DST], #(1 << bpp_shift)	/* store pixel 1, advance DST by one pixel */
+9:		subpls	VX, VX, SRC_WIDTH_FIXED	/* same wrap loop as above */
+		bpl	9b
+	.endm
+
+	/* now do the scaling */
+	and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)	/* byte offset of the very first pixel */
+	adds	VX, VX, UNIT_X
+9:	subpls	VX, VX, SRC_WIDTH_FIXED	/* while VX >= 0, pull it back by SRC_WIDTH_FIXED */
+	bpl	9b
+	subs	W, W, #(8 + prefetch_braking_distance)	/* bias W so the prefetching loop ends once fewer than 8 + braking distance pixels remain */
+	blt	2f
+	/* calculate prefetch offset */
+	mov	PF_OFFS, #prefetch_distance
+	mla	PF_OFFS, UNIT_X, PF_OFFS, VX	/* PF_OFFS = VX + UNIT_X * prefetch_distance */
+1:	/* main loop, process 8 pixels per iteration with prefetch */
+	pld	[SRC, PF_OFFS, asr #(16 - bpp_shift)]
+	add	PF_OFFS, UNIT_X, lsl #3	/* move the prefetch position forward by 8 pixel steps */
+	scale_2_pixels
+	scale_2_pixels
+	scale_2_pixels
+	scale_2_pixels
+	subs	W, W, #8
+	bge	1b
+2:
+	subs	W, W, #(4 - 8 - prefetch_braking_distance)	/* removes the bias and subtracts 4: W = pixels remaining - 4 */
+	blt	2f
+1:	/* process the remaining pixels */
+	scale_2_pixels
+	scale_2_pixels
+	subs	W, W, #4
+	bge	1b
+2:
+	tst	W, #2	/* W is negative here, but its low two bits still equal (pixels remaining mod 4) */
+	beq	2f
+	scale_2_pixels
+2:
+	tst	W, #1	/* one odd pixel left? */
+	ldrne&t	TMP1, [SRC, TMP1]	/* TMP1 already holds its byte offset */
+	strne&t	TMP1, [DST]
+	/* cleanup helper macro */
+	.purgem	scale_2_pixels
+	.unreq	DST
+	.unreq	SRC
+	.unreq	W
+	.unreq	VX
+	.unreq	UNIT_X
+	.unreq	TMP1
+	.unreq	TMP2
+	.unreq	VXMASK
+	.unreq	PF_OFFS
+	.unreq  SRC_WIDTH_FIXED
+	/* return */
+	pop	{r4, r5, r6, r7, r8, r10}	/* must mirror the push at function entry */
+	bx	lr
+.endfunc
+.endm
+
+generate_nearest_scanline_func \
+    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32	/* 2-byte pixels via ldrh/strh; prefetch 80 pixels ahead, braking distance 32 */
+
+generate_nearest_scanline_func \
+    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2,  , 48, 32	/* 4-byte pixels via plain ldr/str; prefetch 48 pixels ahead, braking distance 32 */
diff --git a/lib/pixman/pixman/pixman-arm-simd-asm.S b/lib/pixman/pixman/pixman-arm-simd-asm.S
index d7cf0f5d4..c20968879 100644
--- a/lib/pixman/pixman/pixman-arm-simd-asm.S
+++ b/lib/pixman/pixman/pixman-arm-simd-asm.S
@@ -1,14 +1,14 @@
 /*
- * Copyright � 2008 Mozilla Corporation
- * Copyright © 2010 Nokia Corporation
+ * Copyright © 2012 Raspberry Pi Foundation
+ * Copyright © 2012 RISC OS Open Ltd
  *
  * Permission to use, copy, modify, distribute, and sell this software and its
  * documentation for any purpose is hereby granted without fee, provided that
  * the above copyright notice appear in all copies and that both that
  * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Mozilla Corporation not be used in
+ * documentation, and that the name of the copyright holders not be used in
  * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Mozilla Corporation makes no
+ * specific, written prior permission.  The copyright holders make no
  * representations about the suitability of this software for any purpose.  It
  * is provided "as is" without express or implied warranty.
  *
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
  * SOFTWARE.
  *
- * Author:  Jeff Muizelaar (jeff@infidigm.net)
+ * Author:  Ben Avison (bavison@riscosopen.org)
  *
  */
 
@@ -37,412 +37,577 @@
 	.altmacro
 	.p2align 2
 
-/* Supplementary macro for setting function attributes */
-.macro pixman_asm_function fname
-	.func fname
-	.global fname
-#ifdef __ELF__
-	.hidden fname
-	.type fname, %function
-#endif
-fname:
-.endm
+#include "pixman-arm-simd-asm.h"
 
-/*
- * The code below was generated by gcc 4.3.4 from the commented out
- * functions in 'pixman-arm-simd.c' file with the following optimization
- * options: "-O3 -mcpu=arm1136jf-s -fomit-frame-pointer"
- *
- * TODO: replace gcc generated code with hand tuned versions because
- * the code quality is not very good, introduce symbolic register
- * aliases for better readability and maintainability.
+/* A head macro should do all processing which results in an output of up to
+ * 16 bytes, as far as the final load instruction. The corresponding tail macro
+ * should complete the processing of the up-to-16 bytes. The calling macro will
+ * sometimes choose to insert a preload or a decrement of X between them.
+ *   cond           ARM condition code for code block
+ *   numbytes       Number of output bytes that should be generated this time
+ *   firstreg       First WK register in which to place output
+ *   unaligned_src  Whether to use non-wordaligned loads of source image
+ *   unaligned_mask Whether to use non-wordaligned loads of mask image
+ *   preload        If outputting 16 bytes causes 64 bytes to be read, whether an extra preload should be output
  */
 
-pixman_asm_function pixman_composite_add_8_8_asm_armv6
-	push	{r4, r5, r6, r7, r8, r9, r10, r11}
-	mov	r10, r1
-	sub	sp, sp, #4
-	subs	r10, r10, #1
-	mov	r11, r0
-	mov	r8, r2
-	str	r3, [sp]
-	ldr	r7, [sp, #36]
-	bcc	0f
-6:	cmp	r11, #0
-	beq	1f
-	orr	r3, r8, r7
-	tst	r3, #3
-	beq	2f
-	mov	r1, r8
-	mov	r0, r7
-	mov	r12, r11
-	b	3f
-5:	tst	r3, #3
-	beq	4f
-3:	ldrb	r2, [r0], #1
-	subs	r12, r12, #1
-	ldrb	r3, [r1]
-	uqadd8	r3, r2, r3
-	strb	r3, [r1], #1
-	orr	r3, r1, r0
-	bne	5b
-1:	ldr	r3, [sp]
-	add	r8, r8, r3
-	ldr	r3, [sp, #40]
-	add	r7, r7, r3
-10:	subs	r10, r10, #1
-	bcs	6b
-0:	add	sp, sp, #4
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
-	bx	lr
-2:	mov	r12, r11
-	mov	r1, r8
-	mov	r0, r7
-4:	cmp	r12, #3
-	subgt	r6, r12, #4
-	movgt	r9, r12
-	lsrgt	r5, r6, #2
-	addgt	r3, r5, #1
-	movgt	r12, #0
-	lslgt	r4, r3, #2
-	ble	7f
-8:	ldr	r3, [r0, r12]
-	ldr	r2, [r1, r12]
-	uqadd8	r3, r3, r2
-	str	r3, [r1, r12]
-	add	r12, r12, #4
-	cmp	r12, r4
-	bne	8b
-	sub	r3, r9, #4
-	bic	r3, r3, #3
-	add	r3, r3, #4
-	subs	r12, r6, r5, lsl #2
-	add	r1, r1, r3
-	add	r0, r0, r3
-	beq	1b
-7:	mov	r4, #0
-9:	ldrb	r3, [r1, r4]
-	ldrb	r2, [r0, r4]
-	uqadd8	r3, r2, r3
-	strb	r3, [r1, r4]
-	add	r4, r4, #1
-	cmp	r4, r12
-	bne	9b
-	ldr	r3, [sp]
-	add	r8, r8, r3
-	ldr	r3, [sp, #40]
-	add	r7, r7, r3
-	b	10b
-.endfunc
-
-pixman_asm_function pixman_composite_over_8888_8888_asm_armv6
-	push	{r4, r5, r6, r7, r8, r9, r10, r11}
-	sub	sp, sp, #20
-	cmp	r1, #0
-	mov	r12, r2
-	str	r1, [sp, #12]
-	str	r0, [sp, #16]
-	ldr	r2, [sp, #52]
-	beq	0f
-	lsl	r3, r3, #2
-	str	r3, [sp]
-	ldr	r3, [sp, #56]
-	mov	r10, #0
-	lsl	r3, r3, #2
-	str	r3, [sp, #8]
-	mov	r11, r3
-	b	1f
-6:	ldr	r11, [sp, #8]
-1:	ldr	r9, [sp]
-	mov	r0, r12
-	add	r12, r12, r9
-	mov	r1, r2
-	str	r12, [sp, #4]
-	add	r2, r2, r11
-	ldr	r12, [sp, #16]
-	ldr	r3, =0x00800080
-	ldr	r9, =0xff00ff00
-	mov	r11, #255
-	cmp	r12, #0
-	beq	4f
-5:	ldr	r5, [r1], #4
-	ldr	r4, [r0]
-	sub	r8, r11, r5, lsr #24
-	uxtb16	r6, r4
-	uxtb16	r7, r4, ror #8
-	mla	r6, r6, r8, r3
-	mla	r7, r7, r8, r3
-	uxtab16	r6, r6, r6, ror #8
-	uxtab16	r7, r7, r7, ror #8
-	and	r7, r7, r9
-	uxtab16	r6, r7, r6, ror #8
-	uqadd8	r5, r6, r5
-	str	r5, [r0], #4
-	subs	r12, r12, #1
-	bne	5b
-4:	ldr	r3, [sp, #12]
-	add	r10, r10, #1
-	cmp	r10, r3
-	ldr	r12, [sp, #4]
-	bne	6b
-0:	add	sp, sp, #20
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
-	bx	lr
-.endfunc
-
-pixman_asm_function pixman_composite_over_8888_n_8888_asm_armv6
-	push	{r4, r5, r6, r7, r8, r9, r10, r11}
-	sub	sp, sp, #28
-	cmp	r1, #0
-	str	r1, [sp, #12]
-	ldrb	r1, [sp, #71]
-	mov	r12, r2
-	str	r0, [sp, #16]
-	ldr	r2, [sp, #60]
-	str	r1, [sp, #24]
-	beq	0f
-	lsl	r3, r3, #2
-	str	r3, [sp, #20]
-	ldr	r3, [sp, #64]
-	mov	r10, #0
-	lsl	r3, r3, #2
-	str	r3, [sp, #8]
-	mov	r11, r3
-	b	1f
-5:	ldr	r11, [sp, #8]
-1:	ldr	r4, [sp, #20]
-	mov	r0, r12
-	mov	r1, r2
-	add	r12, r12, r4
-	add	r2, r2, r11
-	str	r12, [sp]
-	str	r2, [sp, #4]
-	ldr	r12, [sp, #16]
-	ldr	r2, =0x00800080
-	ldr	r3, [sp, #24]
-	mov	r11, #255
-	cmp	r12, #0
-	beq	3f
-4:	ldr	r5, [r1], #4
-	ldr	r4, [r0]
-	uxtb16	r6, r5
-	uxtb16	r7, r5, ror #8
-	mla	r6, r6, r3, r2
-	mla	r7, r7, r3, r2
-	uxtab16	r6, r6, r6, ror #8
-	uxtab16	r7, r7, r7, ror #8
-	uxtb16	r6, r6, ror #8
-	uxtb16	r7, r7, ror #8
-	orr	r5, r6, r7, lsl #8
-	uxtb16	r6, r4
-	uxtb16	r7, r4, ror #8
-	sub	r8, r11, r5, lsr #24
-	mla	r6, r6, r8, r2
-	mla	r7, r7, r8, r2
-	uxtab16	r6, r6, r6, ror #8
-	uxtab16	r7, r7, r7, ror #8
-	uxtb16	r6, r6, ror #8
-	uxtb16	r7, r7, ror #8
-	orr	r6, r6, r7, lsl #8
-	uqadd8	r5, r6, r5
-	str	r5, [r0], #4
-	subs	r12, r12, #1
-	bne	4b
-3:	ldr	r1, [sp, #12]
-	add	r10, r10, #1
-	cmp	r10, r1
-	ldr	r12, [sp]
-	ldr	r2, [sp, #4]
-	bne	5b
-0:	add	sp, sp, #28
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
-	bx	lr
-.endfunc
-
-pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
-	push	{r4, r5, r6, r7, r8, r9, r10, r11}
-	sub	sp, sp, #28
-	cmp	r1, #0
-	ldr	r9, [sp, #60]
-	str	r1, [sp, #12]
-	bic	r1, r9, #-16777216
-	str	r1, [sp, #20]
-	mov	r12, r2
-	lsr	r1, r9, #8
-	ldr	r2, [sp, #20]
-	bic	r1, r1, #-16777216
-	bic	r2, r2, #65280
-	bic	r1, r1, #65280
-	str	r2, [sp, #20]
-	str	r0, [sp, #16]
-	str	r1, [sp, #4]
-	ldr	r2, [sp, #68]
-	beq	0f
-	lsl	r3, r3, #2
-	str	r3, [sp, #24]
-	mov	r0, #0
-	b	1f
-5:	ldr	r3, [sp, #24]
-1:	ldr	r4, [sp, #72]
-	mov	r10, r12
-	mov	r1, r2
-	add	r12, r12, r3
-	add	r2, r2, r4
-	str	r12, [sp, #8]
-	str	r2, [sp]
-	ldr	r12, [sp, #16]
-	ldr	r11, =0x00800080
-	ldr	r2, [sp, #4]
-	ldr	r3, [sp, #20]
-	cmp	r12, #0
-	beq	3f
-4:	ldrb	r5, [r1], #1
-	ldr	r4, [r10]
-	mla	r6, r3, r5, r11
-	mla	r7, r2, r5, r11
-	uxtab16	r6, r6, r6, ror #8
-	uxtab16	r7, r7, r7, ror #8
-	uxtb16	r6, r6, ror #8
-	uxtb16	r7, r7, ror #8
-	orr	r5, r6, r7, lsl #8
-	uxtb16	r6, r4
-	uxtb16	r7, r4, ror #8
-	mvn	r8, r5
-	lsr	r8, r8, #24
-	mla	r6, r6, r8, r11
-	mla	r7, r7, r8, r11
-	uxtab16	r6, r6, r6, ror #8
-	uxtab16	r7, r7, r7, ror #8
-	uxtb16	r6, r6, ror #8
-	uxtb16	r7, r7, ror #8
-	orr	r6, r6, r7, lsl #8
-	uqadd8	r5, r6, r5
-	str	r5, [r10], #4
-	subs	r12, r12, #1
-	bne	4b
-3:	ldr	r4, [sp, #12]
-	add	r0, r0, #1
-	cmp	r0, r4
-	ldr	r12, [sp, #8]
-	ldr	r2, [sp]
-	bne	5b
-0:	add	sp, sp, #28
-	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
-	bx	lr
-.endfunc
+.macro blit_init /* Init hook for the plain-copy functions: no constants to load, only the line_saved_regs declaration */
+        line_saved_regs STRIDE_D, STRIDE_S /* blit_inner_loop aliases these two as WK4/WK5; line_saved_regs is defined in pixman-arm-simd-asm.h (presumably arranges for them to be saved per line - confirm) */
+.endm
 
-/*
- * Note: This code is only using armv5te instructions (not even armv6),
- *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
- *       be split into a few variants, tuned for each microarchitecture.
- *
- * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
- * have efficient write combining), it needs to be changed to use 16-byte
- * aligned writes using STM instruction.
- *
- * Nearest scanline scaler macro template uses the following arguments:
- *  fname                     - name of the function to generate
- *  bpp_shift                 - (1 << bpp_shift) is the size of pixel in bytes
- *  t                         - type suffix for LDR/STR instructions
- *  prefetch_distance         - prefetch in the source image by that many
- *                              pixels ahead
- *  prefetch_braking_distance - stop prefetching when that many pixels are
- *                              remaining before the end of scanline
+.macro blit_process_head   cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload /* Head step of a plain copy: just the source load; unaligned_mask and preload are not used */
+        pixld   cond, numbytes, firstreg, SRC, unaligned_src
+.endm
+
+.macro blit_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment /* Copy loop moving 32 bytes per pass; only unaligned_src is used, the other parameters are accepted but ignored */
+    WK4     .req    STRIDE_D /* borrow four more registers as WK4-WK7 for the duration of the loop */
+    WK5     .req    STRIDE_S
+    WK6     .req    MASK
+    WK7     .req    STRIDE_M
+110:    pixld   , 16, 0, SRC, unaligned_src /* first 16 bytes, starting at WK0 */
+        pixld   , 16, 4, SRC, unaligned_src /* next 16 bytes, starting at WK4 */
+        pld     [SRC, SCRATCH] /* preload at SRC plus the offset held in SCRATCH */
+        pixst   , 16, 0, DST
+        pixst   , 16, 4, DST
+        subs    X, X, #32*8/src_bpp /* 32 bytes per pass; 32*8/src_bpp is that in pixels if src_bpp is bits per pixel (defined outside this view) */
+        bhs     110b /* repeat until the subtraction borrows */
+    .unreq  WK4
+    .unreq  WK5
+    .unreq  WK6
+    .unreq  WK7
+.endm
+
+generate_composite_function \
+    pixman_composite_src_8888_8888_asm_armv6, 32, 0, 32, /* presumably src, mask, dst bits per pixel - confirm in pixman-arm-simd-asm.h */ \
+    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
+    4, /* prefetch distance */ \
+    blit_init, /* init */ \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    blit_process_head, /* process head: source load only */ \
+    nop_macro, /* process tail: nothing to do for a copy; the extra argument that follows is presumably a replacement inner loop - confirm */ \
+    blit_inner_loop
+
+generate_composite_function \
+    pixman_composite_src_0565_0565_asm_armv6, 16, 0, 16, /* presumably src, mask, dst bits per pixel - confirm in pixman-arm-simd-asm.h */ \
+    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
+    4, /* prefetch distance */ \
+    blit_init, /* init */ \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    blit_process_head, /* process head: source load only */ \
+    nop_macro, /* process tail: nothing to do for a copy; the extra argument that follows is presumably a replacement inner loop - confirm */ \
+    blit_inner_loop
+
+generate_composite_function \
+    pixman_composite_src_8_8_asm_armv6, 8, 0, 8, /* presumably src, mask, dst bits per pixel - confirm in pixman-arm-simd-asm.h */ \
+    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \
+    3, /* prefetch distance */ \
+    blit_init, /* init */ \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    blit_process_head, /* process head: source load only */ \
+    nop_macro, /* process tail: nothing to do for a copy; the extra argument that follows is presumably a replacement inner loop - confirm */ \
+    blit_inner_loop
+
+/******************************************************************************/
+
+.macro src_n_8888_init /* Init hook for the 32bpp fill: one word replicated into the four registers that fill_process_tail stores */
+        ldr     SRC, [sp, #ARGS_STACK_OFFSET] /* word read from the stack argument area; ARGS_STACK_OFFSET is defined outside this view */
+        mov     STRIDE_S, SRC
+        mov     MASK, SRC
+        mov     STRIDE_M, SRC
+.endm
+
+.macro src_n_0565_init /* Init hook for the 16bpp fill: one halfword duplicated to fill a word, then replicated into four registers */
+        ldrh    SRC, [sp, #ARGS_STACK_OFFSET] /* 16-bit value, zero-extended */
+        orr     SRC, SRC, lsl #16 /* copy the halfword into the upper half as well */
+        mov     STRIDE_S, SRC
+        mov     MASK, SRC
+        mov     STRIDE_M, SRC
+.endm
+
+.macro src_n_8_init /* Init hook for the 8bpp fill: one byte duplicated to fill a word, then replicated into four registers */
+        ldrb    SRC, [sp, #ARGS_STACK_OFFSET] /* 8-bit value, zero-extended */
+        orr     SRC, SRC, lsl #8 /* two copies of the byte */
+        orr     SRC, SRC, lsl #16 /* four copies of the byte */
+        mov     STRIDE_S, SRC
+        mov     MASK, SRC
+        mov     STRIDE_M, SRC
+.endm
+
+.macro fill_process_tail  cond, numbytes, firstreg /* Tail step of a fill: store numbytes from the pre-filled registers; firstreg is ignored */
+    WK4     .req    SRC /* the four registers set up by the src_n_*_init macros */
+    WK5     .req    STRIDE_S
+    WK6     .req    MASK
+    WK7     .req    STRIDE_M
+        pixst   cond, numbytes, 4, DST /* always starts at WK4, whatever firstreg was passed */
+    .unreq  WK4
+    .unreq  WK5
+    .unreq  WK6
+    .unreq  WK7
+.endm
+
+generate_composite_function \
+    pixman_composite_src_n_8888_asm_armv6, 0, 0, 32, /* presumably src, mask, dst bits per pixel, with 0 for the source because it is a constant rather than an image - confirm in pixman-arm-simd-asm.h */ \
+    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \
+    0, /* prefetch distance doesn't apply */ \
+    src_n_8888_init /* init; NOTE(review): several arguments in this invocation are separated by whitespace only, not commas - confirm the assembler in use accepts that */ \
+    nop_macro, /* newline */ \
+    nop_macro /* cleanup */ \
+    nop_macro /* process head; the final argument is the process tail, which does the store */ \
+    fill_process_tail
+
+generate_composite_function \
+    pixman_composite_src_n_0565_asm_armv6, 0, 0, 16, /* presumably src, mask, dst bits per pixel, with 0 for the source because it is a constant rather than an image - confirm in pixman-arm-simd-asm.h */ \
+    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \
+    0, /* prefetch distance doesn't apply */ \
+    src_n_0565_init /* init; NOTE(review): several arguments in this invocation are separated by whitespace only, not commas - confirm the assembler in use accepts that */ \
+    nop_macro, /* newline */ \
+    nop_macro /* cleanup */ \
+    nop_macro /* process head; the final argument is the process tail, which does the store */ \
+    fill_process_tail
+
+generate_composite_function \
+    pixman_composite_src_n_8_asm_armv6, 0, 0, 8, /* presumably src, mask, dst bits per pixel, with 0 for the source because it is a constant rather than an image - confirm in pixman-arm-simd-asm.h */ \
+    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \
+    0, /* prefetch distance doesn't apply */ \
+    src_n_8_init /* init; NOTE(review): several arguments in this invocation are separated by whitespace only, not commas - confirm the assembler in use accepts that */ \
+    nop_macro, /* newline */ \
+    nop_macro /* cleanup */ \
+    nop_macro /* process head; the final argument is the process tail, which does the store */ \
+    fill_process_tail
+
+/******************************************************************************/
+
+.macro src_x888_8888_pixel, cond, reg /* Force the top byte of WK<reg> to 0xFF, under condition code cond if one is given */
+        orr&cond WK&reg, WK&reg, #0xFF000000
+.endm
+
+.macro pixman_composite_src_x888_8888_process_head   cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload /* Head step: just the source load; unaligned_mask and preload are not used */
+        pixld   cond, numbytes, firstreg, SRC, unaligned_src
+.endm
+
+.macro pixman_composite_src_x888_8888_process_tail   cond, numbytes, firstreg /* Tail step: set the top byte of each loaded pixel, one register per 4 bytes */
+        src_x888_8888_pixel cond, %(firstreg+0)
+ .if numbytes >= 8 /* second pixel exists */
+        src_x888_8888_pixel cond, %(firstreg+1)
+  .if numbytes == 16 /* third and fourth pixels exist */
+        src_x888_8888_pixel cond, %(firstreg+2)
+        src_x888_8888_pixel cond, %(firstreg+3)
+  .endif
+ .endif
+.endm
+
+generate_composite_function \
+    pixman_composite_src_x888_8888_asm_armv6, 32, 0, 32, /* presumably src, mask, dst bits per pixel - confirm in pixman-arm-simd-asm.h */ \
+    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_SCRATCH, \
+    3, /* prefetch distance */ \
+    nop_macro, /* init */ \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    pixman_composite_src_x888_8888_process_head, /* process head; the final argument is the process tail */ \
+    pixman_composite_src_x888_8888_process_tail
+
+/******************************************************************************/
+
+.macro src_0565_8888_init /* Init hook for 0565 to 8888 expansion: load the two constants and prime the GE flags */
+        /* Hold loop invariants in MASK and STRIDE_M */
+        ldr     MASK, =0x07E007E0 /* selects the 6-bit green field of both packed 0565 pixels */
+        mov     STRIDE_M, #0xFF000000 /* top byte all ones, ORed into every output pixel */
+        /* Set GE[3:0] to 1010 so SEL instructions do what we want */
+        ldr     SCRATCH, =0x80008000
+        uadd8   SCRATCH, SCRATCH, SCRATCH /* bytes 1 and 3 carry out (0x80+0x80), bytes 0 and 2 do not */
+.endm
+
+.macro src_0565_8888_2pixels, reg1, reg2 /* Expand the two packed 0565 pixels in WK<reg1>: the low-halfword pixel ends up in WK<reg1>, the high-halfword pixel in WK<reg2>, both with top byte 0xFF; needs MASK, STRIDE_M and GE=1010 from src_0565_8888_init, clobbers SCRATCH */
+        and     SCRATCH, WK&reg1, MASK             @ 00000GGGGGG0000000000gggggg00000
+        bic     WK&reg2, WK&reg1, MASK             @ RRRRR000000BBBBBrrrrr000000bbbbb
+        orr     SCRATCH, SCRATCH, SCRATCH, lsr #6  @ 00000GGGGGGGGGGGG0000ggggggggggg
+        mov     WK&reg1, WK&reg2, lsl #16          @ rrrrr000000bbbbb0000000000000000
+        mov     SCRATCH, SCRATCH, ror #19          @ GGGG0000ggggggggggg00000GGGGGGGG
+        bic     WK&reg2, WK&reg2, WK&reg1, lsr #16 @ RRRRR000000BBBBB0000000000000000
+        orr     WK&reg1, WK&reg1, WK&reg1, lsr #5  @ rrrrrrrrrr0bbbbbbbbbb00000000000
+        orr     WK&reg2, WK&reg2, WK&reg2, lsr #5  @ RRRRRRRRRR0BBBBBBBBBB00000000000
+        pkhtb   WK&reg1, WK&reg1, WK&reg1, asr #5  @ rrrrrrrr--------bbbbbbbb--------
+        sel     WK&reg1, WK&reg1, SCRATCH          @ rrrrrrrrggggggggbbbbbbbb--------
+        mov     SCRATCH, SCRATCH, ror #16          @ ggg00000GGGGGGGGGGGG0000gggggggg
+        pkhtb   WK&reg2, WK&reg2, WK&reg2, asr #5  @ RRRRRRRR--------BBBBBBBB--------
+        sel     WK&reg2, WK&reg2, SCRATCH          @ RRRRRRRRGGGGGGGGBBBBBBBB--------
+        orr     WK&reg1, STRIDE_M, WK&reg1, lsr #8 @ 11111111rrrrrrrrggggggggbbbbbbbb
+        orr     WK&reg2, STRIDE_M, WK&reg2, lsr #8 @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB
+.endm
+
+/* This version doesn't need STRIDE_M, but is one instruction longer.
+   It would however be preferable for an XRGB target, since we could knock off the last 2 instructions, but is that a common case?
+        and     SCRATCH, WK&reg1, MASK             @ 00000GGGGGG0000000000gggggg00000
+        bic     WK&reg1, WK&reg1, MASK             @ RRRRR000000BBBBBrrrrr000000bbbbb
+        orr     SCRATCH, SCRATCH, SCRATCH, lsr #6  @ 00000GGGGGGGGGGGG0000ggggggggggg
+        mov     WK&reg2, WK&reg1, lsr #16          @ 0000000000000000RRRRR000000BBBBB
+        mov     SCRATCH, SCRATCH, ror #27          @ GGGGGGGGGGGG0000ggggggggggg00000
+        bic     WK&reg1, WK&reg1, WK&reg2, lsl #16 @ 0000000000000000rrrrr000000bbbbb
+        mov     WK&reg2, WK&reg2, lsl #3           @ 0000000000000RRRRR000000BBBBB000
+        mov     WK&reg1, WK&reg1, lsl #3           @ 0000000000000rrrrr000000bbbbb000
+        orr     WK&reg2, WK&reg2, WK&reg2, lsr #5  @ 0000000000000RRRRRRRRRR0BBBBBBBB
+        orr     WK&reg1, WK&reg1, WK&reg1, lsr #5  @ 0000000000000rrrrrrrrrr0bbbbbbbb
+        pkhbt   WK&reg2, WK&reg2, WK&reg2, lsl #5  @ --------RRRRRRRR--------BBBBBBBB
+        pkhbt   WK&reg1, WK&reg1, WK&reg1, lsl #5  @ --------rrrrrrrr--------bbbbbbbb
+        sel     WK&reg2, SCRATCH, WK&reg2          @ --------RRRRRRRRGGGGGGGGBBBBBBBB
+        sel     WK&reg1, SCRATCH, WK&reg1          @ --------rrrrrrrrggggggggbbbbbbbb
+        orr     WK&reg2, WK&reg2, #0xFF000000      @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB
+        orr     WK&reg1, WK&reg1, #0xFF000000      @ 11111111rrrrrrrrggggggggbbbbbbbb
+*/
+
+.macro src_0565_8888_1pixel, reg /* Expand the single 0565 pixel in the low halfword of WK<reg> to 8888 with top byte 0xFF; needs MASK and GE=1010 from src_0565_8888_init, clobbers SCRATCH */
+        bic     SCRATCH, WK&reg, MASK              @ 0000000000000000rrrrr000000bbbbb
+        and     WK&reg, WK&reg, MASK               @ 000000000000000000000gggggg00000
+        mov     SCRATCH, SCRATCH, lsl #3           @ 0000000000000rrrrr000000bbbbb000
+        mov     WK&reg, WK&reg, lsl #5             @ 0000000000000000gggggg0000000000
+        orr     SCRATCH, SCRATCH, SCRATCH, lsr #5  @ 0000000000000rrrrrrrrrr0bbbbbbbb
+        orr     WK&reg, WK&reg, WK&reg, lsr #6     @ 000000000000000gggggggggggg00000
+        pkhbt   SCRATCH, SCRATCH, SCRATCH, lsl #5  @ --------rrrrrrrr--------bbbbbbbb
+        sel     WK&reg, WK&reg, SCRATCH            @ --------rrrrrrrrggggggggbbbbbbbb
+        orr     WK&reg, WK&reg, #0xFF000000        @ 11111111rrrrrrrrggggggggbbbbbbbb
+.endm
+
+.macro src_0565_8888_process_head   cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload /* Head step: load half as many source bytes as output bytes (16bpp in, 32bpp out); cond is not applied to the loads */
+ .if numbytes == 16 /* 8 source bytes; presumably split across WK<firstreg> and WK<firstreg+2> so the tail can expand each pair in place - confirm against pixldst */
+        pixldst ld,, 8, firstreg, %(firstreg+2),,, SRC, unaligned_src
+ .elseif numbytes == 8 /* 4 source bytes: two pixels in one register */
+        pixld   , 4, firstreg, SRC, unaligned_src
+ .elseif numbytes == 4 /* 2 source bytes: a single pixel */
+        pixld   , 2, firstreg, SRC, unaligned_src
+ .endif
+.endm
+
+.macro src_0565_8888_process_tail   cond, numbytes, firstreg /* Tail step: expand the loaded 0565 pixels to 8888 in the WK registers; cond is not used */
+ .if numbytes == 16 /* four pixels: one packed pair in firstreg, one in firstreg+2 */
+        src_0565_8888_2pixels firstreg, %(firstreg+1)
+        src_0565_8888_2pixels %(firstreg+2), %(firstreg+3)
+ .elseif numbytes == 8 /* two pixels packed in firstreg */
+        src_0565_8888_2pixels firstreg, %(firstreg+1)
+ .else
+        src_0565_8888_1pixel firstreg
+ .endif
+.endm
+
+generate_composite_function \
+    pixman_composite_src_0565_8888_asm_armv6, 16, 0, 32, /* presumably src, mask, dst bits per pixel - confirm in pixman-arm-simd-asm.h */ \
+    FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER, \
+    3, /* prefetch distance */ \
+    src_0565_8888_init, /* init */ \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    src_0565_8888_process_head, /* process head; the final argument is the process tail */ \
+    src_0565_8888_process_tail
+
+/******************************************************************************/
+
+.macro add_8_8_8pixels  cond, dst1, dst2 /* Per-byte unsigned saturating add of the eight bytes in MASK and STRIDE_M into WK<dst1> and WK<dst2> */
+        uqadd8&cond  WK&dst1, WK&dst1, MASK
+        uqadd8&cond  WK&dst2, WK&dst2, STRIDE_M
+.endm
+
+.macro add_8_8_4pixels  cond, dst /* Per-byte unsigned saturating add of the four bytes in MASK into WK<dst> */
+        uqadd8&cond  WK&dst, WK&dst, MASK
+.endm
+
+.macro add_8_8_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload /* Head step: source bytes go to WK4/WK5 (MASK/STRIDE_M), destination bytes to WK<firstreg> onward */
+    WK4     .req    MASK
+    WK5     .req    STRIDE_M
+ .if numbytes == 16 /* WK4/WK5 hold only 8 source bytes, so add the first half here and reload for the tail */
+        pixld   cond, 8, 4, SRC, unaligned_src
+        pixld   cond, 16, firstreg, DST, 0
+        add_8_8_8pixels cond, firstreg, %(firstreg+1)
+        pixld   cond, 8, 4, SRC, unaligned_src
+ .else
+        pixld   cond, numbytes, 4, SRC, unaligned_src
+        pixld   cond, numbytes, firstreg, DST, 0
+ .endif
+    .unreq  WK4
+    .unreq  WK5
+.endm
+
+.macro add_8_8_process_tail  cond, numbytes, firstreg /* Tail step: add the source bytes still held in MASK/STRIDE_M into the destination registers */
+ .if numbytes == 16 /* first 8 bytes were already added in the head; finish the second 8 */
+        add_8_8_8pixels cond, %(firstreg+2), %(firstreg+3)
+ .elseif numbytes == 8
+        add_8_8_8pixels cond, firstreg, %(firstreg+1)
+ .else
+        add_8_8_4pixels cond, firstreg
+ .endif
+.endm
+
+generate_composite_function \
+    pixman_composite_add_8_8_asm_armv6, 8, 0, 8, /* presumably src, mask, dst bits per pixel - confirm in pixman-arm-simd-asm.h */ \
+    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_PRESERVES_SCRATCH, \
+    2, /* prefetch distance */ \
+    nop_macro, /* init */ \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    add_8_8_process_head, /* process head; the final argument is the process tail */ \
+    add_8_8_process_tail
+
+/******************************************************************************/
+
+.macro over_8888_8888_init /* Init hook for over_8888_8888: rounding constant in MASK, GE flags primed, line registers declared */
+        /* Hold loop invariant in MASK */
+        ldr     MASK, =0x00800080
+        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+        uadd8   SCRATCH, MASK, MASK /* bytes 0 and 2 carry out (0x80+0x80), bytes 1 and 3 do not */
+        line_saved_regs STRIDE_D, STRIDE_S, ORIG_W
+.endm
+
+.macro over_8888_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload /* Head step: source pixels to WK<4+firstreg> onward, destination pixels to WK<firstreg> onward; cond is not used */
+    WK4     .req    STRIDE_D
+    WK5     .req    STRIDE_S
+    WK6     .req    STRIDE_M
+    WK7     .req    ORIG_W
+        pixld   , numbytes, %(4+firstreg), SRC, unaligned_src
+        pixld   , numbytes, firstreg, DST, 0
+    .unreq  WK4
+    .unreq  WK5
+    .unreq  WK6
+    .unreq  WK7
+.endm
+
+.macro over_8888_8888_check_transparent  numbytes, reg0, reg1, reg2, reg3 /* Leaves Z set only if every source pixel register examined is zero; reg1 is tested when numbytes > 4, reg2/reg3 when numbytes > 8 */
+        /* Since these colours are premultiplied by alpha, only 0 indicates transparent (any other colour with 0 in the alpha byte is luminous) */
+        teq     WK&reg0, #0
+ .if numbytes > 4
+        teqeq   WK&reg1, #0 /* each teqeq executes only while Z is still set */
+  .if numbytes > 8
+        teqeq   WK&reg2, #0
+        teqeq   WK&reg3, #0
+  .endif
+ .endif
+.endm
+
+.macro over_8888_8888_prepare  next /* Reduce WK<next> to its top byte (bits 31:24), the form over_8888_8888_1pixel expects in its src register */
+        mov     WK&next, WK&next, lsr #24
+.endm
+
+.macro over_8888_8888_1pixel src, dst, offset, next /* Blend one pixel. src: WK index holding the source top byte already shifted down; dst: WK index of the destination pixel; offset: byte offset from SRC used to reload the full source pixel; next: WK index of the following source pixel. Needs MASK = 0x00800080 and GE=0101, clobbers SCRATCH */
+        /* src = destination component multiplier */
+        rsb     WK&src, WK&src, #255 /* 255 minus the value left in WK<src> by the earlier lsr #24 */
+        /* Split even/odd bytes of dst into SCRATCH/dst */
+        uxtb16  SCRATCH, WK&dst
+        uxtb16  WK&dst, WK&dst, ror #8
+        /* Multiply through, adding 0.5 to the upper byte of result for rounding */
+        mla     SCRATCH, SCRATCH, WK&src, MASK
+        mla     WK&dst, WK&dst, WK&src, MASK
+        /* Where we would have had a stall between the result of the first MLA and the shifter input,
+         * reload the complete source pixel */
+        ldr     WK&src, [SRC, #offset]
+        /* Multiply by 257/256 to approximate 256/255 */
+        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
+        /* In this stall, start processing the next pixel */
+ .if offset < -4 /* offset -4 is the last pixel of the group, so there is no next pixel to pre-shift */
+        mov     WK&next, WK&next, lsr #24
+ .endif
+        uxtab16 WK&dst, WK&dst, WK&dst, ror #8
+        /* Recombine even/odd bytes of multiplied destination */
+        mov     SCRATCH, SCRATCH, ror #8
+        sel     WK&dst, SCRATCH, WK&dst /* GE=0101: bytes 0 and 2 from SCRATCH, bytes 1 and 3 from WK<dst> */
+        /* Saturated add of source to multiplied destination */
+        uqadd8  WK&dst, WK&dst, WK&src
+.endm
+
+.macro over_8888_8888_process_tail  cond, numbytes, firstreg /* Tail step: blend each pixel and store, or skip both when every source pixel is zero; cond is not used */
+    WK4     .req    STRIDE_D
+    WK5     .req    STRIDE_S
+    WK6     .req    STRIDE_M
+    WK7     .req    ORIG_W
+        over_8888_8888_check_transparent numbytes, %(4+firstreg), %(5+firstreg), %(6+firstreg), %(7+firstreg)
+        beq     10f /* all source pixels zero: no blend and no store */
+        over_8888_8888_prepare  %(4+firstreg)
+ .set PROCESS_REG, firstreg
+ .set PROCESS_OFF, -numbytes /* the first source pixel is reloaded from numbytes below SRC */
+ .rept numbytes / 4 /* one pass per 32-bit pixel */
+        over_8888_8888_1pixel %(4+PROCESS_REG), %(0+PROCESS_REG), PROCESS_OFF, %(5+PROCESS_REG)
+  .set PROCESS_REG, PROCESS_REG+1
+  .set PROCESS_OFF, PROCESS_OFF+4
+ .endr
+        pixst   , numbytes, firstreg, DST
+10:
+    .unreq  WK4
+    .unreq  WK5
+    .unreq  WK6
+    .unreq  WK7
+.endm
+
+generate_composite_function \
+    pixman_composite_over_8888_8888_asm_armv6, 32, 0, 32 /* presumably src, mask, dst bits per pixel - confirm in pixman-arm-simd-asm.h; NOTE(review): no comma after this group or after the flags line - confirm the assembler in use accepts whitespace separation */ \
+    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \
+    2, /* prefetch distance */ \
+    over_8888_8888_init, /* init */ \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    over_8888_8888_process_head, /* process head; the final argument is the process tail, which does the store */ \
+    over_8888_8888_process_tail
+
+/******************************************************************************/
+
+/* Multiply each byte of a word by a byte.
+ * Useful when there aren't any obvious ways to fill the stalls with other instructions.
+ * word  Register containing 4 bytes
+ * byte  Register containing byte multiplier (bits 8-31 must be 0)
+ * tmp   Scratch register
+ * half  Register containing the constant 0x00800080
+ * GE[3:0] bits must contain 0101
  */
+.macro mul_8888_8  word, byte, tmp, half /* Result replaces word; tmp is clobbered */
+        /* Split even/odd bytes of word apart */
+        uxtb16  tmp, word
+        uxtb16  word, word, ror #8
+        /* Multiply bytes together with rounding, then by 257/256 */
+        mla     tmp, tmp, byte, half
+        mla     word, word, byte, half /* 1 stall follows */
+        uxtab16 tmp, tmp, tmp, ror #8  /* 1 stall follows */
+        uxtab16 word, word, word, ror #8
+        /* Recombine bytes */
+        mov     tmp, tmp, ror #8 /* even-byte results move down into bytes 0 and 2 */
+        sel     word, tmp, word /* GE=0101: bytes 0 and 2 from tmp, bytes 1 and 3 from word */
+.endm
+
+/******************************************************************************/
+
+.macro over_8888_n_8888_init /* Init hook for over_8888_n_8888: mask top byte in MASK, rounding constant in STRIDE_M, GE flags primed */
+        /* Mask is constant */
+        ldr     MASK, [sp, #ARGS_STACK_OFFSET+8] /* read from the stack argument area; ARGS_STACK_OFFSET is defined outside this view */
+        /* Hold loop invariant in STRIDE_M */
+        ldr     STRIDE_M, =0x00800080
+        /* We only want the alpha bits of the constant mask */
+        mov     MASK, MASK, lsr #24
+        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+        uadd8   SCRATCH, STRIDE_M, STRIDE_M /* bytes 0 and 2 carry out (0x80+0x80), bytes 1 and 3 do not */
+        line_saved_regs Y, STRIDE_D, STRIDE_S, ORIG_W
+.endm
+
+.macro over_8888_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload /* Head step: source pixels to WK<4+(firstreg%2)> onward, destination pixels to WK<firstreg> onward; cond is not used */
+    WK4     .req    Y
+    WK5     .req    STRIDE_D
+    WK6     .req    STRIDE_S
+    WK7     .req    ORIG_W
+        pixld   , numbytes, %(4+(firstreg%2)), SRC, unaligned_src
+        pixld   , numbytes, firstreg, DST, 0
+    .unreq  WK4
+    .unreq  WK5
+    .unreq  WK6
+    .unreq  WK7
+.endm
+
+.macro over_8888_n_8888_1pixel src, dst /* Blend one pixel: WK<src> is scaled by MASK, WK<dst> by 255 minus the top byte of that result, then the two are added; expects WK6 = 255, clobbers WK7 and SCRATCH */
+        mul_8888_8  WK&src, MASK, SCRATCH, STRIDE_M
+        sub     WK7, WK6, WK&src, lsr #24 /* WK7 = WK6 minus the top byte of the scaled source */
+        mul_8888_8  WK&dst, WK7, SCRATCH, STRIDE_M
+        uqadd8  WK&dst, WK&dst, WK&src /* per-byte unsigned saturating add */
+.endm
+
+.macro over_8888_n_8888_process_tail  cond, numbytes, firstreg /* Tail step: blend each pixel and store, or skip both when every source pixel is zero; cond is not used */
+    WK4     .req    Y
+    WK5     .req    STRIDE_D
+    WK6     .req    STRIDE_S
+    WK7     .req    ORIG_W
+        over_8888_8888_check_transparent numbytes, %(4+(firstreg%2)), %(5+(firstreg%2)), %(6+firstreg), %(7+firstreg)
+        beq     10f /* all source pixels zero: no blend and no store */
+        mov     WK6, #255 /* constant that over_8888_n_8888_1pixel subtracts the source top byte from */
+ .set PROCESS_REG, firstreg
+ .rept numbytes / 4 /* one pass per 32-bit pixel */
+  .if numbytes == 16 && PROCESS_REG == 2
+        /* We're using WK6 and WK7 as temporaries, so half way through
+         * 4 pixels, reload the second two source pixels but this time
+         * into WK4 and WK5 */
+        ldmdb   SRC, {WK4, WK5} /* the two words immediately below SRC, no writeback */
+  .endif
+        over_8888_n_8888_1pixel  %(4+(PROCESS_REG%2)), %(PROCESS_REG)
+  .set PROCESS_REG, PROCESS_REG+1
+ .endr
+        pixst   , numbytes, firstreg, DST
+10:
+    .unreq  WK4
+    .unreq  WK5
+    .unreq  WK6
+    .unreq  WK7
+.endm
+
+generate_composite_function \
+    pixman_composite_over_8888_n_8888_asm_armv6, 32, 0, 32 /* presumably src, mask, dst bits per pixel - confirm in pixman-arm-simd-asm.h; NOTE(review): no comma after this group or after the flags line - confirm the assembler in use accepts whitespace separation */ \
+    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \
+    2, /* prefetch distance */ \
+    over_8888_n_8888_init, /* init */ \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    over_8888_n_8888_process_head, /* process head; the final argument is the process tail, which does the store */ \
+    over_8888_n_8888_process_tail
+
+/******************************************************************************/
+
+.macro over_n_8_8888_init /* Init hook for over_n_8_8888: constant source pre-split into even and odd bytes, GE flags primed */
+        /* Source is constant, but splitting it into even/odd bytes is a loop invariant */
+        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
+        /* Not enough registers to hold this constant, but we still use it here to set GE[3:0] */
+        ldr     SCRATCH, =0x00800080
+        uxtb16  STRIDE_S, SRC /* STRIDE_S = bytes 0 and 2 of the source, one per halfword */
+        uxtb16  SRC, SRC, ror #8 /* SRC = bytes 1 and 3 of the source, one per halfword */
+        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+        uadd8   SCRATCH, SCRATCH, SCRATCH
+        line_saved_regs Y, STRIDE_D, STRIDE_M, ORIG_W
+.endm
+
+.macro over_n_8_8888_newline /* Newline hook: put the 0x00800080 rounding constant in STRIDE_D, where over_n_8_8888_1pixel expects it */
+        ldr     STRIDE_D, =0x00800080
+        b       1f /* jump over the literal pool that .ltorg emits here */
+ .ltorg
+1:
+.endm
+
+.macro over_n_8_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload /* Head step: mask bytes to WK4 (STRIDE_M), destination pixels to WK<firstreg> onward; cond is not used */
+    WK4     .req    STRIDE_M
+        pixld   , numbytes/4, 4, MASK, unaligned_mask /* one 8-bit mask byte per 32-bit destination pixel */
+        pixld   , numbytes, firstreg, DST, 0
+    .unreq  WK4
+.endm
+
+.macro over_n_8_8888_1pixel src, dst /* Blend one pixel. src: index of the mask byte within WK4; dst: WK index of the destination pixel. Uses the split source in STRIDE_S/SRC and STRIDE_D as the rounding constant; clobbers Y, ORIG_W and SCRATCH */
+        uxtb    Y, WK4, ror #src*8 /* Y = mask byte number src */
+        /* Trailing part of multiplication of source */
+        mla     SCRATCH, STRIDE_S, Y, STRIDE_D /* even source bytes times mask, plus STRIDE_D */
+        mla     Y, SRC, Y, STRIDE_D /* odd source bytes times mask, plus STRIDE_D */
+        mov     ORIG_W, #255
+        uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
+        uxtab16 Y, Y, Y, ror #8
+        mov     SCRATCH, SCRATCH, ror #8
+        sub     ORIG_W, ORIG_W, Y, lsr #24 /* 255 minus the top byte of the masked source */
+        sel     Y, SCRATCH, Y /* Y = masked source with even and odd bytes recombined */
+        /* Then multiply the destination */
+        mul_8888_8  WK&dst, ORIG_W, SCRATCH, STRIDE_D
+        uqadd8  WK&dst, WK&dst, Y
+.endm
+
+.macro over_n_8_8888_process_tail  cond, numbytes, firstreg /* Tail step: blend each pixel and store, or skip both when the loaded mask word is zero; cond is not used */
+    WK4     .req    STRIDE_M
+        teq     WK4, #0
+        beq     10f /* mask word is zero: no blend and no store */
+ .set PROCESS_REG, firstreg
+ .rept numbytes / 4 /* one pass per 32-bit pixel */
+        over_n_8_8888_1pixel  %(PROCESS_REG-firstreg), %(PROCESS_REG)
+  .set PROCESS_REG, PROCESS_REG+1
+ .endr
+        pixst   , numbytes, firstreg, DST
+10:
+    .unreq  WK4
+.endm
+
+generate_composite_function \
+    pixman_composite_over_n_8_8888_asm_armv6, 0, 8, 32 /* presumably src, mask, dst bits per pixel, with 0 for the constant source - confirm in pixman-arm-simd-asm.h; NOTE(review): no comma after this group or after the flags line - confirm the assembler in use accepts whitespace separation */ \
+    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \
+    2, /* prefetch distance */ \
+    over_n_8_8888_init, /* init */ \
+    over_n_8_8888_newline, /* newline */ \
+    nop_macro, /* cleanup */ \
+    over_n_8_8888_process_head, /* process head; the final argument is the process tail, which does the store */ \
+    over_n_8_8888_process_tail
+
+/******************************************************************************/
 
-.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
-                                      prefetch_distance,        \
-                                      prefetch_braking_distance
-
-pixman_asm_function fname
-	W		.req	r0
-	DST		.req	r1
-	SRC		.req	r2
-	VX		.req	r3
-	UNIT_X		.req	ip
-	TMP1		.req	r4
-	TMP2		.req	r5
-	VXMASK		.req	r6
-	PF_OFFS		.req	r7
-	SRC_WIDTH_FIXED	.req	r8
-
-	ldr	UNIT_X, [sp]
-	push	{r4, r5, r6, r7, r8, r10}
-	mvn	VXMASK, #((1 << bpp_shift) - 1)
-	ldr	SRC_WIDTH_FIXED, [sp, #28]
-
-	/* define helper macro */
-	.macro	scale_2_pixels
-		ldr&t	TMP1, [SRC, TMP1]
-		and	TMP2, VXMASK, VX, asr #(16 - bpp_shift)
-		adds	VX, VX, UNIT_X
-		str&t	TMP1, [DST], #(1 << bpp_shift)
-9:		subpls	VX, VX, SRC_WIDTH_FIXED
-		bpl	9b
-
-		ldr&t	TMP2, [SRC, TMP2]
-		and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
-		adds	VX, VX, UNIT_X
-		str&t	TMP2, [DST], #(1 << bpp_shift)
-9:		subpls	VX, VX, SRC_WIDTH_FIXED
-		bpl	9b
-	.endm
-
-	/* now do the scaling */
-	and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
-	adds	VX, VX, UNIT_X
-9:	subpls	VX, VX, SRC_WIDTH_FIXED
-	bpl	9b
-	subs	W, W, #(8 + prefetch_braking_distance)
-	blt	2f
-	/* calculate prefetch offset */
-	mov	PF_OFFS, #prefetch_distance
-	mla	PF_OFFS, UNIT_X, PF_OFFS, VX
-1:	/* main loop, process 8 pixels per iteration with prefetch */
-	pld	[SRC, PF_OFFS, asr #(16 - bpp_shift)]
-	add	PF_OFFS, UNIT_X, lsl #3
-	scale_2_pixels
-	scale_2_pixels
-	scale_2_pixels
-	scale_2_pixels
-	subs	W, W, #8
-	bge	1b
-2:
-	subs	W, W, #(4 - 8 - prefetch_braking_distance)
-	blt	2f
-1:	/* process the remaining pixels */
-	scale_2_pixels
-	scale_2_pixels
-	subs	W, W, #4
-	bge	1b
-2:
-	tst	W, #2
-	beq	2f
-	scale_2_pixels
-2:
-	tst	W, #1
-	ldrne&t	TMP1, [SRC, TMP1]
-	strne&t	TMP1, [DST]
-	/* cleanup helper macro */
-	.purgem	scale_2_pixels
-	.unreq	DST
-	.unreq	SRC
-	.unreq	W
-	.unreq	VX
-	.unreq	UNIT_X
-	.unreq	TMP1
-	.unreq	TMP2
-	.unreq	VXMASK
-	.unreq	PF_OFFS
-	.unreq  SRC_WIDTH_FIXED
-	/* return */
-	pop	{r4, r5, r6, r7, r8, r10}
-	bx	lr
-.endfunc
-.endm
-
-generate_nearest_scanline_func \
-    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
-
-generate_nearest_scanline_func \
-    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2,  , 48, 32
diff --git a/lib/pixman/pixman/pixman-arm-simd-asm.h b/lib/pixman/pixman/pixman-arm-simd-asm.h
new file mode 100644
index 000000000..65436062b
--- /dev/null
+++ b/lib/pixman/pixman/pixman-arm-simd-asm.h
@@ -0,0 +1,908 @@
+/*
+ * Copyright © 2012 Raspberry Pi Foundation
+ * Copyright © 2012 RISC OS Open Ltd
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of the copyright holders not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  The copyright holders make no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Ben Avison (bavison@riscosopen.org)
+ *
+ */
+
+/*
+ * Because the alignment of pixel data to cachelines, and even the number of
+ * cachelines per row can vary from row to row, and because of the need to
+ * preload each scanline once and only once, this prefetch strategy treats
+ * each row of pixels independently. When a pixel row is long enough, there
+ * are three distinct phases of prefetch:
+ * * an inner loop section, where each time a cacheline of data is
+ *    processed, another cacheline is preloaded (the exact distance ahead is
+ *    determined empirically using profiling results from lowlevel-blt-bench)
+ * * a leading section, where enough cachelines are preloaded to ensure no
+ *    cachelines escape being preloaded when the inner loop starts
+ * * a trailing section, where a limited number (0 or more) of cachelines
+ *    are preloaded to deal with data (if any) that hangs off the end of the
+ *    last iteration of the inner loop, plus any trailing bytes that were not
+ *    enough to make up one whole iteration of the inner loop
+ * 
+ * There are (in general) three distinct code paths, selected between
+ * depending upon how long the pixel row is. If it is long enough that there
+ * is at least one iteration of the inner loop (as described above) then
+ * this is described as the "wide" case. If it is shorter than that, but
+ * there are still enough bytes output that there is at least one 16-byte-
+ * long, 16-byte-aligned write to the destination (the optimum type of
+ * write), then this is the "medium" case. If it is not even this long, then
+ * this is the "narrow" case, and there is no attempt to align writes to
+ * 16-byte boundaries. In the "medium" and "narrow" cases, all the
+ * cachelines containing data from the pixel row are prefetched up-front.
+ */
+
+/*
+ * Determine whether we put the arguments on the stack for debugging.
+ */
+#undef DEBUG_PARAMS
+
+/*
+ * Bit flags for the 'generate_composite_function' macro, which are used
+ * to tune the behavior of the generated functions.
+ */
+.set FLAG_DST_WRITEONLY,         0
+.set FLAG_DST_READWRITE,         1
+.set FLAG_COND_EXEC,             0
+.set FLAG_BRANCH_OVER,           2
+.set FLAG_PROCESS_PRESERVES_PSR, 0
+.set FLAG_PROCESS_CORRUPTS_PSR,  4
+.set FLAG_PROCESS_DOESNT_STORE,  0
+.set FLAG_PROCESS_DOES_STORE,    8 /* usually because it needs to conditionally skip it */
+.set FLAG_NO_SPILL_LINE_VARS,        0
+.set FLAG_SPILL_LINE_VARS_WIDE,      16
+.set FLAG_SPILL_LINE_VARS_NON_WIDE,  32
+.set FLAG_SPILL_LINE_VARS,           48
+.set FLAG_PROCESS_CORRUPTS_SCRATCH,  0
+.set FLAG_PROCESS_PRESERVES_SCRATCH, 64
+
+/*
+ * Offset into stack where mask and source pointer/stride can be accessed.
+ */
+#ifdef DEBUG_PARAMS
+.set ARGS_STACK_OFFSET,        (9*4+9*4)
+#else
+.set ARGS_STACK_OFFSET,        (9*4)
+#endif
+
+/*
+ * Constants for selecting preferable prefetch type.
+ */
+.set PREFETCH_TYPE_NONE,       0
+.set PREFETCH_TYPE_STANDARD,   1
+
+/*
+ * Definitions of macros for load/store of pixel data.
+ */
+
+.macro pixldst op, cond=al, numbytes, reg0, reg1, reg2, reg3, base, unaligned=0
+ .if numbytes == 16
+  .if unaligned == 1
+        op&r&cond    WK&reg0, [base], #4
+        op&r&cond    WK&reg1, [base], #4
+        op&r&cond    WK&reg2, [base], #4
+        op&r&cond    WK&reg3, [base], #4
+  .else
+        op&m&cond&ia base!, {WK&reg0,WK&reg1,WK&reg2,WK&reg3}
+  .endif
+ .elseif numbytes == 8
+  .if unaligned == 1
+        op&r&cond    WK&reg0, [base], #4
+        op&r&cond    WK&reg1, [base], #4
+  .else
+        op&m&cond&ia base!, {WK&reg0,WK&reg1}
+  .endif
+ .elseif numbytes == 4
+        op&r&cond    WK&reg0, [base], #4
+ .elseif numbytes == 2
+        op&r&cond&h  WK&reg0, [base], #2
+ .elseif numbytes == 1
+        op&r&cond&b  WK&reg0, [base], #1
+ .else
+  .error "unsupported size: numbytes"
+ .endif
+.endm
+
+.macro pixst_baseupdated cond, numbytes, reg0, reg1, reg2, reg3, base
+ .if numbytes == 16
+        stm&cond&db base, {WK&reg0,WK&reg1,WK&reg2,WK&reg3}
+ .elseif numbytes == 8
+        stm&cond&db base, {WK&reg0,WK&reg1}
+ .elseif numbytes == 4
+        str&cond    WK&reg0, [base, #-4]
+ .elseif numbytes == 2
+        str&cond&h  WK&reg0, [base, #-2]
+ .elseif numbytes == 1
+        str&cond&b  WK&reg0, [base, #-1]
+ .else
+  .error "unsupported size: numbytes"
+ .endif
+.endm
+
+.macro pixld cond, numbytes, firstreg, base, unaligned
+        pixldst ld, cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base, unaligned
+.endm
+
+.macro pixst cond, numbytes, firstreg, base
+ .if (flags) & FLAG_DST_READWRITE
+        pixst_baseupdated cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base
+ .else
+        pixldst st, cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base
+ .endif
+.endm
+
+.macro PF a, x:vararg
+ .if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_STANDARD)
+        a x
+ .endif
+.endm
+
+
+.macro preload_leading_step1  bpp, ptr, base
+/* If the destination is already 16-byte aligned, then we need to preload
+ * between 0 and prefetch_distance (inclusive) cache lines ahead so there
+ * are no gaps when the inner loop starts.
+ */
+ .if bpp > 0
+        PF  bic,    ptr, base, #31
+  .set OFFSET, 0
+  .rept prefetch_distance+1
+        PF  pld,    [ptr, #OFFSET]
+   .set OFFSET, OFFSET+32
+  .endr
+ .endif
+.endm
+
+.macro preload_leading_step2  bpp, bpp_shift, ptr, base
+/* However, if the destination is not 16-byte aligned, we may need to
+ * preload more cache lines than that. The question we need to ask is:
+ * are the bytes corresponding to the leading pixels more than the amount
+ * by which the source pointer will be rounded down for preloading, and if
+ * so, by how many cache lines? Effectively, we want to calculate
+ *     leading_bytes = ((-dst)&15)*src_bpp/dst_bpp
+ *     inner_loop_offset = (src+leading_bytes)&31
+ *     extra_needed = leading_bytes - inner_loop_offset
+ * and test if extra_needed is <= 0, <= 32, or > 32 (where > 32 is only
+ * possible when there are 4 src bytes for every 1 dst byte).
+ */
+ .if bpp > 0
+  .ifc base,DST
+        /* The test can be simplified further when preloading the destination */
+        PF  tst,    base, #16
+        PF  beq,    61f
+  .else
+   .if bpp/dst_w_bpp == 4
+        PF  add,    SCRATCH, base, WK0, lsl #bpp_shift-dst_bpp_shift
+        PF  and,    SCRATCH, SCRATCH, #31
+        PF  rsb,    SCRATCH, SCRATCH, WK0, lsl #bpp_shift-dst_bpp_shift
+        PF  sub,    SCRATCH, SCRATCH, #1    /* so now ranges are -16..-1 / 0..31 / 32..63 */
+        PF  movs,   SCRATCH, SCRATCH, lsl #32-6 /* so this sets     NC   /  nc   /   Nc   */
+        PF  bcs,    61f
+        PF  bpl,    60f
+        PF  pld,    [ptr, #32*(prefetch_distance+2)]
+   .else
+        PF  mov,    SCRATCH, base, lsl #32-5
+        PF  add,    SCRATCH, SCRATCH, WK0, lsl #32-5+bpp_shift-dst_bpp_shift
+        PF  rsbs,   SCRATCH, SCRATCH, WK0, lsl #32-5+bpp_shift-dst_bpp_shift
+        PF  bls,    61f
+   .endif
+  .endif
+60:     PF  pld,    [ptr, #32*(prefetch_distance+1)]
+61:
+ .endif
+.endm
+
+#define IS_END_OF_GROUP(INDEX,SIZE) ((SIZE) < 2 || ((INDEX) & ~((INDEX)+1)) & ((SIZE)/2))
+.macro preload_middle   bpp, base, scratch_holds_offset
+ .if bpp > 0
+        /* prefetch distance = 256/bpp, stm distance = 128/dst_w_bpp */
+  .if IS_END_OF_GROUP(SUBBLOCK,256/128*dst_w_bpp/bpp)
+   .if scratch_holds_offset
+        PF  pld,    [base, SCRATCH]
+   .else
+        PF  bic,    SCRATCH, base, #31
+        PF  pld,    [SCRATCH, #32*prefetch_distance]
+   .endif
+  .endif
+ .endif
+.endm
+
+.macro preload_trailing  bpp, bpp_shift, base
+ .if bpp > 0
+  .if bpp*pix_per_block > 256
+        /* Calculations are more complex if more than one fetch per block */
+        PF  and,    WK1, base, #31
+        PF  add,    WK1, WK1, WK0, lsl #bpp_shift
+        PF  add,    WK1, WK1, #32*(bpp*pix_per_block/256-1)*(prefetch_distance+1)
+        PF  bic,    SCRATCH, base, #31
+80:     PF  pld,    [SCRATCH, #32*(prefetch_distance+1)]
+        PF  add,    SCRATCH, SCRATCH, #32
+        PF  subs,   WK1, WK1, #32
+        PF  bhi,    80b
+  .else
+        /* If exactly one fetch per block, then we need either 0, 1 or 2 extra preloads */
+        PF  mov,    SCRATCH, base, lsl #32-5
+        PF  adds,   SCRATCH, SCRATCH, X, lsl #32-5+bpp_shift
+        PF  adceqs, SCRATCH, SCRATCH, #0
+        /* The instruction above has two effects: ensures Z is only
+         * set if C was clear (so Z indicates that both shifted quantities
+         * were 0), and clears C if Z was set (so C indicates that the sum
+         * of the shifted quantities was greater and not equal to 32) */
+        PF  beq,    82f
+        PF  bic,    SCRATCH, base, #31
+        PF  bcc,    81f
+        PF  pld,    [SCRATCH, #32*(prefetch_distance+2)]
+81:     PF  pld,    [SCRATCH, #32*(prefetch_distance+1)]
+82:
+  .endif
+ .endif
+.endm
+
+
+.macro preload_line    narrow_case, bpp, bpp_shift, base
+/* "narrow_case" - just means that the macro was invoked from the "narrow"
+ *    code path rather than the "medium" one - because in the narrow case,
+ *    the row of pixels is known to output no more than 30 bytes, then
+ *    (assuming the source pixels are no wider than the destination
+ *    pixels) they cannot possibly straddle more than 2 32-byte cachelines,
+ *    meaning there's no need for a loop.
+ * "bpp" - number of bits per pixel in the channel (source, mask or
+ *    destination) that's being preloaded, or 0 if this channel is not used
+ *    for reading
+ * "bpp_shift" - log2 of ("bpp"/8) (except if "bpp"=0 of course)
+ * "base" - base address register of channel to preload (SRC, MASK or DST)
+ */
+ .if bpp > 0
+  .if narrow_case && (bpp <= dst_w_bpp)
+        /* In these cases, each line for each channel is in either 1 or 2 cache lines */
+        PF  bic,    WK0, base, #31
+        PF  pld,    [WK0]
+        PF  add,    WK1, base, X, LSL #bpp_shift
+        PF  sub,    WK1, WK1, #1
+        PF  bic,    WK1, WK1, #31
+        PF  cmp,    WK1, WK0
+        PF  beq,    90f
+        PF  pld,    [WK1]
+90:
+  .else
+        PF  bic,    WK0, base, #31
+        PF  pld,    [WK0]
+        PF  add,    WK1, base, X, lsl #bpp_shift
+        PF  sub,    WK1, WK1, #1
+        PF  bic,    WK1, WK1, #31
+        PF  cmp,    WK1, WK0
+        PF  beq,    92f
+91:     PF  add,    WK0, WK0, #32
+        PF  cmp,    WK0, WK1
+        PF  pld,    [WK0]
+        PF  bne,    91b
+92:
+  .endif
+ .endif
+.endm
+
+
+.macro conditional_process1_helper  cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx
+        process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0
+ .if decrementx
+        sub&cond X, X, #8*numbytes/dst_w_bpp
+ .endif
+        process_tail  cond, numbytes, firstreg
+ .if !((flags) & FLAG_PROCESS_DOES_STORE)
+        pixst   cond, numbytes, firstreg, DST
+ .endif
+.endm
+
+.macro conditional_process1  cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx
+ .if (flags) & FLAG_BRANCH_OVER
+  .ifc cond,mi
+        bpl     100f
+  .endif
+  .ifc cond,cs
+        bcc     100f
+  .endif
+  .ifc cond,ne
+        beq     100f
+  .endif
+        conditional_process1_helper  , process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx
+100:
+ .else
+        conditional_process1_helper  cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx
+ .endif
+.endm
+
+.macro conditional_process2  test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx
+ .if (flags) & (FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE)
+        /* Can't interleave reads and writes */
+        test
+        conditional_process1  cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx
+  .if (flags) & FLAG_PROCESS_CORRUPTS_PSR
+        test
+  .endif
+        conditional_process1  cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx
+ .else
+        /* Can interleave reads and writes for better scheduling */
+        test
+        process_head  cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0
+        process_head  cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0
+  .if decrementx
+        sub&cond1 X, X, #8*numbytes1/dst_w_bpp
+        sub&cond2 X, X, #8*numbytes2/dst_w_bpp
+  .endif
+        process_tail  cond1, numbytes1, firstreg1
+        process_tail  cond2, numbytes2, firstreg2
+        pixst   cond1, numbytes1, firstreg1, DST
+        pixst   cond2, numbytes2, firstreg2, DST
+ .endif
+.endm
+
+
+.macro test_bits_1_0_ptr
+        movs    SCRATCH, WK0, lsl #32-1  /* C,N = bits 1,0 of DST */
+.endm
+
+.macro test_bits_3_2_ptr
+        movs    SCRATCH, WK0, lsl #32-3  /* C,N = bits 3, 2 of DST */
+.endm
+
+.macro leading_15bytes  process_head, process_tail
+        /* On entry, WK0 bits 0-3 = number of bytes until destination is 16-byte aligned */
+        /* Use unaligned loads in all cases for simplicity */
+ .if dst_w_bpp == 8
+        conditional_process2  test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, 1
+ .elseif dst_w_bpp == 16
+        test_bits_1_0_ptr
+        conditional_process1  cs, process_head, process_tail, 2, 2, 1, 1, 1
+ .endif
+        conditional_process2  test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, 1
+.endm
+
+.macro test_bits_3_2_pix
+        movs    SCRATCH, X, lsl #dst_bpp_shift+32-3  /* C,N = bits 3,2 of (X << dst_bpp_shift) */
+.endm
+
+.macro test_bits_1_0_pix
+ .if dst_w_bpp == 8
+        movs    SCRATCH, X, lsl #dst_bpp_shift+32-1  /* C,N = bits 1,0 of (X << dst_bpp_shift) */
+ .else
+        movs    SCRATCH, X, lsr #1  /* C = bit 0 of X; N is always clear after lsr #1 */
+ .endif
+.endm
+
+.macro trailing_15bytes  process_head, process_tail, unaligned_src, unaligned_mask
+        conditional_process2  test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0
+ .if dst_w_bpp == 16
+        test_bits_1_0_pix
+        conditional_process1  cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0
+ .elseif dst_w_bpp == 8
+        conditional_process2  test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0
+ .endif
+.endm
+
+
+.macro wide_case_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment
+110:
+ .set SUBBLOCK, 0 /* this is a count of STMs; there can be up to 8 STMs per block */
+ .rept pix_per_block*dst_w_bpp/128
+        process_head  , 16, 0, unaligned_src, unaligned_mask, 1
+  .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
+        preload_middle  src_bpp, SRC, 1
+  .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
+        preload_middle  mask_bpp, MASK, 1
+  .else
+        preload_middle  src_bpp, SRC, 0
+        preload_middle  mask_bpp, MASK, 0
+  .endif
+  .if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0)
+        /* Because we know that writes are 16-byte aligned, it's relatively easy to ensure that
+         * destination prefetches are 32-byte aligned. It's also the easiest channel to offset
+         * preloads for, to achieve staggered prefetches for multiple channels, because there are
+         * always two STMs per prefetch, so there is always an opposite STM on which to put the
+         * preload. Note, no need to BIC the base register here */
+        PF  pld,    [DST, #32*prefetch_distance - dst_alignment]
+  .endif
+        process_tail  , 16, 0
+  .if !((flags) & FLAG_PROCESS_DOES_STORE)
+        pixst   , 16, 0, DST
+  .endif
+  .set SUBBLOCK, SUBBLOCK+1
+ .endr
+        subs    X, X, #pix_per_block
+        bhs     110b
+.endm
+
+.macro wide_case_inner_loop_and_trailing_pixels  process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask
+        /* Destination now 16-byte aligned; we have at least one block before we have to stop preloading */
+ .if dst_r_bpp > 0
+        tst     DST, #16
+        bne     111f
+        process_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, 16
+        b       112f
+111:
+ .endif
+        process_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, 0
+112:
+        /* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */
+ .if (src_bpp*pix_per_block > 256) || (mask_bpp*pix_per_block > 256) || (dst_r_bpp*pix_per_block > 256)
+        PF  and,    WK0, X, #pix_per_block-1
+ .endif
+        preload_trailing  src_bpp, src_bpp_shift, SRC
+        preload_trailing  mask_bpp, mask_bpp_shift, MASK
+        preload_trailing  dst_r_bpp, dst_bpp_shift, DST
+        add     X, X, #(prefetch_distance+2)*pix_per_block - 128/dst_w_bpp
+        /* The remainder of the line is handled identically to the medium case */
+        medium_case_inner_loop_and_trailing_pixels  process_head, process_tail,, exit_label, unaligned_src, unaligned_mask
+.endm
+
+.macro medium_case_inner_loop_and_trailing_pixels  process_head, process_tail, unused, exit_label, unaligned_src, unaligned_mask
+120:
+        process_head  , 16, 0, unaligned_src, unaligned_mask, 0
+        process_tail  , 16, 0
+ .if !((flags) & FLAG_PROCESS_DOES_STORE)
+        pixst   , 16, 0, DST
+ .endif
+        subs    X, X, #128/dst_w_bpp
+        bhs     120b
+        /* Trailing pixels */
+        tst     X, #128/dst_w_bpp - 1
+        beq     exit_label
+        trailing_15bytes  process_head, process_tail, unaligned_src, unaligned_mask
+.endm
+
+.macro narrow_case_inner_loop_and_trailing_pixels  process_head, process_tail, unused, exit_label, unaligned_src, unaligned_mask
+        tst     X, #16*8/dst_w_bpp
+        conditional_process1  ne, process_head, process_tail, 16, 0, unaligned_src, unaligned_mask, 0
+        /* Trailing pixels */
+        /* In narrow case, it's relatively unlikely to be aligned, so let's do without a branch here */
+        trailing_15bytes  process_head, process_tail, unaligned_src, unaligned_mask
+.endm
+
+.macro switch_on_alignment  action, process_head, process_tail, process_inner_loop, exit_label
+ /* Note that if we're reading the destination, it's already guaranteed to be aligned at this point */
+ .if mask_bpp == 8 || mask_bpp == 16
+        tst     MASK, #3
+        bne     141f
+ .endif
+  .if src_bpp == 8 || src_bpp == 16
+        tst     SRC, #3
+        bne     140f
+  .endif
+        action  process_head, process_tail, process_inner_loop, exit_label, 0, 0
+  .if src_bpp == 8 || src_bpp == 16
+        b       exit_label
+140:
+        action  process_head, process_tail, process_inner_loop, exit_label, 1, 0
+  .endif
+ .if mask_bpp == 8 || mask_bpp == 16
+        b       exit_label
+141:
+  .if src_bpp == 8 || src_bpp == 16
+        tst     SRC, #3
+        bne     142f
+  .endif
+        action  process_head, process_tail, process_inner_loop, exit_label, 0, 1
+  .if src_bpp == 8 || src_bpp == 16
+        b       exit_label
+142:
+        action  process_head, process_tail, process_inner_loop, exit_label, 1, 1
+  .endif
+ .endif
+.endm
+
+
+.macro end_of_line      restore_x, vars_spilled, loop_label, last_one
+ .if vars_spilled
+        /* Sadly, GAS doesn't seem to have an equivalent of the DCI directive? */
+        /* This is ldmia sp,{} */
+        .word   0xE89D0000 | LINE_SAVED_REGS
+ .endif
+        subs    Y, Y, #1
+ .if vars_spilled
+  .if (LINE_SAVED_REGS) & (1<<1)
+        str     Y, [sp]
+  .endif
+ .endif
+        add     DST, DST, STRIDE_D
+ .if src_bpp > 0
+        add     SRC, SRC, STRIDE_S
+ .endif
+ .if mask_bpp > 0
+        add     MASK, MASK, STRIDE_M
+ .endif
+ .if restore_x
+        mov     X, ORIG_W
+ .endif
+        bhs     loop_label
+ .ifc "last_one",""
+  .if vars_spilled
+        b       197f
+  .else
+        b       198f
+  .endif
+ .else
+  .if (!vars_spilled) && ((flags) & FLAG_SPILL_LINE_VARS)
+        b       198f
+  .endif
+ .endif
+.endm
+
+
+.macro generate_composite_function fname, \
+                                   src_bpp_, \
+                                   mask_bpp_, \
+                                   dst_w_bpp_, \
+                                   flags_, \
+                                   prefetch_distance_, \
+                                   init, \
+                                   newline, \
+                                   cleanup, \
+                                   process_head, \
+                                   process_tail, \
+                                   process_inner_loop
+
+ .func fname
+ .global fname
+ /* For ELF format also set function visibility to hidden */
+#ifdef __ELF__
+ .hidden fname
+ .type fname, %function
+#endif
+
+/*
+ * Make some macro arguments globally visible and accessible
+ * from other macros
+ */
+ .set src_bpp, src_bpp_
+ .set mask_bpp, mask_bpp_
+ .set dst_w_bpp, dst_w_bpp_
+ .set flags, flags_
+ .set prefetch_distance, prefetch_distance_
+
+/*
+ * Select prefetch type for this function.
+ */
+ .if prefetch_distance == 0
+  .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
+ .else
+  .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_STANDARD
+ .endif
+
+ .if src_bpp == 32
+  .set src_bpp_shift, 2
+ .elseif src_bpp == 24
+  .set src_bpp_shift, 0
+ .elseif src_bpp == 16
+  .set src_bpp_shift, 1
+ .elseif src_bpp == 8
+  .set src_bpp_shift, 0
+ .elseif src_bpp == 0
+  .set src_bpp_shift, -1
+ .else
+  .error "requested src bpp (src_bpp) is not supported"
+ .endif
+
+ .if mask_bpp == 32
+  .set mask_bpp_shift, 2
+ .elseif mask_bpp == 24
+  .set mask_bpp_shift, 0
+ .elseif mask_bpp == 8
+  .set mask_bpp_shift, 0
+ .elseif mask_bpp == 0
+  .set mask_bpp_shift, -1
+ .else
+  .error "requested mask bpp (mask_bpp) is not supported"
+ .endif
+
+ .if dst_w_bpp == 32
+  .set dst_bpp_shift, 2
+ .elseif dst_w_bpp == 24
+  .set dst_bpp_shift, 0
+ .elseif dst_w_bpp == 16
+  .set dst_bpp_shift, 1
+ .elseif dst_w_bpp == 8
+  .set dst_bpp_shift, 0
+ .else
+  .error "requested dst bpp (dst_w_bpp) is not supported"
+ .endif
+
+ .if (((flags) & FLAG_DST_READWRITE) != 0)
+  .set dst_r_bpp, dst_w_bpp
+ .else
+  .set dst_r_bpp, 0
+ .endif
+
+ .set pix_per_block, 16*8/dst_w_bpp
+ .if src_bpp != 0
+  .if 32*8/src_bpp > pix_per_block
+   .set pix_per_block, 32*8/src_bpp
+  .endif
+ .endif
+ .if mask_bpp != 0
+  .if 32*8/mask_bpp > pix_per_block
+   .set pix_per_block, 32*8/mask_bpp
+  .endif
+ .endif
+ .if dst_r_bpp != 0
+  .if 32*8/dst_r_bpp > pix_per_block
+   .set pix_per_block, 32*8/dst_r_bpp
+  .endif
+ .endif
+
+/* The standard entry conditions set up by pixman-arm-common.h are:
+ * r0 = width (pixels)
+ * r1 = height (rows)
+ * r2 = pointer to top-left pixel of destination
+ * r3 = destination stride (pixels)
+ * [sp] = source pixel value, or pointer to top-left pixel of source
+ * [sp,#4] = 0 or source stride (pixels)
+ * The following arguments are unused for non-mask operations
+ * [sp,#8] = mask pixel value, or pointer to top-left pixel of mask
+ * [sp,#12] = 0 or mask stride (pixels)
+ */
+
+/*
+ * Assign symbolic names to registers
+ */
+    X           .req    r0  /* pixels to go on this line */
+    Y           .req    r1  /* lines to go */
+    DST         .req    r2  /* destination pixel pointer */
+    STRIDE_D    .req    r3  /* destination stride (bytes, minus width) */
+    SRC         .req    r4  /* source pixel pointer */
+    STRIDE_S    .req    r5  /* source stride (bytes, minus width) */
+    MASK        .req    r6  /* mask pixel pointer (if applicable) */
+    STRIDE_M    .req    r7  /* mask stride (bytes, minus width) */
+    WK0         .req    r8  /* pixel data registers */
+    WK1         .req    r9
+    WK2         .req    r10
+    WK3         .req    r11
+    SCRATCH     .req    r12
+    ORIG_W      .req    r14 /* width (pixels) */
+
+fname:
+        push    {r4-r11, lr}        /* save all registers */
+
+        subs    Y, Y, #1
+        blo     199f
+
+#ifdef DEBUG_PARAMS
+        sub     sp, sp, #9*4
+#endif
+
+ .if src_bpp > 0
+        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
+        ldr     STRIDE_S, [sp, #ARGS_STACK_OFFSET+4]
+ .endif
+ .if mask_bpp > 0
+        ldr     MASK, [sp, #ARGS_STACK_OFFSET+8]
+        ldr     STRIDE_M, [sp, #ARGS_STACK_OFFSET+12]
+ .endif
+        
+#ifdef DEBUG_PARAMS
+        add     Y, Y, #1
+        stmia   sp, {r0-r7,pc}
+        sub     Y, Y, #1
+#endif
+
+        init
+        
+        lsl     STRIDE_D, #dst_bpp_shift /* stride in bytes */
+        sub     STRIDE_D, STRIDE_D, X, lsl #dst_bpp_shift
+ .if src_bpp > 0
+        lsl     STRIDE_S, #src_bpp_shift
+        sub     STRIDE_S, STRIDE_S, X, lsl #src_bpp_shift
+ .endif
+ .if mask_bpp > 0
+        lsl     STRIDE_M, #mask_bpp_shift
+        sub     STRIDE_M, STRIDE_M, X, lsl #mask_bpp_shift
+ .endif
+ 
+        /* Are we not even wide enough to have one 16-byte aligned 16-byte block write? */
+        cmp     X, #2*16*8/dst_w_bpp - 1
+        blo     170f
+ .if src_bpp || mask_bpp || dst_r_bpp /* Wide and medium cases are the same for fill */
+        /* To preload ahead on the current line, we need at least (prefetch_distance+2) 32-byte blocks on all prefetch channels */
+        cmp     X, #(prefetch_distance+3)*pix_per_block - 1
+        blo     160f
+
+        /* Wide case */
+        /* Adjust X so that the decrement instruction can also test for
+         * inner loop termination. We want it to stop when there are
+         * (prefetch_distance+1) complete blocks to go. */
+        sub     X, X, #(prefetch_distance+2)*pix_per_block
+        mov     ORIG_W, X
+  .if (flags) & FLAG_SPILL_LINE_VARS_WIDE
+        /* This is stmdb sp!,{} */
+        .word   0xE92D0000 | LINE_SAVED_REGS
+  .endif
+151:    /* New line */
+        newline
+        preload_leading_step1  src_bpp, WK1, SRC
+        preload_leading_step1  mask_bpp, WK2, MASK
+        preload_leading_step1  dst_r_bpp, WK3, DST
+        
+        tst     DST, #15
+        beq     154f
+        rsb     WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */
+  .if (src_bpp != 0 && src_bpp != 2*dst_w_bpp) || (mask_bpp != 0 && mask_bpp != 2*dst_w_bpp)
+        PF  and,    WK0, WK0, #15
+  .endif
+
+        preload_leading_step2  src_bpp, src_bpp_shift, WK1, SRC
+        preload_leading_step2  mask_bpp, mask_bpp_shift, WK2, MASK
+        preload_leading_step2  dst_r_bpp, dst_bpp_shift, WK3, DST
+
+        leading_15bytes  process_head, process_tail
+        
+154:    /* Destination now 16-byte aligned; we have at least one prefetch on each channel as well as at least one 16-byte output block */
+ .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
+        and     SCRATCH, SRC, #31
+        rsb     SCRATCH, SCRATCH, #32*prefetch_distance
+ .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
+        and     SCRATCH, MASK, #31
+        rsb     SCRATCH, SCRATCH, #32*prefetch_distance
+ .endif
+ .ifc "process_inner_loop",""
+        switch_on_alignment  wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, wide_case_inner_loop, 157f
+ .else
+        switch_on_alignment  wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, process_inner_loop, 157f
+ .endif
+
+157:    /* Check for another line */
+        end_of_line 1, %((flags) & FLAG_SPILL_LINE_VARS_WIDE), 151b
+ .endif
+
+ .ltorg
+
+160:    /* Medium case */
+        mov     ORIG_W, X
+ .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE
+        /* This is stmdb sp!,{} */
+        .word   0xE92D0000 | LINE_SAVED_REGS
+ .endif
+161:    /* New line */
+        newline
+        preload_line 0, src_bpp, src_bpp_shift, SRC  /* in: X, corrupts: WK0-WK1 */
+        preload_line 0, mask_bpp, mask_bpp_shift, MASK
+        preload_line 0, dst_r_bpp, dst_bpp_shift, DST
+        
+        sub     X, X, #128/dst_w_bpp     /* simplifies inner loop termination */
+        tst     DST, #15
+        beq     164f
+        rsb     WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */
+        
+        leading_15bytes  process_head, process_tail
+        
+164:    /* Destination now 16-byte aligned; we have at least one 16-byte output block */
+        switch_on_alignment  medium_case_inner_loop_and_trailing_pixels, process_head, process_tail,, 167f
+        
+167:    /* Check for another line */
+        end_of_line 1, %((flags) & FLAG_SPILL_LINE_VARS_NON_WIDE), 161b
+
+ .ltorg
+
+170:    /* Narrow case, less than 31 bytes, so no guarantee of at least one 16-byte block */
+ .if dst_w_bpp < 32
+        mov     ORIG_W, X
+ .endif
+ .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE
+        /* This is stmdb sp!,{} */
+        .word   0xE92D0000 | LINE_SAVED_REGS
+ .endif
+171:    /* New line */
+        newline
+        preload_line 1, src_bpp, src_bpp_shift, SRC  /* in: X, corrupts: WK0-WK1 */
+        preload_line 1, mask_bpp, mask_bpp_shift, MASK
+        preload_line 1, dst_r_bpp, dst_bpp_shift, DST
+        
+ .if dst_w_bpp == 8
+        tst     DST, #3
+        beq     174f
+172:    subs    X, X, #1
+        blo     177f
+        process_head  , 1, 0, 1, 1, 0
+        process_tail  , 1, 0
+  .if !((flags) & FLAG_PROCESS_DOES_STORE)
+        pixst   , 1, 0, DST
+  .endif
+        tst     DST, #3
+        bne     172b
+ .elseif dst_w_bpp == 16
+        tst     DST, #2
+        beq     174f
+        subs    X, X, #1
+        blo     177f
+        process_head  , 2, 0, 1, 1, 0
+        process_tail  , 2, 0
+  .if !((flags) & FLAG_PROCESS_DOES_STORE)
+        pixst   , 2, 0, DST
+  .endif
+ .endif
+
+174:    /* Destination now 4-byte aligned; we have 0 or more output bytes to go */
+        switch_on_alignment  narrow_case_inner_loop_and_trailing_pixels, process_head, process_tail,, 177f
+
+177:    /* Check for another line */
+        end_of_line %(dst_w_bpp < 32), %((flags) & FLAG_SPILL_LINE_VARS_NON_WIDE), 171b, last_one
+
+197:
+ .if (flags) & FLAG_SPILL_LINE_VARS
+        add     sp, sp, #LINE_SAVED_REG_COUNT*4
+ .endif
+198:
+        cleanup
+
+#ifdef DEBUG_PARAMS
+        add     sp, sp, #9*4 /* junk the debug copy of arguments */
+#endif
+199:
+        pop     {r4-r11, pc}  /* exit */
+
+ .ltorg
+
+    .unreq  X
+    .unreq  Y
+    .unreq  DST
+    .unreq  STRIDE_D
+    .unreq  SRC
+    .unreq  STRIDE_S
+    .unreq  MASK
+    .unreq  STRIDE_M
+    .unreq  WK0
+    .unreq  WK1
+    .unreq  WK2
+    .unreq  WK3
+    .unreq  SCRATCH
+    .unreq  ORIG_W
+    .endfunc
+.endm
+
+.macro line_saved_regs  x:vararg
+ .set LINE_SAVED_REGS, 0
+ .set LINE_SAVED_REG_COUNT, 0
+ .irp SAVED_REG,x
+  .ifc "SAVED_REG","Y"
+   .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<1)
+   .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1
+  .endif
+  .ifc "SAVED_REG","STRIDE_D"
+   .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<3)
+   .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1
+  .endif
+  .ifc "SAVED_REG","STRIDE_S"
+   .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<5)
+   .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1
+  .endif
+  .ifc "SAVED_REG","STRIDE_M"
+   .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<7)
+   .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1
+  .endif
+  .ifc "SAVED_REG","ORIG_W"
+   .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<14)
+   .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1
+  .endif
+ .endr
+.endm
+
+.macro nop_macro x:vararg
+.endm
diff --git a/lib/pixman/pixman/pixman-arm-simd.c b/lib/pixman/pixman/pixman-arm-simd.c
index 3d19bfac1..af062e19d 100644
--- a/lib/pixman/pixman/pixman-arm-simd.c
+++ b/lib/pixman/pixman/pixman-arm-simd.c
@@ -31,369 +31,191 @@
 #include "pixman-arm-common.h"
 #include "pixman-inlines.h"
 
-#if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */
-
-void
-pixman_composite_add_8_8_asm_armv6 (int32_t  width,
-				    int32_t  height,
-				    uint8_t *dst_line,
-				    int32_t  dst_stride,
-				    uint8_t *src_line,
-				    int32_t  src_stride)
-{
-    uint8_t *dst, *src;
-    int32_t w;
-    uint8_t s, d;
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	/* ensure both src and dst are properly aligned before doing 32 bit reads
-	 * we'll stay in this loop if src and dst have differing alignments
-	 */
-	while (w && (((unsigned long)dst & 3) || ((unsigned long)src & 3)))
-	{
-	    s = *src;
-	    d = *dst;
-	    asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s));
-	    *dst = d;
-
-	    dst++;
-	    src++;
-	    w--;
-	}
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8888_8888,
+		                   uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_x888_8888,
+                                   uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_0565,
+                                   uint16_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8_8,
+                                   uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_8888,
+                                   uint16_t, 1, uint32_t, 1)
 
-	while (w >= 4)
-	{
-	    asm ("uqadd8 %0, %1, %2"
-		 : "=r" (*(uint32_t*)dst)
-		 : "r" (*(uint32_t*)src), "r" (*(uint32_t*)dst));
-	    dst += 4;
-	    src += 4;
-	    w -= 4;
-	}
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
+                                   uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
+                                   uint32_t, 1, uint32_t, 1)
 
-	while (w)
-	{
-	    s = *src;
-	    d = *dst;
-	    asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s));
-	    *dst = d;
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
+                                     uint32_t, 1, uint32_t, 1)
 
-	    dst++;
-	    src++;
-	    w--;
-	}
-    }
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888,
+                                      uint8_t, 1, uint32_t, 1)
 
-}
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC,
+                                        uint16_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC,
+                                        uint32_t, uint32_t)
 
 void
-pixman_composite_over_8888_8888_asm_armv6 (int32_t   width,
-                                           int32_t   height,
-                                           uint32_t *dst_line,
-                                           int32_t   dst_stride,
-                                           uint32_t *src_line,
-                                           int32_t   src_stride)
-{
-    uint32_t    *dst;
-    uint32_t    *src;
-    int32_t w;
-    uint32_t component_half = 0x800080;
-    uint32_t upper_component_mask = 0xff00ff00;
-    uint32_t alpha_mask = 0xff;
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
+pixman_composite_src_n_8888_asm_armv6 (int32_t   w,
+                                       int32_t   h,
+                                       uint32_t *dst,
+                                       int32_t   dst_stride,
+                                       uint32_t  src);
 
-/* #define inner_branch */
-	asm volatile (
-	    "cmp %[w], #0\n\t"
-	    "beq 2f\n\t"
-	    "1:\n\t"
-	    /* load src */
-	    "ldr r5, [%[src]], #4\n\t"
-#ifdef inner_branch
-	    /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
-	     * The 0x0 case also allows us to avoid doing an unecessary data
-	     * write which is more valuable so we only check for that
-	     */
-	    "cmp r5, #0\n\t"
-	    "beq 3f\n\t"
-
-	    /* = 255 - alpha */
-	    "sub r8, %[alpha_mask], r5, lsr #24\n\t"
-
-	    "ldr r4, [%[dest]] \n\t"
-
-#else
-	    "ldr r4, [%[dest]] \n\t"
-
-	    /* = 255 - alpha */
-	    "sub r8, %[alpha_mask], r5, lsr #24\n\t"
-#endif
-	    "uxtb16 r6, r4\n\t"
-	    "uxtb16 r7, r4, ror #8\n\t"
-
-	    /* multiply by 257 and divide by 65536 */
-	    "mla r6, r6, r8, %[component_half]\n\t"
-	    "mla r7, r7, r8, %[component_half]\n\t"
-
-	    "uxtab16 r6, r6, r6, ror #8\n\t"
-	    "uxtab16 r7, r7, r7, ror #8\n\t"
-
-	    /* recombine the 0xff00ff00 bytes of r6 and r7 */
-	    "and r7, r7, %[upper_component_mask]\n\t"
-	    "uxtab16 r6, r7, r6, ror #8\n\t"
-
-	    "uqadd8 r5, r6, r5\n\t"
-
-#ifdef inner_branch
-	    "3:\n\t"
-
-#endif
-	    "str r5, [%[dest]], #4\n\t"
-	    /* increment counter and jmp to top */
-	    "subs	%[w], %[w], #1\n\t"
-	    "bne	1b\n\t"
-	    "2:\n\t"
-	    : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
-	    : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask),
-	      [alpha_mask] "r" (alpha_mask)
-	    : "r4", "r5", "r6", "r7", "r8", "cc", "memory"
-	    );
-    }
-}
+void
+pixman_composite_src_n_0565_asm_armv6 (int32_t   w,
+                                       int32_t   h,
+                                       uint16_t *dst,
+                                       int32_t   dst_stride,
+                                       uint16_t  src);
 
 void
-pixman_composite_over_8888_n_8888_asm_armv6 (int32_t   width,
-                                             int32_t   height,
-                                             uint32_t *dst_line,
-                                             int32_t   dst_stride,
-                                             uint32_t *src_line,
-                                             int32_t   src_stride,
-                                             uint32_t  mask)
+pixman_composite_src_n_8_asm_armv6 (int32_t   w,
+                                    int32_t   h,
+                                    uint8_t  *dst,
+                                    int32_t   dst_stride,
+                                    uint8_t  src);
+
+static pixman_bool_t
+arm_simd_fill (pixman_implementation_t *imp,
+               uint32_t *               bits,
+               int                      stride, /* in 32-bit words */
+               int                      bpp,
+               int                      x,
+               int                      y,
+               int                      width,
+               int                      height,
+               uint32_t                 _xor)
 {
-    uint32_t *dst;
-    uint32_t *src;
-    int32_t w;
-    uint32_t component_half = 0x800080;
-    uint32_t alpha_mask = 0xff;
-
-    mask = (mask) >> 24;
+    /* stride is always multiple of 32bit units in pixman */
+    uint32_t byte_stride = stride * sizeof(uint32_t);
 
-    while (height--)
+    switch (bpp)
     {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-/* #define inner_branch */
-	asm volatile (
-	    "cmp %[w], #0\n\t"
-	    "beq 2f\n\t"
-	    "1:\n\t"
-	    /* load src */
-	    "ldr r5, [%[src]], #4\n\t"
-#ifdef inner_branch
-	    /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
-	     * The 0x0 case also allows us to avoid doing an unecessary data
-	     * write which is more valuable so we only check for that
-	     */
-	    "cmp r5, #0\n\t"
-	    "beq 3f\n\t"
-
-#endif
-	    "ldr r4, [%[dest]] \n\t"
-
-	    "uxtb16 r6, r5\n\t"
-	    "uxtb16 r7, r5, ror #8\n\t"
-
-	    /* multiply by alpha (r8) then by 257 and divide by 65536 */
-	    "mla r6, r6, %[mask_alpha], %[component_half]\n\t"
-	    "mla r7, r7, %[mask_alpha], %[component_half]\n\t"
-
-	    "uxtab16 r6, r6, r6, ror #8\n\t"
-	    "uxtab16 r7, r7, r7, ror #8\n\t"
-
-	    "uxtb16 r6, r6, ror #8\n\t"
-	    "uxtb16 r7, r7, ror #8\n\t"
-
-	    /* recombine */
-	    "orr r5, r6, r7, lsl #8\n\t"
-
-	    "uxtb16 r6, r4\n\t"
-	    "uxtb16 r7, r4, ror #8\n\t"
-
-	    /* 255 - alpha */
-	    "sub r8, %[alpha_mask], r5, lsr #24\n\t"
-
-	    /* multiply by alpha (r8) then by 257 and divide by 65536 */
-	    "mla r6, r6, r8, %[component_half]\n\t"
-	    "mla r7, r7, r8, %[component_half]\n\t"
-
-	    "uxtab16 r6, r6, r6, ror #8\n\t"
-	    "uxtab16 r7, r7, r7, ror #8\n\t"
-
-	    "uxtb16 r6, r6, ror #8\n\t"
-	    "uxtb16 r7, r7, ror #8\n\t"
-
-	    /* recombine */
-	    "orr r6, r6, r7, lsl #8\n\t"
-
-	    "uqadd8 r5, r6, r5\n\t"
-
-#ifdef inner_branch
-	    "3:\n\t"
-
-#endif
-	    "str r5, [%[dest]], #4\n\t"
-	    /* increment counter and jmp to top */
-	    "subs	%[w], %[w], #1\n\t"
-	    "bne	1b\n\t"
-	    "2:\n\t"
-	    : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
-	    : [component_half] "r" (component_half), [mask_alpha] "r" (mask),
-	      [alpha_mask] "r" (alpha_mask)
-	    : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory"
-	    );
+    case 8:
+	pixman_composite_src_n_8_asm_armv6 (
+		width,
+		height,
+		(uint8_t *)(((char *) bits) + y * byte_stride + x),
+		byte_stride,
+		_xor & 0xff);
+	return TRUE;
+    case 16:
+	pixman_composite_src_n_0565_asm_armv6 (
+		width,
+		height,
+		(uint16_t *)(((char *) bits) + y * byte_stride + x * 2),
+		byte_stride / 2,
+		_xor & 0xffff);
+	return TRUE;
+    case 32:
+	pixman_composite_src_n_8888_asm_armv6 (
+		width,
+		height,
+		(uint32_t *)(((char *) bits) + y * byte_stride + x * 4),
+		byte_stride / 4,
+		_xor);
+	return TRUE;
+    default:
+	return FALSE;
     }
 }
 
-void
-pixman_composite_over_n_8_8888_asm_armv6 (int32_t   width,
-                                          int32_t   height,
-                                          uint32_t *dst_line,
-                                          int32_t   dst_stride,
-                                          uint32_t  src,
-                                          int32_t   unused,
-                                          uint8_t  *mask_line,
-                                          int32_t   mask_stride)
+static pixman_bool_t
+arm_simd_blt (pixman_implementation_t *imp,
+              uint32_t *               src_bits,
+              uint32_t *               dst_bits,
+              int                      src_stride, /* in 32-bit words */
+              int                      dst_stride, /* in 32-bit words */
+              int                      src_bpp,
+              int                      dst_bpp,
+              int                      src_x,
+              int                      src_y,
+              int                      dest_x,
+              int                      dest_y,
+              int                      width,
+              int                      height)
 {
-    uint32_t  srca;
-    uint32_t *dst;
-    uint8_t  *mask;
-    int32_t w;
-
-    srca = src >> 24;
-
-    uint32_t component_mask = 0xff00ff;
-    uint32_t component_half = 0x800080;
-
-    uint32_t src_hi = (src >> 8) & component_mask;
-    uint32_t src_lo = src & component_mask;
+    if (src_bpp != dst_bpp)
+	return FALSE;
 
-    while (height--)
+    switch (src_bpp)
     {
-	dst = dst_line;
-	dst_line += dst_stride;
-	mask = mask_line;
-	mask_line += mask_stride;
-	w = width;
-
-/* #define inner_branch */
-	asm volatile (
-	    "cmp %[w], #0\n\t"
-	    "beq 2f\n\t"
-	    "1:\n\t"
-	    /* load mask */
-	    "ldrb r5, [%[mask]], #1\n\t"
-#ifdef inner_branch
-	    /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
-	     * The 0x0 case also allows us to avoid doing an unecessary data
-	     * write which is more valuable so we only check for that
-	     */
-	    "cmp r5, #0\n\t"
-	    "beq 3f\n\t"
-
-#endif
-	    "ldr r4, [%[dest]] \n\t"
-
-	    /* multiply by alpha (r8) then by 257 and divide by 65536 */
-	    "mla r6, %[src_lo], r5, %[component_half]\n\t"
-	    "mla r7, %[src_hi], r5, %[component_half]\n\t"
-
-	    "uxtab16 r6, r6, r6, ror #8\n\t"
-	    "uxtab16 r7, r7, r7, ror #8\n\t"
-
-	    "uxtb16 r6, r6, ror #8\n\t"
-	    "uxtb16 r7, r7, ror #8\n\t"
-
-	    /* recombine */
-	    "orr r5, r6, r7, lsl #8\n\t"
-
-	    "uxtb16 r6, r4\n\t"
-	    "uxtb16 r7, r4, ror #8\n\t"
-
-	    /* we could simplify this to use 'sub' if we were
-	     * willing to give up a register for alpha_mask
-	     */
-	    "mvn r8, r5\n\t"
-	    "mov r8, r8, lsr #24\n\t"
-
-	    /* multiply by alpha (r8) then by 257 and divide by 65536 */
-	    "mla r6, r6, r8, %[component_half]\n\t"
-	    "mla r7, r7, r8, %[component_half]\n\t"
-
-	    "uxtab16 r6, r6, r6, ror #8\n\t"
-	    "uxtab16 r7, r7, r7, ror #8\n\t"
-
-	    "uxtb16 r6, r6, ror #8\n\t"
-	    "uxtb16 r7, r7, ror #8\n\t"
-
-	    /* recombine */
-	    "orr r6, r6, r7, lsl #8\n\t"
-
-	    "uqadd8 r5, r6, r5\n\t"
-
-#ifdef inner_branch
-	    "3:\n\t"
-
-#endif
-	    "str r5, [%[dest]], #4\n\t"
-	    /* increment counter and jmp to top */
-	    "subs	%[w], %[w], #1\n\t"
-	    "bne	1b\n\t"
-	    "2:\n\t"
-	    : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask)
-	    : [component_half] "r" (component_half),
-	      [src_hi] "r" (src_hi), [src_lo] "r" (src_lo)
-	    : "r4", "r5", "r6", "r7", "r8", "cc", "memory");
+    case 8:
+        pixman_composite_src_8_8_asm_armv6 (
+                width, height,
+                (uint8_t *)(((char *) dst_bits) +
+                dest_y * dst_stride * 4 + dest_x * 1), dst_stride * 4,
+                (uint8_t *)(((char *) src_bits) +
+                src_y * src_stride * 4 + src_x * 1), src_stride * 4);
+        return TRUE;
+    case 16:
+	pixman_composite_src_0565_0565_asm_armv6 (
+		width, height,
+		(uint16_t *)(((char *) dst_bits) +
+		dest_y * dst_stride * 4 + dest_x * 2), dst_stride * 2,
+		(uint16_t *)(((char *) src_bits) +
+		src_y * src_stride * 4 + src_x * 2), src_stride * 2);
+	return TRUE;
+    case 32:
+	pixman_composite_src_8888_8888_asm_armv6 (
+		width, height,
+		(uint32_t *)(((char *) dst_bits) +
+		dest_y * dst_stride * 4 + dest_x * 4), dst_stride,
+		(uint32_t *)(((char *) src_bits) +
+		src_y * src_stride * 4 + src_x * 4), src_stride);
+	return TRUE;
+    default:
+	return FALSE;
     }
 }
 
-#endif
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
-                                   uint8_t, 1, uint8_t, 1)
-PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
-                                   uint32_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
-                                     uint32_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888,
-                                      uint8_t, 1, uint32_t, 1)
-
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC,
-                                        uint16_t, uint16_t)
-PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC,
-                                        uint32_t, uint32_t)
-
 static const pixman_fast_path_t arm_simd_fast_paths[] =
 {
+    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, armv6_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, armv6_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, armv6_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, armv6_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, armv6_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, armv6_composite_src_8888_8888),
+
+    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, armv6_composite_src_x888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, armv6_composite_src_x888_8888),
+
+    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, armv6_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, armv6_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, a1r5g5b5, armv6_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC, a1b5g5r5, null, a1b5g5r5, armv6_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, armv6_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC, a1b5g5r5, null, x1b5g5r5, armv6_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, armv6_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC, x1b5g5r5, null, x1b5g5r5, armv6_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC, a4r4g4b4, null, a4r4g4b4, armv6_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC, a4b4g4r4, null, a4b4g4r4, armv6_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC, a4r4g4b4, null, x4r4g4b4, armv6_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC, a4b4g4r4, null, x4b4g4r4, armv6_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC, x4r4g4b4, null, x4r4g4b4, armv6_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC, x4b4g4r4, null, x4b4g4r4, armv6_composite_src_0565_0565),
+
+    PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, armv6_composite_src_8_8),
+    PIXMAN_STD_FAST_PATH (SRC, r3g3b2, null, r3g3b2, armv6_composite_src_8_8),
+    PIXMAN_STD_FAST_PATH (SRC, b2g3r3, null, b2g3r3, armv6_composite_src_8_8),
+    PIXMAN_STD_FAST_PATH (SRC, a2r2g2b2, null, a2r2g2b2, armv6_composite_src_8_8),
+    PIXMAN_STD_FAST_PATH (SRC, a2b2g2r2, null, a2b2g2r2, armv6_composite_src_8_8),
+    PIXMAN_STD_FAST_PATH (SRC, c8, null, c8, armv6_composite_src_8_8),
+    PIXMAN_STD_FAST_PATH (SRC, g8, null, g8, armv6_composite_src_8_8),
+    PIXMAN_STD_FAST_PATH (SRC, x4a4, null, x4a4, armv6_composite_src_8_8),
+    PIXMAN_STD_FAST_PATH (SRC, x4c4, null, x4c4, armv6_composite_src_8_8),
+    PIXMAN_STD_FAST_PATH (SRC, x4g4, null, x4g4, armv6_composite_src_8_8),
+
+    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, armv6_composite_src_0565_8888),
+    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, armv6_composite_src_0565_8888),
+    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, armv6_composite_src_0565_8888),
+    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, armv6_composite_src_0565_8888),
+
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888),
@@ -428,5 +250,8 @@ _pixman_implementation_create_arm_simd (pixman_implementation_t *fallback)
 {
     pixman_implementation_t *imp = _pixman_implementation_create (fallback, arm_simd_fast_paths);
 
+    imp->blt = arm_simd_blt;
+    imp->fill = arm_simd_fill;
+
     return imp;
 }
diff --git a/lib/pixman/pixman/pixman-bits-image.c b/lib/pixman/pixman/pixman-bits-image.c
index 085dd1606..75a39a115 100644
--- a/lib/pixman/pixman/pixman-bits-image.c
+++ b/lib/pixman/pixman/pixman-bits-image.c
@@ -413,10 +413,108 @@ bits_image_fetch_pixel_convolution (bits_image_t   *image,
 	}
     }
 
-    satot >>= 16;
-    srtot >>= 16;
-    sgtot >>= 16;
-    sbtot >>= 16;
+    satot = (satot + 0x8000) >> 16;
+    srtot = (srtot + 0x8000) >> 16;
+    sgtot = (sgtot + 0x8000) >> 16;
+    sbtot = (sbtot + 0x8000) >> 16;
+
+    satot = CLIP (satot, 0, 0xff);
+    srtot = CLIP (srtot, 0, 0xff);
+    sgtot = CLIP (sgtot, 0, 0xff);
+    sbtot = CLIP (sbtot, 0, 0xff);
+
+    return ((satot << 24) | (srtot << 16) | (sgtot <<  8) | (sbtot));
+}
+
+static uint32_t
+bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
+                                              pixman_fixed_t x,
+                                              pixman_fixed_t y,
+                                              get_pixel_t    get_pixel)
+{
+    pixman_fixed_t *params = image->common.filter_params;
+    pixman_repeat_t repeat_mode = image->common.repeat;
+    int width = image->width;
+    int height = image->height;
+    int cwidth = pixman_fixed_to_int (params[0]);
+    int cheight = pixman_fixed_to_int (params[1]);
+    int x_phase_bits = pixman_fixed_to_int (params[2]);
+    int y_phase_bits = pixman_fixed_to_int (params[3]);
+    int x_phase_shift = 16 - x_phase_bits;
+    int y_phase_shift = 16 - y_phase_bits;
+    int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
+    int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
+    pixman_fixed_t *y_params;
+    int srtot, sgtot, sbtot, satot;
+    int32_t x1, x2, y1, y2;
+    int32_t px, py;
+    int i, j;
+
+    /* Round x and y to the middle of the closest phase before continuing. This
+     * ensures that the convolution matrix is aligned right, since it was
+     * positioned relative to a particular phase (and not relative to whatever
+     * exact fraction we happen to get here).
+     */
+    x = ((x >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
+    y = ((y >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);
+
+    px = (x & 0xffff) >> x_phase_shift;
+    py = (y & 0xffff) >> y_phase_shift;
+
+    y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;
+
+    x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
+    y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
+    x2 = x1 + cwidth;
+    y2 = y1 + cheight;
+
+    srtot = sgtot = sbtot = satot = 0;
+
+    for (i = y1; i < y2; ++i)
+    {
+        pixman_fixed_48_16_t fy = *y_params++;
+        pixman_fixed_t *x_params = params + 4 + px * cwidth;
+
+        if (fy)
+        {
+            for (j = x1; j < x2; ++j)
+            {
+                pixman_fixed_t fx = *x_params++;
+		int rx = j;
+		int ry = i;
+
+                if (fx)
+                {
+                    pixman_fixed_t f;
+                    uint32_t pixel;
+
+                    if (repeat_mode != PIXMAN_REPEAT_NONE)
+                    {
+                        repeat (repeat_mode, &rx, width);
+                        repeat (repeat_mode, &ry, height);
+
+                        pixel = get_pixel (image, rx, ry, FALSE);
+                    }
+                    else
+                    {
+                        pixel = get_pixel (image, rx, ry, TRUE);
+		    }
+
+                    f = (fy * fx + 0x8000) >> 16;
+
+                    srtot += (int)RED_8 (pixel) * f;
+                    sgtot += (int)GREEN_8 (pixel) * f;
+                    sbtot += (int)BLUE_8 (pixel) * f;
+                    satot += (int)ALPHA_8 (pixel) * f;
+                }
+            }
+	}
+    }
+
+    satot = (satot + 0x8000) >> 16;
+    srtot = (srtot + 0x8000) >> 16;
+    sgtot = (sgtot + 0x8000) >> 16;
+    sbtot = (sbtot + 0x8000) >> 16;
 
     satot = CLIP (satot, 0, 0xff);
     srtot = CLIP (srtot, 0, 0xff);
@@ -449,6 +547,10 @@ bits_image_fetch_pixel_filtered (bits_image_t *image,
 	return bits_image_fetch_pixel_convolution (image, x, y, get_pixel);
 	break;
 
+    case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
+        return bits_image_fetch_pixel_separable_convolution (image, x, y, get_pixel);
+        break;
+
     default:
         break;
     }
@@ -618,11 +720,155 @@ bits_image_fetch_general (pixman_iter_t  *iter,
     return buffer;
 }
 
-static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
-
 typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x);
 
 static force_inline void
+bits_image_fetch_separable_convolution_affine (pixman_image_t * image,
+					       int              offset,
+					       int              line,
+					       int              width,
+					       uint32_t *       buffer,
+					       const uint32_t * mask,
+
+					       convert_pixel_t	convert_pixel,
+					       pixman_format_code_t	format,
+					       pixman_repeat_t	repeat_mode)
+{
+    bits_image_t *bits = &image->bits;
+    pixman_fixed_t *params = image->common.filter_params;
+    int cwidth = pixman_fixed_to_int (params[0]);
+    int cheight = pixman_fixed_to_int (params[1]);
+    int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
+    int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
+    int x_phase_bits = pixman_fixed_to_int (params[2]);
+    int y_phase_bits = pixman_fixed_to_int (params[3]);
+    int x_phase_shift = 16 - x_phase_bits;
+    int y_phase_shift = 16 - y_phase_bits;
+    pixman_fixed_t vx, vy;
+    pixman_fixed_t ux, uy;
+    pixman_vector_t v;
+    int k;
+
+    /* reference point is the center of the pixel */
+    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
+    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
+    v.vector[2] = pixman_fixed_1;
+
+    if (!pixman_transform_point_3d (image->common.transform, &v))
+	return;
+
+    ux = image->common.transform->matrix[0][0];
+    uy = image->common.transform->matrix[1][0];
+
+    vx = v.vector[0];
+    vy = v.vector[1];
+
+    for (k = 0; k < width; ++k)
+    {
+	pixman_fixed_t *y_params;
+	int satot, srtot, sgtot, sbtot;
+	pixman_fixed_t x, y;
+	int32_t x1, x2, y1, y2;
+	int32_t px, py;
+	int i, j;
+
+	if (mask && !mask[k])
+	    goto next;
+
+	/* Round x and y to the middle of the closest phase before continuing. This
+	 * ensures that the convolution matrix is aligned right, since it was
+	 * positioned relative to a particular phase (and not relative to whatever
+	 * exact fraction we happen to get here).
+	 */
+	x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
+	y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);
+
+	px = (x & 0xffff) >> x_phase_shift;
+	py = (y & 0xffff) >> y_phase_shift;
+
+	x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
+	y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
+	x2 = x1 + cwidth;
+	y2 = y1 + cheight;
+
+	satot = srtot = sgtot = sbtot = 0;
+
+	y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;
+
+	for (i = y1; i < y2; ++i)
+	{
+	    pixman_fixed_t fy = *y_params++;
+
+	    if (fy)
+	    {
+		pixman_fixed_t *x_params = params + 4 + px * cwidth;
+
+		for (j = x1; j < x2; ++j)
+		{
+		    pixman_fixed_t fx = *x_params++;
+		    int rx = j;
+		    int ry = i;
+		    
+		    if (fx)
+		    {
+			pixman_fixed_t f;
+			uint32_t pixel, mask;
+			uint8_t *row;
+
+			mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
+
+			if (repeat_mode != PIXMAN_REPEAT_NONE)
+			{
+			    repeat (repeat_mode, &rx, bits->width);
+			    repeat (repeat_mode, &ry, bits->height);
+
+			    row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry;
+			    pixel = convert_pixel (row, rx) | mask;
+			}
+			else
+			{
+			    if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height)
+			    {
+				pixel = 0;
+			    }
+			    else
+			    {
+				row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry;
+				pixel = convert_pixel (row, rx) | mask;
+			    }
+			}
+
+			f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16;
+			srtot += (int)RED_8 (pixel) * f;
+			sgtot += (int)GREEN_8 (pixel) * f;
+			sbtot += (int)BLUE_8 (pixel) * f;
+			satot += (int)ALPHA_8 (pixel) * f;
+		    }
+		}
+	    }
+	}
+
+	satot = (satot + 0x8000) >> 16;
+	srtot = (srtot + 0x8000) >> 16;
+	sgtot = (sgtot + 0x8000) >> 16;
+	sbtot = (sbtot + 0x8000) >> 16;
+
+	satot = CLIP (satot, 0, 0xff);
+	srtot = CLIP (srtot, 0, 0xff);
+	sgtot = CLIP (sgtot, 0, 0xff);
+	sbtot = CLIP (sbtot, 0, 0xff);
+
+	buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0);
+
+    next:
+	vx += ux;
+	vy += uy;
+    }
+}
+
+static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+static force_inline void
 bits_image_fetch_bilinear_affine (pixman_image_t * image,
 				  int              offset,
 				  int              line,
@@ -868,9 +1114,26 @@ convert_a8 (const uint8_t *row, int x)
 static force_inline uint32_t
 convert_r5g6b5 (const uint8_t *row, int x)
 {
-    return CONVERT_0565_TO_0888 (*((uint16_t *)row + x));
+    return convert_0565_to_0888 (*((uint16_t *)row + x));
 }
 
+#define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode)  \
+    static uint32_t *							\
+    bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t   *iter, \
+							    const uint32_t * mask) \
+    {									\
+	bits_image_fetch_separable_convolution_affine (                 \
+	    iter->image,                                                \
+	    iter->x, iter->y++,                                         \
+	    iter->width,                                                \
+	    iter->buffer, mask,                                         \
+	    convert_ ## format,                                         \
+	    PIXMAN_ ## format,                                          \
+	    repeat_mode);                                               \
+									\
+	return iter->buffer;                                            \
+    }
+
 #define MAKE_BILINEAR_FETCHER(name, format, repeat_mode)		\
     static uint32_t *							\
     bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t   *iter,	\
@@ -903,7 +1166,8 @@ convert_r5g6b5 (const uint8_t *row, int x)
 
 #define MAKE_FETCHERS(name, format, repeat_mode)			\
     MAKE_NEAREST_FETCHER (name, format, repeat_mode)			\
-    MAKE_BILINEAR_FETCHER (name, format, repeat_mode)
+    MAKE_BILINEAR_FETCHER (name, format, repeat_mode)			\
+    MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode)
 
 MAKE_FETCHERS (pad_a8r8g8b8,     a8r8g8b8, PIXMAN_REPEAT_PAD)
 MAKE_FETCHERS (none_a8r8g8b8,    a8r8g8b8, PIXMAN_REPEAT_NONE)
@@ -1153,6 +1417,20 @@ static const fetcher_info_t fetcher_info[] =
      FAST_PATH_AFFINE_TRANSFORM		|				\
      FAST_PATH_NEAREST_FILTER)
 
+#define GENERAL_SEPARABLE_CONVOLUTION_FLAGS				\
+    (FAST_PATH_NO_ALPHA_MAP            |				\
+     FAST_PATH_NO_ACCESSORS            |				\
+     FAST_PATH_HAS_TRANSFORM           |				\
+     FAST_PATH_AFFINE_TRANSFORM        |				\
+     FAST_PATH_SEPARABLE_CONVOLUTION_FILTER)
+    
+#define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat)   \
+    { PIXMAN_ ## format,                                               \
+      GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
+      bits_image_fetch_separable_convolution_affine_ ## name,          \
+      _pixman_image_get_scanline_generic_float			       \
+    },
+
 #define BILINEAR_AFFINE_FAST_PATH(name, format, repeat)			\
     { PIXMAN_ ## format,						\
       GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,		\
@@ -1168,6 +1446,7 @@ static const fetcher_info_t fetcher_info[] =
     },
 
 #define AFFINE_FAST_PATHS(name, format, repeat)				\
+    SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat)	\
     BILINEAR_AFFINE_FAST_PATH(name, format, repeat)			\
     NEAREST_AFFINE_FAST_PATH(name, format, repeat)
     
diff --git a/lib/pixman/pixman/pixman-combine-float.c b/lib/pixman/pixman/pixman-combine-float.c
index c3d54f025..5ea739f76 100644
--- a/lib/pixman/pixman/pixman-combine-float.c
+++ b/lib/pixman/pixman/pixman-combine-float.c
@@ -201,56 +201,56 @@ get_factor (combine_factor_t factor, float sa, float da)
 	break;
 
     case SA_OVER_DA:
-	if (da == 0.0f)
+	if (FLOAT_IS_ZERO (da))
 	    f = 1.0f;
 	else
 	    f = CLAMP (sa / da);
 	break;
 
     case DA_OVER_SA:
-	if (sa == 0.0f)
+	if (FLOAT_IS_ZERO (sa))
 	    f = 1.0f;
 	else
 	    f = CLAMP (da / sa);
 	break;
 
     case INV_SA_OVER_DA:
-	if (da == 0.0f)
+	if (FLOAT_IS_ZERO (da))
 	    f = 1.0f;
 	else
 	    f = CLAMP ((1.0f - sa) / da);
 	break;
 
     case INV_DA_OVER_SA:
-	if (sa == 0.0f)
+	if (FLOAT_IS_ZERO (sa))
 	    f = 1.0f;
 	else
 	    f = CLAMP ((1.0f - da) / sa);
 	break;
 
     case ONE_MINUS_SA_OVER_DA:
-	if (da == 0.0f)
+	if (FLOAT_IS_ZERO (da))
 	    f = 0.0f;
 	else
 	    f = CLAMP (1.0f - sa / da);
 	break;
 
     case ONE_MINUS_DA_OVER_SA:
-	if (sa == 0.0f)
+	if (FLOAT_IS_ZERO (sa))
 	    f = 0.0f;
 	else
 	    f = CLAMP (1.0f - da / sa);
 	break;
 
     case ONE_MINUS_INV_DA_OVER_SA:
-	if (sa == 0.0f)
+	if (FLOAT_IS_ZERO (sa))
 	    f = 0.0f;
 	else
 	    f = CLAMP (1.0f - (1.0f - da) / sa);
 	break;
 
     case ONE_MINUS_INV_SA_OVER_DA:
-	if (da == 0.0f)
+	if (FLOAT_IS_ZERO (da))
 	    f = 0.0f;
 	else
 	    f = CLAMP (1.0f - (1.0f - sa) / da);
@@ -403,11 +403,11 @@ blend_lighten (float sa, float s, float da, float d)
 static force_inline float
 blend_color_dodge (float sa, float s, float da, float d)
 {
-    if (d == 0.0f)
+    if (FLOAT_IS_ZERO (d))
 	return 0.0f;
     else if (d * sa >= sa * da - s * da)
 	return sa * da;
-    else if (sa - s == 0.0f)
+    else if (FLOAT_IS_ZERO (sa - s))
 	return sa * da;
     else
 	return sa * sa * d / (sa - s);
@@ -420,7 +420,7 @@ blend_color_burn (float sa, float s, float da, float d)
 	return sa * da;
     else if (sa * (da - d) >= s * da)
 	return 0.0f;
-    else if (s == 0.0f)
+    else if (FLOAT_IS_ZERO (s))
 	return 0.0f;
     else
 	return sa * (da - sa * (da - d) / s);
@@ -440,14 +440,14 @@ blend_soft_light (float sa, float s, float da, float d)
 {
     if (2 * s < sa)
     {
-	if (da == 0.0f)
+	if (FLOAT_IS_ZERO (da))
 	    return d * sa;
 	else
 	    return d * sa - d * (da - d) * (sa - 2 * s) / da;
     }
     else
     {
-	if (da == 0.0f)
+	if (FLOAT_IS_ZERO (da))
 	{
 	    return 0.0f;
 	}
@@ -651,10 +651,12 @@ clip_color (rgb_t *color, float a)
     float l = get_lum (color);
     float n = channel_min (color);
     float x = channel_max (color);
+    float t;
 
     if (n < 0.0f)
     {
-	if ((l - n) < 4 * FLT_EPSILON)
+	t = l - n;
+	if (FLOAT_IS_ZERO (t))
 	{
 	    color->r = 0.0f;
 	    color->g = 0.0f;
@@ -662,14 +664,15 @@ clip_color (rgb_t *color, float a)
 	}
 	else
 	{
-	    color->r = l + (((color->r - l) * l) / (l - n));
-	    color->g = l + (((color->g - l) * l) / (l - n));
-	    color->b = l + (((color->b - l) * l) / (l - n));
+	    color->r = l + (((color->r - l) * l) / t);
+	    color->g = l + (((color->g - l) * l) / t);
+	    color->b = l + (((color->b - l) * l) / t);
 	}
     }
     if (x > a)
     {
-	if ((x - l) < 4 * FLT_EPSILON)
+	t = x - l;
+	if (FLOAT_IS_ZERO (t))
 	{
 	    color->r = a;
 	    color->g = a;
@@ -677,9 +680,9 @@ clip_color (rgb_t *color, float a)
 	}
 	else
 	{
-	    color->r = l + (((color->r - l) * (a - l) / (x - l)));
-	    color->g = l + (((color->g - l) * (a - l) / (x - l)));
-	    color->b = l + (((color->b - l) * (a - l) / (x - l)));
+	    color->r = l + (((color->r - l) * (a - l) / t));
+	    color->g = l + (((color->g - l) * (a - l) / t));
+	    color->b = l + (((color->b - l) * (a - l) / t));
 	}
     }
 }
@@ -700,6 +703,7 @@ static void
 set_sat (rgb_t *src, float sat)
 {
     float *max, *mid, *min;
+    float t;
 
     if (src->r > src->g)
     {
@@ -750,14 +754,16 @@ set_sat (rgb_t *src, float sat)
 	}
     }
 
-    if (*max > *min)
+    t = *max - *min;
+
+    if (FLOAT_IS_ZERO (t))
     {
-	*mid = (((*mid - *min) * sat) / (*max - *min));
-	*max = sat;
+	*mid = *max = 0.0f;
     }
     else
     {
-	*mid = *max = 0.0f;
+	*mid = ((*mid - *min) * sat) / t;
+	*max = sat;
     }
 
     *min = 0.0f;
diff --git a/lib/pixman/pixman/pixman-combine32.c b/lib/pixman/pixman/pixman-combine32.c
index 54cc8771b..3ac7576bd 100644
--- a/lib/pixman/pixman/pixman-combine32.c
+++ b/lib/pixman/pixman/pixman-combine32.c
@@ -196,14 +196,58 @@ combine_over_u (pixman_implementation_t *imp,
 {
     int i;
 
-    for (i = 0; i < width; ++i)
+    if (!mask)
     {
-	uint32_t s = combine_mask (src, mask, i);
-	uint32_t d = *(dest + i);
-	uint32_t ia = ALPHA_8 (~s);
-
-	UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
-	*(dest + i) = d;
+	for (i = 0; i < width; ++i)
+	{
+	    uint32_t s = *(src + i);
+	    uint32_t a = ALPHA_8 (s);
+	    if (a == 0xFF)
+	    {
+		*(dest + i) = s;
+	    }
+	    else if (s)
+	    {
+		uint32_t d = *(dest + i);
+		uint32_t ia = a ^ 0xFF;
+		UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+		*(dest + i) = d;
+	    }
+	}
+    }
+    else
+    {
+	for (i = 0; i < width; ++i)
+	{
+	    uint32_t m = ALPHA_8 (*(mask + i));
+	    if (m == 0xFF)
+	    {
+		uint32_t s = *(src + i);
+		uint32_t a = ALPHA_8 (s);
+		if (a == 0xFF)
+		{
+		    *(dest + i) = s;
+		}
+		else if (s)
+		{
+		    uint32_t d = *(dest + i);
+		    uint32_t ia = a ^ 0xFF;
+		    UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+		    *(dest + i) = d;
+		}
+	    }
+	    else if (m)
+	    {
+		uint32_t s = *(src + i);
+		if (s)
+		{
+		    uint32_t d = *(dest + i);
+		    UN8x4_MUL_UN8 (s, m);
+		    UN8x4_MUL_UN8_ADD_UN8x4 (d, ALPHA_8 (~s), s);
+		    *(dest + i) = d;
+		}
+	    }
+	}
     }
 }
 
diff --git a/lib/pixman/pixman/pixman-combine32.h b/lib/pixman/pixman/pixman-combine32.h
index 875dde3cf..cdd56a61a 100644
--- a/lib/pixman/pixman/pixman-combine32.h
+++ b/lib/pixman/pixman/pixman-combine32.h
@@ -20,6 +20,47 @@
 #define BLUE_8(x) ((x) & MASK)
 
 /*
+ * ARMv6 has the UQADD8 instruction, which implements unsigned saturated
+ * addition for 8-bit values packed in 32-bit registers. It is very useful
+ * for the UN8x4_ADD_UN8x4, UN8_rb_ADD_UN8_rb and ADD_UN8 macros (which would
+ * otherwise need a lot of arithmetic operations to simulate this operation).
+ * Since most of the major ARM Linux distros are built for ARMv7, we are
+ * much less dependent on runtime CPU detection and can get practical
+ * benefits from conditional compilation here for a lot of users.
+ */
+
+#if defined(USE_GCC_INLINE_ASM) && defined(__arm__) && \
+    !defined(__aarch64__) && (!defined(__thumb__) || defined(__thumb2__))
+#if defined(__ARM_ARCH_6__)   || defined(__ARM_ARCH_6J__)  || \
+    defined(__ARM_ARCH_6K__)  || defined(__ARM_ARCH_6Z__)  || \
+    defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \
+    defined(__ARM_ARCH_6M__)  || defined(__ARM_ARCH_7__)   || \
+    defined(__ARM_ARCH_7A__)  || defined(__ARM_ARCH_7R__)  || \
+    defined(__ARM_ARCH_7M__)  || defined(__ARM_ARCH_7EM__)
+
+static force_inline uint32_t
+un8x4_add_un8x4 (uint32_t x, uint32_t y)
+{
+    uint32_t t; /* per-byte unsigned saturating sum of x and y */
+    asm ("uqadd8 %0, %1, %2" : "=r" (t) : "%r" (x), "r" (y)); /* '%': x and y commute */
+    return t;
+}
+
+#define UN8x4_ADD_UN8x4(x, y) \
+    ((x) = un8x4_add_un8x4 ((x), (y))) /* x = per-byte saturating x + y */
+/* Stores the saturating sum of x and y in t, then copies it to x. */
+#define UN8_rb_ADD_UN8_rb(x, y, t) \
+    ((t) = un8x4_add_un8x4 ((x), (y)), (x) = (t))
+/* Copies x to t; the value of the expression is the saturating sum t + y. */
+#define ADD_UN8(x, y, t) \
+    ((t) = (x), un8x4_add_un8x4 ((t), (y)))
+
+#endif
+#endif
+
+/*****************************************************************************/
+
+/*
  * Helper macros.
  */
 
@@ -29,9 +70,11 @@
 #define DIV_UN8(a, b)							\
     (((uint16_t) (a) * MASK + ((b) / 2)) / (b))
 
+#ifndef ADD_UN8
 #define ADD_UN8(x, y, t)				     \
     ((t) = (x) + (y),					     \
      (uint32_t) (uint8_t) ((t) | (0 - ((t) >> G_SHIFT))))
+#endif
 
 #define DIV_ONE_UN8(x)							\
     (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
@@ -56,6 +99,7 @@
 /*
  * x_rb = min (x_rb + y_rb, 255)
  */
+#ifndef UN8_rb_ADD_UN8_rb
 #define UN8_rb_ADD_UN8_rb(x, y, t)					\
     do									\
     {									\
@@ -63,6 +107,7 @@
 	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);		\
 	x = (t & RB_MASK);						\
     } while (0)
+#endif
 
 /*
  * x_rb = (x_rb * a_rb) / 255
@@ -208,6 +253,7 @@
 /*
   x_c = min(x_c + y_c, 255)
 */
+#ifndef UN8x4_ADD_UN8x4
 #define UN8x4_ADD_UN8x4(x, y)						\
     do									\
     {									\
@@ -223,3 +269,4 @@
 									\
 	x = r1__ | (r2__ << G_SHIFT);					\
     } while (0)
+#endif
diff --git a/lib/pixman/pixman/pixman-compiler.h b/lib/pixman/pixman/pixman-compiler.h
index a978accfd..9b190b422 100644
--- a/lib/pixman/pixman/pixman-compiler.h
+++ b/lib/pixman/pixman/pixman-compiler.h
@@ -19,6 +19,12 @@
 #endif
 
 #if defined (__GNUC__)
+#  define unlikely(expr) __builtin_expect ((expr), 0) /* hint: expr is expected to be 0 */
+#else
+#  define unlikely(expr)  (expr) /* no hint: just the expression itself */
+#endif
+
+#if defined (__GNUC__)
 #  define MAYBE_UNUSED  __attribute__((unused))
 #else
 #  define MAYBE_UNUSED
@@ -56,6 +62,10 @@
 # define INT64_MAX              (9223372036854775807)
 #endif
 
+#ifndef SIZE_MAX
+# define SIZE_MAX               ((size_t)-1) /* fallback: -1 converted to size_t is its largest value */
+#endif
+
 
 #ifndef M_PI
 # define M_PI			3.14159265358979323846
diff --git a/lib/pixman/pixman/pixman-edge.c b/lib/pixman/pixman/pixman-edge.c
index 8d498ab44..ad6dfc4cf 100644
--- a/lib/pixman/pixman/pixman-edge.c
+++ b/lib/pixman/pixman/pixman-edge.c
@@ -374,6 +374,7 @@ pixman_rasterize_edges (pixman_image_t *image,
                         pixman_fixed_t  b)
 {
     return_if_fail (image->type == BITS);
+    return_if_fail (PIXMAN_FORMAT_TYPE (image->bits.format) == PIXMAN_TYPE_A);
     
     if (image->bits.read_func || image->bits.write_func)
 	pixman_rasterize_edges_accessors (image, l, r, t, b);
diff --git a/lib/pixman/pixman/pixman-fast-path.c b/lib/pixman/pixman/pixman-fast-path.c
index d95cb4dee..247aea645 100644
--- a/lib/pixman/pixman/pixman-fast-path.c
+++ b/lib/pixman/pixman/pixman-fast-path.c
@@ -35,7 +35,7 @@
 static force_inline uint32_t
 fetch_24 (uint8_t *a)
 {
-    if (((unsigned long)a) & 1)
+    if (((uintptr_t)a) & 1)
     {
 #ifdef WORDS_BIGENDIAN
 	return (*a << 16) | (*(uint16_t *)(a + 1));
@@ -57,7 +57,7 @@ static force_inline void
 store_24 (uint8_t *a,
           uint32_t v)
 {
-    if (((unsigned long)a) & 1)
+    if (((uintptr_t)a) & 1)
     {
 #ifdef WORDS_BIGENDIAN
 	*a = (uint8_t) (v >> 16);
@@ -507,15 +507,15 @@ fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
 		else
 		{
 		    d = *dst;
-		    d = over (src, CONVERT_0565_TO_0888 (d));
+		    d = over (src, convert_0565_to_0888 (d));
 		}
-		*dst = CONVERT_8888_TO_0565 (d);
+		*dst = convert_8888_to_0565 (d);
 	    }
 	    else if (m)
 	    {
 		d = *dst;
-		d = over (in (src, m), CONVERT_0565_TO_0888 (d));
-		*dst = CONVERT_8888_TO_0565 (d);
+		d = over (in (src, m), convert_0565_to_0888 (d));
+		*dst = convert_8888_to_0565 (d);
 	    }
 	    dst++;
 	}
@@ -541,7 +541,7 @@ fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
     if (src == 0)
 	return;
 
-    src16 = CONVERT_8888_TO_0565 (src);
+    src16 = convert_8888_to_0565 (src);
 
     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
@@ -566,14 +566,14 @@ fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 		else
 		{
 		    d = *dst;
-		    d = over (src, CONVERT_0565_TO_0888 (d));
-		    *dst = CONVERT_8888_TO_0565 (d);
+		    d = over (src, convert_0565_to_0888 (d));
+		    *dst = convert_8888_to_0565 (d);
 		}
 	    }
 	    else if (ma)
 	    {
 		d = *dst;
-		d = CONVERT_0565_TO_0888 (d);
+		d = convert_0565_to_0888 (d);
 
 		s = src;
 
@@ -582,7 +582,7 @@ fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 		ma = ~ma;
 		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
 
-		*dst = CONVERT_8888_TO_0565 (d);
+		*dst = convert_8888_to_0565 (d);
 	    }
 	    dst++;
 	}
@@ -729,9 +729,9 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp,
 		else
 		{
 		    d = *dst;
-		    d = over (s, CONVERT_0565_TO_0888 (d));
+		    d = over (s, convert_0565_to_0888 (d));
 		}
-		*dst = CONVERT_8888_TO_0565 (d);
+		*dst = convert_8888_to_0565 (d);
 	    }
 	    dst++;
 	}
@@ -739,36 +739,6 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp,
 }
 
 static void
-fast_composite_src_x888_0565 (pixman_implementation_t *imp,
-                              pixman_composite_info_t *info)
-{
-    PIXMAN_COMPOSITE_ARGS (info);
-    uint16_t    *dst_line, *dst;
-    uint32_t    *src_line, *src, s;
-    int dst_stride, src_stride;
-    int32_t w;
-
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
-    while (height--)
-    {
-	dst = dst_line;
-	dst_line += dst_stride;
-	src = src_line;
-	src_line += src_stride;
-	w = width;
-
-	while (w--)
-	{
-	    s = *src++;
-	    *dst = CONVERT_8888_TO_0565 (s);
-	    dst++;
-	}
-    }
-}
-
-static void
 fast_composite_add_8_8 (pixman_implementation_t *imp,
 			pixman_composite_info_t *info)
 {
@@ -838,13 +808,13 @@ fast_composite_add_0565_0565 (pixman_implementation_t *imp,
 	    if (s)
 	    {
 		d = *dst;
-		s = CONVERT_0565_TO_8888 (s);
+		s = convert_0565_to_8888 (s);
 		if (d)
 		{
-		    d = CONVERT_0565_TO_8888 (d);
+		    d = convert_0565_to_8888 (d);
 		    UN8x4_ADD_UN8x4 (s, d);
 		}
-		*dst = CONVERT_8888_TO_0565 (s);
+		*dst = convert_8888_to_0565 (s);
 	    }
 	    dst++;
 	}
@@ -1094,7 +1064,7 @@ fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
 
     if (srca == 0xff)
     {
-	src565 = CONVERT_8888_TO_0565 (src);
+	src565 = convert_8888_to_0565 (src);
 	while (height--)
 	{
 	    dst = dst_line;
@@ -1142,8 +1112,8 @@ fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
 		}
 		if (bitcache & bitmask)
 		{
-		    d = over (src, CONVERT_0565_TO_0888 (*dst));
-		    *dst = CONVERT_8888_TO_0565 (d);
+		    d = over (src, convert_0565_to_0888 (*dst));
+		    *dst = convert_8888_to_0565 (d);
 		}
 		bitmask = UPDATE_BITMASK (bitmask);
 		dst++;
@@ -1176,7 +1146,7 @@ fast_composite_solid_fill (pixman_implementation_t *imp,
     else if (dest_image->bits.format == PIXMAN_r5g6b5 ||
              dest_image->bits.format == PIXMAN_b5g6r5)
     {
-	src = CONVERT_8888_TO_0565 (src);
+	src = convert_8888_to_0565 (src);
     }
 
     pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
@@ -1243,6 +1213,18 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp,
     pixman_composite_func_t func;
     pixman_format_code_t mask_format;
     uint32_t src_flags, mask_flags;
+    int32_t sx, sy;
+    int32_t width_remain;
+    int32_t num_pixels;
+    int32_t src_width;
+    int32_t i, j;
+    pixman_image_t extended_src_image;
+    uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
+    pixman_bool_t need_src_extension;
+    uint32_t *src_line;
+    int32_t src_stride;
+    int32_t src_bpp;
+    pixman_composite_info_t info2 = *info;
 
     src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
 		    FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
@@ -1258,149 +1240,131 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp,
 	mask_flags = FAST_PATH_IS_OPAQUE;
     }
 
-    if (_pixman_implementation_lookup_composite (
-	    imp->toplevel, info->op,
-	    src_image->common.extended_format_code, src_flags,
-	    mask_format, mask_flags,
-	    dest_image->common.extended_format_code, info->dest_flags,
-	    &imp, &func))
+    _pixman_implementation_lookup_composite (
+	imp->toplevel, info->op,
+	src_image->common.extended_format_code, src_flags,
+	mask_format, mask_flags,
+	dest_image->common.extended_format_code, info->dest_flags,
+	&imp, &func);
+
+    src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
+
+    if (src_image->bits.width < REPEAT_MIN_WIDTH		&&
+	(src_bpp == 32 || src_bpp == 16 || src_bpp == 8)	&&
+	!src_image->bits.indexed)
     {
-	int32_t sx, sy;
-	int32_t width_remain;
-	int32_t num_pixels;
-	int32_t src_width;
-	int32_t i, j;
-	pixman_image_t extended_src_image;
-	uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
-	pixman_bool_t need_src_extension;
-	uint32_t *src_line;
-	int32_t src_stride;
-	int32_t src_bpp;
-	pixman_composite_info_t info2 = *info;
-
-	src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
-
-	if (src_image->bits.width < REPEAT_MIN_WIDTH		&&
-	    (src_bpp == 32 || src_bpp == 16 || src_bpp == 8)	&&
-	    !src_image->bits.indexed)
-	{
-	    sx = src_x;
-	    sx = MOD (sx, src_image->bits.width);
-	    sx += width;
-	    src_width = 0;
+	sx = src_x;
+	sx = MOD (sx, src_image->bits.width);
+	sx += width;
+	src_width = 0;
 
-	    while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
-		src_width += src_image->bits.width;
+	while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
+	    src_width += src_image->bits.width;
 
-	    src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);
+	src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);
 
-	    /* Initialize/validate stack-allocated temporary image */
-	    _pixman_bits_image_init (&extended_src_image, src_image->bits.format,
-				     src_width, 1, &extended_src[0], src_stride,
-				     FALSE);
-	    _pixman_image_validate (&extended_src_image);
+	/* Initialize/validate stack-allocated temporary image */
+	_pixman_bits_image_init (&extended_src_image, src_image->bits.format,
+				 src_width, 1, &extended_src[0], src_stride,
+				 FALSE);
+	_pixman_image_validate (&extended_src_image);
 
-	    info2.src_image = &extended_src_image;
-	    need_src_extension = TRUE;
-	}
-	else
-	{
-	    src_width = src_image->bits.width;
-	    need_src_extension = FALSE;
-	}
+	info2.src_image = &extended_src_image;
+	need_src_extension = TRUE;
+    }
+    else
+    {
+	src_width = src_image->bits.width;
+	need_src_extension = FALSE;
+    }
 
-	sx = src_x;
-	sy = src_y;
+    sx = src_x;
+    sy = src_y;
 
-	while (--height >= 0)
-	{
-	    sx = MOD (sx, src_width);
-	    sy = MOD (sy, src_image->bits.height);
+    while (--height >= 0)
+    {
+	sx = MOD (sx, src_width);
+	sy = MOD (sy, src_image->bits.height);
 
-	    if (need_src_extension)
+	if (need_src_extension)
+	{
+	    if (src_bpp == 32)
 	    {
-		if (src_bpp == 32)
-		{
-		    PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);
+		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);
 
-		    for (i = 0; i < src_width; )
-		    {
-			for (j = 0; j < src_image->bits.width; j++, i++)
-			    extended_src[i] = src_line[j];
-		    }
-		}
-		else if (src_bpp == 16)
+		for (i = 0; i < src_width; )
 		{
-		    uint16_t *src_line_16;
-
-		    PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
-					   src_line_16, 1);
-		    src_line = (uint32_t*)src_line_16;
-
-		    for (i = 0; i < src_width; )
-		    {
-			for (j = 0; j < src_image->bits.width; j++, i++)
-			    ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
-		    }
+		    for (j = 0; j < src_image->bits.width; j++, i++)
+			extended_src[i] = src_line[j];
 		}
-		else if (src_bpp == 8)
-		{
-		    uint8_t *src_line_8;
+	    }
+	    else if (src_bpp == 16)
+	    {
+		uint16_t *src_line_16;
 
-		    PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
-					   src_line_8, 1);
-		    src_line = (uint32_t*)src_line_8;
+		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
+				       src_line_16, 1);
+		src_line = (uint32_t*)src_line_16;
 
-		    for (i = 0; i < src_width; )
-		    {
-			for (j = 0; j < src_image->bits.width; j++, i++)
-			    ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
-		    }
+		for (i = 0; i < src_width; )
+		{
+		    for (j = 0; j < src_image->bits.width; j++, i++)
+			((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
 		}
-
-		info2.src_y = 0;
 	    }
-	    else
+	    else if (src_bpp == 8)
 	    {
-		info2.src_y = sy;
+		uint8_t *src_line_8;
+
+		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
+				       src_line_8, 1);
+		src_line = (uint32_t*)src_line_8;
+
+		for (i = 0; i < src_width; )
+		{
+		    for (j = 0; j < src_image->bits.width; j++, i++)
+			((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
+		}
 	    }
 
-	    width_remain = width;
+	    info2.src_y = 0;
+	}
+	else
+	{
+	    info2.src_y = sy;
+	}
 
-	    while (width_remain > 0)
-	    {
-		num_pixels = src_width - sx;
+	width_remain = width;
 
-		if (num_pixels > width_remain)
-		    num_pixels = width_remain;
+	while (width_remain > 0)
+	{
+	    num_pixels = src_width - sx;
 
-		info2.src_x = sx;
-		info2.width = num_pixels;
-		info2.height = 1;
+	    if (num_pixels > width_remain)
+		num_pixels = width_remain;
 
-		func (imp, &info2);
+	    info2.src_x = sx;
+	    info2.width = num_pixels;
+	    info2.height = 1;
 
-		width_remain -= num_pixels;
-		info2.mask_x += num_pixels;
-		info2.dest_x += num_pixels;
-		sx = 0;
-	    }
+	    func (imp, &info2);
 
-	    sx = src_x;
-	    sy++;
-	    info2.mask_x = info->mask_x;
-	    info2.mask_y++;
-	    info2.dest_x = info->dest_x;
-	    info2.dest_y++;
+	    width_remain -= num_pixels;
+	    info2.mask_x += num_pixels;
+	    info2.dest_x += num_pixels;
+	    sx = 0;
 	}
 
-	if (need_src_extension)
-	    _pixman_image_fini (&extended_src_image);
-    }
-    else
-    {
-	_pixman_log_error (FUNC, "Didn't find a suitable function ");
+	sx = src_x;
+	sy++;
+	info2.mask_x = info->mask_x;
+	info2.mask_y++;
+	info2.dest_x = info->dest_x;
+	info2.dest_y++;
     }
+
+    if (need_src_extension)
+	_pixman_image_fini (&extended_src_image);
 }
 
 /* Use more unrolling for src_0565_0565 because it is typically CPU bound */
@@ -1913,10 +1877,6 @@ static const pixman_fast_path_t c_fast_paths[] =
     PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
     PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
     PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
-    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
-    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
-    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
-    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
     PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
     PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
 
@@ -2067,12 +2027,12 @@ pixman_fill1 (uint32_t *bits,
               int       y,
               int       width,
               int       height,
-              uint32_t  xor)
+              uint32_t  filler)
 {
     uint32_t *dst = bits + y * stride + (x >> 5);
     int offs = x & 31;
 
-    if (xor & 1)
+    if (filler & 1)
     {
 	while (height--)
 	{
@@ -2097,11 +2057,11 @@ pixman_fill8 (uint32_t *bits,
               int       y,
               int       width,
               int       height,
-              uint32_t xor)
+              uint32_t  filler)
 {
     int byte_stride = stride * (int) sizeof (uint32_t);
     uint8_t *dst = (uint8_t *) bits;
-    uint8_t v = xor & 0xff;
+    uint8_t v = filler & 0xff;
     int i;
 
     dst = dst + y * byte_stride + x;
@@ -2122,12 +2082,12 @@ pixman_fill16 (uint32_t *bits,
                int       y,
                int       width,
                int       height,
-               uint32_t xor)
+               uint32_t  filler)
 {
     int short_stride =
 	(stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
     uint16_t *dst = (uint16_t *)bits;
-    uint16_t v = xor & 0xffff;
+    uint16_t v = filler & 0xffff;
     int i;
 
     dst = dst + y * short_stride + x;
@@ -2148,7 +2108,7 @@ pixman_fill32 (uint32_t *bits,
                int       y,
                int       width,
                int       height,
-               uint32_t  xor)
+               uint32_t  filler)
 {
     int i;
 
@@ -2157,7 +2117,7 @@ pixman_fill32 (uint32_t *bits,
     while (height--)
     {
 	for (i = 0; i < width; ++i)
-	    bits[i] = xor;
+	    bits[i] = filler;
 
 	bits += stride;
     }
@@ -2172,24 +2132,24 @@ fast_path_fill (pixman_implementation_t *imp,
                 int                      y,
                 int                      width,
                 int                      height,
-                uint32_t		 xor)
+                uint32_t		 filler)
 {
     switch (bpp)
     {
     case 1:
-	pixman_fill1 (bits, stride, x, y, width, height, xor);
+	pixman_fill1 (bits, stride, x, y, width, height, filler);
 	break;
 
     case 8:
-	pixman_fill8 (bits, stride, x, y, width, height, xor);
+	pixman_fill8 (bits, stride, x, y, width, height, filler);
 	break;
 
     case 16:
-	pixman_fill16 (bits, stride, x, y, width, height, xor);
+	pixman_fill16 (bits, stride, x, y, width, height, filler);
 	break;
 
     case 32:
-	pixman_fill32 (bits, stride, x, y, width, height, xor);
+	pixman_fill32 (bits, stride, x, y, width, height, filler);
 	break;
 
     default:
@@ -2199,12 +2159,200 @@ fast_path_fill (pixman_implementation_t *imp,
     return TRUE;
 }
 
+/*****************************************************************************/
+
+static uint32_t *
+fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
+{
+    int32_t w = iter->width;
+    uint32_t *dst = iter->buffer;
+    const uint16_t *src = (const uint16_t *)iter->bits;
+    /* Expand iter->width r5g6b5 pixels at src into 32-bit pixels (alpha 0xFF) at dst; mask is unused */
+    iter->bits += iter->stride; /* advance the iterator by one row stride */
+
+    /* Convert one pixel first if src is not 4-byte aligned, so the loop can load 32 bits */
+    if (w > 0 && ((uintptr_t)src & 3))
+    {
+	*dst++ = convert_0565_to_8888 (*src++);
+	w--;
+    }
+    /* Process two pixels per iteration, read together as one 32-bit word */
+    while ((w -= 2) >= 0)
+    {
+	uint32_t sr, sb, sg, t0, t1;
+	uint32_t s = *(const uint32_t *)src;
+	src += 2;
+	sr = (s >> 8) & 0x00F800F8; /* 5 red bits of each pixel, at bits 3-7 and 19-23 */
+	sb = (s << 3) & 0x00F800F8; /* 5 blue bits, same positions */
+	sg = (s >> 3) & 0x00FC00FC; /* 6 green bits, at bits 2-7 and 18-23 */
+	sr |= sr >> 5; /* replicate the top bits downward to fill 8 bits */
+	sb |= sb >> 5;
+	sg |= sg >> 6;
+	t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
+	     (sb & 0xFF) | 0xFF000000; /* pixel from the low 16 bits of s */
+	t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
+	     (sb >> 16) | 0xFF000000; /* pixel from the high 16 bits of s */
+#ifdef WORDS_BIGENDIAN
+	*dst++ = t1;
+	*dst++ = t0;
+#else
+	*dst++ = t0;
+	*dst++ = t1;
+#endif
+    }
+    if (w & 1) /* for w >= 0 on loop entry, w is now -1 if one pixel is left, -2 if none */
+    {
+	*dst = convert_0565_to_8888 (*src);
+    }
+
+    return iter->buffer;
+}
+
+static uint32_t *
+fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask)
+{
+    iter->bits += iter->stride; /* advance one row stride without reading any pixels */
+    return iter->buffer; /* the buffer is returned without being written */
+}
+
+/* Packs a 32-bit pixel into r5g6b5 (in the low 16 bits of the result). The
+ * 0x1F001F mask is supplied by the caller, as a workaround that tries to keep
+ * that constant allocated in a register on RISC architectures. */
+static force_inline uint32_t
+convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
+{
+    uint32_t a, b;
+    a = (s >> 3) & x1F001F; /* with mask 0x1F001F: blue -> bits 0-4, red -> bits 16-20 */
+    b = s & 0xFC00; /* top 6 green bits, at bits 10-15 */
+    a |= a >> 5; /* red -> bits 11-15 */
+    a |= b >> 5; /* green -> bits 5-10 */
+    return a; /* bits 16-20 still carry red; only the low 16 bits form the pixel */
+}
+
+static void
+fast_write_back_r5g6b5 (pixman_iter_t *iter)
+{
+    int32_t w = iter->width;
+    uint16_t *dst = (uint16_t *)(iter->bits - iter->stride); /* one row stride before iter->bits */
+    const uint32_t *src = iter->buffer; /* 32-bit pixels to pack as r5g6b5 */
+    /* Workaround to get x1F001F allocated in a register: it is loaded from a volatile */
+    static volatile uint32_t volatile_x1F001F = 0x1F001F;
+    uint32_t x1F001F = volatile_x1F001F;
+    /* Four pixels per iteration; afterwards the low two bits of w give the 0-3 pixels left */
+    while ((w -= 4) >= 0)
+    {
+	uint32_t s1 = *src++;
+	uint32_t s2 = *src++;
+	uint32_t s3 = *src++;
+	uint32_t s4 = *src++;
+	*dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
+	*dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
+	*dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
+	*dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
+    }
+    if (w & 2) /* two or three pixels remain */
+    {
+	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
+	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
+    }
+    if (w & 1) /* an odd pixel remains */
+    {
+	*dst = convert_8888_to_0565_workaround (*src, x1F001F);
+    }
+}
+
+typedef struct
+{
+    pixman_format_code_t	format; /* image format this entry applies to */
+    pixman_iter_get_scanline_t	get_scanline; /* scanline fetcher for that format */
+    pixman_iter_write_back_t	write_back; /* write-back callback for that format */
+} fetcher_info_t;
+/* Table scanned by the iterator init functions; the PIXMAN_null entry ends the scan. */
+static const fetcher_info_t fetchers[] =
+{
+    { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
+    { PIXMAN_null } /* terminator */
+};
+
+static pixman_bool_t
+fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+{
+    pixman_image_t *image = iter->image;
+    /* On a format match in fetchers[], points iter at its first pixel and installs the fetcher */
+#define FLAGS								\
+    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\
+     FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
+    /* Only iterators with ITER_NARROW set and every bit of FLAGS in image_flags qualify */
+    if ((iter->iter_flags & ITER_NARROW)			&&
+	(iter->image_flags & FLAGS) == FLAGS)
+    {
+	const fetcher_info_t *f;
+
+	for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
+	{
+	    if (image->common.extended_format_code == f->format)
+	    {
+		uint8_t *b = (uint8_t *)image->bits.bits;
+		int s = image->bits.rowstride * 4; /* stride used for byte arithmetic on b */
+
+		iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
+		iter->stride = s;
+
+		iter->get_scanline = f->get_scanline;
+		return TRUE;
+	    }
+	}
+    }
+
+    return FALSE; /* no fast fetcher was installed */
+}
+
+static pixman_bool_t
+fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+{
+    pixman_image_t *image = iter->image;
+    /* On a format match in fetchers[], installs both the fetch and the write-back callback */
+    if ((iter->iter_flags & ITER_NARROW)		&&
+	(iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS)
+    {
+	const fetcher_info_t *f;
+
+	for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
+	{
+	    if (image->common.extended_format_code == f->format)
+	    {
+		uint8_t *b = (uint8_t *)image->bits.bits;
+		int s = image->bits.rowstride * 4; /* stride used for byte arithmetic on b */
+
+		iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
+		iter->stride = s;
+
+		if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
+		    (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
+		{
+		    iter->get_scanline = fast_dest_fetch_noop; /* both ignore flags set: skip the conversion */
+		}
+		else
+		{
+		    iter->get_scanline = f->get_scanline;
+		}
+		iter->write_back = f->write_back;
+		return TRUE;
+	    }
+	}
+    }
+    return FALSE; /* no fast callbacks were installed */
+}
+
+
 pixman_implementation_t *
 _pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
 {
     pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
 
     imp->fill = fast_path_fill;
+    imp->src_iter_init = fast_src_iter_init;
+    imp->dest_iter_init = fast_dest_iter_init;
 
     return imp;
 }
diff --git a/lib/pixman/pixman/pixman-filter.c b/lib/pixman/pixman/pixman-filter.c
new file mode 100644
index 000000000..26b39d571
--- /dev/null
+++ b/lib/pixman/pixman/pixman-filter.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright 2012, Red Hat, Inc.
+ * Copyright 2012, Soren Sandmann
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Soren Sandmann <soren.sandmann@gmail.com>
+ */
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <assert.h>
+#include <config.h>
+#include "pixman-private.h"
+
+/* Signature shared by every kernel function: evaluate the kernel at @x */
+typedef double (* kernel_func_t) (double x);
+
+/* Description of one filter kernel: its identifier, the function that
+ * evaluates it, and the width of the interval over which it is used
+ * (create_1d_filter() integrates over [-width/2, width/2]).
+ */
+typedef struct
+{
+    pixman_kernel_t	kernel;
+    kernel_func_t	func;
+    double		width;
+} filter_info_t;
+
+/* Impulse kernel: 1 exactly at the origin, 0 everywhere else */
+static double
+impulse_kernel (double x)
+{
+    if (x == 0.0)
+	return 1.0;
+
+    return 0.0;
+}
+
+/* Box kernel: constant 1.  The argument is ignored; limiting the
+ * kernel to its support is left to the caller.
+ */
+static double
+box_kernel (double x)
+{
+    (void) x;
+
+    return 1;
+}
+
+/* Linear (tent) kernel: 1 - |x|.  No clamping is done here, so the
+ * value is negative for |x| > 1.
+ */
+static double
+linear_kernel (double x)
+{
+    double distance = fabs (x);
+
+    return 1 - distance;
+}
+
+/* Gaussian kernel: the normal density
+ *
+ *     exp (-x^2 / (2 * SIGMA^2)) / (SIGMA * sqrt (2 * pi))
+ *
+ * with standard deviation SIGMA = sqrt(2) / 2.
+ *
+ * Note that SQRT2 and SIGMA are preprocessor macros, so they stay
+ * defined for the rest of the file; the filters[] table uses SIGMA
+ * for the width of this kernel.
+ */
+static double
+gaussian_kernel (double x)
+{
+#define SQRT2 (1.4142135623730950488016887242096980785696718753769480)
+#define SIGMA (SQRT2 / 2.0)
+    
+    return exp (- x * x / (2 * SIGMA * SIGMA)) / (SIGMA * sqrt (2.0 * M_PI));
+}
+
+/* Normalized sinc: sin (pi * x) / (pi * x), with the removable
+ * singularity at x == 0 filled in with 1.
+ */
+static double
+sinc (double x)
+{
+    if (x == 0.0)
+	return 1.0;
+
+    return sin (M_PI * x) / (M_PI * x);
+}
+
+/* Lanczos kernel with parameter @n: sinc (x) multiplied by a sinc
+ * that has been stretched horizontally by a factor of @n.
+ */
+static double
+lanczos (double x, int n)
+{
+    double lobe = sinc (x);
+    double window = sinc (x * (1.0 / n));
+
+    return lobe * window;
+}
+
+/* Lanczos kernel with n = 2 */
+static double
+lanczos2_kernel (double x)
+{
+    const int n = 2;
+
+    return lanczos (x, n);
+}
+
+/* Lanczos kernel with n = 3 */
+static double
+lanczos3_kernel (double x)
+{
+    const int n = 3;
+
+    return lanczos (x, n);
+}
+
+/* Lanczos3 evaluated at 0.75 * x, i.e. the Lanczos3 kernel stretched
+ * horizontally by a factor of 4/3.
+ */
+static double
+nice_kernel (double x)
+{
+    double squeezed = x * 0.75;
+
+    return lanczos3_kernel (squeezed);
+}
+
+/* Piecewise cubic kernel parameterized by @B and @C.
+ *
+ * One polynomial is used for |x| < 1, another for 1 <= |x| < 2, and
+ * the kernel is 0 everywhere else.
+ */
+static double
+general_cubic (double x, double B, double C)
+{
+    double ax = fabs(x);
+
+    /* Inner piece */
+    if (ax < 1)
+    {
+	return ((12 - 9 * B - 6 * C) * ax * ax * ax +
+		(-18 + 12 * B + 6 * C) * ax * ax + (6 - 2 * B)) / 6;
+    }
+
+    /* Outer piece; ax >= 1 is already known here */
+    if (ax < 2)
+    {
+	return ((-B - 6 * C) * ax * ax * ax +
+		(6 * B + 30 * C) * ax * ax + (-12 * B - 48 * C) *
+		ax + (8 * B + 24 * C)) / 6;
+    }
+
+    return 0;
+}
+
+/* Cubic kernel: general_cubic() with B = C = 1/3 */
+static double
+cubic_kernel (double x)
+{
+    /* B = C = 1/3 selects the Mitchell-Netravali filter.
+     *
+     * (0.0, 0.5) would give us the Catmull-Rom spline,
+     * but that one seems to be indistinguishable from Lanczos2.
+     */
+    const double B = 1/3.0;
+    const double C = 1/3.0;
+
+    return general_cubic (x, B, C);
+}
+
+/* Table of the known kernels: identifier, evaluation function and width.
+ *
+ * The table is indexed directly with pixman_kernel_t values
+ * (filters[kernel].func, filters[kernel].width), so the position of
+ * each entry has to match the numeric value of its identifier.
+ */
+static const filter_info_t filters[] =
+{
+    { PIXMAN_KERNEL_IMPULSE,	        impulse_kernel,   0.0 },
+    { PIXMAN_KERNEL_BOX,	        box_kernel,       1.0 },
+    { PIXMAN_KERNEL_LINEAR,	        linear_kernel,    2.0 },
+    { PIXMAN_KERNEL_CUBIC,		cubic_kernel,     4.0 },
+    { PIXMAN_KERNEL_GAUSSIAN,	        gaussian_kernel,  6 * SIGMA },
+    { PIXMAN_KERNEL_LANCZOS2,	        lanczos2_kernel,  4.0 },
+    { PIXMAN_KERNEL_LANCZOS3,	        lanczos3_kernel,  6.0 },
+    { PIXMAN_KERNEL_LANCZOS3_STRETCHED, nice_kernel,      8.0 },
+};
+
+/* This function scales @kernel2 by @scale, then
+ * aligns @x1 in @kernel1 with @x2 in @kernel2 and
+ * integrates the product of the kernels across @width.
+ *
+ * This function assumes that the intervals are within
+ * the kernels in question. E.g., the caller must not
+ * try to integrate a linear kernel outside of [-1:1]
+ *
+ * If either kernel is PIXMAN_KERNEL_IMPULSE, @width must be 0 and
+ * the result is simply the other kernel evaluated at its start
+ * position.
+ */
+static double
+integral (pixman_kernel_t kernel1, double x1,
+	  pixman_kernel_t kernel2, double scale, double x2,
+	  double width)
+{
+    /* If the integration interval crosses zero, break it into
+     * two separate integrals. This ensures that filters such
+     * as LINEAR that are not differentiable at 0 will still
+     * integrate properly.
+     */
+    if (x1 < 0 && x1 + width > 0)
+    {
+	return
+	    integral (kernel1, x1, kernel2, scale, x2, - x1) +
+	    integral (kernel1, 0, kernel2, scale, x2 - x1, width + x1);
+    }
+    else if (x2 < 0 && x2 + width > 0)
+    {
+	return
+	    integral (kernel1, x1, kernel2, scale, x2, - x2) +
+	    integral (kernel1, x1 - x2, kernel2, scale, 0, width + x2);
+    }
+    else if (kernel1 == PIXMAN_KERNEL_IMPULSE)
+    {
+	assert (width == 0.0);
+	return filters[kernel2].func (x2 * scale);
+    }
+    else if (kernel2 == PIXMAN_KERNEL_IMPULSE)
+    {
+	assert (width == 0.0);
+	return filters[kernel1].func (x1);
+    }
+    else
+    {
+	/* Integration via the composite Simpson's rule: with an even
+	 * number of segments of length h, the integral is approximated by
+	 *
+	 *     h / 3 * (f_0 + 4 f_1 + 2 f_2 + 4 f_3 + ... + 4 f_(N-1) + f_N)
+	 *
+	 * i.e. the end points have weight 1, odd interior points have
+	 * weight 4 and even interior points have weight 2.
+	 */
+#define N_SEGMENTS 128
+#define SAMPLE(a1, a2)							\
+	(filters[kernel1].func ((a1)) * filters[kernel2].func ((a2) * scale))
+	
+	double h = width / (double)N_SEGMENTS;
+	double s;
+	int i;
+
+	s = SAMPLE (x1, x2);
+
+	/* Odd interior points */
+	for (i = 1; i < N_SEGMENTS; i += 2)
+	{
+	    double a1 = x1 + h * i;
+	    double a2 = x2 + h * i;
+
+	    s += 4 * SAMPLE (a1, a2);
+	}
+
+	/* Even interior points */
+	for (i = 2; i < N_SEGMENTS; i += 2)
+	{
+	    double a1 = x1 + h * i;
+	    double a2 = x2 + h * i;
+
+	    s += 2 * SAMPLE (a1, a2);
+	}
+
+	s += SAMPLE (x1 + width, x2 + width);
+	
+	return h * s * (1.0 / 3.0);
+    }
+}
+
+/* Compute the coefficient table for one axis of a separable filter.
+ *
+ * The filter is the convolution of the @reconstruct kernel with the
+ * @sample kernel scaled by @scale.  It is evaluated for @n_phases
+ * subpixel positions; each phase gets *@width coefficients that are
+ * normalized to sum to exactly pixman_fixed_1.
+ *
+ * On return *@width holds the number of coefficients per phase (always
+ * at least 1).  The result is a malloc()ed array of *@width * @n_phases
+ * values, or NULL if @n_phases is not positive or the allocation is
+ * not possible.
+ */
+static pixman_fixed_t *
+create_1d_filter (int             *width,
+		  pixman_kernel_t  reconstruct,
+		  pixman_kernel_t  sample,
+		  double           scale,
+		  int              n_phases)
+{
+    pixman_fixed_t *params, *p;
+    double step;
+    double size;
+    int i;
+
+    size = scale * filters[sample].width + filters[reconstruct].width;
+    *width = ceil (size);
+
+    /* A zero-size filter (e.g. IMPULSE convolved with IMPULSE) still
+     * needs one coefficient per phase; otherwise there would be nowhere
+     * to store the unit weight and the table would be empty.
+     */
+    if (*width < 1)
+	*width = 1;
+
+    /* Refuse sizes whose byte count does not fit in a size_t */
+    if (n_phases < 1 ||
+	(size_t)*width > ((size_t)-1) / sizeof (pixman_fixed_t) / (size_t)n_phases)
+    {
+	return NULL;
+    }
+
+    p = params = malloc ((size_t)*width * (size_t)n_phases * sizeof (pixman_fixed_t));
+    if (!params)
+	return NULL;
+
+    step = 1.0 / n_phases;
+
+    for (i = 0; i < n_phases; ++i)
+    {
+	double frac = step / 2.0 + i * step;
+	pixman_fixed_t *row = p;	/* first coefficient of this phase */
+	pixman_fixed_t new_total;
+	int x, x1, x2;
+	double total;
+
+	/* Sample convolution of reconstruction and sampling
+	 * filter. See rounding.txt regarding the rounding
+	 * and sample positions.
+	 */
+
+	x1 = ceil (frac - *width / 2.0 - 0.5);
+	x2 = x1 + *width;
+
+	total = 0;
+	for (x = x1; x < x2; ++x)
+	{
+	    double pos = x + 0.5 - frac;
+	    double rlow = - filters[reconstruct].width / 2.0;
+	    double rhigh = rlow + filters[reconstruct].width;
+	    double slow = pos - scale * filters[sample].width / 2.0;
+	    double shigh = slow + scale * filters[sample].width;
+	    double c = 0.0;
+	    double ilow, ihigh;
+
+	    /* Only integrate where the two kernels overlap */
+	    if (rhigh >= slow && rlow <= shigh)
+	    {
+		ilow = MAX (slow, rlow);
+		ihigh = MIN (shigh, rhigh);
+
+		c = integral (reconstruct, ilow,
+			      sample, 1.0 / scale, ilow - pos,
+			      ihigh - ilow);
+	    }
+
+	    total += c;
+	    *p++ = (pixman_fixed_t)(c * 65535.0 + 0.5);
+	}
+
+	/* Normalize.  If nothing overlapped at all the sum is zero;
+	 * dividing by it would produce inf/NaN values that cannot be
+	 * converted to fixed point, so in that case the coefficients
+	 * are left at zero and the correction below turns the phase
+	 * into a single unit weight.
+	 */
+	p = row;
+	total = (total != 0.0) ? 1 / total : 0.0;
+	new_total = 0;
+	for (x = x1; x < x2; ++x)
+	{
+	    pixman_fixed_t t = (*p) * total + 0.5;
+
+	    new_total += t;
+	    *p++ = t;
+	}
+
+	/* Put the rounding error on the middle coefficient.  Indexing
+	 * from the start of the row keeps the write inside this phase
+	 * for every width, including a width of 1.
+	 */
+	if (new_total != pixman_fixed_1)
+	    row[*width / 2] += (pixman_fixed_1 - new_total);
+    }
+
+    return params;
+}
+
+/* Create the parameter list for a SEPARABLE_CONVOLUTION filter
+ * with the given kernels and scale parameters.
+ *
+ * The returned malloc()ed array is laid out as
+ *
+ *     [0] width, [1] height, [2] subsample_bits_x, [3] subsample_bits_y
+ *         (all four converted with pixman_int_to_fixed),
+ *     followed by the horizontal coefficient table
+ *         (width * 2^subsample_bits_x values),
+ *     followed by the vertical coefficient table
+ *         (height * 2^subsample_bits_y values).
+ *
+ * The total number of values is stored in *@n_values.  NULL is
+ * returned if any allocation fails; *@n_values is left untouched when
+ * one of the 1-D tables could not be created.
+ */
+PIXMAN_EXPORT pixman_fixed_t *
+pixman_filter_create_separable_convolution (int             *n_values,
+					    pixman_fixed_t   scale_x,
+					    pixman_fixed_t   scale_y,
+					    pixman_kernel_t  reconstruct_x,
+					    pixman_kernel_t  reconstruct_y,
+					    pixman_kernel_t  sample_x,
+					    pixman_kernel_t  sample_y,
+					    int              subsample_bits_x,
+					    int	             subsample_bits_y)
+{
+    /* Only the magnitude of the scale factors matters */
+    const double abs_scale_x = fabs (pixman_fixed_to_double (scale_x));
+    const double abs_scale_y = fabs (pixman_fixed_to_double (scale_y));
+    const int n_phases_x = (1 << subsample_bits_x);
+    const int n_phases_y = (1 << subsample_bits_y);
+    pixman_fixed_t *x_table, *y_table;
+    pixman_fixed_t *result = NULL;
+    int width, height;
+
+    x_table = create_1d_filter (&width, reconstruct_x, sample_x,
+				abs_scale_x, n_phases_x);
+    y_table = create_1d_filter (&height, reconstruct_y, sample_y,
+				abs_scale_y, n_phases_y);
+
+    if (x_table && y_table)
+    {
+	const int n_x = width * n_phases_x;
+	const int n_y = height * n_phases_y;
+
+	*n_values = 4 + n_x + n_y;
+
+	result = malloc (*n_values * sizeof (pixman_fixed_t));
+	if (result)
+	{
+	    /* Header */
+	    result[0] = pixman_int_to_fixed (width);
+	    result[1] = pixman_int_to_fixed (height);
+	    result[2] = pixman_int_to_fixed (subsample_bits_x);
+	    result[3] = pixman_int_to_fixed (subsample_bits_y);
+
+	    /* Horizontal table, then vertical table */
+	    memcpy (result + 4, x_table,
+		    n_x * sizeof (pixman_fixed_t));
+	    memcpy (result + 4 + n_x, y_table,
+		    n_y * sizeof (pixman_fixed_t));
+	}
+    }
+
+    /* The 1-D tables are only temporaries; free (NULL) is harmless */
+    free (x_table);
+    free (y_table);
+
+    return result;
+}
diff --git a/lib/pixman/pixman/pixman-general.c b/lib/pixman/pixman/pixman-general.c
index 0bf91e444..93a1b9acf 100644
--- a/lib/pixman/pixman/pixman-general.c
+++ b/lib/pixman/pixman/pixman-general.c
@@ -42,9 +42,7 @@ general_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
 {
     pixman_image_t *image = iter->image;
 
-    if (image->type == SOLID)
-	_pixman_solid_fill_iter_init (image, iter);
-    else if (image->type == LINEAR)
+    if (image->type == LINEAR)
 	_pixman_linear_gradient_iter_init (image, iter);
     else if (image->type == RADIAL)
 	_pixman_radial_gradient_iter_init (image, iter);
@@ -52,7 +50,9 @@ general_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
 	_pixman_conical_gradient_iter_init (image, iter);
     else if (image->type == BITS)
 	_pixman_bits_image_src_iter_init (image, iter);
-    else
+    else if (image->type == SOLID)
+        _pixman_log_error (FUNC, "Solid image not handled by noop");
+    else         
 	_pixman_log_error (FUNC, "Pixman bug: unknown image type\n");
 
     return TRUE;
@@ -188,9 +188,6 @@ general_composite_rect  (pixman_implementation_t *imp,
     compose = _pixman_implementation_lookup_combiner (
 	imp->toplevel, op, component_alpha, narrow);
 
-    if (!compose)
-	return;
-
     for (i = 0; i < height; ++i)
     {
 	uint32_t *s, *m, *d;
diff --git a/lib/pixman/pixman/pixman-glyph.c b/lib/pixman/pixman/pixman-glyph.c
index 15b3f1fea..5a271b64b 100644
--- a/lib/pixman/pixman/pixman-glyph.c
+++ b/lib/pixman/pixman/pixman-glyph.c
@@ -463,16 +463,13 @@ pixman_composite_glyphs_no_mask (pixman_op_t            op,
 		{
 		    glyph_format = glyph_img->common.extended_format_code;
 		    glyph_flags = glyph_img->common.flags;
-		    
+
 		    _pixman_implementation_lookup_composite (
 			get_implementation(), op,
 			src->common.extended_format_code, src->common.flags,
 			glyph_format, glyph_flags | extra,
 			dest_format, dest_flags,
 			&implementation, &func);
-
-		    if (!func)
-			goto out;
 		}
 
 		info.src_x = src_x + composite_box.x1 - dest_x;
@@ -508,7 +505,7 @@ add_glyphs (pixman_glyph_cache_t *cache,
     uint32_t glyph_flags = 0;
     pixman_composite_func_t func = NULL;
     pixman_implementation_t *implementation = NULL;
-    uint32_t dest_format;
+    pixman_format_code_t dest_format;
     uint32_t dest_flags;
     pixman_box32_t dest_box;
     pixman_composite_info_t info;
@@ -582,9 +579,6 @@ add_glyphs (pixman_glyph_cache_t *cache,
 		mask_format, info.mask_flags,
 		dest_format, dest_flags,
 		&implementation, &func);
-
-	    if (!func)
-		goto out;
 	}
 
 	glyph_box.x1 = glyphs[i].x - glyph->origin_x + off_x;
diff --git a/lib/pixman/pixman/pixman-gradient-walker.c b/lib/pixman/pixman/pixman-gradient-walker.c
index e7e724fa6..5944a559a 100644
--- a/lib/pixman/pixman/pixman-gradient-walker.c
+++ b/lib/pixman/pixman/pixman-gradient-walker.c
@@ -37,11 +37,14 @@ _pixman_gradient_walker_init (pixman_gradient_walker_t *walker,
     walker->stops     = gradient->stops;
     walker->left_x    = 0;
     walker->right_x   = 0x10000;
-    walker->stepper   = 0;
-    walker->left_ag   = 0;
-    walker->left_rb   = 0;
-    walker->right_ag  = 0;
-    walker->right_rb  = 0;
+    walker->a_s       = 0.0f;
+    walker->a_b       = 0.0f;
+    walker->r_s       = 0.0f;
+    walker->r_b       = 0.0f;
+    walker->g_s       = 0.0f;
+    walker->g_b       = 0.0f;
+    walker->b_s       = 0.0f;
+    walker->b_b       = 0.0f;
     walker->repeat    = repeat;
 
     walker->need_reset = TRUE;
@@ -55,6 +58,9 @@ gradient_walker_reset (pixman_gradient_walker_t *walker,
     pixman_color_t *left_c, *right_c;
     int n, count = walker->num_stops;
     pixman_gradient_stop_t *stops = walker->stops;
+    float la, lr, lg, lb;
+    float ra, rr, rg, rb;
+    float lx, rx;
 
     if (walker->repeat == PIXMAN_REPEAT_NORMAL)
     {
@@ -116,24 +122,49 @@ gradient_walker_reset (pixman_gradient_walker_t *walker,
 	    left_c = right_c;
     }
 
-    walker->left_x   = left_x;
-    walker->right_x  = right_x;
-    walker->left_ag  = ((left_c->alpha >> 8) << 16)   | (left_c->green >> 8);
-    walker->left_rb  = ((left_c->red & 0xff00) << 8)  | (left_c->blue >> 8);
-    walker->right_ag = ((right_c->alpha >> 8) << 16)  | (right_c->green >> 8);
-    walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
-
-    if (walker->left_x == walker->right_x                ||
-        (walker->left_ag == walker->right_ag &&
-	 walker->left_rb == walker->right_rb))
+    /* The alpha channel is scaled to be in the [0, 255] interval,
+     * and the red/green/blue channels are scaled to be in [0, 1].
+     * This ensures that after premultiplication all channels will
+     * be in the [0, 255] interval.
+     */
+    la = (left_c->alpha * (1.0f/257.0f));
+    lr = (left_c->red * (1.0f/257.0f));
+    lg = (left_c->green * (1.0f/257.0f));
+    lb = (left_c->blue * (1.0f/257.0f));
+
+    ra = (right_c->alpha * (1.0f/257.0f));
+    rr = (right_c->red * (1.0f/257.0f));
+    rg = (right_c->green * (1.0f/257.0f));
+    rb = (right_c->blue * (1.0f/257.0f));
+    
+    lx = left_x * (1.0f/65536.0f);
+    rx = right_x * (1.0f/65536.0f);
+    
+    if (FLOAT_IS_ZERO (rx - lx) || left_x == INT32_MIN || right_x == INT32_MAX)
     {
-	walker->stepper = 0;
+	walker->a_s = walker->r_s = walker->g_s = walker->b_s = 0.0f;
+	walker->a_b = (la + ra) / 2.0f;
+	walker->r_b = (lr + rr) / 510.0f;
+	walker->g_b = (lg + rg) / 510.0f;
+	walker->b_b = (lb + rb) / 510.0f;
     }
     else
     {
-	int32_t width = right_x - left_x;
-	walker->stepper = ((1 << 24) + width / 2) / width;
+	float w_rec = 1.0f / (rx - lx);
+
+	walker->a_b = (la * rx - ra * lx) * w_rec;
+	walker->r_b = (lr * rx - rr * lx) * w_rec * (1.0f/255.0f);
+	walker->g_b = (lg * rx - rg * lx) * w_rec * (1.0f/255.0f);
+	walker->b_b = (lb * rx - rb * lx) * w_rec * (1.0f/255.0f);
+
+	walker->a_s = (ra - la) * w_rec;
+	walker->r_s = (rr - lr) * w_rec * (1.0f/255.0f);
+	walker->g_s = (rg - lg) * w_rec * (1.0f/255.0f);
+	walker->b_s = (rb - lb) * w_rec * (1.0f/255.0f);
     }
+   
+    walker->left_x = left_x;
+    walker->right_x = right_x;
 
     walker->need_reset = FALSE;
 }
@@ -142,31 +173,30 @@ uint32_t
 _pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
                                pixman_fixed_48_16_t      x)
 {
-    int dist, idist;
-    uint32_t t1, t2, a, color;
+    float a, r, g, b;
+    uint8_t a8, r8, g8, b8;
+    uint32_t v;
+    float y;
 
     if (walker->need_reset || x < walker->left_x || x >= walker->right_x)
-	gradient_walker_reset (walker, x);
-
-    dist  = ((int)(x - walker->left_x) * walker->stepper) >> 16;
-    idist = 256 - dist;
+        gradient_walker_reset (walker, x);
 
-    /* combined INTERPOLATE and premultiply */
-    t1 = walker->left_rb * idist + walker->right_rb * dist;
-    t1 = (t1 >> 8) & 0xff00ff;
+    y = x * (1.0f / 65536.0f);
 
-    t2  = walker->left_ag * idist + walker->right_ag * dist;
-    t2 &= 0xff00ff00;
+    a = walker->a_s * y + walker->a_b;
+    r = a * (walker->r_s * y + walker->r_b);
+    g = a * (walker->g_s * y + walker->g_b);
+    b = a * (walker->b_s * y + walker->b_b);
 
-    color = t2 & 0xff000000;
-    a     = t2 >> 24;
+    a8 = a + 0.5f;
+    r8 = r + 0.5f;
+    g8 = g + 0.5f;
+    b8 = b + 0.5f;
 
-    t1  = t1 * a + 0x800080;
-    t1  = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
+    v = ((a8 << 24) & 0xff000000) |
+        ((r8 << 16) & 0x00ff0000) |
+        ((g8 <<  8) & 0x0000ff00) |
+        ((b8 >>  0) & 0x000000ff);
 
-    t2  = (t2 >> 8) * a + 0x800080;
-    t2  = (t2 + ((t2 >> 8) & 0xff00ff));
-
-    return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
+    return v;
 }
-
diff --git a/lib/pixman/pixman/pixman-image.c b/lib/pixman/pixman/pixman-image.c
index d9c303441..65041b43b 100644
--- a/lib/pixman/pixman/pixman-image.c
+++ b/lib/pixman/pixman/pixman-image.c
@@ -373,6 +373,10 @@ compute_image_info (pixman_image_t *image)
     case PIXMAN_FILTER_CONVOLUTION:
 	break;
 
+    case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
+	flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER;
+	break;
+
     default:
 	flags |= FAST_PATH_NO_CONVOLUTION_FILTER;
 	break;
@@ -515,8 +519,9 @@ compute_image_info (pixman_image_t *image)
      * if all channels are opaque, so we simply turn it off
      * unconditionally for those images.
      */
-    if (image->common.alpha_map					||
-	image->common.filter == PIXMAN_FILTER_CONVOLUTION	||
+    if (image->common.alpha_map						||
+	image->common.filter == PIXMAN_FILTER_CONVOLUTION		||
+        image->common.filter == PIXMAN_FILTER_SEPARABLE_CONVOLUTION     ||
 	image->common.component_alpha)
     {
 	flags &= ~(FAST_PATH_IS_OPAQUE | FAST_PATH_SAMPLES_OPAQUE);
@@ -679,6 +684,19 @@ pixman_image_set_filter (pixman_image_t *      image,
     if (params == common->filter_params && filter == common->filter)
 	return TRUE;
 
+    if (filter == PIXMAN_FILTER_SEPARABLE_CONVOLUTION)
+    {
+	int width = pixman_fixed_to_int (params[0]);
+	int height = pixman_fixed_to_int (params[1]);
+	int x_phase_bits = pixman_fixed_to_int (params[2]);
+	int y_phase_bits = pixman_fixed_to_int (params[3]);
+	int n_x_phases = (1 << x_phase_bits);
+	int n_y_phases = (1 << y_phase_bits);
+
+	return_val_if_fail (
+	    n_params == 4 + n_x_phases * width + n_y_phases * height, FALSE);
+    }
+    
     new_params = NULL;
     if (params)
     {
@@ -870,7 +888,7 @@ pixman_image_get_format (pixman_image_t *image)
     if (image->type == BITS)
 	return image->bits.format;
 
-    return 0;
+    return PIXMAN_null;
 }
 
 uint32_t
diff --git a/lib/pixman/pixman/pixman-implementation.c b/lib/pixman/pixman/pixman-implementation.c
index a70892c75..cfb82bb1f 100644
--- a/lib/pixman/pixman/pixman-implementation.c
+++ b/lib/pixman/pixman/pixman-implementation.c
@@ -65,7 +65,13 @@ typedef struct
 
 PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache);
 
-pixman_bool_t
+/* Composite function that deliberately does nothing.
+ *
+ * _pixman_implementation_lookup_composite() returns it through out_func
+ * when no composite function was found, so that callers always
+ * receive a callable function pointer.
+ */
+static void
+dummy_composite_rect (pixman_implementation_t *imp,
+		      pixman_composite_info_t *info)
+{
+}
+
+void
 _pixman_implementation_lookup_composite (pixman_implementation_t  *toplevel,
 					 pixman_op_t               op,
 					 pixman_format_code_t      src_format,
@@ -142,7 +148,18 @@ _pixman_implementation_lookup_composite (pixman_implementation_t  *toplevel,
 	    ++info;
 	}
     }
-    return FALSE;
+
+    /* We should never reach this point */
+    _pixman_log_error (
+        FUNC,
+        "No composite function found\n"
+        "\n"
+        "The most likely cause of this is that this system has issues with\n"
+        "thread local storage\n");
+
+    *out_imp = NULL;
+    *out_func = dummy_composite_rect;
+    return;
 
 update_cache:
     if (i)
@@ -160,8 +177,16 @@ update_cache:
 	cache->cache[0].fast_path.dest_flags = dest_flags;
 	cache->cache[0].fast_path.func = *out_func;
     }
+}
 
-    return TRUE;
+static void
+dummy_combine (pixman_implementation_t *imp,
+	       pixman_op_t              op,
+	       uint32_t *               pd,
+	       const uint32_t *         ps,
+	       const uint32_t *         pm,
+	       int                      w)
+{
 }
 
 pixman_combine_32_func_t
@@ -199,7 +224,9 @@ _pixman_implementation_lookup_combiner (pixman_implementation_t *imp,
 	imp = imp->fallback;
     }
 
-    return NULL;
+    /* We should never reach this point */
+    _pixman_log_error (FUNC, "No known combine function\n");
+    return dummy_combine;
 }
 
 pixman_bool_t
@@ -242,12 +269,12 @@ _pixman_implementation_fill (pixman_implementation_t *imp,
                              int                      y,
                              int                      width,
                              int                      height,
-                             uint32_t                 xor)
+                             uint32_t                 filler)
 {
     while (imp)
     {
 	if (imp->fill &&
-	    ((*imp->fill) (imp, bits, stride, bpp, x, y, width, height, xor)))
+	    ((*imp->fill) (imp, bits, stride, bpp, x, y, width, height, filler)))
 	{
 	    return TRUE;
 	}
diff --git a/lib/pixman/pixman/pixman-inlines.h b/lib/pixman/pixman/pixman-inlines.h
index 7f2e4047e..dd1c2f17f 100644
--- a/lib/pixman/pixman/pixman-inlines.h
+++ b/lib/pixman/pixman/pixman-inlines.h
@@ -88,6 +88,42 @@ pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
 	   ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
 }
 
+#if BILINEAR_INTERPOLATION_BITS <= 4
+/* Bilinear blend of four packed 32-bit pixels (top-left, top-right,
+ * bottom-left, bottom-right), each treated as four 8-bit channels.
+ *
+ * @distx and @disty are the horizontal and vertical weights, with
+ * BILINEAR_INTERPOLATION_BITS bits of precision.  They are first
+ * widened to 4 bits so that the four pixel weights add up to 256;
+ * two channels at a time are then processed in one 32-bit multiply.
+ *
+ * Inspired by Filter_32_opaque from Skia.
+ */
+static force_inline uint32_t
+bilinear_interpolation (uint32_t tl, uint32_t tr,
+			uint32_t bl, uint32_t br,
+			int distx, int disty)
+{
+    const uint32_t even_mask = 0xff00ff;	/* bits 0-7 and 16-23 */
+    int w_tl, w_tr, w_bl, w_br;
+    uint32_t even, odd;
+
+    distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
+    disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
+
+    w_br = distx * disty;
+    w_tr = (distx << 4) - w_br;		/* distx * (16 - disty) */
+    w_bl = (disty << 4) - w_br;		/* disty * (16 - distx) */
+    w_tl =
+	16 * 16 - (disty << 4) -
+	(distx << 4) + w_br;		/* (16 - distx) * (16 - disty) */
+
+    /* Channels in bits 0-7 and 16-23 */
+    even = (tl & even_mask) * w_tl +
+	   (tr & even_mask) * w_tr +
+	   (bl & even_mask) * w_bl +
+	   (br & even_mask) * w_br;
+
+    /* Channels in bits 8-15 and 24-31, shifted down before weighting */
+    odd = ((tl >> 8) & even_mask) * w_tl +
+	  ((tr >> 8) & even_mask) * w_tr +
+	  ((bl >> 8) & even_mask) * w_bl +
+	  ((br >> 8) & even_mask) * w_br;
+
+    /* Divide by 256: the even channels are shifted back down, the odd
+     * channels already sit in the right place.
+     */
+    return ((even >> 8) & even_mask) | (odd & ~even_mask);
+}
+
+#else
 #if SIZEOF_LONG > 4
 
 static force_inline uint32_t
@@ -184,6 +220,7 @@ bilinear_interpolation (uint32_t tl, uint32_t tr,
 }
 
 #endif
+#endif // BILINEAR_INTERPOLATION_BITS <= 4
 
 /*
  * For each scanline fetched from source image with PAD repeat:
@@ -314,36 +351,36 @@ scanline_func_name (dst_type_t       *dst,							\
 												\
 		if (a1 == 0xff)									\
 		{										\
-		    *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);			\
+		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
 		}										\
 		else if (s1)									\
 		{										\
-		    d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst);				\
-		    s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1);				\
+		    d = convert_ ## DST_FORMAT ## _to_8888 (*dst);				\
+		    s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1);				\
 		    a1 ^= 0xff;									\
 		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\
-		    *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);					\
+		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
 		}										\
 		dst++;										\
 												\
 		if (a2 == 0xff)									\
 		{										\
-		    *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2);			\
+		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2);			\
 		}										\
 		else if (s2)									\
 		{										\
-		    d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst);				\
-		    s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2);				\
+		    d = convert_## DST_FORMAT ## _to_8888 (*dst);				\
+		    s2 = convert_## SRC_FORMAT ## _to_8888 (s2);				\
 		    a2 ^= 0xff;									\
 		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2);					\
-		    *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);					\
+		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
 		}										\
 		dst++;										\
 	    }											\
 	    else /* PIXMAN_OP_SRC */								\
 	    {											\
-		*dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);			\
-		*dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2);			\
+		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
+		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2);			\
 	    }											\
 	}											\
 												\
@@ -358,21 +395,21 @@ scanline_func_name (dst_type_t       *dst,							\
 												\
 		if (a1 == 0xff)									\
 		{										\
-		    *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);			\
+		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
 		}										\
 		else if (s1)									\
 		{										\
-		    d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst);				\
-		    s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1);				\
+		    d = convert_## DST_FORMAT ## _to_8888 (*dst);				\
+		    s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1);				\
 		    a1 ^= 0xff;									\
 		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\
-		    *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);					\
+		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
 		}										\
 		dst++;										\
 	    }											\
 	    else /* PIXMAN_OP_SRC */								\
 	    {											\
-		*dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);			\
+		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
 	    }											\
 	}											\
 }
diff --git a/lib/pixman/pixman/pixman-matrix.c b/lib/pixman/pixman/pixman-matrix.c
index a029ab757..89b96826b 100644
--- a/lib/pixman/pixman/pixman-matrix.c
+++ b/lib/pixman/pixman/pixman-matrix.c
@@ -34,6 +34,338 @@
 
 #define F(x)    pixman_int_to_fixed (x)
 
+/* Return the number of leading zero bits in the 32-bit value @x.
+ * For @x == 0 the result is 32 on all compilers.
+ */
+static force_inline int
+count_leading_zeros (uint32_t x)
+{
+#ifdef __GNUC__
+    /* The result of __builtin_clz is undefined for 0, so handle that
+     * case explicitly to agree with the portable version below.
+     */
+    return x ? __builtin_clz (x) : 32;
+#else
+    /* Count the significant bits and subtract from the word size */
+    int n = 0;
+    while (x)
+    {
+        n++;
+        x >>= 1;
+    }
+    return 32 - n;
+#endif
+}
+
+/*
+ * Large signed/unsigned integer division with rounding for the platforms with
+ * only 64-bit integer data type supported (no 128-bit data type).
+ *
+ * Arguments:
+ *     hi, lo - high and low 64-bit parts of the dividend
+ *     div    - 48-bit divisor
+ *
+ * Returns: lowest 64 bits of the result as a return value and highest 64
+ *          bits of the result to "result_hi" pointer
+ */
+
+/* grade-school unsigned division (128-bit by 48-bit) with rounding to nearest
+ *
+ * The high word is divided directly; the low word is then brought
+ * down 16 bits at a time.  Because the divisor is below 2^48, so is
+ * every remainder, and "remainder << 16" plus a 16-bit digit always
+ * fits in 64 bits.
+ */
+static force_inline uint64_t
+rounded_udiv_128_by_48 (uint64_t  hi,
+                        uint64_t  lo,
+                        uint64_t  div,
+                        uint64_t *result_hi)
+{
+    uint64_t carry, quotient_lo = 0;
+    int shift;
+    assert(div < ((uint64_t)1 << 48));
+
+    carry = hi % div;
+    *result_hi = hi / div;
+
+    /* four 16-bit digits of "lo", most significant first */
+    for (shift = 48; shift >= 0; shift -= 16)
+    {
+        uint64_t partial = (carry << 16) + ((lo >> shift) & 0xFFFF);
+
+        quotient_lo = (quotient_lo << 16) + (partial / div);
+        carry = partial % div;
+    }
+
+    /* round to nearest; propagate a carry out of the low word */
+    if (carry * 2 >= div && ++quotient_lo == 0)
+        *result_hi += 1;
+
+    return quotient_lo;
+}
+
+/* signed division (128-bit by 49-bit) with rounding to nearest
+ *
+ * The operands are converted to their magnitudes, divided with
+ * rounded_udiv_128_by_48() and the sign is applied to the result
+ * afterwards.
+ *
+ * Arguments:
+ *     hi, lo           - high and low 64-bit parts of the dividend
+ *     div              - divisor
+ *     signed_result_hi - if not NULL, receives the high 64 bits of
+ *                        the result
+ *
+ * Returns: the low 64 bits of the result
+ */
+static inline int64_t
+rounded_sdiv_128_by_49 (int64_t   hi,
+                        uint64_t  lo,
+                        int64_t   div,
+                        int64_t  *signed_result_hi)
+{
+    uint64_t result_lo, result_hi;
+    /* set when exactly one of dividend and divisor is negative */
+    int sign = 0;
+    if (div < 0)
+    {
+        div = -div;
+        sign ^= 1;
+    }
+    if (hi < 0)
+    {
+        /* 128-bit two's complement negation of hi:lo.  Negating a
+         * non-zero low word borrows from the high word, which is
+         * accounted for by incrementing hi before negating it.
+         */
+        if (lo != 0)
+            hi++;
+        hi = -hi;
+        lo = -lo;
+        sign ^= 1;
+    }
+    result_lo = rounded_udiv_128_by_48 (hi, lo, div, &result_hi);
+    if (sign)
+    {
+        /* negate the 128-bit quotient in the same way */
+        if (result_lo != 0)
+            result_hi++;
+        result_hi = -result_hi;
+        result_lo = -result_lo;
+    }
+    if (signed_result_hi)
+    {
+        *signed_result_hi = result_hi;
+    }
+    return result_lo;
+}
+
+/*
+ * Multiply 64.16 fixed point value by (2^scalebits) and convert
+ * to 128-bit integer.
+ *
+ * Arguments:
+ *     hi, lo    - the input value: "hi" counts whole units and "lo"
+ *                 counts 1/65536 units ("lo" may exceed 16 bits; the
+ *                 excess is carried into "hi")
+ *     rhi, rlo  - receive the high and low 64 bits of the result
+ *     scalebits - power of two to scale by; zero and negative values
+ *                 shift the integer part right instead
+ */
+static force_inline void
+fixed_64_16_to_int128 (int64_t  hi,
+                       int64_t  lo,
+                       int64_t *rhi,
+                       int64_t *rlo,
+                       int      scalebits)
+{
+    /* separate integer and fractional parts */
+    hi += lo >> 16;
+    lo &= 0xFFFF;
+
+    if (scalebits <= 0)
+    {
+        /* scaling down: the 16 fractional bits in "lo" are dropped
+         * and the high word is just the sign extension of the low one
+         */
+        *rlo = hi >> (-scalebits);
+        *rhi = *rlo >> 63;
+    }
+    else
+    {
+        /* scaling up: the bits shifted out of the top of "hi" land
+         * in the high word, and the fractional bits are aligned so
+         * that they are scaled by the same 2^scalebits
+         */
+        *rhi = hi >> (64 - scalebits);
+        *rlo = (uint64_t)hi << scalebits;
+        if (scalebits < 16)
+            *rlo += lo >> (16 - scalebits);
+        else
+            *rlo += lo << (scalebits - 16);
+    }
+}
+
+/*
+ * Convert 112.16 fixed point value to 48.16 with clamping for the out
+ * of range values.
+ *
+ * The value fits in 64 bits exactly when the high word is nothing but
+ * the sign extension of the low word.  Otherwise *clampflag is set to
+ * TRUE and the largest or smallest 64-bit value is returned, depending
+ * on the sign of the input.  *clampflag is never cleared here.
+ */
+static force_inline pixman_fixed_48_16_t
+fixed_112_16_to_fixed_48_16 (int64_t hi, int64_t lo, pixman_bool_t *clampflag)
+{
+    const int64_t sign_extension = lo >> 63;
+
+    if (sign_extension == hi)
+        return lo;
+
+    *clampflag = TRUE;
+
+    if (hi >= 0)
+        return INT64_MAX;
+
+    return INT64_MIN;
+}
+
+/*
+ * Transform a point with 31.16 fixed point coordinates from the destination
+ * space to a point with 48.16 fixed point coordinates in the source space.
+ * No overflows are possible for affine transformations and the results are
+ * accurate including the least significant bit. Projective transformations
+ * may overflow, in this case the results are just clamped to return maximum
+ * or minimum 48.16 values (so that the caller can at least handle the NONE
+ * and PAD repeats correctly) and the return value is FALSE to indicate that
+ * such clamping has happened.
+ */
+PIXMAN_EXPORT pixman_bool_t
+pixman_transform_point_31_16 (const pixman_transform_t    *t,
+                              const pixman_vector_48_16_t *v,
+                              pixman_vector_48_16_t       *result)
+{
+    pixman_bool_t clampflag = FALSE;
+    int i;
+    int64_t tmp[3][2], divint; /* tmp[i][0] / tmp[i][1]: row i products with the integer / fraction bits of v */
+    uint16_t divfrac;
+
+    /* input vector values must have no more than 31 bits (including sign)
+     * in the integer part */
+    assert (v->v[0] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
+    assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+    assert (v->v[1] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
+    assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+    assert (v->v[2] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
+    assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+
+    for (i = 0; i < 3; i++)
+    {
+        tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16);    /* bits above the low 16 */
+        tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); /* low 16 (fraction) bits */
+        tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16);
+        tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF);
+        tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16);
+        tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF);
+    }
+
+    /*
+     * separate 64-bit integer and 16-bit fractional parts for the divisor,
+     * which is also scaled by 65536 after fixed point multiplication.
+     */
+    divint  = tmp[2][0] + (tmp[2][1] >> 16);
+    divfrac = tmp[2][1] & 0xFFFF;
+
+    if (divint == pixman_fixed_1 && divfrac == 0)
+    {
+        /*
+         * this is a simple affine transformation
+         */
+        result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); /* + 0x8000: round to nearest */
+        result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
+        result->v[2] = pixman_fixed_1; /* stored again just before returning */
+    }
+    else if (divint == 0 && divfrac == 0)
+    {
+        /*
+         * handle zero divisor (if the values are non-zero, set the
+         * results to maximum positive or minimum negative)
+         */
+        clampflag = TRUE; /* this branch always makes the function return FALSE */
+
+        result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); /* only the sign (or zero) is kept below */
+        result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
+
+        if (result->v[0] > 0)
+            result->v[0] = INT64_MAX;
+        else if (result->v[0] < 0)
+            result->v[0] = INT64_MIN;
+
+        if (result->v[1] > 0)
+            result->v[1] = INT64_MAX;
+        else if (result->v[1] < 0)
+            result->v[1] = INT64_MIN;
+    }
+    else
+    {
+        /*
+         * projective transformation, analyze the top 32 bits of the divisor
+         */
+        int32_t hi32divbits = divint >> 32;
+        if (hi32divbits < 0)
+            hi32divbits = ~hi32divbits; /* a pure sign extension (all ones) becomes 0 */
+
+        if (hi32divbits == 0)
+        {
+            /* the divisor is small, we can actually keep all the bits */
+            int64_t hi, rhi, lo, rlo;
+            int64_t div = (divint << 16) + divfrac; /* NOTE(review): divint can be negative here; << on a negative value is undefined in ISO C */
+
+            fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32);
+            rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
+            result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
+
+            fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32);
+            rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
+            result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
+        }
+        else
+        {
+            /* the divisor needs to be reduced to 48 bits */
+            int64_t hi, rhi, lo, rlo, div;
+            int shift = 32 - count_leading_zeros (hi32divbits);
+            fixed_64_16_to_int128 (divint, divfrac, &hi, &div, 16 - shift); /* the hi output is overwritten by the calls below */
+
+            fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32 - shift); /* same shift reduction as applied to the divisor */
+            rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
+            result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
+
+            fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32 - shift);
+            rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
+            result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
+        }
+    }
+    result->v[2] = pixman_fixed_1;
+    return !clampflag; /* FALSE whenever clampflag was raised */
+}
+
+PIXMAN_EXPORT void
+pixman_transform_point_31_16_affine (const pixman_transform_t    *t,
+                                     const pixman_vector_48_16_t *v,
+                                     pixman_vector_48_16_t       *result)
+{
+    int64_t hi0, lo0, hi1, lo1; /* hiN / loN: row N products with the integer / fraction bits of v */
+
+    /* input vector values must have no more than 31 bits (including sign)
+     * in the integer part; only v[0] and v[1] are read by this function */
+    assert (v->v[0] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
+    assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+    assert (v->v[1] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
+    assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+
+    hi0  = (int64_t)t->matrix[0][0] * (v->v[0] >> 16);
+    lo0  = (int64_t)t->matrix[0][0] * (v->v[0] & 0xFFFF);
+    hi0 += (int64_t)t->matrix[0][1] * (v->v[1] >> 16);
+    lo0 += (int64_t)t->matrix[0][1] * (v->v[1] & 0xFFFF);
+    hi0 += (int64_t)t->matrix[0][2]; /* third column added unscaled, i.e. as if v[2] were exactly 1.0 */
+
+    hi1  = (int64_t)t->matrix[1][0] * (v->v[0] >> 16);
+    lo1  = (int64_t)t->matrix[1][0] * (v->v[0] & 0xFFFF);
+    hi1 += (int64_t)t->matrix[1][1] * (v->v[1] >> 16);
+    lo1 += (int64_t)t->matrix[1][1] * (v->v[1] & 0xFFFF);
+    hi1 += (int64_t)t->matrix[1][2]; /* same for the second row */
+
+    result->v[0] = hi0 + ((lo0 + 0x8000) >> 16); /* + 0x8000: round to nearest */
+    result->v[1] = hi1 + ((lo1 + 0x8000) >> 16);
+    result->v[2] = pixman_fixed_1; /* matrix row 2 is never consulted */
+}
+
+PIXMAN_EXPORT void
+pixman_transform_point_31_16_3d (const pixman_transform_t    *t,
+                                 const pixman_vector_48_16_t *v,
+                                 pixman_vector_48_16_t       *result)
+{
+    int i;
+    int64_t tmp[3][2]; /* tmp[i][0] / tmp[i][1]: row i products with the integer / fraction bits of v */
+
+    /* input vector values must have no more than 31 bits (including sign)
+     * in the integer part */
+    assert (v->v[0] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
+    assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+    assert (v->v[1] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
+    assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+    assert (v->v[2] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
+    assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
+
+    for (i = 0; i < 3; i++)
+    {
+        tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16);
+        tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF);
+        tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16);
+        tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF);
+        tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16);
+        tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF);
+    }
+
+    result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); /* + 0x8000: round to nearest */
+    result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
+    result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16); /* third component is kept as computed; nothing is divided by it */
+}
+
 PIXMAN_EXPORT void
 pixman_transform_init_identity (struct pixman_transform *matrix)
 {
@@ -50,69 +382,41 @@ PIXMAN_EXPORT pixman_bool_t
 pixman_transform_point_3d (const struct pixman_transform *transform,
                            struct pixman_vector *         vector)
 {
-    struct pixman_vector result;
-    pixman_fixed_32_32_t partial;
-    pixman_fixed_48_16_t v;
-    int i, j;
+    pixman_vector_48_16_t tmp; /* copy of *vector in the 48.16 vector type */
+    tmp.v[0] = vector->vector[0];
+    tmp.v[1] = vector->vector[1];
+    tmp.v[2] = vector->vector[2];
 
-    for (j = 0; j < 3; j++)
-    {
-	v = 0;
-	for (i = 0; i < 3; i++)
-	{
-	    partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] *
-	               (pixman_fixed_48_16_t) vector->vector[i]);
-	    v += partial >> 16;
-	}
-	
-	if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16)
-	    return FALSE;
-	
-	result.vector[j] = (pixman_fixed_t) v;
-    }
-    
-    *vector = result;
+    pixman_transform_point_31_16_3d (transform, &tmp, &tmp); /* tmp is both input and output */
 
-    if (!result.vector[2])
-	return FALSE;
+    vector->vector[0] = tmp.v[0];
+    vector->vector[1] = tmp.v[1];
+    vector->vector[2] = tmp.v[2];
 
-    return TRUE;
+    return vector->vector[0] == tmp.v[0] && /* FALSE if storing back into *vector changed any value */
+           vector->vector[1] == tmp.v[1] &&
+           vector->vector[2] == tmp.v[2];
 }
 
 PIXMAN_EXPORT pixman_bool_t
 pixman_transform_point (const struct pixman_transform *transform,
                         struct pixman_vector *         vector)
 {
-    pixman_fixed_32_32_t partial;
-    pixman_fixed_34_30_t v[3];
-    pixman_fixed_48_16_t quo;
-    int i, j;
+    pixman_vector_48_16_t tmp; /* copy of *vector in the 48.16 vector type */
+    tmp.v[0] = vector->vector[0];
+    tmp.v[1] = vector->vector[1];
+    tmp.v[2] = vector->vector[2];
 
-    for (j = 0; j < 3; j++)
-    {
-	v[j] = 0;
-	
-	for (i = 0; i < 3; i++)
-	{
-	    partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] *
-	               (pixman_fixed_32_32_t) vector->vector[i]);
-	    v[j] += partial >> 2;
-	}
-    }
-    
-    if (!(v[2] >> 16))
-	return FALSE;
+    if (!pixman_transform_point_31_16 (transform, &tmp, &tmp)) /* in place; on FALSE *vector is left unmodified */
+        return FALSE;
 
-    for (j = 0; j < 2; j++)
-    {
-	quo = v[j] / (v[2] >> 16);
-	if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16)
-	    return FALSE;
-	vector->vector[j] = (pixman_fixed_t) quo;
-    }
-    
-    vector->vector[2] = pixman_fixed_1;
-    return TRUE;
+    vector->vector[0] = tmp.v[0];
+    vector->vector[1] = tmp.v[1];
+    vector->vector[2] = tmp.v[2];
+
+    return vector->vector[0] == tmp.v[0] && /* FALSE if storing back into *vector changed any value */
+           vector->vector[1] == tmp.v[1] &&
+           vector->vector[2] == tmp.v[2];
 }
 
 PIXMAN_EXPORT pixman_bool_t
@@ -138,7 +442,7 @@ pixman_transform_multiply (struct pixman_transform *      dst,
 		    (pixman_fixed_32_32_t) l->matrix[dy][o] *
 		    (pixman_fixed_32_32_t) r->matrix[o][dx];
 
-		v += partial >> 16;
+		v += (partial + 0x8000) >> 16;
 	    }
 
 	    if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16)
@@ -336,14 +640,14 @@ PIXMAN_EXPORT pixman_bool_t
 pixman_transform_invert (struct pixman_transform *      dst,
                          const struct pixman_transform *src)
 {
-    struct pixman_f_transform m, r;
+    struct pixman_f_transform m;
 
     pixman_f_transform_from_pixman_transform (&m, src);
 
-    if (!pixman_f_transform_invert (&r, &m))
+    if (!pixman_f_transform_invert (&m, &m))
 	return FALSE;
 
-    if (!pixman_transform_from_pixman_f_transform (dst, &r))
+    if (!pixman_transform_from_pixman_f_transform (dst, &m))
 	return FALSE;
 
     return TRUE;
@@ -469,10 +773,11 @@ PIXMAN_EXPORT pixman_bool_t
 pixman_f_transform_invert (struct pixman_f_transform *      dst,
                            const struct pixman_f_transform *src)
 {
-    double det;
-    int i, j;
     static const int a[3] = { 2, 2, 1 };
     static const int b[3] = { 1, 0, 0 };
+    pixman_f_transform_t d;
+    double det;
+    int i, j;
 
     det = 0;
     for (i = 0; i < 3; i++)
@@ -507,10 +812,12 @@ pixman_f_transform_invert (struct pixman_f_transform *      dst,
 	    if (((i + j) & 1) != 0)
 		p = -p;
 	    
-	    dst->m[j][i] = det * p;
+	    d.m[j][i] = det * p;
 	}
     }
 
+    *dst = d;
+
     return TRUE;
 }
 
diff --git a/lib/pixman/pixman/pixman-mips-dspr2-asm.S b/lib/pixman/pixman/pixman-mips-dspr2-asm.S
index b5cae1690..866e93e58 100644
--- a/lib/pixman/pixman/pixman-mips-dspr2-asm.S
+++ b/lib/pixman/pixman/pixman-mips-dspr2-asm.S
@@ -310,6 +310,649 @@ LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips)
 
 END(pixman_composite_src_x888_8888_asm_mips)
 
+#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+LEAF_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (b8g8r8)
+ * a2 - w
+ */
+
+    beqz              a2, 6f
+     nop
+
+    lui               t8, 0xff00; /* t8 = 0xff000000, OR-ed into the top byte of every output pixel */
+    srl               t9, a2, 2   /* t9 = how many multiples of 4 src pixels */
+    beqz              t9, 4f      /* branch if less than 4 src pixels */
+     nop
+
+    li                t0, 0x1
+    li                t1, 0x2
+    li                t2, 0x3
+    andi              t3, a1, 0x3          /* t3 = src address modulo 4 */
+    beq               t3, t0, 1f
+     nop
+    beq               t3, t1, 2f
+     nop
+    beq               t3, t2, 3f
+     nop
+
+0:                                         /* (a1 & 3) == 0 */
+    beqz              t9, 4f
+     addiu            t9, t9, -1
+    lw                t0, 0(a1)            /* t0 = R2 | B1 | G1 | R1 */
+    lw                t1, 4(a1)            /* t1 = G3 | R3 | B2 | G2 */
+    lw                t2, 8(a1)            /* t2 = B4 | G4 | R4 | B3 */
+
+    addiu             a1, a1, 12
+    addiu             a2, a2, -4
+
+    wsbh              t0, t0               /* t0 = B1 | R2 | R1 | G1 */
+    wsbh              t1, t1               /* t1 = R3 | G3 | G2 | B2 */
+    wsbh              t2, t2               /* t2 = G4 | B4 | B3 | R4 */
+
+    packrl.ph         t3, t1, t0           /* t3 = G2 | B2 | B1 | R2 */
+    packrl.ph         t4, t0, t0           /* t4 = R1 | G1 | B1 | R2 */
+    rotr              t3, t3, 16           /* t3 = B1 | R2 | G2 | B2 */
+    or                t3, t3, t8           /* t3 = FF | R2 | G2 | B2 */
+    srl               t4, t4, 8            /* t4 =  0 | R1 | G1 | B1 */
+    or                t4, t4, t8           /* t4 = FF | R1 | G1 | B1 */
+    packrl.ph         t5, t2, t1           /* t5 = B3 | R4 | R3 | G3 */
+    rotr              t5, t5, 24           /* t5 = R4 | R3 | G3 | B3 */
+    or                t5, t5, t8           /* t5 = FF | R3 | G3 | B3 */
+    rotr              t2, t2, 16           /* t2 = B3 | R4 | G4 | B4 */
+    or                t2, t2, t8           /* t2 = FF | R4 | G4 | B4 */
+
+    sw                t4, 0(a0)
+    sw                t3, 4(a0)
+    sw                t5, 8(a0)
+    sw                t2, 12(a0)
+    b                 0b
+     addiu            a0, a0, 16
+
+1:                                         /* (a1 & 3) == 1 */
+    lbu               t6, 0(a1)            /* t6 =  0 |  0 |  0 | R1 */
+    lhu               t7, 1(a1)            /* t7 =  0 |  0 | B1 | G1 */
+    sll               t6, t6, 16           /* t6 =  0 | R1 |  0 | 0  */
+    wsbh              t7, t7               /* t7 =  0 |  0 | G1 | B1 */
+    or                t7, t6, t7           /* t7 =  0 | R1 | G1 | B1 */
+11:
+    beqz              t9, 4f
+     addiu            t9, t9, -1
+    lw                t0, 3(a1)            /* t0 = R3 | B2 | G2 | R2 */
+    lw                t1, 7(a1)            /* t1 = G4 | R4 | B3 | G3 */
+    lw                t2, 11(a1)           /* t2 = B5 | G5 | R5 | B4 */
+
+    addiu             a1, a1, 12
+    addiu             a2, a2, -4
+
+    wsbh              t0, t0               /* t0 = B2 | R3 | R2 | G2 */
+    wsbh              t1, t1               /* t1 = R4 | G4 | G3 | B3 */
+    wsbh              t2, t2               /* t2 = G5 | B5 | B4 | R5 */
+
+    packrl.ph         t3, t1, t0           /* t3 = G3 | B3 | B2 | R3 */
+    packrl.ph         t4, t2, t1           /* t4 = B4 | R5 | R4 | G4 */
+    rotr              t0, t0, 24           /* t0 = R3 | R2 | G2 | B2 */
+    rotr              t3, t3, 16           /* t3 = B2 | R3 | G3 | B3 */
+    rotr              t4, t4, 24           /* t4 = R5 | R4 | G4 | B4 */
+    or                t7, t7, t8           /* t7 = FF | R1 | G1 | B1 */
+    or                t0, t0, t8           /* t0 = FF | R2 | G2 | B2 */
+    or                t3, t3, t8           /* t3 = FF | R3 | G3 | B3 */
+    or                t4, t4, t8           /* t4 = FF | R4 | G4 | B4 */
+
+    sw                t7, 0(a0)
+    sw                t0, 4(a0)
+    sw                t3, 8(a0)
+    sw                t4, 12(a0)
+    rotr              t7, t2, 16           /* t7 = xx | R5 | G5 | B5 */
+    b                 11b
+     addiu            a0, a0, 16
+
+2:                                         /* (a1 & 3) == 2 */
+    lhu               t7, 0(a1)            /* t7 =  0 |  0 | G1 | R1 */
+    wsbh              t7, t7               /* t7 =  0 |  0 | R1 | G1 */
+21:
+    beqz              t9, 4f
+     addiu            t9, t9, -1
+    lw                t0, 2(a1)            /* t0 = B2 | G2 | R2 | B1 */
+    lw                t1, 6(a1)            /* t1 = R4 | B3 | G3 | R3 */
+    lw                t2, 10(a1)           /* t2 = G5 | R5 | B4 | G4 */
+
+    addiu             a1, a1, 12
+    addiu             a2, a2, -4
+
+    wsbh              t0, t0               /* t0 = G2 | B2 | B1 | R2 */
+    wsbh              t1, t1               /* t1 = B3 | R4 | R3 | G3 */
+    wsbh              t2, t2               /* t2 = R5 | G5 | G4 | B4 */
+
+    precr_sra.ph.w    t7, t0, 0            /* t7 = R1 | G1 | B1 | R2 */
+    rotr              t0, t0, 16           /* t0 = B1 | R2 | G2 | B2 */
+    packrl.ph         t3, t2, t1           /* t3 = G4 | B4 | B3 | R4 */
+    rotr              t1, t1, 24           /* t1 = R4 | R3 | G3 | B3 */
+    srl               t7, t7, 8            /* t7 =  0 | R1 | G1 | B1 */
+    rotr              t3, t3, 16           /* t3 = B3 | R4 | G4 | B4 */
+    or                t7, t7, t8           /* t7 = FF | R1 | G1 | B1 */
+    or                t0, t0, t8           /* t0 = FF | R2 | G2 | B2 */
+    or                t1, t1, t8           /* t1 = FF | R3 | G3 | B3 */
+    or                t3, t3, t8           /* t3 = FF | R4 | G4 | B4 */
+
+    sw                t7, 0(a0)
+    sw                t0, 4(a0)
+    sw                t1, 8(a0)
+    sw                t3, 12(a0)
+    srl               t7, t2, 16           /* t7 =  0 |  0 | R5 | G5 */
+    b                 21b
+     addiu            a0, a0, 16
+
+3:                                         /* (a1 & 3) == 3 */
+    lbu               t7, 0(a1)            /* t7 =  0 |  0 |  0 | R1 */
+31:
+    beqz              t9, 4f
+     addiu            t9, t9, -1
+    lw                t0, 1(a1)            /* t0 = G2 | R2 | B1 | G1 */
+    lw                t1, 5(a1)            /* t1 = B3 | G3 | R3 | B2 */
+    lw                t2, 9(a1)            /* t2 = R5 | B4 | G4 | R4 */
+
+    addiu             a1, a1, 12
+    addiu             a2, a2, -4
+
+    wsbh              t0, t0               /* t0 = R2 | G2 | G1 | B1 */
+    wsbh              t1, t1               /* t1 = G3 | B3 | B2 | R3 */
+    wsbh              t2, t2               /* t2 = B4 | R5 | R4 | G4 */
+
+    precr_sra.ph.w    t7, t0, 0            /* t7 = xx | R1 | G1 | B1 */
+    packrl.ph         t3, t1, t0           /* t3 = B2 | R3 | R2 | G2 */
+    rotr              t1, t1, 16           /* t1 = B2 | R3 | G3 | B3 */
+    rotr              t4, t2, 24           /* t4 = R5 | R4 | G4 | B4 */
+    rotr              t3, t3, 24           /* t3 = R3 | R2 | G2 | B2 */
+    or                t7, t7, t8           /* t7 = FF | R1 | G1 | B1 */
+    or                t3, t3, t8           /* t3 = FF | R2 | G2 | B2 */
+    or                t1, t1, t8           /* t1 = FF | R3 | G3 | B3 */
+    or                t4, t4, t8           /* t4 = FF | R4 | G4 | B4 */
+
+    sw                t7, 0(a0)
+    sw                t3, 4(a0)
+    sw                t1, 8(a0)
+    sw                t4, 12(a0)
+    srl               t7, t2, 16           /* t7 =  0 |  0 | xx | R5 */
+    b                 31b
+     addiu            a0, a0, 16
+
+4:                                         /* no group of 4 pixels left */
+    beqz              a2, 6f
+     nop
+5:                                         /* one pixel per iteration */
+    lbu               t0, 0(a1)            /* t0 =  0 | 0 | 0 | R */
+    lbu               t1, 1(a1)            /* t1 =  0 | 0 | 0 | G */
+    lbu               t2, 2(a1)            /* t2 =  0 | 0 | 0 | B */
+    addiu             a1, a1, 3
+
+    sll               t0, t0, 16           /* t0 =  0 | R | 0 | 0 */
+    sll               t1, t1, 8            /* t1 =  0 | 0 | G | 0 */
+
+    or                t2, t2, t1           /* t2 =  0 | 0 | G | B */
+    or                t2, t2, t0           /* t2 =  0 | R | G | B */
+    or                t2, t2, t8           /* t2 = FF | R | G | B */
+
+    sw                t2, 0(a0)
+    addiu             a2, a2, -1
+    bnez              a2, 5b
+     addiu            a0, a0, 4
+6:
+    j                 ra
+     nop
+
+END(pixman_composite_src_0888_8888_rev_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips)
+/*
+ * a0 - dst (r5g6b5)
+ * a1 - src (b8g8r8)
+ * a2 - w
+ */
+
+    SAVE_REGS_ON_STACK 0, v0, v1
+    beqz              a2, 6f
+     nop
+
+    li                t6, 0xf800f800       /* bits 15..11 of each halfword */
+    li                t7, 0x07e007e0       /* bits 10..5 of each halfword */
+    li                t8, 0x001F001F       /* bits 4..0 of each halfword */
+    srl               t9, a2, 2   /* t9 = how many multiples of 4 src pixels */
+    beqz              t9, 4f      /* branch if less than 4 src pixels */
+     nop
+
+    li                t0, 0x1
+    li                t1, 0x2
+    li                t2, 0x3
+    andi              t3, a1, 0x3          /* t3 = src address modulo 4 */
+    beq               t3, t0, 1f
+     nop
+    beq               t3, t1, 2f
+     nop
+    beq               t3, t2, 3f
+     nop
+
+0:                                         /* (a1 & 3) == 0 */
+    beqz              t9, 4f
+     addiu            t9, t9, -1
+    lw                t0, 0(a1)            /* t0 = R2 | B1 | G1 | R1 */
+    lw                t1, 4(a1)            /* t1 = G3 | R3 | B2 | G2 */
+    lw                t2, 8(a1)            /* t2 = B4 | G4 | R4 | B3 */
+
+    addiu             a1, a1, 12
+    addiu             a2, a2, -4
+
+    wsbh              t0, t0               /* t0 = B1 | R2 | R1 | G1 */
+    wsbh              t1, t1               /* t1 = R3 | G3 | G2 | B2 */
+    wsbh              t2, t2               /* t2 = G4 | B4 | B3 | R4 */
+
+    packrl.ph         t3, t1, t0           /* t3 = G2 | B2 | B1 | R2 */
+    packrl.ph         t4, t0, t0           /* t4 = R1 | G1 | B1 | R2 */
+    rotr              t3, t3, 16           /* t3 = B1 | R2 | G2 | B2 */
+    srl               t4, t4, 8            /* t4 =  0 | R1 | G1 | B1 */
+    packrl.ph         t5, t2, t1           /* t5 = B3 | R4 | R3 | G3 */
+    rotr              t5, t5, 24           /* t5 = R4 | R3 | G3 | B3 */
+    rotr              t2, t2, 16           /* t2 = B3 | R4 | G4 | B4 */
+
+    CONVERT_2x8888_TO_2x0565 t4, t3, t4, t3, t6, t7, t8, v0, v1
+    CONVERT_2x8888_TO_2x0565 t5, t2, t5, t2, t6, t7, t8, v0, v1
+
+    sh                t4, 0(a0)
+    sh                t3, 2(a0)
+    sh                t5, 4(a0)
+    sh                t2, 6(a0)
+    b                 0b
+     addiu            a0, a0, 8
+
+1:                                         /* (a1 & 3) == 1 */
+    lbu               t4, 0(a1)            /* t4 =  0 |  0 |  0 | R1 */
+    lhu               t5, 1(a1)            /* t5 =  0 |  0 | B1 | G1 */
+    sll               t4, t4, 16           /* t4 =  0 | R1 |  0 | 0  */
+    wsbh              t5, t5               /* t5 =  0 |  0 | G1 | B1 */
+    or                t5, t4, t5           /* t5 =  0 | R1 | G1 | B1 */
+11:
+    beqz              t9, 4f
+     addiu            t9, t9, -1
+    lw                t0, 3(a1)            /* t0 = R3 | B2 | G2 | R2 */
+    lw                t1, 7(a1)            /* t1 = G4 | R4 | B3 | G3 */
+    lw                t2, 11(a1)           /* t2 = B5 | G5 | R5 | B4 */
+
+    addiu             a1, a1, 12
+    addiu             a2, a2, -4
+
+    wsbh              t0, t0               /* t0 = B2 | R3 | R2 | G2 */
+    wsbh              t1, t1               /* t1 = R4 | G4 | G3 | B3 */
+    wsbh              t2, t2               /* t2 = G5 | B5 | B4 | R5 */
+
+    packrl.ph         t3, t1, t0           /* t3 = G3 | B3 | B2 | R3 */
+    packrl.ph         t4, t2, t1           /* t4 = B4 | R5 | R4 | G4 */
+    rotr              t0, t0, 24           /* t0 = R3 | R2 | G2 | B2 */
+    rotr              t3, t3, 16           /* t3 = B2 | R3 | G3 | B3 */
+    rotr              t4, t4, 24           /* t4 = R5 | R4 | G4 | B4 */
+
+    CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1
+    CONVERT_2x8888_TO_2x0565 t3, t4, t3, t4, t6, t7, t8, v0, v1
+
+    sh                t5, 0(a0)
+    sh                t0, 2(a0)
+    sh                t3, 4(a0)
+    sh                t4, 6(a0)
+    rotr              t5, t2, 16           /* t5 = xx | R5 | G5 | B5 */
+    b                 11b
+     addiu            a0, a0, 8
+
+2:                                         /* (a1 & 3) == 2 */
+    lhu               t5, 0(a1)            /* t5 =  0 |  0 | G1 | R1 */
+    wsbh              t5, t5               /* t5 =  0 |  0 | R1 | G1 */
+21:
+    beqz              t9, 4f
+     addiu            t9, t9, -1
+    lw                t0, 2(a1)            /* t0 = B2 | G2 | R2 | B1 */
+    lw                t1, 6(a1)            /* t1 = R4 | B3 | G3 | R3 */
+    lw                t2, 10(a1)           /* t2 = G5 | R5 | B4 | G4 */
+
+    addiu             a1, a1, 12
+    addiu             a2, a2, -4
+
+    wsbh              t0, t0               /* t0 = G2 | B2 | B1 | R2 */
+    wsbh              t1, t1               /* t1 = B3 | R4 | R3 | G3 */
+    wsbh              t2, t2               /* t2 = R5 | G5 | G4 | B4 */
+
+    precr_sra.ph.w    t5, t0, 0            /* t5 = R1 | G1 | B1 | R2 */
+    rotr              t0, t0, 16           /* t0 = B1 | R2 | G2 | B2 */
+    packrl.ph         t3, t2, t1           /* t3 = G4 | B4 | B3 | R4 */
+    rotr              t1, t1, 24           /* t1 = R4 | R3 | G3 | B3 */
+    srl               t5, t5, 8            /* t5 =  0 | R1 | G1 | B1 */
+    rotr              t3, t3, 16           /* t3 = B3 | R4 | G4 | B4 */
+
+    CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1
+    CONVERT_2x8888_TO_2x0565 t1, t3, t1, t3, t6, t7, t8, v0, v1
+
+    sh                t5, 0(a0)
+    sh                t0, 2(a0)
+    sh                t1, 4(a0)
+    sh                t3, 6(a0)
+    srl               t5, t2, 16           /* t5 =  0 |  0 | R5 | G5 */
+    b                 21b
+     addiu            a0, a0, 8
+
+3:                                         /* (a1 & 3) == 3 */
+    lbu               t5, 0(a1)            /* t5 =  0 |  0 |  0 | R1 */
+31:
+    beqz              t9, 4f
+     addiu            t9, t9, -1
+    lw                t0, 1(a1)            /* t0 = G2 | R2 | B1 | G1 */
+    lw                t1, 5(a1)            /* t1 = B3 | G3 | R3 | B2 */
+    lw                t2, 9(a1)            /* t2 = R5 | B4 | G4 | R4 */
+
+    addiu             a1, a1, 12
+    addiu             a2, a2, -4
+
+    wsbh              t0, t0               /* t0 = R2 | G2 | G1 | B1 */
+    wsbh              t1, t1               /* t1 = G3 | B3 | B2 | R3 */
+    wsbh              t2, t2               /* t2 = B4 | R5 | R4 | G4 */
+
+    precr_sra.ph.w    t5, t0, 0            /* t5 = xx | R1 | G1 | B1 */
+    packrl.ph         t3, t1, t0           /* t3 = B2 | R3 | R2 | G2 */
+    rotr              t1, t1, 16           /* t1 = B2 | R3 | G3 | B3 */
+    rotr              t4, t2, 24           /* t4 = R5 | R4 | G4 | B4 */
+    rotr              t3, t3, 24           /* t3 = R3 | R2 | G2 | B2 */
+
+    CONVERT_2x8888_TO_2x0565 t5, t3, t5, t3, t6, t7, t8, v0, v1
+    CONVERT_2x8888_TO_2x0565 t1, t4, t1, t4, t6, t7, t8, v0, v1
+
+    sh                t5, 0(a0)
+    sh                t3, 2(a0)
+    sh                t1, 4(a0)
+    sh                t4, 6(a0)
+    srl               t5, t2, 16           /* t5 =  0 |  0 | xx | R5 */
+    b                 31b
+     addiu            a0, a0, 8
+
+4:                                         /* no group of 4 pixels left */
+    beqz              a2, 6f
+     nop
+5:                                         /* one pixel per iteration */
+    lbu               t0, 0(a1)            /* t0 =  0 | 0 | 0 | R */
+    lbu               t1, 1(a1)            /* t1 =  0 | 0 | 0 | G */
+    lbu               t2, 2(a1)            /* t2 =  0 | 0 | 0 | B */
+    addiu             a1, a1, 3
+
+    sll               t0, t0, 16           /* t0 =  0 | R | 0 | 0 */
+    sll               t1, t1, 8            /* t1 =  0 | 0 | G | 0 */
+
+    or                t2, t2, t1           /* t2 =  0 | 0 | G | B */
+    or                t2, t2, t0           /* t2 =  0 | R | G | B */
+
+    CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5
+
+    sh                t3, 0(a0)
+    addiu             a2, a2, -1
+    bnez              a2, 5b
+     addiu            a0, a0, 2
+6:
+    RESTORE_REGS_FROM_STACK 0, v0, v1
+    j                 ra
+     nop
+
+END(pixman_composite_src_0888_0565_rev_asm_mips)
+#endif
+
+LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips)
+/*
+ * a0 - dst  (a8b8g8r8)
+ * a1 - src  (a8r8g8b8)
+ * a2 - w
+ */
+
+    SAVE_REGS_ON_STACK 0, v0
+    li       v0, 0x00ff00ff   /* constant handed to the MUL macros below */
+
+    beqz     a2, 3f
+     nop
+    addiu    t1, a2, -1
+    beqz     t1, 2f           /* w == 1: only the single-pixel tail */
+     nop
+1:                            /* two pixels per iteration */
+    lw       t0, 0(a1)
+    lw       t1, 4(a1)
+    addiu    a1, a1, 8
+    addiu    a2, a2, -2
+    srl      t2, t0, 24       /* t2 = top byte of pixel 0 */
+    srl      t3, t1, 24       /* t3 = top byte of pixel 1 */
+
+    MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9
+
+    sll      t0, t0, 8        /* shift the top byte out, freeing bits 7..0 */
+    sll      t1, t1, 8
+    andi     t2, t2, 0xff     /* keep bits 7..0 only */
+    andi     t3, t3, 0xff
+    or       t0, t0, t2       /* merge into the freed low byte */
+    or       t1, t1, t3
+    wsbh     t0, t0           /* wsbh + rotr 16 together reverse all 4 bytes */
+    wsbh     t1, t1
+    rotr     t0, t0, 16
+    rotr     t1, t1, 16
+    sw       t0, 0(a0)
+    sw       t1, 4(a0)
+
+    addiu    t2, a2, -1
+    bgtz     t2, 1b           /* loop while at least 2 pixels remain */
+     addiu   a0, a0, 8
+2:
+    beqz     a2, 3f
+     nop
+    lw       t0, 0(a1)
+    srl      t1, t0, 24       /* t1 = top byte of the pixel */
+
+    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5
+
+    sll      t0, t0, 8        /* same byte shuffle as in the 2-pixel loop */
+    andi     t1, t1, 0xff
+    or       t0, t0, t1
+    wsbh     t0, t0
+    rotr     t0, t0, 16
+    sw       t0, 0(a0)
+3:
+    RESTORE_REGS_FROM_STACK 0, v0
+    j        ra
+     nop
+
+END(pixman_composite_src_pixbuf_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips)
+/*
+ * a0 - dst  (a8r8g8b8)
+ * a1 - src  (a8r8g8b8)
+ * a2 - w
+ */
+
+    SAVE_REGS_ON_STACK 0, v0
+    li       v0, 0x00ff00ff   /* constant handed to the MUL macros below */
+
+    beqz     a2, 3f
+     nop
+    addiu    t1, a2, -1
+    beqz     t1, 2f           /* w == 1: only the single-pixel tail */
+     nop
+1:                            /* two pixels per iteration */
+    lw       t0, 0(a1)
+    lw       t1, 4(a1)
+    addiu    a1, a1, 8
+    addiu    a2, a2, -2
+    srl      t2, t0, 24       /* t2 = top byte of pixel 0 */
+    srl      t3, t1, 24       /* t3 = top byte of pixel 1 */
+
+    MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9
+
+    sll      t0, t0, 8        /* shift the top byte out, freeing bits 7..0 */
+    sll      t1, t1, 8
+    andi     t2, t2, 0xff     /* keep bits 7..0 only */
+    andi     t3, t3, 0xff
+    or       t0, t0, t2       /* merge into the freed low byte */
+    or       t1, t1, t3
+    rotr     t0, t0, 8        /* rotate the merged byte up into bits 31..24 */
+    rotr     t1, t1, 8
+    sw       t0, 0(a0)
+    sw       t1, 4(a0)
+
+    addiu    t2, a2, -1
+    bgtz     t2, 1b           /* loop while at least 2 pixels remain */
+     addiu   a0, a0, 8
+2:
+    beqz     a2, 3f
+     nop
+    lw       t0, 0(a1)
+    srl      t1, t0, 24       /* t1 = top byte of the pixel */
+
+    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5
+
+    sll      t0, t0, 8        /* same byte shuffle as in the 2-pixel loop */
+    andi     t1, t1, 0xff
+    or       t0, t0, t1
+    rotr     t0, t0, 8
+    sw       t0, 0(a0)
+3:
+    RESTORE_REGS_FROM_STACK 0, v0
+    j        ra
+     nop
+
+END(pixman_composite_src_rpixbuf_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips)
+/*
+ * a0 - dst  (a8r8g8b8)
+ * a1 - src  (32bit constant)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+
+    SAVE_REGS_ON_STACK 0, v0
+    li       v0, 0x00ff00ff
+
+    beqz     a3, 3f
+     nop
+    addiu    t1, a3, -1
+    beqz     t1, 2f    /* w == 1: only the single-pixel tail */
+     nop
+
+1:
+                       /* a1 = source      (32bit constant) */
+    lbu      t0, 0(a2) /* t0 = mask        (a8) */
+    lbu      t1, 1(a2) /* t1 = mask        (a8) */
+    addiu    a2, a2, 2
+
+    MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, t2, t3, v0, t4, t5, t6, t7, t8, t9
+
+    sw       t2, 0(a0)
+    sw       t3, 4(a0)
+    addiu    a3, a3, -2
+    addiu    t2, a3, -1
+    bgtz     t2, 1b    /* loop while at least 2 pixels remain */
+     addiu   a0, a0, 8
+
+    beqz     a3, 3f    /* even width: nothing left for the tail */
+     nop
+
+2:
+    lbu      t0, 0(a2)
+    addiu    a2, a2, 1
+
+    MIPS_UN8x4_MUL_UN8 a1, t0, t1, v0, t3, t4, t5
+
+    sw       t1, 0(a0)
+    addiu    a3, a3, -1
+    addiu    a0, a0, 4
+
+3:
+    RESTORE_REGS_FROM_STACK 0, v0
+    j        ra
+     nop
+
+END(pixman_composite_src_n_8_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm_mips)
+/*
+ * a0 - dst  (a8)
+ * a1 - src  (32bit constant)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+    li                t9, 0x00ff00ff
+    beqz              a3, 3f
+     nop
+    srl               t7, a3, 2   /* t7 = how many multiples of 4 dst pixels */
+    beqz              t7, 1f      /* branch if less than 4 dst pixels */
+     nop
+
+    srl               t8, a1, 24  /* t8 = top byte of src */
+    replv.ph          t8, t8      /* ... replicated into both halfwords */
+
+0:
+    beqz              t7, 1f
+     addiu            t7, t7, -1
+    lbu               t0, 0(a2)            /* t0 = m0 */
+    lbu               t1, 1(a2)            /* t1 = m1 */
+    lbu               t2, 2(a2)            /* t2 = m2 */
+    lbu               t3, 3(a2)            /* t3 = m3 */
+
+    addiu             a2, a2, 4
+
+    precr_sra.ph.w    t1, t0, 0            /* t1 =  0 | m1 |  0 | m0 */
+    precr_sra.ph.w    t3, t2, 0            /* t3 =  0 | m3 |  0 | m2 */
+    precr.qb.ph       t0, t3, t1           /* t0 = m3 | m2 | m1 | m0 */
+
+    muleu_s.ph.qbl    t2, t0, t8           /* t2 = halfword products for m3, m2 */
+    muleu_s.ph.qbr    t3, t0, t8           /* t3 = halfword products for m1, m0 */
+    shra_r.ph         t4, t2, 8            /* next 8 insns: x = (x + (x >> 8)) >> 8 per halfword, rounding shifts */
+    shra_r.ph         t5, t3, 8
+    and               t4, t4, t9
+    and               t5, t5, t9
+    addq.ph           t2, t2, t4
+    addq.ph           t3, t3, t5
+    shra_r.ph         t2, t2, 8
+    shra_r.ph         t3, t3, 8
+    precr.qb.ph       t2, t2, t3           /* t2 = r3 | r2 | r1 | r0 (low byte of each halfword) */
+
+    sb                t2, 0(a0)            /* r0 */
+    srl               t2, t2, 8
+    sb                t2, 1(a0)            /* r1 */
+    srl               t2, t2, 8
+    sb                t2, 2(a0)            /* r2 */
+    srl               t2, t2, 8
+    sb                t2, 3(a0)            /* r3 */
+    addiu             a3, a3, -4
+    b                 0b
+     addiu            a0, a0, 4
+
+1:
+    beqz              a3, 3f
+     nop
+    srl               t8, a1, 24           /* t8 = top byte of src (not replicated) */
+2:                                         /* one pixel per iteration */
+    lbu               t0, 0(a2)
+    addiu             a2, a2, 1
+
+    mul               t2, t0, t8
+    shra_r.ph         t3, t2, 8            /* same (x + (x >> 8)) >> 8 sequence as above */
+    andi              t3, t3, 0x00ff
+    addq.ph           t2, t2, t3
+    shra_r.ph         t2, t2, 8
+
+    sb                t2, 0(a0)
+    addiu             a3, a3, -1
+    bnez              a3, 2b
+     addiu            a0, a0, 1
+
+3:
+    j                 ra
+     nop
+
+END(pixman_composite_src_n_8_8_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
 /*
  * a0 - dst  (a8r8g8b8)
@@ -318,34 +961,35 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
  * a3 - w
  */
 
-    SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
-    beqz         a3, 4f
+    beqz         a3, 8f
      nop
+    SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
+
     li           t6, 0xff
     addiu        t7, zero, -1 /* t7 = 0xffffffff */
     srl          t8, a1, 24   /* t8 = srca */
     li           t9, 0x00ff00ff
+
     addiu        t1, a3, -1
-    beqz         t1, 3f       /* last pixel */
-     nop
-    beq          t8, t6, 2f   /* if (srca == 0xff) */
+    beqz         t1, 4f       /* last pixel */
      nop
-1:
-                              /* a1 = src */
+
+0:
     lw           t0, 0(a2)    /* t0 = mask */
     lw           t1, 4(a2)    /* t1 = mask */
+    addiu        a3, a3, -2   /* w = w - 2 */
     or           t2, t0, t1
-    beqz         t2, 12f      /* if (t0 == 0) && (t1 == 0) */
+    beqz         t2, 3f      /* if (t0 == 0) && (t1 == 0) */
      addiu       a2, a2, 8
-    and          t3, t0, t1
-    move         t4, a1       /* t4 = src */
-    move         t5, a1       /* t5 = src */
+    and          t2, t0, t1
+    beq          t2, t7, 1f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
+     nop
+
+//if(ma)
     lw           t2, 0(a0)    /* t2 = dst */
-    beq          t3, t7, 11f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
-     lw          t3, 4(a0)    /* t3 = dst */
+    lw           t3, 4(a0)    /* t3 = dst */
     MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
     MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5
-11:
     not          t0, t0
     not          t1, t1
     MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
@@ -353,60 +997,77 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
     addu_s.qb    t3, t5, t3
     sw           t2, 0(a0)
     sw           t3, 4(a0)
-12:
-    addiu        a3, a3, -2
     addiu        t1, a3, -1
-    bgtz         t1, 1b
+    bgtz         t1, 0b
      addiu       a0, a0, 8
-    b            3f
+    b            4f
+     nop
+1:
+//if (t0 == 0xffffffff) && (t1 == 0xffffffff):
+    beq          t8, t6, 2f   /* if (srca == 0xff) */
      nop
-2:
-                              /* a1 = src */
-    lw           t0, 0(a2)    /* t0 = mask */
-    lw           t1, 4(a2)    /* t1 = mask */
-    or           t2, t0, t1
-    beqz         t2, 22f      /* if (t0 == 0) & (t1 == 0) */
-     addiu       a2, a2, 8
-    and          t2, t0, t1
-    move         t4, a1
-    beq          t2, t7, 21f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
-     move        t5, a1
     lw           t2, 0(a0)    /* t2 = dst */
     lw           t3, 4(a0)    /* t3 = dst */
-    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
-    not          t0, t0
-    not          t1, t1
-    MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
-    addu_s.qb    t4, t4, t2
-    addu_s.qb    t5, t5, t3
-21:
-    sw           t4, 0(a0)
-    sw           t5, 4(a0)
-22:
-    addiu        a3, a3, -2
+    not          t0, a1
+    not          t1, a1
+    srl          t0, t0, 24
+    srl          t1, t1, 24
+    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
+    addu_s.qb    t2, a1, t2
+    addu_s.qb    t3, a1, t3
+    sw           t2, 0(a0)
+    sw           t3, 4(a0)
     addiu        t1, a3, -1
-    bgtz         t1, 2b
+    bgtz         t1, 0b
      addiu       a0, a0, 8
+    b            4f
+     nop
+2:
+    sw           a1, 0(a0)
+    sw           a1, 4(a0)
 3:
-    blez         a3, 4f
+    addiu        t1, a3, -1
+    bgtz         t1, 0b
+     addiu       a0, a0, 8
+
+4:
+    beqz         a3, 7f
      nop
                               /* a1 = src */
-    lw           t1, 0(a2)    /* t1 = mask */
-    beqz         t1, 4f
+    lw           t0, 0(a2)    /* t0 = mask */
+    beqz         t0, 7f       /* if (t0 == 0) */
      nop
-    move         t2, a1       /* t2 = src */
-    beq          t1, t7, 31f
-     lw          t0, 0(a0)    /* t0 = dst */
-
-    MIPS_UN8x4_MUL_UN8x4  a1, t1, t2, t9, t3, t4, t5, t6
-    MIPS_UN8x4_MUL_UN8    t1, t8, t1, t9, t3, t4, t5
-31:
-    not          t1, t1
-    MIPS_UN8x4_MUL_UN8x4  t0, t1, t0, t9, t3, t4, t5, t6
-    addu_s.qb    t0, t2, t0
-    sw           t0, 0(a0)
-4:
+    beq          t0, t7, 5f  /* if (t0 == 0xffffffff) */
+     nop
+//if(ma)
+    lw           t1, 0(a0)    /* t1 = dst */
+    MIPS_UN8x4_MUL_UN8x4  a1, t0, t2, t9, t3, t4, t5, s0
+    MIPS_UN8x4_MUL_UN8    t0, t8, t0, t9, t3, t4, t5
+    not          t0, t0
+    MIPS_UN8x4_MUL_UN8x4  t1, t0, t1, t9, t3, t4, t5, s0
+    addu_s.qb    t1, t2, t1
+    sw           t1, 0(a0)
+    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
+    j            ra
+     nop
+5:
+//if (t0 == 0xffffffff)
+    beq          t8, t6, 6f   /* if (srca == 0xff) */
+     nop
+    lw           t1, 0(a0)    /* t1 = dst */
+    not          t0, a1
+    srl          t0, t0, 24
+    MIPS_UN8x4_MUL_UN8 t1, t0, t1, t9, t2, t3, t4
+    addu_s.qb    t1, a1, t1
+    sw           t1, 0(a0)
+    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
+    j            ra
+     nop
+6:
+    sw           a1, 0(a0)
+7:
     RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
+8:
     j            ra
      nop
 
@@ -420,111 +1081,251 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
  * a3 - w
  */
 
-    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
-    beqz         a3, 4f
+    beqz         a3, 8f
      nop
-    li           t5, 0xf800f800
-    li           t6, 0x07e007e0
-    li           t7, 0x001F001F
-    li           t9, 0x00ff00ff
+    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
 
+    li           t6, 0xff
+    addiu        t7, zero, -1 /* t7 = 0xffffffff */
     srl          t8, a1, 24   /* t8 = srca */
+    li           t9, 0x00ff00ff
+    li           s6, 0xf800f800
+    li           s7, 0x07e007e0
+    li           s8, 0x001F001F
+
     addiu        t1, a3, -1
-    beqz         t1, 3f       /* last pixel */
+    beqz         t1, 4f       /* last pixel */
      nop
-    li           s0, 0xff     /* s0 = 0xff */
-    addiu        s1, zero, -1 /* s1 = 0xffffffff */
 
-    beq          t8, s0, 2f   /* if (srca == 0xff) */
-     nop
-1:
-                              /* a1 = src */
+0:
     lw           t0, 0(a2)    /* t0 = mask */
     lw           t1, 4(a2)    /* t1 = mask */
+    addiu        a3, a3, -2   /* w = w - 2 */
     or           t2, t0, t1
-    beqz         t2, 12f      /* if (t0 == 0) && (t1 == 0) */
+    beqz         t2, 3f      /* if (t0 == 0) && (t1 == 0) */
      addiu       a2, a2, 8
-    and          t3, t0, t1
-    move         s2, a1       /* s2 = src */
-    move         s3, a1       /* s3 = src */
+    and          t2, t0, t1
+    beq          t2, t7, 1f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
+     nop
+
+//if(ma)
     lhu          t2, 0(a0)    /* t2 = dst */
-    beq          t3, s1, 11f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
-     lhu         t3, 2(a0)    /* t3 = dst */
-    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8
-    MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, t0, t1, t9, t4, s4, s5, s6, s7, s8
-11:
+    lhu          t3, 2(a0)    /* t3 = dst */
+    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
+    MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5
     not          t0, t0
     not          t1, t1
-    CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8
-    MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t0, t1
-    addu_s.qb    s2, s2, s4
-    addu_s.qb    s3, s3, s5
-    CONVERT_2x8888_TO_2x0565 s2, s3, t2, t3, t5, t6, t7, s4, s5
+    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3
+    MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
+    addu_s.qb    t2, t4, t2
+    addu_s.qb    t3, t5, t3
+    CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1
     sh           t2, 0(a0)
     sh           t3, 2(a0)
-12:
-    addiu        a3, a3, -2
     addiu        t1, a3, -1
-    bgtz         t1, 1b
+    bgtz         t1, 0b
      addiu       a0, a0, 4
-    b            3f
+    b            4f
+     nop
+1:
+//if (t0 == 0xffffffff) && (t1 == 0xffffffff):
+    beq          t8, t6, 2f   /* if (srca == 0xff) */
      nop
-2:
-                              /* a1 = src */
-    lw           t0, 0(a2)    /* t0 = mask */
-    lw           t1, 4(a2)    /* t1 = mask */
-    or           t2, t0, t1
-    beqz         t2, 22f      /* if (t0 == 0) & (t1 == 0) */
-     addiu       a2, a2, 8
-    and          t3, t0, t1
-    move         t2, a1
-    beq          t3, s1, 21f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
-     move        t3, a1
     lhu          t2, 0(a0)    /* t2 = dst */
     lhu          t3, 2(a0)    /* t3 = dst */
-    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8
-    not          t0, t0
-    not          t1, t1
-    CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8
-    MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t2, t3
-    addu_s.qb    t2, s2, s4
-    addu_s.qb    t3, s3, s5
-21:
-    CONVERT_2x8888_TO_2x0565 t2, t3, t0, t1, t5, t6, t7, s2, s3
-    sh           t0, 0(a0)
-    sh           t1, 2(a0)
-22:
-    addiu        a3, a3, -2
+    not          t0, a1
+    not          t1, a1
+    srl          t0, t0, 24
+    srl          t1, t1, 24
+    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3
+    MIPS_2xUN8x4_MUL_2xUN8   t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
+    addu_s.qb    t2, a1, t2
+    addu_s.qb    t3, a1, t3
+    CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1
+    sh           t2, 0(a0)
+    sh           t3, 2(a0)
     addiu        t1, a3, -1
-    bgtz         t1, 2b
+    bgtz         t1, 0b
      addiu       a0, a0, 4
+    b            4f
+     nop
+2:
+    CONVERT_1x8888_TO_1x0565 a1, t2, s0, s1
+    sh           t2, 0(a0)
+    sh           t2, 2(a0)
 3:
-    blez         a3, 4f
+    addiu        t1, a3, -1
+    bgtz         t1, 0b
+     addiu       a0, a0, 4
+
+4:
+    beqz         a3, 7f
      nop
                               /* a1 = src */
-    lw           t1, 0(a2)    /* t1 = mask */
-    beqz         t1, 4f
+    lw           t0, 0(a2)    /* t0 = mask */
+    beqz         t0, 7f       /* if (t0 == 0) */
      nop
-    move         t2, a1       /* t2 = src */
-    beq          t1, t7, 31f
-     lhu         t0, 0(a0)    /* t0 = dst */
-
-    MIPS_UN8x4_MUL_UN8x4     a1, t1, t2, t9, t3, t4, t5, t6
-    MIPS_UN8x4_MUL_UN8       t1, t8, t1, t9, t3, t4, t5
-31:
-    not          t1, t1
-    CONVERT_1x0565_TO_1x8888 t0, s1, s2, s3
-    MIPS_UN8x4_MUL_UN8x4     s1, t1, t3, t9, t4, t5, t6, t7
-    addu_s.qb    t0, t2, t3
-    CONVERT_1x8888_TO_1x0565 t0, s1, s2, s3
-    sh           s1, 0(a0)
-4:
-    RESTORE_REGS_FROM_STACK  20, s0, s1, s2, s3, s4, s5, s6, s7, s8
+    beq          t0, t7, 5f  /* if (t0 == 0xffffffff) */
+     nop
+//if(ma)
+    lhu          t1, 0(a0)    /* t1 = dst */
+    MIPS_UN8x4_MUL_UN8x4     a1, t0, t2, t9, t3, t4, t5, s0
+    MIPS_UN8x4_MUL_UN8       t0, t8, t0, t9, t3, t4, t5
+    not          t0, t0
+    CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3
+    MIPS_UN8x4_MUL_UN8x4     s1, t0, s1, t9, t3, t4, t5, s0
+    addu_s.qb    s1, t2, s1
+    CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2
+    sh           t1, 0(a0)
+    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
+    j            ra
+     nop
+5:
+//if (t0 == 0xffffffff)
+    beq          t8, t6, 6f   /* if (srca == 0xff) */
+     nop
+    lhu          t1, 0(a0)    /* t1 = dst */
+    not          t0, a1
+    srl          t0, t0, 24
+    CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3
+    MIPS_UN8x4_MUL_UN8       s1, t0, s1, t9, t2, t3, t4
+    addu_s.qb    s1, a1, s1
+    CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2
+    sh           t1, 0(a0)
+    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
+    j            ra
+     nop
+6:
+    CONVERT_1x8888_TO_1x0565 a1, t1, s0, s2
+    sh           t1, 0(a0)
+7:
+    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
+8:
     j            ra
      nop
 
 END(pixman_composite_over_n_8888_0565_ca_asm_mips)
 
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8_asm_mips)
+/* Blend a constant source through an a8 mask onto a row of a8 destination pixels.
+ * a0 - dst  (a8)
+ * a1 - src  (32bit constant)
+ * a2 - mask (a8)
+ * a3 - w    (pixel count; 0 goes straight to the exit)
+ */
+
+    SAVE_REGS_ON_STACK 0, v0      /* NOTE(review): macro defined elsewhere; presumably spills v0 - confirm */
+    li                t9, 0x00ff00ff /* t9 = low-byte mask for both halfwords */
+    beqz              a3, 3f
+     nop
+    srl               v0, a3, 2   /* v0 = how many multiples of 4 dst pixels */
+    beqz              v0, 1f      /* branch if fewer than 4 pixels */
+     nop
+
+    srl               t8, a1, 24  /* t8 = srca (top byte of src) */
+    replv.ph          t8, t8      /* srca replicated into both halfwords */
+
+0:                                /* 4 pixels per iteration */
+    beqz              v0, 1f
+     addiu            v0, v0, -1
+    lbu               t0, 0(a2)   /* t0..t3 = 4 mask bytes */
+    lbu               t1, 1(a2)
+    lbu               t2, 2(a2)
+    lbu               t3, 3(a2)
+    lbu               t4, 0(a0)   /* t4..t7 = 4 dst bytes */
+    lbu               t5, 1(a0)
+    lbu               t6, 2(a0)
+    lbu               t7, 3(a0)
+
+    addiu             a2, a2, 4
+
+    precr_sra.ph.w    t1, t0, 0
+    precr_sra.ph.w    t3, t2, 0
+    precr_sra.ph.w    t5, t4, 0
+    precr_sra.ph.w    t7, t6, 0
+
+    precr.qb.ph       t0, t3, t1  /* t0 = 4 mask bytes packed, first pixel in the low byte */
+    precr.qb.ph       t1, t7, t5  /* t1 = 4 dst bytes packed the same way */
+
+    muleu_s.ph.qbl    t2, t0, t8  /* t2 = upper two mask bytes * srca */
+    muleu_s.ph.qbr    t3, t0, t8  /* t3 = lower two mask bytes * srca */
+    shra_r.ph         t4, t2, 8   /* shift/mask/add/shift: scale each 16-bit product back to 8 bits (approx. / 255) */
+    shra_r.ph         t5, t3, 8
+    and               t4, t4, t9
+    and               t5, t5, t9
+    addq.ph           t2, t2, t4
+    addq.ph           t3, t3, t5
+    shra_r.ph         t2, t2, 8
+    shra_r.ph         t3, t3, 8
+    precr.qb.ph       t0, t2, t3  /* t0 = 4 packed bytes of scaled mask * srca */
+    not               t6, t0      /* t6 = per-byte complement of t0 */
+
+    preceu.ph.qbl     t7, t6      /* t7 = upper two complement bytes widened to halfwords */
+    preceu.ph.qbr     t6, t6      /* t6 = lower two complement bytes widened to halfwords */
+
+    muleu_s.ph.qbl    t2, t1, t7  /* upper two dst bytes * complement */
+    muleu_s.ph.qbr    t3, t1, t6  /* lower two dst bytes * complement */
+    shra_r.ph         t4, t2, 8   /* same 16-bit -> 8-bit scaling as above */
+    shra_r.ph         t5, t3, 8
+    and               t4, t4, t9
+    and               t5, t5, t9
+    addq.ph           t2, t2, t4
+    addq.ph           t3, t3, t5
+    shra_r.ph         t2, t2, 8
+    shra_r.ph         t3, t3, 8
+    precr.qb.ph       t1, t2, t3  /* t1 = 4 packed bytes of scaled dst * complement */
+
+    addu_s.qb         t2, t0, t1  /* per-byte saturating sum = result */
+
+    sb                t2, 0(a0)   /* write the 4 result bytes, lowest byte first */
+    srl               t2, t2, 8
+    sb                t2, 1(a0)
+    srl               t2, t2, 8
+    sb                t2, 2(a0)
+    srl               t2, t2, 8
+    sb                t2, 3(a0)
+    addiu             a3, a3, -4
+    b                 0b
+     addiu            a0, a0, 4
+
+1:
+    beqz              a3, 3f      /* no leftover pixels */
+     nop
+    srl               t8, a1, 24  /* t8 = srca again, not replicated, for the scalar loop */
+2:                                /* one pixel per iteration */
+    lbu               t0, 0(a2)   /* t0 = mask */
+    lbu               t1, 0(a0)   /* t1 = dst */
+    addiu             a2, a2, 1
+
+    mul               t2, t0, t8  /* mask * srca */
+    shra_r.ph         t3, t2, 8
+    andi              t3, t3, 0x00ff
+    addq.ph           t2, t2, t3
+    shra_r.ph         t2, t2, 8   /* t2 = scaled mask * srca */
+    not               t3, t2
+    andi              t3, t3, 0x00ff /* t3 = 8-bit complement of t2 */
+
+
+    mul               t4, t1, t3  /* dst * complement */
+    shra_r.ph         t5, t4, 8
+    andi              t5, t5, 0x00ff
+    addq.ph           t4, t4, t5
+    shra_r.ph         t4, t4, 8
+    andi              t4, t4, 0x00ff
+
+    addu_s.qb         t2, t2, t4  /* saturating sum; only the low byte is stored */
+    sb                t2, 0(a0)
+    addiu             a3, a3, -1
+    bnez              a3, 2b
+     addiu            a0, a0, 1
+
+3:
+    RESTORE_REGS_FROM_STACK 0, v0
+    j                 ra
+     nop
+
+END(pixman_composite_over_n_8_8_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips)
 /*
  * a0 - dst  (a8r8g8b8)
@@ -1209,6 +2010,218 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
 
 END(pixman_composite_over_8888_8888_asm_mips)
 
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_0565_asm_mips)
+/* Composite a row of a8r8g8b8 source pixels over r5g6b5 destination pixels.
+ * a0 - dst  (r5g6b5)
+ * a1 - src  (a8r8g8b8)
+ * a2 - w
+ */
+
+    SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
+    li           t4, 0x00ff00ff
+    li           s3, 0xf800f800 /* s3/s4/s5: top-5 / middle-6 / low-5 bit masks in both halfwords */
+    li           s4, 0x07e007e0
+    li           s5, 0x001F001F
+    beqz         a2, 3f
+     nop
+    addiu        t1, a2, -1
+    beqz         t1, 2f    /* w == 1: only the single-pixel tail runs */
+     nop
+1:                         /* two pixels per iteration */
+    lw           t0, 0(a1) /* t0 = source      (a8r8g8b8) */
+    lw           t1, 4(a1) /* t1 = source      (a8r8g8b8) */
+    lhu          t2, 0(a0) /* t2 = destination (r5g6b5) */
+    lhu          t3, 2(a0) /* t3 = destination (r5g6b5) */
+    addiu        a1, a1, 8
+
+    not          t5, t0
+    srl          t5, t5, 24 /* t5 = 255 - alpha of first source pixel */
+    not          t6, t1
+    srl          t6, t6, 24 /* t6 = 255 - alpha of second source pixel */
+
+    or           t7, t5, t6
+    beqz         t7, 11f    /* both sources fully opaque: skip the blend, just convert and store */
+     or          t8, t0, t1
+    beqz         t8, 12f    /* both sources zero: leave dst alone. NOTE(review): no delay-slot instruction is written here, unlike the other branches - confirm the first instruction of the next macro is safe on the taken path */
+
+    CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, s4, s5, t7, t8, t9, s2
+    MIPS_2xUN8x4_MUL_2xUN8   s0, s1, t5, t6, t7, t8, t4, t9, t2, t3, s2, s0, s1
+
+    addu_s.qb    t0, t7, t0 /* per-byte saturating add of source onto t7/t8 */
+    addu_s.qb    t1, t8, t1
+11:
+    CONVERT_2x8888_TO_2x0565 t0, t1, t7, t8, s3, s4, s5, t2, t3
+    sh           t7, 0(a0)
+    sh           t8, 2(a0)
+12:
+    addiu        a2, a2, -2
+    addiu        t1, a2, -1
+    bgtz         t1, 1b     /* loop while at least 2 pixels remain */
+     addiu       a0, a0, 4
+2:
+    beqz         a2, 3f     /* no odd pixel left */
+     nop
+
+    lw           t0, 0(a1) /* t0 = source      (a8r8g8b8) */
+    lhu          t1, 0(a0) /* t1 = destination (r5g6b5) */
+    addiu        a1, a1, 4
+
+    not          t2, t0
+    srl          t2, t2, 24 /* t2 = 255 - source alpha */
+
+    beqz         t2, 21f    /* opaque source: convert and store directly */
+     nop
+    beqz         t0, 3f     /* zero source: dst unchanged. NOTE(review): no delay-slot instruction is written here either - confirm the first instruction of the next macro is harmless before the restore at 3 */
+
+    CONVERT_1x0565_TO_1x8888 t1, s0, t8, t9
+    MIPS_UN8x4_MUL_UN8       s0, t2, t3, t4, t5, t6, t7
+
+    addu_s.qb    t0, t3, t0
+21:
+    CONVERT_1x8888_TO_1x0565 t0, s0, t8, t9
+    sh           s0, 0(a0)
+
+3:
+    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
+    j            ra
+     nop
+
+END(pixman_composite_over_8888_0565_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips)
+/* Composite a constant 32-bit source over a row of r5g6b5 destination pixels.
+ * a0 - dst  (r5g6b5)
+ * a1 - src  (32bit constant)
+ * a2 - w
+ */
+
+    beqz         a2, 5f    /* w == 0: return without touching the stack */
+     nop
+
+    not          t0, a1
+    srl          t0, t0, 24 /* t0 = 255 - source alpha */
+    bgtz         t0, 1f     /* source not fully opaque: take the blending path */
+     nop
+    CONVERT_1x8888_TO_1x0565 a1, t1, t2, t3
+0:                          /* opaque source: store the converted value t1 into every pixel */
+    sh           t1, 0(a0)
+    addiu        a2, a2, -1
+    bgtz         a2, 0b
+     addiu       a0, a0, 2
+    j            ra
+     nop
+
+1:
+    SAVE_REGS_ON_STACK 0, s0, s1, s2
+    li           t4, 0x00ff00ff
+    li           t5, 0xf800f800
+    li           t6, 0x07e007e0
+    li           t7, 0x001F001F
+    addiu        t1, a2, -1
+    beqz         t1, 3f     /* w == 1: only the single-pixel tail runs */
+     nop
+2:                          /* two pixels per iteration */
+    lhu          t1, 0(a0) /* t1 = destination (r5g6b5) */
+    lhu          t2, 2(a0) /* t2 = destination (r5g6b5) */
+
+    CONVERT_2x0565_TO_2x8888 t1, t2, t3, t8, t6, t7, t9, s0, s1, s2
+    MIPS_2xUN8x4_MUL_2xUN8   t3, t8, t0, t0, t1, t2, t4, t9, s0, s1, s2, t3, t8
+    addu_s.qb                t1, t1, a1 /* per-byte saturating add of the constant source */
+    addu_s.qb                t2, t2, a1
+    CONVERT_2x8888_TO_2x0565 t1, t2, t3, t8, t5, t6, t7, s0, s1
+
+    sh           t3, 0(a0)
+    sh           t8, 2(a0)
+
+    addiu        a2, a2, -2
+    addiu        t1, a2, -1
+    bgtz         t1, 2b     /* loop while at least 2 pixels remain */
+     addiu       a0, a0, 4
+3:
+    beqz         a2, 4f     /* no odd pixel left */
+     nop
+
+    lhu          t1, 0(a0) /* t1 = destination (r5g6b5) */
+
+    CONVERT_1x0565_TO_1x8888 t1, t2, s0, s1
+    MIPS_UN8x4_MUL_UN8       t2, t0, t1, t4, s0, s1, s2
+    addu_s.qb                t1, t1, a1
+    CONVERT_1x8888_TO_1x0565 t1, t2, s0, s1
+
+    sh           t2, 0(a0)
+
+4:
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
+5:
+    j            ra
+     nop
+
+END(pixman_composite_over_n_0565_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips)
+/* Composite a constant 32-bit source over a row of a8r8g8b8 destination pixels.
+ * a0 - dst  (a8r8g8b8)
+ * a1 - src  (32bit constant)
+ * a2 - w
+ */
+
+    beqz         a2, 5f    /* w == 0: return without touching the stack */
+     nop
+
+    not          t0, a1
+    srl          t0, t0, 24 /* t0 = 255 - source alpha */
+    bgtz         t0, 1f     /* source not fully opaque: take the blending path */
+     nop
+0:                          /* opaque source: store src into every pixel */
+    sw           a1, 0(a0)
+    addiu        a2, a2, -1
+    bgtz         a2, 0b
+     addiu       a0, a0, 4
+    j            ra
+     nop
+
+1:
+    SAVE_REGS_ON_STACK 0, s0, s1, s2
+    li           t4, 0x00ff00ff
+    addiu        t1, a2, -1
+    beqz         t1, 3f     /* w == 1: only the single-pixel tail runs */
+     nop
+2:                          /* two pixels per iteration */
+    lw           t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+    lw           t3, 4(a0) /* t3 = destination (a8r8g8b8) */
+
+    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t0, t7, t8, t4, t9, s0, s1, s2, t2, t3
+
+    addu_s.qb    t7, t7, a1 /* per-byte saturating add of the constant source */
+    addu_s.qb    t8, t8, a1
+
+    sw           t7, 0(a0)
+    sw           t8, 4(a0)
+
+    addiu        a2, a2, -2
+    addiu        t1, a2, -1
+    bgtz         t1, 2b     /* loop while at least 2 pixels remain */
+     addiu       a0, a0, 8
+3:
+    beqz         a2, 4f     /* no odd pixel left */
+     nop
+
+    lw           t1, 0(a0) /* t1 = destination (a8r8g8b8) */
+
+    MIPS_UN8x4_MUL_UN8 t1, t0, t3, t4, t5, t6, t7
+
+    addu_s.qb    t3, t3, a1
+
+    sw           t3, 0(a0)
+
+4:
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
+5:
+    j            ra
+     nop
+
+END(pixman_composite_over_n_8888_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
 /*
  * a0 - dst  (a8)
@@ -1833,6 +2846,671 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips)
 
 END(pixman_composite_add_8888_8888_asm_mips)
 
+LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm_mips)
+/* Scale each r5g6b5 dst pixel by the complement of the matching a8 source byte.
+ * a0 - dst  (r5g6b5)
+ * a1 - src  (a8)
+ * a2 - w
+ */
+
+    beqz     a2, 4f    /* w == 0: return without touching the stack */
+     nop
+
+    SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
+    li       t2, 0xf800f800
+    li       t3, 0x07e007e0
+    li       t4, 0x001F001F
+    li       t5, 0x00ff00ff
+
+    addiu    t1, a2, -1
+    beqz     t1, 2f    /* w == 1: only the single-pixel tail runs */
+     nop
+1:                     /* two pixels per iteration */
+    lbu      t0, 0(a1) /* t0 = source      (a8) */
+    lbu      t1, 1(a1) /* t1 = source      (a8) */
+    lhu      t6, 0(a0) /* t6 = destination (r5g6b5) */
+    lhu      t7, 2(a0) /* t7 = destination (r5g6b5) */
+    addiu    a1, a1, 2
+
+    not      t0, t0
+    not      t1, t1
+    andi     t0, 0xff  /* t0 = neg source1 */
+    andi     t1, 0xff  /* t1 = neg source2 */
+    CONVERT_2x0565_TO_2x8888 t6, t7, t8, t9, t3, t4, s0, s1, s2, s3
+    MIPS_2xUN8x4_MUL_2xUN8   t8, t9, t0, t1, t6, t7, t5, s0, s1, s2, s3, t8, t9
+    CONVERT_2x8888_TO_2x0565 t6, t7, t8, t9, t2, t3, t4, s0, s1
+
+    sh       t8, 0(a0)
+    sh       t9, 2(a0)
+    addiu    a2, a2, -2
+    addiu    t1, a2, -1
+    bgtz     t1, 1b    /* loop while at least 2 pixels remain */
+     addiu   a0, a0, 4
+2:
+    beqz     a2, 3f    /* no odd pixel left */
+     nop
+    lbu      t0, 0(a1) /* t0 = source      (a8) */
+    lhu      t1, 0(a0) /* t1 = destination (r5g6b5) */
+
+    not      t0, t0
+    andi     t0, 0xff  /* t0 = neg source */
+    CONVERT_1x0565_TO_1x8888 t1, t2, t3, t4 /* t2..t4 (mask constants) are reused here; this is the last pixel */
+    MIPS_UN8x4_MUL_UN8        t2, t0, t1, t5, t3, t4, t6
+    CONVERT_1x8888_TO_1x0565 t1, t2, t3, t4
+
+    sh       t2, 0(a0)
+3:
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
+4:
+    j        ra
+     nop
+
+END(pixman_composite_out_reverse_8_0565_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm_mips)
+/* Scale each a8r8g8b8 dst pixel by the complement of the matching a8 source byte.
+ * a0 - dst  (a8r8g8b8)
+ * a1 - src  (a8)
+ * a2 - w
+ */
+
+    beqz     a2, 3f    /* w == 0: nothing to do */
+     nop
+    li       t4, 0x00ff00ff
+    addiu    t1, a2, -1
+    beqz     t1, 2f    /* w == 1: only the single-pixel tail runs */
+     nop
+1:                     /* two pixels per iteration */
+    lbu      t0, 0(a1) /* t0 = source      (a8) */
+    lbu      t1, 1(a1) /* t1 = source      (a8) */
+    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+    lw       t3, 4(a0) /* t3 = destination (a8r8g8b8) */
+    addiu    a1, a1, 2
+    not      t0, t0
+    not      t1, t1
+    andi     t0, 0xff  /* t0 = neg source */
+    andi     t1, 0xff  /* t1 = neg source */
+
+    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t5, t6, t4, t7, t8, t9, t2, t3, t0 /* NOTE(review): trailing scratch registers t2, t3, t0 are also inputs - confirm the macro reads them before overwriting */
+
+    sw       t5, 0(a0)
+    sw       t6, 4(a0)
+    addiu    a2, a2, -2
+    addiu    t1, a2, -1
+    bgtz     t1, 1b    /* loop while at least 2 pixels remain */
+     addiu   a0, a0, 8
+2:
+    beqz     a2, 3f    /* no odd pixel left */
+     nop
+    lbu      t0, 0(a1) /* t0 = source      (a8) */
+    lw       t1, 0(a0) /* t1 = destination (a8r8g8b8) */
+    not      t0, t0
+    andi     t0, 0xff  /* t0 = neg source */
+
+    MIPS_UN8x4_MUL_UN8 t1, t0, t2, t4, t3, t5, t6
+
+    sw       t2, 0(a0)
+3:
+    j        ra
+     nop
+
+END(pixman_composite_out_reverse_8_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm_mips)
+/* Per pixel: dst = saturate(dst + src scaled by (255 - dst alpha)).
+ * a0 - dst  (a8r8g8b8)
+ * a1 - src  (32bit constant)
+ * a2 - w
+ */
+
+    beqz              a2, 5f      /* w == 0: return without touching the stack */
+     nop
+
+    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
+    li                t0, 0x00ff00ff
+    srl               t9, a2, 2   /* t9 = how many multiples of 4 dst pixels */
+    beqz              t9, 2f      /* branch if fewer than 4 pixels */
+     nop
+1:                                /* four pixels per iteration */
+    beqz              t9, 2f
+     addiu            t9, t9, -1
+
+    lw                t1, 0(a0)   /* t1..t4 = 4 dst pixels */
+    lw                t2, 4(a0)
+    lw                t3, 8(a0)
+    lw                t4, 12(a0)
+
+    addiu             a2, a2, -4
+
+    not               t5, t1
+    not               t6, t2
+    not               t7, t3
+    not               t8, t4
+    srl               t5, t5, 24  /* t5..t8 = 255 - alpha of each dst pixel */
+    srl               t6, t6, 24
+    srl               t7, t7, 24
+    srl               t8, t8, 24
+    replv.ph          t5, t5      /* replicate each factor into both halfwords */
+    replv.ph          t6, t6
+    replv.ph          t7, t7
+    replv.ph          t8, t8
+    muleu_s.ph.qbl    s0, a1, t5  /* s0/s1 = upper/lower src byte pairs * factor for pixel 0 */
+    muleu_s.ph.qbr    s1, a1, t5
+    muleu_s.ph.qbl    s2, a1, t6  /* s2/s3: pixel 1 */
+    muleu_s.ph.qbr    s3, a1, t6
+    muleu_s.ph.qbl    s4, a1, t7  /* s4/s5: pixel 2 */
+    muleu_s.ph.qbr    s5, a1, t7
+    muleu_s.ph.qbl    s6, a1, t8  /* s6/s7: pixel 3 */
+    muleu_s.ph.qbr    s7, a1, t8
+
+    shra_r.ph         t5, s0, 8   /* shift/mask/add/shift: scale the 16-bit products in s0..s3 back to 8 bits (approx. / 255) */
+    shra_r.ph         t6, s1, 8
+    shra_r.ph         t7, s2, 8
+    shra_r.ph         t8, s3, 8
+    and               t5, t5, t0
+    and               t6, t6, t0
+    and               t7, t7, t0
+    and               t8, t8, t0
+    addq.ph           s0, s0, t5
+    addq.ph           s1, s1, t6
+    addq.ph           s2, s2, t7
+    addq.ph           s3, s3, t8
+    shra_r.ph         s0, s0, 8
+    shra_r.ph         s1, s1, 8
+    shra_r.ph         s2, s2, 8
+    shra_r.ph         s3, s3, 8
+    shra_r.ph         t5, s4, 8   /* same scaling for s4..s7 */
+    shra_r.ph         t6, s5, 8
+    shra_r.ph         t7, s6, 8
+    shra_r.ph         t8, s7, 8
+    and               t5, t5, t0
+    and               t6, t6, t0
+    and               t7, t7, t0
+    and               t8, t8, t0
+    addq.ph           s4, s4, t5
+    addq.ph           s5, s5, t6
+    addq.ph           s6, s6, t7
+    addq.ph           s7, s7, t8
+    shra_r.ph         s4, s4, 8
+    shra_r.ph         s5, s5, 8
+    shra_r.ph         s6, s6, 8
+    shra_r.ph         s7, s7, 8
+
+    precr.qb.ph       t5, s0, s1  /* repack each pair of halves into one 4-byte pixel */
+    precr.qb.ph       t6, s2, s3
+    precr.qb.ph       t7, s4, s5
+    precr.qb.ph       t8, s6, s7
+    addu_s.qb         t5, t1, t5  /* per-byte saturating add onto the original dst */
+    addu_s.qb         t6, t2, t6
+    addu_s.qb         t7, t3, t7
+    addu_s.qb         t8, t4, t8
+
+    sw                t5, 0(a0)
+    sw                t6, 4(a0)
+    sw                t7, 8(a0)
+    sw                t8, 12(a0)
+    b                 1b
+     addiu            a0, a0, 16
+
+2:
+    beqz              a2, 4f      /* no leftover pixels */
+     nop
+3:                                /* one pixel per iteration */
+    lw                t1, 0(a0)   /* t1 = dst */
+
+    not               t2, t1
+    srl               t2, t2, 24  /* t2 = 255 - dst alpha */
+    replv.ph          t2, t2
+
+    muleu_s.ph.qbl    t4, a1, t2
+    muleu_s.ph.qbr    t5, a1, t2
+    shra_r.ph         t6, t4, 8
+    shra_r.ph         t7, t5, 8
+
+    and               t6,t6,t0
+    and               t7,t7,t0
+
+    addq.ph           t8, t4, t6
+    addq.ph           t9, t5, t7
+
+    shra_r.ph         t8, t8, 8
+    shra_r.ph         t9, t9, 8
+
+    precr.qb.ph       t9, t8, t9  /* t9 = scaled src, repacked to 4 bytes */
+
+    addu_s.qb         t9, t1, t9  /* saturating add onto dst */
+    sw                t9, 0(a0)
+
+    addiu             a2, a2, -1
+    bnez              a2, 3b
+     addiu            a0, a0, 4
+4:
+    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
+5:
+    j                 ra
+     nop
+
+END(pixman_composite_over_reverse_n_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips)
+/* Scale each a8 dst byte by the alpha (top byte) of a constant source.
+ * a0 - dst  (a8)
+ * a1 - src  (32bit constant)
+ * a2 - w
+ */
+
+    li                t9, 0x00ff00ff /* t9 = low-byte mask for both halfwords */
+    beqz              a2, 3f
+     nop
+    srl               t7, a2, 2   /* t7 = how many multiples of 4 dst pixels */
+    beqz              t7, 1f      /* branch if fewer than 4 pixels */
+     nop
+
+    srl               t8, a1, 24  /* t8 = srca (top byte of src) */
+    replv.ph          t8, t8      /* srca replicated into both halfwords */
+
+0:                                /* four pixels per iteration */
+    beqz              t7, 1f
+     addiu            t7, t7, -1
+    lbu               t0, 0(a0)   /* t0..t3 = 4 dst bytes */
+    lbu               t1, 1(a0)
+    lbu               t2, 2(a0)
+    lbu               t3, 3(a0)
+
+    precr_sra.ph.w    t1, t0, 0
+    precr_sra.ph.w    t3, t2, 0
+    precr.qb.ph       t0, t3, t1  /* t0 = 4 dst bytes packed, first pixel in the low byte */
+
+    muleu_s.ph.qbl    t2, t0, t8  /* t2 = upper two dst bytes * srca */
+    muleu_s.ph.qbr    t3, t0, t8  /* t3 = lower two dst bytes * srca */
+    shra_r.ph         t4, t2, 8   /* shift/mask/add/shift: scale each 16-bit product back to 8 bits (approx. / 255) */
+    shra_r.ph         t5, t3, 8
+    and               t4, t4, t9
+    and               t5, t5, t9
+    addq.ph           t2, t2, t4
+    addq.ph           t3, t3, t5
+    shra_r.ph         t2, t2, 8
+    shra_r.ph         t3, t3, 8
+    precr.qb.ph       t2, t2, t3  /* t2 = 4 packed result bytes */
+
+    sb                t2, 0(a0)   /* write the 4 result bytes, lowest byte first */
+    srl               t2, t2, 8
+    sb                t2, 1(a0)
+    srl               t2, t2, 8
+    sb                t2, 2(a0)
+    srl               t2, t2, 8
+    sb                t2, 3(a0)
+    addiu             a2, a2, -4
+    b                 0b
+     addiu            a0, a0, 4
+
+1:
+    beqz              a2, 3f      /* no leftover pixels */
+     nop
+    srl               t8, a1, 24  /* t8 = srca again, not replicated, for the scalar loop */
+2:                                /* one pixel per iteration */
+    lbu               t0, 0(a0)
+
+    mul               t2, t0, t8  /* dst * srca */
+    shra_r.ph         t3, t2, 8
+    andi              t3, t3, 0x00ff
+    addq.ph           t2, t2, t3
+    shra_r.ph         t2, t2, 8
+
+    sb                t2, 0(a0)
+    addiu             a2, a2, -1
+    bnez              a2, 2b
+     addiu            a0, a0, 1
+
+3:
+    j                 ra
+     nop
+
+END(pixman_composite_in_n_8_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
+/* Composite source pixels picked at index (vx >> 16) onto dst; vx advances by unit_x per pixel.
+ * a0     - dst  (a8r8g8b8)
+ * a1     - src  (a8r8g8b8)
+ * a2     - w
+ * a3     - vx
+ * 16(sp) - unit_x
+ */
+
+    SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
+    lw       t8, 16(sp) /* t8 = unit_x */
+    li       t6, 0x00ff00ff
+    beqz     a2, 3f
+     nop
+    addiu    t1, a2, -1
+    beqz     t1, 2f     /* w == 1: only the single-pixel tail runs */
+     nop
+1:                      /* two pixels per iteration */
+    sra      t0, a3, 16 /* t0 = vx >> 16 */
+    sll      t0, t0, 2  /* t0 = t0 * 4 (a8r8g8b8) */
+    addu     t0, a1, t0
+    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
+    addu     a3, a3, t8 /* a3 = vx + unit_x */
+
+    sra      t1, a3, 16 /* t1 = vx >> 16 */
+    sll      t1, t1, 2  /* t1 = t1 * 4 (a8r8g8b8) */
+    addu     t1, a1, t1
+    lw       t1, 0(t1)  /* t1 = source      (a8r8g8b8) */
+    addu     a3, a3, t8 /* a3 = vx + unit_x */
+
+    lw       t2, 0(a0)  /* t2 = destination (a8r8g8b8) */
+    lw       t3, 4(a0)  /* t3 = destination (a8r8g8b8) */
+
+    OVER_2x8888_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t9, s0, s1, s2, s3
+
+    sw       t4, 0(a0)
+    sw       t5, 4(a0)
+    addiu    a2, a2, -2
+    addiu    t1, a2, -1
+    bgtz     t1, 1b     /* loop while at least 2 pixels remain */
+     addiu   a0, a0, 8
+2:
+    beqz     a2, 3f     /* no odd pixel left */
+     nop
+    sra      t0, a3, 16 /* t0 = vx >> 16 */
+    sll      t0, t0, 2  /* t0 = t0 * 4 (a8r8g8b8) */
+    addu     t0, a1, t0
+    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
+    lw       t1, 0(a0)  /* t1 = destination (a8r8g8b8) */
+    addu     a3, a3, t8 /* a3 = vx + unit_x */
+
+    OVER_8888_8888 t0, t1, t2, t6, t4, t5, t3, t7
+
+    sw       t2, 0(a0)
+3:
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
+    j        ra
+     nop
+
+END(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
+/* Composite source pixels picked at index (vx >> 16) onto r5g6b5 dst; vx advances by unit_x per pixel.
+ * a0     - dst  (r5g6b5)
+ * a1     - src  (a8r8g8b8)
+ * a2     - w
+ * a3     - vx
+ * 16(sp) - unit_x
+ */
+
+    SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, v0, v1
+    lw       t8, 40(sp) /* t8 = unit_x; NOTE(review): header lists 16(sp), presumably 16 + the 24 passed to SAVE_REGS_ON_STACK - confirm against the macro */
+    li       t4, 0x00ff00ff
+    li       t5, 0xf800f800
+    li       t6, 0x07e007e0
+    li       t7, 0x001F001F
+    beqz     a2, 3f
+     nop
+    addiu    t1, a2, -1
+    beqz     t1, 2f     /* w == 1: only the single-pixel tail runs */
+     nop
+1:                      /* two pixels per iteration */
+    sra      t0, a3, 16 /* t0 = vx >> 16 */
+    sll      t0, t0, 2  /* t0 = t0 * 4 (a8r8g8b8) */
+    addu     t0, a1, t0
+    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
+    addu     a3, a3, t8 /* a3 = vx + unit_x */
+    sra      t1, a3, 16 /* t1 = vx >> 16 */
+    sll      t1, t1, 2  /* t1 = t1 * 4 (a8r8g8b8) */
+    addu     t1, a1, t1
+    lw       t1, 0(t1)  /* t1 = source      (a8r8g8b8) */
+    addu     a3, a3, t8 /* a3 = vx + unit_x */
+    lhu      t2, 0(a0)  /* t2 = destination (r5g6b5) */
+    lhu      t3, 2(a0)  /* t3 = destination (r5g6b5) */
+
+    CONVERT_2x0565_TO_2x8888 t2, t3, v0, v1, t6, t7, s0, s1, s2, s3
+    OVER_2x8888_2x8888       t0, t1, v0, v1, t2, t3, t4, t9, s0, s1, s2, s3, s4
+    CONVERT_2x8888_TO_2x0565 t2, t3, v0, v1, t5, t6, t7, t9, s2
+
+    sh       v0, 0(a0)
+    sh       v1, 2(a0)
+    addiu    a2, a2, -2
+    addiu    t1, a2, -1
+    bgtz     t1, 1b     /* loop while at least 2 pixels remain */
+     addiu   a0, a0, 4
+2:
+    beqz     a2, 3f     /* no odd pixel left */
+     nop
+    sra      t0, a3, 16 /* t0 = vx >> 16 */
+    sll      t0, t0, 2  /* t0 = t0 * 4 (a8r8g8b8) */
+    addu     t0, a1, t0
+    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
+    lhu      t1, 0(a0)  /* t1 = destination (r5g6b5) */
+    addu     a3, a3, t8 /* a3 = vx + unit_x */
+
+    CONVERT_1x0565_TO_1x8888 t1, t2, t5, t6 /* t5/t6 (mask constants) are reused as scratch; this is the last pixel */
+    OVER_8888_8888           t0, t2, t1, t4, t3, t5, t6, t7
+    CONVERT_1x8888_TO_1x0565 t1, t2, t5, t6
+
+    sh       t2, 0(a0)
+3:
+    RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, v0, v1
+    j        ra
+     nop
+
+END(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
+/* Nearest-scaled scanline: r5g6b5 source converted and stored as a8r8g8b8.
+ * a0     - dst (a8r8g8b8), written without being read
+ * a1     - src (r5g6b5), read at pixel index vx >> 16
+ * a2     - w   (number of destination pixels)
+ * a3     - vx  (source position, advanced by unit_x in the main loop)
+ * 16(sp) - unit_x
+ */
+
+    SAVE_REGS_ON_STACK 0, v0
+    beqz     a2, 3f     /* w == 0: nothing to draw */
+     nop
+
+    lw       v0, 16(sp) /* v0 = unit_x */
+    addiu    t1, a2, -1
+    beqz     t1, 2f     /* w == 1: go straight to the single-pixel tail */
+     nop
+
+    li       t4, 0x07e007e0 /* masks handed to CONVERT_2x0565_TO_2x8888 only; */
+    li       t5, 0x001F001F /* the tail at 2: does not pass t4/t5 to its macro */
+1: /* main loop: two pixels per iteration */
+    sra      t0, a3, 16 /* t0 = vx >> 16 */
+    sll      t0, t0, 1  /* t0 = t0 * 2 ((r5g6b5)) */
+    addu     t0, a1, t0
+    lhu      t0, 0(t0)  /* t0 = source ((r5g6b5)) */
+    addu     a3, a3, v0 /* a3 = vx + unit_x */
+    sra      t1, a3, 16 /* t1 = vx >> 16 */
+    sll      t1, t1, 1  /* t1 = t1 * 2 ((r5g6b5)) */
+    addu     t1, a1, t1
+    lhu      t1, 0(t1)  /* t1 = source ((r5g6b5)) */
+    addu     a3, a3, v0 /* a3 = vx + unit_x */
+    addiu    a2, a2, -2 /* w -= 2 */
+
+    CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9
+
+    sw       t2, 0(a0)  /* store both converted a8r8g8b8 pixels */
+    sw       t3, 4(a0)
+
+    addiu    t2, a2, -1
+    bgtz     t2, 1b     /* loop while at least two pixels remain */
+     addiu   a0, a0, 8  /* dst += 2 pixels (branch delay slot; assumes noreorder) */
+2: /* tail: at most one pixel left */
+    beqz     a2, 3f     /* w == 0: done */
+     nop
+    sra      t0, a3, 16 /* t0 = vx >> 16 */
+    sll      t0, t0, 1  /* t0 = t0 * 2 ((r5g6b5)) */
+    addu     t0, a1, t0
+    lhu      t0, 0(t0)  /* t0 = source ((r5g6b5)) */
+
+    CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3
+
+    sw       t1, 0(a0)  /* store the last converted pixel */
+3: /* common exit */
+    RESTORE_REGS_FROM_STACK 0, v0
+    j        ra
+     nop
+
+END(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
+/* Nearest-scaled scanline: a8r8g8b8 source, a8 mask, OVER an r5g6b5 dst.
+ * a0     - dst  (r5g6b5)
+ * a1     - src  (a8r8g8b8), read at pixel index vx >> 16
+ * a2     - mask (a8), read sequentially, one byte per destination pixel
+ * a3     - w    (number of destination pixels)
+ * 16(sp) - vx
+ * 20(sp) - unit_x
+ */
+    beqz     a3, 4f     /* w == 0: return before any register is saved */
+     nop
+
+    SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
+    lw       v0, 36(sp) /* v0 = vx     (16(sp) at entry; presumably sp moved by 20 above) */
+    lw       v1, 40(sp) /* v1 = unit_x (20(sp) at entry) */
+    li       t6, 0x00ff00ff /* t6 = mask passed to the OVER_* macros */
+    li       t7, 0xf800f800 /* t7 = maskR of CONVERT_2x8888_TO_2x0565 */
+    li       t8, 0x07e007e0 /* t8 = maskG; also given to the 0565->8888 macro */
+    li       t9, 0x001F001F /* t9 = maskB; also given to the 0565->8888 macro */
+
+    addiu    t1, a3, -1
+    beqz     t1, 2f     /* w == 1: go straight to the single-pixel tail */
+     nop
+1: /* main loop: two pixels per iteration */
+    sra      t0, v0, 16 /* t0 = vx >> 16 */
+    sll      t0, t0, 2  /* t0 = t0 * 4      (a8r8g8b8) */
+    addu     t0, a1, t0
+    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
+    addu     v0, v0, v1 /* v0 = vx + unit_x */
+    sra      t1, v0, 16 /* t1 = vx >> 16 */
+    sll      t1, t1, 2  /* t1 = t1 * 4      (a8r8g8b8) */
+    addu     t1, a1, t1
+    lw       t1, 0(t1)  /* t1 = source      (a8r8g8b8) */
+    addu     v0, v0, v1 /* v0 = vx + unit_x */
+    lbu      t2, 0(a2)  /* t2 = mask        (a8) */
+    lbu      t3, 1(a2)  /* t3 = mask        (a8) */
+    lhu      t4, 0(a0)  /* t4 = destination (r5g6b5) */
+    lhu      t5, 2(a0)  /* t5 = destination (r5g6b5) */
+    addiu    a2, a2, 2  /* mask += 2 */
+
+    CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5
+    OVER_2x8888_2x8_2x8888   t0, t1, \
+                             t2, t3, \
+                             s0, s1, \
+                             t4, t5, \
+                             t6, s2, s3, s4, s5, t2, t3
+    CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3
+
+    sh       s0, 0(a0)  /* store both blended r5g6b5 pixels */
+    sh       s1, 2(a0)
+    addiu    a3, a3, -2 /* w -= 2 */
+    addiu    t1, a3, -1
+    bgtz     t1, 1b     /* loop while at least two pixels remain */
+     addiu   a0, a0, 4  /* dst += 2 pixels (branch delay slot; assumes noreorder) */
+2: /* tail: at most one pixel left */
+    beqz     a3, 3f     /* w == 0: done */
+     nop
+    sra      t0, v0, 16 /* t0 = vx >> 16 */
+    sll      t0, t0, 2  /* t0 = t0 * 4      (a8r8g8b8) */
+    addu     t0, a1, t0
+    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
+    lbu      t1, 0(a2)  /* t1 = mask        (a8) */
+    lhu      t2, 0(a0)  /* t2 = destination (r5g6b5) */
+
+    CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5
+    OVER_8888_8_8888         t0, t1, t3, t2, t6, t4, t5, t7, t8
+    CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5
+
+    sh       t3, 0(a0)  /* store the last blended r5g6b5 pixel */
+3: /* exit for paths that saved registers */
+    RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
+4: /* exit for w == 0 on entry: nothing was saved */
+    j        ra
+     nop
+
+END(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
+/* Nearest-scaled scanline: r5g6b5 source, a8 mask, OVER an r5g6b5 dst.
+ * a0     - dst  (r5g6b5)
+ * a1     - src  (r5g6b5), read at pixel index vx >> 16
+ * a2     - mask (a8), read sequentially, one byte per destination pixel
+ * a3     - w    (number of destination pixels)
+ * 16(sp) - vx
+ * 20(sp) - unit_x
+ */
+
+    beqz     a3, 4f     /* w == 0: return before any register is saved */
+     nop
+    SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
+    lw       v0, 36(sp) /* v0 = vx     (16(sp) at entry; presumably sp moved by 20 above) */
+    lw       v1, 40(sp) /* v1 = unit_x (20(sp) at entry) */
+    li       t4, 0xf800f800 /* t4 = maskR of CONVERT_2x8888_TO_2x0565 */
+    li       t5, 0x07e007e0 /* t5 = maskG; also given to the 0565->8888 macro */
+    li       t6, 0x001F001F /* t6 = maskB; also given to the 0565->8888 macro */
+    li       t7, 0x00ff00ff /* t7 = mask passed to the OVER_* macros */
+
+    addiu    t1, a3, -1
+    beqz     t1, 2f     /* w == 1: go straight to the single-pixel tail */
+     nop
+1: /* main loop: two pixels per iteration */
+    sra      t0, v0, 16 /* t0 = vx >> 16 */
+    sll      t0, t0, 1  /* t0 = t0 * 2      (r5g6b5) */
+    addu     t0, a1, t0
+    lhu      t0, 0(t0)  /* t0 = source      (r5g6b5) */
+    addu     v0, v0, v1 /* v0 = vx + unit_x */
+    sra      t1, v0, 16 /* t1 = vx >> 16 */
+    sll      t1, t1, 1  /* t1 = t1 * 2      (r5g6b5) */
+    addu     t1, a1, t1
+    lhu      t1, 0(t1)  /* t1 = source      (r5g6b5) */
+    addu     v0, v0, v1 /* v0 = vx + unit_x */
+    lbu      t2, 0(a2)  /* t2 = mask        (a8) */
+    lbu      t3, 1(a2)  /* t3 = mask        (a8) */
+    lhu      t8, 0(a0)  /* t8 = destination (r5g6b5) */
+    lhu      t9, 2(a0)  /* t9 = destination (r5g6b5) */
+    addiu    a2, a2, 2  /* mask += 2 */
+
+    CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
+    CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1
+    OVER_2x8888_2x8_2x8888   s0, s1, \
+                             t2, t3, \
+                             s2, s3, \
+                             t0, t1, \
+                             t7, t8, t9, s4, s5, s0, s1
+    CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3
+
+    sh       s0, 0(a0)  /* store both blended r5g6b5 pixels */
+    sh       s1, 2(a0)
+    addiu    a3, a3, -2 /* w -= 2 */
+    addiu    t1, a3, -1
+    bgtz     t1, 1b     /* loop while at least two pixels remain */
+     addiu   a0, a0, 4  /* dst += 2 pixels (branch delay slot; assumes noreorder) */
+2: /* tail: at most one pixel left */
+    beqz     a3, 3f     /* w == 0: done */
+     nop
+    sra      t0, v0, 16 /* t0 = vx >> 16 */
+    sll      t0, t0, 1  /* t0 = t0 * 2      (r5g6b5) */
+    addu     t0, a1, t0
+
+    lhu      t0, 0(t0)  /* t0 = source      (r5g6b5) */
+    lbu      t1, 0(a2)  /* t1 = mask        (a8) */
+    lhu      t2, 0(a0)  /* t2 = destination (r5g6b5) */
+
+    CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5
+    CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6
+    OVER_8888_8_8888         t3, t1, t4, t0, t7, t2, t5, t6, t8
+    CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5
+
+    sh       t3, 0(a0)  /* store the last blended r5g6b5 pixel */
+3: /* exit for paths that saved registers */
+    RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
+4: /* exit for w == 0 on entry: nothing was saved */
+    j        ra
+     nop
+
+END(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
 /*
  * a0     - *dst
diff --git a/lib/pixman/pixman/pixman-mips-dspr2-asm.h b/lib/pixman/pixman/pixman-mips-dspr2-asm.h
index b330c0f0d..cab122d80 100644
--- a/lib/pixman/pixman/pixman-mips-dspr2-asm.h
+++ b/lib/pixman/pixman/pixman-mips-dspr2-asm.h
@@ -354,17 +354,16 @@ LEAF_MIPS32R2(symbol)                                   \
                                 out1_565, out2_565,  \
                                 maskR, maskG, maskB, \
                                 scratch1, scratch2
-    precrq.ph.w       \scratch1, \in2_8888, \in1_8888
-    precr_sra.ph.w    \in2_8888, \in1_8888, 0
-    shll.ph           \scratch1, \scratch1, 8
-    srl               \in2_8888, \in2_8888, 3
-    and               \scratch2, \in2_8888, \maskB
-    and               \scratch1, \scratch1, \maskR
-    srl               \in2_8888, \in2_8888, 2
-    and               \out2_565, \in2_8888, \maskG
-    or                \out2_565, \out2_565, \scratch2
-    or                \out1_565, \out2_565, \scratch1
-    srl               \out2_565, \out1_565, 16
+    precr.qb.ph    \scratch1, \in2_8888, \in1_8888
+    precrq.qb.ph   \in2_8888, \in2_8888, \in1_8888
+    and            \out1_565, \scratch1, \maskR
+    shrl.ph        \scratch1, \scratch1, 3
+    shll.ph        \in2_8888, \in2_8888, 3
+    and            \scratch1, \scratch1, \maskB
+    or             \out1_565, \out1_565, \scratch1
+    and            \in2_8888, \in2_8888, \maskG
+    or             \out1_565, \out1_565, \in2_8888
+    srl            \out2_565, \out1_565, 16
 .endm
 
 /*
@@ -587,6 +586,36 @@ LEAF_MIPS32R2(symbol)                                   \
     addu_s.qb          \out_8888, \out_8888, \s_8888
 .endm
 
+/*
+ * OVER of two a8r8g8b8 sources (s1_8888, s2_8888) onto two a8r8g8b8 dests
+ * (d1_8888, d2_8888): each dest is multiplied by (~src >> 24), then src is
+ * added with saturation into out1_8888/out2_8888. d1/d2 double as the multiply
+ * macro's trailing arguments (treat as clobbered). Rounding mask maskLSR must
+ * be loaded with: li maskLSR, 0x00ff00ff
+ */
+.macro OVER_2x8888_2x8888 s1_8888,   \
+                          s2_8888,   \
+                          d1_8888,   \
+                          d2_8888,   \
+                          out1_8888, \
+                          out2_8888, \
+                          maskLSR,   \
+                          scratch1, scratch2, scratch3, \
+                          scratch4, scratch5, scratch6
+    not                    \scratch1,  \s1_8888
+    srl                    \scratch1,  \scratch1,  24
+    not                    \scratch2,  \s2_8888
+    srl                    \scratch2,  \scratch2,  24
+    MIPS_2xUN8x4_MUL_2xUN8 \d1_8888,   \d2_8888, \
+                           \scratch1,  \scratch2,  \
+                           \out1_8888, \out2_8888, \
+                           \maskLSR, \
+                           \scratch3,  \scratch4, \scratch5, \
+                           \scratch6,  \d1_8888,  \d2_8888
+
+    addu_s.qb              \out1_8888, \out1_8888, \s1_8888
+    addu_s.qb              \out2_8888, \out2_8888, \s2_8888
+.endm
+
 .macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888,   \
                                     m_8,      \
                                     d_8888,   \
diff --git a/lib/pixman/pixman/pixman-mips-dspr2.c b/lib/pixman/pixman/pixman-mips-dspr2.c
index 9da636d5a..e10c9df0a 100644
--- a/lib/pixman/pixman/pixman-mips-dspr2.c
+++ b/lib/pixman/pixman/pixman-mips-dspr2.c
@@ -48,17 +48,39 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888,
                                     uint32_t, 1, uint32_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888,
                                     uint8_t, 3, uint8_t, 3)
+#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_8888_rev,
+                                    uint8_t, 3, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_0565_rev,
+                                    uint8_t, 3, uint16_t, 1)
+#endif
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_pixbuf_8888,
+                                    uint32_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_rpixbuf_8888,
+                                    uint32_t, 1, uint32_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_8888,
                                     uint32_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_0565,
+                                    uint32_t, 1, uint16_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8_8,
                                     uint8_t, 1, uint8_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8888_8888,
                                     uint32_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_0565,
+                                    uint8_t, 1, uint16_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_8888,
+                                    uint8_t,  1, uint32_t, 1)
 
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8888,
+                                       uint8_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8,
+                                       uint8_t, 1, uint8_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca,
                                        uint32_t, 1, uint32_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca,
                                        uint32_t, 1, uint16_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8,
+                                       uint8_t, 1, uint8_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
                                        uint8_t, 1, uint32_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
@@ -77,6 +99,15 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_0565_n_0565,
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, add_8888_n_8888,
                                       uint32_t, 1, uint32_t, 1)
 
+PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_0565,
+                                  uint16_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_8888,
+                                  uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_reverse_n_8888,
+                                  uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_DST (0, in_n_8,
+                                  uint8_t, 1)
+
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8_8_8, uint8_t,  1,
                                          uint8_t,  1, uint8_t,  1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8_8888, uint32_t, 1,
@@ -94,6 +125,13 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_0565_8_0565, uint16_t, 1,
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8888_8888, uint32_t, 1,
                                          uint32_t, 1, uint32_t, 1)
 
+PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_8888, OVER,
+                                         uint32_t, uint32_t)
+PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_0565, OVER,
+                                         uint32_t, uint16_t)
+PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (0565_8888, SRC,
+                                         uint16_t, uint32_t)
+
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC,
                                           uint32_t, uint32_t)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC,
@@ -107,6 +145,11 @@ PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, OVER,
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, ADD,
                                           uint32_t, uint32_t)
 
+PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_0565,
+                                            OVER, uint32_t, uint16_t)
+PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, 0565_8_0565,
+                                            OVER, uint16_t, uint16_t)
+
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_8888, SRC,
                                              uint32_t, uint32_t)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_0565, SRC,
@@ -256,6 +299,19 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888),
     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888),
     PIXMAN_STD_FAST_PATH (SRC, r8g8b8,   null, r8g8b8,   mips_composite_src_0888_0888),
+#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+    PIXMAN_STD_FAST_PATH (SRC, b8g8r8,   null, x8r8g8b8, mips_composite_src_0888_8888_rev),
+    PIXMAN_STD_FAST_PATH (SRC, b8g8r8,   null, r5g6b5,   mips_composite_src_0888_0565_rev),
+#endif
+    PIXMAN_STD_FAST_PATH (SRC, pixbuf,   pixbuf,  a8r8g8b8, mips_composite_src_pixbuf_8888),
+    PIXMAN_STD_FAST_PATH (SRC, pixbuf,   pixbuf,  a8b8g8r8, mips_composite_src_rpixbuf_8888),
+    PIXMAN_STD_FAST_PATH (SRC, rpixbuf,  rpixbuf, a8r8g8b8, mips_composite_src_rpixbuf_8888),
+    PIXMAN_STD_FAST_PATH (SRC, rpixbuf,  rpixbuf, a8b8g8r8, mips_composite_src_pixbuf_8888),
+    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   a8r8g8b8, mips_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   x8r8g8b8, mips_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   a8b8g8r8, mips_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   x8b8g8r8, mips_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   a8,       mips_composite_src_n_8_8),
 
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, mips_composite_over_n_8888_8888_ca),
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, mips_composite_over_n_8888_8888_ca),
@@ -263,13 +319,16 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mips_composite_over_n_8888_8888_ca),
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5,   mips_composite_over_n_8888_0565_ca),
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5,   mips_composite_over_n_8888_0565_ca),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8,       mips_composite_over_n_8_8),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8r8g8b8, mips_composite_over_n_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8r8g8b8, mips_composite_over_n_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8b8g8r8, mips_composite_over_n_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8b8g8r8, mips_composite_over_n_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   mips_composite_over_n_8_0565),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   mips_composite_over_n_8_0565),
-
+    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     r5g6b5,   mips_composite_over_n_0565),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     a8r8g8b8, mips_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     x8r8g8b8, mips_composite_over_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    a8r8g8b8, mips_composite_over_8888_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    x8r8g8b8, mips_composite_over_8888_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    r5g6b5,   mips_composite_over_8888_n_0565),
@@ -289,6 +348,8 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     x8r8g8b8, mips_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     a8b8g8r8, mips_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     x8b8g8r8, mips_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     r5g6b5,   mips_composite_over_8888_0565),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     b5g6r5,   mips_composite_over_8888_0565),
     PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8,       mips_composite_add_n_8_8),
     PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8r8g8b8, mips_composite_add_n_8_8888),
     PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8b8g8r8, mips_composite_add_n_8_8888),
@@ -303,6 +364,35 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (ADD,  a8,       null,     a8,       mips_composite_add_8_8),
     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, mips_composite_add_8888_8888),
     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     a8b8g8r8, mips_composite_add_8888_8888),
+    PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8,    null, r5g6b5,   mips_composite_out_reverse_8_0565),
+    PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8,    null, b5g6r5,   mips_composite_out_reverse_8_0565),
+    PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8,    null, a8r8g8b8, mips_composite_out_reverse_8_8888),
+    PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8,    null, a8b8g8r8, mips_composite_out_reverse_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, mips_composite_over_reverse_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mips_composite_over_reverse_n_8888),
+    PIXMAN_STD_FAST_PATH (IN,           solid, null, a8,       mips_composite_in_n_8),
+
+    PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888),
+    PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mips_8888_8888),
+    PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888),
+    PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mips_8888_8888),
+
+    PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_0565),
+    PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_0565),
+
+    PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, mips_0565_8888),
+    PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888),
+    /* Note: NONE repeat is not supported yet */
+    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, mips_0565_8888),
+    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, mips_0565_8888),
+    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, mips_0565_8888),
+    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, mips_0565_8888),
+
+    PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565),
+    PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565),
+
+    PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, mips_0565_8_0565),
+    PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, mips_0565_8_0565),
 
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888),
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888),
diff --git a/lib/pixman/pixman/pixman-mips-dspr2.h b/lib/pixman/pixman/pixman-mips-dspr2.h
index bddcfd827..955ed70b8 100644
--- a/lib/pixman/pixman/pixman-mips-dspr2.h
+++ b/lib/pixman/pixman/pixman-mips-dspr2.h
@@ -85,6 +85,42 @@ mips_composite_##name (pixman_implementation_t *imp,             \
     }                                                            \
 }
 
+/****************************************************************/
+/* Binds a solid-source, mask-less routine: declares pixman_composite_<name>_asm_mips and defines mips_composite_<name> around it. */
+#define PIXMAN_MIPS_BIND_FAST_PATH_N_DST(flags, name,            \
+                                         dst_type, dst_cnt)      \
+void                                                             \
+pixman_composite_##name##_asm_mips (dst_type *dst,               \
+                                    uint32_t  src,               \
+                                    int32_t   w);                \
+                                                                 \
+static void                                                      \
+mips_composite_##name (pixman_implementation_t *imp,             \
+                       pixman_composite_info_t *info)            \
+{                                                                \
+    PIXMAN_COMPOSITE_ARGS (info);                                \
+    dst_type  *dst_line, *dst;                                   \
+    int32_t    dst_stride;                                       \
+    uint32_t   src;                                              \
+                                                                 \
+    src = _pixman_image_get_solid (                              \
+    imp, src_image, dest_image->bits.format);                    \
+                                                                 \
+    if ((flags & SKIP_ZERO_SRC) && src == 0)                     \
+        return; /* binding asked to skip a zero solid source */  \
+                                                                 \
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
+                           dst_stride, dst_line, dst_cnt);       \
+                                                                 \
+    while (height--) /* one asm call per destination scanline */ \
+    {                                                            \
+        dst = dst_line;                                          \
+        dst_line += dst_stride;                                  \
+                                                                 \
+        pixman_composite_##name##_asm_mips (dst, src, width);    \
+    }                                                            \
+}
+
 /*******************************************************************/
 
 #define PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST(flags, name,          \
@@ -212,6 +248,94 @@ mips_composite_##name (pixman_implementation_t *imp,                     \
 
 /****************************************************************************/
 
+#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST(name, op,                    \
+                                                src_type, dst_type)          \
+void                                                                         \
+pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (                    \
+                                                   dst_type *       dst,     \
+                                                   const src_type * src,     \
+                                                   int32_t          w,       \
+                                                   pixman_fixed_t   vx,      \
+                                                   pixman_fixed_t   unit_x); \
+                                                                             \
+static force_inline void                                                     \
+scaled_nearest_scanline_mips_##name##_##op (dst_type *       pd,             \
+                                            const src_type * ps,             \
+                                            int32_t          w,              \
+                                            pixman_fixed_t   vx,             \
+                                            pixman_fixed_t   unit_x,         \
+                                            pixman_fixed_t   max_vx,         \
+                                            pixman_bool_t    zero_src)       \
+{                                                                            \
+    pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, w,      \
+                                                             vx, unit_x);    \
+}                                                                            \
+                                                                             \
+FAST_NEAREST_MAINLOOP (mips_##name##_cover_##op,                             \
+                       scaled_nearest_scanline_mips_##name##_##op,           \
+                       src_type, dst_type, COVER)                            \
+FAST_NEAREST_MAINLOOP (mips_##name##_none_##op,                              \
+                       scaled_nearest_scanline_mips_##name##_##op,           \
+                       src_type, dst_type, NONE)                             \
+FAST_NEAREST_MAINLOOP (mips_##name##_pad_##op,                               \
+                       scaled_nearest_scanline_mips_##name##_##op,           \
+                       src_type, dst_type, PAD)
+
+/* Expands to three fast path table entries for func: the COVER, NONE and PAD variants */
+#define PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH(op,s,d,func)                    \
+    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),                            \
+    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),                             \
+    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
+
+
+/*****************************************************************************/
+/* As the unmasked binding, plus a uint8_t mask: the wrapper takes mask first, passes it third to the asm routine, and returns early when flags has SKIP_ZERO_SRC and zero_src is set. */
+#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST(flags, name, op,           \
+                                                  src_type, dst_type)         \
+void                                                                          \
+pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (                     \
+                                                   dst_type *       dst,      \
+                                                   const src_type * src,      \
+                                                   const uint8_t *  mask,     \
+                                                   int32_t          w,        \
+                                                   pixman_fixed_t   vx,       \
+                                                   pixman_fixed_t   unit_x);  \
+                                                                              \
+static force_inline void                                                      \
+scaled_nearest_scanline_mips_##name##_##op (const uint8_t *  mask,            \
+                                            dst_type *       pd,              \
+                                            const src_type * ps,              \
+                                            int32_t          w,               \
+                                            pixman_fixed_t   vx,              \
+                                            pixman_fixed_t   unit_x,          \
+                                            pixman_fixed_t   max_vx,          \
+                                            pixman_bool_t    zero_src)        \
+{                                                                             \
+    if ((flags & SKIP_ZERO_SRC) && zero_src)                                  \
+        return;                                                               \
+    pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps,          \
+                                                             mask, w,         \
+                                                             vx, unit_x);     \
+}                                                                             \
+                                                                              \
+FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_cover_##op,                       \
+                              scaled_nearest_scanline_mips_##name##_##op,     \
+                              src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\
+FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_none_##op,                        \
+                              scaled_nearest_scanline_mips_##name##_##op,     \
+                              src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
+FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_pad_##op,                         \
+                              scaled_nearest_scanline_mips_##name##_##op,     \
+                              src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
+
+/* Expands to three a8-mask fast path table entries for func: the COVER, NONE and PAD variants */
+#define PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)             \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func),                     \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),                      \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+
+/****************************************************************************/
+
 #define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op,            \
                                                  src_type, dst_type)         \
 void                                                                         \
diff --git a/lib/pixman/pixman/pixman-mmx.c b/lib/pixman/pixman/pixman-mmx.c
index 1e6dbe8aa..14790c029 100644
--- a/lib/pixman/pixman/pixman-mmx.c
+++ b/lib/pixman/pixman/pixman-mmx.c
@@ -44,8 +44,6 @@
 #include "pixman-combine32.h"
 #include "pixman-inlines.h"
 
-#define no_vERBOSE
-
 #ifdef VERBOSE
 #define CHECKPOINT() error_f ("at %s %d\n", __FUNCTION__, __LINE__)
 #else
@@ -62,7 +60,7 @@ _mm_empty (void)
 #endif
 
 #ifdef USE_X86_MMX
-# if (defined(__SUNPRO_C) || defined(_MSC_VER))
+# if (defined(__SUNPRO_C) || defined(_MSC_VER) || defined(_WIN64))
 #  include <xmmintrin.h>
 # else
 /* We have to compile with -msse to use xmmintrin.h, but that causes SSE
@@ -1402,7 +1400,7 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp,
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    store8888 (dst, over (vsrc, vsrca, load8888 (dst)));
 
@@ -1468,7 +1466,7 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp,
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    uint64_t d = *dst;
 	    __m64 vdest = expand565 (to_m64 (d), 0);
@@ -1546,7 +1544,7 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
 	uint32_t *p = (uint32_t *)mask_line;
 	uint32_t *q = (uint32_t *)dst_line;
 
-	while (twidth && (unsigned long)q & 7)
+	while (twidth && (uintptr_t)q & 7)
 	{
 	    uint32_t m = *(uint32_t *)p;
 
@@ -1637,7 +1635,7 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    __m64 s = load8888 (src);
 	    __m64 d = load8888 (dst);
@@ -1707,7 +1705,7 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    uint32_t ssrc = *src | 0xff000000;
 	    __m64 s = load8888 (&ssrc);
@@ -1881,7 +1879,7 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    __m64 vsrc = load8888 (src);
 	    uint64_t d = *dst;
@@ -1984,7 +1982,7 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    uint64_t m = *mask;
 
@@ -2064,7 +2062,7 @@ mmx_fill (pixman_implementation_t *imp,
           int                      y,
           int                      width,
           int                      height,
-          uint32_t		   xor)
+          uint32_t		   filler)
 {
     uint64_t fill;
     __m64 vfill;
@@ -2084,7 +2082,7 @@ mmx_fill (pixman_implementation_t *imp,
 	byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
 	byte_width = width;
 	stride *= 1;
-        xor = (xor & 0xff) * 0x01010101;
+        filler = (filler & 0xff) * 0x01010101;
     }
     else if (bpp == 16)
     {
@@ -2092,7 +2090,7 @@ mmx_fill (pixman_implementation_t *imp,
 	byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
 	byte_width = 2 * width;
 	stride *= 2;
-        xor = (xor & 0xffff) * 0x00010001;
+        filler = (filler & 0xffff) * 0x00010001;
     }
     else
     {
@@ -2102,7 +2100,7 @@ mmx_fill (pixman_implementation_t *imp,
 	stride *= 4;
     }
 
-    fill = ((uint64_t)xor << 32) | xor;
+    fill = ((uint64_t)filler << 32) | filler;
     vfill = to_m64 (fill);
 
 #if defined __GNUC__ && defined USE_X86_MMX
@@ -2127,23 +2125,23 @@ mmx_fill (pixman_implementation_t *imp,
 	byte_line += stride;
 	w = byte_width;
 
-	if (w >= 1 && ((unsigned long)d & 1))
+	if (w >= 1 && ((uintptr_t)d & 1))
 	{
-	    *(uint8_t *)d = (xor & 0xff);
+	    *(uint8_t *)d = (filler & 0xff);
 	    w--;
 	    d++;
 	}
 
-	if (w >= 2 && ((unsigned long)d & 3))
+	if (w >= 2 && ((uintptr_t)d & 3))
 	{
-	    *(uint16_t *)d = xor;
+	    *(uint16_t *)d = filler;
 	    w -= 2;
 	    d += 2;
 	}
 
-	while (w >= 4 && ((unsigned long)d & 7))
+	while (w >= 4 && ((uintptr_t)d & 7))
 	{
-	    *(uint32_t *)d = xor;
+	    *(uint32_t *)d = filler;
 
 	    w -= 4;
 	    d += 4;
@@ -2182,20 +2180,20 @@ mmx_fill (pixman_implementation_t *imp,
 
 	while (w >= 4)
 	{
-	    *(uint32_t *)d = xor;
+	    *(uint32_t *)d = filler;
 
 	    w -= 4;
 	    d += 4;
 	}
 	if (w >= 2)
 	{
-	    *(uint16_t *)d = xor;
+	    *(uint16_t *)d = filler;
 	    w -= 2;
 	    d += 2;
 	}
 	if (w >= 1)
 	{
-	    *(uint8_t *)d = (xor & 0xff);
+	    *(uint8_t *)d = (filler & 0xff);
 	    w--;
 	    d++;
 	}
@@ -2227,10 +2225,10 @@ mmx_composite_src_x888_0565 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    s = *src++;
-	    *dst = CONVERT_8888_TO_0565 (s);
+	    *dst = convert_8888_to_0565 (s);
 	    dst++;
 	    w--;
 	}
@@ -2253,7 +2251,7 @@ mmx_composite_src_x888_0565 (pixman_implementation_t *imp,
 	while (w)
 	{
 	    s = *src++;
-	    *dst = CONVERT_8888_TO_0565 (s);
+	    *dst = convert_8888_to_0565 (s);
 	    dst++;
 	    w--;
 	}
@@ -2305,7 +2303,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    uint64_t m = *mask;
 
@@ -2419,7 +2417,7 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    uint64_t m = *mask;
 
@@ -2536,7 +2534,7 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    __m64 vsrc = load8888 (src);
 	    uint64_t d = *dst;
@@ -2651,7 +2649,7 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    __m64 s = load8888 (src);
 	    __m64 d = load8888 (dst);
@@ -2739,7 +2737,7 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 	uint32_t *p = (uint32_t *)mask_line;
 	uint16_t *q = (uint16_t *)dst_line;
 
-	while (twidth && ((unsigned long)q & 7))
+	while (twidth && ((uintptr_t)q & 7))
 	{
 	    uint32_t m = *(uint32_t *)p;
 
@@ -2840,7 +2838,7 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    uint16_t tmp;
 	    uint8_t a;
@@ -2911,7 +2909,7 @@ mmx_composite_in_8_8 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 3)
+	while (w && (uintptr_t)dst & 3)
 	{
 	    uint8_t s, d;
 	    uint16_t tmp;
@@ -2990,7 +2988,7 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 3)
+	while (w && (uintptr_t)dst & 3)
 	{
 	    uint16_t tmp;
 	    uint16_t a;
@@ -3067,7 +3065,7 @@ mmx_composite_add_8_8 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    s = *src;
 	    d = *dst;
@@ -3130,19 +3128,19 @@ mmx_composite_add_0565_0565 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    s = *src++;
 	    if (s)
 	    {
 		d = *dst;
-		s = CONVERT_0565_TO_8888 (s);
+		s = convert_0565_to_8888 (s);
 		if (d)
 		{
-		    d = CONVERT_0565_TO_8888 (d);
+		    d = convert_0565_to_8888 (d);
 		    UN8x4_ADD_UN8x4 (s, d);
 		}
-		*dst = CONVERT_8888_TO_0565 (s);
+		*dst = convert_8888_to_0565 (s);
 	    }
 	    dst++;
 	    w--;
@@ -3174,13 +3172,13 @@ mmx_composite_add_0565_0565 (pixman_implementation_t *imp,
 	    if (s)
 	    {
 		d = *dst;
-		s = CONVERT_0565_TO_8888 (s);
+		s = convert_0565_to_8888 (s);
 		if (d)
 		{
-		    d = CONVERT_0565_TO_8888 (d);
+		    d = convert_0565_to_8888 (d);
 		    UN8x4_ADD_UN8x4 (s, d);
 		}
-		*dst = CONVERT_8888_TO_0565 (s);
+		*dst = convert_8888_to_0565 (s);
 	    }
 	    dst++;
 	}
@@ -3212,7 +3210,7 @@ mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    store (dst, _mm_adds_pu8 (load ((const uint32_t *)src),
 	                              load ((const uint32_t *)dst)));
@@ -3296,7 +3294,7 @@ mmx_blt (pixman_implementation_t *imp,
 	dst_bytes += dst_stride;
 	w = byte_width;
 
-	if (w >= 1 && ((unsigned long)d & 1))
+	if (w >= 1 && ((uintptr_t)d & 1))
 	{
 	    *(uint8_t *)d = *(uint8_t *)s;
 	    w -= 1;
@@ -3304,7 +3302,7 @@ mmx_blt (pixman_implementation_t *imp,
 	    d += 1;
 	}
 
-	if (w >= 2 && ((unsigned long)d & 3))
+	if (w >= 2 && ((uintptr_t)d & 3))
 	{
 	    *(uint16_t *)d = *(uint16_t *)s;
 	    w -= 2;
@@ -3312,7 +3310,7 @@ mmx_blt (pixman_implementation_t *imp,
 	    d += 2;
 	}
 
-	while (w >= 4 && ((unsigned long)d & 7))
+	while (w >= 4 && ((uintptr_t)d & 7))
 	{
 	    *(uint32_t *)d = ldl_u ((uint32_t *)s);
 
@@ -3495,7 +3493,7 @@ mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    __m64 vdest = load8888 (dst);
 
@@ -3778,7 +3776,7 @@ mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
 
     iter->bits += iter->stride;
 
-    while (w && ((unsigned long)dst) & 7)
+    while (w && ((uintptr_t)dst) & 7)
     {
 	*dst++ = (*src++) | 0xff000000;
 	w--;
@@ -3820,11 +3818,11 @@ mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
 
     iter->bits += iter->stride;
 
-    while (w && ((unsigned long)dst) & 0x0f)
+    while (w && ((uintptr_t)dst) & 0x0f)
     {
 	uint16_t s = *src++;
 
-	*dst++ = CONVERT_0565_TO_8888 (s);
+	*dst++ = convert_0565_to_8888 (s);
 	w--;
     }
 
@@ -3847,7 +3845,7 @@ mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
     {
 	uint16_t s = *src++;
 
-	*dst++ = CONVERT_0565_TO_8888 (s);
+	*dst++ = convert_0565_to_8888 (s);
 	w--;
     }
 
@@ -3864,7 +3862,7 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
 
     iter->bits += iter->stride;
 
-    while (w && (((unsigned long)dst) & 15))
+    while (w && (((uintptr_t)dst) & 15))
     {
         *dst++ = *(src++) << 24;
         w--;
diff --git a/lib/pixman/pixman/pixman-noop.c b/lib/pixman/pixman/pixman-noop.c
index 850caa192..e39996d9d 100644
--- a/lib/pixman/pixman/pixman-noop.c
+++ b/lib/pixman/pixman/pixman-noop.c
@@ -77,25 +77,33 @@ noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
 	iter->get_scanline = _pixman_iter_get_scanline_noop;
     }
     else if (image->common.extended_format_code == PIXMAN_solid		&&
-	     ((iter->image_flags & (FAST_PATH_BITS_IMAGE | FAST_PATH_NO_ALPHA_MAP)) ==
-	      (FAST_PATH_BITS_IMAGE | FAST_PATH_NO_ALPHA_MAP)))
+	     (iter->image->type == SOLID ||
+	      (iter->image_flags & FAST_PATH_NO_ALPHA_MAP)))
     {
-	bits_image_t *bits = &image->bits;
-
 	if (iter->iter_flags & ITER_NARROW)
 	{
-	    uint32_t color = bits->fetch_pixel_32 (bits, 0, 0);
 	    uint32_t *buffer = iter->buffer;
 	    uint32_t *end = buffer + iter->width;
+	    uint32_t color;
+
+	    if (image->type == SOLID)
+		color = image->solid.color_32;
+	    else
+		color = image->bits.fetch_pixel_32 (&image->bits, 0, 0);
 
 	    while (buffer < end)
 		*(buffer++) = color;
 	}
 	else
 	{
-	    argb_t color = bits->fetch_pixel_float (bits, 0, 0);
 	    argb_t *buffer = (argb_t *)iter->buffer;
 	    argb_t *end = buffer + iter->width;
+	    argb_t color;
+
+	    if (image->type == SOLID)
+		color = image->solid.color_float;
+	    else
+		color = image->bits.fetch_pixel_float (&image->bits, 0, 0);
 
 	    while (buffer < end)
 		*(buffer++) = color;
diff --git a/lib/pixman/pixman/pixman-ppc.c b/lib/pixman/pixman/pixman-ppc.c
index 601f1df12..a6e7bb0cf 100644
--- a/lib/pixman/pixman/pixman-ppc.c
+++ b/lib/pixman/pixman/pixman-ppc.c
@@ -37,10 +37,10 @@
 static pixman_bool_t
 pixman_have_vmx (void)
 {
-    int error, have_mmx;
+    int error, have_vmx;
     size_t length = sizeof(have_vmx);
 
-    sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0);
+    error = sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0);
 
     if (error)
 	return FALSE;
@@ -56,8 +56,8 @@ pixman_have_vmx (void)
 static pixman_bool_t
 pixman_have_vmx (void)
 {
-    int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
     int error, have_vmx;
+    int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
     size_t length = sizeof(have_vmx);
 
     error = sysctl (mib, 2, &have_vmx, &length, NULL, 0);
diff --git a/lib/pixman/pixman/pixman-private.h b/lib/pixman/pixman/pixman-private.h
index c0a6bc0a5..6d9c05321 100644
--- a/lib/pixman/pixman/pixman-private.h
+++ b/lib/pixman/pixman/pixman-private.h
@@ -1,3 +1,5 @@
+#include <float.h>
+
 #ifndef PIXMAN_PRIVATE_H
 #define PIXMAN_PRIVATE_H
 
@@ -263,9 +265,6 @@ void
 _pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter);
 
 void
-_pixman_solid_fill_iter_init (pixman_image_t *image, pixman_iter_t  *iter);
-
-void
 _pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t  *iter);
 
 void
@@ -320,13 +319,12 @@ _pixman_image_validate (pixman_image_t *image);
  */
 typedef struct
 {
-    uint32_t                left_ag;
-    uint32_t                left_rb;
-    uint32_t                right_ag;
-    uint32_t                right_rb;
+    float		    a_s, a_b;
+    float		    r_s, r_b;
+    float		    g_s, g_b;
+    float		    b_s, b_b;
     pixman_fixed_t	    left_x;
     pixman_fixed_t          right_x;
-    pixman_fixed_t          stepper;
 
     pixman_gradient_stop_t *stops;
     int                     num_stops;
@@ -455,7 +453,7 @@ typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp,
 					     int                      y,
 					     int                      width,
 					     int                      height,
-					     uint32_t                 xor);
+					     uint32_t                 filler);
 typedef pixman_bool_t (*pixman_iter_init_func_t) (pixman_implementation_t *imp,
 						  pixman_iter_t           *iter);
 
@@ -500,7 +498,7 @@ pixman_implementation_t *
 _pixman_implementation_create (pixman_implementation_t *fallback,
 			       const pixman_fast_path_t *fast_paths);
 
-pixman_bool_t
+void
 _pixman_implementation_lookup_composite (pixman_implementation_t  *toplevel,
 					 pixman_op_t               op,
 					 pixman_format_code_t      src_format,
@@ -542,7 +540,7 @@ _pixman_implementation_fill (pixman_implementation_t *imp,
                              int                      y,
                              int                      width,
                              int                      height,
-                             uint32_t                 xor);
+                             uint32_t                 filler);
 
 pixman_bool_t
 _pixman_implementation_src_iter_init (pixman_implementation_t       *imp,
@@ -687,6 +685,7 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
 #define FAST_PATH_SAMPLES_COVER_CLIP_NEAREST	(1 << 23)
 #define FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR	(1 << 24)
 #define FAST_PATH_BITS_IMAGE			(1 << 25)
+#define FAST_PATH_SEPARABLE_CONVOLUTION_FILTER  (1 << 26)
 
 #define FAST_PATH_PAD_REPEAT						\
     (FAST_PATH_NO_NONE_REPEAT		|				\
@@ -881,24 +880,55 @@ pixman_list_move_to_front (pixman_list_t *list, pixman_link_t *link)
 
 #define CLIP(v, low, high) ((v) < (low) ? (low) : ((v) > (high) ? (high) : (v)))
 
+#define FLOAT_IS_ZERO(f)     (-FLT_MIN < (f) && (f) < FLT_MIN)
+
 /* Conversion between 8888 and 0565 */
 
-#define CONVERT_8888_TO_0565(s)						\
-    ((((s) >> 3) & 0x001f) |						\
-     (((s) >> 5) & 0x07e0) |						\
-     (((s) >> 8) & 0xf800))
+static force_inline uint16_t
+convert_8888_to_0565 (uint32_t s)
+{
+    /* The following code can be compiled into just 4 instructions on ARM */
+    uint32_t a, b;
+    a = (s >> 3) & 0x1F001F;  /* s[19..23] -> a[16..20], s[3..7] -> a[0..4] */
+    b = s & 0xFC00;           /* s[10..15], top 6 bits of the second byte */
+    a |= a >> 5;              /* copy a[16..20] down into a[11..15] */
+    a |= b >> 5;              /* place the 6-bit field into a[5..10] */
+    return (uint16_t)a;       /* cast drops the leftover copy in a[16..20] */
+}
 
-#define CONVERT_0565_TO_0888(s)						\
-    (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) |			\
-     ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) |			\
-     ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000)))
+static force_inline uint32_t
+convert_0565_to_0888 (uint16_t s)
+{   /* widen each field to 8 bits by repeating its top bits in the low bits */
+    return (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) |         /* s[0..4]   -> bits 0..7   */
+            ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) |     /* s[5..10]  -> bits 8..15  */
+            ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000))); /* s[11..15] -> bits 16..23 */
+}
 
-#define CONVERT_0565_TO_8888(s) (CONVERT_0565_TO_0888(s) | 0xff000000)
+static force_inline uint32_t
+convert_0565_to_8888 (uint16_t s)
+{
+    return convert_0565_to_0888 (s) | 0xff000000;
+}
 
 /* Trivial versions that are useful in macros */
-#define CONVERT_8888_TO_8888(s) (s)
-#define CONVERT_x888_TO_8888(s) ((s) | 0xff000000)
-#define CONVERT_0565_TO_0565(s) (s)
+
+static force_inline uint32_t
+convert_8888_to_8888 (uint32_t s)
+{
+    return s;
+}
+
+static force_inline uint32_t
+convert_x888_to_8888 (uint32_t s)
+{
+    return s | 0xff000000;
+}
+
+static force_inline uint16_t
+convert_0565_to_0565 (uint16_t s)
+{
+    return s;
+}
 
 #define PIXMAN_FORMAT_IS_WIDE(f)					\
     (PIXMAN_FORMAT_A (f) > 8 ||						\
@@ -987,15 +1017,13 @@ float pixman_unorm_to_float (uint16_t u, int n_bits);
 
 #endif
 
-#ifdef DEBUG
-
 void
 _pixman_log_error (const char *function, const char *message);
 
 #define return_if_fail(expr)                                            \
     do                                                                  \
     {                                                                   \
-	if (!(expr))							\
+	if (unlikely (!(expr)))                                         \
 	{								\
 	    _pixman_log_error (FUNC, "The expression " # expr " was false"); \
 	    return;							\
@@ -1006,7 +1034,7 @@ _pixman_log_error (const char *function, const char *message);
 #define return_val_if_fail(expr, retval)                                \
     do                                                                  \
     {                                                                   \
-	if (!(expr))                                                    \
+	if (unlikely (!(expr)))                                         \
 	{								\
 	    _pixman_log_error (FUNC, "The expression " # expr " was false"); \
 	    return (retval);						\
@@ -1017,38 +1045,31 @@ _pixman_log_error (const char *function, const char *message);
 #define critical_if_fail(expr)						\
     do									\
     {									\
-	if (!(expr))							\
+	if (unlikely (!(expr)))                                         \
 	    _pixman_log_error (FUNC, "The expression " # expr " was false"); \
     }									\
     while (0)
 
+/*
+ * Matrix
+ */
 
-#else
-
-#define _pixman_log_error(f,m) do { } while (0)				\
+typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t;
 
-#define return_if_fail(expr)						\
-    do                                                                  \
-    {                                                                   \
-	if (!(expr))							\
-	    return;							\
-    }                                                                   \
-    while (0)
+pixman_bool_t
+pixman_transform_point_31_16 (const pixman_transform_t    *t,
+                              const pixman_vector_48_16_t *v,
+                              pixman_vector_48_16_t       *result);
 
-#define return_val_if_fail(expr, retval)                                \
-    do                                                                  \
-    {                                                                   \
-	if (!(expr))							\
-	    return (retval);						\
-    }                                                                   \
-    while (0)
+void
+pixman_transform_point_31_16_3d (const pixman_transform_t    *t,
+                                 const pixman_vector_48_16_t *v,
+                                 pixman_vector_48_16_t       *result);
 
-#define critical_if_fail(expr)						\
-    do									\
-    {									\
-    }									\
-    while (0)
-#endif
+void
+pixman_transform_point_31_16_affine (const pixman_transform_t    *t,
+                                     const pixman_vector_48_16_t *v,
+                                     pixman_vector_48_16_t       *result);
 
 /*
  * Timers
diff --git a/lib/pixman/pixman/pixman-radial-gradient.c b/lib/pixman/pixman/pixman-radial-gradient.c
index 8d562468d..6a217963d 100644
--- a/lib/pixman/pixman/pixman-radial-gradient.c
+++ b/lib/pixman/pixman/pixman-radial-gradient.c
@@ -109,7 +109,7 @@ radial_compute_color (double                    a,
 	}
 	else
 	{
-	    if (t * dr > mindr)
+	    if (t * dr >= mindr)
 		return _pixman_gradient_walker_pixel (walker, t);
 	}
 
@@ -145,9 +145,9 @@ radial_compute_color (double                    a,
 	}
 	else
 	{
-	    if (t0 * dr > mindr)
+	    if (t0 * dr >= mindr)
 		return _pixman_gradient_walker_pixel (walker, t0);
-	    else if (t1 * dr > mindr)
+	    else if (t1 * dr >= mindr)
 		return _pixman_gradient_walker_pixel (walker, t1);
 	}
     }
diff --git a/lib/pixman/pixman/pixman-region.c b/lib/pixman/pixman/pixman-region.c
index 9d2a60b64..59bc9c797 100644
--- a/lib/pixman/pixman/pixman-region.c
+++ b/lib/pixman/pixman/pixman-region.c
@@ -42,7 +42,7 @@
  * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
  * SOFTWARE.
  *
- * Copyright � 1998 Keith Packard
+ * Copyright © 1998 Keith Packard
  *
  * Permission to use, copy, modify, distribute, and sell this software and its
  * documentation for any purpose is hereby granted without fee, provided that
@@ -202,7 +202,7 @@ PIXREGION_SZOF (size_t n)
     return size + sizeof(region_data_type_t);
 }
 
-static void *
+static region_data_type_t *
 alloc_data (size_t n)
 {
     size_t sz = PIXREGION_SZOF (n);
@@ -1858,7 +1858,7 @@ pixman_region_subtract_o (region_type_t * region,
         else if (r2->x1 <= x1)
         {
             /*
-	     * Subtrahend preceeds minuend: nuke left edge of minuend.
+	     * Subtrahend precedes minuend: nuke left edge of minuend.
 	     */
             x1 = r2->x2;
             if (x1 >= r1->x2)
@@ -1982,7 +1982,7 @@ PREFIX (_subtract) (region_type_t *reg_d,
     }
 
     /* Add those rectangles in region 1 that aren't in region 2,
-       do yucky substraction for overlaps, and
+       do yucky subtraction for overlaps, and
        just throw away rectangles in region 2 that aren't in region 1 */
     if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE))
 	return FALSE;
@@ -2042,7 +2042,7 @@ PREFIX (_inverse) (region_type_t *new_reg,  /* Destination region */
     }
 
     /* Add those rectangles in region 1 that aren't in region 2,
-     * do yucky substraction for overlaps, and
+     * do yucky subtraction for overlaps, and
      * just throw away rectangles in region 2 that aren't in region 1
      */
     inv_reg.extents = *inv_rect;
diff --git a/lib/pixman/pixman/pixman-solid-fill.c b/lib/pixman/pixman/pixman-solid-fill.c
index 60d56d52a..5f9fef630 100644
--- a/lib/pixman/pixman/pixman-solid-fill.c
+++ b/lib/pixman/pixman/pixman-solid-fill.c
@@ -26,31 +26,6 @@
 #endif
 #include "pixman-private.h"
 
-void
-_pixman_solid_fill_iter_init (pixman_image_t *image, pixman_iter_t  *iter)
-{
-    if (iter->iter_flags & ITER_NARROW)
-    {
-	uint32_t *b = (uint32_t *)iter->buffer;
-	uint32_t *e = b + iter->width;
-	uint32_t color = iter->image->solid.color_32;
-
-	while (b < e)
-	    *(b++) = color;
-    }
-    else
-    {
-	argb_t *b = (argb_t *)iter->buffer;
-	argb_t *e = b + iter->width;
-	argb_t color = image->solid.color_float;
-
-	while (b < e)
-	    *(b++) = color;
-    }
-
-    iter->get_scanline = _pixman_iter_get_scanline_noop;
-}
-
 static uint32_t
 color_to_uint32 (const pixman_color_t *color)
 {
diff --git a/lib/pixman/pixman/pixman-sse2.c b/lib/pixman/pixman/pixman-sse2.c
index 27cf60e16..8a82eda7e 100644
--- a/lib/pixman/pixman/pixman-sse2.c
+++ b/lib/pixman/pixman/pixman-sse2.c
@@ -576,7 +576,7 @@ core_combine_over_u_sse2_mask (uint32_t *	  pd,
     uint32_t s, d;
 
     /* Align dst on a 16-byte boundary */
-    while (w && ((unsigned long)pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	d = *pd;
 	s = combine1 (ps, pm);
@@ -661,7 +661,7 @@ core_combine_over_u_sse2_no_mask (uint32_t *	  pd,
     uint32_t s, d;
 
     /* Align dst on a 16-byte boundary */
-    while (w && ((unsigned long)pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	d = *pd;
 	s = *ps;
@@ -753,7 +753,7 @@ sse2_combine_over_reverse_u (pixman_implementation_t *imp,
 
     /* Align dst on a 16-byte boundary */
     while (w &&
-           ((unsigned long)pd & 15))
+           ((uintptr_t)pd & 15))
     {
 	d = *pd;
 	s = combine1 (ps, pm);
@@ -840,7 +840,7 @@ sse2_combine_in_u (pixman_implementation_t *imp,
     __m128i xmm_src_lo, xmm_src_hi;
     __m128i xmm_dst_lo, xmm_dst_hi;
 
-    while (w && ((unsigned long) pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	s = combine1 (ps, pm);
 	d = *pd;
@@ -901,7 +901,7 @@ sse2_combine_in_reverse_u (pixman_implementation_t *imp,
     __m128i xmm_src_lo, xmm_src_hi;
     __m128i xmm_dst_lo, xmm_dst_hi;
 
-    while (w && ((unsigned long) pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	s = combine1 (ps, pm);
 	d = *pd;
@@ -957,7 +957,7 @@ sse2_combine_out_reverse_u (pixman_implementation_t *imp,
                             const uint32_t *         pm,
                             int                      w)
 {
-    while (w && ((unsigned long) pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	uint32_t s = combine1 (ps, pm);
 	uint32_t d = *pd;
@@ -1026,7 +1026,7 @@ sse2_combine_out_u (pixman_implementation_t *imp,
                     const uint32_t *         pm,
                     int                      w)
 {
-    while (w && ((unsigned long) pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	uint32_t s = combine1 (ps, pm);
 	uint32_t d = *pd;
@@ -1113,7 +1113,7 @@ sse2_combine_atop_u (pixman_implementation_t *imp,
     __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
     __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
 
-    while (w && ((unsigned long) pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	s = combine1 (ps, pm);
 	d = *pd;
@@ -1197,7 +1197,7 @@ sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
     __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
     __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
 
-    while (w && ((unsigned long) pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	s = combine1 (ps, pm);
 	d = *pd;
@@ -1285,7 +1285,7 @@ sse2_combine_xor_u (pixman_implementation_t *imp,
     __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
     __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
 
-    while (w && ((unsigned long) pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	s = combine1 (ps, pm);
 	d = *pd;
@@ -1357,7 +1357,7 @@ sse2_combine_add_u (pixman_implementation_t *imp,
     const uint32_t* ps = src;
     const uint32_t* pm = mask;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = combine1 (ps, pm);
 	d = *pd;
@@ -1430,7 +1430,7 @@ sse2_combine_saturate_u (pixman_implementation_t *imp,
     uint32_t pack_cmp;
     __m128i xmm_src, xmm_dst;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = combine1 (ps, pm);
 	d = *pd;
@@ -1518,7 +1518,7 @@ sse2_combine_src_ca (pixman_implementation_t *imp,
     __m128i xmm_mask_lo, xmm_mask_hi;
     __m128i xmm_dst_lo, xmm_dst_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -1586,7 +1586,7 @@ sse2_combine_over_ca (pixman_implementation_t *imp,
     __m128i xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -1662,7 +1662,7 @@ sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
     __m128i xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -1727,7 +1727,7 @@ sse2_combine_in_ca (pixman_implementation_t *imp,
     __m128i xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -1802,7 +1802,7 @@ sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
     __m128i xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -1875,7 +1875,7 @@ sse2_combine_out_ca (pixman_implementation_t *imp,
     __m128i xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -1951,7 +1951,7 @@ sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
     __m128i xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -2048,7 +2048,7 @@ sse2_combine_atop_ca (pixman_implementation_t *imp,
     __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -2141,7 +2141,7 @@ sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
     __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -2237,7 +2237,7 @@ sse2_combine_xor_ca (pixman_implementation_t *imp,
     __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -2313,7 +2313,7 @@ sse2_combine_add_ca (pixman_implementation_t *imp,
     __m128i xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -2414,7 +2414,7 @@ sse2_composite_over_n_8888 (pixman_implementation_t *imp,
 	dst_line += dst_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    d = *dst;
 	    *dst++ = pack_1x128_32 (over_1x128 (xmm_src,
@@ -2483,7 +2483,7 @@ sse2_composite_over_n_0565 (pixman_implementation_t *imp,
 	dst_line += dst_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    d = *dst;
 
@@ -2568,7 +2568,7 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
 	dst_line += dst_stride;
 	mask_line += mask_stride;
 
-	while (w && (unsigned long)pd & 15)
+	while (w && (uintptr_t)pd & 15)
 	{
 	    m = *pm++;
 
@@ -2682,7 +2682,7 @@ sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
 	dst_line += dst_stride;
 	mask_line += mask_stride;
 
-	while (w && (unsigned long)pd & 15)
+	while (w && (uintptr_t)pd & 15)
 	{
 	    m = *pm++;
 
@@ -2786,7 +2786,7 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    uint32_t s = *src++;
 
@@ -2878,10 +2878,10 @@ sse2_composite_src_x888_0565 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    s = *src++;
-	    *dst = CONVERT_8888_TO_0565 (s);
+	    *dst = convert_8888_to_0565 (s);
 	    dst++;
 	    w--;
 	}
@@ -2901,7 +2901,7 @@ sse2_composite_src_x888_0565 (pixman_implementation_t *imp,
 	while (w)
 	{
 	    s = *src++;
-	    *dst = CONVERT_8888_TO_0565 (s);
+	    *dst = convert_8888_to_0565 (s);
 	    dst++;
 	    w--;
 	}
@@ -2932,7 +2932,7 @@ sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    *dst++ = *src++ | 0xff000000;
 	    w--;
@@ -2999,7 +2999,7 @@ sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    uint32_t s = (*src++) | 0xff000000;
 	    uint32_t d = *dst;
@@ -3125,7 +3125,7 @@ sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
 
 	/* Align dst on a 16-byte boundary */
 	while (w &&
-	       ((unsigned long)dst & 15))
+	       ((uintptr_t)dst & 15))
 	{
 	    s = *src++;
 	    d = *dst;
@@ -3231,7 +3231,7 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    uint8_t m = *mask++;
 
@@ -3321,7 +3321,7 @@ sse2_fill (pixman_implementation_t *imp,
            int                      y,
            int                      width,
            int                      height,
-           uint32_t		    xor)
+           uint32_t		    filler)
 {
     uint32_t byte_width;
     uint8_t *byte_line;
@@ -3338,9 +3338,9 @@ sse2_fill (pixman_implementation_t *imp,
 	byte_width = width;
 	stride *= 1;
 
-	b = xor & 0xff;
+	b = filler & 0xff;
 	w = (b << 8) | b;
-	xor = (w << 16) | w;
+	filler = (w << 16) | w;
     }
     else if (bpp == 16)
     {
@@ -3349,7 +3349,7 @@ sse2_fill (pixman_implementation_t *imp,
 	byte_width = 2 * width;
 	stride *= 2;
 
-        xor = (xor & 0xffff) * 0x00010001;
+        filler = (filler & 0xffff) * 0x00010001;
     }
     else if (bpp == 32)
     {
@@ -3363,7 +3363,7 @@ sse2_fill (pixman_implementation_t *imp,
 	return FALSE;
     }
 
-    xmm_def = create_mask_2x32_128 (xor, xor);
+    xmm_def = create_mask_2x32_128 (filler, filler);
 
     while (height--)
     {
@@ -3372,23 +3372,23 @@ sse2_fill (pixman_implementation_t *imp,
 	byte_line += stride;
 	w = byte_width;
 
-	if (w >= 1 && ((unsigned long)d & 1))
+	if (w >= 1 && ((uintptr_t)d & 1))
 	{
-	    *(uint8_t *)d = xor;
+	    *(uint8_t *)d = filler;
 	    w -= 1;
 	    d += 1;
 	}
 
-	while (w >= 2 && ((unsigned long)d & 3))
+	while (w >= 2 && ((uintptr_t)d & 3))
 	{
-	    *(uint16_t *)d = xor;
+	    *(uint16_t *)d = filler;
 	    w -= 2;
 	    d += 2;
 	}
 
-	while (w >= 4 && ((unsigned long)d & 15))
+	while (w >= 4 && ((uintptr_t)d & 15))
 	{
-	    *(uint32_t *)d = xor;
+	    *(uint32_t *)d = filler;
 
 	    w -= 4;
 	    d += 4;
@@ -3439,7 +3439,7 @@ sse2_fill (pixman_implementation_t *imp,
 
 	while (w >= 4)
 	{
-	    *(uint32_t *)d = xor;
+	    *(uint32_t *)d = filler;
 
 	    w -= 4;
 	    d += 4;
@@ -3447,14 +3447,14 @@ sse2_fill (pixman_implementation_t *imp,
 
 	if (w >= 2)
 	{
-	    *(uint16_t *)d = xor;
+	    *(uint16_t *)d = filler;
 	    w -= 2;
 	    d += 2;
 	}
 
 	if (w >= 1)
 	{
-	    *(uint8_t *)d = xor;
+	    *(uint8_t *)d = filler;
 	    w -= 1;
 	    d += 1;
 	}
@@ -3505,7 +3505,7 @@ sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    uint8_t m = *mask++;
 
@@ -3621,7 +3621,7 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    m = *mask++;
 
@@ -3745,7 +3745,7 @@ sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    s = *src++;
 	    d = *dst;
@@ -3854,7 +3854,7 @@ sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    s = *src++;
 	    d = *dst;
@@ -3957,7 +3957,7 @@ sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	dst_line += dst_stride;
 
-	while (w && ((unsigned long)dst & 15))
+	while (w && ((uintptr_t)dst & 15))
 	{
 	    m = *(uint32_t *) mask;
 
@@ -4083,7 +4083,7 @@ sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	while (w && ((unsigned long)dst & 15))
+	while (w && ((uintptr_t)dst & 15))
 	{
 	    m = (uint32_t) *mask++;
 	    d = (uint32_t) *dst;
@@ -4176,7 +4176,7 @@ sse2_composite_in_n_8 (pixman_implementation_t *imp,
 	dst_line += dst_stride;
 	w = width;
 
-	while (w && ((unsigned long)dst & 15))
+	while (w && ((uintptr_t)dst & 15))
 	{
 	    d = (uint32_t) *dst;
 
@@ -4245,7 +4245,7 @@ sse2_composite_in_8_8 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && ((unsigned long)dst & 15))
+	while (w && ((uintptr_t)dst & 15))
 	{
 	    s = (uint32_t) *src++;
 	    d = (uint32_t) *dst;
@@ -4322,7 +4322,7 @@ sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	while (w && ((unsigned long)dst & 15))
+	while (w && ((uintptr_t)dst & 15))
 	{
 	    m = (uint32_t) *mask++;
 	    d = (uint32_t) *dst;
@@ -4414,7 +4414,7 @@ sse2_composite_add_n_8 (pixman_implementation_t *imp,
 	dst_line += dst_stride;
 	w = width;
 
-	while (w && ((unsigned long)dst & 15))
+	while (w && ((uintptr_t)dst & 15))
 	{
 	    *dst = (uint8_t)_mm_cvtsi128_si32 (
 		_mm_adds_epu8 (
@@ -4474,7 +4474,7 @@ sse2_composite_add_8_8 (pixman_implementation_t *imp,
 	w = width;
 
 	/* Small head */
-	while (w && (unsigned long)dst & 3)
+	while (w && (uintptr_t)dst & 3)
 	{
 	    t = (*dst) + (*src++);
 	    *dst++ = t | (0 - (t >> 8));
@@ -4523,7 +4523,163 @@ sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
 
 	sse2_combine_add_u (imp, op, dst, src, NULL, width);
     }
+}
+
+static void
+sse2_composite_add_n_8888 (pixman_implementation_t *imp,
+			   pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint32_t *dst_line, *dst, src;
+    int dst_stride;
+
+    __m128i xmm_src;
+
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
+    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+    if (src == 0)
+	return;
+
+    if (src == ~0)
+    {
+	pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32,
+		     dest_x, dest_y, width, height, ~0);
+
+	return;
+    }
+
+    xmm_src = _mm_set_epi32 (src, src, src, src);
+    while (height--)
+    {
+	int w = width;
+	uint32_t d;
+
+	dst = dst_line;
+	dst_line += dst_stride;
+
+	while (w && (uintptr_t)dst & 15)
+	{
+	    d = *dst;
+	    *dst++ =
+		_mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d)));
+	    w--;
+	}
+
+	while (w >= 4)
+	{
+	    save_128_aligned
+		((__m128i*)dst,
+		 _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
+
+	    dst += 4;
+	    w -= 4;
+	}
+
+	while (w--)
+	{
+	    d = *dst;
+	    *dst++ =
+		_mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src,
+						  _mm_cvtsi32_si128 (d)));
+	}
+    }
+}
+
+static void
+sse2_composite_add_n_8_8888 (pixman_implementation_t *imp,
+			     pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint32_t     *dst_line, *dst;
+    uint8_t     *mask_line, *mask;
+    int dst_stride, mask_stride;
+    int32_t w;
+    uint32_t src;
+
+    __m128i xmm_src;
+
+    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+    if (src == 0)
+	return;
+    xmm_src = expand_pixel_32_1x128 (src);
+
+    PIXMAN_IMAGE_GET_LINE (
+	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	mask = mask_line;
+	mask_line += mask_stride;
+	w = width;
+
+	while (w && ((uintptr_t)dst & 15))
+	{
+	    uint8_t m = *mask++;
+	    if (m)
+	    {
+		*dst = pack_1x128_32
+		    (_mm_adds_epu16
+		     (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)),
+		      unpack_32_1x128 (*dst)));
+	    }
+	    dst++;
+	    w--;
+	}
+
+	while (w >= 4)
+	{
+	    uint32_t m = *(uint32_t*)mask;
+	    if (m)
+	    {
+		__m128i xmm_mask_lo, xmm_mask_hi;
+		__m128i xmm_dst_lo, xmm_dst_hi;
+
+		__m128i xmm_dst = load_128_aligned ((__m128i*)dst);
+		__m128i xmm_mask =
+		    _mm_unpacklo_epi8 (unpack_32_1x128(m),
+				       _mm_setzero_si128 ());
+
+		unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+		unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+		expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
+					&xmm_mask_lo, &xmm_mask_hi);
+
+		pix_multiply_2x128 (&xmm_src, &xmm_src,
+				    &xmm_mask_lo, &xmm_mask_hi,
+				    &xmm_mask_lo, &xmm_mask_hi);
+
+		xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
+		xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
+
+		save_128_aligned (
+		    (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	    }
+
+	    w -= 4;
+	    dst += 4;
+	    mask += 4;
+	}
+
+	while (w)
+	{
+	    uint8_t m = *mask++;
+	    if (m)
+	    {
+		*dst = pack_1x128_32
+		    (_mm_adds_epu16
+		     (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)),
+		      unpack_32_1x128 (*dst)));
+	    }
+	    dst++;
+	    w--;
+	}
+    }
 }
 
 static pixman_bool_t
@@ -4582,7 +4738,7 @@ sse2_blt (pixman_implementation_t *imp,
 	dst_bytes += dst_stride;
 	w = byte_width;
 
-	while (w >= 2 && ((unsigned long)d & 3))
+	while (w >= 2 && ((uintptr_t)d & 3))
 	{
 	    *(uint16_t *)d = *(uint16_t *)s;
 	    w -= 2;
@@ -4590,7 +4746,7 @@ sse2_blt (pixman_implementation_t *imp,
 	    d += 2;
 	}
 
-	while (w >= 4 && ((unsigned long)d & 15))
+	while (w >= 4 && ((uintptr_t)d & 15))
 	{
 	    *(uint32_t *)d = *(uint32_t *)s;
 
@@ -4697,7 +4853,7 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
 
         w = width;
 
-        while (w && (unsigned long)dst & 15)
+        while (w && (uintptr_t)dst & 15)
         {
             s = 0xff000000 | *src++;
             m = (uint32_t) *mask++;
@@ -4821,7 +4977,7 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
 
         w = width;
 
-        while (w && (unsigned long)dst & 15)
+        while (w && (uintptr_t)dst & 15)
         {
 	    uint32_t sa;
 
@@ -4960,7 +5116,7 @@ sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
 	dst_line += dst_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    __m128i vd;
 
@@ -5045,7 +5201,7 @@ sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
 
         w = width;
 
-        while (w && (unsigned long)dst & 15)
+        while (w && (uintptr_t)dst & 15)
         {
 	    uint32_t sa;
 
@@ -5173,7 +5329,7 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t*       pd,
 	return;
 
     /* Align dst on a 16-byte boundary */
-    while (w && ((unsigned long)pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	d = *pd;
 	s = combine1 (ps + pixman_fixed_to_int (vx), pm);
@@ -5291,7 +5447,7 @@ scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
 
     xmm_mask = create_mask_16_128 (*mask >> 24);
 
-    while (w && (unsigned long)dst & 15)
+    while (w && (uintptr_t)dst & 15)
     {
 	uint32_t s = *(src + pixman_fixed_to_int (vx));
 	vx += unit_x;
@@ -5398,19 +5554,27 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
 			      scaled_nearest_scanline_sse2_8888_n_8888_OVER,
 			      uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)
 
-#define BMSK ((1 << BILINEAR_INTERPOLATION_BITS) - 1)
-
-#define BILINEAR_DECLARE_VARIABLES						\
+#if BILINEAR_INTERPOLATION_BITS < 8
+# define BILINEAR_DECLARE_VARIABLES						\
     const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt);	\
     const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb);	\
-    const __m128i xmm_xorc8 = _mm_set_epi16 (0, 0, 0, 0, BMSK, BMSK, BMSK, BMSK);\
-    const __m128i xmm_addc8 = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1);		\
-    const __m128i xmm_xorc7 = _mm_set_epi16 (0, BMSK, 0, BMSK, 0, BMSK, 0, BMSK);\
-    const __m128i xmm_addc7 = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1);		\
+    const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1);		\
+    const __m128i xmm_ux = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x,	\
+					  unit_x, -unit_x, unit_x, -unit_x);	\
+    const __m128i xmm_zero = _mm_setzero_si128 ();				\
+    __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1),		\
+				   vx, -(vx + 1), vx, -(vx + 1))
+#else
+# define BILINEAR_DECLARE_VARIABLES						\
+    const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt);	\
+    const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb);	\
+    const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1);		\
     const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x,	\
-					  unit_x, unit_x, unit_x, unit_x);	\
+					  -unit_x, -unit_x, -unit_x, -unit_x);	\
     const __m128i xmm_zero = _mm_setzero_si128 ();				\
-    __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, vx, vx, vx, vx)
+    __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx,				\
+				   -(vx + 1), -(vx + 1), -(vx + 1), -(vx + 1))
+#endif
 
 #define BILINEAR_INTERPOLATE_ONE_PIXEL(pix)					\
 do {										\
@@ -5429,8 +5593,8 @@ do {										\
     if (BILINEAR_INTERPOLATION_BITS < 8)					\
     {										\
 	/* calculate horizontal weights */					\
-	xmm_wh = _mm_add_epi16 (xmm_addc7, _mm_xor_si128 (xmm_xorc7,		\
-		   _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS)));	\
+	xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x,		\
+					16 - BILINEAR_INTERPOLATION_BITS));	\
 	xmm_x = _mm_add_epi16 (xmm_x, xmm_ux);					\
 	/* horizontal interpolation */						\
 	a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 (		\
@@ -5439,8 +5603,8 @@ do {										\
     else									\
     {										\
 	/* calculate horizontal weights */					\
-	xmm_wh = _mm_add_epi16 (xmm_addc8, _mm_xor_si128 (xmm_xorc8,		\
-		_mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS)));	\
+	xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x,		\
+					16 - BILINEAR_INTERPOLATION_BITS));	\
 	xmm_x = _mm_add_epi16 (xmm_x, xmm_ux);					\
 	/* horizontal interpolation */						\
 	xmm_lo = _mm_mullo_epi16 (a, xmm_wh);					\
@@ -5538,7 +5702,7 @@ scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t *       dst,
     BILINEAR_DECLARE_VARIABLES;
     uint32_t pix1, pix2, pix3, pix4;
 
-    while (w && ((unsigned long)dst & 15))
+    while (w && ((uintptr_t)dst & 15))
     {
 	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
 
@@ -5639,7 +5803,7 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t *       dst,
     uint32_t pix1, pix2, pix3, pix4;
     uint32_t m;
 
-    while (w && ((unsigned long)dst & 15))
+    while (w && ((uintptr_t)dst & 15))
     {
 	uint32_t sa;
 
@@ -5786,6 +5950,121 @@ FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER,
 			       uint32_t, uint8_t, uint32_t,
 			       NORMAL, FLAG_HAVE_NON_SOLID_MASK)
 
+static force_inline void
+scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t *       dst,
+						const uint32_t * mask,
+						const uint32_t * src_top,
+						const uint32_t * src_bottom,
+						int32_t          w,
+						int              wt,
+						int              wb,
+						pixman_fixed_t   vx,
+						pixman_fixed_t   unit_x,
+						pixman_fixed_t   max_vx,
+						pixman_bool_t    zero_src)
+{
+    BILINEAR_DECLARE_VARIABLES;
+    uint32_t pix1, pix2, pix3, pix4;
+    __m128i xmm_mask;
+
+    if (zero_src || (*mask >> 24) == 0)
+	return;
+
+    xmm_mask = create_mask_16_128 (*mask >> 24);
+
+    while (w && ((uintptr_t)dst & 15))
+    {
+	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+	if (pix1)
+	{
+		uint32_t d = *dst;
+
+		__m128i ms = unpack_32_1x128 (pix1);
+		__m128i alpha     = expand_alpha_1x128 (ms);
+		__m128i dest      = xmm_mask;
+		__m128i alpha_dst = unpack_32_1x128 (d);
+
+		*dst = pack_1x128_32
+			(in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
+	}
+
+	dst++;
+	w--;
+    }
+
+    while (w >= 4)
+    {
+	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+	BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+	BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
+	BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
+
+	if (pix1 | pix2 | pix3 | pix4)
+	{
+	    __m128i xmm_src, xmm_src_lo, xmm_src_hi;
+	    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+	    __m128i xmm_alpha_lo, xmm_alpha_hi;
+
+	    xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
+
+	    xmm_dst = load_128_aligned ((__m128i*)dst);
+
+	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+	    expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+				&xmm_alpha_lo, &xmm_alpha_hi);
+
+	    in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+			   &xmm_alpha_lo, &xmm_alpha_hi,
+			   &xmm_mask, &xmm_mask,
+			   &xmm_dst_lo, &xmm_dst_hi);
+
+	    save_128_aligned
+		((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	}
+
+	dst += 4;
+	w -= 4;
+    }
+
+    while (w)
+    {
+	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+	if (pix1)
+	{
+		uint32_t d = *dst;
+
+		__m128i ms = unpack_32_1x128 (pix1);
+		__m128i alpha     = expand_alpha_1x128 (ms);
+		__m128i dest      = xmm_mask;
+		__m128i alpha_dst = unpack_32_1x128 (d);
+
+		*dst = pack_1x128_32
+			(in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
+	}
+
+	dst++;
+	w--;
+    }
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
+			       scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
+			       uint32_t, uint32_t, uint32_t,
+			       COVER, FLAG_HAVE_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
+			       scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
+			       uint32_t, uint32_t, uint32_t,
+			       PAD, FLAG_HAVE_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
+			       scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
+			       uint32_t, uint32_t, uint32_t,
+			       NONE, FLAG_HAVE_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
+			       scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
+			       uint32_t, uint32_t, uint32_t,
+			       NORMAL, FLAG_HAVE_SOLID_MASK)
+
 static const pixman_fast_path_t sse2_fast_paths[] =
 {
     /* PIXMAN_OP_OVER */
@@ -5848,6 +6127,14 @@ static const pixman_fast_path_t sse2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888),
     PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8),
     PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8),
+    PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, sse2_composite_add_n_8888),
+    PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, sse2_composite_add_n_8888),
+    PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, sse2_composite_add_n_8888),
+    PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, sse2_composite_add_n_8888),
+    PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, sse2_composite_add_n_8_8888),
+    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, sse2_composite_add_n_8_8888),
+    PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, sse2_composite_add_n_8_8888),
+    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, sse2_composite_add_n_8_8888),
 
     /* PIXMAN_OP_SRC */
     PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888),
@@ -5912,6 +6199,11 @@ static const pixman_fast_path_t sse2_fast_paths[] =
     SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
     SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
 
+    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
+    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
+    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
+    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
+
     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888),
     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888),
     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888),
@@ -5930,7 +6222,7 @@ sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
 
     iter->bits += iter->stride;
 
-    while (w && ((unsigned long)dst) & 0x0f)
+    while (w && ((uintptr_t)dst) & 0x0f)
     {
 	*dst++ = (*src++) | 0xff000000;
 	w--;
@@ -5966,11 +6258,11 @@ sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
 
     iter->bits += iter->stride;
 
-    while (w && ((unsigned long)dst) & 0x0f)
+    while (w && ((uintptr_t)dst) & 0x0f)
     {
 	uint16_t s = *src++;
 
-	*dst++ = CONVERT_0565_TO_8888 (s);
+	*dst++ = convert_0565_to_8888 (s);
 	w--;
     }
 
@@ -5995,7 +6287,7 @@ sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
     {
 	uint16_t s = *src++;
 
-	*dst++ = CONVERT_0565_TO_8888 (s);
+	*dst++ = convert_0565_to_8888 (s);
 	w--;
     }
 
@@ -6012,7 +6304,7 @@ sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
 
     iter->bits += iter->stride;
 
-    while (w && (((unsigned long)dst) & 15))
+    while (w && (((uintptr_t)dst) & 15))
     {
         *dst++ = *(src++) << 24;
         w--;
diff --git a/lib/pixman/pixman/pixman-trap.c b/lib/pixman/pixman/pixman-trap.c
index ab5c8c895..91766fdbf 100644
--- a/lib/pixman/pixman/pixman-trap.c
+++ b/lib/pixman/pixman/pixman-trap.c
@@ -491,6 +491,8 @@ pixman_composite_trapezoids (pixman_op_t		op,
 {
     int i;
 
+    return_if_fail (PIXMAN_FORMAT_TYPE (mask_format) == PIXMAN_TYPE_A);
+    
     if (n_traps <= 0)
 	return;
 
@@ -521,8 +523,9 @@ pixman_composite_trapezoids (pixman_op_t		op,
 	if (!get_trap_extents (op, dst, traps, n_traps, &box))
 	    return;
 	
-	tmp = pixman_image_create_bits (
-	    mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1);
+	if (!(tmp = pixman_image_create_bits (
+		  mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1)))
+	    return;
 	
 	for (i = 0; i < n_traps; ++i)
 	{
diff --git a/lib/pixman/pixman/pixman-utils.c b/lib/pixman/pixman/pixman-utils.c
index b1e9fb62d..f31171f6d 100644
--- a/lib/pixman/pixman/pixman-utils.c
+++ b/lib/pixman/pixman/pixman-utils.c
@@ -292,8 +292,6 @@ _pixman_internal_only_get_implementation (void)
     return get_implementation ();
 }
 
-#ifdef DEBUG
-
 void
 _pixman_log_error (const char *function, const char *message)
 {
@@ -310,5 +308,3 @@ _pixman_log_error (const char *function, const char *message)
 	n_messages++;
     }
 }
-
-#endif
diff --git a/lib/pixman/pixman/pixman.c b/lib/pixman/pixman/pixman.c
index e3b6516b5..184f0c4e6 100644
--- a/lib/pixman/pixman/pixman.c
+++ b/lib/pixman/pixman/pixman.c
@@ -455,6 +455,14 @@ analyze_extent (pixman_image_t       *image,
 	    height = params[1];
 	    break;
 
+	case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
+	    params = image->common.filter_params;
+	    x_off = - pixman_fixed_e - ((params[0] - pixman_fixed_1) >> 1);
+	    y_off = - pixman_fixed_e - ((params[1] - pixman_fixed_1) >> 1);
+	    width = params[0];
+	    height = params[1];
+	    break;
+	    
 	case PIXMAN_FILTER_GOOD:
 	case PIXMAN_FILTER_BEST:
 	case PIXMAN_FILTER_BILINEAR:
@@ -573,11 +581,13 @@ pixman_image_composite32 (pixman_op_t      op,
                           int32_t          height)
 {
     pixman_format_code_t src_format, mask_format, dest_format;
-    uint32_t src_flags, mask_flags, dest_flags;
     pixman_region32_t region;
     pixman_box32_t extents;
     pixman_implementation_t *imp;
     pixman_composite_func_t func;
+    pixman_composite_info_t info;
+    const pixman_box32_t *pbox;
+    int n;
 
     _pixman_image_validate (src);
     if (mask)
@@ -585,27 +595,27 @@ pixman_image_composite32 (pixman_op_t      op,
     _pixman_image_validate (dest);
 
     src_format = src->common.extended_format_code;
-    src_flags = src->common.flags;
+    info.src_flags = src->common.flags;
 
-    if (mask)
+    if (mask && !(mask->common.flags & FAST_PATH_IS_OPAQUE))
     {
 	mask_format = mask->common.extended_format_code;
-	mask_flags = mask->common.flags;
+	info.mask_flags = mask->common.flags;
     }
     else
     {
 	mask_format = PIXMAN_null;
-	mask_flags = FAST_PATH_IS_OPAQUE;
+	info.mask_flags = FAST_PATH_IS_OPAQUE;
     }
 
     dest_format = dest->common.extended_format_code;
-    dest_flags = dest->common.flags;
+    info.dest_flags = dest->common.flags;
 
     /* Check for pixbufs */
     if ((mask_format == PIXMAN_a8r8g8b8 || mask_format == PIXMAN_a8b8g8r8) &&
 	(src->type == BITS && src->bits.bits == mask->bits.bits)	   &&
 	(src->common.repeat == mask->common.repeat)			   &&
-	(src_flags & mask_flags & FAST_PATH_ID_TRANSFORM)		   &&
+	(info.src_flags & info.mask_flags & FAST_PATH_ID_TRANSFORM)	   &&
 	(src_x == mask_x && src_y == mask_y))
     {
 	if (src_format == PIXMAN_x8b8g8r8)
@@ -630,7 +640,7 @@ pixman_image_composite32 (pixman_op_t      op,
     extents.x2 -= dest_x - src_x;
     extents.y2 -= dest_y - src_y;
 
-    if (!analyze_extent (src, &extents, &src_flags))
+    if (!analyze_extent (src, &extents, &info.src_flags))
 	goto out;
 
     extents.x1 -= src_x - mask_x;
@@ -638,7 +648,7 @@ pixman_image_composite32 (pixman_op_t      op,
     extents.x2 -= src_x - mask_x;
     extents.y2 -= src_y - mask_y;
 
-    if (!analyze_extent (mask, &extents, &mask_flags))
+    if (!analyze_extent (mask, &extents, &info.mask_flags))
 	goto out;
 
     /* If the clip is within the source samples, and the samples are
@@ -651,16 +661,16 @@ pixman_image_composite32 (pixman_op_t      op,
 			 FAST_PATH_BILINEAR_FILTER |			\
 			 FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR)
 
-    if ((src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
-	(src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
+    if ((info.src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
+	(info.src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
     {
-	src_flags |= FAST_PATH_IS_OPAQUE;
+	info.src_flags |= FAST_PATH_IS_OPAQUE;
     }
 
-    if ((mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
-	(mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
+    if ((info.mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
+	(info.mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
     {
-	mask_flags |= FAST_PATH_IS_OPAQUE;
+	info.mask_flags |= FAST_PATH_IS_OPAQUE;
     }
 
     /*
@@ -668,42 +678,35 @@ pixman_image_composite32 (pixman_op_t      op,
      * if the src or dest are opaque. The output operator should be
      * mathematically equivalent to the source.
      */
-    op = optimize_operator (op, src_flags, mask_flags, dest_flags);
+    info.op = optimize_operator (op, info.src_flags, info.mask_flags, info.dest_flags);
 
-    if (_pixman_implementation_lookup_composite (
-	    get_implementation (), op,
-	    src_format, src_flags, mask_format, mask_flags, dest_format, dest_flags,
-	    &imp, &func))
-    {
-	pixman_composite_info_t info;
-	const pixman_box32_t *pbox;
-	int n;
+    _pixman_implementation_lookup_composite (
+	get_implementation (), info.op,
+	src_format, info.src_flags,
+	mask_format, info.mask_flags,
+	dest_format, info.dest_flags,
+	&imp, &func);
 
-	info.op = op;
-	info.src_image = src;
-	info.mask_image = mask;
-	info.dest_image = dest;
-	info.src_flags = src_flags;
-	info.mask_flags = mask_flags;
-	info.dest_flags = dest_flags;
+    info.src_image = src;
+    info.mask_image = mask;
+    info.dest_image = dest;
 
-	pbox = pixman_region32_rectangles (&region, &n);
+    pbox = pixman_region32_rectangles (&region, &n);
 
-	while (n--)
-	{
-	    info.src_x = pbox->x1 + src_x - dest_x;
-	    info.src_y = pbox->y1 + src_y - dest_y;
-	    info.mask_x = pbox->x1 + mask_x - dest_x;
-	    info.mask_y = pbox->y1 + mask_y - dest_y;
-	    info.dest_x = pbox->x1;
-	    info.dest_y = pbox->y1;
-	    info.width = pbox->x2 - pbox->x1;
-	    info.height = pbox->y2 - pbox->y1;
-
-	    func (imp, &info);
-
-	    pbox++;
-	}
+    while (n--)
+    {
+	info.src_x = pbox->x1 + src_x - dest_x;
+	info.src_y = pbox->y1 + src_y - dest_y;
+	info.mask_x = pbox->x1 + mask_x - dest_x;
+	info.mask_y = pbox->y1 + mask_y - dest_y;
+	info.dest_x = pbox->x1;
+	info.dest_y = pbox->y1;
+	info.width = pbox->x2 - pbox->x1;
+	info.height = pbox->y2 - pbox->y1;
+
+	func (imp, &info);
+
+	pbox++;
     }
 
 out:
@@ -758,10 +761,10 @@ pixman_fill (uint32_t *bits,
              int       y,
              int       width,
              int       height,
-             uint32_t xor)
+             uint32_t  filler)
 {
     return _pixman_implementation_fill (
-	get_implementation(), bits, stride, bpp, x, y, width, height, xor);
+	get_implementation(), bits, stride, bpp, x, y, width, height, filler);
 }
 
 static uint32_t
@@ -820,7 +823,7 @@ color_to_pixel (const pixman_color_t *color,
 	c = c >> 24;
     else if (format == PIXMAN_r5g6b5 ||
              format == PIXMAN_b5g6r5)
-	c = CONVERT_8888_TO_0565 (c);
+	c = convert_8888_to_0565 (c);
 
 #if 0
     printf ("color: %x %x %x %x\n", color->alpha, color->red, color->green, color->blue);
diff --git a/lib/pixman/pixman/pixman.h b/lib/pixman/pixman/pixman.h
index c8723cf41..7ff9fb52a 100644
--- a/lib/pixman/pixman/pixman.h
+++ b/lib/pixman/pixman/pixman.h
@@ -226,6 +226,9 @@ pixman_bool_t pixman_transform_is_inverse       (const struct pixman_transform *
 /*
  * Floating point matrices
  */
+typedef struct pixman_f_transform pixman_f_transform_t;
+typedef struct pixman_f_vector pixman_f_vector_t;
+
 struct pixman_f_vector
 {
     double  v[3];
@@ -289,7 +292,28 @@ typedef enum
     PIXMAN_FILTER_BEST,
     PIXMAN_FILTER_NEAREST,
     PIXMAN_FILTER_BILINEAR,
-    PIXMAN_FILTER_CONVOLUTION
+    PIXMAN_FILTER_CONVOLUTION,
+
+    /* The SEPARABLE_CONVOLUTION filter takes the following parameters:
+     *
+     *         width:           integer given as 16.16 fixpoint number
+     *         height:          integer given as 16.16 fixpoint number
+     *         x_phase_bits:	integer given as 16.16 fixpoint
+     *         y_phase_bits:	integer given as 16.16 fixpoint
+     *         xtables:         (1 << x_phase_bits) tables of size width
+     *         ytables:         (1 << y_phase_bits) tables of size height
+     *
+     * When sampling at (x, y), the location is first rounded to one of
+     * n_x_phases * n_y_phases subpixel positions. These subpixel positions
+     * determine an xtable and a ytable to use.
+     *
+     * Conceptually a width x height matrix is then formed in which each entry
+     * is the product of the corresponding entries in the x and y tables.
+     * This matrix is then aligned with the image pixels such that its center
+     * is as close as possible to the subpixel location chosen earlier. Then
+     * the image is convolved with the matrix and the resulting pixel returned.
+     */
+    PIXMAN_FILTER_SEPARABLE_CONVOLUTION
 } pixman_filter_t;
 
 typedef enum
@@ -807,6 +831,33 @@ int             pixman_image_get_height              (pixman_image_t
 int		pixman_image_get_stride              (pixman_image_t               *image); /* in bytes */
 int		pixman_image_get_depth               (pixman_image_t		   *image);
 pixman_format_code_t pixman_image_get_format	     (pixman_image_t		   *image);
+
+typedef enum
+{
+    PIXMAN_KERNEL_IMPULSE,
+    PIXMAN_KERNEL_BOX,
+    PIXMAN_KERNEL_LINEAR,
+    PIXMAN_KERNEL_CUBIC,
+    PIXMAN_KERNEL_GAUSSIAN,
+    PIXMAN_KERNEL_LANCZOS2,
+    PIXMAN_KERNEL_LANCZOS3,
+    PIXMAN_KERNEL_LANCZOS3_STRETCHED       /* Jim Blinn's 'nice' filter */
+} pixman_kernel_t;
+
+/* Create the parameter list for a SEPARABLE_CONVOLUTION filter
+ * with the given kernels and scale parameters.
+ */
+pixman_fixed_t *
+pixman_filter_create_separable_convolution (int             *n_values,
+					    pixman_fixed_t   scale_x,
+					    pixman_fixed_t   scale_y,
+					    pixman_kernel_t  reconstruct_x,
+					    pixman_kernel_t  reconstruct_y,
+					    pixman_kernel_t  sample_x,
+					    pixman_kernel_t  sample_y,
+					    int              subsample_bits_x,
+					    int              subsample_bits_y);
+
 pixman_bool_t	pixman_image_fill_rectangles	     (pixman_op_t		    op,
 						      pixman_image_t		   *image,
 						      const pixman_color_t	   *color,
diff --git a/lib/pixman/test/Makefile.am b/lib/pixman/test/Makefile.am
index eeb3679f0..5d901d572 100644
--- a/lib/pixman/test/Makefile.am
+++ b/lib/pixman/test/Makefile.am
@@ -3,11 +3,11 @@ include $(top_srcdir)/test/Makefile.sources
 AM_CFLAGS = $(OPENMP_CFLAGS)
 AM_LDFLAGS = $(OPENMP_CFLAGS) $(TESTPROGS_EXTRA_LDFLAGS)
 LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm  $(PNG_LIBS)
-INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(PNG_CFLAGS)
+AM_CPPFLAGS = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(PNG_CFLAGS)
 
 libutils_la_SOURCES = $(libutils_sources) $(libutils_headers)
 
 noinst_LTLIBRARIES = libutils.la
-noinst_PROGRAMS = $(TESTPROGRAMS) $(BENCHMARKS)
+noinst_PROGRAMS = $(TESTPROGRAMS) $(OTHERPROGRAMS)
 
 TESTS = $(TESTPROGRAMS)
diff --git a/lib/pixman/test/Makefile.in b/lib/pixman/test/Makefile.in
index 21477cc68..48974546f 100644
--- a/lib/pixman/test/Makefile.in
+++ b/lib/pixman/test/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.12.3 from Makefile.am.
+# Makefile.in generated by automake 1.12.6 from Makefile.am.
 # @configure_input@
 
 # Copyright (C) 1994-2012 Free Software Foundation, Inc.
@@ -66,7 +66,7 @@ CONFIG_CLEAN_FILES =
 CONFIG_CLEAN_VPATH_FILES =
 LTLIBRARIES = $(noinst_LTLIBRARIES)
 libutils_la_LIBADD =
-am__objects_1 = utils.lo
+am__objects_1 = utils.lo utils-prng.lo
 am__objects_2 =
 am_libutils_la_OBJECTS = $(am__objects_1) $(am__objects_2)
 libutils_la_OBJECTS = $(am_libutils_la_OBJECTS)
@@ -74,18 +74,20 @@ AM_V_lt = $(am__v_lt_@AM_V@)
 am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
 am__v_lt_0 = --silent
 am__v_lt_1 = 
-am__EXEEXT_1 = a1-trap-test$(EXEEXT) pdf-op-test$(EXEEXT) \
-	region-test$(EXEEXT) region-translate-test$(EXEEXT) \
-	combiner-test$(EXEEXT) fetch-test$(EXEEXT) \
-	rotate-test$(EXEEXT) oob-test$(EXEEXT) infinite-loop$(EXEEXT) \
-	trap-crasher$(EXEEXT) alpha-loop$(EXEEXT) \
-	scaling-crash-test$(EXEEXT) scaling-helpers-test$(EXEEXT) \
-	gradient-crash-test$(EXEEXT) region-contains-test$(EXEEXT) \
-	alphamap$(EXEEXT) stress-test$(EXEEXT) \
+am__EXEEXT_1 = prng-test$(EXEEXT) a1-trap-test$(EXEEXT) \
+	pdf-op-test$(EXEEXT) region-test$(EXEEXT) \
+	region-translate-test$(EXEEXT) combiner-test$(EXEEXT) \
+	pixel-test$(EXEEXT) fetch-test$(EXEEXT) rotate-test$(EXEEXT) \
+	oob-test$(EXEEXT) infinite-loop$(EXEEXT) trap-crasher$(EXEEXT) \
+	alpha-loop$(EXEEXT) scaling-crash-test$(EXEEXT) \
+	scaling-helpers-test$(EXEEXT) gradient-crash-test$(EXEEXT) \
+	region-contains-test$(EXEEXT) alphamap$(EXEEXT) \
+	matrix-test$(EXEEXT) stress-test$(EXEEXT) \
 	composite-traps-test$(EXEEXT) blitters-test$(EXEEXT) \
 	glyph-test$(EXEEXT) scaling-test$(EXEEXT) affine-test$(EXEEXT) \
 	composite$(EXEEXT)
-am__EXEEXT_2 = lowlevel-blt-bench$(EXEEXT)
+am__EXEEXT_2 = lowlevel-blt-bench$(EXEEXT) radial-perf-test$(EXEEXT) \
+	check-formats$(EXEEXT)
 PROGRAMS = $(noinst_PROGRAMS)
 a1_trap_test_SOURCES = a1-trap-test.c
 a1_trap_test_OBJECTS = a1-trap-test.$(OBJEXT)
@@ -113,6 +115,11 @@ blitters_test_OBJECTS = blitters-test.$(OBJEXT)
 blitters_test_LDADD = $(LDADD)
 blitters_test_DEPENDENCIES = libutils.la \
 	$(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1)
+check_formats_SOURCES = check-formats.c
+check_formats_OBJECTS = check-formats.$(OBJEXT)
+check_formats_LDADD = $(LDADD)
+check_formats_DEPENDENCIES = libutils.la \
+	$(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1)
 combiner_test_SOURCES = combiner-test.c
 combiner_test_OBJECTS = combiner-test.$(OBJEXT)
 combiner_test_LDADD = $(LDADD)
@@ -153,6 +160,11 @@ lowlevel_blt_bench_OBJECTS = lowlevel-blt-bench.$(OBJEXT)
 lowlevel_blt_bench_LDADD = $(LDADD)
 lowlevel_blt_bench_DEPENDENCIES = libutils.la \
 	$(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1)
+matrix_test_SOURCES = matrix-test.c
+matrix_test_OBJECTS = matrix-test.$(OBJEXT)
+matrix_test_LDADD = $(LDADD)
+matrix_test_DEPENDENCIES = libutils.la \
+	$(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1)
 oob_test_SOURCES = oob-test.c
 oob_test_OBJECTS = oob-test.$(OBJEXT)
 oob_test_LDADD = $(LDADD)
@@ -163,6 +175,21 @@ pdf_op_test_OBJECTS = pdf-op-test.$(OBJEXT)
 pdf_op_test_LDADD = $(LDADD)
 pdf_op_test_DEPENDENCIES = libutils.la \
 	$(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1)
+pixel_test_SOURCES = pixel-test.c
+pixel_test_OBJECTS = pixel-test.$(OBJEXT)
+pixel_test_LDADD = $(LDADD)
+pixel_test_DEPENDENCIES = libutils.la \
+	$(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1)
+prng_test_SOURCES = prng-test.c
+prng_test_OBJECTS = prng-test.$(OBJEXT)
+prng_test_LDADD = $(LDADD)
+prng_test_DEPENDENCIES = libutils.la \
+	$(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1)
+radial_perf_test_SOURCES = radial-perf-test.c
+radial_perf_test_OBJECTS = radial-perf-test.$(OBJEXT)
+radial_perf_test_LDADD = $(LDADD)
+radial_perf_test_DEPENDENCIES = libutils.la \
+	$(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1)
 region_contains_test_SOURCES = region-contains-test.c
 region_contains_test_OBJECTS = region-contains-test.$(OBJEXT)
 region_contains_test_LDADD = $(LDADD)
@@ -243,21 +270,23 @@ am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
 am__v_CCLD_0 = @echo "  CCLD    " $@;
 am__v_CCLD_1 = 
 SOURCES = $(libutils_la_SOURCES) a1-trap-test.c affine-test.c \
-	alpha-loop.c alphamap.c blitters-test.c combiner-test.c \
-	composite.c composite-traps-test.c fetch-test.c glyph-test.c \
-	gradient-crash-test.c infinite-loop.c lowlevel-blt-bench.c \
-	oob-test.c pdf-op-test.c region-contains-test.c region-test.c \
-	region-translate-test.c rotate-test.c scaling-crash-test.c \
-	scaling-helpers-test.c scaling-test.c stress-test.c \
-	trap-crasher.c
+	alpha-loop.c alphamap.c blitters-test.c check-formats.c \
+	combiner-test.c composite.c composite-traps-test.c \
+	fetch-test.c glyph-test.c gradient-crash-test.c \
+	infinite-loop.c lowlevel-blt-bench.c matrix-test.c oob-test.c \
+	pdf-op-test.c pixel-test.c prng-test.c radial-perf-test.c \
+	region-contains-test.c region-test.c region-translate-test.c \
+	rotate-test.c scaling-crash-test.c scaling-helpers-test.c \
+	scaling-test.c stress-test.c trap-crasher.c
 DIST_SOURCES = $(libutils_la_SOURCES) a1-trap-test.c affine-test.c \
-	alpha-loop.c alphamap.c blitters-test.c combiner-test.c \
-	composite.c composite-traps-test.c fetch-test.c glyph-test.c \
-	gradient-crash-test.c infinite-loop.c lowlevel-blt-bench.c \
-	oob-test.c pdf-op-test.c region-contains-test.c region-test.c \
-	region-translate-test.c rotate-test.c scaling-crash-test.c \
-	scaling-helpers-test.c scaling-test.c stress-test.c \
-	trap-crasher.c
+	alpha-loop.c alphamap.c blitters-test.c check-formats.c \
+	combiner-test.c composite.c composite-traps-test.c \
+	fetch-test.c glyph-test.c gradient-crash-test.c \
+	infinite-loop.c lowlevel-blt-bench.c matrix-test.c oob-test.c \
+	pdf-op-test.c pixel-test.c prng-test.c radial-perf-test.c \
+	region-contains-test.c region-test.c region-translate-test.c \
+	rotate-test.c scaling-crash-test.c scaling-helpers-test.c \
+	scaling-test.c stress-test.c trap-crasher.c
 am__can_run_installinfo = \
   case $$AM_UPDATE_INFO_DIR in \
     n|no|NO) false;; \
@@ -344,6 +373,8 @@ PIXMAN_VERSION_MAJOR = @PIXMAN_VERSION_MAJOR@
 PIXMAN_VERSION_MICRO = @PIXMAN_VERSION_MICRO@
 PIXMAN_VERSION_MINOR = @PIXMAN_VERSION_MINOR@
 PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
 PNG_CFLAGS = @PNG_CFLAGS@
 PNG_LIBS = @PNG_LIBS@
 PTHREAD_LDFLAGS = @PTHREAD_LDFLAGS@
@@ -415,11 +446,13 @@ top_srcdir = @top_srcdir@
 
 # Tests (sorted by expected completion time)
 TESTPROGRAMS = \
+	prng-test		\
 	a1-trap-test		\
 	pdf-op-test		\
 	region-test		\
 	region-translate-test	\
 	combiner-test		\
+	pixel-test		\
 	fetch-test		\
 	rotate-test		\
 	oob-test		\
@@ -431,6 +464,7 @@ TESTPROGRAMS = \
 	gradient-crash-test	\
 	region-contains-test	\
 	alphamap		\
+	matrix-test		\
 	stress-test		\
 	composite-traps-test	\
 	blitters-test		\
@@ -441,25 +475,29 @@ TESTPROGRAMS = \
 	$(NULL)
 
 
-# Benchmarks
-BENCHMARKS = \
+# Other programs
+OTHERPROGRAMS = \
 	lowlevel-blt-bench	\
+	radial-perf-test	\
+        check-formats           \
 	$(NULL)
 
 
 # Utility functions
 libutils_sources = \
 	utils.c			\
+	utils-prng.c		\
 	$(NULL)
 
 libutils_headers = \
 	utils.h			\
+	utils-prng.h		\
 	$(NULL)
 
 AM_CFLAGS = $(OPENMP_CFLAGS)
 AM_LDFLAGS = $(OPENMP_CFLAGS) $(TESTPROGS_EXTRA_LDFLAGS)
 LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm  $(PNG_LIBS)
-INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(PNG_CFLAGS)
+AM_CPPFLAGS = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(PNG_CFLAGS)
 libutils_la_SOURCES = $(libutils_sources) $(libutils_headers)
 noinst_LTLIBRARIES = libutils.la
 all: all-am
@@ -534,6 +572,9 @@ alphamap$(EXEEXT): $(alphamap_OBJECTS) $(alphamap_DEPENDENCIES) $(EXTRA_alphamap
 blitters-test$(EXEEXT): $(blitters_test_OBJECTS) $(blitters_test_DEPENDENCIES) $(EXTRA_blitters_test_DEPENDENCIES) 
 	@rm -f blitters-test$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(blitters_test_OBJECTS) $(blitters_test_LDADD) $(LIBS)
+check-formats$(EXEEXT): $(check_formats_OBJECTS) $(check_formats_DEPENDENCIES) $(EXTRA_check_formats_DEPENDENCIES) 
+	@rm -f check-formats$(EXEEXT)
+	$(AM_V_CCLD)$(LINK) $(check_formats_OBJECTS) $(check_formats_LDADD) $(LIBS)
 combiner-test$(EXEEXT): $(combiner_test_OBJECTS) $(combiner_test_DEPENDENCIES) $(EXTRA_combiner_test_DEPENDENCIES) 
 	@rm -f combiner-test$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(combiner_test_OBJECTS) $(combiner_test_LDADD) $(LIBS)
@@ -558,12 +599,24 @@ infinite-loop$(EXEEXT): $(infinite_loop_OBJECTS) $(infinite_loop_DEPENDENCIES) $
 lowlevel-blt-bench$(EXEEXT): $(lowlevel_blt_bench_OBJECTS) $(lowlevel_blt_bench_DEPENDENCIES) $(EXTRA_lowlevel_blt_bench_DEPENDENCIES) 
 	@rm -f lowlevel-blt-bench$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(lowlevel_blt_bench_OBJECTS) $(lowlevel_blt_bench_LDADD) $(LIBS)
+matrix-test$(EXEEXT): $(matrix_test_OBJECTS) $(matrix_test_DEPENDENCIES) $(EXTRA_matrix_test_DEPENDENCIES) 
+	@rm -f matrix-test$(EXEEXT)
+	$(AM_V_CCLD)$(LINK) $(matrix_test_OBJECTS) $(matrix_test_LDADD) $(LIBS)
 oob-test$(EXEEXT): $(oob_test_OBJECTS) $(oob_test_DEPENDENCIES) $(EXTRA_oob_test_DEPENDENCIES) 
 	@rm -f oob-test$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(oob_test_OBJECTS) $(oob_test_LDADD) $(LIBS)
 pdf-op-test$(EXEEXT): $(pdf_op_test_OBJECTS) $(pdf_op_test_DEPENDENCIES) $(EXTRA_pdf_op_test_DEPENDENCIES) 
 	@rm -f pdf-op-test$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(pdf_op_test_OBJECTS) $(pdf_op_test_LDADD) $(LIBS)
+pixel-test$(EXEEXT): $(pixel_test_OBJECTS) $(pixel_test_DEPENDENCIES) $(EXTRA_pixel_test_DEPENDENCIES) 
+	@rm -f pixel-test$(EXEEXT)
+	$(AM_V_CCLD)$(LINK) $(pixel_test_OBJECTS) $(pixel_test_LDADD) $(LIBS)
+prng-test$(EXEEXT): $(prng_test_OBJECTS) $(prng_test_DEPENDENCIES) $(EXTRA_prng_test_DEPENDENCIES) 
+	@rm -f prng-test$(EXEEXT)
+	$(AM_V_CCLD)$(LINK) $(prng_test_OBJECTS) $(prng_test_LDADD) $(LIBS)
+radial-perf-test$(EXEEXT): $(radial_perf_test_OBJECTS) $(radial_perf_test_DEPENDENCIES) $(EXTRA_radial_perf_test_DEPENDENCIES) 
+	@rm -f radial-perf-test$(EXEEXT)
+	$(AM_V_CCLD)$(LINK) $(radial_perf_test_OBJECTS) $(radial_perf_test_LDADD) $(LIBS)
 region-contains-test$(EXEEXT): $(region_contains_test_OBJECTS) $(region_contains_test_DEPENDENCIES) $(EXTRA_region_contains_test_DEPENDENCIES) 
 	@rm -f region-contains-test$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(region_contains_test_OBJECTS) $(region_contains_test_LDADD) $(LIBS)
@@ -603,6 +656,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alpha-loop.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alphamap.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blitters-test.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/check-formats.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/combiner-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/composite-traps-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/composite.Po@am__quote@
@@ -611,8 +665,12 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gradient-crash-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/infinite-loop.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lowlevel-blt-bench.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matrix-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oob-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdf-op-test.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixel-test.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/prng-test.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radial-perf-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/region-contains-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/region-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/region-translate-test.Po@am__quote@
@@ -622,6 +680,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scaling-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stress-test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/trap-crasher.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utils-prng.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utils.Plo@am__quote@
 
 .c.o:
diff --git a/lib/pixman/test/Makefile.sources b/lib/pixman/test/Makefile.sources
index 077897161..b5fc740f3 100644
--- a/lib/pixman/test/Makefile.sources
+++ b/lib/pixman/test/Makefile.sources
@@ -1,10 +1,12 @@
 # Tests (sorted by expected completion time)
 TESTPROGRAMS =			\
+	prng-test		\
 	a1-trap-test		\
 	pdf-op-test		\
 	region-test		\
 	region-translate-test	\
 	combiner-test		\
+	pixel-test		\
 	fetch-test		\
 	rotate-test		\
 	oob-test		\
@@ -16,6 +18,7 @@ TESTPROGRAMS =			\
 	gradient-crash-test	\
 	region-contains-test	\
 	alphamap		\
+	matrix-test		\
 	stress-test		\
 	composite-traps-test	\
 	blitters-test		\
@@ -25,16 +28,20 @@ TESTPROGRAMS =			\
 	composite		\
 	$(NULL)
 
-# Benchmarks
-BENCHMARKS =			\
+# Other programs
+OTHERPROGRAMS =                 \
 	lowlevel-blt-bench	\
+	radial-perf-test	\
+        check-formats           \
 	$(NULL)
 
 # Utility functions
 libutils_sources =		\
 	utils.c			\
+	utils-prng.c		\
 	$(NULL)
 
 libutils_headers =		\
 	utils.h			\
+	utils-prng.h		\
 	$(NULL)
diff --git a/lib/pixman/test/a1-trap-test.c b/lib/pixman/test/a1-trap-test.c
index 93c6caa14..c2b488316 100644
--- a/lib/pixman/test/a1-trap-test.c
+++ b/lib/pixman/test/a1-trap-test.c
@@ -45,6 +45,14 @@ main (int argc, char **argv)
     assert (bits[1] == 0xffffffff);
     assert (bits[1 * WIDTH + 0] == 0xffffffff);
     assert (bits[1 * WIDTH + 1] == 0xffffffff);
+
+    /* The check-formats program depends on operator_name() and format_name() returning
+     * exactly these strings, so if their output changes, check-formats.c must be updated too.
+     */
+    assert (
+        strcmp (operator_name (PIXMAN_OP_DISJOINT_OVER), "PIXMAN_OP_DISJOINT_OVER") == 0);
+    assert (
+        strcmp (format_name (PIXMAN_r5g6b5), "r5g6b5") == 0);
     
     return 0;
 }
diff --git a/lib/pixman/test/affine-test.c b/lib/pixman/test/affine-test.c
index 7bc28b4cd..2506250db 100644
--- a/lib/pixman/test/affine-test.c
+++ b/lib/pixman/test/affine-test.c
@@ -48,18 +48,18 @@ test_composite (int      testnum,
     uint32_t           crc32;
     FLOAT_REGS_CORRUPTION_DETECTOR_START ();
 
-    lcg_srand (testnum);
+    prng_srand (testnum);
 
-    src_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
-    dst_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
-    op = (lcg_rand_n (2) == 0) ? PIXMAN_OP_SRC : PIXMAN_OP_OVER;
+    src_bpp = (prng_rand_n (2) == 0) ? 2 : 4;
+    dst_bpp = (prng_rand_n (2) == 0) ? 2 : 4;
+    op = (prng_rand_n (2) == 0) ? PIXMAN_OP_SRC : PIXMAN_OP_OVER;
 
-    src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
-    src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
-    dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1;
-    dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1;
-    src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp;
-    dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp;
+    src_width = prng_rand_n (MAX_SRC_WIDTH) + 1;
+    src_height = prng_rand_n (MAX_SRC_HEIGHT) + 1;
+    dst_width = prng_rand_n (MAX_DST_WIDTH) + 1;
+    dst_height = prng_rand_n (MAX_DST_HEIGHT) + 1;
+    src_stride = src_width * src_bpp + prng_rand_n (MAX_STRIDE) * src_bpp;
+    dst_stride = dst_width * dst_bpp + prng_rand_n (MAX_STRIDE) * dst_bpp;
 
     if (src_stride & 3)
 	src_stride += 2;
@@ -67,26 +67,23 @@ test_composite (int      testnum,
     if (dst_stride & 3)
 	dst_stride += 2;
 
-    src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2);
-    src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2);
-    dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2);
-    dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2);
-    w = lcg_rand_n (dst_width * 3 / 2 - dst_x);
-    h = lcg_rand_n (dst_height * 3 / 2 - dst_y);
+    src_x = -(src_width / 4) + prng_rand_n (src_width * 3 / 2);
+    src_y = -(src_height / 4) + prng_rand_n (src_height * 3 / 2);
+    dst_x = -(dst_width / 4) + prng_rand_n (dst_width * 3 / 2);
+    dst_y = -(dst_height / 4) + prng_rand_n (dst_height * 3 / 2);
+    w = prng_rand_n (dst_width * 3 / 2 - dst_x);
+    h = prng_rand_n (dst_height * 3 / 2 - dst_y);
 
     srcbuf = (uint32_t *)malloc (src_stride * src_height);
     dstbuf = (uint32_t *)malloc (dst_stride * dst_height);
 
-    for (i = 0; i < src_stride * src_height; i++)
-	*((uint8_t *)srcbuf + i) = lcg_rand_n (256);
+    prng_randmemset (srcbuf, src_stride * src_height, 0);
+    prng_randmemset (dstbuf, dst_stride * dst_height, 0);
 
-    for (i = 0; i < dst_stride * dst_height; i++)
-	*((uint8_t *)dstbuf + i) = lcg_rand_n (256);
-
-    src_fmt = src_bpp == 4 ? (lcg_rand_n (2) == 0 ?
+    src_fmt = src_bpp == 4 ? (prng_rand_n (2) == 0 ?
                               PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5;
 
-    dst_fmt = dst_bpp == 4 ? (lcg_rand_n (2) == 0 ?
+    dst_fmt = dst_bpp == 4 ? (prng_rand_n (2) == 0 ?
                               PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5;
 
     src_img = pixman_image_create_bits (
@@ -100,29 +97,29 @@ test_composite (int      testnum,
 
     pixman_transform_init_identity (&transform);
 
-    if (lcg_rand_n (3) > 0)
+    if (prng_rand_n (3) > 0)
     {
-	scale_x = -65536 * 3 + lcg_rand_N (65536 * 6);
-	if (lcg_rand_n (2))
-	    scale_y = -65536 * 3 + lcg_rand_N (65536 * 6);
+	scale_x = -65536 * 3 + prng_rand_n (65536 * 6);
+	if (prng_rand_n (2))
+	    scale_y = -65536 * 3 + prng_rand_n (65536 * 6);
 	else
 	    scale_y = scale_x;
 	pixman_transform_init_scale (&transform, scale_x, scale_y);
     }
-    if (lcg_rand_n (3) > 0)
+    if (prng_rand_n (3) > 0)
     {
-	translate_x = -65536 * 3 + lcg_rand_N (6 * 65536);
-	if (lcg_rand_n (2))
-	    translate_y = -65536 * 3 + lcg_rand_N (6 * 65536);
+	translate_x = -65536 * 3 + prng_rand_n (6 * 65536);
+	if (prng_rand_n (2))
+	    translate_y = -65536 * 3 + prng_rand_n (6 * 65536);
 	else
 	    translate_y = translate_x;
 	pixman_transform_translate (&transform, NULL, translate_x, translate_y);
     }
 
-    if (lcg_rand_n (4) > 0)
+    if (prng_rand_n (4) > 0)
     {
 	int c, s, tx = 0, ty = 0;
-	switch (lcg_rand_n (4))
+	switch (prng_rand_n (4))
 	{
 	case 0:
 	    /* 90 degrees */
@@ -145,32 +142,32 @@ test_composite (int      testnum,
 	    break;
 	default:
 	    /* arbitrary rotation */
-	    c = lcg_rand_N (2 * 65536) - 65536;
-	    s = lcg_rand_N (2 * 65536) - 65536;
+	    c = prng_rand_n (2 * 65536) - 65536;
+	    s = prng_rand_n (2 * 65536) - 65536;
 	    break;
 	}
 	pixman_transform_rotate (&transform, NULL, c, s);
 	pixman_transform_translate (&transform, NULL, tx, ty);
     }
 
-    if (lcg_rand_n (8) == 0)
+    if (prng_rand_n (8) == 0)
     {
 	/* Flip random bits */
 	int maxflipcount = 8;
 	while (maxflipcount--)
 	{
-	    int i = lcg_rand_n (2);
-	    int j = lcg_rand_n (3);
-	    int bitnum = lcg_rand_n (32);
+	    int i = prng_rand_n (2);
+	    int j = prng_rand_n (3);
+	    int bitnum = prng_rand_n (32);
 	    transform.matrix[i][j] ^= 1 << bitnum;
-	    if (lcg_rand_n (2))
+	    if (prng_rand_n (2))
 		break;
 	}
     }
 
     pixman_image_set_transform (src_img, &transform);
 
-    switch (lcg_rand_n (4))
+    switch (prng_rand_n (4))
     {
     case 0:
 	repeat = PIXMAN_REPEAT_NONE;
@@ -193,7 +190,7 @@ test_composite (int      testnum,
     }
     pixman_image_set_repeat (src_img, repeat);
 
-    if (lcg_rand_n (2))
+    if (prng_rand_n (2))
 	pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0);
     else
 	pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
@@ -203,9 +200,9 @@ test_composite (int      testnum,
 #define M(r,c)								\
 	transform.matrix[r][c]
 
-	printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt);
-	printf ("op=%d, repeat=%d, transform=\n",
-	        op, repeat);
+	printf ("src_fmt=%s, dst_fmt=%s\n", format_name (src_fmt), format_name (dst_fmt));
+	printf ("op=%s, repeat=%d, transform=\n",
+	        operator_name (op), repeat);
 	printf (" { { { 0x%08x, 0x%08x, 0x%08x },\n"
 		"     { 0x%08x, 0x%08x, 0x%08x },\n"
 		"     { 0x%08x, 0x%08x, 0x%08x },\n"
@@ -220,19 +217,19 @@ test_composite (int      testnum,
 	printf ("w=%d, h=%d\n", w, h);
     }
 
-    if (lcg_rand_n (8) == 0)
+    if (prng_rand_n (8) == 0)
     {
 	pixman_box16_t clip_boxes[2];
-	int            n = lcg_rand_n (2) + 1;
+	int            n = prng_rand_n (2) + 1;
 
 	for (i = 0; i < n; i++)
 	{
-	    clip_boxes[i].x1 = lcg_rand_n (src_width);
-	    clip_boxes[i].y1 = lcg_rand_n (src_height);
+	    clip_boxes[i].x1 = prng_rand_n (src_width);
+	    clip_boxes[i].y1 = prng_rand_n (src_height);
 	    clip_boxes[i].x2 =
-		clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1);
+		clip_boxes[i].x1 + prng_rand_n (src_width - clip_boxes[i].x1);
 	    clip_boxes[i].y2 =
-		clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1);
+		clip_boxes[i].y1 + prng_rand_n (src_height - clip_boxes[i].y1);
 
 	    if (verbose)
 	    {
@@ -248,18 +245,18 @@ test_composite (int      testnum,
 	pixman_region_fini (&clip);
     }
 
-    if (lcg_rand_n (8) == 0)
+    if (prng_rand_n (8) == 0)
     {
 	pixman_box16_t clip_boxes[2];
-	int            n = lcg_rand_n (2) + 1;
+	int            n = prng_rand_n (2) + 1;
 	for (i = 0; i < n; i++)
 	{
-	    clip_boxes[i].x1 = lcg_rand_n (dst_width);
-	    clip_boxes[i].y1 = lcg_rand_n (dst_height);
+	    clip_boxes[i].x1 = prng_rand_n (dst_width);
+	    clip_boxes[i].y1 = prng_rand_n (dst_height);
 	    clip_boxes[i].x2 =
-		clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1);
+		clip_boxes[i].x1 + prng_rand_n (dst_width - clip_boxes[i].x1);
 	    clip_boxes[i].y2 =
-		clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1);
+		clip_boxes[i].y1 + prng_rand_n (dst_height - clip_boxes[i].y1);
 
 	    if (verbose)
 	    {
@@ -310,11 +307,11 @@ test_composite (int      testnum,
 }
 
 #if BILINEAR_INTERPOLATION_BITS == 8
-#define CHECKSUM 0x1EF2175A
+#define CHECKSUM 0x2CDF1F07
 #elif BILINEAR_INTERPOLATION_BITS == 7
-#define CHECKSUM 0x74050F50
+#define CHECKSUM 0xBC00B1DF
 #elif BILINEAR_INTERPOLATION_BITS == 4
-#define CHECKSUM 0x4362EAE8
+#define CHECKSUM 0xA227306B
 #else
 #define CHECKSUM 0x00000000
 #endif
diff --git a/lib/pixman/test/alpha-loop.c b/lib/pixman/test/alpha-loop.c
index e4d90a988..eca761537 100644
--- a/lib/pixman/test/alpha-loop.c
+++ b/lib/pixman/test/alpha-loop.c
@@ -8,9 +8,14 @@
 int
 main (int argc, char **argv)
 {
-    uint8_t *alpha = make_random_bytes (WIDTH * HEIGHT);
-    uint32_t *src = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * 4);
-    uint32_t *dest = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * 4);
+    uint8_t *alpha;
+    uint32_t *src, *dest;
+
+    prng_srand (0);
+
+    alpha = make_random_bytes (WIDTH * HEIGHT);
+    src = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * 4);
+    dest = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * 4);
 
     pixman_image_t *a = pixman_image_create_bits (PIXMAN_a8, WIDTH, HEIGHT, (uint32_t *)alpha, WIDTH);
     pixman_image_t *d = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, dest, WIDTH * 4);
diff --git a/lib/pixman/test/alphamap.c b/lib/pixman/test/alphamap.c
index 0c5757ea3..4d09076fb 100644
--- a/lib/pixman/test/alphamap.c
+++ b/lib/pixman/test/alphamap.c
@@ -26,25 +26,6 @@ static const int origins[] =
     0, 10, -100
 };
 
-static const char *
-format_name (pixman_format_code_t format)
-{
-    if (format == PIXMAN_a8)
-	return "a8";
-    else if (format == PIXMAN_a2r10g10b10)
-	return "a2r10g10b10";
-    else if (format == PIXMAN_a8r8g8b8)
-	return "a8r8g8b8";
-    else if (format == PIXMAN_a4r4g4b4)
-	return "a4r4g4b4";
-    else if (format == PIXMAN_null)
-	return "none";
-    else
-	assert (0);
-
-    return "<unknown - bug in alphamap.c>";
-}
-
 static void
 on_destroy (pixman_image_t *image, void *data)
 {
@@ -307,6 +288,8 @@ main (int argc, char **argv)
 {
     int i, j, a, b, x, y;
 
+    prng_srand (0);
+
     for (i = 0; i < ARRAY_LENGTH (formats); ++i)
     {
 	for (j = 0; j < ARRAY_LENGTH (formats); ++j)
diff --git a/lib/pixman/test/blitters-test.c b/lib/pixman/test/blitters-test.c
index 30d69124c..a2c6ff4d8 100644
--- a/lib/pixman/test/blitters-test.c
+++ b/lib/pixman/test/blitters-test.c
@@ -25,7 +25,7 @@ create_random_image (pixman_format_code_t *allowed_formats,
 		     int                   max_extra_stride,
 		     pixman_format_code_t *used_fmt)
 {
-    int n = 0, i, width, height, stride;
+    int n = 0, width, height, stride;
     pixman_format_code_t fmt;
     uint32_t *buf;
     pixman_image_t *img;
@@ -33,27 +33,28 @@ create_random_image (pixman_format_code_t *allowed_formats,
     while (allowed_formats[n] != PIXMAN_null)
 	n++;
 
-    if (n > N_MOST_LIKELY_FORMATS && lcg_rand_n (4) != 0)
+    if (n > N_MOST_LIKELY_FORMATS && prng_rand_n (4) != 0)
 	n = N_MOST_LIKELY_FORMATS;
-    fmt = allowed_formats[lcg_rand_n (n)];
+    fmt = allowed_formats[prng_rand_n (n)];
 
-    width = lcg_rand_n (max_width) + 1;
-    height = lcg_rand_n (max_height) + 1;
+    width = prng_rand_n (max_width) + 1;
+    height = prng_rand_n (max_height) + 1;
     stride = (width * PIXMAN_FORMAT_BPP (fmt) + 7) / 8 +
-	lcg_rand_n (max_extra_stride + 1);
+	prng_rand_n (max_extra_stride + 1);
     stride = (stride + 3) & ~3;
 
     /* do the allocation */
     buf = aligned_malloc (64, stride * height);
 
-    /* initialize image with random data */
-    for (i = 0; i < stride * height; i++)
+    if (prng_rand_n (4) == 0)
     {
-	/* generation is biased to having more 0 or 255 bytes as
-	 * they are more likely to be special-cased in code
-	 */
-	*((uint8_t *)buf + i) = lcg_rand_n (4) ? lcg_rand_n (256) :
-	    (lcg_rand_n (2) ? 0 : 255);
+	/* uniform distribution */
+	prng_randmemset (buf, stride * height, 0);
+    }
+    else
+    {
+	/* significantly increased probability for 0x00 and 0xFF */
+	prng_randmemset (buf, stride * height, RANDMEMSET_MORE_00_AND_FF);
     }
 
     img = pixman_image_create_bits (fmt, width, height, buf, stride);
@@ -67,7 +68,7 @@ create_random_image (pixman_format_code_t *allowed_formats,
 	pixman_image_set_indexed (img, &(y_palette[PIXMAN_FORMAT_BPP (fmt)]));
     }
 
-    if (lcg_rand_n (16) == 0)
+    if (prng_rand_n (16) == 0)
 	pixman_image_set_filter (img, PIXMAN_FILTER_BILINEAR, NULL, 0);
 
     image_endian_swap (img);
@@ -251,11 +252,11 @@ test_composite (int testnum, int verbose)
     if (max_extra_stride > 8)
 	max_extra_stride = 8;
 
-    lcg_srand (testnum);
+    prng_srand (testnum);
 
-    op = op_list[lcg_rand_n (ARRAY_LENGTH (op_list))];
+    op = op_list[prng_rand_n (ARRAY_LENGTH (op_list))];
 
-    if (lcg_rand_n (8))
+    if (prng_rand_n (8))
     {
 	/* normal image */
 	src_img = create_random_image (img_fmt_list, max_width, max_height,
@@ -284,10 +285,10 @@ test_composite (int testnum, int verbose)
     dstbuf = pixman_image_get_data (dst_img);
     srcbuf = pixman_image_get_data (src_img);
 
-    src_x = lcg_rand_n (src_width);
-    src_y = lcg_rand_n (src_height);
-    dst_x = lcg_rand_n (dst_width);
-    dst_y = lcg_rand_n (dst_height);
+    src_x = prng_rand_n (src_width);
+    src_y = prng_rand_n (src_height);
+    dst_x = prng_rand_n (dst_width);
+    dst_y = prng_rand_n (dst_height);
 
     mask_img = NULL;
     mask_fmt = PIXMAN_null;
@@ -296,10 +297,10 @@ test_composite (int testnum, int verbose)
     maskbuf = NULL;
 
     if ((src_fmt == PIXMAN_x8r8g8b8 || src_fmt == PIXMAN_x8b8g8r8) &&
-	(lcg_rand_n (4) == 0))
+	(prng_rand_n (4) == 0))
     {
 	/* PIXBUF */
-	mask_fmt = lcg_rand_n (2) ? PIXMAN_a8r8g8b8 : PIXMAN_a8b8g8r8;
+	mask_fmt = prng_rand_n (2) ? PIXMAN_a8r8g8b8 : PIXMAN_a8b8g8r8;
 	mask_img = pixman_image_create_bits (mask_fmt,
 	                                     src_width,
 	                                     src_height,
@@ -309,9 +310,9 @@ test_composite (int testnum, int verbose)
 	mask_y = src_y;
 	maskbuf = srcbuf;
     }
-    else if (lcg_rand_n (2))
+    else if (prng_rand_n (2))
     {
-	if (lcg_rand_n (2))
+	if (prng_rand_n (2))
 	{
 	    mask_img = create_random_image (mask_fmt_list, max_width, max_height,
 					   max_extra_stride, &mask_fmt);
@@ -324,21 +325,23 @@ test_composite (int testnum, int verbose)
 	    pixman_image_set_repeat (mask_img, PIXMAN_REPEAT_NORMAL);
 	}
 
-	if (lcg_rand_n (2))
+	if (prng_rand_n (2))
 	    pixman_image_set_component_alpha (mask_img, 1);
 
-	mask_x = lcg_rand_n (pixman_image_get_width (mask_img));
-	mask_y = lcg_rand_n (pixman_image_get_height (mask_img));
+	mask_x = prng_rand_n (pixman_image_get_width (mask_img));
+	mask_y = prng_rand_n (pixman_image_get_height (mask_img));
     }
 
 
-    w = lcg_rand_n (dst_width - dst_x + 1);
-    h = lcg_rand_n (dst_height - dst_y + 1);
+    w = prng_rand_n (dst_width - dst_x + 1);
+    h = prng_rand_n (dst_height - dst_y + 1);
 
     if (verbose)
     {
-	printf ("op=%d, src_fmt=%08X, dst_fmt=%08X, mask_fmt=%08X\n",
-	    op, src_fmt, dst_fmt, mask_fmt);
+        printf ("op=%s\n", operator_name (op));
+	printf ("src_fmt=%s, dst_fmt=%s, mask_fmt=%s\n",
+	    format_name (src_fmt), format_name (dst_fmt),
+	    format_name (mask_fmt));
 	printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n",
 	    src_width, src_height, dst_width, dst_height);
 	printf ("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n",
@@ -390,6 +393,8 @@ main (int argc, const char *argv[])
 {
     int i;
 
+    prng_srand (0);
+
     for (i = 1; i <= 8; i++)
     {
 	initialize_palette (&(rgb_palette[i]), i, TRUE);
@@ -397,6 +402,6 @@ main (int argc, const char *argv[])
     }
 
     return fuzzer_test_main("blitters", 2000000,
-			    0x46136E0A,
+			    0x0CF3283B,
 			    test_composite, argc, argv);
 }
diff --git a/lib/pixman/test/check-formats.c b/lib/pixman/test/check-formats.c
new file mode 100644
index 000000000..7edc198c1
--- /dev/null
+++ b/lib/pixman/test/check-formats.c
@@ -0,0 +1,352 @@
+#include <ctype.h>
+#include "utils.h"
+
+/* Composite one src_format pixel onto one dest_format pixel with op for every
+ * alpha/green bit combination of both pixels and compare each result with the
+ * do_composite() reference.  Returns 0 if all combinations pass, else 1. */
+static int
+check_op (pixman_op_t          op,
+          pixman_format_code_t src_format,
+          pixman_format_code_t dest_format)
+{
+    uint32_t src_alpha_mask, src_green_mask;
+    uint32_t dest_alpha_mask, dest_green_mask;
+    pixel_checker_t src_checker, dest_checker;
+    pixman_image_t *si, *di;
+    uint32_t sa, sg, da, dg;
+    uint32_t s, d;
+    int retval = 0;
+
+    pixel_checker_init (&src_checker, src_format);
+    pixel_checker_init (&dest_checker, dest_format);
+
+    pixel_checker_get_masks (
+        &src_checker, &src_alpha_mask, NULL, &src_green_mask, NULL);
+    pixel_checker_get_masks (
+        &dest_checker, &dest_alpha_mask, NULL, &dest_green_mask, NULL);
+
+    /* The 1x1 images use s and d as their pixel storage (rowstride 4 bytes) */
+    si = pixman_image_create_bits (src_format, 1, 1, &s, 4);
+    di = pixman_image_create_bits (dest_format, 1, 1, &d, 4);
+
+    sa = 0;
+    do
+    {
+        sg = 0;
+        do
+        {
+            da = 0;
+            do
+            {
+                dg = 0;
+                do
+                {
+                    color_t src_color, dest_color, result_color;
+                    uint32_t orig_s, orig_d;
+
+                    s = sa | sg;
+                    d = da | dg;
+                    orig_s = s;
+                    orig_d = d;
+
+		    pixel_checker_convert_pixel_to_color (&src_checker, s, &src_color);
+		    pixel_checker_convert_pixel_to_color (&dest_checker, d, &dest_color);
+
+		    do_composite (op, &src_color, NULL, &dest_color, &result_color, FALSE);
+
+		    /* Big-endian: shift each pixel up by the unused (32 - bpp) bits */
+		    if (!is_little_endian())
+                    {
+			s <<= 32 - PIXMAN_FORMAT_BPP (src_format);
+			d <<= 32 - PIXMAN_FORMAT_BPP (dest_format);
+                    }
+
+		    pixman_image_composite32 (op, si, NULL, di,
+					      0, 0, 0, 0, 0, 0, 1, 1);
+
+		    if (!is_little_endian())
+                        d >>= (32 - PIXMAN_FORMAT_BPP (dest_format));
+
+                    if (!pixel_checker_check (&dest_checker, d, &result_color))
+                    {
+                        printf ("---- test failed ----\n");
+                        printf ("operator: %-32s\n", operator_name (op));
+                        printf ("source:   %-12s pixel: %08x\n", format_name (src_format), orig_s);
+                        printf ("dest:     %-12s pixel: %08x\n", format_name (dest_format), orig_d);
+                        printf ("got:      %-12s pixel: %08x\n", format_name (dest_format), d);
+                        retval = 1;
+                    }
+
+                    /* (x - mask) & mask: next value within mask, 0 after the last */
+                    dg -= dest_green_mask;
+                    dg &= dest_green_mask;
+                }
+                while (dg != 0);
+
+                da -= dest_alpha_mask;
+                da &= dest_alpha_mask;
+            }
+            while (da != 0);
+
+            sg -= src_green_mask;
+            sg &= src_green_mask;
+        }
+        while (sg != 0);
+
+        sa -= src_alpha_mask;
+        sa &= src_alpha_mask;
+    }
+    while (sa != 0);
+
+    pixman_image_unref (si);
+    pixman_image_unref (di);
+
+    return retval;
+}
+/* Operators selectable on the command line and listed in the usage text */
+static const pixman_op_t op_list[] =
+{
+    PIXMAN_OP_CLEAR,
+    PIXMAN_OP_SRC,
+    PIXMAN_OP_DST,
+    PIXMAN_OP_OVER,
+    PIXMAN_OP_OVER_REVERSE,
+    PIXMAN_OP_IN,
+    PIXMAN_OP_IN_REVERSE,
+    PIXMAN_OP_OUT,
+    PIXMAN_OP_OUT_REVERSE,
+    PIXMAN_OP_ATOP,
+    PIXMAN_OP_ATOP_REVERSE,
+    PIXMAN_OP_XOR,
+    PIXMAN_OP_ADD,
+    PIXMAN_OP_SATURATE,
+    /* DISJOINT variants */
+    PIXMAN_OP_DISJOINT_CLEAR,
+    PIXMAN_OP_DISJOINT_SRC,
+    PIXMAN_OP_DISJOINT_DST,
+    PIXMAN_OP_DISJOINT_OVER,
+    PIXMAN_OP_DISJOINT_OVER_REVERSE,
+    PIXMAN_OP_DISJOINT_IN,
+    PIXMAN_OP_DISJOINT_IN_REVERSE,
+    PIXMAN_OP_DISJOINT_OUT,
+    PIXMAN_OP_DISJOINT_OUT_REVERSE,
+    PIXMAN_OP_DISJOINT_ATOP,
+    PIXMAN_OP_DISJOINT_ATOP_REVERSE,
+    PIXMAN_OP_DISJOINT_XOR,
+    /* CONJOINT variants */
+    PIXMAN_OP_CONJOINT_CLEAR,
+    PIXMAN_OP_CONJOINT_SRC,
+    PIXMAN_OP_CONJOINT_DST,
+    PIXMAN_OP_CONJOINT_OVER,
+    PIXMAN_OP_CONJOINT_OVER_REVERSE,
+    PIXMAN_OP_CONJOINT_IN,
+    PIXMAN_OP_CONJOINT_IN_REVERSE,
+    PIXMAN_OP_CONJOINT_OUT,
+    PIXMAN_OP_CONJOINT_OUT_REVERSE,
+    PIXMAN_OP_CONJOINT_ATOP,
+    PIXMAN_OP_CONJOINT_ATOP_REVERSE,
+    PIXMAN_OP_CONJOINT_XOR,
+};
+/* Formats selectable as source or destination and listed in the usage text */
+static const pixman_format_code_t format_list[] =
+{
+    PIXMAN_a8r8g8b8,
+    PIXMAN_x8r8g8b8,
+    PIXMAN_a8b8g8r8,
+    PIXMAN_x8b8g8r8,
+    PIXMAN_b8g8r8a8,
+    PIXMAN_b8g8r8x8,
+    PIXMAN_r8g8b8a8,
+    PIXMAN_r8g8b8x8,
+    PIXMAN_x14r6g6b6,
+    PIXMAN_x2r10g10b10,
+    PIXMAN_a2r10g10b10,
+    PIXMAN_x2b10g10r10,
+    PIXMAN_a2b10g10r10,
+    PIXMAN_a8r8g8b8_sRGB,
+    PIXMAN_r8g8b8,
+    PIXMAN_b8g8r8,
+    PIXMAN_r5g6b5,
+    PIXMAN_b5g6r5,
+    PIXMAN_a1r5g5b5,
+    PIXMAN_x1r5g5b5,
+    PIXMAN_a1b5g5r5,
+    PIXMAN_x1b5g5r5,
+    PIXMAN_a4r4g4b4,
+    PIXMAN_x4r4g4b4,
+    PIXMAN_a4b4g4r4,
+    PIXMAN_x4b4g4r4,
+    PIXMAN_a8,
+    PIXMAN_r3g3b2,
+    PIXMAN_b2g3r3,
+    PIXMAN_a2r2g2b2,
+    PIXMAN_a2b2g2r2,
+    PIXMAN_x4a4,
+    PIXMAN_a4,
+    PIXMAN_r1g2b1,
+    PIXMAN_b1g2r1,
+    PIXMAN_a1r1g1b1,
+    PIXMAN_a1b1g1r1,
+    PIXMAN_a1,
+};
+/* Return the format_list entry named s (compared case-insensitively), or PIXMAN_null */
+static pixman_format_code_t
+format_from_string (const char *s)
+{
+    int i;
+
+    for (i = 0; i < ARRAY_LENGTH (format_list); ++i)
+    {
+        if (strcasecmp (format_name (format_list[i]), s) == 0)
+            return format_list[i];
+    }
+
+    return PIXMAN_null;
+}
+/* Print "s," and keep the list wrapped; *n_chars tracks the current line length */
+static void
+emit (const char *s, int *n_chars)
+{
+    *n_chars += printf ("%s,", s);	/* printf returns the characters written */
+    if (*n_chars > 60)
+    {
+        printf ("\n    ");		/* wrap and indent the continuation line */
+        *n_chars = 0;
+    }
+    else
+    {
+        printf (" ");
+        (*n_chars)++;
+    }
+}
+/* Print the "Formats:" usage section: every format_list name, wrapped by emit() */
+static void
+list_formats (void)
+{
+    int n_chars;
+    int i;
+
+    printf ("Formats:\n    ");
+
+    n_chars = 0;
+    for (i = 0; i < ARRAY_LENGTH (format_list); ++i)
+        emit (format_name (format_list[i]), &n_chars);
+
+    printf ("\n\n");
+}
+/* Print the "Operators:" usage list: op_list names without PIXMAN_OP_, lowercased */
+static void
+list_operators (void)
+{
+    char short_name [128] = { 0 };
+    int i, n_chars;
+
+    printf ("Operators:\n    ");
+
+    n_chars = 0;
+    for (i = 0; i < ARRAY_LENGTH (op_list); ++i)
+    {
+        pixman_op_t op = op_list[i];
+        int j;
+
+        snprintf (short_name, sizeof (short_name) - 1, "%s",
+                  operator_name (op) + strlen ("PIXMAN_OP_"));
+        /* tolower() is only defined for unsigned char values and EOF */
+        for (j = 0; short_name[j] != '\0'; ++j)
+            short_name[j] = tolower ((unsigned char) short_name[j]);
+
+        emit (short_name, &n_chars);
+    }
+
+    printf ("\n\n");
+}
+/* Return the op_list operator named "PIXMAN_OP_" + s (any case), or PIXMAN_OP_NONE */
+static pixman_op_t
+operator_from_string (const char *s)
+{
+    char full_name[128] = { 0 };
+    int i;
+
+    snprintf (full_name, (sizeof full_name) - 1, "PIXMAN_OP_%s", s);
+
+    for (i = 0; i < ARRAY_LENGTH (op_list); ++i)
+    {
+        pixman_op_t op = op_list[i];
+
+        if (strcasecmp (operator_name (op), full_name) == 0)
+            return op;
+    }
+
+    return PIXMAN_OP_NONE;
+}
+/* check-formats <operator> <src-format> <dest-format>: run check_op() on that combination */
+int
+main (int argc, char **argv)
+{
+    enum { OPTION_OP, OPTION_SRC, OPTION_DEST, LAST_OPTION } option;
+    pixman_format_code_t src_fmt, dest_fmt;
+    pixman_op_t op;
+
+    op = PIXMAN_OP_NONE;
+    src_fmt = PIXMAN_null;
+    dest_fmt = PIXMAN_null;
+    /* Skip the program name */
+    argc--;
+    argv++;
+    /* One positional argument per option; arg stays NULL once they run out */
+    for (option = OPTION_OP; option < LAST_OPTION; ++option)
+    {
+        char *arg = NULL;
+
+        if (argc)
+        {
+            argc--;
+            arg = *argv++;
+        }
+
+        switch (option)
+        {
+        case OPTION_OP:
+            if (!arg)
+                printf ("  - missing operator\n");
+            else if ((op = operator_from_string (arg)) == PIXMAN_OP_NONE)
+                printf ("  - unknown operator %s\n", arg);
+            break;
+
+        case OPTION_SRC:
+            if (!arg)
+                printf ("  - missing source format\n");
+            else if ((src_fmt = format_from_string (arg)) == PIXMAN_null)
+                printf ("  - unknown source format %s\n", arg);
+            break;
+
+        case OPTION_DEST:
+            if (!arg)
+                printf ("  - missing destination format\n");
+            else if ((dest_fmt = format_from_string (arg)) == PIXMAN_null)
+                printf ("  - unknown destination format %s\n", arg);
+            break;
+
+        default:
+            assert (0);
+            break;
+        }
+    }
+    /* Any leftover argument clears op so that the usage text below is shown */
+    while (argc--)
+    {
+        op = PIXMAN_OP_NONE;
+        printf ("  - unexpected argument: %s\n", *argv++);
+    }
+
+    if (op == PIXMAN_OP_NONE || src_fmt == PIXMAN_null || dest_fmt == PIXMAN_null)
+    {
+        printf ("\nUsage:\n    check-formats <operator> <src-format> <dest-format>\n\n");
+        list_operators();
+        list_formats();
+
+        return -1;	/* invalid or missing arguments */
+    }
+
+    return check_op (op, src_fmt, dest_fmt);
+}
diff --git a/lib/pixman/test/combiner-test.c b/lib/pixman/test/combiner-test.c
index c438ae62e..01f63a56e 100644
--- a/lib/pixman/test/combiner-test.c
+++ b/lib/pixman/test/combiner-test.c
@@ -67,7 +67,7 @@ static const pixman_op_t op_list[] =
 static float
 rand_float (void)
 {
-    uint32_t u = lcg_rand_u32();
+    uint32_t u = prng_rand();
 
     return *(float *)&u;
 }
@@ -123,7 +123,7 @@ main ()
     
     impl = _pixman_internal_only_get_implementation();
     
-    lcg_srand (0);
+    prng_srand (0);
 
     for (i = 0; i < ARRAY_LENGTH (op_list); ++i)
     {
diff --git a/lib/pixman/test/composite-traps-test.c b/lib/pixman/test/composite-traps-test.c
index 9fc94a4d6..2983eae83 100644
--- a/lib/pixman/test/composite-traps-test.c
+++ b/lib/pixman/test/composite-traps-test.c
@@ -26,7 +26,7 @@ static pixman_op_t operators[] =
 };
 
 #define RANDOM_ELT(array)						\
-    ((array)[lcg_rand_n(ARRAY_LENGTH((array)))])
+    ((array)[prng_rand_n(ARRAY_LENGTH((array)))])
 
 static void
 destroy_bits (pixman_image_t *image, void *data)
@@ -37,7 +37,7 @@ destroy_bits (pixman_image_t *image, void *data)
 static pixman_fixed_t
 random_fixed (int n)
 {
-    return lcg_rand_N (n << 16);
+    return prng_rand_n (n << 16);
 }
 
 /*
@@ -75,17 +75,17 @@ test_composite (int      testnum,
     
     FLOAT_REGS_CORRUPTION_DETECTOR_START ();
 
-    lcg_srand (testnum);
+    prng_srand (testnum);
 
     op = RANDOM_ELT (operators);
     mask_format = RANDOM_ELT (mask_formats);
 
     /* Create source image */
     
-    if (lcg_rand_n (4) == 0)
+    if (prng_rand_n (4) == 0)
     {
 	src_img = pixman_image_create_solid_fill (
-	    &(colors[lcg_rand_n (ARRAY_LENGTH (colors))]));
+	    &(colors[prng_rand_n (ARRAY_LENGTH (colors))]));
 
 	src_x = 10;
 	src_y = 234;
@@ -94,13 +94,13 @@ test_composite (int      testnum,
     {
 	pixman_format_code_t src_format = RANDOM_ELT(formats);
 	int src_bpp = (PIXMAN_FORMAT_BPP (src_format) + 7) / 8;
-	int src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
-	int src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
-	int src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp;
+	int src_width = prng_rand_n (MAX_SRC_WIDTH) + 1;
+	int src_height = prng_rand_n (MAX_SRC_HEIGHT) + 1;
+	int src_stride = src_width * src_bpp + prng_rand_n (MAX_STRIDE) * src_bpp;
 	uint32_t *bits;
 
-	src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2);
-	src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2);
+	src_x = -(src_width / 4) + prng_rand_n (src_width * 3 / 2);
+	src_y = -(src_height / 4) + prng_rand_n (src_height * 3 / 2);
 
 	src_stride = (src_stride + 3) & ~3;
 	
@@ -111,19 +111,19 @@ test_composite (int      testnum,
 
 	pixman_image_set_destroy_function (src_img, destroy_bits, bits);
 
-	if (lcg_rand_n (8) == 0)
+	if (prng_rand_n (8) == 0)
 	{
 	    pixman_box16_t clip_boxes[2];
-	    int            n = lcg_rand_n (2) + 1;
+	    int            n = prng_rand_n (2) + 1;
 	    
 	    for (i = 0; i < n; i++)
 	    {
-		clip_boxes[i].x1 = lcg_rand_n (src_width);
-		clip_boxes[i].y1 = lcg_rand_n (src_height);
+		clip_boxes[i].x1 = prng_rand_n (src_width);
+		clip_boxes[i].y1 = prng_rand_n (src_height);
 		clip_boxes[i].x2 =
-		    clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1);
+		    clip_boxes[i].x1 + prng_rand_n (src_width - clip_boxes[i].x1);
 		clip_boxes[i].y2 =
-		    clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1);
+		    clip_boxes[i].y1 + prng_rand_n (src_height - clip_boxes[i].y1);
 		
 		if (verbose)
 		{
@@ -146,15 +146,15 @@ test_composite (int      testnum,
     {
 	dst_format = RANDOM_ELT(formats);
 	dst_bpp = (PIXMAN_FORMAT_BPP (dst_format) + 7) / 8;
-	dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1;
-	dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1;
-	dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp;
+	dst_width = prng_rand_n (MAX_DST_WIDTH) + 1;
+	dst_height = prng_rand_n (MAX_DST_HEIGHT) + 1;
+	dst_stride = dst_width * dst_bpp + prng_rand_n (MAX_STRIDE) * dst_bpp;
 	dst_stride = (dst_stride + 3) & ~3;
 	
 	dst_bits = (uint32_t *)make_random_bytes (dst_stride * dst_height);
 
-	dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2);
-	dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2);
+	dst_x = -(dst_width / 4) + prng_rand_n (dst_width * 3 / 2);
+	dst_y = -(dst_height / 4) + prng_rand_n (dst_height * 3 / 2);
 	
 	dst_img = pixman_image_create_bits (
 	    dst_format, dst_width, dst_height, dst_bits, dst_stride);
@@ -166,7 +166,7 @@ test_composite (int      testnum,
     {
 	int i;
 
-	n_traps = lcg_rand_n (25);
+	n_traps = prng_rand_n (25);
 	traps = fence_malloc (n_traps * sizeof (pixman_trapezoid_t));
 
 	for (i = 0; i < n_traps; ++i)
@@ -186,18 +186,18 @@ test_composite (int      testnum,
 	}
     }
     
-    if (lcg_rand_n (8) == 0)
+    if (prng_rand_n (8) == 0)
     {
 	pixman_box16_t clip_boxes[2];
-	int            n = lcg_rand_n (2) + 1;
+	int            n = prng_rand_n (2) + 1;
 	for (i = 0; i < n; i++)
 	{
-	    clip_boxes[i].x1 = lcg_rand_n (dst_width);
-	    clip_boxes[i].y1 = lcg_rand_n (dst_height);
+	    clip_boxes[i].x1 = prng_rand_n (dst_width);
+	    clip_boxes[i].y1 = prng_rand_n (dst_height);
 	    clip_boxes[i].x2 =
-		clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1);
+		clip_boxes[i].x1 + prng_rand_n (dst_width - clip_boxes[i].x1);
 	    clip_boxes[i].y2 =
-		clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1);
+		clip_boxes[i].y1 + prng_rand_n (dst_height - clip_boxes[i].y1);
 
 	    if (verbose)
 	    {
@@ -251,6 +251,6 @@ test_composite (int      testnum,
 int
 main (int argc, const char *argv[])
 {
-    return fuzzer_test_main("composite traps", 40000, 0x33BFAA55,
+    return fuzzer_test_main("composite traps", 40000, 0x749BCC57,
 			    test_composite, argc, argv);
 }
diff --git a/lib/pixman/test/composite.c b/lib/pixman/test/composite.c
index 2930fb75b..9e51a8f65 100644
--- a/lib/pixman/test/composite.c
+++ b/lib/pixman/test/composite.c
@@ -28,15 +28,7 @@
 #include <time.h>
 #include "utils.h"
 
-typedef struct format_t format_t;
 typedef struct image_t image_t;
-typedef struct operator_t operator_t;
-
-struct format_t
-{
-    pixman_format_code_t format;
-    const char *name;
-};
 
 static const color_t colors[] =
 {
@@ -82,401 +74,113 @@ static const int sizes[] =
     10
 };
 
-static const format_t formats[] =
+static const pixman_format_code_t formats[] =
 {
-#define P(x) { PIXMAN_##x, #x }
-
     /* 32 bpp formats */
-    P(a8r8g8b8),
-    P(x8r8g8b8),
-    P(a8b8g8r8),
-    P(x8b8g8r8),
-    P(b8g8r8a8),
-    P(b8g8r8x8),
-    P(r8g8b8a8),
-    P(r8g8b8x8),
-    P(x2r10g10b10),
-    P(x2b10g10r10),
-    P(a2r10g10b10),
-    P(a2b10g10r10),
+    PIXMAN_a8r8g8b8,
+    PIXMAN_x8r8g8b8,
+    PIXMAN_a8b8g8r8,
+    PIXMAN_x8b8g8r8,
+    PIXMAN_b8g8r8a8,
+    PIXMAN_b8g8r8x8,
+    PIXMAN_r8g8b8a8,
+    PIXMAN_r8g8b8x8,
+    PIXMAN_x2r10g10b10,
+    PIXMAN_x2b10g10r10,
+    PIXMAN_a2r10g10b10,
+    PIXMAN_a2b10g10r10,
     
     /* sRGB formats */
-    P(a8r8g8b8_sRGB),
+    PIXMAN_a8r8g8b8_sRGB,
 
     /* 24 bpp formats */
-    P(r8g8b8),
-    P(b8g8r8),
-    P(r5g6b5),
-    P(b5g6r5),
+    PIXMAN_r8g8b8,
+    PIXMAN_b8g8r8,
+    PIXMAN_r5g6b5,
+    PIXMAN_b5g6r5,
 
     /* 16 bpp formats */
-    P(x1r5g5b5),
-    P(x1b5g5r5),
-    P(a1r5g5b5),
-    P(a1b5g5r5),
-    P(a4b4g4r4),
-    P(x4b4g4r4),
-    P(a4r4g4b4),
-    P(x4r4g4b4),
+    PIXMAN_x1r5g5b5,
+    PIXMAN_x1b5g5r5,
+    PIXMAN_a1r5g5b5,
+    PIXMAN_a1b5g5r5,
+    PIXMAN_a4b4g4r4,
+    PIXMAN_x4b4g4r4,
+    PIXMAN_a4r4g4b4,
+    PIXMAN_x4r4g4b4,
 
     /* 8 bpp formats */
-    P(a8),
-    P(r3g3b2),
-    P(b2g3r3),
-    P(a2r2g2b2),
-    P(a2b2g2r2),
-    P(x4a4),
+    PIXMAN_a8,
+    PIXMAN_r3g3b2,
+    PIXMAN_b2g3r3,
+    PIXMAN_a2r2g2b2,
+    PIXMAN_a2b2g2r2,
+    PIXMAN_x4a4,
 
     /* 4 bpp formats */
-    P(a4),
-    P(r1g2b1),
-    P(b1g2r1),
-    P(a1r1g1b1),
-    P(a1b1g1r1),
+    PIXMAN_a4,
+    PIXMAN_r1g2b1,
+    PIXMAN_b1g2r1,
+    PIXMAN_a1r1g1b1,
+    PIXMAN_a1b1g1r1,
 
     /* 1 bpp formats */
-    P(a1)
-#undef P
+    PIXMAN_a1,
 };
 
 struct image_t
 {
     pixman_image_t *image;
-    const format_t *format;
+    pixman_format_code_t format;
     const color_t *color;
     pixman_repeat_t repeat;
     int size;
 };
 
-struct operator_t
+static const pixman_op_t operators[] =
 {
-    pixman_op_t op;
-    const char *name;
+    PIXMAN_OP_CLEAR,
+    PIXMAN_OP_SRC,
+    PIXMAN_OP_DST,
+    PIXMAN_OP_OVER,
+    PIXMAN_OP_OVER_REVERSE,
+    PIXMAN_OP_IN,
+    PIXMAN_OP_IN_REVERSE,
+    PIXMAN_OP_OUT,
+    PIXMAN_OP_OUT_REVERSE,
+    PIXMAN_OP_ATOP,
+    PIXMAN_OP_ATOP_REVERSE,
+    PIXMAN_OP_XOR,
+    PIXMAN_OP_ADD,
+    PIXMAN_OP_SATURATE,
+
+    PIXMAN_OP_DISJOINT_CLEAR,
+    PIXMAN_OP_DISJOINT_SRC,
+    PIXMAN_OP_DISJOINT_DST,
+    PIXMAN_OP_DISJOINT_OVER,
+    PIXMAN_OP_DISJOINT_OVER_REVERSE,
+    PIXMAN_OP_DISJOINT_IN,
+    PIXMAN_OP_DISJOINT_IN_REVERSE,
+    PIXMAN_OP_DISJOINT_OUT,
+    PIXMAN_OP_DISJOINT_OUT_REVERSE,
+    PIXMAN_OP_DISJOINT_ATOP,
+    PIXMAN_OP_DISJOINT_ATOP_REVERSE,
+    PIXMAN_OP_DISJOINT_XOR,
+
+    PIXMAN_OP_CONJOINT_CLEAR,
+    PIXMAN_OP_CONJOINT_SRC,
+    PIXMAN_OP_CONJOINT_DST,
+    PIXMAN_OP_CONJOINT_OVER,
+    PIXMAN_OP_CONJOINT_OVER_REVERSE,
+    PIXMAN_OP_CONJOINT_IN,
+    PIXMAN_OP_CONJOINT_IN_REVERSE,
+    PIXMAN_OP_CONJOINT_OUT,
+    PIXMAN_OP_CONJOINT_OUT_REVERSE,
+    PIXMAN_OP_CONJOINT_ATOP,
+    PIXMAN_OP_CONJOINT_ATOP_REVERSE,
+    PIXMAN_OP_CONJOINT_XOR,
 };
 
-static const operator_t operators[] =
-{
-#define P(x) { PIXMAN_OP_##x, #x }
-    P(CLEAR),
-    P(SRC),
-    P(DST),
-    P(OVER),
-    P(OVER_REVERSE),
-    P(IN),
-    P(IN_REVERSE),
-    P(OUT),
-    P(OUT_REVERSE),
-    P(ATOP),
-    P(ATOP_REVERSE),
-    P(XOR),
-    P(ADD),
-    P(SATURATE),
-
-    P(DISJOINT_CLEAR),
-    P(DISJOINT_SRC),
-    P(DISJOINT_DST),
-    P(DISJOINT_OVER),
-    P(DISJOINT_OVER_REVERSE),
-    P(DISJOINT_IN),
-    P(DISJOINT_IN_REVERSE),
-    P(DISJOINT_OUT),
-    P(DISJOINT_OUT_REVERSE),
-    P(DISJOINT_ATOP),
-    P(DISJOINT_ATOP_REVERSE),
-    P(DISJOINT_XOR),
-
-    P(CONJOINT_CLEAR),
-    P(CONJOINT_SRC),
-    P(CONJOINT_DST),
-    P(CONJOINT_OVER),
-    P(CONJOINT_OVER_REVERSE),
-    P(CONJOINT_IN),
-    P(CONJOINT_IN_REVERSE),
-    P(CONJOINT_OUT),
-    P(CONJOINT_OUT_REVERSE),
-    P(CONJOINT_ATOP),
-    P(CONJOINT_ATOP_REVERSE),
-    P(CONJOINT_XOR),
-#undef P
-};
-
-static double
-calc_op (pixman_op_t op, double src, double dst, double srca, double dsta)
-{
-#define mult_chan(src, dst, Fa, Fb) MIN ((src) * (Fa) + (dst) * (Fb), 1.0)
-
-    double Fa, Fb;
-
-    switch (op)
-    {
-    case PIXMAN_OP_CLEAR:
-    case PIXMAN_OP_DISJOINT_CLEAR:
-    case PIXMAN_OP_CONJOINT_CLEAR:
-	return mult_chan (src, dst, 0.0, 0.0);
-
-    case PIXMAN_OP_SRC:
-    case PIXMAN_OP_DISJOINT_SRC:
-    case PIXMAN_OP_CONJOINT_SRC:
-	return mult_chan (src, dst, 1.0, 0.0);
-
-    case PIXMAN_OP_DST:
-    case PIXMAN_OP_DISJOINT_DST:
-    case PIXMAN_OP_CONJOINT_DST:
-	return mult_chan (src, dst, 0.0, 1.0);
-
-    case PIXMAN_OP_OVER:
-	return mult_chan (src, dst, 1.0, 1.0 - srca);
-
-    case PIXMAN_OP_OVER_REVERSE:
-	return mult_chan (src, dst, 1.0 - dsta, 1.0);
-
-    case PIXMAN_OP_IN:
-	return mult_chan (src, dst, dsta, 0.0);
-
-    case PIXMAN_OP_IN_REVERSE:
-	return mult_chan (src, dst, 0.0, srca);
-
-    case PIXMAN_OP_OUT:
-	return mult_chan (src, dst, 1.0 - dsta, 0.0);
-
-    case PIXMAN_OP_OUT_REVERSE:
-	return mult_chan (src, dst, 0.0, 1.0 - srca);
-
-    case PIXMAN_OP_ATOP:
-	return mult_chan (src, dst, dsta, 1.0 - srca);
-
-    case PIXMAN_OP_ATOP_REVERSE:
-	return mult_chan (src, dst, 1.0 - dsta,  srca);
-
-    case PIXMAN_OP_XOR:
-	return mult_chan (src, dst, 1.0 - dsta, 1.0 - srca);
-
-    case PIXMAN_OP_ADD:
-	return mult_chan (src, dst, 1.0, 1.0);
-
-    case PIXMAN_OP_SATURATE:
-    case PIXMAN_OP_DISJOINT_OVER_REVERSE:
-	if (srca == 0.0)
-	    Fa = 1.0;
-	else
-	    Fa = MIN (1.0, (1.0 - dsta) / srca);
-	return mult_chan (src, dst, Fa, 1.0);
-
-    case PIXMAN_OP_DISJOINT_OVER:
-	if (dsta == 0.0)
-	    Fb = 1.0;
-	else
-	    Fb = MIN (1.0, (1.0 - srca) / dsta);
-	return mult_chan (src, dst, 1.0, Fb);
-
-    case PIXMAN_OP_DISJOINT_IN:
-	if (srca == 0.0)
-	    Fa = 0.0;
-	else
-	    Fa = MAX (0.0, 1.0 - (1.0 - dsta) / srca);
-	return mult_chan (src, dst, Fa, 0.0);
-
-    case PIXMAN_OP_DISJOINT_IN_REVERSE:
-	if (dsta == 0.0)
-	    Fb = 0.0;
-	else
-	    Fb = MAX (0.0, 1.0 - (1.0 - srca) / dsta);
-	return mult_chan (src, dst, 0.0, Fb);
-
-    case PIXMAN_OP_DISJOINT_OUT:
-	if (srca == 0.0)
-	    Fa = 1.0;
-	else
-	    Fa = MIN (1.0, (1.0 - dsta) / srca);
-	return mult_chan (src, dst, Fa, 0.0);
-
-    case PIXMAN_OP_DISJOINT_OUT_REVERSE:
-	if (dsta == 0.0)
-	    Fb = 1.0;
-	else
-	    Fb = MIN (1.0, (1.0 - srca) / dsta);
-	return mult_chan (src, dst, 0.0, Fb);
-
-    case PIXMAN_OP_DISJOINT_ATOP:
-	if (srca == 0.0)
-	    Fa = 0.0;
-	else
-	    Fa = MAX (0.0, 1.0 - (1.0 - dsta) / srca);
-	if (dsta == 0.0)
-	    Fb = 1.0;
-	else
-	    Fb = MIN (1.0, (1.0 - srca) / dsta);
-	return mult_chan (src, dst, Fa, Fb);
-
-    case PIXMAN_OP_DISJOINT_ATOP_REVERSE:
-	if (srca == 0.0)
-	    Fa = 1.0;
-	else
-	    Fa = MIN (1.0, (1.0 - dsta) / srca);
-	if (dsta == 0.0)
-	    Fb = 0.0;
-	else
-	    Fb = MAX (0.0, 1.0 - (1.0 - srca) / dsta);
-	return mult_chan (src, dst, Fa, Fb);
-
-    case PIXMAN_OP_DISJOINT_XOR:
-	if (srca == 0.0)
-	    Fa = 1.0;
-	else
-	    Fa = MIN (1.0, (1.0 - dsta) / srca);
-	if (dsta == 0.0)
-	    Fb = 1.0;
-	else
-	    Fb = MIN (1.0, (1.0 - srca) / dsta);
-	return mult_chan (src, dst, Fa, Fb);
-
-    case PIXMAN_OP_CONJOINT_OVER:
-	if (dsta == 0.0)
-	    Fb = 0.0;
-	else
-	    Fb = MAX (0.0, 1.0 - srca / dsta);
-	return mult_chan (src, dst, 1.0, Fb);
-
-    case PIXMAN_OP_CONJOINT_OVER_REVERSE:
-	if (srca == 0.0)
-	    Fa = 0.0;
-	else
-	    Fa = MAX (0.0, 1.0 - dsta / srca);
-	return mult_chan (src, dst, Fa, 1.0);
-
-    case PIXMAN_OP_CONJOINT_IN:
-	if (srca == 0.0)
-	    Fa = 1.0;
-	else
-	    Fa = MIN (1.0, dsta / srca);
-	return mult_chan (src, dst, Fa, 0.0);
-
-    case PIXMAN_OP_CONJOINT_IN_REVERSE:
-	if (dsta == 0.0)
-	    Fb = 1.0;
-	else
-	    Fb = MIN (1.0, srca / dsta);
-	return mult_chan (src, dst, 0.0, Fb);
-
-    case PIXMAN_OP_CONJOINT_OUT:
-	if (srca == 0.0)
-	    Fa = 0.0;
-	else
-	    Fa = MAX (0.0, 1.0 - dsta / srca);
-	return mult_chan (src, dst, Fa, 0.0);
-
-    case PIXMAN_OP_CONJOINT_OUT_REVERSE:
-	if (dsta == 0.0)
-	    Fb = 0.0;
-	else
-	    Fb = MAX (0.0, 1.0 - srca / dsta);
-	return mult_chan (src, dst, 0.0, Fb);
-
-    case PIXMAN_OP_CONJOINT_ATOP:
-	if (srca == 0.0)
-	    Fa = 1.0;
-	else
-	    Fa = MIN (1.0, dsta / srca);
-	if (dsta == 0.0)
-	    Fb = 0.0;
-	else
-	    Fb = MAX (0.0, 1.0 - srca / dsta);
-	return mult_chan (src, dst, Fa, Fb);
-
-    case PIXMAN_OP_CONJOINT_ATOP_REVERSE:
-	if (srca == 0.0)
-	    Fa = 0.0;
-	else
-	    Fa = MAX (0.0, 1.0 - dsta / srca);
-	if (dsta == 0.0)
-	    Fb = 1.0;
-	else
-	    Fb = MIN (1.0, srca / dsta);
-	return mult_chan (src, dst, Fa, Fb);
-
-    case PIXMAN_OP_CONJOINT_XOR:
-	if (srca == 0.0)
-	    Fa = 0.0;
-	else
-	    Fa = MAX (0.0, 1.0 - dsta / srca);
-	if (dsta == 0.0)
-	    Fb = 0.0;
-	else
-	    Fb = MAX (0.0, 1.0 - srca / dsta);
-	return mult_chan (src, dst, Fa, Fb);
-
-    case PIXMAN_OP_MULTIPLY:
-    case PIXMAN_OP_SCREEN:
-    case PIXMAN_OP_OVERLAY:
-    case PIXMAN_OP_DARKEN:
-    case PIXMAN_OP_LIGHTEN:
-    case PIXMAN_OP_COLOR_DODGE:
-    case PIXMAN_OP_COLOR_BURN:
-    case PIXMAN_OP_HARD_LIGHT:
-    case PIXMAN_OP_SOFT_LIGHT:
-    case PIXMAN_OP_DIFFERENCE:
-    case PIXMAN_OP_EXCLUSION:
-    case PIXMAN_OP_HSL_HUE:
-    case PIXMAN_OP_HSL_SATURATION:
-    case PIXMAN_OP_HSL_COLOR:
-    case PIXMAN_OP_HSL_LUMINOSITY:
-    default:
-	abort();
-	return 0; /* silence MSVC */
-    }
-#undef mult_chan
-}
-
-static void
-do_composite (pixman_op_t op,
-	      const color_t *src,
-	      const color_t *mask,
-	      const color_t *dst,
-	      color_t *result,
-	      pixman_bool_t component_alpha)
-{
-    color_t srcval, srcalpha;
-
-    if (mask == NULL)
-    {
-	srcval = *src;
-
-	srcalpha.r = src->a;
-	srcalpha.g = src->a;
-	srcalpha.b = src->a;
-	srcalpha.a = src->a;
-    }
-    else if (component_alpha)
-    {
-	srcval.r = src->r * mask->r;
-	srcval.g = src->g * mask->g;
-	srcval.b = src->b * mask->b;
-	srcval.a = src->a * mask->a;
-
-	srcalpha.r = src->a * mask->r;
-	srcalpha.g = src->a * mask->g;
-	srcalpha.b = src->a * mask->b;
-	srcalpha.a = src->a * mask->a;
-    }
-    else
-    {
-	srcval.r = src->r * mask->a;
-	srcval.g = src->g * mask->a;
-	srcval.b = src->b * mask->a;
-	srcval.a = src->a * mask->a;
-
-	srcalpha.r = src->a * mask->a;
-	srcalpha.g = src->a * mask->a;
-	srcalpha.b = src->a * mask->a;
-	srcalpha.a = src->a * mask->a;
-    }
-
-    result->r = calc_op (op, srcval.r, dst->r, srcalpha.r, dst->a);
-    result->g = calc_op (op, srcval.g, dst->g, srcalpha.g, dst->a);
-    result->b = calc_op (op, srcval.b, dst->b, srcalpha.b, dst->a);
-    result->a = calc_op (op, srcval.a, dst->a, srcalpha.a, dst->a);
-}
-
 static uint32_t
 get_value (pixman_image_t *image)
 {
@@ -498,7 +202,7 @@ describe_image (image_t *info, char *buf)
     if (info->size)
     {
 	sprintf (buf, "%s, %dx%d%s",
-		 info->format->name,
+		 format_name (info->format),
 		 info->size, info->size,
 		 info->repeat ? " R" :"");
     }
@@ -521,7 +225,7 @@ describe_color (const color_t *color, char *buf)
 
 static pixman_bool_t
 composite_test (image_t *dst,
-		const operator_t *op,
+		pixman_op_t op,
 		image_t *src,
 		image_t *mask,
 		pixman_bool_t component_alpha,
@@ -534,12 +238,12 @@ composite_test (image_t *dst,
     {
 	pixman_image_set_component_alpha (mask->image, component_alpha);
 
-	pixman_image_composite (op->op, src->image, mask->image, dst->image,
+	pixman_image_composite (op, src->image, mask->image, dst->image,
 				0, 0, 0, 0, 0, 0, dst->size, dst->size);
     }
     else
     {
-	pixman_image_composite (op->op, src->image, NULL, dst->image,
+	pixman_image_composite (op, src->image, NULL, dst->image,
 				0, 0,
 				0, 0,
 				0, 0,
@@ -561,43 +265,43 @@ composite_test (image_t *dst,
      */
     if (src->size)
     {
-	if (PIXMAN_FORMAT_TYPE (src->format->format) == PIXMAN_TYPE_ARGB_SRGB)
+	if (PIXMAN_FORMAT_TYPE (src->format) == PIXMAN_TYPE_ARGB_SRGB)
         {
 	    tsrc.r = convert_linear_to_srgb (tsrc.r);
 	    tsrc.g = convert_linear_to_srgb (tsrc.g);
 	    tsrc.b = convert_linear_to_srgb (tsrc.b);
-	    round_color (src->format->format, &tsrc);
+	    round_color (src->format, &tsrc);
 	    tsrc.r = convert_srgb_to_linear (tsrc.r);
 	    tsrc.g = convert_srgb_to_linear (tsrc.g);
 	    tsrc.b = convert_srgb_to_linear (tsrc.b);
 	}
         else
         {
-	    round_color (src->format->format, &tsrc);
+	    round_color (src->format, &tsrc);
 	}
     }
 
     if (mask && mask->size)
     {
-	if (PIXMAN_FORMAT_TYPE (mask->format->format) == PIXMAN_TYPE_ARGB_SRGB)
+	if (PIXMAN_FORMAT_TYPE (mask->format) == PIXMAN_TYPE_ARGB_SRGB)
 	{
 	    tmsk.r = convert_linear_to_srgb (tmsk.r);
 	    tmsk.g = convert_linear_to_srgb (tmsk.g);
 	    tmsk.b = convert_linear_to_srgb (tmsk.b);
-	    round_color (mask->format->format, &tmsk);
+	    round_color (mask->format, &tmsk);
 	    tmsk.r = convert_srgb_to_linear (tmsk.r);
 	    tmsk.g = convert_srgb_to_linear (tmsk.g);
 	    tmsk.b = convert_srgb_to_linear (tmsk.b);
 	}
 	else
 	{
-	    round_color (mask->format->format, &tmsk);
+	    round_color (mask->format, &tmsk);
 	}
     }
 
     if (mask)
     {
-	if (component_alpha && PIXMAN_FORMAT_R (mask->format->format) == 0)
+	if (component_alpha && PIXMAN_FORMAT_R (mask->format) == 0)
 	{
 	    /* Ax component-alpha masks expand alpha into
 	     * all color channels.
@@ -606,29 +310,29 @@ composite_test (image_t *dst,
 	}
     }
 
-    if (PIXMAN_FORMAT_TYPE (dst->format->format) == PIXMAN_TYPE_ARGB_SRGB)
+    if (PIXMAN_FORMAT_TYPE (dst->format) == PIXMAN_TYPE_ARGB_SRGB)
     {
 	tdst.r = convert_linear_to_srgb (tdst.r);
 	tdst.g = convert_linear_to_srgb (tdst.g);
 	tdst.b = convert_linear_to_srgb (tdst.b);
-    	round_color (dst->format->format, &tdst);
+    	round_color (dst->format, &tdst);
 	tdst.r = convert_srgb_to_linear (tdst.r);
 	tdst.g = convert_srgb_to_linear (tdst.g);
 	tdst.b = convert_srgb_to_linear (tdst.b);
     }
     else
     {
-    	round_color (dst->format->format, &tdst);
+    	round_color (dst->format, &tdst);
     }
 
-    do_composite (op->op,
+    do_composite (op,
 		  &tsrc,
 		  mask? &tmsk : NULL,
 		  &tdst,
 		  &expected,
 		  component_alpha);
 
-    pixel_checker_init (&checker, dst->format->format);
+    pixel_checker_init (&checker, dst->format);
 
     if (!pixel_checker_check (&checker, get_value (dst->image), &expected))
     {
@@ -638,7 +342,7 @@ composite_test (image_t *dst,
 
 	printf ("---- Test %d failed ----\n", testno);
 	printf ("Operator:      %s %s\n",
-		 op->name, component_alpha ? "CA" : "");
+                operator_name (op), component_alpha ? "CA" : "");
 
 	printf ("Source:        %s\n", describe_image (src, buf));
 	if (mask != NULL)
@@ -687,7 +391,7 @@ image_init (image_t *info,
     info->color = &colors[color];
     compute_pixman_color (info->color, &fill);
 
-    info->format = &formats[format];
+    info->format = formats[format];
     info->size = sizes[size] & ~FLAGS;
     info->repeat = PIXMAN_REPEAT_NONE;
 
@@ -695,7 +399,7 @@ image_init (image_t *info,
     {
 	pixman_image_t *solid;
 
-	info->image = pixman_image_create_bits (info->format->format,
+	info->image = pixman_image_create_bits (info->format,
 						info->size, info->size,
 						NULL, 0);
 
@@ -725,38 +429,38 @@ image_fini (image_t *info)
 static int
 random_size (void)
 {
-    return lcg_rand_n (ARRAY_LENGTH (sizes));
+    return prng_rand_n (ARRAY_LENGTH (sizes));
 }
 
 static int
 random_color (void)
 {
-    return lcg_rand_n (ARRAY_LENGTH (colors));
+    return prng_rand_n (ARRAY_LENGTH (colors));
 }
 
 static int
 random_format (void)
 {
-    return lcg_rand_n (ARRAY_LENGTH (formats));
+    return prng_rand_n (ARRAY_LENGTH (formats));
 }
 
 static pixman_bool_t
 run_test (uint32_t seed)
 {
     image_t src, mask, dst;
-    const operator_t *op;
+    pixman_op_t op;
     int ca;
     int ok;
 
-    lcg_srand (seed);
+    prng_srand (seed);
 
     image_init (&dst, random_color(), random_format(), 1);
     image_init (&src, random_color(), random_format(), random_size());
     image_init (&mask, random_color(), random_format(), random_size());
 
-    op = &(operators [lcg_rand_n (ARRAY_LENGTH (operators))]);
+    op = operators [prng_rand_n (ARRAY_LENGTH (operators))];
 
-    ca = lcg_rand_n (3);
+    ca = prng_rand_n (3);
 
     switch (ca)
     {
diff --git a/lib/pixman/test/glyph-test.c b/lib/pixman/test/glyph-test.c
index 9dd5b41e4..1811add73 100644
--- a/lib/pixman/test/glyph-test.c
+++ b/lib/pixman/test/glyph-test.c
@@ -107,7 +107,7 @@ random_format (const pixman_format_code_t *formats)
     i = 0;
     while (formats[i] != PIXMAN_null)
 	++i;
-    return formats[lcg_rand_n (i)];
+    return formats[prng_rand_n (i)];
 }
 
 static pixman_image_t *
@@ -122,27 +122,27 @@ create_image (int max_size, const pixman_format_code_t *formats, uint32_t flags)
     int i;
     pixman_image_destroy_func_t destroy;
 
-    if ((flags & ALLOW_SOLID) && lcg_rand_n (4) == 0)
+    if ((flags & ALLOW_SOLID) && prng_rand_n (4) == 0)
     {
 	pixman_color_t color;
 
-	color.alpha = lcg_rand_u32();
-	color.red = lcg_rand_u32();
-	color.green = lcg_rand_u32();
-	color.blue = lcg_rand_u32();
+	color.alpha = prng_rand();
+	color.red = prng_rand();
+	color.green = prng_rand();
+	color.blue = prng_rand();
 
 	return pixman_image_create_solid_fill (&color);
     }
 
-    width = lcg_rand_n (max_size) + 1;
-    height = lcg_rand_n (max_size) + 1;
+    width = prng_rand_n (max_size) + 1;
+    height = prng_rand_n (max_size) + 1;
     format = random_format (formats);
 
     bpp = PIXMAN_FORMAT_BPP (format);
-    stride = (width * bpp + 7) / 8 + lcg_rand_n (17);
+    stride = (width * bpp + 7) / 8 + prng_rand_n (17);
     stride = (stride + 3) & ~3;
 
-    if (lcg_rand_n (64) == 0)
+    if (prng_rand_n (64) == 0)
     {
 	if (!(data = (uint32_t *)make_random_bytes (stride * height)))
 	{
@@ -153,34 +153,28 @@ create_image (int max_size, const pixman_format_code_t *formats, uint32_t flags)
     }
     else
     {
-	uint8_t *d8;
-
 	data = malloc (stride * height);
-
-	d8 = (uint8_t *)data;
-	for (i = 0; i < height * stride; ++i)
-	    d8[i] = lcg_rand_n (256);
-
+	prng_randmemset (data, height * stride, 0);
 	destroy = destroy_malloced;
     }
 
     image = pixman_image_create_bits (format, width, height, data, stride);
     pixman_image_set_destroy_function (image, destroy, data);
 
-    if ((flags & ALLOW_CLIPPED) && lcg_rand_n (8) == 0)
+    if ((flags & ALLOW_CLIPPED) && prng_rand_n (8) == 0)
     {
 	pixman_box16_t clip_boxes[8];
 	pixman_region16_t clip;
-	int n = lcg_rand_n (8) + 1;
+	int n = prng_rand_n (8) + 1;
 
 	for (i = 0; i < n; i++)
 	{
-	    clip_boxes[i].x1 = lcg_rand_n (width);
-	    clip_boxes[i].y1 = lcg_rand_n (height);
+	    clip_boxes[i].x1 = prng_rand_n (width);
+	    clip_boxes[i].y1 = prng_rand_n (height);
 	    clip_boxes[i].x2 =
-		clip_boxes[i].x1 + lcg_rand_n (width - clip_boxes[i].x1);
+		clip_boxes[i].x1 + prng_rand_n (width - clip_boxes[i].x1);
 	    clip_boxes[i].y2 =
-		clip_boxes[i].y1 + lcg_rand_n (height - clip_boxes[i].y1);
+		clip_boxes[i].y1 + prng_rand_n (height - clip_boxes[i].y1);
 	}
 
 	pixman_region_init_rects (&clip, clip_boxes, n);
@@ -188,35 +182,35 @@ create_image (int max_size, const pixman_format_code_t *formats, uint32_t flags)
 	pixman_region_fini (&clip);
     }
 
-    if ((flags & ALLOW_SOURCE_CLIPPING) && lcg_rand_n (4) == 0)
+    if ((flags & ALLOW_SOURCE_CLIPPING) && prng_rand_n (4) == 0)
     {
 	pixman_image_set_source_clipping (image, TRUE);
 	pixman_image_set_has_client_clip (image, TRUE);
     }
 
-    if ((flags & ALLOW_ALPHA_MAP) && lcg_rand_n (16) == 0)
+    if ((flags & ALLOW_ALPHA_MAP) && prng_rand_n (16) == 0)
     {
 	pixman_image_t *alpha_map;
 	int alpha_x, alpha_y;
 
-	alpha_x = lcg_rand_n (width);
-	alpha_y = lcg_rand_n (height);
+	alpha_x = prng_rand_n (width);
+	alpha_y = prng_rand_n (height);
 	alpha_map =
 	    create_image (max_size, formats, (flags & ~(ALLOW_ALPHA_MAP | ALLOW_SOLID)));
 	pixman_image_set_alpha_map (image, alpha_map, alpha_x, alpha_y);
 	pixman_image_unref (alpha_map);
     }
 
-    if ((flags & ALLOW_REPEAT) && lcg_rand_n (2) == 0)
-	pixman_image_set_repeat (image, lcg_rand_n (4));
+    if ((flags & ALLOW_REPEAT) && prng_rand_n (2) == 0)
+	pixman_image_set_repeat (image, prng_rand_n (4));
 
     image_endian_swap (image);
 
     return image;
 }
 
-#define KEY1(p) ((void *)(((unsigned long)p) ^ (0xa7e23dfaUL)))
-#define KEY2(p) ((void *)(((unsigned long)p) ^ (0xabcd9876UL)))
+#define KEY1(p) ((void *)(((uintptr_t)p) ^ (0xa7e23dfaUL)))
+#define KEY2(p) ((void *)(((uintptr_t)p) ^ (0xabcd9876UL)))
 
 #define MAX_GLYPHS 32
 
@@ -230,7 +224,7 @@ test_glyphs (int testnum, int verbose)
     int n_glyphs, i;
     pixman_glyph_cache_t *cache;
 
-    lcg_srand (testnum);
+    prng_srand (testnum);
 
     cache = pixman_glyph_cache_create ();
 
@@ -245,13 +239,13 @@ test_glyphs (int testnum, int verbose)
 
     pixman_glyph_cache_freeze (cache);
 
-    n_glyphs = lcg_rand_n (MAX_GLYPHS);
+    n_glyphs = prng_rand_n (MAX_GLYPHS);
     for (i = 0; i < n_glyphs; ++i)
 	glyph_images[i] = create_image (32, glyph_formats, 0);
 
     for (i = 0; i < 4 * n_glyphs; ++i)
     {
-	int g = lcg_rand_n (n_glyphs);
+	int g = prng_rand_n (n_glyphs);
 	pixman_image_t *glyph_img = glyph_images[g];
 	void *key1 = KEY1 (glyph_img);
 	void *key2 = KEY2 (glyph_img);
@@ -264,21 +258,21 @@ test_glyphs (int testnum, int verbose)
 	}
 
 	glyphs[i].glyph = glyph;
-	glyphs[i].x = lcg_rand_n (128);
-	glyphs[i].y = lcg_rand_n (128);
+	glyphs[i].x = prng_rand_n (128);
+	glyphs[i].y = prng_rand_n (128);
     }
 
-    if (lcg_rand_n (2) == 0)
+    if (prng_rand_n (2) == 0)
     {
-	int src_x = lcg_rand_n (300) - 150;
-	int src_y = lcg_rand_n (300) - 150;
-	int mask_x = lcg_rand_n (64) - 32;
-	int mask_y = lcg_rand_n (64) - 32;
-	int dest_x = lcg_rand_n (64) - 32;
-	int dest_y = lcg_rand_n (64) - 32;
-	int width = lcg_rand_n (64);
-	int height = lcg_rand_n (64);
-	pixman_op_t op = operators[lcg_rand_n (ARRAY_LENGTH (operators))];
+	int src_x = prng_rand_n (300) - 150;
+	int src_y = prng_rand_n (300) - 150;
+	int mask_x = prng_rand_n (64) - 32;
+	int mask_y = prng_rand_n (64) - 32;
+	int dest_x = prng_rand_n (64) - 32;
+	int dest_y = prng_rand_n (64) - 32;
+	int width = prng_rand_n (64);
+	int height = prng_rand_n (64);
+	pixman_op_t op = operators[prng_rand_n (ARRAY_LENGTH (operators))];
 	pixman_format_code_t format = random_format (glyph_formats);
 
 	pixman_composite_glyphs (
@@ -292,11 +286,11 @@ test_glyphs (int testnum, int verbose)
     }
     else
     {
-	pixman_op_t op = operators[lcg_rand_n (ARRAY_LENGTH (operators))];
-	int src_x = lcg_rand_n (300) - 150;
-	int src_y = lcg_rand_n (300) - 150;
-	int dest_x = lcg_rand_n (64) - 32;
-	int dest_y = lcg_rand_n (64) - 32;
+	pixman_op_t op = operators[prng_rand_n (ARRAY_LENGTH (operators))];
+	int src_x = prng_rand_n (300) - 150;
+	int src_y = prng_rand_n (300) - 150;
+	int dest_x = prng_rand_n (64) - 32;
+	int dest_y = prng_rand_n (64) - 32;
 
 	pixman_composite_glyphs_no_mask (
 	    op, source, dest,
@@ -333,6 +327,6 @@ int
 main (int argc, const char *argv[])
 {
     return fuzzer_test_main ("glyph", 30000,	
-			     0x79E74996,
+			     0xFA478A79,
 			     test_glyphs, argc, argv);
 }
diff --git a/lib/pixman/test/lowlevel-blt-bench.c b/lib/pixman/test/lowlevel-blt-bench.c
index 3afa926b0..1049e21e7 100644
--- a/lib/pixman/test/lowlevel-blt-bench.c
+++ b/lib/pixman/test/lowlevel-blt-bench.c
@@ -33,6 +33,14 @@
 #define L1CACHE_SIZE (8 * 1024)
 #define L2CACHE_SIZE (128 * 1024)
 
+/* This is applied to both L1 and L2 tests - alternatively, you could
+ * parameterise bench_L or split it into two functions. It could be
+ * read at runtime on some architectures, but it only really matters
+ * that it's a number that's an integer divisor of both cacheline
+ * lengths, and further, it only really matters for caches that don't
+ * do allocate-on-write. */
+#define CACHELINE_LENGTH (32) /* bytes */
+
 #define WIDTH  1920
 #define HEIGHT 1080
 #define BUFSIZE (WIDTH * HEIGHT * 4)
@@ -168,18 +176,29 @@ bench_L  (pixman_op_t              op,
           int                      width,
           int                      lines_count)
 {
-    int64_t      i, j;
+    int64_t      i, j, k;
     int          x = 0;
     int          q = 0;
     volatile int qx;
 
     for (i = 0; i < n; i++)
     {
-	/* touch destination buffer to fetch it into L1 cache */
-	for (j = 0; j < width + 64; j += 16) {
-	    q += dst[j];
-	    q += src[j];
-	}
+        /* For caches without allocate-on-write, we need to force the
+         * destination buffer back into the cache on each iteration,
+         * otherwise if its lines are evicted during the test, they remain
+         * uncached. This doesn't matter for tests which read the
+         * destination buffer, or for caches that do allocate-on-write,
+         * but in those cases this loop just adds constant time, which
+         * should be successfully cancelled out.
+         */
+        for (j = 0; j < lines_count; j++)
+        {
+            for (k = 0; k < width + 62; k += CACHELINE_LENGTH / sizeof *dst)
+            {
+                q += dst[j * WIDTH + k];
+            }
+            q += dst[j * WIDTH + width + 62];
+        }
 	if (++x >= 64)
 	    x = 0;
 	call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 63 - x, 0, width, lines_count);
@@ -366,6 +385,7 @@ bench_composite (char * testname,
     double                          t1, t2, t3, pix_cnt;
     int64_t                         n, l1test_width, nlines;
     double                             bytes_per_pix = 0;
+    pixman_bool_t                   bench_pixbuf = FALSE;
 
     pixman_composite_func_t func = pixman_image_composite_wrapper;
 
@@ -403,16 +423,20 @@ bench_composite (char * testname,
 
     mask_img = NULL;
     xmask_img = NULL;
+    if (strcmp (testname, "pixbuf") == 0 || strcmp (testname, "rpixbuf") == 0)
+    {
+        bench_pixbuf = TRUE;
+    }
     if (!(mask_flags & SOLID_FLAG) && mask_fmt != PIXMAN_null)
     {
         bytes_per_pix += (mask_fmt >> 24) / ((op == PIXMAN_OP_SRC) ? 8.0 : 4.0);
         mask_img = pixman_image_create_bits (mask_fmt,
                                              WIDTH, HEIGHT,
-                                             mask,
+                                             bench_pixbuf ? src : mask,
                                              WIDTH * 4);
         xmask_img = pixman_image_create_bits (mask_fmt,
                                              XWIDTH, XHEIGHT,
-                                             mask,
+                                             bench_pixbuf ? src : mask,
                                              XWIDTH * 4);
     }
     else if (mask_fmt != PIXMAN_null)
@@ -441,8 +465,8 @@ bench_composite (char * testname,
     printf ("%24s %c", testname, func != pixman_image_composite_wrapper ?
             '-' : '=');
 
-    memcpy (src, dst, BUFSIZE);
     memcpy (dst, src, BUFSIZE);
+    memcpy (src, dst, BUFSIZE);
 
     l1test_width = L1CACHE_SIZE / 8 - 64;
     if (l1test_width < 1)
@@ -461,8 +485,8 @@ bench_composite (char * testname,
             ((t3 - t2) - (t2 - t1)) / 1000000.);
     fflush (stdout);
 
-    memcpy (src, dst, BUFSIZE);
     memcpy (dst, src, BUFSIZE);
+    memcpy (src, dst, BUFSIZE);
 
     nlines = (L2CACHE_SIZE / l1test_width) /
 	((PIXMAN_FORMAT_BPP(src_fmt) + PIXMAN_FORMAT_BPP(dst_fmt)) / 8);
@@ -480,8 +504,8 @@ bench_composite (char * testname,
             ((t3 - t2) - (t2 - t1)) / 1000000.);
     fflush (stdout);
 
-    memcpy (src, dst, BUFSIZE);
     memcpy (dst, src, BUFSIZE);
+    memcpy (src, dst, BUFSIZE);
 
     n = 1 + npix / (WIDTH * HEIGHT);
     t1 = gettime ();
@@ -496,8 +520,8 @@ bench_composite (char * testname,
         ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1)) * bytes_per_pix) * (100.0 / bandwidth) );
     fflush (stdout);
 
-    memcpy (src, dst, BUFSIZE);
     memcpy (dst, src, BUFSIZE);
+    memcpy (src, dst, BUFSIZE);
 
     n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
     t1 = gettime ();
@@ -510,8 +534,8 @@ bench_composite (char * testname,
     printf ("  HT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
     fflush (stdout);
 
-    memcpy (src, dst, BUFSIZE);
     memcpy (dst, src, BUFSIZE);
+    memcpy (src, dst, BUFSIZE);
 
     n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
     t1 = gettime ();
@@ -524,8 +548,8 @@ bench_composite (char * testname,
     printf ("  VT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
     fflush (stdout);
 
-    memcpy (src, dst, BUFSIZE);
     memcpy (dst, src, BUFSIZE);
+    memcpy (src, dst, BUFSIZE);
 
     n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH);
     t1 = gettime ();
@@ -538,8 +562,8 @@ bench_composite (char * testname,
     printf ("  R:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.);
     fflush (stdout);
 
-    memcpy (src, dst, BUFSIZE);
     memcpy (dst, src, BUFSIZE);
+    memcpy (src, dst, BUFSIZE);
 
     n = 1 + npix / (16 * TINYWIDTH * TINYWIDTH);
     t1 = gettime ();
@@ -616,6 +640,7 @@ tests_tbl[] =
     { "src_n_2x10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
     { "src_n_2a10",            PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r10g10b10 },
     { "src_8888_0565",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
+    { "src_0565_8888",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
     { "src_8888_4444",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a4r4g4b4 },
     { "src_8888_2222",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a2r2g2b2 },
     { "src_8888_2x10",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x2r10g10b10 },
@@ -623,12 +648,16 @@ tests_tbl[] =
     { "src_0888_0565",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
     { "src_0888_8888",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
     { "src_0888_x888",         PIXMAN_r8g8b8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
+    { "src_0888_8888_rev",     PIXMAN_b8g8r8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
+    { "src_0888_0565_rev",     PIXMAN_b8g8r8,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
     { "src_x888_x888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_x8r8g8b8 },
     { "src_x888_8888",         PIXMAN_x8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
     { "src_8888_8888",         PIXMAN_a8r8g8b8,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8r8g8b8 },
     { "src_0565_0565",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
     { "src_1555_0565",         PIXMAN_a1r5g5b5,    0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_r5g6b5 },
     { "src_0565_1555",         PIXMAN_r5g6b5,      0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a1r5g5b5 },
+    { "src_8_8",               PIXMAN_a8,          0, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8 },
+    { "src_n_8",               PIXMAN_a8,          1, PIXMAN_OP_SRC,     PIXMAN_null,     0, PIXMAN_a8 },
     { "src_n_8_0565",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_r5g6b5 },
     { "src_n_8_1555",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a1r5g5b5 },
     { "src_n_8_4444",          PIXMAN_a8r8g8b8,    1, PIXMAN_OP_SRC,     PIXMAN_a8,       0, PIXMAN_a4r4g4b4 },
@@ -685,6 +714,8 @@ tests_tbl[] =
     { "outrev_n_8888_x888_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
     { "outrev_n_8888_8888_ca", PIXMAN_a8r8g8b8,    1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
     { "over_reverse_n_8888",   PIXMAN_a8r8g8b8,    0, PIXMAN_OP_OVER_REVERSE, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
+    { "pixbuf",                PIXMAN_x8b8g8r8,    0, PIXMAN_OP_SRC,     PIXMAN_a8b8g8r8, 0, PIXMAN_a8r8g8b8 },
+    { "rpixbuf",               PIXMAN_x8b8g8r8,    0, PIXMAN_OP_SRC,     PIXMAN_a8b8g8r8, 0, PIXMAN_a8b8g8r8 },
 };
 
 int
@@ -771,7 +802,7 @@ main (int argc, char *argv[])
 
     for (i = 0; i < ARRAY_LENGTH (tests_tbl); i++)
     {
-	if (strcmp (pattern, "all") == 0 || strstr (tests_tbl[i].testname, pattern))
+	if (strcmp (pattern, "all") == 0 || strcmp (tests_tbl[i].testname, pattern) == 0)
 	{
 	    bench_composite (tests_tbl[i].testname,
 			     tests_tbl[i].src_fmt,
diff --git a/lib/pixman/test/matrix-test.c b/lib/pixman/test/matrix-test.c
new file mode 100644
index 000000000..8437dd291
--- /dev/null
+++ b/lib/pixman/test/matrix-test.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "utils.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifdef HAVE_FLOAT128
+
+#define pixman_fixed_to_float128(x) (((__float128)(x)) / 65536.0Q)
+
+typedef struct { __float128 v[3]; } pixman_vector_f128_t;
+typedef struct { __float128 m[3][3]; } pixman_transform_f128_t;
+
+pixman_bool_t
+pixman_transform_point_f128 (const pixman_transform_f128_t *t,
+                             const pixman_vector_f128_t    *v,
+                             pixman_vector_f128_t          *result)
+{
+    int i;
+    for (i = 0; i < 3; i++)
+    {
+        result->v[i] = t->m[i][0] * v->v[0] +
+                       t->m[i][1] * v->v[1] +
+                       t->m[i][2] * v->v[2];
+    }
+    if (result->v[2] != 0)
+    {
+        result->v[0] /= result->v[2];
+        result->v[1] /= result->v[2];
+        result->v[2] = 1;
+        return TRUE;
+    }
+    else
+    {
+        return FALSE;
+    }
+}
+
+pixman_bool_t does_it_fit_fixed_48_16 (__float128 x)
+{
+    if (x >= 65536.0Q * 65536.0Q * 32768.0Q)
+        return FALSE;
+    if (x <= -65536.0Q * 65536.0Q * 32768.0Q)
+        return FALSE;
+    return TRUE;
+}
+
+#endif
+
+uint32_t
+test_matrix (int testnum, int verbose)
+{
+    uint32_t crc32 = 0;
+    int i, j, k;
+    pixman_bool_t is_affine;
+
+    prng_srand (testnum);
+
+    for (i = 0; i < 100; i++)
+    {
+        pixman_bool_t           transform_ok;
+        pixman_transform_t      ti;
+        pixman_vector_48_16_t   vi, result_i;
+#ifdef HAVE_FLOAT128
+        pixman_transform_f128_t tf;
+        pixman_vector_f128_t    vf, result_f;
+#endif
+        prng_randmemset (&ti, sizeof(ti), 0);
+        prng_randmemset (&vi, sizeof(vi), 0);
+
+        for (j = 0; j < 3; j++)
+        {
+            /* make sure that "vi" contains 31.16 fixed point data */
+            vi.v[j] >>= 17;
+            /* and apply random shift */
+            if (prng_rand_n (3) == 0)
+                vi.v[j] >>= prng_rand_n (46);
+        }
+
+        if (prng_rand_n (2))
+        {
+            /* random shift for the matrix */
+            for (j = 0; j < 3; j++)
+                for (k = 0; k < 3; k++)
+                    ti.matrix[j][k] >>= prng_rand_n (30);
+        }
+
+        if (prng_rand_n (2))
+        {
+            /* affine matrix */
+            ti.matrix[2][0] = 0;
+            ti.matrix[2][1] = 0;
+            ti.matrix[2][2] = pixman_fixed_1;
+        }
+
+        if (prng_rand_n (2))
+        {
+            /* cartesian coordinates */
+            vi.v[2] = pixman_fixed_1;
+        }
+
+        is_affine = (ti.matrix[2][0] == 0 && ti.matrix[2][1] == 0 &&
+                     ti.matrix[2][2] == pixman_fixed_1 &&
+                     vi.v[2] == pixman_fixed_1);
+
+        transform_ok = TRUE;
+        if (is_affine && prng_rand_n (2))
+            pixman_transform_point_31_16_affine (&ti, &vi, &result_i);
+        else
+            transform_ok = pixman_transform_point_31_16 (&ti, &vi, &result_i);
+
+        crc32 = compute_crc32 (crc32, &result_i, sizeof(result_i));
+
+#ifdef HAVE_FLOAT128
+        /* compare with a reference 128-bit floating point implementation */
+        for (j = 0; j < 3; j++)
+        {
+            vf.v[j] = pixman_fixed_to_float128 (vi.v[j]);
+            for (k = 0; k < 3; k++)
+            {
+                tf.m[j][k] = pixman_fixed_to_float128 (ti.matrix[j][k]);
+            }
+        }
+
+        if (pixman_transform_point_f128 (&tf, &vf, &result_f))
+        {
+            if (transform_ok ||
+                (does_it_fit_fixed_48_16 (result_f.v[0]) &&
+                 does_it_fit_fixed_48_16 (result_f.v[1]) &&
+                 does_it_fit_fixed_48_16 (result_f.v[2])))
+            {
+                for (j = 0; j < 3; j++)
+                {
+                    double diff = fabs (result_f.v[j] -
+                                        pixman_fixed_to_float128 (result_i.v[j]));
+
+                    if (is_affine && diff > (0.51 / 65536.0))
+                    {
+                        printf ("%d:%d: bad precision for affine (%.12f)\n",
+                               testnum, i, diff);
+                        abort ();
+                    }
+                    else if (diff > (0.71 / 65536.0))
+                    {
+                        printf ("%d:%d: bad precision for projective (%.12f)\n",
+                               testnum, i, diff);
+                        abort ();
+                    }
+                }
+            }
+        }
+#endif
+    }
+    return crc32;
+}
+
+int
+main (int argc, const char *argv[])
+{
+    return fuzzer_test_main ("matrix", 20000,
+			     0xBEBF98C3,
+			     test_matrix, argc, argv);
+}
diff --git a/lib/pixman/test/pixel-test.c b/lib/pixman/test/pixel-test.c
new file mode 100644
index 000000000..8c525d202
--- /dev/null
+++ b/lib/pixman/test/pixel-test.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright © 2013 Soeren Sandmann
+ * Copyright © 2013 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <stdio.h>
+#include <stdlib.h> /* abort() */
+#include <math.h>
+#include <time.h>
+#include "utils.h"
+
+typedef struct pixel_combination_t pixel_combination_t;
+struct pixel_combination_t
+{
+    pixman_op_t			op;
+    pixman_format_code_t	src_format;
+    uint32_t			src_pixel;
+    pixman_format_code_t	dest_format;
+    uint32_t			dest_pixel;
+};
+
+static const pixel_combination_t regressions[] =
+{
+    { PIXMAN_OP_OVER,
+      PIXMAN_a8r8g8b8,	0x0f00c300,
+      PIXMAN_x14r6g6b6,	0x003c0,
+    },
+    { PIXMAN_OP_DISJOINT_XOR,
+      PIXMAN_a4r4g4b4,	0xd0c0,
+      PIXMAN_a8r8g8b8,	0x5300ea00,
+    },
+    { PIXMAN_OP_OVER,
+      PIXMAN_a8r8g8b8,	0x20c6bf00,
+      PIXMAN_r5g6b5,	0xb9ff
+    },
+    { PIXMAN_OP_OVER,
+      PIXMAN_a8r8g8b8,	0x204ac7ff,
+      PIXMAN_r5g6b5,	0xc1ff
+    },
+    { PIXMAN_OP_OVER_REVERSE,
+      PIXMAN_r5g6b5,	0xffc3,
+      PIXMAN_a8r8g8b8,	0x102d00dd
+    },
+    { PIXMAN_OP_OVER_REVERSE,
+      PIXMAN_r5g6b5,	0x1f00,
+      PIXMAN_a8r8g8b8,	0x1bdf0c89
+    },
+    { PIXMAN_OP_OVER_REVERSE,
+      PIXMAN_r5g6b5,	0xf9d2,
+      PIXMAN_a8r8g8b8,	0x1076bcf7
+    },
+    { PIXMAN_OP_OVER_REVERSE,
+      PIXMAN_r5g6b5,	0x00c3,
+      PIXMAN_a8r8g8b8,	0x1bfe9ae5
+    },
+    { PIXMAN_OP_OVER_REVERSE,
+      PIXMAN_r5g6b5,	0x09ff,
+      PIXMAN_a8r8g8b8,	0x0b00c16c
+    },
+    { PIXMAN_OP_DISJOINT_ATOP,
+      PIXMAN_a2r2g2b2,	0xbc,
+      PIXMAN_a8r8g8b8,	0x9efff1ff
+    },
+    { PIXMAN_OP_DISJOINT_ATOP,
+      PIXMAN_a4r4g4b4,	0xae5f,
+      PIXMAN_a8r8g8b8,	0xf215b675
+    },
+    { PIXMAN_OP_DISJOINT_ATOP_REVERSE,
+      PIXMAN_a8r8g8b8,	0xce007980,
+      PIXMAN_a8r8g8b8,	0x80ffe4ad
+    },
+    { PIXMAN_OP_DISJOINT_XOR,
+      PIXMAN_a8r8g8b8,	0xb8b07bea,
+      PIXMAN_a4r4g4b4,	0x939c
+    },
+    { PIXMAN_OP_CONJOINT_ATOP_REVERSE,
+      PIXMAN_r5g6b5,	0x0063,
+      PIXMAN_a8r8g8b8,	0x10bb1ed7,
+    },
+};
+
+static void
+fill (pixman_image_t *image, uint32_t pixel)
+{
+    uint8_t *data = (uint8_t *)pixman_image_get_data (image);
+    int bytes_per_pixel = PIXMAN_FORMAT_BPP (pixman_image_get_format (image)) / 8;
+    int n_bytes = pixman_image_get_stride (image) * pixman_image_get_height (image);
+    int i;
+
+    switch (bytes_per_pixel)
+    {
+    case 4:
+	for (i = 0; i < n_bytes / 4; ++i)
+	    ((uint32_t *)data)[i] = pixel;
+	break;
+
+    case 2:
+	pixel &= 0xffff;
+	for (i = 0; i < n_bytes / 2; ++i)
+	    ((uint16_t *)data)[i] = pixel;
+	break;
+
+    case 1:
+	pixel &= 0xff;
+	for (i = 0; i < n_bytes; ++i)
+	    ((uint8_t *)data)[i] = pixel;
+	break;
+
+    default:
+	assert (0);
+	break;
+    }
+}
+
+static uint32_t
+access (pixman_image_t *image, int x, int y)
+{
+    int bytes_per_pixel;
+    int stride;
+    uint32_t result;
+    uint8_t *location;
+
+    if (x < 0 || x >= image->bits.width || y < 0 || y >= image->bits.height)
+        return 0;
+
+    bytes_per_pixel = PIXMAN_FORMAT_BPP (image->bits.format) / 8;
+    stride = image->bits.rowstride * 4;
+
+    location = (uint8_t *)image->bits.bits + y * stride + x * bytes_per_pixel;
+
+    if (bytes_per_pixel == 4)
+        result = *(uint32_t *)location;
+    else if (bytes_per_pixel == 2)
+        result = *(uint16_t *)location;
+    else if (bytes_per_pixel == 1)
+        result = *(uint8_t *)location;
+    else
+	assert (0);
+
+    return result;
+}
+
+static pixman_bool_t
+verify (int test_no, const pixel_combination_t *combination, int size)
+{
+    pixman_image_t *src, *dest;
+    pixel_checker_t src_checker, dest_checker;
+    color_t source_color, dest_color, reference_color;
+    pixman_bool_t result = TRUE;
+    int i, j;
+
+    /* Compute reference color */
+    pixel_checker_init (&src_checker, combination->src_format);
+    pixel_checker_init (&dest_checker, combination->dest_format);
+    pixel_checker_convert_pixel_to_color (
+	&src_checker, combination->src_pixel, &source_color);
+    pixel_checker_convert_pixel_to_color (
+	&dest_checker, combination->dest_pixel, &dest_color);
+    do_composite (combination->op,
+		  &source_color, NULL, &dest_color,
+		  &reference_color, FALSE);
+
+    src = pixman_image_create_bits (
+	combination->src_format, size, size, NULL, -1);
+    dest = pixman_image_create_bits (
+	combination->dest_format, size, size, NULL, -1);
+
+    fill (src, combination->src_pixel);
+    fill (dest, combination->dest_pixel);
+
+    pixman_image_composite32 (
+	combination->op, src, NULL, dest, 0, 0, 0, 0, 0, 0, size, size);
+
+    for (j = 0; j < size; ++j)
+    {
+	for (i = 0; i < size; ++i)
+	{
+	    uint32_t computed = access (dest, i, j);
+	    int32_t a, r, g, b;
+
+	    if (!pixel_checker_check (&dest_checker, computed, &reference_color))
+	    {
+		printf ("----------- Test %d failed ----------\n", test_no);
+
+		printf ("   operator:         %s\n", operator_name (combination->op));
+		printf ("   src format:       %s\n", format_name (combination->src_format));
+		printf ("   dest format:      %s\n", format_name (combination->dest_format));
+                printf (" - source ARGB:      %f  %f  %f  %f   (pixel: %8x)\n",
+                        source_color.a, source_color.r, source_color.g, source_color.b,
+                        combination->src_pixel);
+		pixel_checker_split_pixel (&src_checker, combination->src_pixel,
+					   &a, &r, &g, &b);
+                printf ("                     %8d  %8d  %8d  %8d\n", a, r, g, b);
+
+                printf (" - dest ARGB:        %f  %f  %f  %f   (pixel: %8x)\n",
+                        dest_color.a, dest_color.r, dest_color.g, dest_color.b,
+                        combination->dest_pixel);
+		pixel_checker_split_pixel (&dest_checker, combination->dest_pixel,
+					   &a, &r, &g, &b);
+                printf ("                     %8d  %8d  %8d  %8d\n", a, r, g, b);
+
+                pixel_checker_split_pixel (&dest_checker, computed, &a, &r, &g, &b);
+                printf (" - expected ARGB:    %f  %f  %f  %f\n",
+                        reference_color.a, reference_color.r, reference_color.g, reference_color.b);
+
+                pixel_checker_get_min (&dest_checker, &reference_color, &a, &r, &g, &b);
+                printf ("   min acceptable:   %8d  %8d  %8d  %8d\n", a, r, g, b);
+
+                pixel_checker_split_pixel (&dest_checker, computed, &a, &r, &g, &b);
+                printf ("   got:              %8d  %8d  %8d  %8d   (pixel: %8x)\n", a, r, g, b, computed);
+
+                pixel_checker_get_max (&dest_checker, &reference_color, &a, &r, &g, &b);
+                printf ("   max acceptable:   %8d  %8d  %8d  %8d\n", a, r, g, b);
+
+		result = FALSE;
+		goto done;
+	    }
+	}
+    }
+
+done:
+    pixman_image_unref (src);
+    pixman_image_unref (dest);
+
+    return result;
+}
+
+int
+main (int argc, char **argv)
+{
+    int result = 0;
+    int i, j;
+
+    for (i = 0; i < ARRAY_LENGTH (regressions); ++i)
+    {
+	const pixel_combination_t *combination = &(regressions[i]);
+
+	for (j = 1; j < 34; ++j)
+	{
+	    if (!verify (i, combination, j))
+	    {
+		result = 1;
+		break;
+	    }
+	}
+    }
+
+    return result;
+}
diff --git a/lib/pixman/test/prng-test.c b/lib/pixman/test/prng-test.c
new file mode 100644
index 000000000..c1d9320cc
--- /dev/null
+++ b/lib/pixman/test/prng-test.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com>
+ *
+ * Based on the public domain implementation of small noncryptographic PRNG
+ * authored by Bob Jenkins: http://burtleburtle.net/bob/rand/smallprng.html
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include "utils-prng.h"
+#include "utils.h"
+
+/* The original code from http://www.burtleburtle.net/bob/rand/smallprng.html */
+
+typedef uint32_t u4;
+typedef struct ranctx { u4 a; u4 b; u4 c; u4 d; } ranctx;	/* four-word state of the reference generator */
+
+#define rot(x,k) (((x)<<(k))|((x)>>(32-(k))))	/* rotate a 32-bit x left by k bits; k == 0 would shift by 32 */
+u4 ranval( ranctx *x ) {	/* advance the state by one step and return the new x->d */
+    u4 e = x->a - rot(x->b, 27);
+    x->a = x->b ^ rot(x->c, 17);
+    x->b = x->c + x->d;
+    x->c = x->d + e;
+    x->d = e + x->a;	/* uses the x->a that was just updated above */
+    return x->d;
+}
+
+void raninit( ranctx *x, u4 seed ) {	/* seed the state, then discard the first 20 outputs */
+    u4 i;
+    x->a = 0xf1ea5eed, x->b = x->c = x->d = seed;	/* a is a fixed constant; b, c and d all start as seed */
+    for (i=0; i<20; ++i) {
+        (void)ranval(x);
+    }
+}
+
+/*****************************************************************************/
+
+#define BUFSIZE (8 * 1024 * 1024)
+#define N 50
+
+/* Time N fills of BUFSIZE bytes at dst and print the rate under label. */
+static void
+bench_randmemset (prng_t *prng, uint8_t *dst,
+                  prng_randmemset_flags_t flags, const char *label)
+{
+    double t1, t2;
+    int i;
+
+    t1 = gettime ();
+    for (i = 0; i < N; i++)
+        prng_randmemset_r (prng, dst, BUFSIZE, flags);
+    t2 = gettime ();
+    printf ("%s: %.2f MB/s\n", label,
+            (double)BUFSIZE * N / 1000000. / (t2 - t1));
+}
+
+/*
+ * Benchmark prng_randmemset_r() on buf and on buf + 1, with flags 0 and
+ * with RANDMEMSET_MORE_00_AND_FF.  One spare byte is allocated so that the
+ * buf + 1 runs stay inside the allocation.  If the allocation fails, an
+ * error is printed to stderr and nothing is measured.
+ */
+void bench (void)
+{
+    prng_t prng;
+    uint8_t *buf = aligned_malloc (16, BUFSIZE + 1);
+
+    if (!buf)
+    {
+        fprintf (stderr, "bench: cannot allocate %d bytes\n", BUFSIZE + 1);
+        return;
+    }
+
+    prng_srand_r (&prng, 1234);
+    bench_randmemset (&prng, buf, 0, "aligned randmemset                    ");
+    bench_randmemset (&prng, buf + 1, 0, "unaligned randmemset                  ");
+    bench_randmemset (&prng, buf, RANDMEMSET_MORE_00_AND_FF,
+                      "aligned randmemset (more 00 and FF)   ");
+    bench_randmemset (&prng, buf + 1, RANDMEMSET_MORE_00_AND_FF,
+                      "unaligned randmemset (more 00 and FF) ");
+    free (buf);
+}
+
+#define SMALLBUFSIZE 100
+
+/*
+ * Check the prng_* helpers against the reference generator defined above.
+ * With "-bench" as the first argument only bench() is run.  The checks are
+ * assert() calls, so they are compiled out when NDEBUG is defined.
+ */
+int main (int argc, char *argv[])
+{
+    const uint32_t ref_crc[RANDMEMSET_MORE_00_AND_FF + 1] =
+    {
+        0xBA06763D, 0x103FC550, 0x8B59ABA5, 0xD82A0F39,
+        0xD2321099, 0xFD8C5420, 0xD3B7C42A, 0xFC098093,
+        0x85E01DE0, 0x6680F8F7, 0x4D32DD3C, 0xAE52382B,
+        0x149E6CB5, 0x8B336987, 0x15DCB2B3, 0x8A71B781
+    };
+    uint32_t crc1, crc2;
+    uint32_t ref, seed;
+    prng_rand_128_data_t buf;
+    uint8_t *bytebuf;
+    ranctx x;
+    prng_t prng;
+    prng_randmemset_flags_t flags;
+    int i;
+
+    if (argc > 1 && strcmp(argv[1], "-bench") == 0)
+    {
+        bench ();
+        return 0;
+    }
+
+    /* basic test */
+    raninit (&x, 0);
+    prng_srand_r (&prng, 0);
+    assert (ranval (&x) == prng_rand_r (&prng));
+
+    /* test for simd code: word i of the 128-bit result must equal the first
+     * output of the reference generator seeded with the (i+1)-th value of
+     * the sequence seed = seed * 1103515245 + 12345, starting from seed 0 */
+    seed = 0;
+    prng_srand_r (&prng, seed);
+    prng_rand_128_r (&prng, &buf);
+    for (i = 0; i < 4; i++)
+    {
+        seed = seed * 1103515245 + 12345;
+        raninit (&x, seed);
+        ref = ranval (&x);
+        assert (ref == buf.w[i]);
+    }
+
+    /* Allocate only now, so that the -bench path above cannot leak it. */
+    bytebuf = aligned_malloc (16, SMALLBUFSIZE + 1);
+    if (!bytebuf)
+        return 1;
+
+    /* test for randmemset */
+    for (flags = 0; flags <= RANDMEMSET_MORE_00_AND_FF; flags++)
+    {
+        prng_srand_r (&prng, 1234);
+        prng_randmemset_r (&prng, bytebuf, 16, flags);
+        prng_randmemset_r (&prng, bytebuf + 16, SMALLBUFSIZE - 17, flags);
+        crc1 = compute_crc32 (0, bytebuf, SMALLBUFSIZE - 1);
+        prng_srand_r (&prng, 1234);
+        prng_randmemset_r (&prng, bytebuf + 1, SMALLBUFSIZE - 1, flags);
+        crc2 = compute_crc32 (0, bytebuf + 1, SMALLBUFSIZE - 1);
+        assert (ref_crc[flags] == crc1);
+        assert (ref_crc[flags] == crc2);
+    }
+
+    free (bytebuf);
+
+    return 0;
+}
diff --git a/lib/pixman/test/radial-perf-test.c b/lib/pixman/test/radial-perf-test.c
new file mode 100644
index 000000000..71092e27b
--- /dev/null
+++ b/lib/pixman/test/radial-perf-test.c
@@ -0,0 +1,58 @@
+#include "utils.h"
+#include <stdio.h>
+
+/* Time N_COMPOSITE composites of a transformed radial gradient (PAD repeat). */
+int
+main (void)
+{
+    static const pixman_point_fixed_t inner = { 0x0000, 0x0000 };
+    static const pixman_point_fixed_t outer = { 0x0000, 0x0000 };
+    static const pixman_fixed_t r_inner = 0;
+    static const pixman_fixed_t r_outer = 64 << 16;
+    static const pixman_gradient_stop_t stops[] = {
+	{ 0x00000, { 0x6666, 0x6666, 0x6666, 0xffff } },
+	{ 0x10000, { 0x0000, 0x0000, 0x0000, 0xffff } }
+    };
+    static const pixman_transform_t transform = {
+	{ { 0x0,        0x26ee, 0x0},
+	  { 0xffffeeef, 0x0,    0x0},
+	  { 0x0,        0x0,    0x10000}
+	}
+    };
+    static const pixman_color_t z = { 0x0000, 0x0000, 0x0000, 0x0000 };
+    pixman_image_t *dest, *radial, *zero;
+    int i;
+    double before, after;
+
+    dest = pixman_image_create_bits (PIXMAN_x8r8g8b8, 640, 429, NULL, -1);
+    zero = pixman_image_create_solid_fill (&z);
+    radial = pixman_image_create_radial_gradient (
+	&inner, &outer, r_inner, r_outer, stops, ARRAY_LENGTH (stops));
+    pixman_image_set_transform (radial, &transform);
+    pixman_image_set_repeat (radial, PIXMAN_REPEAT_PAD);
+
+#define N_COMPOSITE	500
+
+    before = gettime();
+    for (i = 0; i < N_COMPOSITE; ++i)
+    {
+	/* Shift "before" by the clear's duration so the clear is not counted. */
+	before -= gettime();
+	pixman_image_composite (PIXMAN_OP_SRC, zero, NULL, dest,
+				0, 0, 0, 0, 0, 0, 640, 429);
+	before += gettime();
+
+	pixman_image_composite32 (PIXMAN_OP_OVER, radial, NULL, dest,
+				  - 150, -158, 0, 0, 0, 0, 640, 361);
+    }
+    after = gettime();
+
+    write_png (dest, "radial.png");
+    printf ("Average time to composite: %f\n", (after - before) / N_COMPOSITE);
+
+    /* Release the images created above; the original never freed them. */
+    pixman_image_unref (radial);
+    pixman_image_unref (zero);
+    pixman_image_unref (dest);
+    return 0;
+}
diff --git a/lib/pixman/test/region-contains-test.c b/lib/pixman/test/region-contains-test.c
index 9524e2888..096e65179 100644
--- a/lib/pixman/test/region-contains-test.c
+++ b/lib/pixman/test/region-contains-test.c
@@ -9,16 +9,16 @@ make_random_region (pixman_region32_t *region)
 
     pixman_region32_init (region);
 
-    n_boxes = lcg_rand_n (64);
+    n_boxes = prng_rand_n (64);
     while (n_boxes--)
     {
 	int32_t x, y;
 	uint32_t w, h;
 
-	x = (int32_t)lcg_rand_u32() >> 2;
-	y = (int32_t)lcg_rand_u32() >> 2;
-	w = lcg_rand_u32() >> 2;
-	h = lcg_rand_u32() >> 2;
+	x = (int32_t)prng_rand() >> 2;
+	y = (int32_t)prng_rand() >> 2;
+	w = prng_rand() >> 2;
+	h = prng_rand() >> 2;
 
 	pixman_region32_union_rect (region, region, x, y, w, h);
     }
@@ -37,12 +37,12 @@ random_coord (pixman_region32_t *region, pixman_bool_t x)
     int n_boxes;
     int begin, end;
 
-    if (lcg_rand_n (14))
+    if (prng_rand_n (14))
     {
 	bb = pixman_region32_rectangles (region, &n_boxes);
 	if (n_boxes == 0)
 	    goto use_extent;
-	b = bb + lcg_rand_n (n_boxes);
+	b = bb + prng_rand_n (n_boxes);
     }
     else
     {
@@ -62,12 +62,12 @@ random_coord (pixman_region32_t *region, pixman_bool_t x)
 	end = b->y2;
     }
 
-    switch (lcg_rand_n (5))
+    switch (prng_rand_n (5))
     {
     case 0:
-	return begin - lcg_rand_u32();
+	return begin - prng_rand();
     case 1:
-	return end + lcg_rand_u32 ();
+	return end + prng_rand ();
     case 2:
 	return end;
     case 3:
@@ -111,14 +111,14 @@ test_region_contains_rectangle (int i, int verbose)
     pixman_region32_t region;
     uint32_t r, r1, r2, r3, r4, crc32;
 
-    lcg_srand (i);
+    prng_srand (i);
 
     make_random_region (&region);
 
     box.x1 = random_coord (&region, TRUE);
-    box.x2 = box.x1 + lcg_rand_u32 ();
+    box.x2 = box.x1 + prng_rand ();
     box.y1 = random_coord (&region, FALSE);
-    box.y2 = box.y1 + lcg_rand_u32 ();
+    box.y2 = box.y1 + prng_rand ();
 
     if (verbose)
     {
@@ -163,7 +163,7 @@ main (int argc, const char *argv[])
 {
     return fuzzer_test_main ("region_contains",
 			     1000000,
-			     0xD2BF8C73,
+			     0x548E0F3F,
 			     test_region_contains_rectangle,
 			     argc, argv);
 }
diff --git a/lib/pixman/test/region-test.c b/lib/pixman/test/region-test.c
index 9d5a41eb9..bfc219bc7 100644
--- a/lib/pixman/test/region-test.c
+++ b/lib/pixman/test/region-test.c
@@ -32,6 +32,8 @@ main ()
 	0xffff
     };
 
+    prng_srand (0);
+
     /* This used to go into an infinite loop before pixman-region.c
      * was fixed to not use explict "short" variables
      */
@@ -91,10 +93,10 @@ main ()
 	/* Add some random rectangles */
 	for (j = 0; j < 64; j++)
 	    pixman_region32_union_rect (&r1, &r1,
-					lcg_rand_n (image_size),
-					lcg_rand_n (image_size),
-					lcg_rand_n (25),
-					lcg_rand_n (25));
+					prng_rand_n (image_size),
+					prng_rand_n (image_size),
+					prng_rand_n (25),
+					prng_rand_n (25));
 
 	/* Clip to image size */
 	pixman_region32_init_rect (&r2, 0, 0, image_size, image_size);
diff --git a/lib/pixman/test/rotate-test.c b/lib/pixman/test/rotate-test.c
index d63a28947..9d2a620cb 100644
--- a/lib/pixman/test/rotate-test.c
+++ b/lib/pixman/test/rotate-test.c
@@ -43,13 +43,13 @@ static const pixman_transform_t transforms[] =
 };
 
 #define RANDOM_FORMAT()							\
-    (formats[lcg_rand_n (ARRAY_LENGTH (formats))])
+    (formats[prng_rand_n (ARRAY_LENGTH (formats))])
 
 #define RANDOM_OP()							\
-    (ops[lcg_rand_n (ARRAY_LENGTH (ops))])
+    (ops[prng_rand_n (ARRAY_LENGTH (ops))])
 
 #define RANDOM_TRANSFORM()						\
-    (&(transforms[lcg_rand_n (ARRAY_LENGTH (transforms))]))
+    (&(transforms[prng_rand_n (ARRAY_LENGTH (transforms))]))
 
 static void
 on_destroy (pixman_image_t *image, void *data)
@@ -63,10 +63,8 @@ make_image (void)
     pixman_format_code_t format = RANDOM_FORMAT();
     uint32_t *bytes = malloc (WIDTH * HEIGHT * 4);
     pixman_image_t *image;
-    int i;
 
-    for (i = 0; i < WIDTH * HEIGHT * 4; ++i)
-	((uint8_t *)bytes)[i] = lcg_rand_n (256);
+    prng_randmemset (bytes, WIDTH * HEIGHT * 4, 0);
 
     image = pixman_image_create_bits (
 	format, WIDTH, HEIGHT, bytes, WIDTH * 4);
@@ -86,7 +84,7 @@ test_transform (int testnum, int verbose)
     pixman_image_t *src, *dest;
     uint32_t crc;
 
-    lcg_srand (testnum);
+    prng_srand (testnum);
     
     src = make_image ();
     dest = make_image ();
@@ -108,6 +106,6 @@ int
 main (int argc, const char *argv[])
 {
     return fuzzer_test_main ("rotate", 15000,
-			     0x03A24D51,
+			     0xECF5E426,
 			     test_transform, argc, argv);
 }
diff --git a/lib/pixman/test/scaling-helpers-test.c b/lib/pixman/test/scaling-helpers-test.c
index 33ec47c85..cd5ace0b2 100644
--- a/lib/pixman/test/scaling-helpers-test.c
+++ b/lib/pixman/test/scaling-helpers-test.c
@@ -52,14 +52,15 @@ int
 main (void)
 {
     int i;
+    prng_srand (0);
     for (i = 0; i < 10000; i++)
     {
 	int32_t left_pad1, left_tz1, width1, right_tz1, right_pad1;
 	int32_t left_pad2, left_tz2, width2, right_tz2, right_pad2;
-	pixman_fixed_t vx = lcg_rand_N(10000 << 16) - (3000 << 16);
-	int32_t width = lcg_rand_N(10000);
-	int32_t source_image_width = lcg_rand_N(10000) + 1;
-	pixman_fixed_t unit_x = lcg_rand_N(10 << 16) + 1;
+	pixman_fixed_t vx = prng_rand_n(10000 << 16) - (3000 << 16);
+	int32_t width = prng_rand_n(10000);
+	int32_t source_image_width = prng_rand_n(10000) + 1;
+	pixman_fixed_t unit_x = prng_rand_n(10 << 16) + 1;
 	width1 = width2 = width;
 
 	bilinear_pad_repeat_get_scanline_bounds_ref (source_image_width,
diff --git a/lib/pixman/test/scaling-test.c b/lib/pixman/test/scaling-test.c
index 273612395..a8cb4c47b 100644
--- a/lib/pixman/test/scaling-test.c
+++ b/lib/pixman/test/scaling-test.c
@@ -26,7 +26,7 @@ get_format (int bpp)
 {
     if (bpp == 4)
     {
-	switch (lcg_rand_n (4))
+	switch (prng_rand_n (4))
 	{
 	default:
 	case 0:
@@ -80,11 +80,11 @@ test_composite (int      testnum,
     uint32_t           crc32;
     FLOAT_REGS_CORRUPTION_DETECTOR_START ();
 
-    lcg_srand (testnum);
+    prng_srand (testnum);
 
-    src_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
-    dst_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
-    switch (lcg_rand_n (3))
+    src_bpp = (prng_rand_n (2) == 0) ? 2 : 4;
+    dst_bpp = (prng_rand_n (2) == 0) ? 2 : 4;
+    switch (prng_rand_n (3))
     {
     case 0:
 	op = PIXMAN_OP_SRC;
@@ -97,24 +97,24 @@ test_composite (int      testnum,
 	break;
     }
 
-    src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
-    src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
+    src_width = prng_rand_n (MAX_SRC_WIDTH) + 1;
+    src_height = prng_rand_n (MAX_SRC_HEIGHT) + 1;
 
-    if (lcg_rand_n (2))
+    if (prng_rand_n (2))
     {
-	mask_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
-	mask_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
+	mask_width = prng_rand_n (MAX_SRC_WIDTH) + 1;
+	mask_height = prng_rand_n (MAX_SRC_HEIGHT) + 1;
     }
     else
     {
 	mask_width = mask_height = 1;
     }
 
-    dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1;
-    dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1;
-    src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp;
-    mask_stride = mask_width * mask_bpp + lcg_rand_n (MAX_STRIDE) * mask_bpp;
-    dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp;
+    dst_width = prng_rand_n (MAX_DST_WIDTH) + 1;
+    dst_height = prng_rand_n (MAX_DST_HEIGHT) + 1;
+    src_stride = src_width * src_bpp + prng_rand_n (MAX_STRIDE) * src_bpp;
+    mask_stride = mask_width * mask_bpp + prng_rand_n (MAX_STRIDE) * mask_bpp;
+    dst_stride = dst_width * dst_bpp + prng_rand_n (MAX_STRIDE) * dst_bpp;
 
     if (src_stride & 3)
 	src_stride += 2;
@@ -127,27 +127,22 @@ test_composite (int      testnum,
     if (dst_stride & 3)
 	dst_stride += 2;
 
-    src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2);
-    src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2);
-    mask_x = -(mask_width / 4) + lcg_rand_n (mask_width * 3 / 2);
-    mask_y = -(mask_height / 4) + lcg_rand_n (mask_height * 3 / 2);
-    dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2);
-    dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2);
-    w = lcg_rand_n (dst_width * 3 / 2 - dst_x);
-    h = lcg_rand_n (dst_height * 3 / 2 - dst_y);
+    src_x = -(src_width / 4) + prng_rand_n (src_width * 3 / 2);
+    src_y = -(src_height / 4) + prng_rand_n (src_height * 3 / 2);
+    mask_x = -(mask_width / 4) + prng_rand_n (mask_width * 3 / 2);
+    mask_y = -(mask_height / 4) + prng_rand_n (mask_height * 3 / 2);
+    dst_x = -(dst_width / 4) + prng_rand_n (dst_width * 3 / 2);
+    dst_y = -(dst_height / 4) + prng_rand_n (dst_height * 3 / 2);
+    w = prng_rand_n (dst_width * 3 / 2 - dst_x);
+    h = prng_rand_n (dst_height * 3 / 2 - dst_y);
 
     srcbuf = (uint32_t *)malloc (src_stride * src_height);
     maskbuf = (uint32_t *)malloc (mask_stride * mask_height);
     dstbuf = (uint32_t *)malloc (dst_stride * dst_height);
 
-    for (i = 0; i < src_stride * src_height; i++)
-	*((uint8_t *)srcbuf + i) = lcg_rand_n (256);
-
-    for (i = 0; i < mask_stride * mask_height; i++)
-	*((uint8_t *)maskbuf + i) = lcg_rand_n (256);
-
-    for (i = 0; i < dst_stride * dst_height; i++)
-	*((uint8_t *)dstbuf + i) = lcg_rand_n (256);
+    prng_randmemset (srcbuf, src_stride * src_height, 0);
+    prng_randmemset (maskbuf, mask_stride * mask_height, 0);
+    prng_randmemset (dstbuf, dst_stride * dst_height, 0);
 
     src_fmt = get_format (src_bpp);
     dst_fmt = get_format (dst_bpp);
@@ -164,29 +159,29 @@ test_composite (int      testnum,
     image_endian_swap (src_img);
     image_endian_swap (dst_img);
 
-    if (lcg_rand_n (4) > 0)
+    if (prng_rand_n (4) > 0)
     {
-	scale_x = -32768 * 3 + lcg_rand_N (65536 * 5);
-	scale_y = -32768 * 3 + lcg_rand_N (65536 * 5);
-	translate_x = lcg_rand_N (65536);
-	translate_y = lcg_rand_N (65536);
+	scale_x = -32768 * 3 + prng_rand_n (65536 * 5);
+	scale_y = -32768 * 3 + prng_rand_n (65536 * 5);
+	translate_x = prng_rand_n (65536);
+	translate_y = prng_rand_n (65536);
 	pixman_transform_init_scale (&transform, scale_x, scale_y);
 	pixman_transform_translate (&transform, NULL, translate_x, translate_y);
 	pixman_image_set_transform (src_img, &transform);
     }
 
-    if (lcg_rand_n (2) > 0)
+    if (prng_rand_n (2) > 0)
     {
-	mask_scale_x = -32768 * 3 + lcg_rand_N (65536 * 5);
-	mask_scale_y = -32768 * 3 + lcg_rand_N (65536 * 5);
-	mask_translate_x = lcg_rand_N (65536);
-	mask_translate_y = lcg_rand_N (65536);
+	mask_scale_x = -32768 * 3 + prng_rand_n (65536 * 5);
+	mask_scale_y = -32768 * 3 + prng_rand_n (65536 * 5);
+	mask_translate_x = prng_rand_n (65536);
+	mask_translate_y = prng_rand_n (65536);
 	pixman_transform_init_scale (&transform, mask_scale_x, mask_scale_y);
 	pixman_transform_translate (&transform, NULL, mask_translate_x, mask_translate_y);
 	pixman_image_set_transform (mask_img, &transform);
     }
 
-    switch (lcg_rand_n (4))
+    switch (prng_rand_n (4))
     {
     case 0:
 	mask_repeat = PIXMAN_REPEAT_NONE;
@@ -209,7 +204,7 @@ test_composite (int      testnum,
     }
     pixman_image_set_repeat (mask_img, mask_repeat);
 
-    switch (lcg_rand_n (4))
+    switch (prng_rand_n (4))
     {
     case 0:
 	repeat = PIXMAN_REPEAT_NONE;
@@ -232,21 +227,22 @@ test_composite (int      testnum,
     }
     pixman_image_set_repeat (src_img, repeat);
 
-    if (lcg_rand_n (2))
+    if (prng_rand_n (2))
 	pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0);
     else
 	pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
 
-    if (lcg_rand_n (2))
+    if (prng_rand_n (2))
 	pixman_image_set_filter (mask_img, PIXMAN_FILTER_NEAREST, NULL, 0);
     else
 	pixman_image_set_filter (mask_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
 
     if (verbose)
     {
-	printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt);
-	printf ("op=%d, scale_x=%d, scale_y=%d, repeat=%d\n",
-	        op, scale_x, scale_y, repeat);
+	printf ("src_fmt=%s, dst_fmt=%s\n", 
+		format_name (src_fmt), format_name (dst_fmt));
+	printf ("op=%s, scale_x=%d, scale_y=%d, repeat=%d\n",
+	        operator_name (op), scale_x, scale_y, repeat);
 	printf ("translate_x=%d, translate_y=%d\n",
 	        translate_x, translate_y);
 	printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n",
@@ -256,19 +252,19 @@ test_composite (int      testnum,
 	printf ("w=%d, h=%d\n", w, h);
     }
 
-    if (lcg_rand_n (8) == 0)
+    if (prng_rand_n (8) == 0)
     {
 	pixman_box16_t clip_boxes[2];
-	int            n = lcg_rand_n (2) + 1;
+	int            n = prng_rand_n (2) + 1;
 
 	for (i = 0; i < n; i++)
 	{
-	    clip_boxes[i].x1 = lcg_rand_n (src_width);
-	    clip_boxes[i].y1 = lcg_rand_n (src_height);
+	    clip_boxes[i].x1 = prng_rand_n (src_width);
+	    clip_boxes[i].y1 = prng_rand_n (src_height);
 	    clip_boxes[i].x2 =
-		clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1);
+		clip_boxes[i].x1 + prng_rand_n (src_width - clip_boxes[i].x1);
 	    clip_boxes[i].y2 =
-		clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1);
+		clip_boxes[i].y1 + prng_rand_n (src_height - clip_boxes[i].y1);
 
 	    if (verbose)
 	    {
@@ -284,19 +280,19 @@ test_composite (int      testnum,
 	pixman_region_fini (&clip);
     }
 
-    if (lcg_rand_n (8) == 0)
+    if (prng_rand_n (8) == 0)
     {
 	pixman_box16_t clip_boxes[2];
-	int            n = lcg_rand_n (2) + 1;
+	int            n = prng_rand_n (2) + 1;
 
 	for (i = 0; i < n; i++)
 	{
-	    clip_boxes[i].x1 = lcg_rand_n (mask_width);
-	    clip_boxes[i].y1 = lcg_rand_n (mask_height);
+	    clip_boxes[i].x1 = prng_rand_n (mask_width);
+	    clip_boxes[i].y1 = prng_rand_n (mask_height);
 	    clip_boxes[i].x2 =
-		clip_boxes[i].x1 + lcg_rand_n (mask_width - clip_boxes[i].x1);
+		clip_boxes[i].x1 + prng_rand_n (mask_width - clip_boxes[i].x1);
 	    clip_boxes[i].y2 =
-		clip_boxes[i].y1 + lcg_rand_n (mask_height - clip_boxes[i].y1);
+		clip_boxes[i].y1 + prng_rand_n (mask_height - clip_boxes[i].y1);
 
 	    if (verbose)
 	    {
@@ -312,18 +308,18 @@ test_composite (int      testnum,
 	pixman_region_fini (&clip);
     }
 
-    if (lcg_rand_n (8) == 0)
+    if (prng_rand_n (8) == 0)
     {
 	pixman_box16_t clip_boxes[2];
-	int            n = lcg_rand_n (2) + 1;
+	int            n = prng_rand_n (2) + 1;
 	for (i = 0; i < n; i++)
 	{
-	    clip_boxes[i].x1 = lcg_rand_n (dst_width);
-	    clip_boxes[i].y1 = lcg_rand_n (dst_height);
+	    clip_boxes[i].x1 = prng_rand_n (dst_width);
+	    clip_boxes[i].y1 = prng_rand_n (dst_height);
 	    clip_boxes[i].x2 =
-		clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1);
+		clip_boxes[i].x1 + prng_rand_n (dst_width - clip_boxes[i].x1);
 	    clip_boxes[i].y2 =
-		clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1);
+		clip_boxes[i].y1 + prng_rand_n (dst_height - clip_boxes[i].y1);
 
 	    if (verbose)
 	    {
@@ -337,7 +333,7 @@ test_composite (int      testnum,
 	pixman_region_fini (&clip);
     }
 
-    if (lcg_rand_n (2) == 0)
+    if (prng_rand_n (2) == 0)
 	pixman_image_composite (op, src_img, NULL, dst_img,
                             src_x, src_y, 0, 0, dst_x, dst_y, w, h);
     else
@@ -380,11 +376,11 @@ test_composite (int      testnum,
 }
 
 #if BILINEAR_INTERPOLATION_BITS == 8
-#define CHECKSUM 0x8D3A7539
+#define CHECKSUM 0x9096E6B6
 #elif BILINEAR_INTERPOLATION_BITS == 7
-#define CHECKSUM 0x03A23E0C
+#define CHECKSUM 0xCE8EC6BA
 #elif BILINEAR_INTERPOLATION_BITS == 4
-#define CHECKSUM 0xE96D1A5E
+#define CHECKSUM 0xAB1D39BE
 #else
 #define CHECKSUM 0x00000000
 #endif
diff --git a/lib/pixman/test/stress-test.c b/lib/pixman/test/stress-test.c
index 059250dd4..1f03c7543 100644
--- a/lib/pixman/test/stress-test.c
+++ b/lib/pixman/test/stress-test.c
@@ -74,7 +74,7 @@ static pixman_filter_t filters[] =
 static int
 get_size (void)
 {
-    switch (lcg_rand_n (28))
+    switch (prng_rand_n (28))
     {
     case 0:
 	return 1;
@@ -84,10 +84,10 @@ get_size (void)
 
     default:
     case 2:
-	return lcg_rand_n (100);
+	return prng_rand_n (100);
 
     case 4:
-	return lcg_rand_n (2000) + 1000;
+	return prng_rand_n (2000) + 1000;
 
     case 5:
 	return 65535;
@@ -96,7 +96,7 @@ get_size (void)
 	return 65536;
 
     case 7:
-	return lcg_rand_N (64000) + 63000;
+	return prng_rand_n (64000) + 63000;
     }
 }
 
@@ -164,7 +164,7 @@ real_writer (void *src, uint32_t value, int size)
 static uint32_t
 fake_reader (const void *src, int size)
 {
-    uint32_t r = lcg_rand_u32 ();
+    uint32_t r = prng_rand ();
 
     assert (size == 1 || size == 2 || size == 4);
 
@@ -182,16 +182,16 @@ log_rand (void)
 {
     uint32_t mask;
 
-    mask = (1 << lcg_rand_n (10)) - 1;
+    mask = (1 << prng_rand_n (10)) - 1;
 
-    return (lcg_rand_u32 () & mask) - (mask >> 1);
+    return (prng_rand () & mask) - (mask >> 1);
 }
 
 static int32_t
 rand_x (pixman_image_t *image)
 {
     if (image->type == BITS)
-	return lcg_rand_n (image->bits.width);
+	return prng_rand_n (image->bits.width);
     else
 	return log_rand ();
 }
@@ -200,13 +200,42 @@ static int32_t
 rand_y (pixman_image_t *image)
 {
     if (image->type == BITS)
-	return lcg_rand_n (image->bits.height);
+	return prng_rand_n (image->bits.height);
     else
 	return log_rand ();
 }
 
+typedef enum	/* tells random_format() how to treat PIXMAN_TYPE_A formats */
+{
+    DONT_CARE,		/* use the randomly indexed image_formats[] entry as is */
+    PREFER_ALPHA,	/* scan for a PIXMAN_TYPE_A format unless prng_rand_n (4) returns 0 */
+    REQUIRE_ALPHA	/* always scan for a PIXMAN_TYPE_A format */
+} alpha_preference_t;
+
+/* Return a random image_formats[] entry, honouring the alpha preference. */
+static pixman_format_code_t
+random_format (alpha_preference_t alpha)
+{
+    int idx = prng_rand_n (ARRAY_LENGTH (image_formats));
+    pixman_bool_t scan = (alpha == REQUIRE_ALPHA);
+
+    /* Other preferences at or above PREFER_ALPHA scan unless the draw is 0 */
+    if (!scan && alpha >= PREFER_ALPHA)
+	scan = (prng_rand_n (4) != 0);
+    if (!scan)
+	return image_formats[idx];
+
+    /* Step forward from idx, wrapping, until the type is PIXMAN_TYPE_A */
+    for (;; ++idx)
+    {
+	pixman_format_code_t format = image_formats[idx % ARRAY_LENGTH (image_formats)];
+	if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_A)
+	    return format;
+    }
+}
+
 static pixman_image_t *
-create_random_bits_image (void)
+create_random_bits_image (alpha_preference_t alpha_preference)
 {
     pixman_format_code_t format;
     pixman_indexed_t *indexed;
@@ -220,7 +249,7 @@ create_random_bits_image (void)
     int n_coefficients = 0;
 
     /* format */
-    format = image_formats[lcg_rand_n (ARRAY_LENGTH (image_formats))];
+    format = random_format (alpha_preference);
 
     indexed = NULL;
     if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR)
@@ -246,7 +275,7 @@ create_random_bits_image (void)
 
     while ((uint64_t)width * height > 200000)
     {
-	if (lcg_rand_n(2) == 0)
+	if (prng_rand_n(2) == 0)
 	    height = 200000 / width;
 	else
 	    width = 200000 / height;
@@ -258,11 +287,11 @@ create_random_bits_image (void)
 	width = 1;
 
     /* bits */
-    switch (lcg_rand_n (7))
+    switch (prng_rand_n (7))
     {
     default:
     case 0:
-	stride = width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17);
+	stride = width * PIXMAN_FORMAT_BPP (format) + prng_rand_n (17);
 	stride = (stride + 3) & (~3);
 	bits = (uint32_t *)make_random_bytes (height * stride);
 	break;
@@ -273,7 +302,7 @@ create_random_bits_image (void)
 	break;
 
     case 2: /* Zero-filled */
-	stride = width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17);
+	stride = width * PIXMAN_FORMAT_BPP (format) + prng_rand_n (17);
 	stride = (stride + 3) & (~3);
 	bits = fence_malloc (height * stride);
 	if (!bits)
@@ -282,7 +311,7 @@ create_random_bits_image (void)
 	break;
 
     case 3: /* Filled with 0xFF */
-	stride = width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17);
+	stride = width * PIXMAN_FORMAT_BPP (format) + prng_rand_n (17);
 	stride = (stride + 3) & (~3);
 	bits = fence_malloc (height * stride);
 	if (!bits)
@@ -298,7 +327,7 @@ create_random_bits_image (void)
 	break;
 
     case 5: /* bits is a real pointer, has read/write functions */
-	stride = width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17);
+	stride = width * PIXMAN_FORMAT_BPP (format) + prng_rand_n (17);
 	stride = (stride + 3) & (~3);
 	bits = fence_malloc (height * stride);
 	if (!bits)
@@ -309,7 +338,7 @@ create_random_bits_image (void)
 	break;
 
     case 6: /* bits is a real pointer, stride is negative */
-	stride = (width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17));
+	stride = (width * PIXMAN_FORMAT_BPP (format) + prng_rand_n (17));
 	stride = (stride + 3) & (~3);
 	bits = (uint32_t *)make_random_bytes (height * stride);
 	if (!bits)
@@ -320,11 +349,11 @@ create_random_bits_image (void)
     }
 
     /* Filter */
-    filter = filters[lcg_rand_n (ARRAY_LENGTH (filters))];
+    filter = filters[prng_rand_n (ARRAY_LENGTH (filters))];
     if (filter == PIXMAN_FILTER_CONVOLUTION)
     {
-	int width = lcg_rand_n (3);
-	int height = lcg_rand_n (4);
+	int width = prng_rand_n (3);
+	int height = prng_rand_n (4);
 
 	n_coefficients = width * height + 2;
 	coefficients = malloc (n_coefficients * sizeof (pixman_fixed_t));
@@ -334,7 +363,7 @@ create_random_bits_image (void)
 	    int i;
 
 	    for (i = 0; i < width * height; ++i)
-		coefficients[i + 2] = lcg_rand_u32();
+		coefficients[i + 2] = prng_rand();
 
 	    coefficients[0] = width << 16;
 	    coefficients[1] = height << 16;
@@ -380,16 +409,16 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map)
     /* Set properties that are generic to all images */
 
     /* Repeat */
-    repeat = repeats[lcg_rand_n (ARRAY_LENGTH (repeats))];
+    repeat = repeats[prng_rand_n (ARRAY_LENGTH (repeats))];
     pixman_image_set_repeat (image, repeat);
 
     /* Alpha map */
-    if (allow_alpha_map && lcg_rand_n (4) == 0)
+    if (allow_alpha_map && prng_rand_n (4) == 0)
     {
 	pixman_image_t *alpha_map;
 	int16_t x, y;
 
-	alpha_map = create_random_bits_image ();
+	alpha_map = create_random_bits_image (DONT_CARE);
 
 	if (alpha_map)
 	{
@@ -405,17 +434,17 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map)
     }
 
     /* Component alpha */
-    pixman_image_set_component_alpha (image, lcg_rand_n (3) == 0);
+    pixman_image_set_component_alpha (image, prng_rand_n (3) == 0);
 
     /* Clip region */
-    if (lcg_rand_n (8) < 2)
+    if (prng_rand_n (8) < 2)
     {
 	pixman_region32_t region;
 	int i, n_rects;
 
 	pixman_region32_init (&region);
 
-	switch (lcg_rand_n (12))
+	switch (prng_rand_n (12))
 	{
 	case 0:
 	    n_rects = 0;
@@ -434,7 +463,7 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map)
 	    break;
 
 	default:
-	    n_rects = lcg_rand_n (100);
+	    n_rects = prng_rand_n (100);
 	    break;
 	}
 
@@ -452,7 +481,7 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map)
 		&region, &region, x, y, width, height);
 	}
 
-	if (image->type == BITS && lcg_rand_n (8) != 0)
+	if (image->type == BITS && prng_rand_n (8) != 0)
 	{
 	    uint32_t width, height;
 	    int x, y;
@@ -463,16 +492,16 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map)
 	     */
 	    for (i = 0; i < 5; ++i)
 	    {
-		x = lcg_rand_n (2 * image->bits.width) - image->bits.width;
-		y = lcg_rand_n (2 * image->bits.height) - image->bits.height;
-		width = lcg_rand_n (image->bits.width) - x + 10;
-		height = lcg_rand_n (image->bits.height) - y + 10;
+		x = prng_rand_n (2 * image->bits.width) - image->bits.width;
+		y = prng_rand_n (2 * image->bits.height) - image->bits.height;
+		width = prng_rand_n (image->bits.width) - x + 10;
+		height = prng_rand_n (image->bits.height) - y + 10;
 
 		if (width + x < x)
 		    width = INT32_MAX - x;
 		if (height + y < y)
 		    height = INT32_MAX - y;
-		
+
 		pixman_region32_union_rect (
 		    &region, &region, x, y, width, height);
 	    }
@@ -484,13 +513,13 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map)
     }
 
     /* Whether source clipping is enabled */
-    pixman_image_set_source_clipping (image, !!lcg_rand_n (2));
+    pixman_image_set_source_clipping (image, !!prng_rand_n (2));
 
     /* Client clip */
-    pixman_image_set_has_client_clip (image, !!lcg_rand_n (2));
+    pixman_image_set_has_client_clip (image, !!prng_rand_n (2));
 
     /* Transform */
-    if (lcg_rand_n (5) < 2)
+    if (prng_rand_n (5) < 2)
     {
 	pixman_transform_t xform;
 	int i, j, k;
@@ -504,39 +533,39 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map)
 
 	for (k = 0; k < 3; ++k)
 	{
-	    switch (lcg_rand_n (4))
+	    switch (prng_rand_n (4))
 	    {
 	    case 0:
 		/* rotation */
-		c = lcg_rand_N (2 * 65536) - 65536;
-		s = lcg_rand_N (2 * 65536) - 65536;
+		c = prng_rand_n (2 * 65536) - 65536;
+		s = prng_rand_n (2 * 65536) - 65536;
 		pixman_transform_rotate (&xform, NULL, c, s);
 		break;
 
 	    case 1:
 		/* translation */
-		tx = lcg_rand_u32();
-		ty = lcg_rand_u32();
+		tx = prng_rand();
+		ty = prng_rand();
 		pixman_transform_translate (&xform, NULL, tx, ty);
 		break;
 
 	    case 2:
 		/* scale */
-		sx = lcg_rand_u32();
-		sy = lcg_rand_u32();
+		sx = prng_rand();
+		sy = prng_rand();
 		pixman_transform_scale (&xform, NULL, sx, sy);
 		break;
 
 	    case 3:
-		if (lcg_rand_n (16) == 0)
+		if (prng_rand_n (16) == 0)
 		{
 		    /* random */
 		    for (i = 0; i < 3; ++i)
 			for (j = 0; j < 3; ++j)
-			    xform.matrix[i][j] = lcg_rand_u32();
+			    xform.matrix[i][j] = prng_rand();
 		    break;
 		}
-		else if (lcg_rand_n (16) == 0)
+		else if (prng_rand_n (16) == 0)
 		{
 		    /* zero */
 		    memset (&xform, 0, sizeof xform);
@@ -554,10 +583,10 @@ random_color (void)
 {
     pixman_color_t color =
     {
-	lcg_rand() & 0xffff,
-	lcg_rand() & 0xffff,
-	lcg_rand() & 0xffff,
-	lcg_rand() & 0xffff,
+	prng_rand() & 0xffff,
+	prng_rand() & 0xffff,
+	prng_rand() & 0xffff,
+	prng_rand() & 0xffff,
     };
 
     return color;
@@ -581,7 +610,7 @@ create_random_stops (int *n_stops)
     int i;
     pixman_gradient_stop_t *stops;
 
-    *n_stops = lcg_rand_n (50) + 1;
+    *n_stops = prng_rand_n (50) + 1;
 
     step = pixman_fixed_1 / *n_stops;
 
@@ -646,8 +675,8 @@ create_random_radial_image (void)
 
     inner_c = create_random_point();
     outer_c = create_random_point();
-    inner_r = lcg_rand();
-    outer_r = lcg_rand();
+    inner_r = prng_rand();
+    outer_r = prng_rand();
 
     stops = create_random_stops (&n_stops);
 
@@ -672,7 +701,7 @@ create_random_conical_image (void)
     pixman_image_t *result;
 
     c = create_random_point();
-    angle = lcg_rand();
+    angle = prng_rand();
 
     stops = create_random_stops (&n_stops);
 
@@ -691,11 +720,11 @@ create_random_image (void)
 {
     pixman_image_t *result;
 
-    switch (lcg_rand_n (5))
+    switch (prng_rand_n (5))
     {
     default:
     case 0:
-	result = create_random_bits_image ();
+	result = create_random_bits_image (DONT_CARE);
 	break;
 
     case 1:
@@ -721,6 +750,39 @@ create_random_image (void)
     return result;
 }
 
+static void  /* randomize both endpoints of LINE */
+random_line (pixman_line_fixed_t *line, int width, int height)
+{
+    line->p1.x = prng_rand_n (width) << 16;   /* x draws are given width, */
+    line->p1.y = prng_rand_n (height) << 16;  /* y draws are given height */
+    line->p2.x = prng_rand_n (width) << 16;   /* << 16 places the value in */
+    line->p2.y = prng_rand_n (height) << 16;  /* the bits above the low 16 */
+}
+
+/* Allocate prng_rand_n (16) + 1 trapezoids (count stored in *n_traps) and
+ * randomize them.  Returns NULL if malloc() fails; the caller frees the array.
+ * NOTE(review): run_test passes (width, height) for (height, width). */
+static pixman_trapezoid_t *
+create_random_trapezoids (int *n_traps, int height, int width)
+{
+    pixman_trapezoid_t *trapezoids;
+    int i;
+
+    *n_traps = prng_rand_n (16) + 1;
+    trapezoids = malloc (sizeof (*trapezoids) * *n_traps);
+    if (!trapezoids)
+	return NULL;	/* don't write through a failed allocation */
+    for (i = 0; i < *n_traps; ++i)
+    {
+        pixman_trapezoid_t *t = &(trapezoids[i]);
+        t->top = prng_rand_n (height) << 16;
+        t->bottom = prng_rand_n (height) << 16;
+        random_line (&t->left, height, width);
+        random_line (&t->right, height, width);
+    }
+    return trapezoids;
+}
+
 static const pixman_op_t op_list[] =
 {
     PIXMAN_OP_SRC,
@@ -792,27 +854,88 @@ run_test (uint32_t seed, pixman_bool_t verbose, uint32_t mod)
 	if (mod == 0 || (seed % mod) == 0)
 	    printf ("Seed 0x%08x\n", seed);
     }
-	    
-    lcg_srand (seed);
 
-    source = create_random_image ();
-    mask   = create_random_image ();
-    dest   = create_random_bits_image ();
+    source = mask = dest = NULL;
 
-    if (source && mask && dest)
+    prng_srand (seed);
+
+    if (prng_rand_n (8) == 0)
     {
+        int n_traps;
+        pixman_trapezoid_t *trapezoids;
+	int p = prng_rand_n (3);
+
+	if (p == 0)
+	    dest = create_random_bits_image (DONT_CARE);
+	else
+	    dest = create_random_bits_image (REQUIRE_ALPHA);
+
+	if (!dest)
+	    goto out;
+
 	set_general_properties (dest, TRUE);
 
-	op = op_list [lcg_rand_n (ARRAY_LENGTH (op_list))];
+	if (!(trapezoids = create_random_trapezoids (
+		  &n_traps, dest->bits.width, dest->bits.height)))
+	{
+	    goto out;
+	}
+
+	switch (p)
+	{
+	case 0:
+	    source = create_random_image ();
+
+	    if (source)
+	    {
+		op = op_list [prng_rand_n (ARRAY_LENGTH (op_list))];
+
+		pixman_composite_trapezoids (
+		    op, source, dest,
+		    random_format (REQUIRE_ALPHA),
+		    rand_x (source), rand_y (source),
+		    rand_x (dest), rand_y (dest),
+		    n_traps, trapezoids);
+	    }
+	    break;
+
+	case 1:
+	    pixman_rasterize_trapezoid (
+		dest, &trapezoids[prng_rand_n (n_traps)],
+		rand_x (dest), rand_y (dest));
+	    break;
 
-	pixman_image_composite32 (op,
-				  source, mask, dest,
-				  rand_x (source), rand_y (source),
-				  rand_x (mask), rand_y (mask),
-				  0, 0, 
-				  dest->bits.width,
-				  dest->bits.height);
+	case 2:
+	    pixman_add_trapezoids (
+		dest, rand_x (dest), rand_y (dest), n_traps, trapezoids);
+	    break;
+        }
+
+	free (trapezoids);
+    }
+    else
+    {
+        dest = create_random_bits_image (DONT_CARE);
+        source = create_random_image ();
+        mask = create_random_image ();
+
+        if (source && mask && dest)
+        {
+            set_general_properties (dest, TRUE);
+
+            op = op_list [prng_rand_n (ARRAY_LENGTH (op_list))];
+
+            pixman_image_composite32 (op,
+                                      source, mask, dest,
+                                      rand_x (source), rand_y (source),
+                                      rand_x (mask), rand_y (mask),
+                                      0, 0,
+                                      dest->bits.width,
+                                      dest->bits.height);
+        }
     }
+
+out:
     if (source)
 	pixman_image_unref (source);
     if (mask)
diff --git a/lib/pixman/test/utils-prng.c b/lib/pixman/test/utils-prng.c
new file mode 100644
index 000000000..7b32e3531
--- /dev/null
+++ b/lib/pixman/test/utils-prng.c
@@ -0,0 +1,298 @@
+/*
+ * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com>
+ *
+ * Based on the public domain implementation of small noncryptographic PRNG
+ * authored by Bob Jenkins: http://burtleburtle.net/bob/rand/smallprng.html
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "utils.h"
+#include "utils-prng.h"
+
+#if defined(GCC_VECTOR_EXTENSIONS_SUPPORTED) && defined(__SSE2__)
+#include <xmmintrin.h>
+#endif
+
+void smallprng_srand_r (smallprng_t *x, uint32_t seed)
+{
+    int discard = 20;	/* outputs thrown away right after seeding */
+    x->a = 0xf1ea5eed;
+    x->b = x->c = x->d = seed;
+    while (discard-- > 0) smallprng_rand_r (x);
+}
+
+/*
+ * Seed every generator in "x" from a single 32-bit seed: p0 takes the
+ * seed as is, the 128-bit generators take successive LCG outputs.
+ *
+ * An LCG is used here to derive independent seeds for the different
+ * smallprng instances (if smallprng itself is used for generating
+ * these seeds, the "Big Crush" test from TestU01 detects some
+ * problems in the glued 'prng_rand_128_r' output data).
+ * A cryptographic hash might be even better for this purpose,
+ * but the LCG seems to be enough for passing "Big Crush".
+ */
+void prng_srand_r (prng_t *x, uint32_t seed)
+{
+#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
+    int i;
+    prng_rand_128_data_t dummy;
+    smallprng_srand_r (&x->p0, seed);
+    x->a[0] = x->a[1] = x->a[2] = x->a[3] = 0xf1ea5eed;
+    x->b[0] = x->c[0] = x->d[0] = (seed = seed * 1103515245 + 12345);
+    x->b[1] = x->c[1] = x->d[1] = (seed = seed * 1103515245 + 12345);
+    x->b[2] = x->c[2] = x->d[2] = (seed = seed * 1103515245 + 12345);
+    x->b[3] = x->c[3] = x->d[3] = (seed = seed * 1103515245 + 12345);
+    for (i = 0; i < 20; ++i)
+        prng_rand_128_r (x, &dummy); /* first 20 vector outputs are discarded */
+#else
+    smallprng_srand_r (&x->p0, seed);
+    smallprng_srand_r (&x->p1, (seed = seed * 1103515245 + 12345));
+    smallprng_srand_r (&x->p2, (seed = seed * 1103515245 + 12345));
+    smallprng_srand_r (&x->p3, (seed = seed * 1103515245 + 12345));
+    smallprng_srand_r (&x->p4, (seed = seed * 1103515245 + 12345));
+#endif
+}
+
+/* Write the 16 bytes of *d to addr.  A non-zero "aligned" selects the direct
+ * vector store (vector builds only); every other case takes an unaligned
+ * store, SSE2 or memcpy depending on the build. */
+static force_inline void
+store_rand_128_data (void *addr, prng_rand_128_data_t *d, int aligned)
+{
+#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
+    if (aligned)
+    {
+        *(uint8x16 *)addr = d->vb;
+        return;
+    }
+#ifdef __SSE2__
+    /* workaround for http://gcc.gnu.org/PR55614 */
+    _mm_storeu_si128 (addr, _mm_loadu_si128 ((__m128i *)d));
+    return;
+#endif
+#endif
+    /* we could try something better for unaligned writes (packed attribute),
+     * but GCC is not very reliable: http://gcc.gnu.org/PR55454 */
+    memcpy (addr, d, 16);
+}
+
+/*
+ * Worker behind "prng_randmemset_r": whole 16-byte blocks, then a byte tail
+ */
+static force_inline void
+randmemset_internal (prng_t                  *prng,
+                     uint8_t                 *buf,
+                     size_t                   size,
+                     prng_randmemset_flags_t  flags,
+                     int                      aligned)
+{
+    prng_t local_prng = *prng; /* work on a copy, stored back on exit */
+    prng_rand_128_data_t randdata;
+    size_t i;
+    /* Bulk phase: one 128-bit block of output per iteration. */
+    while (size >= 16)
+    {
+        prng_rand_128_data_t t; /* extra random block deciding where bias applies */
+        if (flags == 0)
+        {
+            prng_rand_128_r (&local_prng, &randdata);
+        }
+        else
+        {
+            prng_rand_128_r (&local_prng, &t);
+            prng_rand_128_r (&local_prng, &randdata);
+#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
+            if (flags & RANDMEMSET_MORE_FF)
+            {
+                const uint8x16 const_C0 =
+                {
+                    0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0,
+                    0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0
+                };
+                randdata.vb |= (t.vb >= const_C0);
+            }
+            if (flags & RANDMEMSET_MORE_00)
+            {
+                const uint8x16 const_40 =
+                {
+                    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+                    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40
+                };
+                randdata.vb &= (t.vb >= const_40);
+            }
+            if (flags & RANDMEMSET_MORE_FFFFFFFF)
+            {
+                const uint32x4 const_C0000000 =
+                {
+                    0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000
+                };
+                randdata.vw |= ((t.vw << 30) >= const_C0000000);
+            }
+            if (flags & RANDMEMSET_MORE_00000000)
+            {
+                const uint32x4 const_40000000 =
+                {
+                    0x40000000, 0x40000000, 0x40000000, 0x40000000
+                };
+                randdata.vw &= ((t.vw << 30) >= const_40000000);
+            }
+#else
+            #define PROCESS_ONE_LANE(i)                                       \
+                if (flags & RANDMEMSET_MORE_FF)                               \
+                {                                                             \
+                    uint32_t mask_ff = (t.w[i] & (t.w[i] << 1)) & 0x80808080; \
+                    mask_ff |= mask_ff >> 1;                                  \
+                    mask_ff |= mask_ff >> 2;                                  \
+                    mask_ff |= mask_ff >> 4;                                  \
+                    randdata.w[i] |= mask_ff;                                 \
+                }                                                             \
+                if (flags & RANDMEMSET_MORE_00)                               \
+                {                                                             \
+                    uint32_t mask_00 = (t.w[i] | (t.w[i] << 1)) & 0x80808080; \
+                    mask_00 |= mask_00 >> 1;                                  \
+                    mask_00 |= mask_00 >> 2;                                  \
+                    mask_00 |= mask_00 >> 4;                                  \
+                    randdata.w[i] &= mask_00;                                 \
+                }                                                             \
+                if (flags & RANDMEMSET_MORE_FFFFFFFF)                         \
+                {                                                             \
+                    int32_t mask_ff = ((t.w[i] << 30) & (t.w[i] << 31)) &     \
+                                       0x80000000;                            \
+                    randdata.w[i] |= mask_ff >> 31;                           \
+                }                                                             \
+                if (flags & RANDMEMSET_MORE_00000000)                         \
+                {                                                             \
+                    int32_t mask_00 = ((t.w[i] << 30) | (t.w[i] << 31)) &     \
+                                       0x80000000;                            \
+                    randdata.w[i] &= mask_00 >> 31;                           \
+                }
+            /* Apply the selected biases to each 32-bit word in turn. */
+            PROCESS_ONE_LANE (0)
+            PROCESS_ONE_LANE (1)
+            PROCESS_ONE_LANE (2)
+            PROCESS_ONE_LANE (3)
+#endif
+        }
+        if (is_little_endian ())
+        {
+            store_rand_128_data (buf, &randdata, aligned);
+            buf += 16;
+        }
+        else
+        {
+#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
+            const uint8x16 bswap_shufflemask =
+            {
+                3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+            };
+            randdata.vb = __builtin_shuffle (randdata.vb, bswap_shufflemask);
+            store_rand_128_data (buf, &randdata, aligned);
+            buf += 16;
+#else
+            uint8_t t1, t2, t3, t4;
+            #define STORE_ONE_LANE(i)                                         \
+                t1 = randdata.b[i * 4 + 3];                                   \
+                t2 = randdata.b[i * 4 + 2];                                   \
+                t3 = randdata.b[i * 4 + 1];                                   \
+                t4 = randdata.b[i * 4 + 0];                                   \
+                *buf++ = t1;                                                  \
+                *buf++ = t2;                                                  \
+                *buf++ = t3;                                                  \
+                *buf++ = t4;
+            /* Emit each 32-bit word with its four bytes in reverse order. */
+            STORE_ONE_LANE (0)
+            STORE_ONE_LANE (1)
+            STORE_ONE_LANE (2)
+            STORE_ONE_LANE (3)
+#endif
+        }
+        size -= 16;
+    }
+    i = 0; /* tail: fewer than 16 bytes remain, buf already points at them */
+    while (i < size)
+    {
+        uint8_t randbyte = prng_rand_r (&local_prng) & 0xFF;
+        if (flags != 0)
+        {
+            uint8_t t = prng_rand_r (&local_prng) & 0xFF;
+            if ((flags & RANDMEMSET_MORE_FF) && (t >= 0xC0))
+                randbyte = 0xFF;
+            if ((flags & RANDMEMSET_MORE_00) && (t < 0x40))
+                randbyte = 0x00;
+            if (i % 4 == 0 && i + 4 <= size) /* room for a whole 4-byte cluster */
+            {
+                t = prng_rand_r (&local_prng) & 0xFF;
+                if ((flags & RANDMEMSET_MORE_FFFFFFFF) && (t >= 0xC0))
+                {
+                    memset(&buf[i], 0xFF, 4);
+                    i += 4;
+                    continue;
+                }
+                if ((flags & RANDMEMSET_MORE_00000000) && (t < 0x40))
+                {
+                    memset(&buf[i], 0x00, 4);
+                    i += 4;
+                    continue;
+                }
+            }
+        }
+        buf[i] = randbyte;
+        i++;
+    }
+    *prng = local_prng;
+}
+
+/*
+ * Fill "size" bytes starting at "voidbuf" with random data drawn from
+ * "prng". The "flags" argument may be used to tweak some statistics:
+ *    RANDMEMSET_MORE_00        - set ~25% of bytes to 0x00
+ *    RANDMEMSET_MORE_FF        - set ~25% of bytes to 0xFF
+ *    RANDMEMSET_MORE_00000000  - ~25% chance for 00000000 4-byte clusters
+ *    RANDMEMSET_MORE_FFFFFFFF  - ~25% chance for FFFFFFFF 4-byte clusters
+ */
+void prng_randmemset_r (prng_t                  *prng,
+                        void                    *voidbuf,
+                        size_t                   size,
+                        prng_randmemset_flags_t  flags)
+{
+    uint8_t *buf = (uint8_t *)voidbuf;
+    if ((uintptr_t)buf & 15)
+    {
+        /* unaligned buffer: the address is not a multiple of 16 */
+        if (flags == 0) /* NOTE(review): literal args presumably aid inlining - confirm */
+            randmemset_internal (prng, buf, size, 0, 0);
+        else if (flags == RANDMEMSET_MORE_00_AND_FF)
+            randmemset_internal (prng, buf, size, RANDMEMSET_MORE_00_AND_FF, 0);
+        else
+            randmemset_internal (prng, buf, size, flags, 0);
+    }
+    else
+    {
+        /* aligned buffer: same three-way dispatch with aligned = 1 */
+        if (flags == 0)
+            randmemset_internal (prng, buf, size, 0, 1);
+        else if (flags == RANDMEMSET_MORE_00_AND_FF)
+            randmemset_internal (prng, buf, size, RANDMEMSET_MORE_00_AND_FF, 1);
+        else
+            randmemset_internal (prng, buf, size, flags, 1);
+    }
+}
diff --git a/lib/pixman/test/utils-prng.h b/lib/pixman/test/utils-prng.h
new file mode 100644
index 000000000..564ffcef1
--- /dev/null
+++ b/lib/pixman/test/utils-prng.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com>
+ *
+ * Based on the public domain implementation of small noncryptographic PRNG
+ * authored by Bob Jenkins: http://burtleburtle.net/bob/rand/smallprng.html
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __UTILS_PRNG_H__
+#define __UTILS_PRNG_H__
+
+/*
+ * This file provides a fast SIMD-optimized noncryptographic PRNG (pseudorandom
+ * number generator), with the output good enough to pass "Big Crush" tests
+ * from TestU01 (http://en.wikipedia.org/wiki/TestU01).
+ *
+ * SIMD code uses http://gcc.gnu.org/onlinedocs/gcc/Vector-Extensions.html
+ * which is a GCC specific extension. There is also a slower alternative
+ * code path, which should work with any C compiler.
+ *
+ * The "prng_t" structure keeps the internal state of the random number
+ * generator. It is possible to have multiple instances of the random number
+ * generator active at the same time, in this case each of them needs to have
+ * its own "prng_t". All the functions take a pointer to "prng_t"
+ * as the first argument.
+ *
+ * Functions:
+ *
+ * ----------------------------------------------------------------------------
+ * void prng_srand_r (prng_t *prng, uint32_t seed);
+ *
+ * Initialize the pseudorandom number generator. The sequence of pseudorandom
+ * numbers is deterministic and only depends on "seed". Any two generators
+ * initialized with the same seed will produce exactly the same sequence.
+ *
+ * ----------------------------------------------------------------------------
+ * uint32_t prng_rand_r (prng_t *prng);
+ *
+ * Generate a single uniformly distributed 32-bit pseudorandom value.
+ *
+ * ----------------------------------------------------------------------------
+ * void prng_randmemset_r (prng_t                  *prng,
+ *                         void                    *buffer,
+ *                         size_t                   size,
+ *                         prng_randmemset_flags_t  flags);
+ *
+ * Fills the memory buffer "buffer" with "size" bytes of pseudorandom data.
+ * The "flags" argument may be used to tweak some statistics properties:
+ *    RANDMEMSET_MORE_00 - set ~25% of bytes to 0x00
+ *    RANDMEMSET_MORE_FF - set ~25% of bytes to 0xFF
+ * The flags can be combined. This allows a bit better simulation of typical
+ * pixel data, which normally contains a lot of fully transparent or fully
+ * opaque pixels.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "pixman-private.h"
+
+/*****************************************************************************/
+
+#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
+#define GCC_VECTOR_EXTENSIONS_SUPPORTED
+typedef uint32_t uint32x4 __attribute__ ((vector_size(16)));
+typedef uint8_t  uint8x16 __attribute__ ((vector_size(16)));
+#endif
+
+typedef struct
+{
+    uint32_t a, b, c, d; /* the four state words; smallprng_rand_r returns d */
+} smallprng_t;
+
+typedef struct
+{
+#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
+    uint32x4 a, b, c, d;        /* state words of four generators, one per lane */
+#else
+    smallprng_t p1, p2, p3, p4; /* fallback: one generator per 32-bit output word */
+#endif
+    smallprng_t p0;             /* separate generator used by prng_rand_r */
+} prng_t;
+
+typedef union
+{
+    uint8_t  b[16]; /* the 128 bits viewed as 16 bytes */
+    uint32_t w[4];  /* ... as four 32-bit words */
+#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
+    uint8x16 vb;    /* ... as a 16 x 8-bit vector */
+    uint32x4 vw;    /* ... as a 4 x 32-bit vector */
+#endif
+} prng_rand_128_data_t;
+
+/*****************************************************************************/
+
+static force_inline uint32_t /* advance the state one step, return 32 bits */
+smallprng_rand_r (smallprng_t *x)
+{
+    uint32_t e = x->a - ((x->b << 27) + (x->b >> (32 - 27))); /* a - rotl (b, 27) */
+    x->a = x->b ^ ((x->c << 17) ^ (x->c >> (32 - 17)));       /* b ^ rotl (c, 17) */
+    x->b = x->c + x->d;
+    x->c = x->d + e;
+    x->d = e + x->a; /* uses the a computed above, not the old one */
+    return x->d;     /* the new d is the output */
+}
+
+/* Generate 4 bytes (32 bits) of random data from the scalar generator p0 */
+static force_inline uint32_t
+prng_rand_r (prng_t *x)
+{
+    return smallprng_rand_r (&x->p0);
+}
+
+/* Generate 16 bytes (128 bits) of random data into *data; p0 is not touched */
+static force_inline void
+prng_rand_128_r (prng_t *x, prng_rand_128_data_t *data)
+{
+#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED /* smallprng_rand_r's step on four lanes */
+    uint32x4 e = x->a - ((x->b << 27) + (x->b >> (32 - 27)));
+    x->a = x->b ^ ((x->c << 17) ^ (x->c >> (32 - 17)));
+    x->b = x->c + x->d;
+    x->c = x->d + e;
+    x->d = e + x->a;
+    data->vw = x->d;
+#else /* one scalar generator per 32-bit output word */
+    data->w[0] = smallprng_rand_r (&x->p1);
+    data->w[1] = smallprng_rand_r (&x->p2);
+    data->w[2] = smallprng_rand_r (&x->p3);
+    data->w[3] = smallprng_rand_r (&x->p4);
+#endif
+}
+
+typedef enum /* bit flags for prng_randmemset_r; values are distinct bits */
+{
+    RANDMEMSET_MORE_00        = 1, /* ~25% chance for 0x00 bytes */
+    RANDMEMSET_MORE_FF        = 2, /* ~25% chance for 0xFF bytes */
+    RANDMEMSET_MORE_00000000  = 4, /* ~25% chance for 0x00000000 clusters */
+    RANDMEMSET_MORE_FFFFFFFF  = 8, /* ~25% chance for 0xFFFFFFFF clusters */
+    RANDMEMSET_MORE_00_AND_FF = (RANDMEMSET_MORE_00 | RANDMEMSET_MORE_00000000 |
+                                 RANDMEMSET_MORE_FF | RANDMEMSET_MORE_FFFFFFFF) /* all four */
+} prng_randmemset_flags_t;
+
+/* Set the 32-bit seed for PRNG */
+void prng_srand_r (prng_t *prng, uint32_t seed);
+
+/* Fill memory buffer with random data */
+void prng_randmemset_r (prng_t                  *prng,
+                        void                    *buffer,
+                        size_t                   size,
+                        prng_randmemset_flags_t  flags);
+
+#endif
diff --git a/lib/pixman/test/utils.c b/lib/pixman/test/utils.c
index 716bb7594..3d1ba22ae 100644
--- a/lib/pixman/test/utils.c
+++ b/lib/pixman/test/utils.c
@@ -27,10 +27,11 @@
 #include <png.h>
 #endif
 
-/* Random number seed
+/* Random number generator state
  */
 
-uint32_t lcg_seed;
+prng_t prng_state_data;
+prng_t *prng_state;
 
 /*----------------------------------------------------------------------------*\
  *  CRC-32 version 2.0.0 by Craig Bruce, 2006-04-29.
@@ -237,14 +238,6 @@ compute_crc32_for_image (uint32_t        crc32,
     return crc32;
 }
 
-pixman_bool_t
-is_little_endian (void)
-{
-    volatile uint16_t endian_check_var = 0x1234;
-
-    return (*(volatile uint8_t *)&endian_check_var == 0x34);
-}
-
 /* perform endian conversion of pixel data
  */
 void
@@ -377,7 +370,7 @@ fence_malloc (int64_t len)
 	return NULL;
     }
 
-    initial_page = (uint8_t *)(((unsigned long)addr + page_mask) & ~page_mask);
+    initial_page = (uint8_t *)(((uintptr_t)addr + page_mask) & ~page_mask);
     leading_protected = initial_page + page_size;
     payload = leading_protected + N_LEADING_PROTECTED * page_size;
     trailing_protected = payload + n_payload_bytes;
@@ -431,13 +424,11 @@ uint8_t *
 make_random_bytes (int n_bytes)
 {
     uint8_t *bytes = fence_malloc (n_bytes);
-    int i;
 
     if (!bytes)
 	return NULL;
 
-    for (i = 0; i < n_bytes; ++i)
-	bytes[i] = lcg_rand () & 0xff;
+    prng_randmemset (bytes, n_bytes, 0);
 
     return bytes;
 }
@@ -557,6 +548,60 @@ write_png (pixman_image_t *image, const char *filename)
 
 #endif
 
+/* Widen each packed 8-bit channel to 16 bits by repeating it (0xab -> 0xabab) */
+static void
+color8_to_color16 (uint32_t color8, pixman_color_t *color16)
+{
+    uint32_t a = (color8 >> 24) & 0xff;
+    uint32_t r = (color8 >> 16) & 0xff;
+    uint32_t g = (color8 >> 8) & 0xff;
+    uint32_t b = color8 & 0xff;
+    color16->alpha = (a << 8) | a;
+    color16->red = (r << 8) | r;
+    color16->green = (g << 8) | g;
+    color16->blue = (b << 8) | b;
+}
+
+/* Paint IMAGE with check_size x check_size squares using PIXMAN_OP_SRC;
+ * cells where (i ^ j) is odd get color1, the others color2. */
+void
+draw_checkerboard (pixman_image_t *image,
+		   int check_size,
+		   uint32_t color1, uint32_t color2)
+{
+    pixman_color_t check1, check2;
+    pixman_image_t *c1, *c2;
+    int n_checks_x, n_checks_y;
+    int i, j;
+
+    color8_to_color16 (color1, &check1);
+    color8_to_color16 (color2, &check2);
+    c1 = pixman_image_create_solid_fill (&check1);
+    c2 = pixman_image_create_solid_fill (&check2);
+
+    /* Nothing to draw without both fills or with a non-positive cell size. */
+    if (c1 && c2 && check_size > 0)
+    {
+	n_checks_x = (pixman_image_get_width (image) + check_size - 1) / check_size;
+	n_checks_y = (pixman_image_get_height (image) + check_size - 1) / check_size;
+	for (j = 0; j < n_checks_y; j++)
+	{
+	    for (i = 0; i < n_checks_x; i++)
+	    {
+		pixman_image_composite32 (PIXMAN_OP_SRC, ((i ^ j) & 1) ? c1 : c2,
+					  NULL, image, 0, 0, 0, 0,
+					  i * check_size, j * check_size,
+					  check_size, check_size);
+	    }
+	}
+    }
+
+    if (c1)
+	pixman_image_unref (c1);
+    if (c2)
+	pixman_image_unref (c2);
+}
+
 /*
  * A function, which can be used as a core part of the test programs,
  * intended to detect various problems with the help of fuzzing input
@@ -689,11 +734,13 @@ get_random_seed (void)
 {
     union { double d; uint32_t u32; } t;
     t.d = gettime();
-    lcg_srand (t.u32);
+    prng_srand (t.u32);
 
-    return lcg_rand_u32 ();
+    return prng_rand ();
 }
 
+#ifdef HAVE_SIGACTION
+#ifdef HAVE_ALARM
 static const char *global_msg;
 
 static void
@@ -702,6 +749,8 @@ on_alarm (int signo)
     printf ("%s\n", global_msg);
     exit (1);
 }
+#endif
+#endif
 
 void
 fail_after (int seconds, const char *msg)
@@ -781,7 +830,7 @@ initialize_palette (pixman_indexed_t *palette, uint32_t depth, int is_rgb)
     uint32_t mask = (1 << depth) - 1;
 
     for (i = 0; i < 32768; ++i)
-	palette->ent[i] = lcg_rand() & mask;
+	palette->ent[i] = prng_rand() & mask;
 
     memset (palette->rgba, 0, sizeof (palette->rgba));
 
@@ -801,7 +850,7 @@ initialize_palette (pixman_indexed_t *palette, uint32_t depth, int is_rgb)
 	{
 	    uint32_t old_idx;
 
-	    rgba24 = lcg_rand();
+	    rgba24 = prng_rand();
 	    i15 = CONVERT_15 (rgba24, is_rgb);
 
 	    old_idx = palette->ent[i15];
@@ -821,6 +870,445 @@ initialize_palette (pixman_indexed_t *palette, uint32_t depth, int is_rgb)
     }
 }
 
+/* Map OP to its enumerator name; NONE and unlisted values get placeholders. */
+const char *
+operator_name (pixman_op_t op)
+{
+    switch (op)
+    {
+    case PIXMAN_OP_CLEAR: return "PIXMAN_OP_CLEAR";
+    case PIXMAN_OP_SRC: return "PIXMAN_OP_SRC";
+    case PIXMAN_OP_DST: return "PIXMAN_OP_DST";
+    case PIXMAN_OP_OVER: return "PIXMAN_OP_OVER";
+    case PIXMAN_OP_OVER_REVERSE: return "PIXMAN_OP_OVER_REVERSE";
+    case PIXMAN_OP_IN: return "PIXMAN_OP_IN";
+    case PIXMAN_OP_IN_REVERSE: return "PIXMAN_OP_IN_REVERSE";
+    case PIXMAN_OP_OUT: return "PIXMAN_OP_OUT";
+    case PIXMAN_OP_OUT_REVERSE: return "PIXMAN_OP_OUT_REVERSE";
+    case PIXMAN_OP_ATOP: return "PIXMAN_OP_ATOP";
+    case PIXMAN_OP_ATOP_REVERSE: return "PIXMAN_OP_ATOP_REVERSE";
+    case PIXMAN_OP_XOR: return "PIXMAN_OP_XOR";
+    case PIXMAN_OP_ADD: return "PIXMAN_OP_ADD";
+    case PIXMAN_OP_SATURATE: return "PIXMAN_OP_SATURATE";
+
+    case PIXMAN_OP_DISJOINT_CLEAR: return "PIXMAN_OP_DISJOINT_CLEAR";
+    case PIXMAN_OP_DISJOINT_SRC: return "PIXMAN_OP_DISJOINT_SRC";
+    case PIXMAN_OP_DISJOINT_DST: return "PIXMAN_OP_DISJOINT_DST";
+    case PIXMAN_OP_DISJOINT_OVER: return "PIXMAN_OP_DISJOINT_OVER";
+    case PIXMAN_OP_DISJOINT_OVER_REVERSE: return "PIXMAN_OP_DISJOINT_OVER_REVERSE";
+    case PIXMAN_OP_DISJOINT_IN: return "PIXMAN_OP_DISJOINT_IN";
+    case PIXMAN_OP_DISJOINT_IN_REVERSE: return "PIXMAN_OP_DISJOINT_IN_REVERSE";
+    case PIXMAN_OP_DISJOINT_OUT: return "PIXMAN_OP_DISJOINT_OUT";
+    case PIXMAN_OP_DISJOINT_OUT_REVERSE: return "PIXMAN_OP_DISJOINT_OUT_REVERSE";
+    case PIXMAN_OP_DISJOINT_ATOP: return "PIXMAN_OP_DISJOINT_ATOP";
+    case PIXMAN_OP_DISJOINT_ATOP_REVERSE: return "PIXMAN_OP_DISJOINT_ATOP_REVERSE";
+    case PIXMAN_OP_DISJOINT_XOR: return "PIXMAN_OP_DISJOINT_XOR";
+
+    case PIXMAN_OP_CONJOINT_CLEAR: return "PIXMAN_OP_CONJOINT_CLEAR";
+    case PIXMAN_OP_CONJOINT_SRC: return "PIXMAN_OP_CONJOINT_SRC";
+    case PIXMAN_OP_CONJOINT_DST: return "PIXMAN_OP_CONJOINT_DST";
+    case PIXMAN_OP_CONJOINT_OVER: return "PIXMAN_OP_CONJOINT_OVER";
+    case PIXMAN_OP_CONJOINT_OVER_REVERSE: return "PIXMAN_OP_CONJOINT_OVER_REVERSE";
+    case PIXMAN_OP_CONJOINT_IN: return "PIXMAN_OP_CONJOINT_IN";
+    case PIXMAN_OP_CONJOINT_IN_REVERSE: return "PIXMAN_OP_CONJOINT_IN_REVERSE";
+    case PIXMAN_OP_CONJOINT_OUT: return "PIXMAN_OP_CONJOINT_OUT";
+    case PIXMAN_OP_CONJOINT_OUT_REVERSE: return "PIXMAN_OP_CONJOINT_OUT_REVERSE";
+    case PIXMAN_OP_CONJOINT_ATOP: return "PIXMAN_OP_CONJOINT_ATOP";
+    case PIXMAN_OP_CONJOINT_ATOP_REVERSE: return "PIXMAN_OP_CONJOINT_ATOP_REVERSE";
+    case PIXMAN_OP_CONJOINT_XOR: return "PIXMAN_OP_CONJOINT_XOR";
+
+    case PIXMAN_OP_MULTIPLY: return "PIXMAN_OP_MULTIPLY";
+    case PIXMAN_OP_SCREEN: return "PIXMAN_OP_SCREEN";
+    case PIXMAN_OP_OVERLAY: return "PIXMAN_OP_OVERLAY";
+    case PIXMAN_OP_DARKEN: return "PIXMAN_OP_DARKEN";
+    case PIXMAN_OP_LIGHTEN: return "PIXMAN_OP_LIGHTEN";
+    case PIXMAN_OP_COLOR_DODGE: return "PIXMAN_OP_COLOR_DODGE";
+    case PIXMAN_OP_COLOR_BURN: return "PIXMAN_OP_COLOR_BURN";
+    case PIXMAN_OP_HARD_LIGHT: return "PIXMAN_OP_HARD_LIGHT";
+    case PIXMAN_OP_SOFT_LIGHT: return "PIXMAN_OP_SOFT_LIGHT";
+    case PIXMAN_OP_DIFFERENCE: return "PIXMAN_OP_DIFFERENCE";
+    case PIXMAN_OP_EXCLUSION: return "PIXMAN_OP_EXCLUSION";
+    case PIXMAN_OP_HSL_HUE: return "PIXMAN_OP_HSL_HUE";
+    case PIXMAN_OP_HSL_SATURATION: return "PIXMAN_OP_HSL_SATURATION";
+    case PIXMAN_OP_HSL_COLOR: return "PIXMAN_OP_HSL_COLOR";
+    case PIXMAN_OP_HSL_LUMINOSITY: return "PIXMAN_OP_HSL_LUMINOSITY";
+
+    case PIXMAN_OP_NONE: return "<invalid operator 'none'>";
+    }
+
+    return "<unknown operator>";
+}
+
+/* Map FORMAT to a short name; unlisted values yield "<unknown format>". */
+const char *
+format_name (pixman_format_code_t format)
+{
+    switch (format)
+    {
+/* 32bpp formats */
+    case PIXMAN_a8r8g8b8: return "a8r8g8b8";
+    case PIXMAN_x8r8g8b8: return "x8r8g8b8";
+    case PIXMAN_a8b8g8r8: return "a8b8g8r8";
+    case PIXMAN_x8b8g8r8: return "x8b8g8r8";
+    case PIXMAN_b8g8r8a8: return "b8g8r8a8";
+    case PIXMAN_b8g8r8x8: return "b8g8r8x8";
+    case PIXMAN_r8g8b8a8: return "r8g8b8a8";
+    case PIXMAN_r8g8b8x8: return "r8g8b8x8";
+    case PIXMAN_x14r6g6b6: return "x14r6g6b6";
+    case PIXMAN_x2r10g10b10: return "x2r10g10b10";
+    case PIXMAN_a2r10g10b10: return "a2r10g10b10";
+    case PIXMAN_x2b10g10r10: return "x2b10g10r10";
+    case PIXMAN_a2b10g10r10: return "a2b10g10r10";
+
+/* sRGB formats */
+    case PIXMAN_a8r8g8b8_sRGB: return "a8r8g8b8_sRGB";
+
+/* 24bpp formats */
+    case PIXMAN_r8g8b8: return "r8g8b8";
+    case PIXMAN_b8g8r8: return "b8g8r8";
+
+/* 16bpp formats */
+    case PIXMAN_r5g6b5: return "r5g6b5";
+    case PIXMAN_b5g6r5: return "b5g6r5";
+
+    case PIXMAN_a1r5g5b5: return "a1r5g5b5";
+    case PIXMAN_x1r5g5b5: return "x1r5g5b5";
+    case PIXMAN_a1b5g5r5: return "a1b5g5r5";
+    case PIXMAN_x1b5g5r5: return "x1b5g5r5";
+    case PIXMAN_a4r4g4b4: return "a4r4g4b4";
+    case PIXMAN_x4r4g4b4: return "x4r4g4b4";
+    case PIXMAN_a4b4g4r4: return "a4b4g4r4";
+    case PIXMAN_x4b4g4r4: return "x4b4g4r4";
+
+/* 8bpp formats */
+    case PIXMAN_a8: return "a8";
+    case PIXMAN_r3g3b2: return "r3g3b2";
+    case PIXMAN_b2g3r3: return "b2g3r3";
+    case PIXMAN_a2r2g2b2: return "a2r2g2b2";
+    case PIXMAN_a2b2g2r2: return "a2b2g2r2";
+
+#if 0 /* NOTE(review): presumably same values as the two cases below; confirm */
+    case PIXMAN_x4c4: return "x4c4";
+    case PIXMAN_g8: return "g8";
+#endif
+    case PIXMAN_c8: return "x4c4 / c8";
+    case PIXMAN_x4g4: return "x4g4 / g8";
+
+    case PIXMAN_x4a4: return "x4a4";
+
+/* 4bpp formats */
+    case PIXMAN_a4: return "a4";
+    case PIXMAN_r1g2b1: return "r1g2b1";
+    case PIXMAN_b1g2r1: return "b1g2r1";
+    case PIXMAN_a1r1g1b1: return "a1r1g1b1";
+    case PIXMAN_a1b1g1r1: return "a1b1g1r1";
+
+    case PIXMAN_c4: return "c4";
+    case PIXMAN_g4: return "g4";
+
+/* 1bpp formats */
+    case PIXMAN_a1: return "a1";
+
+    case PIXMAN_g1: return "g1";
+
+/* YUV formats */
+    case PIXMAN_yuy2: return "yuy2";
+    case PIXMAN_yv12: return "yv12";
+    }
+
+    /* Fake formats.  These live in a separate switch on a uint32_t
+     * to prevent GCC from complaining that the values are not in
+     * the pixman_format_code_t enum.
+     */
+    switch ((uint32_t)format)
+    {
+    case PIXMAN_null: return "null";
+    case PIXMAN_solid: return "solid";
+    case PIXMAN_pixbuf: return "pixbuf";
+    case PIXMAN_rpixbuf: return "rpixbuf";
+    case PIXMAN_unknown: return "unknown";
+    }
+
+    return "<unknown format>";
+}
+/* Compute MIN (src * Fa + dst * Fb, 1.0); the factors Fa, Fb depend on op. */
+static double
+calc_op (pixman_op_t op, double src, double dst, double srca, double dsta)
+{
+#define mult_chan(src, dst, Fa, Fb) MIN ((src) * (Fa) + (dst) * (Fb), 1.0)
+    /* Fa scales src, Fb scales dst; they are computed from srca and dsta. */
+    double Fa, Fb;
+
+    switch (op)
+    {
+    case PIXMAN_OP_CLEAR:
+    case PIXMAN_OP_DISJOINT_CLEAR:
+    case PIXMAN_OP_CONJOINT_CLEAR:
+	return mult_chan (src, dst, 0.0, 0.0);
+
+    case PIXMAN_OP_SRC:
+    case PIXMAN_OP_DISJOINT_SRC:
+    case PIXMAN_OP_CONJOINT_SRC:
+	return mult_chan (src, dst, 1.0, 0.0);
+
+    case PIXMAN_OP_DST:
+    case PIXMAN_OP_DISJOINT_DST:
+    case PIXMAN_OP_CONJOINT_DST:
+	return mult_chan (src, dst, 0.0, 1.0);
+
+    case PIXMAN_OP_OVER:
+	return mult_chan (src, dst, 1.0, 1.0 - srca);
+
+    case PIXMAN_OP_OVER_REVERSE:
+	return mult_chan (src, dst, 1.0 - dsta, 1.0);
+
+    case PIXMAN_OP_IN:
+	return mult_chan (src, dst, dsta, 0.0);
+
+    case PIXMAN_OP_IN_REVERSE:
+	return mult_chan (src, dst, 0.0, srca);
+
+    case PIXMAN_OP_OUT:
+	return mult_chan (src, dst, 1.0 - dsta, 0.0);
+
+    case PIXMAN_OP_OUT_REVERSE:
+	return mult_chan (src, dst, 0.0, 1.0 - srca);
+
+    case PIXMAN_OP_ATOP:
+	return mult_chan (src, dst, dsta, 1.0 - srca);
+
+    case PIXMAN_OP_ATOP_REVERSE:
+	return mult_chan (src, dst, 1.0 - dsta,  srca);
+
+    case PIXMAN_OP_XOR:
+	return mult_chan (src, dst, 1.0 - dsta, 1.0 - srca);
+
+    case PIXMAN_OP_ADD:
+	return mult_chan (src, dst, 1.0, 1.0);
+    /* Factors below divide by an alpha, so a zero alpha is special-cased. */
+    case PIXMAN_OP_SATURATE:
+    case PIXMAN_OP_DISJOINT_OVER_REVERSE:
+	if (srca == 0.0)
+	    Fa = 1.0;
+	else
+	    Fa = MIN (1.0, (1.0 - dsta) / srca);
+	return mult_chan (src, dst, Fa, 1.0);
+
+    case PIXMAN_OP_DISJOINT_OVER:
+	if (dsta == 0.0)
+	    Fb = 1.0;
+	else
+	    Fb = MIN (1.0, (1.0 - srca) / dsta);
+	return mult_chan (src, dst, 1.0, Fb);
+
+    case PIXMAN_OP_DISJOINT_IN:
+	if (srca == 0.0)
+	    Fa = 0.0;
+	else
+	    Fa = MAX (0.0, 1.0 - (1.0 - dsta) / srca);
+	return mult_chan (src, dst, Fa, 0.0);
+
+    case PIXMAN_OP_DISJOINT_IN_REVERSE:
+	if (dsta == 0.0)
+	    Fb = 0.0;
+	else
+	    Fb = MAX (0.0, 1.0 - (1.0 - srca) / dsta);
+	return mult_chan (src, dst, 0.0, Fb);
+
+    case PIXMAN_OP_DISJOINT_OUT:
+	if (srca == 0.0)
+	    Fa = 1.0;
+	else
+	    Fa = MIN (1.0, (1.0 - dsta) / srca);
+	return mult_chan (src, dst, Fa, 0.0);
+
+    case PIXMAN_OP_DISJOINT_OUT_REVERSE:
+	if (dsta == 0.0)
+	    Fb = 1.0;
+	else
+	    Fb = MIN (1.0, (1.0 - srca) / dsta);
+	return mult_chan (src, dst, 0.0, Fb);
+
+    case PIXMAN_OP_DISJOINT_ATOP:
+	if (srca == 0.0)
+	    Fa = 0.0;
+	else
+	    Fa = MAX (0.0, 1.0 - (1.0 - dsta) / srca);
+	if (dsta == 0.0)
+	    Fb = 1.0;
+	else
+	    Fb = MIN (1.0, (1.0 - srca) / dsta);
+	return mult_chan (src, dst, Fa, Fb);
+
+    case PIXMAN_OP_DISJOINT_ATOP_REVERSE:
+	if (srca == 0.0)
+	    Fa = 1.0;
+	else
+	    Fa = MIN (1.0, (1.0 - dsta) / srca);
+	if (dsta == 0.0)
+	    Fb = 0.0;
+	else
+	    Fb = MAX (0.0, 1.0 - (1.0 - srca) / dsta);
+	return mult_chan (src, dst, Fa, Fb);
+
+    case PIXMAN_OP_DISJOINT_XOR:
+	if (srca == 0.0)
+	    Fa = 1.0;
+	else
+	    Fa = MIN (1.0, (1.0 - dsta) / srca);
+	if (dsta == 0.0)
+	    Fb = 1.0;
+	else
+	    Fb = MIN (1.0, (1.0 - srca) / dsta);
+	return mult_chan (src, dst, Fa, Fb);
+
+    case PIXMAN_OP_CONJOINT_OVER:
+	if (dsta == 0.0)
+	    Fb = 0.0;
+	else
+	    Fb = MAX (0.0, 1.0 - srca / dsta);
+	return mult_chan (src, dst, 1.0, Fb);
+
+    case PIXMAN_OP_CONJOINT_OVER_REVERSE:
+	if (srca == 0.0)
+	    Fa = 0.0;
+	else
+	    Fa = MAX (0.0, 1.0 - dsta / srca);
+	return mult_chan (src, dst, Fa, 1.0);
+
+    case PIXMAN_OP_CONJOINT_IN:
+	if (srca == 0.0)
+	    Fa = 1.0;
+	else
+	    Fa = MIN (1.0, dsta / srca);
+	return mult_chan (src, dst, Fa, 0.0);
+
+    case PIXMAN_OP_CONJOINT_IN_REVERSE:
+	if (dsta == 0.0)
+	    Fb = 1.0;
+	else
+	    Fb = MIN (1.0, srca / dsta);
+	return mult_chan (src, dst, 0.0, Fb);
+
+    case PIXMAN_OP_CONJOINT_OUT:
+	if (srca == 0.0)
+	    Fa = 0.0;
+	else
+	    Fa = MAX (0.0, 1.0 - dsta / srca);
+	return mult_chan (src, dst, Fa, 0.0);
+
+    case PIXMAN_OP_CONJOINT_OUT_REVERSE:
+	if (dsta == 0.0)
+	    Fb = 0.0;
+	else
+	    Fb = MAX (0.0, 1.0 - srca / dsta);
+	return mult_chan (src, dst, 0.0, Fb);
+
+    case PIXMAN_OP_CONJOINT_ATOP:
+	if (srca == 0.0)
+	    Fa = 1.0;
+	else
+	    Fa = MIN (1.0, dsta / srca);
+	if (dsta == 0.0)
+	    Fb = 0.0;
+	else
+	    Fb = MAX (0.0, 1.0 - srca / dsta);
+	return mult_chan (src, dst, Fa, Fb);
+
+    case PIXMAN_OP_CONJOINT_ATOP_REVERSE:
+	if (srca == 0.0)
+	    Fa = 0.0;
+	else
+	    Fa = MAX (0.0, 1.0 - dsta / srca);
+	if (dsta == 0.0)
+	    Fb = 1.0;
+	else
+	    Fb = MIN (1.0, srca / dsta);
+	return mult_chan (src, dst, Fa, Fb);
+
+    case PIXMAN_OP_CONJOINT_XOR:
+	if (srca == 0.0)
+	    Fa = 0.0;
+	else
+	    Fa = MAX (0.0, 1.0 - dsta / srca);
+	if (dsta == 0.0)
+	    Fb = 0.0;
+	else
+	    Fb = MAX (0.0, 1.0 - srca / dsta);
+	return mult_chan (src, dst, Fa, Fb);
+    /* The remaining operators are not implemented here and abort. */
+    case PIXMAN_OP_MULTIPLY:
+    case PIXMAN_OP_SCREEN:
+    case PIXMAN_OP_OVERLAY:
+    case PIXMAN_OP_DARKEN:
+    case PIXMAN_OP_LIGHTEN:
+    case PIXMAN_OP_COLOR_DODGE:
+    case PIXMAN_OP_COLOR_BURN:
+    case PIXMAN_OP_HARD_LIGHT:
+    case PIXMAN_OP_SOFT_LIGHT:
+    case PIXMAN_OP_DIFFERENCE:
+    case PIXMAN_OP_EXCLUSION:
+    case PIXMAN_OP_HSL_HUE:
+    case PIXMAN_OP_HSL_SATURATION:
+    case PIXMAN_OP_HSL_COLOR:
+    case PIXMAN_OP_HSL_LUMINOSITY:
+    default:
+	abort();
+	return 0; /* silence MSVC */
+    }
+#undef mult_chan
+}
+/* Compute *result = (src * mask) OP dst channel by channel; mask may be NULL. */
+void
+do_composite (pixman_op_t op,
+	      const color_t *src,
+	      const color_t *mask,
+	      const color_t *dst,
+	      color_t *result,
+	      pixman_bool_t component_alpha)
+{
+    color_t srcval, srcalpha;
+    /* srcval: masked source color; srcalpha: alpha used for each channel. */
+    if (mask == NULL)
+    {
+	srcval = *src;
+	/* No mask: the source alpha applies to every channel. */
+	srcalpha.r = src->a;
+	srcalpha.g = src->a;
+	srcalpha.b = src->a;
+	srcalpha.a = src->a;
+    }
+    else if (component_alpha)
+    {
+	srcval.r = src->r * mask->r;
+	srcval.g = src->g * mask->g;
+	srcval.b = src->b * mask->b;
+	srcval.a = src->a * mask->a;
+	/* Component alpha: each channel gets its own mask-scaled alpha. */
+	srcalpha.r = src->a * mask->r;
+	srcalpha.g = src->a * mask->g;
+	srcalpha.b = src->a * mask->b;
+	srcalpha.a = src->a * mask->a;
+    }
+    else
+    {
+	srcval.r = src->r * mask->a;
+	srcval.g = src->g * mask->a;
+	srcval.b = src->b * mask->a;
+	srcval.a = src->a * mask->a;
+	/* Unified alpha: only the mask's alpha channel is used. */
+	srcalpha.r = src->a * mask->a;
+	srcalpha.g = src->a * mask->a;
+	srcalpha.b = src->a * mask->a;
+	srcalpha.a = src->a * mask->a;
+    }
+    /* The destination alpha is dst->a for all four channels. */
+    result->r = calc_op (op, srcval.r, dst->r, srcalpha.r, dst->a);
+    result->g = calc_op (op, srcval.g, dst->g, srcalpha.g, dst->a);
+    result->b = calc_op (op, srcval.b, dst->b, srcalpha.b, dst->a);
+    result->a = calc_op (op, srcval.a, dst->a, srcalpha.a, dst->a);
+}
+
 static double
 round_channel (double p, int m)
 {
@@ -933,6 +1421,59 @@ pixel_checker_split_pixel (const pixel_checker_t *checker, uint32_t pixel,
     *b = (pixel & checker->bm) >> checker->bs;
 }
 
+/* Report the checker's channel bit masks through the output pointers;
+ * any pointer may be NULL when the caller does not need that mask. */
+void
+pixel_checker_get_masks (const pixel_checker_t *checker,
+                         uint32_t *am, uint32_t *rm,
+                         uint32_t *gm, uint32_t *bm)
+{
+    if (am != NULL)
+        *am = checker->am;
+    if (rm != NULL)
+        *rm = checker->rm;
+    if (gm != NULL)
+        *gm = checker->gm;
+    if (bm != NULL)
+        *bm = checker->bm;
+}
+/* Split pixel into channels and store them in *color as normalized doubles. */
+void
+pixel_checker_convert_pixel_to_color (const pixel_checker_t *checker,
+                                      uint32_t pixel, color_t *color)
+{
+    int a, r, g, b;
+
+    pixel_checker_split_pixel (checker, pixel, &a, &r, &g, &b);
+    /* Divide each channel by its maximum; a zero mask selects a default. */
+    if (checker->am == 0)
+        color->a = 1.0;
+    else
+        color->a = a / (double)(checker->am >> checker->as);
+
+    if (checker->rm == 0)
+        color->r = 0.0;
+    else
+        color->r = r / (double)(checker->rm >> checker->rs);
+
+    if (checker->gm == 0)
+        color->g = 0.0;
+    else
+        color->g = g / (double)(checker->gm >> checker->gs);
+
+    if (checker->bm == 0)
+        color->b = 0.0;
+    else
+        color->b = b / (double)(checker->bm >> checker->bs);
+    /* sRGB formats: convert the color channels, but not alpha, to linear. */
+    if (PIXMAN_FORMAT_TYPE (checker->format) == PIXMAN_TYPE_ARGB_SRGB)
+    {
+	color->r = convert_srgb_to_linear (color->r);
+	color->g = convert_srgb_to_linear (color->g);
+	color->b = convert_srgb_to_linear (color->b);
+    }
+}
+
 static int32_t
 convert (double v, uint32_t width, uint32_t mask, uint32_t shift, double def)
 {
@@ -972,7 +1513,7 @@ get_limits (const pixel_checker_t *checker, double limit,
 
 /* The acceptable deviation in units of [0.0, 1.0]
  */
-#define DEVIATION (0.004)
+#define DEVIATION (0.0064)
 
 void
 pixel_checker_get_max (const pixel_checker_t *checker, color_t *color,
diff --git a/lib/pixman/test/utils.h b/lib/pixman/test/utils.h
index f7ea34c5f..c2781516f 100644
--- a/lib/pixman/test/utils.h
+++ b/lib/pixman/test/utils.h
@@ -4,6 +4,7 @@
 
 #include <assert.h>
 #include "pixman-private.h" /* For 'inline' definition */
+#include "utils-prng.h"
 
 #define ARRAY_LENGTH(A) ((int) (sizeof (A) / sizeof ((A) [0])))
 
@@ -11,49 +12,44 @@
  * taken from POSIX.1-2001 example
  */
 
-extern uint32_t lcg_seed;
+extern prng_t prng_state_data;
+extern prng_t *prng_state;
 #ifdef USE_OPENMP
-#pragma omp threadprivate(lcg_seed)
+#pragma omp threadprivate(prng_state_data)
+#pragma omp threadprivate(prng_state)
 #endif
 
 static inline uint32_t
-lcg_rand (void)
+prng_rand (void)
 {
-    lcg_seed = lcg_seed * 1103515245 + 12345;
-    return ((uint32_t)(lcg_seed / 65536) % 32768);
+    return prng_rand_r (prng_state);
 }
 
 static inline void
-lcg_srand (uint32_t seed)
+prng_srand (uint32_t seed)
 {
-    lcg_seed = seed;
+    if (!prng_state)
+    {
+        /* Without setting a seed, PRNG does not work properly (is just
+         * returning zeros). So we only initialize the pointer here to
+         * make sure that 'prng_srand' is always called before any
+         * other 'prng_*' function. The wrongdoers violating this order
+         * will get a segfault. */
+        prng_state = &prng_state_data;
+    }
+    prng_srand_r (prng_state, seed);
 }
 
 static inline uint32_t
-lcg_rand_n (int max)
+prng_rand_n (int max)
 {
-    return lcg_rand () % max;
+    return prng_rand () % max;
 }
 
-static inline uint32_t
-lcg_rand_N (int max)
-{
-    uint32_t lo = lcg_rand ();
-    uint32_t hi = lcg_rand () << 15;
-    return (lo | hi) % max;
-}
-
-static inline uint32_t
-lcg_rand_u32 (void)
+static inline void
+prng_randmemset (void *buffer, size_t size, prng_randmemset_flags_t flags)
 {
-    /* This uses the 10/11 most significant bits from the 3 lcg results
-     * (and mixes them with the low from the adjacent one).
-     */
-    uint32_t lo = lcg_rand() >> -(32 - 15 - 11 * 2);
-    uint32_t mid = lcg_rand() << (32 - 15 - 11 * 1);
-    uint32_t hi = lcg_rand() << (32 - 15 - 11 * 0);
-
-    return (hi ^ mid ^ lo);
+    prng_randmemset_r (prng_state, buffer, size, flags);
 }
 
 /* CRC 32 computation
@@ -69,8 +65,12 @@ compute_crc32_for_image (uint32_t        in_crc32,
 
 /* Returns TRUE if running on a little endian system
  */
-pixman_bool_t
-is_little_endian (void);
+static force_inline pixman_bool_t
+is_little_endian (void)
+{
+    const unsigned long probe = 1;
+    return ((const unsigned char *) &probe)[0] == 1;
+}
 
 /* perform endian conversion of pixel data
  */
@@ -124,6 +124,11 @@ a8r8g8b8_to_rgba_np (uint32_t *dst, uint32_t *src, int n_pixels);
 pixman_bool_t
 write_png (pixman_image_t *image, const char *filename);
 
+void
+draw_checkerboard (pixman_image_t *image,
+		   int check_size,
+		   uint32_t color1, uint32_t color2);
+
 /* A pair of macros which can help to detect corruption of
  * floating point registers after a function call. This may
  * happen if _mm_empty() call is forgotten in MMX/SSE2 fast
@@ -172,12 +177,26 @@ convert_linear_to_srgb (double component);
 void
 initialize_palette (pixman_indexed_t *palette, uint32_t depth, int is_rgb);
 
+const char *
+operator_name (pixman_op_t op);
+
+const char *
+format_name (pixman_format_code_t format);
+
 typedef struct
 {
     double r, g, b, a;
 } color_t;
 
 void
+do_composite (pixman_op_t op,
+	      const color_t *src,
+	      const color_t *mask,
+	      const color_t *dst,
+	      color_t *result,
+	      pixman_bool_t component_alpha);
+
+void
 round_color (pixman_format_code_t format, color_t *color);
 
 typedef struct
@@ -206,3 +225,14 @@ pixel_checker_get_min (const pixel_checker_t *checker, color_t *color,
 pixman_bool_t
 pixel_checker_check (const pixel_checker_t *checker,
 		     uint32_t pixel, color_t *color);
+
+void
+pixel_checker_convert_pixel_to_color (const pixel_checker_t *checker,
+                                      uint32_t pixel, color_t *color);
+
+void
+pixel_checker_get_masks (const pixel_checker_t *checker,
+                         uint32_t              *am,
+                         uint32_t              *rm,
+                         uint32_t              *gm,
+                         uint32_t              *bm);
author	Matthieu Herrb <matthieu@cvs.openbsd.org>	2013-06-07 17:18:02 +0000
committer	Matthieu Herrb <matthieu@cvs.openbsd.org>	2013-06-07 17:18:02 +0000
commit	05f5801ea23297bb114b9f00d5f4c7d23743b121 (patch)
tree	14dbb55d6b817ce49d2798c9cf00c42bc4011a50
parent	6babe96864db98aee21458f0a62425b19818a203 (diff)