diff options
author | Matthieu Herrb <matthieu@cvs.openbsd.org> | 2013-06-07 17:18:02 +0000 |
---|---|---|
committer | Matthieu Herrb <matthieu@cvs.openbsd.org> | 2013-06-07 17:18:02 +0000 |
commit | 05f5801ea23297bb114b9f00d5f4c7d23743b121 (patch) | |
tree | 14dbb55d6b817ce49d2798c9cf00c42bc4011a50 | |
parent | 6babe96864db98aee21458f0a62425b19818a203 (diff) |
Update to pixman 0.30.0. Tested by several people during t2k13. Thanks.
90 files changed, 30199 insertions, 2733 deletions
diff --git a/lib/pixman/ChangeLog b/lib/pixman/ChangeLog index a9bfdeeb2..562bcc066 100644 --- a/lib/pixman/ChangeLog +++ b/lib/pixman/ChangeLog @@ -1,3 +1,19500 @@ +commit 41daf50aaeca71e70bc27aa4bf27ae4812c6eecf +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed May 8 19:31:22 2013 -0400 + + Pre-release version bump to 0.30.0 + +commit 5a7179191dba6c592a63cad8aa8bc8af7ab5e586 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Apr 30 18:57:43 2013 -0400 + + Post-release version bump to 0.29.5 + +commit 2714b5d201525e176429c0c030b8376a32b4f6c7 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Apr 30 18:50:04 2013 -0400 + + Pre-release version bump to 0.29.4 + +commit 7fc2654a1fdd6d6c41eddaac50b3668433873679 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Apr 27 04:27:39 2013 -0400 + + pixman/refactor: Delete this file + + Essentially all of it is obsolete by now. + +commit cb928a77c05a9c581e596b8eb24962d47fc39e9f +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Mon Apr 15 19:33:02 2013 +0200 + + MIPS: DSPr2: Added rpixbuf fast path. + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + rpixbuf = L1: 14.63 L2: 13.55 M: 9.91 ( 79.53%) HT: 8.47 VT: 8.32 R: 8.17 RT: 4.90 ( 33Kops/s) + + Optimized: + rpixbuf = L1: 45.69 L2: 37.30 M: 17.24 (138.31%) HT: 15.66 VT: 14.88 R: 13.97 RT: 8.38 ( 44Kops/s) + +commit c6a6fbdcd3ef18f733ff7ad11d5fafac384744cd +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Mon Apr 15 19:33:01 2013 +0200 + + MIPS: DSPr2: Added pixbuf fast path. 
+ + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + pixbuf = L1: 18.18 L2: 16.47 M: 13.36 (107.27%) HT: 10.16 VT: 10.07 R: 9.84 RT: 5.54 ( 35Kops/s) + + Optimized: + pixbuf = L1: 43.54 L2: 36.02 M: 17.08 (137.09%) HT: 15.58 VT: 14.85 R: 13.87 RT: 8.38 ( 44Kops/s) + +commit f69335d5292310dc18f2f84d462430137a771976 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Mon Apr 15 19:33:00 2013 +0200 + + test: add "pixbuf" and "rpixbuf" to lowlevel-blt-bench + + Add necessary support to lowlevel-blt benchmark for benchmarking pixbuf and + rpixbuf fast paths. bench_composite function now checks for pixbuf string in + testname, and if that is detected, use same bits for src and mask images. + +commit 3dc9e3827e342b415c519da1039b9a2e4fb293ec +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Mon Apr 15 19:32:59 2013 +0200 + + test: add "src_0888_8888_rev" and "src_0888_0565_rev" to lowlevel-blt-bench + +commit 44174ce51d1ed5a1bf988b9dd9218d8cbd379de3 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Mon Apr 15 19:32:58 2013 +0200 + + MIPS: DSPr2: Fix for bug in in_n_8 routine. + + Rounding logic was not implemented right. + Instead of using rounding version of the 8-bit shift, logical shifts were used. + Also, code used unnecessary multiplications, which could be avoided by packing + 4 destination (a8) pixel into one 32bit register. There were also, unnecessary + spills on stack. Code is rewritten to address mentioned issues. + + The bug was revealed by increasing number of the iterations in blitters-test. 
+ + Performance numbers on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + in_n_8 = L1: 21.20 L2: 22.86 M: 21.42 ( 14.21%) HT: 15.97 VT: 15.69 R: 15.47 RT: 8.00 ( 48Kops/s) + Optimized (first implementation, with bug): + in_n_8 = L1: 89.38 L2: 86.07 M: 65.48 ( 43.44%) HT: 44.64 VT: 41.50 R: 40.77 RT: 16.94 ( 66Kops/s) + Optimized (with bug fix, and code revisited): + in_n_8 = L1: 102.33 L2: 95.65 M: 70.54 ( 46.84%) HT: 48.35 VT: 45.06 R: 43.20 RT: 17.60 ( 66Kops/s) + +commit 5858f09d264ef762ddcf7ede324bfce9f5991d29 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Mon Apr 15 19:32:57 2013 +0200 + + MIPS: DSPr2: Added src_0565_8888 nearest neighbor fast path. + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + src_0565_8888 = L1: 20.70 L2: 19.22 M: 12.50 ( 49.79%) HT: 10.45 VT: 10.18 R: 9.99 RT: 5.31 ( 31Kops/s) + + Optimized: + src_0565_8888 = L1: 62.98 L2: 53.44 M: 23.07 ( 91.87%) HT: 19.85 VT: 19.15 R: 17.70 RT: 9.68 ( 43Kops/s) + +commit 311d55b6d8e1ac3acaa12d1d7c3eefdcfdc70718 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Mon Apr 15 19:32:56 2013 +0200 + + MIPS: DSPr2: Added over_8888_0565 nearest neighbor fast path. + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + over_8888_0565 = L1: 13.22 L2: 12.02 M: 9.77 ( 38.92%) HT: 8.58 VT: 8.35 R: 8.38 RT: 5.78 ( 35Kops/s) + + Optimized: + over_8888_0565 = L1: 26.20 L2: 22.97 M: 15.92 ( 63.40%) HT: 13.33 VT: 13.13 R: 12.72 RT: 7.65 ( 39Kops/s) + +commit bd487ee34c343142cbe451a2e04541d8aba0eaa7 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Mon Apr 15 19:32:55 2013 +0200 + + MIPS: DSPr2: Added over_8888_8888 nearest neighbor fast path. 
+ + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + over_8888_8888 = L1: 19.47 L2: 16.30 M: 11.24 ( 59.69%) HT: 9.54 VT: 9.29 R: 9.47 RT: 6.24 ( 37Kops/s) + + Optimized: + over_8888_8888 = L1: 43.67 L2: 33.30 M: 16.32 ( 86.65%) HT: 14.10 VT: 13.78 R: 12.96 RT: 7.85 ( 39Kops/s) + +commit 66def909ad82ed4ccb49380031cb828655c9a47f +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Mon Apr 15 19:32:54 2013 +0200 + + MIPS: DSPr2: Fix bug in over_n_8888_8888_ca/over_n_8888_0565_ca routines + + After introducing new PRNG (pseudorandom number generator) a bug in two DSPr2 + routines was revealed. Bug manifested by wrong calculation in composite and + glyph tests, which caused make check to fail for MIPS DSPr2 optimizations. + + Bug was in the calculation of the: + *dst = over (src, *dst) when ma == 0xffffffff + + In this case src was not negated and shifted right by 24 bits, it was only + negated. When implementing this routine in the first place, I misplaced those + shifts, which allowed me to combine code for over operation and: + UN8x4_MUL_UN8x4 (s, ma); + UN8x4_MUL_UN8 (ma, srca); + ma = ~ma; + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); + So I decided to rewrite that piece of code from scratch. I changed logic, so + now assembly code mimics code from pixman-fast-path.c but processes two pixels + at a time. This code should be easier to debug and maintain. + + The bug was revealed in commit b31a6962. Errors were detected by composite + and glyph tests. + +commit d768558ce195caa208262866f9262b29efff22dc +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Mon Jan 28 07:00:12 2013 +0200 + + sse2: faster bilinear interpolation (get rid of XOR instruction) + + The old code was calculating horizontal weights for right pixels + in the following way (for simplicity assume 8-bit interpolation + precision): + + Start with "x = vx" and do increment "x += ux" after each pixel. 
+ In this case right pixel weight for interpolation can be calculated + as "((x >> 8) ^ 0xFF) + 1", which is the same as "256 - (x >> 8)". + + The new code instead: + + Starts with "x = -(vx + 1)", performs increment "x += -ux" after + each pixel and calculates right weights as just "(x >> 8) + 1", + eliminating the need for XOR operation in the inner loop. + + So we have one instruction less on the critical path. Benchmarks + with "lowlevel-blt-bench -b src_8888_8888" using GCC 4.7.2 on + x86-64 system and default optimizations: + + Intel Core i7 860 (2.8GHz): + before: src_8888_8888 = L1: 291.37 L2: 288.58 M:285.38 + after: src_8888_8888 = L1: 319.66 L2: 316.47 M:312.06 + + Intel Core2 T7300 (2GHz): + before: src_8888_8888 = L1: 121.95 L2: 118.38 M:118.52 + after: src_8888_8888 = L1: 128.82 L2: 125.12 M:124.88 + + Intel Atom N450 (1.67GHz): + before: src_8888_8888 = L1: 64.25 L2: 62.37 M: 61.80 + after: src_8888_8888 = L1: 64.23 L2: 62.37 M: 61.82 + + Inspired by the "sse2_bilinear_interpolation" function (single + pixel interpolation) from: + http://lists.freedesktop.org/archives/pixman/2013-January/002575.html + +commit 59109f32930a0c163628f8087cbb0a15b19cb96b +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Tue Mar 5 00:59:13 2013 +0200 + + test: larger 0xFF/0x00 filled clusters in random images for blitters-test + + Current blitters-test program had difficulties detecting a bug in + over_n_8888_8888_ca implementation for MIPS DSPr2: + + http://lists.freedesktop.org/archives/pixman/2013-March/002645.html + + In order to hit the buggy code path, two consecutive mask values had + to be equal to 0xFFFFFFFF because of loop unrolling. The current + blitters-test generates random images in such a way that each byte + has 25% probability for having 0xFF value. Hence each 32-bit mask + value has ~0.4% probability for 0xFFFFFFFF. 
Because we are testing + many compositing operations with many pixels, encountering at least + one 0xFFFFFFFF mask value reasonably fast is not a problem. If a + bug related to 0xFFFFFFFF mask value is artificially introduced into + over_n_8888_8888_ca generic C function, it gets detected on 675591 + iteration in blitters-test (out of 2000000). + + However two consecutive 0xFFFFFFFF mask values are much less likely + to be generated, so the bug was missed by blitters-test. + + This patch addresses the problem by also randomly setting the 32-bit + values in images to either 0xFFFFFFFF or 0x00000000 (also with 25% + probability). It allows to have larger clusters of consecutive 0x00 + or 0xFF bytes in images which may have special shortcuts for handling + them in unrolled or SIMD optimized code. + +commit a99147d1ea0d67f635f9284c242485fb5621cab3 +Author: Stefan Weil <sw@weilnetz.de> +Date: Sat Apr 27 08:00:38 2013 +0200 + + Trivial spelling fixes in comments + + They were found by codespell. + + Signed-off-by: Stefan Weil <sw@weilnetz.de> + +commit 9d0bb10312e5de0653c9e28df79ce8a5e8cec97a +Author: Peter Breitenlohner <peb@mppmu.mpg.de> +Date: Mon Apr 8 13:13:05 2013 +0200 + + Check for missing sqrtf() as, e.g., for Solaris 9 + + Signed-off-by: Peter Breitenlohner <peb@mppmu.mpg.de> + +commit d8ac35af1208a4fa4d67f03fee10b5449fb8495a +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Feb 14 08:06:19 2013 -0500 + + Improve precision of calculations in pixman-gradient-walker.c + + The computations in pixman-gradient-walker.c currently take place at + very limited 8 bit precision which results in quite visible artefacts + in gradients. 
An example is the one produced by demos/linear-gradient + which currently looks like this: + + http://i.imgur.com/kQbX8nd.png + + With the changes in this commit, the gradient looks like this: + + http://i.imgur.com/nUlyuKI.png + + The images are also available here: + + http://people.freedesktop.org/~sandmann/gradients/before.png + http://people.freedesktop.org/~sandmann/gradients/after.png + + This patch computes pixels using floating point, but uses a faster + algorithm, which makes up for the loss of performance. + + == Theory: + + In both the new and the old algorithm, the various gradient + implementations compute a parameter x that indicates how far along the + gradient the current scanline is. The current algorithm has a cache of + the two color stops surrounding the last parameter; those are used in + a SIMD-within-register fashion in this way: + + t1 = walker->left_rb * idist + walker->right_rb * dist; + + where dist and idist are the distances to the left and right color + stops respectively normalized to the distance between the left and + right stops. The normalization (which involves a division) is captured + in another cached variable "stepper". The cached values are recomputed + whenever the parameter moves in between two different stops (called + "reset" in the implementation). + + Because idist and dist are computed in 8 bits only, a lot of + information is lost, which is quite visible as the image linked above + shows. + + The new algorithm caches more information in the following way. 
When + interpolating between stops, the formula to be used is this: + + t = ((x - left) / (right - left)); + + result = lc * (1 - t) + rc * t; + + where + + - x is the parameter as computed by the main gradient code, + - left is the position of the left color stop, + - right is the position of the right color stop + - lc is the color of the left color stop + - rc is the color of the right color stop + + That formula can also be written like this: + + result + = lc * (1 - t) + rc * t; + = lc + (rc - lc) * t + = lc + (rc - lc) * ((x - left) / (right - left)) + = (rc - lc) / (right - left) * x + + lc - (left * (rc - lc)) / (right - left) + = s * x + b + + where + + s = (rc - lc) / (right - left) + + and + + b = lc - left * (rc - lc) / (right - left) + = (lc * (right - left) - left * (rc - lc)) / (right - left) + = (lc * right - rc * left) / (right - left) + + To summarize, setting w = (right - left): + + s = (rc - lc) / w + b = (lc * right - rc * left) / w + + r = s * x + b + + Since s and b only depend on the two active stops, both can be cached + so that the computation only needs to do one multiplication and one + addition per pixel (followed by premultiplication of the alpha + channel). That is, seven multiplications in total, which is the same + number as the old SIMD-within-register implementation had. + + == Implementation notes: + + The new formula described above is implemented in single precision + floating point, and the eight divisions necessary to compute the + cached values are done by multiplication with the reciprocal of the + distance between the color stops. + + The alpha values used in the cached computation are scaled by 255.0, + whereas the RGB values are kept in the [0, 1] interval. This ensures + that after premultiplication, all values will be in the [0, 255] + interval. + + This scaling is done by first dividing all the channels by + 257, and then later on dividing the r, g, b channels by 255. 
It would + be more natural to do all this scaling in only one place, but + inexplicably, that results in a (substantial) slowdown on Sandy Bridge + with GCC v 4.7. + + == Performance impact (median of three runs of radial-perf-test): + + == Intel Sandy Bridge, Core i3 @ 1.2GHz + + Before: 0.014553 + After: 0.014410 + Change: 1.0% faster + + == AMD Barcelona @ 1.2 GHz + + Before: 0.021735 + After: 0.021328 + Change: 1.9% faster + + Ie., slightly faster, though conceivably there could be a negative + impact on machines with a bigger difference between integer and + floating point performance. + + V2: + + - Use 's' and 'b' in the variable names instead of 'm' and 'd'. This + way they match the explanation above + + - Move variable declarations to the top of the function + + - Remove unused stepper field + + - Some formatting fixes + + - Don't pointlessly include pixman-combine32.h + + - Don't offset x for each pixel; go back to offsetting left_x and + right_x at reset time. The offsets cancel out in the formula above, + so there is no impact on the calculations. + +commit a1c2331e0eb35d87cf295518838debe1217ca9df +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Mar 8 14:05:50 2013 -0500 + + Move the IS_ZERO() to pixman-private.h and rename to FLOAT_IS_ZERO() + + Some upcoming changes to pixman-gradient-walker.c will need this + macro. + +commit 2c953e572f6c3c18046e768dd07d12150b1f2e94 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Feb 24 21:49:06 2013 -0500 + + test: Add radial-perf-test, a microbenchmark for radial gradients + + This benchmark renders one of the radial gradients used in the + swfdec-youtube cairo trace 500 times and reports the average time it + took. + + V2: Update .gitignore + +commit 460faaa41105c2939d041506f6ff08e2b12e7596 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Feb 14 20:32:31 2013 -0500 + + demos: Add linear-gradient demo program + + This program displays a linear gradient from blue to yellow. 
Due to + limited precision in pixman-gradient-walker.c, it currently has some + ugly artefacts that gives it a 'brushed metal' appearance. + + V2: Update .gitignore + +commit aaae3d8eefa069098e9014822817ca1429fdea46 +Author: Behdad Esfahbod <behdad@behdad.org> +Date: Fri Mar 8 06:00:00 2013 -0500 + + Remove unused macro + +commit 5feda20fc39407879993ed4a6d861ef7f78d9432 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Wed Feb 27 14:40:51 2013 +0100 + + MIPS: DSPr2: Added more fast-paths for SRC operation: + - src_0888_8888_rev + - src_0888_0565_rev + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + src_0888_8888_rev = L1: 51.88 L2: 42.00 M: 19.04 ( 88.50%) HT: 15.27 VT: 14.62 R: 14.13 RT: 7.12 ( 45Kops/s) + src_0888_0565_rev = L1: 31.96 L2: 30.90 M: 22.60 ( 75.03%) HT: 15.32 VT: 15.11 R: 14.49 RT: 6.64 ( 43Kops/s) + + Optimized: + src_0888_8888_rev = L1: 222.73 L2: 113.70 M: 20.97 ( 97.35%) HT: 18.31 VT: 17.14 R: 16.71 RT: 9.74 ( 54Kops/s) + src_0888_0565_rev = L1: 100.37 L2: 74.27 M: 29.43 ( 97.63%) HT: 22.92 VT: 21.59 R: 20.52 RT: 10.56 ( 56Kops/s) + +commit 43914d68d1c87a9da6f53e6b0a12941c97bb0e5d +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Wed Feb 27 14:39:45 2013 +0100 + + MIPS: DSPr2: Added more fast-paths for OVER operation: + - over_8888_0565 + - over_n_8_8 + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + over_8888_0565 = L1: 14.30 L2: 13.22 M: 10.43 ( 41.56%) HT: 12.51 VT: 12.95 R: 11.82 RT: 7.34 ( 49Kops/s) + over_n_8_8 = L1: 12.77 L2: 16.93 M: 15.03 ( 29.94%) HT: 10.78 VT: 10.72 R: 10.29 RT: 4.92 ( 33Kops/s) + + Optimized: + over_8888_0565 = L1: 26.03 L2: 22.92 M: 15.68 ( 62.43%) HT: 16.19 VT: 16.27 R: 14.93 RT: 8.60 ( 52Kops/s) + over_n_8_8 = L1: 62.00 L2: 55.17 M: 40.29 ( 80.23%) HT: 26.77 VT: 25.64 R: 24.13 RT: 10.01 ( 47Kops/s) + +commit 2156fb51b353867d5a18b734690ca551f74d4fb1 +Author: Søren Sandmann 
Pedersen <ssp@redhat.com> +Date: Fri Feb 15 18:34:46 2013 -0500 + + gtk-utils.c: Use cairo in show_image() rather than GdkPixbuf + + GdkPixbufs are not premultiplied, so when using them to display pixman + images, there is some unnecessary conversions going on: First the image + is converted to non-premultiplied, and then GdkPixbuf premultiplies + before sending the result to the X server. These conversions may cause + the displayed image to not be exactly identical to the original. + + This patch just uses a cairo image surface instead, which avoids these + conversions. + + Also make the comment about sRGB a little more concise. + +commit 5e207f825bd1ed3142a623bcbceca00508907c5e +Author: Ben Avison <bavison@riscosopen.org> +Date: Wed Feb 6 00:39:12 2013 +0000 + + Fix to lowlevel-blt-bench + + The source, mask and destination buffers are initialised to 0xCC just after + they are allocated. Between each benchmark, there are a pair of memcpys, + from the destination buffer to the source buffer and back again (there are + no explanatory comments, but presumably this is an effort to flush the + caches). However, it has an unintended consequence, which is to change the + contents of the buffers on entry to subsequent benchmarks. This means it is + not a fair test: for example, with over_n_8888 (featured in the following + patches) it reports L2 and even M tests as being faster than the L1 test, + because after the L1 test, the source buffer is filled with fully opaque + pixels, for which over_n_8888 has a shortcut. + + The fix here is simply to reverse the order of the memcpys, so src and + destination are both filled with 0xCC on entry to all tests. + +commit d26f922dc1a605dae00fa0540198707485ba1f08 +Author: Stefan Weil <sw@weilnetz.de> +Date: Sat Feb 9 12:40:16 2013 +0100 + + sse2: Use uintptr_t in type casts from pointer to integral value + + Some recent code added new type casts from pointer to unsigned long. 
+ These type casts result in compiler warnings for systems like + MinGW-w64 (64 bit Windows) where sizeof(unsigned long) != sizeof(void *). + + Signed-off-by: Stefan Weil <sw@weilnetz.de> + Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit dc80eb09e2831d5ad3bfe638462f80921357952b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Jan 31 14:54:49 2013 -0500 + + lookup_composite: Don't update cache in case of error + + If we fail to find a composite function, don't update the fast path + cache with the dummy compositing function. + + Also make the error message state that the bug is likely caused by + issues with thread local storage. + +commit 4dced81c917c753a4e699e3793efa15a39361cf0 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Jan 31 14:36:38 2013 -0500 + + Turn on error logging at all times + + While releasing 0.29.2 the distcheck run produced a number of error + messages that had to be fixed in 349015e1fc5d912ba4253133b90e751d0b. + These were not caught before so nobody had actually run pixman with + debugging turned on. It's not the first time this has happened, see + 5b0563f39eb29e4ae431717696174da5 for example. + + So this patch makes the return_if_fail() macros use unlikely() around + the expressions and then turns on error logging at all times. The + performance hit should be negligible since we were already evaluating the + expressions. + + The place where DEBUG actually does cause a performance hit is in the + region selfcheck code, and that will still only be enabled in + development snapshots. + +commit f4c9492c12d98f76d99b4dbdca56d517e1ffdb19 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Jan 31 14:31:26 2013 -0500 + + pixman-compiler.h: Add unlikely() macro + + When compiling with GCC this macro expands to __builtin_expect((expr), 0). + On other compilers, it just expands to (expr). 
+ +commit 5ebb5ac3807cdc7bb76358041a15cc5adca2ef23 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jan 22 08:29:57 2013 -0500 + + utils.c: Increase acceptable deviation to 0.0064 in pixel_checker_t + + The check-formats programs reveals that the 8 bit pipeline cannot meet + the current 0.004 acceptable deviation specified in utils.c, so we + have to increase it. Some of the failing pixels were captured in + pixel-test, which with this commit now passes. + + == a4r4g4b4 DISJOINT_XOR a8r8g8b8 == + + The DISJOINT_XOR operator applied to an a4r4g4b4 source pixel of + 0xd0c0 and a destination pixel of 0x5300ea00 results in the exact + value: + + fa = (1 - da) / sa = (1 - 0x53 / 255.0) / (0xd / 15.0) = 0.7782 + fb = (1 - sa) / da = (1 - 0xd / 15.0) / (0x53 / 255.0) = 0.4096 + + r = fa * (0xc / 15.0) + fb * (0xea / 255.0) = 0.99853 + + But when computing in 8 bits, we get: + + fa8 = ((255 - 0x53) * 255 + 0xdd / 2) / 0xdd = 0xc6 + fb8 = ((255 - 0xdd) * 255 + 0x53 / 3) / 0x53 = 0x68 + + r8 = (fa8 * 0xcc + 127) / 255 + (fb8 * 0xea + 127) / 255 = 0xfd + + and + + 0xfd / 255.0 = 0.9921568627450981 + + for a deviation of 0.00637118610187, which we then have to consider + acceptable given the current implementation. + + By switching to computing the result with + + r = (fa * s + fb * d + 127) / 255 + + rather than + + r = (fa * s + 127) / 255 + (fb * d + 127) / 255 + + the deviation would be only 0.00244961747442, so at some point it may + be worth doing either this, or switching to floating point for + operators that involve divisions. + + Note that the conversion from 4 bits to 8 bits does not cause any + error in this case because both rounding and bit replication produces + an exact result when the number of from-bits divide the number of + to-bits. 
+ + == a8r8g8b8 OVER r5g6b5 == + + When OVER compositing the a8r8g8b8 pixel 0x0f00c300 with the x14r6g6b6 + pixel 0x03c0, the true floating point value of the resulting green + channel is: + + 0xc3 / 255.0 + (1.0 - 0x0f / 255.0) * (0x0f / 63.0) = 0.9887955 + + but when compositing 8 bit values, where the 6-bit green channel is + converted to 8 bit through bit replication, the 8-bit result is: + + 0xc3 + ((255 - 0x0f) * 0x3c + 127) / 255 = 251 + + which corresponds to a real value of 0.984314. The difference from the + true value is 0.004482 which is bigger than the acceptable deviation + of 0.004. So, if we were to compute all the CONJOINT/DISJOINT + operators in floating point, or otherwise make them more accurate, the + acceptable deviation could be set at 0.0045. + + If we were doing the 6-bit conversion with rounding: + + (x / 63.0 * 255.0 + 0.5) + + instead of bit replication, the deviation in this particular case + would be only 0.0005, so we may want to consider this at some + point. + +commit f2ba7fe1d812a30004b734e398f45b586833d43f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jan 19 16:32:15 2013 -0500 + + test: Add new pixel-test regression test + + This test program contains a table of individual operator/pixel + combinations. For each pixel combination, images of various sizes are + filled with the pixels and then composited. The result is then + verified against the output of do_composite(). If the result doesn't + match, detailed error information is printed. + + The initial 14 pixel combinations currently all fail. + +commit 6781636740099633b9a8f7e0cc8e7828770f2fc3 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jan 21 15:02:53 2013 -0500 + + a1-trap-test: Add tests for operator_name and format_name() + + The check-formats.c test depends on the exact format of the strings + returned from these functions, so add a test here. 
+ + a1-trap-test isn't the ideal place, but it seems like overkill to add + a new test just for these trivial checks. + +commit d1434d112ca5cd325e4fb85fc60afd1b9e902786 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jan 21 15:54:05 2013 -0500 + + test: Add new check-formats utility + + Given an operator and two formats, this program will composite and + check all pixels where the red and blue channels are 0. That is, if + the two formats are a8r8g8b8 and a4r4g4b4, all source pixels matching + the mask + + 0xff00ff00 + + are composited with the given operator against all destination pixels + matching the mask + + 0xf0f0 + + and the result is then verified against the do_composite() function + that was moved to utils.c earlier. + + This program reveals that a number of operators and format + combinations are not computed to within the precision currently + accepted by pixel_checker_t. For example: + + check-formats over a8r8g8b8 r5g6b5 | grep failed | wc -l + 30 + + reveals that there are 30 pixel combinations where OVER produces + insufficiently precise results for the a8r8g8b8 and r5g6b5 formats. + +commit 1820131fe6674d46b9876965b30b331d593124a8 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jan 22 07:36:19 2013 -0500 + + utils.[ch]: Add pixel_checker_get_masks() + + This function returns the a, r, g, and b masks corresponding to the + pixel checker's format. + +commit 5eb61f72ea50e02eb185c746108909945b589e65 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jan 22 11:57:53 2013 -0500 + + test/utils.[ch]: Add pixel_checker_convert_pixel_to_color() + + This function takes a pixel in the format corresponding to the pixel + checker, and converts to a color_t. + +commit 3ae717f71a31620a5cb28792b9effd0c69ffb822 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jan 19 12:14:24 2013 -0500 + + test: Move do_composite() function from composite.c to utils.c + + So that it can be used in other tests. 
+ +commit 958bd334b3c17f529c80f2eeef4224f45c62f292 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jan 29 21:42:02 2013 -0500 + + Post-release version bump to 0.29.3 + +commit a56707e23bf2d3ef7c2ff9f66f214716791a424d +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jan 29 20:23:39 2013 -0500 + + Pre-release version bump to 0.29.2 + +commit 349015e1fc5d912ba4253133b90e751d0b6ca7f2 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jan 29 20:23:31 2013 -0500 + + stresstest: Ensure that the rasterizer is only given alpha formats + + In c2cb303d33ec11390b93cabd90f0f9, return_if_fail()s were added to + prevent the trapezoid rasterizers from being called with non-alpha + formats. However, stress-test actually does call the rasterizers with + non-alpha formats, but because _pixman_log_error() is disabled in + versions with an odd minor number, the errors never materialized. + + Fix this by changing the argument to random format to an enum of three + values DONT_CARE, PREFER_ALPHA, or REQUIRE_ALPHA, and then in the + switch that calls the trapezoid rasterizers, pass the appropriate + value for the function in question. + +commit afde862928da7ac927cf4b60a022fafe5f060d26 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Jan 27 20:08:06 2013 -0500 + + Change default GPGKEY to 3892336E, which is soren.sandmann@gmail.com + + The old one belongs to the email address sandmann@daimi.au.dk, which + doesn't work anymore. + + Also use gpg to get the name and address for the "(Signed by ...)" + line since that works more reliably for me than using git. + +commit 69a7a9b6b6dc5b769888c469de3435059318f7cc +Author: Ben Avison <bavison@riscosopen.org> +Date: Thu Jan 24 18:19:48 2013 +0000 + + Improve L1 and L2 benchmark tests for caches that don't use allocate-on-write + + In particular this affects single-core ARMs (e.g. ARM11, Cortex-A8), which + are usually configured this way. 
For other CPUs, this should only add a + constant time, which will be cancelled out by the EXCLUDE_OVERHEAD runs. + + The problems were caused by cachelines becoming permanently evicted from + the cache, because the code that was intended to pull them back in again on + each iteration assumed too long a cache line (for the L1 test) or failed to + read memory beyond the first pixel row (for the L2 test). Also, the reloading + of the source buffer was unnecessary. + + These issues were identified by Siarhei in this post: + http://lists.freedesktop.org/archives/pixman/2013-January/002543.html + +commit 1fa67f499d3826fad8783684bb90c8aadd9f682f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Jan 18 14:13:21 2013 -0500 + + pixman-combine-float.c: Use IS_ZERO() in clip_color() and set_sat() + + The clip_color() function has some checks to avoid division by zero, + but they are done by comparing the value to 4 * FLT_EPSILON, where a + better choice is the IS_ZERO() macro that compares to +/- FLT_MIN. + + In set_sat(), the check is that *max > *min before dividing by *max - + *min, but that has the potential problem that interactions between GCC + optimizations and 80 bit x87 registers could mean that (*max > *min) is + true in 80 bits, but (*max - *min) is 0 in 32 bits, so that the + division by zero is not prevented. Using IS_ZERO() here as well + prevents this. + +commit 7e53e5866458fe592fc109cb1455c21c4b61dee9 +Author: Ben Avison <bavison@riscosopen.org> +Date: Sat Jan 19 16:16:53 2013 +0000 + + ARMv6: Replacement add_8_8, over_8888_8888, over_8888_n_8888 and over_n_8_8888 routines + + Improved by adding preloads, combining writes and using the SEL + instruction. 
+ + add_8_8 + + Before After + Mean StdDev Mean StdDev Confidence Change + L1 62.1 0.2 543.4 12.4 100.0% +774.9% + L2 38.7 0.4 116.8 1.7 100.0% +201.8% + M 40.0 0.1 110.1 0.5 100.0% +175.3% + HT 30.9 0.2 43.4 0.5 100.0% +40.4% + VT 30.6 0.3 39.2 0.5 100.0% +28.0% + R 21.3 0.2 35.4 0.4 100.0% +66.6% + RT 8.6 0.2 10.2 0.3 100.0% +19.4% + + over_8888_8888 + + Before After + Mean StdDev Mean StdDev Confidence Change + L1 32.3 0.1 38.0 0.2 100.0% +17.7% + L2 15.9 0.4 30.6 0.5 100.0% +92.8% + M 13.3 0.0 25.6 0.0 100.0% +92.9% + HT 10.5 0.1 15.5 0.1 100.0% +47.1% + VT 10.4 0.1 14.6 0.1 100.0% +40.8% + R 10.3 0.1 15.8 0.1 100.0% +53.3% + RT 6.0 0.1 7.6 0.1 100.0% +25.9% + + over_8888_n_8888 + + Before After + Mean StdDev Mean StdDev Confidence Change + L1 17.6 0.1 21.0 0.1 100.0% +19.2% + L2 11.2 0.2 19.2 0.1 100.0% +71.2% + M 10.2 0.0 19.6 0.0 100.0% +92.6% + HT 8.4 0.0 11.9 0.1 100.0% +41.7% + VT 8.3 0.0 11.3 0.1 100.0% +36.4% + R 8.3 0.0 11.8 0.1 100.0% +43.1% + RT 5.1 0.1 6.2 0.1 100.0% +21.3% + + over_n_8_8888 + + Before After + Mean StdDev Mean StdDev Confidence Change + L1 17.5 0.1 22.8 0.8 100.0% +30.1% + L2 14.2 0.3 21.7 0.2 100.0% +52.6% + M 12.0 0.0 22.3 0.0 100.0% +84.8% + HT 10.5 0.1 14.1 0.1 100.0% +34.5% + VT 10.0 0.1 13.5 0.1 100.0% +35.3% + R 9.4 0.0 12.9 0.2 100.0% +37.7% + RT 5.5 0.1 6.5 0.2 100.0% +19.2% + +commit f87dfd6f37a29c69320edd92f28aed5334b09366 +Author: Ben Avison <bavison@riscosopen.org> +Date: Sat Jan 19 16:16:52 2013 +0000 + + ARMv6: New conversion routines + + There was no previous attempt at accelerating these specifically for + ARMv6. 
+ + src_x888_8888 + + Before After + Mean StdDev Mean StdDev Confidence Change + L1 96.7 0.5 270.4 2.6 100.0% +179.5% + L2 44.6 2.7 110.6 9.7 100.0% +148.0% + M 26.9 0.1 87.6 0.5 100.0% +226.1% + HT 19.3 0.2 37.5 0.4 100.0% +93.7% + VT 18.6 0.1 33.7 0.4 100.0% +81.6% + R 18.4 0.1 32.2 0.3 100.0% +75.2% + RT 9.2 0.2 12.1 0.3 100.0% +31.4% + + src_0565_8888 + + Before After + Mean StdDev Mean StdDev Confidence Change + L1 37.0 0.3 66.9 0.2 100.0% +80.8% + L2 30.3 0.2 55.9 0.3 100.0% +84.4% + M 25.9 0.0 62.3 0.2 100.0% +140.3% + HT 15.2 0.1 33.1 0.3 100.0% +116.9% + VT 15.1 0.1 30.7 0.3 100.0% +103.6% + R 14.2 0.1 27.6 0.3 100.0% +94.0% + RT 6.0 0.1 11.2 0.3 100.0% +87.2% + +commit a0f59f3b2884b056428229363576666f158a9bb4 +Author: Ben Avison <bavison@riscosopen.org> +Date: Sat Jan 19 16:16:51 2013 +0000 + + ARMv6: New blit routines + + These are usable either as various composite operations, or via the + top-level function pixman_blt() which now does some blitting for the + first time on an ARMv6 platform (previously it just returned FALSE). 
+ + src_8888_8888 + + Before After + Mean StdDev Mean StdDev Confidence Change + L1 414.5 9.4 445.8 3.6 100.0% +7.6% + L2 93.3 20.7 114.5 12.9 100.0% +22.7% + M 57.0 0.2 89.2 0.5 100.0% +56.4% + HT 28.7 0.3 39.6 0.4 100.0% +37.9% + VT 25.5 0.2 35.3 0.4 100.0% +38.4% + R 20.1 0.1 33.8 0.3 100.0% +67.8% + RT 7.8 0.2 12.7 0.4 100.0% +62.7% + + src_0565_0565 + + Before After + Mean StdDev Mean StdDev Confidence Change + L1 397.4 6.1 412.5 5.2 100.0% +3.8% + L2 143.2 10.9 141.9 6.5 68.9% -0.9% (insignificant) + M 90.7 0.4 133.5 0.7 100.0% +47.1% + HT 38.6 0.3 53.7 0.7 100.0% +39.0% + VT 33.0 0.3 47.3 0.6 100.0% +43.3% + R 25.7 0.2 42.1 0.5 100.0% +64.1% + RT 8.0 0.2 13.3 0.3 100.0% +65.6% + + src_8_8 + + Before After + Mean StdDev Mean StdDev Confidence Change + L1 716.5 9.8 768.2 20.4 100.0% +7.2% + L2 246.2 12.7 260.5 8.8 100.0% +5.8% + M 146.8 0.7 227.9 0.7 100.0% +55.2% + HT 44.9 0.6 62.1 1.0 100.0% +38.2% + VT 35.6 0.4 53.4 0.7 100.0% +50.0% + R 29.7 0.3 48.2 0.6 100.0% +62.2% + RT 8.6 0.2 12.9 0.4 100.0% +49.3% + +commit 3cff56c5b091d2e584503e7887414e224876de37 +Author: Ben Avison <bavison@riscosopen.org> +Date: Sat Jan 19 16:16:50 2013 +0000 + + ARMv6: New fill routines + + Note that this also effectively accelerates src_n_8888, src_n_0565 and + src_n_8 composite types, because of the fast paths in + pixman-fast-path.c implemented by fast_composite_solid_fill(), which + end up dispatching these platform-specific fill routines. 
+ + src_n_8888 + + Before After + Mean StdDev Mean StdDev Confidence Change + L1 157.3 1.1 574.2 8.7 100.0% +265.0% + L2 94.2 0.5 364.8 4.2 100.0% +287.3% + M 92.7 0.4 358.7 1.1 100.0% +287.1% + HT 68.5 0.9 133.6 4.0 100.0% +95.2% + VT 61.3 0.8 111.8 2.6 100.0% +82.4% + R 61.1 0.9 108.7 2.8 100.0% +78.1% + RT 24.6 1.0 28.6 1.6 100.0% +16.0% + + src_n_0565 + + Before After + Mean StdDev Mean StdDev Confidence Change + L1 157.4 1.0 983.1 38.5 100.0% +524.6% + L2 93.6 0.5 696.0 14.3 100.0% +643.4% + M 92.7 0.4 680.5 1.0 100.0% +634.0% + HT 68.3 0.9 160.3 6.6 100.0% +134.6% + VT 61.1 0.8 130.1 3.4 100.0% +112.9% + R 61.0 0.8 125.4 4.1 100.0% +105.7% + RT 24.9 1.3 29.5 1.5 100.0% +18.2% + + src_n_8 + + Before After + Mean StdDev Mean StdDev Confidence Change + L1 154.7 1.0 1324.4 48.5 100.0% +756.3% + L2 92.4 0.4 1178.4 10.9 100.0% +1175.6% + M 92.9 0.4 1275.7 2.1 100.0% +1273.5% + HT 68.2 1.0 169.8 5.5 100.0% +149.0% + VT 61.2 1.0 138.5 3.6 100.0% +126.3% + R 61.3 0.9 130.1 3.8 100.0% +112.4% + RT 25.5 1.3 29.2 1.9 100.0% +14.6% + +commit 2e173326aaf232d84ed71faf3517bd7989680e27 +Author: Ben Avison <bavison@riscosopen.org> +Date: Mon Jan 28 17:03:50 2013 +0000 + + ARMv6: Lay the groundwork for later patches in the series + + Move the entire contents of pixman-arm-simd-asm.S to a new file; + ultimately this will only retain the scaled operations, so it is + named pixman-arm-simd-asm-scaled.S. Added new header file + pixman-arm-simd-asm.h, containing the macros which are the basis of + all the new ARMv6 implementations, although at this point in the + series, nothing uses them and the library should be binary-identical. 
+ +commit 65fc1adb6545737058e938105ae948a3607c277c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jan 26 00:34:53 2013 -0500 + + demo/scale: Add a spin button to set the number of subsample bits + + For large upscalings the level of subsampling for the filter has a + quite visible effect, so make it settable in the UI so that people can + experiment with various values. + +commit ed39992564beefe6b12f81e842caba11aff98a9c +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Sat Dec 15 07:18:53 2012 +0200 + + Use pixman_transform_point_31_16() from pixman_transform_point() + + Old functions pixman_transform_point() and pixman_transform_point_3d() + now become just wrappers for pixman_transform_point_31_16() and + pixman_transform_point_31_16_3d(). Eventually their uses should be + completely eliminated in the pixman code and replaced with their + extended range counterparts. This is needed in order to be able + to correctly handle any matrices and parameters that may come + to pixman from the code responsible for XRender implementation. + +commit 5a78d74ccccba2aeb473f04ade44512d2f6c0613 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Sat Dec 15 06:19:21 2012 +0200 + + test: Added matrix-test for testing projective transform accuracy + + This test uses __float128 data type when it is available + for implementing a "perfect" reference implementation. The + output from pixman_transform_point_31_16() and + pixman_transform_point_31_16_affine() is compared with the + reference implementation to make sure that the rounding + errors may only show up in a single least significant bit. + + The platforms and compilers, which do not support __float128 + data type, can rely on crc32 checksum for the pseudorandom + transform results. 
+ +commit 09600ae7e34eb777471c931cd4c3a8cdbda6e84a +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Wed Dec 12 02:41:55 2012 +0200 + + configure.ac: Added detection for __float128 support + + GCC supports 128-bit floating point data type on some platforms (including + but not limited to x86 and x86-64). This may be useful for tests, which + need perfectly accurate reference implementations of certain algorithms. + +commit c3deb8334a71998b986a7b8d5b74bedf26cc23aa +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Fri Dec 14 18:43:57 2012 +0200 + + Add higher precision "pixman_transform_point_*" functions + + The following new functions are added: + + pixman_transform_point_31_16_3d() - + Calculates the product of a matrix and a vector multiplication. + + pixman_transform_point_31_16() - + Calculates the product of a matrix and a vector multiplication. + Then converts the homogeneous resulting vector [x, y, z] to + cartesian [x', y', 1] variant, where x' = x / z, and y' = y / z. + + pixman_transform_point_31_16_affine() - + A faster sibling of the other two functions, which assumes affine + transformation, where the bottom row of the matrix is [0, 0, 1] and + the last element of the input vector is set to 1. + + These functions transform a point with 31.16 fixed point coordinates from + the destination space to a point with 48.16 fixed point coordinates in + the source space. + + The results are accurate and the rounding errors may only show up in + the least significant bit. No overflows are possible for the affine + transformations as long as the input data is provided in 31.16 format. + In the case of projective transformations, some output values may be not + representable using 48.16 fixed point format. In this case the results + are clamped to return maximum or minimum 48.16 values (so that the caller + can at least handle NONE and PAD repeats correctly). 
+ +commit a47ed2c31180e6c3b332747a1721731e0649b10f +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Mon Dec 3 17:42:21 2012 +0200 + + Faster fetch for the C variant of r5g6b5 src/dest iterator + + Processing two pixels at once is used to reduce the number of + arithmetic operations. + + The speedup relative to the generic fetch_scanline_r5g6b5() from + "pixman-access.c" (pixman was compiled with gcc 4.7.2): + + MIPS 74K 480MHz : 20.32 MPix/s -> 26.47 MPix/s + ARM11 700MHz : 34.95 MPix/s -> 38.22 MPix/s + ARM Cortex-A8 1000MHz : 87.44 MPix/s -> 100.92 MPix/s + ARM Cortex-A9 1700MHz : 150.95 MPix/s -> 158.13 MPix/s + ARM Cortex-A15 1700MHz : 148.91 MPix/s -> 155.42 MPix/s + IBM Cell PPU 3200MHz : 75.29 MPix/s -> 98.33 MPix/s + Intel Core i7 2800MHz : 257.02 MPix/s -> 376.93 MPix/s + + That's the performance for C code (SIMD and assembly optimizations + are disabled via PIXMAN_DISABLE environment variable). + +commit e66fd5ccb6b69dfa1acde36220dc3c3c44026890 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Mon Dec 3 17:07:31 2012 +0200 + + Faster write-back for the C variant of r5g6b5 dest iterator + + Unrolling loops improves performance, so just use it here. + + Also GCC can't properly optimize this code for RISC processors and + allocate 0x1F001F constant in a register. Because this constant is + too large to be represented as an immediate operand in instructions, + GCC inserts some redundant arithmetic. This problem can be worked around + by explicitly using a variable for 0x1F001F constant and also initializing + it by a read from another volatile variable. In this case GCC is forced + to allocate a register for it, because it is not seen as a constant anymore. 
+ + The speedup relative to the generic store_scanline_r5g6b5() from + "pixman-access.c" (pixman was compiled with gcc 4.7.2): + + MIPS 74K 480MHz : 33.22 MPix/s -> 43.42 MPix/s + ARM11 700MHz : 50.16 MPix/s -> 78.23 MPix/s + ARM Cortex-A8 1000MHz : 117.75 MPix/s -> 196.34 MPix/s + ARM Cortex-A9 1700MHz : 177.04 MPix/s -> 320.32 MPix/s + ARM Cortex-A15 1700MHz : 231.44 MPix/s -> 261.64 MPix/s + IBM Cell PPU 3200MHz : 130.25 MPix/s -> 145.61 MPix/s + Intel Core i7 2800MHz : 502.21 MPix/s -> 721.73 MPix/s + + That's the performance for C code (SIMD and assembly optimizations + are disabled via PIXMAN_DISABLE environment variable). + +commit a9f66694163da9e8e41a69497acbadd630e0cb51 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Mon Dec 3 06:32:46 2012 +0200 + + Added C variants of r5g6b5 fetch/write-back iterators + + Adding specialized iterators for r5g6b5 color format allows us to work + on fine tuning performance of r5g6b5 fetch/write-back operations in the + pixman general "fetch -> combine -> store" pipeline. + + These iterators also make "src_x888_0565" fast path redundant, so it can + be removed. + +commit 794033ed43ed74ad66075a4d0c83fd36565da876 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Wed Jan 23 10:27:22 2013 +0000 + + Eliminate duplicate copies of channel flags for pixman_image_composite32() + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit a59f081df45ec5c15b295bb31b22dbe787e2f2b1 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Jan 12 16:52:47 2013 +0000 + + Always return a valid function from lookup_combiner() + + We should always have at least a C combiner available, so we never + expect the search to fail. If it does, emit an error and return a + dummy function. 
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 520230914bbb56473b872f2ef7dc59092f426415 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sat Jan 12 08:28:32 2013 +0000 + + Always return a valid function from lookup_composite() + + We never expect to fail to find the appropriate function as the + general_composite_rect should always match. So if somehow we fallthrough + the search, emit a _pixman_log_error() and return a dummy function. + + Note that we remove some conditionals and a level of indentation hence a + large amount of code movement. This also reveals that in a few places we + are duplicating stack variables that can be eliminated later. + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit b283c864a3de039f9213adaf402c6597db12d0c4 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 8 18:39:03 2013 +0000 + + sse2: Add fast paths for bilinear source with a solid mask + + Based on the existing sse2_8888_n_8888 nearest scaling routines. + + fishbowl on an i5-2500: 60.9s -> 56.9s + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit d00ce4091215e8a648c6f1912829b35c02b06add +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 1 19:41:54 2013 +0000 + + sse2: Add a fast path for add_n_8_8888 + + This path is being exercised by compositing of trapezoids for clipmasks, for + instance as used in the firefox-asteroids cairo-trace. 
+ + IVB i7-3720qm ./tests/lowlevel-blt-bench add_n_8_8888: + + reference memcpy speed = 14846.7MB/s (3711.7MP/s for 32bpp fills) + + before: L1: 681.10 L2: 735.14 M:701.44 ( 28.35%) HT:283.32 VT:213.23 R:208.93 RT: 77.89 ( 793Kops/s) + + after: L1: 992.91 L2:1017.33 M:982.58 ( 39.88%) HT:458.93 VT:332.32 R:326.13 RT:136.66 (1287Kops/s) + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 7ced3beec99e9965717f76cc822d0702383a1fce +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Jan 1 19:41:54 2013 +0000 + + sse2: Add a fast path for add_n_8888 + + This path is being exercised by inplace compositing of trapezoids, for + instance as used in the firefox-asteroids cairo-trace. + + IVB i7-3720qm ./tests/lowlevel-blt-bench add_n_8888: + + reference memcpy speed = 14918.3MB/s (3729.6MP/s for 32bpp fills) + + before: L1:1752.44 L2:2259.48 M:2215.73 ( 58.80%) HT:589.49 VT:404.04 R:424.69 RT:134.68 (1182Kops/s) + + after: L1:3931.21 L2:6132.78 M:3440.17 ( 92.24%) HT:1337.70 VT:1357.64 R:1270.27 RT:359.78 (2161Kops/s) + + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit b7f523e3bcbef1f08bf9b374f2704723d5298c1f +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Thu Jan 24 14:49:41 2013 -0500 + + Add a version of bilinear_interpolation for precision <=4 + + Having 4 or fewer bits means we can do two components at + a time in a single 32 bit register. + + Here are the results for firefox-fishtank on a Pandaboard with + 4.6.3 and PIXMAN_DISABLE="arm-neon" + + Before: + [ # ] backend test min(s) median(s) stddev. count + [ 0] image t-firefox-fishtank 7.841 7.910 0.70% 6/6 + + After: + [ # ] backend test min(s) median(s) stddev. 
count + [ 0] image t-firefox-fishtank 6.951 6.995 1.11% 6/6 + +commit 24e83cae64eaa238a7bf67488917b0f8cac89114 +Author: Ben Avison <bavison@riscosopen.org> +Date: Sat Jan 19 16:36:22 2013 +0000 + + Tweaks to lowlevel-blt-bench + + This adds two extra tests, src_n_8 and src_8_8, which I have been + using to benchmark my ARMv6 changes. + + I'd also like to propose that it requires an exact test name as the + executable's argument, as achieved by this strstr to strcmp change. + Without this, it is impossible to only benchmark (for example) + add_8_8, add_n_8 or src_n_8, due to those also being substrings of + many other test names. + +commit b527a0e615a726aa6a7d18f0ea0b38564b153afa +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jan 19 12:29:48 2013 -0500 + + test: Use operator_name() and format_name() in composite.c + + With the operator_name() and format_name() functions there is no + longer any reason for composite.c to have its own table of format and + operator names. + +commit 4eb9a24abae6cee7562c3ec8965dc4eaaba0e8ab +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jan 19 09:36:50 2013 -0500 + + utils.[ch]: Add new format_name() function + + This function returns the name of the given format code, which is + useful for printing out debug information. The function is written as + a switch without a default value so that the compiler will warn if new + formats are added in the future. The fake formats used in the fast + path tables are also recognized. + + The function is used in alpha_map.c, where it replaces an existing + format_name() function, and in blitters-test.c, affine-test.c, and + scaling-test.c. + +commit 1676b4938912bd140791c347aa4d08db255dd60f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jan 19 08:55:27 2013 -0500 + + test/utils.[ch]: Add new function operator_name() + + This function returns the name of the given operator, which is useful + for printing out debug information. 
The function is done as a switch + without a default value so that the compiler will warn if new + operators are added in the future. + + The function is used in affine-test.c, scaling-test.c, and + blitters-test.c. + +commit 8d85311143b0bc30d3490c0ca2ddbe927a1f9ac8 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jan 12 08:03:35 2013 -0500 + + README: Add guidelines on how to contribute patches + + Ben Avison pointed out here: + + http://lists.freedesktop.org/archives/pixman/2013-January/002485.html + + that there isn't really any documentation about how to submit patches + to pixman. This patch adds some information to the README file. + + v2: Incorporate some comments from Ben Avison + v3: Change gitweb URL to cgit + +commit 61dacffaf47e6b631a2c67230f8f111038d1de09 +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Jan 18 16:53:32 2013 -0800 + + Convert INCLUDES to AM_CPPFLAGS + + INCLUDES has been deprecated starting with automake 1.13. Convert all + occurrences with the recommended AM_CPPFLAGS replacement. 
+ +commit c7c28f440db083d69ca930b44fc6280bb558e098 +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Jan 18 16:49:00 2013 -0800 + + Add new demos and tests to .gitignore + +commit 2c6577476e5b18e17904ae8af244a39c352e2e33 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Tue Jan 22 03:01:05 2013 +0100 + + MIPS: DSPr2: Added more fast-paths: + - over_reverse_n_8888 + - in_n_8_8 + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + over_reverse_n_8888 = L1: 19.42 L2: 19.07 M: 15.38 ( 40.80%) HT: 13.35 VT: 13.10 R: 12.92 RT: 8.27 ( 49Kops/s) + in_n_8_8 = L1: 21.20 L2: 22.86 M: 21.42 ( 14.21%) HT: 15.97 VT: 15.69 R: 15.47 RT: 8.00 ( 48Kops/s) + + Optimized: + over_reverse_n_8888 = L1: 60.09 L2: 47.87 M: 28.65 ( 76.02%) HT: 23.58 VT: 22.51 R: 21.99 RT: 12.28 ( 60Kops/s) + in_n_8_8 = L1: 89.38 L2: 86.07 M: 65.48 ( 43.44%) HT: 44.64 VT: 41.50 R: 40.77 RT: 16.94 ( 66Kops/s) + +commit a67b0e24d7eaba3b9525eeb8bf357ded95cc6b7c +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Tue Jan 22 02:59:44 2013 +0100 + + MIPS: DSPr2: Added more fast-paths for REVERSE operation: + - out_reverse_8_0565 + - out_reverse_8_8888 + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + out_reverse_8_0565 = L1: 14.29 L2: 13.58 M: 12.14 ( 24.16%) HT: 9.23 VT: 9.12 R: 8.84 RT: 4.75 ( 36Kops/s) + out_reverse_8_8888 = L1: 27.46 L2: 23.24 M: 17.41 ( 57.73%) HT: 12.61 VT: 12.47 R: 11.79 RT: 5.86 ( 41Kops/s) + + Optimized: + out_reverse_8_0565 = L1: 28.24 L2: 25.64 M: 20.63 ( 41.05%) HT: 16.69 VT: 16.14 R: 15.50 RT: 8.69 ( 52Kops/s) + out_reverse_8_8888 = L1: 52.78 L2: 41.44 M: 23.50 ( 77.94%) HT: 18.79 VT: 18.16 R: 16.90 RT: 9.11 ( 53Kops/s) + +commit 35cc965514ca6e665c18411fcf66db826d559c2a +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Dec 20 11:28:25 2012 -0500 + + pixman-filter.c: Cope with NULL returns from malloc() + + v2: Don't return a pointer to 
uninitialized memory when the allocation + of horz and vert fails, but allocation of params doesn't. + +commit 58526cfc7290a740f61e288f09fe721c4e6511bd +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Aug 26 22:06:27 2012 -0400 + + Handle solid images in the noop iterator + + The noop src iterator already has code to handle solid images, but + that code never actually runs currently because it is not possible for + an image to have both a format code of PIXMAN_solid and a flag of + FAST_PATH_BITS_IMAGE. + + If these two were to be set at the same time, the + fast_composite_tiled_repeat() fast path would trigger for solid images + (because it triggers for PIXMAN_any formats, which includes + PIXMAN_solid), but for solid images we can usually do better than that + fast path. + + So this patch removes _pixman_solid_fill_iter_init() and instead + handles such images (along with repeating 1x1 bits images without an + alpha map) in pixman-noop.c. + + When a 1x1R image is involved in the general composite path, before + this patch, it would hit this code in repeat() in pixman-inlines.h: + + while (*c >= size) + *c -= size; + while (*c < 0) + *c += size; + + and those loops could run for a huge number of iterations (proportional + to the composite width). For such cases, the performance improvement + is really big: + + ./test/lowlevel-blt-bench -n add_n_8888: + + Before: + + add_n_8888 = L1: 3.86 L2: 3.78 M: 1.40 ( 0.06%) HT: 1.43 VT: 1.41 R: 1.41 RT: 1.38 ( 19Kops/s) + + After: + + add_n_8888 = L1:1236.86 L2:2468.49 M:1097.88 ( 49.04%) HT:476.49 VT:429.05 R:417.04 RT:155.12 ( 817Kops/s) + +commit 480dd38fd190fb7ca4ff172a31a4a6ef2944f20c +Author: Marko Lindqvist <cazfi74@gmail.com> +Date: Thu Jan 3 06:38:01 2013 +0200 + + Fix build with automake-1.13 + + Automake-1.13 has removed long obsolete AM_CONFIG_HEADER macro ( + http://lists.gnu.org/archive/html/automake/2012-12/msg00038.html ) + and autoreconf errors out upon seeing it. 
+ + Attached patch replaces obsolete AM_CONFIG_HEADER with now proper + AC_CONFIG_HEADERS. + +commit 1abde88ae60ae0877073d85cbf5b39013337f5da +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Thu Dec 20 05:14:39 2012 +0200 + + Use more appropriate types and remove a magic constant + +commit c1fd5a42439b21872170979d8c400cbb374e1f9d +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Thu Dec 20 05:00:46 2012 +0200 + + Define SIZE_MAX if it is not provided by the standard C headers + + C++ compilers do not define SIZE_MAX. It is also not available + if the code is compiled by some C compilers: + http://lists.freedesktop.org/archives/pixman/2012-August/002196.html + +commit 66c429282282176cdb5913b7396116c28725363e +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Sun Dec 16 04:03:58 2012 +0200 + + Rename 'xor' variable to 'filler' (because 'xor' is a C++ keyword) + +commit 4dfda2adfe2eb1130fc27b1da35df778284afd91 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 14 21:53:34 2012 -0500 + + float-combiner.c: Change tests for x == 0.0 tests to - FLT_MIN < x < FLT_MIN + + pixman-float-combiner.c currently uses checks like these: + + if (x == 0.0f) + ... + else + ... / x; + + to prevent division by 0. In theory this is correct: a division-by-zero + exception is only supposed to happen when the floating point numerator is + exactly equal to a positive or negative zero. + + However, in practice, the combination of x87 and gcc optimizations + causes issues. The x87 registers are 80 bits wide, which means the + initial test: + + if (x == 0.0f) + + may be false when x is an 80 bit floating point number, but when x is + rounded to a 32 bit single precision number, it becomes equal to + 0.0. In principle, gcc should compensate for this quirk of x87, and + there are some options such as -ffloat-store, -fexcess-precision=standard, + and -std=c99 that will make it do so, but these all have a performance + cost. 
It is also possible to set the FPU to a mode that makes it do + all computation with single or double precision, but that would + require pixman to save the existing mode before doing anything with + floating point and restore it afterwards. + + Instead, this patch side-steps the issue by replacing exact checks for + equality with zero with a new macro that checks whether the value is + between -FLT_MIN and FLT_MIN. + + There is extensive reading material about this issue linked off the + infamous gcc bug 323: + + http://gcc.gnu.org/bugzilla/show_bug.cgi?id=323 + +commit 2734071d7bee699401dc8c98d5c2ef0e2dbb0c91 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Thu Dec 6 17:13:16 2012 +0200 + + ARM: make use of UQADD8 instruction even in generic C code paths + + ARMv6 has UQADD8 instruction, which implements unsigned saturated + addition for 8-bit values packed in 32-bit registers. It is very useful + for UN8x4_ADD_UN8x4, UN8_rb_ADD_UN8_rb and ADD_UN8 macros (which would + otherwise need a lot of arithmetic operations to simulate this operation). + Since most of the major ARM linux distros are built for ARMv7, we are + much less dependent on runtime CPU detection and can get practical + benefits from conditional compilation here for a lot of users. 
+ + The results of cairo-perf-trace benchmark on ARM Cortex-A15 with pixman + compiled by gcc 4.7.2 and PIXMAN_DISABLE set to "arm-simd arm-neon": + + Speedups + ======== + image firefox-talos-gfx (29938.22 0.12%) -> (27814.76 0.51%) : 1.08x speedup + image firefox-asteroids (23241.11 0.07%) -> (21795.19 0.07%) : 1.07x speedup + image firefox-canvas-alpha (174519.85 0.08%) -> (164788.64 0.20%) : 1.06x speedup + image poppler (9464.46 1.61%) -> (8991.53 0.14%) : 1.05x speedup + +commit f9a41703b2d46c988b9e4e378d27396f718006ae +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Mon Dec 3 03:01:21 2012 +0200 + + Faster conversion from a8r8g8b8 to r5g6b5 in C code + + This change reduces 3 shifts, 3 ANDs and 2 ORs (total 8 arithmetic + operations) to 3 shifts, 2 ANDs and 2 ORs (total 7 arithmetic + operations). + + We get garbage in the high 16 bits of the result, which might need + to be cleared when casting to uint16_t (it would bring us back to + total 8 arithmetic operations). However in the case if the result + of a8r8g8b8->r5g6b5 conversion is immediately stored to memory, no + extra instructions for clearing these garbage bits are needed. + + This allows the a8r8g8b8->r5g6b5 conversion code to be compiled + into 4 instructions for ARM instead of 5 (assuming a good optimizing + compiler), which has no pipeline stalls on ARM11 as an additional + bonus. 
+ + The change in benchmark results for 'lowlevel-blt-bench src_8888_0565' + with PIXMAN_DISABLE="arm-simd arm-neon mips-dspr2 mmx sse2" and pixman + compiled by gcc-4.7.2: + + MIPS 74K 480MHz : 40.44 MPix/s -> 40.13 MPix/s + ARM11 700MHz : 50.28 MPix/s -> 62.85 MPix/s + ARM Cortex-A8 1000MHz : 124.38 MPix/s -> 141.85 MPix/s + ARM Cortex-A15 1700MHz : 281.07 MPix/s -> 303.29 MPix/s + Intel Core i7 2800MHz : 515.92 MPix/s -> 531.16 MPix/s + + The same trick was used in xomap (X server for Nokia N800/N810): + http://repository.maemo.org/pool/diablo/free/x/xorg-server/ + xorg-server_1.3.99.0~git20070321-0osso20083801.tar.gz + +commit 3922e90c400fca3ac43dc77b8dd0c0591e7e4fbc +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Mon Dec 3 02:50:20 2012 +0200 + + Change CONVERT_XXXX_TO_YYYY macros into inline functions + + It is easier and safer to modify their code in the case if the + calculations need some temporary variables. And the temporary + variables will be needed soon. + +commit e4519360c15772ac51038b9f86e3f730f06cfb65 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Mon Dec 3 05:44:36 2012 +0200 + + test: add "src_0565_8888" to lowlevel-blt-bench + +commit 6a6c8c51ed9e7272e624b3c99187ddf71d19a0fd +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Dec 13 15:37:40 2012 -0500 + + pixman_composite_trapezoids(): Check for NULL return from create_bits() + + A check is needed that the creation of the temporary image in + pixman_composite_trapezoids() succeeds. + + Fixes crash in stress-test -s 0x313c on my system. + +commit c2cb303d33ec11390b93cabd90f0f95bc9264113 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Dec 13 15:26:17 2012 -0500 + + pixman_composite_trapezoids: Return early if mask_format is not of TYPE_ALPHA + + stress-test -s 0x17ee crashes because pixman_composite_trapezoids() is + given a mask_format of PIXMAN_c8, which causes it to create a + temporary image with that format but without a palette. 
This causes + crashes later. + + The only mask_formats that we actually support are those of TYPE_ALPHA, + so this patch adds a return_if_fail() to ensure this. + + Similarly, although currently it won't crash if given an invalid + format, alpha-only formats have always been the only thing that made + sense for the pixman_rasterize_edges() functions, so add a + return_if_fail() ensuring that the destination format is of type + PIXMAN_TYPE_ALPHA. + +commit 1f0c02811ea71b36380b9d4029a248659bd9af50 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Dec 13 11:21:16 2012 -0500 + + Add testing of trapezoids to stress-test + + The entry points add_trapezoids(), rasterize_trapezoid() and + composite_trapezoid() are exercised with random trapezoids. + + This uncovers crashes with stress-test seeds 0x17ee and 0x313c. + +commit 526dc06e5694172abf979c03a5cf530207fe2d27 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Dec 8 06:06:34 2012 -0500 + + demos/radial-test: Add checkerboard to display the alpha channel + +commit 6402b2aa0c2215a5add233b3c1bc2ae634d43aaf +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Dec 8 06:46:38 2012 -0500 + + demos/conical-test: Use the draw_checkerboard() utility function + + Instead of having its own copy. + +commit e382e52d675a4ae86ed94ab1124ea7d98c3db75a +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Dec 8 06:44:24 2012 -0500 + + test/utils.[ch]: Add utility function to draw a checkerboard + + This is useful in demo programs to display the alpha channel. + +commit b0a6504122ba4f585fb60626ec71bf613fc64fae +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 7 19:51:19 2012 -0500 + + radial: When comparing t to mindr, use >= rather than > + + Radial gradients are conceptually rendered as a sequence of circles + generated by linearly extrapolating from the two circles given by the + gradient specification. 
Any circles in that sequence that would end up + with a negative radius are not drawn, a condition that is enforced by + checking that t * dr is bigger than mindr: + + if (t * dr > mindr) + + However, it is legitimate for a circle to have radius exactly 0, so + the test should use >= rather than >. + + This gets rid of the dots in demos/radial-test except for when the c2 + circle has radius 0 and a repeat mode of either NONE or NORMAL. Both + those dots correspond to a t value of 1.0, which is outside the + defined interval of [0.0, 1.0) and therefore subject to the repeat + algorithm. As a result, in the NONE case, a value of 1.0 turns into + transparent black. In the NORMAL case, 1.0 wraps around and becomes + 0.0 which is red, unlike 0.99 which is blue. + + Cc: ranma42@gmail.com + +commit 54aca22058e8f4daf999b37e5c5e6ddd8e67f811 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 7 19:43:53 2012 -0500 + + demos/radial-test: Add zero-radius circles to demonstrate rendering bugs + + Add two new gradient columns, one where the start circle has radius + 0 and one where the end circle has radius 0. All the new gradients + except for one are rendered with a bright dot in the middle. In most + but not all cases this is incorrect. + + Cc: ranma42@gmail.com + +commit fdab3c1b6cd9c5e197ec3f6bc0a03da32880e317 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Sat Dec 8 15:16:51 2012 +0200 + + test: Workaround unaligned MOVDQA bug (http://gcc.gnu.org/PR55614) + + Just use SSE2 intrinsics to do unaligned memory accesses as + a workaround for this gcc bug related to vector extensions. + +commit 2bc59006d7fe91abf68a2061ad86c06e1b2964ab +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Fri Nov 30 12:00:47 2012 +0200 + + Improve performance of combine_over_u + + The generic C over_u combiner can be a lot faster with the + addition of special shortcuts for 0xFF and 0x00 alpha/mask + values. 
This is already implemented in C and SSE2 fast paths. + + Profiling the run of cairo-perf-trace benchmarks with PIXMAN_DISABLE + environment variable set to "fast mmx sse2" on Intel Core i7: + + === before === + + 37.32% cairo-perf-trac libpixman-1.so.0.29.1 [.] combine_over_u + 21.37% cairo-perf-trac libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_no_repeat_8888 + 13.51% cairo-perf-trac libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_affine_none_a8r8g8b8 + 2.96% cairo-perf-trac libpixman-1.so.0.29.1 [.] radial_compute_color + 2.74% cairo-perf-trac libpixman-1.so.0.29.1 [.] fetch_scanline_a8 + 2.71% cairo-perf-trac libpixman-1.so.0.29.1 [.] fetch_scanline_x8r8g8b8 + 2.17% cairo-perf-trac libpixman-1.so.0.29.1 [.] _pixman_gradient_walker_pixel + 1.86% cairo-perf-trac libcairo.so.2.11200.0 [.] _cairo_tor_scan_converter_generate + 1.57% cairo-perf-trac libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_affine_pad_a8r8g8b8 + 0.97% cairo-perf-trac libpixman-1.so.0.29.1 [.] combine_in_reverse_u + 0.96% cairo-perf-trac libpixman-1.so.0.29.1 [.] combine_over_ca + + === after === + + 28.79% cairo-perf-trac libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_no_repeat_8888 + 18.44% cairo-perf-trac libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_affine_none_a8r8g8b8 + 15.54% cairo-perf-trac libpixman-1.so.0.29.1 [.] combine_over_u + 3.94% cairo-perf-trac libpixman-1.so.0.29.1 [.] radial_compute_color + 3.69% cairo-perf-trac libpixman-1.so.0.29.1 [.] fetch_scanline_a8 + 3.69% cairo-perf-trac libpixman-1.so.0.29.1 [.] fetch_scanline_x8r8g8b8 + 2.94% cairo-perf-trac libpixman-1.so.0.29.1 [.] _pixman_gradient_walker_pixel + 2.52% cairo-perf-trac libcairo.so.2.11200.0 [.] _cairo_tor_scan_converter_generate + 2.08% cairo-perf-trac libpixman-1.so.0.29.1 [.] bits_image_fetch_bilinear_affine_pad_a8r8g8b8 + 1.31% cairo-perf-trac libpixman-1.so.0.29.1 [.] combine_in_reverse_u + 1.29% cairo-perf-trac libpixman-1.so.0.29.1 [.] 
combine_over_ca + +commit 8ca4e144724ba2041bc5ef077ccf6d24e7cf4d1f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Nov 26 14:27:34 2012 -0500 + + Add fast paths for separable convolution + + Similar to the fast paths for general affine access, add some fast + paths for the separable filter for all combinations of formats + x8r8g8b8, a8r8g8b8, r5g6b5, a8 with the four repeat modes. + + It is easy to see the speedup in the demos/scale program. + +commit 4f18ba30cea56331e30992242201b20954c8f7f2 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Dec 4 13:17:49 2012 -0500 + + Add demo program for conical gradients + + This new test is derived from radial-test.c and displays conical + gradients at various angles. + + It also demonstrates how PIXMAN_REPEAT_NORMAL is supposed to work when + used with a gradient specification where the first stop is not a 0.0: + In this case the gradient is supposed to have a smooth transition from + the last stop back to the first stop with no sharp transitions. It + also shows that the repeat mode is not ignored for conical gradients + as one might be tempted to think. + +commit 3a98787bddeb007a1cd2b86235205774c15250f2 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Nov 12 12:27:39 2012 -0500 + + Add demos/zone_plate.png + + The zone plate image is a useful test case for image scalers because + it contains all representable frequencies, so any imperfection in + resampling filters will show up as Moire patterns. + + This version is symmetric around the midpoint of the image, so since + rotating it is supposed to be a noop, it can also be used to verify + that the resampling filters don't shift the image. + + V2: Run the file through OptiPNG to cut the size in half, as suggested + by Siarhei. 
+ +commit 97491ed26cfd4bad9cceffa789bfcbef77421d38 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Nov 22 10:18:26 2012 -0500 + + demos: Add new demo program, "scale" + + This program allows interactively scaling and rotating images with + using various filters and repeat modes. It uses + pixman_filter_create_separate_convolution() to generate the filters. + +commit 7f5bb22d17f17c2032914163a318f4ec438ba280 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Nov 22 10:16:16 2012 -0500 + + demos/gtk-utils.[ch]: Add pixman_image_from_file() + + This function uses GdkPixbuf to load various common formats such as + .png and .jpg into a pixman image. + +commit 6915f3e24f4169260a8ad6ab7ff3087388dbe5db +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Nov 22 10:15:06 2012 -0500 + + Add new pixman_filter_create_separable_convolution() API + + This new API is a helper function to create filter parameters suitable + for use with PIXMAN_FILTER_SEPARABLE_CONVOLUTION. + + For each dimension, given a scale factor, reconstruction and sample + filter kernels, and a subsampling resolution, this function will + compute a convolution of the two kernels scaled appropriately, then + sample that convolution and return the resulting vectors in a form + suitable for being used as parameters to + PIXMAN_FILTER_SEPARABLE_CONVOLUTION. + + The filter kernels offered are the following: + + - IMPULSE: Dirac delta function, ie., point sampling + - BOX: Box filter + - LINEAR: Linear filter, aka. "Tent" filter + - CUBIC: Cubic filter, currently Mitchell-Netravali + - GAUSSIAN: Gaussian function, sigma=1, support=3*sigma + - LANCZOS2: Two-lobed Lanczos filter + - LANCZOS3: Three-lobed Lanczos filter + - LANCZOS3_STRETCHED: Three-lobed Lanczos filter, stretched by 4/3.0. + This is the "Nice" filter from Dirty Pixels by + Jim Blinn. 
+ + The intended way to use this function is to extract scaling factors + from the transformation and then pass those to this function to get a + filter suitable for compositing with that transformation. The filter + kernels can be chosen according to quality and performance tradeoffs. + + To get equivalent quality to GdkPixbuf for downscalings, use BOX for + both reconstruction and sampling. For upscalings, use LINEAR for + reconstruction and IMPULSE for sampling (though note that for + upscaling in both X and Y directions, simply using + PIXMAN_FILTER_BILINEAR will likely be a better choice). + +commit 68760d3fe1351cb745aedcada7d765edc08bbe8b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Nov 22 10:17:56 2012 -0500 + + rounding.txt: Describe how SEPARABLE_CONVOLUTION filter works + + Add some notes on how to compute the convolution matrices to be used + with the SEPARABLE_CONVOLUTION filter. + +commit 6fd480b17c8398c217e4c11e826c82dbb8288006 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Nov 22 10:14:06 2012 -0500 + + Add new filter PIXMAN_FILTER_SEPARABLE_CONVOLUTION + + This filter is a new way to use a convolution matrix for filtering. In + contrast to the existing CONVOLUTION filter, this new variant is + different in two respects: + + - It is subsampled: Instead of just one convolution matrix, this + filter chooses between a number of matrices based on the subpixel + sample location, allowing the convolution kernel to be sampled at a + higher resolution. + + - It is separable: Each matrix is specified as the tensor product of + two vectors. This has the advantages that many fewer values have to + be stored, and that the filtering can be done separately in the x + and y dimensions (although the initial implementation doesn't + actually do that). + + The motivation for this new filter is to improve image downsampling + quality. 
Currently, the best pixman can do is the regular convolution + filter which is limited to coarsely sampled convolution kernels. + + With this new feature, any separable filter can be used at any desired + resolution. + +commit 7e39861da3655779ce76a72592feed3c1dd90017 +Author: Benjamin Gilbert <bgilbert@backtick.net> +Date: Sat Dec 1 23:55:31 2012 -0500 + + Fix thread safety on mingw-w64 and clang + + After finding a working TLS storage class specifier, configure was + continuing to test other candidates. This caused it to prefer + __declspec(thread) over __thread. However, __declspec(thread) is + ignored with a warning by mingw-w64 [1] and silently ignored by clang [2]. + The resulting binary behaved as if PIXMAN_NO_TLS was defined. + + Bug introduced by a069da6c. + + [1] https://bugs.freedesktop.org/show_bug.cgi?id=57591 + [2] http://lists.freedesktop.org/archives/pixman/2012-October/002320.html + +commit ebedd9a2ad8e841cd8323838b5136657d9ebb988 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Sun Nov 25 02:59:25 2012 +0200 + + test: Get rid of the obsolete 'prng_rand_N' and 'prng_rand_u32' + + They are the same as 'prng_rand_n' and 'prng_rand' + +commit b31a696263f1ae9aebb9bb21b93a0c15453bf611 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Sun Nov 25 02:50:35 2012 +0200 + + test: Switch to the new PRNG instead of old LCG + + Wallclock time for running pixman "make check" (compile time not included): + + ----------------------------+----------------+-----------------------------+ + | old PRNG (LCG) | new PRNG (Bob Jenkins) | + Processor type +----------------+------------+----------------+ + | gcc 4.5 | gcc 4.5 | gcc 4.7 (simd) | + ----------------------------+----------------+------------+----------------+ + quad Intel Core i7 @2.8GHz | 0m49.494s | 0m43.722s | 0m37.560s | + dual ARM Cortex-A15 @1.7GHz | 5m8.465s | 4m37.375s | 3m45.819s | + IBM Cell PPU @3.2GHz | 23m0.821s | 20m38.316s | 16m37.513s | + 
----------------------------+----------------+------------+----------------+ + + But some tests got a particularly large boost. For example benchmarking and + profiling blitters-test on Core i7: + + === before === + + $ time ./blitters-test + + real 0m10.907s + user 0m55.650s + sys 0m0.000s + + 70.45% blitters-test blitters-test [.] create_random_image + 15.81% blitters-test blitters-test [.] compute_crc32_for_image_internal + 2.26% blitters-test blitters-test [.] _pixman_implementation_lookup_composite + 1.07% blitters-test libc-2.15.so [.] _int_free + 0.89% blitters-test libc-2.15.so [.] malloc_consolidate + 0.87% blitters-test libc-2.15.so [.] _int_malloc + 0.75% blitters-test blitters-test [.] combine_conjoint_general_u + 0.61% blitters-test blitters-test [.] combine_disjoint_general_u + 0.40% blitters-test blitters-test [.] test_composite + 0.31% blitters-test libc-2.15.so [.] _int_memalign + 0.31% blitters-test blitters-test [.] _pixman_bits_image_setup_accessors + 0.28% blitters-test libc-2.15.so [.] malloc + + === after === + + $ time ./blitters-test + + real 0m3.655s + user 0m20.550s + sys 0m0.000s + + 41.77% blitters-test.n blitters-test.new [.] compute_crc32_for_image_internal + 15.77% blitters-test.n blitters-test.new [.] prng_randmemset_r + 6.15% blitters-test.n blitters-test.new [.] _pixman_implementation_lookup_composite + 3.09% blitters-test.n libc-2.15.so [.] _int_free + 2.68% blitters-test.n libc-2.15.so [.] malloc_consolidate + 2.39% blitters-test.n libc-2.15.so [.] _int_malloc + 2.27% blitters-test.n blitters-test.new [.] create_random_image + 2.22% blitters-test.n blitters-test.new [.] combine_conjoint_general_u + 1.52% blitters-test.n blitters-test.new [.] combine_disjoint_general_u + 1.40% blitters-test.n blitters-test.new [.] test_composite + 1.02% blitters-test.n blitters-test.new [.] prng_srand_r + 1.00% blitters-test.n blitters-test.new [.] _pixman_image_validate + 0.96% blitters-test.n blitters-test.new [.] 
_pixman_bits_image_setup_accessors + 0.90% blitters-test.n libc-2.15.so [.] malloc + +commit 309e66f047cab0951d8e42628dcd181e2d14c58d +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Sat Nov 24 23:22:48 2012 +0200 + + test: Search/replace 'lcg_*' -> 'prng_*' + + The 'lcg' prefix is going to be misleading if we replace + PRNG algorithm. + +commit d6545a2fc6f65c4959c6f85a15e95675347c0940 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Sat Nov 24 19:43:41 2012 +0200 + + test: Added a better PRNG (pseudorandom number generator) + + This adds a fast SIMD-optimized variant of a small noncryptographic + PRNG originally developed by Bob Jenkins: + http://www.burtleburtle.net/bob/rand/smallprng.html + + The generated pseudorandom data is good enough to pass "Big Crush" + tests from TestU01 (http://en.wikipedia.org/wiki/TestU01). + + SIMD code uses http://gcc.gnu.org/onlinedocs/gcc/Vector-Extensions.html + which is a GCC specific extension. There is also a slower alternative + code path, which should work with any C compiler. + + The performance of filling buffer with random data: + Intel Core i7 @2.8GHz (SSE2) : ~5.9 GB/s + ARM Cortex-A15 @1.7GHz (NEON) : ~2.2 GB/s + IBM Cell PPU @3.2GHz (Altivec) : ~1.7 GB/s + +commit 41f98a07fc3235b64713a39238238801304ac346 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Fri Nov 23 09:07:23 2012 +0200 + + test: Change is_little_endian() into inline function + + Also dropped redundant volatile keyword because any object + can be accessed via char* pointer without breaking aliasing + rules. The compilers are able to optimize this function to either + constant 0 or 1. + +commit 978bab253d1d061b00b5e80aa45ab6986aac466f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Nov 21 11:43:31 2012 -0500 + + Add text file rounding.txt describing how rounding works + + It is not entirely obvious how pixman gets from "location in the + source image" to "pixel value stored in the destination". 
This file + describes how the filters work, and in particular how positions are + rounded to samples. + +commit 74319e9d39f5d7f85cb75fcb91343f298b0e62e2 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Nov 20 23:28:43 2012 -0500 + + Convolution filter: round color values instead of truncating + + The pixel computed by the convolution filter should be rounded off, + not truncated. As a simple example consider a convolution matrix + consisting of five times 0x3333. If all five all five input pixels are + 0xff, then the result of truncating will be + + (5 * 0x3333 * 255) >> 16 = 254 + + But the real value of the computation is (5 * 0x3333 / 65536.0) * 254 + = 254.9961, so the error is almost 1. If the user isn't very careful + about normalizing the convolution kernel so that it sums to one in + fixed point, such error might cause solid images to change color, or + opaque images to become translucent. + + The fix is simply to round instead of truncate. + +commit f0816ddaf4e61d9295de5b1cbe51f956db7fbd16 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Nov 20 03:23:51 2012 -0500 + + Round fixed-point multiplication + + After two fixed-point numbers are multiplied, the result is shifted + into place, but up until now pixman has simply discarded the low-order + bits instead of rounding to the closest number. + + Fix that by adding 0x8000 (or 0x2 in one place) before shifting and + update the test checksums to match. + +commit 44dd746bb68625b2f6be77c3f80292b45defe9d7 +Author: Stefan Weil <sw@weilnetz.de> +Date: Tue Nov 13 19:44:44 2012 +0100 + + test: Fix compiler warnings caused by unused code + + Signed-off-by: Stefan Weil <sw@weilnetz.de> + +commit 5f96022d3bca15050958512f1c15a0067d2225af +Author: Stefan Weil <sw@weilnetz.de> +Date: Tue Nov 13 19:38:32 2012 +0100 + + pixman: Use uintptr_t in type casts from pointer to integral value + + These modifications fix lots of compiler warnings for systems where + sizeof(unsigned long) != sizeof(void *). 
+ This is especially true for MinGW-w64 (64 bit Windows). + + Signed-off-by: Stefan Weil <sw@weilnetz.de> + +commit a96efd02d68b726d6d140d0bd211bc7cc1be127a +Author: Stefan Weil <sw@weilnetz.de> +Date: Tue Nov 13 19:44:15 2012 +0100 + + Always use xmmintrin.h for 64 bit Windows + + MinGW-w64 uses the GNU compiler and does not define _MSC_VER. + Nevertheless, it provides xmmintrin.h and must be handled + here like the MS compiler. Otherwise compilation fails due to + conflicting declarations. + + Signed-off-by: Stefan Weil <sw@weilnetz.de> + +commit 899e0d60524bcd2cff6cad6acb310181fb96b39a +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Mon Nov 12 22:48:51 2012 +0100 + + MIPS: DSPr2: Added several nearest neighbor fast paths with a8 mask: + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench -n + + Referent (before): + over_8888_8_0565 = L1: 9.62 L2: 8.85 M: 7.40 ( 39.27%) HT: 5.67 VT: 5.61 R: 5.45 RT: 2.98 ( 22Kops/s) + over_0565_8_0565 = L1: 7.90 L2: 7.49 M: 6.72 ( 26.75%) HT: 5.24 VT: 5.20 R: 5.06 RT: 2.90 ( 22Kops/s) + + Optimized: + over_8888_8_0565 = L1: 18.51 L2: 16.82 M: 12.13 ( 64.43%) HT: 10.06 VT: 9.88 R: 9.54 RT: 5.63 ( 31Kops/s) + over_0565_8_0565 = L1: 14.82 L2: 13.94 M: 11.34 ( 45.20%) HT: 9.45 VT: 9.35 R: 9.03 RT: 5.50 ( 31Kops/s) + +commit a432bdce6637aa96060b9f1e25aae51c6fb95670 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Mon Nov 12 22:48:53 2012 +0100 + + MIPS: DSPr2: Added more fast-paths for OVER operation: + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + over_n_0565 = L1: 14.48 L2: 21.36 M: 17.57 ( 23.30%) HT: 6.95 VT: 6.44 R: 6.39 RT: 2.16 ( 22Kops/s) + over_n_8888 = L1: 92.60 L2: 86.13 M: 24.41 ( 64.74%) HT: 8.94 VT: 8.06 R: 8.00 RT: 2.53 ( 25Kops/s) + + Optimized: + over_n_0565 = L1: 27.65 L2: 189.22 M: 58.19 ( 77.12%) HT: 52.80 VT: 49.88 R: 47.53 RT: 23.67 ( 72Kops/s) + over_n_8888 = L1: 235.99 L2: 230.86 M: 29.09 ( 77.11%) HT: 
27.95 VT: 27.24 R: 26.58 RT: 18.10 ( 67Kops/s) + +commit e33e9d3f55590c369c532b0305f928045e0a46cb +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Mon Nov 12 22:48:52 2012 +0100 + + MIPS: DSPr2: Added more fast-paths for SRC operation: + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + src_n_8_8888 = L1: 13.79 L2: 22.47 M: 17.55 ( 58.28%) HT: 6.95 VT: 6.46 R: 6.34 RT: 2.07 ( 20Kops/s) + src_n_8_8 = L1: 20.22 L2: 20.21 M: 18.20 ( 24.17%) HT: 6.65 VT: 6.22 R: 6.11 RT: 2.03 ( 20Kops/s) + + Optimized: + src_n_8_8888 = L1: 58.31 L2: 53.34 M: 25.69 ( 85.29%) HT: 22.55 VT: 21.44 R: 19.91 RT: 10.34 ( 48Kops/s) + src_n_8_8 = L1: 102.60 L2: 89.43 M: 65.01 ( 86.32%) HT: 37.87 VT: 37.02 R: 32.43 RT: 12.41 ( 51Kops/s) + +commit d881e1f5801ca0aefecccb43db05db539b3080d5 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Nov 11 14:05:54 2012 -0500 + + Allow src and dst to be identical in pixman_f_transform_invert() + + It is useful to be able to invert a matrix in place, but currently + pixman_f_transform_invert() will produce wrong results if you pass the + same matrix as both source and destination. + + Fix that by inverting into a temporary matrix and then copying that to + the destination. + +commit 614e7aaf14652c726b067bbc7562ef237dcd50de +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Nov 8 03:11:51 2012 -0500 + + pixman.h: Add typedefs for pixman_f_transform and pixman_f_vector + +commit b2e0e240fec4a8eaa7fe8da3a6807bcb8ac97edf +Author: Joshua Root <jmr@macports.org> +Date: Fri Nov 9 14:39:14 2012 +1100 + + Fix undeclared variable use and sysctlbyname error handling on ppc + + Fixes bug 56889. 
+ +commit 400436dc52450359de35cac9efa6aea631cf34e9 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Oct 31 13:14:07 2012 -0400 + + pixman_image_composite: Reduce opaque masks to NULL + + When the mask is known to be opaque, we might as well reduce it to + NULL to take advantage of the various fast paths that operate on NULL + masks. + +commit f2ada9e63fdd1034766e86d71008e0d819074f27 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Nov 7 13:45:09 2012 -0500 + + Post-release version bump to 0.29.1 + +commit 8a2ff3e0ef0449921d962f8b9c093c2353ffd945 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Nov 7 13:40:34 2012 -0500 + + Pre-release version bump to 0.28.0 + +commit 4b91f6ca72db3e8cbd7e97e9ef44be2f8994040d +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Oct 25 10:42:26 2012 -0400 + + Post-release version bump to 0.27.5 + +commit 0de3f3344908757b61f9f51b59d4a39f7447451b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Oct 25 10:35:27 2012 -0400 + + Pre-release version bump to 0.27.4 + +commit f0750258459580bbc9f136710f8e5c551bd01a0f +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Sun Oct 14 11:58:52 2012 +0200 + + MIPS: DSPr2: Added more fast-paths for ADD operation: - add_8888_8888_8888 - add_8_8 - add_8888_8888 + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + add_8888_8888_8888 = L1: 17.55 L2: 13.35 M: 8.13 ( 93.95%) HT: 6.60 VT: 6.64 R: 6.45 RT: 3.47 ( 26Kops/s) + add_8_8 = L1: 86.07 L2: 84.89 M: 62.36 ( 90.11%) HT: 36.36 VT: 34.74 R: 29.56 RT: 11.56 ( 52Kops/s) + add_8888_8888 = L1: 95.59 L2: 73.05 M: 17.62 (101.84%) HT: 15.46 VT: 15.01 R: 13.94 RT: 6.71 ( 42Kops/s) + + Optimized: + add_8888_8888_8888 = L1: 41.52 L2: 33.21 M: 11.97 (138.45%) HT: 10.47 VT: 10.19 R: 9.42 RT: 4.86 ( 32Kops/s) + add_8_8 = L1: 135.06 L2: 104.82 M: 57.13 ( 82.58%) HT: 34.79 VT: 36.60 R: 28.28 RT: 10.54 ( 51Kops/s) + add_8888_8888 = L1: 176.36 L2: 67.82 
M: 17.48 (101.06%) HT: 15.16 VT: 14.62 R: 13.88 RT: 8.05 ( 45Kops/s) + +commit ca83717c63813b6f53f89dd94b5771bd32382a18 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Sun Oct 14 11:58:51 2012 +0200 + + MIPS: DSPr2: Added more fast-paths for ADD operation: - add_0565_8_0565 - add_8888_8_8888 - add_8888_n_8888 + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + add_0565_8_0565 = L1: 8.89 L2: 8.37 M: 7.35 ( 29.22%) HT: 5.90 VT: 5.85 R: 5.67 RT: 3.31 ( 26Kops/s) + add_8888_8_8888 = L1: 17.22 L2: 14.17 M: 9.89 ( 65.56%) HT: 7.57 VT: 7.50 R: 7.36 RT: 4.10 ( 30Kops/s) + add_8888_n_8888 = L1: 17.79 L2: 14.87 M: 10.35 ( 54.89%) HT: 5.19 VT: 4.93 R: 4.92 RT: 1.90 ( 19Kops/s) + + Optimized: + add_0565_8_0565 = L1: 21.72 L2: 20.01 M: 14.96 ( 59.54%) HT: 12.03 VT: 11.81 R: 11.26 RT: 6.33 ( 37Kops/s) + add_8888_8_8888 = L1: 47.42 L2: 38.64 M: 15.90 (105.48%) HT: 13.34 VT: 13.03 R: 11.84 RT: 6.63 ( 38Kops/s) + add_8888_n_8888 = L1: 54.83 L2: 42.66 M: 17.36 ( 92.11%) HT: 15.20 VT: 14.82 R: 13.66 RT: 7.83 ( 41Kops/s) + +commit 52d20e692ebc605077448ab6f52fd257f83481b2 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Sun Oct 14 11:58:50 2012 +0200 + + MIPS: DSPr2: Added fast-paths for ADD operation: - add_n_8_8 - add_n_8_8888 - add_8_8_8 + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + add_n_8_8 = L1: 41.37 L2: 37.83 M: 30.38 ( 60.45%) HT: 23.70 VT: 22.85 R: 21.51 RT: 10.32 ( 45Kops/s) + add_n_8_8888 = L1: 16.01 L2: 14.46 M: 11.64 ( 46.32%) HT: 5.50 VT: 5.18 R: 5.06 RT: 1.89 ( 18Kops/s) + add_8_8_8 = L1: 13.26 L2: 12.47 M: 11.16 ( 29.61%) HT: 8.09 VT: 8.04 R: 7.68 RT: 3.90 ( 29Kops/s) + + Optimized: + add_n_8_8 = L1: 96.03 L2: 79.37 M: 51.89 (103.31%) HT: 32.59 VT: 31.29 R: 28.52 RT: 11.08 ( 46Kops/s) + add_n_8_8888 = L1: 53.61 L2: 46.92 M: 23.78 ( 94.70%) HT: 19.06 VT: 18.64 R: 17.30 RT: 9.15 ( 43Kops/s) + add_8_8_8 = L1: 89.65 L2: 66.82 M: 
37.10 ( 98.48%) HT: 22.10 VT: 21.74 R: 20.12 RT: 8.12 ( 41Kops/s) + +commit 9df645dfb04b5a790faabe1e9a84fc37287d91b0 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Fri Oct 19 01:59:16 2012 +0300 + + Workaround for FTBFS with gcc 4.6 (http://gcc.gnu.org/PR54965) + + GCC 4.6 has problems with force_inline, so just use normal inline instead. + Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=55630 + +commit 31e5a0a393defb8e0534ab1bde29ab23fc04795d +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Oct 12 18:34:33 2012 -0400 + + pixman_composite_trapezoids(): don't clip to extents for some operators + + pixman_composite_trapezoids() is supposed to composite across the + entire destination, but it actually only composites across the extent + of the trapezoids. For operators such as ADD or OVER this doesn't + matter since a zero source has no effect on the destination. But for + operators such as SRC or IN, it does matter. + + So for such operators where a zero source has an effect, don't clip to + the trap extents. + +commit 65db2362e2793a527c2e831cceb81d8d6ad51b8f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Oct 12 18:29:56 2012 -0400 + + pixman_composite_trapezoids(): Factor out extents computation + + The computation of the extents rectangle is moved to its own + function. + +commit 2d9cb563b415e90cef898de03de7ed9c6f091db1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Oct 12 18:07:29 2012 -0400 + + Add new pixman_image_create_bits_no_clear() API + + When pixman_image_create_bits() function is given NULL for bits, it + will allocate a new buffer and initialize it to zero. However, in some + cases, only a small region of the image is actually used; in that case + it is wasteful to touch all of the memory. + + The new pixman_image_create_bits_no_clear() works exactly like + _create_bits() except that it doesn't initialize any newly allocated + memory. 
+ +commit af803be17b4ea5f53db9af57b6c6ef06db99ebbd +Author: Benny Siegert <bsiegert@gmail.com> +Date: Sun Oct 14 16:28:48 2012 +0200 + + configure.ac: PIXMAN_LINK_WITH_ENV fix + + (fixes bug #52101) + + On MirBSD, the compiler produces a (harmless) warning when the compiler + is called without the standard CFLAGS: + + foo.c:0: note: someone does not honour COPTS correctly, passed 0 times + + However, PIXMAN_LINK_WITH_ENV considers _any_ output on stderr as an + error, even if the exit status of the compiler is 0. Furthermore, it + resets CFLAGS and LDFLAGS at the start. On MirBSD, this will lead to a + warning in each test, making all such tests fail. In particular, the + pthread_setspecific test fails, thus pixman is compiled without thread + support. This leads to compile errors later on, or at least it did when + I tried this on pkgsrc. Re-adding the saved CFLAGS, LDFLAGS and LIBS + before the test makes it work. + + The second hunk inverts the order of the pthread flag checks. On BSD + systems (this is true at least on OpenBSD and MirBSD), both -lpthread + and -pthread work but the latter is "preferred", whatever this means. + +commit 6e56098c0338ce74228187e4c96fed1a66cb0956 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Sat Sep 29 02:29:22 2012 +0300 + + Add missing force_inline to in() function used for C fast paths + +commit 90bcafa495c1074b0ea1d35f99aa4837917494bd +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Sun Jul 8 23:10:00 2012 +0300 + + MIPS: skip runtime detection for DSPr2 if -mdspr2 option is in CFLAGS + + This provides a way to enable MIPS DSP ASE optimizations if running + under qemu-user (where /proc/cpuinfo contains information about the + host processor instead of the emulated one). Can be used for running + pixman test suite in qemu-user when having no access to real MIPS + hardware. 
+ +commit d5f2f39319fc358cccda60abe0bc927bd27131c1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Oct 11 04:04:04 2012 -0400 + + region: Remove overlap argument from pixman_op() + + This is used to compute whether the regions in question overlap, but + nothing makes use of this information, so it can be removed. + +commit cb4f325ec0e844008075fe89ceb9f634ae41e7c9 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Oct 11 04:07:00 2012 -0400 + + region: Formatting fix + + The while part of a do/while loop was formatted as if it were a while + loop with an empty body. Probably some indent tool misinterpreted the + code at some point. + +commit 15b153d633fcfce886c30fee98599fddbf019ee8 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Oct 7 17:58:32 2012 -0400 + + Only regard images as pixbufs if they have identity transformations + + In order for a src/mask pair to be considered a pixbuf, they have to + have identical transformations, but we don't check for that. Since the + only fast paths we have for pixbufs require identity transformations, + it sufficies to check that both source and mask are + untransformed. + + This is also the reason that this bug can't be triggered by any test + code - if the source and mask had different transformations, we would + consider them a pixbuf, but then wouldn't take the fast path because + at least one of the transformations would be different from the + identity. + +commit 3d81d89c292058522cce91338028d9b4c4a23c24 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Oct 4 12:41:08 2012 -0400 + + Remove BUILT_SOURCES + + pixman-combine32.[ch] were the only built sources, so BUILT_SOURCES + can now be removed. 
+ +commit ec7aa11a6e4d0d02df9b339dfce9460dce954602 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Sep 23 03:52:34 2012 -0400 + + Speed up pixman_expand_to_float() + + GCC doesn't move the divisions out of the loop, so do it manually by + looking up the four (1.0f / mask) values in a table. Table lookups are + used under the theory that one L2 hit plus three L1 hits is preferable + to four floating point divisions. + +commit 8ccda2be30adf9dfcc3087b38a5062258324dcce +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Sep 21 18:36:16 2012 -0400 + + Don't auto-generate pixman-combine32.[ch] anymore + + Since pixman-combine64.[ch] are not used anymore, there is no point + generating these files from pixman-combine.[ch].template. + + Also get rid of dependency on perl in configure.ac. + +commit 4afd20cc71ba75190ebcead774b946157d0995a6 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Aug 18 14:39:29 2012 -0400 + + Remove 64 bit pipeline + + The 64 bit pipeline is not used anymore, so it can now be removed. + + Don't generate pixman-combine64.[ch] anymore. Don't generate the + pixman-srgb.c anymore. Delete all the 64 bit fetchers in + pixman-access.c, all the 64 bit iterator functions in + pixman-bits-image.c and all the functions that expand from 8 to 16 + bits. + +commit 5ff0bbd9721bb216a8332cbde18adc458af3cdec +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Aug 18 14:11:07 2012 -0400 + + Switch the wide pipeline over to using floating point + + In pixman-bits-image.c, remove bits_image_fetch_untransformed_64() and + add bits_image_fetch_untransformed_float(); change + dest_get_scanline_wide() to produce a floating point buffer, + + In the gradients, change *_get_scanline_wide() to call + pixman_expand_to_float() instead of pixman_expand(). + + In pixman-general.c change the wide Bpp to 16 instead of 8, and + initialize the buffers to 0 to prevent NaNs from causing trouble. 
+ + In pixman-noop.c make the wide solid iterator generate floating point + pixels. + + In pixman-solid-fill.c, cache a floating point pixel, and make the + wide iterator generate floating point pixels. + + Bug fix in bits_image_fetch_untransformed_repeat_normal + +commit e75bacc5f9196c3980ce331c7d53de5b7e92d699 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Aug 18 13:22:15 2012 -0400 + + pixman-access.c: Add floating point accessor functions + + Three new function pointer fields are added to bits_image_t: + + fetch_scanline_float + fetch_pixel_float + store_scanline_float + + similar to the existing 32 and 64 bit accessors. The fetcher_info_t + struct in pixman_access similarly gets a new get_scanline_float field. + + For most formats, the new get_scanline_float field is set to a new + function fetch_scanline_generic_float() that first calls the 32 bit + fetcher uses the 32 bit scanline fetcher and then expands these pixels + to floating point. + + For the 10 bpc formats, new floating point accessors are added that + use pixman_unorm_to_float() and pixman_float_to_unorm() to convert + back and forth. + + The PIXMAN_a8r8g8b8_sRGB format is handled with a 256-entry table that + maps 8 bit sRGB channels to linear single precision floating point + numbers. The sRGB->linear direction can then be done with a simple + table lookup. + + The other direction is currently done with 4096-entry table which + works fine for 16 bit integers, but not so great for floating + point. So instead this patch uses a binary search in the sRGB->linear + table. The existing 32 bit accessors for the sRGB format are also + converted to use this method. 
+ +commit 23252393a2dcae4dc5a7d03727dd66cdd81286ba +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Aug 18 13:02:31 2012 -0400 + + pixman-utils.c, pixman-private.h: Add floating point conversion routines + + A new struct argb_t containing a floating point pixel is added to + pixman-private.h and conversion routines are added to pixman-utils.c + to convert normalized integers to and from that struct. + + New functions: + + - pixman_expand_to_float() + Expands a buffer of integer pixels to a buffer of argb_t pixels + + - pixman_contract_from_float() + Converts a buffer of argb_t pixels to a buffer integer pixels + + - pixman_float_to_unorm() + Converts a floating point number to an unsigned normalized integer + + - pixman_unorm_to_float() + Converts an unsigned normalized integer to a floating point number + +commit 4760599ff3008ab0f1e36a7d4d362362817fd930 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Sep 9 17:56:53 2012 -0400 + + Add combiner test + + This test runs the new floating point combiners on random input with + divide-by-zero exceptions turned on. + + With the floating point combiners the only thing we guarantee is that + divide-by-zero exceptions are not generated, so change + enable_fp_exceptions() to only enable those, and rename accordingly. + +commit a5b459114e35c7a946362f1e5857e8a87a403ec3 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri May 14 00:42:04 2010 -0400 + + Add pixman-combine-float.c + + This file contains floating point implementations of combiners for all + pixman operators. These combiners operate on buffers containing single + precision floating point pixels stored in (a, r, g, b) order. + + The combiners are added to the pixman_implementation_t struct, but + nothing uses them yet. + + This commit incorporates a number of bug fixes contributed by Andrea + Canciani. 
+ + Some notes: + + - The combiners are making sure to never divide by zero regardless of + input, so an application could enable divide-by-zero exceptions and + pixman wouldn't generate any. + + - The operators are implemented according to the Render spec. Ie., + + - If the input pixels are between 0 and 1, then so is the output. + + - The source and destination coefficients for the conjoint and + disjoint operators are clamped to [0, 1]. + + - The PDF operators are not described in the render spec, and the + implementation here doesn't do any clamping except in the final + conversion from floating point to destination format. + + All of the above will need to be rethought if we add support for pixel + formats that can support negative and greater-than-one pixels. It is + in fact already the case in principle that convolution filters can + produce pixels with negative values, but since these go through the + broken "wide" path that narrows everything to 32 bits, these negative + values don't currently survive to the combiners. + +commit 7a9c2d586b2349b5e17966a96d7fe8c390abb75a +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jun 2 00:15:54 2012 -0400 + + blitters-test: Prepare for floating point + + Comment out some formats in blitters-test that are going to rely on + floating point in some upcoming patches. + +commit 600a06c81d3999bc6551c7e889726ed7b8bec84d +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Jul 11 03:27:49 2012 -0400 + + glyph-test: Prepare for floating point + + In preparation for an upcoming change of the wide pipe to use floating + point, comment out some formats in glyph-test that are going to be + using floating point and update the CRC32 value to match. + +commit 2e17b6dd4ee7c32684fb7ffc70d3ad3ebf7cb2ef +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Sep 29 10:34:17 2012 -0400 + + Make pixman.h more const-correct + + Add const to pointer arguments when the function doesn't change the + pointed-to data. 
+ + Also in add_glyphs() in pixman-glyph.c make 'white' in add_glyphs() + static and const. + +commit 183afcf1d95625a1f237ef349a1c8931d94d000d +Author: Matt Turner <mattst88@gmail.com> +Date: Sun Sep 30 11:59:23 2012 -0700 + + iwmmxt: Don't define dummy _mm_empty for >=gcc-4.8 + + Definition was not present in <4.8. + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=55451 + +commit d4b72eb6ccc1f004efedbc6552ee22499350be4d +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Sep 29 18:15:54 2012 -0400 + + rotate-test: Call image_endian_swap() in make_image() + + Otherwise the test fails on big-endian. + + Tested-by: Matt Turner <mattst88@gmail.com> + +commit aff796d6cee4cb81f0352c2f7d0c994229bd5ca1 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Mon Jun 25 22:36:52 2012 -0400 + + Add scaled nearest repeat fast paths + + Before this patch it was often faster to scale and repeat + in two passes because each pass used a fast path vs. + the slow path that the single pass approach takes. This + makes it so that the single pass approach has competitive + performance. + +commit 05560828c495ed9226b43b30e1824447e3d8eff3 +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Sep 21 16:34:24 2012 -0700 + + sse2: mark pack_565_2x128_128 as static force_inline + +commit de60e2e0e3eb6084f8f14b63f25b3cbfb012943f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Sep 15 03:13:09 2012 -0400 + + Fix for infinite-loop test + + The infinite loop detected by "affine-test 212944861" is caused by an + overflow in this expression: + + max_x = pixman_fixed_to_int (vx + (width - 1) * unit_x) + 1; + + where (width - 1) * unit_x doesn't fit in a signed int. This causes + max_x to be too small so that this: + + src_width = 0 + + while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) + src_width += src_image->bits.width; + + results in src_width being 0. Later on when src_width is used for + repeat calculations, we get the infinite loop. 
+ + By casting unit_x to int64_t, the expression no longer overflows and + affine-test 212944861 and infinite-loop no longer loop forever. + +commit aa311a4641b79eac39fe602b75d7bee3de9b1dce +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Sep 13 19:31:29 2012 -0400 + + test: Add infinite-loop test + + This test demonstrates a bug where a certain transformation matrix can + result in an infinite loop. It was extracted as a standalone version + of "affine-test 212944861". + + If given the option -nf, the test program will not call fail_after() + and therefore potentially run forever. + +commit d5c721768c9811ce22bc0cd50bdf1c7bccc264e0 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Sep 13 19:29:19 2012 -0400 + + affine-test: Print out the transformation matrix when verbose + + Printing out the translation and scale is a bit misleading because the + actual transformation matrix can be modified in various other ways. + + Instead simply print the whole transformation matrix that is actually + used.
+ +commit 292fce7a230dd253fff71bd1bb2fbf9b7996a892 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Fri Sep 14 09:31:26 2012 +0200 + + MIPS: DSPr2: Added OVER combiner and two new fast paths: - over_8888_8888 - over_8888_8888_8888 + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + over_8888_8888 = L1: 19.61 L2: 17.10 M: 11.16 ( 59.20%) HT: 16.47 VT: 15.81 R: 14.82 RT: 8.90 ( 50Kops/s) + over_8888_8888_8888 = L1: 13.56 L2: 11.22 M: 7.46 ( 79.18%) HT: 6.24 VT: 6.20 R: 6.11 RT: 3.95 ( 29Kops/s) + + Optimized: + over_8888_8888 = L1: 46.42 L2: 36.70 M: 16.69 ( 88.57%) HT: 17.11 VT: 16.55 R: 15.31 RT: 9.48 ( 52Kops/s) + over_8888_8888_8888 = L1: 26.06 L2: 22.53 M: 11.49 (121.91%) HT: 9.93 VT: 9.62 R: 9.19 RT: 5.75 ( 36Kops/s) + +commit 28c9bd4866088a017a0cdf3f0fb47467b97bbc29 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Fri Sep 14 09:31:25 2012 +0200 + + MIPS: DSPr2: Added fast-paths for OVER operation: - over_0565_n_0565 - over_0565_8_0565 + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + over_0565_n_0565 = L1: 7.56 L2: 7.24 M: 6.16 ( 16.38%) HT: 4.01 VT: 3.84 R: 3.79 RT: 1.66 ( 18Kops/s) + over_0565_8_0565 = L1: 7.43 L2: 7.05 M: 5.98 ( 23.85%) HT: 5.27 VT: 5.23 R: 5.09 RT: 3.14 ( 28Kops/s) + + Optimized: + over_0565_n_0565 = L1: 15.47 L2: 14.52 M: 12.30 ( 32.65%) HT: 10.76 VT: 10.57 R: 10.27 RT: 6.63 ( 46Kops/s) + over_0565_8_0565 = L1: 15.47 L2: 14.61 M: 11.78 ( 46.92%) HT: 10.00 VT: 9.84 R: 9.40 RT: 5.81 ( 43Kops/s) + +commit b660eb30b4e5f690d191b26a500a6ba224986b3a +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Fri Sep 14 09:31:24 2012 +0200 + + MIPS: DSPr2: Added fast-paths for OVER operation: - over_8888_n_0565 - over_8888_8_0565 + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + over_8888_n_0565 = L1: 8.95 L2: 8.33 M: 6.95 ( 27.74%) HT: 4.27 VT: 4.07 
R: 4.01 RT: 1.74 ( 19Kops/s) + over_8888_8_0565 = L1: 8.86 L2: 8.11 M: 6.72 ( 35.71%) HT: 5.68 VT: 5.62 R: 5.47 RT: 3.35 ( 30Kops/s) + + Optimized: + over_8888_n_0565 = L1: 18.76 L2: 17.55 M: 13.11 ( 52.19%) HT: 11.35 VT: 11.10 R: 10.88 RT: 6.94 ( 47Kops/s) + over_8888_8_0565 = L1: 18.14 L2: 16.79 M: 12.10 ( 64.25%) HT: 10.24 VT: 9.98 R: 9.63 RT: 5.89 ( 43Kops/s) + +commit 37e3368e20cee42f1e1039bb112ed9a09d21156f +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Fri Sep 14 09:31:23 2012 +0200 + + MIPS: DSPr2: Added fast-paths for OVER operation: - over_8888_n_8888 - over_8888_8_8888 + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench results + + Referent (before): + over_8888_n_8888 = L1: 9.92 L2: 11.27 M: 8.50 ( 45.23%) HT: 4.70 VT: 4.45 R: 4.49 RT: 1.85 ( 20Kops/s) + over_8888_8_8888 = L1: 12.54 L2: 10.86 M: 8.18 ( 54.36%) HT: 6.53 VT: 6.45 R: 6.41 RT: 3.83 ( 33Kops/s) + + Optimized: + over_8888_n_8888 = L1: 28.02 L2: 24.92 M: 14.72 ( 78.15%) HT: 13.03 VT: 12.65 R: 12.00 RT: 7.49 ( 49Kops/s) + over_8888_8_8888 = L1: 26.92 L2: 23.93 M: 13.65 ( 90.58%) HT: 11.68 VT: 11.29 R: 10.56 RT: 6.37 ( 45Kops/s) + +commit f580c4c5b2a435ebe2751ce0dace6c42568557f8 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Sep 21 16:52:16 2012 -0400 + + pixman-combine.c.template: Formatting clean-ups + + Various formatting fixes, and removal of some obsolete comments about + strength reduction of operators. + +commit 58f8704664d1f8c812a85b929a50818f213a8438 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Sep 20 21:43:24 2012 -0400 + + Fix bugs in pixman-image.c + + In the checks for whether the transforms are rotation matrices "-1" + and "1" were used instead of the correct -pixman_fixed_1 and + pixman_fixed_1. + + Fixes test suite failure for rotate-test. 
+ +commit 550dfc5e7ecd5b099c1009d77c56cb91a62caeb1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Sep 20 18:41:33 2012 -0400 + + Add rotate-test.c test program + + This program exercises a bug in pixman-image.c where "-1" and "1" were + used instead of the correct "- pixman_fixed_1" and "pixman_fixed_1". + + With the fast implementation enabled: + + % ./rotate-test + rotate test failed! (checksum=35A01AAB, expected 03A24D51) + + Without it: + + % env PIXMAN_DISABLE=fast ./rotate-test + pixman: Disabled fast implementation + rotate test passed (checksum=03A24D51) + + V2: The first version didn't have lcg_srand (testnum) in test_transform(). + +commit 2ab77c97a5a3a816d6383bdc3b6c8bdceb0383b7 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Sep 19 12:04:11 2012 -0400 + + Fix bugs in component alpha combiners for separable PDF operators + + In general, the component alpha version of an operator is supposed to + do this: + + - multiply source with mask in all channels + - multiply mask with source alpha in all channels + - compute the regular operator in all channels using the + mask value whenever source alpha is called for + + The first two steps are usually accomplished with the function + combine_mask_ca(), but for operators where source alpha is not used, + such as SRC, ADD and OUT, the simpler function + combine_mask_value_ca(), which doesn't compute the new mask values, + can be used. + + However, the PDF blend modes generally *do* make use of source alpha, + so they can't use combine_mask_value_ca() as they do now. They have to + use combine_mask_ca(). + + This patch fixes this in combine_multiply_ca() and the CA combiners + generated by PDF_SEPARABLE_BLEND_MODE. 
+ +commit c4b69e706e63e01fbc70e0026c2079007c89de14 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Sep 19 19:46:13 2012 -0400 + + Fix bug in fast_composite_scaled_nearest() + + The fast_composite_scaled_nearest() function can be called when the + format is x8b8g8r8. In that case pixels fetched in fetch_nearest() + need to have their alpha channel set to 0xff. + + Fixes test suite failure in scaling-test. + + Reviewed-by: Matt Turner <mattst88@gmail.com> + +commit 35be7acb660228d4e350b5806c81e55606352e0d +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Sep 19 19:26:49 2012 -0400 + + Add PIXMAN_x8b8g8r8 and PIXMAN_a8b8g8r8 formats to scaling-test + + Update the CRC values based on what the general implementation + reports. This reveals a bug in the fast implementation: + + % env PIXMAN_DISABLE="mmx sse2" ./test/scaling-test + pixman: Disabled mmx implementation + pixman: Disabled sse2 implementation + scaling test failed! (checksum=AA722B06, expected 03A23E0C) + + vs. + + % env PIXMAN_DISABLE="mmx sse2 fast" ./test/scaling-test + pixman: Disabled fast implementation + pixman: Disabled mmx implementation + pixman: Disabled sse2 implementation + scaling test passed (checksum=03A23E0C) + + Reviewed-by: Matt Turner <mattst88@gmail.com> + +commit 9decb9a97975ae6bf25a42c0fd2eaa21b166c36d +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Sep 15 19:10:56 2012 -0400 + + implementation: Rename delegate to fallback + + At this point the chain of implementations has nothing to do with the + delegation design pattern anymore, so rename the delegate pointer to + 'fallback'. + +commit b96599ccf353e89f95aa106853fcf310203c5874 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Sep 15 13:58:45 2012 -0400 + + _pixman_implementation_create(): Initialize implementation with memset() + + All the function pointers are NULL by default now, so we can just zero + the struct. Also write the function a little more compactly. 
+ +commit 9539a18832c278ca0f6f572d8765932be6c9ad65 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Sep 15 13:53:17 2012 -0400 + + Rename _pixman_lookup_composite_function() to _pixman_implementation_lookup_composite() + + And move it into pixman-implementation.c which is where it belongs + logically. + +commit ee6af72dadaf9eb049bfeb35dc9ff57c3902403a +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Sep 15 13:20:52 2012 -0400 + + Move delegation of src/dest iter init into pixman-implementation.c + + Instead of relying on each implementation to delegate when an iterator + can't be initialized, change the type of iterator initializers to + boolean and make pixman-implementation.c do the delegation whenever an + iterator initializer returns FALSE. + +commit c710d0fae2a9dc7d20913e5e39a1bb53f7c942db +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Sep 15 13:08:51 2012 -0400 + + Move fill delegation into pixman-implementation.c + + As in the blt commit, do the delegation in pixman-implementation.c + whenever the implementation fill returns FALSE instead of relying on + each implementation to do it by itself. + + With this change there is no longer any reason for the implementations + to have one fill function that delegates and one that actually blits, + so consolidate those in the NEON, DSPr2, SSE2, and MMX + implementations. + +commit 534507ba3b00b9aaadc9f181c282b01e4e2fe415 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Sep 15 13:03:10 2012 -0400 + + Move blt delegation into pixman-implementation.c + + Rather than require each individual implementation to do the + delegation for blt, just do it in pixman-implementation.c whenever the + implementation blt returns FALSE. + + With this change, there is no longer any reason for the + implementations to have one blt function that delegates and one that + actually blits, so consolidate those in the NEON, DSPr2, SSE2, and MMX + implementations. 
+ +commit 7ef4436abbdb898dc656ebb5832ed5d6fd764bba +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Sep 15 12:48:42 2012 -0400 + + implementation: Write lookup_combiner() in a less convoluted way. + + Instead of initializing an array on the stack, just use a simple + switch to select which set of combiners to look up in. + +commit 3124a51abb89475b8c5045bc96e04c5852694a16 +Author: Matt Turner <mattst88@gmail.com> +Date: Sun Sep 16 00:25:38 2012 -0400 + + build: Remove useless DEP_CFLAGS/DEP_LIBS variables + +commit 46e4faf8ef34d49f15e1946d105289fb06365553 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Thu Jun 21 06:07:07 2012 +0200 + + build: Improve win32 build system + + Handle cross-directory dependencies using PHONY targets and clean up + some redundancies. + +commit c89efdd211cf7cd3dc69a4140045ceab6f445730 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Tue Jul 17 16:14:20 2012 +0200 + + mmx: Fix x86 build on MSVC + + The MSVC compiler is very strict about variable declarations after + statements. + + Move all the declarations of each block before any statement in + the same block to fix multiple instances of: + + pixman-mmx.c(xxxx) : error C2275: '__m64' : illegal use of this type + as an expression + +commit 1e3e569b04f45592ce2174e48df0fcb333ce0ad3 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Aug 26 18:23:53 2012 -0400 + + test/utils.c: Use pow(), not powf() in sRGB conversion routines + + These functions are operating on double precision values, so use pow() + instead of powf(). + +commit 8577daba04e60c1b4c44ce01c6874a573952913a +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Aug 26 18:13:47 2012 -0400 + + pixel_checker: Move sRGB conversion into get_limits() + + The sRGB conversion has to be done every time the limits are being + computed. Without this fix, pixel_checker_get_min/max() will produce + the wrong results when called from somewhere other than + pixel_checker_check(). 
+ +commit 62eb6e5e054da498e38da80ba8143f0a069b0c17 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Aug 25 17:14:50 2012 -0400 + + Remove obsolete TODO file + +commit 384846b38cfb5e1895ae49c40adbf72a85b63d95 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Aug 19 13:45:21 2012 -0400 + + Remove pointless declaration of _pixman_image_get_scanline_generic_64() + + This declaration used to be necessary when + _pixman_image_get_scanline_generic_64() referred to a structure that + itself referred back to _pixman_image_get_scanline_generic_64(). + +commit 09cb1ae10b1976970233c934d27c36e0a4203e1c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jun 9 09:15:53 2012 -0400 + + demos: Add srgb_trap_test.c + + This demo program composites a bunch of trapezoids side by side with + and without gamma aware compositing. + +commit 04e878c231ad3624c57e51a5fcdc55a177d4dc0f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jun 9 09:42:56 2012 -0400 + + Make show_image() cope with more formats + + This makes show_image() deal with more formats than just a8r8g8b8, in + particular, a8r8g8b8_sRGB can now be handled. + + Images that are passed to show_image with a format of a8r8g8b8_sRGB + are displayed without modification under the assumption that the + monitor is approximately sRGB. + + Images with a format of a8r8g8b8 are also displayed without + modification since many other users of show_image() have been + generating essentially sRGB data with this format. Other formats are + also assumed to be gamma compressed; these are converted to a8r8g8b8 + before being displayed. + + With these changes, srgb-test.c doesn't need to do its own conversion + anymore. + +commit 8db9ec9814a3dcd8211ec60cd4fd3c9ae9d77924 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jul 31 15:01:16 2012 -0400 + + Define TIMER_BEGIN and TIMER_END even when timers are not enabled + + This allows code that uses these macros to build when timers are + disabled. 
+ +commit da5268cc19e03b24737dec3e2c51296156b869a8 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Aug 1 15:56:13 2012 -0400 + + Post-release version bump to 0.27.3 + +commit e8ddef78b67a0699a990f3c785396d4b1955f972 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Aug 1 15:22:57 2012 -0400 + + Pre-release version bump to 0.27.2 + +commit c214ca51a00fdd0e773ace32076c3ed8a5d0d482 +Author: Sebastian Bauer <mail@sebastianbauer.info> +Date: Tue Jul 31 07:30:32 2012 +0200 + + Use angle brackets form of including config.h + +commit 98617b3796d12c18d8306cca590160caa3c95454 +Author: Sebastian Bauer <mail@sebastianbauer.info> +Date: Tue Jul 31 07:30:31 2012 +0200 + + Added HAVE_CONFIG_H check before including config.h + +commit 5b0563f39eb29e4ae431717696174da5e282c346 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jul 30 16:21:39 2012 -0400 + + glyph-test: Avoid setting solid images as alpha maps. + + glyph-test would sometimes set a solid image as an alpha map, which is + not allowed. When this happened and the debug spew was enabled, + messages like this one would be generated: + + *** BUG *** + In pixman_image_set_alpha_map: The expression + !alpha_map || alpha_map->type == BITS was false + Set a breakpoint on '_pixman_log_error' to debug + + Fix this by not passing the ALLOW_SOLID flag to create_image() when + the resulting is to be used as an alpha map. + +commit 38fe7cd7be388aae6dff7d9b9979eb4ffa5fa175 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jul 30 16:10:05 2012 -0400 + + stress-test: Avoid overflows in clip rectangles + + The rectangles in the clip region set in set_general_properties() + would sometimes overflow, which would lead to messages like these: + + *** BUG *** + In pixman_region32_union_rect: Invalid rectangle passed + Set a breakpoint on '_pixman_log_error' to debug + + when the micro version number of pixman is even. 
+ + Fix this by detecting the overflow and clamping such that the x2/y2 + coordinates are less than INT32_MAX. + +commit 24d83cbf3df06505fa4cf827271aa2985414cfdd +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jul 30 15:54:27 2012 -0400 + + Add make-srgb.pl to EXTRA_DIST + + Otherwise make distcheck doesn't pass. + +commit 72ba0b955504ecdc69f4cbf96a677b82be18b0cb +Author: Antti S. Lankila <alankila@bel.fi> +Date: Sun Jul 29 22:14:34 2012 +0300 + + Add tests to validate new sRGB behavior + + Composite checks random combinations of operations that now also have + sRGB sources, masks and destinations, and stress-test validates the + read/write primitives. + +commit a161a6ba2394aed68148304de83b8f2c185f4c32 +Author: Antti S. Lankila <alankila@bel.fi> +Date: Sun Jul 29 21:56:18 2012 +0300 + + Add sRGB blending demo program + + Simple sRGB color blender test can be used to determine if the sRGB processing + works as expected. It blends alpha ramps of purple and green together such that + at midpoint of image, 50 % blend of both is realized. At that point, sRGB-aware + processing yields a result close to #bbb rather than #888, which is the linear + light blending result. + + The demo also contains the sample computation for sRGB premultiplied alpha. + +commit 7460457f80b1482338318f0ddcdf5311659fae7b +Author: Antti S. Lankila <alankila@bel.fi> +Date: Sun Jul 29 21:46:58 2012 +0300 + + Add support for sRGB surfaces + + sRGB format is defined as a new format type, PIXMAN_TYPE_ARGB_SRGB. One form of + this type is provided, PIXMAN_a8r8g8b8_sRGB. Use of an sRGB format triggers + wide processing, and the pixel fetch/store functions handle the relevant + conversion between color spaces. Pixman itself is thought to compose in the + linearized sRGB color space. + + sRGB conversion is tabularized. For sRGB to linear, we are using only 256 + values because the current source format uses 8 bits per component precision. 
+ For linear to sRGB, it turns out that only 4096 brightness levels are required + to generate all of the 256 sRGB color values, and therefore only 12 bits per + component are considered during store. As a special case, a no-op + sRGB->linear->sRGB conversion is constructed to be lossless by adjusting the + sRGB->linear conversion table where necessary. + +commit 1dcca0f7ae64e9a96f2feba85dd728c636744009 +Author: Antti S. Lankila <alankila@bel.fi> +Date: Sat Jul 28 14:02:42 2012 +0300 + + Remove unnecessary dst initialization + + The initialization work is already performed correctly in image_init(). + +commit 56321eff65832791252c7c324930d14c44d4d5f7 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jul 9 06:58:59 2012 -0400 + + Make pixman-mmx.c compile on x86-32 without optimization + + When not optimizing, write _mm_shuffle_pi16() as a statement + expression with inline assembly. That way we avoid + __builtin_ia32_pshufw(), which is only available when compiling with + -msse, while still allowing the non-optimizing gcc to understand that + the second argument is a compile time constant. + + Tested-by: Knut Petersen <knut_petersen@t-online.de> + +commit 0c81957e9b4f83944075167ae27a955bb253e267 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Jun 28 15:53:14 2012 -0400 + + Cleanups and simplifications in x86 CPU feature detection + + A new function pixman_cpuid() is added that runs the cpuid instruction + and returns the results. On GCC this function uses inline assembly; on + MSVC, the function calls the __cpuid intrinsic. + + There is also a new function called have_cpuid() which detects whether + cpuid is available. On x86-64 and MSVC, it simply returns TRUE; on + x86-32 bit, it checks whether the 22nd bit of eflags can be + modified. On MSVC this does have the consequence that pixman will no + longer work CPUS without cpuid (ie., older than 486 and some 486 + models). 
+ + These two functions together makes it possible to write a generic + detect_cpu_features() in plain C. This function is then used in a new + have_feature() function that checks whether a specific set of feature + bits is available. + + Aside from the cleanups and simplifications, the main benefit from + this patch is that pixman now can do feature detection on x86-64, so + that newer instruction sets such as SSSE3 and SSE4.1 can be used. (And + apparently the assumption that x86-64 CPUs always have MMX and SSE2 is + no longer correct: Knight's Corner is x86-64, but doesn't have them). + + V2: Rename the constants in the getisax() code, as pointed out by Alan + Coopersmith. Also reinstate the result variable and initialize + features to 0. + + V3: Fixes for the fact that the upper 32 bits of a 64 bit register are + zeroed whenever the corresponding 32 bit register is written to. + + V4: Fixes for the fact that in 32 bit mode, when gcc is not optimizing + there were not enough registers available. The new code uses the "a", + "b", "c", and "d" constraints instead, and has two separate versions + for 32 and 64 bit modes. + +commit 4d641c3803d508ba1eb40e61257949422ae2b90d +Author: Sebastian Bauer <mail@sebastianbauer.info> +Date: Sun Jul 8 18:48:45 2012 -0400 + + Changed the style of two function headers + + Declare functions *_inverse() and *_contains_rectangle() in the same + way as the other functions are declared. This doesn't imply any semantic + changes. It's just a unification of coding styles. 
+ +commit 86ad09b548b45a5a5074d9d83970d5e7e7f89d31 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Mon Jul 2 20:54:20 2012 +0200 + + MIPS: DSPr2: Added more bilinear fast paths (without mask) + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench -b + + Referent (before): + src_8888_8888 = L1: 8.18 L2: 7.79 M: 6.32 ( 33.51%) HT: 5.78 VT: 5.70 R: 5.61 RT: 3.79 ( 29Kops/s) + src_8888_0565 = L1: 6.90 L2: 7.14 M: 6.47 ( 25.75%) HT: 5.54 VT: 5.51 R: 5.46 RT: 3.53 ( 28Kops/s) + src_0565_x888 = L1: 3.76 L2: 3.71 M: 3.37 ( 13.41%) HT: 3.26 VT: 3.22 R: 3.20 RT: 2.58 ( 23Kops/s) + src_0565_0565 = L1: 3.59 L2: 3.56 M: 3.47 ( 9.19%) HT: 3.19 VT: 3.18 R: 3.16 RT: 2.46 ( 22Kops/s) + over_8888_8888 = L1: 5.99 L2: 5.66 M: 4.95 ( 26.28%) HT: 4.40 VT: 4.38 R: 4.31 RT: 3.02 ( 26Kops/s) + add_8888_8888 = L1: 6.84 L2: 6.39 M: 5.48 ( 29.09%) HT: 4.80 VT: 4.79 R: 4.70 RT: 3.20 ( 27Kops/s) + + Optimized: + src_8888_8888 = L1: 18.27 L2: 16.69 M: 12.87 ( 68.25%) HT: 11.80 VT: 11.61 R: 10.60 RT: 7.05 ( 41Kops/s) + src_8888_0565 = L1: 15.18 L2: 14.10 M: 11.75 ( 46.71%) HT: 10.64 VT: 10.50 R: 10.03 RT: 7.15 ( 41Kops/s) + src_0565_x888 = L1: 10.45 L2: 9.96 M: 9.23 ( 36.72%) HT: 8.39 VT: 8.29 R: 8.02 RT: 5.75 ( 37Kops/s) + src_0565_0565 = L1: 9.37 L2: 8.98 M: 8.50 ( 22.53%) HT: 7.71 VT: 7.66 R: 7.52 RT: 5.59 ( 37Kops/s) + over_8888_8888 = L1: 12.21 L2: 11.01 M: 8.56 ( 45.36%) HT: 7.71 VT: 7.64 R: 7.43 RT: 5.51 ( 36Kops/s) + add_8888_8888 = L1: 17.72 L2: 15.16 M: 10.78 ( 57.13%) HT: 9.46 VT: 9.30 R: 9.00 RT: 6.03 ( 38Kops/s) + +commit 707a8be11280c4d395e662e869d4a98d75bb5571 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Mon Jul 2 20:54:19 2012 +0200 + + MIPS: DSPr2: Added several bilinear fast paths with a8 mask + + Performance numbers before/after on MIPS-74kc @ 1GHz: + + lowlevel-blt-bench -b + + Referent (before): + + src_8888_8_8888 = L1: 6.37 L2: 6.08 M: 5.46 ( 32.57%) HT: 4.64 VT: 4.61 R: 4.52 RT: 2.85 ( 23Kops/s) + src_8888_8_0565 = L1: 5.89 L2: 
5.66 M: 5.11 ( 23.71%) HT: 4.36 VT: 4.34 R: 4.26 RT: 2.71 ( 22Kops/s) + src_0565_8_x888 = L1: 3.32 L2: 3.27 M: 3.17 ( 14.71%) HT: 2.86 VT: 2.84 R: 2.81 RT: 2.07 ( 19Kops/s) + src_0565_8_0565 = L1: 3.19 L2: 3.15 M: 3.05 ( 10.11%) HT: 2.75 VT: 2.74 R: 2.71 RT: 2.00 ( 18Kops/s) + over_8888_8_8888 = L1: 4.99 L2: 4.71 M: 4.11 ( 27.22%) HT: 3.59 VT: 3.58 R: 3.50 RT: 2.36 ( 21Kops/s) + add_8888_8_8888 = L1: 5.60 L2: 5.26 M: 4.52 ( 29.95%) HT: 3.92 VT: 3.89 R: 3.80 RT: 2.49 ( 21Kops/s) + + Optimized: + + src_8888_8_8888 = L1: 13.19 L2: 12.13 M: 9.75 ( 58.22%) HT: 8.60 VT: 8.44 R: 7.90 RT: 5.06 ( 33Kops/s) + src_8888_8_0565 = L1: 11.64 L2: 10.81 M: 9.18 ( 42.63%) HT: 8.04 VT: 7.90 R: 7.57 RT: 5.02 ( 32Kops/s) + src_0565_8_x888 = L1: 8.34 L2: 7.95 M: 7.29 ( 33.85%) HT: 6.55 VT: 6.48 R: 6.25 RT: 4.35 ( 30Kops/s) + src_0565_8_0565 = L1: 7.71 L2: 7.35 M: 6.90 ( 22.90%) HT: 6.14 VT: 6.10 R: 5.94 RT: 4.07 ( 29Kops/s) + over_8888_8_8888 = L1: 9.73 L2: 8.99 M: 7.15 ( 47.41%) HT: 6.40 VT: 6.30 R: 6.11 RT: 4.28 ( 30Kops/s) + add_8888_8_8888 = L1: 13.01 L2: 11.72 M: 8.70 ( 57.68%) HT: 7.59 VT: 7.46 R: 7.20 RT: 4.74 ( 32Kops/s) + +commit 6aac8e85701be418e1ce13debc1bc8a30687f66b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Jun 27 22:11:29 2012 -0400 + + Simplify CPU detection on PPC. + + Get rid of the initialized and have_vmx static variables in + pixman-ppc.c There is no point to them since CPU detection only + happens once per process. + + On Linux, just read /proc/self/auxv instead of generating the filename + with getpid() and don't bother with the stack buffer. Instead just + read the aux entries one by one. 
+ +commit 4b78d785373c1d02abe695267379674776b3da3d +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Jun 27 22:05:18 2012 -0400 + + Simplifications to ARM CPU detection + + Organize pixman-arm.c such that each operating system/compiler exports + a detect_cpu_features() function that returns a bitmask with the + various features that we are interested in. A new function + have_feature() then calls this function, caches the result, and returns + whether the given feature is available. + + The result is that all the pixman_have_arm_<feature> functions become + redundant and can be deleted. + +commit 8b795a9c17aa25328b9c76b949d319bb578d5f1e +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Jun 27 14:14:54 2012 -0400 + + Simplify MIPS CPU detection + + There is no reason to have pixman_have_<feature> functions when all + they do is call pixman_have_mips_feature(). + + Instead rename pixman_have_mips_feature() to have_feature() and call + it directly from _pixman_mips_get_implementations(). Also on + non-Linux, just make have_feature() return FALSE.
+ +commit 16502dd3ae3bf1d49faf1de533bd58013e168e64 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Jun 27 23:04:24 2012 -0400 + + Move the remaining bits of pixman-cpu into pixman-implementation.c + +commit 5813bb96aec1c48636db621558534561fef67b68 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jun 26 17:26:34 2012 -0400 + + Move MIPS specific CPU detection to its own file, pixman-mips.c + +commit 4ac0a1d60fccf4f9a782747ce61fd15825eddb5a +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jun 26 17:30:22 2012 -0400 + + Move PowerPC specific CPU detection to its own file pixman-ppc.c + +commit 8590415f0e54520a176ff0fb53deb82be16873dd +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jun 26 17:02:24 2012 -0400 + + Move ARM specific CPU detection to a new file pixman-arm.c + + Similar to the x86 commit, this moves the ARM specific CPU detection + to its own file which exports a pixman_arm_get_implementations() + function that is supposed to be a noop on non-ARM. + +commit 39ac18570a70674897aa7085406d9a4f6069feb4 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jun 26 12:44:32 2012 -0400 + + Move x86 specific CPU detection to a new file pixman-x86.c + + Extract the x86 specific parts of pixman-cpu.c and put them in their + own file called pixman-x86.c which exports one function + pixman_x86_get_implementations() that creates the MMX and SSE2 + implementations. This file is supposed to be compiled on all + architectures, but pixman_x86_get_implementations() should be a noop + on non-x86. 
+ +commit 1a3b7614a9808f8af15204d0751a6820bf67059c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jun 26 18:07:39 2012 -0400 + + pixman-cpu.c: Rename disabled to _pixman_disabled() and export it + +commit d4aa82fb9148862904bb7ca33655ce8d571643b0 +Author: Sebastian Bauer <mail@sebastianbauer.info> +Date: Tue Jul 3 05:55:14 2012 -0400 + + Qualify the static variables in pixman_f_transform_invert() with the const keyword. + + Their contents are not overwritten. + +commit f9c91ee2f27eaea68d8c3a130bf7d4bc0c860834 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Jul 1 16:59:53 2012 -0400 + + Use a compile-time constant for the "K" constraint in the MMX detection. + + When compiling with -O0, gcc doesn't understand that in + + signed char x = 0; + + ... + + asm ("...", + : "K" (x)); + + x is constant. Fix this by using an immediate constant instead of a + variable. + +commit cd7ecf548a9e8115226bf0fec174f3abc54becb5 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Jul 1 06:54:06 2012 -0400 + + In fast_composite_tiled_repeat() don't clone images with a palette + + In fast_composite_tiled_repeat() if the source image is less than a + certain constant width, a clone is created which is then + pre-repeated. However, the source image's palette, if it has one, is + not cloned, so for indexed images, the pre-repeating would crash. + + Fix this by not doing any pre-repeating for images with a palette set. + +commit 7b20ad39f778d765566d3f2c5f7c50964100efc1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Jul 1 06:53:18 2012 -0400 + + test: Make stress-test more likely to actually composite something + + stress-test currently almost never composites anything because the clip + rectangles and transformations are such that either + _pixman_compute_composite_region32() or analyze_extent() will return + FALSE. 
+ + Fix this by: + + - making log_rand() return smaller numbers so that the clip rectangles + are more likely to be within the destination image + + - adding rand_x() and rand_y() functions that pick positions within an + image and using them for positioning alpha maps and source/mask + positions. + + - making it less likely that clip regions are used in general + + These changes make the test take longer, so speed it up a little by + making most images smaller and by reducing the maximum convolution + filter from 17x19 to 3x4. + + With these changes, stress-test reveals a crash in iteration 0xd39 + where fast_composite_tiled_repeat() creates an indexed image without a + palette. + +commit 4cdf8e9f3aca1925aeca25debb9268877ba3cd3d +Author: Matt Turner <mattst88@gmail.com> +Date: Sun Jul 1 16:35:46 2012 -0400 + + sse2: add missing ABGR entires for bilinear src_8888_8888 + +commit ef99f9e97260cc55678385a6d691c195f57bd6b1 +Author: Matt Turner <mattst88@gmail.com> +Date: Mon May 21 05:56:58 2012 -0400 + + loongson: optimize _mm_set_pi* functions with shuffle instructions + +commit 9aa8e3a26071739d160496ef9f6126f296c500eb +Author: Matt Turner <mattst88@gmail.com> +Date: Wed Jun 27 13:00:36 2012 -0400 + + mmx: optimize bilinear function when using 7-bit precision + + Loongson: + image firefox-fishtank 1037.738 1040.218 0.19% 3/3 + image firefox-fishtank 1056.611 1057.581 0.20% 3/3 + + ARM/iwMMXt: + image firefox-fishtank 1487.282 1492.640 0.17% 3/3 + image firefox-fishtank 1363.913 1364.366 0.11% 3/3 + +commit 1ad6ae6ee8a350f6fe4f30ba928aacf44d04f86e +Author: Matt Turner <mattst88@gmail.com> +Date: Sun May 20 20:51:08 2012 -0400 + + mmx: add scaled bilinear over_8888_8_8888 + + Loongson: + image firefox-fishtank 1665.163 1670.370 0.17% 3/3 + image firefox-fishtank 1037.738 1040.218 0.19% 3/3 + + ARM/iwMMXt: + image firefox-fishtank 2042.723 2045.308 0.10% 3/3 + image firefox-fishtank 1487.282 1492.640 0.17% 3/3 + +commit c43de364cbcd195f7d1d6881a6109cbb3d6b73b8 +Author: 
Matt Turner <mattst88@gmail.com> +Date: Wed Jun 27 12:57:45 2012 -0400 + + mmx: add scaled bilinear over_8888_8888 + + Loongson: + image firefox-planet-gnome 157.012 158.087 0.30% 6/6 + image firefox-planet-gnome 156.617 157.109 0.15% 5/6 + + ARM/iwMMXt: + image firefox-planet-gnome 148.086 149.339 0.76% 6/6 + image firefox-planet-gnome 144.939 146.123 0.61% 6/6 + +commit 9209cd746b7a81d0536df6dadd6a0b0b983291cb +Author: Matt Turner <mattst88@gmail.com> +Date: Tue Jun 19 00:30:51 2012 -0400 + + mmx: add scaled bilinear src_8888_8888 + + Loongson: + image firefox-planet-gnome 170.025 170.229 0.09% 3/4 + image firefox-planet-gnome 157.012 158.087 0.30% 6/6 + + ARM/iwMMXt: + image firefox-planet-gnome 164.192 164.875 0.34% 3/4 + image firefox-planet-gnome 148.086 149.339 0.76% 6/6 + +commit 51f27d7364d66e47d882ee531b6655368159231a +Author: Matt Turner <mattst88@gmail.com> +Date: Thu Jun 28 12:17:16 2012 -0400 + + mmx: Use expand_alpha instead of mask/shift + +commit b0855f095aba8e0c98d1fd1863b278fc72a4dd2c +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Sun Jul 1 23:00:34 2012 +0300 + + Change default bilinear interpolation precision to 7 bits + + This improves performance for the current SSE2 code. Further + reduction to 4 bits may be considered later if it proves + to allow additional speedup. + +commit c430b1dba7bfea0031227dd4b976da3dd7c4ac02 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Tue Jun 26 01:47:18 2012 +0300 + + sse2: _mm_madd_epi16 for faster bilinear scaling with 7-bit precision + + Reducing interpolation precision allows the use of PMADDWD instruction. 
+ This makes bilinear scaling much faster (on Intel Core i7): + + 8-bit: image firefox-fishtank 57.584 58.349 0.74% 3/3 + 7-bit: image firefox-fishtank 51.139 51.229 0.30% 3/3 + + 8-bit: src_8888_8888 = L1: 228.71 L2: 226.52 M:224.82 ( 14.95%) HT:183.22 VT:154.02 R:171.72 RT:109.36 + 7-bit: src_8888_8888 = L1: 320.45 L2: 317.43 M:314.38 ( 20.77%) HT:215.13 VT:177.35 R:204.46 RT:121.93 + +commit ccd31896bc2f1f323b3be9e8b1447cab892ee62d +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Tue Jun 26 01:06:10 2012 +0300 + + Bilinear interpolation precision is now configurable at compile time + + Macro BILINEAR_INTERPOLATION_BITS in pixman-private.h selects + the number of fractional bits used for bilinear interpolation. + + scaling-test and affine-test have checksums for 4-bit, 7-bit + and 8-bit configurations. + +commit ad9f1d020188fe90ae742041195baebdfbe6fe27 +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Jun 29 14:24:30 2012 -0400 + + Fix distcheck due to custom iwMMXt rules + +commit ff5d041b88c667141b891909acd3085c3ed54994 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Mon Jun 25 07:24:27 2012 +0300 + + sse2: faster bilinear scaling (use _mm_loadl_epi64) + + Using _mm_loadl_epi64() to load two pixels at once (pairs of top + and bottom pixels) is faster than loading each pixel separately + and combining them with _mm_set_epi32(). + + === cairo-perf-trace === + + before: image firefox-fishtank 66.912 66.931 0.13% 3/3 + after: image firefox-fishtank 57.584 58.349 0.74% 3/3 + + === lowlevel-blt-bench === + + before: src_8888_8888 = L1: 181.10 L2: 179.14 M:178.08 ( 11.02%) HT:153.22 VT:133.45 R:142.24 RT: 95.32 + after: src_8888_8888 = L1: 228.68 L2: 225.75 M:223.98 ( 14.23%) HT:185.32 VT:155.06 R:162.73 RT:102.52 + + This improvement was suggested by Matt Turner on irc. 
+ +commit fc162bad561a516f648daf07e9d22d427fe60e74 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Mon Jun 25 07:11:59 2012 +0300 + + test: support nearest/bilinear scaling in lowlevel-blt-bench + + Scale factor is selected to be nearly 1x, so that the MPix/s results + can be directly compared with the results of non-scaled compositing + operations. + +commit 387e9bcddb90bd2c7d1dfb81c073196f9f81042d +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Sat Jun 23 04:08:28 2012 +0300 + + test: Fix for strict aliasing issue in 'get_random_seed' + + Gets rid of gcc warning when compiled with -fstrict-aliasing option in CFLAGS + +commit 4cbeb0aedccde5d2eb87daec08040a8bf161f6d7 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Wed Jun 20 17:13:33 2012 +0200 + + build: Fix compilation on win32 + + When compiling using the win32 build system, config.h is not + available nor needed. + + Fixes: + + pixman-glyph.c(26) : fatal error C1083: Cannot open include file: + 'config.h': No such file or directory + +commit 21077e1b83912b5e895b160bbbcd9b4664191506 +Author: Matt Turner <mattst88@gmail.com> +Date: Wed May 2 23:13:43 2012 -0400 + + sse2: add src_x888_0565 + + Port of 2ddd1c498b to SSE2. + + Uses the pmadd technique described in + http://software.intel.com/sites/landingpage/legacy/mmx/MMX_App_24-16_Bit_Conversion.pdf + + Works around lack of packusdw instruction by first sign extending the + values. 
+ + fast: src_8888_0565 = L1: 681.40 L2: 689.20 M: 644.76 ( 25.51%) HT:404.42 VT:288.04 R:306.07 RT:150.80 (1619Kops/s) + mmx: src_8888_0565 = L1:2056.03 L2:1985.44 M:1574.91 ( 61.87%) HT:533.10 VT:376.35 R:416.10 RT:178.79 (1833Kops/s) + sse2: src_8888_0565 = L1:3793.42 L2:3653.44 M:1878.83 ( 73.94%) HT:535.03 VT:407.96 R:421.46 RT:163.31 (1727Kops/s) + + and for reference, using packusdw + sse4: src_8888_0565 = L1:4396.18 L2:4229.25 M:1904.04 ( 75.18%) HT:559.79 VT:427.96 R:440.06 RT:165.71 (1744Kops/s) + + Notice that MMX is faster in the RT case because it can operate on + 8-bytes instead of the current 16-bytes for SSE2. + +commit 7db07cb731e3689328d9ecbdafffe99d7d38388e +Author: Matt Turner <mattst88@gmail.com> +Date: Wed Jun 13 13:18:49 2012 -0400 + + sse2: enable over_n_0565 for b5g6r5 + + Same as b950bb12 for MMX. + +commit 45946c5fa1760ad185ae20e8797635b0a256ea08 +Author: Matt Turner <mattst88@gmail.com> +Date: Wed Jun 13 16:37:48 2012 -0400 + + .gitignore: add test/glyph-test + +commit eadb442b5c825679016de7e7acb837e58f92bfc4 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jun 12 22:04:29 2012 -0400 + + test: Add missing break in stress-test.c + + Found by coverity: + + https://bugzilla.redhat.com/show_bug.cgi?id=756069 + +commit 492dac7593075e622cfeddc73298df29d50b76bc +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Wed Jun 6 23:54:20 2012 +0300 + + test: fix bisecting issue in fuzzer-find-diff.pl + + Before bisecting to find the exact test which has failed, we + first need to make sure that the first test is fine (the first + test is "good" and the whole range is "bad"). Otherwise + test 2 gets incorrectly flagged as problematic in the case + if we already got a failure on test 1 right from the start. 
+ +commit 40a0d10eeaedb879bbffe41f0537e8468c563df7 +Author: Siarhei Siamashka <siarhei.siamashka@gmail.com> +Date: Wed Jun 6 22:21:32 2012 +0300 + + test: OpenMP 2.5 requires signed loop iteration variables + + Unsigned loop variables are only supported since version 3.0 + of OpenMP specification. Changing loop variables to use int32_t + type fixes pixman build problems with path64 compiler. + +commit 619a60d201bfdfe2f15fca75f6e686fc7c275b5b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jun 11 19:13:45 2012 -0400 + + test: Make glyph test pass on big endian + + The destination buffer was initialized with random uint32_t values, so + it started out different on big endian vs. little endian. Fix that by + initializing the buffer with random uint8_t values instead. + +commit f80e7ad3cbf46218bb3b4247e7b9e5d448670ad8 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Jan 8 13:21:11 2012 -0500 + + bits-image: Turn all the fetchers into iterator getters + + Instead of caching these fetchers in the image structure, and then + have the iterator getter call them from there, simply change them to + be iterator getters themselves. + + This avoids an extra indirect function call and lets us get rid of the + get_scanline_32/64 fields in pixman_image_t. + +commit fd175f9d02f36b1e91973e4264519228547f5dc7 +Author: Antti S. Lankila <alankila@bel.fi> +Date: Sun Jun 10 19:22:56 2012 +0300 + + Faster unorm_to_unorm for wide processing. + + Optimizing the unorm_to_unorm functions allows a speedup from: + + src_8888_2x10 = L1: 62.08 L2: 60.73 M: 59.61 ( 4.30%) HT: 46.81 + VT: 42.17 R: 43.18 RT: 26.01 (325Kops/s) + + to: + + src_8888_2x10 = L1: 76.94 L2: 78.43 M: 75.87 ( 5.59%) HT: 56.73 + VT: 52.39 R: 53.00 RT: 29.29 (363Kops/s) + + on a i7 Q720 -based laptop. 
+ + The key of the patch is the observation that unorm_to_unorm's work can + more easily be done with a simple multiplication and shift, when the + function is applied repeatedly and the parameters are not compile-time + constants. For instance, converting from 0xfe to 0xfefe (expanding + from 8 bits to 16 bits) can be done by calculating + + c = c * 0x101 + + However, sometimes the result is not a neat replication of all the + bits. For instance, going from 10 bits to 16 bits can be done by + calculating + + c = c * 0x401UL >> 4 + + where the intermediate result is 20 bit wide repetition of the 10-bit + pattern followed by shifting off the unnecessary lowest bits. + + The patch has the algorithm to calculate the factor and the shift, and + converts the code to use it. + +commit 367b78fd5c57ee05298eb11370b68d01613961e5 +Author: Matt Turner <mattst88@gmail.com> +Date: Wed May 30 16:44:04 2012 -0400 + + configure.ac: add iwmmxt2 configure flag + + The flag allows the user to select whether pixman-mmx.c is compiled with + -march=iwmmxt or -march=iwmmxt2. + + gcc has scheduling support for the Marvell CPU in the XO 1.75 when + building with -march=iwmmxt2. + +commit 31a6563ec5167d6b15fdb8c158a71ab4f97015ab +Author: Matt Turner <mattst88@gmail.com> +Date: Wed May 30 16:26:32 2012 -0400 + + autotools: use custom build rule to build iwMMXt code + + gcc has no sane way of enabling iwmmxt code generation, like -msse for + SSE, so you have to use -march=iwmmxt{,2}. User CFLAGS are placed after + -march=iwmmxt and override the march value, so we have to use a custom + build rule to order the CFLAGS such that pixman-mmx.c will be built with + the necessary CFLAGS. 
+ +commit 706bf8264cb48aac36e36ff5e23f0ad8a47ff73c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue May 3 07:25:50 2011 -0400 + + Speed up _pixman_image_get_solid() in common cases + + Make _pixman_image_get_solid() faster by special-casing the common + cases where the image is SOLID or a repeating a8r8g8b8 image. + + This optimization together with the previous one results in a small + but reproducible performance improvement on the xfce4-terminal-a1 + cairo trace: + + [ # ] backend test min(s) median(s) stddev. count + Before: + [ 0] image xfce4-terminal-a1 1.221 1.239 1.21% 100/100 + After: + [ 0] image xfce4-terminal-a1 1.170 1.199 1.26% 100/100 + + Either optimization by itself is difficult to separate from noise. + +commit 934c9d8546b71ddea91ac16b0928101903e2608e +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon May 28 02:36:22 2012 -0400 + + Speed up _pixman_composite_glyphs_no_mask() + + Bypass much of the overhead of pixman_image_composite32() by only + computing the composite region once instead of once per glyph, and by + only looking up the composite function whenever the glyph format or + flags change. + + As part of this, the pixman_compute_composite_region32() was renamed + to _pixman_compute_composite_region32() and exported in + pixman-private.h. + + I couldn't find a trace that would reliably demonstrate that this is + actually an improvement by itself (since _pixman_composite_glyphs_no_mask() + is called so rarely), but together with the following optimization for + solid sources, there is a small but reliable improvement to the + xfce4-terminal-a1 cairo trace. 
+ +commit a162189dc0fa9978a3b5b6dd24f9bc12995805ed +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon May 28 01:22:26 2012 -0400 + + Speed up pixman_composite_glyphs() + + When adding glyphs to the mask, bypass most of the overhead of + pixman_image_composite32() by: + + - Only looking up the composite function when the glyph changes either + format or flags. + + - Only using a white source when the glyph format is different from + the mask format. + + - Simply intersecting the glyph rectangle with the destination + rectangle instead of doing the full _pixman_composite_region32(). + + Performance results: + + [ # ] backend test min(s) median(s) stddev. count + Before: + [ 0] image firefox-talos-gfx 6.570 6.577 0.13% 8/10 + After: + [ 0] image firefox-talos-gfx 4.272 4.289 0.28% 10/10 + + V2: Changes to deal with white sources + +commit d9710442b4b2294e1ccd1594c54ca8a4feda2ac5 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun May 27 18:23:20 2012 -0400 + + test: Add glyph-test + + This test tests the new glyph cache and compositing API. Much of this + test is intending to making sure that clipping and alpha map handling + survive any optimizations that may be added to the glyph compositing. + + V2: Evaluating lcg_rand_n() multiple times in an argument list lead + to undefined behavior. + +commit dc9237472789b0b45393f6f7eeafa057a86280c4 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon May 28 16:14:12 2012 -0400 + + Add support for alpha maps to compute_crc32_for_image(). + + When a destination image I has an alpha map A, the following rules apply: + + - If I has an alpha channel itself, the content of that channel is + undefined + + - If A has RGB channels, the content of those channels is + undefined. + + Hence in order to compute the CRC32 for such an image, we have to mask + off the alpha channel of the image, and the RGB channels of the alpha + map. 
+ + V2: Shifting by 32 is undefined in C + +commit 43e029d525c191a771e5b964fccff09b6d341bb2 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun May 27 13:38:14 2012 -0400 + + Move CRC32 computation from blitters-test.c into utils.c + + This way it can be used in other tests. + +commit fce31a5ef8c915ee6b7aee4c6b57bee245185c1f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue May 29 04:14:38 2012 -0400 + + Add pixman_glyph_cache_t API + + This new API allows entire glyph strings to be composited in one go + which reduces overhead compared to multiple calls to + pixman_image_composite32(). + + The pixman_glyph_cache_t is a hash table that maps two keys (a "font" + and a "glyph" key, but they are just keys; there is no distinction + between them as far as pixman is concerned) to a glyph. Glyphs in the + cache can be composited through two new entry points + pixman_glyph_cache_composite_glyphs() and + pixman_glyph_cache_composite_glyphs_no_mask(). + + A glyph cache may only be inserted into when it is "frozen", which is + achieved by calling pixman_glyph_cache_freeze(). When + pixman_glyph_cache_thaw() is later called, if the cache has become too + crowded, some glyphs (currently the least-recently-used) will + automatically be evicted. This means that a user must ensure that all + the required glyphs are present in the cache before compositing a + string. 
The intended way to use the cache is like this: + + pixman_glyph_t glyphs[MAX_GLYPHS]; + + pixman_glyph_cache_freeze (cache); + + for (i = 0; i < n_glyphs; ++i) + { + const void *g; + + if (!(g = pixman_glyph_cache_lookup (cache, font_key, glyph_key))) + { + img = <rasterize glyph as a pixman_image_t>; + + g = pixman_glyph_cache_insert (cache, font_key, glyph_key, + glyph_origin_x, glyph_origin_y, + img); + + if (!g) + { + /* Clean up out-of-memory condition */ + goto oom; + } + + glyphs[i].pos_x = glyph_x_pos; + glyphs[i].pos_y = glyph_y_pos; + glyphs[i].glyph = g; + } + } + + pixman_composite_glyphs (op, src, dest, ..., cache, n_glyphs, glyphs); + + pixman_glyph_cache_thaw (cache); + + V2: + - Move glyphs to front of the MRU list when they are used. Pointed + out by Behdad Esfahbod. + - Composite glyphs with (white IN glyph) ADD mask in order to support + mixed a8 and a8r8g8b8 glyphs. Also pointed out by Behdad. + - Add pixman_glyph_get_mask_format + +commit a3ae88b71b9d2dfc53303963157ecce4b29f0486 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Apr 27 12:07:16 2011 -0400 + + Add doubly linked lists + + This commit adds some new inline functions to maintain a doubly linked + list. + + The way to use them is to embed a pixman_link_t into the structures + that should be linked, and use a pixman_list_t as the head of the + list. + + The new functions are + + pixman_list_init (pixman_list_t *list); + pixman_list_prepend (pixman_list_t *list, pixman_link_t *link); + pixman_list_move_to_front (pixman_list_t *list, pixman_link_t *link); + + There are also a new macro: + + CONTAINER_OF(type, member, data); + + that can be used to get from a pointer to a member to the containing + structure. + + V2: Use the C89 macro offsetof() instead of rolling our own - + suggested by Alan Coopersmith. 
+ +commit c2230fe2aff709de21cc2ee3fa27c3f7578e7f9d +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu May 24 03:10:34 2012 -0400 + + Make use of image flags in mmx and sse2 iterators + + Now that we have the full image flags available, the SSE2 and MMX + iterators can simply check against SAMPLES_COVER_CLIP_NEAREST (which + is computed in pixman_image_composite32()) instead of comparing all + the x/y/width/height parameters. + +commit c1065a9cb4ab1f5847b2373847c65d8ea68975f1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu May 24 03:00:38 2012 -0400 + + Pass the full image flags to iterators + + When pixman_image_composite32() is called some flags are computed that + indicate various things about the composite operation that can't be + deduced from the image flags themselves. These additional flags are + not currently available to iterators. All they can do is read the + image flags in image->common.flags. + + Fix that by passing the info->{src, mask, dest}_flags on to the + iterator initialization and store the flags in the iter struct as + "image_flags". At the same time rename the *iterator* flags variable + to "iter_flags" to avoid confusion. + +commit da6193b1fcc1dfab27f4c36917864f2f2c41cf3e +Author: Matt Turner <mattst88@gmail.com> +Date: Sun May 27 13:01:57 2012 -0400 + + mmx: add missing _mm_empty calls + + Fixes spurious test failures on x86-32. 
+ +commit 62c4bdc94f82d1e4c5dc0e58b5903382d74f3883 +Author: Matt Turner <mattst88@gmail.com> +Date: Fri May 18 01:37:07 2012 -0400 + + mmx: add over_reverse_n_8888 + + Loongson: + over_reverse_n_8888 = L1: 16.04 L2: 15.35 M: 10.20 ( 27.96%) HT: 10.95 VT: 10.45 R: 9.18 RT: 6.99 ( 76Kops/s) + over_reverse_n_8888 = L1: 27.40 L2: 26.67 M: 16.97 ( 45.78%) HT: 16.66 VT: 15.38 R: 14.15 RT: 9.44 ( 97Kops/s) + + image poppler 34.106 35.500 1.48% 6/6 + image poppler 29.598 30.835 1.70% 6/6 + + ARM/iwMMXt: + over_reverse_n_8888 = L1: 15.63 L2: 14.33 M: 10.83 ( 27.55%) HT: 9.78 VT: 9.91 R: 9.49 RT: 6.96 ( 69Kops/s) + over_reverse_n_8888 = L1: 22.79 L2: 19.40 M: 13.76 ( 34.19%) HT: 11.66 VT: 11.86 R: 11.17 RT: 7.85 ( 75Kops/s) + + image poppler 38.040 38.606 1.10% 6/6 + image poppler 31.686 32.278 0.80% 5/6 + +commit 17acc7a4c707db4804b6bf47db30883745049fdb +Author: Matt Turner <mattst88@gmail.com> +Date: Thu May 17 23:27:59 2012 -0400 + + mmx: add add_0565_0565 + + Loongson: + add_0565_0565 = L1: 15.37 L2: 14.91 M: 11.83 ( 16.06%) HT: 10.53 VT: 10.15 R: 9.74 RT: 6.19 ( 68Kops/s) + add_0565_0565 = L1: 45.06 L2: 46.71 M: 27.45 ( 38.00%) HT: 23.76 VT: 22.84 R: 18.96 RT: 9.79 ( 104Kops/s) + + ARM/iwMMXt: + add_0565_0565 = L1: 12.87 L2: 11.58 M: 10.11 ( 12.50%) HT: 9.06 VT: 8.66 R: 7.70 RT: 5.62 ( 58Kops/s) + add_0565_0565 = L1: 31.14 L2: 28.87 M: 22.46 ( 28.60%) HT: 18.61 VT: 17.04 R: 15.21 RT: 9.35 ( 90Kops/s) + +commit d551dc049498d17ab879fd67d47508cafaaede06 +Author: Matt Turner <mattst88@gmail.com> +Date: Thu May 17 23:29:51 2012 -0400 + + fast: add add_0565_0565 function + + I'll need this code for header and tail alignment loops in MMX, so I + might as well implement a fast path here. 
+ +commit f8dc0e98343c7936a37a3624721c5782e7ac309c +Author: Matt Turner <mattst88@gmail.com> +Date: Thu May 17 13:22:18 2012 -0400 + + mmx: implement expand_4x565 in terms of expand_4xpacked565 + + Loongson: + over_n_0565 = L1: 38.57 L2: 38.88 M: 30.01 ( 20.97%) HT: 23.60 VT: 23.88 R: 21.95 RT: 11.65 ( 113Kops/s) + over_n_0565 = L1: 56.28 L2: 55.90 M: 34.20 ( 23.82%) HT: 25.66 VT: 26.60 R: 23.78 RT: 11.80 ( 115Kops/s) + + over_8888_0565 = L1: 35.89 L2: 36.11 M: 21.56 ( 45.47%) HT: 18.33 VT: 17.90 R: 16.27 RT: 9.07 ( 98Kops/s) + over_8888_0565 = L1: 40.91 L2: 41.06 M: 23.13 ( 48.46%) HT: 19.24 VT: 18.71 R: 16.82 RT: 9.18 ( 99Kops/s) + + over_n_8_0565 = L1: 28.92 L2: 29.12 M: 21.42 ( 30.00%) HT: 18.37 VT: 17.75 R: 16.15 RT: 8.79 ( 91Kops/s) + over_n_8_0565 = L1: 32.32 L2: 32.13 M: 22.44 ( 31.27%) HT: 19.15 VT: 18.66 R: 16.62 RT: 8.86 ( 92Kops/s) + + over_n_8888_0565_ca = L1: 29.33 L2: 29.22 M: 18.99 ( 66.69%) HT: 16.69 VT: 16.22 R: 14.63 RT: 8.42 ( 88Kops/s) + over_n_8888_0565_ca = L1: 34.97 L2: 34.14 M: 20.32 ( 71.73%) HT: 17.67 VT: 17.19 R: 15.23 RT: 8.50 ( 89Kops/s) + + ARM/iwMMXt: + over_n_0565 = L1: 29.70 L2: 30.53 M: 24.47 ( 14.84%) HT: 22.28 VT: 21.72 R: 21.13 RT: 12.58 ( 105Kops/s) + over_n_0565 = L1: 41.42 L2: 40.00 M: 30.95 ( 19.13%) HT: 27.06 VT: 27.28 R: 23.43 RT: 14.44 ( 114Kops/s) + + over_8888_0565 = L1: 12.73 L2: 11.53 M: 9.07 ( 16.47%) HT: 9.00 VT: 9.25 R: 8.44 RT: 7.27 ( 76Kops/s) + over_8888_0565 = L1: 23.72 L2: 21.76 M: 15.89 ( 29.51%) HT: 14.36 VT: 14.05 R: 12.44 RT: 8.94 ( 86Kops/s) + + over_n_8_0565 = L1: 6.80 L2: 7.15 M: 6.37 ( 7.90%) HT: 6.58 VT: 6.24 R: 6.49 RT: 5.94 ( 59Kops/s) + over_n_8_0565 = L1: 12.06 L2: 11.02 M: 10.16 ( 13.43%) HT: 9.57 VT: 8.49 R: 9.10 RT: 6.86 ( 69Kops/s) + + over_n_8888_0565_ca = L1: 7.62 L2: 7.01 M: 6.27 ( 20.52%) HT: 6.00 VT: 6.07 R: 5.68 RT: 5.53 ( 57Kops/s) + over_n_8888_0565_ca = L1: 13.54 L2: 11.96 M: 9.76 ( 30.66%) HT: 9.72 VT: 8.45 R: 9.37 RT: 6.85 ( 67Kops/s) + +commit 
51681a052f9e1d0970a79187974da77d9bf69450 +Author: Matt Turner <mattst88@gmail.com> +Date: Sun May 13 20:39:05 2012 -0400 + + mmx: add and use expand_4xpacked565 function + + Loongson: + add_0565_0565 = L1: 14.39 L2: 13.98 M: 11.28 ( 15.22%) HT: 10.11 VT: 9.74 R: 9.39 RT: 6.05 ( 67Kops/s) + add_0565_0565 = L1: 15.37 L2: 14.91 M: 11.83 ( 16.06%) HT: 10.53 VT: 10.15 R: 9.74 RT: 6.19 ( 68Kops/s) + + ARM/iwMMXt: + add_0565_0565 = L1: 11.12 L2: 10.40 M: 8.82 ( 10.65%) HT: 7.98 VT: 7.41 R: 7.57 RT: 5.21 ( 54Kops/s) + add_0565_0565 = L1: 12.87 L2: 11.58 M: 10.11 ( 12.50%) HT: 9.06 VT: 8.66 R: 7.70 RT: 5.62 ( 58Kops/s) + +commit 6491c70e3a2a2e10e99c84024895f346f2300f63 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat May 26 16:34:13 2012 -0400 + + Post-release version bump to 0.27.1 + +commit b1a401e6c9f204d33a55eee41897d66ab81d6117 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat May 26 16:17:14 2012 -0400 + + Pre-release version bump to 0.26.0 + +commit f71e3dba979fcfc1cf87d01137e1e32451a173b1 +Author: Ingmar Runge <ingmar@irsoft.de> +Date: Sat May 19 15:45:18 2012 +0200 + + Fix MSVC compilation + + Only up to three SSE intrinsics supported in function declaration. + +commit 1e59e18d73a6e45729a99fe6ccc74d61631ff5f0 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu May 24 15:30:41 2012 -0400 + + test: Composite with solid images instead of using pixman_image_fill_* + + There is a couple of places where the test suite uses the + pixman_image_fill_* functions to initialize images. These functions + can fail, and will do so if the "fast" implementation is disabled. + + So to make sure the test suite passes even using + PIXMAN_DISABLE="fast", use pixman_image_composite32() with a solid + image instead of pixman_image_fill_*. + +commit 30816e3068bccf7c78c78f916b54971d24873bdc +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Thu May 3 00:03:43 2012 +0200 + + MIPS: DSPr2: Added bilinear over_8888_8_8888 fast path. 
+ + Performance numbers before/after on MIPS-74kc @ 1GHz + + Referent (before): + + cairo-perf-trace: + [ # ] backend test min(s) median(s) stddev. count + [ # ] image: pixman 0.25.3 + [ 0] image firefox-fishtank 2289.180 2290.567 0.05% 5/6 + + Optimized: + + cairo-perf-trace: + [ # ] backend test min(s) median(s) stddev. count + [ # ] image: pixman 0.25.3 + [ 0] image firefox-fishtank 1700.925 1708.314 0.22% 5/6 + +commit aea0522f6f1a51b97a673cfe4dc157e501008580 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Wed May 23 18:53:43 2012 +0200 + + MIPS: DSPr2: Fix bug in over_n_8888_8888_ca/over_n_8888_0565_ca routines + + In main loop (unrolled by factor 2), instead of negating multiplied + mask values by srca, values of srca was negated, and passed as alpha + argument for + + UN8x4_MUL_UN8x4_ADD_UN8x4 macro. + + Instead of: + + ma = ~ma; + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); + + Code was doing this: + + ma = ~srca; + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); + + Key is in substituting registers s0/s1 (containing srca value), with + t0/t1 containing mask values multiplied by srca. Register usage is + also improved (less registers are saved on stack, for + over_n_8888_8888_ca routine). + + The bug was introduced in commit d2ee5631 and revealed by composite test. + +commit 74bf5dc2f99245e7b486203b6ba074fb629eb5f3 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun May 20 13:09:16 2012 -0400 + + demos: Add parrot.jpg to EXTRA_DIST + + Pointed out by Cyril Brulebois. + +commit 55698584be93706794b181cbf595846da578e103 +Author: Matt Turner <mattst88@gmail.com> +Date: Tue May 15 16:32:08 2012 -0400 + + configure.ac: Fail the ARM/iwMMXt test if not compiling with -march=iwmmxt + + If not compiling with -march=iwmmxt, the configure test will still pass, + thinking that the __builtin_arm_* intrinsic is a function instead of + generating a single instruction. 
Since no linking is done, the configure + test doesn't catch this, and we get linking errors in the build. + +commit 3682b615154338f9754e7c1e046b42bb8ad584fa +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue May 15 13:38:44 2012 -0400 + + Post-release version bump to 0.25.7 + +commit 1e1a00e964a1d8ef43d6d75c1c3a0b5d518d1979 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue May 15 13:20:09 2012 -0400 + + Pre-release version bump to 0.25.6 + + Note that 0.25.4 was a botched release that doesn't have a tag and + doesn't correspond to any commit ID. It was however uploaded and + announced, so I'll just use the 0.25.6 version number. + +commit b2c16aaadfae64d2573abb537bfedd92c13b8d06 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue May 15 13:19:19 2012 -0400 + + demos/Makefile.am: Add parrot.c to EXTRA_DIST + + To get 'make distcheck' to pass. + +commit 50d3088d7882e1054a35e917becb7752662da6f0 +Author: Matt Turner <mattst88@gmail.com> +Date: Fri May 11 21:59:13 2012 -0400 + + configure.ac: Rename loongson -> loongson-mmi + + Make it match with the other fast paths, and the PIXMAN_DISABLE value is + already loongson-mmi. + +commit a0a40cb822bec52494c64e6750be50b734dc29df +Author: Matt Turner <mattst88@gmail.com> +Date: Fri May 11 21:49:42 2012 -0400 + + configure.ac: Fix loongson-mmi out-of-tree builds + + When building out-of-tree, gcc wasn't able to find loongson-mmintrin.h + to compile the test program. Add -I$srcdir to CFLAGS to point gcc to it. + +commit 618a08e6aa03b38e8dc71ac610f7fdd55e8a8558 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Thu May 3 00:03:42 2012 +0200 + + MIPS: DSPr2: Added over_n_8_8888 and over_n_8_0565 fast paths. 
+ + Performance numbers before/after on MIPS-74kc @ 1GHz + + Referent (before): + + lowlevel-blt-bench: + over_n_8_8888 = L1: 10.40 L2: 9.79 M: 8.47 ( 33.62%) HT: 7.64 VT: 7.59 R: 7.48 RT: 5.30 ( 40Kops/s) + over_n_8_0565 = L1: 7.40 L2: 7.23 M: 6.78 ( 17.94%) HT: 6.23 VT: 6.17 R: 6.14 RT: 4.62 ( 37Kops/s) + + Optimized: + + lowlevel-blt-bench: + over_n_8_8888 = L1: 27.25 L2: 26.24 M: 18.15 ( 72.12%) HT: 14.52 VT: 14.31 R: 13.83 RT: 7.57 ( 48Kops/s) + over_n_8_0565 = L1: 18.91 L2: 17.59 M: 15.06 ( 39.90%) HT: 12.18 VT: 11.98 R: 11.83 RT: 6.80 ( 46Kops/s) + +commit 7d4beedc612a32b73d7673bbf6447de0f3fca298 +Author: Matt Turner <mattst88@gmail.com> +Date: Wed May 9 19:20:55 2012 -0400 + + mmx: add and use pack_4x565 function + + The pack_4x565 makes use of the pack_4xpacked565 function which uses pmadd. + + Some of the speed up is probably attributable to removing the artificial + serialization imposed by the + vdest = pack_565 (..., vdest, 0); + vdest = pack_565 (..., vdest, 1); + ... + pattern. 
+ + Loongson: + over_n_0565 = L1: 16.44 L2: 16.42 M: 13.83 ( 9.85%) HT: 12.83 VT: 12.61 R: 12.34 RT: 8.90 ( 93Kops/s) + over_n_0565 = L1: 42.48 L2: 42.53 M: 29.83 ( 21.20%) HT: 23.39 VT: 23.72 R: 21.80 RT: 11.60 ( 113Kops/s) + + over_8888_0565 = L1: 15.61 L2: 15.42 M: 12.11 ( 25.79%) HT: 11.07 VT: 10.70 R: 10.37 RT: 7.25 ( 82Kops/s) + over_8888_0565 = L1: 35.01 L2: 35.20 M: 21.42 ( 45.57%) HT: 18.12 VT: 17.61 R: 16.09 RT: 9.01 ( 97Kops/s) + + over_n_8_0565 = L1: 15.17 L2: 14.94 M: 12.57 ( 17.86%) HT: 11.96 VT: 11.52 R: 10.79 RT: 7.31 ( 79Kops/s) + over_n_8_0565 = L1: 29.83 L2: 29.79 M: 21.85 ( 30.94%) HT: 18.82 VT: 18.25 R: 16.15 RT: 8.72 ( 91Kops/s) + + over_n_8888_0565_ca = L1: 15.25 L2: 15.02 M: 11.64 ( 41.39%) HT: 11.08 VT: 10.72 R: 10.02 RT: 7.00 ( 77Kops/s) + over_n_8888_0565_ca = L1: 30.12 L2: 29.99 M: 19.47 ( 68.99%) HT: 17.05 VT: 16.55 R: 14.67 RT: 8.38 ( 88Kops/s) + + ARM/iwMMXt: + over_n_0565 = L1: 19.29 L2: 19.88 M: 17.38 ( 10.54%) HT: 15.53 VT: 16.11 R: 13.69 RT: 11.00 ( 96Kops/s) + over_n_0565 = L1: 36.02 L2: 34.85 M: 28.04 ( 16.97%) HT: 22.12 VT: 24.21 R: 22.36 RT: 12.22 ( 103Kops/s) + + over_8888_0565 = L1: 18.38 L2: 16.59 M: 12.34 ( 22.29%) HT: 11.67 VT: 11.71 R: 11.02 RT: 6.89 ( 72Kops/s) + over_8888_0565 = L1: 24.96 L2: 22.17 M: 15.11 ( 26.81%) HT: 14.14 VT: 13.71 R: 13.18 RT: 8.13 ( 78Kops/s) + + over_n_8_0565 = L1: 14.65 L2: 12.44 M: 11.56 ( 14.50%) HT: 10.93 VT: 10.39 R: 10.06 RT: 7.05 ( 70Kops/s) + over_n_8_0565 = L1: 18.37 L2: 14.98 M: 13.97 ( 16.51%) HT: 12.67 VT: 10.35 R: 11.80 RT: 8.14 ( 74Kops/s) + + over_n_8888_0565_ca = L1: 14.27 L2: 12.93 M: 10.52 ( 33.23%) HT: 9.70 VT: 9.90 R: 9.31 RT: 6.34 ( 65Kops/s) + over_n_8888_0565_ca = L1: 19.69 L2: 17.58 M: 13.40 ( 42.35%) HT: 11.75 VT: 11.33 R: 11.17 RT: 7.49 ( 73Kops/s) + +commit 2beabd9fed76de0023eb36b0c938b8803aa8d129 +Author: Matt Turner <mattst88@gmail.com> +Date: Thu May 10 16:15:34 2012 -0400 + + configure.ac: make -march=loongson2f come before CFLAGS + + Otherwise we'd have 
-march=loongson2f being overridden by automake's + CFLAGS ordering which causes build failures when -march=<not loongson2f> + is specified by the user. + +commit dadb9a318b8ca10c65e31e7278f4335a6968d246 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue May 8 10:05:18 2012 -0400 + + Add Makefile.win32 and Makefile.win32.common to EXTRA_DIST + + https://bugs.freedesktop.org/show_bug.cgi?id=46905 + +commit 3c57ec471e1aacc863747b82bbe0a84c6d776ab7 +Author: Matt Turner <mattst88@gmail.com> +Date: Wed May 9 22:50:50 2012 -0400 + + .gitignore: add demos/checkerboard and demos/quad2quad + +commit 2d431b53d3cdbf1997e2d3b8e17408c12220c3a1 +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Apr 27 14:12:56 2012 -0400 + + mmx: Use wpackhus in src_x888_0565 on iwMMXt + + iwMMXt has an unsigned saturation pack instruction, while MMX/EXT + and Loongson don't. + + ARM/iwMMXt: + src_8888_0565 = L1: 110.38 L2: 82.33 M: 40.92 ( 73.22%) HT: 35.63 VT: 32.22 R: 30.07 RT: 18.40 ( 132Kops/s) + src_8888_0565 = L1: 117.91 L2: 83.05 M: 41.52 ( 75.58%) HT: 37.63 VT: 35.40 R: 29.37 RT: 19.39 ( 134Kops/s) + +commit 2ddd1c498b723e8e48a38eef01d5befba30b5259 +Author: Matt Turner <mattst88@gmail.com> +Date: Thu Apr 19 17:33:27 2012 -0400 + + mmx: add src_8888_0565 + + Uses the pmadd technique described in + http://software.intel.com/sites/landingpage/legacy/mmx/MMX_App_24-16_Bit_Conversion.pdf + + The technique uses the packssdw instruction which uses signed + saturation. This works in their example because they pack 888 to 555 + leaving the high bit as zero. For packing to 565, it is unsuitable, so + we replace it with an or+shuffle. 
+ + Loongson: + src_8888_0565 = L1: 106.13 L2: 83.57 M: 33.46 ( 68.90%) HT: 30.29 VT: 27.67 R: 26.11 RT: 15.06 ( 135Kops/s) + src_8888_0565 = L1: 122.10 L2: 117.53 M: 37.97 ( 78.58%) HT: 33.14 VT: 30.09 R: 29.01 RT: 15.76 ( 139Kops/s) + + ARM/iwMMXt: + src_8888_0565 = L1: 67.88 L2: 56.61 M: 31.20 ( 56.74%) HT: 29.22 VT: 27.01 R: 25.39 RT: 19.29 ( 130Kops/s) + src_8888_0565 = L1: 110.38 L2: 82.33 M: 40.92 ( 73.22%) HT: 35.63 VT: 32.22 R: 30.07 RT: 18.40 ( 132Kops/s) + +commit 3e8fe65a0893fcd82bdea205de49f53be32bb074 +Author: Matt Turner <mattst88@gmail.com> +Date: Wed Apr 18 16:24:28 2012 -0400 + + mmx: add x8f8g8b8 fetcher + + Loongson: + add_x888_x888 = L1: 29.36 L2: 27.81 M: 14.05 ( 38.74%) HT: 12.45 VT: 11.78 R: 11.52 RT: 7.23 ( 75Kops/s) + add_x888_x888 = L1: 36.06 L2: 34.55 M: 14.81 ( 41.03%) HT: 14.01 VT: 13.41 R: 13.06 RT: 9.06 ( 90Kops/s) + + src_x888_8_x888 = L1: 21.92 L2: 20.15 M: 13.35 ( 41.42%) HT: 11.70 VT: 10.95 R: 10.53 RT: 6.18 ( 65Kops/s) + src_x888_8_x888 = L1: 25.43 L2: 23.51 M: 14.12 ( 44.00%) HT: 13.14 VT: 12.50 R: 11.86 RT: 7.49 ( 76Kops/s) + + over_x888_8_0565 = L1: 10.64 L2: 10.17 M: 7.74 ( 21.35%) HT: 6.83 VT: 6.55 R: 6.34 RT: 4.03 ( 46Kops/s) + over_x888_8_0565 = L1: 11.41 L2: 10.97 M: 8.07 ( 22.36%) HT: 7.42 VT: 7.18 R: 6.92 RT: 4.62 ( 52Kops/s) + + ARM/iwMMXt: + add_x888_x888 = L1: 22.10 L2: 18.93 M: 13.48 ( 32.29%) HT: 11.32 VT: 10.64 R: 10.36 RT: 6.51 ( 61Kops/s) + add_x888_x888 = L1: 24.26 L2: 20.83 M: 14.52 ( 35.64%) HT: 12.66 VT: 12.98 R: 11.34 RT: 7.69 ( 72Kops/s) + + src_x888_8_x888 = L1: 19.33 L2: 17.66 M: 14.26 ( 38.43%) HT: 11.53 VT: 10.83 R: 10.57 RT: 6.12 ( 58Kops/s) + src_x888_8_x888 = L1: 21.23 L2: 19.60 M: 15.41 ( 42.55%) HT: 12.66 VT: 13.30 R: 11.55 RT: 7.32 ( 67Kops/s) + + over_x888_8_0565 = L1: 8.15 L2: 7.56 M: 6.50 ( 15.58%) HT: 5.73 VT: 5.49 R: 5.50 RT: 3.53 ( 38Kops/s) + over_x888_8_0565 = L1: 8.35 L2: 7.85 M: 6.68 ( 16.40%) HT: 6.12 VT: 5.97 R: 5.78 RT: 4.03 ( 43Kops/s) + +commit 
c2b1630d9603f80c2636e8a8bfebca87707d4235 +Author: Matt Turner <mattst88@gmail.com> +Date: Wed Apr 18 16:14:08 2012 -0400 + + mmx: add a8 fetcher + + oprofile of xfce4-terminal-a1 + 210535 9.0407 libpixman-1.so.0.25.3 fetch_scanline_a8 + 144802 6.0054 libpixman-1.so.0.25.3 mmx_fetch_a8 + + Loongson: + add_8_8_8 = L1: 17.98 L2: 17.28 M: 14.28 ( 19.79%) HT: 11.11 VT: 10.38 R: 9.97 RT: 5.14 ( 55Kops/s) + add_8_8_8 = L1: 20.44 L2: 19.65 M: 15.62 ( 21.53%) HT: 12.86 VT: 11.98 R: 11.32 RT: 6.13 ( 64Kops/s) + + src_8888_8_0565 = L1: 19.97 L2: 18.59 M: 13.42 ( 32.55%) HT: 11.46 VT: 10.78 R: 10.33 RT: 5.87 ( 61Kops/s) + src_8888_8_0565 = L1: 21.16 L2: 19.68 M: 13.94 ( 33.64%) HT: 12.31 VT: 11.52 R: 11.02 RT: 6.54 ( 68Kops/s) + + src_x888_8_x888 = L1: 20.54 L2: 18.88 M: 13.07 ( 40.74%) HT: 11.05 VT: 10.36 R: 10.02 RT: 5.68 ( 60Kops/s) + src_x888_8_x888 = L1: 21.92 L2: 20.15 M: 13.35 ( 41.42%) HT: 11.70 VT: 10.95 R: 10.53 RT: 6.18 ( 65Kops/s) + + over_x888_8_0565 = L1: 10.32 L2: 9.85 M: 7.63 ( 21.13%) HT: 6.56 VT: 6.30 R: 6.12 RT: 3.80 ( 43Kops/s) + over_x888_8_0565 = L1: 10.64 L2: 10.17 M: 7.74 ( 21.35%) HT: 6.83 VT: 6.55 R: 6.34 RT: 4.03 ( 46Kops/s) + + ARM/iwMMXt: + add_8_8_8 = L1: 13.10 L2: 11.67 M: 10.74 ( 13.46%) HT: 8.62 VT: 8.15 R: 7.94 RT: 4.39 ( 44Kops/s) + add_8_8_8 = L1: 13.81 L2: 12.79 M: 11.63 ( 13.93%) HT: 9.33 VT: 9.20 R: 9.04 RT: 5.43 ( 52Kops/s) + + src_8888_8_0565 = L1: 16.62 L2: 15.07 M: 12.52 ( 27.46%) HT: 10.07 VT: 10.17 R: 9.95 RT: 5.64 ( 54Kops/s) + src_8888_8_0565 = L1: 16.84 L2: 16.11 M: 13.22 ( 27.71%) HT: 11.74 VT: 10.90 R: 10.80 RT: 6.66 ( 62Kops/s) + + src_x888_8_x888 = L1: 17.49 L2: 16.22 M: 13.73 ( 38.73%) HT: 10.10 VT: 10.33 R: 9.55 RT: 5.21 ( 52Kops/s) + src_x888_8_x888 = L1: 19.33 L2: 17.66 M: 14.26 ( 38.43%) HT: 11.53 VT: 10.83 R: 10.57 RT: 6.12 ( 58Kops/s) + + over_x888_8_0565 = L1: 7.57 L2: 7.29 M: 6.37 ( 15.97%) HT: 5.53 VT: 5.33 R: 5.21 RT: 3.22 ( 35Kops/s) + over_x888_8_0565 = L1: 8.15 L2: 7.56 M: 6.50 ( 15.58%) HT: 5.73 VT: 5.49 R: 
5.50 RT: 3.53 ( 38Kops/s) + +commit 20bad64d9a7ff5c2662f12a87f66fcf77c1f3f2c +Author: Matt Turner <mattst88@gmail.com> +Date: Wed Apr 18 16:08:57 2012 -0400 + + mmx: add r5g6b5 fetcher + + Loongson: + add_0565_0565 = L1: 12.73 L2: 12.26 M: 10.05 ( 13.87%) HT: 8.77 VT: 8.50 R: 8.25 RT: 5.28 ( 58Kops/s) + add_0565_0565 = L1: 14.04 L2: 13.63 M: 10.96 ( 15.19%) HT: 9.73 VT: 9.43 R: 9.11 RT: 5.93 ( 64Kops/s) + + ARM/iwMMXt: + add_0565_0565 = L1: 10.36 L2: 10.03 M: 9.04 ( 10.88%) HT: 3.11 VT: 7.16 R: 7.72 RT: 5.12 ( 51Kops/s) + add_0565_0565 = L1: 10.84 L2: 10.20 M: 9.15 ( 11.46%) HT: 7.60 VT: 7.82 R: 7.70 RT: 5.41 ( 53Kops/s) + +commit c136e535adf33069cbf229b8773934d78099af85 +Author: Matt Turner <mattst88@gmail.com> +Date: Tue Apr 17 12:16:55 2012 -0400 + + mmx: Use Loongson pextrh instruction in expand565 + + Same story as pinsrh in the previous commit. + + text data bss dec hex filename + 25336 1952 0 27288 6a98 .libs/libpixman_loongson_mmi_la-pixman-mmx.o + 25072 1952 0 27024 6990 .libs/libpixman_loongson_mmi_la-pixman-mmx.o + + -dsll: 95 + +dsll: 70 + -dsrl: 135 + +dsrl: 105 + -ldc1: 462 + +ldc1: 445 + -lw: 721 + +lw: 700 + +pextrh: 30 + +commit facceb4a1fbba476ad98e76d15868bf7eecd3a30 +Author: Matt Turner <mattst88@gmail.com> +Date: Tue Apr 17 11:28:33 2012 -0400 + + mmx: Use Loongson pinsrh instruction in pack_565 + + The pinsrh instruction is analogous to MMX EXT's pinsrw, except like + other Loongson vector instructions it cannot access the general purpose + registers. In the cases of other Loongson vector instructions, this is a + headache, but it is actually a good thing here. Since the instruction is + different from MMX, I've named the intrinsic loongson_insert_pi16. 
+ + text data bss dec hex filename + 25976 1952 0 27928 6d18 .libs/libpixman_loongson_mmi_la-pixman-mmx.o + 25336 1952 0 27288 6a98 .libs/libpixman_loongson_mmi_la-pixman-mmx.o + + -and: 181 + +and: 147 + -dsll: 143 + +dsll: 95 + -dsrl: 87 + +dsrl: 135 + -ldc1: 523 + +ldc1: 462 + -lw: 767 + +lw: 721 + +pinsrh: 35 + +commit 6d29b7d7557ccb657054e867f4e27f4aa89cb25e +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Feb 24 15:23:09 2012 -0500 + + mmx: don't pack and unpack src unnecessarily + + The combine function was store8888'ing the result, and all consumers + were immediately load8888'ing it, causing lots of unnecessary pack and + unpack instructions. + + It's a very straight forward conversion, except for mmx_combine_over_u + and mmx_combine_saturate_u. mmx_combine_over_u was testing the integer + result to skip pixels, so we use the is_* functions to test the __m64 + data directly without loading it into an integer register. + + For mmx_combine_saturate_u there's not a lot we can do, since it uses + DIV_UN8. + +commit ee750034252fb8f44c871e84a5057bc114699ae7 +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Feb 24 17:39:39 2012 -0500 + + mmx: introduce is_equal, is_opaque, and is_zero functions + + To be used by the next commit. 
+ +commit 10c77b339f40fc027b682ef16edec234508d327b +Author: Matt Turner <mattst88@gmail.com> +Date: Thu Feb 23 16:25:11 2012 -0500 + + mmx: simplify srcsrcsrcsrc calculation in over_n_8_0565 + +commit e06947d1010ffec4903493df4979119b0ac080d3 +Author: Matt Turner <mattst88@gmail.com> +Date: Thu Feb 23 16:15:56 2012 -0500 + + mmx: remove unnecessary uint64_t<->__m64 conversions + + Loongson: + add_8888_8888 = L1: 68.73 L2: 55.09 M: 25.39 ( 68.18%) HT: 25.28 VT: 22.42 R: 20.74 RT: 13.26 ( 131Kops/s) + add_8888_8888 = L1: 159.19 L2: 114.10 M: 30.74 ( 77.91%) HT: 27.63 VT: 24.99 R: 24.61 RT: 14.49 ( 141Kops/s) + +commit c78e986085b3993f1b4355151820228c53d54cad +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Feb 24 12:43:43 2012 -0500 + + mmx: compile on MIPS for Loongson MMI optimizations + + image image16 + evolution 32.985 -> 29.667 27.314 -> 23.870 + firefox-planet-gnome 197.982 -> 180.437 220.986 -> 205.057 + gnome-system-monitor 48.482 -> 49.752 52.820 -> 49.528 + gnome-terminal-vim 60.799 -> 50.528 51.655 -> 44.131 + grads-heat-map 3.167 -> 3.181 3.328 -> 3.321 + gvim 38.646 -> 32.552 38.126 -> 34.453 + midori-zoomed 44.371 -> 43.338 28.860 -> 28.865 + ocitysmap 23.065 -> 18.057 23.046 -> 18.055 + poppler 43.676 -> 36.077 43.065 -> 36.090 + swfdec-giant-steps 20.166 -> 20.365 22.354 -> 16.578 + swfdec-youtube 31.502 -> 28.118 44.052 -> 41.771 + xfce4-terminal-a1 69.517 -> 51.288 62.225 -> 53.309 + +commit 4e0c7902b2c8e517d102a8fccb9cf7da9725f59f +Author: Matt Turner <mattst88@gmail.com> +Date: Wed Feb 15 01:19:07 2012 -0500 + + mmx: make ldq_u take __m64* directly + + Before, if __m64 is allocated in vector or floating-point registers, + + __m64 vs = ldq_u((uint64_t *)src); + + would cause src to be loaded into an integer register and then + transferred to an __m64 register. By switching ldq_u's argument type to + __m64 we give the compiler enough information to recognize that it can + load to the vector register directly. 
+ + This patch is necessary for the Loongson optimizations when __m64 is + typedef'd as double. + +commit 2e54b76a2d2203b6a70190f488d76d6d409e879a +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Feb 24 12:34:41 2012 -0500 + + mmx: add load function and use it in add_8888_8888 + +commit 084e3f2f4be900041cc35830359606addc1fc3be +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Feb 24 12:32:03 2012 -0500 + + mmx: add store function and use it in add_8888_8888 + +commit e24c1c849d29f43dc6e50e1f15102709059b40f8 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Apr 5 00:52:21 2012 -0400 + + bits_image_fetch_pixel_convolution(): Make sure channels are signed + + In the computation: + + srtot += RED_8 (pixel) * f + + RED_8 (pixel) is an unsigned quantity, which means the signed filter + coefficient f gets converted to an unsigned integer before the + multiplication. We get away with this because when the 32 bit unsigned + result is converted to int32_t, the correct sign is produced. But if + srtot had been an int64_t, the result would have been a very large + positive number. + + Fix this by explicitly casting the channels to int. + +commit 4d2fee14063b960c6b81b55dd3aa94b956d23eeb +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Apr 5 00:42:55 2012 -0400 + + test/utils.c: Clip values to the [0, 255] interval + + Unpremultiplying a superluminescent pixel can result in values greater + than 255. + +commit e2917645846b64fdc7f2190806c97b0ef4b0fd5b +Author: Matt Turner <mattst88@gmail.com> +Date: Wed Apr 18 18:14:13 2012 -0400 + + configure.ac: fix iwMMXt/gcc version error message + +commit b87cd1f6059789cb154677d8432045a5ca8e16c1 +Author: Matt Turner <mattst88@gmail.com> +Date: Sun Apr 15 14:03:08 2012 -0400 + + mmx: fix _mm_shuffle_pi16 function when compiling without optimization + + The last argument must be an immediate value, and when compiling without + optimization the compiler might not recognize this. So use a macro if + not optimizing. 
+ +commit e927d2397141f80aecd2702ce5f38349c41aebe5 +Author: Matt Turner <mattst88@gmail.com> +Date: Sun Apr 15 14:00:17 2012 -0400 + + configure.ac: require >= gcc-4.5 for ARM iwMMXt + + We're using a patched gcc-4.5, and having to modify configure.ac and + autoreconf between changes is annoying. And besides, 4.5, 4.6, and 4.7's + iwMMXt intrinsic support is equally broken, and we test a known broken + intrinsic in the configure test program, so the version check is rather + meaningless. + +commit 0531170436a2a10a995c7487b396f1378affdb98 +Author: Matt Turner <mattst88@gmail.com> +Date: Thu Apr 5 17:36:05 2012 -0400 + + mmx: Use force_inline instead of __inline__ (bug 46906) + + Fixes the build on MSVC. + +commit b950bb12dc2baaee441b875bd81b67e48947d2f6 +Author: Matt Turner <mattst88@gmail.com> +Date: Thu Mar 15 19:16:20 2012 -0400 + + mmx: enable over_n_0565 for b5g6r5 + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 87ecec8d72be4106358e843a1e7a907b0e814f7f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Apr 2 15:16:18 2012 -0400 + + gtk-utils.c: In pixbuf_from_argb32() use a8r8g8b8_to_rgba_np() + + Instead of inlining a copy of that functionality. + +commit d1ec1467f607c21a4d8b445eab5465ca60a12a97 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Apr 2 15:09:16 2012 -0400 + + test/utils.c: Rename and export the pngify_pixels() function. + + This function converts from a8r8g8b8 to non-premultiplied RGBA (the + PNG or GdkPixbuf format that has the channels in this order: R, G, B, + A in memory regardless of the computer's endianness). The function's + new name is a8r8g8b8_to_rgba_np(). + +commit b16ddf17829633ec6eb54656924b7e841c6c69a4 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Apr 2 14:59:02 2012 -0400 + + gtk-utils.c: Don't include pixman-private.h + + Use pixman_image_get_format() instead of image->bits.format. 
+ +commit b9ca23a9c711280a706eb1df30a0cfaf3b2d8e27 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Mar 25 12:14:54 2012 -0400 + + Rename fast_composite_add_1000_1000 to _add_1_1() + + The 1000_1000 name is a relic from before the refactoring. + +commit 746291a19ed29e2da6de57b382a1dfaa900d067b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Jan 16 06:46:52 2011 -0500 + + Add the original parrot image. + + This is the Parrot image that was downscaled and cropped before being + used in the composite-test.c demo. + +commit 451b25ae90ea077a78d2606ce107b826043a252b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Oct 6 06:06:59 2010 -0400 + + composite-test.c: Add a parrot image + + Instead of the yellow square, use a parrot as the source image. This + demonstrates the various blend modes much better. + + The parrot is a cropped version of finger painting by Rubens LP: + + http://www.flickr.com/photos/dorubens/4030604504/in/set-72157622586088192/ + + where the background has been removed. Used here under Creative + Commons Attribution. The artist's web site: + + http://www.rubenslp.com.br/ + +commit 3aa45d62e45f40faa98f9bb47046578bf19e7574 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Oct 6 03:56:55 2010 -0400 + + composite-test.c: Use similar gradient to the one in the PDF spec. + +commit e1b8969e78eecf9abaaf2b317c10fddf64b02799 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Oct 12 04:49:27 2011 -0400 + + demos: Add checkerboard demo + + This is a simple demo that displays a checkerboard with a projective + transformation. + +commit 41863fbabb6dd08871aed47beba5d08ae2ae3cf6 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Oct 12 04:48:33 2011 -0400 + + demos: Add quad2quad program + + This program can compute the projective transformation that transforms + one quadrilateral into another. The code is basically maxima[1] output + translated into C. 
+ + [1] http://maxima.sourceforge.net/ + +commit cf0d0d63645bcb6425a1e2d7b5d9f1e26e205247 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Mar 14 17:11:14 2012 -0400 + + Use "=a" and "=d" constraints for rdtsc inline assembly + + In 32 bit mode the "=A" constraint refers to the register pair + edx:eax, but according to GCC developers this is not the case in 64 + bit mode, where it refers to "rax". + + Hence, using "=A" for rdtsc is incorrect in 64 bit mode. + + See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21249 + +commit 8a8aabf05c8e6c7b68b68c80e4e73877fd35ce78 +Author: Jeremy Huddleston <jeremyhu@apple.com> +Date: Fri Mar 16 11:37:23 2012 -0700 + + configure.ac: Fix a copy-paste-o in TLS detection + + Regression from: a069da6c66da407cc52e1e92321d69c68fd6beb5 + + Signed-off-by: Jeremy Huddleston <jeremyhu@apple.com> + Tested-by: Matt Turner <mattst88@gmail.com> + +commit ee6bac11c28b350c183f190b6c648c316ee1109d +Author: Matt Turner <mattst88@gmail.com> +Date: Wed Mar 14 16:48:00 2012 -0400 + + Use AC_LANG_SOURCE for DSPr2 configure program + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 21eeecffa93ba5912487d88851b1a5c60fc37768 +Author: Chun-wei Fan <fanchunwei@src.gnome.org> +Date: Fri Mar 9 15:54:06 2012 +0800 + + Just include xmmintrin.h on MSVC as well + + The xmmintrin.h as shipped with recent Visual C++ (2003+) provides + _mm_shuffle_pi16 and _mm_mulhi_pu16, so including that header + will do for using these functions, and MSVC does not like the GCC-specific + implementations of _mm_shuffle_pi16 and _mm_mulhi_pu16 that are + currently in the code. + + _MM_SHUFFLE is declared in the same way in MSVC's xmmintrin.h, so don't + re-define it here to avoid a compilation warning. 
+ +commit 94aea2e868ae02235785f31b275f89b9661bca0e +Author: Jeremy Huddleston <jeremyhu@apple.com> +Date: Wed Mar 14 10:26:18 2012 -0700 + + Fix a false-negative in MMX check + + Silence warnings that could make -Werror give a false negative + Use signed char to avoid cases where int8_t isn't declared + + Reported-by: Mike Lothian <mike@fireburn.co.uk> + Tested-by: Mike Lothian <mike@fireburn.co.uk> + Reviewed-by: Matt Turner <mattst88@gmail.com> + Signed-off-by: Jeremy Huddleston <jeremyhu@apple.com> + +commit d2ee5631ae42d031289ae80352e02bafa3f06ed4 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Sun Mar 11 18:52:25 2012 +0100 + + MIPS: DSPr2: Added over_n_8888_8888_ca and over_n_8888_0565_ca fast paths. + + Performance numbers before/after on MIPS-74kc @ 1GHz + + Referent (before): + + lowlevel-blt-bench: + over_n_8888_8888_ca = L1: 8.32 L2: 7.65 M: 6.38 ( 51.08%) HT: 5.78 VT: 5.74 R: 5.84 RT: 4.39 ( 37Kops/s) + over_n_8888_0565_ca = L1: 7.40 L2: 6.95 M: 6.16 ( 41.06%) HT: 5.72 VT: 5.52 R: 5.63 RT: 4.28 ( 36Kops/s) + cairo-perf-trace: + [ # ] backend test min(s) median(s) stddev. count + [ # ] image: pixman 0.25.3 + [ 0] image xfce4-terminal-a1 138.223 139.070 0.33% 6/6 + [ # ] image16: pixman 0.25.3 + [ 0] image16 xfce4-terminal-a1 132.763 132.939 0.06% 5/6 + + Optimized: + + lowlevel-blt-bench: + over_n_8888_8888_ca = L1: 19.35 L2: 23.84 M: 13.68 (109.39%) HT: 11.39 VT: 11.19 R: 11.27 RT: 6.90 ( 47Kops/s) + over_n_8888_0565_ca = L1: 18.68 L2: 17.00 M: 12.56 ( 83.70%) HT: 10.72 VT: 10.45 R: 10.43 RT: 5.79 ( 43Kops/s) + cairo-perf-trace: + [ # ] backend test min(s) median(s) stddev. 
count + [ # ] image: pixman 0.25.3 + [ 0] image xfce4-terminal-a1 130.400 131.720 0.46% 6/6 + [ # ] image16: pixman 0.25.3 + [ 0] image16 xfce4-terminal-a1 125.830 126.604 0.34% 6/6 + +commit a069da6c66da407cc52e1e92321d69c68fd6beb5 +Author: Jeremy Huddleston <jeremyhu@apple.com> +Date: Thu Mar 8 09:41:34 2012 -0800 + + Expand TLS support beyond __thread to __declspec(thread) + + This code was pretty much copied from a similar commit that I made to + xorg-server in April. + + cf: xorg/xserver: bb4d145bd25e2aee988b100ecf1105ea3b6a40b8 + + Signed-off-by: Jeremy Huddleston <jeremyhu@apple.com> + +commit 61d999b9101c76bd463101923d2143e31857e7f8 +Author: Jeremy Huddleston <jeremyhu@apple.com> +Date: Thu Mar 8 09:41:32 2012 -0800 + + Disable MMX when incompatible clang is being used. + + Signed-off-by: Jeremy Huddleston <jeremyhu@apple.com> + +commit ad4b6922f23e91b291c794b9fde5ee81941adb64 +Author: Jeremy Huddleston <jeremyhu@apple.com> +Date: Thu Mar 8 09:41:33 2012 -0800 + + Silence a warning about unused pixman_have_mmx + + Signed-off-by: Jeremy Huddleston <jeremyhu@apple.com> + +commit bb5ff2687815eff20757612de965663ef3a2525b +Author: Jeremy Huddleston <jeremyhu@apple.com> +Date: Thu Mar 8 09:41:31 2012 -0800 + + Revert "Disable MMX when Clang is being used." + + This reverts commit 5eb4c12a79b3017ec6cc22ab756f53f225731533. 
+ +commit a6ad5120f7bd4add3b2e9c03c9fd769d5bbfd191 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Mar 8 10:11:20 2012 -0500 + + Post-release version bump to 0.25.3 + +commit f73f7985318bf0e7446941d9bea9a94b35580342 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Mar 8 09:33:16 2012 -0500 + + Pre-release version bump to 0.25.2 + +commit 62df04eb257d16fbb4449855a48f6fdaf567e201 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Mar 8 09:29:46 2012 -0500 + + mmx: Squash a warning by making the argument to ldl_u() const + +commit 85943733cbd7b62991ee962aa22f28bc5d1be353 +Author: Alan Coopersmith <alan.coopersmith@oracle.com> +Date: Fri Feb 24 18:02:56 2012 -0800 + + Just use xmmintrin.h when building with Solaris Studio compilers + + Since the Solaris Studio compilers don't have a mode where MMX + instructions are available and SSE instructions are not, we can + just use the <xmmintrin.h> header directly. + + Fixes build failure due to Studio not supporting the __gnu_inline__ + or __artificial__ attributes. + + Signed-off-by: Alan Coopersmith <alan.coopersmith@oracle.com> + Acked-by: Matt Turner <mattst88@gmail.com> + +commit 304f57644ac6a991c6e538675de935356252c0a5 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Wed Feb 29 12:04:33 2012 +0100 + + MIPS: DSPr2: Added mips_dspr2_blt and mips_dspr2_fill routines. + + Performance numbers before/after on MIPS-74kc @ 1GHz + + Referent (before): + + lowlevel-blt-bench: + src_n_0565 = L1: 238.14 L2: 233.15 M: 57.88 ( 77.23%) HT: 53.22 VT: 49.99 R: 47.73 RT: 24.79 ( 91Kops/s) + src_n_8888 = L1: 190.19 L2: 187.57 M: 28.94 ( 77.23%) HT: 27.91 VT: 27.33 R: 26.64 RT: 14.68 ( 77Kops/s) + cairo-perf-trace: + [ # ] backend test min(s) median(s) stddev. 
count + [ # ] image: pixman 0.25.1 + [ 0] image gnome-system-monitor 268.460 269.712 0.22% 6/6 + + Optimized: + + lowlevel-blt-bench: + src_n_0565 = L1:1081.39 L2: 258.22 M:189.59 (252.91%) HT: 60.23 VT: 55.01 R: 53.44 RT: 23.68 ( 89Kops/s) + src_n_8888 = L1: 653.46 L2: 113.55 M:135.26 (360.86%) HT: 38.99 VT: 37.38 R: 34.95 RT: 18.67 ( 84Kops/s) + cairo-perf-trace: + [ # ] backend test min(s) median(s) stddev. count + [ # ] image: pixman 0.25.1 + [ 0] image gnome-system-monitor 246.565 246.706 0.04% 6/6 + +commit 999e72b80bd5e3fab5f45b6ad19511389b58d9ab +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Mar 1 02:24:54 2012 -0500 + + pixman-access.c: Remove some unused macros + + The macros related to palette entries: + + RGB15_TO_ENTRY, + RGB24_TO_ENTRY, + RGB24_TO_ENTRY_Y + + are not used anywhere. + +commit c0cb48aae0d09200a187965094138fbf488498cd +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Feb 29 04:44:46 2012 -0500 + + pixman-accessors.h: Delete unused macros + + The MEMCPY_WRAPPED and ACCESS macros are not used anymore. + +commit 5adf569317f923cd5eb547209a8d927be0d81049 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Feb 26 17:35:20 2012 -0500 + + Move fetching for solid bits images to pixman-noop.c + + This should be a bit faster because it can reuse the scanline on each iteration. + +commit 3c3c70fa0b524569df0ec20c50d481626e518462 +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Feb 24 20:11:11 2012 -0500 + + lowlevel-blt-bench: add in_8_8 and in_n_8_8 + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit fcea053561893d116a79f41a113993f1f61b58cf +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Jan 26 13:16:09 2011 -0500 + + Disable implementations mentioned in the PIXMAN_DISABLE environment variable. + + With this, it becomes possible to do + + PIXMAN_DISABLE="sse2 mmx" some_app + + which will run some_app without SSE2 and MMX enabled. 
This is useful + for benchmarking, testing and narrowing down bugs. + + The current list of implementations that can be disabled: + + fast + mmx + sse2 + arm-simd + arm-iwmmxt + arm-neon + mips-dspr2 + vmx + + The general and noop implementations can't be disabled because pixman + depends on those being available for correct operation. + + Reviewed-by: Matt Turner <mattst88@gmail.com> + +commit e7574d336b7c812a888fac22f99f1b0e9a3518b0 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Wed Feb 22 14:23:48 2012 +0100 + + MIPS: DSPr2: Added fast-paths for SRC operation. + + Following fast-path functions are implemented (routines 4, 5 and 6 utilize + same fast-memcpy routine): + 1. src_x888_8888 + 2. src_8888_0565 + 3. src_0565_8888 + 4. src_0565_0565 + 5. src_8888_8888 + 6. src_0888_0888 + + Performance numbers before/after on MIPS-74kc @ 1GHz + + Referent (before): + + lowlevel-blt-bench: + src_x888_8888 = L1: 199.35 L2: 96.54 M: 18.87 (100.68%) HT: 17.12 VT: 16.24 R: 15.43 RT: 9.33 ( 61Kops/s) + src_8888_0565 = L1: 71.22 L2: 51.95 M: 24.19 ( 96.17%) HT: 20.71 VT: 19.92 R: 18.15 RT: 9.92 ( 63Kops/s) + src_0565_8888 = L1: 38.82 L2: 36.22 M: 18.60 ( 73.95%) HT: 14.47 VT: 13.19 R: 12.97 RT: 6.61 ( 49Kops/s) + src_0565_0565 = L1: 286.05 L2: 155.02 M: 37.68 (100.54%) HT: 31.08 VT: 28.07 R: 26.26 RT: 11.93 ( 68Kops/s) + src_8888_8888 = L1: 454.32 L2: 139.15 M: 19.30 (102.98%) HT: 17.73 VT: 16.08 R: 16.62 RT: 10.45 ( 64Kops/s) + src_0888_0888 = L1: 190.47 L2: 106.14 M: 25.26 (101.08%) HT: 21.88 VT: 20.32 R: 18.83 RT: 10.10 ( 63Kops/s) + cairo-perf-trace: + [ # ] backend test min(s) median(s) stddev. 
count + [ # ] image: pixman 0.25.1 + [ 0] image firefox-asteroids 421.215 421.325 0.01% 4/6 + [ 1] image firefox-planet-gnome 647.708 648.486 0.13% 6/6 + [ 2] image gnome-system-monitor 276.073 277.506 0.38% 6/6 + [ 3] image gnome-terminal-vim 263.866 265.229 0.39% 6/6 + [ 4] image poppler 123.576 124.003 0.15% 6/6 + + Optimized (with these optimizations): + + lowlevel-blt-bench: + src_x888_8888 = L1: 369.50 L2: 99.37 M: 27.19 (145.07%) HT: 20.24 VT: 19.48 R: 19.00 RT: 10.22 ( 63Kops/s) + src_8888_0565 = L1: 105.65 L2: 67.87 M: 25.41 (101.00%) HT: 20.78 VT: 19.84 R: 18.52 RT: 9.81 ( 63Kops/s) + src_0565_8888 = L1: 77.10 L2: 63.04 M: 23.37 ( 92.90%) HT: 20.29 VT: 19.37 R: 18.14 RT: 10.02 ( 63Kops/s) + src_0565_0565 = L1: 519.02 L2: 241.32 M: 62.35 (166.34%) HT: 33.74 VT: 27.63 R: 26.12 RT: 11.70 ( 67Kops/s) + src_8888_8888 = L1: 390.48 L2: 113.99 M: 30.32 (161.77%) HT: 19.55 VT: 17.05 R: 17.13 RT: 10.19 ( 63Kops/s) + src_0888_0888 = L1: 349.74 L2: 156.68 M: 40.68 (162.78%) HT: 25.58 VT: 20.57 R: 20.20 RT: 9.96 ( 63Kops/s) + cairo-perf-trace: + [ # ] backend test min(s) median(s) stddev. 
count + [ # ] image: pixman 0.25.1 + [ 0] image firefox-asteroids 400.050 400.308 0.04% 6/6 + [ 1] image firefox-planet-gnome 628.978 629.364 0.07% 6/6 + [ 2] image gnome-system-monitor 270.247 270.313 0.03% 6/6 + [ 3] image gnome-terminal-vim 256.413 257.641 0.21% 6/6 + [ 4] image poppler 119.540 120.023 0.21% 6/6 + +commit 1364c91bd106f8b67c9cd1bda2fdd9d46ac40363 +Author: Nemanja Lukic <nemanja.lukic@rt-rk.com> +Date: Wed Feb 22 14:23:47 2012 +0100 + + MIPS: DSPr2: Basic infrastructure for MIPS architecture + + MIPS DSP instruction set extensions + +commit e43d65d49da2c3e929cf20e82a2f7ed1fa0d0167 +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Feb 24 20:02:55 2012 -0500 + + lowlevel-blt: add over_x888_n_8888 + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 9f607049956c6858706c7ca45829c5ad19f18191 +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Feb 24 19:58:09 2012 -0500 + + lowlevel-blt: add over_8888_8888 + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 5eb4c12a79b3017ec6cc22ab756f53f225731533 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Feb 23 18:36:04 2012 -0500 + + Disable MMX when Clang is being used. + + There are several issues with the Clang compiler and pixman-mmx.c: + + - When not optimizing, it doesn't seem to recognize that an argument + to an __always_inline__ function is compile-time constant. This + results in this error being produced: + + fatal error: error in backend: Invalid operand for inline asm + constraint 'K'! + + - This inline assembly: + + asm ("pmulhuw %1, %0\n\t" + : "+y" (__A) + : "y" (__B) + ); + + results in + + fatal error: error in backend: Unsupported asm: input constraint + with a matching output constraint of incompatible type! + + So disable MMX when the compiler is Clang. 
+ +commit 350e231b3f01d6f82a2fdc7d9a9945234c404d0a +Author: Matt Turner <mattst88@gmail.com> +Date: Tue Feb 21 23:33:02 2012 -0500 + + mmx: make load8888 take a pointer to data instead of the data itself + + Allows us to tune how we load data into the vector registers. + + Signed-off-by: Matt Turner <mattst88@gmail.com> + + And squashed in: + + mmx: define and use load8888u function + + For unaligned loads. + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit ab68316eda91bbf6bb41158c622347723e1fa8c4 +Author: Matt Turner <mattst88@gmail.com> +Date: Tue Feb 21 19:29:59 2012 -0500 + + mmx: make store8888 take uint32_t *dest as argument + + Allows us to tune how we store data from the vector registers. + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 57a245a6e00987191faad9a34bef9f4524a6848c +Author: Matt Turner <mattst88@gmail.com> +Date: Wed Feb 22 16:32:21 2012 -0500 + + Update .gitignore with more demos and tests + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 51ae3f2d7f25daebbc767f161f0097b581d1554b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Feb 21 19:30:04 2012 -0500 + + mmx: Delete unused function in_over_full_src_alpha() + + Also a few minor formatting fixes. 
+ + Reviewed-by: Matt Turner <mattst88@gmail.com> + +commit bbd1e6941b39adcdb64c77670889314fa8461c0b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Feb 21 19:23:33 2012 -0500 + + mmx: Enable over_x888_8_8888() for x86 as well + + It used to be slower than the generic code (with the gcc that was + current in 2007), but that doesn't seem to be the case anymore: + + over_x888_8_8888 = L1: 22.97 L2: 22.88 M: 22.27 ( 5.29%) HT: 18.30 VT: 15.81 R: 15.54 RT: 10.35 ( 131Kops/s) + over_x888_8_8888 = L1: 53.56 L2: 53.20 M: 50.50 ( 11.99%) HT: 38.60 VT: 31.19 R: 29.00 RT: 17.37 ( 208Kops/s) + + Reviewed-by: Matt Turner <mattst88@gmail.com> + +commit 4fc586c3df9a53cc1406891e751a6eed3d7da400 +Author: Matt Turner <mattst88@gmail.com> +Date: Tue Feb 21 16:28:37 2012 -0500 + + mmx: fix typo in pix_add_mul on MSVC + + Typo introduced in commit a075a870. + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 84221f4c1687b8ea14e9cbdc78b2ba7258e62c9e +Author: Matt Turner <mattst88@gmail.com> +Date: Sun Feb 19 18:10:03 2012 -0500 + + mmx: Use _mm_shuffle_pi16 + + The pshufw x86 instruction is part of Extended 3DNow! and SSE1. The + equivalent ARM wshufh instruction was available from the first iwMMXt + instrucion set. + + This instruction is already used in the SSE2 code. + + Reduces code size by ~9%. 
+ + amd64 + text data bss dec hex filename + 29925 2240 0 32165 7da5 .libs/libpixman_mmx_la-pixman-mmx.o + 27237 2240 0 29477 7325 .libs/libpixman_mmx_la-pixman-mmx.o + + x86 + text data bss dec hex filename + 27677 1792 0 29469 731d .libs/libpixman_mmx_la-pixman-mmx.o + 24959 1792 0 26751 687f .libs/libpixman_mmx_la-pixman-mmx.o + + arm + text data bss dec hex filename + 30176 1792 0 31968 7ce0 .libs/libpixman_iwmmxt_la-pixman-mmx.o + 27384 1792 0 29176 71f8 .libs/libpixman_iwmmxt_la-pixman-mmx.o + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 14208344964f341a7b4a704b05cf4804c23792e9 +Author: Matt Turner <mattst88@gmail.com> +Date: Sun Feb 19 01:32:31 2012 -0500 + + mmx: Use _mm_mulhi_pu16 + + The pmulhuw x86 instruction is part of Extended 3DNow! and SSE1. The + equivalent ARM wmuluh instruction was available from the first iwMMXt + instrucion set. + + This instruction is already used in the SSE2 code. + + Reduces code size by ~5%. + + amd64 + text data bss dec hex filename + 31325 2240 0 33565 831d .libs/libpixman_mmx_la-pixman-mmx.o + 29925 2240 0 32165 7da5 .libs/libpixman_mmx_la-pixman-mmx.o + + x86 + text data bss dec hex filename + 29165 1792 0 30957 78ed .libs/libpixman_mmx_la-pixman-mmx.o + 27677 1792 0 29469 731d .libs/libpixman_mmx_la-pixman-mmx.o + + arm + text data bss dec hex filename + 31632 1792 0 33424 8290 .libs/libpixman_iwmmxt_la-pixman-mmx.o + 30176 1792 0 31968 7ce0 .libs/libpixman_iwmmxt_la-pixman-mmx.o + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 69ed71fad11d541f89eee1238c587a03a9cf59cb +Author: Matt Turner <mattst88@gmail.com> +Date: Tue Feb 21 00:05:45 2012 +0000 + + mmx: enable over_x888_8_8888 on ARM/iwMMXt + + before: over_x888_8_8888 = L1: 7.63 L2: 7.72 M: 6.44 ( 19.17%) HT: 6.24 VT: 6.11 R: 5.87 RT: 4.61 ( 51Kops/s) + after : over_x888_8_8888 = L1: 11.88 L2: 11.11 M: 8.70 ( 26.01%) HT: 8.15 VT: 8.07 R: 7.76 RT: 5.62 ( 61Kops/s) + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 
a14f0f66bba987d5cdcb4a3e0f3e9f7c35d3f6f0 +Author: Matt Turner <mattst88@gmail.com> +Date: Mon Feb 20 18:36:24 2012 -0500 + + autoconf: use #error instead of error + + We'd rather see the actual #error message rather than a syntax error in + config.log. + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit fced5c82c2f0d6d00cb8d0a30ce6a04ec196d274 +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Feb 17 18:17:49 2012 -0500 + + Convert while (w) to if (w) when possible + + Missed in commit 57fd8c37. + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit e27bdcd968e786079353432d14816600bf813d76 +Author: Matt Turner <mattst88@gmail.com> +Date: Wed Feb 15 18:16:42 2012 -0500 + + Make sure to run AC_SUBST IWMMXT_CFLAGS + + Allows you to compile without -flax-vector-conversions in your CFLAGS, + though -march=iwmmxt2 is still necessary since specifying some other + -march= value will override it, and disable iwmmxt. + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 82a3980701c68949ed762b1e93dc81698db1613e +Author: Jeremy Huddleston <jeremyhu@apple.com> +Date: Sat Feb 11 01:04:13 2012 -0800 + + configure.ac: Add an --enable-libpng option + + Now there is a way to not link against libpng even if it's available. + + Signed-off-by: Jeremy Huddleston <jeremyhu@apple.com> + +commit 46fc4eb234f5c4f281c2901ea7514ff69e8670a8 +Author: Matt Turner <mattst88@gmail.com> +Date: Sat Feb 11 23:21:45 2012 -0500 + + Use AC_LANG_SOURCE for iwMMXt configure program + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit e5555d7a749c90288463ed1c294f58963c607e52 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Jan 25 14:03:05 2012 -0500 + + Revert "Reject trapezoids where top (botttom) is above (below) the edges" + + Cairo 1.10 will sometimes generate trapezoids like this, so we can't + consider them invalid. Fixes bug 45009, reported by Michael Biebl. + + This reverts commit 2437ae80e5066dec9fe52f56b016bf136d7cea06. 
+ +commit 35577876978e86783d49c500b4bb7ea1fc7fa89c +Author: Bobby Salazar <bobby8934@gmail.com> +Date: Thu Jan 26 13:19:18 2012 -0500 + + iOS Runtime Detection Support For ARM NEON + + This patch adds runtime detection support for the ARM NEON fast paths + for code compiled with the iOS SDK. + +commit 86ce1808829e3fa024acb0ebaa93ef9737ba51af +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Dec 19 19:31:25 2011 -0500 + + test: Port composite test over to use new pixel_checker_t object. + + Also make some tweaks to the way the errors are printed. + +commit f57034f678b419c3737b888f643e5bdfcaf727f9 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Dec 19 17:31:06 2011 -0500 + + test: Add a new "pixel_checker_t" object. + + Add a new pixel_checker_t object to test/utils.[ch]. This object + should be initialized with a format and can then be used to check + whether a given "real" pixel in that format is close enough to a + "perfect" pixel given as a double precision ARGB struct. + + The acceptable deviation is calcuated as follows. Each channel of the + perfect pixel has 0.004 subtracted from it and is then converted to + the format. The resulting value is the minimum value that will be + accepted. Similarly, to compute the maximum value, the channel has + 0.004 added to it and is then converted to the given format. Checking + a pixel is then a matter of splitting it into channels and checking + that each is within the computed bounds. + + The value of 0.004 was chosen because it is the minimum one that will + make the existing composite test pass (see next commit). A problem + with this value is that it causes 0xFE to be acceptable when the + correct value is 1.0, and 0x01 to be acceptable when the correct value + is 0. It would be better if, when the result is exactly 0 or exactly + 1, an a8r8g8b8 pixel were required to produce exactly 0x00 or 0xff to + preserve full black and full white. 
A deviation value of 0.003 would + produce this, but currently this would cause tests with operators that + involve divisions to fail. + +commit 0053a9f8694c837388b78ae26fe81979d0327d28 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Dec 19 19:53:28 2011 -0500 + + Rename color_correct() to round_color() + + And do the rounding from float to int in the same way cairo does: by + multiplying with (1 << width), then subtracting one when the input was 1.0. + +commit 55a010bf31d2eaf71126bdf93eca99fc02037535 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Dec 22 18:15:02 2011 -0500 + + Move the color_correct() function from composite.c to utils.c + +commit 065666f33c414582425e4ac0ec9f694e93c2baf1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Jan 8 10:32:47 2012 -0500 + + Get rid of delegates for combiners + + Add a new function _pixman_implementation_lookup_combiner() that will + find a usable combiner given an operator and information about whether + the combiner should apply component alpha and whether it should be 64 + bit. + + In pixman-general.c use this function to look up a combiner up front + instead of walking the delegate chain for every scanline. + +commit ab584ab500b4e7011a5b82051a90e2eea6744270 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jan 7 17:11:45 2012 -0500 + + test/alphamap.c: Make dst and orig_dst more independent of each other + + When making the copy of the destination, do so separately for the + image and the alpha map. This ensures that the alpha channel of the + alpha map will be different from the alpha channel of the actual + image. + + Previously, orig_dst would be copied onto dst along with its alpha + map, which mean that the alpha map of orig_dst would become the new + alpha channel of *both* dst and dst's alpha map. This meant that test + didn't actually test that the alpha maps alpha channel was actually + fetched. 
+ +commit 4613f2caac595b3fa1298ac49f9c9fdcd907f14a +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jan 7 16:48:00 2012 -0500 + + Fix bugs with alpha maps + + The alpha channel from the alpha map must be inserted as the new alpha + channel when a scanline is fetched from an image. Previously the alpha + map would overwrite the buffer instead. This wasn't caught be the + alpha map test because it would only verify that the resulting alpha + channel was correct, and not pay attention to incorrect color + channels. + +commit 8bd63634cd2b2d92145b1d52b54b91ebcb9fb1b4 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jan 7 14:32:08 2012 -0500 + + test: In the alphamap test, also test that we get the right red value + + There is a bug where the red channel of the alpha map of the + destination image is used instead of the red channel of the + destination image itself. + +commit 007d8b1813e34a7f881d9b241806f8323e9667cd +Author: Alan Coopersmith <alan.coopersmith@oracle.com> +Date: Fri Dec 23 16:32:57 2011 -0800 + + Make mmx code compatible with Solaris Studio 12.3 compilers + + Rearranged some of the existing gcc & Intel compiler checks to allow + easier sharing of common cases among the compilers. + + Signed-off-by: Alan Coopersmith <alan.coopersmith@oracle.com> + +commit 37572455866114cbb8bb1bf3acfb1c61d200f98c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Dec 20 06:32:26 2011 -0500 + + Fix rounding for DIV_UNc() + + We need to compute floor (a/b * 255 + 0.5), not floor (a / b * 255), + so add b/2 to the numerator in the DIV_UNc() macro. + +commit 2437ae80e5066dec9fe52f56b016bf136d7cea06 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Dec 22 11:37:26 2011 -0500 + + Reject trapezoids where top (botttom) is above (below) the edges + + When a trapezoid has a top/bottom that is above/below the left/right + edges, degenerate trapezoids become possible. For example the edge + could be very short and close to horizontal. 
If the bottom edge is far + below the bottom point of such a short edge, the result is that the + lower right corner of the trapezoid will be extremely far to the left. + + This kind of trapezoid causes overflows in the rasterization code, so + change pixman_trapezoid_valid() to reject them. + +commit 6a8192b6dd88b833bb918de28331d3a85c84a4f7 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Dec 20 06:34:41 2011 -0500 + + In MUL_UNc() cast to comp2_t + + Otherwise, when comp1_t is 16 bits wide, we can end up with a signed + integer overflow. + +commit 33ac0a9084aabd0e47fb1c9e5638eafc809c52cb +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Dec 21 08:19:05 2011 -0500 + + Fix a bunch of signed overflow issues + + In pixman-fast-path.c: (1 << 31) - 1 causes a signed overflow, so + change to (1U << n) - 1. + + In pixman-image.c: The check for whether m10 == -m01 will overflow + when -m01 == INT_MIN. Instead just check whether the variables are 1 + and -1. + + In pixman-utils.c: When the depth of the topmost channel is 0, we can + end up shifting by 32. + + In blitters-test.c: Replicating the mask would end up shifting more + than 32. + + In region-contains-test.c: Computing the average of two large integers + could overflow. Instead add half the difference between them to the + first integer. + + In stress-test.c: Masking the value in fake_reader() would sometimes + shift by 32. Instead just use the most significant bits instead of + the least significant. + + All these issues were found by the IOC tool: + + http://embed.cs.utah.edu/ioc/ + +commit d788f762788c2178970ff0ff2cb6e0097171cc3c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Dec 18 09:54:47 2011 -0500 + + Add missing cast in _pixman_edge_multi_init() + + nx and e->dy are both 32 bit quantities, so a cast is needed to make + sure their product is 64 bit before subtracting it from a 64 bit + quantity. 
+ +commit 4f3fe9c9096b2261217c2d4beb7d5eb8e578ed76 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Dec 18 08:16:45 2011 -0500 + + Fix some signed overflow bugs + + In the macros for the PDF blend modes, two comp1_t variables are + multiplied together and then used as if the result were a + comp4_t. When comp1_t is a uint8_t, this is fine because they are + promoted to int, and the product of two uint8_ts fits in an + int. However, when comp1_t is uint16, the product does not necessarily + fit in an int, so casts are necessary. + + Fix for bug 43906, reported by Siarhei Siamashka. + +commit 3e93bba3b04b42c2ab99d828dae12c18f29bcf7d +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Jan 5 10:37:51 2012 -0500 + + pixman-image.c: Fix typo in pixman_image_set_transform() + + A parenthesis was misplaced so that the size argument to memcmp() was + always 0. The bug is harmless except that the flags might be + unnecessarily recomputed in some cases. + + A bug reporting this in Mozilla's fork was discovered here: + + https://bugzilla.mozilla.org/show_bug.cgi?id=710992 + +commit ae651e7e739253f79f345f9fcbacad8627da0d85 +Author: Colin Walters <walters@verbum.org> +Date: Wed Jan 4 08:06:05 2012 -0500 + + autogen.sh: Support GNOME Build API + + http://people.gnome.org/~walters/docs/build-api.txt + +commit 89498a1178bc173857f3d1ee1f889afcc58b21b6 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Dec 18 07:29:59 2011 -0500 + + gradient-walker: For NONE repeats, when x < 0 or x > 1, set both colors to 0 + + ec7c9c2b6865b48b8bd14e4 introduced a bug where NONE gradients would be + misrendered, causing the area outside the gradient to be treated as a + (very) long fade to transparent.The problem was that a check for + positions outside the gradients were dropped in favor of relying on + the sentinels. + + Aside from misrendering, this also caused a signed integer overflow + when the code would compute a stepper size based on MIN_INT32. 
+ + This patches fixes the issue by reinstating a check for these cases + and setting both the right and left colors to transparent black. + +commit d0091a33fcdb49b65a6f20f775cfde520380b1fa +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Dec 21 05:19:00 2011 -0500 + + Modify gradient-test to show a bug in NONE processing + + This patch modifies demos/gradient-test to display a bug in gradients + with a repeat mode of NONE. With the current gradient code, the left + side will be a solid red (actually an extremely long fade from solid + red to transparent) instead of a sharp transition from red to green. + +commit 9db980551518a09069b8ade34743238329a36661 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 9 03:59:04 2011 -0500 + + region: Add pixman_region{,32}_clear() functions. + + These functions simply reset the region to empty. They are equivalent + to + + pixman_region_fini (&region); + pixman_region_init (&region); + +commit 6b9d6a91ed4a85f27d7e5824ce2a63f37876e937 +Author: Bobby Salazar <bobby8934@gmail.com> +Date: Tue Dec 13 02:03:16 2011 -0500 + + Android Runtime Detection Support For ARM NEON + + This patch adds runtime detection support for the ARM NEON fast paths + for code compiled with the Android NDK. This is the only code change + needed to enable the ARM NEON pixman fast paths for the ever growing + Android platform (200 million+ smartphones, tablets, etc.). Just make + sure to #define USE_ARM_NEON in your makefile. 
+ +commit 84450c411cc93309bb1d1b1f555640b3ad105500 +Author: Naohiro Aota <naota@gentoo.org> +Date: Thu Nov 24 13:12:15 2011 +0100 + + Don't use non-POSIX test + + test "$test_CFLAGS" == "" && \ + + may cause an error on some POSIX shells and uses a style which is not + consistent with the other tests in configure.ac + + Fixes https://bugs.freedesktop.org/show_bug.cgi?id=42588 and + https://bugs.gentoo.org/show_bug.cgi?id=387087 + +commit 9985febd7847b7a9c09d6395db7f89490c83be30 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Tue Nov 8 22:00:46 2011 +0100 + + test: Produce autotools-looking report in the win32 build system + + Tweak the commands used to run the tests on win32 to make the output + look mostly like that produced by the autotools test system. + + In addition to this, make sure that the exit status of the test target + is success (0) if and only if no failure occurred. + +commit b31da39f6f65d1784fc2f6915c30eb011cc2893b +Author: Andrea Canciani <ranma42@gmail.com> +Date: Thu Nov 3 11:07:25 2011 +0100 + + demos: Consistently use G_N_ELEMENTS() + + Instead of open-coding G_N_ELEMENTS(), just use it. + +commit 1662c94348eda19ec35db2625749febd1dceb35e +Author: Andrea Canciani <ranma42@gmail.com> +Date: Thu Nov 3 10:53:10 2011 +0100 + + test: Reuse the ARRAY_LENGTH() macro + + It is provided by utils.h, there is no need to redefine it. 
+ +commit 97b9fa090c54f6feab54bde272df374a13c0c84d +Author: Andrea Canciani <ranma42@gmail.com> +Date: Thu Nov 3 10:51:27 2011 +0100 + + Use the ARRAY_LENGTH() macro when possible + + This patch has been generated by the following Coccinelle semantic patch: + + // Use the ARRAY_LENGTH() macro when possible + // + // Replace open-coded array length computations with the + // ARRAY_LENGTH() macro + + @@ + type T; + T[] E; + @@ + - (sizeof(E)/sizeof(T)) + + ARRAY_LENGTH (E) + +commit 06760f5cb069bdc041af7a0e73c9d5fc08741f28 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Thu Nov 3 10:40:24 2011 +0100 + + test: Cleanup includes + + All the tests are linked to libutil, hence it makes sence to always + include utils.h and reuse what it provides (config.h inclusion, access + to private pixman APIs, ARRAY_LENGTH, ...). + +commit cbd88a9416d9b33e6589e3f857ee839559a7e4de +Author: Andrea Canciani <ranma42@gmail.com> +Date: Thu Nov 3 10:21:41 2011 +0100 + + Remove useless checks for NULL before freeing + + This patch has been generated by the following Coccinelle semantic patch: + + // Remove useless checks for NULL before freeing + // + // free (NULL) is a no-op, so there is no need to avoid it + + @@ + expression E; + @@ + + free (E); + + E = NULL; + - if (unlikely (E != NULL)) { + - free(E); + ( + - E = NULL; + | + - E = 0; + ) + ... 
+ - } + + @@ + expression E; + @@ + + free (E); + - if (unlikely (E != NULL)) { + - free (E); + - } + +commit 8d72d35b29b0fe0345e21525db9e5f25876364be +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Nov 6 16:36:01 2011 -0500 + + Post-release version bump to 0.25.1 + +commit 973dc7d319f373fc1bbb91ea54e8a7116cfaa932 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Nov 6 16:10:33 2011 -0500 + + Pre-release version bump to 0.24.0 + +commit 6bf590f38577b4c3c6f4876291360ef95086fb37 +Author: Alan Coopersmith <alan.coopersmith@oracle.com> +Date: Sun Oct 30 09:12:06 2011 -0700 + + Change MMX ldq_u to return _m64 instead of forcing all callers to cast + + Sun/Oracle Studio compilers allow the pointers to be cast, but not the + non-pointer forms, causing pixman compiles to fail with many errors of: + "pixman-mmx.c", line 1411: invalid cast expression + + Signed-off-by: Alan Coopersmith <alan.coopersmith@oracle.com> + +commit 5d7f5bc8ee178588194cb6acc2e0ceb6ff926d72 +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Wed Nov 2 18:49:58 2011 -0400 + + Add definitions of INT64_MIN and INT64_MAX + +commit 697cfe15377a8c420764ff824c0a8c2c8ff2148c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Oct 29 05:51:44 2011 -0400 + + Post-release version bump to 0.23.9 + +commit a0f1b565811388b0567c845b9b7063d5b93d325e +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Oct 29 05:33:44 2011 -0400 + + Pre-release version bump to 0.23.8 + +commit 498138c293a2abce44ce122114852f4e6c5b87fe +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Oct 25 08:45:34 2011 -0400 + + Fix use of uninitialized fields reported by valgrind + + In pixman-noop.c and pixman-sse2.c, we are accessing + image->bits.width/height without first making sure the image is a bits + image. The warning is harmless because we never act on this + information without checking that the image is a8r8g8b8, but valgrind + does warn about it. 
+ + In pixman-noop.c, just reorder the clauses in the if statement; in + pixman-sse2.c require images to have the FAST_PATH_BITS_IMAGE flag + set. + +commit 6131707e8fc39187d1d358481f7c57c57cfab206 +Merge: 3d4d705 ec7c9c2 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Oct 20 09:13:12 2011 -0400 + + Merge branch 'gradients' + +commit 3d4d705d2ffa4aeab3dc02a23c2aadbea1374a3f +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Tue Oct 18 21:50:18 2011 +0900 + + ARM: NEON: Fix assembly typo error in src_n_8_8888 + + Binutils 2.21 does not complain about missing comma between ARM + register and alignement specifier in vld/vst instructions which + causes build error on binutils 2.20. + +commit 19f118f41f8725f22395d31eac5670cb350b55ec +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Mon Sep 26 18:33:27 2011 +0900 + + ARM: NEON: Standard fast path src_n_8_8 + + Performance numbers of before/after on cortex-a8 @ 1GHz + + - before + L1: 28.05 L2: 28.26 M: 26.97 ( 4.48%) HT: 19.79 VT: 19.14 R: 17.61 RT: 9.88 ( 101Kops/s) + + - after + L1:1430.28 L2:1252.10 M:421.93 ( 75.48%) HT:170.16 VT:138.03 R:145.86 RT: 35.51 ( 255Kops/s) + +commit 4db9e2bc13d3ed26416f249e57acec4b41f58b7f +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Mon Sep 26 17:03:54 2011 +0900 + + ARM: NEON: Standard fast path src_n_8_8888 + + Performance numbers of before/after on cortex-a8 @ 1GHz + + - before + L1: 32.39 L2: 31.79 M: 30.84 ( 13.77%) HT: 21.58 VT: 19.75 R: 18.83 RT: 10.46 ( 106Kops/s) + + - after + L1: 516.25 L2: 372.00 M:193.49 ( 85.59%) HT:136.93 VT:109.10 R:104.48 RT: 34.77 ( 253Kops/s) + +commit 26659de6cd2775c83a9a6e6660324d5baacf61f9 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Mon Sep 26 19:04:53 2011 +0900 + + ARM: NEON: Instruction scheduling of bilinear over_8888_8_8888 + + Instructions are reordered to eliminate pipeline stalls and get + better memory access. 
+ + Performance of before/after on cortex-a8 @ 1GHz + + << 2000 x 2000 with scale factor close to 1.x >> + before : 40.53 Mpix/s + after : 50.76 Mpix/s + +commit 4481920f405e47b3a92811a8cb06afbd37dee01b +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Wed Sep 21 15:52:13 2011 +0900 + + ARM: NEON: Instruction scheduling of bilinear over_8888_8888 + + Instructions are reordered to eliminate pipeline stalls and get + better memory access. + + Performance of before/after on cortex-a8 @ 1GHz + + << 2000 x 2000 with scale factor close to 1.x >> + before : 50.43 Mpix/s + after : 61.09 Mpix/s + +commit 1cd916f3a5ebeb943f66eecf0b8ce99af0b95d11 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Fri Sep 23 00:03:22 2011 +0900 + + ARM: NEON: Replace old bilinear scanline generator with new template + + Bilinear scanline functions in pixman-arm-neon-asm-bilinear.S can + be replaced with new template just by wrapping existing macros. + +commit 6682b2b3597c9f431900bfe7b1b42dfbe006bae5 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Tue Sep 20 21:32:35 2011 +0900 + + ARM: NEON: Bilinear macro template for instruction scheduling + + This macro template takes 6 code blocks. + + 1. process_last_pixel + 2. process_two_pixels + 3. process_four_pixels + 4. process_pixblock_head + 5. process_pixblock_tail + 6. process_pixblock_tail_head + + process_last_pixel does not need to update horizontal weight. This + is done by the template. two and four code block should update + horizontal weight inside of them. head/tail/tail_head blocks + consist unrolled core loop. You can apply instruction scheduling + to the tail_head blocks. + + You can also specify size of the pixel block. Supported size is 4 + and 8. If you want to use mask, give BILINEAR_FLAG_USE_MASK flags + to the template, then you can use register MASK. When using d8~d15 + registers, give BILINEAR_FLAG_USE_ALL_NEON_REGS to make sure + registers are properly saved on the stack and later restored. 
+ +commit b5e4355fa4973e3edd4abeb11bdc47c42371cc76 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Tue Sep 20 19:46:25 2011 +0900 + + ARM: NEON: Some cleanup of bilinear scanline functions + + Use STRIDE and initial horizontal weight update is done before + entering interpolation loop. Cache preload for mask and dst. + +commit ec7c9c2b6865b48b8bd14e4509538f8fcbe93463 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Oct 14 09:04:48 2011 -0400 + + Simplify gradient_walker_reset() + + The code that searches for the closest color stop to the given + position is duplicated across the various repeat modes. Replace the + switch with two if/else constructions, and put the search code between + them. + +commit 2d0da8ab8d8fef60ed1bbb9d6b75f66577c3f85d +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Oct 14 09:02:14 2011 -0400 + + Use sentinels instead of special casing first and last stops + + When storing the gradient stops internally, allocate two more stops, + one before the beginning of the stop list and one after the + end. Initialize those stops based on the repeat property of the + gradient. + + This allows gradient_walker_reset() to be simplified because it can + now simply pick the two closest stops to the position without special + casing the first and last stops. + +commit 84d6ca7c891601b019d4862a556ed98b7e6fe525 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Oct 14 07:42:00 2011 -0400 + + gradient walker: Correct types and fix formatting + + The type of pos in gradient_walker_reset() and gradient_walker_pixel() + is pixman_fixed_48_16_t and not pixman_fixed_32_32. The types of the + positions in the walker struct are pixman_fixed_t and not int32_t, and + need_reset is a boolean, not an integer. The spread field should be + called repeat and have the type pixman_repeat_t. + + Also fix some formatting issues, make gradient_walker_reset() static, + and delete the pointless PIXMAN_GRADIENT_WALKER_NEED_RESET() macro. 
+ +commit ace225b53dee88d134753ac901f26ba3db6781da +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Oct 11 16:12:24 2011 -0400 + + Add stable release / development snapshot to draft release notes + + This will hopefully serve as a reminder to me that I should put this + information in the release notes. + +commit bb7142d361d56d66ac40debb60a7c4d099764ba8 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Oct 11 06:10:39 2011 -0400 + + Post-release version bump to 0.23.7 + +commit e20ac40bd30484f0f711b52d0c1993ef08760284 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Oct 11 06:00:51 2011 -0400 + + Pre-release version bump to 0.23.6 + +commit a43946a51fbbdc76be1af9bc25fe7c5c2a6314bb +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Thu Sep 22 18:42:38 2011 +0900 + + Simple repeat: Extend too short source scanlines into temporary buffer + + Too short scanlines can cause repeat handling overhead and optimized + pixman composite functions usually process a bunch of pixels in a + single loop iteration it might be beneficial to pre-extend source + scanlines. The temporary buffers will usually reside in cache, so + accessing them should be quite efficient. + +commit eaff774a3f8af6651a245711b9ab8af3211eeb10 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Mon Aug 29 21:44:36 2011 +0900 + + Simple repeat fast path + + We can implement simple repeat by stitching existing fast path + functions. First lookup COVER_CLIP function for given input and + then stitch horizontally using the function. + +commit a258e33fcb6cf08a2ef76e374cb92a12c9adb171 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Thu Sep 22 16:33:02 2011 +0900 + + Move _pixman_lookup_composite_function() to pixman-utils.c + +commit fc62785aabbe890b656c9cbaa57f2e65e74bbcc2 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jun 27 21:17:04 2011 +0000 + + Add src, mask, and dest flags to the composite args struct. 
+ + These flags are useful in the various compositing routines, and the + flags stored in the image structs are missing some bits of information + that can only be computed when pixman_image_composite() is called. + +commit fa6523d13ae9b7986bb890df5ad66e8599bc3ed8 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Thu Sep 22 16:26:55 2011 +0900 + + Add new fast path flag FAST_PATH_BITS_IMAGE + + This fast path flag indicate that type of the image is bits image. + +commit 7272e2fcd2ff8e546cef19929cd370ae2f946135 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Thu Sep 22 16:20:03 2011 +0900 + + init/fini functions for pixman_image_t + + pixman_image_t itself can be on stack or heap. So segregating + init/fini from create/unref can be useful when we want to use + pixman_image_t on stack or other memory. + +commit 4dcf1b0107334857e1f0bb203c34efed1146535c +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Wed Sep 7 23:00:29 2011 +0900 + + sse2: Bilinear scaled over_8888_8_8888 + +commit 81050f2784407b260a1897efa921631a19eeec6b +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Wed Sep 7 22:57:29 2011 +0900 + + sse2: Bilinear scaled over_8888_8888 + +commit d67c0b883daeeaacf3f21f1ddbdcf9ecf94fac43 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Wed Sep 7 22:51:46 2011 +0900 + + sse2: Macros for assembling bilinear interpolation code fractions + + Primitive bilinear interpolation code is reusable to implement other + bilinear functions. + + BILINEAR_DECLARE_VARIABLES + - Declare variables needed to interpolate src pixels. + + BILINEAR_INTERPOLATE_ONE_PIXEL + - Interpolate one pixel and advance to next pixel + + BILINEAR_SKIP_ONE_PIXEL + - Skip interpolation and just advance to next pixel + This is useful for skipping zero mask + +commit 741eb8462c3ff72cbf2d9acfeb1e97208a414fcd +Author: Matt Turner <mattst88@gmail.com> +Date: Thu Oct 6 17:56:09 2011 -0400 + + Correct the minimum gcc version needed for iwmmxt + + Spotted by Søren Sandmann. 
+ + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 0a34277180d29f471a2554afc2e2b682fee33c79 +Author: Matt Turner <mattst88@gmail.com> +Date: Wed Oct 5 22:54:36 2011 -0400 + + Make sure iwMMXt is only detected on ARM + + iwMMXt is incorrectly detected on x86 and amd64. This happens because + the test uses standard _mm_* intrinsic functions which it compiles with + -march=iwmmxt, but when the user has set CFLAGS=-march=k8 for instance, + no error is generated from -march=iwmmxt, even though it's not a valid + flag on x86/amd64. Passing CFLAGS=-march=native does not override the + -march=iwmmxt flag though, which is why it wasn't noticed before. + + So, just #error out in the test if the __arm__ preprocessor directive + isn't defined. + + Fixes https://bugs.gentoo.org/show_bug.cgi?id=385179 + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 879b7c21e45b092272e689e05dc867f6260e258f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Sep 27 11:32:13 2011 -0400 + + Don't include stdint.h in scaling-helpers-test. + + Fixes bug 41257. + +commit 01c2dcbe6936a868651160d2617d783d5b8d3d7d +Author: Benjamin Otte <otte@redhat.com> +Date: Wed Sep 14 17:52:03 2011 +0200 + + build: replace @VAR@ with $(VAR) in makefiles + +commit 100f16eae94a54fbb9ee1f44fa3c34602ba25c4d +Author: Benjamin Otte <otte@redhat.com> +Date: Wed Sep 14 17:01:51 2011 +0200 + + tests: Add PNG_CFLAGS/LIBS to tests + + PNG flags were accidentally included by gdk-pixbuf. This has been fixed + recently, so we need to make sure to include it ourselves. 
+ +commit d1313febbec2124ee175cd323a338caa3c1a8fc2 +Author: Matt Turner <mattst88@gmail.com> +Date: Thu Sep 22 15:28:00 2011 -0400 + + mmx: optimize unaligned 64-bit ARM/iwmmxt loads + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 7ab94c5f99cc524ddfbbcedca4304ec7943f74e1 +Author: Matt Turner <mattst88@gmail.com> +Date: Sun Jul 31 22:42:24 2011 -0400 + + mmx: compile on ARM for iwmmxt optimizations + + Check in configure for at least gcc-4.6, since gcc-4.7 (and hopefully + 4.6) will be the earliest version capable of compiling the _mm_* + intrinsics on ARM/iwmmxt. Even for suitable compiler versions I use + _mm_srli_si64 which is known to cause unpatched compilers to fail. + + Select iwmmxt at runtime only after NEON, since we expect the NEON + optimizations to be more capable and faster than iwmmxt. + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit f66887d9eae9646c838d518020168b1403705b1e +Author: Matt Turner <mattst88@gmail.com> +Date: Sun Sep 4 14:11:46 2011 -0400 + + mmx: prepare pixman-mmx.c to be compiled for ARM/iwmmxt + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 7c6d5d1999989187b60f1e0e82e55ed55238eb27 +Author: Matt Turner <mattst88@gmail.com> +Date: Thu Sep 8 20:33:45 2011 +0200 + + mmx: fix unaligned accesses + + Simply return *p in the unaligned access functions, since alignment + constraints are very relaxed on x86 and this allows us to generate + identical code as before. + + Tested with the test suite, lowlevel-blit-test, and cairo-perf-trace on + ARM and Alpha with no unaligned accesses found. 
+ + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 5d98abb14ca9042af6d0ec7c14c8398cf4046b80 +Author: Matt Turner <mattst88@gmail.com> +Date: Thu Sep 22 15:39:53 2011 -0400 + + mmx: wrap x86/MMX inline assembly in ifdef USE_X86_MMX + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 02c1f1a022e86ced69fc91376232d75d5d6583c5 +Author: Matt Turner <mattst88@gmail.com> +Date: Sun Jul 31 20:20:12 2011 +0000 + + mmx: rename USE_MMX to USE_X86_MMX + + This will make upcoming ARM usage of pixman-mmx.c unambiguous. + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 57fd8c37aa3148b1d70bad65e1a49721e9a47d7e +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Sep 23 14:10:52 2011 -0400 + + mmx: convert while (w) to if (w) when possible + + gcc isn't able to see that w is no greater than 1, so it generates + unnecessary loop instructions with while (w). + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 38a7aae1d9c8e1e41de22f9c3846dfc975af6838 +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Sep 9 15:33:14 2011 +0200 + + mmx: fix formats in commented code + + b8r8g8 is apparently no longer supported sometime since this code was + commented. + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit b6b77488a0259da3662edf68568e78806ca97444 +Author: Matt Turner <mattst88@gmail.com> +Date: Fri Sep 9 15:34:04 2011 +0200 + + lowlevel-blt: add over_x888_8_8888 + + Signed-off-by: Matt Turner <mattst88@gmail.com> + +commit 9126f36b964c71b83c69235df4c3a46ab81ab5d5 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sun May 22 22:51:00 2011 +0300 + + BILINEAR->NEAREST filter optimization for simple rotation and translation + + Simple rotation and translation are the additional cases when BILINEAR + filter can be safely reduced to NEAREST. 
+ +commit ad5c6bbb36c1c5e72313f7c7bc7c6e6b7e79daba +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Sep 4 02:53:39 2011 -0400 + + Strength-reduce BILINEAR filter to NEAREST filter for identity transforms + + An image with a bilinear filter and an identity transform is + equivalent to one with a nearest filter, so there is no reason the + standard fast paths shouldn't be usable. + + But because a BILINEAR filter samples a 2x2 pixel block in the source + image, FAST_PATH_SAMPLES_COVER_CLIP can't be set in the case where the + source area is the entire image, because some compositing operations + might then read pixels outside the image. + + This patch fixes the problem by splitting the + FAST_PATH_SAMPLES_COVER_CLIP flag into two separate flags + FAST_PATH_SAMPLES_COVER_CLIP_NEAREST and + FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR that indicate that the clip + covers the samples taking into account NEAREST/BILINEAR filters + respectively. + + All the existing compositing operations that require + FAST_PATH_SAMPLES_COVER_CLIP then have their flags modified to pick + either COVER_CLIP_NEAREST or COVER_CLIP_BILINEAR depending on which + filter they depend on. + + In compute_image_info() both COVER_CLIP_NEAREST and + COVER_CLIP_BILINEAR can be set depending on how much room there is + around the clip rectangle. + + Finally, images with an identity transform and a bilinear filter get + FAST_PATH_NEAREST_FILTER set as well as FAST_PATH_BILINEAR_FILTER. + + Performance measurements with render_bench against Xephyr: + + Before + + *** ROUND 1 *** + --------------------------------------------------------------- + Test: Test Xrender doing non-scaled Over blends + Time: 5.720 sec. + --------------------------------------------------------------- + Test: Test Xrender (offscreen) doing non-scaled Over blends + Time: 5.149 sec. + --------------------------------------------------------------- + Test: Test Imlib2 doing non-scaled Over blends + Time: 6.237 sec. 
+ + After: + + *** ROUND 1 *** + --------------------------------------------------------------- + Test: Test Xrender doing non-scaled Over blends + Time: 4.947 sec. + --------------------------------------------------------------- + Test: Test Xrender (offscreen) doing non-scaled Over blends + Time: 4.487 sec. + --------------------------------------------------------------- + Test: Test Imlib2 doing non-scaled Over blends + Time: 6.235 sec. + +commit eb2e7ed81b324af730c1a7639c9ca9ed60152875 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Sep 5 14:43:25 2011 -0400 + + test: Occasionally use a BILINEAR filter in blitters-test + + To test that reductions of BILINEAR->NEAREST for identity + transformations happen correctly, occasionally use a bilinear filter + in blitters test. + +commit 2a9f88430e7088eccfbbbd6c6b4f4e534126b1e1 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sun May 22 22:16:38 2011 +0300 + + test: better coverage for BILINEAR->NEAREST filter optimization + + The upcoming optimization which is going to be able to replace BILINEAR filter + with NEAREST where appropriate needs to analyze the transformation matrix + and not to make any mistakes. + + The changes to affine-test include: + 1. Higher chance of using the same scale factor for x and y axes. This can help + to stress some special cases (for example the case when both x and y scale + factors are integer). The same applies to x/y translation. + 2. Introduced a small chance for "corrupting" transformation matrix by flipping + random bits. This supposedly can help to identify the cases when some of the + fast paths or other code logic is wrongly activated due to insufficient checks. 
+ +commit 054922e2fce1f8d9db4b9b756e54b0fa5655956d +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Sep 5 00:19:51 2011 -0400 + + Eliminate compute_sample_extents() function + + In analyze_extents(), instead of calling compute_sample_extents() call + compute_transformed_extents() and inline the remaining part of + compute_sample_extents(). The upcoming bilinear->nearest optimization + will do something different with these two pieces of code. + +commit 577b6c46fd39c43c2c328fed48854b50b9e85e5b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Sep 4 17:43:29 2011 -0400 + + Split computation of sample area into own function + + compute_sample_extents() have two parts: one that computes the + transformed extents, and one that checks whether the computed extents + fit within the 16.16 coordinate space. + + Split the first part into its own function + compute_transformed_extents(). + +commit 5064f1803136cbc28d1e9f636feb2ff8ccfbdded +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Sep 4 17:17:53 2011 -0400 + + Remove x and y coordinates from analyze_extents() and compute_sample_extents() + + These coordinates were only ever used for subtracting from the extents + box to put it into the coordinate space of the image, so we might as + well do this coordinate translation only once before entering the + functions. + +commit dbcb4af60d8c688eaaa027c3c5bce9527a410465 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Aug 16 06:13:59 2011 -0400 + + Use MAKE_ACCESSORS() to generate accessors for paletted formats + + Add support in convert_pixel_from_a8r8g8b8() and + convert_pixel_to_a8r8g8b8() for conversion to/from paletted formats, + then use MAKE_ACCESSORS() to generate accessors for the indexed + formats: c8, g8, g4, c4, g1 + +commit c82c2c38538f5c3f25cf81ad697040d2332d64de +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun May 30 12:36:58 2010 -0400 + + Use MAKE_ACCESSORS() to generate accessors for the a1 format. 
+ + Add FETCH_1 and STORE_1 macros and use them to add support for 1bpp + pixels to fetch_and_convert_pixel() and convert_and_store_pixel(), + then use MAKE_ACCESSORS() to generate the accessors for the a1 + format. (Not the g1 format as it is indexed). + +commit 2114dd8aa1f292541e55b6b84152732b37c1c1eb +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Aug 16 14:38:44 2011 -0400 + + Use MAKE_ACCESSORS() to generate accessors for 24bpp formats + + Add FETCH_24 and STORE_24 macros and use them to add support for 24bpp + pixels in fetch_and_convert_pixel() and + convert_and_store_pixel(). Then use MAKE_ACCESSORS() to generate + accessors for the 24 bpp formats: + + r8g8b8 + b8g8r8 + +commit f19f5daa1b111368bcf75435dce12483e08756f2 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Aug 18 05:09:07 2011 -0400 + + Use MAKE_ACCESSORS() to generate accessors for 4 bpp RGB formats + + Use FETCH_4 and STORE_4 macros to add support for 4bpp pixels to + fetch_and_convert_pixel() and convert_and_store_pixel(), then use + MAKE_ACCESSORS() to generate accessors for 4 bpp formats, except g4 and + c4 which are indexed: + + a4 + r1g2b1 + b1g2r1 + a1r1g1b1 + a1b1g1r1 + +commit af78fe24e41c30b5c9b3da4449813c75f760c845 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Aug 18 08:13:58 2011 -0400 + + Use MAKE_ACCESSORS() to generate accessors for 8bpp RGB formats + + Add support for 8 bpp formats to fetch_and_convert_pixel() and + convert_and_store_pixel(), then use MAKE_ACCESSORS() to generate the + accessors for all the 8 bpp formats, except g8 and c8, which are + indexed: + + a8 + r3g3b2 + b2g3r3 + a2r2g2b2 + a2b2g2r2 + x4a4 + +commit 5e1b9f897532ac0fa220880bf94dd660c837afe9 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Aug 18 08:13:44 2011 -0400 + + Use MAKE_ACCESSORS() to generate accessors for all the 16bpp formats + + Add support for 16bpp pixels to fetch_and_convert_pixel() and + convert_and_store_pixel(), then use MAKE_ACCESSORS() 
to generate + accessors for all the 16bpp formats: + + r5g6b5 + b5g6r5 + a1r5g5b5 + x1r5g5b5 + a1b5g5r5 + x1b5g5r5 + a4r4g4b4 + x4r4g4b4 + a4b4g4r4 + x4b4g4r4 + +commit a77597bcb8d10afd66980b8db8839049181b3743 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Aug 18 08:13:30 2011 -0400 + + Use MAKE_ACCESSORS() to generate all the 32 bit accessors + + Add support for 32bpp formats in fetch_and_convert_pixel() and + convert_and_store_pixel(), then use MAKE_ACCESSORS() to generate + accessors for all the 32 bpp formats: + + a8r8g8b8 + x8r8g8b8 + a8b8g8r8 + x8b8g8r8 + x14r6g6b6 + b8g8r8a8 + b8g8r8x8 + r8g8b8x8 + r8g8b8a8 + +commit 814af33df3e9892e4fc790c7ccd2702ce2b8ea97 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Aug 17 17:27:58 2011 -0400 + + Add initial version of the MAKE_ACCESSORS() macro + + This macro will eventually allow the fetchers and storers to be + generated automatically. For now, it's just a skeleton that doesn't + actually do anything. + +commit 5cae7a3fe6e148d2bb42b86efb7daf27dbf12ee0 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Aug 15 18:42:38 2011 -0400 + + Add general pixel converter + + This function can convert between any <= 32 bpp formats. Nothing uses + it yet. + +commit 22f54dde6bbf87251a0b4ad93bacbdaa7cb508d8 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Aug 15 10:22:05 2011 -0400 + + Add a generic unorm_to_unorm() conversion utility + + This function can convert between normalized numbers of different + depths. When converting to higher bit depths, it will replicate the + existing bits, when converting to lower bit depths, it will simply + truncate. 
+ + This function replaces the expand16() function in pixman-utils.c + +commit d842669a467be490bb0a40000d0c0fccea0b1b85 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Sep 19 09:08:33 2011 -0400 + + A few tweaks to a comment in pixman-combine.c.template + + Include a link to + + http://marc.info/?l=xfree-render&m=99792000027857&w=2 + + where Keith explains how the disjoint/conjoint operators work. + +commit 3432e1a3444a55f71e294da7d350957a8e1232c3 +Author: Jon TURNEY <jon.turney@dronecode.org.uk> +Date: Mon Sep 19 06:17:58 2011 -0400 + + Fix build on cygwin after commit efdf65c0c4fff551fb3cd9104deda9adb6261e22 + + libutils depends on pixman and so needs to precede it in the link order + + Found by tinderbox, see [1] + + [1] http://tinderbox.freedesktop.org/builds/2011-09-15-0005/logs/pixman/#build + + Signed-off-by: Jon TURNEY <jon.turney at dronecode.org.uk> + +commit f9faf4df440366ed36b197dc09b1c2b51af3387b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Sep 12 23:17:39 2011 -0400 + + test: Use smaller boxes in region_contains_test() + + The boxes used in region_contains_test() sometimes overflow causing + + *** BUG *** + In pixman_region32_union_rect: Invalid rectangle passed + Set a breakpoint on '_pixman_log_error' to debug + + messages to be printed when pixman is compiled with DEBUG. Fix this by + dividing the x, y, w, h coordinates by 4 to prevent overflows. + +commit 9623b478f7e872af36ca77af5cc9e85f5ea132cf +Author: Andrea Canciani <ranma42@gmail.com> +Date: Sun Sep 4 21:33:05 2011 +0200 + + build-win32: Add 'check' target + + On win32 the tests are built but they are not run automatically by the + build system. + + A minimal 'check' target (depending on the tests being built) can + simply run them and log to the console their success/failure. 
+ +commit 479d0944851fffda7ed860523feb388fec028545 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Sun Sep 4 13:52:53 2011 -0700 + + test: Do not include config.h unless HAVE_CONFIG_H is defined + + The win32 build system does not generate config.h and correctly runs + the compiler without defining HAVE_CONFIG_H. Nevertheless some files + include config.h without checking for its availability, breaking the + build from a clean directory: + + test\utils.h(2) : fatal error C1083: Cannot open include file: + 'config.h': No such file or directory + ... + +commit d46a9f3acef21b50639c64f190a0de7eca21747c +Author: Andrea Canciani <ranma42@gmail.com> +Date: Sun Sep 4 21:56:20 2011 +0200 + + build-win32: Add root Makefile.win32 + + Add Makefile.win32 to the pixman root. This makefile can recursively + run the other ones to compile the library or the test suite. + +commit a76b78c2daa61900572014070d3e856a460fd554 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Sun Sep 4 18:00:38 2011 +0200 + + build-win32: Share targets and variables across win32 makefiles + + The win32 build system repeatedly defines some basic variables + (notably program names and flags) and C sources compilation rules. + + They can be factored out to a common Makefile, to be included in every + other Makefile.win32. + +commit efdf65c0c4fff551fb3cd9104deda9adb6261e22 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Sun Sep 4 20:07:42 2011 +0200 + + build: Reuse test sources + + Makefile.am and Makefile.win32 should not duplicate content, as this + leads to breaking the build when they are not kept in sync. + + This can be avoided by listing sources, headers and common build + variables/rules in a Makefile.sources file. + + In order to further simplify the test makefiles, the utility functions + are now in a static library, which gets linked to all the tests and + benchmarks. 
+ +commit a4f95d083b1aa644923d79e7b61df6c2eacca7c2 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Sun Sep 4 09:41:41 2011 -0700 + + build: Reuse sources and pixman-combine build rules + + Makefile.am and Makefile.win32 should not duplicate content, as this + leads to breaking the build when they are not kept in sync. + + This can be avoided by listing sources, headers and common build + variables/rules in a Makefile.sources file. + +commit 25bd96a3d0e935646d54c938bf065696d3a79e07 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Sun Sep 4 20:07:57 2011 +0200 + + test: Fix compilation on win32 + + Adding scaling-helpers-test to the testsuite on win32 makes MSVC + complain about int64_t being used as an expression: + + scaling-helpers-test.c(27) : error C2275: 'int64_t' : illegal use of + this type as an expression + +commit 9882d832f60419094c0b379b88fa344490ea36eb +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Sep 11 19:44:06 2011 -0400 + + Use pkg-config to determine the flags to use with libpng + + Previously we would unconditionally link with -lpng leading to build + failures on systems without libpng. + +commit 99a53667da3014a463b8a0e2b6c317efe0ebb220 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Feb 22 05:20:36 2011 -0500 + + test: New function to save a pixman image to .png + + When debugging it is often very useful to be able to save an image as + a png file. This commit adds a function "write_png()" that does that. + + If libpng is not available, then the function becomes a noop. 
+ +commit 1e1ae0bf6e9dd2189133539b9c34a0f6826b7393 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Sep 9 23:59:20 2011 -0400 + + Post-release version bump to 0.23.5 + +commit f901e3b58b5d710cf136af89fc7395942bea9dfb +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Sep 9 23:51:11 2011 -0400 + + Pre-release version bump to 0.23.4 + +commit f5da52b6774bdefdfa88a28fdc3904797adb7e26 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Aug 22 15:29:25 2011 +0100 + + bits: optimise fetching width==1 repeats + + Profiling ign.com, 20% of the entire render time was absorbed in this + single operation: + + << /content //COLOR_ALPHA /width 480 /height 800 >> surface context + << /width 1 /height 677 /format //ARGB32 /source <|!!!@jGb!m5gD']#$jFHGWtZcK&2i)Up=!TuR9`G<8;ZQp[FQk;emL9ibhbEL&NTh-j63LhHo$E=mSG,0p71`cRJHcget4%<S\X+~> >> image pattern + //EXTEND_REPEAT set-extend + set-source + n 0 0 480 677 rectangle + fill+ + pop + + which is a simple composition of a single pixel wide image. Sadly this + is a workaround for lack of independent repeat-x/y handling in cairo and + pixman. Worse still is that the worst-case behaviour of the general repeat + path is for width 1 images... 
+ + Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> + +commit 7ef44cae6ba6d1c2aae94cdc10851fa8d14821f7 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Fri Aug 19 21:20:08 2011 +0900 + + ARM: NEON better instruction scheduling of over_n_8888 + + New head, tail, tail/head blocks are added and instructions + are reordered to eliminate pipeline stalls + + Performance numbers of before/after + + - cortex a8 - + before : L1: 375.39 L2: 391.93 M:114.39 ( 40.99%) HT: 99.37 VT: 98.20 R: 90.24 RT: 32.87 ( 240Kops/s) + after : L1: 481.90 L2: 483.46 M:114.29 ( 40.69%) HT:106.91 VT: 93.38 R: 90.74 RT: 29.51 ( 236Kops/s) + + - cortex a9 - + before : L1: 324.50 L2: 332.79 M:155.55 ( 47.51%) HT:111.93 VT: 93.58 R: 71.92 RT: 28.21 ( 233Kops/s) + after : L1: 355.87 L2: 364.49 M:156.90 ( 47.59%) HT:111.52 VT: 91.76 R: 72.16 RT: 28.22 ( 234Kops/s) + +commit 6aa82b7a729ae7f0a26ae5a7c08ac74ebd5051cd +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Tue Aug 23 15:00:11 2011 +0900 + + ARM: NEON better instruction scheduling of over_n_8_8888 + + tail/head block is expanded and reordered to eliminate stalls + + Performance numbers of before/after + + - cortex a8 - + before : L1: 201.35 L2: 190.48 M:101.94 ( 54.85%) HT: 78.41 VT: 63.83 R: 58.25 RT: 21.74 ( 191Kops/s) + after : L1: 257.65 L2: 255.49 M:102.04 ( 55.33%) HT: 79.19 VT: 65.46 R: 59.23 RT: 21.12 ( 189Kops/s) + + - cortex a9 - + before : L1: 157.35 L2: 159.81 M:133.00 ( 60.94%) HT: 82.44 VT: 63.64 R: 51.66 RT: 19.15 ( 179Kops/s) + after : L1: 216.83 L2: 219.40 M:135.83 ( 61.80%) HT: 85.60 VT: 64.80 R: 52.23 RT: 19.16 ( 179Kops/s) + +commit 4ffa077487cb71ab17d12c37d298ca8a17e5bf35 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Sat Aug 13 16:18:17 2011 +0200 + + Workaround bug in llvm-gcc + + llvm-gcc (shipped in Apple XCode 4.1.1 as the default compiler or in + the 2.9 release of LLVM) performs an invalid optimization which + unifies the empty_region and the bad_region structures because they + have the same content. 
+ + A bugreport has been filed against Apple Developers Tool for this + issue. This commit works around this bug by making one of the two + structures volatile, so that it cannot be merged. + + Fixes region-contains-test. + +commit a1ebff0dcbb52cd9eba2bf953b3ba251df6dd787 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Wed Jun 29 14:14:38 2011 +0200 + + win32: Build benchmarks + + Add the makefile rules needed to compile lowlevel-blt-bench on win32 + and fix the compilation errors. + +commit 2644d5a947ad82a82e914acf72bbb411097a4bae +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Mar 11 17:09:34 2011 -0500 + + Move bilinear interpolation to pixman-inlines.h + +commit 12ad42dd32240f08708eddb157a6b23904ae39a7 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Mar 11 16:09:21 2011 -0500 + + Use repeat() function from pixman-inlines.h in pixman-bits-image.c + + The repeat() functionality was duplicated between pixman-bits-image.c + and pixman-inlines.h + +commit 2f443466bb9b8901f658e30c606ddacc4fed8535 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Mar 11 16:07:24 2011 -0500 + + Rename pixman-fast-path.h to pixman-inlines.h + + It is not really specific to pixman-fast-path.c. + +commit e58b208958900803f74d5e20c855bcb14752d976 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Aug 11 06:30:43 2011 -0400 + + In pixman_image_create_bits() allow images larger than 2GB + + There is no reason for pixman_image_create_bits() to check that the + image size fits in int32_t. The correct check is against size_t since + that is what the argument to calloc() is. + + This patch fixes this by adding a new _pixman_multiply_overflows_size() + and using it in create_bits(). Also prepend an underscore to the names + of other similar functions since they are internal to pixman. + + V2: Use int, not ssize_t for the arguments in create_bits() since + width/height are still limited to 32 bits, as pointed out by Chris + Wilson. 
+ +commit bdfb5944ffd460631c082e560c89a6c9830b37de +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Aug 8 10:18:07 2011 -0400 + + Don't include stdint.h in lowlevel-blt-bench.c + + Some systems don't have the file, and the types are already defined in + pixman.h. + + https://bugs.freedesktop.org//show_bug.cgi?id=37422 + +commit e5d85ce6629c84b9dad5a9c76bd9f895157c5a74 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Aug 2 03:03:48 2011 -0400 + + Use find_box_for_y() in pixman_region_contains_point() too + + The same binary search from the previous commit can be used in this + function too. + + V2: Remove check from loop that is not needed anymore, pointed out by + Andrea Canciani. + +commit 04bd4bdca622f060d7d39caddeaa495d3e6eb0cb +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Aug 1 22:32:09 2011 -0400 + + Speed up pixman_region{,32}_contains_rectangle() + + When someone selects some text in Firefox under a non-composited X + server and initiates a drag, a shaped window is created with a complex + shape corresponding to the outline of the text. Then, on every mouse + movement pixman_region_contains_rectangle() is called many times on + that complicated region. And pixman_region_contains_rectangle() is + doing a linear scan through the rectangles in the region, although the + scan does exit when it finds the first box that can't possibly + intersect the passed-in rectangle. + + This patch changes the loop so that it uses a binary search to skip + boxes that don't overlap the current y position. The performance + improvement for the text dragging case is easily noticeable. + + V2: Use the binary search for the "getting up to speed or skipping + remainder of band" as well. 
+ +commit 795ec5af2fc86fb0ebeca9ce82913d6002267a12 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Aug 2 01:32:15 2011 -0400 + + New test of pixman_region_contains_{rectangle,point} + + This test generates random regions and checks whether random boxes and + points are contained within them. The results are combined and a CRC32 + value is computed and compared to a known-correct one. + +commit 842591d9d12a24a9a06308ae03996153c5a99e64 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Aug 3 18:38:20 2011 -0400 + + Fix lcg_rand_u32() to return 32 random bits. + + The lcg_rand() function only returns 15 random bits, so lcg_rand_u32() + would always have 0 in bit 31 and bit 15. Fix that by calling + lcg_rand() three times, to generate 15, 15, and 2 random bits + respectively. + + V2: Use the 10/11 most significant bits from the 3 lcg results and mix + them with the low ones from the adjacent one, as suggested by Andrea + Canciani. + +commit 12da53f81c4a507a963641796132bbafe0cd6224 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Thu Aug 4 22:21:04 2011 +0900 + + ARM NEON: Standard fast path out_reverse_8_8888 + + This fast path is frequently used by cairo to do polygon rendering. + Existing NEON code generation framework is used. + +commit b395c3c5a28570ceac7cea55cb66a94096559897 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Mon Jul 18 08:15:23 2011 +0200 + + radial: Fix typos and trailing whitespace + + Correct a typo reported by James Cloos and some reported by automatic + spellchecking. + + Remove trailing whitespace. 
+ +commit b8d6babc91459a9f854695b56f0265298a3c6427 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sat Jul 23 00:27:34 2011 +0300 + + ARM: workaround binutils bug #12931 (code sections alignment) + + More details in binutils bugtracker: + http://sourceware.org/bugzilla/show_bug.cgi?id=12931 + + The problem was encountered in the wild by Mozilla: + https://bugzilla.mozilla.org/show_bug.cgi?id=672787 + +commit 5754e5689d4cac8868d6416dffa4a7d0c2d15423 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Jul 15 23:35:21 2011 +0300 + + C fast path for scaled src_x888_8888 with nearest filter + + The necessity is justified by a message in the pixman mailing list: + http://lists.freedesktop.org/archives/pixman/2011-July/001330.html + + NONE repeat is not supported, but could be added by tweaking + the interpretation and making use of 'fully_transparent_src' + scanline function argument. + +commit c06af104546868ed748c8f771817f5e9ae9a6a2d +Author: Andrea Canciani <ranma42@gmail.com> +Date: Fri Jul 15 22:02:01 2011 +0200 + + radial: Improve documentation and naming + + Add a comment to explain why the tests guarantee that the code always + computes the greatest valid root. + + Rename "det" as "discr" to make it match the mathematical name + "discriminant". + + Based on a patch by Jeff Muizelaar <jmuizelaar@mozilla.com>. 
+ +commit e814b50877bf313619fbf777dcab98d39874f8a4 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jul 4 15:55:52 2011 -0400 + + Makefile.am: Add pixman@lists.freedesktop.org to RELEASE_ANNOUNCE_LIST + +commit ed6d2f1cecb2f6d0cfe92bf493fde1abb4004856 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jul 4 15:35:17 2011 -0400 + + Post-release version bump to 0.23.3 + +commit 6c4001a0e1cc0350147638ba941d23e129d00e0d +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jul 4 08:13:19 2011 -0400 + + Pre-release version bump to 0.23.2 + +commit eff7c8efabe2da33edbf0bdc06e101352981286b +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Mon Jun 13 19:53:49 2011 +0900 + + Bilinear REPEAT_NORMAL source line extension for too short src_width + + To avoid function call and other calculation overhead, extend source + scanline into temporary buffer when source width is too small. + Temporary buffer will be repeatedly accessed, so extension cost is + very small due to cache effect. + +commit 828794d328e7ad1efc860baee8d6e72450b486b9 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Wed Jun 8 17:17:42 2011 +0900 + + Enable REPEAT_NORMAL bilinear fast path entries + +commit 1161b3f9edb6f5c396438b79f2df3218ea8d194e +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Wed Jun 8 17:14:29 2011 +0900 + + ARM: Add REPEAT_NORMAL functions to bilinear BIND macros + + Now bilinear template support REPEAT_NORMAL, so functions for that + is added to PIXMAN_ARM_BIND_SCALED_BILINEAR_ macros. Fast path + entries are not enabled yet. + +commit ebd2f06d96ee91f9f7f13b906ae328862da7dde8 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Wed Jun 8 17:11:24 2011 +0900 + + sse2: Declare bilinear src_8888_8888 REPEAT_NORMAL composite function + + Now bilinear template support REPEAT_NORMAL, so declare composite + functions using it. Function is just declared not used yet. 
+ +commit 7e22b2f7824f844076e1bb1fb26a6ec5e5d029cd +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Wed Jun 8 15:58:01 2011 +0900 + + REPEAT_NORMAL support for bilinear fast path template + + The basic idea is to break down normal repeat into a set of + non-repeat scanline compositions and stitching them together. + + Bilinear may interpolate last and first pixels of source scanline. + In this case, we can use temporary wrap around buffer. + +commit 2f025bad436982a2b1c54d7cb49b426ebf198350 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Wed Jun 8 15:37:31 2011 +0900 + + Replace boolean arguments with flags for bilinear fast path template + + By replacing boolean arguments with flags, the code can be more + readable and flags can be extended to do some more things later. + + Currently following flags are defined. + + FLAG_NONE + - No flags are turned on. + + FLAG_HAVE_SOLID_MASK + - Template will generate solid mask composite functions. + + FLAG_HAVE_NON_SOLID_MASK + - Template will generate bits mask composite functions. + + FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK should be mutually + exclusive. + +commit 4d4d1760e8118aaea06783079a3b87f83deb4907 +Author: Søren Sandmann <ssp@redhat.com> +Date: Sat Jun 25 10:16:25 2011 -0400 + + test: Make fuzzer-find-diff.pl executable + +commit ece8d13bf77d050662bb9db9716576dabff37554 +Author: Søren Sandmann <sandmann@cs.au.dk> +Date: Sun Jun 19 20:29:08 2011 -0400 + + ARM: Fix two bugs in neon_composite_over_n_8888_0565_ca(). + + The first bug is that a vmull.u8 instruction would store its result in + the q1 register, clobbering the d2 register used later on. The second + is that a vraddhn instruction would overwrite d25, corrupting the q12 + register used later. + + Fixing the second bug caused a pipeline bubble where the d18 register + would be unavailable for a clock cycle. This is fixed by swapping the + instruction with its successor. 
+ +commit 5715a394c41b2fd259ce7bf07b859d2a4eb2ec09 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Jun 19 19:10:45 2011 -0400 + + blitters-test: Make common formats more likely to be tested. + + Move the eight most common formats to the top of the list of image + formats and make create_random_image() much more likely to select one + of those eight formats. + + This should help catch more bugs in SIMD optimized operations. + +commit d815a1c54ae6ea6ccd16dedb7f83db0d2526d637 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Fri Jun 10 08:56:10 2011 +0200 + + Silence autoconf warnings + + Autoconf 2.86 reports: + + warning: AC_LANG_CONFTEST: no AC_LANG_SOURCE call detected in body + + Every code fragment must be wrapped in [AC_LANG_SOURCE([...])] + +commit a89f8cfaf11d0149b73ce40eca6e8a7f262f305a +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Mar 25 15:09:17 2011 -0400 + + Replace arguments to composite functions with a pointer to a struct + + This allows more information, such as flags or the composite region, + to be passed to the composite functions. + +commit 99e7d8fab546257ef729ea6db6e9beede984cec1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Mar 25 14:20:43 2011 -0400 + + In pixman-general.c rename image_parameters to {src, mask, dest}_image + + All the fast paths generally use these names as well. + +commit 4d713e3120909d82e37b0405d035e85bbc8a61a9 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Mar 25 14:17:08 2011 -0400 + + Replace instances of "dst_*" with "dest_*" + + The variables in question were dst_x, dst_y, dst_image. The majority + of _x and _y uses were already dest_x and dest_y, while the majority + of _image uses were dst_image. 
+ +commit 6aceb767aa6eea38ec3021263ca1d83aa9e0a3df +Author: Søren Sandmann <ssp@redhat.com> +Date: Sat May 28 12:32:35 2011 -0400 + + demos: Comment out some unused variables + +commit 4abe76432a59dec2e7978bfa4a01ad032178da0a +Author: Søren Sandmann <ssp@redhat.com> +Date: Sat May 28 11:56:32 2011 -0400 + + sse2: Delete some unused variables + +commit 5c60e1855b082b1a323319e1d0ba2d6f916fb3d5 +Author: Søren Sandmann <ssp@redhat.com> +Date: Sat May 28 11:51:31 2011 -0400 + + mmx: Delete some unused variables + +commit 827e61333865dc94851eb79c8e640b103e3fd629 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Mon May 23 12:08:54 2011 +0200 + + Include noop in win32 builds + +commit 65b63728cc8ada802c9798e11e6fa92d21f2fbf8 +Author: Nis Martensen <nis.martensen@web.de> +Date: Mon May 2 21:43:58 2011 +0200 + + Fix a few typos in pixman-combine.c.template + + Some equations have too much multiplication with alpha. + +commit dd449a2a8ee1381fdc5297257917bc0786bf0ac4 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Apr 23 10:26:49 2011 -0400 + + Move NOP src iterator into noop implementation. + + The iterator for sources where neither RGB nor ALPHA is needed, really + belongs in the noop implementation. + +commit ba480882aa465d8414dc8a4472d89d94911af60a +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Apr 23 10:24:41 2011 -0400 + + Move NULL iterator into pixman-noop.c + + Iterating a NULL image returns NULL for all scanlines. We may as well + do this in the noop iterator. + +commit a4e984de19f7f2ca30b1d736cdd2dded91a75907 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Feb 8 23:42:36 2011 -0500 + + Add a noop src iterator + + When the image is a8r8g8b8 and not transformed, and the fetched + rectangle is within the image bounds, scanlines can be fetched by + simply returning a pointer instead of copying the bits. 
+ +commit d4fff4a95921f734f26ef51953f4dddfcf423eab +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jan 24 12:16:03 2011 -0500 + + Move noop dest fetching to noop implementation + + It will at some point become useful to have CPU specific destination + iterators. However, a problem with that, is that such iterators should + not be used if we can composite directly in the destination image. + + By moving the noop destination iterator to the noop implementation, we + can ensure that it will be chosen before any CPU specific iterator. + +commit 13ce88f80095d0fa18330a23e03819368987e277 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jan 24 11:35:27 2011 -0500 + + Add a noop composite function for the DST operator + + The DST operator doesn't actually do anything, so add a noop "fast + path" for it, instead of checking in pixman_image_composite32(). + + The performance tradeoff here is that we get rid of a test for DST in + the common case where the operator is not DST, in return for an extra + walk over the clip rectangles in the uncommon case where the operator + actually is DST. + +commit 8c76235f41b2ac70ce6e652dcd1cab975e1283a4 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jan 24 11:31:49 2011 -0500 + + Add a "noop" implementation. + + This new implementation is ahead of all other implementations in the + fallback chain and is supposed to contain operations that are "noops", + ie., they don't require any work. For example, it might contain a + "fast path" for the DST operator that doesn't actually do anything or + an iterator for a8r8g8b8 that just returns a pointer into the image. 
+ +commit 0f6a4d45886d64b244d57403609f0377b58cc7fb +Author: Andrea Canciani <ranma42@gmail.com> +Date: Thu May 5 10:17:08 2011 +0200 + + test: Fix compilation on win32 + + MSVC complains about uint32_t being used as an expression: + + composite.c(902) : error C2275: 'uint32_t' : illegal use of this type + as an expression + +commit 838c2b593ec5ebbbf82de5b7790f5b68fd86bbc1 +Author: Dave Yeo <dave.r.yeo@gmail.com> +Date: Mon May 9 12:38:44 2011 +0200 + + Check for working mmap() + + OS/2 doesn't have a working mmap(). + +commit c53625a36e28883684c3a6e640aa3656ecca7615 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon May 2 05:11:49 2011 -0400 + + Post-release version bump to 0.23.1 + +commit 918a544406df8f428056daff8a634ad6eadf67c9 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon May 2 05:06:33 2011 -0400 + + Pre-release version bump to 0.22.0 + +commit 71b2e2745be31e7d18a11f8c2cba8f6031ace17c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Apr 19 00:22:29 2011 -0400 + + Post-release version bump to 0.21.9 + +commit 89868e93bd8d66f0fac0f0b42cf7718756992e4e +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Apr 19 00:00:37 2011 -0400 + + Pre-release version bump to 0.21.8 + +commit 33f1652b953467f3910605b3be723e21b3ebe078 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Wed Apr 13 11:57:35 2011 +0900 + + ARM: Enable bilinear fast paths using scanline functions in pixman-arm-neon-asm-bilinear.S + + Enable fast paths which is supported by scanline functions in + pixman-arm-neon-asm-bilinear.S + +commit e8185f1cb43417d9f7b1d2856bb899f1b84fde81 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Wed Apr 13 11:48:40 2011 +0900 + + ARM: NEON scanline functions for bilinear scaling + + General fetch->combine->store based bilinear scanline functions. + Need further optimizations and eventually will be replaced with optimal + functions one by one. 
+ General functions should be located in pixman-arm-neon-asm-bilinear.S and + optimal functions in pixman-arm-neon-asm.S + + Following general bilinear scanline functions are implemented + over_8888_8888 + add_8888_8888 + src_8888_8_8888 + src_8888_8_0565 + src_0565_8_x888 + src_0565_8_0565 + over_8888_8_8888 + add_8888_8_8888 + +commit 00939d35628e733fab63606cfb1d7fcb667860d3 +Author: Taekyun Kim <tkq.kim@samsung.com> +Date: Wed Apr 13 11:43:44 2011 +0900 + + ARM: Common macro for scaled bilinear scanline function with A8 mask + + Defining PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST macro for declaration of + scaled bilinear scanline functions in common header. + +commit b455496890f7f941d561c284aca14783300bedd6 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Mar 11 07:52:57 2011 -0500 + + Offset rendering in pixman_composite_trapezoids() by (x_dst, y_dst) + + Previously, this function would do coordinate calculations in such a + way that (x_dst, y_dst) would only affect the alignment of the source + image, but not of the traps, which would always be considered to be in + absolute destination coordinates. This is unlike the + pixman_image_composite() function which also registers the mask to the + destination. + + This patch makes it so that traps are also offset by (x_dst, y_dst). + + Also add a comment explaining how this function is supposed to + operate, and update tri-test.c and composite-trap-test.c to deal with + the new semantics. + +commit e75e6a4ef5c5a8ac8b0e8464f08f83fd2b6e86ed +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Apr 2 23:24:48 2011 -0400 + + ARM: Add 'neon_composite_over_n_8888_0565_ca' fast path + + This improves the performance of the firefox-talos-gfx benchmark with + the image16 backend. Benchmark on an 800 MHz ARM Cortex A8: + + Before: + + [ # ] backend test min(s) median(s) stddev. count + [ 0] image16 firefox-talos-gfx 121.773 122.218 0.15% 6/6 + + After: + + [ # ] backend test min(s) median(s) stddev. 
count + [ 0] image16 firefox-talos-gfx 85.247 85.563 0.22% 6/6 + + V2: Slightly better instruction scheduling based on comments from Taekyun Kim. + V3: Eliminate all stalls from the inner loop. Also based on comments from Taekyun Kim. + +commit 1670b952143284f480c39ff087b5694a64eb7db3 +Author: Gilles Espinasse <g.esp@free.fr> +Date: Tue Apr 12 22:44:56 2011 +0200 + + Fix OpenMP not supported case + + PIXMAN_LINK_WITH_ENV did not fail unless -Wall -Werror is used. + So even when the compiler did not support OpenMP, USE_OPENMP was defined. + Fix that by running the second OpenMP test only when first AC_OPENMP find supported + + configure tested in the cases : + gcc without libgomp support, no openmp option, --enable-openmp and --disable-openmp + gcc with libgomp support, no openmp option, --enable-openmp and --disable-openmp + + Not tested with autoconf version not knowing openmp (<2.62) + + Warn when --enable-openmp is requested but no support is found + + Signed-off-by: Gilles Espinasse <g.esp@free.fr> + +commit b9e8f7fb7494e4ee4be56d1555632233a494b28e +Author: Gilles Espinasse <g.esp@free.fr> +Date: Tue Apr 12 22:44:25 2011 +0200 + + Fix missing AC_MSG_RESULT value from Werror test + + Use the correct variable name + + Signed-off-by: Gilles Espinasse <g.esp@free.fr> + +commit caae4e82ffdeebfb9aa98a6c49dd563e065c0959 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Mar 21 20:25:27 2011 +0200 + + ARM: pipelined NEON implementation of bilinear scaled 'src_8888_0565' + + Benchmark on ARM Cortex-A8 r1p3 @600MHz, 32-bit LPDDR @166MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=20028888, dst=10020565, speed=33.59 MPix/s + after: op=1, src=20028888, dst=10020565, speed=46.25 MPix/s + + Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=20028888, dst=10020565, speed=63.86 MPix/s + after: op=1, 
src=20028888, dst=10020565, speed=84.22 MPix/s + +commit d080d59b802c351daed84b92bd4eb20c775b81c7 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Mar 16 17:24:49 2011 +0200 + + ARM: pipelined NEON implementation of bilinear scaled 'src_8888_8888' + + Performance of the inner loop when working with the data in L1 cache: + ARM Cortex-A8: 41 cycles per 4 pixels (no stalls and partial dual issue) + ARM Cortex-A9: 48 cycles per 4 pixels (no stalls) + + It might be still possible to improve performance even more on ARM Cortex-A8 + with a better use of dual issue. + + Benchmark on ARM Cortex-A8 r1p3 @600MHz, 32-bit LPDDR @166MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=20028888, dst=20028888, speed=40.38 MPix/s + after: op=1, src=20028888, dst=20028888, speed=48.47 MPix/s + + Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=20028888, dst=20028888, speed=79.68 MPix/s + after: op=1, src=20028888, dst=20028888, speed=93.11 MPix/s + +commit b496a8b279baebb8b9ab4fbcb2101583be08fe3b +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Mar 17 19:42:01 2011 +0200 + + ARM: support different levels of loop unrolling in bilinear scaler + + Now an extra 'flag' parameter is supported in bilinear scanline scaling + function generation macro. It can be used to enable 4 or 8 pixels per + loop iteration unrolling and provide save/restore code for d8-d15 + registers. + +commit 34ca9cf03fa897cd377cdb19acc22e876b2f4b0e +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Mar 21 18:41:53 2011 +0200 + + ARM: use less ARM instructions in NEON bilinear scaling code + + This reduces code size and also puts less pressure on the + instruction decoder.
+ +commit 0f7be9f72ef6bfe2555b7f2cc29297c4f4762740 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Mar 16 16:33:41 2011 +0200 + + ARM: support for software pipelining in bilinear macros + + Now it's possible to override the main loop of bilinear scaling code + with optimized pipelined implementation. + +commit 9638af95832563040d6bd861cf4c20ab632058df +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Mar 10 16:12:23 2011 +0200 + + ARM: use aligned memory writes in NEON bilinear scaling code + +commit 8bba3a0e1e54f03ea78fb44314f3bfa57ec8da31 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Mar 10 15:34:10 2011 +0200 + + ARM: tweaked horizontal weights update in NEON bilinear scaling code + + Moving horizontal interpolation weights update instructions from the + beginning of loop to its end allows to hide some pipeline stalls and + improve performance. + +commit a2153222677327be43251012f462d19a7e98ce14 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Apr 3 20:32:30 2011 -0400 + + ARM: Tiny improvement in over_n_8888_8888_ca_process_pixblock_head + + Instead of two + + mvn d24, d24 + mvn d25, d25 + + use just one + + mvn q12, q12 + + Also move another vmvn instruction into the created pipeline bubble, + as pointed out by Siarhei. + +commit 44f99735d9c6a897078db12172d9d2d07b204f37 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Apr 2 14:12:12 2011 -0400 + + Makefile.am: Put development releases in "snapshots" directory + + Up until now, all pixman releases, both snapshots and releases, were + uploaded to the "releases" directory on www.cairographics.org, but + it's better to put development snapshots in the "snapshots" directory. + + This patch changes Makefile.am to do that.
+ +commit ad3cbfb073fc325e1b3152898ca71b8255675957 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Mar 22 13:42:05 2011 -0400 + + test: Fix infinite loop in composite + + When run in PIXMAN_RANDOMIZE_TESTS mode, this test would go into an + infinite loop because the loop started at 'seed' but the stop + condition was still N_TESTS. + +commit b514e63cfc58af21f7097db5a1b04292a758782a +Author: Alexandros Frantzis <alexandros.frantzis@linaro.org> +Date: Fri Mar 18 14:37:27 2011 +0200 + + Add support for the r8g8b8a8 and r8g8b8x8 formats to the tests. + +commit f05a90e5f8d1d0af60e2c684cbe9f1327c33135a +Author: Alexandros Frantzis <alexandros.frantzis@linaro.org> +Date: Fri Mar 18 14:36:15 2011 +0200 + + Add simple support for the r8g8b8a8 and r8g8b8x8 formats. + + This format is particularly useful on big-endian architectures, where RGBA in + memory/file order corresponds to r8g8b8a8 as an uint32_t. This is important + because RGBA is in some cases the only available choice (for example as a pixel + format in OpenGL ES 2.0). + +commit 7eb0abb5e819046537b9f809c7ec332c6679c557 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Mar 14 14:56:22 2011 -0400 + + test: Randomize some tests if PIXMAN_RANDOMIZE_TESTS is set + + This patch makes so that composite and stress-test will start from a + random seed if the PIXMAN_RANDOMIZE_TESTS environment variable is + set. Running the test suite in this mode is useful to get more test + coverage. + + Also, in stress-test.c make it so that setting the initial seed causes + threads to be turned off. This makes it much easier to see when + something fails. + +commit 6b27768d81c254a4f1d05473157328d5a5d99b9c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Mar 12 19:42:58 2011 -0500 + + Simplify the prototype for iterator initializers. + + All of the information previously passed to the iterator initializers + is now available in the iterator itself, so there is no need to pass + it as arguments anymore. 
+ +commit 74d0f44b6d6d613d24541b849835da0464cc6fd0 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Mar 12 19:12:35 2011 -0500 + + Fill out parts of iters in _pixman_implementation_{src,dest}_iter_init() + + This makes _pixman_implementation_{src,dest}_iter_init() responsible + for filling parts of the information in the iterators. Specifically, + the information passed as arguments is stored in the iterator. + + Also add a height field to pixman_iter_t(). + +commit be4eaa0e4f79af38b7b89c5b09ca88d3a88d9396 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Mar 12 19:06:02 2011 -0500 + + In delegate_{src,dest}_iter_init() call delegate directly. + + There is no reason to go through + _pixman_implementation_{src,dest}_iter_init(), especially since + _pixman_implementation_src_iter_init() is doing various other checks + that only need to be done once. + + Also call delegate->src_iter_init() directly in pixman-sse2.c + +commit 70a923882ca24664344ba91a649e7aa12c3063f7 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Mar 9 13:55:48 2011 +0200 + + ARM: a bit faster NEON bilinear scaling for r5g6b5 source images + + Instructions scheduling improved in the code responsible for fetching r5g6b5 + pixels and converting them to the intermediate x8r8g8b8 color format used in + the interpolation part of code. Still a lot of NEON stalls are remaining, + which can be resolved later by the use of pipelining. 
+ + Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=10020565, dst=10020565, speed=32.29 MPix/s + op=1, src=10020565, dst=20020888, speed=36.82 MPix/s + after: op=1, src=10020565, dst=10020565, speed=41.35 MPix/s + op=1, src=10020565, dst=20020888, speed=49.16 MPix/s + +commit fe99673719091d4a880d031add1369332a75731b +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Mar 9 13:27:41 2011 +0200 + + ARM: NEON optimization for bilinear scaled 'src_0565_0565' + + Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=10020565, dst=10020565, speed=3.30 MPix/s + after: op=1, src=10020565, dst=10020565, speed=32.29 MPix/s + +commit 29003c3befe2159396d181ef9ac1caaadcabf382 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Mar 9 13:21:53 2011 +0200 + + ARM: NEON optimization for bilinear scaled 'src_0565_x888' + + Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=10020565, dst=20020888, speed=3.39 MPix/s + after: op=1, src=10020565, dst=20020888, speed=36.82 MPix/s + +commit 2ee27e7d79637da9173ee1bf3423e5a81534ccb4 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Mar 9 11:53:04 2011 +0200 + + ARM: NEON optimization for bilinear scaled 'src_8888_0565' + + Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=20028888, dst=10020565, speed=6.56 MPix/s + after: op=1, src=20028888, dst=10020565, speed=61.65 MPix/s + +commit 11a0c5badbc59ce967707ef836313cc98f8aec4e +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Mar 9 11:46:48 2011 +0200 + + ARM: use common macro template for bilinear scaled 
'src_8888_8888' + + This is a cleanup for old and now duplicated code. The performance improvement + is mostly coming from the enabled use of software prefetch, but instructions + scheduling is also slightly better. + + Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=20028888, dst=20028888, speed=53.24 MPix/s + after: op=1, src=20028888, dst=20028888, speed=74.36 MPix/s + +commit 34098dba6763afd3636a14f9c2a079ab08f23b2d +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Mar 9 11:34:15 2011 +0200 + + ARM: NEON: common macro template for bilinear scanline scalers + + This allows to generate bilinear scanline scaling functions targeting + various source and destination color formats. Right now a8r8g8b8/x8r8g8b8 + and r5g6b5 color formats are supported. More formats can be added if needed. + +commit 66f4ee1b3bccf4516433d61dbf2035551a712fa2 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Mar 9 10:59:46 2011 +0200 + + ARM: new bilinear fast path template macro in 'pixman-arm-common.h' + + It can be reused in different ARM NEON bilinear scaling fast path functions. 
+ +commit 5921c17639fe5fdc595c850e3347281c1c8746ba +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sun Mar 6 22:16:32 2011 +0200 + + ARM: assembly optimized nearest scaled 'src_8888_8888' + + Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=20028888, dst=20028888, speed=44.36 MPix/s + after: op=1, src=20028888, dst=20028888, speed=39.79 MPix/s + + Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=20028888, dst=20028888, speed=102.36 MPix/s + after: op=1, src=20028888, dst=20028888, speed=163.12 MPix/s + +commit f3e17872f5522e25da8e32de83e62bee8cc198d7 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Mar 7 03:10:43 2011 +0200 + + ARM: common macro for nearest scaling fast paths + + The code of nearest scaled 'src_0565_0565' function was generalized + and moved to a common macro, so that it can be reused for other + fast paths. 
+ +commit bb3d1b67fd0f42ae00af811c624ea1c44541034d +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sun Mar 6 16:17:12 2011 +0200 + + ARM: use prefetch in nearest scaled 'src_0565_0565' + + Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=10020565, dst=10020565, speed=75.02 MPix/s + after: op=1, src=10020565, dst=10020565, speed=73.63 MPix/s + + Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=10020565, dst=10020565, speed=176.12 MPix/s + after: op=1, src=10020565, dst=10020565, speed=267.50 MPix/s + +commit 84e361c8e357e26f299213fbeefe64c73447b116 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Mar 4 15:51:18 2011 -0500 + + test: Do endian swapping of the source and destination images. + + Otherwise the test fails on big endian. Fix for bug 34767, reported by + Siarhei Siamashka. + +commit 84f3c5a71a2de1a96dcf0c7f9ab0a8ee1b1b158f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Mar 7 13:45:54 2011 -0500 + + test: In image_endian_swap() use pixman_image_get_format() to get the bpp. + + There is no reason to pass in the bpp as an argument; it can be gotten + directly from the image. + +commit 17feaa9c50bb8521b0366345efe181bd99754957 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Feb 22 18:45:03 2011 +0200 + + ARM: NEON optimization for bilinear scaled 'src_8888_8888' + + Initial NEON optimization for bilinear scaling. Can be probably + improved more. 
+ + Benchmark on ARM Cortex-A8: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=20028888, dst=20028888, speed=6.70 MPix/s + after: op=1, src=20028888, dst=20028888, speed=44.27 MPix/s + +commit 350029396d911941591149cc82b5e68a78ad6747 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Feb 21 20:18:02 2011 +0200 + + SSE2 optimization for bilinear scaled 'src_8888_8888' + + A primitive naive implementation of bilinear scaling using SSE2 intrinsics, + which only handles one pixel at a time. It is approximately 2x faster than + pixman general compositing path. Single pass processing without intermediate + temporary buffer contributes to ~15% and loop unrolling contributes to ~20% + of this speedup. + + Benchmark on Intel Core i7 (x86-64): + Using cairo-perf-trace: + before: image firefox-planet-gnome 12.566 12.610 0.23% 6/6 + after: image firefox-planet-gnome 10.961 11.013 0.19% 5/6 + + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=20028888, dst=20028888, speed=70.48 MPix/s + after: op=1, src=20028888, dst=20028888, speed=165.38 MPix/s + +commit 0df43b8ae5031dd83775d00b57b6bed809db0e89 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Feb 21 02:07:09 2011 +0200 + + test: check correctness of 'bilinear_pad_repeat_get_scanline_bounds' + + Individual correctness check for the new bilinear scaling related + supplementary function. This test program uses a bit wider range + of input arguments, not covered by other tests. + +commit d506bf68fd0e9a1c5dd484daee70631699918387 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Feb 21 01:29:02 2011 +0200 + + Main loop template for fast single pass bilinear scaling + + Can be used for implementing SIMD optimized fast path + functions which work with bilinear scaled source images. + + Similar to the template for nearest scaling main loop, the + following types of mask are supported: + 1. 
no mask + 2. non-scaled a8 mask with SAMPLES_COVER_CLIP flag + 3. solid mask + + PAD repeat is fully supported. NONE repeat is partially + supported (right now only works if source image has alpha + channel or when alpha channel of the source image does not + have any effect on the compositing operation). + +commit 9ebde285fa990bfa1524f166fbfb1368c346b14a +Author: Andrea Canciani <ranma42@gmail.com> +Date: Thu Feb 24 12:53:39 2011 +0100 + + test: Silence MSVC warnings + + MSVC does not notice non-returning functions (abort() / assert(0)) + and warns about paths which end with them in non-void functions: + + c:\cygwin\home\ranma42\code\fdo\pixman\test\fetch-test.c(114) : + warning C4715: 'reader' : not all control paths return a value + c:\cygwin\home\ranma42\code\fdo\pixman\test\stress-test.c(133) : + warning C4715: 'real_reader' : not all control paths return a value + c:\cygwin\home\ranma42\code\fdo\pixman\test\composite.c(431) : + warning C4715: 'calc_op' : not all control paths return a value + + These warnings can be silenced by adding a return after the + termination call. + +commit 8868778ea1fdc8e70da76b3b00ea78106c5840d8 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Tue Feb 22 22:43:48 2011 +0100 + + Do not include unused headers + + pixman-combine32.h is included without being used both in + pixman-image.c and in pixman-general.c. + +commit 72f5e5f608506c18c484bc5bc3e58bd83aeb7691 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Tue Feb 22 22:04:49 2011 +0100 + + test: Add Makefile for Win32 + +commit 11305b4ecdd36a17592c5c75de9157874853ab20 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Tue Feb 22 21:46:37 2011 +0100 + + test: Fix tests for compilation on Windows + + The Microsoft C compiler cannot handle subobject initialization and + Win32 does not provide snprintf. + + Work around these limitations by using normal struct initialization + and using sprintf (a manual check shows that the buffer size is + sufficient). 
+ +commit 20ed723a5a42fb8636bc9a5f32974dec1b66a785 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Thu Feb 24 10:44:04 2011 +0100 + + Fix compilation on Win32 + + Makefile.win32 contained a typo and was missing the dependency from + the built sources. + +commit 48e951000c7ff14f40c671f3efb6abb18162c840 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Feb 22 16:13:32 2011 -0500 + + Post-release version bump to 0.21.7 + +commit 8b3332166094db657e96c365a524b2cd7513359b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Feb 22 15:43:41 2011 -0500 + + Pre-release version bump to 0.21.6 + +commit 2cb67d2a0b6bed4490a41c34a185cc54a445559a +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Feb 22 15:40:34 2011 -0500 + + Minor fix to the RELEASING file + +commit 3cdf74257bdb9d054637252f4fa7503abf580db9 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Feb 22 15:28:17 2011 -0500 + + Delete pixman-x64-mmx-emulation.h from pixman/Makefile.am + +commit 65919ad17fd7b4c6f963690fc78155c7cfe1a51a +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Feb 22 19:28:08 2011 +0200 + + Ensure that tests run as the last step of a build for 'make check' + + Previously 'make check' would compile and run tests first, and only + then proceed to compiling demos. Which is not very convenient + because of the need to scroll back console output to see the + tests verdict. Swapping order of SUBDIRS variable entries in + Makefile.am resolves this. + +commit 34a7ac047411d6c1f1708cb8dd4469cd1aa40b31 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Feb 18 07:38:49 2011 -0500 + + sse2: Minor coding style cleanups. + + Also make pixman_fill_sse2() static. 
+ +commit 10f69e5ec844e2630f8e5b21fd5392719d34d060 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Feb 18 07:40:02 2011 -0500 + + sse2: Remove pixman-x64-mmx-emulation.h + + Also stop including mmintrin.h + +commit 984be4def2e62a05e9a91e77ac8c703fed30718b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Feb 18 07:38:03 2011 -0500 + + sse2: Delete obsolete or redundant comments + +commit 33d98902261ad73c1b6b1366968e49a1cb2bf68b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Feb 18 07:07:45 2011 -0500 + + sse2: Remove all the core_combine_* functions + + Now that _mm_empty() is not used anymore, they are no longer different + from the sse2_combine_* functions, so they can be consolidated. + +commit 87cd6b8056bbacb835eeb991f03b9135dcd58334 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Feb 18 05:15:50 2011 -0500 + + sse2: Don't compile pixman-sse2.c with -mmmx anymore + + It's not necessary now that the file doesn't use MMX instructions. + +commit e7fe5e35e9640c6d6bb08c24b96ce882434a7f9f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Feb 18 05:07:08 2011 -0500 + + sse2: Delete unused MMX functions and constants and all _mm_empty()s + + These are not needed because the SSE2 implementation doesn't use MMX + anymore. + +commit f88ae14c15040345a12ff0488c7b23d25639e49b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Feb 18 03:56:20 2011 -0500 + + sse2: Convert all uses of MMX registers to use SSE2 registers instead. + + By avoiding use of MMX registers we won't need to call emms all over + the place, which avoids various miscompilation issues. 
+ +commit 7fb75bb3e6c3e004374d186ea2d6f02d1caccba4 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Feb 18 03:57:55 2011 -0500 + + Coding style: core_combine_in_u_pixelsse2 -> core_combine_in_u_pixel_sse2 + +commit 510c0d088a975efe75cc2b796547f3aaed1c18e6 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Feb 15 09:11:44 2011 -0500 + + In pixman_image_set_transform() allow NULL for transform + + Previously, this would crash unless the existing transform were also + NULL. + +commit 7feb710e60cdab5c448a396537a8de16e72091e2 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Feb 15 04:55:02 2011 -0500 + + Avoid marking images dirty when properties are reset + + When an image property is set to the same value that it already is, + there is no reason to mark the image dirty and incur a recomputation + of the flags. + +commit 3598ec26ecf761488e2ac1536553eaf3bb361e72 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Feb 11 08:57:42 2011 -0500 + + Add new public function pixman_add_triangles() + + This allows some more code to be deleted from the X server. The + implementation consists of converting to trapezoids, and is shared + with pixman_composite_triangles(). + +commit 964c7e7cd20a6ed414fdf92b71fdc83db91d7578 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Jan 14 06:19:08 2011 -0500 + + Optimize adding opaque trapezoids onto a8 destination. + + When the source is opaque and the destination is alpha only, we can + avoid the temporary mask and just add the trapezoids directly. + +commit 0bc03482f10d7bfe64a4199e9cd484ff1129d709 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Jan 12 03:02:59 2011 -0500 + + Add a test program, tri-test + + This program tests whether the new triangle support works. + +commit 79e69aac8cfe7d45707098735376a6e6c2dcfa06 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jan 11 10:15:21 2011 -0500 + + Add support for triangles to pixman. 
+ + The Render X extension can draw triangles as well as trapezoids, but + the implementation has always converted them to trapezoids. This patch + moves the X server's triangle conversion code into pixman, where we + can reuse the pixman_composite_trapezoid() code. + +commit 4e6dd4928d817338ae406a620f5658bbddb66df1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Feb 10 10:37:08 2011 -0500 + + Add a test program for pixman_composite_trapezoids(). + + A CRC32 based test program to check that pixman_composite_trapezoids() + actually works. + +commit 803272e38c5b9b9abe347390c2ecd2ac4be7b9be +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jan 11 09:23:43 2011 -0500 + + Add pixman_composite_trapezoids(). + + This function is an implementation of the X server request + Trapezoids. That request is what the X backend of cairo is using all + the time; by moving it into pixman we can hopefully make it faster. + +commit 1feaf6bea707a97db44643c5bfa6218afea9b6be +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jan 18 19:40:53 2011 -0500 + + test/Makefile.am: Move all the TEST_LDADD into a new global LDADD. + + This gets rid of a bunch of replicated *_LDADD clauses + +commit 1237fd9bc84a27f232ceddf1c7b72645fcc99aec +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jan 18 19:20:18 2011 -0500 + + Add @TESTPROGS_EXTRA_LDFLAGS@ to AM_LDFLAGS + + Instead of explicitly adding it to each test program. + +commit 7dfe845786920d50c6f93165ef6f539e6f4d1b53 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jan 18 19:16:39 2011 -0500 + + Move all the GTK+ based test programs to a new subdir, "demos" + + This separates the test suite from the random gtk+ using test + programs. "demos" is somewhat misleading because the programs there + are not particularly exciting (with the possible exception of + composite-test which shows off all the compositing operators). 
+ +commit 8e4100260bbdb827abc45a2a5e352a53246fe614 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Feb 4 00:47:36 2011 +0200 + + SSE2 optimization for nearest scaled over_8888_n_8888 + + This operation shows up a little bit in some of the html5 based + games from http://www.kesiev.com/akihabara/ + + === Cairo trace of the game intro animation for 'Legend of Sadness' === + + before: + [ 0] image firefox-legend-of-sadness 46.286 46.298 0.01% 5/6 + + after: + [ 0] image firefox-legend-of-sadness 45.088 45.102 0.04% 6/6 + + === Microbenchmark (scaling ~2000x~2000 -> ~2000x~2000) === + + before: + translucent: op=3, src=8888, mask=s dst=8888, speed=131.30 MPix/s + transparent: op=3, src=8888, mask=s dst=8888, speed=132.38 MPix/s + opaque: op=3, src=8888, mask=s dst=8888, speed=167.90 MPix/s + after: + translucent: op=3, src=8888, mask=s dst=8888, speed=301.93 MPix/s + transparent: op=3, src=8888, mask=s dst=8888, speed=770.70 MPix/s + opaque: op=3, src=8888, mask=s dst=8888, speed=301.80 MPix/s + +commit 39b86b032d1b81958d4dfc880ba7f129aecb1de0 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 3 15:22:28 2010 +0200 + + ARM: NEON optimization for nearest scaled over_0565_8_0565 + + In some cases may be used for html5 video when hardware acceleration + is not available. + +commit 9a90c1c90f1d128de68b3ed855a2ea1c3bed20c3 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 3 15:16:28 2010 +0200 + + ARM: NEON optimization for nearest scaled over_8888_8_0565 + + In some cases may be used for html5 video when hardware acceleration + is not available. 
+ +commit cd1062ded44978fa97aa3d3295af016c80c6e2eb +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 3 15:15:15 2010 +0200 + + ARM: new macro template for using scaled fast paths with a8 mask + +commit b099957887ef69b795d542f8f2980b5a94fb823f +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Feb 2 18:14:56 2011 +0200 + + Better support for NONE repeat in nearest scaling main loop template + + Scaling function now gets an extra boolean argument, which is set + to TRUE when we are fetching padding pixels for NONE repeat. This + allows to make a decision whether to interpret alpha as 0xFF or 0x00 + for such pixels when working with formats which don't have alpha + channel (for example x8r8g8b8 and r5g6b5). + +commit 14f82083a12be07f340fdea491759b3bb77b4e66 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Oct 22 17:54:41 2010 +0300 + + Support for a8 and solid mask in nearest scaling main loop template + + In addition to the most common case of not having any mask at all, two + variants of scaling with mask show up in cairo traces: + 1. non-scaled a8 mask with SAMPLES_COVER_CLIP flag + 2. solid mask + + This patch extends the nearest scaling main loop template to also + support these cases. + +commit e83cee5aac26522f31a7e81ea3f972ae2248f6b0 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Oct 22 16:29:01 2010 +0300 + + test: Extend scaling-test to support a8/solid mask and ADD operation + + Image width also has been increased because SIMD optimizations typically + do more unrolling in the inner loops, and this needs to be tested. 
+ +commit 97447f440fec9889bba6cc21c6d9366183c47e7e +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Jan 17 02:29:43 2011 +0200 + + Use const modifiers for source buffers in nearest scaling fast paths + +commit 8d359b00c5bb9960c3c584a7f77a943c0ce61368 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Jul 30 18:37:51 2010 +0300 + + C fast paths for a simple 90/270 degrees rotation + + Depending on CPU architecture, performance is in the range of 1.5 to 4 times + slower than simple nonrotated copy (which would be an ideal case, perfectly + utilizing memory bandwidth), but still is more than 7 times faster if + compared to general path. + + This implementation sets a performance baseline for rotation. The use + of SIMD instructions may further improve memory bandwidth utilization. + +commit e0c7948c970b816f323a6402241ca70fa855c12c +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Jul 29 17:58:13 2010 +0300 + + New flags for 90/180/270 rotation + + These flags are set when the transform is a simple nonscaled 90/180/270 + degrees rotation. + +commit 3b68c295fd45297a631569b35608364dbcb6d452 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Oct 26 15:40:01 2010 +0300 + + test: affine-test updated to stress 90/180/270 degrees rotation more + +commit 56f173f0af5a59a12596cf1ed9d6fb7c8ebe6318 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Feb 10 05:21:42 2011 -0500 + + Add pixman-conical-gradient.c to Makefile.win32. + + Pointed out by Kirill Tishin. 
+ +commit 7fd4897730412977f730b850e6e697156fb3734b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Jan 23 16:53:26 2011 -0500 + + Add SSE2 fetcher for 0565 + + Before: + + add_0565_0565 = L1: 61.08 L2: 61.03 M: 60.57 ( 10.95%) HT: 46.85 VT: 45.25 R: 39.99 RT: 20.41 ( 233Kops/s) + + After: + + add_0565_0565 = L1: 77.84 L2: 76.25 M: 75.38 ( 13.71%) HT: 55.99 VT: 54.56 R: 45.41 RT: 21.95 ( 255Kops/s) + +commit 8414aa76c20732a6ed29a2d80175936570c5e592 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 31 00:57:46 2010 -0500 + + Improve performance of sse2_combine_over_u() + + Split this function into two, one that has a mask, and one that + doesn't. This is a fairly substantial speed-up in many cases. + + New output of lowlevel-blt-bench over_x888_8_0565: + + over_x888_8_0565 = L1: 63.76 L2: 62.75 M: 59.37 ( 21.55%) HT: 45.89 VT: 43.55 R: 34.51 RT: 16.80 ( 201Kops/s) + +commit 08e855f15cba24aac83145b994069d0bb50be5a1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Jan 23 16:17:17 2011 -0500 + + Add SSE2 fetcher for a8 + + New output of lowlevel-blt-bench over_x888_8_0565: + + over_x888_8_0565 = L1: 57.85 L2: 56.80 M: 54.14 ( 19.50%) HT: 42.64 VT: 40.56 R: 32.67 RT: 16.22 ( 195Kops/s) + + Based in part on code by Steve Snyder from + + https://bugs.freedesktop.org/show_bug.cgi?id=21173 + +commit 2b6b0cf3591ce4438f7e0571c7a762972a999cd8 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Jan 12 06:38:54 2011 -0500 + + Add SSE2 fetcher for x8r8g8b8 + + New output of lowlevel-blt-bench over_x888_8_0565: + + over_x888_8_0565 = L1: 55.68 L2: 55.11 M: 52.83 ( 19.04%) HT: 39.62 VT: 37.70 R: 30.88 RT: 14.62 ( 174Kops/s) + + The fetcher is looked up in a table, so that other fetchers can easily + be added. 
+ + See also https://bugs.freedesktop.org/show_bug.cgi?id=20709 + +commit 13aed37758d1af5b5bc2a80d886b764d4c45827e +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jan 22 17:13:19 2011 -0500 + + Add a test for over_x888_8_0565 in lowlevel_blt_bench(). + + The next few commits will speed this up quite a bit. + + Current output: + + --- + reference memcpy speed = 2217.5MB/s (554.4MP/s for 32bpp fills) + --- + over_x888_8_0565 = L1: 54.67 L2: 54.01 M: 52.33 ( 18.88%) HT: 37.19 VT: 35.54 R: 29.40 RT: 13.63 ( 162Kops/s) + +commit 2de397c272fd60d6ce4311b411ad37a8e39daff6 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jan 24 12:24:42 2011 -0500 + + Move fallback decisions from implementations into pixman-cpu.c. + + Instead of having each individual implementation decide which fallback + to use, move it into pixman-cpu.c, where a more global decision can be + made. + + This is accomplished by adding a "fallback" argument to all the + pixman_implementation_create_*() implementations, and then in + _pixman_choose_implementation() pass in the desired fallback. + +commit ed781df1cc30748c8193be9b9a497def0b768b6b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Jan 21 14:47:33 2011 -0500 + + Print a warning when a development snapshot is being configured. + + It seems to be relatively common for people to use development + snapshots of pixman thinking they are ordinary releases. This patch + makes it such that if the current minor version is odd, configure will + print a banner explaining the version number scheme plus information + about where to report bugs. + +commit fead9eb82a7fc78a4927fff960d4cacea799bd9b +Author: Rolland Dudemaine <rolland@ghs.com> +Date: Tue Jan 25 15:08:26 2011 +0200 + + Fix "variable was set but never used" warnings + + Removes useless variable declarations. This can only result in more + efficient code, as these variables were sometimes assigned, but + their values were never used.
+ +commit 32e556df33b3cd3b31de8184f144b3065206406b +Author: Rolland Dudemaine <rolland@ghs.com> +Date: Tue Jan 25 14:14:57 2011 +0200 + + test: Use the right enum types instead of int to fix warnings + + Green Hills Software MULTI compiler was producing a number + of warnings due to incorrect uses of int instead of the correct + corresponding pixman_*_t type. + +commit b61ec0a6862ba101fff0afa082fb7490a0c44785 +Author: Rolland Dudemaine <rolland@ghs.com> +Date: Tue Jan 25 14:52:49 2011 +0200 + + Correct the initialization of 'max_vx' + + http://lists.freedesktop.org/archives/pixman/2011-January/000937.html + +commit e8a1b1c4e502ecbb70028bd5a86034bfe1b16997 +Author: Rolland Dudemaine <rolland@ghs.com> +Date: Tue Jan 25 13:55:28 2011 +0200 + + test: Fix for mismatched 'fence_malloc' prototype/implementation + + Solves compilation problem when 'mprotect' is not available. For + example, when using Green Hills Software MULTI compiler or mingw: + http://lists.freedesktop.org/archives/pixman/2011-January/000939.html + +commit a8e4677ecc2fcbf16a53902e26fc82d0860e9a21 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Jan 10 21:01:16 2011 +0200 + + The code in 'bitmap_addrect' already assumes non-null 'reg->data' + + So the check of 'reg->data' pointer can be safely removed. + +commit a6a04c07c354e10d787193af8d2f6a6d27f374ad +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Jan 19 07:47:52 2011 -0500 + + Post-release version bump to 0.21.5 + +commit 4e56cec5649b7e122ccfc815b4ff45611953afce +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Jan 19 07:38:24 2011 -0500 + + Pre-release version bump to 0.21.4 + +commit 1d7195dd6c68eab73d063f37de3a9331446111d4 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jan 17 14:12:20 2011 -0500 + + Fix dangling-pointer bug in bits_image_fetch_bilinear_no_repeat_8888(). + + The mask_bits variable is only declared in a limited scope, so the + pointer to it becomes invalid instantly. 
Somehow this didn't actually + trigger any bugs, but Brent Fulgham reported that Bounds Checker was + complaining about it. + + Fix the bug by moving mask_bits to the function scope. + +commit 2ac4ae1ae253f7c2efedab036a677dac2f9c9eed +Author: Andrea Canciani <ranma42@gmail.com> +Date: Wed Jan 12 17:43:40 2011 +0100 + + Add a test for radial gradients + + radial-test is a port of the radial-gradient test from the cairo test + suite. It has been modified so that some pixels have 0 in both the a + and b coefficients of the quadratic equation solved by the rasterizer, + to expose a division by zero in the original implementation. + +commit 7f4eabbeec92e55fd8f812c0e5d8568eacbb633d +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Dec 12 07:34:42 2010 -0500 + + Fix destination fetching + + When fetching from destinations, we need to ignore transformations, + repeat and filtering. Currently we don't ignore them, which means all + kinds of bad things can happen. + + This patch fixes this problem by directly calling the scanline fetchers + for destinations instead of going through the full + get_scanline_32/64(). + +commit 9489c2e04a5361fe19a89a0da9d7be28436c0a4b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Dec 12 09:19:13 2010 -0500 + + Turn on testing for destination transformation + +commit fffeda703e40ced90ec5ad6d6cd37a44294d3fe4 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Dec 11 08:10:04 2010 -0500 + + Skip fetching pixels when possible + + Add two new iterator flags, ITER_IGNORE_ALPHA and ITER_IGNORE_RGB that + are set when the alpha and rgb values are not needed. If both are set, + then we can skip fetching entirely and just use + _pixman_iter_get_scanline_noop.
+ +commit 3e635d6491d883304662aff3c72558dc9065f1f1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 10 16:55:55 2010 -0500 + + Add direct-write optimization back + + Introduce a new ITER_LOCALIZED_ALPHA flag that indicates that the + alpha value computed is used only for the alpha channel of the output; + it doesn't affect the RGB channels. + + Then in pixman-bits-image.c, if a destination is either a8r8g8b8 or + x8r8g8b8 with localized alpha, the iterator will return a pointer + directly into the image. + +commit 0f1a5c4a27d34dcf4525dc38fcb48c14f653e828 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 10 15:18:48 2010 -0500 + + Get rid of the classify methods + + They are not used anymore, and the linear gradient is now doing the + optimization in a different way. + +commit b66cabb88488413c4787845c7da67901dc988ee6 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 10 15:14:24 2010 -0500 + + Linear: Optimize for horizontal gradients + + If the gradient is horizontal, we can reuse the same scanline over and + over. Add support for this optimization to + _pixman_linear_gradient_iter_init(). + +commit cf14189c6993e42ae71977a4a4061417941ffee8 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 10 14:59:20 2010 -0500 + + Consolidate the various get_scanline_32() into get_scanline_narrow() + + The separate get_scanline_32() functions in solid, linear, radial and + conical images are no longer necessary because all access to these + images now go through iterators. + +commit 0a6360a7ee0983dd52d368f5352d8c313fb0570b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 10 14:44:22 2010 -0500 + + Allow NULL property_changed function + + Initialize the field to NULL, and then delete the empty functions from + the solid, linear, radial, and conical images. 
+ +commit 34b5633105e5e2838ac8deb32d26e3bbe73a3d1a +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 10 14:39:01 2010 -0500 + + Move get_scanline_32/64 to the bits part of the image struct + + At this point these functions are basically a cache that the bits + image uses for its fetchers, so they can be moved to the bits image. + + With the scanline getters only being initialized in the bits image, + the _pixman_image_get_scanline_generic_64 can be moved to + pixman-bits-image.c. That gets rid of the final user of + _pixman_image_get_scanline_32/64, so these can be deleted. + +commit d6b13f99b41eac535d961b89d4b53f616c910c1e +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 10 10:53:02 2010 -0500 + + Use an iterator in pixman_image_get_solid() + + This is a step towards getting rid of the + _pixman_image_get_scanline_32/64() functions. + +commit 51a5e949f394560b057911d46aab768f8e07bd54 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 10 13:26:53 2010 -0500 + + Virtualize iterator initialization + + Make src_iter_init() and dest_iter_init() virtual methods in the + implementation struct. This allows individual implementations to plug + in their own CPU specific scanline fetchers. + +commit 6503c6edccbc6b08ea8efe398da3265126efa896 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 10 12:40:26 2010 -0500 + + Move iterator initialization to the respective image files + + Instead of calling _pixman_image_get_scanline_32/64(), move the + iterator initialization into the respective image implementations and + call the scanline generators directly. + +commit 23c6e1d2c007cc661b31e1bcdfd84604d7a9a560 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 10 12:31:29 2010 -0500 + + Eliminate the _pixman_image_store_scanline_32/64 functions + + They were only called from next_line_write_narrow/wide, so they could + simply be absorbed into those functions.
+ +commit b2c9eaa5020d08cfaac6c2296895e5a65c971ffd +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 10 12:19:50 2010 -0500 + + Move initialization of iterators for bits images to pixman-bits-image.c + + pixman_iter_t is now defined in pixman-private.h, and iterators for + bits images are being initialized in pixman-bits-image.c + +commit 15b1645c7b96498788c9376e3bb7d8a5e7b4e584 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Dec 10 11:30:27 2010 -0500 + + Add iterators in the general implementation + + We add a new structure called a pixman_iter_t that encapsulates the + information required to read scanlines from an image. It contains two + functions, get_scanline() and write_back(). The get_scanline() + function will generate pixels for the current scanline. For iterators + for source images, it will also advance to the next scanline. The + write_back() function is only called for destination images. Its + function is to write back the modified pixels to the image and then + advance to the next scanline. + + When an iterator is initialized, it is passed this information: + + - The image to iterate + + - The rectangle to be iterated + + - A buffer that the iterator may (but is not required to) use. This + buffer is guaranteed to have space for at least width pixels. + + - A flag indicating whether a8r8g8b8 or a16r16g16b16 pixels should + be fetched + + There are a number of (eventual) benefits to the iterators: + + - The initialization of the iterator can be virtualized such that + implementations can plug in their own CPU specific get_scanline() + and write_back() functions. + + - If an image is horizontal, it can simply plug in an appropriate + get_scanline(). This way we can get rid of the annoying + classify() virtual function. + + - In general, iterators can remember what they did on the last + scanline, so for example a REPEAT_NONE image might reuse the same + data for all the empty scanlines generated by the zero-extension. 
+ + - More detailed information can be passed to iterator, allowing + more specialized fetchers to be used. + + - We can fix the bug where destination filters and transformations + are not currently being ignored as they should be. + + However, this initial implementation is not optimized at all. We lose + several existing optimizations: + + - The ability to composite directly in the destination + - The ability to only fetch one scanline for horizontal images + - The ability to avoid fetching the src and mask for the CLEAR + operator + + Later patches will re-introduce these optimizations. + +commit 255d624e508e29b452e567c249ac75ae8d8e2abe +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Jan 11 14:36:24 2011 +0200 + + ARM: do /proc/self/auxv based cpu features detection only in linux + + This method is linux specific, but earlier it was tried for any platform + that did not have _MSC_VER macro defined. + +commit 2bbd553bd21dcc1b199eb11ec6cb78a5b9769d49 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Sep 13 04:21:33 2010 +0300 + + A new configure option --enable-static-testprogs + + This option can be used for building fully static binaries of the test + programs so that they can be easily run using qemu-user. With binfmt-misc + configured, 'make check' works fine for crosscompiled pixman builds. + +commit 55bbccf84e475b2e3c4536606cd08c946c041fd0 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Jan 10 18:29:33 2011 +0200 + + Make 'fast_composite_scaled_nearest_*' less suspicious + + Taking address of a variable and then using it as an array looks suspicious + to static code analyzers. So change it into an array with 1 element to make + them happy. Both old and new variants of this code are correct because 'vx' + and 'unit_x' arguments are set to 0 and it means that the called scanline + function can only access a single element of 'zero' buffer. 
+ +commit ae70b38d40a587e29dc5e0dfe6250693598beca7 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Jan 10 18:09:16 2011 +0200 + + Bugfix for a corner case in 'pixman_transform_is_inverse' + + When 'pixman_transform_multiply' fails, the result of multiplication just + could not have been identity matrix (one of the values in the resulting + matrix can't be represented as 16.16 fixed point value). So it is safe + to return FALSE. + +commit ab3809f4da0d833944363c5c039c3a2e6a8389c5 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Jan 4 13:42:29 2011 +0200 + + Workaround for a preprocessor issue in old Sun Studio + + Patch from Peter O'Gorman with some modifications + + https://bugs.freedesktop.org//show_bug.cgi?id=32764 + +commit f5c0a60ac8c32ac37aaf58f67048585af58f3141 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Jan 4 08:41:02 2011 +0200 + + Fix for "syntax error: empty declaration" Solaris Studio warnings + +commit c71e24c9fc312cf0b8ec56d2e657efe79d062d2f +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Jan 4 08:18:38 2011 +0200 + + Revert "Fix "syntax error: empty declaration" warnings." + + This reverts commit b924bb1f8191cc7c386d8211d9822aeeaadcab44. + + There is a better fix for these Solaris Studio warnings. + +commit 29439bd7724031504e965ffe5b366baaeeae07d8 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Tue Nov 23 11:37:54 2010 +0100 + + Improve handling of tangent circles + + When b is 0, avoid the division by zero and just return transparent + black. + + When the solution t would have an invalid radius (negative or outside + [0,1] for none-extended gradients), return transparent black. 
+ +commit a484a9c49c98dfad0d74af4440039f61bef24d48 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Dec 20 16:11:48 2010 -0500 + + sse2: Skip src pixels that are zero in sse2_composite_over_8888_n_8888() + + This is a big speed-up in the SVG helicopter game: + + http://ie.microsoft.com/testdrive/Performance/Helicopter/Default.xhtml + + when rendered by Firefox 4 since it is compositing big images + consisting almost entirely of zeros. + +commit 2610323545cb5ee3dff0b7d7da505a1cd1e01b73 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Dec 18 06:06:39 2010 -0500 + + Fix divide-by-zero in set_lum(). + + When (l - min) or (max - l) are zero, simply set all the channels to + the limit, 0 in the case of (l - min), and a in the case of (max - l). + +commit 3479050216a65e3ef6e966a8e801415145261216 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Dec 18 06:05:52 2010 -0500 + + Add a test compositing with the various PDF operators. + + The test has floating point exceptions enabled, and currently fails + with a divide-by-zero. + +commit 45a2d010773d05666e87b7a6502e8fcb25add4eb +Author: Cyril Brulebois <kibi@debian.org> +Date: Sun Dec 19 19:37:26 2010 +0100 + + Fix linking issues when HAVE_FEENABLEEXCEPT is set. + + All objects using test/util.c fail to link: + | CCLD region-test + | /usr/bin/ld: utils.o: in function enable_fp_exceptions:utils.c(.text+0x939): error: undefined reference to 'feenableexcept' + + There's indeed no explicit dependency on -lm, and if HAVE_FEENABLEEXCEPT + happens to be set, test/util.c uses feenableexcept(), which is nowhere + to be found while linking. + + Fix this by adding -lm to TEST_LDADD, although two alternatives could be + thought of: + - Only specifying -lm for objects using util.c. + - Introducing a conditional to add -lm only when configure detects + have_feenableexcept=yes. 
+ + Signed-off-by: Cyril Brulebois <kibi@debian.org> + +commit 303de045ff21bd5c9cb756d50a41fe4cb8bc97b8 +Author: Jon TURNEY <jon.turney@dronecode.org.uk> +Date: Sat Dec 18 18:32:39 2010 +0000 + + Remove stray #include <fenv.h> + + Remove a stray #include <fenv.h> added in commit 2444b2265abeaf6dcf3df1763bc2711684e63bb8 + to fix compilation on platforms which don't have fenv.h + + Signed-off-by: Jon TURNEY <jon.turney@dronecode.org.uk> + +commit f914cf448630d4ba4af6603b827c621ae6705387 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Aug 23 21:55:02 2010 -0400 + + Add a stress-test program. + + This test program tries to use as many rarely-used features as + possible, including alpha maps, accessor functions, oddly-sized + images, strange transformations, conical gradients, etc. + + The hope is to provoke crashes or irregular behavior in pixman. + +commit 7d7b03c0911584f687a7fd57a3f5d5eed21080e0 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Oct 12 10:56:26 2010 -0400 + + Make the argument to fence_malloc() an int64_t + + That way we can detect if someone attempts to allocate a negative size + and abort instead of just returning NULL and segfaulting later. + +commit d41522113ec84e74f7915599fd7624f842be8862 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Aug 29 18:02:02 2010 -0400 + + test/utils.c: Initialize palette->rgba to 0. + + That way it can be used with palettes that are not statically + allocated, without causing valgrind issues. + +commit 337f0bff0d8965cb702175e0eedbf418b1e7f0b5 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Aug 23 21:02:02 2010 -0400 + + test: Move palette initialization to utils.[ch] + +commit 2444b2265abeaf6dcf3df1763bc2711684e63bb8 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Oct 20 13:12:37 2010 -0400 + + Extend gradient-crash-test + + Test the gradients with various transformations, and test cases where + the gradients are specified with two identical points. 
+ +commit de2e51dacb1ccd312c0461088b942ef4e93e2731 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Oct 20 13:53:07 2010 -0400 + + Add enable_fp_exceptions() function in utils.[ch] + + This function enables floating point traps if possible. + +commit a2afcc9ba4ed5a2843fd133ca23704960846185b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Aug 23 20:56:11 2010 -0400 + + test: Make composite test use some existing macros instead of defining its own + + Also move the ARRAY_LENGTH macro into utils.h so it can be used elsewhere. + +commit 4d8d2fa47e457e3c8a5ab956b52cff4785aa45c3 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Dec 17 15:29:58 2010 +0200 + + COPYING: added Nokia to the list of copyright holders + +commit 3d094997b1820719d15cec7dc633ed37e1912bfc +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Nov 30 00:31:06 2010 +0200 + + Fix for potential unaligned memory accesses + + The temporary scanline buffer allocated on stack was declared + as uint8_t array. As a result, the compiler was free to select + any arbitrary alignment for it (even though there is typically + no reason to use really weird alignments here and the stack is + normally at least 4 bytes aligned on most platforms). Having + improper alignment is non-portable and can impact performance + or even make the code misbehave depending on the target platform. + + Using uint64_t type for this array should ensure that any possible + memory accesses done by pixman code are going to be handled correctly + (pixman-combine64.c can access this buffer via uint64_t * pointer). 
+ + Some alignment related problem was reported in: + http://lists.freedesktop.org/archives/pixman/2010-November/000747.html + +commit 985e59a82fa5e644cb6516dc174ab3f79f1448df +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Nov 25 02:28:29 2010 +0200 + + ARM: added 'neon_src_rpixbuf_8888' fast path + + With this optimization added, pixman assisted conversion from + non-premultiplied to premultiplied alpha format is now fully + NEON optimized (both with and without R/B color components + swapping in the process). + +commit 733f68912f4a44c24ad3973049a7e1d98f4c6ea8 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Nov 29 09:11:29 2010 +0200 + + ARM: added 'neon_composite_in_n_8' fast path + +commit af7a69d90ea2b43a4e850870727723d719f09a1c +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Nov 29 09:00:46 2010 +0200 + + ARM: added flags parameter to some asm fast path wrapper macros + + Not all types of operations can be skipped when having transparent + solid source or transparent solid mask. Add an extra flags parameter + for providing this information to the wrappers. + +commit f6843e3797eea7e4aed7614b1086f5cefc06c0f9 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Nov 29 03:31:32 2010 +0200 + + ARM: added 'neon_composite_add_8888_n_8888' fast path + +commit b066b520dfaf0a9f4d1bc9a73c789091e9ce7cc8 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Nov 29 02:38:52 2010 +0200 + + ARM: added 'neon_composite_add_n_8_8888' fast path + +commit 1fba7790367d7b726d05a33bbbcebe10b9280a31 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Nov 29 02:10:22 2010 +0200 + + ARM: better NEON instructions scheduling for add_8888_8888_8888 + + Provides a minor performance improvement by using pipelining and hiding + instructions latencies. 
Also do not clobber d0-d3 registers (source + image pixels) while doing calculations in order to allow the use of + the same macro for add_n_8_8888 fast path later. + + Benchmark from ARM Cortex-A8 @500MHz: + + == before == + + add_8888_8888_8888 = L1: 95.94 L2: 42.27 M: 25.60 (121.09%) + HT: 14.54 VT: 13.13 R: 12.77 RT: 4.49 (48Kops/s) + add_8888_8_8888 = L1: 104.51 L2: 57.81 M: 36.06 (106.62%) + HT: 19.24 VT: 16.45 R: 14.71 RT: 4.80 (51Kops/s) + + == after == + + add_8888_8888_8888 = L1: 106.66 L2: 47.82 M: 27.32 (129.30%) + HT: 15.44 VT: 13.96 R: 12.86 RT: 4.48 (48Kops/s) + add_8888_8_8888 = L1: 107.72 L2: 61.02 M: 38.26 (113.16%) + HT: 19.48 VT: 16.72 R: 14.82 RT: 4.80 (51Kops/s) + +commit c3f48b6aa2f9354af02ffc8c938ec6753fdcbde3 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sun Nov 28 22:05:53 2010 +0200 + + ARM: added 'neon_composite_add_8888_8_8888' fast path + +commit 6d2f7f981b52b41f4321071c325babcf792bd666 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sat Nov 27 15:53:54 2010 +0200 + + ARM: added 'neon_composite_over_0565_n_0565' fast path + +commit 3990931bf6197eff1cec06cf24bce53ddf9a539a +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sat Nov 27 04:47:39 2010 +0200 + + ARM: reuse common NEON code for over_{n_8|8888_n|8888_8}_0565 + + Renamed supplementary macros from 'over_n_8_0565' to 'over_8888_8_0565', + because they can actually support all variants of this operation: + over_8888_8_0565/over_n_8_0565/over_8888_n_0565. + + Also 'over_8888_8_0565' now uses more optimized common code instead of its + own variant, improving performance a bit. Even though this operation is + still memory bandwidth limited, scaled variants of these fast paths may + put more stress on CPU later.
+ + Benchmarked on ARM Cortex-A8 @500MHz: + + == before == + + over_8888_8_0565 = L1: 67.10 L2: 53.82 M: 44.70 (105.17%) + HT: 18.73 VT: 16.91 R: 14.25 RT: 4.80 (52Kops/s) + + == after == + + over_8888_8_0565 = L1: 77.83 L2: 58.14 M: 44.82 (105.52%) + HT: 20.58 VT: 17.44 R: 15.05 RT: 4.88 (52Kops/s) + +commit a7c36681c0c1955ff9110b81f1789e56abb10a95 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sat Nov 27 03:53:12 2010 +0200 + + ARM: added 'neon_composite_over_8888_n_0565' fast path + +commit e6814837a6ccd3e4db329e0131eaf2055d2c864b +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Nov 26 17:06:58 2010 +0200 + + ARM: better NEON instructions scheduling for over_n_8_0565 + + Code rearranged to get better instructions scheduling for ARM Cortex-A8/A9. + Now it is ~30% faster for the pixel data in L1 cache and makes better use + of memory bandwidth when running at lower clock frequencies (ex. 500MHz). + Also register d24 (pixels from the mask image) is now not clobbered by + supplementary macros, which allows to reuse them for the other variants + of compositing operations later. + + Benchmark from ARM Cortex-A8 @500MHz: + + == before == + + over_n_8_0565 = L1: 63.90 L2: 63.15 M: 60.97 ( 73.53%) + HT: 28.89 VT: 24.14 R: 21.33 RT: 6.78 ( 67Kops/s) + + == after == + + over_n_8_0565 = L1: 82.64 L2: 75.19 M: 71.52 ( 84.14%) + HT: 30.49 VT: 25.56 R: 22.36 RT: 6.89 ( 68Kops/s) + +commit 3be86a92ccab240859062a541cdb871d81c9501a +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sun Nov 28 21:45:06 2010 +0200 + + ARM: introduced 'fetch_mask_pixblock' macro to simplify code + + This macro hides the implementation details of pixels fetching + for the mask image just like 'fetch_src_pixblock' does for the + source image. This provides more possibilities for reusing the + same code blocks in different compositing functions. 
+ + This patch does not introduce any functional changes and the + resulting code in the compiled object file is exactly the same. + +commit 98d08b37f17a3379d0ceff8bb7de8f943873fbd8 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Nov 26 08:55:49 2010 +0200 + + ARM: added 'neon_composite_over_n_8_8' fast path + +commit 4b5b5a2a832cd67f2a0ec231f75a2825b45571fa +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Nov 15 18:26:43 2010 +0200 + + C fast path for a1 fill operation + + Can be used as one of the solutions to fix bug + https://bugs.freedesktop.org/show_bug.cgi?id=31604 + +commit 654961efe405ad1a7e54a77548ca8af322ecc1f8 +Author: Alan Coopersmith <alan.coopersmith@oracle.com> +Date: Sun Nov 21 11:42:22 2010 -0800 + + Sun's copyrights belong to Oracle now + + Signed-off-by: Alan Coopersmith <alan.coopersmith@oracle.com> + +commit e7ee43c39d2370716a4d011afa8f5067eced9899 +Author: Cyril Brulebois <kibi@debian.org> +Date: Wed Nov 17 16:16:56 2010 +0100 + + Fix argument quoting for AC_INIT. + + One gets rid of this accordingly: + | autoreconf -vfi + | autoreconf: Entering directory `.' 
+ | autoreconf: configure.ac: not using Gettext + | autoreconf: running: aclocal --force + | configure.ac:61: warning: AC_INIT: not a literal: "pixman@lists.freedesktop.org" + | autoreconf: configure.ac: tracing + | configure.ac:61: warning: AC_INIT: not a literal: "pixman@lists.freedesktop.org" + + Signed-off-by: Cyril Brulebois <kibi@debian.org> + +commit c59db8af66510e8e0a852e5775cff46f7476c71c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Nov 16 17:14:47 2010 -0500 + + Post-release version bump to 0.21.3 + +commit 4646c238589986499834b28ed903c366b5ba15ed +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Nov 16 16:43:26 2010 -0500 + + Pre-release version bump + +commit 536cf4dd3bd144ad1c65fc05f4883a31247a0f5d +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Nov 2 23:38:10 2010 -0400 + + Generate {a,x}8r8g8b8, a8, 565 fetchers for nearest/affine images + + There are versions for all combinations of x8r8g8b8/a8r8g8b8 and + pad/repeat/none/normal repeat modes. The bulk of each function is an + inline function that takes a format and a repeat mode as parameters. + +commit da0176e8534e5b027818f6b695343d3e04130a93 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Tue Nov 2 17:04:35 2010 +0100 + + Improve conical gradients opacity check + + Conical gradients are completely opaque if all of their stops are + opaque and the repeat mode is not 'none'. + +commit 151f2554fc9c098ff86b0fdc0d785aa3ff496328 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Tue Nov 2 17:02:01 2010 +0100 + + Fix opacity check + + Radial gradients are "conical", thus they can have some non-opaque + parts even if all of their stops are completely opaque. + + To guarantee that a radial gradient is actually opaque, it needs to + also have one of the two circles containing the other one. In this + case when extrapolating, the whole plane is completely covered (as + explained in the comment in pixman-radial-gradient.c). 
+ +commit 19ed415b74521ad5dcc7b6e3ed4bb644711c7bef +Author: Andrea Canciani <ranma42@gmail.com> +Date: Sun Oct 31 16:59:45 2010 +0100 + + Remove unused stop_range field + +commit d8fe87a6262ee661af8fb0d46bab223e4ab3d88e +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Oct 4 01:56:59 2010 +0300 + + ARM: optimization for scaled src_0565_0565 with nearest filter + + The performance improvement is only in the ballpark of 5% when + compared against C code built with a reasonably good compiler + (gcc 4.5.1). But gcc 4.4 produces approximately 30% slower code + here, so assembly optimization makes sense to avoid dependency + on the compiler quality and/or optimization options. + + Benchmark from ARM11: + == before == + op=1, src_fmt=10020565, dst_fmt=10020565, speed=34.86 MPix/s + + == after == + op=1, src_fmt=10020565, dst_fmt=10020565, speed=36.62 MPix/s + + Benchmark from ARM Cortex-A8: + == before == + op=1, src_fmt=10020565, dst_fmt=10020565, speed=89.55 MPix/s + + == after == + op=1, src_fmt=10020565, dst_fmt=10020565, speed=94.91 MPix/s + +commit b8007d042354fd9bd15711d9921e6f1ebb1c3c22 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Nov 2 16:12:42 2010 +0200 + + ARM: NEON optimization for scaled src_0565_8888 with nearest filter + + Benchmark from ARM Cortex-A8 @720MHz: + == before == + op=1, src_fmt=10020565, dst_fmt=20028888, speed=8.99 MPix/s + + == after == + op=1, src_fmt=10020565, dst_fmt=20028888, speed=76.98 MPix/s + + == unscaled == + op=1, src_fmt=10020565, dst_fmt=20028888, speed=137.78 MPix/s + +commit 2e855a2b4a2bb7b3d2ed1826cb4426d14080ca67 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Nov 2 15:25:51 2010 +0200 + + ARM: NEON optimization for scaled src_8888_0565 with nearest filter + + Benchmark from ARM Cortex-A8 @720MHz: + == before == + op=1, src_fmt=20028888, dst_fmt=10020565, speed=42.51 MPix/s + + == after == + op=1, src_fmt=20028888, dst_fmt=10020565, speed=55.61 MPix/s + + == 
unscaled == + op=1, src_fmt=20028888, dst_fmt=10020565, speed=117.99 MPix/s + +commit 4a09e472b8fbfae3e67d05a26ecc9c8a17225053 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Nov 2 14:39:02 2010 +0200 + + ARM: NEON optimization for scaled over_8888_0565 with nearest filter + + Benchmark from ARM Cortex-A8 @720MHz: + == before == + op=3, src_fmt=20028888, dst_fmt=10020565, speed=10.29 MPix/s + + == after == + op=3, src_fmt=20028888, dst_fmt=10020565, speed=36.36 MPix/s + + == unscaled == + op=3, src_fmt=20028888, dst_fmt=10020565, speed=79.40 MPix/s + +commit 67a4991f3341d38bc3477c8f99f2ef581cd609e3 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Nov 2 14:29:57 2010 +0200 + + ARM: NEON optimization for scaled over_8888_8888 with nearest filter + + Benchmark from ARM Cortex-A8 @720MHz: + == before == + op=3, src_fmt=20028888, dst_fmt=20028888, speed=12.73 MPix/s + + == after == + op=3, src_fmt=20028888, dst_fmt=20028888, speed=28.75 MPix/s + + == unscaled == + op=3, src_fmt=20028888, dst_fmt=20028888, speed=53.03 MPix/s + +commit 0b56244ac81f2bb2402629f8720c7e22893a24df +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Nov 2 19:16:46 2010 +0200 + + ARM: performance tuning of NEON nearest scaled pixel fetcher + + Interleaving the use of NEON registers helps to avoid some stalls + in NEON pipeline and provides a small performance improvement. + +commit 6e76af0d4b60ab74b309994926f28c532c5af155 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Nov 2 14:26:13 2010 +0200 + + ARM: macro template in C code to simplify using scaled fast paths + + This template can be used to instantiate scaled fast path functions + by providing main loop code and calling NEON assembly optimized + scanline processing functions from it. Another macro can be used + to simplify adding entries to fast path tables. 
+ +commit 88014a0e6ffaa22b3ac363c2c73b72530cdba0cc +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Nov 1 10:03:59 2010 +0200 + + ARM: nearest scaling support for NEON scanline compositing functions + + Now it is possible to generate scanline processing functions + for the case when the source image is scaled with NEAREST filter. + + Only 16bpp and 32bpp pixel formats are supported for now. But the + others can be also added later when needed. All the existing NEON + fast path functions should be quite easy to reuse for implementing + fast paths which can work with scaled source images. + +commit 324712e48cf04df3cfcfc463fb221fcdf96e020a +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Nov 1 05:10:34 2010 +0200 + + ARM: NEON: source image pixel fetcher can be overridden now + + Added a special macro 'pixld_src' which is now responsible for fetching + pixels from the source image. Right now it just passes all its arguments + directly to 'pixld' macro, but it can be used in the future to provide + a special pixel fetcher for implementing nearest scaling. + + The 'pixld_src' has a lot of arguments which define its behavior. But + for each particular fast path implementation, we already know NEON + registers allocation and how many pixels are processed in a single block. + That's why a higher level macro 'fetch_src_pixblock' is also introduced + (it's easier to use because it has no arguments) and used everywhere + in 'pixman-arm-neon-asm.S' instead of VLD instructions. + + This patch does not introduce any functional changes and the resulting code + in the compiled object file is exactly the same. + +commit cb3f1830257a56f56abf7d50a8b34e215c616aec +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Nov 2 22:53:55 2010 +0200 + + ARM: fix 'vld1.8'->'vld1.32' typo in add_8888_8888 NEON fast path + + This was mostly harmless and had no effect on little endian systems. 
+ But wrong vector element size is at least inconsistent and also + can theoretically cause problems on big endian ARM systems. + +commit fed4a2fde540916fc182917762b85b38052c04de +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Sep 24 16:36:16 2010 +0300 + + Do CPU features detection from 'constructor' function when compiled with gcc + + There is attribute 'constructor' supported since gcc 2.7 which allows + to have a constructor function for library initialization. This eliminates + an extra branch for each composite operation and also helps to avoid + complaints from race condition detection tools like helgrind. + + The other compilers may or may not support this attribute properly. + Ideally, the compilers should fail to compile the code with unknown + attribute, so the configure check should do the right job. But in + reality the problems are surely possible. Fortunately such problems + should be quite easy to find because NULL pointer dereference should + happen almost immediately if the constructor fails to run. + + clang 2.7: + supports __attribute__((constructor)) properly and pretends to be gcc + + tcc 0.9.25: + ignores __attribute__((constructor)), but does not pretend to be gcc + +commit 99699771cd82e108fbace655bf44013bdccde3bf +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Oct 31 01:40:57 2010 -0400 + + Delete the source_image_t struct. + + It serves no purpose anymore now that the source_class_t field is gone. + +commit f405b4079872ecc312f9514fdadc5287e8f20b08 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Oct 30 17:20:22 2010 -0400 + + [mmx] Mark some of the output variables as early-clobber. + + GCC assumes that input variables in inline assembly are fully consumed + before any output variable is written. This means it may allocate the + variables in the same register unless the output variables are marked + as early-clobber. 
+ + From Jeremy Huddleston: + + I noticed a problem building pixman with clang and reported it to + the clang developers. They responded back with a comment about + the inline asm in pixman-mmx.c and suggested a fix: + + """ + Incidentally, Jeremy, in the asm that reads + __asm__ ( + "movq %7, %0\n" + "movq %7, %1\n" + "movq %7, %2\n" + "movq %7, %3\n" + "movq %7, %4\n" + "movq %7, %5\n" + "movq %7, %6\n" + : "=y" (v1), "=y" (v2), "=y" (v3), + "=y" (v4), "=y" (v5), "=y" (v6), "=y" (v7) + : "y" (vfill)); + + all the output operands except the last one should be marked as + earlyclobber ("=&y"). This is working by accident with gcc. + """ + + Cc: jeremyhu@apple.com + Reviewed-by: Matt Turner <mattst88@gmail.com> + +commit 9c19a85b0037d48fdd180a2c59ef05bdc4f46680 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Oct 28 20:14:03 2010 -0400 + + Remove workaround for a bug in the 1.6 X server. + + There used to be a bug in the X server where it would rely on + out-of-bounds accesses when it was asked to composite with a + window as the source. It would create a pixman image pointing + to some bogus position in memory, but then set a clip region + to the position where the actual bits were. + + Due to a bug in old versions of pixman, where it would not clip + against the image bounds when a clip region was set, this would + actually work. So when the pixman bug was fixed, a workaround was + added to allow certain out-of-bound accesses. + + However, the 1.6 X server is so old now that we can remove this + workaround. This does mean that if you update pixman to 0.22 or later, + you will need to use a 1.7 X server or later. + +commit 56748ea9a698daec8f445d2bebbbaed5515380af +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sat Oct 30 15:51:30 2010 +0300 + + Fixed broken configure check for __thread support + + Somehow the patch from [1] was not applied correctly, fixing that. + + 1. 
http://lists.cairographics.org/archives/cairo/2010-September/020826.html + +commit ecc3612995d5d699a3dd49016a7e9ed40f0a4564 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Nov 1 17:52:29 2010 -0400 + + COPYING: Stop saying that a modification is currently under discussion. + + Also put the copyright text into a C comment for easier cut and paste. + +commit c993cd9614a47657228e3125bdcedc0bd0e34164 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Oct 27 17:21:06 2010 -0400 + + Version bump 0.21.1. + + The previous bump to 0.20.1 was a mistake; it belongs on the 0.20 branch. + +commit d890b684f68905ea5e242360f20e2a70251c89fd +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Oct 27 16:58:29 2010 -0400 + + Post-release version bump to 0.20.1 + +commit c5e048d46c32c43172fb8d1c067e82587f916953 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Oct 27 16:51:40 2010 -0400 + + Pre-release version bump to 0.20.0 + +commit 6a6d9758af478e9f5eae48ccf15f1cbea2cf30ed +Author: Scott McCreary <scottmc2@gmail.com> +Date: Wed Oct 27 12:31:27 2010 -0700 + + Added check to find pthread on Haiku. 
+ +commit 00fdb3d8e8d5c04d01c352315b6a8e2e2dfe53ae +Author: Jon TURNEY <jon.turney@dronecode.org.uk> +Date: Sun Oct 24 15:58:39 2010 +0100 + + Plug another leak in alphamap test + + Even after commit e46be417cebac984a858da05e61d924889695c9e alphamap + test is still leaking the alphamap pixmap, leading to mmap() failures + on cygwin + + Signed-off-by: Jon TURNEY <jon.turney@dronecode.org.uk> + +commit 1c23142efa056124c594c72022e7f383e839d3b1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Oct 20 16:31:57 2010 -0400 + + Post-release version bump to 0.19.7 + +commit d1051340155a099a523e71377b1d889eec8b972e +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Oct 20 16:25:55 2010 -0400 + + Pre-release version bump to 0.19.6 + +commit a966cd04c16ad0c34b0f17e9021a4f3532575ca4 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Tue Oct 12 15:38:20 2010 +0200 + + Fix an overflow in the new radial gradient code + + huge-radial in the cairo test suite pointed out an undocumented + overflow in the radial gradient code. + By casting to pixman_fixed_48_16_t before doing the operations, + the overflow can be avoided. + +commit 70658f0a6bd451a21fbb43df7865a7dac95abe24 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Oct 20 16:09:44 2010 -0400 + + Remove the class field from source_image_t + + The linear gradient was the only image type that relied on the class + being stored in the image struct itself. With the previous changes, it + doesn't need that anymore, so we can delete the field. + +commit 741c30d9d9cf445fa2e3a2c43d37c221d49831b4 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Wed Oct 20 21:24:32 2010 +0200 + + Remove unused enum value + + The new linear gradient code doesn't use SOURCE_IMAGE_CLASS_VERTICAL + anymore and it was not used anywhere else. 
+ +commit 9b72fd1b857494ea928795c89a4f827e56fe26d3 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Mon Oct 18 22:21:52 2010 +0200 + + Make classification consistent with rasterization + + Use the same computations to classify the gradient and to + rasterize it. + This improves the correctness of the classification by + avoiding integer division. + +commit 1d4f2d71facd5f2bbce74fbe3407ccea6cf4bea1 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Wed Aug 11 09:58:05 2010 +0200 + + Improve precision of linear gradients + + Integer division (without keeping the remainder) can discard a lot + of information. Doing the division maths in floating point (and + paying attention to error propagation) allows to greatly improve + the precision of linear gradients. + +commit f6ab20ca6604739b82311fc078d6ce850f43adc0 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Tue Oct 12 09:52:53 2010 +0200 + + Add comments about errors + + Explain how errors are introduced in the computation performed for + radial gradients. + +commit 1ca715ed1e6914e9bd9f050065e827d7a9e2efc9 +Author: Andrea Canciani <ranma42@gmail.com> +Date: Sun Aug 15 09:07:33 2010 +0200 + + Draw radial gradients with PDF semantics + + Change radial gradient computations and definition to reflect the + radial gradients in PDF specifications (see section 8.7.4.5.4, + Type 3 (Radial) Shadings of the PDF Reference Manual). + + Instead of having a valid interpolation parameter value for every + point of the plane, define it only for points within the area + covered by the family of circles generated by interpolating or + extrapolating the start and end circles. + + Points outside this area are now transparent black (rgba 0 0 0 0). + Points within this area have the color associated with the maximum + value of the interpolation parameter in that point (if multiple + solutions exist within the range specified by the extend mode). 
+ +commit e46be417cebac984a858da05e61d924889695c9e +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Oct 8 07:44:20 2010 -0400 + + Plug leak in the alphamap test. + + The images are being created with non-NULL data, so we have to free it + ourselves. This is important because the Cygwin tinderbox is running + out of memory and produces this: + + mmap failed on 20000 1507328 + mmap failed on 40000 1507328 + mmap failed on 20000 1507328 + mmap failed on 40000 1507328 + mmap failed on 40000 1507328 + mmap failed on 40000 1507328 + + http://tinderbox.x.org/builds/2010-10-05-0014/logs/pixman/#check + +commit 6ed7164de5f74b752d85834b53e89810f1d0a560 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Oct 6 02:40:39 2010 -0400 + + Add no-op combiners for DST and the CA versions of the HSL operators. + + We already exit early for DST, but for the HSL operators with + component alpha, we crash at the moment. Fix that by adding a dummy + combine_dst() function. + +commit 233b27257b63ecd502c6392e5ef3a7f736f14365 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Oct 5 11:05:25 2010 -0400 + + test: Add some more colors to the color table in composite.c + + Specifically, add transparent black and superluminescent white with + alpha = 0. + +commit 3f7da59352b604bd6974230d0b149e8e7da77b5c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Oct 5 09:49:45 2010 -0400 + + test: Parallelize composite.c with OpenMP + + Each test uses the test number as the random number seed; if it + didn't, all the threads would run the same tests since they would all + start from the same seed. + +commit a10ccc9f303ca6b4577afe68cc6b2d8840de5a27 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Mar 7 11:26:16 2010 -0500 + + test: Change composite so that it tests randomly generated images + + Previously this test would try to exhaustively test all combinations + of formats and operators, which meant that it would take hours to run. 
+ Instead, generate images randomly and test compositing those. + + Cc: chris@chris-wilson.co.uk + +commit 55e4065cbbc5ffe2ce1986b51ef63e8a0b50fccb +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Mar 7 11:24:30 2010 -0500 + + test: Fix eval_diff() so that it provides useful error values. + + Previously, this function would evaluate the error under the + assumption that the format was 565 or wider. This patch changes it to + take the actual format into account. + + With that fixed, we can turn on testing for the rest of the formats. + + Cc: chris@chris-wilson.co.uk + +commit fe411cf2ac4d5b26a319b906dee87e0cc69d2ad6 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Mar 7 10:31:04 2010 -0500 + + test: Fix bug in color_correct() in composite.c + + This function was using the number of bits in a channel as if it were + a mask, which led to many spurious errors. With that fixed, we can + turn on testing for all formats where all channels have 5 or more + bits. + + Cc: chris@chris-wilson.co.uk + +commit 4e89a5b7f3b039fcc86dff7fb8bec79884c913e8 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Oct 5 11:08:42 2010 -0400 + + Remove broken optimizations in combine_disjoint_over_u() + + The first broken optimization is that it checks "a != 0x00" where it + should check "s != 0x00". The other is that it skips the computation + when alpha is 0xff. That is wrong because in the formula: + + min (1, (1 - Aa)/Ab) + + the render specification states that if Ab is 0, the quotient is + defined to positive infinity. That is the case even if (1 - Aa) is 0. + +commit 8d76c1b3391e1165aaf9e0f331749aee1394f62c +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Oct 4 04:49:08 2010 +0300 + + ARM: restore fallback to ARMv6 implementation from NEON in the delegate chain + + After fast path cache introduction, the overhead of having this fallback is + insignificant. 
On the other hand, some of the ARM assembly optimizations (for + example nearest neighbor scaling) do not need NEON. + +commit c748650d700c2f18f1587f06ada3b58d6ddc18d3 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Sep 8 09:30:23 2010 +0300 + + Use more unrolling for scaled src_0565_0565 with nearest filter + + Benchmark from Intel Core i7 860: + + == before == + op=1, src_fmt=10020565, dst_fmt=10020565, speed=1335.29 MPix/s + + == after == + op=1, src_fmt=10020565, dst_fmt=10020565, speed=1550.96 MPix/s + + == performance of nonscaled src_0565_0565 operation as a reference == + op=1, src_fmt=10020565, dst_fmt=10020565, speed=2401.31 MPix/s + + Benchmark from ARM Cortex-A8: + + == before == + op=1, src_fmt=10020565, dst_fmt=10020565, speed=81.79 MPix/s + + == after == + op=1, src_fmt=10020565, dst_fmt=10020565, speed=89.55 MPix/s + + == performance of nonscaled src_0565_0565 operation as a reference == + op=1, src_fmt=10020565, dst_fmt=10020565, speed=197.44 MPix/s + +commit a520c15e1134d9e801bc2ab461a3c5ade60544f2 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Sep 23 23:41:50 2010 +0300 + + ARM: added 'neon_composite_out_reverse_8_0565' fast path + + == before == + + outrev_8_0565 = L1: 22.91 L2: 22.40 M: 18.75 ( 10.47%) + HT: 12.62 VT: 12.22 R: 11.32 RT: 5.30 ( 58Kops/s) + + == after == + + outrev_8_0565 = L1: 176.27 L2: 151.70 M:108.79 ( 60.81%) + HT: 50.43 VT: 37.16 R: 32.26 RT: 9.62 ( 97Kops/s) + +commit d8820360f79d07e03c91ecd201880bc6b1706f19 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Sep 23 22:28:55 2010 +0300 + + ARM: added 'neon_composite_add_0565_8_0565' fast path + + == before == + + add_0565_8_0565 = L1: 14.05 L2: 14.03 M: 11.57 ( 12.94%) + HT: 8.31 VT: 8.10 R: 7.47 RT: 3.64 ( 42Kops/s) + + == after == + + add_0565_8_0565 = L1: 123.36 L2: 94.70 M: 74.36 ( 83.15%) + HT: 31.17 VT: 23.97 R: 21.06 RT: 6.42 ( 70Kops/s) + +commit 2f6c7b4f9d36261d2efe494a925faf063376ba30 +Author: Siarhei 
Siamashka <siarhei.siamashka@nokia.com> +Date: Fri May 21 16:31:03 2010 +0300 + + ARM: NEON: added forgotten cache preload for over_n_8888/over_n_0565 + + Prefetch provides up to 40-50% better performance when working + with large images and/or when having lots of L2 cache misses + on ARM Cortex-A8 @ 720MHz: + + == before == + + over_n_8888 = L1: 225.83 L2: 181.02 M: 55.57 ( 41.41%) + HT: 38.96 VT: 36.92 R: 32.84 RT: 14.15 ( 123Kops/s) + + over_n_0565 = L1: 153.91 L2: 149.69 M: 83.17 ( 30.95%) + HT: 50.41 VT: 49.15 R: 40.56 RT: 15.45 ( 131Kops/s) + + == after == + + over_n_8888 = L1: 222.39 L2: 170.95 M: 76.86 ( 57.27%) + HT: 58.80 VT: 53.03 R: 45.51 RT: 14.13 ( 124Kops/s) + + over_n_0565 = L1: 151.87 L2: 149.54 M:125.63 ( 46.80%) + HT: 67.85 VT: 57.54 R: 50.21 RT: 15.32 ( 130Kops/s) + +commit b924bb1f8191cc7c386d8211d9822aeeaadcab44 +Author: Mika Yrjola <mika.yrjola@movial.com> +Date: Fri Oct 1 16:17:50 2010 +0300 + + Fix "syntax error: empty declaration" warnings. + + These minor changes should fix a large number of + macro declaration - related "syntax error: empty declaration" warnings + which are seen while compiling the code with the Solaris Studio + compiler. + +commit 73c1fefa1b99efa36b74599f455df9426209378e +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Sep 28 00:51:07 2010 -0400 + + Delete simple repeat code + + This was supposedly an optimization, but it has pathological cases + where it definitely isn't. For example a 1 x n image will cause it to + have terrible memory access patterns and to generate a ton of modulus + operations. + + Since no one has ever measured whether it actually is an improvement, + and since it is doing the repeating at the wrong stage in the + pipeline, and since with the previous commit it can't be triggered + anymore because we now require SAMPLES_COVER_CLIP for regular fast + paths, just delete it. 
+ +commit a4d1c9d3831751008db61a48d6a6cb12ed33f314 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Sep 28 00:42:25 2010 -0400 + + Fix bug in FAST_PATH_STD_FAST_PATH + + The standard fast paths deal with two kinds of images: solids and + bits. These two image types require different flags, but + PIXMAN_STD_FAST_PATH uses the same ones for both. + + This patch makes it so that solid images just get the standard flags, + while bits images must be untransformed and contain the destination clip + within the sample grid. + + This means that the old FAST_PATH_COVERS_CLIP flag is now not used + anymore, so it can be deleted. + +commit 10e13135c3538f0909f27eaacc17e9e13f199a7c +Author: Dmitri Vorobiev <dmitri.vorobiev@movial.com> +Date: Tue Sep 28 14:42:02 2010 +0300 + + Some clean-ups in fence_malloc() and fence_free() + + This patch removes an unnecessary typecast of MAP_FAILED, + replaces an erroneous free() by the correct munmap() in the + error path for a failing mprotect(), and, finally, removes + redundant calls to mprotect() that aren't necessary, because + munmap() doesn't call for any specific memory protection. + +commit ba693d2e88b6f4c871d804fb62d7435915c85dfc +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Sep 28 02:52:02 2010 -0400 + + Fix search-and-replace issue in lowlevel-blt-bench.c + +commit 77d3e5f6ff719f53398b5675e5219d0e3b9746c1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Sep 17 09:21:09 2010 -0400 + + Rename all the fast paths with _8000 in their names to _8 + + This inconsistent naming somehow survived the refactoring from a while + back. + +commit ba69989374fe9cbe5151c5aac7b824da0806f94a +Author: Liu Xinyun <xinyun.liu@intel.com> +Date: Sat Sep 25 14:56:38 2010 +0800 + + Remove cache prefetch code. + + The performance is decreased with cache prefetch, especially for + ATOM. So remove this code. Following is the experiment. 
+ + old: 0.19.5-with-cache-prefetch + new: 0.19.5-without-cache-prefetch + + CPU: Intel Atom N270@1.6GHz + OS: MeeGo (32 bits) + Speedups + ======== + image-rgba poppler-0 17125.68 (17279.58 0.92%) -> 14765.36 (15926.49 3.54%): 1.16x speedup + image-rgba ocitysmap-0 9008.25 (9040.41 7.50%) -> 8277.94 (8343.09 5.44%): 1.09x speedup + image-rgba xfce4-terminal-a1-0 18020.76 (18230.68 0.97%) -> 16703.77 (16712.42 1.22%): 1.08x speedup + image-rgba gnome-terminal-vim-0 25081.38 (25133.38 0.24%) -> 23407.47 (23652.98 0.54%): 1.07x speedup + image-rgba firefox-talos-gfx-0 57916.97 (57973.20 0.11%) -> 54556.64 (54624.55 0.39%): 1.06x speedup + image-rgba firefox-planet-gnome-0 102377.47 (103496.63 0.70%) -> 96816.65 (97075.54 0.15%): 1.06x speedup + image-rgba swfdec-giant-steps-0 12376.24 (12616.84 1.02%) -> 11705.30 (11825.20 1.06%): 1.06x speedup + + CPU: Intel Core(TM)2 Duo CPU T9600@2.80GHz + OS: Ubuntu 10.04 (64bits) + Speedups + ======== + image-rgba ocitysmap-0 2671.46 (2691.82 8.55%) -> 2296.20 (2307.26 5.77%): 1.16x speedup + image-rgba swfdec-giant-steps-0 1614.55 (1615.18 1.68%) -> 1532.84 (1538.52 0.72%): 1.05x speedup + + Signed-off-by: Liu Xinyun <xinyun.liu@intel.com> + Signed-off-by: Chen Miaobo <miaobo.chen@intel.com> + +commit 56777f3f675869806cd30bcd21a5b39d788507cb +Author: Dmitri Vorobiev <dmitri.vorobiev@movial.com> +Date: Wed Sep 22 12:34:57 2010 +0300 + + Use <sys/mman.h> macros only when they are available + + Not all systems are regular Unices, so let's be careful with the + mmap()-related stuff, which might be unavailable. This patch makes + sure that mmap() and friends is used only when the <sys/mman.h> + header is found. + +commit 39524a4687391c68f4177e8671f4b2bd39e05850 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Sep 21 14:20:43 2010 -0400 + + Revert "add enable-cache-prefetch option" + + Revert this accidentally committed patch. + + This reverts commit 19ea0e16b958e5abe491365c203293ab372f3586. 
+ +commit e97da2104967f4c99aed40e89f3e0141ceed7040 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Sep 21 14:12:00 2010 -0400 + + If MAP_ANONYMOUS is not defined, define it to MAP_ANON. + + This hopefully fixes the build failure on OS X. + +commit 19ea0e16b958e5abe491365c203293ab372f3586 +Author: Liu Xinyun <xinyun.liu@intel.com> +Date: Wed Sep 22 00:15:10 2010 +0800 + + add enable-cache-prefetch option + + OK. here is the work to clear all cache prefetch. Please review it. 3x + + On Tue, Sep 21, 2010 at 11:36:30PM +0800, Soeren Sandmann wrote: + > Liu Xinyun <xinyun.liu@intel.com> writes: + > + > > This patch is to add a new configuration option: enable-cache-prefetch, + > > which is default yes. + > > + > > Here is a link which talks on cache issue. + > > http://lists.freedesktop.org/archives/pixman/2010-June/000218.html + > > + > > When disable it on Atom CPU(configured with --enable-cache-prefetch=no), + > > it will have a little performance gain. Here is the patch. + > + > I think the cache prefetch code should just be deleted outright. No + > benchmarks that I'm aware of show it to be an improvement. + > + > + > Thanks, + > Soren + + >From bca2192ef524bcae4eea84d0ffed9e8c4855675f Mon Sep 17 00:00:00 2001 + From: Liu Xinyun <xinyun.liu@intel.com> + Date: Wed, 22 Sep 2010 00:11:56 +0800 + Subject: [PATCH] remove cache prefetch + +commit edd173396604b052fd76971d0efa0c8db40cf1f3 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Sep 21 10:18:44 2010 -0400 + + Post-release version bump to 0.19.5 + +commit e5b3a6e7105af590d72e2ae986f9985f71cc88f5 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Sep 21 10:11:34 2010 -0400 + + Pre-release version bump to 0.19.4 + +commit 0742ba41646853a5edf90c2f3102f49b248321ee +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Sep 21 10:05:52 2010 -0400 + + compute_composite_region32: Zero extents before returning FALSE. 
+ + If the extents of the composite region are broken such that x2 <= x1 + or y2 <= y1, then we need to zero the extents before returning so that + the region won't be completely broken when calling + pixman_region32_fini(). + +commit 7cd4f2fa201c4dc846153c022423e3dced2cfb13 +Author: Jonathan Morton <jonathan.morton@movial.com> +Date: Fri Sep 17 17:52:23 2010 +0300 + + Add a lowlevel blitter benchmark + + This test is a modified version of Siarhei's compositor throughput + benchmark. It's expanded with explicit reporting of memory bandwidth + consumption for the M-test, and with an additional 8x8-random test + intended to determine peak ops/sec capability. There are also quite a + lot more operations tested for. + +commit eab3a77877b0e850c46f95dacffb31994e6a7e41 +Author: Dmitri Vorobiev <dmitri.vorobiev@movial.com> +Date: Fri Sep 17 17:52:22 2010 +0300 + + Add noinline macro + + This patch adds a noinline macro, which expands to compiler-dependent + keywords that tell the compiler to never inline a function. + +commit cab3261c0da6e833d803a7f3ccab600adca7abe1 +Author: Dmitri Vorobiev <dmitri.vorobiev@movial.com> +Date: Fri Sep 17 17:52:21 2010 +0300 + + Add gettime() routine to test utils + + Impending benchmark code will need a function to get current time + in seconds, and this patch introduces such routine. We try to use + the POSIX gettimeofday() function when available, and fall back to + clock() when not. + +commit fd3c87d460a6d1803880d17af416cce344a086c4 +Author: Dmitri Vorobiev <dmitri.vorobiev@movial.com> +Date: Fri Sep 17 17:52:20 2010 +0300 + + Move aligned_malloc() to utils + + The aligned_malloc() routine will be used in more than one test utility. + At least, a low-level blitter benchmark needs it. Therefore, let's make + this function a part of common test utilities code. 
+ +commit f474783607e51183d31814972d0f055907876079 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Sep 16 10:33:23 2010 -0400 + + Enable bits_image_fetch_bilinear_affine_normal_r5g6b5 + +commit 91521d30ab9b033a35fb7797e4566d575ad1c1dc +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Sep 16 10:33:10 2010 -0400 + + Enable bits_image_fetch_bilinear_affine_reflect_r5g6b5 + +commit 372d7b954aee4f3a2ad94ed8484a2b4084db0c7c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Sep 16 10:33:00 2010 -0400 + + Enable bits_image_fetch_bilinear_affine_none_r5g6b5 + +commit a826ae0e3a0279557e892856ef1333971b105d01 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Sep 16 10:32:44 2010 -0400 + + Enable bits_image_fetch_bilinear_affine_pad_r5g6b5 + +commit c5238bd1809433af5b0efc3add23c1ccb4da884c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Sep 16 10:32:27 2010 -0400 + + Enable bits_image_fetch_bilinear_affine_normal_a8 + +commit d12daefcdb8845e539309df46b08916829a86d9c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Sep 16 10:32:12 2010 -0400 + + Enable bits_image_fetch_bilinear_affine_reflect_a8 + +commit 9388be32932898ed424c8916a57a6201f995416b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Sep 16 10:31:57 2010 -0400 + + Enable bits_image_fetch_bilinear_affine_none_a8 + +commit 8e4d4e8d110c379cb85f53752660c6b2fab33d5e +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Sep 16 10:31:45 2010 -0400 + + Enable bits_image_fetch_bilinear_affine_pad_a8 + +commit ce1f6c50b4ddf8f7c48a3b272c19d281beca4b34 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Aug 28 02:41:20 2010 -0400 + + Enable bits_image_fetch_bilinear_affine_normal_x8r8g8b8 + +commit 83f2ee3e958a02fc85a2dc6eddc048b63d74cd5c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Aug 28 02:41:08 2010 -0400 + + Enable bits_image_fetch_bilinear_affine_reflect_x8r8g8b8 + +commit 
be37ae331c6e5e9539b0c1eac6e196366532df29 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Aug 28 02:40:56 2010 -0400 + + Enable bits_image_fetch_bilinear_affine_none_x8r8g8b8 + +commit 5f8a9bebc04deb55de79e7443578779a93b8cfa6 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Aug 28 02:40:46 2010 -0400 + + Enable bits_image_fetch_bilinear_affine_pad_x8r8g8b8 + +commit c59584cb862ef8774a2ef1eabb87fef18506d10f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Aug 28 02:40:16 2010 -0400 + + Enable bits_image_fetch_bilinear_affine_normal_a8r8g8b8 + +commit 2292cff304fd5aad6dbcc86342a57ea523136de6 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Aug 28 02:40:03 2010 -0400 + + Enable bits_image_fetch_bilinear_affine_reflect_a8r8g8b8 + +commit 8b29162693adc30dbb5c0f60098d2853c3942c36 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Aug 28 02:39:51 2010 -0400 + + Enable bits_image_fetch_bilinear_affine_none_a8r8g8b8 + +commit e8555874e122f6e113f85e37059932457ee509cb +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Aug 28 02:39:37 2010 -0400 + + Enable bits_image_fetch_bilinear_affine_pad_a8r8g8b8 + +commit f9778c15e9c01c02e0002edfc4d4a1d517d14d87 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun May 23 04:44:33 2010 -0400 + + Use a macro to generate some {a,x}8r8g8b8, a8, and r5g6b5 bilinear fetchers. + + There are versions for all combinations of x8r8g8b8/a8r8g8b8 and + pad/repeat/none/normal repeat modes. The bulk of each scaler is an + inline function that takes a format and a repeat mode as parameters. + + The new scalers are all commented out, but the next commits will + enable them one at a time to facilitate bisecting. + +commit 6d1e10a8b5c456ee501a309f5cf2f801efcf63b0 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Jul 14 16:27:27 2010 -0400 + + test: Add affine-test + + This test tests compositing with various affine transformations. 
It is + almost identical to scaling-test, except that it also applies a random + rotation in addition to the random scaling and translation. + +commit 4fa33537d7093ac759b7ded1718a628dacd2aff4 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Sep 12 06:07:41 2010 -0400 + + analyze_extents: Fast path for non-transformed BITS images + + Profiling various cairo traces showed that we were spending a lot of + time in analyze_extents and compute_sample_extents(). This was + especially bad for glyphs where all this computation was completely + unnecessary. + + This patch adds a fast path for the case of non-transformed BITS + images. The result is approximately a 6% improvement on the + firefox-talos-gfx benchmark: + + Before: + + [ # ] backend test min(s) median(s) stddev. count + [ 0] image firefox-talos-gfx 13.797 13.848 0.20% 6/6 + + After: + + [ # ] backend test min(s) median(s) stddev. count + [ 0] image firefox-talos-gfx 12.946 13.018 0.39% 6/6 + +commit c97881fe3c3a0af78cf5953d2c135654440b0269 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Sep 16 08:35:05 2010 -0400 + + Move some of the FAST_PATH_COVERS_CLIP computation to pixman-image.c + + When an image is solid or repeating, the FAST_PATH_COVERS_CLIP flag + can be set in compute_image_info(). + + Also the code that turned this flag off in pixman.c was not correct; + it didn't take transformations into account. With this patch, pixman.c + doesn't set the flag by default, but instead relies on the call to + compute_samples_extents() to set it when possible. + +commit 3411f9399c3ab6d642f350ea8e4c355f719d01d9 +Author: Tor Lillqvist <tml@iki.fi> +Date: Wed Sep 15 11:53:47 2010 -0400 + + Support __thread on MINGW 4.5 + + By the way, it seems that with gcc 4.5.0 from mingw.org, __thread, sse + and mmx work fine. + + I added the below to pixman 0.18 and as far as I can see, it works. + make check reports no problems. (Earlier I had to use --disable-mmx + and --disable-sse2.) 
Also gtk-demo and gimp run fine. + + (Also a change to get rid of the warnings about -fvisibility being ignored.) + +commit add0fd1bac84a5b6dddf7632b4100d6b3f2ebc18 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Aug 29 22:46:09 2010 -0400 + + Clip composite region against the destination alpha map extents. + + Otherwise we can end up writing outside the alpha map. + +commit af2f0080feada1abe569e2031acacf51be7f8f68 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Aug 29 17:07:40 2010 -0400 + + Remove FAST_PATH_NARROW_FORMAT flag if there is a wide alpha map + + If an image has an alpha map that has wide components, then we need to + use 64 bit processing for that image. We detect this situation in + pixman-image.c and remove the FAST_PATH_NARROW_FORMAT flag. + + In pixman-general, the wide/narrow decision is now based on the flags + instead of on the formats. + +commit 0afc61341526887c59d6dd9e43073f73451a74c6 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Aug 29 17:03:01 2010 -0400 + + Rename FAST_PATH_NO_WIDE_FORMAT to FAST_PATH_NARROW_FORMAT + + This avoids a negative in the name. Also, by renaming the "wide" + variable in pixman-general.c to "narrow" and fixing up the logic + correspondingly, the code there reads a lot more straightforwardly. + +commit ae77548f0d9ca95a86a466fc4ff099e000716067 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Aug 29 16:59:02 2010 -0400 + + Update and extend the alphamap test + + - Test many more combinations of formats + + - Test destination alpha maps + + - Test various different alpha origins + + Also add a transformation to the destination, but comment it out + because it is actually broken at the moment (and pretty difficult to + fix). + +commit dc9fe269ea2a1a0b8334d0936e2541af48b81bc7 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Sep 13 14:34:34 2010 -0400 + + Add fence_malloc() and fence_free(). 
+ + These variants of malloc() and free() try to surround the allocated + memory with protected pages so that out-of-bounds accesses will cause + a segmentation fault. + + If mprotect() and getpagesize() are not available, these functions are + simply equivalent to malloc() and free(). + +commit f4dc73bad4f662bdc3c94cb1e224f9a1989beba5 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Sep 12 04:35:08 2010 -0400 + + Do opacity computation with shifts instead of comparing with 0 + + Also add a COMPILE_TIME_ASSERT() macro and use it to assert that the + shift is correct. + +commit 517a77a992255cb6dae7e74bc6f6b9ac21003ac1 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Sep 8 09:16:12 2010 +0300 + + SSE2 optimization for scaled over_8888_8888 operation with nearest filter + + This is the first demo implementation, it should be possible to + generalize it later to cover more operations with less lines of code. + + It should be also possible to introduce the use of '__builtin_constant_p' + gcc builtin function for an efficient way of checking if 'unit_x' is known + to be zero at compile time (when processing padding pixels for NONE, or + PAD repeat). + + Benchmarks from Intel Core i7 860: + + == before (nearest OVER) == + op=3, src_fmt=20028888, dst_fmt=20028888, speed=142.01 MPix/s + + == after (nearest OVER) == + op=3, src_fmt=20028888, dst_fmt=20028888, speed=314.99 MPix/s + + == performance of nonscaled operation as a reference == + op=3, src_fmt=20028888, dst_fmt=20028888, speed=652.09 MPix/s + +commit abc90dad57f03bf9293fc825835c6f0fddc6771b +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Sep 16 18:25:40 2010 +0300 + + NONE repeat support for fast scaling with nearest filter + + Implemented very similar to PAD repeat. + + And gcc also seems to be able to completely eliminate the + code responsible for left and right padding pixels for OVER + operation with NONE repeat. 
+ +commit 45833d5b198507e9e69b918459eaaf6088e5de00 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Sep 16 17:10:40 2010 +0300 + + PAD repeat support for fast scaling with nearest filter + + When processing pixels from the left and right padding, the same + scanline function is used with 'unit_x' set to 0. + + Actually appears that gcc can handle this quite efficiently. When + using 'restrict' keyword, it is able to optimize the whole operation + performed on left or right padding pixels to a small unrolled loop + (the code is reduced to a simple fill implementation): + + 9b30: 89 08 mov %ecx,(%rax) + 9b32: 89 48 04 mov %ecx,0x4(%rax) + 9b35: 48 83 c0 08 add $0x8,%rax + 9b39: 49 39 c0 cmp %rax,%r8 + 9b3c: 75 f2 jne 9b30 + + Without 'restrict' keyword, there is one instruction more: reloading + source pixel data from memory in the beginning of each iteration. That + is slower, but also acceptable. + +commit 3db0cc5c75a4a764726059511fa6d67082fbeb64 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Sep 17 16:22:25 2010 +0300 + + Introduce a fake PIXMAN_REPEAT_COVER constant + + We need to implement a true PIXMAN_REPEAT_NONE support later (padding + the source with zero pixels). So it's better not to use PIXMAN_REPEAT_NONE + for handling FAST_PATH_SAMPLES_COVER_CLIP special case. + +commit e9b0740af76853f58df72cd40cd7cb4e2ac7261b +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Sep 16 13:02:18 2010 +0300 + + Nearest scaling fast path macro split into two parts + + Scanline processing is now split into a separate function. This provides + an easy way of overriding it with a platform specific implementation, + which may use SIMD optimizations. Only basic C data types are used as + the arguments for this function, so it may be implemented entirely in + assembly or be generated by some JIT engine. 
+ + Also as a result of this split, the complexity of code is reduced a + bit and now it should be easier to introduce support for the currently + missing NONE, PAD and REFLECT repeat types. + +commit 066ce191a6d3bb970b5024c070193cac4c130418 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Sep 16 12:31:27 2010 +0300 + + Nearest scaling fast path macros moved to 'pixman-fast-path.h' + + These macros with some modifications can be reused later by + various platform specific implementations, introducing SIMD + optimizations for nearest scaling fast paths. + +commit fb819c0e93b301757f8549cf7738c2b8c356ee7e +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Aug 29 16:26:45 2010 -0400 + + Add FAST_PATH_NO_ALPHA_MAP to the standard destination flags. + + We can't in general take a fast path if the destination has an alpha + map. + +commit ba6c98fc4b8f0ee02b846fd31c7e93e18e92d0af +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Sep 9 12:02:59 2010 +0300 + + test: detection of possible floating point registers corruption + + Added a pair of macros which can help to detect corruption + of floating point registers after a function call. This may + happen if _mm_empty() call is forgotten in MMX/SSE2 fast + path code, or ARM NEON assembly optimized function + forgets to save/restore d8-d15 registers before use. 
+ +commit e470c0dc5bcbf1e153bf035a823a7bdf629e6e25 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Sep 7 01:15:57 2010 +0300 + + ARM: added 'neon_composite_over_0565_8_0565' fast path + +commit a5bf7c3b1a103c6b676c864df009b1f0ad3f8195 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Sep 7 01:10:43 2010 +0300 + + ARM: helper macros for conversion between 8888/x888/0565 formats + +commit 8e299702f315fc1f0f97ab93d905ed5d9c41410e +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Sep 7 01:05:44 2010 +0300 + + ARM: common init/cleanup macro for saving/restoring NEON registers + + This is a typical prologue/epilogue for many NEON fast path functions, so + it makes sense to provide common reusable macros for it in the header file. + +commit e29d9dfcb5935777333f6239b95c18c3da697ab2 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Sep 2 19:43:08 2010 -0400 + + Silence some warnings about uninitialized variables + + Neither were real problems, but GCC was complaining about them. + +commit 27f7852b5ac8d137c917e653fb7113f419a4c77a +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Aug 31 00:30:54 2010 -0400 + + When pixman_compute_composite_region32() returns FALSE, don't fini the region. + + The rule is that the region passed in must be initialized and that the + region returned will still be valid. Ie., the lifecycle is the + responsibility of the caller, regardless of what the function returns. + + Previously, compute_composite_region32() would finalize the region and + then return FALSE, and then the caller would finalize the region + again, leading to memory corruption in some cases. + +commit df6dbc90248a41b5b8362010e5b8d34358688786 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Aug 30 00:16:07 2010 -0400 + + Store a2b2g2r2 pixel through the WRITE macro + + Otherwise, accessor functions won't work. 
+ +commit f42419a3e493bb325163a711fe50296c4c948edd +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Aug 23 18:24:32 2010 +0300 + + ARM: added 'neon_composite_over_8888_8_0565' fast path + +commit 765bde32e0a2e81fbbe15acc0f491695ba2726e8 +Author: Maarten Bosmans <mkbosmans@gmail.com> +Date: Mon Aug 30 08:55:00 2010 +0200 + + Add *.exe to .gitignore + +commit 85964082618fc5350cafcd22b48ba1e02cbc4276 +Author: Maarten Bosmans <mkbosmans@gmail.com> +Date: Sun Aug 29 06:28:42 2010 +0200 + + Use windows.h directly for mingw32 build + + This patch addresses the issue discussed in + http://lists.freedesktop.org/archives/pixman/2010-April/000163.html + + There were only two clashing identifiers. The first one is IN, which + obviously causes problems in Pixman for lines like + + PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8), + + Fortunately the mingw headers provide a solution: by defining + _NO_W32_PSEUDO_MODIFIERS, these stupid symbols are skipped. + + The other name is UINT64, used in pixman-mmx.c. I renamed that + function to to_uint64, but may be another name is more appropriate. + +commit 5b99710042e812d294f571ad6d86fb003a8071e3 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Aug 23 09:27:38 2010 -0400 + + Be more paranoid about checking for GTK+ + + From time to time people run into issues where the configure script + detects GTK+ when it is either not installed, or not functional due to + a missing pixman. Most recently: + + https://bugs.freedesktop.org/show_bug.cgi?id=29736 + + This patch makes the configure script more paranoid by + + - always using PKG_CHECK_MODULES and not PKG_CHECK_EXISTS, since it + seems PKG_CHECK_EXISTS will sometimes return true even if a dependency + of GTK+, such as pixman-1, is missing. + + - explicitly checking that pixman-1 is installed before enabling GTK+. 
+ + Cc: my.somewhat.lengthy.loginname@gmail.com + +commit 5530bcab26508f38a25d2afffa7fef20f35a68e1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Aug 22 11:09:45 2010 -0400 + + Merge pixman_image_composite32() and do_composite(). + + There is not much point having a separate function that just validates + the images. Also add a boolean return to lookup_composite_function() + so that we can return if no composite function is found. + +commit a8ea889e5e3029c2aad0e54e849783242daca274 +Author: Benjamin Otte <otte@redhat.com> +Date: Mon Aug 23 18:20:09 2010 +0200 + + region: Fix pixman_region_translate() clipping bug + + Fixes the region-translate test case by clipping region translations to + the newly defined PIXMAN_REGION_MIN/MAX and using the newly introduced + type overflow_int_t to check for the overflow. + Also uses INT16_MAX or INT32_MAX for these values instead of relying on + the size of short and int types. + +commit 4d8fb1bc01654ba0d331e6aea8127920e8cdf0b8 +Author: Benjamin Otte <otte@redhat.com> +Date: Tue Aug 24 12:17:18 2010 +0200 + + region: Add a new test region-translate + + This test exercises a bug in pixman_region32_translate(). The function + clips the region to int16 coordinates SHRT_MIN/SHRT_MAX. 
+ +commit 5ff359b8a0a4573722b1cba141b8f00cf24b6f09 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Aug 21 06:39:44 2010 -0400 + + Post-release version bump to 0.19.3 + +commit 39308ed3b07afb92140770007124b7e544b83090 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Aug 21 06:33:19 2010 -0400 + + Pre-release version bump to 0.19.2 + +commit 393ccab74e9aa466e2fdd91319012e2c18f4ef84 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Aug 16 07:24:48 2010 -0400 + + Only try to compute the FAST_SAMPLES_COVER_CLIP for bits images + + It doesn't make sense in other cases, and the computation would make + use of image->bits.{width,height} which lead to uninitialized memory + accesses when the image wasn't of type BITS. + +commit da6f33a798bf2ea10df610ccf1d9506d63d1a28c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Aug 9 20:54:49 2010 -0400 + + Introduce new FAST_PATH_SAMPLES_OPAQUE flag + + This flag is set whenever the pixels of a bits image don't have an + alpha channel. Together with FAST_PATH_SAMPLES_COVER_CLIP it implies + that the image effectively is opaque, so we can do operator reductions + such as OVER->SRC. + +commit 4e5d6f00bf409259ff6f5d5c3ef4b016146bcbb3 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Aug 4 17:51:49 2010 -0400 + + pixman_image_set_alpha_map(): Disallow alpha map cycles + + If someone tries to set an alpha map that itself has an alpha map, + simply return. Also, if someone tries to add an alpha map to an image + that is being _used_ as an alpha map, simply return. + + This ensures that an alpha map can never have an alpha map. + +commit 9fe7d32c4b704a10e780444530eaea28b4351110 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Aug 4 17:55:14 2010 -0400 + + Add alpha-loop test program + + This tests what happens if you attempt to make an image with an alpha + map that has the image as its alpha map. 
This results in an infinite + loop in _pixman_image_validate(), so the test sets up a SIGALRM to + exit if it runs for more than five seconds. + +commit 8a5d1be1dab799ed23239f3471b4a351d8356368 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon May 31 19:24:43 2010 +0300 + + ARM: 'neon_combine_out_reverse_u' combiner + + This operation was seen in mozilla browser profiling logs. + Implemented so that 'over' and 'out_reverse' operations + now reuse common parts of code. + +commit 731e9feaa6988f99e1e38e1b92ed1f15ba706da5 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Mar 19 12:21:32 2010 +0200 + + Code simplification (no need advancing 'vx' at the end of scanline) + +commit 41584f8fe140b7374a5ef5d437b070c1f32763bb +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Jul 2 14:14:21 2010 -0400 + + Store the various bits image fetchers in a table with formats and flags. + + Similarly to how the fast paths are done, put the various bits_image + fetchers in a table, so that we can quickly find the best one based on + the image's flags and format. + +commit 8e33643f44c397a37b822a95e071880d9a8e792a +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Jul 2 12:53:56 2010 -0400 + + Add some new FAST_PATH flags + + The flags are: + + * AFFINE_TRANSFORM, for affine transforms + + * Y_UNIT_ZERO, for when the 10 entry in the transformation is zero + + * FILTER_BILINEAR, for when the image has a bilinear filter + + * NO_NORMAL_REPEAT, for when the repeat mode is not NORMAL + + * HAS_TRANSFORM, for when the transform is not NULL + + Also add some new FAST_PATH_REPEAT_* macros. These are just shorthands + for the image not having any of the other repeat modes. For example + REPEAT_NORMAL is (NO_NONE | NO_PAD | NO_REFLECT). + +commit 6f62231d1580f5b67f36ec81b6c59a7e2f4978cb +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Jul 2 12:45:44 2010 -0400 + + Remove "_raw_" from all the accessors. 
+ + There are no non-raw accessors anymore. + +commit 807fd3c08491c8baffaad993d8b867141fa55319 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Jul 2 12:34:42 2010 -0400 + + Eliminate the store_scanline_{32,64} function pointers. + + Now that we can't recurse on alpha maps, they are not needed anymore. + +commit e213d5fd6207873638a86d908d06d7597cb88422 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Jul 2 12:31:50 2010 -0400 + + Split bits_image_fetch_transformed() into two functions. + + One function deals with the common affine, no-alpha-map case. The + other deals with perspective transformations and alpha maps. + +commit cbb2a0d7929ec27e0a135d7fa11e1acf3942bce2 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Jul 2 12:11:44 2010 -0400 + + Eliminate get_pixel_32() and get_pixel_64() from bits_image. + + These functions can simply be passed as arguments to the various pixel + fetchers. We don't need to store them. Since they are known at compile + time and the pixel fetchers are force_inline, this is not a + performance issue. + + Also temporarily make all pixel access go through the alpha path. + +commit 6480c92312e1fb6662ad0d10940660a9439667ea +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Jul 2 11:58:23 2010 -0400 + + Eliminate recursion from alpha map code + + Alpha maps with alpha maps are no longer supported. It's not a useful + feature and it could lead to infinite recursion. + +commit 1cc750ed92a936d84b47cac696aaffd226e1c02e +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Jul 22 04:27:45 2010 -0400 + + Replace compute_src_extent_flags() with analyze_extents() + + This commit fixes two separate problems: 1. Incorrect computation of + the FAST_PATH_SAMPLES_COVER_CLIP flag, and 2. FAST_PATH_16BIT_SAFE is + a nonsensical thing to compute. + + == 1. 
Incorrect computation of SAMPLES_COVER_CLIP: + + Previously we were using pixman_transform_bounds() to compute which + source samples would be used for a composite operation. This is + incorrect for several reasons: + + (a) pixman_transform_bounds() is transforming the integer bounding box + of the destination samples, where it should be transforming the + bounding box of the samples themselves. In other words, it is too + pessimistic in some cases. + + (b) pixman_transform_bounds() is not rounding the same way as we do + during sampling. For example, for a NEAREST filter we subtract + pixman_fixed_e before rounding off to the nearest sample so that a + transformed value of 1 will round to the sample at 0.5 and not to the + one at 1.5. However, pixman_transform_bounds() would simply truncate + to 1 which would imply that the first sample to be used was the one at + 1.5. In other words, it is too optimistic in some cases. + + (c) The result of pixman_transform_bounds() does not account for the + interpolation filter applied to the source. + + == 2. FAST_PATH_16BIT_SAFE is nonsensical + + The FAST_PATH_16BIT_SAFE is a flag that indicates that various + computations can be safely done within a 16.16 fixed-point + variable. It was used by certain fast paths who relied on those + computations succeeding. The problem is that many other compositing + functions were making similar assumptions but not actually requiring + the flag to be set. Notably, all the general compositing functions + simply walk the source region using 16.16 variables. If the + transformation happens to overflow, strange things will happen. + + So instead of computing this flag in certain cases, it is better to + simply detect that overflows will happen and not try to composite at + all in that case. This has the advantage that most compositing + functions can be written in a natural way. 
+ + It does have the disadvantage that we are giving up on some cases that + previously worked, but those are all corner cases where the areas + involved were very close to the limits of the coordinate + system. Relying on these working reliably was always a somewhat + dubious proposition. The most important case that might have worked + previously was untransformed compositing involving images larger than + 32 bits. But even in those cases, if you had REPEAT_PAD or + REPEAT_REFLECT turned on, you would hit bits_image_fetch_transformed() + which has the 16 bit limitations. + + == Fixes + + This patch fixes both problems by introducing a new function called + analyze_extents() that has the responsibility to reject corner cases, + and to compute flags based on the extents. + + It does this through a new compute_sample_extents() function that will + compute a conservative (but tight) approximation to the bounding box + of the samples that will actually be needed. By basing the computation + on the positions of the _sample_ locations in the destination, and by + taking the interpolation filter into account, it fixes problem one. + + The same function is also used with a one-pixel expanded version of + the destination extents. By checking if the transformed bounding box + will overflow 16.16 fixed point, it fixes problem two. + +commit 5b289d39cfd5e5cd8b1e0a7b654574ed3e7e90ac +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Jul 28 02:11:08 2010 -0400 + + Extend scaling-crash-test in various ways + + This extends scaling-crash-test to test some more things: + + - All combinations of NEAREST/BILINEAR/CONVOLUTION filters and + NORMAL/PAD/REFLECT repeat modes. + + - Tests various scale factors very close to 1/7th such that the source + area is very close to edge of the source image. + + - The same things, only with scale factors very close to 1/32767th. + + - Enables the commented-out tests for accessing memory outside the + source buffer. 
+ + Also there is now a border around the source buffer which has a + different color than the source buffer itself so that if we sample + outside, it will show up. + + Finally, the test now allows the destination buffer to not be changed + at all. This allows pixman to simply bail out in cases where the + transformation is too strange. + +commit 71ff55a3e567ace21e9120f826270253e7ec5edd +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Aug 5 19:00:56 2010 -0400 + + Fix Altivec/OpenBSD patch + + As Brad pointed out, I pushed the wrong version of this patch. + +commit cb50e9cc95a780a5e60d557f2aa23d82d2280b73 +Author: Brad Smith <brad@comstyle.com> +Date: Sat Jul 31 05:07:02 2010 -0400 + + Add support for AltiVec detection for OpenBSD/PowerPC. + + Bug 29331. + +commit 664132128ec430e28dad9f8088a3f6f2a1903f8e +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Aug 4 09:50:30 2010 -0400 + + CODING_STYLE: Delete the stuff about trailing spaces + + Also fix various other minor issues. + +commit cc9221ce96c23f6d6f1a17d98e5221e3aeff6567 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Jul 28 03:17:35 2010 -0400 + + If we bail out of do_composite, make sure to undo any workarounds. + + The workaround for an old X bug has to be undone if we bail from + do_composite, so we can't just return. + +commit b243a66041456dba278b04f813deac4f99bbe621 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Aug 4 08:58:51 2010 -0400 + + Add x14r6g6b6 format to blitters-test + +commit d6a7b1542448e7ee41f2c2a129bd0af2668185bb +Author: Marek Vasut <marek.vasut@gmail.com> +Date: Sun Aug 1 02:18:52 2010 +0200 + + Add support for 32bpp X14R6G6B6 format. + + This format is used on PXA framebuffer with some boards. It uses only 18 bits + from the 32 bit framebuffer to interpret color. 
+ + Signed-off-by: Marek Vasut <marek.vasut@gmail.com> + +commit 226a6df4f947f718d82e85ca53561a968ec0c0a1 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Jul 14 16:43:16 2010 +0300 + + test: 'scaling-test' updated to provide better coverage + + Negative scale factors are now also tested. A small additional + translate transform helps to stress the use of fractional + coordinates better. + + Also the number of iterations to run by default increased in order + to compensate increased variety of operations to be tested. + +commit af3eeaeb1352148ca671a45768d11160fcfd8567 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Jul 19 20:25:05 2010 +0300 + + test: 'scaling-crash-test' added + + This test tries to exploit some corner cases and previously known + bugs in nearest neighbor scaling fast path code, attempting to + crash pixman or cause some other nasty effect. + +commit 90483fcabbd19b35ded094a6a592ee224029fd07 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Jul 15 23:40:28 2010 -0400 + + bits: Fix potential divide-by-zero in projective code + + If the homogeneous coordinate is 0, just set the coordinates to 0. 
+ +commit bf125fbbb701788d5d9ed9ff368bb4fe9d9c895e +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Apr 25 20:25:50 2010 -0400 + + [sse2] Add sse2_composite_add_n_8() + + This shows up when epiphany displays the "ImageTest" on + glimr.rubyforge.org/cake/canvas.html + +commit 16ae3285e6601ea177637dddd20d2857d13decac +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Apr 25 19:54:28 2010 -0400 + + [sse2] Add sse2_composite_in_n_8() + + This shows up when epiphany displays the "ImageTest" on + glimr.rubyforge.org/cake/canvas.html + +commit e0b430a13ee4619bd6d82c4ebff8a401a254e9bc +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jul 13 00:31:35 2010 -0400 + + [sse2] Add sse2_composite_src_x888_8888() + + This operation shows up when Firefox displays + http://dougx.net/plunder/plunder.html + +commit 16bae8347529c1c976e6d7af90e0fb5811605af1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jul 13 00:08:10 2010 -0400 + + [fast] Add fast_composite_src_x888_8888() + + This shows up when Firefox displays http://dougx.net/plunder/plunder.html + +commit 9399b1a5af69cc9890aa7918cd09318ddeefc05d +Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> +Date: Wed Jul 14 09:51:27 2010 +0300 + + Fix thinko in configure.ac's macro to test linking. + + Copy-paste carnage. Renames save_{cflags,libs,ldflags} to + save_{CFLAGS,LIBS,LDFLAGS}. + +commit 5537e51cd0ffda53cc392a4bafe05070954fc36d +Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> +Date: Sun Jul 11 19:59:01 2010 +0300 + + Avoid trailing slashes on automake install dirs. + + The install-sh on a Solaris box couldn't cope with + trailing slashes. + +commit 1d9c6fa62385c42d67926982704c398d8b495d47 +Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> +Date: Sat Jul 10 15:36:41 2010 +0300 + + Check for specific flags by actually trying to compile and link. 
+ + Instead of relying on preprocessor version checks to see if + some compiler flags are supported, actually try to compile and + link a test program with the flags. + +commit d95ae7060442712315d29c8b307df131ba9ffce6 +Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> +Date: Sat Jul 10 02:41:01 2010 +0100 + + Check that the OpenMP pragmas don't cause link errors. + + This patch adds extra guards around our use of + OpenMP pragmas and checks that the pragmas won't + cause link errors. This fixes the build on + Tru64 and Solaris with the native compilers and clang. + +commit eb247ac377623d2a722aab1e6eae7adab5f7ebea +Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> +Date: Fri Jul 9 12:09:07 2010 +0300 + + Don't trust OpenBSD's gcc to produce working code for __thread. + + The gcc on OpenBSD 4.5 to 4.7 at least produces bad code for __thread, + without as much as a warning. + + See PR #6410 "Using __thread TLS variables compiles ok but segfault at runtime." + + http://cvs.openbsd.org/cgi-bin/query-pr-wrapper?full=yes&numbers=6410 + +commit dbf35f1f276a673bc4a1eb932dd5cf9266f948da +Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> +Date: Fri Jul 9 12:07:35 2010 +0300 + + Try harder to find suitable flags for pthreads. + + The flags -D_REENTRANT -lpthread work on more systems than + does -pthread unfortunately, so give that a go too. + +commit 9897bb4eeed165b76001dfefd3a89bcb96d38a72 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jul 12 15:13:49 2010 -0400 + + Check for read accessors before taking the bilinear fast path + + The bilinear fast path accesses pixels directly, so if the image has a + read accessor, then it can't be used. + +commit ce3d9fca73bb8abe4d5b1023cfdb06ca53b6161c +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Jul 11 19:58:49 2010 -0400 + + fast-path: Some formatting fixes + + Add spaces before parentheses; fix indentation in the macro. 
+ +commit 839326e471a8a6c96dea1693501550d79043bb81 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Jul 11 19:57:29 2010 -0400 + + In the FAST_NEAREST macro call the function 8888_8888 and not x888_x888 + + The x888 suggests that they have something to do with the x8r8g8b8 + formats, but that's not the case; they are assuming a8r8g8b8 + formats. (Although in some cases they also work for x8r8g8b8 type + formats). + +commit e13d9f9684a47a6e0be4f8ae1a39cce8b1334238 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Jul 11 19:45:22 2010 -0400 + + Make the repeat mode explicit in the FAST_NEAREST macro. + + Before, it was 0 or 1 meaning 'no repeat' and 'normal repeat' + respectively. Now we explicitly pass in either NONE or NORMAL. + +commit 2e7fb6655334789f8a5e290245d47c8d6b221c24 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jul 10 20:47:01 2010 -0400 + + When converting indexed formats to 64 bits, don't correct for channel widths + + Indexed formats are mapped to a8r8g8b8 with full precision, so when + expanding we shouldn't correct for the width of the channels + +commit 2df6dac0be678e1683223faeddadb35b1d2dbe36 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jul 10 18:40:06 2010 -0400 + + test: Make sure the palettes for indexed format roundtrip properly + + The palettes for indexed formats must satisfy the condition that if + some index maps to a color C, then the 15 bit version of that color + must map back to the index. This ensures that the destination operator + is always a no-op, which seems like a reasonable assumption to make. + +commit 5dd59c8b7cf1543605713a2ac30f31d8726f5444 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jul 10 16:49:51 2010 -0400 + + Split the fast path caching into its own force_inline function + + The do_composite() function is a lot more readable this way. 
+ +commit 98d19d9abd9d62b8d2871871b0be74e022f1f89f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jul 10 16:08:51 2010 -0400 + + Cache the implementation along with the fast paths. + + When calling a fast path, we need to pass the corresponding + implementation since it might contain information necessary to run the + fast path. + +commit f18bcf1f6e984c33dca30ad1ce03c58628fe39df +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jul 10 15:47:12 2010 -0400 + + Hide the global implementation variable behind a force_inline function. + + Previously the global variable was called 'imp' which was confusing + with the argument to various other functions also being called imp. + +commit 5c935473d8a193b3510f8605a6658ea6ac998fd1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Jun 30 02:31:10 2010 -0400 + + Fix memory leak in the pthreads thread local storage code + + When a thread exits, we leak whatever is stored in thread local + variables, so install a destructor to free it. + +commit 7114b2d63bd5702c94cb8aa9401c023e550c77bc +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Jul 1 16:54:30 2010 -0400 + + Make the combiner macros less likely to cause name collisions. + + Protect the arguments to the combiner macros with parentheses, and + postfix their temporary variables with underscores to avoid name space + collisions with the surrounding code. + + Alexander Shulgin pointed out that underscore-prefixed identifiers are + reserved for the C implementation, so we use postfix underscores + instead. + +commit a92e4a6a9475e07435efb60aa2fde5fa04592d89 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Jun 21 15:30:46 2010 -0400 + + Minor tweaks to README + +commit ca846806cbc4e11cd134e464c7740c1cde19422b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Jun 20 13:12:27 2010 -0400 + + Store the conical angle in floating point radians, not fixed point degrees + + This is a slight simplification. 
+ +commit 3074d57b560d5ec9be2a0e1a6846012698f51208 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jun 19 18:57:45 2010 -0400 + + Fix conical gradients to match QConicalGradient from Qt + + Under the assumption that pixman gradients are supposed to match + QConicalgradient, described here: + + http://doc.trolltech.com/4.4/qconicalgradient.html + + this patch fixes two separate bugs in pixman-conical-gradient.c. + + The first bug is that the output of atan2() is in the range of [-pi, + pi], which means the parameter into the gradient can be negative. This + is wrong since a QConicalGradient always interpolates around the + center from 0 to 1. The fix for that is to (a) make sure the given + angle is between 0 and 360, and (b) add or subtract 2 * M_PI if the + computed angle ends up outside [0, 2 * pi]. + + The other bug is that we were interpolating clockwise, whereas + QConicalGradient calls for a counter-clockwise interpolation. This is + easily fixed by subtracting the parameter from 1. + + Finally, this patch encapsulates the computation in a new force-inline + function so that it can be reused in both the affine and non-affine + case. + +commit 66365b5ef1bb85863669227ae6e31134d8d57013 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun May 30 18:26:28 2010 -0400 + + Make separate gray scanline storers. + + For gray formats the palettes are indexed by luminance, not RGB, so we + can't use the color storers for gray too. + +commit 4e1d4847c9199f96b73376bef061c6728742e621 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun May 30 16:52:09 2010 -0400 + + When storing a g1 pixel, store the lowest bit, rather than comparing with 0. 
+ +commit 445eb6385f60d09058826b44894e17165c91381c +Author: Andrea Canciani <ranma42@gmail.com> +Date: Wed Jun 9 16:35:37 2010 +0200 + + test: verify that gradients do not crash pixman + + Test gradients under particular conditions (no stops, all the stops + at the same offset) to check that pixman does not misbehave. + +commit de0320258167c24fc652d28f4aeca8713243323e +Author: Andrea Canciani <ranma42@gmail.com> +Date: Tue Jun 8 20:36:15 2010 +0200 + + support single-stop gradients + + Just like conical gradients, linear and radial gradients can now + have a single stop. + +commit 32bd31d677ab018849af5e0165d1dfacb1e01ed0 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue May 18 22:27:46 2010 -0400 + + Eliminate mask_bits from all the scanline fetchers. + + Back in the day, the mask_bits argument was used to distinguish + between masks used for component alpha (where it was 0xffffffff) and + masks for unified alpha (where it was 0xff000000). In this way, the + fetchers could check if just the alpha channel was 0 and in that case + avoid fetching the source. + + However, we haven't actually used it like that for a long time; it is + currently always either 0xffffffff or 0 (if the mask is NULL). It also + doesn't seem worthwhile resurrecting it because for premultiplied + buffers, if alpha is 0, then so are the color channels + normally. + + This patch eliminates the mask_bits and changes the fetchers to just + assume it is 0xffffffff if mask is non-NULL. + +commit 78778e5963c948de5ce5f7c5a2a3bb9f279a8eda +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Mon Mar 15 14:56:38 2010 +0200 + + create getter for component alpha + + This patch comes from the mozilla central tree. See + http://hg.mozilla.org/mozilla-central/rev/89338a224278 for the + original changeset. 
+ + Signed-off-by: Jeff Muizelaar <jmuizelaar@mozilla.com> + Signed-off-by: Egor Starkov <egor.starkov@nokia.com> + Signed-off-by: Rami Ylimaki <ext-rami.ylimaki@nokia.com> + Signed-off-by: Siarhei Siamashka <siarhei.siamashka@nokia.com> + +commit cfc4e38852dc244198a9bfcab07d9014bba21d53 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed May 12 01:34:57 2010 +0300 + + test: added OpenMP support for better utilization of multiple CPU cores + + Some of the tests are quite heavy CPU users and may benefit from + using multiple CPU cores, so the programs from 'test' directory + are now built with OpenMP support. OpenMP is easy to use, portable + and also takes care of making a decision about how many threads + to spawn. + +commit f905ebb03d8ed8a3ceb76c84a10735aa209168d3 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed May 12 00:10:04 2010 +0300 + + test: scaling-test updated to use new fuzzer_test_main() function + +commit be387701a5b44e68110d5c9df07924d1029e87ac +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue May 11 23:21:05 2010 +0300 + + test: blitters-test updated to use new fuzzer_test_main() function + +commit 9ed9abd1541a0353ba4234dc77dd46d6b8771d88 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue May 11 22:57:48 2010 +0300 + + test: blitters-test-bisect.rb converted to perl + + This new script can be used to run continuously to compare two test + programs based on fuzzer_test_main() function from 'util.c' and + narrow down to a single problematic test from the batch which results + in different behavior. + +commit 30c3e91c3f97cf3d5932ba639d8ac126b83efb70 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue May 11 22:46:47 2010 +0300 + + test: main loop from blitters-test added as a new function to utils.c + + This new generalized function can be reused in both blitters-test + and scaling-test. 
Final checksum calculation changed in order to make + it parallelizable (it is a sum of individual 32-bit values returned + by a callback function, which is now responsible for running test-specific + code). Return values may be crc32, some other hash or even just zero on + success and non-zero on error (in this case, the expected result of the + whole test run should be 0). + +commit 164fe215f2c904cf74537caf9d76b7f9ce2667ec +Merge: e1594f2 5158d67 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun May 9 14:24:24 2010 -0400 + + Merge branch 'for-master' + +commit e1594f204d3a3c2d2083793c8830f0ebf390ed66 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu May 6 01:05:40 2010 +0300 + + test/gtk-utils: Set the size of the window to the size of the image + +commit 2f4f2fb4859931bf6dc5632d8c919e7296736427 +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Tue May 4 11:55:30 2010 -0400 + + Add support for compiling pixman without thread/tls support + +commit 5158d6740c8e2643611a623a0caa649f4b0bc5bd +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Apr 24 18:43:38 2010 -0400 + + Add macros for thread local storage on MinGW 32 + + These macros are identical to the ones that Tor Lillqvist posted here: + + http://lists.freedesktop.org/archives/pixman/2010-April/000160.html + + with one exception: the variable is allocated with calloc() and not + malloc(). + + Cc: tml@iki.fi + +commit 582fa58bba7008c2b852ba56557612866f7522d5 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Apr 23 12:34:19 2010 -0400 + + Don't use __thread on MinGW. + + It is apparently broken. See this: + + http://mingw-users.1079350.n2.nabble.com/gcc-4-4-multi-threaded-exception-handling-thread-specifier-not-working-td3440749.html + + We'll need to support thread local storage on MinGW32 some other way. 
+ + Cc: tml@iki.fi + +commit 95d4026866b4655c88de75c9756e9f18881b7c29 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Mar 28 23:02:43 2010 -0400 + + Add support for 8bpp to pixman_fill_sse2() + +commit d539e0c661e2ec8e8405c0110469e117c5c70526 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Apr 24 13:11:50 2010 -0400 + + sse2: Add sse2_composite_over_reverse_n_8888 + + This is a small speed-up for the poppler benchmark: + + Before: + [ # ] backend test min(s) median(s) stddev. count + [ 0] image poppler 4.443 4.474 0.31% 6/6 + + After: + [ # ] backend test min(s) median(s) stddev. count + [ 0] image poppler 4.224 4.248 0.42% 6/6 + +commit 2d65fb033b57c701e2c16747470f86bda1d861e0 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Apr 24 15:15:05 2010 -0400 + + Don't consider indexed formats opaque. + + The indexed formats have 0 bits of alpha, but can't be considered + opaque because there may be non-opaque colors in the palette. + +commit 19459672ce68b6ad6a4d376cb2d7c9a6d889ae01 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Feb 24 19:21:50 2010 -0500 + + Add an over_8888_8888_8888 sse2 fast path. + +commit a3d29157b4a33162cabbda616c34c00d9a13f7a8 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Feb 17 23:03:25 2009 -0500 + + Add pixman_region{,32}_intersect_rect() + +commit c0d0d20bd282c1d049b5bce4d23e10ab5b28751e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 21 20:52:26 2009 -0400 + + Rename fast_composite_src_8888_x888 to fast_composite_src_memcpy() + + Then generalize it and use it for SRC copying between various + identical formats. 
+ +commit 1f0cba3bdcc5d9a48c9189e8110c90d79260888a +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Tue Apr 27 15:23:20 2010 -0400 + + Add missing HAVE_CONFIG_H guards for config.h inclusion + +commit 526132fa652a42d94826760aa9c72537e3ecaf35 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Apr 22 12:14:23 2010 -0400 + + Remove alphamap from the GTK+ part of tests/Makefile.am + + It doesn't use GTK+ and it was already listed in the non-GTK+ part. + +commit 8f7cc5e4388e83eb1b77aea978f3c58338232320 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Apr 21 09:59:29 2010 -0400 + + Add pixman_image_get_format() accessor + +commit 2b1cae1ef62289288ef00ea7cc1dfef8e01750e6 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Apr 21 09:55:35 2010 -0400 + + Some minor updates to README + +commit 15f5868f6301a51d46cdb0833bc538f2fc68e3df +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Apr 18 16:24:39 2010 -0400 + + Update README to mention the pixman mailing list + +commit a652d5c15476cb60e1ca96ac115df625f8a1b76f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Apr 7 19:34:41 2010 -0400 + + [mmx] Fix mask creation bugs + + This line: + + mask = mask | mask >> 8 | mask >> 16 | mask >> 24; + + only works when mask has 0s in the lower 24 bits, so add + + mask &= 0xff000000; + + before. + + Reported by Todd Rinaldo on the #cairo IRC channel. + +commit 714559dccda3165a72f0a9935c1edc3aef535f30 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Apr 7 01:44:12 2010 -0400 + + Fixes for pthread thread local storage. + + The tls_name_key variable is passed to tls_name_get(), and the first + time this happens it isn't initialized. tls_name_get() then passes it + on to tls_name_alloc() which passes it on to pthread_setspecific() + leading to undefined behavior. 
+ + None of this is actually necessary at all because there is only one + such variable per thread local variable, so it doesn't need to be passed + as a parameter at all. + + All of this was pointed out by Tor Lillqvist on the cairo mailing + list. + +commit 634ba33b5b1fcfd5a0e7910f9991b4ed4f674549 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Apr 7 01:39:14 2010 -0400 + + Fix uninitialized cache when pthreads are used + + The thread local cache is allocated with malloc(), but we rely on it + being initialized to zero, so allocate it with calloc() instead. + +commit bc11545a1b5c22fe74fc954e26e8a8e9d7cfa39e +Author: Siddharth Agarwal <sid.bugzilla@gmail.com> +Date: Tue Apr 13 10:15:29 2010 -0400 + + Visual Studio 2010 includes stdint.h + + Use the builtin version instead of defining the types ourselves. + +commit 0345c343e55ec19ae3c8c8ed598eab7e1c1e12f3 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Apr 1 06:21:21 2010 -0400 + + Post-release version bump to 0.19.1 + +commit e9dc568d6f585a153c47e970168a9c71d3e45fde +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Apr 1 05:23:31 2010 -0400 + + Pre-release version bump to 0.18.0 + +commit efd41c62875d97c5127233cb6a4c353b4d495531 +Author: Matthias Hopf <mhopf@suse.de> +Date: Wed Mar 24 18:54:29 2010 +0100 + + Revert "Improve PIXREGION_NIL to return true on degenerated regions." + + This reverts commit ebba1493136a5a0dd7667073165b2115de203eda. + Scheduled for re-discussion after stable 0.18 has been released. + +commit ebba1493136a5a0dd7667073165b2115de203eda +Author: Matthias Hopf <mhopf@suse.de> +Date: Wed Mar 24 12:00:21 2010 +0100 + + Improve PIXREGION_NIL to return true on degenerated regions. + + Fixes Novell bug 568811. 
+ +commit c0f8d417b512b7d526fb6127954a50d14214f420 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Mar 23 17:25:54 2010 -0400 + + Post-release version bump to 0.17.15 + +commit b35f0b0158cd7aac388ba4c72c6c8aada77d2e22 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Mar 23 16:52:02 2010 -0400 + + Pre-release version bump to 0.17.14 + +commit 27a9f0468bdfa257e70270bf9addd5ad064f918b +Merge: 69f1ec9 3ef2033 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Mar 23 11:00:04 2010 -0400 + + Merge remote branch 'ssvb/arm-fixes' + +commit 3ef203331f124bf137c6e0c8d5516b1209c92dd9 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Mar 22 21:56:17 2010 +0200 + + ARM: SIMD optimizations moved to a separate .S file + + This should be the last step in providing full armv4t compatibility + with CPU features runtime autodetection in pixman. + +commit 0a0591c2f7abde8880f4aebd510c27517a414450 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Mar 22 19:51:00 2010 +0200 + + ARM: SIMD optimizations updated to use common assembly calling conventions + +commit c1e8d4533aea3aa10c49465cf5e9a44d946f70bb +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Mar 22 18:51:54 2010 +0200 + + ARM: Helper ARM NEON assembly binding macros moved into a separate header + + This is needed for future reuse of the same macros for the other + ARM assembly optimizations (armv4t, armv6) + +commit 5791026e45f79d8f5168e302a498455870363ac6 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sun Dec 27 00:27:53 2009 +0200 + + ARM: Workaround for a NEON bug in assembler from binutils 2.18 + + The problem was reported as bug 25534 against pixman in + freedesktop.org bugzilla. Link to a patch for binutils: + http://sourceware.org/ml/binutils/2008-03/msg00260.html + + For pixman the impact is a build failure when using + binutils 2.18. Versions 2.19 and higher are fine. 
Still + some distros may be using older versions of binutils and + this is causing problems. + + This patch works around the problem by replacing a problematic + "vmov a, b" instruction with equivalent "vorr a, b, b". Actually + they even map to the same instruction opcode in the generated + code, so the resulting binary is identical with and without patch. + +commit 68d8d83223b5a35e25d379c2ee9e2e3a1d242323 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Mar 22 11:54:51 2010 +0200 + + ARM: Use '.object_arch' directive in NEON assembly file + + This can be used to override the architecture recorded in the EABI object + attribute section. We set a minimum arch to 'armv4'. Binutils documentation + recommends to use this directive with the code performing runtime detection + of CPU features. + + Additionally NEON/VFP EABI attributes are suppressed. And the instruction + set to use is explicitly set to '.arm'. + + Configure test for NEON support is also updated to include a bunch of + these new directives (if any of these is unsupported by the assembler, + it is better to fail configure test than to fail library build). + + All these changes are required to fix SIGILL problem on armv4t, reported in + http://lists.freedesktop.org/archives/pixman/2010-March/000123.html + +commit 69f1ec9a7827aeb522fcae99846237ef0f896e7b +Author: Jon TURNEY <jon.turney@dronecode.org.uk> +Date: Wed Mar 17 21:07:06 2010 +0000 + + Avoid a potential division-by-zero exception in window-test + + Avoid a division-by-zero exception if the first number returned by + rand() is a multiple of 500, causing us to create a zero width pixmap, + and then attempt to use get_rand(0) when generating a random stride... 
+ + Fixes https://bugs.freedesktop.org/attachment.cgi?id=34162 + +commit 50713d9d0d9241597724551315f05d958ce7a283 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Mar 17 15:12:06 2010 -0400 + + Post-release version bump to 0.17.13 + +commit fb68d6c14dd76121af009213df46e37ee17e38d7 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Mar 17 13:46:44 2010 -0400 + + Pre-release version bump to 0.17.12 + +commit 265ea1fb4d05a920323f23a02f9dc379312bbdae +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Mar 17 10:50:42 2010 -0400 + + Specialize the fast_composite_scaled_nearest_* scalers to positive x units + + This avoids a test in the inner loop, which improves performance + especially for tiled sources. + + On x86-32, I get these results: + + Before: + op=1, src_fmt=20028888, dst_fmt=20028888, speed=306.96 MPix/s (73.18 FPS) + op=1, src_fmt=20028888, dst_fmt=10020565, speed=102.67 MPix/s (24.48 FPS) + op=1, src_fmt=10020565, dst_fmt=10020565, speed=324.85 MPix/s (77.45 FPS) + + After: + op=1, src_fmt=20028888, dst_fmt=20028888, speed=332.19 MPix/s (79.20 FPS) + op=1, src_fmt=20028888, dst_fmt=10020565, speed=110.41 MPix/s (26.32 FPS) + op=1, src_fmt=10020565, dst_fmt=10020565, speed=363.28 MPix/s (86.61 FPS) + +commit 9cd1051523493e0926b146f05cdde34158391602 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Mar 17 10:35:34 2010 -0400 + + Add a FAST_PATH_X_UNIT_POSITIVE flag + + This is the common case for a lot of transformed images. If the unit + were negative, the transformation would be a reflection which is + fairly rare. 
+ +commit a5b51bb03c5c1258d7558efa13eca6c570e34ce6 +Author: Alexander Larsson <alexl@redhat.com> +Date: Wed Mar 17 11:58:05 2010 +0100 + + Use the right format for the OVER_8888_565 fast path + +commit 3b92b711d031a7752e06d0a5f688f4c54f50a1e6 +Author: Alexander Larsson <alexl@redhat.com> +Date: Fri Mar 12 15:45:04 2010 +0100 + + Add specialized fast nearest scalers + + This is a macroized version of SRC/OVER repeat normal/unneeded nearest + neighbour scaling instantiated for some common 8888 and 565 formats. + + Based on work by Siarhei Siamashka + +commit 5750408e48259f42373a5233231104d9bd3eb35a +Author: Alexander Larsson <alexl@redhat.com> +Date: Fri Mar 12 15:41:01 2010 +0100 + + Add FAST_PATH_SAMPLES_COVER_CLIP and FAST_PATH_16BIT_SAFE + + FAST_PATH_SAMPLES_COVER_CLIP: + + This is set if the source sample grid, unrepeated but transformed, + completely covers the clip destination. If this is set + you can use a simple scaler that doesn't have to care about the repeat + mode. + + FAST_PATH_16BIT_SAFE: + + This signifies two things: + 1) The size of the src/mask fits in a 16.16 fixed point, so something like: + + max_vx = src_image->bits.width << 16; + + Is allowed and is guaranteed to not overflow max_vx + + 2) When stepping the source space we're guaranteed to never overflow + a 16.16 bit fix point variable, even if we step one extra step + in the destination space. This means that a loop doing: + + x = vx >> 16; + vx += unit_x; d = src_row[x]; + + will never overflow vx causing x to be negative. + + And additionally, if you track vx like above and apply NORMAL repeat + after the vx addition with something like: + + while (vx >= max_vx) vx -= max_vx; + + This will never overflow the vx even on the final increment that + takes vx one past the end of where we will read, which makes the + repeat loop safe. 
+ +commit cba6fbbddce5edfd8e28ef570c493b044761f870 +Author: Alexander Larsson <alexl@redhat.com> +Date: Fri Mar 12 15:40:07 2010 +0100 + + Add FAST_PATH_NO_NONE_REPEAT flag + +commit 7ec023ede155b9dacf574c4323740ef981802aa9 +Author: Alexander Larsson <alexl@redhat.com> +Date: Tue Mar 16 14:18:29 2010 +0100 + + Add CONVERT_8888_TO_8888 and CONVERT_0565_TO_0565 macros + + These are useful for macroization + +commit c903d03052e1c34478556964338959b34928a388 +Author: Alexander Larsson <alexl@redhat.com> +Date: Fri Mar 12 16:23:42 2010 +0100 + + Add CONVERT_0565_TO_8888 macro + + This lets us simplify some fast paths since we get a consistent + naming that always has 8888 and gets some value for alpha. + +commit de27f45ddd46fc48ec9598f2f177155328d55580 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Mar 15 11:51:09 2010 -0400 + + Ensure that only the low 4 bit of 4 bit pixels are stored. + + In some cases we end up trying to use the STORE_4 macro with an 8 bit + values, which resulted in other pixels getting overwritten. Fix this + by always masking off the low 4 bits. + + This fixes blitters-test on big-endian machines. + +commit 6532f8488abffb89501cb76de7d80b8ab2d49aed +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Mar 16 08:17:10 2010 -0400 + + Fix contact address in configure.ac + +commit 7c9f121efe7ee6afafad8b294974f5498054559b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Mar 16 12:23:50 2010 -0400 + + Add PIXMAN_DEFINE_THREAD_LOCAL() and PIXMAN_GET_THREAD_LOCAL() macros + + These macros hide the various types of thread local support. On Linux + and Unix, they expand to just __thread. On Microsoft Visual C++, they + expand to __declspec(thread). + + On OS X and other systems that don't have __thread, they expand to a + complicated concoction that uses pthread_once() and + pthread_get/set_specific() to get thread local variables. 
+ +commit 6b9c54820015f69e667ed54441e83042c9a84cc1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Mar 16 11:01:08 2010 -0400 + + Add checks for various types of thread local storage. + + OS X does not support __thread, so we have to check for it before + using it. It does however support pthread_get/setspecific(), so if we + don't have __thread, check if those are available. + +commit 313353f1fb9d40d0c3aaf7cfb99ca978b29003a4 +Author: Alan Coopersmith <alan.coopersmith@sun.com> +Date: Mon Mar 15 15:20:05 2010 -0700 + + Add Sun cc to thread-local support checks in pixman-compiler.h + + Clears '#warning: "unknown compiler"' messages when building + + Signed-off-by: Alan Coopersmith <alan.coopersmith@sun.com> + +commit b67f784a5dc51f41e40bb1a165411f5259ea0ee4 +Author: Alan Coopersmith <alan.coopersmith@sun.com> +Date: Mon Mar 15 10:52:17 2010 -0700 + + Make .s target asm flag selection more portable + + The previous code worked in GNU make, but caused a syntax error in Solaris + make ( https://bugs.freedesktop.org/show_bug.cgi?id=27062 ) - this seems to + work in both, and should hopefully not cause syntax errors in any versions + of make not supporting the macro-substitution-in-macro-name feature, just + cause the macro to expand to nothing. + + Signed-off-by: Alan Coopersmith <alan.coopersmith@sun.com> + +commit 7a5dc747852d46fa382ef885bb6299723ef6ed00 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Mar 15 07:40:46 2010 -0400 + + Fix typo: WORDS_BIG_ENDIAN => WORDS_BIGENDIAN in pixman-edge.c + + Pointed out by Andreas Falkenhahn on the cairo mailing list. + +commit ff30a5cbb941a9559082c6a6052ef761c7de949c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Mar 3 13:24:13 2010 -0500 + + test: Add support for indexed formats to blitters-test + + These formats work fine, they just need to have a palette set. 
+ +commit 2b5f7be6c05ce3643b7d29e7237f91bfaedd80e5 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Mar 1 10:32:39 2010 -0500 + + pixman.h: Only define stdint types when PIXMAN_DONT_DEFINE_STDINT is undefined + + In SPICE, with Microsoft Visual C++, pixman.h is included after + another file that defines these types, which causes warnings and + errors. + + This patch allows such code to just define PIXMAN_DONT_DEFINE_STDINT + to use its own version of those types. + +commit f4da05c9f988133079cac2b8d54589386f46398f +Merge: a12d868 f534509 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Mar 14 12:12:05 2010 -0400 + + Merge branch 'operator-table' + +commit a12d868df8b673df2b563f309563954e2b3f977d +Merge: 18f0de4 54e39e0 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Mar 14 12:12:00 2010 -0400 + + Merge branch 'fast-path-cache' + +commit f534509d007de40592dedc574e7eb78445453ec0 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sat Jan 30 11:37:25 2010 -0500 + + Change operator table to be an array of arrays of four bytes. + + This makes gcc generate slightly better code for optimize_operator. + +commit 94d75ebd2167b44c142a6202b2d7bbe238dfd830 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Sep 19 02:40:56 2009 -0400 + + Strength reduce certain conjoint/disjoint to their normal counterparts. + + This allows us to not test for them later on. + +commit 58be9c71d2b1d0ed9d8feed1db0581b250d0a7d2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Sep 19 02:32:28 2009 -0400 + + Store the operator table more compactly. + + The four cases for each operator: + + none-are-opaque, src-is-opaque, dest-is-opaque, both-are-opaque + + are packed into one uint32_t per operator. The relevant strength + reduced operator can then be found by packing the source-is-opaque and + dest-is-opaque into two bits and shifting that number of bytes. 
+ + Chris Wilson pointed out a bug in the original version of this commit: + dest_is_opaque and source_is_opaque were used as booleans, but their + actual values were the results of a logical AND with the + FAST_PATH_OPAQUE flag, so the shift value was wildly wrong. + + The only reason it actually passed the test suite (on x86) was that + the compiler computed the shift amount in the cl register, and the low + byte of FAST_PATH_OPAQUE happens to be 0, so no shifting actually took + place, and the original operator was returned. + +commit 7fe35f0e6b660f5667ff653f3b753bc3e5d07901 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Sep 18 14:10:31 2009 -0400 + + Make the operator strength reduction constant time. + + By extending the operator information table to cover all operators we + can replace the loop with a table look-up. At the same time, base the + operator optimization on the computed flags rather than the ones in + the image struct. + + Finally, as an extra optimization, we no longer ignore the case where + there is a mask. Instead we consider the source opaque if both source + and mask are opaque, or if the source is opaque and the mask is + missing. + +commit 18f0de452dc7e12e4cb544d761a626d5c6031663 +Author: Loïc Minier <loic.minier@ubuntu.com> +Date: Tue Mar 9 20:57:34 2010 +0100 + + ARM: SIMD: Try without any CFLAGS before forcing -mcpu= + + http://bugs.launchpad.net/bugs/535183 + +commit 933540861383da27402680593edefe8d61e6fb02 +Author: Egor Starkov <starkov.egor@gmail.com> +Date: Fri Mar 12 09:47:59 2010 -0500 + + Eliminate trailing comma in enum + + https://bugs.freedesktop.org/show_bug.cgi?id=27050 + + Pixman is not compiling with c++ compiler. 
During compilation it gives + the following error: + + /usr/include/pixman-1/pixman.h:335: error: comma at end of enumerator list + + Signed-off-by: Søren Sandmann Pedersen <ssp@redhat.com> + +commit 54e39e00386fd2fd0eb76ead6396ddb93f1cf6c2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Sep 17 03:16:27 2009 -0400 + + Add a fast path cache + + This patch adds a cache in front of the fast path tables to reduce the + overhead of pixman_composite(). It is fixed size with move-to-front to + make sure the most popular fast paths are at the beginning of the cache. + + The cache is thread local to avoid locking. + +commit 84b009ae9f128c838d0e046e07947f8f9b2ce879 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Mar 5 20:40:41 2010 -0500 + + Post-release version bump to 0.17.11 + +commit 14fd287efb63c1f31d37053ebbbf500d0841c053 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Fri Mar 5 20:06:08 2010 -0500 + + Pre-release version bump to 0.17.10 + +commit bd9934551f72f4993016cab1d7be3b1e545629b1 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Feb 26 14:15:22 2010 -0500 + + Move __force_align_arg_pointer workaround before composite32() + + Since otherwise the workaround won't take effect when you call + pixman_image_composite32() directly. + +commit 14bb054d9695abb284e22a1de31337e0e41bb4e3 +Merge: 9a8e404 3db76b9 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Mar 4 02:30:22 2010 -0500 + + Merge branch 'more-flags' + +commit 9a8e404d44b6ed9817d088966cec324a38e60897 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Feb 27 00:37:19 2010 -0500 + + test: Remove obsolete comment + +commit 182e4c2635fdb90c50b2e86253738b7e9c8ea282 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Feb 24 04:14:45 2010 +0200 + + ARM: added 'neon_composite_over_reverse_n_8888' fast path + + This fast path function improves performance of 'poppler' cairo-perf trace. 
+ + Benchmark from ARM Cortex-A8 @720MHz + + before: + + [ # ] backend test min(s) median(s) stddev. count + [ 0] image poppler 38.986 39.158 0.23% 6/6 + + after: + + [ # ] backend test min(s) median(s) stddev. count + [ 0] image poppler 24.981 25.136 0.28% 6/6 + +commit 072a7d31a8c872666787b69a6bd1b537565c5b96 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Feb 24 02:26:57 2010 +0200 + + ARM: added 'neon_composite_src_x888_8888' fast path + + This fast path function improves performance of 'gnome-system-monitor' + cairo-perf trace. + + Benchmark from ARM Cortex-A8 @720MHz + + before: + + [ # ] backend test min(s) median(s) stddev. count + [ 0] image gnome-system-monitor 68.838 68.899 0.05% 5/6 + + after: + + [ # ] backend test min(s) median(s) stddev. count + [ 0] image gnome-system-monitor 53.336 53.384 0.09% 6/6 + +commit 2ed7c13922f83404bd9976c00d00738d0314693f +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Feb 24 01:44:00 2010 +0200 + + ARM: added 'neon_composite_over_n_8888_8888_ca' fast path + + This fast path function improves performance of 'firefox-talos-gfx' + cairo-perf trace. + + Benchmark from ARM Cortex-A8 @720MHz + + before: + + [ # ] backend test min(s) median(s) stddev. count + [ 0] image firefox-talos-gfx 139.969 141.176 0.35% 6/6 + + after: + + [ # ] backend test min(s) median(s) stddev. count + [ 0] image firefox-talos-gfx 111.810 112.196 0.23% 6/6 + +commit 3db76b90049f23723a0519d572b9cda7c672f7d5 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Feb 14 19:18:35 2010 -0500 + + Restructure the flags computation in compute_image_info(). + + Restructure the code to use switches instead of ifs. This saves a few + comparisons and make the code slightly easier to follow. Also add some + comments. 
+ +commit ac44db334066f68a837914a52d8d1368c85161ad +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Feb 14 19:14:44 2010 -0500 + + Move workaround code to pixman-image.c + + It is more natural to put it where all the other flags are computed. + +commit 35af45d5e3d3f893ccaa4ab2f947100eb9d840ac +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Feb 22 06:06:22 2010 -0500 + + Turn need_workaround into another flag. + + Instead of storing it as a boolean in the image struct, just use + another flag for it. + +commit f27f17ce22b6d0ac587600930c3657180066aac8 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Sep 18 11:54:21 2009 -0400 + + Eliminate _pixman_image_is_opaque() in favor of a new FAST_PATH_IS_OPAQUE flag + + The new FAST_PATH_IS_OPAQUE flag is computed along with the others in + _pixman_image_validate(). + +commit 2a6ba862abd8859014d11a742247fa1f1225729b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Sep 18 04:17:57 2009 -0400 + + Eliminate _pixman_image_is_solid() + + Instead of calling this function in compute_image_info(), just do the + relevant checks when the extended format is computed. + + Move computation of solidness to validate + +commit 45006e5e648b85df65b922f893c9802c9ecce38e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Sep 18 04:06:30 2009 -0400 + + Move computation of extended format code to validate. + + Instead of computing the extended format on every composite, just + compute it once and store it in the image. + +commit fb0096a282c5b6e7ca9eb59a05d9ff738dccfd4b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Mon Feb 22 05:16:27 2010 -0500 + + Add new FAST_PATH_SIMPLE_REPEAT flag + + This flag indicates that the image is untransformed and + repeating. Such images can be composited quickly by simply repeating + the composite operation. 
+ +commit a7ad9c7c9dcb78e0c5ad00145b119dfe929eb307 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Sep 18 03:54:36 2009 -0400 + + Compute the image flags at validation time instead of composite time + + Instead of computing all the image flags at composite time, we compute + them once in _pixman_image_validate() and cache them in the image. + +commit 7bc4cd42c3549f3f2354f50a7cf21ce9ccc8de7b +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Feb 24 22:09:41 2010 -0500 + + RELEASING: Update the release instructions. + +commit 7392a350f2808146842be0924ca289c5df6c8922 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Feb 24 22:02:13 2010 -0500 + + Post-release version bump + +commit 4d1c216af3d6fc58829c2f5ea434e97ff8012493 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Feb 24 21:52:30 2010 -0500 + + Pre-release version bump + +commit e0f1d8410715083498a35284ea7e5bb71fabe090 +Merge: 16ef3ab 282f5cf +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Feb 24 21:01:29 2010 -0500 + + Merge branch 'trap-fixes' + +commit 16ef3ab230047221f813905d390bf762a3d8508a +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Feb 24 20:51:25 2010 -0500 + + Add a1-trap-test + + When a trapezoid sample point is exactly on a polygon edge, the rule + is that it is considered inside the trapezoid if the edge is a top or + left edge, but outside for bottom and right edges. + + This program tests that for a1 trapezoids. + +commit ad5cbba4c05f8521004c6aa1afd5aa74040afad0 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Sun Feb 21 02:01:16 2010 -0500 + + Hide the C++ extern "C" declarations behind macros. + + That way they don't confuse the indenting algorithm in editors such as + Emacs. 
+ +commit 14f201dc47ba76fcf677936f4f809249054fd6ad +Merge: 94f5859 6b2da68 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Feb 20 13:09:01 2010 -0500 + + Merge branch 'eliminate-composite' + + Conflicts: + pixman/pixman-sse2.c + +commit 94f585916a2385146ee5c803f7850b21149d728b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Feb 13 20:08:13 2010 -0500 + + Move all code to do debugging spew into pixman-private. + + Rather than the region code having its own little debug system, move + all of it into pixman-private where there is already return_if_fail() + macros etc. These macros are now enabled in development snapshots and + nowhere else. Previously they were never enabled unless you modified + the code. + + At the same time, remove all the asserts from the region code since we + can never turn them on anyway, and replace them with + critical_if_fail() macros that will print spew to standard error when + DEBUG is defined. + + Finally, also change the debugging spew in pixman-bits-image.c to use + return_val_if_fail() instead of its own fprintf(). + +commit f32d585069e77f09f84de42eda8ed8f6849aab57 +Author: Alexander Larsson <alexl@redhat.com> +Date: Fri Feb 19 11:22:52 2010 +0100 + + Test pixman_region32_init_from_image in region-test + +commit 48ef4befd88e06e83a583a70f0172f1a08a65cda +Author: Alexander Larsson <alexl@redhat.com> +Date: Mon Feb 15 09:40:50 2010 +0100 + + Add pixman_region{32}_init_from_image + + This creates a region from an image in PIXMAN_a1 format. + +commit 5dee05fcab16dbd3e2c5e4d85b9edf26cf523e3f +Author: Alexander Larsson <alexl@redhat.com> +Date: Mon Feb 15 09:39:59 2010 +0100 + + Move SCREEN_SHIFT_LEFT/RIGHT to pixman-private.h + + This is needed for later use in other code. 
+ +commit 61f4ed9c7a3ff6afbbb42d3f3b8dc3b9331bdcbd +Author: Makoto Kato <m_kato@ga2.so-net.ne.jp> +Date: Thu Feb 18 14:30:01 2010 +0900 + + Compile by USE_SSE2 only without USE_MMX + + Although we added MMX emulation for Microsoft Visual C++ compiler for x64, + USE_SSE2 still requires USE_MMX. So we remove dependency of USE_MMX + for Windows x64. + + Signed-off-by: Makoto Kato <m_kato@ga2.so-net.ne.jp> + +commit 6b2da683debd1b3fc1862752cb26e6799b644e05 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Sep 16 07:29:08 2009 -0400 + + Move NULL check out of get_image_info() + + The NULL check is only necessary for masks, so there is no reason to + do it for destinations and sources. + +commit 1dd8744f40f4b754fb3aa26a3c7f4fbe54c27155 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Sep 16 06:54:43 2009 -0400 + + Add a fast path for non-repeating sources in walk_region_internal(). + + In the common case where there is no repeating, the loop in + walk_region_internal() reduces to just walking of the boxes involved + and calling the composite function. + +commit 362a9f564a9a58c48ab0129ca3ac997d0cb84bab +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Sep 15 01:34:33 2009 -0400 + + Move more things out of the inner loop in do_composite(). + + Specifically, + + - the src_ and mask_repeat computations + + - the check for whether the involved images cover the composite + region. + +commit 129d9c187146a060863598d154e6770394547afd +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Sep 15 01:16:40 2009 -0400 + + Move region computation out of the loop in do_composite() + + We only need to compute the composite region once, not on every + iteration. 
+ +commit 4c185503d26374915942d1f64c02134e4a2c5a99 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Sep 15 01:14:41 2009 -0400 + + Move get_image_info() out of the loop in do_composite + + The computation of image formats and flags is invariant to the loop, + so it can all be moved out. + +commit 81b7d7b18050c770f272fd10aa7f5cf85ddc25cc +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Sep 15 01:13:36 2009 -0400 + + Manually inline _pixman_run_fast_path() + + Move all of the code into do_composite(). + +commit e914cccb24c1391aa25eca8df87a08bd935cd870 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Sep 15 01:11:04 2009 -0400 + + Move compositing functionality from pixman-utils.c into pixman.c + + _pixman_run_fast_path() and pixman_compute_composite_region() are both + moved to pixman-image, since at this point that's the only place they + are being called from. + +commit 0eeb197599cca78a645f8a5498b0124ef170c523 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Nov 7 15:13:03 2009 -0500 + + Move compositing to its own function, do_composite() + +commit f831552bce70f2619ea8db00983d80dd1abd0003 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Sep 15 00:58:51 2009 -0400 + + Optimize for the common case wrt. the workaround. + + In the common case no images need the workaround, so we check for that + first, and only if an image does need a workaround do we check which + one of the images actually need it. + +commit fa4df6225d4fa0b740c0ce69361e2f7cee1686f9 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Sep 15 00:48:12 2009 -0400 + + Eliminate all the composite methods. + + They are no longer necessary because we will just walk the fast path + tables, and the general composite path is treated as another fast + path. 
+ + This unfortunately means that sse2_composite() can no longer be + responsible for realigning the stack to 16 bytes, so we have to move + that to pixman_image_composite(). + +commit c3d7b5125585a7d974ccd904100777a0e18d425f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Sep 15 00:23:11 2009 -0400 + + Delete unused _pixman_walk_composite_region() function + +commit 488480301c7ca9cb4e41c8d0f489fb56e5d9efdd +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Nov 7 15:28:57 2009 -0500 + + Don't call _pixman_implementation_composite() anymore. + + Instead just call _pixman_run_fast_path(). Since we view + general_composite() as a fast path now, we know that it will find + *some* compositing routine. + +commit 06ae5ed5971fe616b96bb97a63abf6cc27c5b669 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Sep 14 23:58:40 2009 -0400 + + Delete unused sources_cover() function + +commit 543a04a3bbd2c622842739ab923ff8761c05ed83 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Sep 14 23:47:39 2009 -0400 + + Store a pointer to the array of fast paths in the implementation struct. + + Also add an empty fast path table to the vmx implementation, so that + we can be sure the pointer is never NULL. + +commit 376f2a3f853f829c78983a51bffc1bacb9bec9a3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Sep 15 00:20:22 2009 -0400 + + Make fast_composite_scaled_nearest() another fast path. + + This requires another couple of flags + + FAST_PATH_SCALE_TRANSFORM + FAST_PATH_NEAREST_FILTER + +commit 87430cfc35c6e51bb1a947795e0ddb198c460253 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Sep 13 05:29:48 2009 -0400 + + Make general_composite_rect() just another fast path. + + We introduce a new PIXMAN_OP_any fake operator and a PIXMAN_any fake + format that match anything. Then general_composite_rect() can be used + as another fast path. 
+ + Because general_composite_rect() does not require the sources to cover + the clip region, we add a new flag FAST_PATH_COVERS_CLIP which is part + of the set of standard flags for fast paths. + + Because this flag cannot be computed until after the clip region is + available, we have to call pixman_compute_composite_region32() before + checking for fast paths. This will resolve itself when we get to the + point where _pixman_run_fast_path() is only called once per composite + operation. + +commit d7e281e0a1f7b1aecd245070736e03d2953b0911 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Feb 13 18:23:34 2010 -0500 + + Post-release version bump + +commit 9bcadc340866c49dab1cb40ff79c683972e8a37d +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Feb 13 18:12:32 2010 -0500 + + Pre-release version bump + +commit 97a12457394b36b5b052927af65ac3944ceccf09 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun May 31 15:06:33 2009 -0400 + + Once unrolled version of fast_path_composite_nearest_scaled() + + Separate out the fetching and combining code in two inline + functions. Then do two pixels per iteration. + +commit e5972110750b32929a474c35362f4639dbbd1222 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 30 02:50:38 2009 -0400 + + Generalize and optimize fast_composite_src_scaled_nearest() + + - Make it work for PIXMAN_OP_OVER + + - Split repeat computation for x and y, and only the x part in the + inner loop. + + - Move stride multiplication outside of inner loop + +commit 337e916473069a76a44757b3664f8d49da350773 +Merge: bdc4a6a 8e85059 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Feb 13 12:26:09 2010 -0500 + + Merge branch 'bitmasks' + +commit bdc4a6afe0bcea6dfb0df221006f4fe188324678 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Feb 13 11:18:13 2010 -0500 + + Makefile.am: Remove 'check' from release-check + + It's already included in distcheck. 
+ +commit edee4be052cf0d466922759efd2613e5a2be9e2b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Feb 13 09:40:33 2010 -0500 + + Turn off asserts in development snapshots (bug 26314). + + There is not much real benefit in having asserts turned on in + snapshots because it doesn't lead to any new bug reports, just to + people not installing development snapshots since they cause X server + crashes. So just turn them off. + + While we are at it, limit the number of messages to stderr to 5 + instead of 50. + +commit cf1f034fef34478c528bedf1e59be443fa72429c +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sun Feb 7 07:33:52 2010 +0200 + + ARM: Remove any use of environment variables for cpu features detection + + Old code assumed that all ARMv7 processors support NEON instructions + unless overridden by environment variable ARM_TRUST_HWCAP. This causes + X server to die with SIGILL if NEON support is disabled in the kernel + configuration. Additionally, ARMv7 processors lacking NEON unit are + going to become available eventually. + + The problem was reported by user bearsh at irc.freenode.net #gentoo-embedded + +commit 865c37d57421f6888151486ae1a2ec986a7cd9d2 +Author: Alexander Larsson <alexl@redhat.com> +Date: Tue Feb 9 15:57:18 2010 +0100 + + Add pixman_image_get_destroy_data() + + This way you can get back user data that was set using + pixman_image_set_destroy_function(). + +commit cca1cef3f29d244f0a57bd3ed8b09e2892f8934a +Author: Alexander Larsson <alexl@redhat.com> +Date: Tue Feb 9 13:22:38 2010 +0100 + + Add extern "C" guards for c++ + +commit 8e8505943651ac46e0ad5a2dd0b9e85704095cc1 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Sep 13 04:28:20 2009 -0400 + + Move checks for src/mask repeat right before walking the region. + + Also add a couple of additional checks to the src/mask repeat check. 
+ +commit eea58eab93aefd4430544754f8a0f5460b4a30aa +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Sep 13 03:43:16 2009 -0400 + + Compute src, mask, dest flags and base fast path decisions on them. + + This sets the stage for caching the information by image instead + of computing it on each composite invocation. + + This patch also computes format codes for images such as PIXMAN_solid, + so that we can no longer end up in the situation that a fast path is + selected for a 1x1 solid image, when that fast path doesn't actually + understand repeating. + +commit 6197db91a32da7ea281fd87b59f5bb74b989361b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Sep 13 02:34:32 2009 -0400 + + Add src_, mask_, and dest_flags fields to fast path arrays + + Update all the fast path tables to match using a new + PIXMAN_STD_FAST_PATH macro. + + For now, use 0 for the flags fields. + +commit ff6eaac50eaa8778ba15fd0f796e94cc751dea0a +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Sep 12 05:26:50 2009 -0400 + + Move calls to source_is_fastpathable() into get_source_format() + +commit 171dc4875644f72d65ff2e31533edacc781069ec +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Sep 12 05:09:53 2009 -0400 + + Fold get_fast_path() into _pixman_run_fast_path() + + Also factor out the source format code computation to its own + function. + +commit 459c7a52f67c9628e94107599e3abbc6463cbd0f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Sep 12 04:30:22 2009 -0400 + + Consolidate the source and mask sanity checks in a function + +commit 27a4fb4747426ee935d2149cca2197a369c4556d +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Sep 12 04:10:30 2009 -0400 + + Move pixbuf checks after src_format and mask_format have been computed. 
+ +commit 2def1a8867a1ab0ccab720d1cc3f3c7b61c74619 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Sep 12 04:03:25 2009 -0400 + + Move the sanity checks for src, mask and destination into get_fast_path() + +commit d76aab4d03d9e700c3c431b077a4b506a5e53df1 +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Wed Jan 27 07:11:11 2010 -0500 + + Turn some uint16_t variables to int32_t in the fast paths. + + This is necessary now that we have a 32 bit version of + pixman_image_composite(). + +commit 15d07d6c2ac4ed5d41dc80b476e09e8d7dd6a84a +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Thu Jan 21 13:43:53 2010 -0500 + + Implement get_scanline_64() correctly for solid fill images. + + Previously they would be evaluated at 8 bits and then expanded. + +commit 0e8550798f69ef69dbde59eda6341ab4e0801069 +Author: Benjamin Otte <otte@redhat.com> +Date: Tue Jan 26 19:37:34 2010 +0100 + + Make pixman_image_fill_rectangles() call pixman_image_fill_boxes() + + Avoids duplication of code + +commit d0d284da0a8810e7435b8e932ac5de352793a39a +Author: Benjamin Otte <otte@redhat.com> +Date: Tue Jan 26 19:03:38 2010 +0100 + + Add pixman_image_fill_boxes() API + + It's basically the 32bit version of pixman_image_fill_rectangles(), just + with a saner data type. + +commit e841c556d59ca0aa6d86eaf6dbf061ae0f4287de +Author: Benjamin Otte <otte@redhat.com> +Date: Tue Jan 26 18:52:27 2010 +0100 + + Add pixman_image_composite32() + + This is equal to pixman_image_composite(), just with 32bit parameters. 
+ pixman_image_composite() now just calls pixman_image_composite32() + +commit 78b6c470789eb226708a5d98bb06a962d2ae0b0d +Author: Benjamin Otte <otte@redhat.com> +Date: Tue Jan 26 19:09:56 2010 +0100 + + Make region argument to pixman_region(32)_init_rects() const + + No indenting of the header to keep git blame working + +commit b194bb78c8a32b7252cccaebdc085cd8e759427d +Author: Benjamin Otte <otte@redhat.com> +Date: Tue Jan 26 19:08:29 2010 +0100 + + Fix typo + +commit c066c347aeaa779b7a3c9e3cde45413277370f0f +Author: Søren Sandmann Pedersen <ssp@redhat.com> +Date: Tue Jan 19 14:23:57 2010 -0500 + + Fix some warnings + +commit 8fce7b18f3033aa1423b96b9847f6ae3133fad7a +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jan 17 19:34:27 2010 -0500 + + Post-release version bump + +commit 23e1ba3c062711fe256612ca7f39478e048a6708 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jan 17 18:56:11 2010 -0500 + + Pre-release version bump + +commit 8dabd1fdd8f0030086cfe70f0baba7c502a0e1b8 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jan 17 16:45:23 2010 -0500 + + bits: Print an error if someone tries to create an image with bpp < depth + + Something in the X server apparently does this. + +commit 2c3cbc83c4018173d9deae3f24c457b3ca16dbcd +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Jan 16 10:31:22 2010 -0500 + + When fetching from an alpha map, replace the alpha channel of the image + + Previously it would be multiplied onto the image pixel, but the Render + specification is pretty clear that the alpha map should be used + *instead* of any alpha channel within the image. + + This makes the assumption that the pixels in the image are already + premultiplied with the alpha channel from the alpha map. 
If we don't + make this assumption and the image has an alpha channel of its own, we + would have to first unpremultiply that pixel, and then premultiply the + alpha value onto the color channels, and then replace the alpha + channel. + +commit 0df6098f3d941608f945d02e2af65b70ac499e0a +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Jan 16 10:09:25 2010 -0500 + + pixman_image_validate() needs to also validate the alpha map. + + This is the other half of bug 25950. + +commit 7f00dc62e4aa4b2b417ca1c86813a6b4c7f78673 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Jan 16 10:07:48 2010 -0500 + + When fetching from an alpha map, use the alpha map's fetch function. + + Don't use the one from the image. This is the first half of bug 25950. + +commit 042f978b04aefe56ec912c88ec879e668153a287 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Jan 16 09:48:45 2010 -0500 + + test: Add new alphamap test program. + + This program demonstrates three bugs relating to alpha maps: + + - When fetching from an alpha map into 32 bit intermediates, we use + the fetcher from the image, and not the one from the alpha map. + + - For 64 bit intermediates we call fetch_pixel_generic_lossy_32() + which then calls fetch_pixel_raw_64, which is NULL because alpha + images are never validated. + + - The alpha map should be used *in place* of any existing alpha + channel, but we are actually multiplying it onto the image. 
+ +commit 05c38141b4861348bf61235341d634019e39e8a9 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Jan 16 07:40:07 2010 -0500 + + fetch-test: Fix spelling error (pallete -> palette) + +commit c46a87e45afc6eb53ae93f9ca3c1545bd26d18f5 +Author: Alan Coopersmith <alan.coopersmith@sun.com> +Date: Thu Jan 14 09:42:34 2010 -0800 + + Update Sun license notices to current X.Org standard form + + Signed-off-by: Alan Coopersmith <alan.coopersmith@sun.com> + +commit 3df6cb34315ebaeb2ce3f341160355650d856518 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jan 10 09:15:24 2010 -0500 + + fetch-test: Various formatting fixes + +commit 7862f9b96e8e8456cc60852790c7f244a5e3425e +Author: Pierre-Loup A. Griffais <pgriffais@nvidia.com> +Date: Wed Jan 6 01:26:07 2010 +0200 + + Interpret the angle of a conical gradient in degrees. + + The conical gradient angle's fixed point degrees to + radians conversion code is missing a factor of pi. + +commit 54f51c4a7595d685397838006ba67473eee47e7d +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Dec 11 11:14:19 2009 -0500 + + region: Enable or disable fatal errors and selfchecks based on version number + + There is a couple of bugs in bugzilla where bugs in the X server + triggered asserts in the pixman region code. It is probably better to + let the X server survive this. (In fact, I thought I had disabled them + for 0.16.0, but apparently not). + + The patch below uses these rules: + + - In _stable_ pixman releases, assertions and selfchecks are turned + off. Assertions, so that the X server doesn't die. Selfchecks, + for performance reasons. + + - In _unstable_ pixman releases, both assertions and selfcheck are + turned on. These releases are what get added to development + distributions such as rawhide, so we want as much self-checking + as possible. 
+ + - In _random git checkouts_, assertions are enabled, so that bugs + are caught, but selfchecks are disabled so that you can use them + for performance work without having to fiddle with turning + selfchecks off. + +commit 91ec7fecc9b16ec1d18c46d08698e6128baaba1d +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Sep 12 03:27:44 2009 -0400 + + Some minor formatting fixes. + +commit 97cf4d494cb6fc0a75eec7d6f06f81ba7644d820 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Dec 16 17:54:41 2009 -0500 + + arm-simd: Whitespace fixes + +commit 28778c997e60af35d26df61fd82860748deb6fab +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Dec 16 17:49:44 2009 -0500 + + mmx: Eliminate trailing whitespace. + +commit c6c43c65f76aa84c57a94155117487199bc0323e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Dec 16 15:23:50 2009 -0500 + + Add 'check' to release-check make target + +commit b3afacf9c970fa7de5ffdebebbd8526a55d3e7d8 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Dec 16 15:15:17 2009 -0500 + + Reorder tests so that the fastest ones run first. 
+ +commit bbc5108bf8cfcb1f2334e51a8e904b5be48376e1 +Author: Marvin Schmidt <marv@exherbo.org> +Date: Sun Nov 15 16:04:09 2009 +0100 + + Build tests and run non-GTK+ ones on `make check` + + Setting TESTS will run the tests on `make check` + + Bug 25131 + +commit 44768320709183a341d219f97c03c5b592a69355 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Dec 10 00:25:58 2009 +0200 + + ARM: added 'neon_combine_add_u' function + +commit f2c7a04c41440b15a5ce1db7ab87dd5bd8c088da +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Dec 10 00:22:12 2009 +0200 + + ARM: added 'neon_combine_over_u' function + +commit 24cd286af6f4507eb9937ced6d9998d296c77a0a +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Dec 9 23:49:04 2009 +0200 + + ARM: macro template for single scanline compositing functions + + Existing template already supports 2D images processing, + but pixman also needs some NEON optimized functions for + improving performance when compositing is decoupled + into "fetch -> process -> store" stages and done via + temporary scanline buffer. That's why a new simplified + template which deals only with the generation of single + scanline processing functions is handy. + +commit ae8d9df6248445170702c244cd60f894aa761267 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Dec 14 19:14:36 2009 +0200 + + Use canonical pixman license notice for recently added ARM NEON assembly files + +commit ce78288d7783a27700223c39e23880f4f425f70b +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Nov 6 02:25:47 2009 +0200 + + ARM: added 'neon_composite_src_pixbuf_8888' fast path + + This is ARM NEON optimized conversion of native RGBA format used by + GTK/GDK into native 32bpp RGBA format used by cairo/pixman. 
+ +commit a732d3baeb0697b91a713fd6b51b68ee7ca68e03 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Nov 5 20:27:38 2009 +0200 + + ARM: added 'neon_composite_src_0888_0565_rev' fast path + + This is ARM NEON optimized conversion of native RGB format used by + GTK/GDK into r5g6b5 format. + +commit a1386a1ceb0c50d2e23cf30be30ea165d2d2ea7c +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Nov 5 19:43:09 2009 +0200 + + ARM: added 'neon_src_0888_8888_rev' fast path + + This is ARM NEON optimized conversion of native RGB format used by + GTK/GDK into native 32bpp RGB format used by cairo/pixman. + +commit 78a60047ac0f85423e0474ef54930e1f537f646b +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Dec 9 11:29:13 2009 +0200 + + ARM: added 'neon_composite_over_n_8888' fast path + +commit 96fd17488f0966d2df53623195810dc640bf5ca6 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Dec 9 11:02:04 2009 +0200 + + ARM: added 'neon_composite_over_n_0565' fast path + +commit 2d332c7a569803107e11b41c7b2c020b4050e26e +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Dec 9 10:33:01 2009 +0200 + + ARM: added 'neon_composite_src_0565_8888' fast path + +commit 062da411d81c7d970a302dd2c283ef5327b867da +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Dec 8 15:04:41 2009 +0200 + + ARM: added 'neon_composite_add_8888_8888_8888' fast path + +commit 3d0eedb5d9af97fed68e2da03d6aee40197e2a76 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Dec 8 14:39:41 2009 +0200 + + ARM: added 'neon_composite_add_8888_8888' fast path + +commit 86b54c6701666d087f0234047128fbf0fd6468b6 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Dec 7 22:53:30 2009 +0200 + + ARM: added 'neon_composite_over_8888_8_8888' fast path + +commit aec1524e773758369ab627553dc5c23d18619a85 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Dec 7 22:42:17 2009 +0200 + + ARM: 
added 'neon_composite_over_8888_8888_8888' fast path + +commit ba59d53d0b61effc422c4004a9f0e6cf848598d8 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Dec 8 14:13:12 2009 +0200 + + ARM: minor source formatting changes + + Now it's a bit harder to exceed 80 characters line limit + when binding assembly functions. + +commit a47b5167c4c1b55b2f51e29ab0782c2659bec312 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Dec 8 08:52:34 2009 +0200 + + ARM: added '.arch armv7a' directive to NEON assembly file + + This fix prevents build failure due to not accepting PLD instruction when + compiling for armv4 cpu with the relevant -mcpu/-march options set in CFLAGS. + +commit 3fba7dc6fa52bbf01cfc5c4aab1ab06d49a117b2 +Author: Benjamin Otte <otte@gnome.org> +Date: Sat Nov 28 11:38:43 2009 +0100 + + Make test program not throw warnings about undefined variables + +commit 10ab592d57bbeefb3e3297c4a905e5cec233a006 +Author: Benjamin Otte <otte@gnome.org> +Date: Fri Nov 27 22:02:54 2009 +0100 + + Fix bug that prevented pixman_fill MMX and SSE paths for 16 and 8bpp + +commit 7c7b6f5de75a998deaab5d00baf69a895ceba795 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 18 04:26:18 2009 +0200 + + ARM: NEON optimized pixman_blt + + NEON unit has fast access to L1/L2 caches and even simple + copy of memory buffers using NEON provides more than 1.5x + performance improvement on ARM Cortex-A8. 
+ +commit dce6e1bd6840ce0646d8738aaa0927c003dbb361 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu Nov 5 23:15:27 2009 +0200 + + test: support for testing pixbuf fast path functions in blitters-test + +commit 0901ef41fbca2b8fb504c64a2b694bd764770292 +Author: Benjamin Otte <otte@gnome.org> +Date: Sun Nov 22 04:34:01 2009 +0100 + + Remove nonexistant function from header + +commit c97b1e803fc214e9880eaeff98410c8fa37f9ddc +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Nov 20 12:02:50 2009 +0100 + + Post-release version bump + +commit 5a7597f81862ecf7b098ed254fb8e4197ccae3a8 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Nov 20 11:55:40 2009 +0100 + + Pre-release version bump + +commit 95a08dece37080e199e436fa6f2dc02e60d346dd +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Nov 20 09:35:48 2009 +0100 + + Remove stray semicolon from blitters-test.c + + Pointed out by scottmc2@gmail.com in bug 25137. + +commit 6e2c7d54c6786b52ae7dc683d2dbb4c7c033bb09 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Nov 9 14:10:00 2009 +0200 + + C fast path function for 'over_n_1_0565' + + This function is needed to improve performance of xfce4 terminal when + using bitmap fonts and running with 16bpp desktop. Some other applications + may potentially benefit too. 
+ + After applying this patch, top functions from Xorg process in + oprofile log change from + + samples % image name symbol name + 13296 29.1528 libpixman-1.so.0.17.1 combine_over_u + 6452 14.1466 libpixman-1.so.0.17.1 fetch_scanline_r5g6b5 + 5516 12.0944 libpixman-1.so.0.17.1 fetch_scanline_a1 + 2273 4.9838 libpixman-1.so.0.17.1 store_scanline_r5g6b5 + 1741 3.8173 libpixman-1.so.0.17.1 fast_composite_add_1000_1000 + 1718 3.7669 libc-2.9.so memcpy + + to + + samples % image name symbol name + 5594 14.7033 libpixman-1.so.0.17.1 fast_composite_over_n_1_0565 + 4323 11.3626 libc-2.9.so memcpy + 3695 9.7119 libpixman-1.so.0.17.1 fast_composite_add_1000_1000 + + when scrolling text in terminal (reading man page). + +commit 282f5cf8b821a34bab1e32957913ef8d9f9ee43c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Nov 12 17:54:40 2009 -0500 + + Round horizontal sampling points towards northwest. + + This is a similar change as the top/bottom one, but in this case the + rounding is simpler because it's just always rounding down. + + Based on a patch by M Joonas Pihlaja. + +commit f44431986f667eb49571e9365960524361f833c5 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Nov 12 17:20:32 2009 -0500 + + Fix rounding of top and bottom coordinates. + + The rules for trap rasterization is that coordinates are rounded + towards north-west. + + The pixman_sample_ceil() function is used to compute the first + (top-most) sample row included in the trap, so when the input + coordinate is already exactly on a sample row, no rounding should take + place. + + On the other hand, pixman_sample_floor() is used to compute the final + (bottom-most) sample row, so if the input is precisely on a sample + row, it needs to be rounded down to the previous row. + + This commit fixes the rounding computation. 
The idea of the + computation is like this: + + Floor operation that rounds exact matches down: First subtract + pixman_fixed_e to make sure input already on a sample row gets rounded + down. Then find out how many small steps are between the input and the + first fraction. Then add those small steps to the first fraction. + + The ceil operation first adds (small_step + pixman_e), then runs a + floor. This ensures that exact matches are not rounded off. + + Based on a patch by M Joonas Pihlaja. + +commit 3bea18e3ea587c84423e9f7bafff21150c37d287 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Nov 12 17:03:53 2009 -0500 + + Fix slightly skewed sampling grid for antialiased traps + + The sampling grid is slightly skewed in the antialiased case. Consider + the case where we have n = 8 bits of alpha. + + The small step is + + small_step = fixed_1 / 15 = 65536 / 15 = 4369 + + The first fraction is then + + frac_first = (small_step / 2) = (65536 - 15) / 2 = 2184 + + and the last fraction becomes + + frac_last + = frac_first + (15 - 1) * small_step = 2184 + 14 * 4369 = 63350 + + which means the size of the last bit of the pixel is + + 65536 - 63350 = 2186 + + which is 2 bigger than the first fraction. This is not the end of the + world, but it would be more correct to have 2185 and 2185, and we can + accomplish that simply by making the first fraction half the *big* + step instead of half the small step. + + If we ever move to coordinates with 8 fractional bits, the + corresponding values become 8 and 10 out of 256, where 9 and 9 would + be better. + + Similarly in the X direction. + +commit 98bb0a509f401563b8e6e15f4ee26947e9c3d419 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Sep 12 04:24:04 2009 -0400 + + Delete the flags field from fast_path_info_t + +commit b7fb7e6c700891a12300aaf5c8a4c4b2584a194f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Sep 12 02:47:39 2009 -0400 + + Eliminate NEED_PIXBUF flag. 
+ + Instead introduce two new fake formats + + PIXMAN_pixbuf + PIXMAN_rpixbuf + + and compute whether the source and mask have them in + find_fast_path(). This lead to some duplicate entries in the fast path + tables that could then be removed. + +commit 542b79c30d88788028d391285aa8cd038e96f2b3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Sep 12 02:11:12 2009 -0400 + + Compute src_format outside the fast path loop. + + Inside the loop all we have to do is check that the formats match. + +commit 12108ecbe488d1b65e6787585e1caa57af17a008 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Sep 12 01:53:54 2009 -0400 + + Eliminate the NEED_COMPONENT_ALPHA flag. + + Instead introduce two new fake formats + + PIXMAN_a8r8g8b8_ca + PIXMAN_a8b8g8r8_ca + + that are used in the fast path tables for this case. + +commit 4686d1f53b09b5dd12df6f10f8c0403b2a1e2427 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Sep 12 01:35:56 2009 -0400 + + Eliminate the NEED_SOLID_MASK flag + + This flag was used to indicate that the mask was solid while still + allowing a specific format to be required. However, there is not + actually any need for this because the fast paths all used + _pixman_image_get_solid() which already allowed arbitrary formats. + + The one thing that had to be dealt with was component alpha. In + addition to interpreting the presence of the NEED_COMPONENT_ALPHA + flag, we now also interprete the *absence* of this flag as a + requirement that the mask does *not* have component alpha. + + Siarhei Siamashka pointed out that the first version of this commit + had a bug, in which a NEED_SOLID_MASK was accidentally not turned into + a PIXMAN_solid in the ARM NEON implementation. + +commit 2ef8b394d72d6c13f96347626b09613f805d9f8c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Sep 19 06:14:38 2009 -0400 + + Use the destination buffer directly in more cases instead of fetching. 
+ + When the destination buffer is either a8r8g8b8 or x8r8g8b8, we can use + it directly instead of fetching into a temporary buffer. When the + format is x8r8g8b8, we require the operator to not make use of + destination alpha, but when it is a8r8g8b8, there are no restrictions. + + This is approximately a 5% speedup on the poppler cairo benchmark: + + [ # ] backend test min(s) median(s) stddev. count + + Before: + [ 0] image poppler 6.661 6.709 0.59% 6/6 + + After: + [ 0] image poppler 6.307 6.320 0.12% 5/6 + +commit 13f4e02b1429d62b08487beebd8697887a5a9608 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Nov 10 15:48:36 2009 -0500 + + test: Move image_endian_swap() from blitters-test.c to utils.[ch] + +commit 24e203a8a8394edb3a89f3d6be1bdcab41fbe7f9 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Nov 10 15:45:17 2009 -0500 + + test: Move random number generator from blitters/scaling-test to utils.[ch] + +commit cc34554652bf9a402127fa06a03105b49a425895 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Nov 10 15:32:12 2009 -0500 + + test: In scaling-test use the crc32 from utils.c + +commit b465b8b79dc008f4f4dcddf45754fef260e51619 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Nov 10 15:29:20 2009 -0500 + + test: Move CRC32 code from blitters-test to new files utils.[ch] + +commit 56bd91340102e915a239d2afa1db223109cf6639 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Nov 10 14:58:19 2009 -0500 + + test: Rename utils.[ch] to gtk-utils.[ch] + +commit 7be529f3bd6455259e24163a27a0a5a761ee0cc3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Sep 20 17:37:36 2009 -0400 + + sse2: Add a fast path for OVER 8888 x 8 x 8888 + + This is a small speedup on the swfdec-youtube benchmark: + + Before: + [ 0] image swfdec-youtube 5.789 5.806 0.20% 6/6 + + After: + [ 0] image swfdec-youtube 5.489 5.524 0.27% 6/6 + + Ie., approximately 5% faster. 
+ +commit abefe68ae2a422fecf315f17430c0cda5561be66 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 4 17:14:14 2009 +0200 + + ARM: enabled 'neon_composite_add_8000_8000' fast path + +commit 635f389ff477a0afe82c6038a835e262d5034d99 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 4 17:13:31 2009 +0200 + + ARM: enabled 'neon_composite_add_8_8_8' fast path + +commit 7e1bfed6767774a43c288ab780f62a20eccff805 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 4 17:12:56 2009 +0200 + + ARM: enabled 'neon_composite_add_n_8_8' fast path + +commit deeb67b13a0f9267b59d9755e7a0102da29a6747 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 4 17:12:14 2009 +0200 + + ARM: enabled 'neon_composite_over_8888_8888' fast path + +commit f449364849b2cc75a48cc3b35d2a373d38b71c09 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 4 17:11:32 2009 +0200 + + ARM: enabled 'neon_composite_over_8888_0565' fast path + +commit 2dfbf6c4a520da4647bb480a124dfe5cbece225b +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 4 17:10:55 2009 +0200 + + ARM: enabled 'neon_composite_over_8888_n_8888' fast path + +commit 43824f98f1fc41d923dd8ddd97e74942c01aadf8 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 4 17:10:09 2009 +0200 + + ARM: enabled 'neon_composite_over_n_8_8888' fast path + +commit 189d0d783cc62aa3b739218689042c9235c04fa1 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 4 17:09:31 2009 +0200 + + ARM: enabled 'neon_composite_over_n_8_0565' fast path + +commit cccfc87f4f597f99b74691af172126a2346f9239 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 4 17:08:48 2009 +0200 + + ARM: enabled 'neon_composite_src_0888_0888' fast path + +commit e89b4f8105beaa27b6098a5dc7dfec62879ebd1d +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 4 17:08:09 2009 +0200 + + ARM: enabled 
'neon_composite_src_8888_0565' fast path + +commit 2d54ed46fb7428aa1d9f114450554fc33acff2c4 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 4 17:07:36 2009 +0200 + + ARM: enabled 'neon_composite_src_0565_0565' fast path + +commit 5d695cb86eaad151c9402ead5dfb7e867ff58d29 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 4 17:05:46 2009 +0200 + + ARM: added 'bindings' for NEON assembly optimized functions + + These functions serve as 'adaptors', converting standard internal + pixman fast path function arguments into arguments expected + by assembly functions. + +commit dcfade3df96559ce942df5d16b7915c94f7d9e57 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 4 15:29:27 2009 +0200 + + ARM: enabled new implementation for pixman_fill_neon + +commit bcb4bc79321659635d706bade25851cddf563856 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 4 15:18:38 2009 +0200 + + ARM: introduction of the new framework for NEON fast path optimizations + + GNU assembler and its macro preprocessor is now used to generate + NEON optimized functions from a common template. This automatically + takes care of nuisances like ensuring optimal alignment, dealing with + leading/trailing pixels, doing prefetch, etc. + + Implementations for a lot of compositing functions are also added, + but not enabled. 
+ +commit 1eff0ab487efe4720451b8bd92c8423b9772a69a +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Nov 4 14:25:27 2009 +0200 + + ARM: removed old ARM NEON optimizations + +commit b8898d77d0e7cc1c50321fcb216af3ba6c634959 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Nov 7 14:47:22 2009 -0500 + + Define PIXMAN_USE_INTERNAL_API in pixman-private.h + + Instead of mucking around with CFLAGS in configure.ac, preventing + users from setting their own CFLAGS, just define the + PIXMAN_USE_INTERNAL_API and PIXMAN_DISABLE_DEPRECATED in + pixman-private.h + +commit 67bf739187cd43b5fff754b25693f76bb788d1fa +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Oct 27 09:11:28 2009 -0400 + + Include <inttypes.h> when compiled with HP's C compiler. + + Fixes bug 23169. + +commit 384fb88b905823e62c1f1733a20073bfea15d411 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Oct 27 12:25:13 2009 +0200 + + C fast path function for 'over_n_1_8888' + + This function is needed to improve performance of xfce4 terminal. + Some other applications may potentially benefit too. + +commit a2985da94704af050b4422dca70fd2dd770faba4 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Oct 27 12:11:05 2009 +0200 + + C fast path function for 'add_1000_1000' + + This function is needed to improve performance of xfce4 terminal. + Some other applications may potentially benefit too. + +commit 5f429e45106d79c48ee102987ef84be54fd421d8 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Oct 23 20:56:30 2009 +0300 + + blitters-test updated to also randomly generate mask_x/mask_y + +commit 0d5562747ce25ecac06f4c44e935662eb6ee328a +Author: André Tupinambá <andrelrt@gmail.com> +Date: Sat Sep 19 23:01:50 2009 -0400 + + Add fast path scaled, bilinear fetcher. + + This adds a bilinear fetcher for the case where the image has a scaled + transformation, does not repeat, and the format {ax}8r8g8b8. 
+ + Results for the swfdec-youtube benchmark + + Before: + + [ # ] backend test min(s) median(s) stddev. count + [ 0] image swfdec-youtube 7.841 7.915 0.72% 6/6 + + After: + + [ # ] backend test min(s) median(s) stddev. count + [ 0] image swfdec-youtube 6.677 6.780 0.94% 6/6 + + These results were measured on a faster machine than the ones in the + previous commit, so the numbers are not comparable. + + Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com> + +commit 88323c5abe68906472049537b54b0e7eea343f43 +Author: André Tupinambá <andrelrt@gmail.com> +Date: Sat Sep 19 09:32:37 2009 -0400 + + Speed up bilinear interpolation. + + Speed up bilinear interpolation by processing more than one component + at a time on 64 bit architectures, and by precomputing the dist{ixiy} + products on 32 bit architectures. + + Previously bilinear interpolation for one pixel would take 24 + multiplications. With this improvement it takes 12 on 64 bit, and 20 + on 32 bit. + + This is a small but consistent speedup on the swfdec-youtube + benchmark: + + [ # ] backend test min(s) median(s) stddev. count + Before: + [ 0] image swfdec-youtube 18.010 18.020 0.09% 4/5 + + After: + [ 0] image swfdec-youtube 17.488 17.584 0.22% 5/6 + + Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com> + +commit f0c157f888185279681bad305973f246dca2e535 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Sep 27 09:41:25 2009 -0400 + + Extend scaling-test to also test bilinear filtering. + +commit eab882ef38509bfc9719fcee6020d882ee030694 +Author: Jeremy Huddleston <jeremyhu@freedesktop.org> +Date: Wed Oct 21 12:47:27 2009 -0700 + + This is not a GNU project, so declare it foreign. 
+ + On Wed, 2009-10-21 at 13:36 +1000, Peter Hutterer wrote: + > On Tue, Oct 20, 2009 at 08:23:55PM -0700, Jeremy Huddleston wrote: + > > I noticed an INSTALL file in xlsclients and libXvMC today, and it + > > was quite annoying to work around since 'autoreconf -fvi' replaces + > > it and git wants to commit it. Should these files even be in git? + > > Can I nuke them for the betterment of humanity and since they get + > > created by autoreconf anyways? + > + > See https://bugs.freedesktop.org/show_bug.cgi?id=24206 + + As an interim measure, replace AM_INIT_AUTOMAKE([dist-bzip2]) with + AM_INIT_AUTOMAKE([foreign dist-bzip2]). This will prevent the generation + of the INSTALL file. It is also part of the 24206 solution. + + Signed-off-by: Jeremy Huddleston <jeremyhu@freedesktop.org> + +commit dc46ad274a47d351bacf3c2167c359d23dbaf8b3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Oct 19 20:32:37 2009 -0400 + + Make walk_region_internal() use 32 bit dimensions + +commit bb3698d47925db77925810c3128be1641f455c60 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Oct 19 20:31:54 2009 -0400 + + Make pixman_compute_composite_region32() use 32 bit dimensions + +commit 895c281c4094844f9f955621e4ac1e4394d865f0 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Oct 19 20:30:22 2009 -0400 + + Change prototype of _pixman_walk_composite_region from int16_t to int32_t + +commit 9cd470665b1bb7e0cb810f2457d3788f7c981072 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Oct 19 20:27:36 2009 -0400 + + Remove unused color_table and color_table_size fields + +commit 8186937637f25088e61c22a3ce1740a56f5d6e13 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Oct 18 03:02:28 2009 -0400 + + Remove BOUNDS() macro. + + It was bounding the clip region to INT16_MIN, INT16_MAX, but this was + a relic from the X server. 
We don't need it since we are already + restricting the clip region to the geometry of the destination. + +commit 9bcfc0ac547277d3a3f4e5ff0922450566ad8be8 +Author: Benjamin Otte <otte@gnome.org> +Date: Wed Sep 30 08:02:39 2009 +0200 + + --enable-maintainer-mode is gone from configure, so remove it + +commit fa49ef81f7b39d32b626ed235958448835e2d2c2 +Author: Benjamin Otte <otte@gnome.org> +Date: Thu Sep 17 13:19:04 2009 +0200 + + Add default cases for all switch statements + + Fixes compilation with -Wswitch-default. Compilation with -Wswitch-enums + works fine as is. + +commit 5c3ef4e9798f3395c55fe7b57df32f77c0de2c71 +Author: Benjamin Otte <otte@gnome.org> +Date: Thu Sep 17 13:18:22 2009 +0200 + + Fix compile warnings + +commit ad484078854572cf640d7ffbb66f1e99328e79b8 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Jul 27 01:21:26 2009 +0300 + + ARM: Removal of unused/broken NEON code + +commit 358f96d20219b4460bfd8ecf88e69ff10044b577 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Oct 8 13:01:27 2009 -0400 + + Fix double semicolon; pointed out by Travis Griggs + +commit 93acc10617c88fbf933120c6980ae8ef80cf94f0 +Author: Gerdus van Zyl <gerdusvanzyl@gmail.com> +Date: Tue Sep 29 12:28:03 2009 +0200 + + Fix build with Visual Studio 2008 + + moved __m64 ms declaration in sse2_composite_over_x888_8_8888 to top + of function so it compiles with visual studio 2008 + +commit f135f74ff3a4f55262b611b452566daff5e936ce +Author: Andrea Canciani <ranma42@gmail.com> +Date: Sun Sep 27 11:40:52 2009 +0200 + + Fix composite on big-endian systems. + + Data narrower than 32bpp is padded to an unsigned long and on + big-endian systems this shifts the value by the padding bits. + +commit 15c14691a71daa29c86cce40ac0b4a14acf8f2fc +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Sep 26 13:12:14 2009 -0400 + + Fix fetch-test for big-endian systems. + + Data narrower than 32bpp should be stored in the correct + endian. 
Reported by Andrea Canciani. + +commit 02d70998885065bcea55cb2a8bfa75473083bc17 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Sep 24 08:57:26 2009 -0400 + + Add missing break in composite.c + +commit 8ce004af3670a183c78fc7c61fbfcfd8c7f17e54 +Author: Guillem Jover <guillem@hadrons.org> +Date: Tue Sep 22 19:51:13 2009 +0200 + + pixman: Update .gitignore + + Generalize to catch all .pc files. Add more tests. + + Signed-off-by: Guillem Jover <guillem@hadrons.org> + +commit 59e877cffe6497d865031d79e9a742414407d544 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Sep 24 08:10:00 2009 -0400 + + In the compositing test, Don't try to use component alpha with solid fills. + + It's not supported yet. + +commit 16adb09c8a003936a1ef17042776a725c9aa6813 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Sep 18 11:33:18 2009 -0400 + + Update CRC value in blitters-test for the new bug fixes + +commit e156964d3e005be3dbc9ff80580d98c6dd617afd +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Sep 18 08:16:56 2009 -0400 + + Fix bug in blitters-test with BGRA formats. + + When masking out the x bits, blitter-test would make the incorrect + assumption that the they were always in the topmost position. This is + not correct for formats of type PIXMAN_TYPE_BGRA. + +commit eb72bfb97d10283964c070f0a0e26f0520a22ff3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Sep 18 09:43:14 2009 -0400 + + Fix bugs in fetch_*_b2g3r3(). + + The red channel should only be shifted five positions, not six. + +commit b4f6113cb975110c33f607aa39d19290f58be398 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Sep 24 07:48:46 2009 -0400 + + Fix bugs in a1b2g1r1. + + The first bug is that it is treating the input as if it were a1r1g1b1; + the second one is that the red channel should only be shifted two + bits, not three. 
+ +commit efdf15e677d506c2049a34e92eb2172712101afa +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Sep 18 08:48:04 2009 -0400 + + Fix shift bug in fetch_scanline/pixel_a2b2g2r2() + + 0x30 * 0x55 is 0xff0, so the red channel should be shifted four bits, + not six. + +commit 679c2dabda094491599ce770ddba11611d08efc8 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Sep 18 08:13:46 2009 -0400 + + Fix four bit formats. + + The original Render code used to index pixels with their position in + bits in the image. When the scanline code was introduced pixels were + indexed in bytes, but the FETCH/STORE_4/8 macros still assumed bits. + + This commit fixes that by making the FETCH/STORE_4 macros first + convert the index to bit position. + +commit 3d1714cd1f8ae7d47ad5f01a1140133ae71a00e2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Sep 20 16:50:37 2009 -0400 + + Hide PIXMAN_OP_NONE and PIXMAN_N_OPERATORS behind PIXMAN_INTERNAL_API. + + These cannot sanely be used by applications since they may change in + new versions. + +commit 0683f34c418bc5fb2fa5e2a41bdc102195edbe67 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Sep 18 08:06:32 2009 -0400 + + Add a few notes about testing to TODO + +commit 48ba7d946146ea7b0e33e963942bedde22a3b806 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Sep 18 09:11:04 2009 -0400 + + Fix alpha handling for 10 bpc formats. + + These generally extracted the 2 bits of alpha, then shifted them 62 + bits and replicated across 16 bits. Then they were shifted another 48 + bits, making the resulting alpha channel 0. + +commit c673c83e070ed2392c00716fe20a80a798588b39 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Sep 24 05:22:33 2009 -0400 + + Return result from pixman_image_set_transform(). + + Previously it would always return TRUE, even when malloc() had failed. 
+ +commit eb16d171882d200a390345ec8a3db1b931e9676a +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Sep 15 07:43:23 2009 -0400 + + Revert "Enable component alpha on solid masks." + + For consistency we will probably want to allow component alpha to be + set on all masks at some point, but this commit only enabled it for + solid images. + + This reverts commit 29e22cf38e8abc54b9dddbdeb3909d02866a82a0. + +commit b96e37f8d0f5c94f5d117e1622d7cae7246d6345 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Sep 15 13:16:17 2009 +0100 + + [Makefile] Set the SIMD specific CFLAGS for inspecting asm. + +commit 273e89750b3ce901fa6769a835fa441ee986d508 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Sep 14 18:48:32 2009 -0400 + + Remove optimization for 0xffffffff and 0xff the add_n_8888_8888_ca fast path + + This is an ADD operation, not an OVER. Fixes bug 23934, reported by + Siarhei Siamashka. + +commit ec7c1affcc66c12af1fc29fd000f9885a5d48320 +Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> +Date: Mon Sep 14 22:52:29 2009 +0300 + + Don't prefetch from NULL in the SSE2 fast paths. + + On an Athlon64 box prefetch from NULL slows down + the rgba OVER rgba fast for predominantly solid sources + by up to 3.5x in the one-rounded-rectangle test case + when run using a tiling polygon renderer. This patch + conditionalises the prefetches of the mask everywhere + where the mask pointer may be NULL in a fast path. + +commit 1b5269a585d8dcdb8f5ff9f71113bcf2d5efab26 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Sep 14 06:58:03 2009 -0400 + + Reformat test/composite.c to follow the standard coding style. + +commit 0431a0af6c566c8990c88dc22fd0dc76fdd72cf2 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Sep 13 18:02:10 2009 +0100 + + [test] Exercise repeating patterns for composite. 
+ +commit c28e39f17a87cdaa7ce43ec99f2f764cc935f484 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Sep 13 15:04:30 2009 +0100 + + [build] Add rule to generate asm for inspection. + +commit 823bb1a9430bc0c4735ffefbbe19efe45887e32c +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Sep 13 15:04:54 2009 +0100 + + [sse2] Don't emit prefetch 0 for an absent mask + +commit 8f2daa7ca25de754522abfb9ed1158d090f00780 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Sep 13 15:07:08 2009 +0100 + + [test] Add composite test from rendercheck + + Iterate over all destination formats for dst, src and composite and + compare the result of all oprators with a selection of colours. + +commit cda0ee5165812b86a052ceb01830a1d42d02a03b +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Aug 27 09:19:14 2009 +0100 + + build: Suppress verbose compile lines + + Compile warnings are being lost in the sea of noise. Automake-1.11 finally + introduced AM_SILENT_RULES to suppress the echoing of the compile line for + every object. Enable this to bring sanity to the pixman build. + +commit 56cc06f89b7db733e5036a00df7aea27cf8d0951 +Merge: 8aff99e 8035df8 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Sep 13 16:32:27 2009 +0100 + + Merge branch '0.16' + + Conflicts: + configure.ac + pixman/pixman-sse2.c + +commit 8035df8bcb01c2df42b8adf8b96c7ac796f384cc +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Aug 16 12:16:46 2009 +0100 + + Remove duplicated declaration + + The pixman_tranform_pixman_f_transform() declaration is repeated 4 lines + down. + +commit 29e22cf38e8abc54b9dddbdeb3909d02866a82a0 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Sep 13 16:26:29 2009 +0100 + + Enable component alpha on solid masks. 
+ +commit 9fe2628702785e8db45593709c0aec54043a50e7 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Sun Sep 13 16:26:52 2009 +0100 + + [sse2] Bit-reversing typo: src != dst + +commit 8aff99e231dcb83fa6c08e760711b0a1e979d012 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Sep 10 21:33:24 2009 -0400 + + Fix off-by-one error in source_image_needs_out_of_bounds_workaround() + + If extents->x2/y2 are equal to image->width/height, then the clip is + still inside the drawable, so no workaround is necessary. + +commit fefe2a5d24591846281bb9bf0e85d42822e1716e +Author: Gaetan Nadon <memsize@videotron.ca> +Date: Tue Sep 8 20:06:19 2009 -0400 + + Remove unused generated libcomp.pc #23801 + +commit 2186bc89486f9f11161b0db280a869c6849c867e +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Sep 4 14:14:00 2009 +0300 + + Change CFLAGS order for PPC and ARM configure tests + + CFLAGS are always appended to the end of gcc options when compiling + sources in autotools based projects. Configure tests should do the + same. Otherwise build fails on PPC when using CFLAGS="-O2 -mno-altivec" + for example. Similar problem affects ARM. + +commit 15304e3cddd6568ba6d5d1d3030568c3db7b05cc +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Sep 2 19:46:47 2009 +0300 + + ARM: Remove fallback to ARMv6 implementation from NEON delegate chain + + This can help to fix build problems with '-mthumb' gcc option in CFLAGS. + ARMv6 optimized code can't be compiled for thumb (because of its inline + assembly) and gets automatically disabled in configure. Reference + to it from NEON optimized code resulted in linking problems. + + Every ARMv6 optimized fast path function also has a better NEON + counterpart, so there is no need to fallback to ARMv6. Shorter + delegate chain should additionally result in a bit better performance. 
+ +commit 2679d93e22b4f3922a39bc53680f1aab6ea7c73c +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Sep 4 14:14:00 2009 +0300 + + Change CFLAGS order for PPC and ARM configure tests + + CFLAGS are always appended to the end of gcc options when compiling + sources in autotools based projects. Configure tests should do the + same. Otherwise build fails on PPC when using CFLAGS="-O2 -mno-altivec" + for example. Similar problem affects ARM. + +commit 91232ee40d8499cb91ad27717f751f15c805b4a6 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Sep 2 19:46:47 2009 +0300 + + ARM: Remove fallback to ARMv6 implementation from NEON delegate chain + + This can help to fix build problems with '-mthumb' gcc option in CFLAGS. + ARMv6 optimized code can't be compiled for thumb (because of its inline + assembly) and gets automatically disabled in configure. Reference + to it from NEON optimized code resulted in linking problems. + + Every ARMv6 optimized fast path function also has a better NEON + counterpart, so there is no need to fallback to ARMv6. Shorter + delegate chain should additionally result in a bit better performance. + +commit 61b616067c3e8b2ff84fbf57f479a90cc9fa5344 +Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> +Date: Mon Aug 31 23:02:53 2009 +0100 + + Default to optimised builds when using a Sun Studio compiler. + + Autoconf's AC_PROG_CC sets the default CFLAGS to -O2 -g for + gcc and -g for every other compiler. This patch defaults + CFLAGS to the equivalent -O -g when we're using Sun Studio's cc + if the user or site admin hasn't already set CFLAGS. + +commit 20acda6fde8441e18aab33980a33b099a16063eb +Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> +Date: Mon Aug 31 20:27:32 2009 +0100 + + Work around a Sun Studio 12 code generation bug involving _mm_set_epi32(). 
+ + Calling a static function wrapper around _mm_set_epi32() when not + using optimisation causes Sun Studio 12's cc to emit a spurious + floating point load which confuses the assembler. Using a macro wrapper + rather than a function steps around the problem. + +commit e30c0037d44bf76a26182080be24c7037d7be5b5 +Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> +Date: Mon Aug 31 20:24:04 2009 +0100 + + Work around differing _mm_prefetch() prototypes on Solaris. + + Sun Studio 12 expects the address to prefetch to be + a const char pointer rather than a __m128i pointer or + void pointer. + +commit 29e7d6063f7b93dd4fde3d42a2931ec0f55158c3 +Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> +Date: Mon Aug 31 23:02:53 2009 +0100 + + Default to optimised builds when using a Sun Studio compiler. + + Autoconf's AC_PROG_CC sets the default CFLAGS to -O2 -g for + gcc and -g for every other compiler. This patch defaults + CFLAGS to the equivalent -O -g when we're using Sun Studio's cc + if the user or site admin hasn't already set CFLAGS. + +commit e7018685f0618640221ebc61446ee98ea3056bbb +Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> +Date: Mon Aug 31 20:27:32 2009 +0100 + + Work around a Sun Studio 12 code generation bug involving _mm_set_epi32(). + + Calling a static function wrapper around _mm_set_epi32() when not + using optimisation causes Sun Studio 12's cc to emit a spurious + floating point load which confuses the assembler. Using a macro wrapper + rather than a function steps around the problem. + +commit 04ade7b68c620a62daff6212eee4d1b96bfbc3c9 +Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> +Date: Mon Aug 31 20:24:04 2009 +0100 + + Work around differing _mm_prefetch() prototypes on Solaris. + + Sun Studio 12 expects the address to prefetch to be + a const char pointer rather than a __m128i pointer or + void pointer. 
+ +commit 698b686d58c510e1b8a9183750d00cbd9ed504b2 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Aug 28 22:34:21 2009 +0300 + + ARM: workaround for gcc bug in vshll_n_u8 intrinsic + + Some versions of gcc (cs2009q1, 4.4.1) incorrectly reject + shift operand having value >= 8, claiming that it is out of + range. So inline assembly is used as a workaround. + +commit 3e228377f9e7159a52a3716d8e4930c5a9dbb1af +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Aug 28 22:34:21 2009 +0300 + + ARM: workaround for gcc bug in vshll_n_u8 intrinsic + + Some versions of gcc (cs2009q1, 4.4.1) incorrectly reject + shift operand having value >= 8, claiming that it is out of + range. So inline assembly is used as a workaround. + +commit 632125d4108f9a53d625a6b997832fa45a295807 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 2 08:27:33 2009 -0400 + + Enable the x888_8_8888 sse2 fast path. + +commit b02b644d7017f794be2296c6354e44fd119d2477 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Sep 2 16:09:32 2009 -0400 + + Set version number to 0.16.1 + +commit d465f854b3d5f2ffcc122aebfbead2d64cca7169 +Author: Makoto Kato <m_kato@ga2.so-net.ne.jp> +Date: Tue Sep 1 10:59:05 2009 +0900 + + Add CPU detection for VC++ x64 + + VC++ x64 has no inline assembler and x64 mode supports SSE2. + So, it is unnecessary to call cpuid. + +commit 097342a65d81fb957dfc17486f615f887540e146 +Author: Makoto Kato <m_kato@ga2.so-net.ne.jp> +Date: Tue Sep 1 10:59:05 2009 +0900 + + Add CPU detection for VC++ x64 + + VC++ x64 has no inline assembler and x64 mode supports SSE2. + So, it is unnecessary to call cpuid. + +commit 64085c91b6a1deca4007b18d63b707b896653ee9 +Author: Søren Sandmann Pedersen <ssp@dhcp-100-3-19.bos.redhat.com> +Date: Tue Sep 1 08:23:23 2009 -0400 + + Change names of add_8888_8_8 fast paths to add_n_8_8 + + The source is solid in those. 
+ +commit 7af985a69a9147e54dd5946a8062dbc2e534b735 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 28 08:14:04 2009 -0400 + + Post-release version bump + +commit 57812465bf975e943e78d59dabbd5e6bb2ad87ef +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 28 07:55:30 2009 -0400 + + Pre-release version bump + +commit 9e1a34a0d177e8c2381f419b0a04310da8cdde2b +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Aug 28 06:31:06 2009 -0400 + + _pixman_run_fast_path: typo + + This is one example of a compiler warning that was lost amid the build + noise. + + The error here is that in a list of required conditions we used ';' + instead of '&&' with the result of continuing to use the fast-path + even if we had a wide mask. + + Another error is that it was testing src, not mask as it should. + +commit 83d607cbf1d3852f91b52a427bee30fffc3029e7 +Author: Makoto Kato <m_kato@ga2.so-net.ne.jp> +Date: Fri Aug 28 04:09:15 2009 -0400 + + Remove spurious spaces in pixman-x64-mmx-emulation.h + +commit ce966f4d8e8aa9a4465e2ab28666bae891194a72 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Aug 12 14:08:58 2009 -0400 + + Check if we have posix_memalign() in configure.ac. [23260, 23261] + + Fall back to malloc() in blitters-test.c if we don't. + +commit 29c2ae4a0cf924cb011467687a4c43237fb2316c +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Aug 12 20:22:24 2009 +0300 + + ARM: a fix to pass blitters-test for 'neon_composite_over_n_8_0565' + + Inline assembly for handling <8 pixels width did not pass blitters-test. + Fortunately gcc has no problems compiling alternative implementation + which is using RVCT style intrinsics, so it can be used instead. 
+ +commit ed4598f08a359346d14fe5b2bbb7b74c40332b18 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Aug 11 14:03:24 2009 -0400 + + Post-release version bump + +commit bfdae053eaff528aa869bc05a0520ab22d540a08 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Aug 11 13:56:16 2009 -0400 + + Pre-release version-bump + +commit d6016d406a649f7a95bec2a477dfd89ba280188d +Merge: 93923c6 e084351 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Aug 11 02:04:40 2009 -0400 + + Merge branch 'blitter-test' + +commit 93923c626f2df18e29e215410e6d4bb2f5c1616f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Aug 10 20:47:36 2009 -0400 + + Fix x/y mixup in bits_image_fetch_pixel_convolution() + + Bug 23224, reported by Michel Dänzer. + +commit e084351b13faad6a3ba67808b5721957b51d16f0 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 00:45:53 2009 -0400 + + Update CRC value in blitters-test. + + At this point, the SIMD, SSE2, MMX and general implementations all + agree. + +commit ba5c5325e77b36374d3be22bd92816c332a321bb +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 00:25:56 2009 -0400 + + Various formatting fixes + +commit cc5c59b3f25dc595ce17a876e89ca549bb477e46 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Aug 5 16:28:10 2009 -0400 + + Add the ability to print intermediate CRC values + +commit 0bc4adae3eb758a2cd9026397c284bd6cc7bcd65 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Aug 5 15:53:33 2009 -0400 + + Reenable commented-out tests in blitter-test. + + The crashes and valgrind issues are all fixed at this point. 
+ +commit 9ee18806a944ddde36dc1b045f89f02d025cbe48 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Aug 3 00:01:01 2009 +0300 + + One more update to blitters-test - use aligned memory + allocations in order to make reproducibility + of alignment sensitive bugs more deterministic + Also testing of masks is reenabled + +commit 4fc0f9dd456bb4ad1f47e1733b02a3b491f425ed +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sat Aug 1 02:20:12 2009 +0300 + + HACK: updated test to better cover new neon optimizations + +commit 67769ad5bf15450d0fd0d83643e3533a9f563916 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Jul 22 01:29:51 2009 +0300 + + Test program for stressing the use of different formats and operators + + The code and overall method is mostly based on scaling-test. This one + focuses on trying to stress as many different color formats and types + of composition operations as possible. + + This is an initial implementation which may need more tuning. Also + not all color format and operator combinations are actually used. + + When cpu specific optimizations are disabled, this test provides + identical deterministic results on x86, PPC and ARM. + + Script blitters-test-bisect.rb now works in non-stop mode, until + it finds any problem. This allows to run it for example overnight + in order to test a lot more variants of pixman calls and increase + chances of detecting problems in pixman. Just like with scaling-test, + running blitters-test binary alone with no command line arguments + runs a small predefined number of tests and compares checksum + with a reference value for quick verification. 
+ +commit 51f597ad3258dd85b4620ac2bf0df8ca2e0ed182 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 12:00:07 2009 -0400 + + Delete commented out code in pixman-vmx.c + +commit a590eabead0a0c405a7293d8689b9992de5a689b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 11:53:50 2009 -0400 + + Misc formatting fixes for pixman-vmx.c + +commit 0ebb587e2460024fb306597799ae4974441511ec +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 11:48:22 2009 -0400 + + In vmx_combine_atop_reverse_ca() extract alpha after inversing + +commit 3d2f00783f2972ba5311937057ea8d452f942a36 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 11:46:09 2009 -0400 + + Really fix vmx_combine_over_reverse_ca() + + The inverse destination alpha is just one component, not four. + +commit 2f62a4f46c1e99ddb1b7ca6d5db9410d12f32e63 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 11:40:42 2009 -0400 + + Fix vmx_combine_out_reverse_ca() + + The source alpha is just one component, not four. + +commit 7e58323385e442fb2cea207780db5e30be88be96 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 11:38:03 2009 -0400 + + Fix vmx_over_reverse_ca() + + Destination alpha must be extracted after inversing, otherwise we end + up with 0xFFs in the rgb channels. + +commit 2382bd9e2724944a05ce8a581e9ddc31e299a0c6 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 11:35:20 2009 -0400 + + Multiply with the alpha of dest, not inverse alpha + +commit 498df0f0bf2437130ed305fb757ae0fae90bebb7 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 11:32:31 2009 -0400 + + Fix vmx_combine_vmx_atop_ca() + + It didn't compute the mask correct before. + +commit 9650cd7432ef03c05895df04940e2ab6245f2618 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 11:26:23 2009 -0400 + + Fix vmx_combine_over_ca(). 
+ + In the non-vector code, the mask needs to be multiplied with source + alpha. + +commit 38b9589fe6b14c822a2a4000df364d132e390f7a +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 11:21:43 2009 -0400 + + In vmx_combine_out_ca() multiply with the alpha of the negated vdest. + +commit de180baba3a3e7eedeb09ff7d5f4d3eff3ffc6f4 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 11:16:31 2009 -0400 + + Fix vmx_combine_out_ca() + + It should multiply with just the destination alpha channel, not all + four channels. + +commit 5191421d1f143cca76afa1f4fbffa68f89a5d393 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 11:07:16 2009 -0400 + + Do the full four-component IN computation in vmx_combine_in_ca(). + +commit 27fb8378fdae930475cf4528c539a78bfbd751c5 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 10:54:16 2009 -0400 + + Fix bug in vmx_combine_xor_ca() + + The destination needs to be inverted before the alpha channel is + extracted; otherwise, the RGB channels of da will be 0xff. + +commit c750667d7ac542dfa922a7970961b7095b44b8d3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 01:07:01 2009 -0400 + + Make pix_multiply bit-exact + +commit 6243a0a015043f39531b98b9e8c4167f8bd47d82 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Aug 6 23:50:32 2009 -0400 + + Change the SSE2 versions of pix_add_multiply() to produce bit-exact results. + +commit 404f4a6f3e71de5e411cb3bb1107d8ffb7c52e62 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Aug 6 23:52:11 2009 -0400 + + Fix a couple of alpha==0 vs src==0 issues in pixman-sse2.c + +commit d9f80370a4d2ab54688e75256b3ea4267d8cc602 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Aug 6 23:05:36 2009 -0400 + + Rename mmx_composite_add_8888_8_8() to mmx_composite_add_n_8_8(). 
+ +commit 04619c3636697684fdd9ada9842845f6c8dd3914 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Aug 6 22:46:50 2009 -0400 + + Fix a couple more alpha==0 vs src==0 bugs in pixman-mmx.c + +commit a075a870fd7e1fa70ae176d5089c695011667388 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Aug 6 22:42:25 2009 -0400 + + Make pix_add_mul() in pixman-mmx.c produce exact results. + + Previously this routine would compute (x * a + y * b) / 255. Now it + computes (x * a) / 255 + (y * b) / 255, so that the results are + bitwise equivalent to the non-mmx versions. + +commit f7463ffafb8876c1f47ed9c527df33d45255e16c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Aug 6 20:29:44 2009 -0400 + + Rewrite the two-component arithmetic macros. + + Previously they were not bit-for-bit equivalent to the one-component + versions. The new code is also simpler and easier to read because it + factors out some common sub-macros. + + The x * a + y * b macro now only uses four multiplications - the + previous version used eight. + +commit 04ae08992f6381a8ffb50d8cba37753fdb58e3bf +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Aug 6 20:41:04 2009 -0400 + + Fix a bunch of srca == 0 checks that should be src == 0 in pixman-mmx.c + +commit 8bb58a3ce83d6b9c1f6796ce8e62450bdaa52cf0 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Aug 5 21:24:50 2009 -0400 + + Don't run fast paths if the format requires wide compositing. + + This could happen because the wide formats would still be considered + solid if the image was 1x1 and repeating. + +commit d937b622389797a8c605b2cc50c24ca759dc57d2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Aug 5 21:16:14 2009 -0400 + + Fix bug in combine_mask_alpha_ca() + + If the mask was 0xffffffff, the source would end up being shifted + twice by A_SHIFT. 
+ +commit 0d576b965c34a6d89b00f7b93dba6a7b8737c731 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Aug 5 20:40:36 2009 -0400 + + Fix another case of changing the solid source. + + This time in fast_path_composite_n_8888_8888(). + +commit 8b82cbb69197f9c367069a77ba992f3163d40230 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Aug 5 20:31:41 2009 -0400 + + Fix incorrect optimization in combine_over_ca(). + + Previously the code assumed that an alpha of 0 meant that no change + would take place. This is incorrect because an alpha of 0 can happen + as the result of the source having alpha=0, but rgb != 0. + +commit ec8b36f01030fd2fa67595f2aef4ca568b060899 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Aug 5 18:18:37 2009 -0400 + + Don't change the constant source in fast_composite_over_n_8888_0565. + +commit de8fff746bfa80278f85859bef2dc0ab166f7a69 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Aug 5 16:17:52 2009 -0400 + + Fix bugs in combine_over_reverse_ca(). + + The computation cannot be optimized away when alpha is 0 because that + can happen when the source has alpha zero and rgb non-zero. + +commit 7b1df41b6110424b8dca9fa655dbc8dd95a76882 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jul 31 17:27:38 2009 -0400 + + Add a dirty bit to the image struct, and validate before using the image. + + This cuts down the number of property_changed calls significantly. + +commit 942c4ac28209381668208a39ccc9aec4f11bf63f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jul 31 10:39:41 2009 -0400 + + Add sse2 version of add_n_8888_8888() + +commit 23d38201165876c031d314f73e09a75afcac4f00 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jul 31 10:26:10 2009 -0400 + + Add a fast path for the add_n_8888_8888() operation. + + It shows up on gnome-terminal traces. 
+ +commit c606a05213d1fe5d73b39454407414a2a245da39 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jul 31 07:29:31 2009 -0400 + + Move bounds checks for REPEAT_NONE to get_pixel() + + On a P4, this is a large speedup for the swfdec-fill-rate-2xaa trace: + + After: + + [ # ] backend test min(s) median(s) stddev. count + [ 0] image swfdec-fill-rate-2xaa 33.061 33.061 0.00% 1/1 + + Before: + + [ # ] backend test min(s) median(s) stddev. count + [ 0] image swfdec-fill-rate-2xaa 40.342 40.342 0.00% 1/1 + + Pixman 0.14.0 produces this: + + [ # ] backend test min(s) median(s) stddev. count + [ 0] image swfdec-fill-rate-2xaa 36.896 36.896 0.00% 1/1 + +commit 1bec3e8395a307812b25fb195823ac7cf2915340 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jul 30 10:51:38 2009 -0400 + + Remove leftover 0xffffffff in repeat() + +commit 1b98166b016af5fa374ad534d53b772c7fd2c4a5 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jul 30 10:45:18 2009 -0400 + + Remove unused function + +commit 06836d35d26941e826e99fe35e06da50756da641 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jul 30 10:03:44 2009 -0400 + + Misc formatting + +commit 7c8959ea3b2ff3d3abf995b3feccc677e15b4e27 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jul 30 09:58:12 2009 -0400 + + Change all the fetch_pixels() functions to only fetch one pixel. + +commit 31096446b6866de0a85ca6eb4fb68a45b21c4b49 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 28 09:43:12 2009 -0400 + + Add fetch_pixel_raw_32 and fetch_pixel_32 virtual functions. + + By default both are initialized to bits_image_fetch_pixel_raw(), but if + there is an alpha map, then fetch_pixel_32() is set to + bits_image_fetch_pixel_alpha(). 
+ +commit a233b332cd9408d35e57a400874cca6188347cc2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 28 09:12:51 2009 -0400 + + Various renamings and clean-ups + +commit 073399b09f073c44dd10b027788c09eddfcdf2e0 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 28 08:58:41 2009 -0400 + + Change bits_image_fetch_alpha_pixels() to fetch just one pixel. + +commit 6d1dfc3945917b507d40f1f3c1b1cf07858d18dd +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 28 08:44:40 2009 -0400 + + Change bits_image_fetch_pixels_convolution() to fetch just one pixel. + +commit b3f849f74f848c407afda1be15b966e1d6eda745 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 28 08:33:28 2009 -0400 + + Change bits_image_fetch_bilinear_pixels() to fetch one pixel at a time. + +commit a37383a2c646ee10ebe36d03df6bd1c0f8a75052 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 28 08:03:44 2009 -0400 + + Make the repeat routine work on only one coordinate at a time. + +commit a4f3fd3b2592b1b4791075187016ad444c2d60d4 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 28 07:55:27 2009 -0400 + + Make bits_image_fetch_nearest() return one pixel. + + Previously it would work on a buffer of coordinates. + +commit f382865ebe5e1e8d4b5299b908dab9b719fcb8ec +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 28 07:42:34 2009 -0400 + + Change bits_image_fetch_transformed() to work one pixel at a time. + + Previously, it would generate a buffer of coordinates, then pass that + off to a pixel fetcher, but this caused a large performance regression + with the swfdec-fill-rate-2xfsaa cairo trace. + + This is the first step towards fixing that. 
+ +commit 4ef2807c3a6697731ada43ddad2fa915ed7cfe11 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Aug 7 00:11:20 2009 -0400 + + Only define PIXMAN_TIMERS if timers are actually enabled [bug 23169] + +commit 9dec2e352b24bdccaac4f570b8cf12e61a9194ee +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 28 09:58:52 2009 -0400 + + Various updates to the CODING_STYLE document + +commit 2abd56e9e3d012fcb0b7c6d459ed4831464c0f2f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 28 04:05:26 2009 -0400 + + Add a CODING_STYLE document based on the one from cairo. + +commit fdd01bcbd473f7a46c66ce8538657e32400974ed +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jul 22 04:51:08 2009 -0400 + + Remove a couple of unused variables + +commit 845910c200db3f279229da67fbd330d903776777 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jul 22 04:32:07 2009 -0400 + + Rename source_pict_class_t to source_image_class_t + +commit edd476d5be8f248ea21d9d80cf1986d0ccdecc3b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jul 22 04:28:08 2009 -0400 + + Replace a bunch of 'pict's with 'image' + +commit 11d888a2837b3fe309348126b4f7c56df559df4e +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Jul 24 09:36:08 2009 +0100 + + Explain how we can simplify the radial gradient computation + + Soeren rightfully complained that I had removed all the comments from + André's patch, most importantly that explain why the transformation is + valid. So add a few details to show that B varies linearly across the + scanline and how we can therefore reduce the per-pixel cost of evaluating + B. 
+ +commit 20d2df03059d6a5941464d80e81e8116ebf4dbfe +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Thu Jul 23 19:08:40 2009 +0100 + + Fix inversion of radial gradients when r2 > r1 + + Fixes: Bug 22908 -- Invalid output of radial gradient + http://bugs.freedesktop.org/show_bug.cgi?id=22908 + + We also include a modified patch by André Tupinambá <andrelrt@gmail.com>, + to pull constant expressions out of the inner radial gradient walker. + +commit 84b7df09394ac7237fb42fe25c0fbde77e065a16 +Author: Benjamin Otte <otte@gnome.org> +Date: Thu Jul 23 09:54:49 2009 +0200 + + Don't warn for empty rectangles, only degenerate ones + +commit 1435c8aa3db3b6bde26216e260cc94baba225664 +Author: Benjamin Otte <otte@gnome.org> +Date: Tue Jul 21 15:00:52 2009 +0200 + + Log errors for invalid rectangles passed to region code + +commit 1796e6bf17a5d20039e098c4e352cd2765ed444e +Author: Benjamin Otte <otte@gnome.org> +Date: Tue Jul 21 14:57:59 2009 +0200 + + Simplify code that logs errors + +commit 85d56f3f7cb9c90c5af52b28fb6f7c1b14f09f07 +Author: Benjamin Otte <otte@gnome.org> +Date: Tue Jul 21 14:50:30 2009 +0200 + + Make the text when reporting a broken region more useful + +commit a77d4ffeb661d4d75109fc368bded26843662259 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 21 07:24:40 2009 -0400 + + Post-release version bump + +commit f3ac1368775542e09f3741d2ad7b72af20bd9663 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 21 07:20:57 2009 -0400 + + Pre-release version bump + +commit 7c56911e3b5b97b26dceff9b68d9fed32693d57b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 21 07:01:10 2009 -0400 + + Don't assert when malformed regions are detected. + + Instead print a message to stderr so that it will end up in the X log + file. 
+ +commit f9660ce29ed072c6cbaec711c5d18b9f0ba113ae +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 21 04:23:56 2009 -0400 + + Fix another search and replace issue + +commit b3196b63274134a594fc091ec2f8be3b44734411 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 21 04:18:35 2009 -0400 + + Fix search-and-replace issue pointed out by Koen Kooi. + +commit 0ff5733c16804d5b10782556eeeade7061924846 +Author: George Yohng <georgefd@oss3d.com> +Date: Tue Jul 21 03:43:42 2009 -0400 + + Add implementation of MMX __m64 functions for MSVC x64. + + Microsoft C++ does not define __m64 and all related MMX functions in + x64. However, it succeeds in generating object files for SSE2 code + inside pixman. + + The real problem happens during linking, when it cannot find MMX functions + (which are not defined as intrinsics for AMD64 platform). + + I have implemented those missing functions using general programming. + + MMX __m64 is used relatively scarcely within SSE2 implementation, and the + performance impact probably is negligible. + + Bug 22390. + +commit 0b95afd259bb839a026955e7fda15b44fa22a805 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Mon Jul 20 14:07:18 2009 +0100 + + Fix read of BITS members from a solid image. + + During the fast-path query, the read_func and write_func from the bits + structure are queried for the solid image. 
+ + ==32723== Conditional jump or move depends on uninitialised value(s) + ==32723== at 0x412AF20: _pixman_run_fast_path (pixman-utils.c:681) + ==32723== by 0x4136319: sse2_composite (pixman-sse2.c:5554) + ==32723== by 0x4100CD2: _pixman_implementation_composite + (pixman-implementation.c:227) + ==32723== by 0x412396E: pixman_image_composite (pixman.c:140) + ==32723== by 0x4123D64: pixman_image_fill_rectangles (pixman.c:322) + ==32723== by 0x40482B7: _cairo_image_surface_fill_rectangles + (cairo-image-surface.c:1180) + ==32723== by 0x4063BE7: _cairo_surface_fill_rectangles + (cairo-surface.c:1883) + ==32723== by 0x4063E38: _cairo_surface_fill_region + (cairo-surface.c:1840) + ==32723== by 0x4067FDC: _clip_and_composite_trapezoids + (cairo-surface-fallback.c:625) + ==32723== by 0x40689C5: _cairo_surface_fallback_paint + (cairo-surface-fallback.c:835) + ==32723== by 0x4065731: _cairo_surface_paint (cairo-surface.c:1923) + ==32723== by 0x4044098: _cairo_gstate_paint (cairo-gstate.c:900) + ==32723== Uninitialised value was created by a heap allocation + ==32723== at 0x402732D: malloc (vg_replace_malloc.c:180) + ==32723== by 0x410099F: _pixman_image_allocate (pixman-image.c:100) + ==32723== by 0x41265B8: pixman_image_create_solid_fill + (pixman-solid-fill.c:75) + ==32723== by 0x4123CE1: pixman_image_fill_rectangles (pixman.c:314) + ==32723== by 0x40482B7: _cairo_image_surface_fill_rectangles + (cairo-image-surface.c:1180) + ==32723== by 0x4063BE7: _cairo_surface_fill_rectangles + (cairo-surface.c:1883) + ==32723== by 0x4063E38: _cairo_surface_fill_region + (cairo-surface.c:1840) + ==32723== by 0x4067FDC: _clip_and_composite_trapezoids + (cairo-surface-fallback.c:625) + ==32723== by 0x40689C5: _cairo_surface_fallback_paint + (cairo-surface-fallback.c:835) + ==32723== by 0x4065731: _cairo_surface_paint (cairo-surface.c:1923) + ==32723== by 0x4044098: _cairo_gstate_paint (cairo-gstate.c:900) + ==32723== by 0x403C10B: cairo_paint (cairo.c:2052) + +commit 
c7b84f8b043018368fade4ad13730cfcaaf5c8cc +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 21 00:17:15 2009 -0400 + + Only apply the workaround if the clip region extends beyond the drawable. + + This works because the X server always attempts to set a clip region + within the bounds of the drawable, and it only fails at it when it is + computing the wrong translation and therefore needs the workaround. + +commit 6bd17f1e9861693262fa88bfeff5d3279b3f6e7d +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 20 23:46:06 2009 -0400 + + Rework the workaround for bogus X server images. + + Bug 22844 demonstrates that it is not sufficient to play tricks with + the clip regions to work around the bogus images from the X + server. The problem there is that if the operation hits the general + path and the destination has a different format than a8r8g8b8, the + destination pixels will be fetched into a temporary array. But because + those pixels would be outside the clip region, they would be fetched + as black. The previous workaround was relying on fast paths fetching + those pixels without checking the clip region. + + In the new scheme we work around the problem at the + pixman_image_composite() level. If an image is determined to need a + work around, we translate both the bits pointer, the coordinates, and + the clip region, thus effectively undoing the X server's broken + computation. + +commit dfdb8509e2160a0db7d72e775dd348090e6fb968 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 20 22:45:47 2009 -0400 + + Add test case for bug 22844. 
+ +commit 96340123eba05bff85433bb2db3a7ad80c8e57ba +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 21 01:24:43 2009 -0400 + + Fix typo in sse2_combine_over_pixbuf_0565() + +commit 767542cfb955ba22dad1259eff8a2fe16e7b8ba4 +Author: Adrian Bunk <adrian.bunk@movial.com> +Date: Mon Jul 20 20:16:32 2009 -0400 + + Fix NEON build for older ARM CPUs + + The pld instruction used in the NEON assembler code is only available + for ARMv5e and >= ARMv6. + + Set -mcpu=cortex-a8 when compiling the source file (similar to what is + already done for the SIMD build). + +commit 184cd80aa46dd9d8bd023d3b70a345330b72d7e7 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 20 19:56:46 2009 -0400 + + Some formatting changes to pixman-arm-neon.c + +commit 5d2c527a2234d34b6269c561b08ebcaabf0b3ea3 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Jul 17 13:03:21 2009 +0300 + + ARM: Fixes for the inline assembly constraints in pixman_fill_neon + + Some of the variables in the inline assembly arguments list are + actually modified by the assembly code, they are now marked + appropriately. + +commit c27a60f94cea7deb0afb21e734c892d475bfa06d +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Jul 17 12:54:44 2009 +0300 + + ARM: Workaround cs2007q3 compiler bug for NEON registers clobber list + + 128-bit registers "qX" are incorrectly handled in inline assembly + clobber list for codesourcery cs2007q3 gcc toolchain. Only the + first 64-bit half is saved and restored by gcc. Changing clobber + list to use only 64-bit register aliases can solve this problem. + For example, 128-bit register q0 is mapped to two 64-bit + registers d0 and d1, q1 is mapped to d2 and d3, etc. 
+ +commit cb4a5fd18f20f49ed2721f04a886c2ffd1645d09 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Jul 17 00:11:14 2009 +0300 + + ARM: Commented out the rest of buggy NEON optimizations + + These functions have problems with invalid memory accesses and often + crash X server + +commit 1aee6813ac45e6b206522623f58f1110a54186b1 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Jul 17 00:08:42 2009 +0300 + + ARM: Use Ian's variant of 'neon_composite_over_n_8_0565' function again + + This patch effectively reverts the changes done by commit + 8eeeca993252edc39da9c5c57545b81215fafc81 which was causing + severe stability issues, and restores old variant of + 'neon_composite_over_n_8_0565' function, which used to work + correctly. + +commit 2356ba38fd0c0002be4484adb8ca51de32b2ff81 +Author: Miha Vrhovnik <miha.vrhovnik@cordia.si> +Date: Mon Jul 20 19:30:59 2009 -0400 + + Update Makefile.win32 to make it work again. + +commit d4b22bd9935662912641abe5fd010d906c839405 +Author: Benjamin Otte <otte@gnome.org> +Date: Tue Jul 21 00:49:40 2009 +0200 + + Add a lot more sanity checks to region code + + - Introduce a GOOD_RECT() macro that checks that a pixman_box_t is not + empty or degenerate and use it. + - Use GOOD_RECT() instead of magic if statements for functions that take + x, y, width, height arguments + - Use GOOD_RECT() in _reset(). The checks in the previous code seemed to + allow an empty box, but then created a broken region from it. 
+ - Add GOOD(region) check at the end of _translate() + +commit a3ad8bb5412f47776285bfc954d2275f075c8796 +Author: Benjamin Otte <otte@gnome.org> +Date: Mon Jul 20 23:38:36 2009 +0200 + + Handle degenerate case in pixman_init_extents() + + Create an empty region instead + +commit 854ec2ea4d0fc940e91c4ec1c419fb4e5635dc95 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 19 12:29:42 2009 -0400 + + Replace // comments with /* */ comments in various places + +commit 54cad29dc55fc0a670bf87abacd5f45e1289db54 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Jul 18 17:59:43 2009 -0400 + + Align the stack in _pixman_implementation_create_sse2() + + When compiled without optimization, GCC will place various temporaries + on the stack. Since Firefox sometimes causes the stack to be aligned + to four bytes, this causes movdqa to generate faults. + +commit 6aa26296f5831bddc9b3f3e3e2ea018fc0cefb75 +Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> +Date: Sun Jul 19 18:20:53 2009 +0300 + + Check whether the linker understands the hwcap file before using it. + + If we're trying to use the GNU linker on Solaris we shouldn't use + our solaris-hwcap.mapfile since it doesn't grok the mapfile format. + +commit 934f4f4604ccf06db5d5aec07e58f0a0fbe7d283 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jul 17 22:40:41 2009 -0400 + + Move read and write functions to the bits_image_t struct. + + Those fields were duplicated between image_common and bits_image_t + before. + +commit 737d00063e8b8aaeaab9aecd0fbe731e8ab3f6b3 +Author: Benjamin Otte <otte@gnome.org> +Date: Fri Jul 17 16:01:59 2009 +0200 + + Handle degenerate case in pixman_init_rect() + + Create an empty region in that case. 
+ +commit e3a6df08a4bedfe82a8d3a7c1143e4db00a18d27 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jul 17 06:43:35 2009 -0400 + + Add back check for need_workaround that got removed during reindenting + +commit ecc54a7f02dbb6f57043e51173584f96c42fd2cc +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jul 16 10:38:22 2009 -0400 + + Fix combine_src_ca() to fill out all of the destination line. + +commit 4df925bb28196974617804d680380522c048dedd +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jul 16 09:34:22 2009 -0400 + + Change composite test to use a rainbow gradient as source. + + Also make the destination a yellow patch. This makes the output a bit + more comparable to the image in the PDF specification. + +commit 4f369faffa7670e0e57c83c298359992223a998b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jul 16 06:23:25 2009 -0400 + + Various minor formatting changes + +commit 3d3baa3c5e76a4f851614a7794d92d15a56ac04e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jul 16 06:06:17 2009 -0400 + + Change name of macro from RGB16_TO_ENTRY to RGB15_TO_ENTRY + +commit 12e829a8de1e45708b5dfeaa8137af6eeaf0f662 +Author: Benjamin Otte <otte@gnome.org> +Date: Thu Jul 16 15:33:43 2009 +0200 + + Clarify color burn code - no semantical changes + + - Improve documentation to be equal to Proposed ISO32000 Extension + - Simplify code in default case + +commit acf5738a82afb51a2284f5e61d9fac8dae7a47d5 +Author: Benjamin Otte <otte@gnome.org> +Date: Thu Jul 16 15:28:05 2009 +0200 + + Clarify color dodge code - no semantical changes + + - Improve documentation to be equal to Proposed ISO32000 Extension + - Simplify code in default case + +commit 3dafe926b2405ba3404a41bb5b2842413308c8fa +Author: Benjamin Otte <otte@gnome.org> +Date: Thu Jul 16 15:02:51 2009 +0200 + + Fix terms in comments describing the blend modes + +commit 90ac94b9cb3defa8bf174af8c7c9fc6c42e8762b +Author: Søren Sandmann Pedersen 
<sandmann@redhat.com> +Date: Tue Jul 14 18:49:43 2009 -0400 + + Add -fno-strict-aliasing + +commit 268561a3c674c5a4da945124b7b8f075b792a170 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 13 22:22:20 2009 -0400 + + Post-release version bump + +commit 466cf2b4452ec9bf4fa17cbf2186f5c472b66c26 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 13 22:00:52 2009 -0400 + + Pre-release version bump + +commit 83f6e2eacff826ef9dbdefb95fcb76fa1247ac4e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 13 19:58:04 2009 -0400 + + Reindent and rename identifiers in scaling-test.c + +commit 9a6ad34810421a30250ef331fb75b2a48ce9e564 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 13 02:37:19 2009 -0400 + + Reformat pixman.h + +commit 22f322fe246155d40465d4e14d65051a204f27f6 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 13 01:35:15 2009 -0400 + + Reindent and reformat pixman-private.h + +commit b4d196009881a4121b49996bdc87f7770bfa5c1b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 13 01:17:53 2009 -0400 + + Reindent and reformat pixman-combine.h.template + +commit f54c776e75a44a095304dd7374384a2a0c96d479 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 13 01:13:47 2009 -0400 + + Reindent and reformat pixman-combine.c.template + +commit d57b55bb2662837feafb4f9f88d10549164ee142 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 22:05:32 2009 -0400 + + Reindent and reformat pixman-vmx.c + +commit 01b604c606cd0842c8f4eccc41511a472e4450e9 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 21:57:08 2009 -0400 + + Reformat and reindent pixman-utils.c + +commit 1d52ecbbe04c810d3f30e7915663b2cd21befcba +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 21:40:41 2009 -0400 + + Reformat and reindent pixman-trap.c + +commit c1178e49417bbea7f91b23f71c9ba957500da0ff +Author: Søren 
Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 21:37:16 2009 -0400 + + Reformat pixman-timer.c + +commit 74774bad001504b4b2283689b6b55e21fa943dd8 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 21:36:32 2009 -0400 + + Reformat and reindent pixman-sse2.c + +commit 7dc3593d8172e292b39a7d6cc7772fcf9a04bb1f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 20:31:26 2009 -0400 + + Reformat and reindent pixman-solid-fill.c + +commit 3db9f5ff6e32c353cff640d3504eb54bd2a69ed3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 20:30:44 2009 -0400 + + Reformat and reindent pixman-region.c + +commit 317df68e94498b6a287eb736a6e5991e8b7d2d78 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 19:59:17 2009 -0400 + + Reindent and reformat pixman-radial-gradient.c + +commit 8820c81b50299f13791594fe6ddd01d536745231 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 19:37:45 2009 -0400 + + Reindent and reformat pixman-mmx.c + +commit c68283360d2e5917f15bddc0a14aa7a1c1b3852e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 19:24:31 2009 -0400 + + Reindent and reformat pixman-matrix.c + +commit 19397bc732b30a861416220974edca6404d2890b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 19:19:13 2009 -0400 + + Reindent and reformat pixman-linear-gradient.c + +commit e8e08b35e7a8d221378e3a411efdfad74b1614e3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 19:15:17 2009 -0400 + + Reindent and reformat pixman-implementation.c + +commit 2c74165179b07f31b82402d74dc9fbaf8bf52191 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 19:13:17 2009 -0400 + + Reindent and reformat pixman-image.c + +commit 5aadc28e19328054b15c7ee88996c407a9a7d9b3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 19:10:53 2009 -0400 + + Reindent and reformat pixman-gradient-walker.c 
+ +commit ac043ac2da643d872f519971a316f8bc6bdca0f8 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 19:09:24 2009 -0400 + + Reindent and reformat pixman-general.c + +commit 7b3f5fdc571e8d6b4d64f950f2578d47b1056c86 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 19:06:30 2009 -0400 + + Reindent and reformat pixman-fastpath.c + +commit c332e229bb274447b8b46c8f8ba7bce8cfaa21b2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 19:02:29 2009 -0400 + + Reindent and reformat pixman-edge.c + +commit 4ba9a44e8f4098fc61bfb62650c521b2e37cf9cb +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 18:59:10 2009 -0400 + + Reindent and reformat pixman-cpu.c + +commit e01fc6bba7f90c80e86b206f28efa3f1f94a083b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 18:55:45 2009 -0400 + + Reindent and reformat pixman-conical-gradient.c + +commit 2871add52ece8bc4a02c0f4fae98912a0f4a830c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 18:54:31 2009 -0400 + + Reindent and reformat pixman.c + +commit 0e6e08d380a16f8804706270e74f66960a681167 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 18:52:33 2009 -0400 + + Reindent and reformat pixman-bits-image.c + +commit 89eda86549e8cf938556b1a238960f2fbb7b911c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 18:45:56 2009 -0400 + + Reindent and reformat pixman-arm-simd.c + +commit 9a26a60a233955aadab65fde5bf31fc0199663ea +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 18:42:31 2009 -0400 + + Reindent and reformat pixman-arm-neon.c + +commit 0af8ef742c6e1aa150b591bc7cdacb8d2293f7af +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 18:04:21 2009 -0400 + + Reindent and reformat pixman-access.c. 
+ +commit be3a1b04ae9ef52f60fa1c6423d743b628aa57ca +Author: Benjamin Otte <otte@gnome.org> +Date: Mon Jul 13 18:02:09 2009 -0400 + + Fix burn and dodge operators to match acroread output + +commit 7dc2c48bcab7404ace9b41959e2233d0025801b3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 13 05:58:43 2009 -0400 + + Return immediately if the operator is CONJOINT_DST or DISJOINT_DST + + These are noops just like plain DST is. + +commit bb383def00b3d30d991295274b58a841a7162620 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 13 05:39:40 2009 -0400 + + Fix bits_image_fetch_{un}transformed() for 64 bit buffers. + + The buffer pointer has to advance twice as far as in the 32 bit case. + +commit c88b75d24c6fc618d638373dce5e5f0281de1f40 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 13 04:43:37 2009 -0400 + + Make sure we get all 64 bits in bits_image_fetch_solid_64() + + Previously we would only store the first 32 bits. + +commit f73ecb3f0ff516e4411de9a2738b8851e679a163 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 13 04:04:59 2009 -0400 + + Fix typo in bits_image_fetch_solid_64(). + + Found by blitters-test. + +commit 03c6b294a4517f5dfbc87504fceb3a88efef6a17 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 12 03:55:23 2009 -0400 + + Only destroy the regions when pixman_compute_composite_region() returns TRUE + +commit 2af5f64e938f0d3b81f3f014441cdff650fe5457 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Jul 11 03:25:25 2009 -0400 + + Move workaround logic to pixman-bits-image.c. + + Instead of computing whether a workaround is needed on every call to + _pixman_run_fast_path(), just cache this information in the image. + + Also, when workarounds are needed, clip against the source geometry to + prevent out of bound reads. 
+ +commit 5088ca8d97c9c918746c3e261a31b6edab6c964b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jul 10 18:18:00 2009 -0400 + + Only apply the workaround to source images when out_of_bounds_workaround is set. + + Pointed out by Siarhei Siamashka. + +commit 51418786e4cba2e8fbb44052fbed2f107244b733 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Jul 10 13:50:15 2009 -0400 + + Return nonzero code from scaling-test program in case of failure. + + This can potentially help in tests automation. + +commit 71862fe84e5eeb0b178ed3a7dec8430d506b8515 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri Jul 10 11:49:53 2009 +0300 + + Fix scaling-test to work on big endian systems + +commit 0f8c5d2fd447d2d9a0350c33715f140ab0dac452 +Author: Michel Dänzer <daenzer@vmware.com> +Date: Fri Jul 10 11:28:11 2009 +0200 + + Convert some leftover instances of Alpha() in pixman-vmx.c. + + They were probably missed due to the space before the parens. + +commit 0fce356762864572ae126733f657600fbb9116ce +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jul 9 01:35:11 2009 -0400 + + Add workarounds for X servers doing out-of-bounds accesses. + + Old X servers rely on out-of-bounds accesses when they are asked + to composite with a window as the source. They create a pixman image + pointing to some bogus position in memory, but then they set a clip + region to the position where the actual bits are. + + Due to a bug in old versions of pixman, where it would not clip + against the image bounds when a clip region was set, this would + actually work. + + The workaround added by this commit is to try and detect whether a + source drawable is actually a window without a client clip set. Such a + window will generally have a clip region that corresponds exactly to + the hierarchy clip in the server, whereas pixmaps will have a clip + region that is an exact match to the drawable. 
+ + When we detect such a window, we allow a fast path to run that would + normally be rejected due to the sources not completely subsuming the + composite region. + + Fixed X servers should call the new function + pixman_disable_out_of_bounds_workaround() to disable the workaround. + + This was reported in bug 22484. + +commit 61254a3c09497214a9c7ca89e275286533a3be2e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jul 9 23:17:56 2009 -0400 + + Add a BUILT_SOURCES with pixman-combine{32,64}.{c.h} + + Bug 22681. + +commit 34ec50a4761cab50c6216b08ef5cfb36bf94209e +Author: Adrian Bunk <adrian.bunk@movial.com> +Date: Fri Jul 10 00:11:15 2009 +0300 + + Fix the NEON build after "Convert CamelCase names to underscore_names." + + This patch fixes the following build error caused + by commit a98b71eff4041df58c9dcc2b1e25cefa38f364ff + (Convert CamelCase names to underscore_names.): + + <-- snip --> + + ... + pixman-arm-neon.c: In function 'neon_composite_over_n_8_0565': + pixman-arm-neon.c:1784: error: 'x_dst' undeclared (first use in this function) + pixman-arm-neon.c:1784: error: (Each undeclared identifier is reported only once + pixman-arm-neon.c:1784: error: for each function it appears in.) + pixman-arm-neon.c:1785: error: 'p_dst' undeclared (first use in this function) + pixman-arm-neon.c: In function 'neon_composite_over_n_0565': + pixman-arm-neon.c:1937: error: 'x_dst' undeclared (first use in this function) + pixman-arm-neon.c:1938: error: 'p_dst' undeclared (first use in this function) + pixman-arm-neon.c: In function 'neon_composite_over_8888_0565': + pixman-arm-neon.c:2074: error: 'x_dst' undeclared (first use in this function) + pixman-arm-neon.c:2075: error: 'p_dst' undeclared (first use in this function) + make[3]: *** [libpixman_arm_neon_la-pixman-arm-neon.lo] Error 1 + ... 
+ + <-- snip --> + +commit 6b34482870fd53a9285f795f47656ac73fd706b0 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jul 9 04:04:00 2009 -0400 + + Update the CRC value in scaling-test.c. + + The changes in pixman behavior justifying this are: + + - New clipping rules + + - Bug fixes in region code. In particular, when + pixman_region_init_rects() is called on these two boxes: + + { 2, 6, 7, 6 } + { 4, 1, 6, 7 } + + it now ignores the first one, which is empty and produces + + { 4, 1, 6, 7 } + + Previously, it would produce: + + { 2, 1, 7, 7 } + + for some reason. + +commit 4c60ed5ec662e2d7088a7800dd7c71d3926c42a4 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jul 9 04:00:19 2009 -0400 + + Fix bug in pixman-region.c where empty regions would not be properly initialized. + + Also add a couple more tests to region-test.c. + +commit bcf01c21d704717264011182e71cfaaf6922a437 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jul 9 01:47:19 2009 -0400 + + Fix a couple of problems with the tests when HAVE_GTK is not defined. + + - Make sure the non-gtk+ test programs are added to noinst_PROGRAMS + when HAVE_GTK is not set. + + - Don't include glib.h in oob-test.c + +commit 0db0430d1d410855863e669f0de9e8b5d26db7fd +Merge: 31a40a1 b3cf3f0 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jul 8 18:59:15 2009 -0400 + + Merge branch 'naming' + +commit 31a40a172591ab373add9dd41a52881bea9dc6f2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jul 8 18:41:41 2009 -0400 + + Eliminate boxes with x1 > x2 or y1 > y2 in pixman_region_init_rects(). 
+ +commit 706e6594310a490956d21e939c23de2b5dbe1561 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jul 8 18:39:43 2009 -0400 + + Add a box with y2 < y1 in region-test.c + +commit b3cf3f0c2be462cd61e63e07655d1b45e55f4a7b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 6 20:33:05 2009 -0400 + + Fix up some overeager search-and-replace renamings + +commit c2e331693d858c01b69135342c139546780b7021 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 6 12:48:59 2009 -0400 + + Rename num_rects back to numRects. + + The name numRects is public API and can't be changed. + +commit 8261b4d57cfdf77d7fdd4e4c0fc805ba48f7e0a0 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 5 02:12:21 2009 -0400 + + Rename combine_*_c to combine_*_ca + + s/combine_(.+)_c([^a-z0-9A-Z])/combine_$1_ca$2/g; + +commit 3c03990ba214bff000d3494587353b94f9432453 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 5 01:42:28 2009 -0400 + + Various sse2 renamings + +commit 9d0be1d4c81153ef2407518f605bc55380485955 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 5 01:38:10 2009 -0400 + + s/sse2combine/sse2_combine/g + +commit a98b71eff4041df58c9dcc2b1e25cefa38f364ff +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 5 01:35:14 2009 -0400 + + Convert CamelCase names to underscore_names. 
+ + s/sizeRI/size_ri/g; + s/numRI/num_ri/g; + s/RepeatNone/REPEAT_NONE/g; + s/fbOver/over/g; + s/fbIn/in/g; + s/iSrc/src_image/g; + s/iMask/mask_image/g; + s/iDst/dest_image/g; + s/SaDa/Sa.Da/g; + s/FbMaskBits/MASK_BITS/g; + s/RenderSamplesX/RENDER_SAMPLES_X/g; + s/MMXData/mmx_data_t/g; + s/RegionInfo/region_info_t/g; + + s/([^0x])([a-z])([A-Z])/$1$2_\l$3/g; + s/([^0x])([A-Z])([A-Z])([a-z])/$1$2_\l$3$4/g; + s/([^0x])([A-Z])([a-z]+)_([a-z])/$1\l$2$3_$4/g; + s/([a-z])_([A-Z])/$1_\l$2/g; + + s/su_sE/SuSE/g; + s/X_Free86/XFree86/g; + s/X_free86/XFree86/g; + + s/_ULL/ULL/g; + s/_uLL/ULL/g; + + s/U_nc/UNc/g; + s/combine ##/combine_ ##/g; + s/## U/## _u/g; + s/## C/## _c/g; + s/UNc_aDD/UNc_ADD/g; + + s/BLEND_MODE \((.+)\)/BLEND_MODE (\l$1)/g; + s/blend_(.+)/blend_\l$1/g; + + s/AN_ds/ANDs/g; + s/O_rs/ORs/g; + s/over565/over_565/g; + s/8pix/8_pix/g; + s/Over565/over_565/g; + s/inU/in_u/g; + s/inPart/in_part/g; + s/inC/in_c/g; + s/inreverse/in_reverse/g; + s/get_exception_code/GetExceptionCode/g; # GetExceptionCode is WinCE API + s/CP_us/CPUs/g; + s/authentic_aMD/AuthenticAMD/g; + s/op_sR_cx_mAS_kx_dST/op_src_mask_dest/g; + s/no_VERBOSE/noVERBOSE/g; + s/mc_cormack/McCormack/g; + s/r1band/r1_band/g; + s/r2band/r2_band/g; + s/as GOOD things/as good things/g; + s/brokendata/broken_data/g; + s/X_render/XRender/g; + s/__open_bSD__/__OpenBSD__/g; + s/^Quick/quick/g; + s/NextRect/next_rect/g; + s/RectIn/rect_in/g; + s/pboxout/pbox_out/g; + s/F_sorted/FSorted/g; + s/usse2/u_sse2/g; + s/csse2/c_sse2/g; + s/cPixelsse2/c_pixel_sse2/g; + s/Mask565/mask_565/g; + s/565fix_rB/565_fix_rb/g; + s/565fix_g/565_fix_g/g; + s/565r/565_r/g; + s/565g/565_g/g; + s/565b/565_b/g; + s/uPixelsse2/u_pixel_sse2/g; + s/Mask00ff/mask_00ff/g; + s/Mask0080/mask_0080/g; + s/Mask0101/mask_0101/g; + s/Maskffff/mask_ffff/g; + s/Maskff000000/mask_ff000000/g; + s/load128Aligned/load_128_aligned/g; + s/load128Unaligned/load_128_unaligned/g; + s/save128Aligned/save_128_aligned/g; + 
s/save128Unaligned/save_128_unaligned/g; + s/fillsse2/fill_sse2/g; + s/unpack565/unpack_565/g; + s/pack565/pack_565/g; + s/bltsse2/blt_sse2/g; + s/x565Unpack/x565_unpack/g; + s/r1End/r1_end/g; + s/r2End/r2_end/g; + s/argb8Pixels/argb8_pixels/g; + +commit 437ab049872063c78ee934766596dc6859749a3d +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 5 01:34:07 2009 -0400 + + Remove reference to 8888_RevNP + +commit 55e63bd0f09290cf1165030edbb4e92efb09ee6e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 5 00:55:45 2009 -0400 + + Remove reference to 8888RevNP + +commit 01994a59ca642f4e5ce126d3ad01e864d3daa0bb +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 5 00:41:53 2009 -0400 + + NoFeatures => NO_FEATURES + +commit 309d358ea673b5d4c163670c3c449fb855df7775 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 5 00:31:07 2009 -0400 + + s/FbScrRight/SCREEN_SHIFT_RIGHT/g + +commit 71fe4e3e5c64f177a8756e51eddc190b3a08ea40 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 5 00:26:30 2009 -0400 + + CPUFeatures => cpu_features + +commit 255ddbe5358b0ed4a7a01ef0ab127833dba94b02 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 5 00:19:18 2009 -0400 + + Rename FbGet8 to GET8 + +commit 446276c36fd336531745fc1427c4af2ccdbe9875 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 5 00:11:57 2009 -0400 + + Rename RBmask/Gmask => rb_mask/g_mask in pixman-arm-neon.c + +commit 412b4b50f7bd8ac29e4c9b20e613154c1b5e371a +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 5 00:06:59 2009 -0400 + + Use ALPHA_8 in pixman-image.c instead of Alpha + +commit 887383b0adab89bcc131a9a28c4d60af9e4773d1 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 5 00:05:42 2009 -0400 + + Uppercase a few more macros in pixman-combine.c.template + +commit 4153361c52f332bce9e9cc32adf1e01064014e15 +Author: Søren Sandmann Pedersen 
<sandmann@redhat.com> +Date: Sun Jul 5 00:02:45 2009 -0400 + + Rename macros for non-separable blend modes + + Lum => LUM + Sat => SAT + Min => CH_MIN + Max => CH_MAX + +commit 68405c326db4cd087bdb6290ae42953a98b81838 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Jul 4 23:45:01 2009 -0400 + + Rename some macros in pixman-combine.c.template + + s/Combine([AB])([a-zA-Z]+)([^a-zA-Z])/COMBINE_$1_\U$2$3/g; + s/CombineA/COMBINE_A/g; + s/CombineB/COMBINE_B/g; + s/CombineXor/COMBINE_XOR/g; + s/CombineClear/COMBINE_CLEAR/g; + +commit 835520b28ff1412bd9b00460a107e72c9ea21e35 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Jul 4 23:24:27 2009 -0400 + + Rename U{no}mask => U_{no_}mask in pixman-vmx.c + +commit f9bdd1a82c7629a360109bdf4519c73ba5a99225 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Jul 4 23:13:55 2009 -0400 + + Change name fbComposeGetStart to PIXMAN_IMAGE_GET_LINE. + +commit e064aa761831296c8570e0fdfaa0c3585c4a3871 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Jul 4 23:12:18 2009 -0400 + + Rename fbCombine* to combine* + + s/fbCombine/combine/g; + +commit f61855e186519a490b5d013d2de67dcc8da7a0ac +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 12:51:28 2009 -0400 + + Fix overeager search and replace + +commit 1de32ae2ef8044b349f3ec87ae339fdcedeb83ef +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 12:07:56 2009 -0400 + + Uppercase some more macro names + +commit 47296209dae2e3d33426532a3e896e06373fc088 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 11:40:21 2009 -0400 + + Consolidate channel macros in pixman-combine.h + + There are now RED_8/RED_16 etc. macros instead of the old Red/Green/Blue. 
+ +commit 2f3e3d62f7727a652090ea003c98218f3b550818 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 11:17:28 2009 -0400 + + Change some macro names to be all uppercase + +commit 8339a4abc4edcaee6fafbde1a147ba7fcaa9c108 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 09:29:32 2009 -0400 + + Change names of the FbByte* macros to be more descriptive. + + But also more cryptic unfortunately. For example FbByteMul() becomes + UN8x4_MUL_UN8() to indicate that it multiplies 4 UN8 numbers with one + UN8 number. + +commit e7f162a5a81221ca6abca79a9a77924d39bf4e16 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 08:42:19 2009 -0400 + + Clarify a couple of comments + +commit b02c33e7da3eb733ca4ada66a6c35b293a191144 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 08:30:36 2009 -0400 + + Change name of macros that operate on normalized integers. + + For example IntMul becomes MUL_UN8 to indicate that it multiplies two + unsigned normalized 8 bit integers. + +commit d4a366193b12cf241980a621a15ec0ee67e8f6bb +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 08:10:20 2009 -0400 + + Fix names in the trap rasterizer. 
+ + s/Shift4/SHIFT_4/g; + s/Get4/GET_4/g; + s/Put4/PUT_4/g; + s/DefineAlpha/DEFINE_ALPHA/g; + s/AddAlpha/ADD_ALPHA/g; + s/StepAlpha/STEP_ALPHA/g; + s/add_saturate_8/ADD_SATURATE_8/g; + s/RenderEdgeStepSmall/RENDER_EDGE_STEP_SMALL/g; + s/RenderEdgeStepBig/RENDER_EDGE_STEP_BIG/g; + s/fbRasterizeEdges/b00_re/g; + s/rasterizeEdges/RASTERIZE_EDGES/g; + s/b00_re/rasterize_edges_/g; + +commit bcdf0861be346a8a4662376f4305474da9236163 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 08:02:45 2009 -0400 + + Rename QuadwordCopy_neon to neon_quadword_copy + + s/QuadwordCopy_neon/neon_quadword_copy/g; + +commit a08548bd5275c69c1e7a7fd894a844ad6ad59638 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 08:00:56 2009 -0400 + + Fix up the names in pixman_compute_composite_region() + + s/miClipPictureSrc/clip_source_image/g; + s/miClipPictureReg/clip_general_image/g; + +commit e27b2a1fcc890d3abf272cc27fa2c0a2e8d7ab09 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 07:59:10 2009 -0400 + + Fix some more pFoo names + + s/([^a-z])pReg/$1region/g; + s/([^a-z])pY/$1y_line/g; + s/([^a-z])pU/$1u_line/g; + s/([^a-z])pV/$1v_line/g; + s/([^a-z])p([A-Z])/$1\l$2/g; + +commit 006f21b02b23e1865c0e35d0f9b97af63f52a469 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 07:54:10 2009 -0400 + + Fix the names of some common parameters + + s/xDst/dest_x/g; + s/yDst/dest_y/g; + s/xMask/mask_x/g; + s/yMask/mask_y/g; + s/xSrc/src_x/g; + s/ySrc/src_y/g; + +commit d2a4281376786fc7f31f7367807c7caa8a99d414 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 07:46:11 2009 -0400 + + Various simple renamings + + s/CvtR8G8B8toY15/CONVERT_RGB24_TO_Y15/g; + s/cvt8888to0565/CONVERT_8888_TO_0565/g; + s/cvt0565to0888/CONVERT_0565_TO_0888/g; + s/miIndexToEnt15/RGB16_TO_ENTRY/g; + s/miIndexToEnt24/RGB24_TO_ENTRY/g; + s/miIndexToEntY24/RGB24_TO_ENTRY_Y/g; + s/miCvtR8G8B8to15/CONVERT_RGB24_TO_RGB15/g; + 
s/is_same/IS_SAME/g; + s/is_zero/IS_ZERO/g; + s/is_int([ (])/IS_INT$1/g; + s/is_one/IS_ONE/g; + s/is_unit/IS_UNIT/g; + s/Fetch4/FETCH_4/g; + s/Store4/STORE_4/g; + s/Fetch8/FETCH_8/g; + s/Store8/STORE_8/g; + s/Fetch24/fetch_24/g; + s/Store24/store_24/g; + s/_64_generic/64_generic/g; + s/64_generic/_generic_64/g; + s/32_generic_lossy/_generic_lossy_32/g; + s/PdfSeparableBlendMode/PDF_SEPARABLE_BLEND_MODE/g; + s/PdfNonSeparableBlendMode/PDF_NON_SEPARABLE_BLEND_MODE/g; + s/([^_])HSL/$1Hsl/g; + s/Blend/blend_/g; + s/FbScrLeft/SCREEN_SHIFT_LEFT/g; + s/FbScrRigth/SCREEN_SHIFT_RIGHT/g; + s/FbLeftMask/LEFT_MASK/g; + s/FbRightMask/RIGHT_MASK/g; + s/Splita/SPLIT_A/g; + s/Split/SPLIT/g; + s/MMX_Extensions/MMX_EXTENSIONS/g; + +commit 1c5774bf6d39e7b349c03866c96811ee1754c9d7 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 07:35:40 2009 -0400 + + Get rid of pFoo names. + + s/([^o])pSrc/$1src_image/g; + s/([^o])pDst/$1dst_image/g; + s/([^o])pMask/$1mask_image/g; + s/pRegion/region/g; + s/pNextRect/next_rect/g; + +commit e3489730c317061a2cd888b927d36bda0590a3f2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 07:30:47 2009 -0400 + + Change the name of some routines that were simply misnamed. 
+ + s/Src_pixbuf/_over_pixbuf/g; + s/Src_x888_n/_over_x888_n/g; + s/CompositeSrc_8888_8888/composite_over_8888_8888/g; + s/CompositeSrc_8888_0565/composite_over_8888_0565/g; + s/CompositeSrc_8888_8_8888/composite_over_8888_n_8888/g; + +commit 90cac1115551c0fd70ace419179bcf2a30d6b1c2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 28 21:06:01 2009 -0400 + + Fix up names of compositing functions + + s/SrcAdd/Add/g; + s/SolidMaskSrc/Src/g; + s/SolidMaskIn/In/g; + s/SolidMask/Over/g; + s/Solid_n/Over_n/g; + s/SrcIn/In/g; + + s/(fb)(Composite.*)sse2/sse2_$2/g; + s/(fb)(Composite.*)mmx/mmx_$2/g; + s/(fb)(Composite.*)neon/neon_$2/g; + s/(fb)(Composite.*)arm/arm_$2/g; + s/(fb)(Composite.*)vmx/vmx_$2/g; + s/(fb)(Composite.*)/fast_$2/g; + + s/b8g8r8x8/f00bar/g; + s/8888C/8888_ca/g; + s/0565C/0565_ca/g; + s/8888RevNPx/pixbuf_x_/g; + s/8x0/8_x_0/g; + s/00x8/00_x_8/g; + s/8x8/8_x_8/g; + s/8x8/8_x_8/g; + s/nx8/n_x_8/g; + s/24x16/24_x_16/g; + s/16x16/16_x_16/g; + s/8xx8/8_x_x8/g; + s/8xn/8_x_n/g; + s/nx0/n_x_0/g; + s/_x_/_/g; + s/f00bar/b8g8r8x8/; + + # Fix up NEON type names + s/uint8_8/uint8x8/g; + +commit e987661667ac5c650af1c3a2ba173558ff287e06 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 28 20:45:58 2009 -0400 + + Rename fetchers. + + s/fbFetchPixel/fetch_pixels/g; + s/fbFetch/fetch_scanline/g; + s/fbStore/store_scanline/g; + +commit 2d32d91e5d89bb04fcbaffb23244a9f023d39239 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jul 8 00:38:28 2009 -0400 + + Use postfix decrement, not prefix, in region-test.c + +commit 4e41905bacbf533740e999ba79e0620f358c0597 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jul 8 00:08:49 2009 -0400 + + Eliminate empty rectangles in pixman_region_init_rects(). + + Otherwise they show up in the validated regions. 
+ +commit 967ff0bdc7f46806b7a6d16332ad39cf2c1f01c1 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jul 7 22:55:32 2009 -0400 + + Add an initialization with an empty rectangle to region-test.c + + This should produce a valid region without empty rectangles in + it. Currently it doesn't. + +commit 40fcc14d1cf8cb6b6c71f27b0a3d3ccc9a845949 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jul 6 20:37:14 2009 -0400 + + Expand comment in miClipPictureSrc() to explain why a client clip is required. + +commit eba3be7b7a2b9a8df235af6255b9d8c70d2b8c93 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jul 5 00:35:31 2009 -0400 + + Fix forgotten use BITMAP_BIT_ORDER to be ifndef WORDS_BIG_ENDIAN + +commit 06f5b51fee35727a823bd86294654178cbfac629 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Jul 4 22:49:16 2009 -0400 + + Return TRUE for the two new formats in pixman_format_supported_source(). + +commit b0f220b7f236b5dea30ddc5dec51b73c11120e10 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 11:21:33 2009 -0400 + + Include pixman-private.h in pixman-region.c + + Delete some duplicated macros. + +commit f6ef071e2805bcf52473f06cd7171097b4afd926 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 29 11:07:20 2009 -0400 + + Rename OptimizedOperatorInfo to optimized_operator_info_t + +commit 7b7e4b23cab361b444d0c69a1b9c1678d3c5df2b +Author: Ben Skeggs <bskeggs@redhat.com> +Date: Wed Jul 1 10:18:29 2009 +1000 + + Add accessor functions for PIXMAN_x2r10g10b10 and PIXMAN_a2r10g10b10 + +commit 968f720d0e8b97bbeb2db9edb75ec524d697e1d6 +Author: Jonathan Morton <jmorton@sd070.hel.movial.fi> +Date: Wed Jul 1 16:29:48 2009 +0300 + + Avoid overrunning scanlines in NEON blitters. 
+ +commit 863f9e9b7599b89c9dd42dd9c7913c2513384761 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 28 18:54:32 2009 -0400 + + Change comma to semicolon in pixman-combine.c.template + +commit 10aa32315529eaff848b8348cad47b2673f853cf +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Sat Jun 27 11:56:38 2009 +0300 + + Really fix ARM build. + + Commit 9d3f71d726c8b959b64c3e6b43ca4d3ccb320c32 broke the build + on ARM architectures by just removing custom include files and + not providing "pixman-private.h" as a replacement. + +commit 996e59f7f81864f7935d6dd58d8efd5a5ea265ea +Author: Guillaume Letellier <glet.n800@googlemail.com> +Date: Fri Jun 26 19:02:08 2009 -0400 + + Fix ARM build. + + Commit 6e20c2574354d1cb071a1201ff166cb5e92c00d2 broke the build on ARM + architectures by not updating the use of fbComposeGetSolid() + correctly. + +commit e8addcc69a36375d1330749e00854d9651c8f8d0 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jun 26 18:58:23 2009 -0400 + + Change checks for srca == 0 to src == 0 + + It is not generally correct to bail out just because the source alpha + is 0. The color channels still might not be and in that case the correct + result is: + + s + (1 - srca) * d = s + d + + which is not generally 0. + +commit 9a7ce32ef5cf70a17d83154cfd1b96aa54ce9232 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jun 25 12:42:03 2009 -0400 + + Make arm compositing functions static + +commit 9d3f71d726c8b959b64c3e6b43ca4d3ccb320c32 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jun 25 10:20:44 2009 -0400 + + Delete ARM header files + +commit 9837465fd9a5d4e7280d4c79c41d2d9a9c8f71c0 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Jun 24 01:30:34 2009 +0300 + + Use -mcpu instead of -march for ARM SIMD runtime autodetection + + Option -mcpu has higher priority than -march with the current versions + of gcc and that's why it is better to use. 
There is no particular + reason why 'arm1136j-s' is used in this patch, it could be any armv6 + compatible core. + +commit 6b8251039a905114e4b0776c3f8f58cb0678a532 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 24 20:08:50 2009 -0400 + + Add test cases to oob-test using PIXMAN_{a,x}2b10g10r10 + +commit f94053cd9b1dc8db6c924c8cf50d75ccc1898cce +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 24 13:12:07 2009 -0400 + + Post-release version bump + +commit f6faa06ef85fc4c9ff38dbc9243c060b4cdacc1a +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 24 12:51:40 2009 -0400 + + Pre-release version bump + +commit 084392fbd72e55f87e9bc37dd02384fc145f7d36 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 24 05:44:18 2009 -0400 + + Delete scanFetchProc type. Use fetch_scanline_t instead. + + fetch_scanline_t now takes a pixman_image_t argument instead of an + bits_image_t, so there is also a bunch of updates in pixman-access.c + +commit 588b42dc1e8fe252bde1eb0905bb9fac806e8ca3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 21:28:28 2009 -0400 + + Constify the mask argument to scanline fetchers. + +commit 5cfdee917d3cac38b103f7453c5a8a0047b95337 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 21:22:06 2009 -0400 + + Add a mask and mask_bits argument to the raw scanline fetchers. + +commit d3bebaf731b4e1714653b50a4a861171f497b42f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 21:10:52 2009 -0400 + + Rename fetchProc32 to fetch_scanline_t and fetch_pixels_32_t to fetch_pixels_t + +commit 6af8672c69b770ce229bd1d156f1fe70d74800f9 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 20:58:39 2009 -0400 + + Get rid of the 64 bit fetcher types. + + It's simpler to just declare everything as 32 bit fetchers and do the + conversion in the few functions that actually need to know the size of + the pixel type. 
+ +commit 70cba5cfa8a5d702c32170c511a7318766e45731 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 20:38:58 2009 -0400 + + Consolidate the three scanline store types into one. + + The 64 bit storers do their own type conversion. + +commit 973ebf1631de695483fcb4b5e4c2b27e037ca3bf +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 19:03:11 2009 -0400 + + Get rid of remaining scanFetchProc casts + +commit 24303475c26dada40474f5972b1abee2315ba8f9 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 19:02:10 2009 -0400 + + Get rid of scanFetchProc casts in pixman-radial-gradient.c + +commit 99780d3b2264f6e2bb210d3fdc1237c8fbfc3f25 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 19:00:52 2009 -0400 + + Get rid of scanFetchProc casts in pixman-conical-gradient.c + +commit 2d2d3a2625fcc1151f61d0dc1a6ff268d7491be8 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 18:58:39 2009 -0400 + + Get rid of casts to scanFetchProc in pixman-bits-image.c + + Instead just declare the functions with the required type and do any + type conversions in the function itself. + +commit 4597ad88d9ade51b5a0b4eb87503e1278b29ef56 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 18:44:01 2009 -0400 + + Fix bug where 64 bit pixel were fetched as 32 bit ones. + +commit aa6adb646a2c61062d867cece2b0669f658abb39 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 18:41:35 2009 -0400 + + Delete FbIntMult and FbIntDiv macros, and move FbIntAdd to pixman-combine.h + +commit 53ada03119d44984775877f2a2fee5ce442ac1c8 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 24 12:10:48 2009 -0400 + + Add a table to oob-test so that it can test more than one setup. 
+ +commit 895a8da63370635b05ffb91d3d670c6627d8b2ab +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 24 11:28:03 2009 -0400 + + Fix offset bug in pixman_run_fast_path(). + + Fast paths should only run when the source images completely cover the + composite region, because otherwise they would be required to sample + the border, and fast paths generally don't know how to do that. + + The check for this did not work right because it didn't take the + offset generated by the composite coordinates into account. This + commit fixes that by adding (x, y) coordinates to image cover + indicating the new position of the source in destination coordinates. + + Based on this we now compare against the region extents which are + already in destination coordinates. + +commit fd90429a32927d8aa516a3d26cc309ca7043e4d3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 24 11:23:04 2009 -0400 + + Fix typo in oob-test.c + +commit bed9c378ff9d01c8e646241dd96a43e2eb870cca +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 24 10:37:07 2009 -0400 + + Add test case for out-of-bounds memory access. + +commit b6c97ae2c934ca5adade10303d6faa6e827f826d +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 24 09:04:54 2009 -0400 + + Fix comment in pixman-utils to have the right sense. + +commit c0047fbfd54d519698a0991111f2440dc8e081b9 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 16:55:53 2009 -0400 + + Subtract x_off/y_off before conversion to integer. + + They are fixed-point values, not integers. + + Bug 22437, reported by Michel Dänzer. 
+ +commit 905856f43d38b5f2932d8b459e805e1c86b7a2f3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 16:37:35 2009 -0400 + + Add convolution-test.c program + +commit 79d397003f56238aa680b0670e1e7d7ba1594dda +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 16:23:26 2009 -0400 + + Delete leftover use of PIXMAN_OP_FLASH_SUBTRACT + +commit ebc4a4df9c92934891d202ae2603216a046ec939 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 14:55:36 2009 -0400 + + Remove support for component alpha with HSL blend modes. + + It isn't clear that component alpha makes sense with HSL blend modes. + +commit ca4ff13027b76d0ac7398f159a731f7606b7bd51 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 14:39:49 2009 -0400 + + Remove FLASH_SUBTRACT blend mode. + + We may resurrect it later, but leave it out for now, as the closest + thing we have to a spec: + + http://www.kaourantin.net/2005/09/some-word-on-blend-modes-in-flash.html + + claims that alpha values should be subtracted, whereas real-world flash + files indicate that they shouldn't. + +commit 5dab62a2f922a515634d65b133aeb089e855b399 +Author: Carlos Garcia Campos <carlosgc@gnome.org> +Date: Tue Jun 23 17:12:39 2009 +0200 + + Fix BlendColorBurn + + It should return 0 when sa == 0 + +commit e3a94e892850f91d2cb0463dc2c86f7217deb8a4 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Jun 20 20:19:57 2009 -0400 + + Add screen-test.c test program + +commit 16873f6d1baa3b0c26b31e71ad6d36d53efaf9e3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 17 11:46:01 2009 -0400 + + Make the composite test window bigger by default. + + Also rearrange the squares to better match typical aspect ratios. 
+ +commit eb4fd0477a4f3acd111fc9132f2dec7f1f63f3e1 +Author: Carlos Garcia Campos <carlosgc@gnome.org> +Date: Thu Jun 18 15:24:33 2009 +0200 + + Use floating point in SetLum + +commit 96d5044c0c4a9e34deb97655679f1d688c192c99 +Author: Carlos Garcia Campos <carlosgc@gnome.org> +Date: Wed Jun 17 17:40:52 2009 +0200 + + Do not use combineMask in component-alpha functions + +commit bf356c6d8cdbabf2faf4b6d77f94ccd3bd0459fb +Author: Carlos Garcia Campos <carlosgc@gnome.org> +Date: Wed Jun 17 16:59:45 2009 +0200 + + Fix typo + +commit cdae71ee85c74f702a8f0b999432e4d6d5caf766 +Author: Carlos Garcia Campos <carlosgc@gnome.org> +Date: Wed Jun 17 10:46:44 2009 +0200 + + [TEST] Update composite-test to test more operators + +commit c35685255f07a5a1f656d3153b5534876481b65b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Nov 29 15:12:45 2008 -0500 + + Fix ColorDodge and ColorBurn to conform to the spec + +commit 49a4fc09694d241f6b9f725a084c27eba3e31d00 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Nov 29 15:11:07 2008 -0500 + + Remove optimizations that I'm not convinced are correct + +commit 3fb71f8b41dedd55982eccd16b8518cce10258fa +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Nov 29 15:02:04 2008 -0500 + + Fix some more problems in MultiplyC + +commit 254e62159b4a8652c1dd9c47d0e5e0d0ff2ced3a +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Nov 29 14:53:57 2008 -0500 + + Fix various problems in FbCombineMultiplyC + + Don't read the destination unless we have to. fbByteMulAddC() + produces its result in its first argument, not its last. 
+ +commit a158d7f14f4b987e9e6380ffe896dbcfd41799ec +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Nov 26 13:52:00 2008 -0500 + + Add some comments about the linearity of the non-separable blend modes + +commit cae5062d3bf16d32cf675ccb0030e29cc940b25e +Author: Benjamin Otte <otte@gnome.org> +Date: Tue Nov 25 22:53:37 2008 +0100 + + fix component-alpha versions to set source to 0 when mask is 0 + +commit 9df72ebdb8280c7ca3b2696c3f7f48b69438502b +Author: Benjamin Otte <otte@gnome.org> +Date: Tue Nov 25 22:50:54 2008 +0100 + + remove debugging leftovers + +commit 19aae37bfb8fb349258675dd96872c5ba65dcce1 +Author: Benjamin Otte <otte@gnome.org> +Date: Tue Nov 25 22:50:17 2008 +0100 + + correct subtract implementation + +commit f130d99c94edbf5aeebeb317df64dbd7a6d20efd +Author: Benjamin Otte <otte@gnome.org> +Date: Sun Nov 23 18:36:32 2008 +0100 + + fix Multiply component-alpha version + +commit 4bb1eac4e7c6c785da3c2b2b1836c83446befc80 +Author: Benjamin Otte <otte@gnome.org> +Date: Sun Nov 23 18:34:50 2008 +0100 + + fix comment + +commit 73810b320ec5eab5bcbd9137f012cf0e4bf6867f +Author: Benjamin Otte <otte@gnome.org> +Date: Sun Nov 23 17:03:50 2008 +0100 + + fix component-alpha versions for seperable blend modes + +commit 4b921c1d910a5d78ca4784a6879789a5af6718d3 +Author: Benjamin Otte <otte@gnome.org> +Date: Sun Nov 23 16:23:22 2008 +0100 + + rename operator SUBTRACT to FLASH_SUBTRACT + + Also document it and move it out of the PDF blend modes to make clear + that it is not in any way related to PDF. 
+ +commit 7cbfe3ba214006dda5fa6d21871ef6fc61067005 +Author: Benjamin Otte <otte@gnome.org> +Date: Sun Nov 23 15:42:53 2008 +0100 + + rework blend-mode documentation to match current code better + +commit f26c9ec438c97515ae874711859e012971ea920a +Author: Benjamin Otte <otte@gnome.org> +Date: Wed Nov 19 21:07:13 2008 +0100 + + typo fix: Seperable => Separable + +commit ea17e2e2e43e578b3799fe9a6f7533569aed880c +Author: Benjamin Otte <otte@gnome.org> +Date: Wed Nov 19 21:06:06 2008 +0100 + + remove semicolon at end of macro + +commit ecf9f83ac64236b0834d268e6235306ab84fb749 +Author: Benjamin Otte <otte@gnome.org> +Date: Wed Nov 19 21:05:16 2008 +0100 + + remove a leftover debugging statement + + That was an assertion check by infinite loop + +commit c061b4dd16af52383afae470e845bd43a552d925 +Author: Benjamin Otte <otte@gnome.org> +Date: Thu Nov 13 17:40:10 2008 +0100 + + invent a Subtract operator for component alpha + + This seems to make sense, and as I can't test it against Adobe's Flash + player as that one can't do component alpha, this one looks best. 
+ +commit 93e32235e6a72bfea14d36a0407fbbe6482e20d9 +Author: Benjamin Otte <otte@gnome.org> +Date: Thu Nov 13 17:34:19 2008 +0100 + + add non-seperable versions for component alpha + +commit 239cc46aa77b4be71d738c0136a5465796a29886 +Author: Benjamin Otte <otte@gnome.org> +Date: Thu Nov 13 17:29:00 2008 +0100 + + add component-alpha versions of the seperable blend-modes + +commit 2f57b6f4e9020654ad175a593b17ff07fc3f5cbd +Author: Benjamin Otte <otte@gnome.org> +Date: Thu Nov 13 16:12:22 2008 +0100 + + rewrite nonseperable blend modes the same way as seperable ones + +commit fd1bec2859f775feaff329315cdf16ad27ec4728 +Author: Benjamin Otte <otte@gnome.org> +Date: Thu Nov 13 15:13:17 2008 +0100 + + clean up seperable blend modes + + The code is now shorter and faster than before + +commit e8b4394a409cda48b6598847292b768ad027dbf0 +Author: Benjamin Otte <otte@gnome.org> +Date: Wed Nov 12 19:26:13 2008 +0100 + + prefix HSL operators with HSL + + This is necessary to distinguish SATURATE from PDF's HSL SATURATION + +commit f08263a25181a5f18991490629ca2e9582836ac6 +Author: Benjamin Otte <otte@gnome.org> +Date: Wed Nov 12 19:12:12 2008 +0100 + + remove invert operator + + src INVERT dest == (white IN src) DIFFERENCE dest + +commit 755638d73cfc5879bd440f0148e982e562509fd0 +Author: Benjamin Otte <otte@gnome.org> +Date: Thu Oct 23 21:20:23 2008 +0200 + + add nonseperable blend modes from PDF spec + +commit e3ad87033e3771a3c54b1b8e49813a6959315cd7 +Author: Benjamin Otte <otte@gnome.org> +Date: Thu Oct 9 21:46:50 2008 +0200 + + fix ColorDodge and ColorBurn to conform to the PDF spec + +commit 35bb57e7234994c4169458275e362f02cb5138aa +Author: Benjamin Otte <otte@gnome.org> +Date: Thu Oct 9 18:00:45 2008 +0200 + + use PDF algorithm for soft-light + +commit 0735aeeaeba04f0c33f22b25a191cfd1f27c271d +Author: Benjamin Otte <otte@gnome.org> +Date: Tue Oct 7 15:13:45 2008 +0200 + + Add INVERT and SUBTRACT blend modes used in Flash + +commit 740425ab969adda1aaf36d8f52dec6f6e5303ed6 +Author: 
Benjamin Otte <otte@gnome.org> +Date: Wed Aug 29 23:15:33 2007 +0200 + + use a pixman_op_t here + + This improves the readability in gdb when debugging this structure + +commit 94e9673eaaf9e22530159f0335a0a30d2f2a0047 +Author: Benjamin Otte <otte@gnome.org> +Date: Thu Sep 25 12:53:06 2008 +0200 + + Add support for extended blend mode. First pass. + + This adds support only for FbCombineU function. + This work is based on equations provided in SVG 1.2 specification draft. + + http://www.w3.org/TR/SVG12/ + + Based on a previous patch by Emmanuel Pacaud <emmanuel.pacaud@free.fr> + +commit 99108040f03726bf4bddf55baa7ff6acd796fcf0 +Author: Michel Dänzer <michel@daenzer.net> +Date: Tue Jun 23 14:02:26 2009 -0400 + + Fix the build on big endian machines. + +commit bb3b3da18ac6e1f935008fa50cd854b3de19afc3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 13:44:48 2009 -0400 + + Rename PIXMAN_FORMAT_16BPC macro to PIXMAN_FORMAT_IS_WIDE + +commit 039d4618f79e384d93a7548466f80acae6da738c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 13:41:27 2009 -0400 + + Write alpha map fetching with FbByteMul() instead of div_255() + + Delete the div_255/div_65535 macros. + +commit 3e39b566ee2aaa414b95b0dae98cc5971c399359 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 23 07:34:17 2009 -0400 + + Remove unused typedefs. + +commit 2c70814b6bff2091bcc55ae4252fe82ae53439e4 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 22 20:43:08 2009 -0400 + + Delete unused _pixman_image_get_fetcher() function + +commit b3bd7394477a64ca0460655ca3a8e5326c402167 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 22 19:51:13 2009 -0400 + + Move accessor macros to their own header. 
+ + Also rearrange some things in pixman-private.h + +commit fe8ef09e9835f90b669a2b1ddfda49e839d6de53 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 22 19:38:58 2009 -0400 + + Move FbGet8() macro into pixman-bits-image.c + + It is only used for bilinear filtering now. Also some formatting + changes in pixman-private.h + +commit 03587764455bd41684bf29bbecb657ba45b0c341 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 22 19:35:11 2009 -0400 + + Delete FbInOverC macro + +commit 1c429b4fbedc5287659c836c0d30801a6209bf57 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 22 19:25:25 2009 -0400 + + Make pixman-mmx.c compile again. + +commit 7bb9df038293b591e687cbf3a9830476bef7f9fc +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 22 16:34:15 2009 -0400 + + Fix typo in CLIP macro. + +commit 63b050de5b2627aee0d75c66244e55757ba007ab +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 22 16:05:27 2009 -0400 + + Turn the FbAdd() macro into an FbIntAdd() which doesn't take a channel. + + The only use of the channel argument could be written better with FbByteAdd(). + +commit 026ef583288e1e63e5a84ba998aea2e674f02a17 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 22 15:58:28 2009 -0400 + + Delete FbInU macro. + + Replace uses of it with FbIntMult(). + +commit 5028c1599ad9119dbb7b58d2f93e60c857aec769 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 22 15:21:40 2009 -0400 + + Use fbOver() instead of fbOver24. + + fbOver() is faster anyway, and this lets us get rid of fbOverU. + + Also use FbByteMul() in fbIn instead of four times FbInU. + + Finally, delete FbOverC and FbInC since they weren't used. + +commit ca4750be0a5ea0a6910ad9f4eed6a9989c91c230 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 22 08:09:11 2009 -0400 + + Eliminate Fetch/Store24 macros. + + Replace them with inline functions in pixman-bits-image.c. 
+ +commit e68f8bc1187785309ed3befcda1e1a211fe624e6 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 22 07:05:24 2009 -0400 + + Remove unused ACCESS macros in pixman-bits-image.c + +commit 233d8907ed02d6624f458dd40c9db46055fc7630 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 22 06:51:04 2009 -0400 + + Various minor changes + + - Add underscores to the pixman_image_get_solid(), + pixman_image_is_solid(), and pixman_is_opaque() names. + + - A number of formatting fixes + + - Move debug code to the end of pixman-private.h + + - Collect all prototypes for image methods in one place + +commit 950bcd7d4a6226d969b0b69513f6806a2d40e08e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 22:33:31 2009 -0400 + + Some formatting changes + +commit 8b616c5725891f2f2d21b71796fb9af5644260e4 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 22:16:39 2009 -0400 + + Delete struct point + +commit 653fe825c92935318e0d2d552c3a0336ef82a1de +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 22:12:25 2009 -0400 + + Move region helpers into pixman-utils.c + +commit 00d852c96931f4bc27dfec124062e71eb49dc9bc +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 22:11:07 2009 -0400 + + Move code around + +commit a4ef790faac2c822df8336ee00c6fc5ea84aaa53 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 22:05:49 2009 -0400 + + Move pixman_version() to pixman.c + +commit f1049c61d6b6b977f56533644bbfa7e2a95ca3a1 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 22:01:58 2009 -0400 + + Move pixman_compute_composite_region() into pixman-utils.c + +commit 7690af20fcf7f341a5162b77a66660cd05a155b9 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 21:42:14 2009 -0400 + + Eliminate pointless Red/Green/Blue macros + +commit f6faf538eebed4722c085c2eef7b3ae524e3e00c +Author: Søren Sandmann Pedersen 
<sandmann@redhat.com> +Date: Thu Jun 4 07:39:13 2009 -0400 + + Get rid of indexed argument to store functions + +commit fdb25d97477635dafb0f8c328de65727a2d73a48 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 21:36:43 2009 -0400 + + Move macros around in pixman-private.h + +commit 76bf3073d45e184973cfc992d8f366a4a5ed0127 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 21:32:23 2009 -0400 + + Move some macros into pixman-access.c + +commit e2b5b05b3818f6a4ecf24dd0030e22784af22e22 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 21:28:38 2009 -0400 + + Delete obsolete comment + +commit 271a0d34a07ee04d8de0cb435ab9242aeb0a4c5c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 21:28:24 2009 -0400 + + Move edge stepper macros into pixman-edge.c + +commit 92eca118ad9cdeb61a00a591916f4e34aaaab916 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 19:45:29 2009 -0400 + + Delete FB_MASK and FB_ALLONES macros + +commit 9541538a97b1101a886a26653a8b416701b2e065 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 19:38:57 2009 -0400 + + Implement fbStore_a2g2b2r2 + +commit 433d94e60b8404df39582b6149e60a5faa965160 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 19:35:20 2009 -0400 + + Replace switch functions in pixman-access.c with a table of accessors. + + Also delete unused orig_data pointer. + +commit d78e30b26be15683062a1a3b76fbbe7d3b5abe0f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 19:06:25 2009 -0400 + + Rename pixman_image_can_get_solid() to pixman_image_is_solid + +commit 6e20c2574354d1cb071a1201ff166cb5e92c00d2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 18:58:53 2009 -0400 + + Turn fbComposeGetSolid() macro into a pixman_image_get_solid() function. 
+ +commit 76aa72e8cac12400ac8f635b81642335b0d27310 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 18:51:36 2009 -0400 + + Delete unused WRITE_ACCESS() macro + +commit 3c0ed5b92dc205d4fa6c9fa2f2772022f2404549 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 18:51:06 2009 -0400 + + Move pixman_image_fill_rectangles() to pixman.c + +commit fb0fe616f2e0ce8f31f88887ca2a7ec394886b90 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 18:43:27 2009 -0400 + + Delete unused mod macro + +commit bfa6f8c0b0418a3b4337da6c8bd0d4e9eda7e83e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 18:40:25 2009 -0400 + + Eliminate bit fiddling macros from pixman-private.h. + + There was one remaining use of FbMaskBits in the a1 trap rasterizer; + just move that macro there. + +commit 84886292e2c9be4149a32c7499015960331db426 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 18:29:39 2009 -0400 + + Implement fbComposeGetSolid() as a call to pixman_image_get_scanline() + +commit 8e40734174e97ff319c31ba49096cc8b7d5117ae +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 16:45:17 2009 -0400 + + Eliminate FbStipMask macro. + + It was only used for storing into a1 images, and that code could be + written more clearly by computing the bit index directly. + +commit 590d034bb399d28b191ac50c764d03ebd342e149 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 16:07:50 2009 -0400 + + Implement pixman_format_supported_destination() in terms of pixman_format_supported_source() + +commit 16a87a89e1330c18876aaf17ccc6f07243062ca8 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 15:09:02 2009 -0400 + + Move FbGen macro into pixman-combine.c + +commit d18722cdb6ddde7abba9cd1492e636f2668fadf1 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 13:18:46 2009 -0400 + + Use DIV instead of _div In pixman-trap.c. 
+ +commit 81d6725f3106a888fe0fbffba4a0d05d553d0777 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 11:19:00 2009 -0400 + + Move edge utilities into pixman-trap.c + +commit 793c92dadb6f9c82ace50711c0e8c51e62368f19 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 11:15:30 2009 -0400 + + Move compiler dependencies to a new pixman-compiler.h file + +commit 5624ca0417bf7a30b5b05235e902b237a77b8543 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 10:12:15 2009 -0400 + + Rename FastPathInfo to pixman_fast_path_t + +commit afcfc8efc48630f0f349aefc8c86619fc7514647 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 10:09:22 2009 -0400 + + Eliminate MSBFirst, LSBFirst, IMAGE_BYTE_ORDER, and BITMAP_BIT_ORDER. + + Just use WORDS_BIGENDIAN instead. + +commit fd83e3594b440ade9acc1263dcd2a0980aa7ebcc +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 09:50:24 2009 -0400 + + Rename combine.inc and combine.h.inc to pixman-combine.{c,h}.template. + +commit d7234efc9a5d6a371692287555820fcd0f7ba48d +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 21 09:44:22 2009 -0400 + + Only use force_align_arg_pointer on gcc/x86-32 + +commit 093112a1b720c3a74b28b7b4289feb16fbe4afd1 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Jun 20 20:28:36 2009 -0400 + + Rename pixman-pict.c to pixman.c + + There are no traces of fbpict.c in it anymore. + +commit b7b6847b6692796a5da8590dd6254add6d566a7a +Author: M Joonas Pihlaja <jpihlaja@cc.helsinki.fi> +Date: Fri Jun 19 17:29:11 2009 +0300 + + Remove redundant NULL checks from general_composite_rect(). + + The general_composite_rect() function has two invocations + of the return_if_fail() macro before any of its variable + declarations. Removing them allows for compilation to + succeed using a pre-C99 compiler. 
+ +commit d4dc812380f937908e466bfab52bfcc3b5334ebe +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jun 19 13:19:02 2009 -0400 + + Get rid of pixman_region_internal_set_static_pointers() + + Instead just define the function in pixman-region16.c + +commit 8b344e417e06f80a24bff9b6fadf4d82b54ab911 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jun 4 07:31:39 2009 -0400 + + Rename PixmanTimer to pixman_timer_t + +commit 2f9787a9cf3fe0783d1b46a01534ba6588b53e3f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jun 4 07:29:14 2009 -0400 + + Rename GradientWalker to pixman_gradient_walker_t + +commit cacfd7fe33e7e7643199de0dffb8312c0c432ccf +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jun 19 13:14:11 2009 -0400 + + Delete unused IS_SOURCE_IMAGE() macro + +commit 216f46eb7e3f468f2b64421bdfbcb6e58eafc7e8 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jun 4 07:17:36 2009 -0400 + + Remove commented-out fbAddTriangles + +commit 43f3825660914aae7786537ad069758a057488ce +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jun 19 13:04:26 2009 -0400 + + Remove useless FbBits typedef + +commit 8821885207f74bf9a18b374a1ee5de2442f603a3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jun 4 06:52:32 2009 -0400 + + Delete unused CombineFunc{32,64} types + +commit e063bd5555ed874a351bada2ef2a7082c42cb426 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jun 19 12:57:56 2009 -0400 + + Rename bits_image.fetch_pixels_{32,64} to fetch_pixels_raw_{32,64} + + Also add a couple of comments about what these functions do. 
+ +commit ce2944747455265d24bbbd6ab4b843bf974c8126 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jun 19 12:28:59 2009 -0400 + + Delete empty FASTCALL macro + +commit 207c9480b5fdb30dd5b9bfc37707ff9cbf1d2d8a +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jun 19 12:15:19 2009 -0400 + + Delete FbComposeData type + +commit 9dfaa6365f247c1fefb84805ecf850deebb05193 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jun 4 06:02:32 2009 -0400 + + Remove unused pixmanFetchGradient() declaration + +commit da001051d876051763dc0bc1a90d58ec7ca31e96 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 3 22:51:18 2009 -0400 + + Remove dstMask from pixman-fast-path.c + + These were used to zero the x8 channel of PIXMAN_x8r8g8b8 + destinations. However, we treat this channel as undefined, so there is + no need to zero it. + +commit 304412752e2cbb7a8d407ca1af45d4ec1508e5b2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 3 22:48:57 2009 -0400 + + Eliminate trivial READ and WRITE macros in pixman-fast-path.c + +commit ac3fdeb97b21bd03e1902166310533377abd441d +Author: William Bonnet <william@wbonnet.net> +Date: Fri Jun 19 07:27:28 2009 -0400 + + Fix build on Sun Studio. + + Don't use return in a void function. + +commit 94964c221fe8141e5177d98f5357dca33fa00544 +Author: Jonathan Morton <jmorton@sd070.hel.movial.fi> +Date: Tue Jun 16 12:08:29 2009 -0400 + + [NEON] Add ARGB8-over-RGB565 compositing blitter. + +commit af660613eefbb474fd62f01b6f073fae389bd6f7 +Author: Jonathan Morton <jmorton@sd070.hel.movial.fi> +Date: Tue Jun 16 12:08:29 2009 -0400 + + [NEON] Add transparent rect blitter. + +commit 8eeeca993252edc39da9c5c57545b81215fafc81 +Author: Jonathan Morton <jmorton@sd070.hel.movial.fi> +Date: Tue Jun 16 12:08:29 2009 -0400 + + [NEON] Replace Ian's glyph-blitter with a better one. 
+ + Each scanline of the destination is bulk-loaded into a cached buffer on + the stack (using the QuadWordCopy routine) before being processed. This + is the primary benefit on uncached framebuffers, since it is necessary + to minimise the number of accesses to such things and avoid + write-to-read turnarounds. + + This also simplifies edge handling, since QuadWordCopy() can do a + precise writeback efficiently via the write-combiner, allowing the main + routine to "over-read" the scanline edge safely when required. This is + why the glyph's mask data is also copied into a temporary buffer of + known size. + + Each group of 8 pixels is then processed using fewer instructions, + taking advantage of the lower precision requirements of the 6-bit + destination (so a simpler pixel multiply can be used) and using a more + efficient bit-repacking method. + + (As an aside, this patch removes nearly twice as much code as it + introduces. Most of this is due to duplication of Ian's inner loop, + since he has to handle narrow cases separately. RVCT support is of + course preserved.) + + We measured the doubling of performance by rendering 96-pixel height + glyph strings, which are fillrate limited rather than latency/overhead + limited. The performance is also improved, albeit by a smaller amount, + on the more usual smaller text, demonstrating that internal overhead is + not a problem. + +commit 1a7f25946b5b64aa604fab0f6d428bacb5296a4e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 16 11:59:20 2009 -0400 + + Post-release version bump + +commit 9733b2c4d4ed8fbd3f6e770446b457e0526db152 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 16 11:52:48 2009 -0400 + + Pre-release version bump + +commit b1cb5922f785310ef790811b52e4e2b0c85dfccc +Author: Jonathan Morton <jmorton@sd070.hel.movial.fi> +Date: Mon Jun 15 16:09:32 2009 +0300 + + Add RVCT support for straight blitter. 
+ +commit b6a3868ced67eb363273bfbee0d850c4d06cca34 +Author: Jonathan Morton <jmorton@sd070.hel.movial.fi> +Date: Mon Jun 15 16:02:04 2009 +0300 + + Better CFLAGS handling for recent ARM platforms. + +commit 1217c11a02ef60a3955fd98f7cec48de4cb9561b +Author: Jonathan Morton <jmorton@sd070.hel.movial.fi> +Date: Wed May 27 15:31:59 2009 +0300 + + Misc warning fixes. + +commit 68ec1244cdd4aa2703739a19c7c3917231b7b889 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Jun 13 09:32:59 2009 -0400 + + Add API to set a function to be called when the image is destroyed. + +commit ebc39ed35a9f79ac9bb329bfc7dc27f290f6e1b0 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 10 08:52:31 2009 -0400 + + Work around X server bug. + + X servers prior to + + ebfd6688d1927288155221e7a78fbca9f9293952 + + relied on pixman not clipping to destination geometry whenever an + explicit clip region was set. Since only X servers set + source_clipping, we can just trigger off of that. + +commit 08eb065c568de5c0cb67b7b02ccb17bf72d5059c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 3 05:21:29 2009 -0400 + + Move region computation closer to the region walking. + + Computing the composite region is a bit expensive, so only compute + it if we are likely to actually walk it. + +commit 78ca4eea6467dbb6b9da1198b9526750a0a8dca3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 2 23:17:34 2009 -0400 + + Simplify clipping rule + + The new rule is: + + - Output is clipped to the destination clip region. + + - If a source image has the clip_sources property set, then there + is an additional step, after repeating and transforming, but before + compositing, where pixels that are not in the source clip are + rejected. Rejected means no compositing takes place (not that the + pixel is treated as 0). By default source clipping is turned off; + when they are turned on, only client-set clips are honored. 
+ + The old rules were unclear and inconsistently implemented. + +commit b9683cb2ae519707e06a0b9302f8a373d336da12 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 3 00:25:54 2009 -0400 + + Fix pixman_image_is_opaque() + + - Don't claim that non-repeating bits images are opaque. + + - Don't claim that conical gradients are opaque ever. + +commit 7aeed3fc08b3359a3e4e6178f569dbb28ffdad08 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 2 22:57:23 2009 -0400 + + Only call fast paths when the images cover the composite region + +commit e67c7eedf203f4424bdfac7982d2bc7c6e1748d2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 2 22:17:00 2009 -0400 + + Pass the region to walk_region_internal() + +commit 85a2f55e6b55833cb4092c6e9e58497fbd9e7167 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 2 22:08:02 2009 -0400 + + Remove srcRepeat and maskRepeat arguments from _pixman_walk_composite_region() + +commit dc0a9dd65ab2622646d1220adf3e5ea70dcae951 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 2 22:04:47 2009 -0400 + + Remove all the srcRepeat/srcTransform stuff from the general implementation. 
+ +commit f885caad4a709d7d2c4f0bf63d735080bcca3c24 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 2 22:02:52 2009 -0400 + + Make _pixman_walk_composite_region() a wrapper around an internal function + +commit d5768884a1576e7ad4a9d1e24063d214babb7157 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 2 21:31:58 2009 -0400 + + Handle repeat_none/normal for 64 bit fetchers + +commit c9ea4a9722bc3c2223e8c8d72aa1b23598db489e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 2 21:20:42 2009 -0400 + + Make the untransformed path handle REPEAT_NONE and REPEAT_NORMAL + +commit cf7bf4eb57351b44f467eda9f4d9fa8f97754550 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jun 5 01:33:28 2009 -0400 + + Post-release version bump + +commit b721bc49199a24364bceb6e76ad9c6e6b2996905 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jun 5 01:10:00 2009 -0400 + + Pre-release version bump + + Also squash some warnings and correct the variable name in RELEASING. + +commit 5f086792eeaea6b2c401105b8bbf0c92fb7d192e +Author: Jonathan Morton <jmorton@sd070.hel.movial.fi> +Date: Thu Jun 4 11:24:26 2009 -0400 + + [NEON] Really fix filler bug. + + Advance the destination pointer (r4 register) properly. + Found by Siarhei Siamashka. + +commit 3b12cc7a23f81581b027764be96bf028785b1b5f +Author: Jonathan Morton <jmorton@sd070.hel.movial.fi> +Date: Thu Jun 4 11:24:22 2009 -0400 + + [NEON] Fix filler bug. + + r5 is being sourced explicitly instead of the %[width] reference. + It's probably a copy-paste bug, not spotted because I didn't + originally write it. 
+ +commit 3c570a815afb282df01f41acad385ff0e3e33899 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Jun 4 00:05:06 2009 -0400 + + Add an --enable-timers configure option to enable the TIMER_BEGIN/END macros + +commit 7077138fb3c633e8791b2a4139ade07dbc677dd3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 3 23:54:57 2009 -0400 + + Some cleanups in the configure.ac file + +commit 9d442a6bc6f1ae28c3f36247bf3e2ab959fb2712 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 3 22:25:25 2009 -0400 + + Correct link to bugzilla in README + +commit 4465866cba3700e831101ea429d5de2a95cf7470 +Merge: a673a89 d4d716c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 3 21:49:59 2009 -0400 + + Merge branch 'many-pixels' + +commit a673a898e1e119836c9c68eff71feaec49f97bf1 +Author: Jonathan Morton <jmorton@sd070.hel.movial.fi> +Date: Wed Jun 3 10:43:42 2009 -0400 + + Delete now-unused fbCompositeSrc_x888x0565neon(). + +commit 7b3e90c361602c4599ec592d83b4eb1eb0ea76cf +Author: Jonathan Morton <jmorton@sd070.hel.movial.fi> +Date: Wed Jun 3 10:43:42 2009 -0400 + + Replace fbCompositeSrc_x888x0565neon with fbCompositeSrc_24x16neon. + +commit 0bfd9904e4adafbaa04ddfe1c0b22df1dac411e5 +Author: Jonathan Morton <jmorton@sd070.hel.movial.fi> +Date: Wed Jun 3 10:43:41 2009 -0400 + + Enable NEON straight blits. + +commit 4da5316285976f43d19231548f79c8b3b02ce060 +Author: Jonathan Morton <jmorton@sd070.hel.movial.fi> +Date: Wed Jun 3 10:43:41 2009 -0400 + + Enable NEON copies. + +commit 15ec3977843029f61f9e869610123977da8b446a +Author: Jonathan Morton <jmorton@sd070.hel.movial.fi> +Date: Wed Jun 3 10:43:41 2009 -0400 + + Enable NEON fills. 
+ +commit 78faaa58d60f982eb4fdb674b7740315282d4d65 +Author: Jonathan Morton <jmorton@sd070.hel.movial.fi> +Date: Wed Jun 3 10:43:41 2009 -0400 + + Add more NEON fast paths + +commit d4d716cc25536b5a9db3ed216d64f5f9be8b69a2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 2 03:27:46 2009 -0400 + + Only advance the Z coordinate for non-affine transformations + +commit c2b119492949d89ae1823961438e7086c700ea3f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 2 03:25:46 2009 -0400 + + Consistently use 256 pixels as the size of the temp buffers + +commit 10bc25b01a00b94eac72d9afe890bd0ff3699951 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Jun 1 19:51:06 2009 -0400 + + Inline repeating instead of doing it as a separate pass + +commit db4f7fc9df581af54c4ed760dee14ef8a09873d2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 23 21:10:46 2009 -0400 + + Move pixman_expand/contract to pixman-utils.c + +commit abb60f43f46b10f8057baa0fcc3eb480883ef23b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 23 21:03:39 2009 -0400 + + Change pixel wise fetcher to fetch many pixels at a time instead of just one + +commit 9a7ded161c014ba51f9d3723f29a32b759717673 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 22 11:37:01 2009 -0400 + + Remove unused access macro + +commit 2b82a4c14d8e8c8686a8b2d38abdc0df259e087c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 22 09:00:24 2009 -0400 + + Add a 64 bit pixel fetcher and use it for solid colors + +commit f9fa5bcac04af660a2c873e7cfbc969cb37bee77 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 22 08:47:51 2009 -0400 + + Move simple fetchers after transformed fetcher + +commit c981eb95bb3f0806ad92a13e45cfff1ad6cf362c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 22 08:39:18 2009 -0400 + + Rename some of the fetchers + +commit 
e043530553ebfcebe106b5cc3a35448727e597aa +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 22 08:31:50 2009 -0400 + + Split filter switching into its own bits_image_fetch_filtered() function + +commit cb04bfd6b4932030b6e9114b926d6c71ce57b97f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 22 08:28:35 2009 -0400 + + Move 'adjust' code into the individual filters + +commit 94c6abe8fcddce64efa7975e7f71296b1a049b1c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 22 08:18:40 2009 -0400 + + Add bits_image prefix to fetchers + +commit 5b8304fd17b86639a3adb3eec8ce1116a9d3425e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 22 08:15:35 2009 -0400 + + Move some code around - use image->fetch_pixel in FbFetchSolid + +commit 48a2d0bba24ac5c9c1426efd3cab08c652ab8952 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 22 08:13:39 2009 -0400 + + Rename _pixman_image_fetch_pixels() to bits_image_fetch_alpha_pixels + +commit 0486f0f3241225c887549def31e5e246ff16255f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 22 08:04:38 2009 -0400 + + Get rid of the StoreExternalAlpha() functions + +commit d9b045d18e4723e710dab410fc011d36fc5dd327 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 22 07:49:55 2009 -0400 + + Add fetch_scanline_raw{32,64} + +commit 67cd7fcbdfe026ddc4967c07939c2a548a80d6b5 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 22 07:13:51 2009 -0400 + + Add store_scanline_raw_{32,64} virtual functions to bits image + +commit 2434524fd9b8258af88afb1d71fe25813a5def2e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 20 20:32:42 2009 -0400 + + Move remaining pixman-transformed.c code into pixman-bits-image.c + +commit 4bd73c4d6a1d0489c8eb35a867ef55187ca75650 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 20 20:27:51 2009 -0400 + + Fix coordinate computation for 
perspective transformations + + - Don't convert to integer + + - Saturate to INT16_MIN/MAX on overflow + +commit 3dd2496890ccf499721df8b86f95c8f5b8ad196a +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 20 18:39:26 2009 -0400 + + Get rid of pixman-transformed-accessors + +commit df23b360a0330e89c1f38f9d64e8ac150ea740e4 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 20 16:52:31 2009 -0400 + + Remove unused code from pixman-transformed.c + +commit 2557931bac461d8a0274ad638c12687afbe26145 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 20 12:44:55 2009 -0400 + + Handle alpha maps in _pixman_image_fetch_pixels() + +commit 72ae714b7400db7282aa0f92cc740bc106685e54 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 20 08:28:54 2009 -0400 + + Only do region lookups when the source region is different from the full region + +commit ccbe5cf8f39f57a973e5901ad5fe583557947e98 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 20 07:40:29 2009 -0400 + + Fix typo in fetch_bilinear_pixels() + + Change the number of temp pixels in FbFetchTransformed() to something a little more reasonable. + +commit 72a3e20c722b16f1b28975451d33e934f54da46f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue May 19 10:04:33 2009 -0400 + + Process the correct number of coordinates for the NEAREST filter + + The number of coordinates to process is twice the number of pixels. + +commit e8b4ebf59143a04f5b6f10fb112d39ca50250293 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue May 19 09:23:01 2009 -0400 + + Fix a couple of bugs in the bilinear fetcher + + - The x and y distances are the most significant fractional bits. + + - We need to fetch four times the number of pixels produced. 
+ +commit b24fc024fabed9406958611edc607f2af51e46cd +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue May 19 08:30:01 2009 -0400 + + Make fbFetchTransformed() use the new filtered many-pixel fetchers + +commit 8e0ad050e7ce7b3200e6126f782bad94d9df97f6 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue May 19 08:09:03 2009 -0400 + + Add fetch_convolution_pixels() function + +commit 1510ffb750b8b74c32dffd11cc0f20ce091767c1 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue May 19 06:59:13 2009 -0400 + + Add fetch_bilinear_pixels() function + + A bilinear fetcher that fetches many pixels instead of just one. + +commit a156e4e097f424722c4f1d03f0cf4bb9370962d4 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue May 19 06:18:00 2009 -0400 + + Add fetch_extended() function + + This function takes a list of coordinates and transforms it into + another list of coordinates, according to the repeat method of the + picture. + +commit d2cbfeca0efbf108c320e38bb39970af57c84438 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue May 19 05:23:29 2009 -0400 + + Add _pixman_image_fetch_pixels() + + Including a virtual fetch_pixel() function in bits_image_t. + +commit bd1cc87da39ad2e631bec5fa988a2e03eae0f929 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 2 16:51:28 2009 -0400 + + Get rid of toplevel argument to implementation constructors. + + It was always NULL anyway. 
+ +commit 8d523bd9f3eb44b9e9a3c64f153626c39a0fffdc +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Jun 2 07:47:29 2009 -0400 + + Make sure the whole delegate chain has the correct toplevel + +commit 812a993843542f1ff051c46fe627315fcb73bc56 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 30 22:23:27 2009 -0400 + + Post-release version bump + +commit 3bad5eefd0d4e6ceb4ea52dd5117bf38649b31aa +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 30 22:09:11 2009 -0400 + + Pre-release version bump. + + Also delete non-existent header files from pixman/Makefile.am + +commit e3dba0f61af9583728f94210392cc12b602acc2e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 30 21:44:08 2009 -0400 + + Create a vmx pixman_implementation_t + +commit 0c92309a8ab887efd73737b627baca36d800c6f8 +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Sun May 24 18:41:06 2009 +0200 + + Update vmxCombine*U to the new interface + + Make the functions use different codepaths depending on mask being non + NULL. It could be sped up a bit more but should work as before + speedwise. Conformance checked with cairo tests. + +commit 21034db1daf90ac2b17f6929e72b3a0b953e81c4 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed May 27 22:46:23 2009 +0300 + + Scaling test updated to provide better coverage for problematic cases + + Now scaling test should reliably detect problems in new scaling code. + Maximum image size reduced to improve performance (more tests can be + run per second) and also simplify detected errors analysis. + +commit 53ce8838254d436b6a4d527aacdece7dba7ceacd +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 29 22:21:37 2009 -0400 + + In pixman-sse2.c test for non-zero source, not just non-zero source alpha. 
+ +commit da9f3266fd00a5634fd2fb8a9cffbf24d668aaab +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 29 21:20:20 2009 -0400 + + In the mmx implementation, check for source == 0 rather than alpha == 0. + + Otherwise we compute the incorrect value when the source has zero in + the alpha channel, but non-zero in the color channels. + +commit f889ad9f362293f00c142aa14e87fd212aea54c1 +Author: Jonathan Morton <jonathan.morton@movial.com> +Date: Fri May 29 13:38:45 2009 -0700 + + Fixup the arm-simd and arm-neon implementations. + +commit d6dfafd9584c37d0d382c5ca974eab04209ad834 +Author: Magnus Kessler <Magnus.Kessler@gmx.net> +Date: Thu May 28 12:09:07 2009 +0100 + + pixman: define pixman_have_{mmx,sse2} on 64-bit Linux + + The refactoring of pixman removed pixman-sse2.h and pixman-mmx.h in commit + 41a9a17e0308f2075bb1bd59c4411e43a67d49ec + (http://cgit.freedesktop.org/pixman/commit/?id=41a9a17e0308f2075bb1bd59c4411e43a67d49ec). + On 64-bit Linux this breaks linking of new programs as well as execution of + existing programs with the following errors: + + ../pixman/.libs/libpixman-1.so: undefined reference to `pixman_have_mmx' + ../pixman/.libs/libpixman-1.so: undefined reference to `pixman_have_sse2' + + This patch fixes the issue for me by re-introducing the definitions for these + functions. It might be preferable, though, to create proper trivial static + inline functions instead. + + Signed-off-by: Magnus Kessler <Magnus.Kessler@gmx.net> + +commit 3d93070db88563b5a8f1e07f53f86c5e8ada8bbf +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 27 21:51:00 2009 -0400 + + Really fix PPC build. + + Add a pixman_composeFunctions variable to pixman-vmx.c. 
+ +commit 3f5c2936c67d2b0dcf08b80e11c954ba73602ee2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 27 21:15:18 2009 -0400 + + Add back pixman_composeFunctions since vmx is not ported to implementations yet + +commit 6f93d36915fe3a8d8c75d26af8d5b9ba58aba4e3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 20 06:04:15 2009 -0400 + + In _pixman_implementation_fill() don't call the delegate; call the actual implementation + +commit a5a249613ba44ff791a7415f32192b1a0cc717db +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun May 17 04:56:13 2009 -0400 + + Call the toplevel implementation for combining + +commit e5c367120adaa5ae265866336d097f0435300706 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 16 11:22:54 2009 -0400 + + Set up combiner functions for an implementation directly in combine.inc. + + Previously it would go through two big tables, and the general + implementation would select the correct one dynamically. 
+ +commit fb272d1464f12bd913d3fdbc4ec512758b5c4c98 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 16 10:24:30 2009 -0400 + + Consolidate the general implementation into one function + +commit 6a22abd899b2c226c01be055145c6ee3e469ee3c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 16 09:56:31 2009 -0400 + + Move the argument struct into pixman_image_composite_rect + +commit 4983f6b26cdd36eafbb97c21e5eb8d54ba59fa21 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 11:32:59 2009 -0400 + + Make a couple of functions static + +commit 41a9a17e0308f2075bb1bd59c4411e43a67d49ec +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 11:31:30 2009 -0400 + + Delete pixman-sse2.h and pixman-mmx.h + +commit 5dc9671b2588bfe084d69789e5c367474c5efa92 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 11:27:01 2009 -0400 + + Make the fast_path implementation run the c_fast_paths + +commit 364e218ad6a68028b4c11d051faee33f80513af4 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 11:24:28 2009 -0400 + + Split fill implementations out in the implementations + +commit 24e73d69ee99c2dc19d474b75f262e6efddfccf1 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 11:04:07 2009 -0400 + + Add alignment workaround to sse2 + +commit 1369b0b9d4ce89c50f56ec1c552b534f96273c1c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 11:03:15 2009 -0400 + + Add a general_blt() that just returns FALSE + +commit 9955b1516902d7671d41777bf1989f23cb0a87ed +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 10:56:15 2009 -0400 + + Move sse2 and mmx blt implementations to their respective files + +commit 46f0707481d50950fdb5d4588486affef0baa9ef +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 10:32:36 2009 -0400 + + Move gcc alignment workaround to pixman-sse2.c + +commit 
53150f4fcafba0a5a69fddaee4b2ae687f0a2149 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 10:28:41 2009 -0400 + + Set up SSE2 combiners + +commit 63c1ab031347dd2f26a25f29589516e1e59ba8db +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 10:23:01 2009 -0400 + + Make pixman_implementation call the right combiner + +commit c8a2c336a7a90abc094ec57a4ae15ffabf6e1763 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 10:15:58 2009 -0400 + + Use the implementation's combiners + +commit cb236a85df18f0f5e04698fb63c3895c2a2762dc +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 10:06:29 2009 -0400 + + Move SSE2 variable initializations to pixman_implementations_create_sse2 + +commit 03fa1bcb9af2cf48148b03c9a02cf5b4a7340356 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 09:50:55 2009 -0400 + + Move mmx fast path code to pixman-mmx.c + +commit 6e13149f99d7922ae84086f7867c9a9b69a49203 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 09:44:45 2009 -0400 + + Move sse2 fast path running to the sse2 implementation + +commit cb8608bba4f212aceef0cf579c650ee4988f56bd +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 09:42:42 2009 -0400 + + Change pixman_lookup_fast_path() to actually run the fast path + + Then just return in the general implementation if we ran a fast path. 
+ +commit bee5549f6b469989a45cb3bcd4a916a6799c182d +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 09:26:42 2009 -0400 + + Add _pixman_choose_implementation + +commit 713fb295761f13989bc0da31c26b3a1535ab449e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 09:21:37 2009 -0400 + + Remove fast path lookup code from pixman-general + +commit f5837da6e24cb1adf116f42724b83948c70476f0 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 09:12:04 2009 -0400 + + Beginning of sse2 implementation + +commit 9a25f0fb672c2b6aee488958cf7f7c6e9ea3a33b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 09:06:19 2009 -0400 + + Copy fast path lookup code into pixman-utils.c + +commit 248ef3ec24bfcb4759f12e1839456c0c422b994c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 08:51:09 2009 -0400 + + Initial fast path implementation + + Move fbSrcScaleNearest() here, and move + _pixman_walk_composite_region() to pixman-utils.c + +commit 2c64b2a6487114263be8f26fc9328ddc36c61b9a +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 08:14:01 2009 -0400 + + Change prototypes for compositing functions to use 32 bit integers + +commit d6345a69fb7179ce6dc71117423e83baef427071 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 05:12:52 2009 -0400 + + Add component alpha combiners in pixman-implementation.c + +commit 918f763a9111f643d5b8a1460258089e79f68fb0 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 04:52:28 2009 -0400 + + Beginning of MMX implementation + +commit 4b8f440d494f675c2ae5b9d41d950a6c79d14548 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 04:32:32 2009 -0400 + + Move entire C implementation into pixman-general.c + +commit 12726de921a621b8147d12d7e0788076bc4cc80d +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 04:04:36 2009 -0400 + + Add beginning of 
general implementation + +commit d2faa63aee2179188dba712835c40068729565ff +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 03:40:05 2009 -0400 + + Formatting + +commit a17e27c2b4afc6118e5aeae380eb96d98f982033 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 03:31:11 2009 -0400 + + Beginning of pluggable implementations + +commit 25509f4b0b3a6b17810605706e5b93e0b9f4cb08 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 20 18:28:03 2009 -0400 + + Move fbStoreExternalAlpha{,64} into pixman-bits-image.c + +commit d74ad7c0fe9bd50ae04b59806f6c2bb9e6289634 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu May 21 10:45:51 2009 -0400 + + Add new store_scanline_{32,64} in bits_image_t + + Also move fbStore and fbStore64 into pixman-bits-image.c + +commit 74f837b1a2e85f2bfcaaf5c659077e4883fe6ac7 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 23 11:33:21 2009 -0400 + + Post-release version bump + +commit a282b640becfa1bb4979382f6a49cb59a7f992e2 +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Fri May 22 12:13:57 2009 -0700 + + NEON: Remove some unneeded casts + + There are some unnecessary (void*) casts. Eliminate some of them. Doesn't + change the generated code. + +commit ff866e70e399e655ad9b5a851bb682463fdda5ac +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Fri May 22 11:56:44 2009 -0700 + + Fix uses of dst_keep + +commit 19d6669aacd8f0ba5246448e423c1cbce9cb4fd3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 22 12:07:55 2009 -0400 + + Pre-release version bump + +commit 67addb4b3800f754155c8607bca85d23d840e056 +Author: Jonathan Morton <jonathan.morton@movial.com> +Date: Fri May 22 12:01:26 2009 -0400 + + Initialize the ARM SIMD fast path array. 
+ +commit 2f1732359787f946bd1efd92be1f2f86aa91be3c +Author: Jonathan Morton <jonathan.morton@movial.com> +Date: Fri May 22 08:25:26 2009 -0700 + + Fix compile error caused by e42fae9e8364f5f0791f9fce749ab18b33acf598 + +commit 85b390cadf8c60808ed17df95885e72c082ad180 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 20 10:45:29 2009 -0400 + + Fix alpha map computation in pixman_compute_composite_region() + + According to the RENDER spec, the origin of the alpha map is + interpreted relative to the origin of the drawable of the image, not + the origin of the drawable of the alpha map. + + This commit fixes that and adds an alpha-test.c test program. + + The only use of alpha maps I have been able to find is in Qt and they + don't use a non-zero alpha origin. + +commit cb4085bdb5a40c38209f69c26b3ffe60d08ff4de +Author: Jonathan Morton <jonathan.morton@movial.com> +Date: Thu May 21 07:16:34 2009 -0400 + + Avoid malloc() by allocating a fixed set of boxes on the stack + +commit 5424d0245b28dff81032341a60dea1dd70c594b7 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu May 21 05:07:19 2009 -0400 + + Fix build on ppc. 
Pointed out by Chris Ball + +commit 14cd45dc4a63296a549bcc53453ca40beed67f51 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 05:53:40 2009 -0400 + + Make SSE2 fast paths static and remove them from the header file + +commit 0f1a212bf24490cbf80d6135bac17c5122d18cd2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 05:49:13 2009 -0400 + + Make MMX fast paths static and remove them from the header file + +commit 87f18154c1198752f2217241c568c28a103e69f6 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue May 5 09:07:32 2009 -0400 + + Notes on component alpha + +commit ac2299693f76be9c0d19a015096497d26aaf2c7d +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue May 5 08:49:49 2009 -0400 + + Note about glyphs polygons + +commit c093ee8a415602d78b53dbe936ca743ed816d393 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue May 5 07:31:52 2009 -0400 + + Notes on output kernels + +commit 90ae09f2e4826d21ebab21c6538cfa7fe1e0b90b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue May 5 03:10:44 2009 -0400 + + Further notes on the rendering pipeline + +commit fa274ffc6180fc0d57f11bf7b691fe95f344c5d9 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue May 5 01:58:48 2009 -0400 + + Some roadmap notes + +commit ba1dcec76ae1033b0cbb3048c3d82450922a02cc +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon May 4 17:39:19 2009 -0400 + + Describe alpha map in the pipeline + +commit 3fdefd683b5cbaaa4a93f1737197954f1df8bc57 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun May 3 22:46:34 2009 -0400 + + Notes on the rendering pipeline + +commit e07a4c6e8c1571f762c6f583204f16e3aca42882 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun May 3 21:07:06 2009 -0400 + + Move C fast paths to their own file pixman-fast-path.c + +commit e42fae9e8364f5f0791f9fce749ab18b33acf598 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: 
Sun May 3 21:05:45 2009 -0400 + + Move the arch specific fast path tables into their arch files + +commit 93900a591c530a310542dfcca7e41d3391dc3565 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun May 3 19:12:02 2009 -0400 + + Move CPU detection code to its own file + +commit e6e6f6350230cc2e10e7dfe0ebd89ec4b587b660 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun May 3 01:18:49 2009 -0400 + + Move conical gradient code to pixman-conical-gradient.c and delete pixman-source.c + +commit 47abb3c7659a4eb1214c358796965f92f98fc901 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun May 3 01:08:54 2009 -0400 + + Move the radial gradient code from pixman-source.c into pixman-radial-gradient.c + +commit a10b0e7e136116cea95d6717f119d92599491f27 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun May 3 00:59:36 2009 -0400 + + Duplicate some code that was shared between radial and conical gradients. + + It is going to live in separate files. 
+ +commit 9a867fa231e37d945f1dc3d18cb17359b24dbde3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun May 3 00:56:16 2009 -0400 + + Move the linear gradient code from pixman-source.c into pixman-linear-gradient.c + +commit ade664ced3b9ac64120424f0fc80dc0deef69b00 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun May 3 00:46:30 2009 -0400 + + Rename pixmanFetchSourcePict to pixmanFetchGradient + + Move the solid fill parts into pixman-solid-fill.c + +commit 8267d8d38f794c51e09f440c470f1c23c59e11aa +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 2 23:26:30 2009 -0400 + + Add a generic 64 bit fetcher and use it for gradients and transformed images + +commit ecaaef2f505fb61b383b194236b68ee59d52ecda +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 2 23:08:59 2009 -0400 + + Move the gradient walker code to its own file + +commit 51d972ecd885b05165a09d19fb3491ecb3ce813a +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 2 22:55:56 2009 -0400 + + Replace pixman_image_get_fetchers() with pixman_image_get_scanline_{32,64} + +commit b7f113200e285c003b9225de83d8fe83492717ee +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 2 22:46:20 2009 -0400 + + Set up scanline getters for bits images + +commit b496d566dcc3e277f9ed9a8e93dbb3963a6d14e6 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 2 22:42:59 2009 -0400 + + Set up scanline getters for source pictures + +commit c62f2a14f433a07c5333cfefeed934214507d63a +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 2 22:26:23 2009 -0400 + + Store get_scanline() functions in the image struct + +commit 0b497b33fe8bdfc404ed377f3b7525b4e5c11ad5 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 2 22:00:25 2009 -0400 + + Add stubs for property_changed virtual functions + +commit 7bb615f6baf39e3d7c31a8ce521c0ff0b5172d7e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> 
+Date: Sat May 2 21:14:36 2009 -0400 + + Split pixel images into pixman-bits-image.c + +commit 53bae97c7e7bf9b20ddfd400fd0bd11d03431d39 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 2 21:08:12 2009 -0400 + + Split conical gradient images into pixman-conical-gradient.c + +commit c43c3628935722f489d5e5359413dbb17d4c4a44 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 2 21:06:23 2009 -0400 + + Split radial gradient images into pixman-radial-gradient.c + +commit 76418e388e1439f8e7f33eb777856c8eb475a2fc +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 2 20:54:17 2009 -0400 + + Split linear gradient images into pixman-linear-gradient.c + +commit 58de62bfada0d0ca945350fe3da38dee48aac7b4 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 2 20:40:16 2009 -0400 + + Split solid fill images into pixman-solid-fill.c + +commit aa234489b0653ef63cf1b3d162aa7a339779c4da +Merge: 7a9bfa1 010e286 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 16 12:51:05 2009 -0400 + + Merge branch 'master' of git+ssh://sandmann@git.freedesktop.org/git/pixman + +commit 010e28653f95bb78215e3cacb6a4f47d9a289fde +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 16 08:33:35 2009 -0400 + + Don't read potentially uninitialized data in pixman_CombineMaskU() + + This is mainly to quiet valgrind. The data in question would only be + uninitialized when the corresponding mask pixel was zero, so the end + result is zero in any case. 
+ +commit 822cd47562c138002b45b24e6d4e25de3893088d +Author: Loïc Minier <lool@dooz.org> +Date: Fri May 15 16:11:16 2009 +0200 + + Fix fd check after auxv file open() [Bug 21749] + + Signed-off-by: Julien Cristau <jcristau@debian.org> + +commit 58e08374e1cd01371786469787b3709eca27f463 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 15 07:47:59 2009 -0400 + + Don't call hyphenated git commands as they don't exist anymore + +commit b365547e7dd9cb02fb43d85ae4104903083c4ebf +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 15 07:47:22 2009 -0400 + + Post-release version bump + +commit 564ecfe8e9a1aba41f5a798de461294ae2fe1c6d +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 15 06:57:59 2009 -0400 + + Pre-release version bump + +commit e483af47db769fcba559dda72699bc80d154b575 +Author: Adam Jackson <ajax@nwnk.net> +Date: Fri May 15 06:26:48 2009 -0400 + + Fix overflows during trap rasterization. [Bug 16560]. + + Avoid overflows when rasterizing traps that fall entirely in the space + between the final sample row and the end of the coordinate system, or + in the space between the beginning of the coordinate system and the + first sample row. Such traps don't contain any sample points, so the + top and bottom of the edges can safely be moved to the beginning/end. 
+ +commit 7a9bfa146154e555a9e2e8a807bb7df2b30f860c +Merge: 81b94d7 e17fc72 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 13:10:01 2009 -0400 + + Merge branch 'master' of git+ssh://sandmann@git.freedesktop.org/git/pixman + +commit 81b94d725834d03f7856ae0d505a7340f1135326 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 13:09:45 2009 -0400 + + Add test that shows difference in clipping with transformed vs untransformed + +commit e17fc72e958e1ddee0b24e8a90ae9609e1e44391 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 06:17:55 2009 -0400 + + Remove unused CombineMask64 type + +commit a1bc6bf15995fae8be2de61f859fcc73d80f7b64 +Merge: ffce146 e74a284 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed May 13 05:58:36 2009 -0400 + + Merge branch 'master' into refactor + +commit e74a2847ddcb3b4c1675efaaa923e78556277dff +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 9 08:34:12 2009 -0400 + + Fix divide-by-zero crash + +commit a95c39c7d533ed7d6c8c7708604c5844cdc22dfe +Author: Jonathan Morton <jmorton@sd070.hel.movial.fi> +Date: Thu May 7 11:54:15 2009 +0300 + + Test USE_GCC_INLINE_ASM instead of USE_NEON_INLINE_ASM. + + The former is now Autoconf enabled, and does what it says on the tin. + +commit 62af131a5a7222c58ed9aac38b7dddb75c0e87f7 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue May 5 15:25:43 2009 +0300 + + Fixed rendering bug for source alpha == 0 in OVER fastpath functions + + Handling of the case when source alpha is zero was keeping destination + pixel unmodified. But this is different from how generic path behaves. + For example fbOver(0x00200483, 0x9CAC7E9F) == 0x9CCC82FF and the + destination pixel changes from 0x9CAC7E9F to 0x9CCC82FF in spite + of having zero alpha. 
+ + Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com> + +commit a589afa3a7f3430a5b37bb9efb6574fe6ac8d9af +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Wed May 6 13:30:29 2009 -0400 + + Enable NEON assembly when we can build it + + This adds detection for ARM NEON build support as well as gnu assembler + syntax detection from Jonathan Morton. + +commit ffce1461502c9cc4dbf613b64eddba4c4de6a4d4 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat May 2 19:55:10 2009 -0400 + + Remove unused combineMaskU functions + +commit 38e5929400ea8d8bdf0830006f761a5498f558a5 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 14:18:32 2009 -0400 + + Optimize source reading in combine4 + +commit 3d6720a22777523c428914c2f84439d240778484 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 03:04:36 2009 -0400 + + Enable mmxCombineSaturateU + +commit 742d444f96bf160d2b7707cc894dd9b516f3179c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 03:04:29 2009 -0400 + + Enable mmxCombineAddU + +commit fd31818cfba0a750672bf50fbe550fa29ec77d99 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 03:04:23 2009 -0400 + + Enable mmxCombineXorU + +commit b7fe2f3378c6fb0828e863cb1a2df9191fb3e25e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 03:04:16 2009 -0400 + + Enable mmxCombineAtopReverseU + +commit 55a703f88c60acef5f1053d2d409c6e7048a714c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 03:04:10 2009 -0400 + + Enable mmxCombineAtopU + +commit f747b4184865c5e8b1c36c7116b6a47560f26e8d +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 03:04:02 2009 -0400 + + Enable mmxCombineOutReverseU + +commit 3c6fd2699dc2741b6ad121eb441a32b52b169a82 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 03:03:50 2009 -0400 + + Enable mmxCombineOutU + +commit 9d13da03b7d4525aa8ffbb9b787dee8964323810 +Author: 
Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 03:03:39 2009 -0400 + + Enable mmxCombineInReverseU + +commit 2262f0084722d8548071730f8fcbe318560e9fbf +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 03:03:32 2009 -0400 + + Enable mmxCombineInU + +commit 5e5c78a6cc962f154b749d954c35ac663f8ac483 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 03:03:24 2009 -0400 + + Enable mmxCombineOverReverseU + +commit 81342af3bda044c059a13a37a9ede542212dc5a2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 03:03:16 2009 -0400 + + Enable mmxCombineOverU + +commit 3d684556dbdb087fa6d0631f06ccde38bb02dea5 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 03:02:58 2009 -0400 + + Implement the mmx combiners with masks (disabled) + +commit cdb6aa49bec3595a00eb203c258111c713de9bbc +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 01:01:09 2009 -0400 + + Enable sse2CombineSaturateU + +commit 29528b9523e779ff59029f11f79f1e22cbeaf4cd +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 01:00:52 2009 -0400 + + Enable sse2CombineAddU + +commit 374ad0c363baf93e724409f575e1bbd7cfd8914a +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 01:00:47 2009 -0400 + + Enable sse2CombineAtopXorU + +commit c1bdbff80ac724cab8213d41f91c525e10ca9ff1 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 01:00:39 2009 -0400 + + Enable sse2CombineAtopReverseU + +commit 74d79f271c45807bf23b395e7050130f7da1139c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 01:00:33 2009 -0400 + + Enable sse2CombineAtopU + +commit c3d92fe51869c4e7b4ed83fb3bed5d0e7e651782 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 01:00:26 2009 -0400 + + Enable sse2CombineOutReverseU + +commit 53809bde5265378c400792bdb0b2639a0cde6c08 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 
01:00:20 2009 -0400 + + Enable sse2CombineOutU + +commit 9293a51323e7e2b4aedb75c3fa55475aa4a269e7 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 01:00:12 2009 -0400 + + Enable sse2CombineInReverseU + +commit d45c0db9603a84989d59e5e1519b424ab756f221 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 01:00:07 2009 -0400 + + Enable sse2CombineInU + +commit 92c1199bf7e9379beca52fa880a0a436ffdda7e2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 00:59:59 2009 -0400 + + Enable sse2CombineOverReverseU + +commit d1879bc048be083198a35bb037273171bc07a211 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 00:59:51 2009 -0400 + + Enable sse2CombineOverU + +commit 22fda2d1aba7368a7545d1659b737e695a6c5b26 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 00:59:29 2009 -0400 + + Implement the sse2 combiners with masks (disabled) + +commit 1ddd91bfee87c13ce18d82d9ab9b2fb2de7cca22 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri May 1 00:58:38 2009 -0400 + + Use memcpy() in fbCombineSrcU when there is no mask + +commit 24012542295f80455c8df01262099b98d2b2de37 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Apr 30 17:59:09 2009 -0400 + + Have the generic code go through the component alpha path always + +commit 8b2e08d494c6da1512f44d0754b0f52a184cc6f3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Apr 30 17:53:48 2009 -0400 + + Get rid of separate combineU and combineC types + +commit 6d6e33d33818b56982f15da1943da499db220bc1 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Apr 30 17:14:04 2009 -0400 + + Change type of combineU to take a mask + +commit fe571035f8889fd12892f2257b64536feced3f4e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Apr 30 19:50:24 2009 -0400 + + Make combineC function type take const args + + Fix type of mmx component alpha combiners + + Fix type of sse2 
component alpha combiners + + Fix type of vmx component alpha combiners + +commit f9a9ce8940c5644f25721abe6af6c72c4eabcfe7 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Apr 30 02:54:32 2009 -0400 + + Remove accessor version of pixman-compose.c + +commit 0236393b031798a36144820a6254b646f9279580 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Apr 30 02:51:09 2009 -0400 + + Remove unused xoff/yoff variables + +commit d0a6c1e9a5447e982dc4d544146c1b5234e490cf +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Apr 30 02:50:18 2009 -0400 + + Move store logic into pixman-image.c + +commit 363be5285950d20cc77cf4a7eb50d5f1f5fea0f7 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Apr 30 02:42:47 2009 -0400 + + Move fetch logic to pixman-image.c + +commit bf879f1b37cfe5ee2ec921d26bf9d9126ca59b9c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Apr 30 02:25:48 2009 -0400 + + Simplify logic to compute store function + +commit 20cedd756f54bc735fe25ab29aafd3cdfeddda30 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Apr 30 02:14:13 2009 -0400 + + Formatting + +commit 4c74f519ca3788fe357caf54e22e6cab609b681e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Apr 30 02:08:44 2009 -0400 + + Factor out duplicated fetcher finding code + +commit eb5d30a9d3bfb1bddaf9e60e2092353fe6b1dd48 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Apr 30 02:03:03 2009 -0400 + + Get rid of SOURCE_IMAGE_CLASS_NEITHER again + +commit 87922006e506a252c81d42f0c1bacb59d1c67e60 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Apr 30 01:49:13 2009 -0400 + + More refactoring notes + +commit 8c646172743568584f7cefd3177b410fd3b22b2d +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Apr 29 23:13:14 2009 -0400 + + Add notes on how Render actually works + +commit 57a3d09b01834103e61785c6269d152bdfd91a4f +Author: Søren Sandmann Pedersen 
<sandmann@redhat.com> +Date: Wed Apr 29 20:15:20 2009 -0400 + + Move calls of the classification out of the if statements. + +commit cb73922ab9ab7d627f059601a03714cfff07d25b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Apr 29 19:55:19 2009 -0400 + + Move SourcePictClassify into pixman-image.c + + In preparation for making pixman_image_t more of a real object, move + SourcePictClassify into pixman-image.c and expose it through a + function pointer. Later, this function will be split into smaller + functions depending on the exact type of the image. + +commit 3d73ce6813743811ff4413df554b438d3790c361 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Apr 29 01:44:56 2009 -0400 + + More refactoring notes + +commit 4d255141f78451ec5edb27ed29437651d6f64491 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Apr 28 23:02:49 2009 -0400 + + Add refactoring notes + +commit f98c800fba076197c56df7a990a30a98a115b9e0 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon May 4 18:54:27 2009 -0400 + + Mention utils.h in test/Makefile.am so that make dist will pick it up + + Pointed out by Julien Cristau. 
+ +commit e047f605e2e9ef4f23e63c38259c5ceb720060dc +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon May 4 18:34:54 2009 -0400 + + Move all the GTK+ test code into its own file, utils.c + +commit c882260d4b481283c3d59385bfa31bfeffc0a58f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Apr 28 22:49:31 2009 -0400 + + Include string.h and fix warning in trap-test.c + +commit c74becfdb939af56d19b1d8cef94f3cfc11f238c +Author: Alan Coopersmith <alan.coopersmith@sun.com> +Date: Tue Apr 28 08:44:47 2009 -0700 + + Add solaris-hwcap.mapfile to EXTRA_DIST + + Signed-off-by: Alan Coopersmith <alan.coopersmith@sun.com> + +commit dbb72c02fda0d59d0da4ba100c7120ebc244835f +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon Apr 20 16:13:44 2009 +0300 + + Updated scaling test to support rgb565, source clipping, etc. + + Now test provides better coverage for various image scaling + cases. Unused byte for x8r8g8b8 format is ignored. Running + the test program without any command line options provides + a PASS/FAIL verdict based on the precalculated crc32 value + for using pixman with all the fastpath functions disabled, + which should simplify testing for correctness. + +commit 880afeecc0d8bd610733292fd1cb692bba98dd5b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Apr 24 21:35:46 2009 -0400 + + Add trap-crasher.c test program + + Based off of Pavel Kankovsky's test case in bug 16560.
+ +commit eac663a7c8d254842224f1aed992c91691a425b8 +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Tue Apr 21 17:32:38 2009 -0400 + + Enable fbCompositeSolidMask_nx8x8888neon + +commit 99fd917adf9bf649fb94b32feae466250433a6ff +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Tue Apr 21 17:32:38 2009 -0400 + + Enable fbCompositeSolidMask_nx8x0565neon + +commit c0436ed25598bff4b6c426faf528e56726632ed2 +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Tue Apr 21 17:32:38 2009 -0400 + + Enable fbCompositeSrc_8888x8x8888neon + +commit f2b3dfcce4add4027163289c0a282430418a4857 +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Tue Apr 21 17:32:38 2009 -0400 + + Enable fbCompositeSrc_8888x8888neon + +commit 6da2f2b360225b6f1f20c00734db66499b94bfa0 +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Tue Apr 21 17:32:37 2009 -0400 + + Enable fbCompositeSrc_x888x0565neon + +commit b8625fa2dd0811092f6c96b31596277bb0c61021 +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Tue Apr 21 17:32:37 2009 -0400 + + Enable fbCompositeSrcAdd_8000x8000neon + +commit 452ed13867d4a7b1509030f7f79cbb17b51e7b36 +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Tue Apr 21 17:32:37 2009 -0400 + + Enable fbCompositeSrcAdd_8888x8x8neon + +commit f2af00bf02dcf3e7e27ac3e035d41c387fc9400b +Author: Ian Rickards <Ian.Rickards@arm.com> +Date: Tue Apr 21 17:32:36 2009 -0400 + + Add support for ARM NEON fast paths + + Currently disabled + +commit 9fdca26d3087da5a620d720f5a56ccbfdf55587e +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Tue Apr 21 17:32:36 2009 -0400 + + Add support for doing ARM simd detection on Windows + +commit 2423118e239e9c85dd68403bf8b97b30965df38e +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Wed Apr 15 10:38:37 2009 -0400 + + Add support for doing ARM simd detection on Windows + +commit 3d9716f44ea799e003c19783f087239fe89c88dc +Author: Søren Sandmann Pedersen <ssp@dhcp-100-2-40.bos.redhat.com> +Date: Thu Apr 9 20:38:53 2009 
-0400 + + Post-release version bump + +commit 0a63858b07e1d7bccf54a69881e50246a530785d +Author: Søren Sandmann Pedersen <ssp@dhcp-100-2-40.bos.redhat.com> +Date: Thu Apr 9 16:38:05 2009 -0400 + + Pre-release version bump + +commit 86ec5419b238cba841bed303679fdaf8b4299a6f +Author: Søren Sandmann Pedersen <ssp@dhcp-100-2-40.bos.redhat.com> +Date: Mon Apr 6 15:25:03 2009 -0400 + + Fix search and replace issue + +commit 179750544d911adf5b01749e33b3ef44a62b0b0e +Author: Alan Coopersmith <alan.coopersmith@sun.com> +Date: Fri Apr 3 13:02:37 2009 -0700 + + Replace custom type ullong with standard uint64_t in pixman-mmx.c + + Signed-off-by: Alan Coopersmith <alan.coopersmith@sun.com> + +commit fd7449c4855e77d31770f9042ba43e186c541ce5 +Author: Alan Coopersmith <alan.coopersmith@sun.com> +Date: Fri Apr 3 12:40:10 2009 -0700 + + Fix MMX & SSE intrinsics to work with Sun compilers & Solaris + + Signed-off-by: Alan Coopersmith <alan.coopersmith@sun.com> + +commit 47dabe1d025c420a07ac940ab46e5d00c752d2d8 +Author: Søren Sandmann Pedersen <ssp@dhcp-100-2-40.bos.redhat.com> +Date: Sat Apr 4 06:04:42 2009 -0400 + + Fix pixbuf_from_argb32() to take premultiplied alpha into account + +commit fb8e9b16d5760aa82c1ca4c180faed964a4e7ff5 +Author: Søren Sandmann Pedersen <ssp@dhcp-100-2-40.bos.redhat.com> +Date: Sat Apr 4 05:57:20 2009 -0400 + + Add a new trap-test test program. + + Also some tweaks to the build system in the test directory to make it + build non-gtk+-using application when use of gtk+ is disabled. + +commit cdcbd9be3a2318bf650b8f56fea51acb5991b075 +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Wed Apr 1 18:26:24 2009 -0400 + + Set srcRepeat = False when using fbCompositeSrcScaleNearest + + NORMAL repeat was broken (the optimized function can handle repeat operation + itself and can be screwed up if 'pixman_walk_composite_region' tries to help it + by splitting the work into handling multiple separate areas). 
+ + Splitting work into handling different areas does not work right for the + transform case (and it is never used for generic path). The point is that this + splitting only has full pixel precision at the moment, while correct blitting + needs to preserve some fractional part in calculations when moving from one + "tile" to another. + +commit a9adae3dc38764fe055b66e38175be5220fb3f9a +Author: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Tue Mar 31 20:58:56 2009 +0300 + + Image scaling regression test script + + This test script can help in finding regressions in image scaling + fastpath implementations. It uses test program compiled with + and without fastpath code and can compare results of execution + for different pseudorandom compositing operations involving scaling. + + Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com> + +commit 29e8556814ddddf269da90989e0de6d406d0afe6 +Author: Mark Kettenis <kettenis@openbsd.org> +Date: Tue Mar 24 14:28:38 2009 -0400 + + Add support for BGRA and BGRx formats. + +commit e92417b7805315ff38a3d5758bd7075418d3ae6d +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Mar 17 13:28:31 2009 +0000 + + Check for failure when intersecting regions. + + Need to check and report the failure of intersecting the rectangle with + the clip region during pixman_image_fill_rectangles(). + +commit 9ba3236354deb472edf109b6842a5b8749bd746c +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Tue Mar 17 08:39:48 2009 +0000 + + Check for allocation errors during pixman_op() + + Propagate the error returns from pixman_rect_alloc(). 
+ +commit 2664c2d57552176052d753def2d307f63c2c9ff4 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Tue Feb 10 19:47:19 2009 -0500 + + Remove stale comment + +commit ced5a4f356f5f2322a8aeb2876348707cf56dbcd +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Feb 6 19:41:24 2009 -0500 + + Add license and copyright holders to COPYING + +commit 2d9c7cd84b276ebe2ff72d03c34a2d7f4f98b9f9 +Author: Thomas Jaeger <ThJaeger@gmail.com> +Date: Tue Jan 20 18:40:46 2009 -0500 + + Implement PIXMAN_REPEAT_REFLECT for images + +commit 3d0911dee7f1d00b1e61fb183ab337c693b49adc +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Feb 6 18:03:31 2009 -0500 + + Reinstate SrcScaledNearest optimization + +commit 6815e754d31d5a431028c8ca62911c07b753edc8 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Feb 6 18:01:57 2009 -0500 + + Bump version number post-release + +commit 6df6a43dc7119a510cf9db2e62fcc970a539e5a3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Feb 6 17:31:32 2009 -0500 + + Bump version number pre release + +commit 6e6c7ac5e0bce2e1893675eb45a8d98876085794 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Feb 6 17:30:24 2009 -0500 + + Comment out SrcScaledNearest optimization that hasn't been tested much + +commit e651118b67111d36193f55a752d13e66df5ca953 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Feb 6 17:29:04 2009 -0500 + + Fix release targets to remove all hashfiles before generating tar balls + +commit 072d848f592530973f1f0a0066a320ec5965625c +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Fri Jan 9 12:48:22 2009 -0500 + + Add pixman-matrix.c to Makefile.win32 + +commit c55db2905706ae78364bfb63dcfa62c00cc486c7 +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Fri Jan 9 12:48:20 2009 -0500 + + Conditionally include config.h in pixman-matrix.c to fix win32 build + +commit 8f98ffadf58de1e28294b3ab2c09f380ccc535e5 +Author: Chris Wilson 
<chris@chris-wilson.co.uk> +Date: Sat Dec 20 17:18:51 2008 +0000 + + Fix pixman-1-uninstalled.pc to point to the libtool library + + Otherwise we fail to link when compiling cairo against the uninstalled + library. + +commit 9d726712c22d8555d00b9f1ebacd5425dc9a5b61 +Author: Chris Wilson <chris@chris-wilson.co.uk> +Date: Fri Nov 21 01:20:38 2008 +0000 + + Allocate initial array of RegionInfo on the stack. + + The region validate() code is frequently called by cairo as it is used to + extract regions from the trapezoids for fast-paths through the drawing + code and also for fast-path clipping and the RegionInfo allocation (as + well as the pixman_rect_alloc during the final union) appears as a hot + spot on application memory profiles. + +commit 08530f5bf23386355a19b83db88173302c7a5300 +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Wed Dec 17 10:35:03 2008 -0800 + + Don't treat PIXMAN_TYPE_YUY2 and PIXMAN_TYPE_YV12 as PIXMAN_FORMAT_COLOR. + + Various pieces of code expect PIXMAN_FORMAT_COLOR (and its less cool older + brother, PICT_FORMAT_COLOR) formats to have ARGB bits, and the YUV formats do + not. + +commit 4546234c18f5bb5e2d193d2fa8ff5c3ca78bc716 +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Fri Dec 5 12:01:03 2008 -0500 + + [arm-simd] Add a comment about aligning source and destination pointers. + +commit 985829f26b15aaa3e336127412c771027577313f +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Fri Dec 5 11:45:03 2008 -0500 + + Check alignment of 'src' pointer in optimized ARM routines + + fbCompositeSrcAdd_8000x8000arm() tries to align 'dst' already but must check + 'src' too. Otherwise, the next 4-byte copy loop might access an odd 'src' address + causing an alignment trap. 
+ + Patch from Enrico Scholz + +commit 4238047c228ca885a24bd341aa48a3ad54590837 +Merge: bfa76d4 d625ca5 +Author: Keith Packard <keithp@keithp.com> +Date: Tue Nov 25 22:04:29 2008 -0800 + + Merge commit 'origin/master' + +commit bfa76d47ac85c88fbb9d7226f09c6c6654b10342 +Author: Keith Packard <keithp@keithp.com> +Date: Tue Nov 25 22:03:55 2008 -0800 + + Bump to 0.13.3 after 0.13.2 release + +commit 0191d1a41ea273e5b1920ed83dfa33820870ebae +Author: Keith Packard <keithp@keithp.com> +Date: Tue Nov 25 21:37:54 2008 -0800 + + Bump version to 0.13.2 for release + +commit 6002963ea32d05592da05a6eeafd5d8ee9d9d496 +Author: Keith Packard <keithp@keithp.com> +Date: Mon Nov 24 11:49:32 2008 -0800 + + Move matrix operations from X server to pixman + + Signed-off-by: Keith Packard <keithp@keithp.com> + +commit d625ca5f291c01b3672648e5897f30a17326367f +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Mon Nov 24 15:11:24 2008 -0500 + + Optimize rectilinear nearest-neighbour scaling + + Add a special case for a source transformation that is only a scale and + preserves rectangular pixels and doesn't rotate the image. Currently, only + SOURCE is special cased, however I plan to do more work in this area as needed. + The biggest advantage the specialization currently has is writing directly to + the destination surface instead of a temporary scanline buffer. However, it is + still pretty unoptimized but I want to keep things simple for now. + +commit 0c3dd54f6bf02156e4b94a2b5bfadef148715643 +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Fri Nov 21 21:35:30 2008 -0500 + + Unify scanline buffer types + + Change the type of the stack based scanline buffer to uint8_t to match the rest + of the variables. Also premultiply the scanline buffer size by sizeof(uint32_t) + because the bpp can be either sizeof(uint32_t) or sizeof(uint64_t). 
+ +commit e201504da81cd9ceb7d20d12b3a923289f93b108 +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Thu Nov 20 10:41:40 2008 -0500 + + [arm-simd] Fix typo found by 王新拓 + + 'and r7, %[upper_component_mask]' appears to be a shorthand for + 'and r7, %[upper_component_mask], %[upper_component_mask]'. Use + the explicit form to avoid any confusion. + +commit cd2a79ab81045aa7e35bc901081e57dea6ac4845 +Author: David Woodhouse <dwmw2@infradead.org> +Date: Tue Nov 18 16:01:11 2008 -0500 + + Less fragile Linux altivec detection + + Instead of using really fragile SIGILL trapping, use a more reliable + detection method by checking what the CPU really supports. + + https://bugzilla.redhat.com/show_bug.cgi?id=472000 + https://bugzilla.redhat.com/show_bug.cgi?id=451831 + +commit b1b0507c24d7a3afb1ee09fc23783fa22cd0e56e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Nov 10 23:18:09 2008 -0500 + + Make comments about PIXMAN_REFLECT more useful + +commit 056c6d97db753a928ac2794ec215c86cceffe901 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Nov 5 23:58:56 2008 -0500 + + Various formatting fixes, and a simplification of the adjusting code + +commit 607562b2a6cc8536350d0a9bcb6fe99224ad4f1f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Nov 5 17:51:55 2008 -0500 + + Inline the fetchers + +commit 7a1717e605e502b52ebca999991d2e07791e0cd1 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Nov 5 17:23:45 2008 -0500 + + Use fetch_nearest() instead of having duplicated code + +commit 49647e705438e0827d4a0e955dfaaf01cfae7524 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Nov 5 17:12:22 2008 -0500 + + Do the fetch_bilinear inline rather than in separate functions + +commit c8b314c43bd70a1c375aef3cacfe717ca9dbc85b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Nov 5 16:59:56 2008 -0500 + + Make use of fetch_bilinear() in the various bilinear implementations + +commit
bad1ee39d3cc27ec07303f6484515a886430cda6 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Nov 5 16:38:33 2008 -0500 + + Add fetch_bilinear function + +commit 95f2af9584f8f4327ddf6d6948dee17ab48ad8b3 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Mon Nov 3 13:09:02 2008 -0500 + + Add missing pixman-arm-simd.[ch] files. + + Pointed out by Chris Ball and Adrian Bunk. + +commit 1d5bb7a3f17fb88cdabee8a27b79fb9fb129e189 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Oct 31 15:08:03 2008 -0400 + + Include inttypes.h on Digital Tru64 + + Bug 18007, reported by Joonas Pihlaja. + +commit f9db3ec7b94db45f388b210d7bed639048f1aa23 +Author: Adrian Bunk <adrian.bunk@movial.fi> +Date: Fri Oct 31 14:59:30 2008 -0400 + + Rename the current ARM code to ARM SIMD + + This code is only for CPUs supporting the SIMD instructions, not for all ARM + CPUs. + + I stumbled upon the recent commit with the ARM SIMD code while preparing a + patch that models the patch from #13445 after the MMX and SSE2 cases: + + The ARM SIMD option currently uses --disable-arm, although this code is only + for CPUs >= ARMv6. That's as if one would call the option to disable the SSE2 + code --disable-x86. + + This patch therefore renames the configure option and the function and file + names to arm-simd/arm_simd. + +commit b9e2dd783e34969aa7c41877b0aa49920788637b +Author: Alan Coopersmith <alan.coopersmith@sun.com> +Date: Wed Oct 29 18:13:59 2008 -0700 + + Define force_inline for compilers other than gcc & MS Visual C + +commit 3a775610f22d09fc030804c6dd02208d36920b54 +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Wed Oct 29 17:14:41 2008 -0700 + + Cleanup inline definitions + + Replace all inline definitions with a common one in pixman-private.h. Also, add + 'force_inline' and replace all existing uses of 'inline' as a forced inline + with 'force_inline'.
+ +commit d68ebb7701e61d7f19c87d2d3686eb30e85e2ede +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Oct 15 18:46:38 2008 -0400 + + Remove unused AddMul_256 macro + +commit 8f5cb6916ddea072e453681d80a879a64bd22f8c +Author: Benjamin Otte <otte@gnome.org> +Date: Mon Oct 13 00:05:40 2008 +0200 + + unswitch red and blue + + The previous code assumed a color format of ABGR when naming the + variables. The true color format is ARGB. This did not cause any bugs + because no functions rely on the order of colors so far. This patch + renames the variables just to avoid confusion. + +commit f5d4e01c399d6d23fd7e4cfaa26e0b07e2279690 +Author: Benjamin Otte <otte@gnome.org> +Date: Wed Oct 8 21:21:58 2008 +0200 + + update .gitignore + +commit 69dadf231283fadcb117b4d9e799e8633a0e4dab +Author: Benjamin Otte <otte@gnome.org> +Date: Wed Oct 8 21:18:12 2008 +0200 + + force alignment of arg pointer in potential SSE users + + Fix for bug 17824 + +commit 1c5de7b05831b3d66821707276b71974a232f5c7 +Author: David Müller <dave.mueller@gmx.ch> +Date: Tue Sep 23 07:45:51 2008 -0400 + + Move _mm_empty() to correct place + + The "fbComposeSetupSSE2()" function is guarding most of its code + depending on the capabilities of the CPU, but unfortunately the call + to "_mm_empty()" is not part of this code path but executed + unconditionally. This results in an "illegal instruction" crash on + non-MMX / non-SSE capable CPUs caused by the "emms" instruction + (embedded in "_mm_empty()"). + + Fix bug 17729.
+ +commit aadcc7f011004794cf88c126641ef8258183878f +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Sun Sep 21 11:01:07 2008 -0400 + + Update TODO + +commit 9cb60e142bad01fd54cb7e6f3fa2504ddc87a7da +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Sun Sep 21 11:00:33 2008 -0400 + + Make use of SSE2 blt/fill in more places + +commit d69d2705d16c813756acd8a685dc9a28a178423a +Merge: b5a9002 412b0d5 +Author: Julien Cristau <jcristau@debian.org> +Date: Fri Sep 19 10:29:05 2008 +0200 + + Merge tag 'pixman-0.12.0' + + Conflicts: + + configure.ac + +commit b5a9002d2e3f0d961bcfe2acee6e9a0f1c583d5d +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Thu Sep 18 19:28:31 2008 -0400 + + Inline fetch functions in pixman-transformed.c + + There was significant performance overhead associated with indirect + call to the fetch functions, so inline them. + + Also, the old code assumed that if the number of rectangles in the + source clip was 1, then the clip was identical to the image + boundaries. Fix that by running a full region_contains_point() + whenever the source clip is different from pict->common.full_region. + + Based on a patch from Antoine Azar. + +commit a57e7bf34a2f312285c8065b8b1328bd5650788a +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Thu Sep 18 11:44:36 2008 -0400 + + Fix inner branch code. + + The entire source must be 0 not just the alpha component. + Fix some comments too. 
+ +commit 54ee41a6603441bf09bbc95c2fec3d0a41c82d0f +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Wed Sep 17 14:38:47 2008 -0400 + + Post release version bump + +commit 24de3b74f4db3db569e1edface5e5804b58c02b5 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Wed Sep 17 14:03:23 2008 -0400 + + Pre-release version bump + +commit 7180230d4d87c55dfef1e17a0cc3b125d45aa3a0 +Author: Vladimir Vukicevic <vladimir@slide.(none)> +Date: Wed Sep 17 16:01:31 2008 -0400 + + Add SRC x888x0565 C fast path + +commit d0b181f347ef4720d130beee3f03196afbd28aba +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Wed Sep 17 15:53:20 2008 -0400 + + Add support for ARMv6 SIMD fastpaths. + +commit 412b0d5cbc2c0a5200649cbb0b5e26f8b874437d +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Wed Sep 17 14:03:23 2008 -0400 + + Pre-release version bump + +commit 3f5d6f90b753175a888f36a93d1e79fdc80d95de +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Wed Sep 17 09:50:57 2008 -0400 + + Don't include stdio.h + +commit eba402092082bf48072671e04e224589af872acd +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Sun Sep 14 14:58:00 2008 -0400 + + [sse2] Fix rounding bug in conversion from 565 to 8888 + + When converting from 565 to 8888, replicate the topmost bits instead + of appending zeros. + +commit 6f00d98f87c019849c611d27e9593c5eecfef4c2 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Tue Sep 9 10:49:56 2008 -0400 + + Fix for bug 17477. + + over_2x128 was changing the alphaLo and alphaHi arguments, causing + stripes. 
+ +commit bf76505cc6cc5e54c25eb145748e9e364fb367e9 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Sun Sep 7 00:40:09 2008 -0400 + + Update TODO + +commit da18a5675b3107c9bf99e228d85619d247fa19a6 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Sun Sep 7 00:13:10 2008 -0400 + + Extend clip-test to demonstrate a bug in source clipping + +commit 00f3d6ef22b5a062323208fd540a17ca65dca42b +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Sat Sep 6 23:49:25 2008 -0400 + + Fix bug in pixman_image_is_opaque() + + Non-repeating gradient images would be reported as opaque. Also add + new test program to test source clipping. + +commit d5b4fd7e11c2f2b2e8ab3cb95bef252ce142982e +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Sat Sep 6 06:17:32 2008 -0400 + + Update RELEASING + +commit 35fcdf352a29241f235f2bc7a692c20ad8baf240 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Sat Sep 6 06:15:31 2008 -0400 + + Bump release + +commit 5e7388540f2cd201331cb3d1f616e3c300dbc45f +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Sat Sep 6 05:14:18 2008 -0400 + + Check for __sun || __sun in pixman.h. Update TODO + + Reported by Bernd Nies. + +commit f369d612b3d65529e4b10d8a0b1e015407357d9b +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Sat Sep 6 04:33:16 2008 -0400 + + Use error instead of #error in a couple of other places + +commit f921c8c57ffdd6d0afd3d41d50e3565084ebd49c +Author: Peter O'Gorman <pogma@thewrittenword.com> +Date: Fri Aug 15 15:00:24 2008 -0500 + + Minor portability fixes + + Use AC_C_INLINE to figure out `inline'. 
+ IRIX compiler does not exit with a non-zero exit status when it sees #error + +commit e2cbe1a0a4db750ab05d804901f155adb312746b +Author: Frédéric Plourde <frederic.plourde@polymtl.ca> +Date: Thu Sep 4 16:30:21 2008 -0400 + + Win32 build system fixes + + Signed-off-by: Søren Sandmann Pedersen <sandmann@daimi.au.dk> + +commit ed862f1b2f62ee27884b9b429c54162039f3cb10 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Thu Sep 4 16:21:08 2008 -0400 + + Make sure pixman-combine{32,64}.h are disted + +commit f9d3f372f907c997abe4c4a65cc4a3dbe0bb41e2 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Aug 24 00:40:16 2008 -0400 + + Rename pixman-sse.h pixman-sse2.h + +commit fdff58cfa2ed77d2ceb38f48711ac5c91513aab1 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Aug 23 23:59:49 2008 -0400 + + A few other renamings of SSE->SSE2 + +commit 9bfa8aaf17b256d90832802dcd386c05b904b97e +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Aug 23 23:54:24 2008 -0400 + + Be consistent in naming SSE2 related things SSE2 + +commit 00841cb314a3b737dc5f492e113f36c19ba336e1 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sat Aug 23 23:42:36 2008 -0400 + + Remove use of MMX extensions. + + They were never a big improvement anyway, and now that we have SSE2 + support, they would only be useful on Pentium IIIs, and only if + explicitly enabled, which most distributions couldn't do anyway. 
+ +commit 3cd6acfe74dabfbc18af49bed077c2a39b9a742d +Author: Andre Tupinamba <andrelrt@gmail.com> +Date: Thu Aug 21 14:43:17 2008 -0700 + + [sse2] Change pixman-sse to pass __mm128i args as pointers, so as not to confuse MSVC + +commit 9b9f7b59e5ce17735157ca9b154e8bc545f5c96b +Author: Ginn Chen <ginn.chen@sun.com> +Date: Thu Aug 21 14:21:01 2008 -0700 + + Use hidden attribute for private functions when compiling with Sun Studio + + https://bugs.freedesktop.org/show_bug.cgi?id=17183 + + Signed-off-by: Alan Coopersmith <alan.coopersmith@sun.com> + +commit e78eee87319e0290025c9d87bffe379bec440587 +Author: Vladimir Vukicevic <vladimir@pobox.com> +Date: Tue Aug 19 11:57:53 2008 -0700 + + Add sys/inttypes.h include for AIX + +commit cb9d5750582ea93ef1902a5185164088cdaa0140 +Merge: daf1745 dfe1f63 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Thu Jul 31 13:32:59 2008 -0400 + + Merge branch 'master' of sandmann@git.freedesktop.org:/git/pixman + +commit daf17450607e533dc590b4673c88241862b6b138 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Thu Jul 31 13:32:38 2008 -0400 + + Bug 16921. MMX and SSE2 intrinsics not enabled when compiling with Intel's icc + + Remove GCC specific inline-growth flags, and use __force_inline__ in + pixman-sse2 instead. Based on patch by Matt Turner. + +commit dfe1f63f709fc711c15d1be317dc1404d72b3efd +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Fri Jul 25 10:14:29 2008 -0700 + + Add depth 30 formats to pixman_format_supported_source. + + Thanks to Julien Cristau for pointing out that these were missing. 
+ +commit db3fb5eb605c4e1a6fcb93902389a22fc496151c +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Mon Jul 21 22:06:02 2008 -0400 + + Don't require GCC 4.2 on x86-64 + +commit 53fa7133fdf7f5879f10847228f478983c480b79 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Mon Jul 21 21:55:55 2008 -0400 + + TODO + +commit 58ab45b85d1732da7c84a274acdca3bfcf1c36b1 +Author: André Tupinambá <andrelrt@gmail.com> +Date: Mon Jul 21 21:53:20 2008 -0400 + + Use CopyAreasse2, plus a compatibility fix + +commit 51576cda8ba830c0d7dbbeaebb79fbef276f7e2d +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jul 18 14:49:35 2008 -0400 + + Fix typo in sse2 configure logic + +commit 596218e488fc92c5a03d07892015a106dcd541b4 +Author: Frederic Plourde <frederic.plourde@polymtl.ca> +Date: Thu Jul 17 13:51:31 2008 -0700 + + Win32 Makefile fix + +commit f729457da5de4a96ccd220ce71c583cdec971483 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Thu Jul 17 14:13:34 2008 -0400 + + Update RELEASING and release targets in Makefile.am + +commit 72045eabd57cf793266424c39246b435b655b19f +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Thu Jul 17 14:07:23 2008 -0400 + + Post-release version-bump + +commit 2a14ecf1cd07e9a3f367ea66a7b10da929ccc06d +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Thu Jul 17 13:38:57 2008 -0400 + + Pre-release version bump + +commit 411c0e990f7a96d4e15f2cbbe07d3b50b6a20f95 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Thu Jul 17 13:37:56 2008 -0400 + + Make a couple of functions static + +commit 96f57c07f24cd6d86c0aad624bd1a16b85f08e04 +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Thu Jul 17 13:32:45 2008 -0400 + + Speed up fbOver + + Use FbByteMulAdd to operate on two components at a time and force the function + to be inlined. 
+ +commit d4855cd9d028c49c2a12f61c6227579fcb8af5b6 +Author: Jeff Muizelaar <jmuizelaar@mozilla.com> +Date: Thu Jul 17 13:32:08 2008 -0400 + + Comment FbByteMulAdd + +commit dcbe4b3f80fb8dc6a83efa7e66a891f8b102608d +Author: Julien Cristau <jcristau@debian.org> +Date: Wed Jul 16 22:03:29 2008 +0200 + + fix --enable-{mmx,sse2,vmx} + + If --enable-foo is used, don't treat it as --disable-foo, and + error out if the appropriate compiler support isn't detected. + +commit e21f00db50c30e80c8969fbf63982aeb7434dc1d +Author: Damien Carbery <damien.carbery@sun.com> +Date: Wed Jul 16 15:47:38 2008 -0400 + + Add pixman-1-uninstalled.pc file + + Signed-off-by: Søren Sandmann <sandmann@redhat.com> + +commit 1570746350baf82d36aeb7ea1702350700b795d7 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Wed Jul 16 15:19:17 2008 -0400 + + Use -no-undefined -- fixes bug 15927 + +commit 99fead412e7602daf193035831b952e252dac0e1 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Wed Jul 16 15:07:30 2008 -0400 + + Use <inttypes.h> on most types of Unix. + + Fix for bug 15560. + +commit c5bb4ab8fcb784d479c71996f2ff081374741d43 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Wed Jul 16 14:56:48 2008 -0400 + + Update TODO + +commit 2070f10db927a97d7d38024e607093f5a6e00291 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Wed Jul 16 14:30:46 2008 -0400 + + Remove unused pixman-combine.c + +commit 9d54568d5f93701313fdbb49c1ef9dc79fe5850c +Author: André Tupinambá <andrelrt@gmail.com> +Date: Wed Jul 16 14:29:29 2008 -0400 + + Fix SSE2 bug where x888 pixels were treated as 8888 + +commit 7dfd023e94d9ab5940d7062c2e5cde11ab02f43e +Author: Benjamin Otte <otte@gnome.org> +Date: Wed Jul 16 20:41:02 2008 +0200 + + fix Altivec detection + + the old code used to cause infinite looping on G3 machines. 
+ +commit 0a92401678286eb438fe24979fd032efba540a0a +Author: Behdad Esfahbod <behdad@behdad.org> +Date: Tue Jul 15 16:49:32 2008 -0700 + + Fix fbCompositeSrc_x888xnx8888mmx to properly ignore source alpha + +commit 687176023caf53f2fe234827d152f270048b1cd3 +Author: Julien Cristau <jcristau@debian.org> +Date: Tue Jul 15 16:59:21 2008 +0200 + + make --{en,dis}able-gtk work correctly + + --enable-gtk had the same effect as --disable-gtk. + Now we check for it by default, and error out if we can't + find it and it was explicitly enabled. + +commit 9d97716d2fd91d5611a5e7a7b29b38887b672e9f +Author: André Tupinambá <andrelrt@gmail.com> +Date: Tue Jul 8 14:15:27 2008 -0400 + + Fix bug 16310 in the SSE2 fast path in function fbCompositeSolidMask_nx8888x0565Csse2 + +commit 47b95be38ae0ebc61963c0b3b4dddb240a59c18a +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Tue Jul 8 14:10:56 2008 -0400 + + Update TODO + +commit eb53d111bd8651cd00c1b728b09e8be09b8482f7 +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Wed Jun 25 17:55:28 2008 -0700 + + Add a lossy 32-bit generic pixel fetch function for wide surfaces. + + The transformed fetch path currently only works at 32-bit precision. Until a + wide version of that function is added, we need to have a path to perform a + wide FetchPixel and then contract it down to a8r8g8b8. + + Also, use the right format when expanding the result in fbFetchTransformed64. + fbFetchTransformed returns a8r8g8b8 results. + +commit 3c43b869f3821495978c61b9195f0b0e9e9e1245 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Fri Jun 27 01:11:30 2008 -0400 + + Unexport or delete various functions + + - Delete pixman_region_append() + - Delete pixman_region_empty() + - Make pixman_region_validate() static and don't export it. + + Reported by Julien Cristau. 
+ +commit 36b05f2cac7fa28e31131ca0d6b8b133ccd95ab8 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Fri Jun 27 01:05:30 2008 -0400 + + Only export the 16 bit version of pixman_region_set_static_pointers() + +commit ab4d45806aacf6208820af07b42852c9e4c89fff +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Wed Jun 25 09:46:12 2008 -0400 + + Update versioning documentation + +commit a3c12f4ccfda470574bfe8b7796c0b90237dabe0 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Wed Jun 25 09:36:13 2008 -0400 + + Update release instructions + +commit ca6edbee41e2e4bd18e3615a86bae441822b90e3 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Wed Jun 25 09:24:00 2008 -0400 + + Explicitly use my GPG key for signing. + +commit c48e3bc5ccf3a2f0aa4cadbaa4a0ae968a5d5a91 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Wed Jun 25 09:16:09 2008 -0400 + + Post-release version bump + +commit d8e5ff20f12c52a32dcf0543ab436eb7194b794c +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Wed Jun 25 07:58:21 2008 -0400 + + Pre-release version bump + +commit 4a9df4be7e384cf18e3d7a65d1e0023d2e2a280f +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 22 13:44:23 2008 -0400 + + TODO + +commit a766b62880108f278478888f5167a5fbf2819a97 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Sun Jun 22 13:42:21 2008 -0400 + + Add configure time options to disable mmx/sse2/vmx + +commit 02268903e4311709744c11e495f9b17f171ec5e9 +Author: David Sharp <whereami@gmail.com> +Date: Thu Jun 19 20:23:33 2008 -0700 + + pixman-sse.c: silence pointer-cast compiler warnings. + + Cast pointers to words of the same size, not 32-bits. 
+ + Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com> + +commit 534e65d54831018b47c169932a04224e5ba53cb8 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jun 13 16:16:59 2008 -0400 + + Update TODO + +commit eb2d95de98683a387153f010077ad9c3c3b1b01d +Author: Luo Jinghua <sunmoon1997@gmail.com> +Date: Sat Jun 14 09:07:22 2008 +0800 + + Fix implicit declaration of function 'free'. + + Otherwise pointer will be truncated on 64bit arch and your programs will crash. + +commit 29d144712e558aaeb49f4384028dd669d76a410b +Author: Maximilian Grothusmann <maxi@own-hero.net> +Date: Fri Jun 13 12:44:50 2008 -0700 + + Fix memory leak by freeing boxes{16,32}. + + After calling pixman_region_init_rects() or + pixman_region32_init_rects(), boxes{16,32} were not freed before + returning. Fixes bug 16312. + +commit 5d32519316b40b35113c6df9e15d955a16709ba2 +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Fri Jun 13 09:52:53 2008 -0700 + + Use pixman_malloc_ab instead of plain malloc for the fbStore64_generic scratch buffer. + +commit b1c70c4e6435d7f15751111828c381feb1d139cf +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Jun 13 00:25:45 2008 -0400 + + Move PIXMAN_FORMAT_16BPC to pixman-private.h + +commit 7fa966df0ee781fa486715710f389b148c11d36e +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Fri Jun 6 23:02:51 2008 -0700 + + Decide based on the image formats whether we need wide compositing. + + Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com> + +commit 7cb735c9c0fa55ae1f4d8d13da9f33e3da2ae8fe +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Fri Jun 6 19:40:25 2008 -0700 + + Take the source format into account in pixman_expand. + + Extract the original bits of the source image for each component and then + replicate up to 16 bits to fill the wide components. Make sure to hard-code the + alpha value to 1 if the source format didn't have alpha. 
+ + Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com> + +commit c0d98e96605c6d03f4b02f337f2f5827165bb092 +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Fri Jun 6 18:51:48 2008 -0700 + + Make expansion and contraction loops clearer. + + Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com> + +commit fc0b28bf6af81428b7ac045614eea97fbf9c4a70 +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Fri Jun 6 18:05:15 2008 -0700 + + Add wide source picture, external alpha, and transformed image routines. + + The wide external alpha path should work correctly with wide formats. The wide + transformed fetch code for now just does a 32-bit fetch and then expands, which + will lose precision. Source pictures, for now, are evaluated at depth 32 and + then are expanded to depth 64. We could get higher precision by evaluating them + directly at depth 64, but this should be good enough for now. + + Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com> + +commit 42a3a2d6fc8d3e521c6914ff8fb89f1fbc673e28 +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Fri Jun 6 15:45:17 2008 -0700 + + Add wide fetch/store functions. + + Use the narrow ones and expand/contract where necessary. Still need wide paths + for fancy pictures, but this gets the basic stuff working. + + Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com> + +commit 7591d3f1d7e0884f9362018edf720724095cf380 +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Fri Jun 6 15:30:52 2008 -0700 + + Add expand and contract functions to convert between ARGB8 and ARGB16. + + The expansion function isn't quite correct, but gives reasonable results. 
+ + Signed-off-by: Søren Sandmann Pedersen <sandmann@redhat.com> + +commit bae5db893f20eaf9c8ec56057617a8ce800cce48 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Mon Jun 9 13:06:50 2008 -0400 + + Post-release version bump + +commit 88e02d36792d3ad2615a775c9d21136c760087ef +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Mon Jun 9 12:32:56 2008 -0400 + + Pre-release version bump + +commit fe43a2c1638ee75ca6bbc437a59461cc5c2b6f46 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Mon Jun 9 12:28:05 2008 -0400 + + Add back prototypes for pixman_version() and pixman_version_string() to pixman.h + +commit fd380c7764c9cbc05f070b178a4b38b342503471 +Author: Julien Cristau <jcristau@debian.org> +Date: Mon Jun 9 12:01:37 2008 +0200 + + Add a couple more missing PIXMAN_EXPORTs + +commit f8f3b454a8dc4f91b4aff0e3c7ab28d65383ec8f +Author: Julien Cristau <jcristau@debian.org> +Date: Mon Jun 9 11:53:23 2008 +0200 + + Add PIXMAN_EXPORT for pixman_region_equal + + This used to be exported, so removing the export broke ABI. 
+ +commit 6e8f785ae7b629f5f81627fdb61f07d99df0b9ba +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Mon Jun 9 02:43:37 2008 -0400 + + Post-release version bump + +commit 26b486b9a9e599a5b45a73f8b3ec72f9a3560cfb +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Mon Jun 9 01:59:56 2008 -0400 + + Bump version number + +commit 95e749168d08cd35e151e612404c9318dcb0df9e +Merge: f7f5da7 ae1f016 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Mon Jun 9 01:57:26 2008 -0400 + + Merge region32 branch; fix conflicts + +commit ae1f016d404d323158072499f83185d4e83eaedb +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Mon Jun 9 01:53:03 2008 -0400 + + Unbump version number + +commit f7f5da7fc446c3cadb3f82a2ddc6ae57f4c4b795 +Author: Søren Sandmann Pedersen <sandmann@daimi.au.dk> +Date: Mon Jun 9 01:52:36 2008 -0400 + + Unbump version number; remove file added to EXTRA_DIST + +commit e84db894933b25ef9ab83a6c15a0521e15d61bae +Author: Søren Sandmann <sandmann@redhat.com> +Date: Mon Jun 9 01:29:14 2008 -0400 + + Bump version number and make it distcheck + +commit 7145d6dbdab79930ed923ae9551501127be28864 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Mon Jun 9 01:29:14 2008 -0400 + + Bump version number and make it distcheck + +commit a76826f19893a0575c0e16079fc1ec6d33171594 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Mon Jun 9 01:03:15 2008 -0400 + + Fix uses of short variables in pixman-region.c + +commit 85bad33380b7b78bdd6a7238c1439bcc42e28231 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Mon Jun 9 00:51:28 2008 -0400 + + Add infinite loop test + +commit 62e9b4d6cfcbc10046539b8e9643691bb02bea39 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Mon Jun 9 00:00:07 2008 -0400 + + Export pixman_compute_composite_region32() and use it in walk_region + +commit 57819ae3c219252db98df5eefa36499a6b77970c +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 23:53:35 2008 -0400 + + Fix bug in 
pixman_compute_composite_region() + + It was using the output region as the input. Add and use + pixman_region16_copy_from_region32(). + +commit 664b891aac50642d6d2ab6c482f4765029ae9b91 +Merge: 0b4c6dc 2b91152 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 23:39:29 2008 -0400 + + Merge branch 'master' into region32 + +commit 2b9115293e5fca70ca9ffe44ef74c80885dcedbb +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 23:39:03 2008 -0400 + + Call _mm_empty() at the end of fbComposeSetupSSE + +commit 0b4c6dcefd63a43aa9bb6556017e259589116522 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 22:48:59 2008 -0400 + + Add pixman_image_set_clip_region32 + +commit 703f82cd02f5224632b4b7f7f3f072067fa4f76d +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 22:20:44 2008 -0400 + + Use 32 bit regions internally + +commit de150bf82fbe0e346fa38eae10a5bd43538bb3d9 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 22:07:46 2008 -0400 + + Add pixman_region32_copy_from_region16 + +commit e30f7e2eb56b53667ee83e2cad942f171a9486a0 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 21:43:01 2008 -0400 + + Some formatting fixing + +commit cb7cc369f500a7828dc3c9935d8d82af47573df5 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 21:41:54 2008 -0400 + + Move all the PIXMAN_EXPORT into .c files + +commit 890f1a4280af4c7b8d8913ba592a9dd617482463 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 21:13:52 2008 -0400 + + Move PIXMAN_EXPORT into pixman-region.c + +commit 7a32c864e95d35e13d5473f5519639d91f62e20a +Merge: 81369a4 1248418 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 20:14:52 2008 -0400 + + Merge branch 'master' into region32 + +commit 1248418854b0e6e2f7fa8c2760a05b6604d3ded7 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 20:01:37 2008 -0400 + + Fix forgotten use of pixman-combine.h + +commit fb8f17fdf1eaec4ab8edba1486bfa83c0965d738 +Merge: 9267b0b 
1063933 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 19:55:43 2008 -0400 + + Merge branch 'vmx' + +commit 1063933bacb8b5d06b42b7b06a116339ce7c1f0c +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 19:55:35 2008 -0400 + + Rename pixman-combine.h -> pixman-combin32.h + +commit 81369a4c4f09bc6cceef173947df4c9a78952c7a +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 19:32:15 2008 -0400 + + Fix up types in pixman32-region.c + +commit 4cf2b696266214482dad6e40184b74e1161e8ea7 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 19:31:31 2008 -0400 + + Add prototypes for 32 bit region methods + +commit 7d0c507b084e78217145ab82c35d579e5c8c26db +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 19:31:04 2008 -0400 + + Add pixman-region32.c + +commit c22df027d6b9c9957e73ea9e34f14d504deb9d91 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 19:24:28 2008 -0400 + + Add prefixes to global primitive regions + +commit cab6a175ec4a1b29b51060293aacaf2cf2f987a0 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 19:20:27 2008 -0400 + + Delete more unused stuff + +commit 9395f08d6e6316ee853a976b806e57821e13991e +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 19:16:55 2008 -0400 + + Delete more unused stuff + +commit b00fcb6d627370930daec9d1e9601af7ce28944c +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 19:15:41 2008 -0400 + + More data types; remove unused stuff + +commit 4bdcd3bdb1223d5e611af9721e2eceb7e867b138 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 19:12:29 2008 -0400 + + Add point_type_t + +commit abf6b6ca6ce8f54cb8ba9d34570d4cdf0537bbd2 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 19:11:24 2008 -0400 + + Replace pixman_box16_t with box_type_t + +commit 68ccaa06751e76b9d9c70a7c0b8e9b22cf7d6f62 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 19:07:30 2008 -0400 + + Add pixman-region16.c; compile that 
instead of pixman-region.c + +commit 149477457c9463e22350c15ccfca5ddf8a78e7af +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sun Jun 8 19:05:43 2008 -0400 + + macroize pixman-region.c + +commit 567b4c255050ee3cc2dd0c03fb091d1f981332eb +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Sat Jun 7 19:38:01 2008 +0200 + + Use sigaction instead of signal to restore the previous handler + +commit 7ef19261ee5bb4c78ca55533c67e1f267faed61e +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Sat Jun 7 19:28:10 2008 +0200 + + Use combine macros from the generated header + +commit 795fd8a4c0f9417fb92beaff8595064c573b7652 +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Sat Jun 7 19:25:09 2008 +0200 + + Split combine.inc generated files in source and header + +commit 9267b0bb549044591f28124a86e169b2235339b6 +Author: Dimitrios Apostolou <jimis@gmx.net> +Date: Tue Jun 3 16:33:58 2008 -0400 + + Fix compilation on SGI + + Signed-off-by: Søren Sandmann <sandmann@redhat.com> + +commit 8ef3f49a9580fb148c2e5f567c0aafddd4b0f136 +Merge: 27b753c 9a6d3a1 +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Sun Jun 1 16:37:52 2008 +0200 + + Fixup + +commit 9a6d3a1dcf89fc04f71a9dfed1aeeda1e3fb83bc +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Fri May 2 17:33:17 2008 -0700 + + Fix wide alpha fetch macro. + + Signed-off-by: Soren Sandmann Pedersen <sandmann@redhat.com> + +commit 86ed05b0f93505c136fb279fa4529596fc7c682a +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Wed Apr 23 17:30:14 2008 -0700 + + Use wide compositing functions when wide == 1. + + Signed-off-by: Soren Sandmann Pedersen <sandmann@redhat.com> + +commit 4e2d2546b79354a1accff8614d50eb8f75a15c98 +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Tue Apr 15 13:58:27 2008 -0700 + + Add infrastructure for allocating wide scanline buffers. Not yet used. 
+ + Signed-off-by: Soren Sandmann Pedersen <sandmann@redhat.com> + +commit 598334a15723dc3857d1e932c17365a1f8c5f094 +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Tue Apr 15 13:13:46 2008 -0700 + + Split fetch/fetchPixel/store proc types into 32-bit and 64-bit versions. + + Signed-off-by: Soren Sandmann Pedersen <sandmann@redhat.com> + +commit 4a7e1676fd381bda53ece2f13204fbe568e07b0d +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Fri Apr 4 14:43:48 2008 -0700 + + Move the scanline buffer allocation logic into pixman_composite_rect_general. + + Pass the src, mask, and dest buffers into pixman_composite_rect_general_* as + void* pointers since those functions should not do pointer arithmetic. + + Signed-off-by: Soren Sandmann Pedersen <sandmann@redhat.com> + +commit 30bdbbed073750efa0a0c1e3f68925196e118953 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Thu May 15 01:07:59 2008 -0400 + + Update TODO + +commit d71bfc7777a48edfeb0aee0e3da97ac700a3dc90 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Thu May 15 01:00:28 2008 -0400 + + Work around GCC bug causing crashes in Mozilla with SSE2 + + When using SSE2 intrinsics, gcc assumes that the stack is 16 byte + aligned. Unfortunately some code, such as Mozilla and Mono contain + code that aligns the stack to 4 bytes. + + The __force_align_arg_pointer__ makes gcc generate a prologue that + realigns the stack pointer to 16 bytes. Fix bug 15693. 
+ +commit 1f275b4bdc28b121f4afd3cfd2df187bebedf35d +Author: Søren Sandmann <sandmann@redhat.com> +Date: Wed May 7 22:27:56 2008 -0400 + + Add support for 8 bit fills in pixman_fill_mmx() + +commit a56df9149d2af0414ba8311b413dbc2f01c94427 +Author: Richard Hult <richard@imendio.com> +Date: Sat May 3 15:03:00 2008 -0400 + + Add --disable-gtk switch to configure + + Signed-off-by: Søren Sandmann <sandmann@redhat.com> + +commit 9aa1c4e5d2db3793ae6e301496fe39475de84382 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Fri May 2 21:10:07 2008 -0400 + + Update TODO + +commit 1e1c8a1f52183edecfea346257468681d6a88c36 +Author: Oswald Buddenhagen <ossi@kde.org> +Date: Thu May 1 10:50:58 2008 +0200 + + fix pixman compile with srcdir != builddir + + Signed-off-by: Alan Coopersmith <alan.coopersmith@sun.com> + +commit 510ee5d8c23e2d9133dc83634e15b7660f411a1f +Author: Thomas Zimmermann <kuhundbaer@web.de> +Date: Fri Apr 25 22:13:24 2008 -0400 + + Remove trailing comma from enum (bug 15364) + +commit 3cea659d72bc3429a8766e63c66346e76e61c9b5 +Author: Alan Coopersmith <alan.coopersmith@sun.com> +Date: Thu Apr 24 18:10:56 2008 -0700 + + Fix pixman/Makefile.am to work with Solaris make + +commit 27b753c9deabe5ac775021abfae98a6a1830cfc2 +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Thu Apr 24 01:08:29 2008 +0200 + + Remove unused macro + +commit 584118fb6c15d695b6a203c2df51411958957880 +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Thu Apr 24 01:06:38 2008 +0200 + + Remove VMX from CPUFeatures, ppc isn't using it at all + +commit fc96121afd5d8451c9d8ba8a693e589d1999d131 +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Thu Apr 24 01:03:08 2008 +0200 + + Simplify cpu feature check + +commit 08b317a5f519978cfabebd75d5595b19fc1d1425 +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Thu Apr 24 00:41:16 2008 +0200 + + Refactor path selection + +commit 083cadd4c7d1270b0ee9f0365327b872898d1561 +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Thu Apr 24 00:36:51 2008 +0200 + + Force 
inlining + +commit 92ef26dfed3337831dd5156bfe0d20b132a26a29 +Author: André Tupinambá <andrelrt@gmail.com> +Date: Wed Apr 23 00:18:39 2008 -0400 + + Add SSE2 implementations of many compositing operations. + +commit 8e68544e0d8cc7af24fb8b298fd6afd47c620136 +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Sat Apr 12 13:16:46 2008 +0200 + + Unbreak vmx pixman + +commit 1ec7bd2cb2d02caca06742b0091f293d29d95a44 +Merge: e63bf15 5388222 +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Sat Apr 12 09:53:24 2008 +0200 + + Merge branch 'master' into vmx + + Conflicts: + + pixman/pixman-pict.c + +commit 53882228c9bbd50609e2858502b9bc087ca76903 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Sat Apr 12 00:22:35 2008 -0400 + + Simplify handling compiler handling in MMX code + + Instead of using the parameters --param inline-unit-growth=10000 and + --param large-function-growth=10000", just make the inline functions + __always_inline__. + + Also support Intel's compiler in pixman-mmx.c. + + Based on a patch by Serhat <ubunturk@gmail.com> + +commit 9f76747adec7ea00e31dd817427c1ed1bfa86aa8 +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Thu Apr 3 17:27:08 2008 -0700 + + Use a macro to append _accessors to things. + + Signed-off-by: Søren Sandmann <sandmann@redhat.com> + +commit a7065162aff9d1de9fc3db9756e4e76f6b8d063b +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Tue Apr 1 17:26:03 2008 -0700 + + Generate 64-bit combining functions. + + Copy some macros from pixman-private.h into combine.inc and update them to + generate 64-bit versions as appropriate. Add a rule to generate + pixman-combine64.c and add it to the build. + + Signed-off-by: Søren Sandmann <sandmann@redhat.com> + +commit 48521e6fe4378f9cc49d2dfb8d87490e65267a49 +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Tue Apr 1 16:31:26 2008 -0700 + + Replace hardcoded numbers and uint*_t in combine.inc. + + The masks and shifts have been replaced with preprocessor defines generated by + combine.pl. 
The uint*_t types have been replaced with comp4_t, comp2_t, and + comp1_t depending on how many components the value is supposed to hold. + + Signed-off-by: Søren Sandmann <sandmann@redhat.com> + +commit e0e5c4b72937728d0b36b1077d94ce92a2374c9a +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Mon Mar 31 16:42:53 2008 -0700 + + Tack 32 onto the ends of the combining function types. + + Signed-off-by: Søren Sandmann <sandmann@redhat.com> + +commit f88519ed3fed42e41aa5623540466d0bee1a887b +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Mon Mar 31 16:33:09 2008 -0700 + + Move combining routines into combine.inc and add a Perl rule to generate it. + + This will eventually be used to search & replace types and mask/shift + calculations to generate a wide version of these functions. + + Signed-off-by: Søren Sandmann <sandmann@redhat.com> + +commit 30746b1e1e5101fd1502c676e777e27953772f75 +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Mon Mar 31 16:09:44 2008 -0700 + + Move combining macros into pixman-combine.c. + + Signed-off-by: Søren Sandmann <sandmann@redhat.com> + +commit 1ae751215e255adecacad960b5bc98ff86039a07 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Wed Apr 9 21:39:29 2008 -0400 + + Require GCC 3.4 for SSE and SSE2. + + Also include <emmintrin.h> in the SSE2 test and use SSE_FLAGS rather than MMX_FLAGS + + Reported by Dave Yeo. + +commit a1f51cb09ead3e0007c12e98ce74948a92e80ec5 +Author: Søren Sandmann <sandmann@redhat.com> +Date: Tue Apr 8 21:58:00 2008 -0400 + + Fix bug in FbStore_b2g3r3 + +commit 0b207ae11065c740f2644a89fc13207a5343554e +Author: Antoine Azar <cairo@antoineazar.com> +Date: Sun Apr 6 10:56:53 2008 -0400 + + Optimize operators based on source or dest opacity. + + Check if we can replace our operator by a simpler one if the src or + dest are opaque The output operator should be mathematically + equivalent to the source. 
+ +commit 2976e690410d10b916014effe5d1842d0c8a7908 +Author: Julien Cristau <jcristau@debian.org> +Date: Sun Apr 6 17:12:12 2008 +0200 + + Revert "pixman-version.h is generated, don't distribute it" + + This reverts commit 4fd8910ea499eb484e29c44dbee7dbc029656e9c. + +commit 4fd8910ea499eb484e29c44dbee7dbc029656e9c +Author: Julien Cristau <jcristau@debian.org> +Date: Fri Apr 4 21:36:02 2008 +0200 + + pixman-version.h is generated, don't distribute it + +commit 4cde0886b52c82b792e8fbf2248bf8ff9aa079fa +Author: Frederic Plourde <frederic.plourde@polymtl.ca> +Date: Thu Apr 3 13:52:54 2008 -0700 + + Add CopyAreammx fast path for argb32 SRC xrgb32 and abgr32 SRC xbgr32 + +commit f45b331f7bb7d7effe279159d8c899952b52a270 +Author: Julien Cristau <jcristau@debian.org> +Date: Sun Mar 30 20:53:42 2008 +0200 + + Remove prototype for nonexistent pixman_image_set_filter_params + +commit a331519b865d157ac5fec231fda02ee74ba1ede1 +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Fri Mar 28 12:26:18 2008 -0700 + + Get rid of pixman_composeFunctions_accessors. + + The combining functions operate on scratch memory, so they don't need the access + wrappers. There's also no reason not to use the MMX combining functions in the + accessor path. + +commit 0c3547bad41e1a5b12c0ffaa1c106043399dc3fc +Author: Aaron Plattner <aplattner@nvidia.com> +Date: Fri Mar 28 12:16:07 2008 -0700 + + Fix test build when srcdir != builddir. + + The tests were including pixman.h, but pixman.h couldn't find pixman-version.h + because it was in $(top_builddir)/pixman rather than $(top_srcdir)/pixman. 
+ +commit cd3799317fced697e4bc729c3ea8d42ed7edf526 +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Fri Mar 28 05:23:34 2008 -0400 + + Post-release version bump + +commit 92b675db8d3ef44c3c87110443b15a68a871cf7b +Author: Søren Sandmann Pedersen <sandmann@redhat.com> +Date: Thu Mar 27 10:08:35 2008 -0400 + + Fix log generation + commit 0c33317f59b93f5cab348619b1c38a5dce97de94 Author: Søren Sandmann Pedersen <sandmann@redhat.com> Date: Thu Mar 27 10:07:11 2008 -0400 @@ -106,6 +19603,25 @@ Date: Tue Mar 25 11:45:56 2008 -0700 .. otherwise it's already a macro evaluating to FALSE. +commit e63bf1554b4adf9e687ec86213a97caab2218a77 +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Sun Mar 23 16:12:31 2008 +0100 + + Make configure message alike the mmx/sse/sse2 ones + +commit dcc530178050522705e70ff2f09b9da2b358ac01 +Merge: 550e5f5 29a8ae4 +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Sun Mar 23 16:04:26 2008 +0100 + + Update vmx + +commit 550e5f54abe4f3f0b6fcd278c3b4533036276e3f +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Sat Mar 22 11:28:48 2008 +0100 + + update patch + commit 4f33f454c6dbaf356d20cee8d35fdf02f9a0317d Author: Aaron Plattner <aplattner@nvidia.com> Date: Fri Mar 21 18:59:45 2008 -0700 @@ -208,13 +19724,6 @@ Date: Mon Mar 10 23:41:52 2008 -0400 Reported by Robert O'Callahan. -commit 52f7f3909f71da1e4331f0333cfed17682984c12 -Merge: b7f65f3... 5bcde57... -Author: Matthieu Herrb <matthieu@bluenote.herrb.net> -Date: Sat Mar 8 21:13:22 2008 +0100 - - Merge branch 'master' into obsd - commit 5bcde57da704000f0fbb52291d01c7f3c0dc8655 Author: Antoine Azar <cairo@antoineazar.com> Date: Wed Mar 5 15:27:11 2008 -0800 @@ -283,7 +19792,7 @@ Date: Tue Jan 22 16:33:58 2008 -0800 drop the #else part. commit bcac3335893a24e1d16790cb38d8a105b1e19951 -Merge: e5ceddf... 054be10... 
+Merge: e5ceddf 054be10 Author: Carl Worth <cworth@cworth.org> Date: Tue Jan 22 16:31:44 2008 -0800 @@ -341,7 +19850,7 @@ commit 9a0639650e276c4c0da5dfe37a8d3ba5a6b81712 Author: Vladimir Vukicevic <vladimir@pobox.com> Date: Tue Nov 6 16:15:01 2007 -0800 - Remove last CVS $Id: ChangeLog,v 1.5 2008/04/10 21:04:35 matthieu Exp $ tags + Remove last CVS $Id: ChangeLog,v 1.6 2013/06/07 17:18:00 matthieu Exp $ tags commit df964790e893a8b511e9322e7161087d8ba182cc Author: Søren Sandmann <sandmann@redhat.com> @@ -350,12 +19859,12 @@ Date: Thu Dec 20 00:23:18 2007 -0500 Don't use the pixbuf fast paths when the source picture has alpha. Bug 13650, reported by Wu Nian. -commit b7f65f3f75e127b1f4ac280f1141801ad547ebd4 -Merge: 5b62915... 72b46bc... -Author: Matthieu Herrb <matthieu@bluenote.herrb.com> -Date: Sun Dec 9 10:20:33 2007 +0100 +commit 49240111dbb31c335856f9653544a039275bf033 +Merge: 808e4f5 72b46bc +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Sun Dec 16 00:38:16 2007 +0100 - Merge branch 'master' into obsd + Merge branch 'master' of git://anongit.freedesktop.org/pixman commit 72b46bcf345db668b3ec00e7f27c5454cf2ad8b5 Author: Søren Sandmann <sandmann@redhat.com> @@ -423,47 +19932,14 @@ Date: Sat Dec 1 19:56:26 2007 -0500 Add tables with information about the MMX and plain-C fast paths. Update TODO. -commit 5b629154a48eded16a562a0ff5661b76e97c5f48 -Author: Matthieu Herrb <matthieu@bluenote.herrb.com> -Date: Thu Nov 22 14:31:26 2007 +0100 - - pixman 1.9.6 - -commit 621ec860c023ec6577ef1b6ea063302e26d54823 -Merge: 40a4dd4... f1f52ae... -Author: Matthieu Herrb <matthieu@bluenote.herrb.com> -Date: Thu Nov 22 14:29:40 2007 +0100 - - Merge branch 'master' into obsd - -commit 40a4dd4c1a2b389a6d4ce704ede7207bb3a5ca76 -Merge: 98f7081... 399ef1f... 
-Author: Matthieu Herrb <matthieu@bluenote.herrb.com> -Date: Thu Nov 22 14:28:44 2007 +0100 - - Merge branch 'obsd' of ssh://xenocara.org/git/xenocara/lib/pixman into obsd - -commit 98f7081ae9fd5d5271feaf77cd7d9bd1d1be8367 -Author: Matthieu Herrb <matthieu@bluenote.herrb.com> -Date: Thu Nov 22 14:27:30 2007 +0100 - - Bump libpixman revision to have it at least equal to the version from fd.o - commit f1f52ae4b1b2cca27104c1210625356084baf2f8 Author: Carl Worth <cworth@cworth.org> Date: Tue Oct 30 11:48:31 2007 -0700 Track name change from pixman.pc to pixman-1.pc in .gitignore -commit 274a34b84a1df33b233981bdf0a2c49b71e84db3 -Merge: 80c6b00... d4d78c8... -Author: Matthieu Herrb <matthieu@bluenote.herrb.com> -Date: Sun Oct 28 08:07:15 2007 +0100 - - Merge branch 'master' into obsd - commit d4d78c8c1a73d1007ebaae3117923bb72d09118f -Merge: 85bccce... b39ca42... +Merge: 85bccce b39ca42 Author: Alan Hourihane <alanh@tungstengraphics.com> Date: Wed Oct 24 21:39:34 2007 +0100 @@ -476,7 +19952,7 @@ Date: Wed Oct 24 15:48:45 2007 -0400 Bump version number commit 85bccce4d863b99be4b9ce62a8ac7d95f0acab3d -Merge: 8aa38d2... 2853243... +Merge: 8aa38d2 2853243 Author: Alan Hourihane <alanh@tungstengraphics.com> Date: Thu Oct 18 17:33:43 2007 +0100 @@ -513,7 +19989,7 @@ Date: Wed Oct 17 18:40:28 2007 -0400 Add README file based on text from Bjorn Lindquist commit 8aa38d2256c191bf3437034f6176bae30c3c3d19 -Merge: cea752b... 39a67d3... +Merge: cea752b 39a67d3 Author: Alan Hourihane <alanh@tungstengraphics.com> Date: Wed Oct 17 19:39:41 2007 +0100 @@ -523,18 +19999,12 @@ Date: Wed Oct 17 19:39:41 2007 +0100 pixman/pixman-image.c -commit 80c6b00af24c3f997eac30a1618669030edcf7bd -Author: Matthieu Herrb <matthieu@bluenote.herrb.com> -Date: Wed Oct 3 23:00:44 2007 +0200 - - library is called libpixman-1 for some reason. - -commit f41fd34f65bfb6f13d299466b8e1368384cbd305 -Merge: 340d678... 39a67d3... 
-Author: Matthieu Herrb <matthieu@bluenote.herrb.com> -Date: Wed Oct 3 22:42:44 2007 +0200 +commit 808e4f541b4cfde40c91e6c6cd942f9074d38e94 +Merge: 33d4028 39a67d3 +Author: Luca Barbato <lu_zero@gentoo.org> +Date: Mon Oct 1 22:13:05 2007 +0000 - Merge branch 'master' into obsd + Merge branch 'master' of git://anongit.freedesktop.org/pixman commit 39a67d35f05aa47cf50191e0837a2125593a7bbc Author: Tilman Sauerbeck <tilman@code-monkey.de> @@ -675,18 +20145,6 @@ Date: Tue Sep 4 17:24:04 2007 -0700 blends RGB24 over ARGB32 and notices that "alpha" values are making it from the source to the destination. -commit 399ef1f44e7c0059f96ce01be83455a2fbe38ec8 -Author: Matthieu Herrb <matthieu.herrb@laas.fr> -Date: Sun Sep 2 18:13:15 2007 +0200 - - regen - -commit 340d678af2ec0876b7c86a6817cd38037f424cec -Author: Matthieu Herrb <matthieu@bluenote.herrb.com> -Date: Sat Sep 1 16:28:05 2007 +0200 - - BSD Makefile & rerun automake. - commit 8ff7213f39edc1b2b8b60d6b0cc5d5f14ca1928d Author: Vladimir Vukicevic <vladimir@pobox.com> Date: Sat Aug 25 23:30:41 2007 -0700 @@ -724,7 +20182,7 @@ Date: Fri Aug 24 16:12:30 2007 -0400 Add conjoint and disjoint operators to pixman.h commit 245a5e04eb4bf3b973d32ce5f21e6e2eac00b48b -Merge: 25846ed... 9c09561... +Merge: 25846ed 9c09561 Author: Søren Sandmann Pedersen <sandmann@redhat.com> Date: Tue Aug 21 16:31:45 2007 -0400 @@ -847,6 +20305,12 @@ Date: Mon Jul 2 12:18:42 2007 -0400 Port Vlad's fixes for integer overflows with malloc(). +commit 33d4028e3fffa231f40d66b5843de589ec2642fe +Author: root <root@echo.(none)> +Date: Sun Jul 1 11:42:49 2007 +0000 + + First import of vmx + commit 2e61f30e4c8d0e01e175495e13a5f132521ad6f2 Author: Søren Sandmann <sandmann@redhat.com> Date: Fri Jun 22 13:37:46 2007 -0400 @@ -982,7 +20446,7 @@ Date: Sun Jun 17 18:50:00 2007 -0400 Remove accidentally committed use of TIMER_BEGIN/END commit e71844095ea75b4f9f66c85c87b4b3b6c287e02f -Merge: 4f9f7ae... 647852d... 
+Merge: 4f9f7ae 647852d Author: Søren Sandmann Pedersen <sandmann@redhat.com> Date: Sun Jun 17 18:49:02 2007 -0400 diff --git a/lib/pixman/Makefile.am b/lib/pixman/Makefile.am index ba579e5be..6e56d5140 100644 --- a/lib/pixman/Makefile.am +++ b/lib/pixman/Makefile.am @@ -10,7 +10,7 @@ snapshot: test -d "$(srcdir)/.git" && distdir=$$distdir-`cd "$(srcdir)" && git rev-parse HEAD | cut -c 1-6`; \ $(MAKE) $(AM_MAKEFLAGS) distdir="$$distdir" dist -GPGKEY=6FF7C1A8 +GPGKEY=3892336E USERNAME=$$USER RELEASE_OR_SNAPSHOT = $$(if test "x$(PIXMAN_VERSION_MINOR)" = "x$$(echo "$(PIXMAN_VERSION_MINOR)/2*2" | bc)" ; then echo release; else echo snapshot; fi) RELEASE_CAIRO_HOST = $(USERNAME)@cairographics.org @@ -121,7 +121,7 @@ release-publish-message: $(HASHFILES) ensure-prev @echo "" @echo "GPG signature:" @echo " $(RELEASE_CAIRO_URL)/$(gpg_file)" - @echo " (signed by `git config --get user.name` <`git config --get user.email`>)" + @echo " (signed by`gpg --list-keys $(GPGKEY) | grep uid | cut -b4- | tr -s " "`)" @echo "" @echo "Git:" @echo " git://git.freedesktop.org/git/pixman" diff --git a/lib/pixman/Makefile.bsd-wrapper b/lib/pixman/Makefile.bsd-wrapper index 8340aaca4..e06c1ce13 100644 --- a/lib/pixman/Makefile.bsd-wrapper +++ b/lib/pixman/Makefile.bsd-wrapper @@ -1,8 +1,8 @@ -# $OpenBSD: Makefile.bsd-wrapper,v 1.19 2013/02/19 20:56:17 brad Exp $ +# $OpenBSD: Makefile.bsd-wrapper,v 1.20 2013/06/07 17:18:00 matthieu Exp $ .include <bsd.own.mk> -SHARED_LIBS= pixman-1 28.0 +SHARED_LIBS= pixman-1 30.0 .if ${MACHINE_ARCH} == arm CONFIGURE_ARGS += --disable-arm-simd --disable-arm-neon diff --git a/lib/pixman/Makefile.in b/lib/pixman/Makefile.in index 43a0fbc82..6cfa1d14d 100644 --- a/lib/pixman/Makefile.in +++ b/lib/pixman/Makefile.in @@ -1,4 +1,4 @@ -# Makefile.in generated by automake 1.12.3 from Makefile.am. +# Makefile.in generated by automake 1.12.6 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2012 Free Software Foundation, Inc. 
@@ -247,6 +247,8 @@ PIXMAN_VERSION_MAJOR = @PIXMAN_VERSION_MAJOR@ PIXMAN_VERSION_MICRO = @PIXMAN_VERSION_MICRO@ PIXMAN_VERSION_MINOR = @PIXMAN_VERSION_MINOR@ PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ PNG_CFLAGS = @PNG_CFLAGS@ PNG_LIBS = @PNG_LIBS@ PTHREAD_LDFLAGS = @PTHREAD_LDFLAGS@ @@ -318,7 +320,7 @@ top_srcdir = @top_srcdir@ SUBDIRS = pixman pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = pixman-1.pc -GPGKEY = 6FF7C1A8 +GPGKEY = 3892336E USERNAME = $$USER RELEASE_OR_SNAPSHOT = $$(if test "x$(PIXMAN_VERSION_MINOR)" = "x$$(echo "$(PIXMAN_VERSION_MINOR)/2*2" | bc)" ; then echo release; else echo snapshot; fi) RELEASE_CAIRO_HOST = $(USERNAME)@cairographics.org @@ -964,7 +966,7 @@ release-publish-message: $(HASHFILES) ensure-prev @echo "" @echo "GPG signature:" @echo " $(RELEASE_CAIRO_URL)/$(gpg_file)" - @echo " (signed by `git config --get user.name` <`git config --get user.email`>)" + @echo " (signed by`gpg --list-keys $(GPGKEY) | grep uid | cut -b4- | tr -s " "`)" @echo "" @echo "Git:" @echo " git://git.freedesktop.org/git/pixman" diff --git a/lib/pixman/README b/lib/pixman/README index 3cfbc5053..6d8cfd8ad 100644 --- a/lib/pixman/README +++ b/lib/pixman/README @@ -1,22 +1,116 @@ -pixman is a library that provides low-level pixel manipulation +Pixman is a library that provides low-level pixel manipulation features such as image compositing and trapezoid rasterization. 
-All questions regarding this software should be directed to the pixman +Questions, bug reports and patches should be directed to the pixman mailing list: http://lists.freedesktop.org/mailman/listinfo/pixman -Please send patches and bug reports either to the mailing list above, -or file them at the freedesktop bug tracker: +You can also file bugs at https://bugs.freedesktop.org/enter_bug.cgi?product=pixman -The master development code repository can be found at: +For real time discussions about pixman, feel free to join the IRC +channels #cairo and #xorg-devel on the FreeNode IRC network. + + +Contributing +------------ + +In order to contribute to pixman, you will need a working knowledge of +the git version control system. For a quick getting started guide, +there is the "Everyday Git With 20 Commands Or So guide" + + http://www.kernel.org/pub/software/scm/git/docs/everyday.html + +from the Git homepage. For more in depth git documentation, see the +resources on the Git community documentation page: + + http://git-scm.com/documentation + +Pixman uses the infrastructure from the freedesktop.org umbrella +project. For instructions about how to use the git service on +freedesktop.org, see: + + http://www.freedesktop.org/wiki/Infrastructure/git/Developers + +The Pixman master repository can be found at: git://anongit.freedesktop.org/git/pixman - http://gitweb.freedesktop.org/?p=pixman;a=summary +and browsed on the web here: + + http://cgit.freedesktop.org/pixman/ + + +Sending patches +--------------- + +The general workflow for sending patches is to first make sure that +git can send mail on your system. Then, + + - create a branch off of master in your local git repository + + - make your changes as one or more commits + + - use the + + git send-email + + command to send the patch series to pixman@lists.freedesktop.org. 
+ +In order for your patches to be accepted, please consider the +following guidelines: + + - This link: + + http://www.kernel.org/pub/software/scm/git/docs/user-manual.html#patch-series + + describes what a good patch series is, and how to create one with + git. + + - At each point in the series, pixman should compile and the test + suite should pass. + + The exception here is if you are changing the test suite to + demonstrate a bug. In this case, make one commit that makes the + test suite fail due to the bug, and then another commit that fixes + the bug. + + You can run the test suite with + + make check + + It will take around two minutes to run on a modern PC. + + - Follow the coding style described in the CODING_STYLE file + + - For bug fixes, include an update to the test suite to make sure + the bug doesn't reappear. + + - For new features, add tests of the feature to the test + suite. Also, add a program demonstrating the new feature to the + demos/ directory. + + - Write descriptive commit messages. Useful information to include: + - Benchmark results, before and after + - Description of the bug that was fixed + - Detailed rationale for any new API + - Alternative approaches that were rejected (and why they + don't work) + - If review comments were incorporated, a brief version + history describing what those changes were. + + - For big patch series, send an introductory email with an overall + description of the patch series, including benchmarks and + motivation. Each commit message should still be descriptive and + include enough information to understand why this particular commit + was necessary. -For more information on the git code manager, see: +Pixman has high standards for code quality and so almost everybody +should expect to have the first versions of their patches rejected. 
- http://wiki.x.org/wiki/GitPage +If you think that the reviewers are wrong about something, or that the +guidelines above are wrong, feel free to discuss the issue on the +list. The purpose of the guidelines and code review is to ensure high +code quality; it is not an exercise in compliance. diff --git a/lib/pixman/aclocal.m4 b/lib/pixman/aclocal.m4 index 04c2ce6a6..ed2570902 100644 --- a/lib/pixman/aclocal.m4 +++ b/lib/pixman/aclocal.m4 @@ -1,4 +1,4 @@ -# generated automatically by aclocal 1.12.3 -*- Autoconf -*- +# generated automatically by aclocal 1.12.6 -*- Autoconf -*- # Copyright (C) 1996-2012 Free Software Foundation, Inc. @@ -8607,6 +8607,7 @@ m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) # pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- +# serial 1 (pkg-config-0.24) # # Copyright © 2004 Scott James Remnant <scott@netsplit.com>. # @@ -8633,8 +8634,12 @@ m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) # ---------------------------------- AC_DEFUN([PKG_PROG_PKG_CONFIG], [m4_pattern_forbid([^_?PKG_[A-Z_]+$]) -m4_pattern_allow([^PKG_CONFIG(_PATH)?$]) -AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])dnl +m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$]) +m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$]) +AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility]) +AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path]) +AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path]) + if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then AC_PATH_TOOL([PKG_CONFIG], [pkg-config]) fi @@ -8647,7 +8652,6 @@ if test -n "$PKG_CONFIG"; then AC_MSG_RESULT([no]) PKG_CONFIG="" fi - fi[]dnl ])# PKG_PROG_PKG_CONFIG @@ -8656,34 +8660,32 @@ fi[]dnl # Check to see whether a particular set of modules exists. 
Similar # to PKG_CHECK_MODULES(), but does not set variables or print errors. # -# -# Similar to PKG_CHECK_MODULES, make sure that the first instance of -# this or PKG_CHECK_MODULES is called, or make sure to call -# PKG_CHECK_EXISTS manually +# Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) +# only at the first occurence in configure.ac, so if the first place +# it's called might be skipped (such as if it is within an "if", you +# have to call PKG_CHECK_EXISTS manually # -------------------------------------------------------------- AC_DEFUN([PKG_CHECK_EXISTS], [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl if test -n "$PKG_CONFIG" && \ AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then - m4_ifval([$2], [$2], [:]) + m4_default([$2], [:]) m4_ifvaln([$3], [else $3])dnl fi]) - # _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) # --------------------------------------------- m4_define([_PKG_CONFIG], -[if test -n "$PKG_CONFIG"; then - if test -n "$$1"; then - pkg_cv_[]$1="$$1" - else - PKG_CHECK_EXISTS([$3], - [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`], - [pkg_failed=yes]) - fi -else - pkg_failed=untried +[if test -n "$$1"; then + pkg_cv_[]$1="$$1" + elif test -n "$PKG_CONFIG"; then + PKG_CHECK_EXISTS([$3], + [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes ], + [pkg_failed=yes]) + else + pkg_failed=untried fi[]dnl ])# _PKG_CONFIG @@ -8725,16 +8727,17 @@ and $1[]_LIBS to avoid the need to call pkg-config. 
See the pkg-config man page for more details.]) if test $pkg_failed = yes; then + AC_MSG_RESULT([no]) _PKG_SHORT_ERRORS_SUPPORTED if test $_pkg_short_errors_supported = yes; then - $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "$2"` + $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` else - $1[]_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "$2"` + $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` fi # Put the nasty error message in config.log where it belongs echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD - ifelse([$4], , [AC_MSG_ERROR(dnl + m4_default([$4], [AC_MSG_ERROR( [Package requirements ($2) were not met: $$1_PKG_ERRORS @@ -8742,28 +8745,67 @@ $$1_PKG_ERRORS Consider adjusting the PKG_CONFIG_PATH environment variable if you installed software in a non-standard prefix. -_PKG_TEXT -])], - [AC_MSG_RESULT([no]) - $4]) +_PKG_TEXT])[]dnl + ]) elif test $pkg_failed = untried; then - ifelse([$4], , [AC_MSG_FAILURE(dnl + AC_MSG_RESULT([no]) + m4_default([$4], [AC_MSG_FAILURE( [The pkg-config script could not be found or is too old. Make sure it is in your PATH or set the PKG_CONFIG environment variable to the full path to pkg-config. _PKG_TEXT -To get pkg-config, see <http://www.freedesktop.org/software/pkgconfig>.])], - [$4]) +To get pkg-config, see <http://pkg-config.freedesktop.org/>.])[]dnl + ]) else $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS $1[]_LIBS=$pkg_cv_[]$1[]_LIBS AC_MSG_RESULT([yes]) - ifelse([$3], , :, [$3]) + $3 fi[]dnl ])# PKG_CHECK_MODULES + +# PKG_INSTALLDIR(DIRECTORY) +# ------------------------- +# Substitutes the variable pkgconfigdir as the location where a module +# should install pkg-config .pc files. By default the directory is +# $libdir/pkgconfig, but the default can be changed by passing +# DIRECTORY. The user can override through the --with-pkgconfigdir +# parameter. 
+AC_DEFUN([PKG_INSTALLDIR], +[m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])]) +m4_pushdef([pkg_description], + [pkg-config installation directory @<:@]pkg_default[@:>@]) +AC_ARG_WITH([pkgconfigdir], + [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],, + [with_pkgconfigdir=]pkg_default) +AC_SUBST([pkgconfigdir], [$with_pkgconfigdir]) +m4_popdef([pkg_default]) +m4_popdef([pkg_description]) +]) dnl PKG_INSTALLDIR + + +# PKG_NOARCH_INSTALLDIR(DIRECTORY) +# ------------------------- +# Substitutes the variable noarch_pkgconfigdir as the location where a +# module should install arch-independent pkg-config .pc files. By +# default the directory is $datadir/pkgconfig, but the default can be +# changed by passing DIRECTORY. The user can override through the +# --with-noarch-pkgconfigdir parameter. +AC_DEFUN([PKG_NOARCH_INSTALLDIR], +[m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])]) +m4_pushdef([pkg_description], + [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@]) +AC_ARG_WITH([noarch-pkgconfigdir], + [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],, + [with_noarch_pkgconfigdir=]pkg_default) +AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir]) +m4_popdef([pkg_default]) +m4_popdef([pkg_description]) +]) dnl PKG_NOARCH_INSTALLDIR + # Copyright (C) 2002-2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation @@ -8779,7 +8821,7 @@ AC_DEFUN([AM_AUTOMAKE_VERSION], [am__api_version='1.12' dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to dnl require some minimum version. Point them to the right macro. -m4_if([$1], [1.12.3], [], +m4_if([$1], [1.12.6], [], [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl ]) @@ -8795,7 +8837,7 @@ m4_define([_AM_AUTOCONF_VERSION], []) # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. 
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], -[AM_AUTOMAKE_VERSION([1.12.3])dnl +[AM_AUTOMAKE_VERSION([1.12.6])dnl m4_ifndef([AC_AUTOCONF_VERSION], [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) @@ -9171,15 +9213,6 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"]) ]) -# Copyright (C) 1996-2012 Free Software Foundation, Inc. -# -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# AM_CONFIG_HEADER is obsolete. It has been replaced by AC_CONFIG_HEADERS. -AU_DEFUN([AM_CONFIG_HEADER], [AC_CONFIG_HEADERS($@)]) - # Do all the work for Automake. -*- Autoconf -*- # Copyright (C) 1996-2012 Free Software Foundation, Inc. diff --git a/lib/pixman/config.guess b/lib/pixman/config.guess index aa04f04bd..872b96a16 100644 --- a/lib/pixman/config.guess +++ b/lib/pixman/config.guess @@ -4,7 +4,7 @@ # 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, # 2011, 2012 Free Software Foundation, Inc. -timestamp='2012-06-17' +timestamp='2012-09-25' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -306,7 +306,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) echo arm-acorn-riscix${UNAME_RELEASE} exit ;; - arm:riscos:*:*|arm:RISCOS:*:*) + arm*:riscos:*:*|arm*:RISCOS:*:*) echo arm-unknown-riscos exit ;; SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) @@ -805,6 +805,9 @@ EOF i*:CYGWIN*:*) echo ${UNAME_MACHINE}-pc-cygwin exit ;; + *:MINGW64*:*) + echo ${UNAME_MACHINE}-pc-mingw64 + exit ;; *:MINGW*:*) echo ${UNAME_MACHINE}-pc-mingw32 exit ;; @@ -1205,6 +1208,9 @@ EOF BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
echo i586-pc-haiku exit ;; + x86_64:Haiku:*:*) + echo x86_64-unknown-haiku + exit ;; SX-4:SUPER-UX:*:*) echo sx4-nec-superux${UNAME_RELEASE} exit ;; @@ -1334,9 +1340,6 @@ EOF exit ;; esac -#echo '(No uname command or uname output not recognized.)' 1>&2 -#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 - eval $set_cc_for_build cat >$dummy.c <<EOF #ifdef _SEQUENT_ diff --git a/lib/pixman/config.h.in b/lib/pixman/config.h.in index c06fbaa41..d26107ff8 100644 --- a/lib/pixman/config.h.in +++ b/lib/pixman/config.h.in @@ -15,6 +15,9 @@ /* Define to 1 if we have <fenv.h> */ #undef HAVE_FENV_H +/* Whether the tool chain supports __float128 */ +#undef HAVE_FLOAT128 + /* Define to 1 if you have the `getisax' function. */ #undef HAVE_GETISAX @@ -165,3 +168,6 @@ #ifndef __cplusplus #undef inline #endif + +/* Define to sqrt if you do not have the `sqrtf' function. */ +#undef sqrtf diff --git a/lib/pixman/config.sub b/lib/pixman/config.sub index aa2cf19b8..8df551109 100644 --- a/lib/pixman/config.sub +++ b/lib/pixman/config.sub @@ -4,7 +4,7 @@ # 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, # 2011, 2012 Free Software Foundation, Inc. -timestamp='2012-06-17' +timestamp='2012-12-06' # This file is (in principle) common to ALL GNU software. 
# The presence of a machine in this file suggests that SOME GNU software @@ -123,7 +123,7 @@ esac maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` case $maybe_os in nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ - linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ + linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ knetbsd*-gnu* | netbsd*-gnu* | \ kopensolaris*-gnu* | \ storm-chaos* | os2-emx* | rtmk-nova*) @@ -156,7 +156,7 @@ case $os in -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ - -apple | -axis | -knuth | -cray | -microblaze) + -apple | -axis | -knuth | -cray | -microblaze*) os= basic_machine=$1 ;; @@ -259,8 +259,10 @@ case $basic_machine in | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ | am33_2.0 \ - | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ - | be32 | be64 \ + | arc \ + | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ + | avr | avr32 \ + | be32 | be64 \ | bfin \ | c4x | clipper \ | d10v | d30v | dlx | dsp16xx \ @@ -273,7 +275,7 @@ case $basic_machine in | le32 | le64 \ | lm32 \ | m32c | m32r | m32rle | m68000 | m68k | m88k \ - | maxq | mb | microblaze | mcore | mep | metag \ + | maxq | mb | microblaze | microblazeel | mcore | mep | metag \ | mips | mipsbe | mipseb | mipsel | mipsle \ | mips16 \ | mips64 | mips64el \ @@ -389,7 +391,8 @@ case $basic_machine in | lm32-* \ | m32c-* | m32r-* | m32rle-* \ | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ - | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \ + | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \ + | microblaze-* | microblazeel-* \ | mips-* | mipsbe-* | mipseb-* | 
mipsel-* | mipsle-* \ | mips16-* \ | mips64-* | mips64el-* \ @@ -788,9 +791,13 @@ case $basic_machine in basic_machine=ns32k-utek os=-sysv ;; - microblaze) + microblaze*) basic_machine=microblaze-xilinx ;; + mingw64) + basic_machine=x86_64-pc + os=-mingw64 + ;; mingw32) basic_machine=i386-pc os=-mingw32 @@ -1019,7 +1026,11 @@ case $basic_machine in basic_machine=i586-unknown os=-pw32 ;; - rdos) + rdos | rdos64) + basic_machine=x86_64-pc + os=-rdos + ;; + rdos32) basic_machine=i386-pc os=-rdos ;; @@ -1359,8 +1370,8 @@ case $os in | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ | -chorusos* | -chorusrdb* | -cegcc* \ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -linux-gnu* | -linux-android* \ - | -linux-newlib* | -linux-uclibc* \ + | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ + | -linux-newlib* | -linux-musl* | -linux-uclibc* \ | -uxpv* | -beos* | -mpeix* | -udk* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ diff --git a/lib/pixman/configure b/lib/pixman/configure index fdc5e5289..2858a5212 100644 --- a/lib/pixman/configure +++ b/lib/pixman/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for pixman 0.28.0. +# Generated by GNU Autoconf 2.69 for pixman 0.30.0. # # Report bugs to <pixman@lists.freedesktop.org>. # @@ -590,8 +590,8 @@ MAKEFLAGS= # Identity of this package. 
PACKAGE_NAME='pixman' PACKAGE_TARNAME='pixman' -PACKAGE_VERSION='0.28.0' -PACKAGE_STRING='pixman 0.28.0' +PACKAGE_VERSION='0.30.0' +PACKAGE_STRING='pixman 0.30.0' PACKAGE_BUGREPORT='pixman@lists.freedesktop.org' PACKAGE_URL='' @@ -647,6 +647,8 @@ HAVE_GTK_FALSE HAVE_GTK_TRUE GTK_LIBS GTK_CFLAGS +PKG_CONFIG_LIBDIR +PKG_CONFIG_PATH PKG_CONFIG PIXMAN_TIMERS TESTPROGS_EXTRA_LDFLAGS @@ -837,6 +839,8 @@ CCAS CCASFLAGS CPP PKG_CONFIG +PKG_CONFIG_PATH +PKG_CONFIG_LIBDIR GTK_CFLAGS GTK_LIBS PNG_CFLAGS @@ -1381,7 +1385,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures pixman 0.28.0 to adapt to many kinds of systems. +\`configure' configures pixman 0.30.0 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1451,7 +1455,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of pixman 0.28.0:";; + short | recursive ) echo "Configuration of pixman 0.30.0:";; esac cat <<\_ACEOF @@ -1510,6 +1514,10 @@ Some influential environment variables: CCASFLAGS assembler compiler flags (defaults to CFLAGS) CPP C preprocessor PKG_CONFIG path to pkg-config utility + PKG_CONFIG_PATH + directories to add to pkg-config's search path + PKG_CONFIG_LIBDIR + path overriding pkg-config's built-in search path GTK_CFLAGS C compiler flags for GTK, overriding pkg-config GTK_LIBS linker flags for GTK, overriding pkg-config PNG_CFLAGS C compiler flags for PNG, overriding pkg-config @@ -1581,7 +1589,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -pixman configure 0.28.0 +pixman configure 0.30.0 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. 
@@ -2179,7 +2187,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by pixman $as_me 0.28.0, which was +It was created by pixman $as_me 0.30.0, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -3003,7 +3011,7 @@ fi # Define the identity of the package. PACKAGE='pixman' - VERSION='0.28.0' + VERSION='0.30.0' cat >>confdefs.h <<_ACEOF @@ -12187,11 +12195,11 @@ fi -LT_VERSION_INFO="28:0:28" +LT_VERSION_INFO="30:0:30" PIXMAN_VERSION_MAJOR=0 -PIXMAN_VERSION_MINOR=28 +PIXMAN_VERSION_MINOR=30 PIXMAN_VERSION_MICRO=0 @@ -13254,6 +13262,11 @@ fi + + + + + if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args. @@ -13366,7 +13379,6 @@ $as_echo "yes" >&6; } $as_echo "no" >&6; } PKG_CONFIG="" fi - fi if test $enable_gtk = yes ; then @@ -13420,46 +13432,46 @@ pkg_failed=no { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GTK" >&5 $as_echo_n "checking for GTK... " >&6; } -if test -n "$PKG_CONFIG"; then - if test -n "$GTK_CFLAGS"; then - pkg_cv_GTK_CFLAGS="$GTK_CFLAGS" - else - if test -n "$PKG_CONFIG" && \ +if test -n "$GTK_CFLAGS"; then + pkg_cv_GTK_CFLAGS="$GTK_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"gtk+-2.0 pixman-1\""; } >&5 ($PKG_CONFIG --exists --print-errors "gtk+-2.0 pixman-1") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_GTK_CFLAGS=`$PKG_CONFIG --cflags "gtk+-2.0 pixman-1" 2>/dev/null` + test "x$?" 
!= "x0" && pkg_failed=yes else pkg_failed=yes fi - fi -else - pkg_failed=untried + else + pkg_failed=untried fi -if test -n "$PKG_CONFIG"; then - if test -n "$GTK_LIBS"; then - pkg_cv_GTK_LIBS="$GTK_LIBS" - else - if test -n "$PKG_CONFIG" && \ +if test -n "$GTK_LIBS"; then + pkg_cv_GTK_LIBS="$GTK_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"gtk+-2.0 pixman-1\""; } >&5 ($PKG_CONFIG --exists --print-errors "gtk+-2.0 pixman-1") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_GTK_LIBS=`$PKG_CONFIG --libs "gtk+-2.0 pixman-1" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes else pkg_failed=yes fi - fi -else - pkg_failed=untried + else + pkg_failed=untried fi if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes @@ -13467,9 +13479,9 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - GTK_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "gtk+-2.0 pixman-1"` + GTK_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "gtk+-2.0 pixman-1" 2>&1` else - GTK_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "gtk+-2.0 pixman-1"` + GTK_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "gtk+-2.0 pixman-1" 2>&1` fi # Put the nasty error message in config.log where it belongs echo "$GTK_PKG_ERRORS" >&5 @@ -13483,9 +13495,10 @@ installed software in a non-standard prefix. Alternatively, you may set the environment variables GTK_CFLAGS and GTK_LIBS to avoid the need to call pkg-config. -See the pkg-config man page for more details. -" "$LINENO" 5 +See the pkg-config man page for more details." 
"$LINENO" 5 elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it @@ -13496,14 +13509,14 @@ Alternatively, you may set the environment variables GTK_CFLAGS and GTK_LIBS to avoid the need to call pkg-config. See the pkg-config man page for more details. -To get pkg-config, see <http://www.freedesktop.org/software/pkgconfig>. +To get pkg-config, see <http://pkg-config.freedesktop.org/>. See \`config.log' for more details" "$LINENO" 5; } else GTK_CFLAGS=$pkg_cv_GTK_CFLAGS GTK_LIBS=$pkg_cv_GTK_LIBS { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } - : + fi fi @@ -13558,46 +13571,46 @@ pkg_failed=no { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GTK" >&5 $as_echo_n "checking for GTK... " >&6; } -if test -n "$PKG_CONFIG"; then - if test -n "$GTK_CFLAGS"; then - pkg_cv_GTK_CFLAGS="$GTK_CFLAGS" - else - if test -n "$PKG_CONFIG" && \ +if test -n "$GTK_CFLAGS"; then + pkg_cv_GTK_CFLAGS="$GTK_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"gtk+-2.0 pixman-1\""; } >&5 ($PKG_CONFIG --exists --print-errors "gtk+-2.0 pixman-1") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_GTK_CFLAGS=`$PKG_CONFIG --cflags "gtk+-2.0 pixman-1" 2>/dev/null` + test "x$?" 
!= "x0" && pkg_failed=yes else pkg_failed=yes fi - fi -else - pkg_failed=untried + else + pkg_failed=untried fi -if test -n "$PKG_CONFIG"; then - if test -n "$GTK_LIBS"; then - pkg_cv_GTK_LIBS="$GTK_LIBS" - else - if test -n "$PKG_CONFIG" && \ +if test -n "$GTK_LIBS"; then + pkg_cv_GTK_LIBS="$GTK_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"gtk+-2.0 pixman-1\""; } >&5 ($PKG_CONFIG --exists --print-errors "gtk+-2.0 pixman-1") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_GTK_LIBS=`$PKG_CONFIG --libs "gtk+-2.0 pixman-1" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes else pkg_failed=yes fi - fi -else - pkg_failed=untried + else + pkg_failed=untried fi if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes @@ -13605,17 +13618,17 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - GTK_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "gtk+-2.0 pixman-1"` + GTK_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "gtk+-2.0 pixman-1" 2>&1` else - GTK_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "gtk+-2.0 pixman-1"` + GTK_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "gtk+-2.0 pixman-1" 2>&1` fi # Put the nasty error message in config.log where it belongs echo "$GTK_PKG_ERRORS" >&5 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - enable_gtk=no + enable_gtk=no elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } enable_gtk=no else GTK_CFLAGS=$pkg_cv_GTK_CFLAGS @@ -13805,6 +13818,68 @@ $as_echo "#define HAVE_GETTIMEOFDAY 1" >>confdefs.h fi +{ $as_echo 
"$as_me:${as_lineno-$LINENO}: checking for library containing sqrtf" >&5 +$as_echo_n "checking for library containing sqrtf... " >&6; } +if ${ac_cv_search_sqrtf+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_func_search_save_LIBS=$LIBS +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char sqrtf (); +int +main () +{ +return sqrtf (); + ; + return 0; +} +_ACEOF +for ac_lib in '' m; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + if ac_fn_c_try_link "$LINENO"; then : + ac_cv_search_sqrtf=$ac_res +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if ${ac_cv_search_sqrtf+:} false; then : + break +fi +done +if ${ac_cv_search_sqrtf+:} false; then : + +else + ac_cv_search_sqrtf=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_sqrtf" >&5 +$as_echo "$ac_cv_search_sqrtf" >&6; } +ac_res=$ac_cv_search_sqrtf +if test "$ac_res" != no; then : + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +else + +$as_echo "#define sqrtf sqrt" >>confdefs.h + +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for thread local storage (TLS) support" >&5 $as_echo_n "checking for thread local storage (TLS) support... 
" >&6; } if ${ac_cv_tls+:} false; then : @@ -13834,7 +13909,7 @@ main () } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_tls=$kw + ac_cv_tls=$kw; break fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext done @@ -14217,6 +14292,32 @@ $as_echo "$support_for_attribute_constructor" >&6; } +support_for_float128=no + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __float128" >&5 +$as_echo_n "checking for __float128... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +__float128 a = 1.0Q, b = 2.0Q; int main (void) { return a + b; } + +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + support_for_float128=yes +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + +if test x$support_for_float128 = xyes; then + +$as_echo "#define HAVE_FLOAT128 /**/" >>confdefs.h + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $support_for_float128" >&5 +$as_echo "$support_for_float128" >&6; } + + # Check whether --enable-libpng was given. if test "${enable_libpng+set}" = set; then : enableval=$enable_libpng; have_libpng=$enableval @@ -14231,46 +14332,46 @@ pkg_failed=no { $as_echo "$as_me:${as_lineno-$LINENO}: checking for PNG" >&5 $as_echo_n "checking for PNG... " >&6; } -if test -n "$PKG_CONFIG"; then - if test -n "$PNG_CFLAGS"; then - pkg_cv_PNG_CFLAGS="$PNG_CFLAGS" - else - if test -n "$PKG_CONFIG" && \ +if test -n "$PNG_CFLAGS"; then + pkg_cv_PNG_CFLAGS="$PNG_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libpng\""; } >&5 ($PKG_CONFIG --exists --print-errors "libpng") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_PNG_CFLAGS=`$PKG_CONFIG --cflags "libpng" 2>/dev/null` + test "x$?" 
!= "x0" && pkg_failed=yes else pkg_failed=yes fi - fi -else - pkg_failed=untried + else + pkg_failed=untried fi -if test -n "$PKG_CONFIG"; then - if test -n "$PNG_LIBS"; then - pkg_cv_PNG_LIBS="$PNG_LIBS" - else - if test -n "$PKG_CONFIG" && \ +if test -n "$PNG_LIBS"; then + pkg_cv_PNG_LIBS="$PNG_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libpng\""; } >&5 ($PKG_CONFIG --exists --print-errors "libpng") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_PNG_LIBS=`$PKG_CONFIG --libs "libpng" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes else pkg_failed=yes fi - fi -else - pkg_failed=untried + else + pkg_failed=untried fi if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes @@ -14278,9 +14379,9 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - PNG_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "libpng"` + PNG_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libpng" 2>&1` else - PNG_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "libpng"` + PNG_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libpng" 2>&1` fi # Put the nasty error message in config.log where it belongs echo "$PNG_PKG_ERRORS" >&5 @@ -14294,9 +14395,10 @@ installed software in a non-standard prefix. Alternatively, you may set the environment variables PNG_CFLAGS and PNG_LIBS to avoid the need to call pkg-config. -See the pkg-config man page for more details. -" "$LINENO" 5 +See the pkg-config man page for more details." 
"$LINENO" 5 elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it @@ -14307,14 +14409,14 @@ Alternatively, you may set the environment variables PNG_CFLAGS and PNG_LIBS to avoid the need to call pkg-config. See the pkg-config man page for more details. -To get pkg-config, see <http://www.freedesktop.org/software/pkgconfig>. +To get pkg-config, see <http://pkg-config.freedesktop.org/>. See \`config.log' for more details" "$LINENO" 5; } else PNG_CFLAGS=$pkg_cv_PNG_CFLAGS PNG_LIBS=$pkg_cv_PNG_LIBS { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } - : + fi ;; xno) ;; *) @@ -14322,46 +14424,46 @@ pkg_failed=no { $as_echo "$as_me:${as_lineno-$LINENO}: checking for PNG" >&5 $as_echo_n "checking for PNG... " >&6; } -if test -n "$PKG_CONFIG"; then - if test -n "$PNG_CFLAGS"; then - pkg_cv_PNG_CFLAGS="$PNG_CFLAGS" - else - if test -n "$PKG_CONFIG" && \ +if test -n "$PNG_CFLAGS"; then + pkg_cv_PNG_CFLAGS="$PNG_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libpng\""; } >&5 ($PKG_CONFIG --exists --print-errors "libpng") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_PNG_CFLAGS=`$PKG_CONFIG --cflags "libpng" 2>/dev/null` + test "x$?" 
!= "x0" && pkg_failed=yes else pkg_failed=yes fi - fi -else - pkg_failed=untried + else + pkg_failed=untried fi -if test -n "$PKG_CONFIG"; then - if test -n "$PNG_LIBS"; then - pkg_cv_PNG_LIBS="$PNG_LIBS" - else - if test -n "$PKG_CONFIG" && \ +if test -n "$PNG_LIBS"; then + pkg_cv_PNG_LIBS="$PNG_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libpng\""; } >&5 ($PKG_CONFIG --exists --print-errors "libpng") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then pkg_cv_PNG_LIBS=`$PKG_CONFIG --libs "libpng" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes else pkg_failed=yes fi - fi -else - pkg_failed=untried + else + pkg_failed=untried fi if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes @@ -14369,17 +14471,17 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - PNG_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "libpng"` + PNG_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libpng" 2>&1` else - PNG_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "libpng"` + PNG_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libpng" 2>&1` fi # Put the nasty error message in config.log where it belongs echo "$PNG_PKG_ERRORS" >&5 - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } - have_libpng=no + have_libpng=no elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } have_libpng=no else PNG_CFLAGS=$pkg_cv_PNG_CFLAGS @@ -14975,7 +15077,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. 
instead of their # values after options handling. ac_log=" -This file was extended by pixman $as_me 0.28.0, which was +This file was extended by pixman $as_me 0.30.0, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -15041,7 +15143,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -pixman config.status 0.28.0 +pixman config.status 0.30.0 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/lib/pixman/configure.ac b/lib/pixman/configure.ac index 38e669818..dfd68e4fe 100644 --- a/lib/pixman/configure.ac +++ b/lib/pixman/configure.ac @@ -53,7 +53,7 @@ AC_PREREQ([2.57]) # m4_define([pixman_major], 0) -m4_define([pixman_minor], 28) +m4_define([pixman_minor], 30) m4_define([pixman_micro], 0) m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro]) @@ -64,7 +64,7 @@ AM_INIT_AUTOMAKE([foreign dist-bzip2]) # Suppress verbose compile lines m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) -AM_CONFIG_HEADER(config.h) +AC_CONFIG_HEADERS(config.h) AC_CANONICAL_HOST @@ -845,6 +845,13 @@ if test x$have_gettimeofday = xyes && test x$have_sys_time_h = xyes; then fi dnl ===================================== +dnl Check for missing sqrtf() as, e.g., for Solaris 9 + +AC_SEARCH_LIBS([sqrtf], [m], [], + [AC_DEFINE([sqrtf], [sqrt], + [Define to sqrt if you do not have the `sqrtf' function.])]) + +dnl ===================================== dnl Thread local storage AC_MSG_CHECKING(for thread local storage (TLS) support) @@ -860,7 +867,7 @@ AC_CACHE_VAL(ac_cv_tls, [ #error OpenBSD has broken __thread support #endif -int $kw test;], [], ac_cv_tls=$kw) +int $kw test;], [], [ac_cv_tls=$kw; break]) done ]) AC_MSG_RESULT($ac_cv_tls) @@ -969,6 +976,22 @@ fi AC_MSG_RESULT($support_for_attribute_constructor) AC_SUBST(TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR) +dnl 
===================================== +dnl __float128 + +support_for_float128=no + +AC_MSG_CHECKING(for __float128) +AC_LINK_IFELSE([AC_LANG_SOURCE([[ +__float128 a = 1.0Q, b = 2.0Q; int main (void) { return a + b; } +]])], support_for_float128=yes) + +if test x$support_for_float128 = xyes; then + AC_DEFINE([HAVE_FLOAT128], [], [Whether the tool chain supports __float128]) +fi + +AC_MSG_RESULT($support_for_float128) + dnl ================== dnl libpng diff --git a/lib/pixman/demos/Makefile.am b/lib/pixman/demos/Makefile.am index f324f5f5b..9be9ab670 100644 --- a/lib/pixman/demos/Makefile.am +++ b/lib/pixman/demos/Makefile.am @@ -4,9 +4,10 @@ AM_CFLAGS = $(OPENMP_CFLAGS) AM_LDFLAGS = $(OPENMP_CFLAGS) LDADD = $(top_builddir)/pixman/libpixman-1.la -lm $(GTK_LIBS) $(PNG_LIBS) -INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS) $(PNG_CFLAGS) +AM_CPPFLAGS = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS) $(PNG_CFLAGS) -GTK_UTILS = gtk-utils.c gtk-utils.h ../test/utils.c ../test/utils.h +GTK_UTILS = gtk-utils.c gtk-utils.h ../test/utils.c ../test/utils.h \ + ../test/utils-prng.c ../test/utils-prng.h DEMOS = \ clip-test \ @@ -14,6 +15,8 @@ DEMOS = \ composite-test \ gradient-test \ radial-test \ + linear-gradient \ + conical-test \ alpha-test \ screen-test \ convolution-test \ @@ -22,9 +25,10 @@ DEMOS = \ quad2quad \ checkerboard \ srgb-trap-test \ - srgb-test + srgb-test \ + scale -EXTRA_DIST = parrot.c parrot.jpg +EXTRA_DIST = parrot.c parrot.jpg scale.ui gradient_test_SOURCES = gradient-test.c $(GTK_UTILS) alpha_test_SOURCES = alpha-test.c $(GTK_UTILS) @@ -35,10 +39,13 @@ trap_test_SOURCES = trap-test.c $(GTK_UTILS) screen_test_SOURCES = screen-test.c $(GTK_UTILS) convolution_test_SOURCES = convolution-test.c $(GTK_UTILS) radial_test_SOURCES = radial-test.c $(GTK_UTILS) +linear_gradient_SOURCES = linear-gradient.c $(GTK_UTILS) +conical_test_SOURCES = conical-test.c $(GTK_UTILS) tri_test_SOURCES = tri-test.c $(GTK_UTILS) 
checkerboard_SOURCES = checkerboard.c $(GTK_UTILS) srgb_test_SOURCES = srgb-test.c $(GTK_UTILS) srgb_trap_test_SOURCES = srgb-trap-test.c $(GTK_UTILS) +scale_SOURCES = scale.c $(GTK_UTILS) noinst_PROGRAMS = $(DEMOS) diff --git a/lib/pixman/demos/Makefile.in b/lib/pixman/demos/Makefile.in index d4aed19b4..277649371 100644 --- a/lib/pixman/demos/Makefile.in +++ b/lib/pixman/demos/Makefile.in @@ -1,4 +1,4 @@ -# Makefile.in generated by automake 1.12.3 from Makefile.am. +# Makefile.in generated by automake 1.12.6 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2012 Free Software Foundation, Inc. @@ -64,15 +64,19 @@ CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = @HAVE_GTK_TRUE@am__EXEEXT_1 = clip-test$(EXEEXT) clip-in$(EXEEXT) \ @HAVE_GTK_TRUE@ composite-test$(EXEEXT) gradient-test$(EXEEXT) \ -@HAVE_GTK_TRUE@ radial-test$(EXEEXT) alpha-test$(EXEEXT) \ +@HAVE_GTK_TRUE@ radial-test$(EXEEXT) linear-gradient$(EXEEXT) \ +@HAVE_GTK_TRUE@ conical-test$(EXEEXT) alpha-test$(EXEEXT) \ @HAVE_GTK_TRUE@ screen-test$(EXEEXT) convolution-test$(EXEEXT) \ @HAVE_GTK_TRUE@ trap-test$(EXEEXT) tri-test$(EXEEXT) \ @HAVE_GTK_TRUE@ quad2quad$(EXEEXT) checkerboard$(EXEEXT) \ -@HAVE_GTK_TRUE@ srgb-trap-test$(EXEEXT) srgb-test$(EXEEXT) +@HAVE_GTK_TRUE@ srgb-trap-test$(EXEEXT) srgb-test$(EXEEXT) \ +@HAVE_GTK_TRUE@ scale$(EXEEXT) PROGRAMS = $(noinst_PROGRAMS) am__alpha_test_SOURCES_DIST = alpha-test.c gtk-utils.c gtk-utils.h \ - ../test/utils.c ../test/utils.h -@HAVE_GTK_TRUE@am__objects_1 = gtk-utils.$(OBJEXT) utils.$(OBJEXT) + ../test/utils.c ../test/utils.h ../test/utils-prng.c \ + ../test/utils-prng.h +@HAVE_GTK_TRUE@am__objects_1 = gtk-utils.$(OBJEXT) utils.$(OBJEXT) \ +@HAVE_GTK_TRUE@ utils-prng.$(OBJEXT) @HAVE_GTK_TRUE@am_alpha_test_OBJECTS = alpha-test.$(OBJEXT) \ @HAVE_GTK_TRUE@ $(am__objects_1) alpha_test_OBJECTS = $(am_alpha_test_OBJECTS) @@ -86,7 +90,8 @@ am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = am__checkerboard_SOURCES_DIST = checkerboard.c 
gtk-utils.c gtk-utils.h \ - ../test/utils.c ../test/utils.h + ../test/utils.c ../test/utils.h ../test/utils-prng.c \ + ../test/utils-prng.h @HAVE_GTK_TRUE@am_checkerboard_OBJECTS = checkerboard.$(OBJEXT) \ @HAVE_GTK_TRUE@ $(am__objects_1) checkerboard_OBJECTS = $(am_checkerboard_OBJECTS) @@ -95,7 +100,8 @@ checkerboard_LDADD = $(LDADD) @HAVE_GTK_TRUE@ $(top_builddir)/pixman/libpixman-1.la \ @HAVE_GTK_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am__clip_in_SOURCES_DIST = clip-in.c gtk-utils.c gtk-utils.h \ - ../test/utils.c ../test/utils.h + ../test/utils.c ../test/utils.h ../test/utils-prng.c \ + ../test/utils-prng.h @HAVE_GTK_TRUE@am_clip_in_OBJECTS = clip-in.$(OBJEXT) $(am__objects_1) clip_in_OBJECTS = $(am_clip_in_OBJECTS) clip_in_LDADD = $(LDADD) @@ -103,7 +109,8 @@ clip_in_LDADD = $(LDADD) @HAVE_GTK_TRUE@ $(top_builddir)/pixman/libpixman-1.la \ @HAVE_GTK_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am__clip_test_SOURCES_DIST = clip-test.c gtk-utils.c gtk-utils.h \ - ../test/utils.c ../test/utils.h + ../test/utils.c ../test/utils.h ../test/utils-prng.c \ + ../test/utils-prng.h @HAVE_GTK_TRUE@am_clip_test_OBJECTS = clip-test.$(OBJEXT) \ @HAVE_GTK_TRUE@ $(am__objects_1) clip_test_OBJECTS = $(am_clip_test_OBJECTS) @@ -112,7 +119,8 @@ clip_test_LDADD = $(LDADD) @HAVE_GTK_TRUE@ $(top_builddir)/pixman/libpixman-1.la \ @HAVE_GTK_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am__composite_test_SOURCES_DIST = composite-test.c gtk-utils.c \ - gtk-utils.h ../test/utils.c ../test/utils.h + gtk-utils.h ../test/utils.c ../test/utils.h \ + ../test/utils-prng.c ../test/utils-prng.h @HAVE_GTK_TRUE@am_composite_test_OBJECTS = composite-test.$(OBJEXT) \ @HAVE_GTK_TRUE@ $(am__objects_1) composite_test_OBJECTS = $(am_composite_test_OBJECTS) @@ -120,8 +128,19 @@ composite_test_LDADD = $(LDADD) @HAVE_GTK_TRUE@composite_test_DEPENDENCIES = \ @HAVE_GTK_TRUE@ $(top_builddir)/pixman/libpixman-1.la \ @HAVE_GTK_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) 
+am__conical_test_SOURCES_DIST = conical-test.c gtk-utils.c gtk-utils.h \ + ../test/utils.c ../test/utils.h ../test/utils-prng.c \ + ../test/utils-prng.h +@HAVE_GTK_TRUE@am_conical_test_OBJECTS = conical-test.$(OBJEXT) \ +@HAVE_GTK_TRUE@ $(am__objects_1) +conical_test_OBJECTS = $(am_conical_test_OBJECTS) +conical_test_LDADD = $(LDADD) +@HAVE_GTK_TRUE@conical_test_DEPENDENCIES = \ +@HAVE_GTK_TRUE@ $(top_builddir)/pixman/libpixman-1.la \ +@HAVE_GTK_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am__convolution_test_SOURCES_DIST = convolution-test.c gtk-utils.c \ - gtk-utils.h ../test/utils.c ../test/utils.h + gtk-utils.h ../test/utils.c ../test/utils.h \ + ../test/utils-prng.c ../test/utils-prng.h @HAVE_GTK_TRUE@am_convolution_test_OBJECTS = \ @HAVE_GTK_TRUE@ convolution-test.$(OBJEXT) $(am__objects_1) convolution_test_OBJECTS = $(am_convolution_test_OBJECTS) @@ -130,7 +149,8 @@ convolution_test_LDADD = $(LDADD) @HAVE_GTK_TRUE@ $(top_builddir)/pixman/libpixman-1.la \ @HAVE_GTK_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am__gradient_test_SOURCES_DIST = gradient-test.c gtk-utils.c \ - gtk-utils.h ../test/utils.c ../test/utils.h + gtk-utils.h ../test/utils.c ../test/utils.h \ + ../test/utils-prng.c ../test/utils-prng.h @HAVE_GTK_TRUE@am_gradient_test_OBJECTS = gradient-test.$(OBJEXT) \ @HAVE_GTK_TRUE@ $(am__objects_1) gradient_test_OBJECTS = $(am_gradient_test_OBJECTS) @@ -138,6 +158,16 @@ gradient_test_LDADD = $(LDADD) @HAVE_GTK_TRUE@gradient_test_DEPENDENCIES = \ @HAVE_GTK_TRUE@ $(top_builddir)/pixman/libpixman-1.la \ @HAVE_GTK_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +am__linear_gradient_SOURCES_DIST = linear-gradient.c gtk-utils.c \ + gtk-utils.h ../test/utils.c ../test/utils.h \ + ../test/utils-prng.c ../test/utils-prng.h +@HAVE_GTK_TRUE@am_linear_gradient_OBJECTS = linear-gradient.$(OBJEXT) \ +@HAVE_GTK_TRUE@ $(am__objects_1) +linear_gradient_OBJECTS = $(am_linear_gradient_OBJECTS) +linear_gradient_LDADD = $(LDADD) 
+@HAVE_GTK_TRUE@linear_gradient_DEPENDENCIES = \ +@HAVE_GTK_TRUE@ $(top_builddir)/pixman/libpixman-1.la \ +@HAVE_GTK_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) quad2quad_SOURCES = quad2quad.c quad2quad_OBJECTS = quad2quad.$(OBJEXT) quad2quad_LDADD = $(LDADD) @@ -145,7 +175,8 @@ quad2quad_LDADD = $(LDADD) @HAVE_GTK_TRUE@ $(top_builddir)/pixman/libpixman-1.la \ @HAVE_GTK_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am__radial_test_SOURCES_DIST = radial-test.c gtk-utils.c gtk-utils.h \ - ../test/utils.c ../test/utils.h + ../test/utils.c ../test/utils.h ../test/utils-prng.c \ + ../test/utils-prng.h @HAVE_GTK_TRUE@am_radial_test_OBJECTS = radial-test.$(OBJEXT) \ @HAVE_GTK_TRUE@ $(am__objects_1) radial_test_OBJECTS = $(am_radial_test_OBJECTS) @@ -153,8 +184,18 @@ radial_test_LDADD = $(LDADD) @HAVE_GTK_TRUE@radial_test_DEPENDENCIES = \ @HAVE_GTK_TRUE@ $(top_builddir)/pixman/libpixman-1.la \ @HAVE_GTK_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +am__scale_SOURCES_DIST = scale.c gtk-utils.c gtk-utils.h \ + ../test/utils.c ../test/utils.h ../test/utils-prng.c \ + ../test/utils-prng.h +@HAVE_GTK_TRUE@am_scale_OBJECTS = scale.$(OBJEXT) $(am__objects_1) +scale_OBJECTS = $(am_scale_OBJECTS) +scale_LDADD = $(LDADD) +@HAVE_GTK_TRUE@scale_DEPENDENCIES = \ +@HAVE_GTK_TRUE@ $(top_builddir)/pixman/libpixman-1.la \ +@HAVE_GTK_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am__screen_test_SOURCES_DIST = screen-test.c gtk-utils.c gtk-utils.h \ - ../test/utils.c ../test/utils.h + ../test/utils.c ../test/utils.h ../test/utils-prng.c \ + ../test/utils-prng.h @HAVE_GTK_TRUE@am_screen_test_OBJECTS = screen-test.$(OBJEXT) \ @HAVE_GTK_TRUE@ $(am__objects_1) screen_test_OBJECTS = $(am_screen_test_OBJECTS) @@ -163,7 +204,8 @@ screen_test_LDADD = $(LDADD) @HAVE_GTK_TRUE@ $(top_builddir)/pixman/libpixman-1.la \ @HAVE_GTK_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am__srgb_test_SOURCES_DIST = srgb-test.c gtk-utils.c gtk-utils.h \ - ../test/utils.c ../test/utils.h + 
../test/utils.c ../test/utils.h ../test/utils-prng.c \ + ../test/utils-prng.h @HAVE_GTK_TRUE@am_srgb_test_OBJECTS = srgb-test.$(OBJEXT) \ @HAVE_GTK_TRUE@ $(am__objects_1) srgb_test_OBJECTS = $(am_srgb_test_OBJECTS) @@ -172,7 +214,8 @@ srgb_test_LDADD = $(LDADD) @HAVE_GTK_TRUE@ $(top_builddir)/pixman/libpixman-1.la \ @HAVE_GTK_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am__srgb_trap_test_SOURCES_DIST = srgb-trap-test.c gtk-utils.c \ - gtk-utils.h ../test/utils.c ../test/utils.h + gtk-utils.h ../test/utils.c ../test/utils.h \ + ../test/utils-prng.c ../test/utils-prng.h @HAVE_GTK_TRUE@am_srgb_trap_test_OBJECTS = srgb-trap-test.$(OBJEXT) \ @HAVE_GTK_TRUE@ $(am__objects_1) srgb_trap_test_OBJECTS = $(am_srgb_trap_test_OBJECTS) @@ -181,7 +224,8 @@ srgb_trap_test_LDADD = $(LDADD) @HAVE_GTK_TRUE@ $(top_builddir)/pixman/libpixman-1.la \ @HAVE_GTK_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am__trap_test_SOURCES_DIST = trap-test.c gtk-utils.c gtk-utils.h \ - ../test/utils.c ../test/utils.h + ../test/utils.c ../test/utils.h ../test/utils-prng.c \ + ../test/utils-prng.h @HAVE_GTK_TRUE@am_trap_test_OBJECTS = trap-test.$(OBJEXT) \ @HAVE_GTK_TRUE@ $(am__objects_1) trap_test_OBJECTS = $(am_trap_test_OBJECTS) @@ -190,7 +234,8 @@ trap_test_LDADD = $(LDADD) @HAVE_GTK_TRUE@ $(top_builddir)/pixman/libpixman-1.la \ @HAVE_GTK_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am__tri_test_SOURCES_DIST = tri-test.c gtk-utils.c gtk-utils.h \ - ../test/utils.c ../test/utils.h + ../test/utils.c ../test/utils.h ../test/utils-prng.c \ + ../test/utils-prng.h @HAVE_GTK_TRUE@am_tri_test_OBJECTS = tri-test.$(OBJEXT) \ @HAVE_GTK_TRUE@ $(am__objects_1) tri_test_OBJECTS = $(am_tri_test_OBJECTS) @@ -234,18 +279,21 @@ am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = SOURCES = $(alpha_test_SOURCES) $(checkerboard_SOURCES) \ $(clip_in_SOURCES) $(clip_test_SOURCES) \ - $(composite_test_SOURCES) $(convolution_test_SOURCES) \ - $(gradient_test_SOURCES) quad2quad.c $(radial_test_SOURCES) \ - 
$(screen_test_SOURCES) $(srgb_test_SOURCES) \ + $(composite_test_SOURCES) $(conical_test_SOURCES) \ + $(convolution_test_SOURCES) $(gradient_test_SOURCES) \ + $(linear_gradient_SOURCES) quad2quad.c $(radial_test_SOURCES) \ + $(scale_SOURCES) $(screen_test_SOURCES) $(srgb_test_SOURCES) \ $(srgb_trap_test_SOURCES) $(trap_test_SOURCES) \ $(tri_test_SOURCES) DIST_SOURCES = $(am__alpha_test_SOURCES_DIST) \ $(am__checkerboard_SOURCES_DIST) $(am__clip_in_SOURCES_DIST) \ $(am__clip_test_SOURCES_DIST) \ $(am__composite_test_SOURCES_DIST) \ + $(am__conical_test_SOURCES_DIST) \ $(am__convolution_test_SOURCES_DIST) \ - $(am__gradient_test_SOURCES_DIST) quad2quad.c \ - $(am__radial_test_SOURCES_DIST) \ + $(am__gradient_test_SOURCES_DIST) \ + $(am__linear_gradient_SOURCES_DIST) quad2quad.c \ + $(am__radial_test_SOURCES_DIST) $(am__scale_SOURCES_DIST) \ $(am__screen_test_SOURCES_DIST) $(am__srgb_test_SOURCES_DIST) \ $(am__srgb_trap_test_SOURCES_DIST) \ $(am__trap_test_SOURCES_DIST) $(am__tri_test_SOURCES_DIST) @@ -331,6 +379,8 @@ PIXMAN_VERSION_MAJOR = @PIXMAN_VERSION_MAJOR@ PIXMAN_VERSION_MICRO = @PIXMAN_VERSION_MICRO@ PIXMAN_VERSION_MINOR = @PIXMAN_VERSION_MINOR@ PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ PNG_CFLAGS = @PNG_CFLAGS@ PNG_LIBS = @PNG_LIBS@ PTHREAD_LDFLAGS = @PTHREAD_LDFLAGS@ @@ -402,14 +452,18 @@ top_srcdir = @top_srcdir@ @HAVE_GTK_TRUE@AM_CFLAGS = $(OPENMP_CFLAGS) @HAVE_GTK_TRUE@AM_LDFLAGS = $(OPENMP_CFLAGS) @HAVE_GTK_TRUE@LDADD = $(top_builddir)/pixman/libpixman-1.la -lm $(GTK_LIBS) $(PNG_LIBS) -@HAVE_GTK_TRUE@INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS) $(PNG_CFLAGS) -@HAVE_GTK_TRUE@GTK_UTILS = gtk-utils.c gtk-utils.h ../test/utils.c ../test/utils.h +@HAVE_GTK_TRUE@AM_CPPFLAGS = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS) $(PNG_CFLAGS) +@HAVE_GTK_TRUE@GTK_UTILS = gtk-utils.c gtk-utils.h ../test/utils.c ../test/utils.h \ +@HAVE_GTK_TRUE@ ../test/utils-prng.c 
../test/utils-prng.h + @HAVE_GTK_TRUE@DEMOS = \ @HAVE_GTK_TRUE@ clip-test \ @HAVE_GTK_TRUE@ clip-in \ @HAVE_GTK_TRUE@ composite-test \ @HAVE_GTK_TRUE@ gradient-test \ @HAVE_GTK_TRUE@ radial-test \ +@HAVE_GTK_TRUE@ linear-gradient \ +@HAVE_GTK_TRUE@ conical-test \ @HAVE_GTK_TRUE@ alpha-test \ @HAVE_GTK_TRUE@ screen-test \ @HAVE_GTK_TRUE@ convolution-test \ @@ -418,9 +472,10 @@ top_srcdir = @top_srcdir@ @HAVE_GTK_TRUE@ quad2quad \ @HAVE_GTK_TRUE@ checkerboard \ @HAVE_GTK_TRUE@ srgb-trap-test \ -@HAVE_GTK_TRUE@ srgb-test +@HAVE_GTK_TRUE@ srgb-test \ +@HAVE_GTK_TRUE@ scale -@HAVE_GTK_TRUE@EXTRA_DIST = parrot.c parrot.jpg +@HAVE_GTK_TRUE@EXTRA_DIST = parrot.c parrot.jpg scale.ui @HAVE_GTK_TRUE@gradient_test_SOURCES = gradient-test.c $(GTK_UTILS) @HAVE_GTK_TRUE@alpha_test_SOURCES = alpha-test.c $(GTK_UTILS) @HAVE_GTK_TRUE@composite_test_SOURCES = composite-test.c $(GTK_UTILS) @@ -430,10 +485,13 @@ top_srcdir = @top_srcdir@ @HAVE_GTK_TRUE@screen_test_SOURCES = screen-test.c $(GTK_UTILS) @HAVE_GTK_TRUE@convolution_test_SOURCES = convolution-test.c $(GTK_UTILS) @HAVE_GTK_TRUE@radial_test_SOURCES = radial-test.c $(GTK_UTILS) +@HAVE_GTK_TRUE@linear_gradient_SOURCES = linear-gradient.c $(GTK_UTILS) +@HAVE_GTK_TRUE@conical_test_SOURCES = conical-test.c $(GTK_UTILS) @HAVE_GTK_TRUE@tri_test_SOURCES = tri-test.c $(GTK_UTILS) @HAVE_GTK_TRUE@checkerboard_SOURCES = checkerboard.c $(GTK_UTILS) @HAVE_GTK_TRUE@srgb_test_SOURCES = srgb-test.c $(GTK_UTILS) @HAVE_GTK_TRUE@srgb_trap_test_SOURCES = srgb-trap-test.c $(GTK_UTILS) +@HAVE_GTK_TRUE@scale_SOURCES = scale.c $(GTK_UTILS) all: all-am .SUFFIXES: @@ -492,18 +550,27 @@ clip-test$(EXEEXT): $(clip_test_OBJECTS) $(clip_test_DEPENDENCIES) $(EXTRA_clip_ composite-test$(EXEEXT): $(composite_test_OBJECTS) $(composite_test_DEPENDENCIES) $(EXTRA_composite_test_DEPENDENCIES) @rm -f composite-test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(composite_test_OBJECTS) $(composite_test_LDADD) $(LIBS) +conical-test$(EXEEXT): $(conical_test_OBJECTS) 
$(conical_test_DEPENDENCIES) $(EXTRA_conical_test_DEPENDENCIES) + @rm -f conical-test$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(conical_test_OBJECTS) $(conical_test_LDADD) $(LIBS) convolution-test$(EXEEXT): $(convolution_test_OBJECTS) $(convolution_test_DEPENDENCIES) $(EXTRA_convolution_test_DEPENDENCIES) @rm -f convolution-test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(convolution_test_OBJECTS) $(convolution_test_LDADD) $(LIBS) gradient-test$(EXEEXT): $(gradient_test_OBJECTS) $(gradient_test_DEPENDENCIES) $(EXTRA_gradient_test_DEPENDENCIES) @rm -f gradient-test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(gradient_test_OBJECTS) $(gradient_test_LDADD) $(LIBS) +linear-gradient$(EXEEXT): $(linear_gradient_OBJECTS) $(linear_gradient_DEPENDENCIES) $(EXTRA_linear_gradient_DEPENDENCIES) + @rm -f linear-gradient$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(linear_gradient_OBJECTS) $(linear_gradient_LDADD) $(LIBS) quad2quad$(EXEEXT): $(quad2quad_OBJECTS) $(quad2quad_DEPENDENCIES) $(EXTRA_quad2quad_DEPENDENCIES) @rm -f quad2quad$(EXEEXT) $(AM_V_CCLD)$(LINK) $(quad2quad_OBJECTS) $(quad2quad_LDADD) $(LIBS) radial-test$(EXEEXT): $(radial_test_OBJECTS) $(radial_test_DEPENDENCIES) $(EXTRA_radial_test_DEPENDENCIES) @rm -f radial-test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(radial_test_OBJECTS) $(radial_test_LDADD) $(LIBS) +scale$(EXEEXT): $(scale_OBJECTS) $(scale_DEPENDENCIES) $(EXTRA_scale_DEPENDENCIES) + @rm -f scale$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(scale_OBJECTS) $(scale_LDADD) $(LIBS) screen-test$(EXEEXT): $(screen_test_OBJECTS) $(screen_test_DEPENDENCIES) $(EXTRA_screen_test_DEPENDENCIES) @rm -f screen-test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(screen_test_OBJECTS) $(screen_test_LDADD) $(LIBS) @@ -531,16 +598,20 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clip-in.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clip-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/composite-test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/conical-test.Po@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/convolution-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gradient-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gtk-utils.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/linear-gradient.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/quad2quad.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radial-test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scale.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/screen-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/srgb-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/srgb-trap-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/trap-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tri-test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utils-prng.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utils.Po@am__quote@ .c.o: @@ -578,6 +649,20 @@ utils.obj: ../test/utils.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o utils.obj `if test -f '../test/utils.c'; then $(CYGPATH_W) '../test/utils.c'; else $(CYGPATH_W) '$(srcdir)/../test/utils.c'; fi` +utils-prng.o: ../test/utils-prng.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT utils-prng.o -MD -MP -MF $(DEPDIR)/utils-prng.Tpo -c -o utils-prng.o `test -f '../test/utils-prng.c' || echo '$(srcdir)/'`../test/utils-prng.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/utils-prng.Tpo $(DEPDIR)/utils-prng.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='../test/utils-prng.c' object='utils-prng.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o utils-prng.o `test -f '../test/utils-prng.c' || echo '$(srcdir)/'`../test/utils-prng.c + +utils-prng.obj: ../test/utils-prng.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT utils-prng.obj -MD -MP -MF $(DEPDIR)/utils-prng.Tpo -c -o utils-prng.obj `if test -f '../test/utils-prng.c'; then $(CYGPATH_W) '../test/utils-prng.c'; else $(CYGPATH_W) '$(srcdir)/../test/utils-prng.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/utils-prng.Tpo $(DEPDIR)/utils-prng.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='../test/utils-prng.c' object='utils-prng.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o utils-prng.obj `if test -f '../test/utils-prng.c'; then $(CYGPATH_W) '../test/utils-prng.c'; else $(CYGPATH_W) '$(srcdir)/../test/utils-prng.c'; fi` + mostlyclean-libtool: -rm -f *.lo diff --git a/lib/pixman/demos/conical-test.c b/lib/pixman/demos/conical-test.c new file mode 100644 index 000000000..6b3243016 --- /dev/null +++ b/lib/pixman/demos/conical-test.c @@ -0,0 +1,100 @@ +#include "../test/utils.h" +#include "gtk-utils.h" + +#define SIZE 128 +#define GRADIENTS_PER_ROW 7 +#define NUM_ROWS ((NUM_GRADIENTS + GRADIENTS_PER_ROW - 1) / GRADIENTS_PER_ROW) +#define WIDTH (SIZE * GRADIENTS_PER_ROW) +#define HEIGHT (SIZE * NUM_ROWS) +#define NUM_GRADIENTS 35 + +#define double_to_color(x) \ + (((uint32_t) ((x)*65536)) - (((uint32_t) ((x)*65536)) >> 16)) + +#define PIXMAN_STOP(offset,r,g,b,a) \ + { pixman_double_to_fixed (offset), 
\ + { \ + double_to_color (r), \ + double_to_color (g), \ + double_to_color (b), \ + double_to_color (a) \ + } \ + } + + +static const pixman_gradient_stop_t stops[] = { + PIXMAN_STOP (0.25, 1, 0, 0, 0.7), + PIXMAN_STOP (0.5, 1, 1, 0, 0.7), + PIXMAN_STOP (0.75, 0, 1, 0, 0.7), + PIXMAN_STOP (1.0, 0, 0, 1, 0.7) +}; + +#define NUM_STOPS (sizeof (stops) / sizeof (stops[0])) + +static pixman_image_t * +create_conical (int index) +{ + pixman_point_fixed_t c; + double angle; + + c.x = pixman_double_to_fixed (0); + c.y = pixman_double_to_fixed (0); + + angle = (0.5 / NUM_GRADIENTS + index / (double)NUM_GRADIENTS) * 720 - 180; + + return pixman_image_create_conical_gradient ( + &c, pixman_double_to_fixed (angle), stops, NUM_STOPS); +} + +int +main (int argc, char **argv) +{ + pixman_transform_t transform; + pixman_image_t *src_img, *dest_img; + int i; + + enable_divbyzero_exceptions (); + + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + NULL, 0); + + draw_checkerboard (dest_img, 25, 0xffaaaaaa, 0xff888888); + + pixman_transform_init_identity (&transform); + + pixman_transform_translate (NULL, &transform, + pixman_double_to_fixed (0.5), + pixman_double_to_fixed (0.5)); + + pixman_transform_scale (NULL, &transform, + pixman_double_to_fixed (SIZE), + pixman_double_to_fixed (SIZE)); + pixman_transform_translate (NULL, &transform, + pixman_double_to_fixed (0.5), + pixman_double_to_fixed (0.5)); + + for (i = 0; i < NUM_GRADIENTS; i++) + { + int column = i % GRADIENTS_PER_ROW; + int row = i / GRADIENTS_PER_ROW; + + src_img = create_conical (i); + pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL); + + pixman_image_set_transform (src_img, &transform); + + pixman_image_composite32 ( + PIXMAN_OP_OVER, src_img, NULL,dest_img, + 0, 0, 0, 0, column * SIZE, row * SIZE, + SIZE, SIZE); + + pixman_image_unref (src_img); + } + + show_image (dest_img); + + pixman_image_unref (dest_img); + + return 0; +} diff --git a/lib/pixman/demos/gtk-utils.c 
b/lib/pixman/demos/gtk-utils.c index 8291a1ed2..32d4aecc7 100644 --- a/lib/pixman/demos/gtk-utils.c +++ b/lib/pixman/demos/gtk-utils.c @@ -3,6 +3,72 @@ #include "../test/utils.h" #include "gtk-utils.h" +pixman_image_t * +pixman_image_from_file (const char *filename, pixman_format_code_t format) +{ + GdkPixbuf *pixbuf; + pixman_image_t *image; + int width, height; + uint32_t *data, *d; + uint8_t *gdk_data; + int n_channels; + int j, i; + int stride; + + if (!(pixbuf = gdk_pixbuf_new_from_file (filename, NULL))) + return NULL; + + image = NULL; + + width = gdk_pixbuf_get_width (pixbuf); + height = gdk_pixbuf_get_height (pixbuf); + n_channels = gdk_pixbuf_get_n_channels (pixbuf); + gdk_data = gdk_pixbuf_get_pixels (pixbuf); + stride = gdk_pixbuf_get_rowstride (pixbuf); + + if (!(data = malloc (width * height * sizeof (uint32_t)))) + goto out; + + d = data; + for (j = 0; j < height; ++j) + { + uint8_t *gdk_line = gdk_data; + + for (i = 0; i < width; ++i) + { + int r, g, b, a; + uint32_t pixel; + + r = gdk_line[0]; + g = gdk_line[1]; + b = gdk_line[2]; + + if (n_channels == 4) + a = gdk_line[3]; + else + a = 0xff; + + r = (r * a + 127) / 255; + g = (g * a + 127) / 255; + b = (b * a + 127) / 255; + + pixel = (a << 24) | (r << 16) | (g << 8) | b; + + *d++ = pixel; + gdk_line += n_channels; + } + + gdk_data += stride; + } + + image = pixman_image_create_bits ( + format, width, height, data, width * 4); + +out: + g_object_unref (pixbuf); + return image; +} + GdkPixbuf * pixbuf_from_argb32 (uint32_t *bits, int width, @@ -29,14 +95,31 @@ pixbuf_from_argb32 (uint32_t *bits, static gboolean on_expose (GtkWidget *widget, GdkEventExpose *expose, gpointer data) { - GdkPixbuf *pixbuf = data; + pixman_image_t *pimage = data; + int width = pixman_image_get_width (pimage); + int height = pixman_image_get_height (pimage); + int stride = pixman_image_get_stride (pimage); + cairo_surface_t *cimage; + cairo_format_t format; + cairo_t *cr; + + if (pixman_image_get_format (pimage) == 
PIXMAN_x8r8g8b8) + format = CAIRO_FORMAT_RGB24; + else + format = CAIRO_FORMAT_ARGB32; + + cimage = cairo_image_surface_create_for_data ( + (uint8_t *)pixman_image_get_data (pimage), + format, width, height, stride); - gdk_draw_pixbuf (widget->window, NULL, - pixbuf, 0, 0, 0, 0, - gdk_pixbuf_get_width (pixbuf), - gdk_pixbuf_get_height (pixbuf), - GDK_RGB_DITHER_NONE, - 0, 0); + cr = gdk_cairo_create (widget->window); + + cairo_rectangle (cr, 0, 0, width, height); + cairo_set_source_surface (cr, cimage, 0, 0); + cairo_fill (cr); + + cairo_destroy (cr); + cairo_surface_destroy (cimage); return TRUE; } @@ -45,7 +128,6 @@ void show_image (pixman_image_t *image) { GtkWidget *window; - GdkPixbuf *pixbuf; int width, height; int argc; char **argv; @@ -66,22 +148,15 @@ show_image (pixman_image_t *image) format = pixman_image_get_format (image); - /* Three cases: - * - * - image is a8r8g8b8_sRGB: we will display without modification - * under the assumption that the monitor is sRGB - * - * - image is a8r8g8b8: we will display without modification - * under the assumption that whoever created the image - * probably did it wrong by using sRGB inputs - * - * - other: we will convert to a8r8g8b8 under the assumption that - * whoever created the image probably did it wrong. + /* We always display the image as if it contains sRGB data. That + * means that no conversion should take place when the image + * has the a8r8g8b8_sRGB format. 
*/ switch (format) { case PIXMAN_a8r8g8b8_sRGB: case PIXMAN_a8r8g8b8: + case PIXMAN_x8r8g8b8: copy = pixman_image_ref (image); break; @@ -95,11 +170,7 @@ show_image (pixman_image_t *image) break; } - pixbuf = pixbuf_from_argb32 (pixman_image_get_data (copy), - width, height, - pixman_image_get_stride (copy)); - - g_signal_connect (window, "expose_event", G_CALLBACK (on_expose), pixbuf); + g_signal_connect (window, "expose_event", G_CALLBACK (on_expose), copy); g_signal_connect (window, "delete_event", G_CALLBACK (gtk_main_quit), NULL); gtk_widget_show (window); diff --git a/lib/pixman/demos/gtk-utils.h b/lib/pixman/demos/gtk-utils.h index 55cb7018a..36be4def6 100644 --- a/lib/pixman/demos/gtk-utils.h +++ b/lib/pixman/demos/gtk-utils.h @@ -6,6 +6,9 @@ void show_image (pixman_image_t *image); +pixman_image_t * +pixman_image_from_file (const char *filename, pixman_format_code_t format); + GdkPixbuf *pixbuf_from_argb32 (uint32_t *bits, int width, int height, diff --git a/lib/pixman/demos/linear-gradient.c b/lib/pixman/demos/linear-gradient.c new file mode 100644 index 000000000..46433a6e5 --- /dev/null +++ b/lib/pixman/demos/linear-gradient.c @@ -0,0 +1,50 @@ +#include "../test/utils.h" +#include "gtk-utils.h" + +#define WIDTH 1024 +#define HEIGHT 640 + +int +main (int argc, char **argv) +{ + pixman_image_t *src_img, *dest_img; + pixman_gradient_stop_t stops[] = { + { 0x00000, { 0x0000, 0x0000, 0x4444, 0xdddd } }, + { 0x10000, { 0xeeee, 0xeeee, 0x8888, 0xdddd } }, +#if 0 + /* These colors make it very obvious that dithering + * is useful even for 8-bit gradients + */ + { 0x00000, { 0x6666, 0x3333, 0x3333, 0xffff } }, + { 0x10000, { 0x3333, 0x6666, 0x6666, 0xffff } }, +#endif + }; + pixman_point_fixed_t p1, p2; + + enable_divbyzero_exceptions (); + + dest_img = pixman_image_create_bits (PIXMAN_x8r8g8b8, + WIDTH, HEIGHT, + NULL, 0); + + p1.x = p1.y = 0x0000; + p2.x = WIDTH << 16; + p2.y = HEIGHT << 16; + + src_img = pixman_image_create_linear_gradient (&p1, &p2, stops, 
ARRAY_LENGTH (stops)); + + pixman_image_composite32 (PIXMAN_OP_OVER, + src_img, + NULL, + dest_img, + 0, 0, + 0, 0, + 0, 0, + WIDTH, HEIGHT); + + show_image (dest_img); + + pixman_image_unref (dest_img); + + return 0; +} diff --git a/lib/pixman/demos/radial-test.c b/lib/pixman/demos/radial-test.c index e64f3577f..08a367cd2 100644 --- a/lib/pixman/demos/radial-test.c +++ b/lib/pixman/demos/radial-test.c @@ -1,7 +1,7 @@ #include "../test/utils.h" #include "gtk-utils.h" -#define NUM_GRADIENTS 7 +#define NUM_GRADIENTS 9 #define NUM_STOPS 3 #define NUM_REPEAT 4 #define SIZE 128 @@ -28,6 +28,9 @@ * centers (0, 0) and (1, 0), but with different radiuses. From left * to right: * + * - Degenerate start circle completely inside the end circle + * 0.00 -> 1.75; dr = 1.75 > 0; a = 1 - 1.75^2 < 0 + * * - Small start circle completely inside the end circle * 0.25 -> 1.75; dr = 1.5 > 0; a = 1 - 1.50^2 < 0 * @@ -49,15 +52,20 @@ * - Small end circle completely inside the start circle * 1.75 -> 0.25; dr = -1.5 > 0; a = 1 - 1.50^2 < 0 * + * - Degenerate end circle completely inside the start circle + * 0.00 -> 1.75; dr = 1.75 > 0; a = 1 - 1.75^2 < 0 + * */ const static double radiuses[NUM_GRADIENTS] = { + 0.00, 0.25, 0.50, 0.50, 1.00, 1.00, 1.50, + 1.75, 1.75 }; @@ -139,6 +147,8 @@ main (int argc, char **argv) WIDTH, HEIGHT, NULL, 0); + draw_checkerboard (dest_img, 25, 0xffaaaaaa, 0xffbbbbbb); + pixman_transform_init_identity (&transform); /* diff --git a/lib/pixman/demos/scale.c b/lib/pixman/demos/scale.c new file mode 100644 index 000000000..869ada12b --- /dev/null +++ b/lib/pixman/demos/scale.c @@ -0,0 +1,436 @@ +/* + * Copyright 2012, Red Hat, Inc. 
+ * Copyright 2012, Soren Sandmann + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Author: Soren Sandmann <soren.sandmann@gmail.com> + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include <math.h> +#include <gtk/gtk.h> +#include <pixman.h> +#include <stdlib.h> +#include "gtk-utils.h" + +typedef struct +{ + GtkBuilder * builder; + pixman_image_t * original; + GtkAdjustment * scale_x_adjustment; + GtkAdjustment * scale_y_adjustment; + GtkAdjustment * rotate_adjustment; + GtkAdjustment * subsample_adjustment; + int scaled_width; + int scaled_height; +} app_t; + +static GtkWidget * +get_widget (app_t *app, const char *name) +{ + GtkWidget *widget = GTK_WIDGET (gtk_builder_get_object (app->builder, name)); + + if (!widget) + g_error ("Widget %s not found\n", name); + + return widget; +} + +static double +min4 (double a, double b, double c, double d) +{ + double m1, m2; + + m1 = MIN (a, b); + m2 = MIN (c, d); + return MIN (m1, m2); +} + +static double +max4 (double a, double b, double c, double d) +{ + double m1, m2; + + m1 = MAX (a, b); + m2 = MAX (c, d); + return MAX (m1, m2); +} + +static void +compute_extents (pixman_f_transform_t *trans, double *sx, double *sy) +{ + double min_x, max_x, min_y, max_y; + pixman_f_vector_t v[4] = + { + { { 1, 1, 1 } }, + { { -1, 1, 1 } }, + { { -1, -1, 1 } }, + { { 1, -1, 1 } }, + }; + + pixman_f_transform_point (trans, &v[0]); + pixman_f_transform_point (trans, &v[1]); + pixman_f_transform_point (trans, &v[2]); + pixman_f_transform_point (trans, &v[3]); + + min_x = min4 (v[0].v[0], v[1].v[0], v[2].v[0], v[3].v[0]); + max_x = max4 (v[0].v[0], v[1].v[0], v[2].v[0], v[3].v[0]); + min_y = min4 (v[0].v[1], v[1].v[1], v[2].v[1], v[3].v[1]); + max_y = max4 (v[0].v[1], v[1].v[1], v[2].v[1], v[3].v[1]); + + *sx = (max_x - min_x) / 2.0; + *sy = (max_y - min_y) / 2.0; +} + +typedef struct +{ + char name [20]; + pixman_kernel_t value; +} named_int_t; + +static const named_int_t filters[] = +{ + { "Box", PIXMAN_KERNEL_BOX }, + { "Impulse", PIXMAN_KERNEL_IMPULSE }, + { "Linear", PIXMAN_KERNEL_LINEAR }, + { 
"Cubic", PIXMAN_KERNEL_CUBIC }, + { "Lanczos2", PIXMAN_KERNEL_LANCZOS2 }, + { "Lanczos3", PIXMAN_KERNEL_LANCZOS3 }, + { "Lanczos3 Stretched", PIXMAN_KERNEL_LANCZOS3_STRETCHED }, + { "Gaussian", PIXMAN_KERNEL_GAUSSIAN }, +}; + +static const named_int_t repeats[] = +{ + { "None", PIXMAN_REPEAT_NONE }, + { "Normal", PIXMAN_REPEAT_NORMAL }, + { "Reflect", PIXMAN_REPEAT_REFLECT }, + { "Pad", PIXMAN_REPEAT_PAD }, +}; + +static pixman_kernel_t +get_value (app_t *app, const named_int_t table[], const char *box_name) +{ + GtkComboBox *box = GTK_COMBO_BOX (get_widget (app, box_name)); + + return table[gtk_combo_box_get_active (box)].value; +} + +static void +copy_to_counterpart (app_t *app, GObject *object) +{ + static const char *xy_map[] = + { + "reconstruct_x_combo_box", "reconstruct_y_combo_box", + "sample_x_combo_box", "sample_y_combo_box", + "scale_x_adjustment", "scale_y_adjustment", + }; + GObject *counterpart = NULL; + int i; + + for (i = 0; i < G_N_ELEMENTS (xy_map); i += 2) + { + GObject *x = gtk_builder_get_object (app->builder, xy_map[i]); + GObject *y = gtk_builder_get_object (app->builder, xy_map[i + 1]); + + if (object == x) + counterpart = y; + if (object == y) + counterpart = x; + } + + if (!counterpart) + return; + + if (GTK_IS_COMBO_BOX (counterpart)) + { + gtk_combo_box_set_active ( + GTK_COMBO_BOX (counterpart), + gtk_combo_box_get_active ( + GTK_COMBO_BOX (object))); + } + else if (GTK_IS_ADJUSTMENT (counterpart)) + { + gtk_adjustment_set_value ( + GTK_ADJUSTMENT (counterpart), + gtk_adjustment_get_value ( + GTK_ADJUSTMENT (object))); + } +} + +static double +to_scale (double v) +{ + return pow (1.15, v); +} + +static void +rescale (GtkWidget *may_be_null, app_t *app) +{ + pixman_f_transform_t ftransform; + pixman_transform_t transform; + double new_width, new_height; + double fscale_x, fscale_y; + double rotation; + pixman_fixed_t *params; + int n_params; + double sx, sy; + + pixman_f_transform_init_identity (&ftransform); + + if (may_be_null && 
gtk_toggle_button_get_active ( + GTK_TOGGLE_BUTTON (get_widget (app, "lock_checkbutton")))) + { + copy_to_counterpart (app, G_OBJECT (may_be_null)); + } + + fscale_x = gtk_adjustment_get_value (app->scale_x_adjustment); + fscale_y = gtk_adjustment_get_value (app->scale_y_adjustment); + rotation = gtk_adjustment_get_value (app->rotate_adjustment); + + fscale_x = to_scale (fscale_x); + fscale_y = to_scale (fscale_y); + + new_width = pixman_image_get_width (app->original) * fscale_x; + new_height = pixman_image_get_height (app->original) * fscale_y; + + pixman_f_transform_scale (&ftransform, NULL, fscale_x, fscale_y); + + pixman_f_transform_translate (&ftransform, NULL, - new_width / 2.0, - new_height / 2.0); + + rotation = (rotation / 360.0) * 2 * M_PI; + pixman_f_transform_rotate (&ftransform, NULL, cos (rotation), sin (rotation)); + + pixman_f_transform_translate (&ftransform, NULL, new_width / 2.0, new_height / 2.0); + + pixman_f_transform_invert (&ftransform, &ftransform); + + compute_extents (&ftransform, &sx, &sy); + + pixman_transform_from_pixman_f_transform (&transform, &ftransform); + pixman_image_set_transform (app->original, &transform); + + params = pixman_filter_create_separable_convolution ( + &n_params, + sx * 65536.0 + 0.5, + sy * 65536.0 + 0.5, + get_value (app, filters, "reconstruct_x_combo_box"), + get_value (app, filters, "reconstruct_y_combo_box"), + get_value (app, filters, "sample_x_combo_box"), + get_value (app, filters, "sample_y_combo_box"), + gtk_adjustment_get_value (app->subsample_adjustment), + gtk_adjustment_get_value (app->subsample_adjustment)); + + pixman_image_set_filter (app->original, PIXMAN_FILTER_SEPARABLE_CONVOLUTION, params, n_params); + + pixman_image_set_repeat ( + app->original, get_value (app, repeats, "repeat_combo_box")); + + free (params); + + app->scaled_width = ceil (new_width); + app->scaled_height = ceil (new_height); + + gtk_widget_set_size_request ( + get_widget (app, "drawing_area"), new_width + 0.5, new_height + 
0.5); + + gtk_widget_queue_draw ( + get_widget (app, "drawing_area")); +} + +static gboolean +on_expose (GtkWidget *da, GdkEvent *event, gpointer data) +{ + app_t *app = data; + GdkRectangle *area = &event->expose.area; + cairo_surface_t *surface; + pixman_image_t *tmp; + cairo_t *cr; + uint32_t *pixels; + + pixels = calloc (1, area->width * area->height * 4); + tmp = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, area->width, area->height, pixels, area->width * 4); + + if (area->x < app->scaled_width && area->y < app->scaled_height) + { + pixman_image_composite ( + PIXMAN_OP_SRC, + app->original, NULL, tmp, + area->x, area->y, 0, 0, 0, 0, + app->scaled_width - area->x, app->scaled_height - area->y); + } + + surface = cairo_image_surface_create_for_data ( + (uint8_t *)pixels, CAIRO_FORMAT_ARGB32, + area->width, area->height, area->width * 4); + + cr = gdk_cairo_create (da->window); + + cairo_set_source_surface (cr, surface, area->x, area->y); + + cairo_paint (cr); + + cairo_destroy (cr); + cairo_surface_destroy (surface); + free (pixels); + pixman_image_unref (tmp); + + return TRUE; +} + +static void +set_up_combo_box (app_t *app, const char *box_name, + int n_entries, const named_int_t table[]) +{ + GtkWidget *widget = get_widget (app, box_name); + GtkListStore *model; + GtkCellRenderer *cell; + int i; + + model = gtk_list_store_new (1, G_TYPE_STRING); + + cell = gtk_cell_renderer_text_new (); + gtk_cell_layout_pack_start (GTK_CELL_LAYOUT (widget), cell, TRUE); + gtk_cell_layout_set_attributes (GTK_CELL_LAYOUT (widget), cell, + "text", 0, + NULL); + + gtk_combo_box_set_model (GTK_COMBO_BOX (widget), GTK_TREE_MODEL (model)); + + for (i = 0; i < n_entries; ++i) + { + const named_int_t *info = &(table[i]); + GtkTreeIter iter; + + gtk_list_store_append (model, &iter); + gtk_list_store_set (model, &iter, 0, info->name, -1); + } + + gtk_combo_box_set_active (GTK_COMBO_BOX (widget), 0); + + g_signal_connect (widget, "changed", G_CALLBACK (rescale), app); +} + +static void 
+set_up_filter_box (app_t *app, const char *box_name) +{ + set_up_combo_box (app, box_name, G_N_ELEMENTS (filters), filters); +} + +static char * +format_value (GtkWidget *widget, double value) +{ + return g_strdup_printf ("%.4f", to_scale (value)); +} + +static app_t * +app_new (pixman_image_t *original) +{ + GtkWidget *widget; + app_t *app = g_malloc (sizeof *app); + GError *err = NULL; + + app->builder = gtk_builder_new (); + app->original = original; + + if (!gtk_builder_add_from_file (app->builder, "scale.ui", &err)) + g_error ("Could not read file scale.ui: %s", err->message); + + app->scale_x_adjustment = + GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "scale_x_adjustment")); + app->scale_y_adjustment = + GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "scale_y_adjustment")); + app->rotate_adjustment = + GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "rotate_adjustment")); + app->subsample_adjustment = + GTK_ADJUSTMENT (gtk_builder_get_object (app->builder, "subsample_adjustment")); + + g_signal_connect (app->scale_x_adjustment, "value_changed", G_CALLBACK (rescale), app); + g_signal_connect (app->scale_y_adjustment, "value_changed", G_CALLBACK (rescale), app); + g_signal_connect (app->rotate_adjustment, "value_changed", G_CALLBACK (rescale), app); + g_signal_connect (app->subsample_adjustment, "value_changed", G_CALLBACK (rescale), app); + + widget = get_widget (app, "scale_x_scale"); + gtk_scale_add_mark (GTK_SCALE (widget), 0.0, GTK_POS_LEFT, NULL); + g_signal_connect (widget, "format_value", G_CALLBACK (format_value), app); + widget = get_widget (app, "scale_y_scale"); + gtk_scale_add_mark (GTK_SCALE (widget), 0.0, GTK_POS_LEFT, NULL); + g_signal_connect (widget, "format_value", G_CALLBACK (format_value), app); + widget = get_widget (app, "rotate_scale"); + gtk_scale_add_mark (GTK_SCALE (widget), 0.0, GTK_POS_LEFT, NULL); + + widget = get_widget (app, "drawing_area"); + g_signal_connect (widget, "expose_event", G_CALLBACK 
(on_expose), app); + + set_up_filter_box (app, "reconstruct_x_combo_box"); + set_up_filter_box (app, "reconstruct_y_combo_box"); + set_up_filter_box (app, "sample_x_combo_box"); + set_up_filter_box (app, "sample_y_combo_box"); + + set_up_combo_box ( + app, "repeat_combo_box", G_N_ELEMENTS (repeats), repeats); + + g_signal_connect ( + gtk_builder_get_object (app->builder, "lock_checkbutton"), + "toggled", G_CALLBACK (rescale), app); + + rescale (NULL, app); + + return app; +} + +int +main (int argc, char **argv) +{ + GtkWidget *window; + pixman_image_t *image; + app_t *app; + + gtk_init (&argc, &argv); + + if (argc < 2) + { + printf ("%s <image file>\n", argv[0]); + return -1; + } + + if (!(image = pixman_image_from_file (argv[1], PIXMAN_a8r8g8b8))) + { + printf ("Could not load image \"%s\"\n", argv[1]); + return -1; + } + + app = app_new (image); + + window = get_widget (app, "main"); + + g_signal_connect (window, "delete_event", G_CALLBACK (gtk_main_quit), NULL); + + gtk_window_set_default_size (GTK_WINDOW (window), 1024, 768); + + gtk_widget_show_all (window); + + gtk_main (); + + return 0; +} diff --git a/lib/pixman/demos/scale.ui b/lib/pixman/demos/scale.ui new file mode 100644 index 000000000..b3450d34d --- /dev/null +++ b/lib/pixman/demos/scale.ui @@ -0,0 +1,332 @@ +<?xml version="1.0" encoding="UTF-8"?> +<interface> + <!-- interface-requires gtk+ 2.12 --> + <!-- interface-naming-policy toplevel-contextual --> + <object class="GtkAdjustment" id="rotate_adjustment"> + <property name="lower">-180</property> + <property name="upper">190</property> + <property name="step_increment">1</property> + <property name="page_increment">10</property> + <property name="page_size">10</property> + </object> + <object class="GtkAdjustment" id="scale_y_adjustment"> + <property name="lower">-32</property> + <property name="upper">42</property> + <property name="step_increment">1</property> + <property name="page_increment">10</property> + <property 
name="page_size">10</property> + </object> + <object class="GtkAdjustment" id="scale_x_adjustment"> + <property name="lower">-32</property> + <property name="upper">42</property> + <property name="step_increment">1</property> + <property name="page_increment">10</property> + <property name="page_size">10</property> + </object> + <object class="GtkAdjustment" id="subsample_adjustment"> + <property name="lower">1</property> + <property name="upper">12</property> + <property name="step_increment">1</property> + <property name="page_increment">1</property> + <property name="page_size">0</property> + <property name="value">4</property> + </object> + <object class="GtkWindow" id="main"> + <child> + <object class="GtkHBox" id="u"> + <property name="visible">True</property> + <property name="spacing">12</property> + <child> + <object class="GtkScrolledWindow" id="scrolledwindow1"> + <property name="visible">True</property> + <property name="can_focus">True</property> + <property name="shadow_type">in</property> + <child> + <object class="GtkViewport" id="viewport1"> + <property name="visible">True</property> + <child> + <object class="GtkDrawingArea" id="drawing_area"> + <property name="visible">True</property> + </object> + </child> + </object> + </child> + </object> + <packing> + <property name="position">0</property> + </packing> + </child> + <child> + <object class="GtkVBox" id="box1"> + <property name="visible">True</property> + <property name="spacing">12</property> + <child> + <object class="GtkHBox" id="box2"> + <property name="visible">True</property> + <property name="homogeneous">True</property> + <child> + <object class="GtkVBox" id="box3"> + <property name="visible">True</property> + <property name="spacing">6</property> + <child> + <object class="GtkLabel" id="label1"> + <property name="visible">True</property> + <property name="label" translatable="yes"><b>Scale X</b></property> + <property name="use_markup">True</property> + </object> + <packing> + 
<property name="expand">False</property> + <property name="position">0</property> + </packing> + </child> + <child> + <object class="GtkVScale" id="scale_x_scale"> + <property name="visible">True</property> + <property name="can_focus">True</property> + <property name="adjustment">scale_x_adjustment</property> + <property name="fill_level">32</property> + <property name="value_pos">right</property> + </object> + <packing> + <property name="position">1</property> + </packing> + </child> + </object> + <packing> + <property name="expand">False</property> + <property name="position">0</property> + </packing> + </child> + <child> + <object class="GtkVBox" id="box4"> + <property name="visible">True</property> + <property name="spacing">6</property> + <child> + <object class="GtkLabel" id="label2"> + <property name="visible">True</property> + <property name="label" translatable="yes"><b>Scale Y</b></property> + <property name="use_markup">True</property> + </object> + <packing> + <property name="expand">False</property> + <property name="position">0</property> + </packing> + </child> + <child> + <object class="GtkVScale" id="scale_y_scale"> + <property name="visible">True</property> + <property name="can_focus">True</property> + <property name="adjustment">scale_y_adjustment</property> + <property name="fill_level">32</property> + <property name="value_pos">right</property> + </object> + <packing> + <property name="position">1</property> + </packing> + </child> + </object> + <packing> + <property name="expand">False</property> + <property name="position">1</property> + </packing> + </child> + <child> + <object class="GtkVBox" id="box5"> + <property name="visible">True</property> + <property name="spacing">6</property> + <child> + <object class="GtkLabel" id="label3"> + <property name="visible">True</property> + <property name="label" translatable="yes"><b>Rotate</b></property> + <property name="use_markup">True</property> + </object> + <packing> + <property 
name="expand">False</property> + <property name="position">0</property> + </packing> + </child> + <child> + <object class="GtkVScale" id="rotate_scale"> + <property name="visible">True</property> + <property name="can_focus">True</property> + <property name="adjustment">rotate_adjustment</property> + <property name="fill_level">180</property> + <property name="value_pos">right</property> + </object> + <packing> + <property name="position">1</property> + </packing> + </child> + </object> + <packing> + <property name="expand">False</property> + <property name="position">2</property> + </packing> + </child> + </object> + <packing> + <property name="padding">6</property> + <property name="position">0</property> + </packing> + </child> + <child> + <object class="GtkVBox" id="box6"> + <property name="visible">True</property> + <child> + <object class="GtkCheckButton" + id="lock_checkbutton"> + <property name="label" translatable="yes">Lock X and Y Dimensions</property> + <property name="xalign">0.0</property> + </object> + <packing> + <property name="expand">False</property> + <property name="fill">False</property> + <property name="padding">6</property> + <property name="position">1</property> + </packing> + </child> + <child> + <object class="GtkTable" id="grid1"> + <property name="visible">True</property> + <property name="column_spacing">8</property> + <property name="row_spacing">6</property> + <child> + <object class="GtkLabel" id="label4"> + <property name="visible">True</property> + <property name="xalign">1</property> + <property name="label" translatable="yes"><b>Reconstruct X:</b></property> + <property name="use_markup">True</property> + </object> + </child> + <child> + <object class="GtkLabel" id="label5"> + <property name="visible">True</property> + <property name="xalign">1</property> + <property name="label" translatable="yes"><b>Reconstruct Y:</b></property> + <property name="use_markup">True</property> + </object> + <packing> + <property 
name="top_attach">1</property> + </packing> + </child> + <child> + <object class="GtkLabel" id="label6"> + <property name="visible">True</property> + <property name="xalign">1</property> + <property name="label" translatable="yes"><b>Sample X:</b></property> + <property name="use_markup">True</property> + </object> + <packing> + <property name="top_attach">2</property> + </packing> + </child> + <child> + <object class="GtkLabel" id="label7"> + <property name="visible">True</property> + <property name="xalign">1</property> + <property name="label" translatable="yes"><b>Sample Y:</b></property> + <property name="use_markup">True</property> + </object> + <packing> + <property name="top_attach">3</property> + </packing> + </child> + <child> + <object class="GtkLabel" id="label8"> + <property name="visible">True</property> + <property name="xalign">1</property> + <property name="label" translatable="yes"><b>Repeat:</b></property> + <property name="use_markup">True</property> + </object> + <packing> + <property name="top_attach">4</property> + </packing> + </child> + <child> + <object class="GtkLabel" id="label9"> + <property name="visible">True</property> + <property name="xalign">1</property> + <property name="label" translatable="yes"><b>Subsample:</b></property> + <property name="use_markup">True</property> + </object> + <packing> + <property name="top_attach">5</property> + </packing> + </child> + <child> + <object class="GtkComboBox" id="reconstruct_x_combo_box"> + <property name="visible">True</property> + </object> + <packing> + <property name="left_attach">1</property> + </packing> + </child> + <child> + <object class="GtkComboBox" id="reconstruct_y_combo_box"> + <property name="visible">True</property> + </object> + <packing> + <property name="left_attach">1</property> + <property name="top_attach">1</property> + </packing> + </child> + <child> + <object class="GtkComboBox" id="sample_x_combo_box"> + <property name="visible">True</property> + </object> + 
<packing> + <property name="left_attach">1</property> + <property name="top_attach">2</property> + </packing> + </child> + <child> + <object class="GtkComboBox" id="sample_y_combo_box"> + <property name="visible">True</property> + </object> + <packing> + <property name="left_attach">1</property> + <property name="top_attach">3</property> + </packing> + </child> + <child> + <object class="GtkComboBox" id="repeat_combo_box"> + <property name="visible">True</property> + </object> + <packing> + <property name="left_attach">1</property> + <property name="top_attach">4</property> + </packing> + </child> + <child> + <object class="GtkSpinButton" id="subsample_spin_button"> + <property name="visible">True</property> + <property name="adjustment">subsample_adjustment</property> + </object> + <packing> + <property name="left_attach">1</property> + <property name="top_attach">5</property> + </packing> + </child> + </object> + <packing> + <property name="expand">False</property> + <property name="padding">6</property> + <property name="position">1</property> + </packing> + </child> + </object> + <packing> + <property name="expand">False</property> + <property name="position">0</property> + </packing> + </child> + </object> + <packing> + <property name="expand">False</property> + <property name="position">1</property> + </packing> + </child> + </object> + </child> + </object> +</interface> diff --git a/lib/pixman/depcomp b/lib/pixman/depcomp index 0544c6835..e1f51f482 100644 --- a/lib/pixman/depcomp +++ b/lib/pixman/depcomp @@ -74,6 +74,9 @@ tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} rm -f "$tmpdepfile" +# Avoid interferences from the environment. +gccflag= dashmflag= + # Some modes work just like other modes, but use different flags. We # parameterize here, but still list the modes in the big case below, # to make depend.m4 easier to write. 
Note that we *cannot* use a case @@ -108,7 +111,7 @@ if test "$depmode" = msvc7msys; then fi if test "$depmode" = xlc; then - # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency informations. + # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information. gccflag=-qmakedep=gcc,-MF depmode=gcc fi @@ -142,13 +145,17 @@ gcc3) ;; gcc) +## Note that this doesn't just cater to obsolete pre-3.x GCC compilers, +## but also to in-use compilers like IBM xlc/xlC and the HP C compiler. +## (see the conditional assignment to $gccflag above). ## There are various ways to get dependency output from gcc. Here's ## why we pick this rather obscure method: ## - Don't want to use -MD because we'd like the dependencies to end ## up in a subdir. Having to rename by hand is ugly. ## (We might end up doing this anyway to support other compilers.) ## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like -## -MM, not -M (despite what the docs say). +## -MM, not -M (despite what the docs say). Also, it might not be +## supported by the other compilers which use the 'gcc' depmode. ## - Using -M directly means running the compiler twice (even worse ## than renaming). 
if test -z "$gccflag"; then diff --git a/lib/pixman/pixman/Makefile.am b/lib/pixman/pixman/Makefile.am index cbed0855a..b9ea75424 100644 --- a/lib/pixman/pixman/Makefile.am +++ b/lib/pixman/pixman/Makefile.am @@ -58,7 +58,9 @@ noinst_LTLIBRARIES += libpixman-arm-simd.la libpixman_arm_simd_la_SOURCES = \ pixman-arm-simd.c \ pixman-arm-common.h \ - pixman-arm-simd-asm.S + pixman-arm-simd-asm.S \ + pixman-arm-simd-asm-scaled.S \ + pixman-arm-simd-asm.h libpixman_1_la_LIBADD += libpixman-arm-simd.la ASM_CFLAGS_arm_simd= @@ -85,7 +87,7 @@ noinst_LTLIBRARIES += libpixman-iwmmxt.la libpixman_1_la_LIBADD += libpixman-iwmmxt.la libpixman_iwmmxt_la-pixman-mmx.lo: pixman-mmx.c - $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $(IWMMXT_CFLAGS) -MT libpixman_iwmmxt_la-pixman-mmx.lo -MD -MP -MF $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo -c -o libpixman_iwmmxt_la-pixman-mmx.lo `test -f 'pixman-mmx.c' || echo '$(srcdir)/'`pixman-mmx.c + $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(AM_CPPFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $(IWMMXT_CFLAGS) -MT libpixman_iwmmxt_la-pixman-mmx.lo -MD -MP -MF $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo -c -o libpixman_iwmmxt_la-pixman-mmx.lo `test -f 'pixman-mmx.c' || echo '$(srcdir)/'`pixman-mmx.c $(AM_V_at)$(am__mv) $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Plo libpixman_iwmmxt_la_DEPENDENCIES = $(am__DEPENDENCIES_1) diff --git a/lib/pixman/pixman/Makefile.in b/lib/pixman/pixman/Makefile.in index 14747fd53..ec79ebf6d 100644 --- a/lib/pixman/pixman/Makefile.in +++ b/lib/pixman/pixman/Makefile.in @@ -1,4 +1,4 @@ -# Makefile.in generated by automake 1.12.3 from Makefile.am. +# Makefile.in generated by automake 1.12.6 from Makefile.am. 
# @configure_input@ # Copyright (C) 1994-2012 Free Software Foundation, Inc. @@ -132,14 +132,14 @@ libpixman_1_la_DEPENDENCIES = $(am__append_3) $(am__append_5) \ am__objects_1 = pixman.lo pixman-access.lo pixman-access-accessors.lo \ pixman-bits-image.lo pixman-combine32.lo \ pixman-combine-float.lo pixman-conical-gradient.lo \ - pixman-x86.lo pixman-mips.lo pixman-arm.lo pixman-ppc.lo \ - pixman-edge.lo pixman-edge-accessors.lo pixman-fast-path.lo \ - pixman-glyph.lo pixman-general.lo pixman-gradient-walker.lo \ - pixman-image.lo pixman-implementation.lo \ - pixman-linear-gradient.lo pixman-matrix.lo pixman-noop.lo \ - pixman-radial-gradient.lo pixman-region16.lo \ - pixman-region32.lo pixman-solid-fill.lo pixman-timer.lo \ - pixman-trap.lo pixman-utils.lo + pixman-filter.lo pixman-x86.lo pixman-mips.lo pixman-arm.lo \ + pixman-ppc.lo pixman-edge.lo pixman-edge-accessors.lo \ + pixman-fast-path.lo pixman-glyph.lo pixman-general.lo \ + pixman-gradient-walker.lo pixman-image.lo \ + pixman-implementation.lo pixman-linear-gradient.lo \ + pixman-matrix.lo pixman-noop.lo pixman-radial-gradient.lo \ + pixman-region16.lo pixman-region32.lo pixman-solid-fill.lo \ + pixman-timer.lo pixman-trap.lo pixman-utils.lo am__objects_2 = am_libpixman_1_la_OBJECTS = $(am__objects_1) $(am__objects_2) libpixman_1_la_OBJECTS = $(am_libpixman_1_la_OBJECTS) @@ -162,9 +162,11 @@ libpixman_arm_neon_la_OBJECTS = $(am_libpixman_arm_neon_la_OBJECTS) @USE_ARM_NEON_TRUE@am_libpixman_arm_neon_la_rpath = libpixman_arm_simd_la_LIBADD = am__libpixman_arm_simd_la_SOURCES_DIST = pixman-arm-simd.c \ - pixman-arm-common.h pixman-arm-simd-asm.S + pixman-arm-common.h pixman-arm-simd-asm.S \ + pixman-arm-simd-asm-scaled.S pixman-arm-simd-asm.h @USE_ARM_SIMD_TRUE@am_libpixman_arm_simd_la_OBJECTS = \ -@USE_ARM_SIMD_TRUE@ pixman-arm-simd.lo pixman-arm-simd-asm.lo +@USE_ARM_SIMD_TRUE@ pixman-arm-simd.lo pixman-arm-simd-asm.lo \ +@USE_ARM_SIMD_TRUE@ pixman-arm-simd-asm-scaled.lo libpixman_arm_simd_la_OBJECTS = 
$(am_libpixman_arm_simd_la_OBJECTS) @USE_ARM_SIMD_TRUE@am_libpixman_arm_simd_la_rpath = libpixman_iwmmxt_la_LIBADD = @@ -366,6 +368,8 @@ PIXMAN_VERSION_MAJOR = @PIXMAN_VERSION_MAJOR@ PIXMAN_VERSION_MICRO = @PIXMAN_VERSION_MICRO@ PIXMAN_VERSION_MINOR = @PIXMAN_VERSION_MINOR@ PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ PNG_CFLAGS = @PNG_CFLAGS@ PNG_LIBS = @PNG_LIBS@ PTHREAD_LDFLAGS = @PTHREAD_LDFLAGS@ @@ -442,6 +446,7 @@ libpixman_sources = \ pixman-combine32.c \ pixman-combine-float.c \ pixman-conical-gradient.c \ + pixman-filter.c \ pixman-x86.c \ pixman-mips.c \ pixman-arm.c \ @@ -515,7 +520,9 @@ EXTRA_DIST = \ @USE_ARM_SIMD_TRUE@libpixman_arm_simd_la_SOURCES = \ @USE_ARM_SIMD_TRUE@ pixman-arm-simd.c \ @USE_ARM_SIMD_TRUE@ pixman-arm-common.h \ -@USE_ARM_SIMD_TRUE@ pixman-arm-simd-asm.S +@USE_ARM_SIMD_TRUE@ pixman-arm-simd-asm.S \ +@USE_ARM_SIMD_TRUE@ pixman-arm-simd-asm-scaled.S \ +@USE_ARM_SIMD_TRUE@ pixman-arm-simd-asm.h @USE_ARM_SIMD_TRUE@ASM_CFLAGS_arm_simd = @USE_ARM_NEON_TRUE@libpixman_arm_neon_la_SOURCES = \ @@ -661,6 +668,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-arm-neon-asm-bilinear.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-arm-neon-asm.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-arm-neon.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-arm-simd-asm-scaled.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-arm-simd-asm.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-arm-simd.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-arm.Plo@am__quote@ @@ -671,6 +679,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-edge-accessors.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-edge.Plo@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/pixman-fast-path.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-filter.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-general.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-glyph.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixman-gradient-walker.Plo@am__quote@ @@ -1017,7 +1026,7 @@ uninstall-am: uninstall-libLTLIBRARIES \ @USE_ARM_IWMMXT_TRUE@libpixman_iwmmxt_la-pixman-mmx.lo: pixman-mmx.c -@USE_ARM_IWMMXT_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $(IWMMXT_CFLAGS) -MT libpixman_iwmmxt_la-pixman-mmx.lo -MD -MP -MF $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo -c -o libpixman_iwmmxt_la-pixman-mmx.lo `test -f 'pixman-mmx.c' || echo '$(srcdir)/'`pixman-mmx.c +@USE_ARM_IWMMXT_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(AM_CPPFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $(IWMMXT_CFLAGS) -MT libpixman_iwmmxt_la-pixman-mmx.lo -MD -MP -MF $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo -c -o libpixman_iwmmxt_la-pixman-mmx.lo `test -f 'pixman-mmx.c' || echo '$(srcdir)/'`pixman-mmx.c @USE_ARM_IWMMXT_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Tpo $(DEPDIR)/libpixman_iwmmxt_la-pixman-mmx.Plo @USE_ARM_IWMMXT_TRUE@libpixman-iwmmxt.la: libpixman_iwmmxt_la-pixman-mmx.lo $(libpixman_iwmmxt_la_DEPENDENCIES) diff --git a/lib/pixman/pixman/Makefile.sources b/lib/pixman/pixman/Makefile.sources index 5351fb03d..c624eb9a8 100644 --- a/lib/pixman/pixman/Makefile.sources +++ b/lib/pixman/pixman/Makefile.sources @@ -6,6 +6,7 @@ libpixman_sources = \ pixman-combine32.c \ pixman-combine-float.c \ pixman-conical-gradient.c \ + pixman-filter.c \ pixman-x86.c \ pixman-mips.c \ pixman-arm.c \ diff --git a/lib/pixman/pixman/pixman-arm-common.h 
b/lib/pixman/pixman/pixman-arm-common.h index fa436ad77..3a7cb2bef 100644 --- a/lib/pixman/pixman/pixman-arm-common.h +++ b/lib/pixman/pixman/pixman-arm-common.h @@ -1,5 +1,5 @@ /* - * Copyright © 2010 Nokia Corporation + * Copyright © 2010 Nokia Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/lib/pixman/pixman/pixman-arm-neon-asm.S b/lib/pixman/pixman/pixman-arm-neon-asm.S index 42f1f57ac..187197dc3 100644 --- a/lib/pixman/pixman/pixman-arm-neon-asm.S +++ b/lib/pixman/pixman/pixman-arm-neon-asm.S @@ -1,5 +1,5 @@ /* - * Copyright © 2009 Nokia Corporation + * Copyright © 2009 Nokia Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/lib/pixman/pixman/pixman-arm-neon-asm.h b/lib/pixman/pixman/pixman-arm-neon-asm.h index a4d60b47e..d0d92d74c 100644 --- a/lib/pixman/pixman/pixman-arm-neon-asm.h +++ b/lib/pixman/pixman/pixman-arm-neon-asm.h @@ -1,5 +1,5 @@ /* - * Copyright © 2009 Nokia Corporation + * Copyright © 2009 Nokia Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -385,7 +385,7 @@ * execute simultaneously with NEON and be completely shadowed by it. Thus * we get no performance overhead at all (*). This looks like a very nice * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher, - * but still can implement some rather advanced prefetch logic in sofware + * but still can implement some rather advanced prefetch logic in software * for almost zero cost! 
* * (*) The overhead of the prefetcher is visible when running some trivial diff --git a/lib/pixman/pixman/pixman-arm-simd-asm-scaled.S b/lib/pixman/pixman/pixman-arm-simd-asm-scaled.S new file mode 100644 index 000000000..711099548 --- /dev/null +++ b/lib/pixman/pixman/pixman-arm-simd-asm-scaled.S @@ -0,0 +1,165 @@ +/* + * Copyright © 2008 Mozilla Corporation + * Copyright © 2010 Nokia Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Mozilla Corporation not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Mozilla Corporation makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. 
+ * + * Author: Jeff Muizelaar (jeff@infidigm.net) + * + */ + +/* Prevent the stack from becoming executable */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + + .text + .arch armv6 + .object_arch armv4 + .arm + .altmacro + .p2align 2 + +/* Supplementary macro for setting function attributes */ +.macro pixman_asm_function fname + .func fname + .global fname +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif +fname: +.endm + +/* + * Note: This code is only using armv5te instructions (not even armv6), + * but is scheduled for ARM Cortex-A8 pipeline. So it might need to + * be split into a few variants, tuned for each microarchitecture. + * + * TODO: In order to get good performance on ARM9/ARM11 cores (which don't + * have efficient write combining), it needs to be changed to use 16-byte + * aligned writes using STM instruction. + * + * Nearest scanline scaler macro template uses the following arguments: + * fname - name of the function to generate + * bpp_shift - (1 << bpp_shift) is the size of pixel in bytes + * t - type suffix for LDR/STR instructions + * prefetch_distance - prefetch in the source image by that many + * pixels ahead + * prefetch_braking_distance - stop prefetching when that many pixels are + * remaining before the end of scanline + */ + +.macro generate_nearest_scanline_func fname, bpp_shift, t, \ + prefetch_distance, \ + prefetch_braking_distance + +pixman_asm_function fname + W .req r0 + DST .req r1 + SRC .req r2 + VX .req r3 + UNIT_X .req ip + TMP1 .req r4 + TMP2 .req r5 + VXMASK .req r6 + PF_OFFS .req r7 + SRC_WIDTH_FIXED .req r8 + + ldr UNIT_X, [sp] + push {r4, r5, r6, r7, r8, r10} + mvn VXMASK, #((1 << bpp_shift) - 1) + ldr SRC_WIDTH_FIXED, [sp, #28] + + /* define helper macro */ + .macro scale_2_pixels + ldr&t TMP1, [SRC, TMP1] + and TMP2, VXMASK, VX, asr #(16 - bpp_shift) + adds VX, VX, UNIT_X + str&t TMP1, [DST], #(1 << bpp_shift) +9: subpls VX, VX, SRC_WIDTH_FIXED + bpl 9b + + 
ldr&t TMP2, [SRC, TMP2] + and TMP1, VXMASK, VX, asr #(16 - bpp_shift) + adds VX, VX, UNIT_X + str&t TMP2, [DST], #(1 << bpp_shift) +9: subpls VX, VX, SRC_WIDTH_FIXED + bpl 9b + .endm + + /* now do the scaling */ + and TMP1, VXMASK, VX, asr #(16 - bpp_shift) + adds VX, VX, UNIT_X +9: subpls VX, VX, SRC_WIDTH_FIXED + bpl 9b + subs W, W, #(8 + prefetch_braking_distance) + blt 2f + /* calculate prefetch offset */ + mov PF_OFFS, #prefetch_distance + mla PF_OFFS, UNIT_X, PF_OFFS, VX +1: /* main loop, process 8 pixels per iteration with prefetch */ + pld [SRC, PF_OFFS, asr #(16 - bpp_shift)] + add PF_OFFS, UNIT_X, lsl #3 + scale_2_pixels + scale_2_pixels + scale_2_pixels + scale_2_pixels + subs W, W, #8 + bge 1b +2: + subs W, W, #(4 - 8 - prefetch_braking_distance) + blt 2f +1: /* process the remaining pixels */ + scale_2_pixels + scale_2_pixels + subs W, W, #4 + bge 1b +2: + tst W, #2 + beq 2f + scale_2_pixels +2: + tst W, #1 + ldrne&t TMP1, [SRC, TMP1] + strne&t TMP1, [DST] + /* cleanup helper macro */ + .purgem scale_2_pixels + .unreq DST + .unreq SRC + .unreq W + .unreq VX + .unreq UNIT_X + .unreq TMP1 + .unreq TMP2 + .unreq VXMASK + .unreq PF_OFFS + .unreq SRC_WIDTH_FIXED + /* return */ + pop {r4, r5, r6, r7, r8, r10} + bx lr +.endfunc +.endm + +generate_nearest_scanline_func \ + pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32 + +generate_nearest_scanline_func \ + pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32 diff --git a/lib/pixman/pixman/pixman-arm-simd-asm.S b/lib/pixman/pixman/pixman-arm-simd-asm.S index d7cf0f5d4..c20968879 100644 --- a/lib/pixman/pixman/pixman-arm-simd-asm.S +++ b/lib/pixman/pixman/pixman-arm-simd-asm.S @@ -1,14 +1,14 @@ /* - * Copyright © 2008 Mozilla Corporation - * Copyright © 2010 Nokia Corporation + * Copyright © 2012 Raspberry Pi Foundation + * Copyright © 2012 RISC OS Open Ltd * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is 
hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Mozilla Corporation not be used in + * documentation, and that the name of the copyright holders not be used in * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Mozilla Corporation makes no + * specific, written prior permission. The copyright holders make no * representations about the suitability of this software for any purpose. It * is provided "as is" without express or implied warranty. * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS * SOFTWARE. * - * Author: Jeff Muizelaar (jeff@infidigm.net) + * Author: Ben Avison (bavison@riscosopen.org) * */ @@ -37,412 +37,577 @@ .altmacro .p2align 2 -/* Supplementary macro for setting function attributes */ -.macro pixman_asm_function fname - .func fname - .global fname -#ifdef __ELF__ - .hidden fname - .type fname, %function -#endif -fname: -.endm +#include "pixman-arm-simd-asm.h" -/* - * The code below was generated by gcc 4.3.4 from the commented out - * functions in 'pixman-arm-simd.c' file with the following optimization - * options: "-O3 -mcpu=arm1136jf-s -fomit-frame-pointer" - * - * TODO: replace gcc generated code with hand tuned versions because - * the code quality is not very good, introduce symbolic register - * aliases for better readability and maintainability. +/* A head macro should do all processing which results in an output of up to + * 16 bytes, as far as the final load instruction. The corresponding tail macro + * should complete the processing of the up-to-16 bytes. The calling macro will + * sometimes choose to insert a preload or a decrement of X between them. 
+ * cond ARM condition code for code block + * numbytes Number of output bytes that should be generated this time + * firstreg First WK register in which to place output + * unaligned_src Whether to use non-wordaligned loads of source image + * unaligned_mask Whether to use non-wordaligned loads of mask image + * preload If outputting 16 bytes causes 64 bytes to be read, whether an extra preload should be output */ -pixman_asm_function pixman_composite_add_8_8_asm_armv6 - push {r4, r5, r6, r7, r8, r9, r10, r11} - mov r10, r1 - sub sp, sp, #4 - subs r10, r10, #1 - mov r11, r0 - mov r8, r2 - str r3, [sp] - ldr r7, [sp, #36] - bcc 0f -6: cmp r11, #0 - beq 1f - orr r3, r8, r7 - tst r3, #3 - beq 2f - mov r1, r8 - mov r0, r7 - mov r12, r11 - b 3f -5: tst r3, #3 - beq 4f -3: ldrb r2, [r0], #1 - subs r12, r12, #1 - ldrb r3, [r1] - uqadd8 r3, r2, r3 - strb r3, [r1], #1 - orr r3, r1, r0 - bne 5b -1: ldr r3, [sp] - add r8, r8, r3 - ldr r3, [sp, #40] - add r7, r7, r3 -10: subs r10, r10, #1 - bcs 6b -0: add sp, sp, #4 - pop {r4, r5, r6, r7, r8, r9, r10, r11} - bx lr -2: mov r12, r11 - mov r1, r8 - mov r0, r7 -4: cmp r12, #3 - subgt r6, r12, #4 - movgt r9, r12 - lsrgt r5, r6, #2 - addgt r3, r5, #1 - movgt r12, #0 - lslgt r4, r3, #2 - ble 7f -8: ldr r3, [r0, r12] - ldr r2, [r1, r12] - uqadd8 r3, r3, r2 - str r3, [r1, r12] - add r12, r12, #4 - cmp r12, r4 - bne 8b - sub r3, r9, #4 - bic r3, r3, #3 - add r3, r3, #4 - subs r12, r6, r5, lsl #2 - add r1, r1, r3 - add r0, r0, r3 - beq 1b -7: mov r4, #0 -9: ldrb r3, [r1, r4] - ldrb r2, [r0, r4] - uqadd8 r3, r2, r3 - strb r3, [r1, r4] - add r4, r4, #1 - cmp r4, r12 - bne 9b - ldr r3, [sp] - add r8, r8, r3 - ldr r3, [sp, #40] - add r7, r7, r3 - b 10b -.endfunc - -pixman_asm_function pixman_composite_over_8888_8888_asm_armv6 - push {r4, r5, r6, r7, r8, r9, r10, r11} - sub sp, sp, #20 - cmp r1, #0 - mov r12, r2 - str r1, [sp, #12] - str r0, [sp, #16] - ldr r2, [sp, #52] - beq 0f - lsl r3, r3, #2 - str r3, [sp] - ldr r3, [sp, #56] - mov r10, 
#0 - lsl r3, r3, #2 - str r3, [sp, #8] - mov r11, r3 - b 1f -6: ldr r11, [sp, #8] -1: ldr r9, [sp] - mov r0, r12 - add r12, r12, r9 - mov r1, r2 - str r12, [sp, #4] - add r2, r2, r11 - ldr r12, [sp, #16] - ldr r3, =0x00800080 - ldr r9, =0xff00ff00 - mov r11, #255 - cmp r12, #0 - beq 4f -5: ldr r5, [r1], #4 - ldr r4, [r0] - sub r8, r11, r5, lsr #24 - uxtb16 r6, r4 - uxtb16 r7, r4, ror #8 - mla r6, r6, r8, r3 - mla r7, r7, r8, r3 - uxtab16 r6, r6, r6, ror #8 - uxtab16 r7, r7, r7, ror #8 - and r7, r7, r9 - uxtab16 r6, r7, r6, ror #8 - uqadd8 r5, r6, r5 - str r5, [r0], #4 - subs r12, r12, #1 - bne 5b -4: ldr r3, [sp, #12] - add r10, r10, #1 - cmp r10, r3 - ldr r12, [sp, #4] - bne 6b -0: add sp, sp, #20 - pop {r4, r5, r6, r7, r8, r9, r10, r11} - bx lr -.endfunc - -pixman_asm_function pixman_composite_over_8888_n_8888_asm_armv6 - push {r4, r5, r6, r7, r8, r9, r10, r11} - sub sp, sp, #28 - cmp r1, #0 - str r1, [sp, #12] - ldrb r1, [sp, #71] - mov r12, r2 - str r0, [sp, #16] - ldr r2, [sp, #60] - str r1, [sp, #24] - beq 0f - lsl r3, r3, #2 - str r3, [sp, #20] - ldr r3, [sp, #64] - mov r10, #0 - lsl r3, r3, #2 - str r3, [sp, #8] - mov r11, r3 - b 1f -5: ldr r11, [sp, #8] -1: ldr r4, [sp, #20] - mov r0, r12 - mov r1, r2 - add r12, r12, r4 - add r2, r2, r11 - str r12, [sp] - str r2, [sp, #4] - ldr r12, [sp, #16] - ldr r2, =0x00800080 - ldr r3, [sp, #24] - mov r11, #255 - cmp r12, #0 - beq 3f -4: ldr r5, [r1], #4 - ldr r4, [r0] - uxtb16 r6, r5 - uxtb16 r7, r5, ror #8 - mla r6, r6, r3, r2 - mla r7, r7, r3, r2 - uxtab16 r6, r6, r6, ror #8 - uxtab16 r7, r7, r7, ror #8 - uxtb16 r6, r6, ror #8 - uxtb16 r7, r7, ror #8 - orr r5, r6, r7, lsl #8 - uxtb16 r6, r4 - uxtb16 r7, r4, ror #8 - sub r8, r11, r5, lsr #24 - mla r6, r6, r8, r2 - mla r7, r7, r8, r2 - uxtab16 r6, r6, r6, ror #8 - uxtab16 r7, r7, r7, ror #8 - uxtb16 r6, r6, ror #8 - uxtb16 r7, r7, ror #8 - orr r6, r6, r7, lsl #8 - uqadd8 r5, r6, r5 - str r5, [r0], #4 - subs r12, r12, #1 - bne 4b -3: ldr r1, [sp, #12] - add r10, r10, 
#1 - cmp r10, r1 - ldr r12, [sp] - ldr r2, [sp, #4] - bne 5b -0: add sp, sp, #28 - pop {r4, r5, r6, r7, r8, r9, r10, r11} - bx lr -.endfunc - -pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6 - push {r4, r5, r6, r7, r8, r9, r10, r11} - sub sp, sp, #28 - cmp r1, #0 - ldr r9, [sp, #60] - str r1, [sp, #12] - bic r1, r9, #-16777216 - str r1, [sp, #20] - mov r12, r2 - lsr r1, r9, #8 - ldr r2, [sp, #20] - bic r1, r1, #-16777216 - bic r2, r2, #65280 - bic r1, r1, #65280 - str r2, [sp, #20] - str r0, [sp, #16] - str r1, [sp, #4] - ldr r2, [sp, #68] - beq 0f - lsl r3, r3, #2 - str r3, [sp, #24] - mov r0, #0 - b 1f -5: ldr r3, [sp, #24] -1: ldr r4, [sp, #72] - mov r10, r12 - mov r1, r2 - add r12, r12, r3 - add r2, r2, r4 - str r12, [sp, #8] - str r2, [sp] - ldr r12, [sp, #16] - ldr r11, =0x00800080 - ldr r2, [sp, #4] - ldr r3, [sp, #20] - cmp r12, #0 - beq 3f -4: ldrb r5, [r1], #1 - ldr r4, [r10] - mla r6, r3, r5, r11 - mla r7, r2, r5, r11 - uxtab16 r6, r6, r6, ror #8 - uxtab16 r7, r7, r7, ror #8 - uxtb16 r6, r6, ror #8 - uxtb16 r7, r7, ror #8 - orr r5, r6, r7, lsl #8 - uxtb16 r6, r4 - uxtb16 r7, r4, ror #8 - mvn r8, r5 - lsr r8, r8, #24 - mla r6, r6, r8, r11 - mla r7, r7, r8, r11 - uxtab16 r6, r6, r6, ror #8 - uxtab16 r7, r7, r7, ror #8 - uxtb16 r6, r6, ror #8 - uxtb16 r7, r7, ror #8 - orr r6, r6, r7, lsl #8 - uqadd8 r5, r6, r5 - str r5, [r10], #4 - subs r12, r12, #1 - bne 4b -3: ldr r4, [sp, #12] - add r0, r0, #1 - cmp r0, r4 - ldr r12, [sp, #8] - ldr r2, [sp] - bne 5b -0: add sp, sp, #28 - pop {r4, r5, r6, r7, r8, r9, r10, r11} - bx lr -.endfunc +.macro blit_init + line_saved_regs STRIDE_D, STRIDE_S +.endm -/* - * Note: This code is only using armv5te instructions (not even armv6), - * but is scheduled for ARM Cortex-A8 pipeline. So it might need to - * be split into a few variants, tuned for each microarchitecture. 
- * - * TODO: In order to get good performance on ARM9/ARM11 cores (which don't - * have efficient write combining), it needs to be changed to use 16-byte - * aligned writes using STM instruction. - * - * Nearest scanline scaler macro template uses the following arguments: - * fname - name of the function to generate - * bpp_shift - (1 << bpp_shift) is the size of pixel in bytes - * t - type suffix for LDR/STR instructions - * prefetch_distance - prefetch in the source image by that many - * pixels ahead - * prefetch_braking_distance - stop prefetching when that many pixels are - * remaining before the end of scanline +.macro blit_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + pixld cond, numbytes, firstreg, SRC, unaligned_src +.endm + +.macro blit_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment + WK4 .req STRIDE_D + WK5 .req STRIDE_S + WK6 .req MASK + WK7 .req STRIDE_M +110: pixld , 16, 0, SRC, unaligned_src + pixld , 16, 4, SRC, unaligned_src + pld [SRC, SCRATCH] + pixst , 16, 0, DST + pixst , 16, 4, DST + subs X, X, #32*8/src_bpp + bhs 110b + .unreq WK4 + .unreq WK5 + .unreq WK6 + .unreq WK7 +.endm + +generate_composite_function \ + pixman_composite_src_8888_8888_asm_armv6, 32, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \ + 4, /* prefetch distance */ \ + blit_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + blit_process_head, \ + nop_macro, /* process tail */ \ + blit_inner_loop + +generate_composite_function \ + pixman_composite_src_0565_0565_asm_armv6, 16, 0, 16, \ + FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \ + 4, /* prefetch distance */ \ + blit_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + blit_process_head, \ + nop_macro, /* process tail */ \ + blit_inner_loop + +generate_composite_function \ + 
pixman_composite_src_8_8_asm_armv6, 8, 0, 8, \ + FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_SPILL_LINE_VARS_WIDE | FLAG_PROCESS_PRESERVES_SCRATCH, \ + 3, /* prefetch distance */ \ + blit_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + blit_process_head, \ + nop_macro, /* process tail */ \ + blit_inner_loop + +/******************************************************************************/ + +.macro src_n_8888_init + ldr SRC, [sp, #ARGS_STACK_OFFSET] + mov STRIDE_S, SRC + mov MASK, SRC + mov STRIDE_M, SRC +.endm + +.macro src_n_0565_init + ldrh SRC, [sp, #ARGS_STACK_OFFSET] + orr SRC, SRC, lsl #16 + mov STRIDE_S, SRC + mov MASK, SRC + mov STRIDE_M, SRC +.endm + +.macro src_n_8_init + ldrb SRC, [sp, #ARGS_STACK_OFFSET] + orr SRC, SRC, lsl #8 + orr SRC, SRC, lsl #16 + mov STRIDE_S, SRC + mov MASK, SRC + mov STRIDE_M, SRC +.endm + +.macro fill_process_tail cond, numbytes, firstreg + WK4 .req SRC + WK5 .req STRIDE_S + WK6 .req MASK + WK7 .req STRIDE_M + pixst cond, numbytes, 4, DST + .unreq WK4 + .unreq WK5 + .unreq WK6 + .unreq WK7 +.endm + +generate_composite_function \ + pixman_composite_src_n_8888_asm_armv6, 0, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \ + 0, /* prefetch distance doesn't apply */ \ + src_n_8888_init \ + nop_macro, /* newline */ \ + nop_macro /* cleanup */ \ + nop_macro /* process head */ \ + fill_process_tail + +generate_composite_function \ + pixman_composite_src_n_0565_asm_armv6, 0, 0, 16, \ + FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \ + 0, /* prefetch distance doesn't apply */ \ + src_n_0565_init \ + nop_macro, /* newline */ \ + nop_macro /* cleanup */ \ + nop_macro /* process head */ \ + fill_process_tail + +generate_composite_function \ + pixman_composite_src_n_8_asm_armv6, 0, 0, 8, \ + FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_PSR | 
FLAG_PROCESS_DOES_STORE | FLAG_PROCESS_PRESERVES_SCRATCH \ + 0, /* prefetch distance doesn't apply */ \ + src_n_8_init \ + nop_macro, /* newline */ \ + nop_macro /* cleanup */ \ + nop_macro /* process head */ \ + fill_process_tail + +/******************************************************************************/ + +.macro src_x888_8888_pixel, cond, reg + orr&cond WK&reg, WK&reg, #0xFF000000 +.endm + +.macro pixman_composite_src_x888_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + pixld cond, numbytes, firstreg, SRC, unaligned_src +.endm + +.macro pixman_composite_src_x888_8888_process_tail cond, numbytes, firstreg + src_x888_8888_pixel cond, %(firstreg+0) + .if numbytes >= 8 + src_x888_8888_pixel cond, %(firstreg+1) + .if numbytes == 16 + src_x888_8888_pixel cond, %(firstreg+2) + src_x888_8888_pixel cond, %(firstreg+3) + .endif + .endif +.endm + +generate_composite_function \ + pixman_composite_src_x888_8888_asm_armv6, 32, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_SCRATCH, \ + 3, /* prefetch distance */ \ + nop_macro, /* init */ \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + pixman_composite_src_x888_8888_process_head, \ + pixman_composite_src_x888_8888_process_tail + +/******************************************************************************/ + +.macro src_0565_8888_init + /* Hold loop invariants in MASK and STRIDE_M */ + ldr MASK, =0x07E007E0 + mov STRIDE_M, #0xFF000000 + /* Set GE[3:0] to 1010 so SEL instructions do what we want */ + ldr SCRATCH, =0x80008000 + uadd8 SCRATCH, SCRATCH, SCRATCH +.endm + +.macro src_0565_8888_2pixels, reg1, reg2 + and SCRATCH, WK&reg1, MASK @ 00000GGGGGG0000000000gggggg00000 + bic WK&reg2, WK&reg1, MASK @ RRRRR000000BBBBBrrrrr000000bbbbb + orr SCRATCH, SCRATCH, SCRATCH, lsr #6 @ 00000GGGGGGGGGGGG0000ggggggggggg + mov WK&reg1, WK&reg2, lsl #16 @ rrrrr000000bbbbb0000000000000000 + mov SCRATCH, SCRATCH, ror #19 @ GGGG0000ggggggggggg00000GGGGGGGG + bic WK&reg2, WK&reg2, WK&reg1, 
lsr #16 @ RRRRR000000BBBBB0000000000000000 + orr WK&reg1, WK&reg1, WK&reg1, lsr #5 @ rrrrrrrrrr0bbbbbbbbbb00000000000 + orr WK&reg2, WK&reg2, WK&reg2, lsr #5 @ RRRRRRRRRR0BBBBBBBBBB00000000000 + pkhtb WK&reg1, WK&reg1, WK&reg1, asr #5 @ rrrrrrrr--------bbbbbbbb-------- + sel WK&reg1, WK&reg1, SCRATCH @ rrrrrrrrggggggggbbbbbbbb-------- + mov SCRATCH, SCRATCH, ror #16 @ ggg00000GGGGGGGGGGGG0000gggggggg + pkhtb WK&reg2, WK&reg2, WK&reg2, asr #5 @ RRRRRRRR--------BBBBBBBB-------- + sel WK&reg2, WK&reg2, SCRATCH @ RRRRRRRRGGGGGGGGBBBBBBBB-------- + orr WK&reg1, STRIDE_M, WK&reg1, lsr #8 @ 11111111rrrrrrrrggggggggbbbbbbbb + orr WK&reg2, STRIDE_M, WK&reg2, lsr #8 @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB +.endm + +/* This version doesn't need STRIDE_M, but is one instruction longer. + It would however be preferable for an XRGB target, since we could knock off the last 2 instructions, but is that a common case? + and SCRATCH, WK&reg1, MASK @ 00000GGGGGG0000000000gggggg00000 + bic WK&reg1, WK&reg1, MASK @ RRRRR000000BBBBBrrrrr000000bbbbb + orr SCRATCH, SCRATCH, SCRATCH, lsr #6 @ 00000GGGGGGGGGGGG0000ggggggggggg + mov WK&reg2, WK&reg1, lsr #16 @ 0000000000000000RRRRR000000BBBBB + mov SCRATCH, SCRATCH, ror #27 @ GGGGGGGGGGGG0000ggggggggggg00000 + bic WK&reg1, WK&reg1, WK&reg2, lsl #16 @ 0000000000000000rrrrr000000bbbbb + mov WK&reg2, WK&reg2, lsl #3 @ 0000000000000RRRRR000000BBBBB000 + mov WK&reg1, WK&reg1, lsl #3 @ 0000000000000rrrrr000000bbbbb000 + orr WK&reg2, WK&reg2, WK&reg2, lsr #5 @ 0000000000000RRRRRRRRRR0BBBBBBBB + orr WK&reg1, WK&reg1, WK&reg1, lsr #5 @ 0000000000000rrrrrrrrrr0bbbbbbbb + pkhbt WK&reg2, WK&reg2, WK&reg2, lsl #5 @ --------RRRRRRRR--------BBBBBBBB + pkhbt WK&reg1, WK&reg1, WK&reg1, lsl #5 @ --------rrrrrrrr--------bbbbbbbb + sel WK&reg2, SCRATCH, WK&reg2 @ --------RRRRRRRRGGGGGGGGBBBBBBBB + sel WK&reg1, SCRATCH, WK&reg1 @ --------rrrrrrrrggggggggbbbbbbbb + orr WK&reg2, WK&reg2, #0xFF000000 @ 11111111RRRRRRRRGGGGGGGGBBBBBBBB + orr WK&reg1, WK&reg1, #0xFF000000 @ 11111111rrrrrrrrggggggggbbbbbbbb +*/ + +.macro src_0565_8888_1pixel, reg + bic SCRATCH, WK&reg, MASK @ 0000000000000000rrrrr000000bbbbb + and WK&reg, WK&reg, MASK 
@ 000000000000000000000gggggg00000 + mov SCRATCH, SCRATCH, lsl #3 @ 0000000000000rrrrr000000bbbbb000 + mov WK&reg, WK&reg, lsl #5 @ 0000000000000000gggggg0000000000 + orr SCRATCH, SCRATCH, SCRATCH, lsr #5 @ 0000000000000rrrrrrrrrr0bbbbbbbb + orr WK&reg, WK&reg, WK&reg, lsr #6 @ 000000000000000gggggggggggg00000 + pkhbt SCRATCH, SCRATCH, SCRATCH, lsl #5 @ --------rrrrrrrr--------bbbbbbbb + sel WK&reg, WK&reg, SCRATCH @ --------rrrrrrrrggggggggbbbbbbbb + orr WK&reg, WK&reg, #0xFF000000 @ 11111111rrrrrrrrggggggggbbbbbbbb +.endm + +.macro src_0565_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + .if numbytes == 16 + pixldst ld,, 8, firstreg, %(firstreg+2),,, SRC, unaligned_src + .elseif numbytes == 8 + pixld , 4, firstreg, SRC, unaligned_src + .elseif numbytes == 4 + pixld , 2, firstreg, SRC, unaligned_src + .endif +.endm + +.macro src_0565_8888_process_tail cond, numbytes, firstreg + .if numbytes == 16 + src_0565_8888_2pixels firstreg, %(firstreg+1) + src_0565_8888_2pixels %(firstreg+2), %(firstreg+3) + .elseif numbytes == 8 + src_0565_8888_2pixels firstreg, %(firstreg+1) + .else + src_0565_8888_1pixel firstreg + .endif +.endm + +generate_composite_function \ + pixman_composite_src_0565_8888_asm_armv6, 16, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_BRANCH_OVER, \ + 3, /* prefetch distance */ \ + src_0565_8888_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + src_0565_8888_process_head, \ + src_0565_8888_process_tail + +/******************************************************************************/ + +.macro add_8_8_8pixels cond, dst1, dst2 + uqadd8&cond WK&dst1, WK&dst1, MASK + uqadd8&cond WK&dst2, WK&dst2, STRIDE_M +.endm + +.macro add_8_8_4pixels cond, dst + uqadd8&cond WK&dst, WK&dst, MASK +.endm + +.macro add_8_8_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + WK4 .req MASK + WK5 .req STRIDE_M + .if numbytes == 16 + pixld cond, 8, 4, SRC, unaligned_src + pixld cond, 16, firstreg, DST, 0 + add_8_8_8pixels cond, 
firstreg, %(firstreg+1) + pixld cond, 8, 4, SRC, unaligned_src + .else + pixld cond, numbytes, 4, SRC, unaligned_src + pixld cond, numbytes, firstreg, DST, 0 + .endif + .unreq WK4 + .unreq WK5 +.endm + +.macro add_8_8_process_tail cond, numbytes, firstreg + .if numbytes == 16 + add_8_8_8pixels cond, %(firstreg+2), %(firstreg+3) + .elseif numbytes == 8 + add_8_8_8pixels cond, firstreg, %(firstreg+1) + .else + add_8_8_4pixels cond, firstreg + .endif +.endm + +generate_composite_function \ + pixman_composite_add_8_8_asm_armv6, 8, 0, 8, \ + FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_PRESERVES_SCRATCH, \ + 2, /* prefetch distance */ \ + nop_macro, /* init */ \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + add_8_8_process_head, \ + add_8_8_process_tail + +/******************************************************************************/ + +.macro over_8888_8888_init + /* Hold loop invariant in MASK */ + ldr MASK, =0x00800080 + /* Set GE[3:0] to 0101 so SEL instructions do what we want */ + uadd8 SCRATCH, MASK, MASK + line_saved_regs STRIDE_D, STRIDE_S, ORIG_W +.endm + +.macro over_8888_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + WK4 .req STRIDE_D + WK5 .req STRIDE_S + WK6 .req STRIDE_M + WK7 .req ORIG_W + pixld , numbytes, %(4+firstreg), SRC, unaligned_src + pixld , numbytes, firstreg, DST, 0 + .unreq WK4 + .unreq WK5 + .unreq WK6 + .unreq WK7 +.endm + +.macro over_8888_8888_check_transparent numbytes, reg0, reg1, reg2, reg3 + /* Since these colours are premultiplied by alpha, only 0 indicates transparent (any other colour with 0 in the alpha byte is luminous) */ + teq WK&reg0, #0 + .if numbytes > 4 + teqeq WK&reg1, #0 + .if numbytes > 8 + teqeq WK&reg2, #0 + teqeq WK&reg3, #0 + .endif + .endif +.endm + +.macro over_8888_8888_prepare next + mov WK&next, WK&next, lsr #24 +.endm + +.macro over_8888_8888_1pixel src, dst, offset, next + /* src = destination component multiplier */ + rsb WK&src, WK&src, #255 + /* Split even/odd 
bytes of dst into SCRATCH/dst */ + uxtb16 SCRATCH, WK&dst + uxtb16 WK&dst, WK&dst, ror #8 + /* Multiply through, adding 0.5 to the upper byte of result for rounding */ + mla SCRATCH, SCRATCH, WK&src, MASK + mla WK&dst, WK&dst, WK&src, MASK + /* Where we would have had a stall between the result of the first MLA and the shifter input, + * reload the complete source pixel */ + ldr WK&src, [SRC, #offset] + /* Multiply by 257/256 to approximate 256/255 */ + uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8 + /* In this stall, start processing the next pixel */ + .if offset < -4 + mov WK&next, WK&next, lsr #24 + .endif + uxtab16 WK&dst, WK&dst, WK&dst, ror #8 + /* Recombine even/odd bytes of multiplied destination */ + mov SCRATCH, SCRATCH, ror #8 + sel WK&dst, SCRATCH, WK&dst + /* Saturated add of source to multiplied destination */ + uqadd8 WK&dst, WK&dst, WK&src +.endm + +.macro over_8888_8888_process_tail cond, numbytes, firstreg + WK4 .req STRIDE_D + WK5 .req STRIDE_S + WK6 .req STRIDE_M + WK7 .req ORIG_W + over_8888_8888_check_transparent numbytes, %(4+firstreg), %(5+firstreg), %(6+firstreg), %(7+firstreg) + beq 10f + over_8888_8888_prepare %(4+firstreg) + .set PROCESS_REG, firstreg + .set PROCESS_OFF, -numbytes + .rept numbytes / 4 + over_8888_8888_1pixel %(4+PROCESS_REG), %(0+PROCESS_REG), PROCESS_OFF, %(5+PROCESS_REG) + .set PROCESS_REG, PROCESS_REG+1 + .set PROCESS_OFF, PROCESS_OFF+4 + .endr + pixst , numbytes, firstreg, DST +10: + .unreq WK4 + .unreq WK5 + .unreq WK6 + .unreq WK7 +.endm + +generate_composite_function \ + pixman_composite_over_8888_8888_asm_armv6, 32, 0, 32 \ + FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \ + 2, /* prefetch distance */ \ + over_8888_8888_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + over_8888_8888_process_head, \ + over_8888_8888_process_tail + +/******************************************************************************/ + +/* Multiply each 
byte of a word by a byte. + * Useful when there aren't any obvious ways to fill the stalls with other instructions. + * word Register containing 4 bytes + * byte Register containing byte multiplier (bits 8-31 must be 0) + * tmp Scratch register + * half Register containing the constant 0x00800080 + * GE[3:0] bits must contain 0101 */ +.macro mul_8888_8 word, byte, tmp, half + /* Split even/odd bytes of word apart */ + uxtb16 tmp, word + uxtb16 word, word, ror #8 + /* Multiply bytes together with rounding, then by 257/256 */ + mla tmp, tmp, byte, half + mla word, word, byte, half /* 1 stall follows */ + uxtab16 tmp, tmp, tmp, ror #8 /* 1 stall follows */ + uxtab16 word, word, word, ror #8 + /* Recombine bytes */ + mov tmp, tmp, ror #8 + sel word, tmp, word +.endm + +/******************************************************************************/ + +.macro over_8888_n_8888_init + /* Mask is constant */ + ldr MASK, [sp, #ARGS_STACK_OFFSET+8] + /* Hold loop invariant in STRIDE_M */ + ldr STRIDE_M, =0x00800080 + /* We only want the alpha bits of the constant mask */ + mov MASK, MASK, lsr #24 + /* Set GE[3:0] to 0101 so SEL instructions do what we want */ + uadd8 SCRATCH, STRIDE_M, STRIDE_M + line_saved_regs Y, STRIDE_D, STRIDE_S, ORIG_W +.endm + +.macro over_8888_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + WK4 .req Y + WK5 .req STRIDE_D + WK6 .req STRIDE_S + WK7 .req ORIG_W + pixld , numbytes, %(4+(firstreg%2)), SRC, unaligned_src + pixld , numbytes, firstreg, DST, 0 + .unreq WK4 + .unreq WK5 + .unreq WK6 + .unreq WK7 +.endm + +.macro over_8888_n_8888_1pixel src, dst + mul_8888_8 WK&src, MASK, SCRATCH, STRIDE_M + sub WK7, WK6, WK&src, lsr #24 + mul_8888_8 WK&dst, WK7, SCRATCH, STRIDE_M + uqadd8 WK&dst, WK&dst, WK&src +.endm + +.macro over_8888_n_8888_process_tail cond, numbytes, firstreg + WK4 .req Y + WK5 .req STRIDE_D + WK6 .req STRIDE_S + WK7 .req ORIG_W + over_8888_8888_check_transparent numbytes, %(4+(firstreg%2)), 
%(5+(firstreg%2)), %(6+firstreg), %(7+firstreg) + beq 10f + mov WK6, #255 + .set PROCESS_REG, firstreg + .rept numbytes / 4 + .if numbytes == 16 && PROCESS_REG == 2 + /* We're using WK6 and WK7 as temporaries, so half way through + * 4 pixels, reload the second two source pixels but this time + * into WK4 and WK5 */ + ldmdb SRC, {WK4, WK5} + .endif + over_8888_n_8888_1pixel %(4+(PROCESS_REG%2)), %(PROCESS_REG) + .set PROCESS_REG, PROCESS_REG+1 + .endr + pixst , numbytes, firstreg, DST +10: + .unreq WK4 + .unreq WK5 + .unreq WK6 + .unreq WK7 +.endm + +generate_composite_function \ + pixman_composite_over_8888_n_8888_asm_armv6, 32, 0, 32 \ + FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \ + 2, /* prefetch distance */ \ + over_8888_n_8888_init, \ + nop_macro, /* newline */ \ + nop_macro, /* cleanup */ \ + over_8888_n_8888_process_head, \ + over_8888_n_8888_process_tail + +/******************************************************************************/ + +.macro over_n_8_8888_init + /* Source is constant, but splitting it into even/odd bytes is a loop invariant */ + ldr SRC, [sp, #ARGS_STACK_OFFSET] + /* Not enough registers to hold this constant, but we still use it here to set GE[3:0] */ + ldr SCRATCH, =0x00800080 + uxtb16 STRIDE_S, SRC + uxtb16 SRC, SRC, ror #8 + /* Set GE[3:0] to 0101 so SEL instructions do what we want */ + uadd8 SCRATCH, SCRATCH, SCRATCH + line_saved_regs Y, STRIDE_D, STRIDE_M, ORIG_W +.endm + +.macro over_n_8_8888_newline + ldr STRIDE_D, =0x00800080 + b 1f + .ltorg +1: +.endm + +.macro over_n_8_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload + WK4 .req STRIDE_M + pixld , numbytes/4, 4, MASK, unaligned_mask + pixld , numbytes, firstreg, DST, 0 + .unreq WK4 +.endm + +.macro over_n_8_8888_1pixel src, dst + uxtb Y, WK4, ror #src*8 + /* Trailing part of multiplication of source */ + mla SCRATCH, STRIDE_S, Y, STRIDE_D + mla Y, SRC, Y, STRIDE_D + mov 
ORIG_W, #255 + uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8 + uxtab16 Y, Y, Y, ror #8 + mov SCRATCH, SCRATCH, ror #8 + sub ORIG_W, ORIG_W, Y, lsr #24 + sel Y, SCRATCH, Y + /* Then multiply the destination */ + mul_8888_8 WK&dst, ORIG_W, SCRATCH, STRIDE_D + uqadd8 WK&dst, WK&dst, Y +.endm + +.macro over_n_8_8888_process_tail cond, numbytes, firstreg + WK4 .req STRIDE_M + teq WK4, #0 + beq 10f + .set PROCESS_REG, firstreg + .rept numbytes / 4 + over_n_8_8888_1pixel %(PROCESS_REG-firstreg), %(PROCESS_REG) + .set PROCESS_REG, PROCESS_REG+1 + .endr + pixst , numbytes, firstreg, DST +10: + .unreq WK4 +.endm + +generate_composite_function \ + pixman_composite_over_n_8_8888_asm_armv6, 0, 8, 32 \ + FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS \ + 2, /* prefetch distance */ \ + over_n_8_8888_init, \ + over_n_8_8888_newline, \ + nop_macro, /* cleanup */ \ + over_n_8_8888_process_head, \ + over_n_8_8888_process_tail + +/******************************************************************************/ -.macro generate_nearest_scanline_func fname, bpp_shift, t, \ - prefetch_distance, \ - prefetch_braking_distance - -pixman_asm_function fname - W .req r0 - DST .req r1 - SRC .req r2 - VX .req r3 - UNIT_X .req ip - TMP1 .req r4 - TMP2 .req r5 - VXMASK .req r6 - PF_OFFS .req r7 - SRC_WIDTH_FIXED .req r8 - - ldr UNIT_X, [sp] - push {r4, r5, r6, r7, r8, r10} - mvn VXMASK, #((1 << bpp_shift) - 1) - ldr SRC_WIDTH_FIXED, [sp, #28] - - /* define helper macro */ - .macro scale_2_pixels - ldr&t TMP1, [SRC, TMP1] - and TMP2, VXMASK, VX, asr #(16 - bpp_shift) - adds VX, VX, UNIT_X - str&t TMP1, [DST], #(1 << bpp_shift) -9: subpls VX, VX, SRC_WIDTH_FIXED - bpl 9b - - ldr&t TMP2, [SRC, TMP2] - and TMP1, VXMASK, VX, asr #(16 - bpp_shift) - adds VX, VX, UNIT_X - str&t TMP2, [DST], #(1 << bpp_shift) -9: subpls VX, VX, SRC_WIDTH_FIXED - bpl 9b - .endm - - /* now do the scaling */ - and TMP1, VXMASK, VX, asr #(16 - bpp_shift) - adds 
VX, VX, UNIT_X -9: subpls VX, VX, SRC_WIDTH_FIXED - bpl 9b - subs W, W, #(8 + prefetch_braking_distance) - blt 2f - /* calculate prefetch offset */ - mov PF_OFFS, #prefetch_distance - mla PF_OFFS, UNIT_X, PF_OFFS, VX -1: /* main loop, process 8 pixels per iteration with prefetch */ - pld [SRC, PF_OFFS, asr #(16 - bpp_shift)] - add PF_OFFS, UNIT_X, lsl #3 - scale_2_pixels - scale_2_pixels - scale_2_pixels - scale_2_pixels - subs W, W, #8 - bge 1b -2: - subs W, W, #(4 - 8 - prefetch_braking_distance) - blt 2f -1: /* process the remaining pixels */ - scale_2_pixels - scale_2_pixels - subs W, W, #4 - bge 1b -2: - tst W, #2 - beq 2f - scale_2_pixels -2: - tst W, #1 - ldrne&t TMP1, [SRC, TMP1] - strne&t TMP1, [DST] - /* cleanup helper macro */ - .purgem scale_2_pixels - .unreq DST - .unreq SRC - .unreq W - .unreq VX - .unreq UNIT_X - .unreq TMP1 - .unreq TMP2 - .unreq VXMASK - .unreq PF_OFFS - .unreq SRC_WIDTH_FIXED - /* return */ - pop {r4, r5, r6, r7, r8, r10} - bx lr -.endfunc -.endm - -generate_nearest_scanline_func \ - pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32 - -generate_nearest_scanline_func \ - pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32 diff --git a/lib/pixman/pixman/pixman-arm-simd-asm.h b/lib/pixman/pixman/pixman-arm-simd-asm.h new file mode 100644 index 000000000..65436062b --- /dev/null +++ b/lib/pixman/pixman/pixman-arm-simd-asm.h @@ -0,0 +1,908 @@ +/* + * Copyright © 2012 Raspberry Pi Foundation + * Copyright © 2012 RISC OS Open Ltd + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of the copyright holders not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written 
prior permission. The copyright holders make no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Ben Avison (bavison@riscosopen.org) + * + */ + +/* + * Because the alignment of pixel data to cachelines, and even the number of + * cachelines per row can vary from row to row, and because of the need to + * preload each scanline once and only once, this prefetch strategy treats + * each row of pixels independently. When a pixel row is long enough, there + * are three distinct phases of prefetch: + * * an inner loop section, where each time a cacheline of data is + * processed, another cacheline is preloaded (the exact distance ahead is + * determined empirically using profiling results from lowlevel-blt-bench) + * * a leading section, where enough cachelines are preloaded to ensure no + * cachelines escape being preloaded when the inner loop starts + * * a trailing section, where a limited number (0 or more) of cachelines + * are preloaded to deal with data (if any) that hangs off the end of the + * last iteration of the inner loop, plus any trailing bytes that were not + * enough to make up one whole iteration of the inner loop + * + * There are (in general) three distinct code paths, selected between + * depending upon how long the pixel row is. 
If it is long enough that there + * is at least one iteration of the inner loop (as described above) then + * this is described as the "wide" case. If it is shorter than that, but + * there are still enough bytes output that there is at least one 16-byte- + * long, 16-byte-aligned write to the destination (the optimum type of + * write), then this is the "medium" case. If it is not even this long, then + * this is the "narrow" case, and there is no attempt to align writes to + * 16-byte boundaries. In the "medium" and "narrow" cases, all the + * cachelines containing data from the pixel row are prefetched up-front. + */ + +/* + * Determine whether we put the arguments on the stack for debugging. + */ +#undef DEBUG_PARAMS + +/* + * Bit flags for 'generate_composite_function' macro which are used + * to tune generated functions behavior. + */ +.set FLAG_DST_WRITEONLY, 0 +.set FLAG_DST_READWRITE, 1 +.set FLAG_COND_EXEC, 0 +.set FLAG_BRANCH_OVER, 2 +.set FLAG_PROCESS_PRESERVES_PSR, 0 +.set FLAG_PROCESS_CORRUPTS_PSR, 4 +.set FLAG_PROCESS_DOESNT_STORE, 0 +.set FLAG_PROCESS_DOES_STORE, 8 /* usually because it needs to conditionally skip it */ +.set FLAG_NO_SPILL_LINE_VARS, 0 +.set FLAG_SPILL_LINE_VARS_WIDE, 16 +.set FLAG_SPILL_LINE_VARS_NON_WIDE, 32 +.set FLAG_SPILL_LINE_VARS, 48 +.set FLAG_PROCESS_CORRUPTS_SCRATCH, 0 +.set FLAG_PROCESS_PRESERVES_SCRATCH, 64 + +/* + * Offset into stack where mask and source pointer/stride can be accessed. + */ +#ifdef DEBUG_PARAMS +.set ARGS_STACK_OFFSET, (9*4+9*4) +#else +.set ARGS_STACK_OFFSET, (9*4) +#endif + +/* + * Constants for selecting preferable prefetch type. + */ +.set PREFETCH_TYPE_NONE, 0 +.set PREFETCH_TYPE_STANDARD, 1 + +/* + * Definitions of macros for load/store of pixel data. 
+ */ + +.macro pixldst op, cond=al, numbytes, reg0, reg1, reg2, reg3, base, unaligned=0 + .if numbytes == 16 + .if unaligned == 1 + op&r&cond WK&reg0, [base], #4 + op&r&cond WK&reg1, [base], #4 + op&r&cond WK&reg2, [base], #4 + op&r&cond WK&reg3, [base], #4 + .else + op&m&cond&ia base!, {WK&reg0,WK&reg1,WK&reg2,WK&reg3} + .endif + .elseif numbytes == 8 + .if unaligned == 1 + op&r&cond WK&reg0, [base], #4 + op&r&cond WK&reg1, [base], #4 + .else + op&m&cond&ia base!, {WK&reg0,WK&reg1} + .endif + .elseif numbytes == 4 + op&r&cond WK&reg0, [base], #4 + .elseif numbytes == 2 + op&r&cond&h WK&reg0, [base], #2 + .elseif numbytes == 1 + op&r&cond&b WK&reg0, [base], #1 + .else + .error "unsupported size: numbytes" + .endif +.endm + +.macro pixst_baseupdated cond, numbytes, reg0, reg1, reg2, reg3, base + .if numbytes == 16 + stm&cond&db base, {WK&reg0,WK&reg1,WK&reg2,WK&reg3} + .elseif numbytes == 8 + stm&cond&db base, {WK&reg0,WK&reg1} + .elseif numbytes == 4 + str&cond WK&reg0, [base, #-4] + .elseif numbytes == 2 + str&cond&h WK&reg0, [base, #-2] + .elseif numbytes == 1 + str&cond&b WK&reg0, [base, #-1] + .else + .error "unsupported size: numbytes" + .endif +.endm + +.macro pixld cond, numbytes, firstreg, base, unaligned + pixldst ld, cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base, unaligned +.endm + +.macro pixst cond, numbytes, firstreg, base + .if (flags) & FLAG_DST_READWRITE + pixst_baseupdated cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base + .else + pixldst st, cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base + .endif +.endm + +.macro PF a, x:vararg + .if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_STANDARD) + a x + .endif +.endm + + +.macro preload_leading_step1 bpp, ptr, base +/* If the destination is already 16-byte aligned, then we need to preload + * between 0 and prefetch_distance (inclusive) cache lines ahead so there + * are no gaps when the inner loop starts. 
+ */ + .if bpp > 0 + PF bic, ptr, base, #31 + .set OFFSET, 0 + .rept prefetch_distance+1 + PF pld, [ptr, #OFFSET] + .set OFFSET, OFFSET+32 + .endr + .endif +.endm + +.macro preload_leading_step2 bpp, bpp_shift, ptr, base +/* However, if the destination is not 16-byte aligned, we may need to + * preload more cache lines than that. The question we need to ask is: + * are the bytes corresponding to the leading pixels more than the amount + * by which the source pointer will be rounded down for preloading, and if + * so, by how many cache lines? Effectively, we want to calculate + * leading_bytes = ((-dst)&15)*src_bpp/dst_bpp + * inner_loop_offset = (src+leading_bytes)&31 + * extra_needed = leading_bytes - inner_loop_offset + * and test if extra_needed is <= 0, <= 32, or > 32 (where > 32 is only + * possible when there are 4 src bytes for every 1 dst byte). + */ + .if bpp > 0 + .ifc base,DST + /* The test can be simplified further when preloading the destination */ + PF tst, base, #16 + PF beq, 61f + .else + .if bpp/dst_w_bpp == 4 + PF add, SCRATCH, base, WK0, lsl #bpp_shift-dst_bpp_shift + PF and, SCRATCH, SCRATCH, #31 + PF rsb, SCRATCH, SCRATCH, WK0, lsl #bpp_shift-dst_bpp_shift + PF sub, SCRATCH, SCRATCH, #1 /* so now ranges are -16..-1 / 0..31 / 32..63 */ + PF movs, SCRATCH, SCRATCH, #32-6 /* so this sets NC / nc / Nc */ + PF bcs, 61f + PF bpl, 60f + PF pld, [ptr, #32*(prefetch_distance+2)] + .else + PF mov, SCRATCH, base, lsl #32-5 + PF add, SCRATCH, SCRATCH, WK0, lsl #32-5+bpp_shift-dst_bpp_shift + PF rsbs, SCRATCH, SCRATCH, WK0, lsl #32-5+bpp_shift-dst_bpp_shift + PF bls, 61f + .endif + .endif +60: PF pld, [ptr, #32*(prefetch_distance+1)] +61: + .endif +.endm + +#define IS_END_OF_GROUP(INDEX,SIZE) ((SIZE) < 2 || ((INDEX) & ~((INDEX)+1)) & ((SIZE)/2)) +.macro preload_middle bpp, base, scratch_holds_offset + .if bpp > 0 + /* prefetch distance = 256/bpp, stm distance = 128/dst_w_bpp */ + .if IS_END_OF_GROUP(SUBBLOCK,256/128*dst_w_bpp/bpp) + .if scratch_holds_offset 
+ PF pld, [base, SCRATCH] + .else + PF bic, SCRATCH, base, #31 + PF pld, [SCRATCH, #32*prefetch_distance] + .endif + .endif + .endif +.endm + +.macro preload_trailing bpp, bpp_shift, base + .if bpp > 0 + .if bpp*pix_per_block > 256 + /* Calculations are more complex if more than one fetch per block */ + PF and, WK1, base, #31 + PF add, WK1, WK1, WK0, lsl #bpp_shift + PF add, WK1, WK1, #32*(bpp*pix_per_block/256-1)*(prefetch_distance+1) + PF bic, SCRATCH, base, #31 +80: PF pld, [SCRATCH, #32*(prefetch_distance+1)] + PF add, SCRATCH, SCRATCH, #32 + PF subs, WK1, WK1, #32 + PF bhi, 80b + .else + /* If exactly one fetch per block, then we need either 0, 1 or 2 extra preloads */ + PF mov, SCRATCH, base, lsl #32-5 + PF adds, SCRATCH, SCRATCH, X, lsl #32-5+bpp_shift + PF adceqs, SCRATCH, SCRATCH, #0 + /* The instruction above has two effects: ensures Z is only + * set if C was clear (so Z indicates that both shifted quantities + * were 0), and clears C if Z was set (so C indicates that the sum + * of the shifted quantities was greater and not equal to 32) */ + PF beq, 82f + PF bic, SCRATCH, base, #31 + PF bcc, 81f + PF pld, [SCRATCH, #32*(prefetch_distance+2)] +81: PF pld, [SCRATCH, #32*(prefetch_distance+1)] +82: + .endif + .endif +.endm + + +.macro preload_line narrow_case, bpp, bpp_shift, base +/* "narrow_case" - just means that the macro was invoked from the "narrow" + * code path rather than the "medium" one - because in the narrow case, + * the row of pixels is known to output no more than 30 bytes, then + * (assuming the source pixels are no wider than the destination + * pixels) they cannot possibly straddle more than 2 32-byte cachelines, + * meaning there's no need for a loop. 
+ * "bpp" - number of bits per pixel in the channel (source, mask or + * destination) that's being preloaded, or 0 if this channel is not used + * for reading + * "bpp_shift" - log2 of ("bpp"/8) (except if "bpp"=0 of course) + * "base" - base address register of channel to preload (SRC, MASK or DST) + */ + .if bpp > 0 + .if narrow_case && (bpp <= dst_w_bpp) + /* In these cases, each line for each channel is in either 1 or 2 cache lines */ + PF bic, WK0, base, #31 + PF pld, [WK0] + PF add, WK1, base, X, LSL #bpp_shift + PF sub, WK1, WK1, #1 + PF bic, WK1, WK1, #31 + PF cmp, WK1, WK0 + PF beq, 90f + PF pld, [WK1] +90: + .else + PF bic, WK0, base, #31 + PF pld, [WK0] + PF add, WK1, base, X, lsl #bpp_shift + PF sub, WK1, WK1, #1 + PF bic, WK1, WK1, #31 + PF cmp, WK1, WK0 + PF beq, 92f +91: PF add, WK0, WK0, #32 + PF cmp, WK0, WK1 + PF pld, [WK0] + PF bne, 91b +92: + .endif + .endif +.endm + + +.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx + process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 + .if decrementx + sub&cond X, X, #8*numbytes/dst_w_bpp + .endif + process_tail cond, numbytes, firstreg + .if !((flags) & FLAG_PROCESS_DOES_STORE) + pixst cond, numbytes, firstreg, DST + .endif +.endm + +.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx + .if (flags) & FLAG_BRANCH_OVER + .ifc cond,mi + bpl 100f + .endif + .ifc cond,cs + bcc 100f + .endif + .ifc cond,ne + beq 100f + .endif + conditional_process1_helper , process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx +100: + .else + conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx + .endif +.endm + +.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, 
unaligned_mask, decrementx + .if (flags) & (FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE) + /* Can't interleave reads and writes */ + test + conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx + .if (flags) & FLAG_PROCESS_CORRUPTS_PSR + test + .endif + conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx + .else + /* Can interleave reads and writes for better scheduling */ + test + process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 + process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 + .if decrementx + sub&cond1 X, X, #8*numbytes1/dst_w_bpp + sub&cond2 X, X, #8*numbytes2/dst_w_bpp + .endif + process_tail cond1, numbytes1, firstreg1 + process_tail cond2, numbytes2, firstreg2 + pixst cond1, numbytes1, firstreg1, DST + pixst cond2, numbytes2, firstreg2, DST + .endif +.endm + + +.macro test_bits_1_0_ptr + movs SCRATCH, WK0, lsl #32-1 /* C,N = bits 1,0 of DST */ +.endm + +.macro test_bits_3_2_ptr + movs SCRATCH, WK0, lsl #32-3 /* C,N = bits 3, 2 of DST */ +.endm + +.macro leading_15bytes process_head, process_tail + /* On entry, WK0 bits 0-3 = number of bytes until destination is 16-byte aligned */ + /* Use unaligned loads in all cases for simplicity */ + .if dst_w_bpp == 8 + conditional_process2 test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, 1 + .elseif dst_w_bpp == 16 + test_bits_1_0_ptr + conditional_process1 cs, process_head, process_tail, 2, 2, 1, 1, 1 + .endif + conditional_process2 test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, 1 +.endm + +.macro test_bits_3_2_pix + movs SCRATCH, X, lsl #dst_bpp_shift+32-3 +.endm + +.macro test_bits_1_0_pix + .if dst_w_bpp == 8 + movs SCRATCH, X, lsl #dst_bpp_shift+32-1 + .else + movs SCRATCH, X, lsr #1 + .endif +.endm + +.macro trailing_15bytes 
process_head, process_tail, unaligned_src, unaligned_mask + conditional_process2 test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0 + .if dst_w_bpp == 16 + test_bits_1_0_pix + conditional_process1 cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0 + .elseif dst_w_bpp == 8 + conditional_process2 test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0 + .endif +.endm + + +.macro wide_case_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment +110: + .set SUBBLOCK, 0 /* this is a count of STMs; there can be up to 8 STMs per block */ + .rept pix_per_block*dst_w_bpp/128 + process_head , 16, 0, unaligned_src, unaligned_mask, 1 + .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) + preload_middle src_bpp, SRC, 1 + .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) + preload_middle mask_bpp, MASK, 1 + .else + preload_middle src_bpp, SRC, 0 + preload_middle mask_bpp, MASK, 0 + .endif + .if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0) + /* Because we know that writes are 16-byte aligned, it's relatively easy to ensure that + * destination prefetches are 32-byte aligned. It's also the easiest channel to offset + * preloads for, to achieve staggered prefetches for multiple channels, because there are + * always two STMs per prefetch, so there is always an opposite STM on which to put the + * preload. 
Note, no need to BIC the base register here */ + PF pld, [DST, #32*prefetch_distance - dst_alignment] + .endif + process_tail , 16, 0 + .if !((flags) & FLAG_PROCESS_DOES_STORE) + pixst , 16, 0, DST + .endif + .set SUBBLOCK, SUBBLOCK+1 + .endr + subs X, X, #pix_per_block + bhs 110b +.endm + +.macro wide_case_inner_loop_and_trailing_pixels process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask + /* Destination now 16-byte aligned; we have at least one block before we have to stop preloading */ + .if dst_r_bpp > 0 + tst DST, #16 + bne 111f + process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 16 + b 112f +111: + .endif + process_inner_loop process_head, process_tail, unaligned_src, unaligned_mask, 0 +112: + /* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */ + .if (src_bpp*pix_per_block > 256) || (mask_bpp*pix_per_block > 256) || (dst_r_bpp*pix_per_block > 256) + PF and, WK0, X, #pix_per_block-1 + .endif + preload_trailing src_bpp, src_bpp_shift, SRC + preload_trailing mask_bpp, mask_bpp_shift, MASK + preload_trailing dst_r_bpp, dst_bpp_shift, DST + add X, X, #(prefetch_distance+2)*pix_per_block - 128/dst_w_bpp + /* The remainder of the line is handled identically to the medium case */ + medium_case_inner_loop_and_trailing_pixels process_head, process_tail,, exit_label, unaligned_src, unaligned_mask +.endm + +.macro medium_case_inner_loop_and_trailing_pixels process_head, process_tail, unused, exit_label, unaligned_src, unaligned_mask +120: + process_head , 16, 0, unaligned_src, unaligned_mask, 0 + process_tail , 16, 0 + .if !((flags) & FLAG_PROCESS_DOES_STORE) + pixst , 16, 0, DST + .endif + subs X, X, #128/dst_w_bpp + bhs 120b + /* Trailing pixels */ + tst X, #128/dst_w_bpp - 1 + beq exit_label + trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask +.endm + +.macro narrow_case_inner_loop_and_trailing_pixels process_head, process_tail, unused, 
exit_label, unaligned_src, unaligned_mask + tst X, #16*8/dst_w_bpp + conditional_process1 ne, process_head, process_tail, 16, 0, unaligned_src, unaligned_mask, 0 + /* Trailing pixels */ + /* In narrow case, it's relatively unlikely to be aligned, so let's do without a branch here */ + trailing_15bytes process_head, process_tail, unaligned_src, unaligned_mask +.endm + +.macro switch_on_alignment action, process_head, process_tail, process_inner_loop, exit_label + /* Note that if we're reading the destination, it's already guaranteed to be aligned at this point */ + .if mask_bpp == 8 || mask_bpp == 16 + tst MASK, #3 + bne 141f + .endif + .if src_bpp == 8 || src_bpp == 16 + tst SRC, #3 + bne 140f + .endif + action process_head, process_tail, process_inner_loop, exit_label, 0, 0 + .if src_bpp == 8 || src_bpp == 16 + b exit_label +140: + action process_head, process_tail, process_inner_loop, exit_label, 1, 0 + .endif + .if mask_bpp == 8 || mask_bpp == 16 + b exit_label +141: + .if src_bpp == 8 || src_bpp == 16 + tst SRC, #3 + bne 142f + .endif + action process_head, process_tail, process_inner_loop, exit_label, 0, 1 + .if src_bpp == 8 || src_bpp == 16 + b exit_label +142: + action process_head, process_tail, process_inner_loop, exit_label, 1, 1 + .endif + .endif +.endm + + +.macro end_of_line restore_x, vars_spilled, loop_label, last_one + .if vars_spilled + /* Sadly, GAS doesn't seem to have an equivalent of the DCI directive? 
*/ + /* This is ldmia sp,{} */ + .word 0xE89D0000 | LINE_SAVED_REGS + .endif + subs Y, Y, #1 + .if vars_spilled + .if (LINE_SAVED_REGS) & (1<<1) + str Y, [sp] + .endif + .endif + add DST, DST, STRIDE_D + .if src_bpp > 0 + add SRC, SRC, STRIDE_S + .endif + .if mask_bpp > 0 + add MASK, MASK, STRIDE_M + .endif + .if restore_x + mov X, ORIG_W + .endif + bhs loop_label + .ifc "last_one","" + .if vars_spilled + b 197f + .else + b 198f + .endif + .else + .if (!vars_spilled) && ((flags) & FLAG_SPILL_LINE_VARS) + b 198f + .endif + .endif +.endm + + +.macro generate_composite_function fname, \ + src_bpp_, \ + mask_bpp_, \ + dst_w_bpp_, \ + flags_, \ + prefetch_distance_, \ + init, \ + newline, \ + cleanup, \ + process_head, \ + process_tail, \ + process_inner_loop + + .func fname + .global fname + /* For ELF format also set function visibility to hidden */ +#ifdef __ELF__ + .hidden fname + .type fname, %function +#endif + +/* + * Make some macro arguments globally visible and accessible + * from other macros + */ + .set src_bpp, src_bpp_ + .set mask_bpp, mask_bpp_ + .set dst_w_bpp, dst_w_bpp_ + .set flags, flags_ + .set prefetch_distance, prefetch_distance_ + +/* + * Select prefetch type for this function. 
+ */ + .if prefetch_distance == 0 + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE + .else + .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_STANDARD + .endif + + .if src_bpp == 32 + .set src_bpp_shift, 2 + .elseif src_bpp == 24 + .set src_bpp_shift, 0 + .elseif src_bpp == 16 + .set src_bpp_shift, 1 + .elseif src_bpp == 8 + .set src_bpp_shift, 0 + .elseif src_bpp == 0 + .set src_bpp_shift, -1 + .else + .error "requested src bpp (src_bpp) is not supported" + .endif + + .if mask_bpp == 32 + .set mask_bpp_shift, 2 + .elseif mask_bpp == 24 + .set mask_bpp_shift, 0 + .elseif mask_bpp == 8 + .set mask_bpp_shift, 0 + .elseif mask_bpp == 0 + .set mask_bpp_shift, -1 + .else + .error "requested mask bpp (mask_bpp) is not supported" + .endif + + .if dst_w_bpp == 32 + .set dst_bpp_shift, 2 + .elseif dst_w_bpp == 24 + .set dst_bpp_shift, 0 + .elseif dst_w_bpp == 16 + .set dst_bpp_shift, 1 + .elseif dst_w_bpp == 8 + .set dst_bpp_shift, 0 + .else + .error "requested dst bpp (dst_w_bpp) is not supported" + .endif + + .if (((flags) & FLAG_DST_READWRITE) != 0) + .set dst_r_bpp, dst_w_bpp + .else + .set dst_r_bpp, 0 + .endif + + .set pix_per_block, 16*8/dst_w_bpp + .if src_bpp != 0 + .if 32*8/src_bpp > pix_per_block + .set pix_per_block, 32*8/src_bpp + .endif + .endif + .if mask_bpp != 0 + .if 32*8/mask_bpp > pix_per_block + .set pix_per_block, 32*8/mask_bpp + .endif + .endif + .if dst_r_bpp != 0 + .if 32*8/dst_r_bpp > pix_per_block + .set pix_per_block, 32*8/dst_r_bpp + .endif + .endif + +/* The standard entry conditions set up by pixman-arm-common.h are: + * r0 = width (pixels) + * r1 = height (rows) + * r2 = pointer to top-left pixel of destination + * r3 = destination stride (pixels) + * [sp] = source pixel value, or pointer to top-left pixel of source + * [sp,#4] = 0 or source stride (pixels) + * The following arguments are unused for non-mask operations + * [sp,#8] = mask pixel value, or pointer to top-left pixel of mask + * [sp,#12] = 0 or mask stride (pixels) + */ + +/* + * Assign 
symbolic names to registers + */ + X .req r0 /* pixels to go on this line */ + Y .req r1 /* lines to go */ + DST .req r2 /* destination pixel pointer */ + STRIDE_D .req r3 /* destination stride (bytes, minus width) */ + SRC .req r4 /* source pixel pointer */ + STRIDE_S .req r5 /* source stride (bytes, minus width) */ + MASK .req r6 /* mask pixel pointer (if applicable) */ + STRIDE_M .req r7 /* mask stride (bytes, minus width) */ + WK0 .req r8 /* pixel data registers */ + WK1 .req r9 + WK2 .req r10 + WK3 .req r11 + SCRATCH .req r12 + ORIG_W .req r14 /* width (pixels) */ + +fname: + push {r4-r11, lr} /* save all registers */ + + subs Y, Y, #1 + blo 199f + +#ifdef DEBUG_PARAMS + sub sp, sp, #9*4 +#endif + + .if src_bpp > 0 + ldr SRC, [sp, #ARGS_STACK_OFFSET] + ldr STRIDE_S, [sp, #ARGS_STACK_OFFSET+4] + .endif + .if mask_bpp > 0 + ldr MASK, [sp, #ARGS_STACK_OFFSET+8] + ldr STRIDE_M, [sp, #ARGS_STACK_OFFSET+12] + .endif + +#ifdef DEBUG_PARAMS + add Y, Y, #1 + stmia sp, {r0-r7,pc} + sub Y, Y, #1 +#endif + + init + + lsl STRIDE_D, #dst_bpp_shift /* stride in bytes */ + sub STRIDE_D, STRIDE_D, X, lsl #dst_bpp_shift + .if src_bpp > 0 + lsl STRIDE_S, #src_bpp_shift + sub STRIDE_S, STRIDE_S, X, lsl #src_bpp_shift + .endif + .if mask_bpp > 0 + lsl STRIDE_M, #mask_bpp_shift + sub STRIDE_M, STRIDE_M, X, lsl #mask_bpp_shift + .endif + + /* Are we not even wide enough to have one 16-byte aligned 16-byte block write? */ + cmp X, #2*16*8/dst_w_bpp - 1 + blo 170f + .if src_bpp || mask_bpp || dst_r_bpp /* Wide and medium cases are the same for fill */ + /* To preload ahead on the current line, we need at least (prefetch_distance+2) 32-byte blocks on all prefetch channels */ + cmp X, #(prefetch_distance+3)*pix_per_block - 1 + blo 160f + + /* Wide case */ + /* Adjust X so that the decrement instruction can also test for + * inner loop termination. We want it to stop when there are + * (prefetch_distance+1) complete blocks to go. 
*/ + sub X, X, #(prefetch_distance+2)*pix_per_block + mov ORIG_W, X + .if (flags) & FLAG_SPILL_LINE_VARS_WIDE + /* This is stmdb sp!,{} */ + .word 0xE92D0000 | LINE_SAVED_REGS + .endif +151: /* New line */ + newline + preload_leading_step1 src_bpp, WK1, SRC + preload_leading_step1 mask_bpp, WK2, MASK + preload_leading_step1 dst_r_bpp, WK3, DST + + tst DST, #15 + beq 154f + rsb WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */ + .if (src_bpp != 0 && src_bpp != 2*dst_w_bpp) || (mask_bpp != 0 && mask_bpp != 2*dst_w_bpp) + PF and, WK0, WK0, #15 + .endif + + preload_leading_step2 src_bpp, src_bpp_shift, WK1, SRC + preload_leading_step2 mask_bpp, mask_bpp_shift, WK2, MASK + preload_leading_step2 dst_r_bpp, dst_bpp_shift, WK3, DST + + leading_15bytes process_head, process_tail + +154: /* Destination now 16-byte aligned; we have at least one prefetch on each channel as well as at least one 16-byte output block */ + .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) + and SCRATCH, SRC, #31 + rsb SCRATCH, SCRATCH, #32*prefetch_distance + .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) + and SCRATCH, MASK, #31 + rsb SCRATCH, SCRATCH, #32*prefetch_distance + .endif + .ifc "process_inner_loop","" + switch_on_alignment wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, wide_case_inner_loop, 157f + .else + switch_on_alignment wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, process_inner_loop, 157f + .endif + +157: /* Check for another line */ + end_of_line 1, %((flags) & FLAG_SPILL_LINE_VARS_WIDE), 151b + .endif + + .ltorg + +160: /* Medium case */ + mov ORIG_W, X + .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE + /* This is stmdb sp!,{} */ + .word 0xE92D0000 | LINE_SAVED_REGS + .endif +161: /* New line */ + newline + preload_line 0, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */ + preload_line 0, mask_bpp, mask_bpp_shift, MASK + 
preload_line 0, dst_r_bpp, dst_bpp_shift, DST + + sub X, X, #128/dst_w_bpp /* simplifies inner loop termination */ + tst DST, #15 + beq 164f + rsb WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */ + + leading_15bytes process_head, process_tail + +164: /* Destination now 16-byte aligned; we have at least one 16-byte output block */ + switch_on_alignment medium_case_inner_loop_and_trailing_pixels, process_head, process_tail,, 167f + +167: /* Check for another line */ + end_of_line 1, %((flags) & FLAG_SPILL_LINE_VARS_NON_WIDE), 161b + + .ltorg + +170: /* Narrow case, less than 31 bytes, so no guarantee of at least one 16-byte block */ + .if dst_w_bpp < 32 + mov ORIG_W, X + .endif + .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE + /* This is stmdb sp!,{} */ + .word 0xE92D0000 | LINE_SAVED_REGS + .endif +171: /* New line */ + newline + preload_line 1, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */ + preload_line 1, mask_bpp, mask_bpp_shift, MASK + preload_line 1, dst_r_bpp, dst_bpp_shift, DST + + .if dst_w_bpp == 8 + tst DST, #3 + beq 174f +172: subs X, X, #1 + blo 177f + process_head , 1, 0, 1, 1, 0 + process_tail , 1, 0 + .if !((flags) & FLAG_PROCESS_DOES_STORE) + pixst , 1, 0, DST + .endif + tst DST, #3 + bne 172b + .elseif dst_w_bpp == 16 + tst DST, #2 + beq 174f + subs X, X, #1 + blo 177f + process_head , 2, 0, 1, 1, 0 + process_tail , 2, 0 + .if !((flags) & FLAG_PROCESS_DOES_STORE) + pixst , 2, 0, DST + .endif + .endif + +174: /* Destination now 4-byte aligned; we have 0 or more output bytes to go */ + switch_on_alignment narrow_case_inner_loop_and_trailing_pixels, process_head, process_tail,, 177f + +177: /* Check for another line */ + end_of_line %(dst_w_bpp < 32), %((flags) & FLAG_SPILL_LINE_VARS_NON_WIDE), 171b, last_one + +197: + .if (flags) & FLAG_SPILL_LINE_VARS + add sp, sp, #LINE_SAVED_REG_COUNT*4 + .endif +198: + cleanup + +#ifdef DEBUG_PARAMS + add sp, sp, #9*4 /* junk the debug copy of arguments */ +#endif +199: + 
pop {r4-r11, pc} /* exit */ + + .ltorg + + .unreq X + .unreq Y + .unreq DST + .unreq STRIDE_D + .unreq SRC + .unreq STRIDE_S + .unreq MASK + .unreq STRIDE_M + .unreq WK0 + .unreq WK1 + .unreq WK2 + .unreq WK3 + .unreq SCRATCH + .unreq ORIG_W + .endfunc +.endm + +.macro line_saved_regs x:vararg + .set LINE_SAVED_REGS, 0 + .set LINE_SAVED_REG_COUNT, 0 + .irp SAVED_REG,x + .ifc "SAVED_REG","Y" + .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<1) + .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 + .endif + .ifc "SAVED_REG","STRIDE_D" + .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<3) + .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 + .endif + .ifc "SAVED_REG","STRIDE_S" + .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<5) + .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 + .endif + .ifc "SAVED_REG","STRIDE_M" + .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<7) + .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 + .endif + .ifc "SAVED_REG","ORIG_W" + .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<14) + .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 + .endif + .endr +.endm + +.macro nop_macro x:vararg +.endm diff --git a/lib/pixman/pixman/pixman-arm-simd.c b/lib/pixman/pixman/pixman-arm-simd.c index 3d19bfac1..af062e19d 100644 --- a/lib/pixman/pixman/pixman-arm-simd.c +++ b/lib/pixman/pixman/pixman-arm-simd.c @@ -31,369 +31,191 @@ #include "pixman-arm-common.h" #include "pixman-inlines.h" -#if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */ - -void -pixman_composite_add_8_8_asm_armv6 (int32_t width, - int32_t height, - uint8_t *dst_line, - int32_t dst_stride, - uint8_t *src_line, - int32_t src_stride) -{ - uint8_t *dst, *src; - int32_t w; - uint8_t s, d; - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - /* ensure both src and dst are properly aligned before doing 32 bit reads - * we'll stay in this loop if src and dst have differing alignments - */ - while (w && (((unsigned 
long)dst & 3) || ((unsigned long)src & 3))) - { - s = *src; - d = *dst; - asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s)); - *dst = d; - - dst++; - src++; - w--; - } +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_x888_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_0565, + uint16_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, src_0565_8888, + uint16_t, 1, uint32_t, 1) - while (w >= 4) - { - asm ("uqadd8 %0, %1, %2" - : "=r" (*(uint32_t*)dst) - : "r" (*(uint32_t*)src), "r" (*(uint32_t*)dst)); - dst += 4; - src += 4; - w -= 4; - } +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8, + uint8_t, 1, uint8_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888, + uint32_t, 1, uint32_t, 1) - while (w) - { - s = *src; - d = *dst; - asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s)); - *dst = d; +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) - dst++; - src++; - w--; - } - } +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888, + uint8_t, 1, uint32_t, 1) -} +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC, + uint16_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC, + uint32_t, uint32_t) void -pixman_composite_over_8888_8888_asm_armv6 (int32_t width, - int32_t height, - uint32_t *dst_line, - int32_t dst_stride, - uint32_t *src_line, - int32_t src_stride) -{ - uint32_t *dst; - uint32_t *src; - int32_t w; - uint32_t component_half = 0x800080; - uint32_t upper_component_mask = 0xff00ff00; - uint32_t alpha_mask = 0xff; - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; +pixman_composite_src_n_8888_asm_armv6 (int32_t w, + int32_t h, + uint32_t *dst, + int32_t dst_stride, 
+ uint32_t src); -/* #define inner_branch */ - asm volatile ( - "cmp %[w], #0\n\t" - "beq 2f\n\t" - "1:\n\t" - /* load src */ - "ldr r5, [%[src]], #4\n\t" -#ifdef inner_branch - /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. - * The 0x0 case also allows us to avoid doing an unecessary data - * write which is more valuable so we only check for that - */ - "cmp r5, #0\n\t" - "beq 3f\n\t" - - /* = 255 - alpha */ - "sub r8, %[alpha_mask], r5, lsr #24\n\t" - - "ldr r4, [%[dest]] \n\t" - -#else - "ldr r4, [%[dest]] \n\t" - - /* = 255 - alpha */ - "sub r8, %[alpha_mask], r5, lsr #24\n\t" -#endif - "uxtb16 r6, r4\n\t" - "uxtb16 r7, r4, ror #8\n\t" - - /* multiply by 257 and divide by 65536 */ - "mla r6, r6, r8, %[component_half]\n\t" - "mla r7, r7, r8, %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - /* recombine the 0xff00ff00 bytes of r6 and r7 */ - "and r7, r7, %[upper_component_mask]\n\t" - "uxtab16 r6, r7, r6, ror #8\n\t" - - "uqadd8 r5, r6, r5\n\t" - -#ifdef inner_branch - "3:\n\t" - -#endif - "str r5, [%[dest]], #4\n\t" - /* increment counter and jmp to top */ - "subs %[w], %[w], #1\n\t" - "bne 1b\n\t" - "2:\n\t" - : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) - : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask), - [alpha_mask] "r" (alpha_mask) - : "r4", "r5", "r6", "r7", "r8", "cc", "memory" - ); - } -} +void +pixman_composite_src_n_0565_asm_armv6 (int32_t w, + int32_t h, + uint16_t *dst, + int32_t dst_stride, + uint16_t src); void -pixman_composite_over_8888_n_8888_asm_armv6 (int32_t width, - int32_t height, - uint32_t *dst_line, - int32_t dst_stride, - uint32_t *src_line, - int32_t src_stride, - uint32_t mask) +pixman_composite_src_n_8_asm_armv6 (int32_t w, + int32_t h, + uint8_t *dst, + int32_t dst_stride, + uint8_t src); + +static pixman_bool_t +arm_simd_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, /* in 32-bit words */ + 
int bpp, + int x, + int y, + int width, + int height, + uint32_t _xor) { - uint32_t *dst; - uint32_t *src; - int32_t w; - uint32_t component_half = 0x800080; - uint32_t alpha_mask = 0xff; - - mask = (mask) >> 24; + /* stride is always multiple of 32bit units in pixman */ + uint32_t byte_stride = stride * sizeof(uint32_t); - while (height--) + switch (bpp) { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - -/* #define inner_branch */ - asm volatile ( - "cmp %[w], #0\n\t" - "beq 2f\n\t" - "1:\n\t" - /* load src */ - "ldr r5, [%[src]], #4\n\t" -#ifdef inner_branch - /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. - * The 0x0 case also allows us to avoid doing an unecessary data - * write which is more valuable so we only check for that - */ - "cmp r5, #0\n\t" - "beq 3f\n\t" - -#endif - "ldr r4, [%[dest]] \n\t" - - "uxtb16 r6, r5\n\t" - "uxtb16 r7, r5, ror #8\n\t" - - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, r6, %[mask_alpha], %[component_half]\n\t" - "mla r7, r7, %[mask_alpha], %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" - - /* recombine */ - "orr r5, r6, r7, lsl #8\n\t" - - "uxtb16 r6, r4\n\t" - "uxtb16 r7, r4, ror #8\n\t" - - /* 255 - alpha */ - "sub r8, %[alpha_mask], r5, lsr #24\n\t" - - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, r6, r8, %[component_half]\n\t" - "mla r7, r7, r8, %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" - - /* recombine */ - "orr r6, r6, r7, lsl #8\n\t" - - "uqadd8 r5, r6, r5\n\t" - -#ifdef inner_branch - "3:\n\t" - -#endif - "str r5, [%[dest]], #4\n\t" - /* increment counter and jmp to top */ - "subs %[w], %[w], #1\n\t" - "bne 1b\n\t" - "2:\n\t" - : [w] "+r" (w), [dest] "+r" (dst), [src] 
"+r" (src) - : [component_half] "r" (component_half), [mask_alpha] "r" (mask), - [alpha_mask] "r" (alpha_mask) - : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory" - ); + case 8: + pixman_composite_src_n_8_asm_armv6 ( + width, + height, + (uint8_t *)(((char *) bits) + y * byte_stride + x), + byte_stride, + _xor & 0xff); + return TRUE; + case 16: + pixman_composite_src_n_0565_asm_armv6 ( + width, + height, + (uint16_t *)(((char *) bits) + y * byte_stride + x * 2), + byte_stride / 2, + _xor & 0xffff); + return TRUE; + case 32: + pixman_composite_src_n_8888_asm_armv6 ( + width, + height, + (uint32_t *)(((char *) bits) + y * byte_stride + x * 4), + byte_stride / 4, + _xor); + return TRUE; + default: + return FALSE; } } -void -pixman_composite_over_n_8_8888_asm_armv6 (int32_t width, - int32_t height, - uint32_t *dst_line, - int32_t dst_stride, - uint32_t src, - int32_t unused, - uint8_t *mask_line, - int32_t mask_stride) +static pixman_bool_t +arm_simd_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, /* in 32-bit words */ + int dst_stride, /* in 32-bit words */ + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height) { - uint32_t srca; - uint32_t *dst; - uint8_t *mask; - int32_t w; - - srca = src >> 24; - - uint32_t component_mask = 0xff00ff; - uint32_t component_half = 0x800080; - - uint32_t src_hi = (src >> 8) & component_mask; - uint32_t src_lo = src & component_mask; + if (src_bpp != dst_bpp) + return FALSE; - while (height--) + switch (src_bpp) { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - -/* #define inner_branch */ - asm volatile ( - "cmp %[w], #0\n\t" - "beq 2f\n\t" - "1:\n\t" - /* load mask */ - "ldrb r5, [%[mask]], #1\n\t" -#ifdef inner_branch - /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. 
- * The 0x0 case also allows us to avoid doing an unecessary data - * write which is more valuable so we only check for that - */ - "cmp r5, #0\n\t" - "beq 3f\n\t" - -#endif - "ldr r4, [%[dest]] \n\t" - - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, %[src_lo], r5, %[component_half]\n\t" - "mla r7, %[src_hi], r5, %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" - - /* recombine */ - "orr r5, r6, r7, lsl #8\n\t" - - "uxtb16 r6, r4\n\t" - "uxtb16 r7, r4, ror #8\n\t" - - /* we could simplify this to use 'sub' if we were - * willing to give up a register for alpha_mask - */ - "mvn r8, r5\n\t" - "mov r8, r8, lsr #24\n\t" - - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, r6, r8, %[component_half]\n\t" - "mla r7, r7, r8, %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" - - /* recombine */ - "orr r6, r6, r7, lsl #8\n\t" - - "uqadd8 r5, r6, r5\n\t" - -#ifdef inner_branch - "3:\n\t" - -#endif - "str r5, [%[dest]], #4\n\t" - /* increment counter and jmp to top */ - "subs %[w], %[w], #1\n\t" - "bne 1b\n\t" - "2:\n\t" - : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask) - : [component_half] "r" (component_half), - [src_hi] "r" (src_hi), [src_lo] "r" (src_lo) - : "r4", "r5", "r6", "r7", "r8", "cc", "memory"); + case 8: + pixman_composite_src_8_8_asm_armv6 ( + width, height, + (uint8_t *)(((char *) dst_bits) + + dest_y * dst_stride * 4 + dest_x * 1), dst_stride * 4, + (uint8_t *)(((char *) src_bits) + + src_y * src_stride * 4 + src_x * 1), src_stride * 4); + return TRUE; + case 16: + pixman_composite_src_0565_0565_asm_armv6 ( + width, height, + (uint16_t *)(((char *) dst_bits) + + dest_y * dst_stride * 4 + dest_x * 2), dst_stride * 2, + (uint16_t *)(((char *) src_bits) + + src_y * src_stride * 4 
+ src_x * 2), src_stride * 2); + return TRUE; + case 32: + pixman_composite_src_8888_8888_asm_armv6 ( + width, height, + (uint32_t *)(((char *) dst_bits) + + dest_y * dst_stride * 4 + dest_x * 4), dst_stride, + (uint32_t *)(((char *) src_bits) + + src_y * src_stride * 4 + src_x * 4), src_stride); + return TRUE; + default: + return FALSE; } } -#endif - -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8, - uint8_t, 1, uint8_t, 1) -PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888, - uint32_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888, - uint32_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888, - uint8_t, 1, uint32_t, 1) - -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC, - uint16_t, uint16_t) -PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC, - uint32_t, uint32_t) - static const pixman_fast_path_t arm_simd_fast_paths[] = { + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, armv6_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, armv6_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, armv6_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, armv6_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, armv6_composite_src_8888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, armv6_composite_src_8888_8888), + + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, armv6_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, armv6_composite_src_x888_8888), + + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, a1r5g5b5, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a1b5g5r5, null, a1b5g5r5, 
armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a1b5g5r5, null, x1b5g5r5, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, x1b5g5r5, null, x1b5g5r5, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a4r4g4b4, null, a4r4g4b4, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a4b4g4r4, null, a4b4g4r4, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a4r4g4b4, null, x4r4g4b4, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, a4b4g4r4, null, x4b4g4r4, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, x4r4g4b4, null, x4r4g4b4, armv6_composite_src_0565_0565), + PIXMAN_STD_FAST_PATH (SRC, x4b4g4r4, null, x4b4g4r4, armv6_composite_src_0565_0565), + + PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, r3g3b2, null, r3g3b2, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, b2g3r3, null, b2g3r3, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, a2r2g2b2, null, a2r2g2b2, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, a2b2g2r2, null, a2b2g2r2, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, c8, null, c8, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, g8, null, g8, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, x4a4, null, x4a4, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, x4c4, null, x4c4, armv6_composite_src_8_8), + PIXMAN_STD_FAST_PATH (SRC, x4g4, null, x4g4, armv6_composite_src_8_8), + + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, armv6_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, armv6_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, armv6_composite_src_0565_8888), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, armv6_composite_src_0565_8888), + 
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888), @@ -428,5 +250,8 @@ _pixman_implementation_create_arm_simd (pixman_implementation_t *fallback) { pixman_implementation_t *imp = _pixman_implementation_create (fallback, arm_simd_fast_paths); + imp->blt = arm_simd_blt; + imp->fill = arm_simd_fill; + return imp; } diff --git a/lib/pixman/pixman/pixman-bits-image.c b/lib/pixman/pixman/pixman-bits-image.c index 085dd1606..75a39a115 100644 --- a/lib/pixman/pixman/pixman-bits-image.c +++ b/lib/pixman/pixman/pixman-bits-image.c @@ -413,10 +413,108 @@ bits_image_fetch_pixel_convolution (bits_image_t *image, } } - satot >>= 16; - srtot >>= 16; - sgtot >>= 16; - sbtot >>= 16; + satot = (satot + 0x8000) >> 16; + srtot = (srtot + 0x8000) >> 16; + sgtot = (sgtot + 0x8000) >> 16; + sbtot = (sbtot + 0x8000) >> 16; + + satot = CLIP (satot, 0, 0xff); + srtot = CLIP (srtot, 0, 0xff); + sgtot = CLIP (sgtot, 0, 0xff); + sbtot = CLIP (sbtot, 0, 0xff); + + return ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot)); +} + +static uint32_t +bits_image_fetch_pixel_separable_convolution (bits_image_t *image, + pixman_fixed_t x, + pixman_fixed_t y, + get_pixel_t get_pixel) +{ + pixman_fixed_t *params = image->common.filter_params; + pixman_repeat_t repeat_mode = image->common.repeat; + int width = image->width; + int height = image->height; + int cwidth = pixman_fixed_to_int (params[0]); + int cheight = pixman_fixed_to_int (params[1]); + int x_phase_bits = pixman_fixed_to_int (params[2]); + int y_phase_bits = pixman_fixed_to_int (params[3]); + int x_phase_shift = 16 - x_phase_bits; + int y_phase_shift = 16 - y_phase_bits; + int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1; + int y_off = ((cheight << 16) - pixman_fixed_1) >> 1; + pixman_fixed_t *y_params; + int srtot, sgtot, 
sbtot, satot; + int32_t x1, x2, y1, y2; + int32_t px, py; + int i, j; + + /* Round x and y to the middle of the closest phase before continuing. This + * ensures that the convolution matrix is aligned right, since it was + * positioned relative to a particular phase (and not relative to whatever + * exact fraction we happen to get here). + */ + x = ((x >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1); + y = ((y >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1); + + px = (x & 0xffff) >> x_phase_shift; + py = (y & 0xffff) >> y_phase_shift; + + y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight; + + x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); + y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); + x2 = x1 + cwidth; + y2 = y1 + cheight; + + srtot = sgtot = sbtot = satot = 0; + + for (i = y1; i < y2; ++i) + { + pixman_fixed_48_16_t fy = *y_params++; + pixman_fixed_t *x_params = params + 4 + px * cwidth; + + if (fy) + { + for (j = x1; j < x2; ++j) + { + pixman_fixed_t fx = *x_params++; + int rx = j; + int ry = i; + + if (fx) + { + pixman_fixed_t f; + uint32_t pixel; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &rx, width); + repeat (repeat_mode, &ry, height); + + pixel = get_pixel (image, rx, ry, FALSE); + } + else + { + pixel = get_pixel (image, rx, ry, TRUE); + } + + f = (fy * fx + 0x8000) >> 16; + + srtot += (int)RED_8 (pixel) * f; + sgtot += (int)GREEN_8 (pixel) * f; + sbtot += (int)BLUE_8 (pixel) * f; + satot += (int)ALPHA_8 (pixel) * f; + } + } + } + } + + satot = (satot + 0x8000) >> 16; + srtot = (srtot + 0x8000) >> 16; + sgtot = (sgtot + 0x8000) >> 16; + sbtot = (sbtot + 0x8000) >> 16; satot = CLIP (satot, 0, 0xff); srtot = CLIP (srtot, 0, 0xff); @@ -449,6 +547,10 @@ bits_image_fetch_pixel_filtered (bits_image_t *image, return bits_image_fetch_pixel_convolution (image, x, y, get_pixel); break; + case PIXMAN_FILTER_SEPARABLE_CONVOLUTION: + return 
bits_image_fetch_pixel_separable_convolution (image, x, y, get_pixel); + break; + default: break; } @@ -618,11 +720,155 @@ bits_image_fetch_general (pixman_iter_t *iter, return buffer; } -static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x); static force_inline void +bits_image_fetch_separable_convolution_affine (pixman_image_t * image, + int offset, + int line, + int width, + uint32_t * buffer, + const uint32_t * mask, + + convert_pixel_t convert_pixel, + pixman_format_code_t format, + pixman_repeat_t repeat_mode) +{ + bits_image_t *bits = &image->bits; + pixman_fixed_t *params = image->common.filter_params; + int cwidth = pixman_fixed_to_int (params[0]); + int cheight = pixman_fixed_to_int (params[1]); + int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1; + int y_off = ((cheight << 16) - pixman_fixed_1) >> 1; + int x_phase_bits = pixman_fixed_to_int (params[2]); + int y_phase_bits = pixman_fixed_to_int (params[3]); + int x_phase_shift = 16 - x_phase_bits; + int y_phase_shift = 16 - y_phase_bits; + pixman_fixed_t vx, vy; + pixman_fixed_t ux, uy; + pixman_vector_t v; + int k; + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (image->common.transform, &v)) + return; + + ux = image->common.transform->matrix[0][0]; + uy = image->common.transform->matrix[1][0]; + + vx = v.vector[0]; + vy = v.vector[1]; + + for (k = 0; k < width; ++k) + { + pixman_fixed_t *y_params; + int satot, srtot, sgtot, sbtot; + pixman_fixed_t x, y; + int32_t x1, x2, y1, y2; + int32_t px, py; + int i, j; + + if (mask && !mask[k]) + goto next; + + /* Round x and y to the middle of the closest phase before continuing. 
This + * ensures that the convolution matrix is aligned right, since it was + * positioned relative to a particular phase (and not relative to whatever + * exact fraction we happen to get here). + */ + x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1); + y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1); + + px = (x & 0xffff) >> x_phase_shift; + py = (y & 0xffff) >> y_phase_shift; + + x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); + y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); + x2 = x1 + cwidth; + y2 = y1 + cheight; + + satot = srtot = sgtot = sbtot = 0; + + y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight; + + for (i = y1; i < y2; ++i) + { + pixman_fixed_t fy = *y_params++; + + if (fy) + { + pixman_fixed_t *x_params = params + 4 + px * cwidth; + + for (j = x1; j < x2; ++j) + { + pixman_fixed_t fx = *x_params++; + int rx = j; + int ry = i; + + if (fx) + { + pixman_fixed_t f; + uint32_t pixel, mask; + uint8_t *row; + + mask = PIXMAN_FORMAT_A (format)? 
0 : 0xff000000; + + if (repeat_mode != PIXMAN_REPEAT_NONE) + { + repeat (repeat_mode, &rx, bits->width); + repeat (repeat_mode, &ry, bits->height); + + row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry; + pixel = convert_pixel (row, rx) | mask; + } + else + { + if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height) + { + pixel = 0; + } + else + { + row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry; + pixel = convert_pixel (row, rx) | mask; + } + } + + f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16; + srtot += (int)RED_8 (pixel) * f; + sgtot += (int)GREEN_8 (pixel) * f; + sbtot += (int)BLUE_8 (pixel) * f; + satot += (int)ALPHA_8 (pixel) * f; + } + } + } + } + + satot = (satot + 0x8000) >> 16; + srtot = (srtot + 0x8000) >> 16; + sgtot = (sgtot + 0x8000) >> 16; + sbtot = (sbtot + 0x8000) >> 16; + + satot = CLIP (satot, 0, 0xff); + srtot = CLIP (srtot, 0, 0xff); + sgtot = CLIP (sgtot, 0, 0xff); + sbtot = CLIP (sbtot, 0, 0xff); + + buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0); + + next: + vx += ux; + vy += uy; + } +} + +static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + +static force_inline void bits_image_fetch_bilinear_affine (pixman_image_t * image, int offset, int line, @@ -868,9 +1114,26 @@ convert_a8 (const uint8_t *row, int x) static force_inline uint32_t convert_r5g6b5 (const uint8_t *row, int x) { - return CONVERT_0565_TO_0888 (*((uint16_t *)row + x)); + return convert_0565_to_0888 (*((uint16_t *)row + x)); } +#define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode) \ + static uint32_t * \ + bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t *iter, \ + const uint32_t * mask) \ + { \ + bits_image_fetch_separable_convolution_affine ( \ + iter->image, \ + iter->x, iter->y++, \ + iter->width, \ + iter->buffer, mask, \ + convert_ ## format, \ + PIXMAN_ ## format, \ + repeat_mode); \ + \ + return iter->buffer; \ + } + #define MAKE_BILINEAR_FETCHER(name, format, repeat_mode) \ 
static uint32_t * \ bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t *iter, \ @@ -903,7 +1166,8 @@ convert_r5g6b5 (const uint8_t *row, int x) #define MAKE_FETCHERS(name, format, repeat_mode) \ MAKE_NEAREST_FETCHER (name, format, repeat_mode) \ - MAKE_BILINEAR_FETCHER (name, format, repeat_mode) + MAKE_BILINEAR_FETCHER (name, format, repeat_mode) \ + MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode) MAKE_FETCHERS (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD) MAKE_FETCHERS (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE) @@ -1153,6 +1417,20 @@ static const fetcher_info_t fetcher_info[] = FAST_PATH_AFFINE_TRANSFORM | \ FAST_PATH_NEAREST_FILTER) +#define GENERAL_SEPARABLE_CONVOLUTION_FLAGS \ + (FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_HAS_TRANSFORM | \ + FAST_PATH_AFFINE_TRANSFORM | \ + FAST_PATH_SEPARABLE_CONVOLUTION_FILTER) + +#define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \ + { PIXMAN_ ## format, \ + GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ + bits_image_fetch_separable_convolution_affine_ ## name, \ + _pixman_image_get_scanline_generic_float \ + }, + #define BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ { PIXMAN_ ## format, \ GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \ @@ -1168,6 +1446,7 @@ static const fetcher_info_t fetcher_info[] = }, #define AFFINE_FAST_PATHS(name, format, repeat) \ + SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \ BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \ NEAREST_AFFINE_FAST_PATH(name, format, repeat) diff --git a/lib/pixman/pixman/pixman-combine-float.c b/lib/pixman/pixman/pixman-combine-float.c index c3d54f025..5ea739f76 100644 --- a/lib/pixman/pixman/pixman-combine-float.c +++ b/lib/pixman/pixman/pixman-combine-float.c @@ -201,56 +201,56 @@ get_factor (combine_factor_t factor, float sa, float da) break; case SA_OVER_DA: - if (da == 0.0f) + if (FLOAT_IS_ZERO (da)) f = 1.0f; else f = CLAMP (sa / da); 
break; case DA_OVER_SA: - if (sa == 0.0f) + if (FLOAT_IS_ZERO (sa)) f = 1.0f; else f = CLAMP (da / sa); break; case INV_SA_OVER_DA: - if (da == 0.0f) + if (FLOAT_IS_ZERO (da)) f = 1.0f; else f = CLAMP ((1.0f - sa) / da); break; case INV_DA_OVER_SA: - if (sa == 0.0f) + if (FLOAT_IS_ZERO (sa)) f = 1.0f; else f = CLAMP ((1.0f - da) / sa); break; case ONE_MINUS_SA_OVER_DA: - if (da == 0.0f) + if (FLOAT_IS_ZERO (da)) f = 0.0f; else f = CLAMP (1.0f - sa / da); break; case ONE_MINUS_DA_OVER_SA: - if (sa == 0.0f) + if (FLOAT_IS_ZERO (sa)) f = 0.0f; else f = CLAMP (1.0f - da / sa); break; case ONE_MINUS_INV_DA_OVER_SA: - if (sa == 0.0f) + if (FLOAT_IS_ZERO (sa)) f = 0.0f; else f = CLAMP (1.0f - (1.0f - da) / sa); break; case ONE_MINUS_INV_SA_OVER_DA: - if (da == 0.0f) + if (FLOAT_IS_ZERO (da)) f = 0.0f; else f = CLAMP (1.0f - (1.0f - sa) / da); @@ -403,11 +403,11 @@ blend_lighten (float sa, float s, float da, float d) static force_inline float blend_color_dodge (float sa, float s, float da, float d) { - if (d == 0.0f) + if (FLOAT_IS_ZERO (d)) return 0.0f; else if (d * sa >= sa * da - s * da) return sa * da; - else if (sa - s == 0.0f) + else if (FLOAT_IS_ZERO (sa - s)) return sa * da; else return sa * sa * d / (sa - s); @@ -420,7 +420,7 @@ blend_color_burn (float sa, float s, float da, float d) return sa * da; else if (sa * (da - d) >= s * da) return 0.0f; - else if (s == 0.0f) + else if (FLOAT_IS_ZERO (s)) return 0.0f; else return sa * (da - sa * (da - d) / s); @@ -440,14 +440,14 @@ blend_soft_light (float sa, float s, float da, float d) { if (2 * s < sa) { - if (da == 0.0f) + if (FLOAT_IS_ZERO (da)) return d * sa; else return d * sa - d * (da - d) * (sa - 2 * s) / da; } else { - if (da == 0.0f) + if (FLOAT_IS_ZERO (da)) { return 0.0f; } @@ -651,10 +651,12 @@ clip_color (rgb_t *color, float a) float l = get_lum (color); float n = channel_min (color); float x = channel_max (color); + float t; if (n < 0.0f) { - if ((l - n) < 4 * FLT_EPSILON) + t = l - n; + if (FLOAT_IS_ZERO 
(t)) { color->r = 0.0f; color->g = 0.0f; @@ -662,14 +664,15 @@ clip_color (rgb_t *color, float a) } else { - color->r = l + (((color->r - l) * l) / (l - n)); - color->g = l + (((color->g - l) * l) / (l - n)); - color->b = l + (((color->b - l) * l) / (l - n)); + color->r = l + (((color->r - l) * l) / t); + color->g = l + (((color->g - l) * l) / t); + color->b = l + (((color->b - l) * l) / t); } } if (x > a) { - if ((x - l) < 4 * FLT_EPSILON) + t = x - l; + if (FLOAT_IS_ZERO (t)) { color->r = a; color->g = a; @@ -677,9 +680,9 @@ clip_color (rgb_t *color, float a) } else { - color->r = l + (((color->r - l) * (a - l) / (x - l))); - color->g = l + (((color->g - l) * (a - l) / (x - l))); - color->b = l + (((color->b - l) * (a - l) / (x - l))); + color->r = l + (((color->r - l) * (a - l) / t)); + color->g = l + (((color->g - l) * (a - l) / t)); + color->b = l + (((color->b - l) * (a - l) / t)); } } } @@ -700,6 +703,7 @@ static void set_sat (rgb_t *src, float sat) { float *max, *mid, *min; + float t; if (src->r > src->g) { @@ -750,14 +754,16 @@ set_sat (rgb_t *src, float sat) } } - if (*max > *min) + t = *max - *min; + + if (FLOAT_IS_ZERO (t)) { - *mid = (((*mid - *min) * sat) / (*max - *min)); - *max = sat; + *mid = *max = 0.0f; } else { - *mid = *max = 0.0f; + *mid = ((*mid - *min) * sat) / t; + *max = sat; } *min = 0.0f; diff --git a/lib/pixman/pixman/pixman-combine32.c b/lib/pixman/pixman/pixman-combine32.c index 54cc8771b..3ac7576bd 100644 --- a/lib/pixman/pixman/pixman-combine32.c +++ b/lib/pixman/pixman/pixman-combine32.c @@ -196,14 +196,58 @@ combine_over_u (pixman_implementation_t *imp, { int i; - for (i = 0; i < width; ++i) + if (!mask) { - uint32_t s = combine_mask (src, mask, i); - uint32_t d = *(dest + i); - uint32_t ia = ALPHA_8 (~s); - - UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); - *(dest + i) = d; + for (i = 0; i < width; ++i) + { + uint32_t s = *(src + i); + uint32_t a = ALPHA_8 (s); + if (a == 0xFF) + { + *(dest + i) = s; + } + else if (s) + { + uint32_t d = 
*(dest + i); + uint32_t ia = a ^ 0xFF; + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + *(dest + i) = d; + } + } + } + else + { + for (i = 0; i < width; ++i) + { + uint32_t m = ALPHA_8 (*(mask + i)); + if (m == 0xFF) + { + uint32_t s = *(src + i); + uint32_t a = ALPHA_8 (s); + if (a == 0xFF) + { + *(dest + i) = s; + } + else if (s) + { + uint32_t d = *(dest + i); + uint32_t ia = a ^ 0xFF; + UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); + *(dest + i) = d; + } + } + else if (m) + { + uint32_t s = *(src + i); + if (s) + { + uint32_t d = *(dest + i); + UN8x4_MUL_UN8 (s, m); + UN8x4_MUL_UN8_ADD_UN8x4 (d, ALPHA_8 (~s), s); + *(dest + i) = d; + } + } + } } } diff --git a/lib/pixman/pixman/pixman-combine32.h b/lib/pixman/pixman/pixman-combine32.h index 875dde3cf..cdd56a61a 100644 --- a/lib/pixman/pixman/pixman-combine32.h +++ b/lib/pixman/pixman/pixman-combine32.h @@ -20,6 +20,47 @@ #define BLUE_8(x) ((x) & MASK) /* + * ARMv6 has UQADD8 instruction, which implements unsigned saturated + * addition for 8-bit values packed in 32-bit registers. It is very useful + * for UN8x4_ADD_UN8x4, UN8_rb_ADD_UN8_rb and ADD_UN8 macros (which would + * otherwise need a lot of arithmetic operations to simulate this operation). + * Since most of the major ARM linux distros are built for ARMv7, we are + * much less dependent on runtime CPU detection and can get practical + * benefits from conditional compilation here for a lot of users. 
+ */ + +#if defined(USE_GCC_INLINE_ASM) && defined(__arm__) && \ + !defined(__aarch64__) && (!defined(__thumb__) || defined(__thumb2__)) +#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) + +static force_inline uint32_t +un8x4_add_un8x4 (uint32_t x, uint32_t y) +{ + uint32_t t; + asm ("uqadd8 %0, %1, %2" : "=r" (t) : "%r" (x), "r" (y)); + return t; +} + +#define UN8x4_ADD_UN8x4(x, y) \ + ((x) = un8x4_add_un8x4 ((x), (y))) + +#define UN8_rb_ADD_UN8_rb(x, y, t) \ + ((t) = un8x4_add_un8x4 ((x), (y)), (x) = (t)) + +#define ADD_UN8(x, y, t) \ + ((t) = (x), un8x4_add_un8x4 ((t), (y))) + +#endif +#endif + +/*****************************************************************************/ + +/* * Helper macros. 
*/ @@ -29,9 +70,11 @@ #define DIV_UN8(a, b) \ (((uint16_t) (a) * MASK + ((b) / 2)) / (b)) +#ifndef ADD_UN8 #define ADD_UN8(x, y, t) \ ((t) = (x) + (y), \ (uint32_t) (uint8_t) ((t) | (0 - ((t) >> G_SHIFT)))) +#endif #define DIV_ONE_UN8(x) \ (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT) @@ -56,6 +99,7 @@ /* * x_rb = min (x_rb + y_rb, 255) */ +#ifndef UN8_rb_ADD_UN8_rb #define UN8_rb_ADD_UN8_rb(x, y, t) \ do \ { \ @@ -63,6 +107,7 @@ t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \ x = (t & RB_MASK); \ } while (0) +#endif /* * x_rb = (x_rb * a_rb) / 255 @@ -208,6 +253,7 @@ /* x_c = min(x_c + y_c, 255) */ +#ifndef UN8x4_ADD_UN8x4 #define UN8x4_ADD_UN8x4(x, y) \ do \ { \ @@ -223,3 +269,4 @@ \ x = r1__ | (r2__ << G_SHIFT); \ } while (0) +#endif diff --git a/lib/pixman/pixman/pixman-compiler.h b/lib/pixman/pixman/pixman-compiler.h index a978accfd..9b190b422 100644 --- a/lib/pixman/pixman/pixman-compiler.h +++ b/lib/pixman/pixman/pixman-compiler.h @@ -19,6 +19,12 @@ #endif #if defined (__GNUC__) +# define unlikely(expr) __builtin_expect ((expr), 0) +#else +# define unlikely(expr) (expr) +#endif + +#if defined (__GNUC__) # define MAYBE_UNUSED __attribute__((unused)) #else # define MAYBE_UNUSED @@ -56,6 +62,10 @@ # define INT64_MAX (9223372036854775807) #endif +#ifndef SIZE_MAX +# define SIZE_MAX ((size_t)-1) +#endif + #ifndef M_PI # define M_PI 3.14159265358979323846 diff --git a/lib/pixman/pixman/pixman-edge.c b/lib/pixman/pixman/pixman-edge.c index 8d498ab44..ad6dfc4cf 100644 --- a/lib/pixman/pixman/pixman-edge.c +++ b/lib/pixman/pixman/pixman-edge.c @@ -374,6 +374,7 @@ pixman_rasterize_edges (pixman_image_t *image, pixman_fixed_t b) { return_if_fail (image->type == BITS); + return_if_fail (PIXMAN_FORMAT_TYPE (image->bits.format) == PIXMAN_TYPE_A); if (image->bits.read_func || image->bits.write_func) pixman_rasterize_edges_accessors (image, l, r, t, b); diff --git a/lib/pixman/pixman/pixman-fast-path.c b/lib/pixman/pixman/pixman-fast-path.c index 
d95cb4dee..247aea645 100644 --- a/lib/pixman/pixman/pixman-fast-path.c +++ b/lib/pixman/pixman/pixman-fast-path.c @@ -35,7 +35,7 @@ static force_inline uint32_t fetch_24 (uint8_t *a) { - if (((unsigned long)a) & 1) + if (((uintptr_t)a) & 1) { #ifdef WORDS_BIGENDIAN return (*a << 16) | (*(uint16_t *)(a + 1)); @@ -57,7 +57,7 @@ static force_inline void store_24 (uint8_t *a, uint32_t v) { - if (((unsigned long)a) & 1) + if (((uintptr_t)a) & 1) { #ifdef WORDS_BIGENDIAN *a = (uint8_t) (v >> 16); @@ -507,15 +507,15 @@ fast_composite_over_n_8_0565 (pixman_implementation_t *imp, else { d = *dst; - d = over (src, CONVERT_0565_TO_0888 (d)); + d = over (src, convert_0565_to_0888 (d)); } - *dst = CONVERT_8888_TO_0565 (d); + *dst = convert_8888_to_0565 (d); } else if (m) { d = *dst; - d = over (in (src, m), CONVERT_0565_TO_0888 (d)); - *dst = CONVERT_8888_TO_0565 (d); + d = over (in (src, m), convert_0565_to_0888 (d)); + *dst = convert_8888_to_0565 (d); } dst++; } @@ -541,7 +541,7 @@ fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, if (src == 0) return; - src16 = CONVERT_8888_TO_0565 (src); + src16 = convert_8888_to_0565 (src); PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); @@ -566,14 +566,14 @@ fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, else { d = *dst; - d = over (src, CONVERT_0565_TO_0888 (d)); - *dst = CONVERT_8888_TO_0565 (d); + d = over (src, convert_0565_to_0888 (d)); + *dst = convert_8888_to_0565 (d); } } else if (ma) { d = *dst; - d = CONVERT_0565_TO_0888 (d); + d = convert_0565_to_0888 (d); s = src; @@ -582,7 +582,7 @@ fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, ma = ~ma; UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); - *dst = CONVERT_8888_TO_0565 (d); + *dst = convert_8888_to_0565 (d); } dst++; } @@ -729,9 +729,9 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp, else { d = 
*dst; - d = over (s, CONVERT_0565_TO_0888 (d)); + d = over (s, convert_0565_to_0888 (d)); } - *dst = CONVERT_8888_TO_0565 (d); + *dst = convert_8888_to_0565 (d); } dst++; } @@ -739,36 +739,6 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp, } static void -fast_composite_src_x888_0565 (pixman_implementation_t *imp, - pixman_composite_info_t *info) -{ - PIXMAN_COMPOSITE_ARGS (info); - uint16_t *dst_line, *dst; - uint32_t *src_line, *src, s; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - *dst = CONVERT_8888_TO_0565 (s); - dst++; - } - } -} - -static void fast_composite_add_8_8 (pixman_implementation_t *imp, pixman_composite_info_t *info) { @@ -838,13 +808,13 @@ fast_composite_add_0565_0565 (pixman_implementation_t *imp, if (s) { d = *dst; - s = CONVERT_0565_TO_8888 (s); + s = convert_0565_to_8888 (s); if (d) { - d = CONVERT_0565_TO_8888 (d); + d = convert_0565_to_8888 (d); UN8x4_ADD_UN8x4 (s, d); } - *dst = CONVERT_8888_TO_0565 (s); + *dst = convert_8888_to_0565 (s); } dst++; } @@ -1094,7 +1064,7 @@ fast_composite_over_n_1_0565 (pixman_implementation_t *imp, if (srca == 0xff) { - src565 = CONVERT_8888_TO_0565 (src); + src565 = convert_8888_to_0565 (src); while (height--) { dst = dst_line; @@ -1142,8 +1112,8 @@ fast_composite_over_n_1_0565 (pixman_implementation_t *imp, } if (bitcache & bitmask) { - d = over (src, CONVERT_0565_TO_0888 (*dst)); - *dst = CONVERT_8888_TO_0565 (d); + d = over (src, convert_0565_to_0888 (*dst)); + *dst = convert_8888_to_0565 (d); } bitmask = UPDATE_BITMASK (bitmask); dst++; @@ -1176,7 +1146,7 @@ fast_composite_solid_fill (pixman_implementation_t *imp, else if (dest_image->bits.format == 
PIXMAN_r5g6b5 || dest_image->bits.format == PIXMAN_b5g6r5) { - src = CONVERT_8888_TO_0565 (src); + src = convert_8888_to_0565 (src); } pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, @@ -1243,6 +1213,18 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp, pixman_composite_func_t func; pixman_format_code_t mask_format; uint32_t src_flags, mask_flags; + int32_t sx, sy; + int32_t width_remain; + int32_t num_pixels; + int32_t src_width; + int32_t i, j; + pixman_image_t extended_src_image; + uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; + pixman_bool_t need_src_extension; + uint32_t *src_line; + int32_t src_stride; + int32_t src_bpp; + pixman_composite_info_t info2 = *info; src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; @@ -1258,149 +1240,131 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp, mask_flags = FAST_PATH_IS_OPAQUE; } - if (_pixman_implementation_lookup_composite ( - imp->toplevel, info->op, - src_image->common.extended_format_code, src_flags, - mask_format, mask_flags, - dest_image->common.extended_format_code, info->dest_flags, - &imp, &func)) + _pixman_implementation_lookup_composite ( + imp->toplevel, info->op, + src_image->common.extended_format_code, src_flags, + mask_format, mask_flags, + dest_image->common.extended_format_code, info->dest_flags, + &imp, &func); + + src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); + + if (src_image->bits.width < REPEAT_MIN_WIDTH && + (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) && + !src_image->bits.indexed) { - int32_t sx, sy; - int32_t width_remain; - int32_t num_pixels; - int32_t src_width; - int32_t i, j; - pixman_image_t extended_src_image; - uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; - pixman_bool_t need_src_extension; - uint32_t *src_line; - int32_t src_stride; - int32_t src_bpp; - pixman_composite_info_t info2 = *info; - - src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); - - if (src_image->bits.width < 
REPEAT_MIN_WIDTH && - (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) && - !src_image->bits.indexed) - { - sx = src_x; - sx = MOD (sx, src_image->bits.width); - sx += width; - src_width = 0; + sx = src_x; + sx = MOD (sx, src_image->bits.width); + sx += width; + src_width = 0; - while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) - src_width += src_image->bits.width; + while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) + src_width += src_image->bits.width; - src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t); + src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t); - /* Initialize/validate stack-allocated temporary image */ - _pixman_bits_image_init (&extended_src_image, src_image->bits.format, - src_width, 1, &extended_src[0], src_stride, - FALSE); - _pixman_image_validate (&extended_src_image); + /* Initialize/validate stack-allocated temporary image */ + _pixman_bits_image_init (&extended_src_image, src_image->bits.format, + src_width, 1, &extended_src[0], src_stride, + FALSE); + _pixman_image_validate (&extended_src_image); - info2.src_image = &extended_src_image; - need_src_extension = TRUE; - } - else - { - src_width = src_image->bits.width; - need_src_extension = FALSE; - } + info2.src_image = &extended_src_image; + need_src_extension = TRUE; + } + else + { + src_width = src_image->bits.width; + need_src_extension = FALSE; + } - sx = src_x; - sy = src_y; + sx = src_x; + sy = src_y; - while (--height >= 0) - { - sx = MOD (sx, src_width); - sy = MOD (sy, src_image->bits.height); + while (--height >= 0) + { + sx = MOD (sx, src_width); + sy = MOD (sy, src_image->bits.height); - if (need_src_extension) + if (need_src_extension) + { + if (src_bpp == 32) { - if (src_bpp == 32) - { - PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1); - for (i = 0; i < src_width; ) - { - for (j = 0; j < src_image->bits.width; j++, 
i++) - extended_src[i] = src_line[j]; - } - } - else if (src_bpp == 16) + for (i = 0; i < src_width; ) { - uint16_t *src_line_16; - - PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, - src_line_16, 1); - src_line = (uint32_t*)src_line_16; - - for (i = 0; i < src_width; ) - { - for (j = 0; j < src_image->bits.width; j++, i++) - ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j]; - } + for (j = 0; j < src_image->bits.width; j++, i++) + extended_src[i] = src_line[j]; } - else if (src_bpp == 8) - { - uint8_t *src_line_8; + } + else if (src_bpp == 16) + { + uint16_t *src_line_16; - PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, - src_line_8, 1); - src_line = (uint32_t*)src_line_8; + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, + src_line_16, 1); + src_line = (uint32_t*)src_line_16; - for (i = 0; i < src_width; ) - { - for (j = 0; j < src_image->bits.width; j++, i++) - ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; - } + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j]; } - - info2.src_y = 0; } - else + else if (src_bpp == 8) { - info2.src_y = sy; + uint8_t *src_line_8; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, + src_line_8, 1); + src_line = (uint32_t*)src_line_8; + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; + } } - width_remain = width; + info2.src_y = 0; + } + else + { + info2.src_y = sy; + } - while (width_remain > 0) - { - num_pixels = src_width - sx; + width_remain = width; - if (num_pixels > width_remain) - num_pixels = width_remain; + while (width_remain > 0) + { + num_pixels = src_width - sx; - info2.src_x = sx; - info2.width = num_pixels; - info2.height = 1; + if (num_pixels > width_remain) + num_pixels = width_remain; - func (imp, &info2); + info2.src_x = sx; + info2.width = num_pixels; + 
info2.height = 1; - width_remain -= num_pixels; - info2.mask_x += num_pixels; - info2.dest_x += num_pixels; - sx = 0; - } + func (imp, &info2); - sx = src_x; - sy++; - info2.mask_x = info->mask_x; - info2.mask_y++; - info2.dest_x = info->dest_x; - info2.dest_y++; + width_remain -= num_pixels; + info2.mask_x += num_pixels; + info2.dest_x += num_pixels; + sx = 0; } - if (need_src_extension) - _pixman_image_fini (&extended_src_image); - } - else - { - _pixman_log_error (FUNC, "Didn't find a suitable function "); + sx = src_x; + sy++; + info2.mask_x = info->mask_x; + info2.mask_y++; + info2.dest_x = info->dest_x; + info2.dest_y++; } + + if (need_src_extension) + _pixman_image_fini (&extended_src_image); } /* Use more unrolling for src_0565_0565 because it is typically CPU bound */ @@ -1913,10 +1877,6 @@ static const pixman_fast_path_t c_fast_paths[] = PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8), PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8), @@ -2067,12 +2027,12 @@ pixman_fill1 (uint32_t *bits, int y, int width, int height, - uint32_t xor) + uint32_t filler) { uint32_t *dst = bits + y * stride + (x >> 5); int offs = x & 31; - if (xor & 1) + if (filler & 1) { while (height--) { @@ -2097,11 +2057,11 @@ pixman_fill8 (uint32_t *bits, int y, int width, int height, - uint32_t xor) + uint32_t filler) { int byte_stride = stride * (int) sizeof (uint32_t); uint8_t *dst = (uint8_t *) 
bits; - uint8_t v = xor & 0xff; + uint8_t v = filler & 0xff; int i; dst = dst + y * byte_stride + x; @@ -2122,12 +2082,12 @@ pixman_fill16 (uint32_t *bits, int y, int width, int height, - uint32_t xor) + uint32_t filler) { int short_stride = (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t); uint16_t *dst = (uint16_t *)bits; - uint16_t v = xor & 0xffff; + uint16_t v = filler & 0xffff; int i; dst = dst + y * short_stride + x; @@ -2148,7 +2108,7 @@ pixman_fill32 (uint32_t *bits, int y, int width, int height, - uint32_t xor) + uint32_t filler) { int i; @@ -2157,7 +2117,7 @@ pixman_fill32 (uint32_t *bits, while (height--) { for (i = 0; i < width; ++i) - bits[i] = xor; + bits[i] = filler; bits += stride; } @@ -2172,24 +2132,24 @@ fast_path_fill (pixman_implementation_t *imp, int y, int width, int height, - uint32_t xor) + uint32_t filler) { switch (bpp) { case 1: - pixman_fill1 (bits, stride, x, y, width, height, xor); + pixman_fill1 (bits, stride, x, y, width, height, filler); break; case 8: - pixman_fill8 (bits, stride, x, y, width, height, xor); + pixman_fill8 (bits, stride, x, y, width, height, filler); break; case 16: - pixman_fill16 (bits, stride, x, y, width, height, xor); + pixman_fill16 (bits, stride, x, y, width, height, filler); break; case 32: - pixman_fill32 (bits, stride, x, y, width, height, xor); + pixman_fill32 (bits, stride, x, y, width, height, filler); break; default: @@ -2199,12 +2159,200 @@ fast_path_fill (pixman_implementation_t *imp, return TRUE; } +/*****************************************************************************/ + +static uint32_t * +fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) +{ + int32_t w = iter->width; + uint32_t *dst = iter->buffer; + const uint16_t *src = (const uint16_t *)iter->bits; + + iter->bits += iter->stride; + + /* Align the source buffer at 4 bytes boundary */ + if (w > 0 && ((uintptr_t)src & 3)) + { + *dst++ = convert_0565_to_8888 (*src++); + w--; + } + /* Process two pixels per iteration 
*/ + while ((w -= 2) >= 0) + { + uint32_t sr, sb, sg, t0, t1; + uint32_t s = *(const uint32_t *)src; + src += 2; + sr = (s >> 8) & 0x00F800F8; + sb = (s << 3) & 0x00F800F8; + sg = (s >> 3) & 0x00FC00FC; + sr |= sr >> 5; + sb |= sb >> 5; + sg |= sg >> 6; + t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) | + (sb & 0xFF) | 0xFF000000; + t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) | + (sb >> 16) | 0xFF000000; +#ifdef WORDS_BIGENDIAN + *dst++ = t1; + *dst++ = t0; +#else + *dst++ = t0; + *dst++ = t1; +#endif + } + if (w & 1) + { + *dst = convert_0565_to_8888 (*src); + } + + return iter->buffer; +} + +static uint32_t * +fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask) +{ + iter->bits += iter->stride; + return iter->buffer; +} + +/* Helper function for a workaround, which tries to ensure that 0x1F001F + * constant is always allocated in a register on RISC architectures. + */ +static force_inline uint32_t +convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F) +{ + uint32_t a, b; + a = (s >> 3) & x1F001F; + b = s & 0xFC00; + a |= a >> 5; + a |= b >> 5; + return a; +} + +static void +fast_write_back_r5g6b5 (pixman_iter_t *iter) +{ + int32_t w = iter->width; + uint16_t *dst = (uint16_t *)(iter->bits - iter->stride); + const uint32_t *src = iter->buffer; + /* Workaround to ensure that x1F001F variable is allocated in a register */ + static volatile uint32_t volatile_x1F001F = 0x1F001F; + uint32_t x1F001F = volatile_x1F001F; + + while ((w -= 4) >= 0) + { + uint32_t s1 = *src++; + uint32_t s2 = *src++; + uint32_t s3 = *src++; + uint32_t s4 = *src++; + *dst++ = convert_8888_to_0565_workaround (s1, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s2, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s3, x1F001F); + *dst++ = convert_8888_to_0565_workaround (s4, x1F001F); + } + if (w & 2) + { + *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); + *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); + } + if (w & 1) + { 
+ *dst = convert_8888_to_0565_workaround (*src, x1F001F); + } +} + +typedef struct +{ + pixman_format_code_t format; + pixman_iter_get_scanline_t get_scanline; + pixman_iter_write_back_t write_back; +} fetcher_info_t; + +static const fetcher_info_t fetchers[] = +{ + { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, + { PIXMAN_null } +}; + +static pixman_bool_t +fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ + pixman_image_t *image = iter->image; + +#define FLAGS \ + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + + if ((iter->iter_flags & ITER_NARROW) && + (iter->image_flags & FLAGS) == FLAGS) + { + const fetcher_info_t *f; + + for (f = &fetchers[0]; f->format != PIXMAN_null; f++) + { + if (image->common.extended_format_code == f->format) + { + uint8_t *b = (uint8_t *)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; + iter->stride = s; + + iter->get_scanline = f->get_scanline; + return TRUE; + } + } + } + + return FALSE; +} + +static pixman_bool_t +fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ + pixman_image_t *image = iter->image; + + if ((iter->iter_flags & ITER_NARROW) && + (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS) + { + const fetcher_info_t *f; + + for (f = &fetchers[0]; f->format != PIXMAN_null; f++) + { + if (image->common.extended_format_code == f->format) + { + uint8_t *b = (uint8_t *)image->bits.bits; + int s = image->bits.rowstride * 4; + + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; + iter->stride = s; + + if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == + (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) + { + iter->get_scanline = fast_dest_fetch_noop; + } + else + { + iter->get_scanline = f->get_scanline; + } + iter->write_back = f->write_back; + return TRUE; 
+ } + } + } + return FALSE; +} + + pixman_implementation_t * _pixman_implementation_create_fast_path (pixman_implementation_t *fallback) { pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); imp->fill = fast_path_fill; + imp->src_iter_init = fast_src_iter_init; + imp->dest_iter_init = fast_dest_iter_init; return imp; } diff --git a/lib/pixman/pixman/pixman-filter.c b/lib/pixman/pixman/pixman-filter.c new file mode 100644 index 000000000..26b39d571 --- /dev/null +++ b/lib/pixman/pixman/pixman-filter.c @@ -0,0 +1,348 @@ +/* + * Copyright 2012, Red Hat, Inc. + * Copyright 2012, Soren Sandmann + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Author: Soren Sandmann <soren.sandmann@gmail.com> + */ +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <assert.h> +#include <config.h> +#include "pixman-private.h" + +typedef double (* kernel_func_t) (double x); + +typedef struct +{ + pixman_kernel_t kernel; + kernel_func_t func; + double width; +} filter_info_t; + +static double +impulse_kernel (double x) +{ + return (x == 0.0)? 1.0 : 0.0; +} + +static double +box_kernel (double x) +{ + return 1; +} + +static double +linear_kernel (double x) +{ + return 1 - fabs (x); +} + +static double +gaussian_kernel (double x) +{ +#define SQRT2 (1.4142135623730950488016887242096980785696718753769480) +#define SIGMA (SQRT2 / 2.0) + + return exp (- x * x / (2 * SIGMA * SIGMA)) / (SIGMA * sqrt (2.0 * M_PI)); +} + +static double +sinc (double x) +{ + if (x == 0.0) + return 1.0; + else + return sin (M_PI * x) / (M_PI * x); +} + +static double +lanczos (double x, int n) +{ + return sinc (x) * sinc (x * (1.0 / n)); +} + +static double +lanczos2_kernel (double x) +{ + return lanczos (x, 2); +} + +static double +lanczos3_kernel (double x) +{ + return lanczos (x, 3); +} + +static double +nice_kernel (double x) +{ + return lanczos3_kernel (x * 0.75); +} + +static double +general_cubic (double x, double B, double C) +{ + double ax = fabs(x); + + if (ax < 1) + { + return ((12 - 9 * B - 6 * C) * ax * ax * ax + + (-18 + 12 * B + 6 * C) * ax * ax + (6 - 2 * B)) / 6; + } + else if (ax >= 1 && ax < 2) + { + return ((-B - 6 * C) * ax * ax * ax + + (6 * B + 30 * C) * ax * ax + (-12 * B - 48 * C) * + ax + (8 * B + 24 * C)) / 6; + } + else + { + return 0; + } +} + +static double +cubic_kernel (double x) +{ + /* This is the Mitchell-Netravali filter. + * + * (0.0, 0.5) would give us the Catmull-Rom spline, + * but that one seems to be indistinguishable from Lanczos2. 
*/ + return general_cubic (x, 1/3.0, 1/3.0); +} + +static const filter_info_t filters[] = +{ + { PIXMAN_KERNEL_IMPULSE, impulse_kernel, 0.0 }, + { PIXMAN_KERNEL_BOX, box_kernel, 1.0 }, + { PIXMAN_KERNEL_LINEAR, linear_kernel, 2.0 }, + { PIXMAN_KERNEL_CUBIC, cubic_kernel, 4.0 }, + { PIXMAN_KERNEL_GAUSSIAN, gaussian_kernel, 6 * SIGMA }, + { PIXMAN_KERNEL_LANCZOS2, lanczos2_kernel, 4.0 }, + { PIXMAN_KERNEL_LANCZOS3, lanczos3_kernel, 6.0 }, + { PIXMAN_KERNEL_LANCZOS3_STRETCHED, nice_kernel, 8.0 }, +}; + +/* This function scales @kernel2 by @scale, then + * aligns @x1 in @kernel1 with @x2 in @kernel2 and + * integrates the product of the kernels across @width. + * + * This function assumes that the intervals are within + * the kernels in question. E.g., the caller must not + * try to integrate a linear kernel outside of [-1:1] + */ +static double +integral (pixman_kernel_t kernel1, double x1, + pixman_kernel_t kernel2, double scale, double x2, + double width) +{ + /* If the integration interval crosses zero, break it into + * two separate integrals. This ensures that filters such + * as LINEAR that are not differentiable at 0 will still + * integrate properly. 
+ */ + if (x1 < 0 && x1 + width > 0) + { + return + integral (kernel1, x1, kernel2, scale, x2, - x1) + + integral (kernel1, 0, kernel2, scale, x2 - x1, width + x1); + } + else if (x2 < 0 && x2 + width > 0) + { + return + integral (kernel1, x1, kernel2, scale, x2, - x2) + + integral (kernel1, x1 - x2, kernel2, scale, 0, width + x2); + } + else if (kernel1 == PIXMAN_KERNEL_IMPULSE) + { + assert (width == 0.0); + return filters[kernel2].func (x2 * scale); + } + else if (kernel2 == PIXMAN_KERNEL_IMPULSE) + { + assert (width == 0.0); + return filters[kernel1].func (x1); + } + else + { + /* Integration via Simpson's rule */ +#define N_SEGMENTS 128 +#define SAMPLE(a1, a2) \ + (filters[kernel1].func ((a1)) * filters[kernel2].func ((a2) * scale)) + + double s = 0.0; + double h = width / (double)N_SEGMENTS; + int i; + + s = SAMPLE (x1, x2); + + for (i = 1; i < N_SEGMENTS; i += 2) + { + double a1 = x1 + h * i; + double a2 = x2 + h * i; + + s += 2 * SAMPLE (a1, a2); + + if (i >= 2 && i < N_SEGMENTS - 1) + s += 4 * SAMPLE (a1, a2); + } + + s += SAMPLE (x1 + width, x2 + width); + + return h * s * (1.0 / 3.0); + } +} + +static pixman_fixed_t * +create_1d_filter (int *width, + pixman_kernel_t reconstruct, + pixman_kernel_t sample, + double scale, + int n_phases) +{ + pixman_fixed_t *params, *p; + double step; + double size; + int i; + + size = scale * filters[sample].width + filters[reconstruct].width; + *width = ceil (size); + + p = params = malloc (*width * n_phases * sizeof (pixman_fixed_t)); + if (!params) + return NULL; + + step = 1.0 / n_phases; + + for (i = 0; i < n_phases; ++i) + { + double frac = step / 2.0 + i * step; + pixman_fixed_t new_total; + int x, x1, x2; + double total; + + /* Sample convolution of reconstruction and sampling + * filter. See rounding.txt regarding the rounding + * and sample positions. 
+ */ + + x1 = ceil (frac - *width / 2.0 - 0.5); + x2 = x1 + *width; + + total = 0; + for (x = x1; x < x2; ++x) + { + double pos = x + 0.5 - frac; + double rlow = - filters[reconstruct].width / 2.0; + double rhigh = rlow + filters[reconstruct].width; + double slow = pos - scale * filters[sample].width / 2.0; + double shigh = slow + scale * filters[sample].width; + double c = 0.0; + double ilow, ihigh; + + if (rhigh >= slow && rlow <= shigh) + { + ilow = MAX (slow, rlow); + ihigh = MIN (shigh, rhigh); + + c = integral (reconstruct, ilow, + sample, 1.0 / scale, ilow - pos, + ihigh - ilow); + } + + total += c; + *p++ = (pixman_fixed_t)(c * 65535.0 + 0.5); + } + + /* Normalize */ + p -= *width; + total = 1 / total; + new_total = 0; + for (x = x1; x < x2; ++x) + { + pixman_fixed_t t = (*p) * total + 0.5; + + new_total += t; + *p++ = t; + } + + if (new_total != pixman_fixed_1) + *(p - *width / 2) += (pixman_fixed_1 - new_total); + } + + return params; +} + +/* Create the parameter list for a SEPARABLE_CONVOLUTION filter + * with the given kernels and scale parameters + */ +PIXMAN_EXPORT pixman_fixed_t * +pixman_filter_create_separable_convolution (int *n_values, + pixman_fixed_t scale_x, + pixman_fixed_t scale_y, + pixman_kernel_t reconstruct_x, + pixman_kernel_t reconstruct_y, + pixman_kernel_t sample_x, + pixman_kernel_t sample_y, + int subsample_bits_x, + int subsample_bits_y) +{ + double sx = fabs (pixman_fixed_to_double (scale_x)); + double sy = fabs (pixman_fixed_to_double (scale_y)); + pixman_fixed_t *horz = NULL, *vert = NULL, *params = NULL; + int subsample_x, subsample_y; + int width, height; + + subsample_x = (1 << subsample_bits_x); + subsample_y = (1 << subsample_bits_y); + + horz = create_1d_filter (&width, reconstruct_x, sample_x, sx, subsample_x); + vert = create_1d_filter (&height, reconstruct_y, sample_y, sy, subsample_y); + + if (!horz || !vert) + goto out; + + *n_values = 4 + width * subsample_x + height * subsample_y; + + params = malloc (*n_values * 
sizeof (pixman_fixed_t)); + if (!params) + goto out; + + params[0] = pixman_int_to_fixed (width); + params[1] = pixman_int_to_fixed (height); + params[2] = pixman_int_to_fixed (subsample_bits_x); + params[3] = pixman_int_to_fixed (subsample_bits_y); + + memcpy (params + 4, horz, + width * subsample_x * sizeof (pixman_fixed_t)); + memcpy (params + 4 + width * subsample_x, vert, + height * subsample_y * sizeof (pixman_fixed_t)); + +out: + free (horz); + free (vert); + + return params; +} diff --git a/lib/pixman/pixman/pixman-general.c b/lib/pixman/pixman/pixman-general.c index 0bf91e444..93a1b9acf 100644 --- a/lib/pixman/pixman/pixman-general.c +++ b/lib/pixman/pixman/pixman-general.c @@ -42,9 +42,7 @@ general_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) { pixman_image_t *image = iter->image; - if (image->type == SOLID) - _pixman_solid_fill_iter_init (image, iter); - else if (image->type == LINEAR) + if (image->type == LINEAR) _pixman_linear_gradient_iter_init (image, iter); else if (image->type == RADIAL) _pixman_radial_gradient_iter_init (image, iter); @@ -52,7 +50,9 @@ general_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) _pixman_conical_gradient_iter_init (image, iter); else if (image->type == BITS) _pixman_bits_image_src_iter_init (image, iter); - else + else if (image->type == SOLID) + _pixman_log_error (FUNC, "Solid image not handled by noop"); + else _pixman_log_error (FUNC, "Pixman bug: unknown image type\n"); return TRUE; @@ -188,9 +188,6 @@ general_composite_rect (pixman_implementation_t *imp, compose = _pixman_implementation_lookup_combiner ( imp->toplevel, op, component_alpha, narrow); - if (!compose) - return; - for (i = 0; i < height; ++i) { uint32_t *s, *m, *d; diff --git a/lib/pixman/pixman/pixman-glyph.c b/lib/pixman/pixman/pixman-glyph.c index 15b3f1fea..5a271b64b 100644 --- a/lib/pixman/pixman/pixman-glyph.c +++ b/lib/pixman/pixman/pixman-glyph.c @@ -463,16 +463,13 @@ pixman_composite_glyphs_no_mask 
(pixman_op_t op, { glyph_format = glyph_img->common.extended_format_code; glyph_flags = glyph_img->common.flags; - + _pixman_implementation_lookup_composite ( get_implementation(), op, src->common.extended_format_code, src->common.flags, glyph_format, glyph_flags | extra, dest_format, dest_flags, &implementation, &func); - - if (!func) - goto out; } info.src_x = src_x + composite_box.x1 - dest_x; @@ -508,7 +505,7 @@ add_glyphs (pixman_glyph_cache_t *cache, uint32_t glyph_flags = 0; pixman_composite_func_t func = NULL; pixman_implementation_t *implementation = NULL; - uint32_t dest_format; + pixman_format_code_t dest_format; uint32_t dest_flags; pixman_box32_t dest_box; pixman_composite_info_t info; @@ -582,9 +579,6 @@ add_glyphs (pixman_glyph_cache_t *cache, mask_format, info.mask_flags, dest_format, dest_flags, &implementation, &func); - - if (!func) - goto out; } glyph_box.x1 = glyphs[i].x - glyph->origin_x + off_x; diff --git a/lib/pixman/pixman/pixman-gradient-walker.c b/lib/pixman/pixman/pixman-gradient-walker.c index e7e724fa6..5944a559a 100644 --- a/lib/pixman/pixman/pixman-gradient-walker.c +++ b/lib/pixman/pixman/pixman-gradient-walker.c @@ -37,11 +37,14 @@ _pixman_gradient_walker_init (pixman_gradient_walker_t *walker, walker->stops = gradient->stops; walker->left_x = 0; walker->right_x = 0x10000; - walker->stepper = 0; - walker->left_ag = 0; - walker->left_rb = 0; - walker->right_ag = 0; - walker->right_rb = 0; + walker->a_s = 0.0f; + walker->a_b = 0.0f; + walker->r_s = 0.0f; + walker->r_b = 0.0f; + walker->g_s = 0.0f; + walker->g_b = 0.0f; + walker->b_s = 0.0f; + walker->b_b = 0.0f; walker->repeat = repeat; walker->need_reset = TRUE; @@ -55,6 +58,9 @@ gradient_walker_reset (pixman_gradient_walker_t *walker, pixman_color_t *left_c, *right_c; int n, count = walker->num_stops; pixman_gradient_stop_t *stops = walker->stops; + float la, lr, lg, lb; + float ra, rr, rg, rb; + float lx, rx; if (walker->repeat == PIXMAN_REPEAT_NORMAL) { @@ -116,24 +122,49 @@ 
gradient_walker_reset (pixman_gradient_walker_t *walker, left_c = right_c; } - walker->left_x = left_x; - walker->right_x = right_x; - walker->left_ag = ((left_c->alpha >> 8) << 16) | (left_c->green >> 8); - walker->left_rb = ((left_c->red & 0xff00) << 8) | (left_c->blue >> 8); - walker->right_ag = ((right_c->alpha >> 8) << 16) | (right_c->green >> 8); - walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8); - - if (walker->left_x == walker->right_x || - (walker->left_ag == walker->right_ag && - walker->left_rb == walker->right_rb)) + /* The alpha channel is scaled to be in the [0, 255] interval, + * and the red/green/blue channels are scaled to be in [0, 1]. + * This ensures that after premultiplication all channels will + * be in the [0, 255] interval. + */ + la = (left_c->alpha * (1.0f/257.0f)); + lr = (left_c->red * (1.0f/257.0f)); + lg = (left_c->green * (1.0f/257.0f)); + lb = (left_c->blue * (1.0f/257.0f)); + + ra = (right_c->alpha * (1.0f/257.0f)); + rr = (right_c->red * (1.0f/257.0f)); + rg = (right_c->green * (1.0f/257.0f)); + rb = (right_c->blue * (1.0f/257.0f)); + + lx = left_x * (1.0f/65536.0f); + rx = right_x * (1.0f/65536.0f); + + if (FLOAT_IS_ZERO (rx - lx) || left_x == INT32_MIN || right_x == INT32_MAX) { - walker->stepper = 0; + walker->a_s = walker->r_s = walker->g_s = walker->b_s = 0.0f; + walker->a_b = (la + ra) / 2.0f; + walker->r_b = (lr + rr) / 510.0f; + walker->g_b = (lg + rg) / 510.0f; + walker->b_b = (lb + rb) / 510.0f; } else { - int32_t width = right_x - left_x; - walker->stepper = ((1 << 24) + width / 2) / width; + float w_rec = 1.0f / (rx - lx); + + walker->a_b = (la * rx - ra * lx) * w_rec; + walker->r_b = (lr * rx - rr * lx) * w_rec * (1.0f/255.0f); + walker->g_b = (lg * rx - rg * lx) * w_rec * (1.0f/255.0f); + walker->b_b = (lb * rx - rb * lx) * w_rec * (1.0f/255.0f); + + walker->a_s = (ra - la) * w_rec; + walker->r_s = (rr - lr) * w_rec * (1.0f/255.0f); + walker->g_s = (rg - lg) * w_rec * (1.0f/255.0f); + 
walker->b_s = (rb - lb) * w_rec * (1.0f/255.0f); } + + walker->left_x = left_x; + walker->right_x = right_x; walker->need_reset = FALSE; } @@ -142,31 +173,30 @@ uint32_t _pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker, pixman_fixed_48_16_t x) { - int dist, idist; - uint32_t t1, t2, a, color; + float a, r, g, b; + uint8_t a8, r8, g8, b8; + uint32_t v; + float y; if (walker->need_reset || x < walker->left_x || x >= walker->right_x) - gradient_walker_reset (walker, x); - - dist = ((int)(x - walker->left_x) * walker->stepper) >> 16; - idist = 256 - dist; + gradient_walker_reset (walker, x); - /* combined INTERPOLATE and premultiply */ - t1 = walker->left_rb * idist + walker->right_rb * dist; - t1 = (t1 >> 8) & 0xff00ff; + y = x * (1.0f / 65536.0f); - t2 = walker->left_ag * idist + walker->right_ag * dist; - t2 &= 0xff00ff00; + a = walker->a_s * y + walker->a_b; + r = a * (walker->r_s * y + walker->r_b); + g = a * (walker->g_s * y + walker->g_b); + b = a * (walker->b_s * y + walker->b_b); - color = t2 & 0xff000000; - a = t2 >> 24; + a8 = a + 0.5f; + r8 = r + 0.5f; + g8 = g + 0.5f; + b8 = b + 0.5f; - t1 = t1 * a + 0x800080; - t1 = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8; + v = ((a8 << 24) & 0xff000000) | + ((r8 << 16) & 0x00ff0000) | + ((g8 << 8) & 0x0000ff00) | + ((b8 >> 0) & 0x000000ff); - t2 = (t2 >> 8) * a + 0x800080; - t2 = (t2 + ((t2 >> 8) & 0xff00ff)); - - return (color | (t1 & 0xff00ff) | (t2 & 0xff00)); + return v; } - diff --git a/lib/pixman/pixman/pixman-image.c b/lib/pixman/pixman/pixman-image.c index d9c303441..65041b43b 100644 --- a/lib/pixman/pixman/pixman-image.c +++ b/lib/pixman/pixman/pixman-image.c @@ -373,6 +373,10 @@ compute_image_info (pixman_image_t *image) case PIXMAN_FILTER_CONVOLUTION: break; + case PIXMAN_FILTER_SEPARABLE_CONVOLUTION: + flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER; + break; + default: flags |= FAST_PATH_NO_CONVOLUTION_FILTER; break; @@ -515,8 +519,9 @@ compute_image_info (pixman_image_t *image) * if all channels 
are opaque, so we simply turn it off * unconditionally for those images. */ - if (image->common.alpha_map || - image->common.filter == PIXMAN_FILTER_CONVOLUTION || + if (image->common.alpha_map || + image->common.filter == PIXMAN_FILTER_CONVOLUTION || + image->common.filter == PIXMAN_FILTER_SEPARABLE_CONVOLUTION || image->common.component_alpha) { flags &= ~(FAST_PATH_IS_OPAQUE | FAST_PATH_SAMPLES_OPAQUE); @@ -679,6 +684,19 @@ pixman_image_set_filter (pixman_image_t * image, if (params == common->filter_params && filter == common->filter) return TRUE; + if (filter == PIXMAN_FILTER_SEPARABLE_CONVOLUTION) + { + int width = pixman_fixed_to_int (params[0]); + int height = pixman_fixed_to_int (params[1]); + int x_phase_bits = pixman_fixed_to_int (params[2]); + int y_phase_bits = pixman_fixed_to_int (params[3]); + int n_x_phases = (1 << x_phase_bits); + int n_y_phases = (1 << y_phase_bits); + + return_val_if_fail ( + n_params == 4 + n_x_phases * width + n_y_phases * height, FALSE); + } + new_params = NULL; if (params) { @@ -870,7 +888,7 @@ pixman_image_get_format (pixman_image_t *image) if (image->type == BITS) return image->bits.format; - return 0; + return PIXMAN_null; } uint32_t diff --git a/lib/pixman/pixman/pixman-implementation.c b/lib/pixman/pixman/pixman-implementation.c index a70892c75..cfb82bb1f 100644 --- a/lib/pixman/pixman/pixman-implementation.c +++ b/lib/pixman/pixman/pixman-implementation.c @@ -65,7 +65,13 @@ typedef struct PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache); -pixman_bool_t +static void +dummy_composite_rect (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ +} + +void _pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, pixman_op_t op, pixman_format_code_t src_format, @@ -142,7 +148,18 @@ _pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, ++info; } } - return FALSE; + + /* We should never reach this point */ + _pixman_log_error ( + FUNC, + "No composite function found\n" + 
"\n" + "The most likely cause of this is that this system has issues with\n" + "thread local storage\n"); + + *out_imp = NULL; + *out_func = dummy_composite_rect; + return; update_cache: if (i) @@ -160,8 +177,16 @@ update_cache: cache->cache[0].fast_path.dest_flags = dest_flags; cache->cache[0].fast_path.func = *out_func; } +} - return TRUE; +static void +dummy_combine (pixman_implementation_t *imp, + pixman_op_t op, + uint32_t * pd, + const uint32_t * ps, + const uint32_t * pm, + int w) +{ } pixman_combine_32_func_t @@ -199,7 +224,9 @@ _pixman_implementation_lookup_combiner (pixman_implementation_t *imp, imp = imp->fallback; } - return NULL; + /* We should never reach this point */ + _pixman_log_error (FUNC, "No known combine function\n"); + return dummy_combine; } pixman_bool_t @@ -242,12 +269,12 @@ _pixman_implementation_fill (pixman_implementation_t *imp, int y, int width, int height, - uint32_t xor) + uint32_t filler) { while (imp) { if (imp->fill && - ((*imp->fill) (imp, bits, stride, bpp, x, y, width, height, xor))) + ((*imp->fill) (imp, bits, stride, bpp, x, y, width, height, filler))) { return TRUE; } diff --git a/lib/pixman/pixman/pixman-inlines.h b/lib/pixman/pixman/pixman-inlines.h index 7f2e4047e..dd1c2f17f 100644 --- a/lib/pixman/pixman/pixman-inlines.h +++ b/lib/pixman/pixman/pixman-inlines.h @@ -88,6 +88,42 @@ pixman_fixed_to_bilinear_weight (pixman_fixed_t x) ((1 << BILINEAR_INTERPOLATION_BITS) - 1); } +#if BILINEAR_INTERPOLATION_BITS <= 4 +/* Inspired by Filter_32_opaque from Skia */ +static force_inline uint32_t +bilinear_interpolation (uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + int distxy, distxiy, distixy, distixiy; + uint32_t lo, hi; + + distx <<= (4 - BILINEAR_INTERPOLATION_BITS); + disty <<= (4 - BILINEAR_INTERPOLATION_BITS); + + distxy = distx * disty; + distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */ + distixy = (disty << 4) - distxy; /* disty * (16 - distx) */ + distixiy = + 16 * 16 - 
(disty << 4) - + (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */ + + lo = (tl & 0xff00ff) * distixiy; + hi = ((tl >> 8) & 0xff00ff) * distixiy; + + lo += (tr & 0xff00ff) * distxiy; + hi += ((tr >> 8) & 0xff00ff) * distxiy; + + lo += (bl & 0xff00ff) * distixy; + hi += ((bl >> 8) & 0xff00ff) * distixy; + + lo += (br & 0xff00ff) * distxy; + hi += ((br >> 8) & 0xff00ff) * distxy; + + return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff); +} + +#else #if SIZEOF_LONG > 4 static force_inline uint32_t @@ -184,6 +220,7 @@ bilinear_interpolation (uint32_t tl, uint32_t tr, } #endif +#endif // BILINEAR_INTERPOLATION_BITS <= 4 /* * For each scanline fetched from source image with PAD repeat: @@ -314,36 +351,36 @@ scanline_func_name (dst_type_t *dst, \ \ if (a1 == 0xff) \ { \ - *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ + *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ } \ else if (s1) \ { \ - d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \ - s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ + d = convert_ ## DST_FORMAT ## _to_8888 (*dst); \ + s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \ a1 ^= 0xff; \ UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ - *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ + *dst = convert_8888_to_ ## DST_FORMAT (d); \ } \ dst++; \ \ if (a2 == 0xff) \ { \ - *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ + *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \ } \ else if (s2) \ { \ - d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ - s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \ + d = convert_## DST_FORMAT ## _to_8888 (*dst); \ + s2 = convert_## SRC_FORMAT ## _to_8888 (s2); \ a2 ^= 0xff; \ UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \ - *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ + *dst = convert_8888_to_ ## DST_FORMAT (d); \ } \ dst++; \ } \ else /* PIXMAN_OP_SRC */ \ { \ - *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ + *dst++ = convert_ 
## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ + *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \ } \ } \ \ @@ -358,21 +395,21 @@ scanline_func_name (dst_type_t *dst, \ \ if (a1 == 0xff) \ { \ - *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ + *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ } \ else if (s1) \ { \ - d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ - s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ + d = convert_## DST_FORMAT ## _to_8888 (*dst); \ + s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \ a1 ^= 0xff; \ UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ - *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ + *dst = convert_8888_to_ ## DST_FORMAT (d); \ } \ dst++; \ } \ else /* PIXMAN_OP_SRC */ \ { \ - *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ + *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ } \ } \ } diff --git a/lib/pixman/pixman/pixman-matrix.c b/lib/pixman/pixman/pixman-matrix.c index a029ab757..89b96826b 100644 --- a/lib/pixman/pixman/pixman-matrix.c +++ b/lib/pixman/pixman/pixman-matrix.c @@ -34,6 +34,338 @@ #define F(x) pixman_int_to_fixed (x) +static force_inline int +count_leading_zeros (uint32_t x) +{ +#ifdef __GNUC__ + return __builtin_clz (x); +#else + int n = 0; + while (x) + { + n++; + x >>= 1; + } + return 32 - n; +#endif +} + +/* + * Large signed/unsigned integer division with rounding for the platforms with + * only 64-bit integer data type supported (no 128-bit data type). 
+ * + * Arguments: + * hi, lo - high and low 64-bit parts of the dividend + * div - 48-bit divisor + * + * Returns: lowest 64 bits of the result as a return value and highest 64 + * bits of the result to "result_hi" pointer + */ + +/* grade-school unsigned division (128-bit by 48-bit) with rounding to nearest */ +static force_inline uint64_t +rounded_udiv_128_by_48 (uint64_t hi, + uint64_t lo, + uint64_t div, + uint64_t *result_hi) +{ + uint64_t tmp, remainder, result_lo; + assert(div < ((uint64_t)1 << 48)); + + remainder = hi % div; + *result_hi = hi / div; + + tmp = (remainder << 16) + (lo >> 48); + result_lo = tmp / div; + remainder = tmp % div; + + tmp = (remainder << 16) + ((lo >> 32) & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + tmp = (remainder << 16) + ((lo >> 16) & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + tmp = (remainder << 16) + (lo & 0xFFFF); + result_lo = (result_lo << 16) + (tmp / div); + remainder = tmp % div; + + /* round to nearest */ + if (remainder * 2 >= div && ++result_lo == 0) + *result_hi += 1; + + return result_lo; +} + +/* signed division (128-bit by 49-bit) with rounding to nearest */ +static inline int64_t +rounded_sdiv_128_by_49 (int64_t hi, + uint64_t lo, + int64_t div, + int64_t *signed_result_hi) +{ + uint64_t result_lo, result_hi; + int sign = 0; + if (div < 0) + { + div = -div; + sign ^= 1; + } + if (hi < 0) + { + if (lo != 0) + hi++; + hi = -hi; + lo = -lo; + sign ^= 1; + } + result_lo = rounded_udiv_128_by_48 (hi, lo, div, &result_hi); + if (sign) + { + if (result_lo != 0) + result_hi++; + result_hi = -result_hi; + result_lo = -result_lo; + } + if (signed_result_hi) + { + *signed_result_hi = result_hi; + } + return result_lo; +} + +/* + * Multiply 64.16 fixed point value by (2^scalebits) and convert + * to 128-bit integer. 
+ */ +static force_inline void +fixed_64_16_to_int128 (int64_t hi, + int64_t lo, + int64_t *rhi, + int64_t *rlo, + int scalebits) +{ + /* separate integer and fractional parts */ + hi += lo >> 16; + lo &= 0xFFFF; + + if (scalebits <= 0) + { + *rlo = hi >> (-scalebits); + *rhi = *rlo >> 63; + } + else + { + *rhi = hi >> (64 - scalebits); + *rlo = (uint64_t)hi << scalebits; + if (scalebits < 16) + *rlo += lo >> (16 - scalebits); + else + *rlo += lo << (scalebits - 16); + } +} + +/* + * Convert 112.16 fixed point value to 48.16 with clamping for the out + * of range values. + */ +static force_inline pixman_fixed_48_16_t +fixed_112_16_to_fixed_48_16 (int64_t hi, int64_t lo, pixman_bool_t *clampflag) +{ + if ((lo >> 63) != hi) + { + *clampflag = TRUE; + return hi >= 0 ? INT64_MAX : INT64_MIN; + } + else + { + return lo; + } +} + +/* + * Transform a point with 31.16 fixed point coordinates from the destination + * space to a point with 48.16 fixed point coordinates in the source space. + * No overflows are possible for affine transformations and the results are + * accurate including the least significant bit. Projective transformations + * may overflow, in this case the results are just clamped to return maximum + * or minimum 48.16 values (so that the caller can at least handle the NONE + * and PAD repeats correctly) and the return value is FALSE to indicate that + * such clamping has happened. 
+ */ +PIXMAN_EXPORT pixman_bool_t +pixman_transform_point_31_16 (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + pixman_bool_t clampflag = FALSE; + int i; + int64_t tmp[3][2], divint; + uint16_t divfrac; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + for (i = 0; i < 3; i++) + { + tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); + tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); + } + + /* + * separate 64-bit integer and 16-bit fractional parts for the divisor, + * which is also scaled by 65536 after fixed point multiplication. 
+ */ + divint = tmp[2][0] + (tmp[2][1] >> 16); + divfrac = tmp[2][1] & 0xFFFF; + + if (divint == pixman_fixed_1 && divfrac == 0) + { + /* + * this is a simple affine transformation + */ + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + result->v[2] = pixman_fixed_1; + } + else if (divint == 0 && divfrac == 0) + { + /* + * handle zero divisor (if the values are non-zero, set the + * results to maximum positive or minimum negative) + */ + clampflag = TRUE; + + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + + if (result->v[0] > 0) + result->v[0] = INT64_MAX; + else if (result->v[0] < 0) + result->v[0] = INT64_MIN; + + if (result->v[1] > 0) + result->v[1] = INT64_MAX; + else if (result->v[1] < 0) + result->v[1] = INT64_MIN; + } + else + { + /* + * projective transformation, analyze the top 32 bits of the divisor + */ + int32_t hi32divbits = divint >> 32; + if (hi32divbits < 0) + hi32divbits = ~hi32divbits; + + if (hi32divbits == 0) + { + /* the divisor is small, we can actually keep all the bits */ + int64_t hi, rhi, lo, rlo; + int64_t div = (divint << 16) + divfrac; + + fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + + fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + } + else + { + /* the divisor needs to be reduced to 48 bits */ + int64_t hi, rhi, lo, rlo, div; + int shift = 32 - count_leading_zeros (hi32divbits); + fixed_64_16_to_int128 (divint, divfrac, &hi, &div, 16 - shift); + + fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32 - shift); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, 
&clampflag); + + fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32 - shift); + rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi); + result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag); + } + } + result->v[2] = pixman_fixed_1; + return !clampflag; +} + +PIXMAN_EXPORT void +pixman_transform_point_31_16_affine (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + int64_t hi0, lo0, hi1, lo1; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + hi0 = (int64_t)t->matrix[0][0] * (v->v[0] >> 16); + lo0 = (int64_t)t->matrix[0][0] * (v->v[0] & 0xFFFF); + hi0 += (int64_t)t->matrix[0][1] * (v->v[1] >> 16); + lo0 += (int64_t)t->matrix[0][1] * (v->v[1] & 0xFFFF); + hi0 += (int64_t)t->matrix[0][2]; + + hi1 = (int64_t)t->matrix[1][0] * (v->v[0] >> 16); + lo1 = (int64_t)t->matrix[1][0] * (v->v[0] & 0xFFFF); + hi1 += (int64_t)t->matrix[1][1] * (v->v[1] >> 16); + lo1 += (int64_t)t->matrix[1][1] * (v->v[1] & 0xFFFF); + hi1 += (int64_t)t->matrix[1][2]; + + result->v[0] = hi0 + ((lo0 + 0x8000) >> 16); + result->v[1] = hi1 + ((lo1 + 0x8000) >> 16); + result->v[2] = pixman_fixed_1; +} + +PIXMAN_EXPORT void +pixman_transform_point_31_16_3d (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result) +{ + int i; + int64_t tmp[3][2]; + + /* input vector values must have no more than 31 bits (including sign) + * in the integer part */ + assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + assert 
(v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16))); + assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16))); + + for (i = 0; i < 3; i++) + { + tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16); + tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF); + tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16); + tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF); + } + + result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16); + result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16); + result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16); +} + PIXMAN_EXPORT void pixman_transform_init_identity (struct pixman_transform *matrix) { @@ -50,69 +382,41 @@ PIXMAN_EXPORT pixman_bool_t pixman_transform_point_3d (const struct pixman_transform *transform, struct pixman_vector * vector) { - struct pixman_vector result; - pixman_fixed_32_32_t partial; - pixman_fixed_48_16_t v; - int i, j; + pixman_vector_48_16_t tmp; + tmp.v[0] = vector->vector[0]; + tmp.v[1] = vector->vector[1]; + tmp.v[2] = vector->vector[2]; - for (j = 0; j < 3; j++) - { - v = 0; - for (i = 0; i < 3; i++) - { - partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] * - (pixman_fixed_48_16_t) vector->vector[i]); - v += partial >> 16; - } - - if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) - return FALSE; - - result.vector[j] = (pixman_fixed_t) v; - } - - *vector = result; + pixman_transform_point_31_16_3d (transform, &tmp, &tmp); - if (!result.vector[2]) - return FALSE; + vector->vector[0] = tmp.v[0]; + vector->vector[1] = tmp.v[1]; + vector->vector[2] = tmp.v[2]; - return TRUE; + return vector->vector[0] == tmp.v[0] && + vector->vector[1] == tmp.v[1] && + vector->vector[2] == tmp.v[2]; } PIXMAN_EXPORT pixman_bool_t pixman_transform_point (const struct pixman_transform *transform, struct pixman_vector * vector) { - pixman_fixed_32_32_t partial; - 
pixman_fixed_34_30_t v[3]; - pixman_fixed_48_16_t quo; - int i, j; + pixman_vector_48_16_t tmp; + tmp.v[0] = vector->vector[0]; + tmp.v[1] = vector->vector[1]; + tmp.v[2] = vector->vector[2]; - for (j = 0; j < 3; j++) - { - v[j] = 0; - - for (i = 0; i < 3; i++) - { - partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] * - (pixman_fixed_32_32_t) vector->vector[i]); - v[j] += partial >> 2; - } - } - - if (!(v[2] >> 16)) - return FALSE; + if (!pixman_transform_point_31_16 (transform, &tmp, &tmp)) + return FALSE; - for (j = 0; j < 2; j++) - { - quo = v[j] / (v[2] >> 16); - if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16) - return FALSE; - vector->vector[j] = (pixman_fixed_t) quo; - } - - vector->vector[2] = pixman_fixed_1; - return TRUE; + vector->vector[0] = tmp.v[0]; + vector->vector[1] = tmp.v[1]; + vector->vector[2] = tmp.v[2]; + + return vector->vector[0] == tmp.v[0] && + vector->vector[1] == tmp.v[1] && + vector->vector[2] == tmp.v[2]; } PIXMAN_EXPORT pixman_bool_t @@ -138,7 +442,7 @@ pixman_transform_multiply (struct pixman_transform * dst, (pixman_fixed_32_32_t) l->matrix[dy][o] * (pixman_fixed_32_32_t) r->matrix[o][dx]; - v += partial >> 16; + v += (partial + 0x8000) >> 16; } if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16) @@ -336,14 +640,14 @@ PIXMAN_EXPORT pixman_bool_t pixman_transform_invert (struct pixman_transform * dst, const struct pixman_transform *src) { - struct pixman_f_transform m, r; + struct pixman_f_transform m; pixman_f_transform_from_pixman_transform (&m, src); - if (!pixman_f_transform_invert (&r, &m)) + if (!pixman_f_transform_invert (&m, &m)) return FALSE; - if (!pixman_transform_from_pixman_f_transform (dst, &r)) + if (!pixman_transform_from_pixman_f_transform (dst, &m)) return FALSE; return TRUE; @@ -469,10 +773,11 @@ PIXMAN_EXPORT pixman_bool_t pixman_f_transform_invert (struct pixman_f_transform * dst, const struct pixman_f_transform *src) { - double det; - int i, j; static const int a[3] = { 2, 2, 
1 }; static const int b[3] = { 1, 0, 0 }; + pixman_f_transform_t d; + double det; + int i, j; det = 0; for (i = 0; i < 3; i++) @@ -507,10 +812,12 @@ pixman_f_transform_invert (struct pixman_f_transform * dst, if (((i + j) & 1) != 0) p = -p; - dst->m[j][i] = det * p; + d.m[j][i] = det * p; } } + *dst = d; + return TRUE; } diff --git a/lib/pixman/pixman/pixman-mips-dspr2-asm.S b/lib/pixman/pixman/pixman-mips-dspr2-asm.S index b5cae1690..866e93e58 100644 --- a/lib/pixman/pixman/pixman-mips-dspr2-asm.S +++ b/lib/pixman/pixman/pixman-mips-dspr2-asm.S @@ -310,6 +310,649 @@ LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips) END(pixman_composite_src_x888_8888_asm_mips) +#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) +LEAF_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (b8g8r8) + * a2 - w + */ + + beqz a2, 6f + nop + + lui t8, 0xff00; + srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */ + beqz t9, 4f /* branch if less than 4 src pixels */ + nop + + li t0, 0x1 + li t1, 0x2 + li t2, 0x3 + andi t3, a1, 0x3 + beq t3, t0, 1f + nop + beq t3, t1, 2f + nop + beq t3, t2, 3f + nop + +0: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 0(a1) /* t0 = R2 | B1 | G1 | R1 */ + lw t1, 4(a1) /* t1 = G3 | R3 | B2 | G2 */ + lw t2, 8(a1) /* t2 = B4 | G4 | R4 | B3 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = B1 | R2 | R1 | G1 */ + wsbh t1, t1 /* t1 = R3 | G3 | G2 | B2 */ + wsbh t2, t2 /* t2 = G4 | B4 | B3 | R4 */ + + packrl.ph t3, t1, t0 /* t3 = G2 | B2 | B1 | R2 */ + packrl.ph t4, t0, t0 /* t4 = R1 | G1 | B1 | R2 */ + rotr t3, t3, 16 /* t3 = B1 | R2 | G2 | B2 */ + or t3, t3, t8 /* t3 = FF | R2 | G2 | B2 */ + srl t4, t4, 8 /* t4 = 0 | R1 | G1 | B1 */ + or t4, t4, t8 /* t4 = FF | R1 | G1 | B1 */ + packrl.ph t5, t2, t1 /* t5 = B3 | R4 | R3 | G3 */ + rotr t5, t5, 24 /* t5 = R4 | R3 | G3 | B3 */ + or t5, t5, t8 /* t5 = FF | R3 | G3 | B3 */ + rotr t2, t2, 16 /* t2 = B3 | R4 | G4 | B4 
*/ + or t2, t2, t8 /* t5 = FF | R3 | G3 | B3 */ + + sw t4, 0(a0) + sw t3, 4(a0) + sw t5, 8(a0) + sw t2, 12(a0) + b 0b + addiu a0, a0, 16 + +1: + lbu t6, 0(a1) /* t6 = 0 | 0 | 0 | R1 */ + lhu t7, 1(a1) /* t7 = 0 | 0 | B1 | G1 */ + sll t6, t6, 16 /* t6 = 0 | R1 | 0 | 0 */ + wsbh t7, t7 /* t7 = 0 | 0 | G1 | B1 */ + or t7, t6, t7 /* t7 = 0 | R1 | G1 | B1 */ +11: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 3(a1) /* t0 = R3 | B2 | G2 | R2 */ + lw t1, 7(a1) /* t1 = G4 | R4 | B3 | G3 */ + lw t2, 11(a1) /* t2 = B5 | G5 | R5 | B4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = B2 | R3 | R2 | G2 */ + wsbh t1, t1 /* t1 = R4 | G4 | G3 | B3 */ + wsbh t2, t2 /* t2 = G5 | B5 | B4 | R5 */ + + packrl.ph t3, t1, t0 /* t3 = G3 | B3 | B2 | R3 */ + packrl.ph t4, t2, t1 /* t4 = B4 | R5 | R4 | G4 */ + rotr t0, t0, 24 /* t0 = R3 | R2 | G2 | B2 */ + rotr t3, t3, 16 /* t3 = B2 | R3 | G3 | B3 */ + rotr t4, t4, 24 /* t4 = R5 | R4 | G4 | B4 */ + or t7, t7, t8 /* t7 = FF | R1 | G1 | B1 */ + or t0, t0, t8 /* t0 = FF | R2 | G2 | B2 */ + or t3, t3, t8 /* t1 = FF | R3 | G3 | B3 */ + or t4, t4, t8 /* t3 = FF | R4 | G4 | B4 */ + + sw t7, 0(a0) + sw t0, 4(a0) + sw t3, 8(a0) + sw t4, 12(a0) + rotr t7, t2, 16 /* t7 = xx | R5 | G5 | B5 */ + b 11b + addiu a0, a0, 16 + +2: + lhu t7, 0(a1) /* t7 = 0 | 0 | G1 | R1 */ + wsbh t7, t7 /* t7 = 0 | 0 | R1 | G1 */ +21: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 2(a1) /* t0 = B2 | G2 | R2 | B1 */ + lw t1, 6(a1) /* t1 = R4 | B3 | G3 | R3 */ + lw t2, 10(a1) /* t2 = G5 | R5 | B4 | G4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = G2 | B2 | B1 | R2 */ + wsbh t1, t1 /* t1 = B3 | R4 | R3 | G3 */ + wsbh t2, t2 /* t2 = R5 | G5 | G4 | B4 */ + + precr_sra.ph.w t7, t0, 0 /* t7 = R1 | G1 | B1 | R2 */ + rotr t0, t0, 16 /* t0 = B1 | R2 | G2 | B2 */ + packrl.ph t3, t2, t1 /* t3 = G4 | B4 | B3 | R4 */ + rotr t1, t1, 24 /* t1 = R4 | R3 | G3 | B3 */ + srl t7, t7, 8 /* t7 = 0 | R1 | G1 | B1 */ + rotr t3, t3, 16 /* t3 = B3 | R4 | G4 | B4 */ + or t7, 
t7, t8 /* t7 = FF | R1 | G1 | B1 */ + or t0, t0, t8 /* t0 = FF | R2 | G2 | B2 */ + or t1, t1, t8 /* t1 = FF | R3 | G3 | B3 */ + or t3, t3, t8 /* t3 = FF | R4 | G4 | B4 */ + + sw t7, 0(a0) + sw t0, 4(a0) + sw t1, 8(a0) + sw t3, 12(a0) + srl t7, t2, 16 /* t7 = 0 | 0 | R5 | G5 */ + b 21b + addiu a0, a0, 16 + +3: + lbu t7, 0(a1) /* t7 = 0 | 0 | 0 | R1 */ +31: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 1(a1) /* t0 = G2 | R2 | B1 | G1 */ + lw t1, 5(a1) /* t1 = B3 | G3 | R3 | B2 */ + lw t2, 9(a1) /* t2 = R5 | B4 | G4 | R4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = R2 | G2 | G1 | B1 */ + wsbh t1, t1 /* t1 = G3 | B3 | B2 | R3 */ + wsbh t2, t2 /* t2 = B4 | R5 | R4 | G4 */ + + precr_sra.ph.w t7, t0, 0 /* t7 = xx | R1 | G1 | B1 */ + packrl.ph t3, t1, t0 /* t3 = B2 | R3 | R2 | G2 */ + rotr t1, t1, 16 /* t1 = B2 | R3 | G3 | B3 */ + rotr t4, t2, 24 /* t4 = R5 | R4 | G4 | B4 */ + rotr t3, t3, 24 /* t3 = R3 | R2 | G2 | B2 */ + or t7, t7, t8 /* t7 = FF | R1 | G1 | B1 */ + or t3, t3, t8 /* t3 = FF | R2 | G2 | B2 */ + or t1, t1, t8 /* t1 = FF | R3 | G3 | B3 */ + or t4, t4, t8 /* t4 = FF | R4 | G4 | B4 */ + + sw t7, 0(a0) + sw t3, 4(a0) + sw t1, 8(a0) + sw t4, 12(a0) + srl t7, t2, 16 /* t7 = 0 | 0 | xx | R5 */ + b 31b + addiu a0, a0, 16 + +4: + beqz a2, 6f + nop +5: + lbu t0, 0(a1) /* t0 = 0 | 0 | 0 | R */ + lbu t1, 1(a1) /* t1 = 0 | 0 | 0 | G */ + lbu t2, 2(a1) /* t2 = 0 | 0 | 0 | B */ + addiu a1, a1, 3 + + sll t0, t0, 16 /* t2 = 0 | R | 0 | 0 */ + sll t1, t1, 8 /* t1 = 0 | 0 | G | 0 */ + + or t2, t2, t1 /* t2 = 0 | 0 | G | B */ + or t2, t2, t0 /* t2 = 0 | R | G | B */ + or t2, t2, t8 /* t2 = FF | R | G | B */ + + sw t2, 0(a0) + addiu a2, a2, -1 + bnez a2, 5b + addiu a0, a0, 4 +6: + j ra + nop + +END(pixman_composite_src_0888_8888_rev_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (b8g8r8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 0, v0, v1 + beqz a2, 6f + nop + + li t6, 0xf800f800 + li t7, 
0x07e007e0 + li t8, 0x001F001F + srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */ + beqz t9, 4f /* branch if less than 4 src pixels */ + nop + + li t0, 0x1 + li t1, 0x2 + li t2, 0x3 + andi t3, a1, 0x3 + beq t3, t0, 1f + nop + beq t3, t1, 2f + nop + beq t3, t2, 3f + nop + +0: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 0(a1) /* t0 = R2 | B1 | G1 | R1 */ + lw t1, 4(a1) /* t1 = G3 | R3 | B2 | G2 */ + lw t2, 8(a1) /* t2 = B4 | G4 | R4 | B3 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = B1 | R2 | R1 | G1 */ + wsbh t1, t1 /* t1 = R3 | G3 | G2 | B2 */ + wsbh t2, t2 /* t2 = G4 | B4 | B3 | R4 */ + + packrl.ph t3, t1, t0 /* t3 = G2 | B2 | B1 | R2 */ + packrl.ph t4, t0, t0 /* t4 = R1 | G1 | B1 | R2 */ + rotr t3, t3, 16 /* t3 = B1 | R2 | G2 | B2 */ + srl t4, t4, 8 /* t4 = 0 | R1 | G1 | B1 */ + packrl.ph t5, t2, t1 /* t5 = B3 | R4 | R3 | G3 */ + rotr t5, t5, 24 /* t5 = R4 | R3 | G3 | B3 */ + rotr t2, t2, 16 /* t2 = B3 | R4 | G4 | B4 */ + + CONVERT_2x8888_TO_2x0565 t4, t3, t4, t3, t6, t7, t8, v0, v1 + CONVERT_2x8888_TO_2x0565 t5, t2, t5, t2, t6, t7, t8, v0, v1 + + sh t4, 0(a0) + sh t3, 2(a0) + sh t5, 4(a0) + sh t2, 6(a0) + b 0b + addiu a0, a0, 8 + +1: + lbu t4, 0(a1) /* t4 = 0 | 0 | 0 | R1 */ + lhu t5, 1(a1) /* t5 = 0 | 0 | B1 | G1 */ + sll t4, t4, 16 /* t4 = 0 | R1 | 0 | 0 */ + wsbh t5, t5 /* t5 = 0 | 0 | G1 | B1 */ + or t5, t4, t5 /* t5 = 0 | R1 | G1 | B1 */ +11: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 3(a1) /* t0 = R3 | B2 | G2 | R2 */ + lw t1, 7(a1) /* t1 = G4 | R4 | B3 | G3 */ + lw t2, 11(a1) /* t2 = B5 | G5 | R5 | B4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = B2 | R3 | R2 | G2 */ + wsbh t1, t1 /* t1 = R4 | G4 | G3 | B3 */ + wsbh t2, t2 /* t2 = G5 | B5 | B4 | R5 */ + + packrl.ph t3, t1, t0 /* t3 = G3 | B3 | B2 | R3 */ + packrl.ph t4, t2, t1 /* t4 = B4 | R5 | R4 | G4 */ + rotr t0, t0, 24 /* t0 = R3 | R2 | G2 | B2 */ + rotr t3, t3, 16 /* t3 = B2 | R3 | G3 | B3 */ + rotr t4, t4, 24 /* t4 = R5 | R4 | G4 | B4 */ + + 
CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1 + CONVERT_2x8888_TO_2x0565 t3, t4, t3, t4, t6, t7, t8, v0, v1 + + sh t5, 0(a0) + sh t0, 2(a0) + sh t3, 4(a0) + sh t4, 6(a0) + rotr t5, t2, 16 /* t5 = xx | R5 | G5 | B5 */ + b 11b + addiu a0, a0, 8 + +2: + lhu t5, 0(a1) /* t5 = 0 | 0 | G1 | R1 */ + wsbh t5, t5 /* t5 = 0 | 0 | R1 | G1 */ +21: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 2(a1) /* t0 = B2 | G2 | R2 | B1 */ + lw t1, 6(a1) /* t1 = R4 | B3 | G3 | R3 */ + lw t2, 10(a1) /* t2 = G5 | R5 | B4 | G4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = G2 | B2 | B1 | R2 */ + wsbh t1, t1 /* t1 = B3 | R4 | R3 | G3 */ + wsbh t2, t2 /* t2 = R5 | G5 | G4 | B4 */ + + precr_sra.ph.w t5, t0, 0 /* t5 = R1 | G1 | B1 | R2 */ + rotr t0, t0, 16 /* t0 = B1 | R2 | G2 | B2 */ + packrl.ph t3, t2, t1 /* t3 = G4 | B4 | B3 | R4 */ + rotr t1, t1, 24 /* t1 = R4 | R3 | G3 | B3 */ + srl t5, t5, 8 /* t5 = 0 | R1 | G1 | B1 */ + rotr t3, t3, 16 /* t3 = B3 | R4 | G4 | B4 */ + + CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1 + CONVERT_2x8888_TO_2x0565 t1, t3, t1, t3, t6, t7, t8, v0, v1 + + sh t5, 0(a0) + sh t0, 2(a0) + sh t1, 4(a0) + sh t3, 6(a0) + srl t5, t2, 16 /* t5 = 0 | 0 | R5 | G5 */ + b 21b + addiu a0, a0, 8 + +3: + lbu t5, 0(a1) /* t5 = 0 | 0 | 0 | R1 */ +31: + beqz t9, 4f + addiu t9, t9, -1 + lw t0, 1(a1) /* t0 = G2 | R2 | B1 | G1 */ + lw t1, 5(a1) /* t1 = B3 | G3 | R3 | B2 */ + lw t2, 9(a1) /* t2 = R5 | B4 | G4 | R4 */ + + addiu a1, a1, 12 + addiu a2, a2, -4 + + wsbh t0, t0 /* t0 = R2 | G2 | G1 | B1 */ + wsbh t1, t1 /* t1 = G3 | B3 | B2 | R3 */ + wsbh t2, t2 /* t2 = B4 | R5 | R4 | G4 */ + + precr_sra.ph.w t5, t0, 0 /* t5 = xx | R1 | G1 | B1 */ + packrl.ph t3, t1, t0 /* t3 = B2 | R3 | R2 | G2 */ + rotr t1, t1, 16 /* t1 = B2 | R3 | G3 | B3 */ + rotr t4, t2, 24 /* t4 = R5 | R4 | G4 | B4 */ + rotr t3, t3, 24 /* t3 = R3 | R2 | G2 | B2 */ + + CONVERT_2x8888_TO_2x0565 t5, t3, t5, t3, t6, t7, t8, v0, v1 + CONVERT_2x8888_TO_2x0565 t1, t4, t1, t4, t6, t7, 
t8, v0, v1 + + sh t5, 0(a0) + sh t3, 2(a0) + sh t1, 4(a0) + sh t4, 6(a0) + srl t5, t2, 16 /* t5 = 0 | 0 | xx | R5 */ + b 31b + addiu a0, a0, 8 + +4: + beqz a2, 6f + nop +5: + lbu t0, 0(a1) /* t0 = 0 | 0 | 0 | R */ + lbu t1, 1(a1) /* t1 = 0 | 0 | 0 | G */ + lbu t2, 2(a1) /* t2 = 0 | 0 | 0 | B */ + addiu a1, a1, 3 + + sll t0, t0, 16 /* t2 = 0 | R | 0 | 0 */ + sll t1, t1, 8 /* t1 = 0 | 0 | G | 0 */ + + or t2, t2, t1 /* t2 = 0 | 0 | G | B */ + or t2, t2, t0 /* t2 = 0 | R | G | B */ + + CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5 + + sh t3, 0(a0) + addiu a2, a2, -1 + bnez a2, 5b + addiu a0, a0, 2 +6: + RESTORE_REGS_FROM_STACK 0, v0, v1 + j ra + nop + +END(pixman_composite_src_0888_0565_rev_asm_mips) +#endif + +LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips) +/* + * a0 - dst (a8b8g8r8) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 0, v0 + li v0, 0x00ff00ff + + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) + lw t1, 4(a1) + addiu a1, a1, 8 + addiu a2, a2, -2 + srl t2, t0, 24 + srl t3, t1, 24 + + MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9 + + sll t0, t0, 8 + sll t1, t1, 8 + andi t2, t2, 0xff + andi t3, t3, 0xff + or t0, t0, t2 + or t1, t1, t3 + wsbh t0, t0 + wsbh t1, t1 + rotr t0, t0, 16 + rotr t1, t1, 16 + sw t0, 0(a0) + sw t1, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + lw t0, 0(a1) + srl t1, t0, 24 + + MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5 + + sll t0, t0, 8 + andi t1, t1, 0xff + or t0, t0, t1 + wsbh t0, t0 + rotr t0, t0, 16 + sw t0, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_composite_src_pixbuf_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 0, v0 + li v0, 0x00ff00ff + + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) + lw t1, 4(a1) + addiu a1, a1, 8 + addiu a2, a2, 
-2 + srl t2, t0, 24 + srl t3, t1, 24 + + MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9 + + sll t0, t0, 8 + sll t1, t1, 8 + andi t2, t2, 0xff + andi t3, t3, 0xff + or t0, t0, t2 + or t1, t1, t3 + rotr t0, t0, 8 + rotr t1, t1, 8 + sw t0, 0(a0) + sw t1, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + lw t0, 0(a1) + srl t1, t0, 24 + + MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5 + + sll t0, t0, 8 + andi t1, t1, 0xff + or t0, t0, t1 + rotr t0, t0, 8 + sw t0, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_composite_src_rpixbuf_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (32bit constant) + * a2 - mask (a8) + * a3 - w + */ + + + SAVE_REGS_ON_STACK 0, v0 + li v0, 0x00ff00ff + + beqz a3, 3f + nop + addiu t1, a3, -1 + beqz t1, 2f + nop + +1: + /* a1 = source (32bit constant) */ + lbu t0, 0(a2) /* t2 = mask (a8) */ + lbu t1, 1(a2) /* t3 = mask (a8) */ + addiu a2, a2, 2 + + MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, t2, t3, v0, t4, t5, t6, t7, t8, t9 + + sw t2, 0(a0) + sw t3, 4(a0) + addiu a3, a3, -2 + addiu t2, a3, -1 + bgtz t2, 1b + addiu a0, a0, 8 + + beqz a3, 3f + nop + +2: + lbu t0, 0(a2) + addiu a2, a2, 1 + + MIPS_UN8x4_MUL_UN8 a1, t0, t1, v0, t3, t4, t5 + + sw t1, 0(a0) + addiu a3, a3, -1 + addiu a0, a0, 4 + +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_composite_src_n_8_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm_mips) +/* + * a0 - dst (a8) + * a1 - src (32bit constant) + * a2 - mask (a8) + * a3 - w + */ + + li t9, 0x00ff00ff + beqz a3, 3f + nop + srl t7, a3, 2 /* t7 = how many multiples of 4 dst pixels */ + beqz t7, 1f /* branch if less than 4 src pixels */ + nop + + srl t8, a1, 24 + replv.ph t8, t8 + +0: + beqz t7, 1f + addiu t7, t7, -1 + lbu t0, 0(a2) + lbu t1, 1(a2) + lbu t2, 2(a2) + lbu t3, 3(a2) + + addiu a2, a2, 4 + + precr_sra.ph.w t1, t0, 0 + precr_sra.ph.w t3, t2, 0 + 
precr.qb.ph t0, t3, t1 + + muleu_s.ph.qbl t2, t0, t8 + muleu_s.ph.qbr t3, t0, t8 + shra_r.ph t4, t2, 8 + shra_r.ph t5, t3, 8 + and t4, t4, t9 + and t5, t5, t9 + addq.ph t2, t2, t4 + addq.ph t3, t3, t5 + shra_r.ph t2, t2, 8 + shra_r.ph t3, t3, 8 + precr.qb.ph t2, t2, t3 + + sb t2, 0(a0) + srl t2, t2, 8 + sb t2, 1(a0) + srl t2, t2, 8 + sb t2, 2(a0) + srl t2, t2, 8 + sb t2, 3(a0) + addiu a3, a3, -4 + b 0b + addiu a0, a0, 4 + +1: + beqz a3, 3f + nop + srl t8, a1, 24 +2: + lbu t0, 0(a2) + addiu a2, a2, 1 + + mul t2, t0, t8 + shra_r.ph t3, t2, 8 + andi t3, t3, 0x00ff + addq.ph t2, t2, t3 + shra_r.ph t2, t2, 8 + + sb t2, 0(a0) + addiu a3, a3, -1 + bnez a3, 2b + addiu a0, a0, 1 + +3: + j ra + nop + +END(pixman_composite_src_n_8_8_asm_mips) + LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips) /* * a0 - dst (a8r8g8b8) @@ -318,34 +961,35 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips) * a3 - w */ - SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5 - beqz a3, 4f + beqz a3, 8f nop + SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5 + li t6, 0xff addiu t7, zero, -1 /* t7 = 0xffffffff */ srl t8, a1, 24 /* t8 = srca */ li t9, 0x00ff00ff + addiu t1, a3, -1 - beqz t1, 3f /* last pixel */ - nop - beq t8, t6, 2f /* if (srca == 0xff) */ + beqz t1, 4f /* last pixel */ nop -1: - /* a1 = src */ + +0: lw t0, 0(a2) /* t0 = mask */ lw t1, 4(a2) /* t1 = mask */ + addiu a3, a3, -2 /* w = w - 2 */ or t2, t0, t1 - beqz t2, 12f /* if (t0 == 0) && (t1 == 0) */ + beqz t2, 3f /* if (t0 == 0) && (t1 == 0) */ addiu a2, a2, 8 - and t3, t0, t1 - move t4, a1 /* t4 = src */ - move t5, a1 /* t5 = src */ + and t2, t0, t1 + beq t2, t7, 1f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ + nop + +//if(ma) lw t2, 0(a0) /* t2 = dst */ - beq t3, t7, 11f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - lw t3, 4(a0) /* t3 = dst */ + lw t3, 4(a0) /* t3 = dst */ MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5 MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, s0, 
s1, s2, s3, s4, s5 -11: not t0, t0 not t1, t1 MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 @@ -353,60 +997,77 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips) addu_s.qb t3, t5, t3 sw t2, 0(a0) sw t3, 4(a0) -12: - addiu a3, a3, -2 addiu t1, a3, -1 - bgtz t1, 1b + bgtz t1, 0b addiu a0, a0, 8 - b 3f + b 4f + nop +1: +//if (t0 == 0xffffffff) && (t1 == 0xffffffff): + beq t8, t6, 2f /* if (srca == 0xff) */ nop -2: - /* a1 = src */ - lw t0, 0(a2) /* t0 = mask */ - lw t1, 4(a2) /* t1 = mask */ - or t2, t0, t1 - beqz t2, 22f /* if (t0 == 0) & (t1 == 0) */ - addiu a2, a2, 8 - and t2, t0, t1 - move t4, a1 - beq t2, t7, 21f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - move t5, a1 lw t2, 0(a0) /* t2 = dst */ lw t3, 4(a0) /* t3 = dst */ - MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5 - not t0, t0 - not t1, t1 - MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 - addu_s.qb t4, t4, t2 - addu_s.qb t5, t5, t3 -21: - sw t4, 0(a0) - sw t5, 4(a0) -22: - addiu a3, a3, -2 + not t0, a1 + not t1, a1 + srl t0, t0, 24 + srl t1, t1, 24 + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 + addu_s.qb t2, a1, t2 + addu_s.qb t3, a1, t3 + sw t2, 0(a0) + sw t3, 4(a0) addiu t1, a3, -1 - bgtz t1, 2b + bgtz t1, 0b addiu a0, a0, 8 + b 4f + nop +2: + sw a1, 0(a0) + sw a1, 4(a0) 3: - blez a3, 4f + addiu t1, a3, -1 + bgtz t1, 0b + addiu a0, a0, 8 + +4: + beqz a3, 7f nop /* a1 = src */ - lw t1, 0(a2) /* t1 = mask */ - beqz t1, 4f + lw t0, 0(a2) /* t0 = mask */ + beqz t0, 7f /* if (t0 == 0) */ nop - move t2, a1 /* t2 = src */ - beq t1, t7, 31f - lw t0, 0(a0) /* t0 = dst */ - - MIPS_UN8x4_MUL_UN8x4 a1, t1, t2, t9, t3, t4, t5, t6 - MIPS_UN8x4_MUL_UN8 t1, t8, t1, t9, t3, t4, t5 -31: - not t1, t1 - MIPS_UN8x4_MUL_UN8x4 t0, t1, t0, t9, t3, t4, t5, t6 - addu_s.qb t0, t2, t0 - sw t0, 0(a0) -4: + beq t0, t7, 5f /* if (t0 == 0xffffffff) */ + nop +//if(ma) + lw t1, 0(a0) /* t1 = dst 
*/ + MIPS_UN8x4_MUL_UN8x4 a1, t0, t2, t9, t3, t4, t5, s0 + MIPS_UN8x4_MUL_UN8 t0, t8, t0, t9, t3, t4, t5 + not t0, t0 + MIPS_UN8x4_MUL_UN8x4 t1, t0, t1, t9, t3, t4, t5, s0 + addu_s.qb t1, t2, t1 + sw t1, 0(a0) + RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 + j ra + nop +5: +//if (t0 == 0xffffffff) + beq t8, t6, 6f /* if (srca == 0xff) */ + nop + lw t1, 0(a0) /* t1 = dst */ + not t0, a1 + srl t0, t0, 24 + MIPS_UN8x4_MUL_UN8 t1, t0, t1, t9, t2, t3, t4 + addu_s.qb t1, a1, t1 + sw t1, 0(a0) + RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 + j ra + nop +6: + sw a1, 0(a0) +7: RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 +8: j ra nop @@ -420,111 +1081,251 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips) * a3 - w */ - SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 - beqz a3, 4f + beqz a3, 8f nop - li t5, 0xf800f800 - li t6, 0x07e007e0 - li t7, 0x001F001F - li t9, 0x00ff00ff + SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + li t6, 0xff + addiu t7, zero, -1 /* t7 = 0xffffffff */ srl t8, a1, 24 /* t8 = srca */ + li t9, 0x00ff00ff + li s6, 0xf800f800 + li s7, 0x07e007e0 + li s8, 0x001F001F + addiu t1, a3, -1 - beqz t1, 3f /* last pixel */ + beqz t1, 4f /* last pixel */ nop - li s0, 0xff /* s0 = 0xff */ - addiu s1, zero, -1 /* s1 = 0xffffffff */ - beq t8, s0, 2f /* if (srca == 0xff) */ - nop -1: - /* a1 = src */ +0: lw t0, 0(a2) /* t0 = mask */ lw t1, 4(a2) /* t1 = mask */ + addiu a3, a3, -2 /* w = w - 2 */ or t2, t0, t1 - beqz t2, 12f /* if (t0 == 0) && (t1 == 0) */ + beqz t2, 3f /* if (t0 == 0) && (t1 == 0) */ addiu a2, a2, 8 - and t3, t0, t1 - move s2, a1 /* s2 = src */ - move s3, a1 /* s3 = src */ + and t2, t0, t1 + beq t2, t7, 1f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ + nop + +//if(ma) lhu t2, 0(a0) /* t2 = dst */ - beq t3, s1, 11f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - lhu t3, 2(a0) /* t3 = dst */ - MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8 - 
MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, t4, s4, s5, s6, s7, s8 -11: + lhu t3, 2(a0) /* t3 = dst */ + MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5 + MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5 not t0, t0 not t1, t1 - CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8 - MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t0, t1 - addu_s.qb s2, s2, s4 - addu_s.qb s3, s3, s5 - CONVERT_2x8888_TO_2x0565 s2, s3, t2, t3, t5, t6, t7, s4, s5 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3 + MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 + addu_s.qb t2, t4, t2 + addu_s.qb t3, t5, t3 + CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1 sh t2, 0(a0) sh t3, 2(a0) -12: - addiu a3, a3, -2 addiu t1, a3, -1 - bgtz t1, 1b + bgtz t1, 0b addiu a0, a0, 4 - b 3f + b 4f + nop +1: +//if (t0 == 0xffffffff) && (t1 == 0xffffffff): + beq t8, t6, 2f /* if (srca == 0xff) */ nop -2: - /* a1 = src */ - lw t0, 0(a2) /* t0 = mask */ - lw t1, 4(a2) /* t1 = mask */ - or t2, t0, t1 - beqz t2, 22f /* if (t0 == 0) & (t1 == 0) */ - addiu a2, a2, 8 - and t3, t0, t1 - move t2, a1 - beq t3, s1, 21f /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */ - move t3, a1 lhu t2, 0(a0) /* t2 = dst */ lhu t3, 2(a0) /* t3 = dst */ - MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, s2, s3, t9, t4, s4, s5, s6, s7, s8 - not t0, t0 - not t1, t1 - CONVERT_2x0565_TO_2x8888 t2, t3, s4, s5, t6, t7, t4, s6, s7, s8 - MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t2, t3 - addu_s.qb t2, s2, s4 - addu_s.qb t3, s3, s5 -21: - CONVERT_2x8888_TO_2x0565 t2, t3, t0, t1, t5, t6, t7, s2, s3 - sh t0, 0(a0) - sh t1, 2(a0) -22: - addiu a3, a3, -2 + not t0, a1 + not t1, a1 + srl t0, t0, 24 + srl t1, t1, 24 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3 + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5 + addu_s.qb t2, a1, t2 + 
addu_s.qb t3, a1, t3 + CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1 + sh t2, 0(a0) + sh t3, 2(a0) addiu t1, a3, -1 - bgtz t1, 2b + bgtz t1, 0b addiu a0, a0, 4 + b 4f + nop +2: + CONVERT_1x8888_TO_1x0565 a1, t2, s0, s1 + sh t2, 0(a0) + sh t2, 2(a0) 3: - blez a3, 4f + addiu t1, a3, -1 + bgtz t1, 0b + addiu a0, a0, 4 + +4: + beqz a3, 7f nop /* a1 = src */ - lw t1, 0(a2) /* t1 = mask */ - beqz t1, 4f + lw t0, 0(a2) /* t0 = mask */ + beqz t0, 7f /* if (t0 == 0) */ nop - move t2, a1 /* t2 = src */ - beq t1, t7, 31f - lhu t0, 0(a0) /* t0 = dst */ - - MIPS_UN8x4_MUL_UN8x4 a1, t1, t2, t9, t3, t4, t5, t6 - MIPS_UN8x4_MUL_UN8 t1, t8, t1, t9, t3, t4, t5 -31: - not t1, t1 - CONVERT_1x0565_TO_1x8888 t0, s1, s2, s3 - MIPS_UN8x4_MUL_UN8x4 s1, t1, t3, t9, t4, t5, t6, t7 - addu_s.qb t0, t2, t3 - CONVERT_1x8888_TO_1x0565 t0, s1, s2, s3 - sh s1, 0(a0) -4: - RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + beq t0, t7, 5f /* if (t0 == 0xffffffff) */ + nop +//if(ma) + lhu t1, 0(a0) /* t1 = dst */ + MIPS_UN8x4_MUL_UN8x4 a1, t0, t2, t9, t3, t4, t5, s0 + MIPS_UN8x4_MUL_UN8 t0, t8, t0, t9, t3, t4, t5 + not t0, t0 + CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3 + MIPS_UN8x4_MUL_UN8x4 s1, t0, s1, t9, t3, t4, t5, s0 + addu_s.qb s1, t2, s1 + CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2 + sh t1, 0(a0) + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + j ra + nop +5: +//if (t0 == 0xffffffff) + beq t8, t6, 6f /* if (srca == 0xff) */ + nop + lhu t1, 0(a0) /* t1 = dst */ + not t0, a1 + srl t0, t0, 24 + CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3 + MIPS_UN8x4_MUL_UN8 s1, t0, s1, t9, t2, t3, t4 + addu_s.qb s1, a1, s1 + CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2 + sh t1, 0(a0) + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 + j ra + nop +6: + CONVERT_1x8888_TO_1x0565 a1, t1, s0, s2 + sh t1, 0(a0) +7: + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8 +8: j ra nop END(pixman_composite_over_n_8888_0565_ca_asm_mips) 
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8_asm_mips) +/* + * a0 - dst (a8) + * a1 - src (32bit constant) + * a2 - mask (a8) + * a3 - w + */ + + SAVE_REGS_ON_STACK 0, v0 + li t9, 0x00ff00ff + beqz a3, 3f + nop + srl v0, a3, 2 /* v0 = how many multiples of 4 dst pixels */ + beqz v0, 1f /* branch if less than 4 src pixels */ + nop + + srl t8, a1, 24 + replv.ph t8, t8 + +0: + beqz v0, 1f + addiu v0, v0, -1 + lbu t0, 0(a2) + lbu t1, 1(a2) + lbu t2, 2(a2) + lbu t3, 3(a2) + lbu t4, 0(a0) + lbu t5, 1(a0) + lbu t6, 2(a0) + lbu t7, 3(a0) + + addiu a2, a2, 4 + + precr_sra.ph.w t1, t0, 0 + precr_sra.ph.w t3, t2, 0 + precr_sra.ph.w t5, t4, 0 + precr_sra.ph.w t7, t6, 0 + + precr.qb.ph t0, t3, t1 + precr.qb.ph t1, t7, t5 + + muleu_s.ph.qbl t2, t0, t8 + muleu_s.ph.qbr t3, t0, t8 + shra_r.ph t4, t2, 8 + shra_r.ph t5, t3, 8 + and t4, t4, t9 + and t5, t5, t9 + addq.ph t2, t2, t4 + addq.ph t3, t3, t5 + shra_r.ph t2, t2, 8 + shra_r.ph t3, t3, 8 + precr.qb.ph t0, t2, t3 + not t6, t0 + + preceu.ph.qbl t7, t6 + preceu.ph.qbr t6, t6 + + muleu_s.ph.qbl t2, t1, t7 + muleu_s.ph.qbr t3, t1, t6 + shra_r.ph t4, t2, 8 + shra_r.ph t5, t3, 8 + and t4, t4, t9 + and t5, t5, t9 + addq.ph t2, t2, t4 + addq.ph t3, t3, t5 + shra_r.ph t2, t2, 8 + shra_r.ph t3, t3, 8 + precr.qb.ph t1, t2, t3 + + addu_s.qb t2, t0, t1 + + sb t2, 0(a0) + srl t2, t2, 8 + sb t2, 1(a0) + srl t2, t2, 8 + sb t2, 2(a0) + srl t2, t2, 8 + sb t2, 3(a0) + addiu a3, a3, -4 + b 0b + addiu a0, a0, 4 + +1: + beqz a3, 3f + nop + srl t8, a1, 24 +2: + lbu t0, 0(a2) + lbu t1, 0(a0) + addiu a2, a2, 1 + + mul t2, t0, t8 + shra_r.ph t3, t2, 8 + andi t3, t3, 0x00ff + addq.ph t2, t2, t3 + shra_r.ph t2, t2, 8 + not t3, t2 + andi t3, t3, 0x00ff + + + mul t4, t1, t3 + shra_r.ph t5, t4, 8 + andi t5, t5, 0x00ff + addq.ph t4, t4, t5 + shra_r.ph t4, t4, 8 + andi t4, t4, 0x00ff + + addu_s.qb t2, t2, t4 + sb t2, 0(a0) + addiu a3, a3, -1 + bnez a3, 2b + addiu a0, a0, 1 + +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + 
+END(pixman_composite_over_n_8_8_asm_mips) + LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips) /* * a0 - dst (a8r8g8b8) @@ -1209,6 +2010,218 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips) END(pixman_composite_over_8888_8888_asm_mips) +LEAF_MIPS_DSPR2(pixman_composite_over_8888_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8r8g8b8) + * a2 - w + */ + + SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5 + li t4, 0x00ff00ff + li s3, 0xf800f800 + li s4, 0x07e007e0 + li s5, 0x001F001F + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + lhu t3, 2(a0) /* t3 = destination (r5g6b5) */ + addiu a1, a1, 8 + + not t5, t0 + srl t5, t5, 24 + not t6, t1 + srl t6, t6, 24 + + or t7, t5, t6 + beqz t7, 11f + or t8, t0, t1 + beqz t8, 12f + + CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, s4, s5, t7, t8, t9, s2 + MIPS_2xUN8x4_MUL_2xUN8 s0, s1, t5, t6, t7, t8, t4, t9, t2, t3, s2, s0, s1 + + addu_s.qb t0, t7, t0 + addu_s.qb t1, t8, t1 +11: + CONVERT_2x8888_TO_2x0565 t0, t1, t7, t8, s3, s4, s5, t2, t3 + sh t7, 0(a0) + sh t8, 2(a0) +12: + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a2, 3f + nop + + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + addiu a1, a1, 4 + + not t2, t0 + srl t2, t2, 24 + + beqz t2, 21f + nop + beqz t0, 3f + + CONVERT_1x0565_TO_1x8888 t1, s0, t8, t9 + MIPS_UN8x4_MUL_UN8 s0, t2, t3, t4, t5, t6, t7 + + addu_s.qb t0, t3, t0 +21: + CONVERT_1x8888_TO_1x0565 t0, s0, t8, t9 + sh s0, 0(a0) + +3: + RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5 + j ra + nop + +END(pixman_composite_over_8888_0565_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (32bit constant) + * a2 - w + */ + + beqz a2, 5f + nop + + not t0, a1 + srl t0, t0, 24 + bgtz t0, 1f + nop + CONVERT_1x8888_TO_1x0565 
a1, t1, t2, t3 +0: + sh t1, 0(a0) + addiu a2, a2, -1 + bgtz a2, 0b + addiu a0, a0, 2 + j ra + nop + +1: + SAVE_REGS_ON_STACK 0, s0, s1, s2 + li t4, 0x00ff00ff + li t5, 0xf800f800 + li t6, 0x07e007e0 + li t7, 0x001F001F + addiu t1, a2, -1 + beqz t1, 3f + nop +2: + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + lhu t2, 2(a0) /* t2 = destination (r5g6b5) */ + + CONVERT_2x0565_TO_2x8888 t1, t2, t3, t8, t6, t7, t9, s0, s1, s2 + MIPS_2xUN8x4_MUL_2xUN8 t3, t8, t0, t0, t1, t2, t4, t9, s0, s1, s2, t3, t8 + addu_s.qb t1, t1, a1 + addu_s.qb t2, t2, a1 + CONVERT_2x8888_TO_2x0565 t1, t2, t3, t8, t5, t6, t7, s0, s1 + + sh t3, 0(a0) + sh t8, 2(a0) + + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 2b + addiu a0, a0, 4 +3: + beqz a2, 4f + nop + + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + + CONVERT_1x0565_TO_1x8888 t1, t2, s0, s1 + MIPS_UN8x4_MUL_UN8 t2, t0, t1, t4, s0, s1, s2 + addu_s.qb t1, t1, a1 + CONVERT_1x8888_TO_1x0565 t1, t2, s0, s1 + + sh t2, 0(a0) + +4: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2 +5: + j ra + nop + +END(pixman_composite_over_n_0565_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (32bit constant) + * a2 - w + */ + + beqz a2, 5f + nop + + not t0, a1 + srl t0, t0, 24 + bgtz t0, 1f + nop +0: + sw a1, 0(a0) + addiu a2, a2, -1 + bgtz a2, 0b + addiu a0, a0, 4 + j ra + nop + +1: + SAVE_REGS_ON_STACK 0, s0, s1, s2 + li t4, 0x00ff00ff + addiu t1, a2, -1 + beqz t1, 3f + nop +2: + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ + + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t0, t7, t8, t4, t9, s0, s1, s2, t2, t3 + + addu_s.qb t7, t7, a1 + addu_s.qb t8, t8, a1 + + sw t7, 0(a0) + sw t8, 4(a0) + + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 2b + addiu a0, a0, 8 +3: + beqz a2, 4f + nop + + lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ + + MIPS_UN8x4_MUL_UN8 t1, t0, t3, t4, t5, t6, t7 + + addu_s.qb t3, t3, a1 + + sw t3, 0(a0) + +4: + RESTORE_REGS_FROM_STACK 0, s0, 
s1, s2 +5: + j ra + nop + +END(pixman_composite_over_n_8888_asm_mips) + LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips) /* * a0 - dst (a8) @@ -1833,6 +2846,671 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips) END(pixman_composite_add_8888_8888_asm_mips) +LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8) + * a2 - w + */ + + beqz a2, 4f + nop + + SAVE_REGS_ON_STACK 0, s0, s1, s2, s3 + li t2, 0xf800f800 + li t3, 0x07e007e0 + li t4, 0x001F001F + li t5, 0x00ff00ff + + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lbu t0, 0(a1) /* t0 = source (a8) */ + lbu t1, 1(a1) /* t1 = source (a8) */ + lhu t6, 0(a0) /* t6 = destination (r5g6b5) */ + lhu t7, 2(a0) /* t7 = destination (r5g6b5) */ + addiu a1, a1, 2 + + not t0, t0 + not t1, t1 + andi t0, 0xff /* t0 = neg source1 */ + andi t1, 0xff /* t1 = neg source2 */ + CONVERT_2x0565_TO_2x8888 t6, t7, t8, t9, t3, t4, s0, s1, s2, s3 + MIPS_2xUN8x4_MUL_2xUN8 t8, t9, t0, t1, t6, t7, t5, s0, s1, s2, s3, t8, t9 + CONVERT_2x8888_TO_2x0565 t6, t7, t8, t9, t2, t3, t4, s0, s1 + + sh t8, 0(a0) + sh t9, 2(a0) + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a2, 3f + nop + lbu t0, 0(a1) /* t0 = source (a8) */ + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + + not t0, t0 + andi t0, 0xff /* t0 = neg source */ + CONVERT_1x0565_TO_1x8888 t1, t2, t3, t4 + MIPS_UN8x4_MUL_UN8 t2, t0, t1, t5, t3, t4, t6 + CONVERT_1x8888_TO_1x0565 t1, t2, t3, t4 + + sh t2, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 +4: + j ra + nop + +END(pixman_composite_out_reverse_8_0565_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8) + * a2 - w + */ + + beqz a2, 3f + nop + li t4, 0x00ff00ff + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + lbu t0, 0(a1) /* t0 = source (a8) */ + lbu t1, 1(a1) /* t1 = source (a8) */ + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + lw t3, 4(a0) /* t3 = destination 
(a8r8g8b8) */ + addiu a1, a1, 2 + not t0, t0 + not t1, t1 + andi t0, 0xff /* t0 = neg source */ + andi t1, 0xff /* t1 = neg source */ + + MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t5, t6, t4, t7, t8, t9, t2, t3, t0 + + sw t5, 0(a0) + sw t6, 4(a0) + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + lbu t0, 0(a1) /* t0 = source (a8) */ + lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ + not t0, t0 + andi t0, 0xff /* t0 = neg source */ + + MIPS_UN8x4_MUL_UN8 t1, t0, t2, t4, t3, t5, t6 + + sw t2, 0(a0) +3: + j ra + nop + +END(pixman_composite_out_reverse_8_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (32bit constant) + * a2 - w + */ + + beqz a2, 5f + nop + + SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 + li t0, 0x00ff00ff + srl t9, a2, 2 /* t9 = how many multiples of 4 src pixels */ + beqz t9, 2f /* branch if less than 4 src pixels */ + nop +1: + beqz t9, 2f + addiu t9, t9, -1 + + lw t1, 0(a0) + lw t2, 4(a0) + lw t3, 8(a0) + lw t4, 12(a0) + + addiu a2, a2, -4 + + not t5, t1 + not t6, t2 + not t7, t3 + not t8, t4 + srl t5, t5, 24 + srl t6, t6, 24 + srl t7, t7, 24 + srl t8, t8, 24 + replv.ph t5, t5 + replv.ph t6, t6 + replv.ph t7, t7 + replv.ph t8, t8 + muleu_s.ph.qbl s0, a1, t5 + muleu_s.ph.qbr s1, a1, t5 + muleu_s.ph.qbl s2, a1, t6 + muleu_s.ph.qbr s3, a1, t6 + muleu_s.ph.qbl s4, a1, t7 + muleu_s.ph.qbr s5, a1, t7 + muleu_s.ph.qbl s6, a1, t8 + muleu_s.ph.qbr s7, a1, t8 + + shra_r.ph t5, s0, 8 + shra_r.ph t6, s1, 8 + shra_r.ph t7, s2, 8 + shra_r.ph t8, s3, 8 + and t5, t5, t0 + and t6, t6, t0 + and t7, t7, t0 + and t8, t8, t0 + addq.ph s0, s0, t5 + addq.ph s1, s1, t6 + addq.ph s2, s2, t7 + addq.ph s3, s3, t8 + shra_r.ph s0, s0, 8 + shra_r.ph s1, s1, 8 + shra_r.ph s2, s2, 8 + shra_r.ph s3, s3, 8 + shra_r.ph t5, s4, 8 + shra_r.ph t6, s5, 8 + shra_r.ph t7, s6, 8 + shra_r.ph t8, s7, 8 + and t5, t5, t0 + and t6, t6, t0 + and t7, t7, t0 + and t8, t8, t0 
+ addq.ph s4, s4, t5 + addq.ph s5, s5, t6 + addq.ph s6, s6, t7 + addq.ph s7, s7, t8 + shra_r.ph s4, s4, 8 + shra_r.ph s5, s5, 8 + shra_r.ph s6, s6, 8 + shra_r.ph s7, s7, 8 + + precr.qb.ph t5, s0, s1 + precr.qb.ph t6, s2, s3 + precr.qb.ph t7, s4, s5 + precr.qb.ph t8, s6, s7 + addu_s.qb t5, t1, t5 + addu_s.qb t6, t2, t6 + addu_s.qb t7, t3, t7 + addu_s.qb t8, t4, t8 + + sw t5, 0(a0) + sw t6, 4(a0) + sw t7, 8(a0) + sw t8, 12(a0) + b 1b + addiu a0, a0, 16 + +2: + beqz a2, 4f + nop +3: + lw t1, 0(a0) + + not t2, t1 + srl t2, t2, 24 + replv.ph t2, t2 + + muleu_s.ph.qbl t4, a1, t2 + muleu_s.ph.qbr t5, a1, t2 + shra_r.ph t6, t4, 8 + shra_r.ph t7, t5, 8 + + and t6,t6,t0 + and t7,t7,t0 + + addq.ph t8, t4, t6 + addq.ph t9, t5, t7 + + shra_r.ph t8, t8, 8 + shra_r.ph t9, t9, 8 + + precr.qb.ph t9, t8, t9 + + addu_s.qb t9, t1, t9 + sw t9, 0(a0) + + addiu a2, a2, -1 + bnez a2, 3b + addiu a0, a0, 4 +4: + RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 +5: + j ra + nop + +END(pixman_composite_over_reverse_n_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips) +/* + * a0 - dst (a8) + * a1 - src (32bit constant) + * a2 - w + */ + + li t9, 0x00ff00ff + beqz a2, 3f + nop + srl t7, a2, 2 /* t7 = how many multiples of 4 dst pixels */ + beqz t7, 1f /* branch if less than 4 src pixels */ + nop + + srl t8, a1, 24 + replv.ph t8, t8 + +0: + beqz t7, 1f + addiu t7, t7, -1 + lbu t0, 0(a0) + lbu t1, 1(a0) + lbu t2, 2(a0) + lbu t3, 3(a0) + + precr_sra.ph.w t1, t0, 0 + precr_sra.ph.w t3, t2, 0 + precr.qb.ph t0, t3, t1 + + muleu_s.ph.qbl t2, t0, t8 + muleu_s.ph.qbr t3, t0, t8 + shra_r.ph t4, t2, 8 + shra_r.ph t5, t3, 8 + and t4, t4, t9 + and t5, t5, t9 + addq.ph t2, t2, t4 + addq.ph t3, t3, t5 + shra_r.ph t2, t2, 8 + shra_r.ph t3, t3, 8 + precr.qb.ph t2, t2, t3 + + sb t2, 0(a0) + srl t2, t2, 8 + sb t2, 1(a0) + srl t2, t2, 8 + sb t2, 2(a0) + srl t2, t2, 8 + sb t2, 3(a0) + addiu a2, a2, -4 + b 0b + addiu a0, a0, 4 + +1: + beqz a2, 3f + nop + srl t8, a1, 24 +2: + lbu t0, 0(a0) 
+ + mul t2, t0, t8 + shra_r.ph t3, t2, 8 + andi t3, t3, 0x00ff + addq.ph t2, t2, t3 + shra_r.ph t2, t2, 8 + + sb t2, 0(a0) + addiu a2, a2, -1 + bnez a2, 2b + addiu a0, a0, 1 + +3: + j ra + nop + +END(pixman_composite_in_n_8_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - w + * a3 - vx + * 16(sp) - unit_x + */ + + SAVE_REGS_ON_STACK 0, s0, s1, s2, s3 + lw t8, 16(sp) /* t8 = unit_x */ + li t6, 0x00ff00ff + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + sra t1, a3, 16 /* t0 = vx >> 16 */ + sll t1, t1, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t1, a1, t1 + lw t1, 0(t1) /* t1 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ + + OVER_2x8888_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t9, s0, s1, s2, s3 + + sw t4, 0(a0) + sw t5, 4(a0) + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + OVER_8888_8888 t0, t1, t2, t6, t4, t5, t3, t7 + + sw t2, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3 + j ra + nop + +END(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8r8g8b8) + * a2 - w + * a3 - vx + * 16(sp) - unit_x + */ + + SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, v0, v1 + lw t8, 40(sp) /* t8 = unit_x */ + li t4, 0x00ff00ff + li t5, 0xf800f800 + li t6, 0x07e007e0 + li t7, 
0x001F001F + beqz a2, 3f + nop + addiu t1, a2, -1 + beqz t1, 2f + nop +1: + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + sra t1, a3, 16 /* t0 = vx >> 16 */ + sll t1, t1, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t1, a1, t1 + lw t1, 0(t1) /* t1 = source (a8r8g8b8) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + lhu t3, 2(a0) /* t3 = destination (r5g6b5) */ + + CONVERT_2x0565_TO_2x8888 t2, t3, v0, v1, t6, t7, s0, s1, s2, s3 + OVER_2x8888_2x8888 t0, t1, v0, v1, t2, t3, t4, t9, s0, s1, s2, s3, s4 + CONVERT_2x8888_TO_2x0565 t2, t3, v0, v1, t5, t6, t7, t9, s2 + + sh v0, 0(a0) + sh v1, 2(a0) + addiu a2, a2, -2 + addiu t1, a2, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a2, 3f + nop + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + lhu t1, 0(a0) /* t1 = destination (r5g6b5) */ + addu a3, a3, t8 /* a3 = vx + unit_x */ + + CONVERT_1x0565_TO_1x8888 t1, t2, t5, t6 + OVER_8888_8888 t0, t2, t1, t4, t3, t5, t6, t7 + CONVERT_1x8888_TO_1x0565 t1, t2, t5, t6 + + sh t2, 0(a0) +3: + RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, v0, v1 + j ra + nop + +END(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (r5g6b5) + * a2 - w + * a3 - vx + * 16(sp) - unit_x + */ + + SAVE_REGS_ON_STACK 0, v0 + beqz a2, 3f + nop + + lw v0, 16(sp) /* v0 = unit_x */ + addiu t1, a2, -1 + beqz t1, 2f + nop + + li t4, 0x07e007e0 + li t5, 0x001F001F +1: + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */ + addu t0, a1, t0 + lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */ + addu a3, a3, v0 /* a3 = vx + unit_x */ + sra t1, a3, 16 /* t1 = vx >> 16 */ + sll t1, t1, 1 /* t1 = t1 * 2 ((r5g6b5)) */ 
+ addu t1, a1, t1 + lhu t1, 0(t1) /* t1 = source ((r5g6b5)) */ + addu a3, a3, v0 /* a3 = vx + unit_x */ + addiu a2, a2, -2 + + CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9 + + sw t2, 0(a0) + sw t3, 4(a0) + + addiu t2, a2, -1 + bgtz t2, 1b + addiu a0, a0, 8 +2: + beqz a2, 3f + nop + sra t0, a3, 16 /* t0 = vx >> 16 */ + sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */ + addu t0, a1, t0 + lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */ + + CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3 + + sw t1, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, v0 + j ra + nop + +END(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (a8r8g8b8) + * a2 - mask (a8) + * a3 - w + * 16(sp) - vx + * 20(sp) - unit_x + */ + beqz a3, 4f + nop + + SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5 + lw v0, 36(sp) /* v0 = vx */ + lw v1, 40(sp) /* v1 = unit_x */ + li t6, 0x00ff00ff + li t7, 0xf800f800 + li t8, 0x07e007e0 + li t9, 0x001F001F + + addiu t1, a3, -1 + beqz t1, 2f + nop +1: + sra t0, v0, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + addu v0, v0, v1 /* v0 = vx + unit_x */ + sra t1, v0, 16 /* t1 = vx >> 16 */ + sll t1, t1, 2 /* t1 = t1 * 4 (a8r8g8b8) */ + addu t1, a1, t1 + lw t1, 0(t1) /* t1 = source (a8r8g8b8) */ + addu v0, v0, v1 /* v0 = vx + unit_x */ + lbu t2, 0(a2) /* t2 = mask (a8) */ + lbu t3, 1(a2) /* t3 = mask (a8) */ + lhu t4, 0(a0) /* t4 = destination (r5g6b5) */ + lhu t5, 2(a0) /* t5 = destination (r5g6b5) */ + addiu a2, a2, 2 + + CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5 + OVER_2x8888_2x8_2x8888 t0, t1, \ + t2, t3, \ + s0, s1, \ + t4, t5, \ + t6, s2, s3, s4, s5, t2, t3 + CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3 + + sh s0, 0(a0) + sh s1, 2(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a3, 3f + nop + 
sra t0, v0, 16 /* t0 = vx >> 16 */ + sll t0, t0, 2 /* t0 = t0 * 4 (a8r8g8b8) */ + addu t0, a1, t0 + lw t0, 0(t0) /* t0 = source (a8r8g8b8) */ + lbu t1, 0(a2) /* t1 = mask (a8) */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + + CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5 + OVER_8888_8_8888 t0, t1, t3, t2, t6, t4, t5, t7, t8 + CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5 + + sh t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5 +4: + j ra + nop + +END(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips) +/* + * a0 - dst (r5g6b5) + * a1 - src (r5g6b5) + * a2 - mask (a8) + * a3 - w + * 16(sp) - vx + * 20(sp) - unit_x + */ + + beqz a3, 4f + nop + SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5 + lw v0, 36(sp) /* v0 = vx */ + lw v1, 40(sp) /* v1 = unit_x */ + li t4, 0xf800f800 + li t5, 0x07e007e0 + li t6, 0x001F001F + li t7, 0x00ff00ff + + addiu t1, a3, -1 + beqz t1, 2f + nop +1: + sra t0, v0, 16 /* t0 = vx >> 16 */ + sll t0, t0, 1 /* t0 = t0 * 2 (r5g6b5) */ + addu t0, a1, t0 + lhu t0, 0(t0) /* t0 = source (r5g6b5) */ + addu v0, v0, v1 /* v0 = vx + unit_x */ + sra t1, v0, 16 /* t1 = vx >> 16 */ + sll t1, t1, 1 /* t1 = t1 * 2 (r5g6b5) */ + addu t1, a1, t1 + lhu t1, 0(t1) /* t1 = source (r5g6b5) */ + addu v0, v0, v1 /* v0 = vx + unit_x */ + lbu t2, 0(a2) /* t2 = mask (a8) */ + lbu t3, 1(a2) /* t3 = mask (a8) */ + lhu t8, 0(a0) /* t8 = destination (r5g6b5) */ + lhu t9, 2(a0) /* t9 = destination (r5g6b5) */ + addiu a2, a2, 2 + + CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5 + CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1 + OVER_2x8888_2x8_2x8888 s0, s1, \ + t2, t3, \ + s2, s3, \ + t0, t1, \ + t7, t8, t9, s4, s5, s0, s1 + CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3 + + sh s0, 0(a0) + sh s1, 2(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 4 +2: + beqz a3, 3f + nop + sra t0, v0, 16 /* t0 = vx >> 
16 */ + sll t0, t0, 1 /* t0 = t0 * 2 (r5g6b5) */ + addu t0, a1, t0 + + lhu t0, 0(t0) /* t0 = source (r5g6b5) */ + lbu t1, 0(a2) /* t1 = mask (a8) */ + lhu t2, 0(a0) /* t2 = destination (r5g6b5) */ + + CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5 + CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6 + OVER_8888_8_8888 t3, t1, t4, t0, t7, t2, t5, t6, t8 + CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5 + + sh t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5 +4: + j ra + nop + +END(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips) + LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips) /* * a0 - *dst diff --git a/lib/pixman/pixman/pixman-mips-dspr2-asm.h b/lib/pixman/pixman/pixman-mips-dspr2-asm.h index b330c0f0d..cab122d80 100644 --- a/lib/pixman/pixman/pixman-mips-dspr2-asm.h +++ b/lib/pixman/pixman/pixman-mips-dspr2-asm.h @@ -354,17 +354,16 @@ LEAF_MIPS32R2(symbol) \ out1_565, out2_565, \ maskR, maskG, maskB, \ scratch1, scratch2 - precrq.ph.w \scratch1, \in2_8888, \in1_8888 - precr_sra.ph.w \in2_8888, \in1_8888, 0 - shll.ph \scratch1, \scratch1, 8 - srl \in2_8888, \in2_8888, 3 - and \scratch2, \in2_8888, \maskB - and \scratch1, \scratch1, \maskR - srl \in2_8888, \in2_8888, 2 - and \out2_565, \in2_8888, \maskG - or \out2_565, \out2_565, \scratch2 - or \out1_565, \out2_565, \scratch1 - srl \out2_565, \out1_565, 16 + precr.qb.ph \scratch1, \in2_8888, \in1_8888 + precrq.qb.ph \in2_8888, \in2_8888, \in1_8888 + and \out1_565, \scratch1, \maskR + shrl.ph \scratch1, \scratch1, 3 + shll.ph \in2_8888, \in2_8888, 3 + and \scratch1, \scratch1, \maskB + or \out1_565, \out1_565, \scratch1 + and \in2_8888, \in2_8888, \maskG + or \out1_565, \out1_565, \in2_8888 + srl \out2_565, \out1_565, 16 .endm /* @@ -587,6 +586,36 @@ LEAF_MIPS32R2(symbol) \ addu_s.qb \out_8888, \out_8888, \s_8888 .endm +/* + * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two + * a8r8g8b8 destination pixels (d1_8888 and d2_8888). 
It also requires maskLSR + * needed for rounding process. maskLSR must have following value: + * li maskLSR, 0x00ff00ff + */ +.macro OVER_2x8888_2x8888 s1_8888, \ + s2_8888, \ + d1_8888, \ + d2_8888, \ + out1_8888, \ + out2_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3, \ + scratch4, scratch5, scratch6 + not \scratch1, \s1_8888 + srl \scratch1, \scratch1, 24 + not \scratch2, \s2_8888 + srl \scratch2, \scratch2, 24 + MIPS_2xUN8x4_MUL_2xUN8 \d1_8888, \d2_8888, \ + \scratch1, \scratch2, \ + \out1_8888, \out2_8888, \ + \maskLSR, \ + \scratch3, \scratch4, \scratch5, \ + \scratch6, \d1_8888, \d2_8888 + + addu_s.qb \out1_8888, \out1_8888, \s1_8888 + addu_s.qb \out2_8888, \out2_8888, \s2_8888 +.endm + .macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888, \ m_8, \ d_8888, \ diff --git a/lib/pixman/pixman/pixman-mips-dspr2.c b/lib/pixman/pixman/pixman-mips-dspr2.c index 9da636d5a..e10c9df0a 100644 --- a/lib/pixman/pixman/pixman-mips-dspr2.c +++ b/lib/pixman/pixman/pixman-mips-dspr2.c @@ -48,17 +48,39 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888, uint32_t, 1, uint32_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888, uint8_t, 3, uint8_t, 3) +#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_8888_rev, + uint8_t, 3, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_0565_rev, + uint8_t, 3, uint16_t, 1) +#endif +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_pixbuf_8888, + uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_rpixbuf_8888, + uint32_t, 1, uint32_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_8888, uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_0565, + uint32_t, 1, uint16_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8_8, uint8_t, 1, uint8_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8888_8888, uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, 
out_reverse_8_0565, + uint8_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_8888, + uint8_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8888, + uint8_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8, + uint8_t, 1, uint8_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca, uint32_t, 1, uint32_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca, uint32_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8, + uint8_t, 1, uint8_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888, uint8_t, 1, uint32_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565, @@ -77,6 +99,15 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_0565_n_0565, PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, add_8888_n_8888, uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_0565, + uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_8888, + uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_reverse_n_8888, + uint32_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_N_DST (0, in_n_8, + uint8_t, 1) + PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1) PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8_8888, uint32_t, 1, @@ -94,6 +125,13 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_0565_8_0565, uint16_t, 1, PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_8888, OVER, + uint32_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_0565, OVER, + uint32_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (0565_8888, SRC, + uint16_t, uint32_t) + PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC, uint32_t, uint32_t) PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC, @@ -107,6 
+145,11 @@ PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, OVER, PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, ADD, uint32_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_0565, + OVER, uint32_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, 0565_8_0565, + OVER, uint16_t, uint16_t) + PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_8888, SRC, uint32_t, uint32_t) PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_0565, SRC, @@ -256,6 +299,19 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888), PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888), PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, mips_composite_src_0888_0888), +#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, mips_composite_src_0888_8888_rev), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, mips_composite_src_0888_0565_rev), +#endif + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, mips_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, mips_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, mips_composite_src_rpixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, mips_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mips_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, mips_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, mips_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, mips_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8, mips_composite_src_n_8_8), PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, mips_composite_over_n_8888_8888_ca), PIXMAN_STD_FAST_PATH_CA 
(OVER, solid, a8r8g8b8, x8r8g8b8, mips_composite_over_n_8888_8888_ca), @@ -263,13 +319,16 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mips_composite_over_n_8888_8888_ca), PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, mips_composite_over_n_8888_0565_ca), PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, mips_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, mips_composite_over_n_8_8), PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, mips_composite_over_n_8_8888), PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, mips_composite_over_n_8_8888), PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, mips_composite_over_n_8_8888), PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, mips_composite_over_n_8_8888), PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mips_composite_over_n_8_0565), PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mips_composite_over_n_8_0565), - + PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, mips_composite_over_n_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, mips_composite_over_n_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, mips_composite_over_n_8888), PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, mips_composite_over_8888_n_8888), PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, mips_composite_over_8888_n_8888), PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, mips_composite_over_8888_n_0565), @@ -289,6 +348,8 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, mips_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, mips_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, mips_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, mips_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, mips_composite_over_8888_0565), 
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, mips_composite_add_n_8_8), PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, mips_composite_add_n_8_8888), PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, mips_composite_add_n_8_8888), @@ -303,6 +364,35 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, mips_composite_add_8_8), PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, mips_composite_add_8888_8888), PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, mips_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, mips_composite_out_reverse_8_0565), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, mips_composite_out_reverse_8_0565), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8r8g8b8, mips_composite_out_reverse_8_8888), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8b8g8r8, mips_composite_out_reverse_8_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, mips_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mips_composite_over_reverse_n_8888), + PIXMAN_STD_FAST_PATH (IN, solid, null, a8, mips_composite_in_n_8), + + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mips_8888_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mips_8888_8888), + + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_0565), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_0565), + + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, mips_0565_8888), + PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888), + /* Note: NONE repeat is not supported yet */ + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, 
a8b8g8r8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, mips_0565_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, mips_0565_8888), + + PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565), + PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565), + + PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, mips_0565_8_0565), + PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, mips_0565_8_0565), SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888), SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888), diff --git a/lib/pixman/pixman/pixman-mips-dspr2.h b/lib/pixman/pixman/pixman-mips-dspr2.h index bddcfd827..955ed70b8 100644 --- a/lib/pixman/pixman/pixman-mips-dspr2.h +++ b/lib/pixman/pixman/pixman-mips-dspr2.h @@ -85,6 +85,42 @@ mips_composite_##name (pixman_implementation_t *imp, \ } \ } +/****************************************************************/ + +#define PIXMAN_MIPS_BIND_FAST_PATH_N_DST(flags, name, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_mips (dst_type *dst, \ + uint32_t src, \ + int32_t w); \ + \ +static void \ +mips_composite_##name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line, *dst; \ + int32_t dst_stride; \ + uint32_t src; \ + \ + src = _pixman_image_get_solid ( \ + imp, src_image, dest_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_SRC) && src == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + \ + while (height--) \ + { \ + dst = dst_line; \ + dst_line += dst_stride; \ + \ + pixman_composite_##name##_asm_mips (dst, src, width); \ + } \ +} + /*******************************************************************/ #define PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST(flags, name, \ @@ -212,6 +248,94 @@ 
mips_composite_##name (pixman_implementation_t *imp, \ /****************************************************************************/ +#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST(name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_mips ( \ + dst_type * dst, \ + const src_type * src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x); \ + \ +static force_inline void \ +scaled_nearest_scanline_mips_##name##_##op (dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, w, \ + vx, unit_x); \ +} \ + \ +FAST_NEAREST_MAINLOOP (mips_##name##_cover_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, dst_type, COVER) \ +FAST_NEAREST_MAINLOOP (mips_##name##_none_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, dst_type, NONE) \ +FAST_NEAREST_MAINLOOP (mips_##name##_pad_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, dst_type, PAD) + +/* Provide entries for the fast path table */ +#define PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func) + + +/*****************************************************************************/ + +#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST(flags, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_mips ( \ + dst_type * dst, \ + const src_type * src, \ + const uint8_t * mask, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x); \ + \ +static force_inline void \ +scaled_nearest_scanline_mips_##name##_##op (const uint8_t * mask, \ + dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t 
max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, \ + mask, w, \ + vx, unit_x); \ +} \ + \ +FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_cover_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\ +FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_none_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \ +FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_pad_##op, \ + scaled_nearest_scanline_mips_##name##_##op, \ + src_type, uint8_t, dst_type, PAD, TRUE, FALSE) + +/* Provide entries for the fast path table */ +#define PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) + +/****************************************************************************/ + #define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op, \ src_type, dst_type) \ void \ diff --git a/lib/pixman/pixman/pixman-mmx.c b/lib/pixman/pixman/pixman-mmx.c index 1e6dbe8aa..14790c029 100644 --- a/lib/pixman/pixman/pixman-mmx.c +++ b/lib/pixman/pixman/pixman-mmx.c @@ -44,8 +44,6 @@ #include "pixman-combine32.h" #include "pixman-inlines.h" -#define no_vERBOSE - #ifdef VERBOSE #define CHECKPOINT() error_f ("at %s %d\n", __FUNCTION__, __LINE__) #else @@ -62,7 +60,7 @@ _mm_empty (void) #endif #ifdef USE_X86_MMX -# if (defined(__SUNPRO_C) || defined(_MSC_VER)) +# if (defined(__SUNPRO_C) || defined(_MSC_VER) || defined(_WIN64)) # include <xmmintrin.h> # else /* We have to compile with -msse to use xmmintrin.h, but that causes SSE @@ -1402,7 +1400,7 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp, CHECKPOINT (); - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { store8888 (dst, over (vsrc, 
vsrca, load8888 (dst))); @@ -1468,7 +1466,7 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp, CHECKPOINT (); - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { uint64_t d = *dst; __m64 vdest = expand565 (to_m64 (d), 0); @@ -1546,7 +1544,7 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, uint32_t *p = (uint32_t *)mask_line; uint32_t *q = (uint32_t *)dst_line; - while (twidth && (unsigned long)q & 7) + while (twidth && (uintptr_t)q & 7) { uint32_t m = *(uint32_t *)p; @@ -1637,7 +1635,7 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { __m64 s = load8888 (src); __m64 d = load8888 (dst); @@ -1707,7 +1705,7 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { uint32_t ssrc = *src | 0xff000000; __m64 s = load8888 (&ssrc); @@ -1881,7 +1879,7 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp, CHECKPOINT (); - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { __m64 vsrc = load8888 (src); uint64_t d = *dst; @@ -1984,7 +1982,7 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp, CHECKPOINT (); - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { uint64_t m = *mask; @@ -2064,7 +2062,7 @@ mmx_fill (pixman_implementation_t *imp, int y, int width, int height, - uint32_t xor) + uint32_t filler) { uint64_t fill; __m64 vfill; @@ -2084,7 +2082,7 @@ mmx_fill (pixman_implementation_t *imp, byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x); byte_width = width; stride *= 1; - xor = (xor & 0xff) * 0x01010101; + filler = (filler & 0xff) * 0x01010101; } else if (bpp == 16) { @@ -2092,7 +2090,7 @@ mmx_fill (pixman_implementation_t *imp, byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); byte_width = 2 * width; stride 
*= 2; - xor = (xor & 0xffff) * 0x00010001; + filler = (filler & 0xffff) * 0x00010001; } else { @@ -2102,7 +2100,7 @@ mmx_fill (pixman_implementation_t *imp, stride *= 4; } - fill = ((uint64_t)xor << 32) | xor; + fill = ((uint64_t)filler << 32) | filler; vfill = to_m64 (fill); #if defined __GNUC__ && defined USE_X86_MMX @@ -2127,23 +2125,23 @@ mmx_fill (pixman_implementation_t *imp, byte_line += stride; w = byte_width; - if (w >= 1 && ((unsigned long)d & 1)) + if (w >= 1 && ((uintptr_t)d & 1)) { - *(uint8_t *)d = (xor & 0xff); + *(uint8_t *)d = (filler & 0xff); w--; d++; } - if (w >= 2 && ((unsigned long)d & 3)) + if (w >= 2 && ((uintptr_t)d & 3)) { - *(uint16_t *)d = xor; + *(uint16_t *)d = filler; w -= 2; d += 2; } - while (w >= 4 && ((unsigned long)d & 7)) + while (w >= 4 && ((uintptr_t)d & 7)) { - *(uint32_t *)d = xor; + *(uint32_t *)d = filler; w -= 4; d += 4; @@ -2182,20 +2180,20 @@ mmx_fill (pixman_implementation_t *imp, while (w >= 4) { - *(uint32_t *)d = xor; + *(uint32_t *)d = filler; w -= 4; d += 4; } if (w >= 2) { - *(uint16_t *)d = xor; + *(uint16_t *)d = filler; w -= 2; d += 2; } if (w >= 1) { - *(uint8_t *)d = (xor & 0xff); + *(uint8_t *)d = (filler & 0xff); w--; d++; } @@ -2227,10 +2225,10 @@ mmx_composite_src_x888_0565 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { s = *src++; - *dst = CONVERT_8888_TO_0565 (s); + *dst = convert_8888_to_0565 (s); dst++; w--; } @@ -2253,7 +2251,7 @@ mmx_composite_src_x888_0565 (pixman_implementation_t *imp, while (w) { s = *src++; - *dst = CONVERT_8888_TO_0565 (s); + *dst = convert_8888_to_0565 (s); dst++; w--; } @@ -2305,7 +2303,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp, CHECKPOINT (); - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { uint64_t m = *mask; @@ -2419,7 +2417,7 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp, CHECKPOINT (); - while (w && (unsigned long)dst & 
7) + while (w && (uintptr_t)dst & 7) { uint64_t m = *mask; @@ -2536,7 +2534,7 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp, CHECKPOINT (); - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { __m64 vsrc = load8888 (src); uint64_t d = *dst; @@ -2651,7 +2649,7 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { __m64 s = load8888 (src); __m64 d = load8888 (dst); @@ -2739,7 +2737,7 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, uint32_t *p = (uint32_t *)mask_line; uint16_t *q = (uint16_t *)dst_line; - while (twidth && ((unsigned long)q & 7)) + while (twidth && ((uintptr_t)q & 7)) { uint32_t m = *(uint32_t *)p; @@ -2840,7 +2838,7 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp, mask_line += mask_stride; w = width; - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { uint16_t tmp; uint8_t a; @@ -2911,7 +2909,7 @@ mmx_composite_in_8_8 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 3) + while (w && (uintptr_t)dst & 3) { uint8_t s, d; uint16_t tmp; @@ -2990,7 +2988,7 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp, mask_line += mask_stride; w = width; - while (w && (unsigned long)dst & 3) + while (w && (uintptr_t)dst & 3) { uint16_t tmp; uint16_t a; @@ -3067,7 +3065,7 @@ mmx_composite_add_8_8 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { s = *src; d = *dst; @@ -3130,19 +3128,19 @@ mmx_composite_add_0565_0565 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { s = *src++; if (s) { d = *dst; - s = CONVERT_0565_TO_8888 (s); + s = convert_0565_to_8888 (s); if (d) { - d = CONVERT_0565_TO_8888 (d); + d = convert_0565_to_8888 (d); 
UN8x4_ADD_UN8x4 (s, d); } - *dst = CONVERT_8888_TO_0565 (s); + *dst = convert_8888_to_0565 (s); } dst++; w--; @@ -3174,13 +3172,13 @@ mmx_composite_add_0565_0565 (pixman_implementation_t *imp, if (s) { d = *dst; - s = CONVERT_0565_TO_8888 (s); + s = convert_0565_to_8888 (s); if (d) { - d = CONVERT_0565_TO_8888 (d); + d = convert_0565_to_8888 (d); UN8x4_ADD_UN8x4 (s, d); } - *dst = CONVERT_8888_TO_0565 (s); + *dst = convert_8888_to_0565 (s); } dst++; } @@ -3212,7 +3210,7 @@ mmx_composite_add_8888_8888 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { store (dst, _mm_adds_pu8 (load ((const uint32_t *)src), load ((const uint32_t *)dst))); @@ -3296,7 +3294,7 @@ mmx_blt (pixman_implementation_t *imp, dst_bytes += dst_stride; w = byte_width; - if (w >= 1 && ((unsigned long)d & 1)) + if (w >= 1 && ((uintptr_t)d & 1)) { *(uint8_t *)d = *(uint8_t *)s; w -= 1; @@ -3304,7 +3302,7 @@ mmx_blt (pixman_implementation_t *imp, d += 1; } - if (w >= 2 && ((unsigned long)d & 3)) + if (w >= 2 && ((uintptr_t)d & 3)) { *(uint16_t *)d = *(uint16_t *)s; w -= 2; @@ -3312,7 +3310,7 @@ mmx_blt (pixman_implementation_t *imp, d += 2; } - while (w >= 4 && ((unsigned long)d & 7)) + while (w >= 4 && ((uintptr_t)d & 7)) { *(uint32_t *)d = ldl_u ((uint32_t *)s); @@ -3495,7 +3493,7 @@ mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp, CHECKPOINT (); - while (w && (unsigned long)dst & 7) + while (w && (uintptr_t)dst & 7) { __m64 vdest = load8888 (dst); @@ -3778,7 +3776,7 @@ mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) iter->bits += iter->stride; - while (w && ((unsigned long)dst) & 7) + while (w && ((uintptr_t)dst) & 7) { *dst++ = (*src++) | 0xff000000; w--; @@ -3820,11 +3818,11 @@ mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) iter->bits += iter->stride; - while (w && ((unsigned long)dst) & 0x0f) + while (w && ((uintptr_t)dst) & 0x0f) { uint16_t s = *src++; - *dst++ 
= CONVERT_0565_TO_8888 (s); + *dst++ = convert_0565_to_8888 (s); w--; } @@ -3847,7 +3845,7 @@ mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) { uint16_t s = *src++; - *dst++ = CONVERT_0565_TO_8888 (s); + *dst++ = convert_0565_to_8888 (s); w--; } @@ -3864,7 +3862,7 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) iter->bits += iter->stride; - while (w && (((unsigned long)dst) & 15)) + while (w && (((uintptr_t)dst) & 15)) { *dst++ = *(src++) << 24; w--; diff --git a/lib/pixman/pixman/pixman-noop.c b/lib/pixman/pixman/pixman-noop.c index 850caa192..e39996d9d 100644 --- a/lib/pixman/pixman/pixman-noop.c +++ b/lib/pixman/pixman/pixman-noop.c @@ -77,25 +77,33 @@ noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) iter->get_scanline = _pixman_iter_get_scanline_noop; } else if (image->common.extended_format_code == PIXMAN_solid && - ((iter->image_flags & (FAST_PATH_BITS_IMAGE | FAST_PATH_NO_ALPHA_MAP)) == - (FAST_PATH_BITS_IMAGE | FAST_PATH_NO_ALPHA_MAP))) + (iter->image->type == SOLID || + (iter->image_flags & FAST_PATH_NO_ALPHA_MAP))) { - bits_image_t *bits = &image->bits; - if (iter->iter_flags & ITER_NARROW) { - uint32_t color = bits->fetch_pixel_32 (bits, 0, 0); uint32_t *buffer = iter->buffer; uint32_t *end = buffer + iter->width; + uint32_t color; + + if (image->type == SOLID) + color = image->solid.color_32; + else + color = image->bits.fetch_pixel_32 (&image->bits, 0, 0); while (buffer < end) *(buffer++) = color; } else { - argb_t color = bits->fetch_pixel_float (bits, 0, 0); argb_t *buffer = (argb_t *)iter->buffer; argb_t *end = buffer + iter->width; + argb_t color; + + if (image->type == SOLID) + color = image->solid.color_float; + else + color = image->bits.fetch_pixel_float (&image->bits, 0, 0); while (buffer < end) *(buffer++) = color; diff --git a/lib/pixman/pixman/pixman-ppc.c b/lib/pixman/pixman/pixman-ppc.c index 601f1df12..a6e7bb0cf 100644 --- a/lib/pixman/pixman/pixman-ppc.c +++ b/lib/pixman/pixman/pixman-ppc.c 
@@ -37,10 +37,10 @@ static pixman_bool_t pixman_have_vmx (void) { - int error, have_mmx; + int error, have_vmx; size_t length = sizeof(have_vmx); - sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0); + error = sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0); if (error) return FALSE; @@ -56,8 +56,8 @@ pixman_have_vmx (void) static pixman_bool_t pixman_have_vmx (void) { - int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC }; int error, have_vmx; + int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC }; size_t length = sizeof(have_vmx); error = sysctl (mib, 2, &have_vmx, &length, NULL, 0); diff --git a/lib/pixman/pixman/pixman-private.h b/lib/pixman/pixman/pixman-private.h index c0a6bc0a5..6d9c05321 100644 --- a/lib/pixman/pixman/pixman-private.h +++ b/lib/pixman/pixman/pixman-private.h @@ -1,3 +1,5 @@ +#include <float.h> + #ifndef PIXMAN_PRIVATE_H #define PIXMAN_PRIVATE_H @@ -263,9 +265,6 @@ void _pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter); void -_pixman_solid_fill_iter_init (pixman_image_t *image, pixman_iter_t *iter); - -void _pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter); void @@ -320,13 +319,12 @@ _pixman_image_validate (pixman_image_t *image); */ typedef struct { - uint32_t left_ag; - uint32_t left_rb; - uint32_t right_ag; - uint32_t right_rb; + float a_s, a_b; + float r_s, r_b; + float g_s, g_b; + float b_s, b_b; pixman_fixed_t left_x; pixman_fixed_t right_x; - pixman_fixed_t stepper; pixman_gradient_stop_t *stops; int num_stops; @@ -455,7 +453,7 @@ typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp, int y, int width, int height, - uint32_t xor); + uint32_t filler); typedef pixman_bool_t (*pixman_iter_init_func_t) (pixman_implementation_t *imp, pixman_iter_t *iter); @@ -500,7 +498,7 @@ pixman_implementation_t * _pixman_implementation_create (pixman_implementation_t *fallback, const pixman_fast_path_t *fast_paths); -pixman_bool_t +void 
_pixman_implementation_lookup_composite (pixman_implementation_t *toplevel, pixman_op_t op, pixman_format_code_t src_format, @@ -542,7 +540,7 @@ _pixman_implementation_fill (pixman_implementation_t *imp, int y, int width, int height, - uint32_t xor); + uint32_t filler); pixman_bool_t _pixman_implementation_src_iter_init (pixman_implementation_t *imp, @@ -687,6 +685,7 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask); #define FAST_PATH_SAMPLES_COVER_CLIP_NEAREST (1 << 23) #define FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR (1 << 24) #define FAST_PATH_BITS_IMAGE (1 << 25) +#define FAST_PATH_SEPARABLE_CONVOLUTION_FILTER (1 << 26) #define FAST_PATH_PAD_REPEAT \ (FAST_PATH_NO_NONE_REPEAT | \ @@ -881,24 +880,55 @@ pixman_list_move_to_front (pixman_list_t *list, pixman_link_t *link) #define CLIP(v, low, high) ((v) < (low) ? (low) : ((v) > (high) ? (high) : (v))) +#define FLOAT_IS_ZERO(f) (-FLT_MIN < (f) && (f) < FLT_MIN) + /* Conversion between 8888 and 0565 */ -#define CONVERT_8888_TO_0565(s) \ - ((((s) >> 3) & 0x001f) | \ - (((s) >> 5) & 0x07e0) | \ - (((s) >> 8) & 0xf800)) +static force_inline uint16_t +convert_8888_to_0565 (uint32_t s) +{ + /* The following code can be compiled into just 4 instructions on ARM */ + uint32_t a, b; + a = (s >> 3) & 0x1F001F; + b = s & 0xFC00; + a |= a >> 5; + a |= b >> 5; + return (uint16_t)a; +} -#define CONVERT_0565_TO_0888(s) \ - (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) | \ - ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) | \ - ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000))) +static force_inline uint32_t +convert_0565_to_0888 (uint16_t s) +{ + return (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) | + ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) | + ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000))); +} -#define CONVERT_0565_TO_8888(s) (CONVERT_0565_TO_0888(s) | 0xff000000) +static force_inline uint32_t +convert_0565_to_8888 (uint16_t s) +{ + return convert_0565_to_0888 (s) | 0xff000000; +} /* Trivial 
versions that are useful in macros */ -#define CONVERT_8888_TO_8888(s) (s) -#define CONVERT_x888_TO_8888(s) ((s) | 0xff000000) -#define CONVERT_0565_TO_0565(s) (s) + +static force_inline uint32_t +convert_8888_to_8888 (uint32_t s) +{ + return s; +} + +static force_inline uint32_t +convert_x888_to_8888 (uint32_t s) +{ + return s | 0xff000000; +} + +static force_inline uint16_t +convert_0565_to_0565 (uint16_t s) +{ + return s; +} #define PIXMAN_FORMAT_IS_WIDE(f) \ (PIXMAN_FORMAT_A (f) > 8 || \ @@ -987,15 +1017,13 @@ float pixman_unorm_to_float (uint16_t u, int n_bits); #endif -#ifdef DEBUG - void _pixman_log_error (const char *function, const char *message); #define return_if_fail(expr) \ do \ { \ - if (!(expr)) \ + if (unlikely (!(expr))) \ { \ _pixman_log_error (FUNC, "The expression " # expr " was false"); \ return; \ @@ -1006,7 +1034,7 @@ _pixman_log_error (const char *function, const char *message); #define return_val_if_fail(expr, retval) \ do \ { \ - if (!(expr)) \ + if (unlikely (!(expr))) \ { \ _pixman_log_error (FUNC, "The expression " # expr " was false"); \ return (retval); \ @@ -1017,38 +1045,31 @@ _pixman_log_error (const char *function, const char *message); #define critical_if_fail(expr) \ do \ { \ - if (!(expr)) \ + if (unlikely (!(expr))) \ _pixman_log_error (FUNC, "The expression " # expr " was false"); \ } \ while (0) +/* + * Matrix + */ -#else - -#define _pixman_log_error(f,m) do { } while (0) \ +typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t; -#define return_if_fail(expr) \ - do \ - { \ - if (!(expr)) \ - return; \ - } \ - while (0) +pixman_bool_t +pixman_transform_point_31_16 (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); -#define return_val_if_fail(expr, retval) \ - do \ - { \ - if (!(expr)) \ - return (retval); \ - } \ - while (0) +void +pixman_transform_point_31_16_3d (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); 
-#define critical_if_fail(expr) \ - do \ - { \ - } \ - while (0) -#endif +void +pixman_transform_point_31_16_affine (const pixman_transform_t *t, + const pixman_vector_48_16_t *v, + pixman_vector_48_16_t *result); /* * Timers diff --git a/lib/pixman/pixman/pixman-radial-gradient.c b/lib/pixman/pixman/pixman-radial-gradient.c index 8d562468d..6a217963d 100644 --- a/lib/pixman/pixman/pixman-radial-gradient.c +++ b/lib/pixman/pixman/pixman-radial-gradient.c @@ -109,7 +109,7 @@ radial_compute_color (double a, } else { - if (t * dr > mindr) + if (t * dr >= mindr) return _pixman_gradient_walker_pixel (walker, t); } @@ -145,9 +145,9 @@ radial_compute_color (double a, } else { - if (t0 * dr > mindr) + if (t0 * dr >= mindr) return _pixman_gradient_walker_pixel (walker, t0); - else if (t1 * dr > mindr) + else if (t1 * dr >= mindr) return _pixman_gradient_walker_pixel (walker, t1); } } diff --git a/lib/pixman/pixman/pixman-region.c b/lib/pixman/pixman/pixman-region.c index 9d2a60b64..59bc9c797 100644 --- a/lib/pixman/pixman/pixman-region.c +++ b/lib/pixman/pixman/pixman-region.c @@ -42,7 +42,7 @@ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS * SOFTWARE. * - * Copyright © 1998 Keith Packard + * Copyright © 1998 Keith Packard * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that @@ -202,7 +202,7 @@ PIXREGION_SZOF (size_t n) return size + sizeof(region_data_type_t); } -static void * +static region_data_type_t * alloc_data (size_t n) { size_t sz = PIXREGION_SZOF (n); @@ -1858,7 +1858,7 @@ pixman_region_subtract_o (region_type_t * region, else if (r2->x1 <= x1) { /* - * Subtrahend preceeds minuend: nuke left edge of minuend. + * Subtrahend precedes minuend: nuke left edge of minuend. 
*/ x1 = r2->x2; if (x1 >= r1->x2) @@ -1982,7 +1982,7 @@ PREFIX (_subtract) (region_type_t *reg_d, } /* Add those rectangles in region 1 that aren't in region 2, - do yucky substraction for overlaps, and + do yucky subtraction for overlaps, and just throw away rectangles in region 2 that aren't in region 1 */ if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE)) return FALSE; @@ -2042,7 +2042,7 @@ PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */ } /* Add those rectangles in region 1 that aren't in region 2, - * do yucky substraction for overlaps, and + * do yucky subtraction for overlaps, and * just throw away rectangles in region 2 that aren't in region 1 */ inv_reg.extents = *inv_rect; diff --git a/lib/pixman/pixman/pixman-solid-fill.c b/lib/pixman/pixman/pixman-solid-fill.c index 60d56d52a..5f9fef630 100644 --- a/lib/pixman/pixman/pixman-solid-fill.c +++ b/lib/pixman/pixman/pixman-solid-fill.c @@ -26,31 +26,6 @@ #endif #include "pixman-private.h" -void -_pixman_solid_fill_iter_init (pixman_image_t *image, pixman_iter_t *iter) -{ - if (iter->iter_flags & ITER_NARROW) - { - uint32_t *b = (uint32_t *)iter->buffer; - uint32_t *e = b + iter->width; - uint32_t color = iter->image->solid.color_32; - - while (b < e) - *(b++) = color; - } - else - { - argb_t *b = (argb_t *)iter->buffer; - argb_t *e = b + iter->width; - argb_t color = image->solid.color_float; - - while (b < e) - *(b++) = color; - } - - iter->get_scanline = _pixman_iter_get_scanline_noop; -} - static uint32_t color_to_uint32 (const pixman_color_t *color) { diff --git a/lib/pixman/pixman/pixman-sse2.c b/lib/pixman/pixman/pixman-sse2.c index 27cf60e16..8a82eda7e 100644 --- a/lib/pixman/pixman/pixman-sse2.c +++ b/lib/pixman/pixman/pixman-sse2.c @@ -576,7 +576,7 @@ core_combine_over_u_sse2_mask (uint32_t * pd, uint32_t s, d; /* Align dst on a 16-byte boundary */ - while (w && ((unsigned long)pd & 15)) + while (w && ((uintptr_t)pd & 15)) { d = *pd; s = combine1 (ps, 
pm); @@ -661,7 +661,7 @@ core_combine_over_u_sse2_no_mask (uint32_t * pd, uint32_t s, d; /* Align dst on a 16-byte boundary */ - while (w && ((unsigned long)pd & 15)) + while (w && ((uintptr_t)pd & 15)) { d = *pd; s = *ps; @@ -753,7 +753,7 @@ sse2_combine_over_reverse_u (pixman_implementation_t *imp, /* Align dst on a 16-byte boundary */ while (w && - ((unsigned long)pd & 15)) + ((uintptr_t)pd & 15)) { d = *pd; s = combine1 (ps, pm); @@ -840,7 +840,7 @@ sse2_combine_in_u (pixman_implementation_t *imp, __m128i xmm_src_lo, xmm_src_hi; __m128i xmm_dst_lo, xmm_dst_hi; - while (w && ((unsigned long) pd & 15)) + while (w && ((uintptr_t)pd & 15)) { s = combine1 (ps, pm); d = *pd; @@ -901,7 +901,7 @@ sse2_combine_in_reverse_u (pixman_implementation_t *imp, __m128i xmm_src_lo, xmm_src_hi; __m128i xmm_dst_lo, xmm_dst_hi; - while (w && ((unsigned long) pd & 15)) + while (w && ((uintptr_t)pd & 15)) { s = combine1 (ps, pm); d = *pd; @@ -957,7 +957,7 @@ sse2_combine_out_reverse_u (pixman_implementation_t *imp, const uint32_t * pm, int w) { - while (w && ((unsigned long) pd & 15)) + while (w && ((uintptr_t)pd & 15)) { uint32_t s = combine1 (ps, pm); uint32_t d = *pd; @@ -1026,7 +1026,7 @@ sse2_combine_out_u (pixman_implementation_t *imp, const uint32_t * pm, int w) { - while (w && ((unsigned long) pd & 15)) + while (w && ((uintptr_t)pd & 15)) { uint32_t s = combine1 (ps, pm); uint32_t d = *pd; @@ -1113,7 +1113,7 @@ sse2_combine_atop_u (pixman_implementation_t *imp, __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; - while (w && ((unsigned long) pd & 15)) + while (w && ((uintptr_t)pd & 15)) { s = combine1 (ps, pm); d = *pd; @@ -1197,7 +1197,7 @@ sse2_combine_atop_reverse_u (pixman_implementation_t *imp, __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; - while (w && ((unsigned long) pd & 15)) + while (w && ((uintptr_t)pd & 15)) { s = combine1 (ps, pm); d = *pd; @@ -1285,7 +1285,7 @@ sse2_combine_xor_u 
(pixman_implementation_t *imp, __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; - while (w && ((unsigned long) pd & 15)) + while (w && ((uintptr_t)pd & 15)) { s = combine1 (ps, pm); d = *pd; @@ -1357,7 +1357,7 @@ sse2_combine_add_u (pixman_implementation_t *imp, const uint32_t* ps = src; const uint32_t* pm = mask; - while (w && (unsigned long)pd & 15) + while (w && (uintptr_t)pd & 15) { s = combine1 (ps, pm); d = *pd; @@ -1430,7 +1430,7 @@ sse2_combine_saturate_u (pixman_implementation_t *imp, uint32_t pack_cmp; __m128i xmm_src, xmm_dst; - while (w && (unsigned long)pd & 15) + while (w && (uintptr_t)pd & 15) { s = combine1 (ps, pm); d = *pd; @@ -1518,7 +1518,7 @@ sse2_combine_src_ca (pixman_implementation_t *imp, __m128i xmm_mask_lo, xmm_mask_hi; __m128i xmm_dst_lo, xmm_dst_hi; - while (w && (unsigned long)pd & 15) + while (w && (uintptr_t)pd & 15) { s = *ps++; m = *pm++; @@ -1586,7 +1586,7 @@ sse2_combine_over_ca (pixman_implementation_t *imp, __m128i xmm_dst_lo, xmm_dst_hi; __m128i xmm_mask_lo, xmm_mask_hi; - while (w && (unsigned long)pd & 15) + while (w && (uintptr_t)pd & 15) { s = *ps++; m = *pm++; @@ -1662,7 +1662,7 @@ sse2_combine_over_reverse_ca (pixman_implementation_t *imp, __m128i xmm_dst_lo, xmm_dst_hi; __m128i xmm_mask_lo, xmm_mask_hi; - while (w && (unsigned long)pd & 15) + while (w && (uintptr_t)pd & 15) { s = *ps++; m = *pm++; @@ -1727,7 +1727,7 @@ sse2_combine_in_ca (pixman_implementation_t *imp, __m128i xmm_dst_lo, xmm_dst_hi; __m128i xmm_mask_lo, xmm_mask_hi; - while (w && (unsigned long)pd & 15) + while (w && (uintptr_t)pd & 15) { s = *ps++; m = *pm++; @@ -1802,7 +1802,7 @@ sse2_combine_in_reverse_ca (pixman_implementation_t *imp, __m128i xmm_dst_lo, xmm_dst_hi; __m128i xmm_mask_lo, xmm_mask_hi; - while (w && (unsigned long)pd & 15) + while (w && (uintptr_t)pd & 15) { s = *ps++; m = *pm++; @@ -1875,7 +1875,7 @@ sse2_combine_out_ca (pixman_implementation_t *imp, __m128i xmm_dst_lo, xmm_dst_hi; __m128i 
xmm_mask_lo, xmm_mask_hi; - while (w && (unsigned long)pd & 15) + while (w && (uintptr_t)pd & 15) { s = *ps++; m = *pm++; @@ -1951,7 +1951,7 @@ sse2_combine_out_reverse_ca (pixman_implementation_t *imp, __m128i xmm_dst_lo, xmm_dst_hi; __m128i xmm_mask_lo, xmm_mask_hi; - while (w && (unsigned long)pd & 15) + while (w && (uintptr_t)pd & 15) { s = *ps++; m = *pm++; @@ -2048,7 +2048,7 @@ sse2_combine_atop_ca (pixman_implementation_t *imp, __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; __m128i xmm_mask_lo, xmm_mask_hi; - while (w && (unsigned long)pd & 15) + while (w && (uintptr_t)pd & 15) { s = *ps++; m = *pm++; @@ -2141,7 +2141,7 @@ sse2_combine_atop_reverse_ca (pixman_implementation_t *imp, __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; __m128i xmm_mask_lo, xmm_mask_hi; - while (w && (unsigned long)pd & 15) + while (w && (uintptr_t)pd & 15) { s = *ps++; m = *pm++; @@ -2237,7 +2237,7 @@ sse2_combine_xor_ca (pixman_implementation_t *imp, __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; __m128i xmm_mask_lo, xmm_mask_hi; - while (w && (unsigned long)pd & 15) + while (w && (uintptr_t)pd & 15) { s = *ps++; m = *pm++; @@ -2313,7 +2313,7 @@ sse2_combine_add_ca (pixman_implementation_t *imp, __m128i xmm_dst_lo, xmm_dst_hi; __m128i xmm_mask_lo, xmm_mask_hi; - while (w && (unsigned long)pd & 15) + while (w && (uintptr_t)pd & 15) { s = *ps++; m = *pm++; @@ -2414,7 +2414,7 @@ sse2_composite_over_n_8888 (pixman_implementation_t *imp, dst_line += dst_stride; w = width; - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { d = *dst; *dst++ = pack_1x128_32 (over_1x128 (xmm_src, @@ -2483,7 +2483,7 @@ sse2_composite_over_n_0565 (pixman_implementation_t *imp, dst_line += dst_stride; w = width; - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { d = *dst; @@ -2568,7 +2568,7 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, dst_line += dst_stride; mask_line += mask_stride; - while (w && (unsigned long)pd & 15) + while (w && 
(uintptr_t)pd & 15) { m = *pm++; @@ -2682,7 +2682,7 @@ sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, dst_line += dst_stride; mask_line += mask_stride; - while (w && (unsigned long)pd & 15) + while (w && (uintptr_t)pd & 15) { m = *pm++; @@ -2786,7 +2786,7 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { uint32_t s = *src++; @@ -2878,10 +2878,10 @@ sse2_composite_src_x888_0565 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { s = *src++; - *dst = CONVERT_8888_TO_0565 (s); + *dst = convert_8888_to_0565 (s); dst++; w--; } @@ -2901,7 +2901,7 @@ sse2_composite_src_x888_0565 (pixman_implementation_t *imp, while (w) { s = *src++; - *dst = CONVERT_8888_TO_0565 (s); + *dst = convert_8888_to_0565 (s); dst++; w--; } @@ -2932,7 +2932,7 @@ sse2_composite_src_x888_8888 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { *dst++ = *src++ | 0xff000000; w--; @@ -2999,7 +2999,7 @@ sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { uint32_t s = (*src++) | 0xff000000; uint32_t d = *dst; @@ -3125,7 +3125,7 @@ sse2_composite_over_8888_0565 (pixman_implementation_t *imp, /* Align dst on a 16-byte boundary */ while (w && - ((unsigned long)dst & 15)) + ((uintptr_t)dst & 15)) { s = *src++; d = *dst; @@ -3231,7 +3231,7 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp, mask_line += mask_stride; w = width; - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { uint8_t m = *mask++; @@ -3321,7 +3321,7 @@ sse2_fill (pixman_implementation_t *imp, int y, int width, int height, - uint32_t xor) + uint32_t filler) { uint32_t 
byte_width; uint8_t *byte_line; @@ -3338,9 +3338,9 @@ sse2_fill (pixman_implementation_t *imp, byte_width = width; stride *= 1; - b = xor & 0xff; + b = filler & 0xff; w = (b << 8) | b; - xor = (w << 16) | w; + filler = (w << 16) | w; } else if (bpp == 16) { @@ -3349,7 +3349,7 @@ sse2_fill (pixman_implementation_t *imp, byte_width = 2 * width; stride *= 2; - xor = (xor & 0xffff) * 0x00010001; + filler = (filler & 0xffff) * 0x00010001; } else if (bpp == 32) { @@ -3363,7 +3363,7 @@ sse2_fill (pixman_implementation_t *imp, return FALSE; } - xmm_def = create_mask_2x32_128 (xor, xor); + xmm_def = create_mask_2x32_128 (filler, filler); while (height--) { @@ -3372,23 +3372,23 @@ sse2_fill (pixman_implementation_t *imp, byte_line += stride; w = byte_width; - if (w >= 1 && ((unsigned long)d & 1)) + if (w >= 1 && ((uintptr_t)d & 1)) { - *(uint8_t *)d = xor; + *(uint8_t *)d = filler; w -= 1; d += 1; } - while (w >= 2 && ((unsigned long)d & 3)) + while (w >= 2 && ((uintptr_t)d & 3)) { - *(uint16_t *)d = xor; + *(uint16_t *)d = filler; w -= 2; d += 2; } - while (w >= 4 && ((unsigned long)d & 15)) + while (w >= 4 && ((uintptr_t)d & 15)) { - *(uint32_t *)d = xor; + *(uint32_t *)d = filler; w -= 4; d += 4; @@ -3439,7 +3439,7 @@ sse2_fill (pixman_implementation_t *imp, while (w >= 4) { - *(uint32_t *)d = xor; + *(uint32_t *)d = filler; w -= 4; d += 4; @@ -3447,14 +3447,14 @@ sse2_fill (pixman_implementation_t *imp, if (w >= 2) { - *(uint16_t *)d = xor; + *(uint16_t *)d = filler; w -= 2; d += 2; } if (w >= 1) { - *(uint8_t *)d = xor; + *(uint8_t *)d = filler; w -= 1; d += 1; } @@ -3505,7 +3505,7 @@ sse2_composite_src_n_8_8888 (pixman_implementation_t *imp, mask_line += mask_stride; w = width; - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { uint8_t m = *mask++; @@ -3621,7 +3621,7 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp, mask_line += mask_stride; w = width; - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { m = 
*mask++; @@ -3745,7 +3745,7 @@ sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { s = *src++; d = *dst; @@ -3854,7 +3854,7 @@ sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { s = *src++; d = *dst; @@ -3957,7 +3957,7 @@ sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, mask_line += mask_stride; dst_line += dst_stride; - while (w && ((unsigned long)dst & 15)) + while (w && ((uintptr_t)dst & 15)) { m = *(uint32_t *) mask; @@ -4083,7 +4083,7 @@ sse2_composite_in_n_8_8 (pixman_implementation_t *imp, mask_line += mask_stride; w = width; - while (w && ((unsigned long)dst & 15)) + while (w && ((uintptr_t)dst & 15)) { m = (uint32_t) *mask++; d = (uint32_t) *dst; @@ -4176,7 +4176,7 @@ sse2_composite_in_n_8 (pixman_implementation_t *imp, dst_line += dst_stride; w = width; - while (w && ((unsigned long)dst & 15)) + while (w && ((uintptr_t)dst & 15)) { d = (uint32_t) *dst; @@ -4245,7 +4245,7 @@ sse2_composite_in_8_8 (pixman_implementation_t *imp, src_line += src_stride; w = width; - while (w && ((unsigned long)dst & 15)) + while (w && ((uintptr_t)dst & 15)) { s = (uint32_t) *src++; d = (uint32_t) *dst; @@ -4322,7 +4322,7 @@ sse2_composite_add_n_8_8 (pixman_implementation_t *imp, mask_line += mask_stride; w = width; - while (w && ((unsigned long)dst & 15)) + while (w && ((uintptr_t)dst & 15)) { m = (uint32_t) *mask++; d = (uint32_t) *dst; @@ -4414,7 +4414,7 @@ sse2_composite_add_n_8 (pixman_implementation_t *imp, dst_line += dst_stride; w = width; - while (w && ((unsigned long)dst & 15)) + while (w && ((uintptr_t)dst & 15)) { *dst = (uint8_t)_mm_cvtsi128_si32 ( _mm_adds_epu8 ( @@ -4474,7 +4474,7 @@ sse2_composite_add_8_8 (pixman_implementation_t *imp, w = width; /* Small head */ - while (w && (unsigned long)dst & 3) + 
while (w && (uintptr_t)dst & 3) { t = (*dst) + (*src++); *dst++ = t | (0 - (t >> 8)); @@ -4523,7 +4523,163 @@ sse2_composite_add_8888_8888 (pixman_implementation_t *imp, sse2_combine_add_u (imp, op, dst, src, NULL, width); } +} + +static void +sse2_composite_add_n_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst, src; + int dst_stride; + + __m128i xmm_src; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + if (src == 0) + return; + + if (src == ~0) + { + pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32, + dest_x, dest_y, width, height, ~0); + + return; + } + + xmm_src = _mm_set_epi32 (src, src, src, src); + while (height--) + { + int w = width; + uint32_t d; + + dst = dst_line; + dst_line += dst_stride; + + while (w && (uintptr_t)dst & 15) + { + d = *dst; + *dst++ = + _mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d))); + w--; + } + + while (w >= 4) + { + save_128_aligned + ((__m128i*)dst, + _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst))); + + dst += 4; + w -= 4; + } + + while (w--) + { + d = *dst; + *dst++ = + _mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src, + _mm_cvtsi32_si128 (d))); + } + } +} + +static void +sse2_composite_add_n_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t src; + + __m128i xmm_src; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + if (src == 0) + return; + xmm_src = expand_pixel_32_1x128 (src); + + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE ( + mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = 
dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w && ((uintptr_t)dst & 15)) + { + uint8_t m = *mask++; + if (m) + { + *dst = pack_1x128_32 + (_mm_adds_epu16 + (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), + unpack_32_1x128 (*dst))); + } + dst++; + w--; + } + + while (w >= 4) + { + uint32_t m = *(uint32_t*)mask; + if (m) + { + __m128i xmm_mask_lo, xmm_mask_hi; + __m128i xmm_dst_lo, xmm_dst_hi; + + __m128i xmm_dst = load_128_aligned ((__m128i*)dst); + __m128i xmm_mask = + _mm_unpacklo_epi8 (unpack_32_1x128(m), + _mm_setzero_si128 ()); + + unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + + expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + pix_multiply_2x128 (&xmm_src, &xmm_src, + &xmm_mask_lo, &xmm_mask_hi, + &xmm_mask_lo, &xmm_mask_hi); + + xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo); + xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + w -= 4; + dst += 4; + mask += 4; + } + + while (w) + { + uint8_t m = *mask++; + if (m) + { + *dst = pack_1x128_32 + (_mm_adds_epu16 + (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), + unpack_32_1x128 (*dst))); + } + dst++; + w--; + } + } } static pixman_bool_t @@ -4582,7 +4738,7 @@ sse2_blt (pixman_implementation_t *imp, dst_bytes += dst_stride; w = byte_width; - while (w >= 2 && ((unsigned long)d & 3)) + while (w >= 2 && ((uintptr_t)d & 3)) { *(uint16_t *)d = *(uint16_t *)s; w -= 2; @@ -4590,7 +4746,7 @@ sse2_blt (pixman_implementation_t *imp, d += 2; } - while (w >= 4 && ((unsigned long)d & 15)) + while (w >= 4 && ((uintptr_t)d & 15)) { *(uint32_t *)d = *(uint32_t *)s; @@ -4697,7 +4853,7 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp, w = width; - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { s = 0xff000000 
| *src++; m = (uint32_t) *mask++; @@ -4821,7 +4977,7 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp, w = width; - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { uint32_t sa; @@ -4960,7 +5116,7 @@ sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp, dst_line += dst_stride; w = width; - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { __m128i vd; @@ -5045,7 +5201,7 @@ sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp, w = width; - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { uint32_t sa; @@ -5173,7 +5329,7 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd, return; /* Align dst on a 16-byte boundary */ - while (w && ((unsigned long)pd & 15)) + while (w && ((uintptr_t)pd & 15)) { d = *pd; s = combine1 (ps + pixman_fixed_to_int (vx), pm); @@ -5291,7 +5447,7 @@ scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask, xmm_mask = create_mask_16_128 (*mask >> 24); - while (w && (unsigned long)dst & 15) + while (w && (uintptr_t)dst & 15) { uint32_t s = *(src + pixman_fixed_to_int (vx)); vx += unit_x; @@ -5398,19 +5554,27 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, scaled_nearest_scanline_sse2_8888_n_8888_OVER, uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE) -#define BMSK ((1 << BILINEAR_INTERPOLATION_BITS) - 1) - -#define BILINEAR_DECLARE_VARIABLES \ +#if BILINEAR_INTERPOLATION_BITS < 8 +# define BILINEAR_DECLARE_VARIABLES \ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ - const __m128i xmm_xorc8 = _mm_set_epi16 (0, 0, 0, 0, BMSK, BMSK, BMSK, BMSK);\ - const __m128i xmm_addc8 = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \ - const __m128i xmm_xorc7 = _mm_set_epi16 (0, BMSK, 0, BMSK, 0, BMSK, 0, BMSK);\ - const __m128i xmm_addc7 = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ + const __m128i xmm_addc = _mm_set_epi16 
(0, 1, 0, 1, 0, 1, 0, 1); \ + const __m128i xmm_ux = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \ + unit_x, -unit_x, unit_x, -unit_x); \ + const __m128i xmm_zero = _mm_setzero_si128 (); \ + __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), \ + vx, -(vx + 1), vx, -(vx + 1)) +#else +# define BILINEAR_DECLARE_VARIABLES \ + const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ + const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ + const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \ const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x, \ - unit_x, unit_x, unit_x, unit_x); \ + -unit_x, -unit_x, -unit_x, -unit_x); \ const __m128i xmm_zero = _mm_setzero_si128 (); \ - __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, vx, vx, vx, vx) + __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, \ + -(vx + 1), -(vx + 1), -(vx + 1), -(vx + 1)) +#endif #define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \ do { \ @@ -5429,8 +5593,8 @@ do { \ if (BILINEAR_INTERPOLATION_BITS < 8) \ { \ /* calculate horizontal weights */ \ - xmm_wh = _mm_add_epi16 (xmm_addc7, _mm_xor_si128 (xmm_xorc7, \ - _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \ + xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS)); \ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ /* horizontal interpolation */ \ a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \ @@ -5439,8 +5603,8 @@ do { \ else \ { \ /* calculate horizontal weights */ \ - xmm_wh = _mm_add_epi16 (xmm_addc8, _mm_xor_si128 (xmm_xorc8, \ - _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \ + xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ + 16 - BILINEAR_INTERPOLATION_BITS)); \ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ /* horizontal interpolation */ \ xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \ @@ -5538,7 +5702,7 @@ scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst, BILINEAR_DECLARE_VARIABLES; uint32_t 
pix1, pix2, pix3, pix4; - while (w && ((unsigned long)dst & 15)) + while (w && ((uintptr_t)dst & 15)) { BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); @@ -5639,7 +5803,7 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst, uint32_t pix1, pix2, pix3, pix4; uint32_t m; - while (w && ((unsigned long)dst & 15)) + while (w && ((uintptr_t)dst & 15)) { uint32_t sa; @@ -5786,6 +5950,121 @@ FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER, uint32_t, uint8_t, uint32_t, NORMAL, FLAG_HAVE_NON_SOLID_MASK) +static force_inline void +scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1, pix2, pix3, pix4; + __m128i xmm_mask; + + if (zero_src || (*mask >> 24) == 0) + return; + + xmm_mask = create_mask_16_128 (*mask >> 24); + + while (w && ((uintptr_t)dst & 15)) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + if (pix1) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (pix1); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i dest = xmm_mask; + __m128i alpha_dst = unpack_32_1x128 (d); + + *dst = pack_1x128_32 + (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); + } + + dst++; + w--; + } + + while (w >= 4) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix3); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix4); + + if (pix1 | pix2 | pix3 | pix4) + { + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + + xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1); + + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + 
&xmm_alpha_lo, &xmm_alpha_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask, &xmm_mask, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned + ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + dst += 4; + w -= 4; + } + + while (w) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + if (pix1) + { + uint32_t d = *dst; + + __m128i ms = unpack_32_1x128 (pix1); + __m128i alpha = expand_alpha_1x128 (ms); + __m128i dest = xmm_mask; + __m128i alpha_dst = unpack_32_1x128 (d); + + *dst = pack_1x128_32 + (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); + } + + dst++; + w--; + } +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + NONE, FLAG_HAVE_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER, + scaled_bilinear_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_HAVE_SOLID_MASK) + static const pixman_fast_path_t sse2_fast_paths[] = { /* PIXMAN_OP_OVER */ @@ -5848,6 +6127,14 @@ static const pixman_fast_path_t sse2_fast_paths[] = PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888), PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8), PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8), + PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, 
sse2_composite_add_n_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, sse2_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, sse2_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, sse2_composite_add_n_8_8888), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, sse2_composite_add_n_8_8888), /* PIXMAN_OP_SRC */ PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888), @@ -5912,6 +6199,11 @@ static const pixman_fast_path_t sse2_fast_paths[] = SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), + SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888), SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888), SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888), @@ -5930,7 +6222,7 @@ sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) iter->bits += iter->stride; - while (w && ((unsigned long)dst) & 0x0f) + while (w && ((uintptr_t)dst) & 0x0f) { *dst++ = (*src++) | 0xff000000; w--; @@ -5966,11 +6258,11 @@ sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) iter->bits += iter->stride; - while (w && ((unsigned long)dst) & 0x0f) + while (w && ((uintptr_t)dst) & 0x0f) { uint16_t s = *src++; - *dst++ = CONVERT_0565_TO_8888 (s); + *dst++ = convert_0565_to_8888 (s); w--; } @@ -5995,7 +6287,7 @@ sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) { uint16_t s = *src++; - *dst++ = CONVERT_0565_TO_8888 (s); + *dst++ = convert_0565_to_8888 (s); w--; } @@ -6012,7 +6304,7 @@ 
sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) iter->bits += iter->stride; - while (w && (((unsigned long)dst) & 15)) + while (w && (((uintptr_t)dst) & 15)) { *dst++ = *(src++) << 24; w--; diff --git a/lib/pixman/pixman/pixman-trap.c b/lib/pixman/pixman/pixman-trap.c index ab5c8c895..91766fdbf 100644 --- a/lib/pixman/pixman/pixman-trap.c +++ b/lib/pixman/pixman/pixman-trap.c @@ -491,6 +491,8 @@ pixman_composite_trapezoids (pixman_op_t op, { int i; + return_if_fail (PIXMAN_FORMAT_TYPE (mask_format) == PIXMAN_TYPE_A); + if (n_traps <= 0) return; @@ -521,8 +523,9 @@ pixman_composite_trapezoids (pixman_op_t op, if (!get_trap_extents (op, dst, traps, n_traps, &box)) return; - tmp = pixman_image_create_bits ( - mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1); + if (!(tmp = pixman_image_create_bits ( + mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1))) + return; for (i = 0; i < n_traps; ++i) { diff --git a/lib/pixman/pixman/pixman-utils.c b/lib/pixman/pixman/pixman-utils.c index b1e9fb62d..f31171f6d 100644 --- a/lib/pixman/pixman/pixman-utils.c +++ b/lib/pixman/pixman/pixman-utils.c @@ -292,8 +292,6 @@ _pixman_internal_only_get_implementation (void) return get_implementation (); } -#ifdef DEBUG - void _pixman_log_error (const char *function, const char *message) { @@ -310,5 +308,3 @@ _pixman_log_error (const char *function, const char *message) n_messages++; } } - -#endif diff --git a/lib/pixman/pixman/pixman.c b/lib/pixman/pixman/pixman.c index e3b6516b5..184f0c4e6 100644 --- a/lib/pixman/pixman/pixman.c +++ b/lib/pixman/pixman/pixman.c @@ -455,6 +455,14 @@ analyze_extent (pixman_image_t *image, height = params[1]; break; + case PIXMAN_FILTER_SEPARABLE_CONVOLUTION: + params = image->common.filter_params; + x_off = - pixman_fixed_e - ((params[0] - pixman_fixed_1) >> 1); + y_off = - pixman_fixed_e - ((params[1] - pixman_fixed_1) >> 1); + width = params[0]; + height = params[1]; + break; + case PIXMAN_FILTER_GOOD: case PIXMAN_FILTER_BEST: case 
PIXMAN_FILTER_BILINEAR: @@ -573,11 +581,13 @@ pixman_image_composite32 (pixman_op_t op, int32_t height) { pixman_format_code_t src_format, mask_format, dest_format; - uint32_t src_flags, mask_flags, dest_flags; pixman_region32_t region; pixman_box32_t extents; pixman_implementation_t *imp; pixman_composite_func_t func; + pixman_composite_info_t info; + const pixman_box32_t *pbox; + int n; _pixman_image_validate (src); if (mask) @@ -585,27 +595,27 @@ pixman_image_composite32 (pixman_op_t op, _pixman_image_validate (dest); src_format = src->common.extended_format_code; - src_flags = src->common.flags; + info.src_flags = src->common.flags; - if (mask) + if (mask && !(mask->common.flags & FAST_PATH_IS_OPAQUE)) { mask_format = mask->common.extended_format_code; - mask_flags = mask->common.flags; + info.mask_flags = mask->common.flags; } else { mask_format = PIXMAN_null; - mask_flags = FAST_PATH_IS_OPAQUE; + info.mask_flags = FAST_PATH_IS_OPAQUE; } dest_format = dest->common.extended_format_code; - dest_flags = dest->common.flags; + info.dest_flags = dest->common.flags; /* Check for pixbufs */ if ((mask_format == PIXMAN_a8r8g8b8 || mask_format == PIXMAN_a8b8g8r8) && (src->type == BITS && src->bits.bits == mask->bits.bits) && (src->common.repeat == mask->common.repeat) && - (src_flags & mask_flags & FAST_PATH_ID_TRANSFORM) && + (info.src_flags & info.mask_flags & FAST_PATH_ID_TRANSFORM) && (src_x == mask_x && src_y == mask_y)) { if (src_format == PIXMAN_x8b8g8r8) @@ -630,7 +640,7 @@ pixman_image_composite32 (pixman_op_t op, extents.x2 -= dest_x - src_x; extents.y2 -= dest_y - src_y; - if (!analyze_extent (src, &extents, &src_flags)) + if (!analyze_extent (src, &extents, &info.src_flags)) goto out; extents.x1 -= src_x - mask_x; @@ -638,7 +648,7 @@ pixman_image_composite32 (pixman_op_t op, extents.x2 -= src_x - mask_x; extents.y2 -= src_y - mask_y; - if (!analyze_extent (mask, &extents, &mask_flags)) + if (!analyze_extent (mask, &extents, &info.mask_flags)) goto out; /* If 
the clip is within the source samples, and the samples are @@ -651,16 +661,16 @@ pixman_image_composite32 (pixman_op_t op, FAST_PATH_BILINEAR_FILTER | \ FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR) - if ((src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || - (src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) + if ((info.src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || + (info.src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) { - src_flags |= FAST_PATH_IS_OPAQUE; + info.src_flags |= FAST_PATH_IS_OPAQUE; } - if ((mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || - (mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) + if ((info.mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE || + (info.mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE) { - mask_flags |= FAST_PATH_IS_OPAQUE; + info.mask_flags |= FAST_PATH_IS_OPAQUE; } /* @@ -668,42 +678,35 @@ pixman_image_composite32 (pixman_op_t op, * if the src or dest are opaque. The output operator should be * mathematically equivalent to the source. */ - op = optimize_operator (op, src_flags, mask_flags, dest_flags); + info.op = optimize_operator (op, info.src_flags, info.mask_flags, info.dest_flags); - if (_pixman_implementation_lookup_composite ( - get_implementation (), op, - src_format, src_flags, mask_format, mask_flags, dest_format, dest_flags, - &imp, &func)) - { - pixman_composite_info_t info; - const pixman_box32_t *pbox; - int n; + _pixman_implementation_lookup_composite ( + get_implementation (), info.op, + src_format, info.src_flags, + mask_format, info.mask_flags, + dest_format, info.dest_flags, + &imp, &func); - info.op = op; - info.src_image = src; - info.mask_image = mask; - info.dest_image = dest; - info.src_flags = src_flags; - info.mask_flags = mask_flags; - info.dest_flags = dest_flags; + info.src_image = src; + info.mask_image = mask; + info.dest_image = dest; - pbox = pixman_region32_rectangles (&region, &n); + pbox = pixman_region32_rectangles (&region, &n); - while (n--) - { - info.src_x = pbox->x1 + src_x - dest_x; -
info.src_y = pbox->y1 + src_y - dest_y; - info.mask_x = pbox->x1 + mask_x - dest_x; - info.mask_y = pbox->y1 + mask_y - dest_y; - info.dest_x = pbox->x1; - info.dest_y = pbox->y1; - info.width = pbox->x2 - pbox->x1; - info.height = pbox->y2 - pbox->y1; - - func (imp, &info); - - pbox++; - } + while (n--) + { + info.src_x = pbox->x1 + src_x - dest_x; + info.src_y = pbox->y1 + src_y - dest_y; + info.mask_x = pbox->x1 + mask_x - dest_x; + info.mask_y = pbox->y1 + mask_y - dest_y; + info.dest_x = pbox->x1; + info.dest_y = pbox->y1; + info.width = pbox->x2 - pbox->x1; + info.height = pbox->y2 - pbox->y1; + + func (imp, &info); + + pbox++; } out: @@ -758,10 +761,10 @@ pixman_fill (uint32_t *bits, int y, int width, int height, - uint32_t xor) + uint32_t filler) { return _pixman_implementation_fill ( - get_implementation(), bits, stride, bpp, x, y, width, height, xor); + get_implementation(), bits, stride, bpp, x, y, width, height, filler); } static uint32_t @@ -820,7 +823,7 @@ color_to_pixel (const pixman_color_t *color, c = c >> 24; else if (format == PIXMAN_r5g6b5 || format == PIXMAN_b5g6r5) - c = CONVERT_8888_TO_0565 (c); + c = convert_8888_to_0565 (c); #if 0 printf ("color: %x %x %x %x\n", color->alpha, color->red, color->green, color->blue); diff --git a/lib/pixman/pixman/pixman.h b/lib/pixman/pixman/pixman.h index c8723cf41..7ff9fb52a 100644 --- a/lib/pixman/pixman/pixman.h +++ b/lib/pixman/pixman/pixman.h @@ -226,6 +226,9 @@ pixman_bool_t pixman_transform_is_inverse (const struct pixman_transform * /* * Floating point matrices */ +typedef struct pixman_f_transform pixman_f_transform_t; +typedef struct pixman_f_vector pixman_f_vector_t; + struct pixman_f_vector { double v[3]; @@ -289,7 +292,28 @@ typedef enum PIXMAN_FILTER_BEST, PIXMAN_FILTER_NEAREST, PIXMAN_FILTER_BILINEAR, - PIXMAN_FILTER_CONVOLUTION + PIXMAN_FILTER_CONVOLUTION, + + /* The SEPARABLE_CONVOLUTION filter takes the following parameters: + * + * width: integer given as 16.16 fixpoint number + * height: 
integer given as 16.16 fixpoint number + * x_phase_bits: integer given as 16.16 fixpoint + * y_phase_bits: integer given as 16.16 fixpoint + * xtables: (1 << x_phase_bits) tables of size width + * ytables: (1 << y_phase_bits) tables of size height + * + * When sampling at (x, y), the location is first rounded to one of + * n_x_phases * n_y_phases subpixel positions. These subpixel positions + * determine an xtable and a ytable to use. + * + * Conceptually a width x height matrix is then formed in which each entry + * is the product of the corresponding entries in the x and y tables. + * This matrix is then aligned with the image pixels such that its center + * is as close as possible to the subpixel location chosen earlier. Then + * the image is convolved with the matrix and the resulting pixel returned. + */ + PIXMAN_FILTER_SEPARABLE_CONVOLUTION } pixman_filter_t; typedef enum @@ -807,6 +831,33 @@ int pixman_image_get_height (pixman_image_t int pixman_image_get_stride (pixman_image_t *image); /* in bytes */ int pixman_image_get_depth (pixman_image_t *image); pixman_format_code_t pixman_image_get_format (pixman_image_t *image); + +typedef enum +{ + PIXMAN_KERNEL_IMPULSE, + PIXMAN_KERNEL_BOX, + PIXMAN_KERNEL_LINEAR, + PIXMAN_KERNEL_CUBIC, + PIXMAN_KERNEL_GAUSSIAN, + PIXMAN_KERNEL_LANCZOS2, + PIXMAN_KERNEL_LANCZOS3, + PIXMAN_KERNEL_LANCZOS3_STRETCHED /* Jim Blinn's 'nice' filter */ +} pixman_kernel_t; + +/* Create the parameter list for a SEPARABLE_CONVOLUTION filter + * with the given kernels and scale parameters. 
+ */ +pixman_fixed_t * +pixman_filter_create_separable_convolution (int *n_values, + pixman_fixed_t scale_x, + pixman_fixed_t scale_y, + pixman_kernel_t reconstruct_x, + pixman_kernel_t reconstruct_y, + pixman_kernel_t sample_x, + pixman_kernel_t sample_y, + int subsample_bits_x, + int subsample_bits_y); + pixman_bool_t pixman_image_fill_rectangles (pixman_op_t op, pixman_image_t *image, const pixman_color_t *color, diff --git a/lib/pixman/test/Makefile.am b/lib/pixman/test/Makefile.am index eeb3679f0..5d901d572 100644 --- a/lib/pixman/test/Makefile.am +++ b/lib/pixman/test/Makefile.am @@ -3,11 +3,11 @@ include $(top_srcdir)/test/Makefile.sources AM_CFLAGS = $(OPENMP_CFLAGS) AM_LDFLAGS = $(OPENMP_CFLAGS) $(TESTPROGS_EXTRA_LDFLAGS) LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm $(PNG_LIBS) -INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(PNG_CFLAGS) +AM_CPPFLAGS = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(PNG_CFLAGS) libutils_la_SOURCES = $(libutils_sources) $(libutils_headers) noinst_LTLIBRARIES = libutils.la -noinst_PROGRAMS = $(TESTPROGRAMS) $(BENCHMARKS) +noinst_PROGRAMS = $(TESTPROGRAMS) $(OTHERPROGRAMS) TESTS = $(TESTPROGRAMS) diff --git a/lib/pixman/test/Makefile.in b/lib/pixman/test/Makefile.in index 21477cc68..48974546f 100644 --- a/lib/pixman/test/Makefile.in +++ b/lib/pixman/test/Makefile.in @@ -1,4 +1,4 @@ -# Makefile.in generated by automake 1.12.3 from Makefile.am. +# Makefile.in generated by automake 1.12.6 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2012 Free Software Foundation, Inc. 
@@ -66,7 +66,7 @@ CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = LTLIBRARIES = $(noinst_LTLIBRARIES) libutils_la_LIBADD = -am__objects_1 = utils.lo +am__objects_1 = utils.lo utils-prng.lo am__objects_2 = am_libutils_la_OBJECTS = $(am__objects_1) $(am__objects_2) libutils_la_OBJECTS = $(am_libutils_la_OBJECTS) @@ -74,18 +74,20 @@ AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) am__v_lt_0 = --silent am__v_lt_1 = -am__EXEEXT_1 = a1-trap-test$(EXEEXT) pdf-op-test$(EXEEXT) \ - region-test$(EXEEXT) region-translate-test$(EXEEXT) \ - combiner-test$(EXEEXT) fetch-test$(EXEEXT) \ - rotate-test$(EXEEXT) oob-test$(EXEEXT) infinite-loop$(EXEEXT) \ - trap-crasher$(EXEEXT) alpha-loop$(EXEEXT) \ - scaling-crash-test$(EXEEXT) scaling-helpers-test$(EXEEXT) \ - gradient-crash-test$(EXEEXT) region-contains-test$(EXEEXT) \ - alphamap$(EXEEXT) stress-test$(EXEEXT) \ +am__EXEEXT_1 = prng-test$(EXEEXT) a1-trap-test$(EXEEXT) \ + pdf-op-test$(EXEEXT) region-test$(EXEEXT) \ + region-translate-test$(EXEEXT) combiner-test$(EXEEXT) \ + pixel-test$(EXEEXT) fetch-test$(EXEEXT) rotate-test$(EXEEXT) \ + oob-test$(EXEEXT) infinite-loop$(EXEEXT) trap-crasher$(EXEEXT) \ + alpha-loop$(EXEEXT) scaling-crash-test$(EXEEXT) \ + scaling-helpers-test$(EXEEXT) gradient-crash-test$(EXEEXT) \ + region-contains-test$(EXEEXT) alphamap$(EXEEXT) \ + matrix-test$(EXEEXT) stress-test$(EXEEXT) \ composite-traps-test$(EXEEXT) blitters-test$(EXEEXT) \ glyph-test$(EXEEXT) scaling-test$(EXEEXT) affine-test$(EXEEXT) \ composite$(EXEEXT) -am__EXEEXT_2 = lowlevel-blt-bench$(EXEEXT) +am__EXEEXT_2 = lowlevel-blt-bench$(EXEEXT) radial-perf-test$(EXEEXT) \ + check-formats$(EXEEXT) PROGRAMS = $(noinst_PROGRAMS) a1_trap_test_SOURCES = a1-trap-test.c a1_trap_test_OBJECTS = a1-trap-test.$(OBJEXT) @@ -113,6 +115,11 @@ blitters_test_OBJECTS = blitters-test.$(OBJEXT) blitters_test_LDADD = $(LDADD) blitters_test_DEPENDENCIES = libutils.la \ $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) 
+check_formats_SOURCES = check-formats.c +check_formats_OBJECTS = check-formats.$(OBJEXT) +check_formats_LDADD = $(LDADD) +check_formats_DEPENDENCIES = libutils.la \ + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) combiner_test_SOURCES = combiner-test.c combiner_test_OBJECTS = combiner-test.$(OBJEXT) combiner_test_LDADD = $(LDADD) @@ -153,6 +160,11 @@ lowlevel_blt_bench_OBJECTS = lowlevel-blt-bench.$(OBJEXT) lowlevel_blt_bench_LDADD = $(LDADD) lowlevel_blt_bench_DEPENDENCIES = libutils.la \ $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) +matrix_test_SOURCES = matrix-test.c +matrix_test_OBJECTS = matrix-test.$(OBJEXT) +matrix_test_LDADD = $(LDADD) +matrix_test_DEPENDENCIES = libutils.la \ + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) oob_test_SOURCES = oob-test.c oob_test_OBJECTS = oob-test.$(OBJEXT) oob_test_LDADD = $(LDADD) @@ -163,6 +175,21 @@ pdf_op_test_OBJECTS = pdf-op-test.$(OBJEXT) pdf_op_test_LDADD = $(LDADD) pdf_op_test_DEPENDENCIES = libutils.la \ $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) +pixel_test_SOURCES = pixel-test.c +pixel_test_OBJECTS = pixel-test.$(OBJEXT) +pixel_test_LDADD = $(LDADD) +pixel_test_DEPENDENCIES = libutils.la \ + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) +prng_test_SOURCES = prng-test.c +prng_test_OBJECTS = prng-test.$(OBJEXT) +prng_test_LDADD = $(LDADD) +prng_test_DEPENDENCIES = libutils.la \ + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) +radial_perf_test_SOURCES = radial-perf-test.c +radial_perf_test_OBJECTS = radial-perf-test.$(OBJEXT) +radial_perf_test_LDADD = $(LDADD) +radial_perf_test_DEPENDENCIES = libutils.la \ + $(top_builddir)/pixman/libpixman-1.la $(am__DEPENDENCIES_1) region_contains_test_SOURCES = region-contains-test.c region_contains_test_OBJECTS = region-contains-test.$(OBJEXT) region_contains_test_LDADD = $(LDADD) @@ -243,21 +270,23 @@ am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; 
am__v_CCLD_1 = SOURCES = $(libutils_la_SOURCES) a1-trap-test.c affine-test.c \ - alpha-loop.c alphamap.c blitters-test.c combiner-test.c \ - composite.c composite-traps-test.c fetch-test.c glyph-test.c \ - gradient-crash-test.c infinite-loop.c lowlevel-blt-bench.c \ - oob-test.c pdf-op-test.c region-contains-test.c region-test.c \ - region-translate-test.c rotate-test.c scaling-crash-test.c \ - scaling-helpers-test.c scaling-test.c stress-test.c \ - trap-crasher.c + alpha-loop.c alphamap.c blitters-test.c check-formats.c \ + combiner-test.c composite.c composite-traps-test.c \ + fetch-test.c glyph-test.c gradient-crash-test.c \ + infinite-loop.c lowlevel-blt-bench.c matrix-test.c oob-test.c \ + pdf-op-test.c pixel-test.c prng-test.c radial-perf-test.c \ + region-contains-test.c region-test.c region-translate-test.c \ + rotate-test.c scaling-crash-test.c scaling-helpers-test.c \ + scaling-test.c stress-test.c trap-crasher.c DIST_SOURCES = $(libutils_la_SOURCES) a1-trap-test.c affine-test.c \ - alpha-loop.c alphamap.c blitters-test.c combiner-test.c \ - composite.c composite-traps-test.c fetch-test.c glyph-test.c \ - gradient-crash-test.c infinite-loop.c lowlevel-blt-bench.c \ - oob-test.c pdf-op-test.c region-contains-test.c region-test.c \ - region-translate-test.c rotate-test.c scaling-crash-test.c \ - scaling-helpers-test.c scaling-test.c stress-test.c \ - trap-crasher.c + alpha-loop.c alphamap.c blitters-test.c check-formats.c \ + combiner-test.c composite.c composite-traps-test.c \ + fetch-test.c glyph-test.c gradient-crash-test.c \ + infinite-loop.c lowlevel-blt-bench.c matrix-test.c oob-test.c \ + pdf-op-test.c pixel-test.c prng-test.c radial-perf-test.c \ + region-contains-test.c region-test.c region-translate-test.c \ + rotate-test.c scaling-crash-test.c scaling-helpers-test.c \ + scaling-test.c stress-test.c trap-crasher.c am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ @@ -344,6 +373,8 @@ PIXMAN_VERSION_MAJOR = 
@PIXMAN_VERSION_MAJOR@ PIXMAN_VERSION_MICRO = @PIXMAN_VERSION_MICRO@ PIXMAN_VERSION_MINOR = @PIXMAN_VERSION_MINOR@ PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ PNG_CFLAGS = @PNG_CFLAGS@ PNG_LIBS = @PNG_LIBS@ PTHREAD_LDFLAGS = @PTHREAD_LDFLAGS@ @@ -415,11 +446,13 @@ top_srcdir = @top_srcdir@ # Tests (sorted by expected completion time) TESTPROGRAMS = \ + prng-test \ a1-trap-test \ pdf-op-test \ region-test \ region-translate-test \ combiner-test \ + pixel-test \ fetch-test \ rotate-test \ oob-test \ @@ -431,6 +464,7 @@ TESTPROGRAMS = \ gradient-crash-test \ region-contains-test \ alphamap \ + matrix-test \ stress-test \ composite-traps-test \ blitters-test \ @@ -441,25 +475,29 @@ TESTPROGRAMS = \ $(NULL) -# Benchmarks -BENCHMARKS = \ +# Other programs +OTHERPROGRAMS = \ lowlevel-blt-bench \ + radial-perf-test \ + check-formats \ $(NULL) # Utility functions libutils_sources = \ utils.c \ + utils-prng.c \ $(NULL) libutils_headers = \ utils.h \ + utils-prng.h \ $(NULL) AM_CFLAGS = $(OPENMP_CFLAGS) AM_LDFLAGS = $(OPENMP_CFLAGS) $(TESTPROGS_EXTRA_LDFLAGS) LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm $(PNG_LIBS) -INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(PNG_CFLAGS) +AM_CPPFLAGS = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(PNG_CFLAGS) libutils_la_SOURCES = $(libutils_sources) $(libutils_headers) noinst_LTLIBRARIES = libutils.la all: all-am @@ -534,6 +572,9 @@ alphamap$(EXEEXT): $(alphamap_OBJECTS) $(alphamap_DEPENDENCIES) $(EXTRA_alphamap blitters-test$(EXEEXT): $(blitters_test_OBJECTS) $(blitters_test_DEPENDENCIES) $(EXTRA_blitters_test_DEPENDENCIES) @rm -f blitters-test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(blitters_test_OBJECTS) $(blitters_test_LDADD) $(LIBS) +check-formats$(EXEEXT): $(check_formats_OBJECTS) $(check_formats_DEPENDENCIES) $(EXTRA_check_formats_DEPENDENCIES) + @rm -f check-formats$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(check_formats_OBJECTS) $(check_formats_LDADD) 
$(LIBS) combiner-test$(EXEEXT): $(combiner_test_OBJECTS) $(combiner_test_DEPENDENCIES) $(EXTRA_combiner_test_DEPENDENCIES) @rm -f combiner-test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(combiner_test_OBJECTS) $(combiner_test_LDADD) $(LIBS) @@ -558,12 +599,24 @@ infinite-loop$(EXEEXT): $(infinite_loop_OBJECTS) $(infinite_loop_DEPENDENCIES) $ lowlevel-blt-bench$(EXEEXT): $(lowlevel_blt_bench_OBJECTS) $(lowlevel_blt_bench_DEPENDENCIES) $(EXTRA_lowlevel_blt_bench_DEPENDENCIES) @rm -f lowlevel-blt-bench$(EXEEXT) $(AM_V_CCLD)$(LINK) $(lowlevel_blt_bench_OBJECTS) $(lowlevel_blt_bench_LDADD) $(LIBS) +matrix-test$(EXEEXT): $(matrix_test_OBJECTS) $(matrix_test_DEPENDENCIES) $(EXTRA_matrix_test_DEPENDENCIES) + @rm -f matrix-test$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(matrix_test_OBJECTS) $(matrix_test_LDADD) $(LIBS) oob-test$(EXEEXT): $(oob_test_OBJECTS) $(oob_test_DEPENDENCIES) $(EXTRA_oob_test_DEPENDENCIES) @rm -f oob-test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(oob_test_OBJECTS) $(oob_test_LDADD) $(LIBS) pdf-op-test$(EXEEXT): $(pdf_op_test_OBJECTS) $(pdf_op_test_DEPENDENCIES) $(EXTRA_pdf_op_test_DEPENDENCIES) @rm -f pdf-op-test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(pdf_op_test_OBJECTS) $(pdf_op_test_LDADD) $(LIBS) +pixel-test$(EXEEXT): $(pixel_test_OBJECTS) $(pixel_test_DEPENDENCIES) $(EXTRA_pixel_test_DEPENDENCIES) + @rm -f pixel-test$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(pixel_test_OBJECTS) $(pixel_test_LDADD) $(LIBS) +prng-test$(EXEEXT): $(prng_test_OBJECTS) $(prng_test_DEPENDENCIES) $(EXTRA_prng_test_DEPENDENCIES) + @rm -f prng-test$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(prng_test_OBJECTS) $(prng_test_LDADD) $(LIBS) +radial-perf-test$(EXEEXT): $(radial_perf_test_OBJECTS) $(radial_perf_test_DEPENDENCIES) $(EXTRA_radial_perf_test_DEPENDENCIES) + @rm -f radial-perf-test$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(radial_perf_test_OBJECTS) $(radial_perf_test_LDADD) $(LIBS) region-contains-test$(EXEEXT): $(region_contains_test_OBJECTS) $(region_contains_test_DEPENDENCIES) $(EXTRA_region_contains_test_DEPENDENCIES) @rm -f 
region-contains-test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(region_contains_test_OBJECTS) $(region_contains_test_LDADD) $(LIBS) @@ -603,6 +656,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alpha-loop.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alphamap.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blitters-test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/check-formats.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/combiner-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/composite-traps-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/composite.Po@am__quote@ @@ -611,8 +665,12 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gradient-crash-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/infinite-loop.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lowlevel-blt-bench.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matrix-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oob-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdf-op-test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pixel-test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/prng-test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radial-perf-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/region-contains-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/region-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/region-translate-test.Po@am__quote@ @@ -622,6 +680,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scaling-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stress-test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/trap-crasher.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/utils-prng.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utils.Plo@am__quote@ .c.o: diff --git a/lib/pixman/test/Makefile.sources b/lib/pixman/test/Makefile.sources index 077897161..b5fc740f3 100644 --- a/lib/pixman/test/Makefile.sources +++ b/lib/pixman/test/Makefile.sources @@ -1,10 +1,12 @@ # Tests (sorted by expected completion time) TESTPROGRAMS = \ + prng-test \ a1-trap-test \ pdf-op-test \ region-test \ region-translate-test \ combiner-test \ + pixel-test \ fetch-test \ rotate-test \ oob-test \ @@ -16,6 +18,7 @@ TESTPROGRAMS = \ gradient-crash-test \ region-contains-test \ alphamap \ + matrix-test \ stress-test \ composite-traps-test \ blitters-test \ @@ -25,16 +28,20 @@ TESTPROGRAMS = \ composite \ $(NULL) -# Benchmarks -BENCHMARKS = \ +# Other programs +OTHERPROGRAMS = \ lowlevel-blt-bench \ + radial-perf-test \ + check-formats \ $(NULL) # Utility functions libutils_sources = \ utils.c \ + utils-prng.c \ $(NULL) libutils_headers = \ utils.h \ + utils-prng.h \ $(NULL) diff --git a/lib/pixman/test/a1-trap-test.c b/lib/pixman/test/a1-trap-test.c index 93c6caa14..c2b488316 100644 --- a/lib/pixman/test/a1-trap-test.c +++ b/lib/pixman/test/a1-trap-test.c @@ -45,6 +45,14 @@ main (int argc, char **argv) assert (bits[1] == 0xffffffff); assert (bits[1 * WIDTH + 0] == 0xffffffff); assert (bits[1 * WIDTH + 1] == 0xffffffff); + + /* The check-formats test depends on operator_name() and format_name() returning + * these precise formats, so if those change, check-formats.c must be updated too. 
+ */ + assert ( + strcmp (operator_name (PIXMAN_OP_DISJOINT_OVER), "PIXMAN_OP_DISJOINT_OVER") == 0); + assert ( + strcmp (format_name (PIXMAN_r5g6b5), "r5g6b5") == 0); return 0; } diff --git a/lib/pixman/test/affine-test.c b/lib/pixman/test/affine-test.c index 7bc28b4cd..2506250db 100644 --- a/lib/pixman/test/affine-test.c +++ b/lib/pixman/test/affine-test.c @@ -48,18 +48,18 @@ test_composite (int testnum, uint32_t crc32; FLOAT_REGS_CORRUPTION_DETECTOR_START (); - lcg_srand (testnum); + prng_srand (testnum); - src_bpp = (lcg_rand_n (2) == 0) ? 2 : 4; - dst_bpp = (lcg_rand_n (2) == 0) ? 2 : 4; - op = (lcg_rand_n (2) == 0) ? PIXMAN_OP_SRC : PIXMAN_OP_OVER; + src_bpp = (prng_rand_n (2) == 0) ? 2 : 4; + dst_bpp = (prng_rand_n (2) == 0) ? 2 : 4; + op = (prng_rand_n (2) == 0) ? PIXMAN_OP_SRC : PIXMAN_OP_OVER; - src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1; - src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1; - dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1; - dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1; - src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp; - dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp; + src_width = prng_rand_n (MAX_SRC_WIDTH) + 1; + src_height = prng_rand_n (MAX_SRC_HEIGHT) + 1; + dst_width = prng_rand_n (MAX_DST_WIDTH) + 1; + dst_height = prng_rand_n (MAX_DST_HEIGHT) + 1; + src_stride = src_width * src_bpp + prng_rand_n (MAX_STRIDE) * src_bpp; + dst_stride = dst_width * dst_bpp + prng_rand_n (MAX_STRIDE) * dst_bpp; if (src_stride & 3) src_stride += 2; @@ -67,26 +67,23 @@ test_composite (int testnum, if (dst_stride & 3) dst_stride += 2; - src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2); - src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2); - dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2); - dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2); - w = lcg_rand_n (dst_width * 3 / 2 - dst_x); - h = lcg_rand_n (dst_height * 3 / 2 - dst_y); + src_x = -(src_width / 4) + prng_rand_n 
(src_width * 3 / 2); + src_y = -(src_height / 4) + prng_rand_n (src_height * 3 / 2); + dst_x = -(dst_width / 4) + prng_rand_n (dst_width * 3 / 2); + dst_y = -(dst_height / 4) + prng_rand_n (dst_height * 3 / 2); + w = prng_rand_n (dst_width * 3 / 2 - dst_x); + h = prng_rand_n (dst_height * 3 / 2 - dst_y); srcbuf = (uint32_t *)malloc (src_stride * src_height); dstbuf = (uint32_t *)malloc (dst_stride * dst_height); - for (i = 0; i < src_stride * src_height; i++) - *((uint8_t *)srcbuf + i) = lcg_rand_n (256); + prng_randmemset (srcbuf, src_stride * src_height, 0); + prng_randmemset (dstbuf, dst_stride * dst_height, 0); - for (i = 0; i < dst_stride * dst_height; i++) - *((uint8_t *)dstbuf + i) = lcg_rand_n (256); - - src_fmt = src_bpp == 4 ? (lcg_rand_n (2) == 0 ? + src_fmt = src_bpp == 4 ? (prng_rand_n (2) == 0 ? PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; - dst_fmt = dst_bpp == 4 ? (lcg_rand_n (2) == 0 ? + dst_fmt = dst_bpp == 4 ? (prng_rand_n (2) == 0 ? PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; src_img = pixman_image_create_bits ( @@ -100,29 +97,29 @@ test_composite (int testnum, pixman_transform_init_identity (&transform); - if (lcg_rand_n (3) > 0) + if (prng_rand_n (3) > 0) { - scale_x = -65536 * 3 + lcg_rand_N (65536 * 6); - if (lcg_rand_n (2)) - scale_y = -65536 * 3 + lcg_rand_N (65536 * 6); + scale_x = -65536 * 3 + prng_rand_n (65536 * 6); + if (prng_rand_n (2)) + scale_y = -65536 * 3 + prng_rand_n (65536 * 6); else scale_y = scale_x; pixman_transform_init_scale (&transform, scale_x, scale_y); } - if (lcg_rand_n (3) > 0) + if (prng_rand_n (3) > 0) { - translate_x = -65536 * 3 + lcg_rand_N (6 * 65536); - if (lcg_rand_n (2)) - translate_y = -65536 * 3 + lcg_rand_N (6 * 65536); + translate_x = -65536 * 3 + prng_rand_n (6 * 65536); + if (prng_rand_n (2)) + translate_y = -65536 * 3 + prng_rand_n (6 * 65536); else translate_y = translate_x; pixman_transform_translate (&transform, NULL, translate_x, translate_y); } - if (lcg_rand_n (4) > 0) + if 
(prng_rand_n (4) > 0) { int c, s, tx = 0, ty = 0; - switch (lcg_rand_n (4)) + switch (prng_rand_n (4)) { case 0: /* 90 degrees */ @@ -145,32 +142,32 @@ test_composite (int testnum, break; default: /* arbitrary rotation */ - c = lcg_rand_N (2 * 65536) - 65536; - s = lcg_rand_N (2 * 65536) - 65536; + c = prng_rand_n (2 * 65536) - 65536; + s = prng_rand_n (2 * 65536) - 65536; break; } pixman_transform_rotate (&transform, NULL, c, s); pixman_transform_translate (&transform, NULL, tx, ty); } - if (lcg_rand_n (8) == 0) + if (prng_rand_n (8) == 0) { /* Flip random bits */ int maxflipcount = 8; while (maxflipcount--) { - int i = lcg_rand_n (2); - int j = lcg_rand_n (3); - int bitnum = lcg_rand_n (32); + int i = prng_rand_n (2); + int j = prng_rand_n (3); + int bitnum = prng_rand_n (32); transform.matrix[i][j] ^= 1 << bitnum; - if (lcg_rand_n (2)) + if (prng_rand_n (2)) break; } } pixman_image_set_transform (src_img, &transform); - switch (lcg_rand_n (4)) + switch (prng_rand_n (4)) { case 0: repeat = PIXMAN_REPEAT_NONE; @@ -193,7 +190,7 @@ test_composite (int testnum, } pixman_image_set_repeat (src_img, repeat); - if (lcg_rand_n (2)) + if (prng_rand_n (2)) pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0); else pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0); @@ -203,9 +200,9 @@ test_composite (int testnum, #define M(r,c) \ transform.matrix[r][c] - printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt); - printf ("op=%d, repeat=%d, transform=\n", - op, repeat); + printf ("src_fmt=%s, dst_fmt=%s\n", format_name (src_fmt), format_name (dst_fmt)); + printf ("op=%s, repeat=%d, transform=\n", + operator_name (op), repeat); printf (" { { { 0x%08x, 0x%08x, 0x%08x },\n" " { 0x%08x, 0x%08x, 0x%08x },\n" " { 0x%08x, 0x%08x, 0x%08x },\n" @@ -220,19 +217,19 @@ test_composite (int testnum, printf ("w=%d, h=%d\n", w, h); } - if (lcg_rand_n (8) == 0) + if (prng_rand_n (8) == 0) { pixman_box16_t clip_boxes[2]; - int n = lcg_rand_n (2) + 1; + int n = 
prng_rand_n (2) + 1; for (i = 0; i < n; i++) { - clip_boxes[i].x1 = lcg_rand_n (src_width); - clip_boxes[i].y1 = lcg_rand_n (src_height); + clip_boxes[i].x1 = prng_rand_n (src_width); + clip_boxes[i].y1 = prng_rand_n (src_height); clip_boxes[i].x2 = - clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1); + clip_boxes[i].x1 + prng_rand_n (src_width - clip_boxes[i].x1); clip_boxes[i].y2 = - clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1); + clip_boxes[i].y1 + prng_rand_n (src_height - clip_boxes[i].y1); if (verbose) { @@ -248,18 +245,18 @@ test_composite (int testnum, pixman_region_fini (&clip); } - if (lcg_rand_n (8) == 0) + if (prng_rand_n (8) == 0) { pixman_box16_t clip_boxes[2]; - int n = lcg_rand_n (2) + 1; + int n = prng_rand_n (2) + 1; for (i = 0; i < n; i++) { - clip_boxes[i].x1 = lcg_rand_n (dst_width); - clip_boxes[i].y1 = lcg_rand_n (dst_height); + clip_boxes[i].x1 = prng_rand_n (dst_width); + clip_boxes[i].y1 = prng_rand_n (dst_height); clip_boxes[i].x2 = - clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1); + clip_boxes[i].x1 + prng_rand_n (dst_width - clip_boxes[i].x1); clip_boxes[i].y2 = - clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1); + clip_boxes[i].y1 + prng_rand_n (dst_height - clip_boxes[i].y1); if (verbose) { @@ -310,11 +307,11 @@ test_composite (int testnum, } #if BILINEAR_INTERPOLATION_BITS == 8 -#define CHECKSUM 0x1EF2175A +#define CHECKSUM 0x2CDF1F07 #elif BILINEAR_INTERPOLATION_BITS == 7 -#define CHECKSUM 0x74050F50 +#define CHECKSUM 0xBC00B1DF #elif BILINEAR_INTERPOLATION_BITS == 4 -#define CHECKSUM 0x4362EAE8 +#define CHECKSUM 0xA227306B #else #define CHECKSUM 0x00000000 #endif diff --git a/lib/pixman/test/alpha-loop.c b/lib/pixman/test/alpha-loop.c index e4d90a988..eca761537 100644 --- a/lib/pixman/test/alpha-loop.c +++ b/lib/pixman/test/alpha-loop.c @@ -8,9 +8,14 @@ int main (int argc, char **argv) { - uint8_t *alpha = make_random_bytes (WIDTH * HEIGHT); - uint32_t *src = (uint32_t 
*)make_random_bytes (WIDTH * HEIGHT * 4); - uint32_t *dest = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * 4); + uint8_t *alpha; + uint32_t *src, *dest; + + prng_srand (0); + + alpha = make_random_bytes (WIDTH * HEIGHT); + src = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * 4); + dest = (uint32_t *)make_random_bytes (WIDTH * HEIGHT * 4); pixman_image_t *a = pixman_image_create_bits (PIXMAN_a8, WIDTH, HEIGHT, (uint32_t *)alpha, WIDTH); pixman_image_t *d = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, dest, WIDTH * 4); diff --git a/lib/pixman/test/alphamap.c b/lib/pixman/test/alphamap.c index 0c5757ea3..4d09076fb 100644 --- a/lib/pixman/test/alphamap.c +++ b/lib/pixman/test/alphamap.c @@ -26,25 +26,6 @@ static const int origins[] = 0, 10, -100 }; -static const char * -format_name (pixman_format_code_t format) -{ - if (format == PIXMAN_a8) - return "a8"; - else if (format == PIXMAN_a2r10g10b10) - return "a2r10g10b10"; - else if (format == PIXMAN_a8r8g8b8) - return "a8r8g8b8"; - else if (format == PIXMAN_a4r4g4b4) - return "a4r4g4b4"; - else if (format == PIXMAN_null) - return "none"; - else - assert (0); - - return "<unknown - bug in alphamap.c>"; -} - static void on_destroy (pixman_image_t *image, void *data) { @@ -307,6 +288,8 @@ main (int argc, char **argv) { int i, j, a, b, x, y; + prng_srand (0); + for (i = 0; i < ARRAY_LENGTH (formats); ++i) { for (j = 0; j < ARRAY_LENGTH (formats); ++j) diff --git a/lib/pixman/test/blitters-test.c b/lib/pixman/test/blitters-test.c index 30d69124c..a2c6ff4d8 100644 --- a/lib/pixman/test/blitters-test.c +++ b/lib/pixman/test/blitters-test.c @@ -25,7 +25,7 @@ create_random_image (pixman_format_code_t *allowed_formats, int max_extra_stride, pixman_format_code_t *used_fmt) { - int n = 0, i, width, height, stride; + int n = 0, width, height, stride; pixman_format_code_t fmt; uint32_t *buf; pixman_image_t *img; @@ -33,27 +33,28 @@ create_random_image (pixman_format_code_t *allowed_formats, while (allowed_formats[n] != 
PIXMAN_null) n++; - if (n > N_MOST_LIKELY_FORMATS && lcg_rand_n (4) != 0) + if (n > N_MOST_LIKELY_FORMATS && prng_rand_n (4) != 0) n = N_MOST_LIKELY_FORMATS; - fmt = allowed_formats[lcg_rand_n (n)]; + fmt = allowed_formats[prng_rand_n (n)]; - width = lcg_rand_n (max_width) + 1; - height = lcg_rand_n (max_height) + 1; + width = prng_rand_n (max_width) + 1; + height = prng_rand_n (max_height) + 1; stride = (width * PIXMAN_FORMAT_BPP (fmt) + 7) / 8 + - lcg_rand_n (max_extra_stride + 1); + prng_rand_n (max_extra_stride + 1); stride = (stride + 3) & ~3; /* do the allocation */ buf = aligned_malloc (64, stride * height); - /* initialize image with random data */ - for (i = 0; i < stride * height; i++) + if (prng_rand_n (4) == 0) { - /* generation is biased to having more 0 or 255 bytes as - * they are more likely to be special-cased in code - */ - *((uint8_t *)buf + i) = lcg_rand_n (4) ? lcg_rand_n (256) : - (lcg_rand_n (2) ? 0 : 255); + /* uniform distribution */ + prng_randmemset (buf, stride * height, 0); + } + else + { + /* significantly increased probability for 0x00 and 0xFF */ + prng_randmemset (buf, stride * height, RANDMEMSET_MORE_00_AND_FF); } img = pixman_image_create_bits (fmt, width, height, buf, stride); @@ -67,7 +68,7 @@ create_random_image (pixman_format_code_t *allowed_formats, pixman_image_set_indexed (img, &(y_palette[PIXMAN_FORMAT_BPP (fmt)])); } - if (lcg_rand_n (16) == 0) + if (prng_rand_n (16) == 0) pixman_image_set_filter (img, PIXMAN_FILTER_BILINEAR, NULL, 0); image_endian_swap (img); @@ -251,11 +252,11 @@ test_composite (int testnum, int verbose) if (max_extra_stride > 8) max_extra_stride = 8; - lcg_srand (testnum); + prng_srand (testnum); - op = op_list[lcg_rand_n (ARRAY_LENGTH (op_list))]; + op = op_list[prng_rand_n (ARRAY_LENGTH (op_list))]; - if (lcg_rand_n (8)) + if (prng_rand_n (8)) { /* normal image */ src_img = create_random_image (img_fmt_list, max_width, max_height, @@ -284,10 +285,10 @@ test_composite (int testnum, int verbose) dstbuf 
= pixman_image_get_data (dst_img); srcbuf = pixman_image_get_data (src_img); - src_x = lcg_rand_n (src_width); - src_y = lcg_rand_n (src_height); - dst_x = lcg_rand_n (dst_width); - dst_y = lcg_rand_n (dst_height); + src_x = prng_rand_n (src_width); + src_y = prng_rand_n (src_height); + dst_x = prng_rand_n (dst_width); + dst_y = prng_rand_n (dst_height); mask_img = NULL; mask_fmt = PIXMAN_null; @@ -296,10 +297,10 @@ test_composite (int testnum, int verbose) maskbuf = NULL; if ((src_fmt == PIXMAN_x8r8g8b8 || src_fmt == PIXMAN_x8b8g8r8) && - (lcg_rand_n (4) == 0)) + (prng_rand_n (4) == 0)) { /* PIXBUF */ - mask_fmt = lcg_rand_n (2) ? PIXMAN_a8r8g8b8 : PIXMAN_a8b8g8r8; + mask_fmt = prng_rand_n (2) ? PIXMAN_a8r8g8b8 : PIXMAN_a8b8g8r8; mask_img = pixman_image_create_bits (mask_fmt, src_width, src_height, @@ -309,9 +310,9 @@ test_composite (int testnum, int verbose) mask_y = src_y; maskbuf = srcbuf; } - else if (lcg_rand_n (2)) + else if (prng_rand_n (2)) { - if (lcg_rand_n (2)) + if (prng_rand_n (2)) { mask_img = create_random_image (mask_fmt_list, max_width, max_height, max_extra_stride, &mask_fmt); @@ -324,21 +325,23 @@ test_composite (int testnum, int verbose) pixman_image_set_repeat (mask_img, PIXMAN_REPEAT_NORMAL); } - if (lcg_rand_n (2)) + if (prng_rand_n (2)) pixman_image_set_component_alpha (mask_img, 1); - mask_x = lcg_rand_n (pixman_image_get_width (mask_img)); - mask_y = lcg_rand_n (pixman_image_get_height (mask_img)); + mask_x = prng_rand_n (pixman_image_get_width (mask_img)); + mask_y = prng_rand_n (pixman_image_get_height (mask_img)); } - w = lcg_rand_n (dst_width - dst_x + 1); - h = lcg_rand_n (dst_height - dst_y + 1); + w = prng_rand_n (dst_width - dst_x + 1); + h = prng_rand_n (dst_height - dst_y + 1); if (verbose) { - printf ("op=%d, src_fmt=%08X, dst_fmt=%08X, mask_fmt=%08X\n", - op, src_fmt, dst_fmt, mask_fmt); + printf ("op=%s\n", operator_name (op)); + printf ("src_fmt=%s, dst_fmt=%s, mask_fmt=%s\n", + format_name (src_fmt), format_name (dst_fmt), 
+ format_name (mask_fmt)); printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n", src_width, src_height, dst_width, dst_height); printf ("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n", @@ -390,6 +393,8 @@ main (int argc, const char *argv[]) { int i; + prng_srand (0); + for (i = 1; i <= 8; i++) { initialize_palette (&(rgb_palette[i]), i, TRUE); @@ -397,6 +402,6 @@ main (int argc, const char *argv[]) } return fuzzer_test_main("blitters", 2000000, - 0x46136E0A, + 0x0CF3283B, test_composite, argc, argv); } diff --git a/lib/pixman/test/check-formats.c b/lib/pixman/test/check-formats.c new file mode 100644 index 000000000..7edc198c1 --- /dev/null +++ b/lib/pixman/test/check-formats.c @@ -0,0 +1,352 @@ +#include <ctype.h> +#include "utils.h" + +static int +check_op (pixman_op_t op, + pixman_format_code_t src_format, + pixman_format_code_t dest_format) +{ + uint32_t src_alpha_mask, src_green_mask; + uint32_t dest_alpha_mask, dest_green_mask; + pixel_checker_t src_checker, dest_checker; + pixman_image_t *si, *di; + uint32_t sa, sg, da, dg; + uint32_t s, d; + int retval = 0; + + pixel_checker_init (&src_checker, src_format); + pixel_checker_init (&dest_checker, dest_format); + + pixel_checker_get_masks ( + &src_checker, &src_alpha_mask, NULL, &src_green_mask, NULL); + pixel_checker_get_masks ( + &dest_checker, &dest_alpha_mask, NULL, &dest_green_mask, NULL); + + /* printf ("masks: %x %x %x %x\n", */ + /* src_alpha_mask, src_green_mask, */ + /* dest_alpha_mask, dest_green_mask); */ + + si = pixman_image_create_bits (src_format, 1, 1, &s, 4); + di = pixman_image_create_bits (dest_format, 1, 1, &d, 4); + + sa = 0; + do + { + sg = 0; + do + { + da = 0; + do + { + dg = 0; + do + { + color_t src_color, dest_color, result_color; + uint32_t orig_d; + + s = sa | sg; + d = da | dg; + + orig_d = d; + + pixel_checker_convert_pixel_to_color (&src_checker, s, &src_color); + pixel_checker_convert_pixel_to_color (&dest_checker, d, &dest_color); + + do_composite (op, &src_color, 
NULL, &dest_color, &result_color, FALSE); + + + if (!is_little_endian()) + { + s <<= 32 - PIXMAN_FORMAT_BPP (src_format); + d <<= 32 - PIXMAN_FORMAT_BPP (dest_format); + } + + pixman_image_composite32 (op, si, NULL, di, + 0, 0, 0, 0, 0, 0, 1, 1); + + if (!is_little_endian()) + d >>= (32 - PIXMAN_FORMAT_BPP (dest_format)); + + if (!pixel_checker_check (&dest_checker, d, &result_color)) + { + printf ("---- test failed ----\n"); + printf ("operator: %-32s\n", operator_name (op)); + printf ("source: %-12s pixel: %08x\n", format_name (src_format), s); + printf ("dest: %-12s pixel: %08x\n", format_name (dest_format), orig_d); + printf ("got: %-12s pixel: %08x\n", format_name (dest_format), d); + + retval = 1; + } + + dg -= dest_green_mask; + dg &= dest_green_mask; + } + while (dg != 0); + + da -= dest_alpha_mask; + da &= dest_alpha_mask; + } + while (da != 0); + + sg -= src_green_mask; + sg &= src_green_mask; + } + while (sg != 0); + + sa -= src_alpha_mask; + sa &= src_alpha_mask; + } + while (sa != 0); + + pixman_image_unref (si); + pixman_image_unref (di); + + return retval; +} + +static const pixman_op_t op_list[] = +{ + PIXMAN_OP_CLEAR, + PIXMAN_OP_SRC, + PIXMAN_OP_DST, + PIXMAN_OP_OVER, + PIXMAN_OP_OVER_REVERSE, + PIXMAN_OP_IN, + PIXMAN_OP_IN_REVERSE, + PIXMAN_OP_OUT, + PIXMAN_OP_OUT_REVERSE, + PIXMAN_OP_ATOP, + PIXMAN_OP_ATOP_REVERSE, + PIXMAN_OP_XOR, + PIXMAN_OP_ADD, + PIXMAN_OP_SATURATE, + + PIXMAN_OP_DISJOINT_CLEAR, + PIXMAN_OP_DISJOINT_SRC, + PIXMAN_OP_DISJOINT_DST, + PIXMAN_OP_DISJOINT_OVER, + PIXMAN_OP_DISJOINT_OVER_REVERSE, + PIXMAN_OP_DISJOINT_IN, + PIXMAN_OP_DISJOINT_IN_REVERSE, + PIXMAN_OP_DISJOINT_OUT, + PIXMAN_OP_DISJOINT_OUT_REVERSE, + PIXMAN_OP_DISJOINT_ATOP, + PIXMAN_OP_DISJOINT_ATOP_REVERSE, + PIXMAN_OP_DISJOINT_XOR, + + PIXMAN_OP_CONJOINT_CLEAR, + PIXMAN_OP_CONJOINT_SRC, + PIXMAN_OP_CONJOINT_DST, + PIXMAN_OP_CONJOINT_OVER, + PIXMAN_OP_CONJOINT_OVER_REVERSE, + PIXMAN_OP_CONJOINT_IN, + PIXMAN_OP_CONJOINT_IN_REVERSE, + PIXMAN_OP_CONJOINT_OUT, + 
PIXMAN_OP_CONJOINT_OUT_REVERSE, + PIXMAN_OP_CONJOINT_ATOP, + PIXMAN_OP_CONJOINT_ATOP_REVERSE, + PIXMAN_OP_CONJOINT_XOR, +}; + +static const pixman_format_code_t format_list[] = +{ + PIXMAN_a8r8g8b8, + PIXMAN_x8r8g8b8, + PIXMAN_a8b8g8r8, + PIXMAN_x8b8g8r8, + PIXMAN_b8g8r8a8, + PIXMAN_b8g8r8x8, + PIXMAN_r8g8b8a8, + PIXMAN_r8g8b8x8, + PIXMAN_x14r6g6b6, + PIXMAN_x2r10g10b10, + PIXMAN_a2r10g10b10, + PIXMAN_x2b10g10r10, + PIXMAN_a2b10g10r10, + PIXMAN_a8r8g8b8_sRGB, + PIXMAN_r8g8b8, + PIXMAN_b8g8r8, + PIXMAN_r5g6b5, + PIXMAN_b5g6r5, + PIXMAN_a1r5g5b5, + PIXMAN_x1r5g5b5, + PIXMAN_a1b5g5r5, + PIXMAN_x1b5g5r5, + PIXMAN_a4r4g4b4, + PIXMAN_x4r4g4b4, + PIXMAN_a4b4g4r4, + PIXMAN_x4b4g4r4, + PIXMAN_a8, + PIXMAN_r3g3b2, + PIXMAN_b2g3r3, + PIXMAN_a2r2g2b2, + PIXMAN_a2b2g2r2, + PIXMAN_x4a4, + PIXMAN_a4, + PIXMAN_r1g2b1, + PIXMAN_b1g2r1, + PIXMAN_a1r1g1b1, + PIXMAN_a1b1g1r1, + PIXMAN_a1, +}; + +static pixman_format_code_t +format_from_string (const char *s) +{ + int i; + + for (i = 0; i < ARRAY_LENGTH (format_list); ++i) + { + if (strcasecmp (format_name (format_list[i]), s) == 0) + return format_list[i]; + } + + return PIXMAN_null; +} + +static void +emit (const char *s, int *n_chars) +{ + *n_chars += printf ("%s,", s); + if (*n_chars > 60) + { + printf ("\n "); + *n_chars = 0; + } + else + { + printf (" "); + (*n_chars)++; + } +} + +static void +list_formats (void) +{ + int n_chars; + int i; + + printf ("Formats:\n "); + + n_chars = 0; + for (i = 0; i < ARRAY_LENGTH (format_list); ++i) + emit (format_name (format_list[i]), &n_chars); + + printf ("\n\n"); +} + +static void +list_operators (void) +{ + char short_name [128] = { 0 }; + int i, n_chars; + + printf ("Operators:\n "); + + n_chars = 0; + for (i = 0; i < ARRAY_LENGTH (op_list); ++i) + { + pixman_op_t op = op_list[i]; + int j; + + snprintf (short_name, sizeof (short_name) - 1, "%s", + operator_name (op) + strlen ("PIXMAN_OP_")); + + for (j = 0; short_name[j] != '\0'; ++j) + short_name[j] = tolower (short_name[j]); + + emit 
(short_name, &n_chars); + } + + printf ("\n\n"); +} + +static pixman_op_t +operator_from_string (const char *s) +{ + char full_name[128] = { 0 }; + int i; + + snprintf (full_name, (sizeof full_name) - 1, "PIXMAN_OP_%s", s); + + for (i = 0; i < ARRAY_LENGTH (op_list); ++i) + { + pixman_op_t op = op_list[i]; + + if (strcasecmp (operator_name (op), full_name) == 0) + return op; + } + + return PIXMAN_OP_NONE; +} + +int +main (int argc, char **argv) +{ + enum { OPTION_OP, OPTION_SRC, OPTION_DEST, LAST_OPTION } option; + pixman_format_code_t src_fmt, dest_fmt; + pixman_op_t op; + + op = PIXMAN_OP_NONE; + src_fmt = PIXMAN_null; + dest_fmt = PIXMAN_null; + + argc--; + argv++; + + for (option = OPTION_OP; option < LAST_OPTION; ++option) + { + char *arg = NULL; + + if (argc) + { + argc--; + arg = *argv++; + } + + switch (option) + { + case OPTION_OP: + if (!arg) + printf (" - missing operator\n"); + else if ((op = operator_from_string (arg)) == PIXMAN_OP_NONE) + printf (" - unknown operator %s\n", arg); + break; + + case OPTION_SRC: + if (!arg) + printf (" - missing source format\n"); + else if ((src_fmt = format_from_string (arg)) == PIXMAN_null) + printf (" - unknown source format %s\n", arg); + break; + + case OPTION_DEST: + if (!arg) + printf (" - missing destination format\n"); + else if ((dest_fmt = format_from_string (arg)) == PIXMAN_null) + printf (" - unknown destination format %s\n", arg); + break; + + default: + assert (0); + break; + } + } + + while (argc--) + { + op = PIXMAN_OP_NONE; + printf (" - unexpected argument: %s\n", *argv++); + } + + if (op == PIXMAN_OP_NONE || src_fmt == PIXMAN_null || dest_fmt == PIXMAN_null) + { + printf ("\nUsage:\n check-formats <operator> <src-format> <dest-format>\n\n"); + list_operators(); + list_formats(); + + return -1; + } + + return check_op (op, src_fmt, dest_fmt); +} diff --git a/lib/pixman/test/combiner-test.c b/lib/pixman/test/combiner-test.c index c438ae62e..01f63a56e 100644 --- a/lib/pixman/test/combiner-test.c +++ 
b/lib/pixman/test/combiner-test.c @@ -67,7 +67,7 @@ static const pixman_op_t op_list[] = static float rand_float (void) { - uint32_t u = lcg_rand_u32(); + uint32_t u = prng_rand(); return *(float *)&u; } @@ -123,7 +123,7 @@ main () impl = _pixman_internal_only_get_implementation(); - lcg_srand (0); + prng_srand (0); for (i = 0; i < ARRAY_LENGTH (op_list); ++i) { diff --git a/lib/pixman/test/composite-traps-test.c b/lib/pixman/test/composite-traps-test.c index 9fc94a4d6..2983eae83 100644 --- a/lib/pixman/test/composite-traps-test.c +++ b/lib/pixman/test/composite-traps-test.c @@ -26,7 +26,7 @@ static pixman_op_t operators[] = }; #define RANDOM_ELT(array) \ - ((array)[lcg_rand_n(ARRAY_LENGTH((array)))]) + ((array)[prng_rand_n(ARRAY_LENGTH((array)))]) static void destroy_bits (pixman_image_t *image, void *data) @@ -37,7 +37,7 @@ destroy_bits (pixman_image_t *image, void *data) static pixman_fixed_t random_fixed (int n) { - return lcg_rand_N (n << 16); + return prng_rand_n (n << 16); } /* @@ -75,17 +75,17 @@ test_composite (int testnum, FLOAT_REGS_CORRUPTION_DETECTOR_START (); - lcg_srand (testnum); + prng_srand (testnum); op = RANDOM_ELT (operators); mask_format = RANDOM_ELT (mask_formats); /* Create source image */ - if (lcg_rand_n (4) == 0) + if (prng_rand_n (4) == 0) { src_img = pixman_image_create_solid_fill ( - &(colors[lcg_rand_n (ARRAY_LENGTH (colors))])); + &(colors[prng_rand_n (ARRAY_LENGTH (colors))])); src_x = 10; src_y = 234; @@ -94,13 +94,13 @@ test_composite (int testnum, { pixman_format_code_t src_format = RANDOM_ELT(formats); int src_bpp = (PIXMAN_FORMAT_BPP (src_format) + 7) / 8; - int src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1; - int src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1; - int src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp; + int src_width = prng_rand_n (MAX_SRC_WIDTH) + 1; + int src_height = prng_rand_n (MAX_SRC_HEIGHT) + 1; + int src_stride = src_width * src_bpp + prng_rand_n (MAX_STRIDE) * src_bpp; uint32_t *bits; - 
src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2); - src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2); + src_x = -(src_width / 4) + prng_rand_n (src_width * 3 / 2); + src_y = -(src_height / 4) + prng_rand_n (src_height * 3 / 2); src_stride = (src_stride + 3) & ~3; @@ -111,19 +111,19 @@ test_composite (int testnum, pixman_image_set_destroy_function (src_img, destroy_bits, bits); - if (lcg_rand_n (8) == 0) + if (prng_rand_n (8) == 0) { pixman_box16_t clip_boxes[2]; - int n = lcg_rand_n (2) + 1; + int n = prng_rand_n (2) + 1; for (i = 0; i < n; i++) { - clip_boxes[i].x1 = lcg_rand_n (src_width); - clip_boxes[i].y1 = lcg_rand_n (src_height); + clip_boxes[i].x1 = prng_rand_n (src_width); + clip_boxes[i].y1 = prng_rand_n (src_height); clip_boxes[i].x2 = - clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1); + clip_boxes[i].x1 + prng_rand_n (src_width - clip_boxes[i].x1); clip_boxes[i].y2 = - clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1); + clip_boxes[i].y1 + prng_rand_n (src_height - clip_boxes[i].y1); if (verbose) { @@ -146,15 +146,15 @@ test_composite (int testnum, { dst_format = RANDOM_ELT(formats); dst_bpp = (PIXMAN_FORMAT_BPP (dst_format) + 7) / 8; - dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1; - dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1; - dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp; + dst_width = prng_rand_n (MAX_DST_WIDTH) + 1; + dst_height = prng_rand_n (MAX_DST_HEIGHT) + 1; + dst_stride = dst_width * dst_bpp + prng_rand_n (MAX_STRIDE) * dst_bpp; dst_stride = (dst_stride + 3) & ~3; dst_bits = (uint32_t *)make_random_bytes (dst_stride * dst_height); - dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2); - dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2); + dst_x = -(dst_width / 4) + prng_rand_n (dst_width * 3 / 2); + dst_y = -(dst_height / 4) + prng_rand_n (dst_height * 3 / 2); dst_img = pixman_image_create_bits ( dst_format, dst_width, dst_height, dst_bits, dst_stride); @@ 
-166,7 +166,7 @@ test_composite (int testnum, { int i; - n_traps = lcg_rand_n (25); + n_traps = prng_rand_n (25); traps = fence_malloc (n_traps * sizeof (pixman_trapezoid_t)); for (i = 0; i < n_traps; ++i) @@ -186,18 +186,18 @@ test_composite (int testnum, } } - if (lcg_rand_n (8) == 0) + if (prng_rand_n (8) == 0) { pixman_box16_t clip_boxes[2]; - int n = lcg_rand_n (2) + 1; + int n = prng_rand_n (2) + 1; for (i = 0; i < n; i++) { - clip_boxes[i].x1 = lcg_rand_n (dst_width); - clip_boxes[i].y1 = lcg_rand_n (dst_height); + clip_boxes[i].x1 = prng_rand_n (dst_width); + clip_boxes[i].y1 = prng_rand_n (dst_height); clip_boxes[i].x2 = - clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1); + clip_boxes[i].x1 + prng_rand_n (dst_width - clip_boxes[i].x1); clip_boxes[i].y2 = - clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1); + clip_boxes[i].y1 + prng_rand_n (dst_height - clip_boxes[i].y1); if (verbose) { @@ -251,6 +251,6 @@ test_composite (int testnum, int main (int argc, const char *argv[]) { - return fuzzer_test_main("composite traps", 40000, 0x33BFAA55, + return fuzzer_test_main("composite traps", 40000, 0x749BCC57, test_composite, argc, argv); } diff --git a/lib/pixman/test/composite.c b/lib/pixman/test/composite.c index 2930fb75b..9e51a8f65 100644 --- a/lib/pixman/test/composite.c +++ b/lib/pixman/test/composite.c @@ -28,15 +28,7 @@ #include <time.h> #include "utils.h" -typedef struct format_t format_t; typedef struct image_t image_t; -typedef struct operator_t operator_t; - -struct format_t -{ - pixman_format_code_t format; - const char *name; -}; static const color_t colors[] = { @@ -82,401 +74,113 @@ static const int sizes[] = 10 }; -static const format_t formats[] = +static const pixman_format_code_t formats[] = { -#define P(x) { PIXMAN_##x, #x } - /* 32 bpp formats */ - P(a8r8g8b8), - P(x8r8g8b8), - P(a8b8g8r8), - P(x8b8g8r8), - P(b8g8r8a8), - P(b8g8r8x8), - P(r8g8b8a8), - P(r8g8b8x8), - P(x2r10g10b10), - P(x2b10g10r10), - P(a2r10g10b10), - 
P(a2b10g10r10), + PIXMAN_a8r8g8b8, + PIXMAN_x8r8g8b8, + PIXMAN_a8b8g8r8, + PIXMAN_x8b8g8r8, + PIXMAN_b8g8r8a8, + PIXMAN_b8g8r8x8, + PIXMAN_r8g8b8a8, + PIXMAN_r8g8b8x8, + PIXMAN_x2r10g10b10, + PIXMAN_x2b10g10r10, + PIXMAN_a2r10g10b10, + PIXMAN_a2b10g10r10, /* sRGB formats */ - P(a8r8g8b8_sRGB), + PIXMAN_a8r8g8b8_sRGB, /* 24 bpp formats */ - P(r8g8b8), - P(b8g8r8), - P(r5g6b5), - P(b5g6r5), + PIXMAN_r8g8b8, + PIXMAN_b8g8r8, + PIXMAN_r5g6b5, + PIXMAN_b5g6r5, /* 16 bpp formats */ - P(x1r5g5b5), - P(x1b5g5r5), - P(a1r5g5b5), - P(a1b5g5r5), - P(a4b4g4r4), - P(x4b4g4r4), - P(a4r4g4b4), - P(x4r4g4b4), + PIXMAN_x1r5g5b5, + PIXMAN_x1b5g5r5, + PIXMAN_a1r5g5b5, + PIXMAN_a1b5g5r5, + PIXMAN_a4b4g4r4, + PIXMAN_x4b4g4r4, + PIXMAN_a4r4g4b4, + PIXMAN_x4r4g4b4, /* 8 bpp formats */ - P(a8), - P(r3g3b2), - P(b2g3r3), - P(a2r2g2b2), - P(a2b2g2r2), - P(x4a4), + PIXMAN_a8, + PIXMAN_r3g3b2, + PIXMAN_b2g3r3, + PIXMAN_a2r2g2b2, + PIXMAN_a2b2g2r2, + PIXMAN_x4a4, /* 4 bpp formats */ - P(a4), - P(r1g2b1), - P(b1g2r1), - P(a1r1g1b1), - P(a1b1g1r1), + PIXMAN_a4, + PIXMAN_r1g2b1, + PIXMAN_b1g2r1, + PIXMAN_a1r1g1b1, + PIXMAN_a1b1g1r1, /* 1 bpp formats */ - P(a1) -#undef P + PIXMAN_a1, }; struct image_t { pixman_image_t *image; - const format_t *format; + pixman_format_code_t format; const color_t *color; pixman_repeat_t repeat; int size; }; -struct operator_t +static const pixman_op_t operators[] = { - pixman_op_t op; - const char *name; + PIXMAN_OP_CLEAR, + PIXMAN_OP_SRC, + PIXMAN_OP_DST, + PIXMAN_OP_OVER, + PIXMAN_OP_OVER_REVERSE, + PIXMAN_OP_IN, + PIXMAN_OP_IN_REVERSE, + PIXMAN_OP_OUT, + PIXMAN_OP_OUT_REVERSE, + PIXMAN_OP_ATOP, + PIXMAN_OP_ATOP_REVERSE, + PIXMAN_OP_XOR, + PIXMAN_OP_ADD, + PIXMAN_OP_SATURATE, + + PIXMAN_OP_DISJOINT_CLEAR, + PIXMAN_OP_DISJOINT_SRC, + PIXMAN_OP_DISJOINT_DST, + PIXMAN_OP_DISJOINT_OVER, + PIXMAN_OP_DISJOINT_OVER_REVERSE, + PIXMAN_OP_DISJOINT_IN, + PIXMAN_OP_DISJOINT_IN_REVERSE, + PIXMAN_OP_DISJOINT_OUT, + PIXMAN_OP_DISJOINT_OUT_REVERSE, + PIXMAN_OP_DISJOINT_ATOP, + 
PIXMAN_OP_DISJOINT_ATOP_REVERSE, + PIXMAN_OP_DISJOINT_XOR, + + PIXMAN_OP_CONJOINT_CLEAR, + PIXMAN_OP_CONJOINT_SRC, + PIXMAN_OP_CONJOINT_DST, + PIXMAN_OP_CONJOINT_OVER, + PIXMAN_OP_CONJOINT_OVER_REVERSE, + PIXMAN_OP_CONJOINT_IN, + PIXMAN_OP_CONJOINT_IN_REVERSE, + PIXMAN_OP_CONJOINT_OUT, + PIXMAN_OP_CONJOINT_OUT_REVERSE, + PIXMAN_OP_CONJOINT_ATOP, + PIXMAN_OP_CONJOINT_ATOP_REVERSE, + PIXMAN_OP_CONJOINT_XOR, }; -static const operator_t operators[] = -{ -#define P(x) { PIXMAN_OP_##x, #x } - P(CLEAR), - P(SRC), - P(DST), - P(OVER), - P(OVER_REVERSE), - P(IN), - P(IN_REVERSE), - P(OUT), - P(OUT_REVERSE), - P(ATOP), - P(ATOP_REVERSE), - P(XOR), - P(ADD), - P(SATURATE), - - P(DISJOINT_CLEAR), - P(DISJOINT_SRC), - P(DISJOINT_DST), - P(DISJOINT_OVER), - P(DISJOINT_OVER_REVERSE), - P(DISJOINT_IN), - P(DISJOINT_IN_REVERSE), - P(DISJOINT_OUT), - P(DISJOINT_OUT_REVERSE), - P(DISJOINT_ATOP), - P(DISJOINT_ATOP_REVERSE), - P(DISJOINT_XOR), - - P(CONJOINT_CLEAR), - P(CONJOINT_SRC), - P(CONJOINT_DST), - P(CONJOINT_OVER), - P(CONJOINT_OVER_REVERSE), - P(CONJOINT_IN), - P(CONJOINT_IN_REVERSE), - P(CONJOINT_OUT), - P(CONJOINT_OUT_REVERSE), - P(CONJOINT_ATOP), - P(CONJOINT_ATOP_REVERSE), - P(CONJOINT_XOR), -#undef P -}; - -static double -calc_op (pixman_op_t op, double src, double dst, double srca, double dsta) -{ -#define mult_chan(src, dst, Fa, Fb) MIN ((src) * (Fa) + (dst) * (Fb), 1.0) - - double Fa, Fb; - - switch (op) - { - case PIXMAN_OP_CLEAR: - case PIXMAN_OP_DISJOINT_CLEAR: - case PIXMAN_OP_CONJOINT_CLEAR: - return mult_chan (src, dst, 0.0, 0.0); - - case PIXMAN_OP_SRC: - case PIXMAN_OP_DISJOINT_SRC: - case PIXMAN_OP_CONJOINT_SRC: - return mult_chan (src, dst, 1.0, 0.0); - - case PIXMAN_OP_DST: - case PIXMAN_OP_DISJOINT_DST: - case PIXMAN_OP_CONJOINT_DST: - return mult_chan (src, dst, 0.0, 1.0); - - case PIXMAN_OP_OVER: - return mult_chan (src, dst, 1.0, 1.0 - srca); - - case PIXMAN_OP_OVER_REVERSE: - return mult_chan (src, dst, 1.0 - dsta, 1.0); - - case PIXMAN_OP_IN: - return 
mult_chan (src, dst, dsta, 0.0); - - case PIXMAN_OP_IN_REVERSE: - return mult_chan (src, dst, 0.0, srca); - - case PIXMAN_OP_OUT: - return mult_chan (src, dst, 1.0 - dsta, 0.0); - - case PIXMAN_OP_OUT_REVERSE: - return mult_chan (src, dst, 0.0, 1.0 - srca); - - case PIXMAN_OP_ATOP: - return mult_chan (src, dst, dsta, 1.0 - srca); - - case PIXMAN_OP_ATOP_REVERSE: - return mult_chan (src, dst, 1.0 - dsta, srca); - - case PIXMAN_OP_XOR: - return mult_chan (src, dst, 1.0 - dsta, 1.0 - srca); - - case PIXMAN_OP_ADD: - return mult_chan (src, dst, 1.0, 1.0); - - case PIXMAN_OP_SATURATE: - case PIXMAN_OP_DISJOINT_OVER_REVERSE: - if (srca == 0.0) - Fa = 1.0; - else - Fa = MIN (1.0, (1.0 - dsta) / srca); - return mult_chan (src, dst, Fa, 1.0); - - case PIXMAN_OP_DISJOINT_OVER: - if (dsta == 0.0) - Fb = 1.0; - else - Fb = MIN (1.0, (1.0 - srca) / dsta); - return mult_chan (src, dst, 1.0, Fb); - - case PIXMAN_OP_DISJOINT_IN: - if (srca == 0.0) - Fa = 0.0; - else - Fa = MAX (0.0, 1.0 - (1.0 - dsta) / srca); - return mult_chan (src, dst, Fa, 0.0); - - case PIXMAN_OP_DISJOINT_IN_REVERSE: - if (dsta == 0.0) - Fb = 0.0; - else - Fb = MAX (0.0, 1.0 - (1.0 - srca) / dsta); - return mult_chan (src, dst, 0.0, Fb); - - case PIXMAN_OP_DISJOINT_OUT: - if (srca == 0.0) - Fa = 1.0; - else - Fa = MIN (1.0, (1.0 - dsta) / srca); - return mult_chan (src, dst, Fa, 0.0); - - case PIXMAN_OP_DISJOINT_OUT_REVERSE: - if (dsta == 0.0) - Fb = 1.0; - else - Fb = MIN (1.0, (1.0 - srca) / dsta); - return mult_chan (src, dst, 0.0, Fb); - - case PIXMAN_OP_DISJOINT_ATOP: - if (srca == 0.0) - Fa = 0.0; - else - Fa = MAX (0.0, 1.0 - (1.0 - dsta) / srca); - if (dsta == 0.0) - Fb = 1.0; - else - Fb = MIN (1.0, (1.0 - srca) / dsta); - return mult_chan (src, dst, Fa, Fb); - - case PIXMAN_OP_DISJOINT_ATOP_REVERSE: - if (srca == 0.0) - Fa = 1.0; - else - Fa = MIN (1.0, (1.0 - dsta) / srca); - if (dsta == 0.0) - Fb = 0.0; - else - Fb = MAX (0.0, 1.0 - (1.0 - srca) / dsta); - return mult_chan (src, dst, Fa, Fb); - - 
case PIXMAN_OP_DISJOINT_XOR: - if (srca == 0.0) - Fa = 1.0; - else - Fa = MIN (1.0, (1.0 - dsta) / srca); - if (dsta == 0.0) - Fb = 1.0; - else - Fb = MIN (1.0, (1.0 - srca) / dsta); - return mult_chan (src, dst, Fa, Fb); - - case PIXMAN_OP_CONJOINT_OVER: - if (dsta == 0.0) - Fb = 0.0; - else - Fb = MAX (0.0, 1.0 - srca / dsta); - return mult_chan (src, dst, 1.0, Fb); - - case PIXMAN_OP_CONJOINT_OVER_REVERSE: - if (srca == 0.0) - Fa = 0.0; - else - Fa = MAX (0.0, 1.0 - dsta / srca); - return mult_chan (src, dst, Fa, 1.0); - - case PIXMAN_OP_CONJOINT_IN: - if (srca == 0.0) - Fa = 1.0; - else - Fa = MIN (1.0, dsta / srca); - return mult_chan (src, dst, Fa, 0.0); - - case PIXMAN_OP_CONJOINT_IN_REVERSE: - if (dsta == 0.0) - Fb = 1.0; - else - Fb = MIN (1.0, srca / dsta); - return mult_chan (src, dst, 0.0, Fb); - - case PIXMAN_OP_CONJOINT_OUT: - if (srca == 0.0) - Fa = 0.0; - else - Fa = MAX (0.0, 1.0 - dsta / srca); - return mult_chan (src, dst, Fa, 0.0); - - case PIXMAN_OP_CONJOINT_OUT_REVERSE: - if (dsta == 0.0) - Fb = 0.0; - else - Fb = MAX (0.0, 1.0 - srca / dsta); - return mult_chan (src, dst, 0.0, Fb); - - case PIXMAN_OP_CONJOINT_ATOP: - if (srca == 0.0) - Fa = 1.0; - else - Fa = MIN (1.0, dsta / srca); - if (dsta == 0.0) - Fb = 0.0; - else - Fb = MAX (0.0, 1.0 - srca / dsta); - return mult_chan (src, dst, Fa, Fb); - - case PIXMAN_OP_CONJOINT_ATOP_REVERSE: - if (srca == 0.0) - Fa = 0.0; - else - Fa = MAX (0.0, 1.0 - dsta / srca); - if (dsta == 0.0) - Fb = 1.0; - else - Fb = MIN (1.0, srca / dsta); - return mult_chan (src, dst, Fa, Fb); - - case PIXMAN_OP_CONJOINT_XOR: - if (srca == 0.0) - Fa = 0.0; - else - Fa = MAX (0.0, 1.0 - dsta / srca); - if (dsta == 0.0) - Fb = 0.0; - else - Fb = MAX (0.0, 1.0 - srca / dsta); - return mult_chan (src, dst, Fa, Fb); - - case PIXMAN_OP_MULTIPLY: - case PIXMAN_OP_SCREEN: - case PIXMAN_OP_OVERLAY: - case PIXMAN_OP_DARKEN: - case PIXMAN_OP_LIGHTEN: - case PIXMAN_OP_COLOR_DODGE: - case PIXMAN_OP_COLOR_BURN: - case 
PIXMAN_OP_HARD_LIGHT: - case PIXMAN_OP_SOFT_LIGHT: - case PIXMAN_OP_DIFFERENCE: - case PIXMAN_OP_EXCLUSION: - case PIXMAN_OP_HSL_HUE: - case PIXMAN_OP_HSL_SATURATION: - case PIXMAN_OP_HSL_COLOR: - case PIXMAN_OP_HSL_LUMINOSITY: - default: - abort(); - return 0; /* silence MSVC */ - } -#undef mult_chan -} - -static void -do_composite (pixman_op_t op, - const color_t *src, - const color_t *mask, - const color_t *dst, - color_t *result, - pixman_bool_t component_alpha) -{ - color_t srcval, srcalpha; - - if (mask == NULL) - { - srcval = *src; - - srcalpha.r = src->a; - srcalpha.g = src->a; - srcalpha.b = src->a; - srcalpha.a = src->a; - } - else if (component_alpha) - { - srcval.r = src->r * mask->r; - srcval.g = src->g * mask->g; - srcval.b = src->b * mask->b; - srcval.a = src->a * mask->a; - - srcalpha.r = src->a * mask->r; - srcalpha.g = src->a * mask->g; - srcalpha.b = src->a * mask->b; - srcalpha.a = src->a * mask->a; - } - else - { - srcval.r = src->r * mask->a; - srcval.g = src->g * mask->a; - srcval.b = src->b * mask->a; - srcval.a = src->a * mask->a; - - srcalpha.r = src->a * mask->a; - srcalpha.g = src->a * mask->a; - srcalpha.b = src->a * mask->a; - srcalpha.a = src->a * mask->a; - } - - result->r = calc_op (op, srcval.r, dst->r, srcalpha.r, dst->a); - result->g = calc_op (op, srcval.g, dst->g, srcalpha.g, dst->a); - result->b = calc_op (op, srcval.b, dst->b, srcalpha.b, dst->a); - result->a = calc_op (op, srcval.a, dst->a, srcalpha.a, dst->a); -} - static uint32_t get_value (pixman_image_t *image) { @@ -498,7 +202,7 @@ describe_image (image_t *info, char *buf) if (info->size) { sprintf (buf, "%s, %dx%d%s", - info->format->name, + format_name (info->format), info->size, info->size, info->repeat ? 
" R" :""); } @@ -521,7 +225,7 @@ describe_color (const color_t *color, char *buf) static pixman_bool_t composite_test (image_t *dst, - const operator_t *op, + pixman_op_t op, image_t *src, image_t *mask, pixman_bool_t component_alpha, @@ -534,12 +238,12 @@ composite_test (image_t *dst, { pixman_image_set_component_alpha (mask->image, component_alpha); - pixman_image_composite (op->op, src->image, mask->image, dst->image, + pixman_image_composite (op, src->image, mask->image, dst->image, 0, 0, 0, 0, 0, 0, dst->size, dst->size); } else { - pixman_image_composite (op->op, src->image, NULL, dst->image, + pixman_image_composite (op, src->image, NULL, dst->image, 0, 0, 0, 0, 0, 0, @@ -561,43 +265,43 @@ composite_test (image_t *dst, */ if (src->size) { - if (PIXMAN_FORMAT_TYPE (src->format->format) == PIXMAN_TYPE_ARGB_SRGB) + if (PIXMAN_FORMAT_TYPE (src->format) == PIXMAN_TYPE_ARGB_SRGB) { tsrc.r = convert_linear_to_srgb (tsrc.r); tsrc.g = convert_linear_to_srgb (tsrc.g); tsrc.b = convert_linear_to_srgb (tsrc.b); - round_color (src->format->format, &tsrc); + round_color (src->format, &tsrc); tsrc.r = convert_srgb_to_linear (tsrc.r); tsrc.g = convert_srgb_to_linear (tsrc.g); tsrc.b = convert_srgb_to_linear (tsrc.b); } else { - round_color (src->format->format, &tsrc); + round_color (src->format, &tsrc); } } if (mask && mask->size) { - if (PIXMAN_FORMAT_TYPE (mask->format->format) == PIXMAN_TYPE_ARGB_SRGB) + if (PIXMAN_FORMAT_TYPE (mask->format) == PIXMAN_TYPE_ARGB_SRGB) { tmsk.r = convert_linear_to_srgb (tmsk.r); tmsk.g = convert_linear_to_srgb (tmsk.g); tmsk.b = convert_linear_to_srgb (tmsk.b); - round_color (mask->format->format, &tmsk); + round_color (mask->format, &tmsk); tmsk.r = convert_srgb_to_linear (tmsk.r); tmsk.g = convert_srgb_to_linear (tmsk.g); tmsk.b = convert_srgb_to_linear (tmsk.b); } else { - round_color (mask->format->format, &tmsk); + round_color (mask->format, &tmsk); } } if (mask) { - if (component_alpha && PIXMAN_FORMAT_R (mask->format->format) == 0) 
+ if (component_alpha && PIXMAN_FORMAT_R (mask->format) == 0) { /* Ax component-alpha masks expand alpha into * all color channels. @@ -606,29 +310,29 @@ composite_test (image_t *dst, } } - if (PIXMAN_FORMAT_TYPE (dst->format->format) == PIXMAN_TYPE_ARGB_SRGB) + if (PIXMAN_FORMAT_TYPE (dst->format) == PIXMAN_TYPE_ARGB_SRGB) { tdst.r = convert_linear_to_srgb (tdst.r); tdst.g = convert_linear_to_srgb (tdst.g); tdst.b = convert_linear_to_srgb (tdst.b); - round_color (dst->format->format, &tdst); + round_color (dst->format, &tdst); tdst.r = convert_srgb_to_linear (tdst.r); tdst.g = convert_srgb_to_linear (tdst.g); tdst.b = convert_srgb_to_linear (tdst.b); } else { - round_color (dst->format->format, &tdst); + round_color (dst->format, &tdst); } - do_composite (op->op, + do_composite (op, &tsrc, mask? &tmsk : NULL, &tdst, &expected, component_alpha); - pixel_checker_init (&checker, dst->format->format); + pixel_checker_init (&checker, dst->format); if (!pixel_checker_check (&checker, get_value (dst->image), &expected)) { @@ -638,7 +342,7 @@ composite_test (image_t *dst, printf ("---- Test %d failed ----\n", testno); printf ("Operator: %s %s\n", - op->name, component_alpha ? "CA" : ""); + operator_name (op), component_alpha ? 
"CA" : ""); printf ("Source: %s\n", describe_image (src, buf)); if (mask != NULL) @@ -687,7 +391,7 @@ image_init (image_t *info, info->color = &colors[color]; compute_pixman_color (info->color, &fill); - info->format = &formats[format]; + info->format = formats[format]; info->size = sizes[size] & ~FLAGS; info->repeat = PIXMAN_REPEAT_NONE; @@ -695,7 +399,7 @@ image_init (image_t *info, { pixman_image_t *solid; - info->image = pixman_image_create_bits (info->format->format, + info->image = pixman_image_create_bits (info->format, info->size, info->size, NULL, 0); @@ -725,38 +429,38 @@ image_fini (image_t *info) static int random_size (void) { - return lcg_rand_n (ARRAY_LENGTH (sizes)); + return prng_rand_n (ARRAY_LENGTH (sizes)); } static int random_color (void) { - return lcg_rand_n (ARRAY_LENGTH (colors)); + return prng_rand_n (ARRAY_LENGTH (colors)); } static int random_format (void) { - return lcg_rand_n (ARRAY_LENGTH (formats)); + return prng_rand_n (ARRAY_LENGTH (formats)); } static pixman_bool_t run_test (uint32_t seed) { image_t src, mask, dst; - const operator_t *op; + pixman_op_t op; int ca; int ok; - lcg_srand (seed); + prng_srand (seed); image_init (&dst, random_color(), random_format(), 1); image_init (&src, random_color(), random_format(), random_size()); image_init (&mask, random_color(), random_format(), random_size()); - op = &(operators [lcg_rand_n (ARRAY_LENGTH (operators))]); + op = operators [prng_rand_n (ARRAY_LENGTH (operators))]; - ca = lcg_rand_n (3); + ca = prng_rand_n (3); switch (ca) { diff --git a/lib/pixman/test/glyph-test.c b/lib/pixman/test/glyph-test.c index 9dd5b41e4..1811add73 100644 --- a/lib/pixman/test/glyph-test.c +++ b/lib/pixman/test/glyph-test.c @@ -107,7 +107,7 @@ random_format (const pixman_format_code_t *formats) i = 0; while (formats[i] != PIXMAN_null) ++i; - return formats[lcg_rand_n (i)]; + return formats[prng_rand_n (i)]; } static pixman_image_t * @@ -122,27 +122,27 @@ create_image (int max_size, const 
pixman_format_code_t *formats, uint32_t flags) int i; pixman_image_destroy_func_t destroy; - if ((flags & ALLOW_SOLID) && lcg_rand_n (4) == 0) + if ((flags & ALLOW_SOLID) && prng_rand_n (4) == 0) { pixman_color_t color; - color.alpha = lcg_rand_u32(); - color.red = lcg_rand_u32(); - color.green = lcg_rand_u32(); - color.blue = lcg_rand_u32(); + color.alpha = prng_rand(); + color.red = prng_rand(); + color.green = prng_rand(); + color.blue = prng_rand(); return pixman_image_create_solid_fill (&color); } - width = lcg_rand_n (max_size) + 1; - height = lcg_rand_n (max_size) + 1; + width = prng_rand_n (max_size) + 1; + height = prng_rand_n (max_size) + 1; format = random_format (formats); bpp = PIXMAN_FORMAT_BPP (format); - stride = (width * bpp + 7) / 8 + lcg_rand_n (17); + stride = (width * bpp + 7) / 8 + prng_rand_n (17); stride = (stride + 3) & ~3; - if (lcg_rand_n (64) == 0) + if (prng_rand_n (64) == 0) { if (!(data = (uint32_t *)make_random_bytes (stride * height))) { @@ -153,34 +153,28 @@ create_image (int max_size, const pixman_format_code_t *formats, uint32_t flags) } else { - uint8_t *d8; - data = malloc (stride * height); - - d8 = (uint8_t *)data; - for (i = 0; i < height * stride; ++i) - d8[i] = lcg_rand_n (256); - + prng_randmemset (data, height * stride, 0); destroy = destroy_malloced; } image = pixman_image_create_bits (format, width, height, data, stride); pixman_image_set_destroy_function (image, destroy, data); - if ((flags & ALLOW_CLIPPED) && lcg_rand_n (8) == 0) + if ((flags & ALLOW_CLIPPED) && prng_rand_n (8) == 0) { pixman_box16_t clip_boxes[8]; pixman_region16_t clip; - int n = lcg_rand_n (8) + 1; + int n = prng_rand_n (8) + 1; for (i = 0; i < n; i++) { - clip_boxes[i].x1 = lcg_rand_n (width); - clip_boxes[i].y1 = lcg_rand_n (height); + clip_boxes[i].x1 = prng_rand_n (width); + clip_boxes[i].y1 = prng_rand_n (height); clip_boxes[i].x2 = - clip_boxes[i].x1 + lcg_rand_n (width - clip_boxes[i].x1); + clip_boxes[i].x1 + prng_rand_n (width - 
clip_boxes[i].x1); clip_boxes[i].y2 = - clip_boxes[i].y1 + lcg_rand_n (height - clip_boxes[i].y1); + clip_boxes[i].y1 + prng_rand_n (height - clip_boxes[i].y1); } pixman_region_init_rects (&clip, clip_boxes, n); @@ -188,35 +182,35 @@ create_image (int max_size, const pixman_format_code_t *formats, uint32_t flags) pixman_region_fini (&clip); } - if ((flags & ALLOW_SOURCE_CLIPPING) && lcg_rand_n (4) == 0) + if ((flags & ALLOW_SOURCE_CLIPPING) && prng_rand_n (4) == 0) { pixman_image_set_source_clipping (image, TRUE); pixman_image_set_has_client_clip (image, TRUE); } - if ((flags & ALLOW_ALPHA_MAP) && lcg_rand_n (16) == 0) + if ((flags & ALLOW_ALPHA_MAP) && prng_rand_n (16) == 0) { pixman_image_t *alpha_map; int alpha_x, alpha_y; - alpha_x = lcg_rand_n (width); - alpha_y = lcg_rand_n (height); + alpha_x = prng_rand_n (width); + alpha_y = prng_rand_n (height); alpha_map = create_image (max_size, formats, (flags & ~(ALLOW_ALPHA_MAP | ALLOW_SOLID))); pixman_image_set_alpha_map (image, alpha_map, alpha_x, alpha_y); pixman_image_unref (alpha_map); } - if ((flags & ALLOW_REPEAT) && lcg_rand_n (2) == 0) - pixman_image_set_repeat (image, lcg_rand_n (4)); + if ((flags & ALLOW_REPEAT) && prng_rand_n (2) == 0) + pixman_image_set_repeat (image, prng_rand_n (4)); image_endian_swap (image); return image; } -#define KEY1(p) ((void *)(((unsigned long)p) ^ (0xa7e23dfaUL))) -#define KEY2(p) ((void *)(((unsigned long)p) ^ (0xabcd9876UL))) +#define KEY1(p) ((void *)(((uintptr_t)p) ^ (0xa7e23dfaUL))) +#define KEY2(p) ((void *)(((uintptr_t)p) ^ (0xabcd9876UL))) #define MAX_GLYPHS 32 @@ -230,7 +224,7 @@ test_glyphs (int testnum, int verbose) int n_glyphs, i; pixman_glyph_cache_t *cache; - lcg_srand (testnum); + prng_srand (testnum); cache = pixman_glyph_cache_create (); @@ -245,13 +239,13 @@ test_glyphs (int testnum, int verbose) pixman_glyph_cache_freeze (cache); - n_glyphs = lcg_rand_n (MAX_GLYPHS); + n_glyphs = prng_rand_n (MAX_GLYPHS); for (i = 0; i < n_glyphs; ++i) glyph_images[i] = 
create_image (32, glyph_formats, 0); for (i = 0; i < 4 * n_glyphs; ++i) { - int g = lcg_rand_n (n_glyphs); + int g = prng_rand_n (n_glyphs); pixman_image_t *glyph_img = glyph_images[g]; void *key1 = KEY1 (glyph_img); void *key2 = KEY2 (glyph_img); @@ -264,21 +258,21 @@ test_glyphs (int testnum, int verbose) } glyphs[i].glyph = glyph; - glyphs[i].x = lcg_rand_n (128); - glyphs[i].y = lcg_rand_n (128); + glyphs[i].x = prng_rand_n (128); + glyphs[i].y = prng_rand_n (128); } - if (lcg_rand_n (2) == 0) + if (prng_rand_n (2) == 0) { - int src_x = lcg_rand_n (300) - 150; - int src_y = lcg_rand_n (300) - 150; - int mask_x = lcg_rand_n (64) - 32; - int mask_y = lcg_rand_n (64) - 32; - int dest_x = lcg_rand_n (64) - 32; - int dest_y = lcg_rand_n (64) - 32; - int width = lcg_rand_n (64); - int height = lcg_rand_n (64); - pixman_op_t op = operators[lcg_rand_n (ARRAY_LENGTH (operators))]; + int src_x = prng_rand_n (300) - 150; + int src_y = prng_rand_n (300) - 150; + int mask_x = prng_rand_n (64) - 32; + int mask_y = prng_rand_n (64) - 32; + int dest_x = prng_rand_n (64) - 32; + int dest_y = prng_rand_n (64) - 32; + int width = prng_rand_n (64); + int height = prng_rand_n (64); + pixman_op_t op = operators[prng_rand_n (ARRAY_LENGTH (operators))]; pixman_format_code_t format = random_format (glyph_formats); pixman_composite_glyphs ( @@ -292,11 +286,11 @@ test_glyphs (int testnum, int verbose) } else { - pixman_op_t op = operators[lcg_rand_n (ARRAY_LENGTH (operators))]; - int src_x = lcg_rand_n (300) - 150; - int src_y = lcg_rand_n (300) - 150; - int dest_x = lcg_rand_n (64) - 32; - int dest_y = lcg_rand_n (64) - 32; + pixman_op_t op = operators[prng_rand_n (ARRAY_LENGTH (operators))]; + int src_x = prng_rand_n (300) - 150; + int src_y = prng_rand_n (300) - 150; + int dest_x = prng_rand_n (64) - 32; + int dest_y = prng_rand_n (64) - 32; pixman_composite_glyphs_no_mask ( op, source, dest, @@ -333,6 +327,6 @@ int main (int argc, const char *argv[]) { return fuzzer_test_main 
("glyph", 30000, - 0x79E74996, + 0xFA478A79, test_glyphs, argc, argv); } diff --git a/lib/pixman/test/lowlevel-blt-bench.c b/lib/pixman/test/lowlevel-blt-bench.c index 3afa926b0..1049e21e7 100644 --- a/lib/pixman/test/lowlevel-blt-bench.c +++ b/lib/pixman/test/lowlevel-blt-bench.c @@ -33,6 +33,14 @@ #define L1CACHE_SIZE (8 * 1024) #define L2CACHE_SIZE (128 * 1024) +/* This is applied to both L1 and L2 tests - alternatively, you could + * parameterise bench_L or split it into two functions. It could be + * read at runtime on some architectures, but it only really matters + * that it's a number that's an integer divisor of both cacheline + * lengths, and further, it only really matters for caches that don't + * do allocate-on-write. */ +#define CACHELINE_LENGTH (32) /* bytes */ + #define WIDTH 1920 #define HEIGHT 1080 #define BUFSIZE (WIDTH * HEIGHT * 4) @@ -168,18 +176,29 @@ bench_L (pixman_op_t op, int width, int lines_count) { - int64_t i, j; + int64_t i, j, k; int x = 0; int q = 0; volatile int qx; for (i = 0; i < n; i++) { - /* touch destination buffer to fetch it into L1 cache */ - for (j = 0; j < width + 64; j += 16) { - q += dst[j]; - q += src[j]; - } + /* For caches without allocate-on-write, we need to force the + * destination buffer back into the cache on each iteration, + * otherwise if they are evicted during the test, they remain + * uncached. This doesn't matter for tests which read the + * destination buffer, or for caches that do allocate-on-write, + * but in those cases this loop just adds constant time, which + * should be successfully cancelled out. 
+ */ + for (j = 0; j < lines_count; j++) + { + for (k = 0; k < width + 62; k += CACHELINE_LENGTH / sizeof *dst) + { + q += dst[j * WIDTH + k]; + } + q += dst[j * WIDTH + width + 62]; + } if (++x >= 64) x = 0; call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 63 - x, 0, width, lines_count); @@ -366,6 +385,7 @@ bench_composite (char * testname, double t1, t2, t3, pix_cnt; int64_t n, l1test_width, nlines; double bytes_per_pix = 0; + pixman_bool_t bench_pixbuf = FALSE; pixman_composite_func_t func = pixman_image_composite_wrapper; @@ -403,16 +423,20 @@ bench_composite (char * testname, mask_img = NULL; xmask_img = NULL; + if (strcmp (testname, "pixbuf") == 0 || strcmp (testname, "rpixbuf") == 0) + { + bench_pixbuf = TRUE; + } if (!(mask_flags & SOLID_FLAG) && mask_fmt != PIXMAN_null) { bytes_per_pix += (mask_fmt >> 24) / ((op == PIXMAN_OP_SRC) ? 8.0 : 4.0); mask_img = pixman_image_create_bits (mask_fmt, WIDTH, HEIGHT, - mask, + bench_pixbuf ? src : mask, WIDTH * 4); xmask_img = pixman_image_create_bits (mask_fmt, XWIDTH, XHEIGHT, - mask, + bench_pixbuf ? src : mask, XWIDTH * 4); } else if (mask_fmt != PIXMAN_null) @@ -441,8 +465,8 @@ bench_composite (char * testname, printf ("%24s %c", testname, func != pixman_image_composite_wrapper ? 
'-' : '='); - memcpy (src, dst, BUFSIZE); memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); l1test_width = L1CACHE_SIZE / 8 - 64; if (l1test_width < 1) @@ -461,8 +485,8 @@ bench_composite (char * testname, ((t3 - t2) - (t2 - t1)) / 1000000.); fflush (stdout); - memcpy (src, dst, BUFSIZE); memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); nlines = (L2CACHE_SIZE / l1test_width) / ((PIXMAN_FORMAT_BPP(src_fmt) + PIXMAN_FORMAT_BPP(dst_fmt)) / 8); @@ -480,8 +504,8 @@ bench_composite (char * testname, ((t3 - t2) - (t2 - t1)) / 1000000.); fflush (stdout); - memcpy (src, dst, BUFSIZE); memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); n = 1 + npix / (WIDTH * HEIGHT); t1 = gettime (); @@ -496,8 +520,8 @@ bench_composite (char * testname, ((double)n * (WIDTH - 64) * HEIGHT / ((t3 - t2) - (t2 - t1)) * bytes_per_pix) * (100.0 / bandwidth) ); fflush (stdout); - memcpy (src, dst, BUFSIZE); memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH); t1 = gettime (); @@ -510,8 +534,8 @@ bench_composite (char * testname, printf (" HT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.); fflush (stdout); - memcpy (src, dst, BUFSIZE); memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH); t1 = gettime (); @@ -524,8 +548,8 @@ bench_composite (char * testname, printf (" VT:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.); fflush (stdout); - memcpy (src, dst, BUFSIZE); memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); n = 1 + npix / (8 * TILEWIDTH * TILEWIDTH); t1 = gettime (); @@ -538,8 +562,8 @@ bench_composite (char * testname, printf (" R:%6.2f", (double)pix_cnt / ((t3 - t2) - (t2 - t1)) / 1000000.); fflush (stdout); - memcpy (src, dst, BUFSIZE); memcpy (dst, src, BUFSIZE); + memcpy (src, dst, BUFSIZE); n = 1 + npix / (16 * TINYWIDTH * TINYWIDTH); t1 = gettime (); @@ -616,6 +640,7 @@ tests_tbl[] = { "src_n_2x10", PIXMAN_a2r10g10b10, 1, 
PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x2r10g10b10 }, { "src_n_2a10", PIXMAN_a2r10g10b10, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r10g10b10 }, { "src_8888_0565", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, + { "src_0565_8888", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, { "src_8888_4444", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a4r4g4b4 }, { "src_8888_2222", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a2r2g2b2 }, { "src_8888_2x10", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x2r10g10b10 }, @@ -623,12 +648,16 @@ tests_tbl[] = { "src_0888_0565", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, { "src_0888_8888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, { "src_0888_x888", PIXMAN_r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, + { "src_0888_8888_rev", PIXMAN_b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, + { "src_0888_0565_rev", PIXMAN_b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, { "src_x888_x888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_x8r8g8b8 }, { "src_x888_8888", PIXMAN_x8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, { "src_8888_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, { "src_0565_0565", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, { "src_1555_0565", PIXMAN_a1r5g5b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_r5g6b5 }, { "src_0565_1555", PIXMAN_r5g6b5, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a1r5g5b5 }, + { "src_8_8", PIXMAN_a8, 0, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8 }, + { "src_n_8", PIXMAN_a8, 1, PIXMAN_OP_SRC, PIXMAN_null, 0, PIXMAN_a8 }, { "src_n_8_0565", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_r5g6b5 }, { "src_n_8_1555", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a1r5g5b5 }, { "src_n_8_4444", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_SRC, PIXMAN_a8, 0, PIXMAN_a4r4g4b4 }, 
@@ -685,6 +714,8 @@ tests_tbl[] = { "outrev_n_8888_x888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 }, { "outrev_n_8888_8888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 }, { "over_reverse_n_8888", PIXMAN_a8r8g8b8, 0, PIXMAN_OP_OVER_REVERSE, PIXMAN_null, 0, PIXMAN_a8r8g8b8 }, + { "pixbuf", PIXMAN_x8b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, 0, PIXMAN_a8r8g8b8 }, + { "rpixbuf", PIXMAN_x8b8g8r8, 0, PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, 0, PIXMAN_a8b8g8r8 }, }; int @@ -771,7 +802,7 @@ main (int argc, char *argv[]) for (i = 0; i < ARRAY_LENGTH (tests_tbl); i++) { - if (strcmp (pattern, "all") == 0 || strstr (tests_tbl[i].testname, pattern)) + if (strcmp (pattern, "all") == 0 || strcmp (tests_tbl[i].testname, pattern) == 0) { bench_composite (tests_tbl[i].testname, tests_tbl[i].src_fmt, diff --git a/lib/pixman/test/matrix-test.c b/lib/pixman/test/matrix-test.c new file mode 100644 index 000000000..8437dd291 --- /dev/null +++ b/lib/pixman/test/matrix-test.c @@ -0,0 +1,186 @@ +/* + * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "utils.h" +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> + +#ifdef HAVE_FLOAT128 + +#define pixman_fixed_to_float128(x) (((__float128)(x)) / 65536.0Q) + +typedef struct { __float128 v[3]; } pixman_vector_f128_t; +typedef struct { __float128 m[3][3]; } pixman_transform_f128_t; + +pixman_bool_t +pixman_transform_point_f128 (const pixman_transform_f128_t *t, + const pixman_vector_f128_t *v, + pixman_vector_f128_t *result) +{ + int i; + for (i = 0; i < 3; i++) + { + result->v[i] = t->m[i][0] * v->v[0] + + t->m[i][1] * v->v[1] + + t->m[i][2] * v->v[2]; + } + if (result->v[2] != 0) + { + result->v[0] /= result->v[2]; + result->v[1] /= result->v[2]; + result->v[2] = 1; + return TRUE; + } + else + { + return FALSE; + } +} + +pixman_bool_t does_it_fit_fixed_48_16 (__float128 x) +{ + if (x >= 65536.0Q * 65536.0Q * 32768.0Q) + return FALSE; + if (x <= -65536.0Q * 65536.0Q * 32768.0Q) + return FALSE; + return TRUE; +} + +#endif + +uint32_t +test_matrix (int testnum, int verbose) +{ + uint32_t crc32 = 0; + int i, j, k; + pixman_bool_t is_affine; + + prng_srand (testnum); + + for (i = 0; i < 100; i++) + { + pixman_bool_t transform_ok; + pixman_transform_t ti; + pixman_vector_48_16_t vi, result_i; +#ifdef HAVE_FLOAT128 + pixman_transform_f128_t tf; + pixman_vector_f128_t vf, result_f; +#endif + prng_randmemset (&ti, sizeof(ti), 0); + prng_randmemset (&vi, sizeof(vi), 0); + + for (j = 0; j < 3; j++) + { + /* make sure that "vi" contains 31.16 fixed point data */ + vi.v[j] >>= 17; + /* and apply random shift */ + if (prng_rand_n (3) == 0) + vi.v[j] >>= prng_rand_n (46); + } + + if (prng_rand_n (2)) + { + /* random shift for the matrix */ + for (j = 0; j < 3; 
j++) + for (k = 0; k < 3; k++) + ti.matrix[j][k] >>= prng_rand_n (30); + } + + if (prng_rand_n (2)) + { + /* affine matrix */ + ti.matrix[2][0] = 0; + ti.matrix[2][1] = 0; + ti.matrix[2][2] = pixman_fixed_1; + } + + if (prng_rand_n (2)) + { + /* cartesian coordinates */ + vi.v[2] = pixman_fixed_1; + } + + is_affine = (ti.matrix[2][0] == 0 && ti.matrix[2][1] == 0 && + ti.matrix[2][2] == pixman_fixed_1 && + vi.v[2] == pixman_fixed_1); + + transform_ok = TRUE; + if (is_affine && prng_rand_n (2)) + pixman_transform_point_31_16_affine (&ti, &vi, &result_i); + else + transform_ok = pixman_transform_point_31_16 (&ti, &vi, &result_i); + + crc32 = compute_crc32 (crc32, &result_i, sizeof(result_i)); + +#ifdef HAVE_FLOAT128 + /* compare with a reference 128-bit floating point implementation */ + for (j = 0; j < 3; j++) + { + vf.v[j] = pixman_fixed_to_float128 (vi.v[j]); + for (k = 0; k < 3; k++) + { + tf.m[j][k] = pixman_fixed_to_float128 (ti.matrix[j][k]); + } + } + + if (pixman_transform_point_f128 (&tf, &vf, &result_f)) + { + if (transform_ok || + (does_it_fit_fixed_48_16 (result_f.v[0]) && + does_it_fit_fixed_48_16 (result_f.v[1]) && + does_it_fit_fixed_48_16 (result_f.v[2]))) + { + for (j = 0; j < 3; j++) + { + double diff = fabs (result_f.v[j] - + pixman_fixed_to_float128 (result_i.v[j])); + + if (is_affine && diff > (0.51 / 65536.0)) + { + printf ("%d:%d: bad precision for affine (%.12f)\n", + testnum, i, diff); + abort (); + } + else if (diff > (0.71 / 65536.0)) + { + printf ("%d:%d: bad precision for projective (%.12f)\n", + testnum, i, diff); + abort (); + } + } + } + } +#endif + } + return crc32; +} + +int +main (int argc, const char *argv[]) +{ + return fuzzer_test_main ("matrix", 20000, + 0xBEBF98C3, + test_matrix, argc, argv); +} diff --git a/lib/pixman/test/pixel-test.c b/lib/pixman/test/pixel-test.c new file mode 100644 index 000000000..8c525d202 --- /dev/null +++ b/lib/pixman/test/pixel-test.c @@ -0,0 +1,267 @@ +/* + * Copyright © 2013 Soeren Sandmann + * 
Copyright © 2013 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include <stdio.h> +#include <stdlib.h> /* abort() */ +#include <math.h> +#include <time.h> +#include "utils.h" + +typedef struct pixel_combination_t pixel_combination_t; +struct pixel_combination_t +{ + pixman_op_t op; + pixman_format_code_t src_format; + uint32_t src_pixel; + pixman_format_code_t dest_format; + uint32_t dest_pixel; +}; + +static const pixel_combination_t regressions[] = +{ + { PIXMAN_OP_OVER, + PIXMAN_a8r8g8b8, 0x0f00c300, + PIXMAN_x14r6g6b6, 0x003c0, + }, + { PIXMAN_OP_DISJOINT_XOR, + PIXMAN_a4r4g4b4, 0xd0c0, + PIXMAN_a8r8g8b8, 0x5300ea00, + }, + { PIXMAN_OP_OVER, + PIXMAN_a8r8g8b8, 0x20c6bf00, + PIXMAN_r5g6b5, 0xb9ff + }, + { PIXMAN_OP_OVER, + PIXMAN_a8r8g8b8, 0x204ac7ff, + PIXMAN_r5g6b5, 0xc1ff + }, + { PIXMAN_OP_OVER_REVERSE, + PIXMAN_r5g6b5, 0xffc3, + PIXMAN_a8r8g8b8, 0x102d00dd + }, + { PIXMAN_OP_OVER_REVERSE, + PIXMAN_r5g6b5, 0x1f00, + PIXMAN_a8r8g8b8, 0x1bdf0c89 + }, + { PIXMAN_OP_OVER_REVERSE, + PIXMAN_r5g6b5, 0xf9d2, + PIXMAN_a8r8g8b8, 0x1076bcf7 + }, + { PIXMAN_OP_OVER_REVERSE, + PIXMAN_r5g6b5, 0x00c3, + PIXMAN_a8r8g8b8, 0x1bfe9ae5 + }, + { PIXMAN_OP_OVER_REVERSE, + PIXMAN_r5g6b5, 0x09ff, + PIXMAN_a8r8g8b8, 0x0b00c16c + }, + { PIXMAN_OP_DISJOINT_ATOP, + PIXMAN_a2r2g2b2, 0xbc, + PIXMAN_a8r8g8b8, 0x9efff1ff + }, + { PIXMAN_OP_DISJOINT_ATOP, + PIXMAN_a4r4g4b4, 0xae5f, + PIXMAN_a8r8g8b8, 0xf215b675 + }, + { PIXMAN_OP_DISJOINT_ATOP_REVERSE, + PIXMAN_a8r8g8b8, 0xce007980, + PIXMAN_a8r8g8b8, 0x80ffe4ad + }, + { PIXMAN_OP_DISJOINT_XOR, + PIXMAN_a8r8g8b8, 0xb8b07bea, + PIXMAN_a4r4g4b4, 0x939c + }, + { PIXMAN_OP_CONJOINT_ATOP_REVERSE, + PIXMAN_r5g6b5, 0x0063, + PIXMAN_a8r8g8b8, 0x10bb1ed7, + }, +}; + +static void +fill (pixman_image_t *image, uint32_t pixel) +{ + uint8_t *data = (uint8_t *)pixman_image_get_data (image); + int bytes_per_pixel = PIXMAN_FORMAT_BPP (pixman_image_get_format (image)) / 8; + int n_bytes = pixman_image_get_stride (image) * pixman_image_get_height (image); + int i; + + switch (bytes_per_pixel) + { + case 4: + for 
(i = 0; i < n_bytes / 4; ++i) + ((uint32_t *)data)[i] = pixel; + break; + + case 2: + pixel &= 0xffff; + for (i = 0; i < n_bytes / 2; ++i) + ((uint16_t *)data)[i] = pixel; + break; + + case 1: + pixel &= 0xff; + for (i = 0; i < n_bytes; ++i) + ((uint8_t *)data)[i] = pixel; + break; + + default: + assert (0); + break; + } +} + +static uint32_t +access (pixman_image_t *image, int x, int y) +{ + int bytes_per_pixel; + int stride; + uint32_t result; + uint8_t *location; + + if (x < 0 || x >= image->bits.width || y < 0 || y >= image->bits.height) + return 0; + + bytes_per_pixel = PIXMAN_FORMAT_BPP (image->bits.format) / 8; + stride = image->bits.rowstride * 4; + + location = (uint8_t *)image->bits.bits + y * stride + x * bytes_per_pixel; + + if (bytes_per_pixel == 4) + result = *(uint32_t *)location; + else if (bytes_per_pixel == 2) + result = *(uint16_t *)location; + else if (bytes_per_pixel == 1) + result = *(uint8_t *)location; + else + assert (0); + + return result; +} + +static pixman_bool_t +verify (int test_no, const pixel_combination_t *combination, int size) +{ + pixman_image_t *src, *dest; + pixel_checker_t src_checker, dest_checker; + color_t source_color, dest_color, reference_color; + pixman_bool_t result = TRUE; + int i, j; + + /* Compute reference color */ + pixel_checker_init (&src_checker, combination->src_format); + pixel_checker_init (&dest_checker, combination->dest_format); + pixel_checker_convert_pixel_to_color ( + &src_checker, combination->src_pixel, &source_color); + pixel_checker_convert_pixel_to_color ( + &dest_checker, combination->dest_pixel, &dest_color); + do_composite (combination->op, + &source_color, NULL, &dest_color, + &reference_color, FALSE); + + src = pixman_image_create_bits ( + combination->src_format, size, size, NULL, -1); + dest = pixman_image_create_bits ( + combination->dest_format, size, size, NULL, -1); + + fill (src, combination->src_pixel); + fill (dest, combination->dest_pixel); + + pixman_image_composite32 ( + 
combination->op, src, NULL, dest, 0, 0, 0, 0, 0, 0, size, size); + + for (j = 0; j < size; ++j) + { + for (i = 0; i < size; ++i) + { + uint32_t computed = access (dest, i, j); + int32_t a, r, g, b; + + if (!pixel_checker_check (&dest_checker, computed, &reference_color)) + { + printf ("----------- Test %d failed ----------\n", test_no); + + printf (" operator: %s\n", operator_name (combination->op)); + printf (" src format: %s\n", format_name (combination->src_format)); + printf (" dest format: %s\n", format_name (combination->dest_format)); + printf (" - source ARGB: %f %f %f %f (pixel: %8x)\n", + source_color.a, source_color.r, source_color.g, source_color.b, + combination->src_pixel); + pixel_checker_split_pixel (&src_checker, combination->src_pixel, + &a, &r, &g, &b); + printf (" %8d %8d %8d %8d\n", a, r, g, b); + + printf (" - dest ARGB: %f %f %f %f (pixel: %8x)\n", + dest_color.a, dest_color.r, dest_color.g, dest_color.b, + combination->dest_pixel); + pixel_checker_split_pixel (&dest_checker, combination->dest_pixel, + &a, &r, &g, &b); + printf (" %8d %8d %8d %8d\n", a, r, g, b); + + pixel_checker_split_pixel (&dest_checker, computed, &a, &r, &g, &b); + printf (" - expected ARGB: %f %f %f %f\n", + reference_color.a, reference_color.r, reference_color.g, reference_color.b); + + pixel_checker_get_min (&dest_checker, &reference_color, &a, &r, &g, &b); + printf (" min acceptable: %8d %8d %8d %8d\n", a, r, g, b); + + pixel_checker_split_pixel (&dest_checker, computed, &a, &r, &g, &b); + printf (" got: %8d %8d %8d %8d (pixel: %8x)\n", a, r, g, b, computed); + + pixel_checker_get_max (&dest_checker, &reference_color, &a, &r, &g, &b); + printf (" max acceptable: %8d %8d %8d %8d\n", a, r, g, b); + + result = FALSE; + goto done; + } + } + } + +done: + pixman_image_unref (src); + pixman_image_unref (dest); + + return result; +} + +int +main (int argc, char **argv) +{ + int result = 0; + int i, j; + + for (i = 0; i < ARRAY_LENGTH (regressions); ++i) + { + const 
pixel_combination_t *combination = &(regressions[i]); + + for (j = 1; j < 34; ++j) + { + if (!verify (i, combination, j)) + { + result = 1; + break; + } + } + } + + return result; +} diff --git a/lib/pixman/test/prng-test.c b/lib/pixman/test/prng-test.c new file mode 100644 index 000000000..c1d9320cc --- /dev/null +++ b/lib/pixman/test/prng-test.c @@ -0,0 +1,175 @@ +/* + * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com> + * + * Based on the public domain implementation of small noncryptographic PRNG + * authored by Bob Jenkins: http://burtleburtle.net/bob/rand/smallprng.html + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include <assert.h> +#include <stdlib.h> +#include "utils-prng.h" +#include "utils.h" + +/* The original code from http://www.burtleburtle.net/bob/rand/smallprng.html */ + +typedef uint32_t u4; +typedef struct ranctx { u4 a; u4 b; u4 c; u4 d; } ranctx; + +#define rot(x,k) (((x)<<(k))|((x)>>(32-(k)))) +u4 ranval( ranctx *x ) { + u4 e = x->a - rot(x->b, 27); + x->a = x->b ^ rot(x->c, 17); + x->b = x->c + x->d; + x->c = x->d + e; + x->d = e + x->a; + return x->d; +} + +void raninit( ranctx *x, u4 seed ) { + u4 i; + x->a = 0xf1ea5eed, x->b = x->c = x->d = seed; + for (i=0; i<20; ++i) { + (void)ranval(x); + } +} + +/*****************************************************************************/ + +#define BUFSIZE (8 * 1024 * 1024) +#define N 50 + +void bench (void) +{ + double t1, t2; + int i; + prng_t prng; + uint8_t *buf = aligned_malloc (16, BUFSIZE + 1); + + prng_srand_r (&prng, 1234); + t1 = gettime(); + for (i = 0; i < N; i++) + prng_randmemset_r (&prng, buf, BUFSIZE, 0); + t2 = gettime(); + printf ("aligned randmemset : %.2f MB/s\n", + (double)BUFSIZE * N / 1000000. / (t2 - t1)); + + t1 = gettime(); + for (i = 0; i < N; i++) + prng_randmemset_r (&prng, buf + 1, BUFSIZE, 0); + t2 = gettime(); + printf ("unaligned randmemset : %.2f MB/s\n", + (double)BUFSIZE * N / 1000000. / (t2 - t1)); + + t1 = gettime(); + for (i = 0; i < N; i++) + { + prng_randmemset_r (&prng, buf, BUFSIZE, RANDMEMSET_MORE_00_AND_FF); + } + t2 = gettime (); + printf ("aligned randmemset (more 00 and FF) : %.2f MB/s\n", + (double)BUFSIZE * N / 1000000. / (t2 - t1)); + + t1 = gettime(); + for (i = 0; i < N; i++) + { + prng_randmemset_r (&prng, buf + 1, BUFSIZE, RANDMEMSET_MORE_00_AND_FF); + } + t2 = gettime (); + printf ("unaligned randmemset (more 00 and FF) : %.2f MB/s\n", + (double)BUFSIZE * N / 1000000. 
/ (t2 - t1)); + + free (buf); +} + +#define SMALLBUFSIZE 100 + +int main (int argc, char *argv[]) +{ + const uint32_t ref_crc[RANDMEMSET_MORE_00_AND_FF + 1] = + { + 0xBA06763D, 0x103FC550, 0x8B59ABA5, 0xD82A0F39, + 0xD2321099, 0xFD8C5420, 0xD3B7C42A, 0xFC098093, + 0x85E01DE0, 0x6680F8F7, 0x4D32DD3C, 0xAE52382B, + 0x149E6CB5, 0x8B336987, 0x15DCB2B3, 0x8A71B781 + }; + uint32_t crc1, crc2; + uint32_t ref, seed, seed0, seed1, seed2, seed3; + prng_rand_128_data_t buf; + uint8_t *bytebuf = aligned_malloc(16, SMALLBUFSIZE + 1); + ranctx x; + prng_t prng; + prng_randmemset_flags_t flags; + + if (argc > 1 && strcmp(argv[1], "-bench") == 0) + { + bench (); + return 0; + } + + /* basic test */ + raninit (&x, 0); + prng_srand_r (&prng, 0); + assert (ranval (&x) == prng_rand_r (&prng)); + + /* test for simd code */ + seed = 0; + prng_srand_r (&prng, seed); + seed0 = (seed = seed * 1103515245 + 12345); + seed1 = (seed = seed * 1103515245 + 12345); + seed2 = (seed = seed * 1103515245 + 12345); + seed3 = (seed = seed * 1103515245 + 12345); + prng_rand_128_r (&prng, &buf); + + raninit (&x, seed0); + ref = ranval (&x); + assert (ref == buf.w[0]); + + raninit (&x, seed1); + ref = ranval (&x); + assert (ref == buf.w[1]); + + raninit (&x, seed2); + ref = ranval (&x); + assert (ref == buf.w[2]); + + raninit (&x, seed3); + ref = ranval (&x); + assert (ref == buf.w[3]); + + /* test for randmemset */ + for (flags = 0; flags <= RANDMEMSET_MORE_00_AND_FF; flags++) + { + prng_srand_r (&prng, 1234); + prng_randmemset_r (&prng, bytebuf, 16, flags); + prng_randmemset_r (&prng, bytebuf + 16, SMALLBUFSIZE - 17, flags); + crc1 = compute_crc32 (0, bytebuf, SMALLBUFSIZE - 1); + prng_srand_r (&prng, 1234); + prng_randmemset_r (&prng, bytebuf + 1, SMALLBUFSIZE - 1, flags); + crc2 = compute_crc32 (0, bytebuf + 1, SMALLBUFSIZE - 1); + assert (ref_crc[flags] == crc1); + assert (ref_crc[flags] == crc2); + } + + free (bytebuf); + + return 0; +} diff --git a/lib/pixman/test/radial-perf-test.c 
b/lib/pixman/test/radial-perf-test.c new file mode 100644 index 000000000..71092e27b --- /dev/null +++ b/lib/pixman/test/radial-perf-test.c @@ -0,0 +1,58 @@ +#include "utils.h" +#include <stdio.h> + +int +main () +{ + static const pixman_point_fixed_t inner = { 0x0000, 0x0000 }; + static const pixman_point_fixed_t outer = { 0x0000, 0x0000 }; + static const pixman_fixed_t r_inner = 0; + static const pixman_fixed_t r_outer = 64 << 16; + static const pixman_gradient_stop_t stops[] = { + { 0x00000, { 0x6666, 0x6666, 0x6666, 0xffff } }, + { 0x10000, { 0x0000, 0x0000, 0x0000, 0xffff } } + }; + static const pixman_transform_t transform = { + { { 0x0, 0x26ee, 0x0}, + { 0xffffeeef, 0x0, 0x0}, + { 0x0, 0x0, 0x10000} + } + }; + static const pixman_color_t z = { 0x0000, 0x0000, 0x0000, 0x0000 }; + pixman_image_t *dest, *radial, *zero; + int i; + double before, after; + + dest = pixman_image_create_bits ( + PIXMAN_x8r8g8b8, 640, 429, NULL, -1); + zero = pixman_image_create_solid_fill (&z); + radial = pixman_image_create_radial_gradient ( + &inner, &outer, r_inner, r_outer, stops, ARRAY_LENGTH (stops)); + pixman_image_set_transform (radial, &transform); + pixman_image_set_repeat (radial, PIXMAN_REPEAT_PAD); + +#define N_COMPOSITE 500 + + before = gettime(); + for (i = 0; i < N_COMPOSITE; ++i) + { + before -= gettime(); + + pixman_image_composite ( + PIXMAN_OP_SRC, zero, NULL, dest, + 0, 0, 0, 0, 0, 0, 640, 429); + + before += gettime(); + + pixman_image_composite32 ( + PIXMAN_OP_OVER, radial, NULL, dest, + - 150, -158, 0, 0, 0, 0, 640, 361); + } + + after = gettime(); + + write_png (dest, "radial.png"); + + printf ("Average time to composite: %f\n", (after - before) / N_COMPOSITE); + return 0; +} diff --git a/lib/pixman/test/region-contains-test.c b/lib/pixman/test/region-contains-test.c index 9524e2888..096e65179 100644 --- a/lib/pixman/test/region-contains-test.c +++ b/lib/pixman/test/region-contains-test.c @@ -9,16 +9,16 @@ make_random_region (pixman_region32_t *region) 
pixman_region32_init (region); - n_boxes = lcg_rand_n (64); + n_boxes = prng_rand_n (64); while (n_boxes--) { int32_t x, y; uint32_t w, h; - x = (int32_t)lcg_rand_u32() >> 2; - y = (int32_t)lcg_rand_u32() >> 2; - w = lcg_rand_u32() >> 2; - h = lcg_rand_u32() >> 2; + x = (int32_t)prng_rand() >> 2; + y = (int32_t)prng_rand() >> 2; + w = prng_rand() >> 2; + h = prng_rand() >> 2; pixman_region32_union_rect (region, region, x, y, w, h); } @@ -37,12 +37,12 @@ random_coord (pixman_region32_t *region, pixman_bool_t x) int n_boxes; int begin, end; - if (lcg_rand_n (14)) + if (prng_rand_n (14)) { bb = pixman_region32_rectangles (region, &n_boxes); if (n_boxes == 0) goto use_extent; - b = bb + lcg_rand_n (n_boxes); + b = bb + prng_rand_n (n_boxes); } else { @@ -62,12 +62,12 @@ random_coord (pixman_region32_t *region, pixman_bool_t x) end = b->y2; } - switch (lcg_rand_n (5)) + switch (prng_rand_n (5)) { case 0: - return begin - lcg_rand_u32(); + return begin - prng_rand(); case 1: - return end + lcg_rand_u32 (); + return end + prng_rand (); case 2: return end; case 3: @@ -111,14 +111,14 @@ test_region_contains_rectangle (int i, int verbose) pixman_region32_t region; uint32_t r, r1, r2, r3, r4, crc32; - lcg_srand (i); + prng_srand (i); make_random_region (&region); box.x1 = random_coord (&region, TRUE); - box.x2 = box.x1 + lcg_rand_u32 (); + box.x2 = box.x1 + prng_rand (); box.y1 = random_coord (&region, FALSE); - box.y2 = box.y1 + lcg_rand_u32 (); + box.y2 = box.y1 + prng_rand (); if (verbose) { @@ -163,7 +163,7 @@ main (int argc, const char *argv[]) { return fuzzer_test_main ("region_contains", 1000000, - 0xD2BF8C73, + 0x548E0F3F, test_region_contains_rectangle, argc, argv); } diff --git a/lib/pixman/test/region-test.c b/lib/pixman/test/region-test.c index 9d5a41eb9..bfc219bc7 100644 --- a/lib/pixman/test/region-test.c +++ b/lib/pixman/test/region-test.c @@ -32,6 +32,8 @@ main () 0xffff }; + prng_srand (0); + /* This used to go into an infinite loop before pixman-region.c * was fixed 
to not use explict "short" variables */ @@ -91,10 +93,10 @@ main () /* Add some random rectangles */ for (j = 0; j < 64; j++) pixman_region32_union_rect (&r1, &r1, - lcg_rand_n (image_size), - lcg_rand_n (image_size), - lcg_rand_n (25), - lcg_rand_n (25)); + prng_rand_n (image_size), + prng_rand_n (image_size), + prng_rand_n (25), + prng_rand_n (25)); /* Clip to image size */ pixman_region32_init_rect (&r2, 0, 0, image_size, image_size); diff --git a/lib/pixman/test/rotate-test.c b/lib/pixman/test/rotate-test.c index d63a28947..9d2a620cb 100644 --- a/lib/pixman/test/rotate-test.c +++ b/lib/pixman/test/rotate-test.c @@ -43,13 +43,13 @@ static const pixman_transform_t transforms[] = }; #define RANDOM_FORMAT() \ - (formats[lcg_rand_n (ARRAY_LENGTH (formats))]) + (formats[prng_rand_n (ARRAY_LENGTH (formats))]) #define RANDOM_OP() \ - (ops[lcg_rand_n (ARRAY_LENGTH (ops))]) + (ops[prng_rand_n (ARRAY_LENGTH (ops))]) #define RANDOM_TRANSFORM() \ - (&(transforms[lcg_rand_n (ARRAY_LENGTH (transforms))])) + (&(transforms[prng_rand_n (ARRAY_LENGTH (transforms))])) static void on_destroy (pixman_image_t *image, void *data) @@ -63,10 +63,8 @@ make_image (void) pixman_format_code_t format = RANDOM_FORMAT(); uint32_t *bytes = malloc (WIDTH * HEIGHT * 4); pixman_image_t *image; - int i; - for (i = 0; i < WIDTH * HEIGHT * 4; ++i) - ((uint8_t *)bytes)[i] = lcg_rand_n (256); + prng_randmemset (bytes, WIDTH * HEIGHT * 4, 0); image = pixman_image_create_bits ( format, WIDTH, HEIGHT, bytes, WIDTH * 4); @@ -86,7 +84,7 @@ test_transform (int testnum, int verbose) pixman_image_t *src, *dest; uint32_t crc; - lcg_srand (testnum); + prng_srand (testnum); src = make_image (); dest = make_image (); @@ -108,6 +106,6 @@ int main (int argc, const char *argv[]) { return fuzzer_test_main ("rotate", 15000, - 0x03A24D51, + 0xECF5E426, test_transform, argc, argv); } diff --git a/lib/pixman/test/scaling-helpers-test.c b/lib/pixman/test/scaling-helpers-test.c index 33ec47c85..cd5ace0b2 100644 --- 
a/lib/pixman/test/scaling-helpers-test.c +++ b/lib/pixman/test/scaling-helpers-test.c @@ -52,14 +52,15 @@ int main (void) { int i; + prng_srand (0); for (i = 0; i < 10000; i++) { int32_t left_pad1, left_tz1, width1, right_tz1, right_pad1; int32_t left_pad2, left_tz2, width2, right_tz2, right_pad2; - pixman_fixed_t vx = lcg_rand_N(10000 << 16) - (3000 << 16); - int32_t width = lcg_rand_N(10000); - int32_t source_image_width = lcg_rand_N(10000) + 1; - pixman_fixed_t unit_x = lcg_rand_N(10 << 16) + 1; + pixman_fixed_t vx = prng_rand_n(10000 << 16) - (3000 << 16); + int32_t width = prng_rand_n(10000); + int32_t source_image_width = prng_rand_n(10000) + 1; + pixman_fixed_t unit_x = prng_rand_n(10 << 16) + 1; width1 = width2 = width; bilinear_pad_repeat_get_scanline_bounds_ref (source_image_width, diff --git a/lib/pixman/test/scaling-test.c b/lib/pixman/test/scaling-test.c index 273612395..a8cb4c47b 100644 --- a/lib/pixman/test/scaling-test.c +++ b/lib/pixman/test/scaling-test.c @@ -26,7 +26,7 @@ get_format (int bpp) { if (bpp == 4) { - switch (lcg_rand_n (4)) + switch (prng_rand_n (4)) { default: case 0: @@ -80,11 +80,11 @@ test_composite (int testnum, uint32_t crc32; FLOAT_REGS_CORRUPTION_DETECTOR_START (); - lcg_srand (testnum); + prng_srand (testnum); - src_bpp = (lcg_rand_n (2) == 0) ? 2 : 4; - dst_bpp = (lcg_rand_n (2) == 0) ? 2 : 4; - switch (lcg_rand_n (3)) + src_bpp = (prng_rand_n (2) == 0) ? 2 : 4; + dst_bpp = (prng_rand_n (2) == 0) ? 
2 : 4; + switch (prng_rand_n (3)) { case 0: op = PIXMAN_OP_SRC; @@ -97,24 +97,24 @@ test_composite (int testnum, break; } - src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1; - src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1; + src_width = prng_rand_n (MAX_SRC_WIDTH) + 1; + src_height = prng_rand_n (MAX_SRC_HEIGHT) + 1; - if (lcg_rand_n (2)) + if (prng_rand_n (2)) { - mask_width = lcg_rand_n (MAX_SRC_WIDTH) + 1; - mask_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1; + mask_width = prng_rand_n (MAX_SRC_WIDTH) + 1; + mask_height = prng_rand_n (MAX_SRC_HEIGHT) + 1; } else { mask_width = mask_height = 1; } - dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1; - dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1; - src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp; - mask_stride = mask_width * mask_bpp + lcg_rand_n (MAX_STRIDE) * mask_bpp; - dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp; + dst_width = prng_rand_n (MAX_DST_WIDTH) + 1; + dst_height = prng_rand_n (MAX_DST_HEIGHT) + 1; + src_stride = src_width * src_bpp + prng_rand_n (MAX_STRIDE) * src_bpp; + mask_stride = mask_width * mask_bpp + prng_rand_n (MAX_STRIDE) * mask_bpp; + dst_stride = dst_width * dst_bpp + prng_rand_n (MAX_STRIDE) * dst_bpp; if (src_stride & 3) src_stride += 2; @@ -127,27 +127,22 @@ test_composite (int testnum, if (dst_stride & 3) dst_stride += 2; - src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2); - src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2); - mask_x = -(mask_width / 4) + lcg_rand_n (mask_width * 3 / 2); - mask_y = -(mask_height / 4) + lcg_rand_n (mask_height * 3 / 2); - dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2); - dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2); - w = lcg_rand_n (dst_width * 3 / 2 - dst_x); - h = lcg_rand_n (dst_height * 3 / 2 - dst_y); + src_x = -(src_width / 4) + prng_rand_n (src_width * 3 / 2); + src_y = -(src_height / 4) + prng_rand_n (src_height * 3 / 2); + mask_x = -(mask_width / 4) + prng_rand_n 
(mask_width * 3 / 2); + mask_y = -(mask_height / 4) + prng_rand_n (mask_height * 3 / 2); + dst_x = -(dst_width / 4) + prng_rand_n (dst_width * 3 / 2); + dst_y = -(dst_height / 4) + prng_rand_n (dst_height * 3 / 2); + w = prng_rand_n (dst_width * 3 / 2 - dst_x); + h = prng_rand_n (dst_height * 3 / 2 - dst_y); srcbuf = (uint32_t *)malloc (src_stride * src_height); maskbuf = (uint32_t *)malloc (mask_stride * mask_height); dstbuf = (uint32_t *)malloc (dst_stride * dst_height); - for (i = 0; i < src_stride * src_height; i++) - *((uint8_t *)srcbuf + i) = lcg_rand_n (256); - - for (i = 0; i < mask_stride * mask_height; i++) - *((uint8_t *)maskbuf + i) = lcg_rand_n (256); - - for (i = 0; i < dst_stride * dst_height; i++) - *((uint8_t *)dstbuf + i) = lcg_rand_n (256); + prng_randmemset (srcbuf, src_stride * src_height, 0); + prng_randmemset (maskbuf, mask_stride * mask_height, 0); + prng_randmemset (dstbuf, dst_stride * dst_height, 0); src_fmt = get_format (src_bpp); dst_fmt = get_format (dst_bpp); @@ -164,29 +159,29 @@ test_composite (int testnum, image_endian_swap (src_img); image_endian_swap (dst_img); - if (lcg_rand_n (4) > 0) + if (prng_rand_n (4) > 0) { - scale_x = -32768 * 3 + lcg_rand_N (65536 * 5); - scale_y = -32768 * 3 + lcg_rand_N (65536 * 5); - translate_x = lcg_rand_N (65536); - translate_y = lcg_rand_N (65536); + scale_x = -32768 * 3 + prng_rand_n (65536 * 5); + scale_y = -32768 * 3 + prng_rand_n (65536 * 5); + translate_x = prng_rand_n (65536); + translate_y = prng_rand_n (65536); pixman_transform_init_scale (&transform, scale_x, scale_y); pixman_transform_translate (&transform, NULL, translate_x, translate_y); pixman_image_set_transform (src_img, &transform); } - if (lcg_rand_n (2) > 0) + if (prng_rand_n (2) > 0) { - mask_scale_x = -32768 * 3 + lcg_rand_N (65536 * 5); - mask_scale_y = -32768 * 3 + lcg_rand_N (65536 * 5); - mask_translate_x = lcg_rand_N (65536); - mask_translate_y = lcg_rand_N (65536); + mask_scale_x = -32768 * 3 + prng_rand_n (65536 * 5); + 
mask_scale_y = -32768 * 3 + prng_rand_n (65536 * 5); + mask_translate_x = prng_rand_n (65536); + mask_translate_y = prng_rand_n (65536); pixman_transform_init_scale (&transform, mask_scale_x, mask_scale_y); pixman_transform_translate (&transform, NULL, mask_translate_x, mask_translate_y); pixman_image_set_transform (mask_img, &transform); } - switch (lcg_rand_n (4)) + switch (prng_rand_n (4)) { case 0: mask_repeat = PIXMAN_REPEAT_NONE; @@ -209,7 +204,7 @@ test_composite (int testnum, } pixman_image_set_repeat (mask_img, mask_repeat); - switch (lcg_rand_n (4)) + switch (prng_rand_n (4)) { case 0: repeat = PIXMAN_REPEAT_NONE; @@ -232,21 +227,22 @@ test_composite (int testnum, } pixman_image_set_repeat (src_img, repeat); - if (lcg_rand_n (2)) + if (prng_rand_n (2)) pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0); else pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0); - if (lcg_rand_n (2)) + if (prng_rand_n (2)) pixman_image_set_filter (mask_img, PIXMAN_FILTER_NEAREST, NULL, 0); else pixman_image_set_filter (mask_img, PIXMAN_FILTER_BILINEAR, NULL, 0); if (verbose) { - printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt); - printf ("op=%d, scale_x=%d, scale_y=%d, repeat=%d\n", - op, scale_x, scale_y, repeat); + printf ("src_fmt=%s, dst_fmt=%s\n", + format_name (src_fmt), format_name (dst_fmt)); + printf ("op=%s, scale_x=%d, scale_y=%d, repeat=%d\n", + operator_name (op), scale_x, scale_y, repeat); printf ("translate_x=%d, translate_y=%d\n", translate_x, translate_y); printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n", @@ -256,19 +252,19 @@ test_composite (int testnum, printf ("w=%d, h=%d\n", w, h); } - if (lcg_rand_n (8) == 0) + if (prng_rand_n (8) == 0) { pixman_box16_t clip_boxes[2]; - int n = lcg_rand_n (2) + 1; + int n = prng_rand_n (2) + 1; for (i = 0; i < n; i++) { - clip_boxes[i].x1 = lcg_rand_n (src_width); - clip_boxes[i].y1 = lcg_rand_n (src_height); + clip_boxes[i].x1 = prng_rand_n (src_width); + 
clip_boxes[i].y1 = prng_rand_n (src_height); clip_boxes[i].x2 = - clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1); + clip_boxes[i].x1 + prng_rand_n (src_width - clip_boxes[i].x1); clip_boxes[i].y2 = - clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1); + clip_boxes[i].y1 + prng_rand_n (src_height - clip_boxes[i].y1); if (verbose) { @@ -284,19 +280,19 @@ test_composite (int testnum, pixman_region_fini (&clip); } - if (lcg_rand_n (8) == 0) + if (prng_rand_n (8) == 0) { pixman_box16_t clip_boxes[2]; - int n = lcg_rand_n (2) + 1; + int n = prng_rand_n (2) + 1; for (i = 0; i < n; i++) { - clip_boxes[i].x1 = lcg_rand_n (mask_width); - clip_boxes[i].y1 = lcg_rand_n (mask_height); + clip_boxes[i].x1 = prng_rand_n (mask_width); + clip_boxes[i].y1 = prng_rand_n (mask_height); clip_boxes[i].x2 = - clip_boxes[i].x1 + lcg_rand_n (mask_width - clip_boxes[i].x1); + clip_boxes[i].x1 + prng_rand_n (mask_width - clip_boxes[i].x1); clip_boxes[i].y2 = - clip_boxes[i].y1 + lcg_rand_n (mask_height - clip_boxes[i].y1); + clip_boxes[i].y1 + prng_rand_n (mask_height - clip_boxes[i].y1); if (verbose) { @@ -312,18 +308,18 @@ test_composite (int testnum, pixman_region_fini (&clip); } - if (lcg_rand_n (8) == 0) + if (prng_rand_n (8) == 0) { pixman_box16_t clip_boxes[2]; - int n = lcg_rand_n (2) + 1; + int n = prng_rand_n (2) + 1; for (i = 0; i < n; i++) { - clip_boxes[i].x1 = lcg_rand_n (dst_width); - clip_boxes[i].y1 = lcg_rand_n (dst_height); + clip_boxes[i].x1 = prng_rand_n (dst_width); + clip_boxes[i].y1 = prng_rand_n (dst_height); clip_boxes[i].x2 = - clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1); + clip_boxes[i].x1 + prng_rand_n (dst_width - clip_boxes[i].x1); clip_boxes[i].y2 = - clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1); + clip_boxes[i].y1 + prng_rand_n (dst_height - clip_boxes[i].y1); if (verbose) { @@ -337,7 +333,7 @@ test_composite (int testnum, pixman_region_fini (&clip); } - if (lcg_rand_n (2) == 0) + if (prng_rand_n (2) 
== 0) pixman_image_composite (op, src_img, NULL, dst_img, src_x, src_y, 0, 0, dst_x, dst_y, w, h); else @@ -380,11 +376,11 @@ test_composite (int testnum, } #if BILINEAR_INTERPOLATION_BITS == 8 -#define CHECKSUM 0x8D3A7539 +#define CHECKSUM 0x9096E6B6 #elif BILINEAR_INTERPOLATION_BITS == 7 -#define CHECKSUM 0x03A23E0C +#define CHECKSUM 0xCE8EC6BA #elif BILINEAR_INTERPOLATION_BITS == 4 -#define CHECKSUM 0xE96D1A5E +#define CHECKSUM 0xAB1D39BE #else #define CHECKSUM 0x00000000 #endif diff --git a/lib/pixman/test/stress-test.c b/lib/pixman/test/stress-test.c index 059250dd4..1f03c7543 100644 --- a/lib/pixman/test/stress-test.c +++ b/lib/pixman/test/stress-test.c @@ -74,7 +74,7 @@ static pixman_filter_t filters[] = static int get_size (void) { - switch (lcg_rand_n (28)) + switch (prng_rand_n (28)) { case 0: return 1; @@ -84,10 +84,10 @@ get_size (void) default: case 2: - return lcg_rand_n (100); + return prng_rand_n (100); case 4: - return lcg_rand_n (2000) + 1000; + return prng_rand_n (2000) + 1000; case 5: return 65535; @@ -96,7 +96,7 @@ get_size (void) return 65536; case 7: - return lcg_rand_N (64000) + 63000; + return prng_rand_n (64000) + 63000; } } @@ -164,7 +164,7 @@ real_writer (void *src, uint32_t value, int size) static uint32_t fake_reader (const void *src, int size) { - uint32_t r = lcg_rand_u32 (); + uint32_t r = prng_rand (); assert (size == 1 || size == 2 || size == 4); @@ -182,16 +182,16 @@ log_rand (void) { uint32_t mask; - mask = (1 << lcg_rand_n (10)) - 1; + mask = (1 << prng_rand_n (10)) - 1; - return (lcg_rand_u32 () & mask) - (mask >> 1); + return (prng_rand () & mask) - (mask >> 1); } static int32_t rand_x (pixman_image_t *image) { if (image->type == BITS) - return lcg_rand_n (image->bits.width); + return prng_rand_n (image->bits.width); else return log_rand (); } @@ -200,13 +200,42 @@ static int32_t rand_y (pixman_image_t *image) { if (image->type == BITS) - return lcg_rand_n (image->bits.height); + return prng_rand_n (image->bits.height); else 
return log_rand (); } +typedef enum +{ + DONT_CARE, + PREFER_ALPHA, + REQUIRE_ALPHA +} alpha_preference_t; + +static pixman_format_code_t +random_format (alpha_preference_t alpha) +{ + pixman_format_code_t format; + int n = prng_rand_n (ARRAY_LENGTH (image_formats)); + + if (alpha >= PREFER_ALPHA && + (alpha == REQUIRE_ALPHA || prng_rand_n (4) != 0)) + { + do + { + format = image_formats[n++ % ARRAY_LENGTH (image_formats)]; + } while (PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_A); + } + else + { + format = image_formats[n]; + } + + return format; +} + static pixman_image_t * -create_random_bits_image (void) +create_random_bits_image (alpha_preference_t alpha_preference) { pixman_format_code_t format; pixman_indexed_t *indexed; @@ -220,7 +249,7 @@ create_random_bits_image (void) int n_coefficients = 0; /* format */ - format = image_formats[lcg_rand_n (ARRAY_LENGTH (image_formats))]; + format = random_format (alpha_preference); indexed = NULL; if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR) @@ -246,7 +275,7 @@ create_random_bits_image (void) while ((uint64_t)width * height > 200000) { - if (lcg_rand_n(2) == 0) + if (prng_rand_n(2) == 0) height = 200000 / width; else width = 200000 / height; @@ -258,11 +287,11 @@ create_random_bits_image (void) width = 1; /* bits */ - switch (lcg_rand_n (7)) + switch (prng_rand_n (7)) { default: case 0: - stride = width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17); + stride = width * PIXMAN_FORMAT_BPP (format) + prng_rand_n (17); stride = (stride + 3) & (~3); bits = (uint32_t *)make_random_bytes (height * stride); break; @@ -273,7 +302,7 @@ create_random_bits_image (void) break; case 2: /* Zero-filled */ - stride = width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17); + stride = width * PIXMAN_FORMAT_BPP (format) + prng_rand_n (17); stride = (stride + 3) & (~3); bits = fence_malloc (height * stride); if (!bits) @@ -282,7 +311,7 @@ create_random_bits_image (void) break; case 3: /* Filled with 0xFF */ - stride = width * 
PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17); + stride = width * PIXMAN_FORMAT_BPP (format) + prng_rand_n (17); stride = (stride + 3) & (~3); bits = fence_malloc (height * stride); if (!bits) @@ -298,7 +327,7 @@ create_random_bits_image (void) break; case 5: /* bits is a real pointer, has read/write functions */ - stride = width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17); + stride = width * PIXMAN_FORMAT_BPP (format) + prng_rand_n (17); stride = (stride + 3) & (~3); bits = fence_malloc (height * stride); if (!bits) @@ -309,7 +338,7 @@ create_random_bits_image (void) break; case 6: /* bits is a real pointer, stride is negative */ - stride = (width * PIXMAN_FORMAT_BPP (format) + lcg_rand_n (17)); + stride = (width * PIXMAN_FORMAT_BPP (format) + prng_rand_n (17)); stride = (stride + 3) & (~3); bits = (uint32_t *)make_random_bytes (height * stride); if (!bits) @@ -320,11 +349,11 @@ create_random_bits_image (void) } /* Filter */ - filter = filters[lcg_rand_n (ARRAY_LENGTH (filters))]; + filter = filters[prng_rand_n (ARRAY_LENGTH (filters))]; if (filter == PIXMAN_FILTER_CONVOLUTION) { - int width = lcg_rand_n (3); - int height = lcg_rand_n (4); + int width = prng_rand_n (3); + int height = prng_rand_n (4); n_coefficients = width * height + 2; coefficients = malloc (n_coefficients * sizeof (pixman_fixed_t)); @@ -334,7 +363,7 @@ create_random_bits_image (void) int i; for (i = 0; i < width * height; ++i) - coefficients[i + 2] = lcg_rand_u32(); + coefficients[i + 2] = prng_rand(); coefficients[0] = width << 16; coefficients[1] = height << 16; @@ -380,16 +409,16 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map) /* Set properties that are generic to all images */ /* Repeat */ - repeat = repeats[lcg_rand_n (ARRAY_LENGTH (repeats))]; + repeat = repeats[prng_rand_n (ARRAY_LENGTH (repeats))]; pixman_image_set_repeat (image, repeat); /* Alpha map */ - if (allow_alpha_map && lcg_rand_n (4) == 0) + if (allow_alpha_map && prng_rand_n (4) == 0) { 
pixman_image_t *alpha_map; int16_t x, y; - alpha_map = create_random_bits_image (); + alpha_map = create_random_bits_image (DONT_CARE); if (alpha_map) { @@ -405,17 +434,17 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map) } /* Component alpha */ - pixman_image_set_component_alpha (image, lcg_rand_n (3) == 0); + pixman_image_set_component_alpha (image, prng_rand_n (3) == 0); /* Clip region */ - if (lcg_rand_n (8) < 2) + if (prng_rand_n (8) < 2) { pixman_region32_t region; int i, n_rects; pixman_region32_init (&region); - switch (lcg_rand_n (12)) + switch (prng_rand_n (12)) { case 0: n_rects = 0; @@ -434,7 +463,7 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map) break; default: - n_rects = lcg_rand_n (100); + n_rects = prng_rand_n (100); break; } @@ -452,7 +481,7 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map) &region, &region, x, y, width, height); } - if (image->type == BITS && lcg_rand_n (8) != 0) + if (image->type == BITS && prng_rand_n (8) != 0) { uint32_t width, height; int x, y; @@ -463,16 +492,16 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map) */ for (i = 0; i < 5; ++i) { - x = lcg_rand_n (2 * image->bits.width) - image->bits.width; - y = lcg_rand_n (2 * image->bits.height) - image->bits.height; - width = lcg_rand_n (image->bits.width) - x + 10; - height = lcg_rand_n (image->bits.height) - y + 10; + x = prng_rand_n (2 * image->bits.width) - image->bits.width; + y = prng_rand_n (2 * image->bits.height) - image->bits.height; + width = prng_rand_n (image->bits.width) - x + 10; + height = prng_rand_n (image->bits.height) - y + 10; if (width + x < x) width = INT32_MAX - x; if (height + y < y) height = INT32_MAX - y; - + pixman_region32_union_rect ( &region, &region, x, y, width, height); } @@ -484,13 +513,13 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map) } /* Whether source clipping is enabled */ - 
pixman_image_set_source_clipping (image, !!lcg_rand_n (2)); + pixman_image_set_source_clipping (image, !!prng_rand_n (2)); /* Client clip */ - pixman_image_set_has_client_clip (image, !!lcg_rand_n (2)); + pixman_image_set_has_client_clip (image, !!prng_rand_n (2)); /* Transform */ - if (lcg_rand_n (5) < 2) + if (prng_rand_n (5) < 2) { pixman_transform_t xform; int i, j, k; @@ -504,39 +533,39 @@ set_general_properties (pixman_image_t *image, pixman_bool_t allow_alpha_map) for (k = 0; k < 3; ++k) { - switch (lcg_rand_n (4)) + switch (prng_rand_n (4)) { case 0: /* rotation */ - c = lcg_rand_N (2 * 65536) - 65536; - s = lcg_rand_N (2 * 65536) - 65536; + c = prng_rand_n (2 * 65536) - 65536; + s = prng_rand_n (2 * 65536) - 65536; pixman_transform_rotate (&xform, NULL, c, s); break; case 1: /* translation */ - tx = lcg_rand_u32(); - ty = lcg_rand_u32(); + tx = prng_rand(); + ty = prng_rand(); pixman_transform_translate (&xform, NULL, tx, ty); break; case 2: /* scale */ - sx = lcg_rand_u32(); - sy = lcg_rand_u32(); + sx = prng_rand(); + sy = prng_rand(); pixman_transform_scale (&xform, NULL, sx, sy); break; case 3: - if (lcg_rand_n (16) == 0) + if (prng_rand_n (16) == 0) { /* random */ for (i = 0; i < 3; ++i) for (j = 0; j < 3; ++j) - xform.matrix[i][j] = lcg_rand_u32(); + xform.matrix[i][j] = prng_rand(); break; } - else if (lcg_rand_n (16) == 0) + else if (prng_rand_n (16) == 0) { /* zero */ memset (&xform, 0, sizeof xform); @@ -554,10 +583,10 @@ random_color (void) { pixman_color_t color = { - lcg_rand() & 0xffff, - lcg_rand() & 0xffff, - lcg_rand() & 0xffff, - lcg_rand() & 0xffff, + prng_rand() & 0xffff, + prng_rand() & 0xffff, + prng_rand() & 0xffff, + prng_rand() & 0xffff, }; return color; @@ -581,7 +610,7 @@ create_random_stops (int *n_stops) int i; pixman_gradient_stop_t *stops; - *n_stops = lcg_rand_n (50) + 1; + *n_stops = prng_rand_n (50) + 1; step = pixman_fixed_1 / *n_stops; @@ -646,8 +675,8 @@ create_random_radial_image (void) inner_c = create_random_point(); 
outer_c = create_random_point(); - inner_r = lcg_rand(); - outer_r = lcg_rand(); + inner_r = prng_rand(); + outer_r = prng_rand(); stops = create_random_stops (&n_stops); @@ -672,7 +701,7 @@ create_random_conical_image (void) pixman_image_t *result; c = create_random_point(); - angle = lcg_rand(); + angle = prng_rand(); stops = create_random_stops (&n_stops); @@ -691,11 +720,11 @@ create_random_image (void) { pixman_image_t *result; - switch (lcg_rand_n (5)) + switch (prng_rand_n (5)) { default: case 0: - result = create_random_bits_image (); + result = create_random_bits_image (DONT_CARE); break; case 1: @@ -721,6 +750,39 @@ create_random_image (void) return result; } +static void +random_line (pixman_line_fixed_t *line, int width, int height) +{ + line->p1.x = prng_rand_n (width) << 16; + line->p1.y = prng_rand_n (height) << 16; + line->p2.x = prng_rand_n (width) << 16; + line->p2.y = prng_rand_n (height) << 16; +} + +static pixman_trapezoid_t * +create_random_trapezoids (int *n_traps, int height, int width) +{ + pixman_trapezoid_t *trapezoids; + int i; + + *n_traps = prng_rand_n (16) + 1; + + trapezoids = malloc (sizeof (pixman_trapezoid_t) * *n_traps); + + for (i = 0; i < *n_traps; ++i) + { + pixman_trapezoid_t *t = &(trapezoids[i]); + + t->top = prng_rand_n (height) << 16; + t->bottom = prng_rand_n (height) << 16; + + random_line (&t->left, height, width); + random_line (&t->right, height, width); + } + + return trapezoids; +} + static const pixman_op_t op_list[] = { PIXMAN_OP_SRC, @@ -792,27 +854,88 @@ run_test (uint32_t seed, pixman_bool_t verbose, uint32_t mod) if (mod == 0 || (seed % mod) == 0) printf ("Seed 0x%08x\n", seed); } - - lcg_srand (seed); - source = create_random_image (); - mask = create_random_image (); - dest = create_random_bits_image (); + source = mask = dest = NULL; - if (source && mask && dest) + prng_srand (seed); + + if (prng_rand_n (8) == 0) { + int n_traps; + pixman_trapezoid_t *trapezoids; + int p = prng_rand_n (3); + + if (p == 0) 
+ dest = create_random_bits_image (DONT_CARE); + else + dest = create_random_bits_image (REQUIRE_ALPHA); + + if (!dest) + goto out; + set_general_properties (dest, TRUE); - op = op_list [lcg_rand_n (ARRAY_LENGTH (op_list))]; + if (!(trapezoids = create_random_trapezoids ( + &n_traps, dest->bits.width, dest->bits.height))) + { + goto out; + } + + switch (p) + { + case 0: + source = create_random_image (); + + if (source) + { + op = op_list [prng_rand_n (ARRAY_LENGTH (op_list))]; + + pixman_composite_trapezoids ( + op, source, dest, + random_format (REQUIRE_ALPHA), + rand_x (source), rand_y (source), + rand_x (dest), rand_y (dest), + n_traps, trapezoids); + } + break; + + case 1: + pixman_rasterize_trapezoid ( + dest, &trapezoids[prng_rand_n (n_traps)], + rand_x (dest), rand_y (dest)); + break; - pixman_image_composite32 (op, - source, mask, dest, - rand_x (source), rand_y (source), - rand_x (mask), rand_y (mask), - 0, 0, - dest->bits.width, - dest->bits.height); + case 2: + pixman_add_trapezoids ( + dest, rand_x (dest), rand_y (dest), n_traps, trapezoids); + break; + } + + free (trapezoids); + } + else + { + dest = create_random_bits_image (DONT_CARE); + source = create_random_image (); + mask = create_random_image (); + + if (source && mask && dest) + { + set_general_properties (dest, TRUE); + + op = op_list [prng_rand_n (ARRAY_LENGTH (op_list))]; + + pixman_image_composite32 (op, + source, mask, dest, + rand_x (source), rand_y (source), + rand_x (mask), rand_y (mask), + 0, 0, + dest->bits.width, + dest->bits.height); + } } + +out: if (source) pixman_image_unref (source); if (mask) diff --git a/lib/pixman/test/utils-prng.c b/lib/pixman/test/utils-prng.c new file mode 100644 index 000000000..7b32e3531 --- /dev/null +++ b/lib/pixman/test/utils-prng.c @@ -0,0 +1,298 @@ +/* + * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com> + * + * Based on the public domain implementation of small noncryptographic PRNG + * authored by Bob Jenkins: 
http://burtleburtle.net/bob/rand/smallprng.html + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "utils.h" +#include "utils-prng.h" + +#if defined(GCC_VECTOR_EXTENSIONS_SUPPORTED) && defined(__SSE2__) +#include <xmmintrin.h> +#endif + +void smallprng_srand_r (smallprng_t *x, uint32_t seed) +{ + uint32_t i; + x->a = 0xf1ea5eed, x->b = x->c = x->d = seed; + for (i = 0; i < 20; ++i) + smallprng_rand_r (x); +} + +/* + * Set a 32-bit seed for PRNG + * + * LCG is used here for generating independent seeds for different + * smallprng instances (in the case if smallprng is also used for + * generating these seeds, "Big Crush" test from TestU01 detects + * some problems in the glued 'prng_rand_128_r' output data). + * Actually we might be even better using some cryptographic + * hash for this purpose, but LCG seems to be also enough for + * passing "Big Crush". 
+ */ +void prng_srand_r (prng_t *x, uint32_t seed) +{ +#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED + int i; + prng_rand_128_data_t dummy; + smallprng_srand_r (&x->p0, seed); + x->a[0] = x->a[1] = x->a[2] = x->a[3] = 0xf1ea5eed; + x->b[0] = x->c[0] = x->d[0] = (seed = seed * 1103515245 + 12345); + x->b[1] = x->c[1] = x->d[1] = (seed = seed * 1103515245 + 12345); + x->b[2] = x->c[2] = x->d[2] = (seed = seed * 1103515245 + 12345); + x->b[3] = x->c[3] = x->d[3] = (seed = seed * 1103515245 + 12345); + for (i = 0; i < 20; ++i) + prng_rand_128_r (x, &dummy); +#else + smallprng_srand_r (&x->p0, seed); + smallprng_srand_r (&x->p1, (seed = seed * 1103515245 + 12345)); + smallprng_srand_r (&x->p2, (seed = seed * 1103515245 + 12345)); + smallprng_srand_r (&x->p3, (seed = seed * 1103515245 + 12345)); + smallprng_srand_r (&x->p4, (seed = seed * 1103515245 + 12345)); +#endif +} + +static force_inline void +store_rand_128_data (void *addr, prng_rand_128_data_t *d, int aligned) +{ +#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED + if (aligned) + { + *(uint8x16 *)addr = d->vb; + return; + } + else + { +#ifdef __SSE2__ + /* workaround for http://gcc.gnu.org/PR55614 */ + _mm_storeu_si128 (addr, _mm_loadu_si128 ((__m128i *)d)); + return; +#endif + } +#endif + /* we could try something better for unaligned writes (packed attribute), + * but GCC is not very reliable: http://gcc.gnu.org/PR55454 */ + memcpy (addr, d, 16); +} + +/* + * Helper function and the actual code for "prng_randmemset_r" function + */ +static force_inline void +randmemset_internal (prng_t *prng, + uint8_t *buf, + size_t size, + prng_randmemset_flags_t flags, + int aligned) +{ + prng_t local_prng = *prng; + prng_rand_128_data_t randdata; + size_t i; + + while (size >= 16) + { + prng_rand_128_data_t t; + if (flags == 0) + { + prng_rand_128_r (&local_prng, &randdata); + } + else + { + prng_rand_128_r (&local_prng, &t); + prng_rand_128_r (&local_prng, &randdata); +#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED + if (flags & 
RANDMEMSET_MORE_FF) + { + const uint8x16 const_C0 = + { + 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, + 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0 + }; + randdata.vb |= (t.vb >= const_C0); + } + if (flags & RANDMEMSET_MORE_00) + { + const uint8x16 const_40 = + { + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 + }; + randdata.vb &= (t.vb >= const_40); + } + if (flags & RANDMEMSET_MORE_FFFFFFFF) + { + const uint32x4 const_C0000000 = + { + 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000 + }; + randdata.vw |= ((t.vw << 30) >= const_C0000000); + } + if (flags & RANDMEMSET_MORE_00000000) + { + const uint32x4 const_40000000 = + { + 0x40000000, 0x40000000, 0x40000000, 0x40000000 + }; + randdata.vw &= ((t.vw << 30) >= const_40000000); + } +#else + #define PROCESS_ONE_LANE(i) \ + if (flags & RANDMEMSET_MORE_FF) \ + { \ + uint32_t mask_ff = (t.w[i] & (t.w[i] << 1)) & 0x80808080; \ + mask_ff |= mask_ff >> 1; \ + mask_ff |= mask_ff >> 2; \ + mask_ff |= mask_ff >> 4; \ + randdata.w[i] |= mask_ff; \ + } \ + if (flags & RANDMEMSET_MORE_00) \ + { \ + uint32_t mask_00 = (t.w[i] | (t.w[i] << 1)) & 0x80808080; \ + mask_00 |= mask_00 >> 1; \ + mask_00 |= mask_00 >> 2; \ + mask_00 |= mask_00 >> 4; \ + randdata.w[i] &= mask_00; \ + } \ + if (flags & RANDMEMSET_MORE_FFFFFFFF) \ + { \ + int32_t mask_ff = ((t.w[i] << 30) & (t.w[i] << 31)) & \ + 0x80000000; \ + randdata.w[i] |= mask_ff >> 31; \ + } \ + if (flags & RANDMEMSET_MORE_00000000) \ + { \ + int32_t mask_00 = ((t.w[i] << 30) | (t.w[i] << 31)) & \ + 0x80000000; \ + randdata.w[i] &= mask_00 >> 31; \ + } + + PROCESS_ONE_LANE (0) + PROCESS_ONE_LANE (1) + PROCESS_ONE_LANE (2) + PROCESS_ONE_LANE (3) +#endif + } + if (is_little_endian ()) + { + store_rand_128_data (buf, &randdata, aligned); + buf += 16; + } + else + { +#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED + const uint8x16 bswap_shufflemask = + { + 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 + }; + randdata.vb = 
__builtin_shuffle (randdata.vb, bswap_shufflemask); + store_rand_128_data (buf, &randdata, aligned); + buf += 16; +#else + uint8_t t1, t2, t3, t4; + #define STORE_ONE_LANE(i) \ + t1 = randdata.b[i * 4 + 3]; \ + t2 = randdata.b[i * 4 + 2]; \ + t3 = randdata.b[i * 4 + 1]; \ + t4 = randdata.b[i * 4 + 0]; \ + *buf++ = t1; \ + *buf++ = t2; \ + *buf++ = t3; \ + *buf++ = t4; + + STORE_ONE_LANE (0) + STORE_ONE_LANE (1) + STORE_ONE_LANE (2) + STORE_ONE_LANE (3) +#endif + } + size -= 16; + } + i = 0; + while (i < size) + { + uint8_t randbyte = prng_rand_r (&local_prng) & 0xFF; + if (flags != 0) + { + uint8_t t = prng_rand_r (&local_prng) & 0xFF; + if ((flags & RANDMEMSET_MORE_FF) && (t >= 0xC0)) + randbyte = 0xFF; + if ((flags & RANDMEMSET_MORE_00) && (t < 0x40)) + randbyte = 0x00; + if (i % 4 == 0 && i + 4 <= size) + { + t = prng_rand_r (&local_prng) & 0xFF; + if ((flags & RANDMEMSET_MORE_FFFFFFFF) && (t >= 0xC0)) + { + memset(&buf[i], 0xFF, 4); + i += 4; + continue; + } + if ((flags & RANDMEMSET_MORE_00000000) && (t < 0x40)) + { + memset(&buf[i], 0x00, 4); + i += 4; + continue; + } + } + } + buf[i] = randbyte; + i++; + } + *prng = local_prng; +} + +/* + * Fill memory buffer with random data. 
Flags argument may be used + * to tweak some statistics properties: + * RANDMEMSET_MORE_00 - set ~25% of bytes to 0x00 + * RANDMEMSET_MORE_FF - set ~25% of bytes to 0xFF + * RANDMEMSET_MORE_00000000 - ~25% chance for 00000000 4-byte clusters + * RANDMEMSET_MORE_FFFFFFFF - ~25% chance for FFFFFFFF 4-byte clusters + */ +void prng_randmemset_r (prng_t *prng, + void *voidbuf, + size_t size, + prng_randmemset_flags_t flags) +{ + uint8_t *buf = (uint8_t *)voidbuf; + if ((uintptr_t)buf & 15) + { + /* unaligned buffer */ + if (flags == 0) + randmemset_internal (prng, buf, size, 0, 0); + else if (flags == RANDMEMSET_MORE_00_AND_FF) + randmemset_internal (prng, buf, size, RANDMEMSET_MORE_00_AND_FF, 0); + else + randmemset_internal (prng, buf, size, flags, 0); + } + else + { + /* aligned buffer */ + if (flags == 0) + randmemset_internal (prng, buf, size, 0, 1); + else if (flags == RANDMEMSET_MORE_00_AND_FF) + randmemset_internal (prng, buf, size, RANDMEMSET_MORE_00_AND_FF, 1); + else + randmemset_internal (prng, buf, size, flags, 1); + } +} diff --git a/lib/pixman/test/utils-prng.h b/lib/pixman/test/utils-prng.h new file mode 100644 index 000000000..564ffcef1 --- /dev/null +++ b/lib/pixman/test/utils-prng.h @@ -0,0 +1,171 @@ +/* + * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com> + * + * Based on the public domain implementation of small noncryptographic PRNG + * authored by Bob Jenkins: http://burtleburtle.net/bob/rand/smallprng.html + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including 
the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __UTILS_PRNG_H__ +#define __UTILS_PRNG_H__ + +/* + * This file provides a fast SIMD-optimized noncryptographic PRNG (pseudorandom + * number generator), with the output good enough to pass "Big Crush" tests + * from TestU01 (http://en.wikipedia.org/wiki/TestU01). + * + * SIMD code uses http://gcc.gnu.org/onlinedocs/gcc/Vector-Extensions.html + * which is a GCC specific extension. There is also a slower alternative + * code path, which should work with any C compiler. + * + * The "prng_t" structure keeps the internal state of the random number + * generator. It is possible to have multiple instances of the random number + * generator active at the same time, in this case each of them needs to have + * its own "prng_t". All the functions take a pointer to "prng_t" + * as the first argument. + * + * Functions: + * + * ---------------------------------------------------------------------------- + * void prng_srand_r (prng_t *prng, uint32_t seed); + * + * Initialize the pseudorandom number generator. The sequence of preudorandom + * numbers is deterministic and only depends on "seed". Any two generators + * initialized with the same seed will produce exactly the same sequence. 
+ * + * ---------------------------------------------------------------------------- + * uint32_t prng_rand_r (prng_t *prng); + * + * Generate a single uniformly distributed 32-bit pseudorandom value. + * + * ---------------------------------------------------------------------------- + * void prng_randmemset_r (prng_t *prng, + * void *buffer, + * size_t size, + * prng_randmemset_flags_t flags); + * + * Fills the memory buffer "buffer" with "size" bytes of pseudorandom data. + * The "flags" argument may be used to tweak some statistics properties: + * RANDMEMSET_MORE_00 - set ~25% of bytes to 0x00 + * RANDMEMSET_MORE_FF - set ~25% of bytes to 0xFF + * The flags can be combined. This allows a bit better simulation of typical + * pixel data, which normally contains a lot of fully transparent or fully + * opaque pixels. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "pixman-private.h" + +/*****************************************************************************/ + +#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) +#define GCC_VECTOR_EXTENSIONS_SUPPORTED +typedef uint32_t uint32x4 __attribute__ ((vector_size(16))); +typedef uint8_t uint8x16 __attribute__ ((vector_size(16))); +#endif + +typedef struct +{ + uint32_t a, b, c, d; +} smallprng_t; + +typedef struct +{ +#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED + uint32x4 a, b, c, d; +#else + smallprng_t p1, p2, p3, p4; +#endif + smallprng_t p0; +} prng_t; + +typedef union +{ + uint8_t b[16]; + uint32_t w[4]; +#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED + uint8x16 vb; + uint32x4 vw; +#endif +} prng_rand_128_data_t; + +/*****************************************************************************/ + +static force_inline uint32_t +smallprng_rand_r (smallprng_t *x) +{ + uint32_t e = x->a - ((x->b << 27) + (x->b >> (32 - 27))); + x->a = x->b ^ ((x->c << 17) ^ (x->c >> (32 - 17))); + x->b = x->c + x->d; + x->c = x->d + e; + x->d = e + x->a; + return x->d; +} + +/* Generate 4 
bytes (32-bits) of random data */ +static force_inline uint32_t +prng_rand_r (prng_t *x) +{ + return smallprng_rand_r (&x->p0); +} + +/* Generate 16 bytes (128-bits) of random data */ +static force_inline void +prng_rand_128_r (prng_t *x, prng_rand_128_data_t *data) +{ +#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED + uint32x4 e = x->a - ((x->b << 27) + (x->b >> (32 - 27))); + x->a = x->b ^ ((x->c << 17) ^ (x->c >> (32 - 17))); + x->b = x->c + x->d; + x->c = x->d + e; + x->d = e + x->a; + data->vw = x->d; +#else + data->w[0] = smallprng_rand_r (&x->p1); + data->w[1] = smallprng_rand_r (&x->p2); + data->w[2] = smallprng_rand_r (&x->p3); + data->w[3] = smallprng_rand_r (&x->p4); +#endif +} + +typedef enum +{ + RANDMEMSET_MORE_00 = 1, /* ~25% chance for 0x00 bytes */ + RANDMEMSET_MORE_FF = 2, /* ~25% chance for 0xFF bytes */ + RANDMEMSET_MORE_00000000 = 4, /* ~25% chance for 0x00000000 clusters */ + RANDMEMSET_MORE_FFFFFFFF = 8, /* ~25% chance for 0xFFFFFFFF clusters */ + RANDMEMSET_MORE_00_AND_FF = (RANDMEMSET_MORE_00 | RANDMEMSET_MORE_00000000 | + RANDMEMSET_MORE_FF | RANDMEMSET_MORE_FFFFFFFF) +} prng_randmemset_flags_t; + +/* Set the 32-bit seed for PRNG */ +void prng_srand_r (prng_t *prng, uint32_t seed); + +/* Fill memory buffer with random data */ +void prng_randmemset_r (prng_t *prng, + void *buffer, + size_t size, + prng_randmemset_flags_t flags); + +#endif diff --git a/lib/pixman/test/utils.c b/lib/pixman/test/utils.c index 716bb7594..3d1ba22ae 100644 --- a/lib/pixman/test/utils.c +++ b/lib/pixman/test/utils.c @@ -27,10 +27,11 @@ #include <png.h> #endif -/* Random number seed +/* Random number generator state */ -uint32_t lcg_seed; +prng_t prng_state_data; +prng_t *prng_state; /*----------------------------------------------------------------------------*\ * CRC-32 version 2.0.0 by Craig Bruce, 2006-04-29. 
@@ -237,14 +238,6 @@ compute_crc32_for_image (uint32_t crc32, return crc32; } -pixman_bool_t -is_little_endian (void) -{ - volatile uint16_t endian_check_var = 0x1234; - - return (*(volatile uint8_t *)&endian_check_var == 0x34); -} - /* perform endian conversion of pixel data */ void @@ -377,7 +370,7 @@ fence_malloc (int64_t len) return NULL; } - initial_page = (uint8_t *)(((unsigned long)addr + page_mask) & ~page_mask); + initial_page = (uint8_t *)(((uintptr_t)addr + page_mask) & ~page_mask); leading_protected = initial_page + page_size; payload = leading_protected + N_LEADING_PROTECTED * page_size; trailing_protected = payload + n_payload_bytes; @@ -431,13 +424,11 @@ uint8_t * make_random_bytes (int n_bytes) { uint8_t *bytes = fence_malloc (n_bytes); - int i; if (!bytes) return NULL; - for (i = 0; i < n_bytes; ++i) - bytes[i] = lcg_rand () & 0xff; + prng_randmemset (bytes, n_bytes, 0); return bytes; } @@ -557,6 +548,60 @@ write_png (pixman_image_t *image, const char *filename) #endif +static void +color8_to_color16 (uint32_t color8, pixman_color_t *color16) +{ + color16->alpha = ((color8 & 0xff000000) >> 24); + color16->red = ((color8 & 0x00ff0000) >> 16); + color16->green = ((color8 & 0x0000ff00) >> 8); + color16->blue = ((color8 & 0x000000ff) >> 0); + + color16->alpha |= color16->alpha << 8; + color16->red |= color16->red << 8; + color16->blue |= color16->blue << 8; + color16->green |= color16->green << 8; +} + +void +draw_checkerboard (pixman_image_t *image, + int check_size, + uint32_t color1, uint32_t color2) +{ + pixman_color_t check1, check2; + pixman_image_t *c1, *c2; + int n_checks_x, n_checks_y; + int i, j; + + color8_to_color16 (color1, &check1); + color8_to_color16 (color2, &check2); + + c1 = pixman_image_create_solid_fill (&check1); + c2 = pixman_image_create_solid_fill (&check2); + + n_checks_x = ( + pixman_image_get_width (image) + check_size - 1) / check_size; + n_checks_y = ( + pixman_image_get_height (image) + check_size - 1) / check_size; + + 
for (j = 0; j < n_checks_y; j++) + { + for (i = 0; i < n_checks_x; i++) + { + pixman_image_t *src; + + if (((i ^ j) & 1)) + src = c1; + else + src = c2; + + pixman_image_composite32 (PIXMAN_OP_SRC, src, NULL, image, + 0, 0, 0, 0, + i * check_size, j * check_size, + check_size, check_size); + } + } +} + /* * A function, which can be used as a core part of the test programs, * intended to detect various problems with the help of fuzzing input @@ -689,11 +734,13 @@ get_random_seed (void) { union { double d; uint32_t u32; } t; t.d = gettime(); - lcg_srand (t.u32); + prng_srand (t.u32); - return lcg_rand_u32 (); + return prng_rand (); } +#ifdef HAVE_SIGACTION +#ifdef HAVE_ALARM static const char *global_msg; static void @@ -702,6 +749,8 @@ on_alarm (int signo) printf ("%s\n", global_msg); exit (1); } +#endif +#endif void fail_after (int seconds, const char *msg) @@ -781,7 +830,7 @@ initialize_palette (pixman_indexed_t *palette, uint32_t depth, int is_rgb) uint32_t mask = (1 << depth) - 1; for (i = 0; i < 32768; ++i) - palette->ent[i] = lcg_rand() & mask; + palette->ent[i] = prng_rand() & mask; memset (palette->rgba, 0, sizeof (palette->rgba)); @@ -801,7 +850,7 @@ initialize_palette (pixman_indexed_t *palette, uint32_t depth, int is_rgb) { uint32_t old_idx; - rgba24 = lcg_rand(); + rgba24 = prng_rand(); i15 = CONVERT_15 (rgba24, is_rgb); old_idx = palette->ent[i15]; @@ -821,6 +870,445 @@ initialize_palette (pixman_indexed_t *palette, uint32_t depth, int is_rgb) } } +const char * +operator_name (pixman_op_t op) +{ + switch (op) + { + case PIXMAN_OP_CLEAR: return "PIXMAN_OP_CLEAR"; + case PIXMAN_OP_SRC: return "PIXMAN_OP_SRC"; + case PIXMAN_OP_DST: return "PIXMAN_OP_DST"; + case PIXMAN_OP_OVER: return "PIXMAN_OP_OVER"; + case PIXMAN_OP_OVER_REVERSE: return "PIXMAN_OP_OVER_REVERSE"; + case PIXMAN_OP_IN: return "PIXMAN_OP_IN"; + case PIXMAN_OP_IN_REVERSE: return "PIXMAN_OP_IN_REVERSE"; + case PIXMAN_OP_OUT: return "PIXMAN_OP_OUT"; + case PIXMAN_OP_OUT_REVERSE: return 
"PIXMAN_OP_OUT_REVERSE"; + case PIXMAN_OP_ATOP: return "PIXMAN_OP_ATOP"; + case PIXMAN_OP_ATOP_REVERSE: return "PIXMAN_OP_ATOP_REVERSE"; + case PIXMAN_OP_XOR: return "PIXMAN_OP_XOR"; + case PIXMAN_OP_ADD: return "PIXMAN_OP_ADD"; + case PIXMAN_OP_SATURATE: return "PIXMAN_OP_SATURATE"; + + case PIXMAN_OP_DISJOINT_CLEAR: return "PIXMAN_OP_DISJOINT_CLEAR"; + case PIXMAN_OP_DISJOINT_SRC: return "PIXMAN_OP_DISJOINT_SRC"; + case PIXMAN_OP_DISJOINT_DST: return "PIXMAN_OP_DISJOINT_DST"; + case PIXMAN_OP_DISJOINT_OVER: return "PIXMAN_OP_DISJOINT_OVER"; + case PIXMAN_OP_DISJOINT_OVER_REVERSE: return "PIXMAN_OP_DISJOINT_OVER_REVERSE"; + case PIXMAN_OP_DISJOINT_IN: return "PIXMAN_OP_DISJOINT_IN"; + case PIXMAN_OP_DISJOINT_IN_REVERSE: return "PIXMAN_OP_DISJOINT_IN_REVERSE"; + case PIXMAN_OP_DISJOINT_OUT: return "PIXMAN_OP_DISJOINT_OUT"; + case PIXMAN_OP_DISJOINT_OUT_REVERSE: return "PIXMAN_OP_DISJOINT_OUT_REVERSE"; + case PIXMAN_OP_DISJOINT_ATOP: return "PIXMAN_OP_DISJOINT_ATOP"; + case PIXMAN_OP_DISJOINT_ATOP_REVERSE: return "PIXMAN_OP_DISJOINT_ATOP_REVERSE"; + case PIXMAN_OP_DISJOINT_XOR: return "PIXMAN_OP_DISJOINT_XOR"; + + case PIXMAN_OP_CONJOINT_CLEAR: return "PIXMAN_OP_CONJOINT_CLEAR"; + case PIXMAN_OP_CONJOINT_SRC: return "PIXMAN_OP_CONJOINT_SRC"; + case PIXMAN_OP_CONJOINT_DST: return "PIXMAN_OP_CONJOINT_DST"; + case PIXMAN_OP_CONJOINT_OVER: return "PIXMAN_OP_CONJOINT_OVER"; + case PIXMAN_OP_CONJOINT_OVER_REVERSE: return "PIXMAN_OP_CONJOINT_OVER_REVERSE"; + case PIXMAN_OP_CONJOINT_IN: return "PIXMAN_OP_CONJOINT_IN"; + case PIXMAN_OP_CONJOINT_IN_REVERSE: return "PIXMAN_OP_CONJOINT_IN_REVERSE"; + case PIXMAN_OP_CONJOINT_OUT: return "PIXMAN_OP_CONJOINT_OUT"; + case PIXMAN_OP_CONJOINT_OUT_REVERSE: return "PIXMAN_OP_CONJOINT_OUT_REVERSE"; + case PIXMAN_OP_CONJOINT_ATOP: return "PIXMAN_OP_CONJOINT_ATOP"; + case PIXMAN_OP_CONJOINT_ATOP_REVERSE: return "PIXMAN_OP_CONJOINT_ATOP_REVERSE"; + case PIXMAN_OP_CONJOINT_XOR: return "PIXMAN_OP_CONJOINT_XOR"; + + case PIXMAN_OP_MULTIPLY: 
return "PIXMAN_OP_MULTIPLY"; + case PIXMAN_OP_SCREEN: return "PIXMAN_OP_SCREEN"; + case PIXMAN_OP_OVERLAY: return "PIXMAN_OP_OVERLAY"; + case PIXMAN_OP_DARKEN: return "PIXMAN_OP_DARKEN"; + case PIXMAN_OP_LIGHTEN: return "PIXMAN_OP_LIGHTEN"; + case PIXMAN_OP_COLOR_DODGE: return "PIXMAN_OP_COLOR_DODGE"; + case PIXMAN_OP_COLOR_BURN: return "PIXMAN_OP_COLOR_BURN"; + case PIXMAN_OP_HARD_LIGHT: return "PIXMAN_OP_HARD_LIGHT"; + case PIXMAN_OP_SOFT_LIGHT: return "PIXMAN_OP_SOFT_LIGHT"; + case PIXMAN_OP_DIFFERENCE: return "PIXMAN_OP_DIFFERENCE"; + case PIXMAN_OP_EXCLUSION: return "PIXMAN_OP_EXCLUSION"; + case PIXMAN_OP_HSL_HUE: return "PIXMAN_OP_HSL_HUE"; + case PIXMAN_OP_HSL_SATURATION: return "PIXMAN_OP_HSL_SATURATION"; + case PIXMAN_OP_HSL_COLOR: return "PIXMAN_OP_HSL_COLOR"; + case PIXMAN_OP_HSL_LUMINOSITY: return "PIXMAN_OP_HSL_LUMINOSITY"; + + case PIXMAN_OP_NONE: + return "<invalid operator 'none'>"; + }; + + return "<unknown operator>"; +} + +const char * +format_name (pixman_format_code_t format) +{ + switch (format) + { +/* 32bpp formats */ + case PIXMAN_a8r8g8b8: return "a8r8g8b8"; + case PIXMAN_x8r8g8b8: return "x8r8g8b8"; + case PIXMAN_a8b8g8r8: return "a8b8g8r8"; + case PIXMAN_x8b8g8r8: return "x8b8g8r8"; + case PIXMAN_b8g8r8a8: return "b8g8r8a8"; + case PIXMAN_b8g8r8x8: return "b8g8r8x8"; + case PIXMAN_r8g8b8a8: return "r8g8b8a8"; + case PIXMAN_r8g8b8x8: return "r8g8b8x8"; + case PIXMAN_x14r6g6b6: return "x14r6g6b6"; + case PIXMAN_x2r10g10b10: return "x2r10g10b10"; + case PIXMAN_a2r10g10b10: return "a2r10g10b10"; + case PIXMAN_x2b10g10r10: return "x2b10g10r10"; + case PIXMAN_a2b10g10r10: return "a2b10g10r10"; + +/* sRGB formats */ + case PIXMAN_a8r8g8b8_sRGB: return "a8r8g8b8_sRGB"; + +/* 24bpp formats */ + case PIXMAN_r8g8b8: return "r8g8b8"; + case PIXMAN_b8g8r8: return "b8g8r8"; + +/* 16bpp formats */ + case PIXMAN_r5g6b5: return "r5g6b5"; + case PIXMAN_b5g6r5: return "b5g6r5"; + + case PIXMAN_a1r5g5b5: return "a1r5g5b5"; + case PIXMAN_x1r5g5b5: return 
"x1r5g5b5"; + case PIXMAN_a1b5g5r5: return "a1b5g5r5"; + case PIXMAN_x1b5g5r5: return "x1b5g5r5"; + case PIXMAN_a4r4g4b4: return "a4r4g4b4"; + case PIXMAN_x4r4g4b4: return "x4r4g4b4"; + case PIXMAN_a4b4g4r4: return "a4b4g4r4"; + case PIXMAN_x4b4g4r4: return "x4b4g4r4"; + +/* 8bpp formats */ + case PIXMAN_a8: return "a8"; + case PIXMAN_r3g3b2: return "r3g3b2"; + case PIXMAN_b2g3r3: return "b2g3r3"; + case PIXMAN_a2r2g2b2: return "a2r2g2b2"; + case PIXMAN_a2b2g2r2: return "a2b2g2r2"; + +#if 0 + case PIXMAN_x4c4: return "x4c4"; + case PIXMAN_g8: return "g8"; +#endif + case PIXMAN_c8: return "x4c4 / c8"; + case PIXMAN_x4g4: return "x4g4 / g8"; + + case PIXMAN_x4a4: return "x4a4"; + +/* 4bpp formats */ + case PIXMAN_a4: return "a4"; + case PIXMAN_r1g2b1: return "r1g2b1"; + case PIXMAN_b1g2r1: return "b1g2r1"; + case PIXMAN_a1r1g1b1: return "a1r1g1b1"; + case PIXMAN_a1b1g1r1: return "a1b1g1r1"; + + case PIXMAN_c4: return "c4"; + case PIXMAN_g4: return "g4"; + +/* 1bpp formats */ + case PIXMAN_a1: return "a1"; + + case PIXMAN_g1: return "g1"; + +/* YUV formats */ + case PIXMAN_yuy2: return "yuy2"; + case PIXMAN_yv12: return "yv12"; + }; + + /* Fake formats. + * + * This is separate switch to prevent GCC from complaining + * that the values are not in the pixman_format_code_t enum. 
+ */ + switch ((uint32_t)format) + { + case PIXMAN_null: return "null"; + case PIXMAN_solid: return "solid"; + case PIXMAN_pixbuf: return "pixbuf"; + case PIXMAN_rpixbuf: return "rpixbuf"; + case PIXMAN_unknown: return "unknown"; + }; + + return "<unknown format>"; +}; + +static double +calc_op (pixman_op_t op, double src, double dst, double srca, double dsta) +{ +#define mult_chan(src, dst, Fa, Fb) MIN ((src) * (Fa) + (dst) * (Fb), 1.0) + + double Fa, Fb; + + switch (op) + { + case PIXMAN_OP_CLEAR: + case PIXMAN_OP_DISJOINT_CLEAR: + case PIXMAN_OP_CONJOINT_CLEAR: + return mult_chan (src, dst, 0.0, 0.0); + + case PIXMAN_OP_SRC: + case PIXMAN_OP_DISJOINT_SRC: + case PIXMAN_OP_CONJOINT_SRC: + return mult_chan (src, dst, 1.0, 0.0); + + case PIXMAN_OP_DST: + case PIXMAN_OP_DISJOINT_DST: + case PIXMAN_OP_CONJOINT_DST: + return mult_chan (src, dst, 0.0, 1.0); + + case PIXMAN_OP_OVER: + return mult_chan (src, dst, 1.0, 1.0 - srca); + + case PIXMAN_OP_OVER_REVERSE: + return mult_chan (src, dst, 1.0 - dsta, 1.0); + + case PIXMAN_OP_IN: + return mult_chan (src, dst, dsta, 0.0); + + case PIXMAN_OP_IN_REVERSE: + return mult_chan (src, dst, 0.0, srca); + + case PIXMAN_OP_OUT: + return mult_chan (src, dst, 1.0 - dsta, 0.0); + + case PIXMAN_OP_OUT_REVERSE: + return mult_chan (src, dst, 0.0, 1.0 - srca); + + case PIXMAN_OP_ATOP: + return mult_chan (src, dst, dsta, 1.0 - srca); + + case PIXMAN_OP_ATOP_REVERSE: + return mult_chan (src, dst, 1.0 - dsta, srca); + + case PIXMAN_OP_XOR: + return mult_chan (src, dst, 1.0 - dsta, 1.0 - srca); + + case PIXMAN_OP_ADD: + return mult_chan (src, dst, 1.0, 1.0); + + case PIXMAN_OP_SATURATE: + case PIXMAN_OP_DISJOINT_OVER_REVERSE: + if (srca == 0.0) + Fa = 1.0; + else + Fa = MIN (1.0, (1.0 - dsta) / srca); + return mult_chan (src, dst, Fa, 1.0); + + case PIXMAN_OP_DISJOINT_OVER: + if (dsta == 0.0) + Fb = 1.0; + else + Fb = MIN (1.0, (1.0 - srca) / dsta); + return mult_chan (src, dst, 1.0, Fb); + + case PIXMAN_OP_DISJOINT_IN: + if (srca == 0.0) + 
Fa = 0.0; + else + Fa = MAX (0.0, 1.0 - (1.0 - dsta) / srca); + return mult_chan (src, dst, Fa, 0.0); + + case PIXMAN_OP_DISJOINT_IN_REVERSE: + if (dsta == 0.0) + Fb = 0.0; + else + Fb = MAX (0.0, 1.0 - (1.0 - srca) / dsta); + return mult_chan (src, dst, 0.0, Fb); + + case PIXMAN_OP_DISJOINT_OUT: + if (srca == 0.0) + Fa = 1.0; + else + Fa = MIN (1.0, (1.0 - dsta) / srca); + return mult_chan (src, dst, Fa, 0.0); + + case PIXMAN_OP_DISJOINT_OUT_REVERSE: + if (dsta == 0.0) + Fb = 1.0; + else + Fb = MIN (1.0, (1.0 - srca) / dsta); + return mult_chan (src, dst, 0.0, Fb); + + case PIXMAN_OP_DISJOINT_ATOP: + if (srca == 0.0) + Fa = 0.0; + else + Fa = MAX (0.0, 1.0 - (1.0 - dsta) / srca); + if (dsta == 0.0) + Fb = 1.0; + else + Fb = MIN (1.0, (1.0 - srca) / dsta); + return mult_chan (src, dst, Fa, Fb); + + case PIXMAN_OP_DISJOINT_ATOP_REVERSE: + if (srca == 0.0) + Fa = 1.0; + else + Fa = MIN (1.0, (1.0 - dsta) / srca); + if (dsta == 0.0) + Fb = 0.0; + else + Fb = MAX (0.0, 1.0 - (1.0 - srca) / dsta); + return mult_chan (src, dst, Fa, Fb); + + case PIXMAN_OP_DISJOINT_XOR: + if (srca == 0.0) + Fa = 1.0; + else + Fa = MIN (1.0, (1.0 - dsta) / srca); + if (dsta == 0.0) + Fb = 1.0; + else + Fb = MIN (1.0, (1.0 - srca) / dsta); + return mult_chan (src, dst, Fa, Fb); + + case PIXMAN_OP_CONJOINT_OVER: + if (dsta == 0.0) + Fb = 0.0; + else + Fb = MAX (0.0, 1.0 - srca / dsta); + return mult_chan (src, dst, 1.0, Fb); + + case PIXMAN_OP_CONJOINT_OVER_REVERSE: + if (srca == 0.0) + Fa = 0.0; + else + Fa = MAX (0.0, 1.0 - dsta / srca); + return mult_chan (src, dst, Fa, 1.0); + + case PIXMAN_OP_CONJOINT_IN: + if (srca == 0.0) + Fa = 1.0; + else + Fa = MIN (1.0, dsta / srca); + return mult_chan (src, dst, Fa, 0.0); + + case PIXMAN_OP_CONJOINT_IN_REVERSE: + if (dsta == 0.0) + Fb = 1.0; + else + Fb = MIN (1.0, srca / dsta); + return mult_chan (src, dst, 0.0, Fb); + + case PIXMAN_OP_CONJOINT_OUT: + if (srca == 0.0) + Fa = 0.0; + else + Fa = MAX (0.0, 1.0 - dsta / srca); + return mult_chan 
(src, dst, Fa, 0.0); + + case PIXMAN_OP_CONJOINT_OUT_REVERSE: + if (dsta == 0.0) + Fb = 0.0; + else + Fb = MAX (0.0, 1.0 - srca / dsta); + return mult_chan (src, dst, 0.0, Fb); + + case PIXMAN_OP_CONJOINT_ATOP: + if (srca == 0.0) + Fa = 1.0; + else + Fa = MIN (1.0, dsta / srca); + if (dsta == 0.0) + Fb = 0.0; + else + Fb = MAX (0.0, 1.0 - srca / dsta); + return mult_chan (src, dst, Fa, Fb); + + case PIXMAN_OP_CONJOINT_ATOP_REVERSE: + if (srca == 0.0) + Fa = 0.0; + else + Fa = MAX (0.0, 1.0 - dsta / srca); + if (dsta == 0.0) + Fb = 1.0; + else + Fb = MIN (1.0, srca / dsta); + return mult_chan (src, dst, Fa, Fb); + + case PIXMAN_OP_CONJOINT_XOR: + if (srca == 0.0) + Fa = 0.0; + else + Fa = MAX (0.0, 1.0 - dsta / srca); + if (dsta == 0.0) + Fb = 0.0; + else + Fb = MAX (0.0, 1.0 - srca / dsta); + return mult_chan (src, dst, Fa, Fb); + + case PIXMAN_OP_MULTIPLY: + case PIXMAN_OP_SCREEN: + case PIXMAN_OP_OVERLAY: + case PIXMAN_OP_DARKEN: + case PIXMAN_OP_LIGHTEN: + case PIXMAN_OP_COLOR_DODGE: + case PIXMAN_OP_COLOR_BURN: + case PIXMAN_OP_HARD_LIGHT: + case PIXMAN_OP_SOFT_LIGHT: + case PIXMAN_OP_DIFFERENCE: + case PIXMAN_OP_EXCLUSION: + case PIXMAN_OP_HSL_HUE: + case PIXMAN_OP_HSL_SATURATION: + case PIXMAN_OP_HSL_COLOR: + case PIXMAN_OP_HSL_LUMINOSITY: + default: + abort(); + return 0; /* silence MSVC */ + } +#undef mult_chan +} + +void +do_composite (pixman_op_t op, + const color_t *src, + const color_t *mask, + const color_t *dst, + color_t *result, + pixman_bool_t component_alpha) +{ + color_t srcval, srcalpha; + + if (mask == NULL) + { + srcval = *src; + + srcalpha.r = src->a; + srcalpha.g = src->a; + srcalpha.b = src->a; + srcalpha.a = src->a; + } + else if (component_alpha) + { + srcval.r = src->r * mask->r; + srcval.g = src->g * mask->g; + srcval.b = src->b * mask->b; + srcval.a = src->a * mask->a; + + srcalpha.r = src->a * mask->r; + srcalpha.g = src->a * mask->g; + srcalpha.b = src->a * mask->b; + srcalpha.a = src->a * mask->a; + } + else + { + srcval.r = src->r 
* mask->a; + srcval.g = src->g * mask->a; + srcval.b = src->b * mask->a; + srcval.a = src->a * mask->a; + + srcalpha.r = src->a * mask->a; + srcalpha.g = src->a * mask->a; + srcalpha.b = src->a * mask->a; + srcalpha.a = src->a * mask->a; + } + + result->r = calc_op (op, srcval.r, dst->r, srcalpha.r, dst->a); + result->g = calc_op (op, srcval.g, dst->g, srcalpha.g, dst->a); + result->b = calc_op (op, srcval.b, dst->b, srcalpha.b, dst->a); + result->a = calc_op (op, srcval.a, dst->a, srcalpha.a, dst->a); +} + static double round_channel (double p, int m) { @@ -933,6 +1421,59 @@ pixel_checker_split_pixel (const pixel_checker_t *checker, uint32_t pixel, *b = (pixel & checker->bm) >> checker->bs; } +void +pixel_checker_get_masks (const pixel_checker_t *checker, + uint32_t *am, + uint32_t *rm, + uint32_t *gm, + uint32_t *bm) +{ + if (am) + *am = checker->am; + if (rm) + *rm = checker->rm; + if (gm) + *gm = checker->gm; + if (bm) + *bm = checker->bm; +} + +void +pixel_checker_convert_pixel_to_color (const pixel_checker_t *checker, + uint32_t pixel, color_t *color) +{ + int a, r, g, b; + + pixel_checker_split_pixel (checker, pixel, &a, &r, &g, &b); + + if (checker->am == 0) + color->a = 1.0; + else + color->a = a / (double)(checker->am >> checker->as); + + if (checker->rm == 0) + color->r = 0.0; + else + color->r = r / (double)(checker->rm >> checker->rs); + + if (checker->gm == 0) + color->g = 0.0; + else + color->g = g / (double)(checker->gm >> checker->gs); + + if (checker->bm == 0) + color->b = 0.0; + else + color->b = b / (double)(checker->bm >> checker->bs); + + if (PIXMAN_FORMAT_TYPE (checker->format) == PIXMAN_TYPE_ARGB_SRGB) + { + color->r = convert_srgb_to_linear (color->r); + color->g = convert_srgb_to_linear (color->g); + color->b = convert_srgb_to_linear (color->b); + } +} + static int32_t convert (double v, uint32_t width, uint32_t mask, uint32_t shift, double def) { @@ -972,7 +1513,7 @@ get_limits (const pixel_checker_t *checker, double limit, /* The 
acceptable deviation in units of [0.0, 1.0] */ -#define DEVIATION (0.004) +#define DEVIATION (0.0064) void pixel_checker_get_max (const pixel_checker_t *checker, color_t *color, diff --git a/lib/pixman/test/utils.h b/lib/pixman/test/utils.h index f7ea34c5f..c2781516f 100644 --- a/lib/pixman/test/utils.h +++ b/lib/pixman/test/utils.h @@ -4,6 +4,7 @@ #include <assert.h> #include "pixman-private.h" /* For 'inline' definition */ +#include "utils-prng.h" #define ARRAY_LENGTH(A) ((int) (sizeof (A) / sizeof ((A) [0]))) @@ -11,49 +12,44 @@ * taken from POSIX.1-2001 example */ -extern uint32_t lcg_seed; +extern prng_t prng_state_data; +extern prng_t *prng_state; #ifdef USE_OPENMP -#pragma omp threadprivate(lcg_seed) +#pragma omp threadprivate(prng_state_data) +#pragma omp threadprivate(prng_state) #endif static inline uint32_t -lcg_rand (void) +prng_rand (void) { - lcg_seed = lcg_seed * 1103515245 + 12345; - return ((uint32_t)(lcg_seed / 65536) % 32768); + return prng_rand_r (prng_state); } static inline void -lcg_srand (uint32_t seed) +prng_srand (uint32_t seed) { - lcg_seed = seed; + if (!prng_state) + { + /* Without setting a seed, PRNG does not work properly (is just + * returning zeros). So we only initialize the pointer here to + * make sure that 'prng_srand' is always called before any + * other 'prng_*' function. The wrongdoers violating this order + * will get a segfault. 
*/ + prng_state = &prng_state_data; + } + prng_srand_r (prng_state, seed); } static inline uint32_t -lcg_rand_n (int max) +prng_rand_n (int max) { - return lcg_rand () % max; + return prng_rand () % max; } -static inline uint32_t -lcg_rand_N (int max) -{ - uint32_t lo = lcg_rand (); - uint32_t hi = lcg_rand () << 15; - return (lo | hi) % max; -} - -static inline uint32_t -lcg_rand_u32 (void) +static inline void +prng_randmemset (void *buffer, size_t size, prng_randmemset_flags_t flags) { - /* This uses the 10/11 most significant bits from the 3 lcg results - * (and mixes them with the low from the adjacent one). - */ - uint32_t lo = lcg_rand() >> -(32 - 15 - 11 * 2); - uint32_t mid = lcg_rand() << (32 - 15 - 11 * 1); - uint32_t hi = lcg_rand() << (32 - 15 - 11 * 0); - - return (hi ^ mid ^ lo); + prng_randmemset_r (prng_state, buffer, size, flags); } /* CRC 32 computation @@ -69,8 +65,12 @@ compute_crc32_for_image (uint32_t in_crc32, /* Returns TRUE if running on a little endian system */ -pixman_bool_t -is_little_endian (void); +static force_inline pixman_bool_t +is_little_endian (void) +{ + unsigned long endian_check_var = 1; + return *(unsigned char *)&endian_check_var == 1; +} /* perform endian conversion of pixel data */ @@ -124,6 +124,11 @@ a8r8g8b8_to_rgba_np (uint32_t *dst, uint32_t *src, int n_pixels); pixman_bool_t write_png (pixman_image_t *image, const char *filename); +void +draw_checkerboard (pixman_image_t *image, + int check_size, + uint32_t color1, uint32_t color2); + /* A pair of macros which can help to detect corruption of * floating point registers after a function call. 
This may * happen if _mm_empty() call is forgotten in MMX/SSE2 fast @@ -172,12 +177,26 @@ convert_linear_to_srgb (double component); void initialize_palette (pixman_indexed_t *palette, uint32_t depth, int is_rgb); +const char * +operator_name (pixman_op_t op); + +const char * +format_name (pixman_format_code_t format); + typedef struct { double r, g, b, a; } color_t; void +do_composite (pixman_op_t op, + const color_t *src, + const color_t *mask, + const color_t *dst, + color_t *result, + pixman_bool_t component_alpha); + +void round_color (pixman_format_code_t format, color_t *color); typedef struct @@ -206,3 +225,14 @@ pixel_checker_get_min (const pixel_checker_t *checker, color_t *color, pixman_bool_t pixel_checker_check (const pixel_checker_t *checker, uint32_t pixel, color_t *color); + +void +pixel_checker_convert_pixel_to_color (const pixel_checker_t *checker, + uint32_t pixel, color_t *color); + +void +pixel_checker_get_masks (const pixel_checker_t *checker, + uint32_t *am, + uint32_t *rm, + uint32_t *gm, + uint32_t *bm); |