diff options
author | Keith Packard <keithp@keithp.com> | 2008-04-08 16:21:55 -0500 |
---|---|---|
committer | Keith Packard <keithp@keithp.com> | 2008-04-10 16:58:11 -0500 |
commit | 32ef98518394d29cb87405005c660278489396bb (patch) | |
tree | dff128b528813b318c55e3796ccdc5c051b26c5d /src | |
parent | b68d9f4245d0ebe3371c179401ff145f1a4d101b (diff) |
Compute pixel values directly into data port
Instead of leaving pixel values in src_sample registers, compute the pixel
values directly to the data port to save 8 moves. This cannot work when no
computation is done, both because there is no way to wait for the sampler to
finish and because the sampler returns data in a different order from that
required by the data port (sigh).
Diffstat (limited to 'src')
-rw-r--r-- | src/exa_wm.g4i | 8 | ||||
-rw-r--r-- | src/exa_wm_ca.g4a | 15 | ||||
-rw-r--r-- | src/exa_wm_ca.g4b | 12 | ||||
-rw-r--r-- | src/exa_wm_ca_srcalpha.g4a | 15 | ||||
-rw-r--r-- | src/exa_wm_ca_srcalpha.g4b | 12 | ||||
-rw-r--r-- | src/exa_wm_noca.g4a | 15 | ||||
-rw-r--r-- | src/exa_wm_noca.g4b | 12 | ||||
-rw-r--r-- | src/exa_wm_src_data.g4a | 46 | ||||
-rw-r--r-- | src/exa_wm_src_data.g4b | 8 | ||||
-rw-r--r-- | src/exa_wm_write.g4a | 18 | ||||
-rw-r--r-- | src/exa_wm_write.g4b | 8 | ||||
-rw-r--r-- | src/i965_render.c | 2 |
12 files changed, 121 insertions, 50 deletions
diff --git a/src/exa_wm.g4i b/src/exa_wm.g4i index ee8e3ad0..a4b464bd 100644 --- a/src/exa_wm.g4i +++ b/src/exa_wm.g4i @@ -103,12 +103,20 @@ define(`mask_w_1', `src_w_1') /* sample src to these registers */ define(`src_sample_base', `g14') + +define(`src_sample_r', `g14') define(`src_sample_r_01', `g14') define(`src_sample_r_23', `g15') + +define(`src_sample_g', `g16') define(`src_sample_g_01', `g16') define(`src_sample_g_23', `g17') + +define(`src_sample_b', `g18') define(`src_sample_b_01', `g18') define(`src_sample_b_23', `g19') + +define(`src_sample_a', `g20') define(`src_sample_a_01', `g20') define(`src_sample_a_23', `g21') diff --git a/src/exa_wm_ca.g4a b/src/exa_wm_ca.g4a index 5d982b38..a8cb806c 100644 --- a/src/exa_wm_ca.g4a +++ b/src/exa_wm_ca.g4a @@ -32,7 +32,14 @@ include(`exa_wm.g4i') /* mul mask rgba channels to src */ -mul (16) src_sample_r_01<1>F src_sample_r_01<8,8,1>F mask_sample_r_01<8,8,1>F { compr align1 }; -mul (16) src_sample_g_01<1>F src_sample_g_01<8,8,1>F mask_sample_g_01<8,8,1>F { compr align1 }; -mul (16) src_sample_b_01<1>F src_sample_b_01<8,8,1>F mask_sample_b_01<8,8,1>F { compr align1 }; -mul (16) src_sample_a_01<1>F src_sample_a_01<8,8,1>F mask_sample_a_01<8,8,1>F { compr align1 }; +mul (8) data_port_r_01<1>F src_sample_r_01<8,8,1>F mask_sample_r_01<8,8,1>F { align1 }; +mul (8) data_port_r_23<1>F src_sample_r_23<8,8,1>F mask_sample_r_23<8,8,1>F { align1 }; + +mul (8) data_port_g_01<1>F src_sample_g_01<8,8,1>F mask_sample_g_01<8,8,1>F { align1 }; +mul (8) data_port_g_23<1>F src_sample_g_23<8,8,1>F mask_sample_g_23<8,8,1>F { align1 }; + +mul (8) data_port_b_01<1>F src_sample_b_01<8,8,1>F mask_sample_b_01<8,8,1>F { align1 }; +mul (8) data_port_b_23<1>F src_sample_b_23<8,8,1>F mask_sample_b_23<8,8,1>F { align1 }; + +mul (8) data_port_a_01<1>F src_sample_a_01<8,8,1>F mask_sample_a_01<8,8,1>F { align1 }; +mul (8) data_port_a_23<1>F src_sample_a_23<8,8,1>F mask_sample_a_23<8,8,1>F { align1 }; diff --git a/src/exa_wm_ca.g4b 
b/src/exa_wm_ca.g4b index 372e8b26..ec336114 100644 --- a/src/exa_wm_ca.g4b +++ b/src/exa_wm_ca.g4b @@ -1,4 +1,8 @@ - { 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d02c0 }, - { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 }, - { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 }, - { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 }, + { 0x00600041, 0x204077be, 0x008d01c0, 0x008d02c0 }, + { 0x00600041, 0x20c077be, 0x008d01e0, 0x008d02e0 }, + { 0x00600041, 0x206077be, 0x008d0200, 0x008d0300 }, + { 0x00600041, 0x20e077be, 0x008d0220, 0x008d0320 }, + { 0x00600041, 0x208077be, 0x008d0240, 0x008d0340 }, + { 0x00600041, 0x210077be, 0x008d0260, 0x008d0360 }, + { 0x00600041, 0x20a077be, 0x008d0280, 0x008d0380 }, + { 0x00600041, 0x212077be, 0x008d02a0, 0x008d03a0 }, diff --git a/src/exa_wm_ca_srcalpha.g4a b/src/exa_wm_ca_srcalpha.g4a index d1f847fd..a5f029fb 100644 --- a/src/exa_wm_ca_srcalpha.g4a +++ b/src/exa_wm_ca_srcalpha.g4a @@ -31,7 +31,14 @@ include(`exa_wm.g4i') -mul (16) src_sample_r_01<1>F mask_sample_r_01<8,8,1>F src_sample_a_01<8,8,1>F { compr align1 }; -mul (16) src_sample_g_01<1>F mask_sample_g_01<8,8,1>F src_sample_a_01<8,8,1>F { compr align1 }; -mul (16) src_sample_b_01<1>F mask_sample_b_01<8,8,1>F src_sample_a_01<8,8,1>F { compr align1 }; -mul (16) src_sample_a_01<1>F mask_sample_a_01<8,8,1>F src_sample_a_01<8,8,1>F { compr align1 }; +mul (8) data_port_r_01<1>F mask_sample_r_01<8,8,1>F src_sample_a_01<8,8,1>F { align1 }; +mul (8) data_port_r_23<1>F mask_sample_r_23<8,8,1>F src_sample_a_23<8,8,1>F { align1 }; + +mul (8) data_port_g_01<1>F mask_sample_g_01<8,8,1>F src_sample_a_01<8,8,1>F { align1 }; +mul (8) data_port_g_23<1>F mask_sample_g_23<8,8,1>F src_sample_a_23<8,8,1>F { align1 }; + +mul (8) data_port_b_01<1>F mask_sample_b_01<8,8,1>F src_sample_a_01<8,8,1>F { align1 }; +mul (8) data_port_b_23<1>F mask_sample_b_23<8,8,1>F src_sample_a_23<8,8,1>F { align1 }; + +mul (8) data_port_a_01<1>F mask_sample_a_01<8,8,1>F src_sample_a_01<8,8,1>F { align1 }; +mul (8) 
data_port_a_23<1>F mask_sample_a_23<8,8,1>F src_sample_a_23<8,8,1>F { align1 }; diff --git a/src/exa_wm_ca_srcalpha.g4b b/src/exa_wm_ca_srcalpha.g4b index 963d6760..6ea89b81 100644 --- a/src/exa_wm_ca_srcalpha.g4b +++ b/src/exa_wm_ca_srcalpha.g4b @@ -1,4 +1,8 @@ - { 0x00802041, 0x21c077bd, 0x008d02c0, 0x008d0280 }, - { 0x00802041, 0x220077bd, 0x008d0300, 0x008d0280 }, - { 0x00802041, 0x224077bd, 0x008d0340, 0x008d0280 }, - { 0x00802041, 0x228077bd, 0x008d0380, 0x008d0280 }, + { 0x00600041, 0x204077be, 0x008d02c0, 0x008d0280 }, + { 0x00600041, 0x20c077be, 0x008d02e0, 0x008d02a0 }, + { 0x00600041, 0x206077be, 0x008d0300, 0x008d0280 }, + { 0x00600041, 0x20e077be, 0x008d0320, 0x008d02a0 }, + { 0x00600041, 0x208077be, 0x008d0340, 0x008d0280 }, + { 0x00600041, 0x210077be, 0x008d0360, 0x008d02a0 }, + { 0x00600041, 0x20a077be, 0x008d0380, 0x008d0280 }, + { 0x00600041, 0x212077be, 0x008d03a0, 0x008d02a0 }, diff --git a/src/exa_wm_noca.g4a b/src/exa_wm_noca.g4a index d0d60faa..f43c6f43 100644 --- a/src/exa_wm_noca.g4a +++ b/src/exa_wm_noca.g4a @@ -32,7 +32,14 @@ include(`exa_wm.g4i') /* mul mask's alpha channel to src */ -mul (16) src_sample_r_01<1>F src_sample_r_01<8,8,1>F mask_sample_a_01<8,8,1>F { compr align1 }; -mul (16) src_sample_g_01<1>F src_sample_g_01<8,8,1>F mask_sample_a_01<8,8,1>F { compr align1 }; -mul (16) src_sample_b_01<1>F src_sample_b_01<8,8,1>F mask_sample_a_01<8,8,1>F { compr align1 }; -mul (16) src_sample_a_01<1>F src_sample_a_01<8,8,1>F mask_sample_a_01<8,8,1>F { compr align1 }; +mul (8) data_port_r_01<1>F src_sample_r_01<8,8,1>F mask_sample_a_01<8,8,1>F { align1 }; +mul (8) data_port_r_23<1>F src_sample_r_23<8,8,1>F mask_sample_a_23<8,8,1>F { align1 }; + +mul (8) data_port_g_01<1>F src_sample_g_01<8,8,1>F mask_sample_a_01<8,8,1>F { align1 }; +mul (8) data_port_g_23<1>F src_sample_g_23<8,8,1>F mask_sample_a_23<8,8,1>F { align1 }; + +mul (8) data_port_b_01<1>F src_sample_b_01<8,8,1>F mask_sample_a_01<8,8,1>F { align1 }; +mul (8) data_port_b_23<1>F 
src_sample_b_23<8,8,1>F mask_sample_a_23<8,8,1>F { align1 }; + +mul (8) data_port_a_01<1>F src_sample_a_01<8,8,1>F mask_sample_a_01<8,8,1>F { align1 }; +mul (8) data_port_a_23<1>F src_sample_a_23<8,8,1>F mask_sample_a_23<8,8,1>F { align1 }; diff --git a/src/exa_wm_noca.g4b b/src/exa_wm_noca.g4b index 15063341..2f5940ac 100644 --- a/src/exa_wm_noca.g4b +++ b/src/exa_wm_noca.g4b @@ -1,4 +1,8 @@ - { 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d0380 }, - { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0380 }, - { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0380 }, - { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 }, + { 0x00600041, 0x204077be, 0x008d01c0, 0x008d0380 }, + { 0x00600041, 0x20c077be, 0x008d01e0, 0x008d03a0 }, + { 0x00600041, 0x206077be, 0x008d0200, 0x008d0380 }, + { 0x00600041, 0x20e077be, 0x008d0220, 0x008d03a0 }, + { 0x00600041, 0x208077be, 0x008d0240, 0x008d0380 }, + { 0x00600041, 0x210077be, 0x008d0260, 0x008d03a0 }, + { 0x00600041, 0x20a077be, 0x008d0280, 0x008d0380 }, + { 0x00600041, 0x212077be, 0x008d02a0, 0x008d03a0 }, diff --git a/src/exa_wm_src_data.g4a b/src/exa_wm_src_data.g4a new file mode 100644 index 00000000..9c3daf0f --- /dev/null +++ b/src/exa_wm_src_data.g4a @@ -0,0 +1,46 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +include(`exa_wm.g4i') + +/* + * Prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), + * + * Note that the SIMD16 write message takes data for the first + * two sub-spans followed by the data for the second two sub-spans + * instead of having the two sub-spans interleaved by channel. Weird. + */ + +mov (8) data_port_r_01<1>F src_sample_r_01<8,8,1>F { align1 }; +mov (8) data_port_g_01<1>F src_sample_g_01<8,8,1>F { align1 }; +mov (8) data_port_b_01<1>F src_sample_b_01<8,8,1>F { align1 }; +mov (8) data_port_a_01<1>F src_sample_a_01<8,8,1>F { align1 }; + +mov (8) data_port_r_23<1>F src_sample_r_23<8,8,1>F { align1 }; +mov (8) data_port_g_23<1>F src_sample_g_23<8,8,1>F { align1 }; +mov (8) data_port_b_23<1>F src_sample_b_23<8,8,1>F { align1 }; +mov (8) data_port_a_23<1>F src_sample_a_23<8,8,1>F { align1 }; diff --git a/src/exa_wm_src_data.g4b b/src/exa_wm_src_data.g4b new file mode 100644 index 00000000..8b535805 --- /dev/null +++ b/src/exa_wm_src_data.g4b @@ -0,0 +1,8 @@ + { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 }, + { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 }, + { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 }, + { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 }, + { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 }, + { 0x00600001, 0x212003be, 
0x008d02a0, 0x00000000 }, diff --git a/src/exa_wm_write.g4a b/src/exa_wm_write.g4a index b16e6497..c46023e8 100644 --- a/src/exa_wm_write.g4a +++ b/src/exa_wm_write.g4a @@ -27,24 +27,6 @@ include(`exa_wm.g4i') -/* - * Prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), - * - * Note that the SIMD16 write message takes data for the first - * two sub-spans followed by the data for the second two sub-spans - * instead of having the two sub-spans interleaved by channel. Weird. - */ - -mov (8) data_port_r_01<1>F src_sample_r_01<8,8,1>F { align1 }; -mov (8) data_port_g_01<1>F src_sample_g_01<8,8,1>F { align1 }; -mov (8) data_port_b_01<1>F src_sample_b_01<8,8,1>F { align1 }; -mov (8) data_port_a_01<1>F src_sample_a_01<8,8,1>F { align1 }; - -mov (8) data_port_r_23<1>F src_sample_r_23<8,8,1>F { align1 }; -mov (8) data_port_g_23<1>F src_sample_g_23<8,8,1>F { align1 }; -mov (8) data_port_b_23<1>F src_sample_b_23<8,8,1>F { align1 }; -mov (8) data_port_a_23<1>F src_sample_a_23<8,8,1>F { align1 }; - /* m0, m1 are all direct passed by PS thread payload */ mov (8) data_port_msg_1<1>UD g1<8,8,1>UD { align1 }; diff --git a/src/exa_wm_write.g4b b/src/exa_wm_write.g4b index 785fe321..9402d115 100644 --- a/src/exa_wm_write.g4b +++ b/src/exa_wm_write.g4b @@ -1,11 +1,3 @@ - { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 }, - { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 }, - { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 }, - { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 }, - { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 }, - { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 }, - { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 }, - { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 }, { 0x00600001, 0x20200022, 0x008d0020, 0x00000000 }, { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/i965_render.c b/src/i965_render.c index 1b4afcc4..79db41c8 100644 --- a/src/i965_render.c +++ b/src/i965_render.c 
@@ -340,6 +340,7 @@ static const uint32_t ps_kernel_nomask_affine_static [][4] = { #include "exa_wm_xy.g4b" #include "exa_wm_src_affine.g4b" #include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_src_data.g4b" #include "exa_wm_write.g4b" }; @@ -347,6 +348,7 @@ static const uint32_t ps_kernel_nomask_projective_static [][4] = { #include "exa_wm_xy.g4b" #include "exa_wm_src_projective.g4b" #include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_src_data.g4b" #include "exa_wm_write.g4b" }; |