summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorKeith Packard <keithp@keithp.com>2008-04-08 16:21:55 -0500
committerKeith Packard <keithp@keithp.com>2008-04-10 16:58:11 -0500
commit32ef98518394d29cb87405005c660278489396bb (patch)
treedff128b528813b318c55e3796ccdc5c051b26c5d /src
parentb68d9f4245d0ebe3371c179401ff145f1a4d101b (diff)
Compute pixel values directly into data port
Instead of leaving pixel values in src_sample registers, compute the pixel values directly into the data port to save 8 moves. This cannot work when no computation is done, both because there is no way to wait for the sampler to finish and because the sampler returns data in a different order from that required by the data port (sigh).
Diffstat (limited to 'src')
-rw-r--r--src/exa_wm.g4i8
-rw-r--r--src/exa_wm_ca.g4a15
-rw-r--r--src/exa_wm_ca.g4b12
-rw-r--r--src/exa_wm_ca_srcalpha.g4a15
-rw-r--r--src/exa_wm_ca_srcalpha.g4b12
-rw-r--r--src/exa_wm_noca.g4a15
-rw-r--r--src/exa_wm_noca.g4b12
-rw-r--r--src/exa_wm_src_data.g4a46
-rw-r--r--src/exa_wm_src_data.g4b8
-rw-r--r--src/exa_wm_write.g4a18
-rw-r--r--src/exa_wm_write.g4b8
-rw-r--r--src/i965_render.c2
12 files changed, 121 insertions, 50 deletions
diff --git a/src/exa_wm.g4i b/src/exa_wm.g4i
index ee8e3ad0..a4b464bd 100644
--- a/src/exa_wm.g4i
+++ b/src/exa_wm.g4i
@@ -103,12 +103,20 @@ define(`mask_w_1', `src_w_1')
/* sample src to these registers */
define(`src_sample_base', `g14')
+
+define(`src_sample_r', `g14')
define(`src_sample_r_01', `g14')
define(`src_sample_r_23', `g15')
+
+define(`src_sample_g', `g16')
define(`src_sample_g_01', `g16')
define(`src_sample_g_23', `g17')
+
+define(`src_sample_b', `g18')
define(`src_sample_b_01', `g18')
define(`src_sample_b_23', `g19')
+
+define(`src_sample_a', `g20')
define(`src_sample_a_01', `g20')
define(`src_sample_a_23', `g21')
diff --git a/src/exa_wm_ca.g4a b/src/exa_wm_ca.g4a
index 5d982b38..a8cb806c 100644
--- a/src/exa_wm_ca.g4a
+++ b/src/exa_wm_ca.g4a
@@ -32,7 +32,14 @@
include(`exa_wm.g4i')
/* mul mask rgba channels to src */
-mul (16) src_sample_r_01<1>F src_sample_r_01<8,8,1>F mask_sample_r_01<8,8,1>F { compr align1 };
-mul (16) src_sample_g_01<1>F src_sample_g_01<8,8,1>F mask_sample_g_01<8,8,1>F { compr align1 };
-mul (16) src_sample_b_01<1>F src_sample_b_01<8,8,1>F mask_sample_b_01<8,8,1>F { compr align1 };
-mul (16) src_sample_a_01<1>F src_sample_a_01<8,8,1>F mask_sample_a_01<8,8,1>F { compr align1 };
+mul (8) data_port_r_01<1>F src_sample_r_01<8,8,1>F mask_sample_r_01<8,8,1>F { align1 };
+mul (8) data_port_r_23<1>F src_sample_r_23<8,8,1>F mask_sample_r_23<8,8,1>F { align1 };
+
+mul (8) data_port_g_01<1>F src_sample_g_01<8,8,1>F mask_sample_g_01<8,8,1>F { align1 };
+mul (8) data_port_g_23<1>F src_sample_g_23<8,8,1>F mask_sample_g_23<8,8,1>F { align1 };
+
+mul (8) data_port_b_01<1>F src_sample_b_01<8,8,1>F mask_sample_b_01<8,8,1>F { align1 };
+mul (8) data_port_b_23<1>F src_sample_b_23<8,8,1>F mask_sample_b_23<8,8,1>F { align1 };
+
+mul (8) data_port_a_01<1>F src_sample_a_01<8,8,1>F mask_sample_a_01<8,8,1>F { align1 };
+mul (8) data_port_a_23<1>F src_sample_a_23<8,8,1>F mask_sample_a_23<8,8,1>F { align1 };
diff --git a/src/exa_wm_ca.g4b b/src/exa_wm_ca.g4b
index 372e8b26..ec336114 100644
--- a/src/exa_wm_ca.g4b
+++ b/src/exa_wm_ca.g4b
@@ -1,4 +1,8 @@
- { 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d02c0 },
- { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 },
- { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 },
- { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
+ { 0x00600041, 0x204077be, 0x008d01c0, 0x008d02c0 },
+ { 0x00600041, 0x20c077be, 0x008d01e0, 0x008d02e0 },
+ { 0x00600041, 0x206077be, 0x008d0200, 0x008d0300 },
+ { 0x00600041, 0x20e077be, 0x008d0220, 0x008d0320 },
+ { 0x00600041, 0x208077be, 0x008d0240, 0x008d0340 },
+ { 0x00600041, 0x210077be, 0x008d0260, 0x008d0360 },
+ { 0x00600041, 0x20a077be, 0x008d0280, 0x008d0380 },
+ { 0x00600041, 0x212077be, 0x008d02a0, 0x008d03a0 },
diff --git a/src/exa_wm_ca_srcalpha.g4a b/src/exa_wm_ca_srcalpha.g4a
index d1f847fd..a5f029fb 100644
--- a/src/exa_wm_ca_srcalpha.g4a
+++ b/src/exa_wm_ca_srcalpha.g4a
@@ -31,7 +31,14 @@
include(`exa_wm.g4i')
-mul (16) src_sample_r_01<1>F mask_sample_r_01<8,8,1>F src_sample_a_01<8,8,1>F { compr align1 };
-mul (16) src_sample_g_01<1>F mask_sample_g_01<8,8,1>F src_sample_a_01<8,8,1>F { compr align1 };
-mul (16) src_sample_b_01<1>F mask_sample_b_01<8,8,1>F src_sample_a_01<8,8,1>F { compr align1 };
-mul (16) src_sample_a_01<1>F mask_sample_a_01<8,8,1>F src_sample_a_01<8,8,1>F { compr align1 };
+mul (8) data_port_r_01<1>F mask_sample_r_01<8,8,1>F src_sample_a_01<8,8,1>F { align1 };
+mul (8) data_port_r_23<1>F mask_sample_r_23<8,8,1>F src_sample_a_23<8,8,1>F { align1 };
+
+mul (8) data_port_g_01<1>F mask_sample_g_01<8,8,1>F src_sample_a_01<8,8,1>F { align1 };
+mul (8) data_port_g_23<1>F mask_sample_g_23<8,8,1>F src_sample_a_23<8,8,1>F { align1 };
+
+mul (8) data_port_b_01<1>F mask_sample_b_01<8,8,1>F src_sample_a_01<8,8,1>F { align1 };
+mul (8) data_port_b_23<1>F mask_sample_b_23<8,8,1>F src_sample_a_23<8,8,1>F { align1 };
+
+mul (8) data_port_a_01<1>F mask_sample_a_01<8,8,1>F src_sample_a_01<8,8,1>F { align1 };
+mul (8) data_port_a_23<1>F mask_sample_a_23<8,8,1>F src_sample_a_23<8,8,1>F { align1 };
diff --git a/src/exa_wm_ca_srcalpha.g4b b/src/exa_wm_ca_srcalpha.g4b
index 963d6760..6ea89b81 100644
--- a/src/exa_wm_ca_srcalpha.g4b
+++ b/src/exa_wm_ca_srcalpha.g4b
@@ -1,4 +1,8 @@
- { 0x00802041, 0x21c077bd, 0x008d02c0, 0x008d0280 },
- { 0x00802041, 0x220077bd, 0x008d0300, 0x008d0280 },
- { 0x00802041, 0x224077bd, 0x008d0340, 0x008d0280 },
- { 0x00802041, 0x228077bd, 0x008d0380, 0x008d0280 },
+ { 0x00600041, 0x204077be, 0x008d02c0, 0x008d0280 },
+ { 0x00600041, 0x20c077be, 0x008d02e0, 0x008d02a0 },
+ { 0x00600041, 0x206077be, 0x008d0300, 0x008d0280 },
+ { 0x00600041, 0x20e077be, 0x008d0320, 0x008d02a0 },
+ { 0x00600041, 0x208077be, 0x008d0340, 0x008d0280 },
+ { 0x00600041, 0x210077be, 0x008d0360, 0x008d02a0 },
+ { 0x00600041, 0x20a077be, 0x008d0380, 0x008d0280 },
+ { 0x00600041, 0x212077be, 0x008d03a0, 0x008d02a0 },
diff --git a/src/exa_wm_noca.g4a b/src/exa_wm_noca.g4a
index d0d60faa..f43c6f43 100644
--- a/src/exa_wm_noca.g4a
+++ b/src/exa_wm_noca.g4a
@@ -32,7 +32,14 @@
include(`exa_wm.g4i')
/* mul mask's alpha channel to src */
-mul (16) src_sample_r_01<1>F src_sample_r_01<8,8,1>F mask_sample_a_01<8,8,1>F { compr align1 };
-mul (16) src_sample_g_01<1>F src_sample_g_01<8,8,1>F mask_sample_a_01<8,8,1>F { compr align1 };
-mul (16) src_sample_b_01<1>F src_sample_b_01<8,8,1>F mask_sample_a_01<8,8,1>F { compr align1 };
-mul (16) src_sample_a_01<1>F src_sample_a_01<8,8,1>F mask_sample_a_01<8,8,1>F { compr align1 };
+mul (8) data_port_r_01<1>F src_sample_r_01<8,8,1>F mask_sample_a_01<8,8,1>F { align1 };
+mul (8) data_port_r_23<1>F src_sample_r_23<8,8,1>F mask_sample_a_23<8,8,1>F { align1 };
+
+mul (8) data_port_g_01<1>F src_sample_g_01<8,8,1>F mask_sample_a_01<8,8,1>F { align1 };
+mul (8) data_port_g_23<1>F src_sample_g_23<8,8,1>F mask_sample_a_23<8,8,1>F { align1 };
+
+mul (8) data_port_b_01<1>F src_sample_b_01<8,8,1>F mask_sample_a_01<8,8,1>F { align1 };
+mul (8) data_port_b_23<1>F src_sample_b_23<8,8,1>F mask_sample_a_23<8,8,1>F { align1 };
+
+mul (8) data_port_a_01<1>F src_sample_a_01<8,8,1>F mask_sample_a_01<8,8,1>F { align1 };
+mul (8) data_port_a_23<1>F src_sample_a_23<8,8,1>F mask_sample_a_23<8,8,1>F { align1 };
diff --git a/src/exa_wm_noca.g4b b/src/exa_wm_noca.g4b
index 15063341..2f5940ac 100644
--- a/src/exa_wm_noca.g4b
+++ b/src/exa_wm_noca.g4b
@@ -1,4 +1,8 @@
- { 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
- { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0380 },
- { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0380 },
- { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
+ { 0x00600041, 0x204077be, 0x008d01c0, 0x008d0380 },
+ { 0x00600041, 0x20c077be, 0x008d01e0, 0x008d03a0 },
+ { 0x00600041, 0x206077be, 0x008d0200, 0x008d0380 },
+ { 0x00600041, 0x20e077be, 0x008d0220, 0x008d03a0 },
+ { 0x00600041, 0x208077be, 0x008d0240, 0x008d0380 },
+ { 0x00600041, 0x210077be, 0x008d0260, 0x008d03a0 },
+ { 0x00600041, 0x20a077be, 0x008d0280, 0x008d0380 },
+ { 0x00600041, 0x212077be, 0x008d02a0, 0x008d03a0 },
diff --git a/src/exa_wm_src_data.g4a b/src/exa_wm_src_data.g4a
new file mode 100644
index 00000000..9c3daf0f
--- /dev/null
+++ b/src/exa_wm_src_data.g4a
@@ -0,0 +1,46 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+include(`exa_wm.g4i')
+
+/*
+ * Prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2),
+ *
+ * Note that the SIMD16 write message takes data for the first
+ * two sub-spans followed by the data for the second two sub-spans
+ * instead of having the two sub-spans interleaved by channel. Weird.
+ */
+
+mov (8) data_port_r_01<1>F src_sample_r_01<8,8,1>F { align1 };
+mov (8) data_port_g_01<1>F src_sample_g_01<8,8,1>F { align1 };
+mov (8) data_port_b_01<1>F src_sample_b_01<8,8,1>F { align1 };
+mov (8) data_port_a_01<1>F src_sample_a_01<8,8,1>F { align1 };
+
+mov (8) data_port_r_23<1>F src_sample_r_23<8,8,1>F { align1 };
+mov (8) data_port_g_23<1>F src_sample_g_23<8,8,1>F { align1 };
+mov (8) data_port_b_23<1>F src_sample_b_23<8,8,1>F { align1 };
+mov (8) data_port_a_23<1>F src_sample_a_23<8,8,1>F { align1 };
diff --git a/src/exa_wm_src_data.g4b b/src/exa_wm_src_data.g4b
new file mode 100644
index 00000000..8b535805
--- /dev/null
+++ b/src/exa_wm_src_data.g4b
@@ -0,0 +1,8 @@
+ { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
+ { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
+ { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
+ { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
diff --git a/src/exa_wm_write.g4a b/src/exa_wm_write.g4a
index b16e6497..c46023e8 100644
--- a/src/exa_wm_write.g4a
+++ b/src/exa_wm_write.g4a
@@ -27,24 +27,6 @@
include(`exa_wm.g4i')
-/*
- * Prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2),
- *
- * Note that the SIMD16 write message takes data for the first
- * two sub-spans followed by the data for the second two sub-spans
- * instead of having the two sub-spans interleaved by channel. Weird.
- */
-
-mov (8) data_port_r_01<1>F src_sample_r_01<8,8,1>F { align1 };
-mov (8) data_port_g_01<1>F src_sample_g_01<8,8,1>F { align1 };
-mov (8) data_port_b_01<1>F src_sample_b_01<8,8,1>F { align1 };
-mov (8) data_port_a_01<1>F src_sample_a_01<8,8,1>F { align1 };
-
-mov (8) data_port_r_23<1>F src_sample_r_23<8,8,1>F { align1 };
-mov (8) data_port_g_23<1>F src_sample_g_23<8,8,1>F { align1 };
-mov (8) data_port_b_23<1>F src_sample_b_23<8,8,1>F { align1 };
-mov (8) data_port_a_23<1>F src_sample_a_23<8,8,1>F { align1 };
-
/* m0, m1 are all direct passed by PS thread payload */
mov (8) data_port_msg_1<1>UD g1<8,8,1>UD { align1 };
diff --git a/src/exa_wm_write.g4b b/src/exa_wm_write.g4b
index 785fe321..9402d115 100644
--- a/src/exa_wm_write.g4b
+++ b/src/exa_wm_write.g4b
@@ -1,11 +1,3 @@
- { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
- { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
- { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
- { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
- { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
- { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
- { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
- { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0020, 0x00000000 },
{ 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/i965_render.c b/src/i965_render.c
index 1b4afcc4..79db41c8 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -340,6 +340,7 @@ static const uint32_t ps_kernel_nomask_affine_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_src_data.g4b"
#include "exa_wm_write.g4b"
};
@@ -347,6 +348,7 @@ static const uint32_t ps_kernel_nomask_projective_static [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_src_data.g4b"
#include "exa_wm_write.g4b"
};