summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/Makefile.am41
-rw-r--r--src/exa_wm.g4i65
-rw-r--r--src/exa_wm_affine.g4i1
-rw-r--r--src/exa_wm_ca.g4b2
-rw-r--r--src/exa_wm_ca_srcalpha.g4a9
-rw-r--r--src/exa_wm_ca_srcalpha.g4b8
-rw-r--r--src/exa_wm_mask_affine.g4a4
-rw-r--r--src/exa_wm_mask_affine.g4b16
-rw-r--r--src/exa_wm_mask_projective.g4a5
-rw-r--r--src/exa_wm_mask_projective.g4b32
-rw-r--r--src/exa_wm_mask_sample.g4b1
-rw-r--r--src/exa_wm_mask_sample_a.g4a48
-rw-r--r--src/exa_wm_mask_sample_a.g4b2
-rw-r--r--src/exa_wm_mask_sample_argb.g4a (renamed from src/exa_wm_mask_sample.g4a)13
-rw-r--r--src/exa_wm_mask_sample_argb.g4b2
-rw-r--r--src/exa_wm_noca.g4b8
-rw-r--r--src/exa_wm_nomask.g4a2
-rw-r--r--src/exa_wm_src_affine.g4a4
-rw-r--r--src/exa_wm_src_affine.g4b16
-rw-r--r--src/exa_wm_src_projective.g4a4
-rw-r--r--src/exa_wm_src_projective.g4b32
-rw-r--r--src/exa_wm_src_sample.g4b1
-rw-r--r--src/exa_wm_src_sample_a.g4a47
-rw-r--r--src/exa_wm_src_sample_a.g4b2
-rw-r--r--src/exa_wm_src_sample_argb.g4a (renamed from src/exa_wm_src_sample.g4a)12
-rw-r--r--src/exa_wm_src_sample_argb.g4b2
-rw-r--r--src/exa_wm_write.g4a6
-rw-r--r--src/exa_wm_write.g4b20
-rw-r--r--src/exa_wm_xy.g4b8
-rw-r--r--src/i810_reg.h1
-rw-r--r--src/i965_render.c71
-rw-r--r--src/i965_video.c4
32 files changed, 300 insertions, 189 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 81d9596a..9b5d653a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -131,18 +131,14 @@ INTEL_G4A = \
packed_yuv_wm.g4a \
exa_sf.g4a \
exa_sf_mask.g4a \
- exa_sf_rotation.g4a \
- exa_wm_maskca.g4a \
- exa_wm_maskca_srcalpha.g4a \
- exa_wm_masknoca.g4a \
- exa_wm_nomask.g4a \
- exa_wm_rotation.g4a \
exa_wm_src_affine.g4a \
exa_wm_src_projective.g4a \
- exa_wm_src_sample.g4a \
+ exa_wm_src_sample_argb.g4a \
+ exa_wm_src_sample_a.g4a \
exa_wm_mask_affine.g4a \
exa_wm_mask_projective.g4a \
- exa_wm_mask_sample.g4a \
+ exa_wm_mask_sample_argb.g4a \
+ exa_wm_mask_sample_a.g4a \
exa_wm_noca.g4a \
exa_wm_ca.g4a \
exa_wm_ca_srcalpha.g4a \
@@ -153,29 +149,21 @@ INTEL_G4I = \
exa_wm.g4i \
exa_wm_affine.g4i \
exa_wm_projective.g4i
+
INTEL_G4B = \
packed_yuv_sf.g4b \
- packed_yuv_wm.g4b \
- exa_sf_mask.g4b \
+ packed_yuv_wm.g4b \
exa_sf.g4b \
- exa_sf_rotation.g4b \
- exa_wm_maskca.g4b \
- exa_wm_maskca_srcalpha.g4b \
- exa_wm_masknoca.g4b \
- exa_wm_nomask.g4b \
- exa_wm_rotation.g4b \
- exa_wm_maskca.g4b \
- exa_wm_maskca_srcalpha.g4b \
- exa_wm_masknoca.g4b \
- exa_wm_nomask.g4b \
- exa_wm_rotation.g4b \
+ exa_sf_mask.g4b \
exa_wm_src_affine.g4b \
exa_wm_src_projective.g4b \
- exa_wm_src_sample.g4b \
+ exa_wm_src_sample_argb.g4b \
+ exa_wm_src_sample_a.g4b \
exa_wm_mask_affine.g4b \
exa_wm_mask_projective.g4b \
- exa_wm_mask_sample.g4b \
+ exa_wm_mask_sample_argb.g4b \
+ exa_wm_mask_sample_a.g4b \
exa_wm_noca.g4b \
exa_wm_ca.g4b \
exa_wm_ca_srcalpha.g4b \
@@ -194,8 +182,11 @@ if HAVE_GEN4ASM
SUFFIXES = .g4a .g4b
.g4a.g4b:
- m4 -s $*.g4a > $*.g4m
- intel-gen4asm -o $@ $*.g4m && rm $*.g4m
+ m4 -s $*.g4a > $*.g4m && intel-gen4asm -o $@ $*.g4m && rm $*.g4m
+
+$(INTEL_G4B): $(INTEL_G4I)
+
+BUILT_SOURCES= $(INTEL_G4B)
endif
diff --git a/src/exa_wm.g4i b/src/exa_wm.g4i
index 1be40e70..724ef2b5 100644
--- a/src/exa_wm.g4i
+++ b/src/exa_wm.g4i
@@ -71,47 +71,52 @@ define(`dst_y_0', `dst_y')
define(`dst_y_1', `g11')
/* When computing x * dn/dx, use this */
-define(`temp_x', `g12')
+define(`temp_x', `g34')
define(`temp_x_0', `temp_x')
-define(`temp_x_1', `g13')
+define(`temp_x_1', `g35')
/* When computing y * dn/dy, use this */
-define(`temp_y', `g14')
+define(`temp_y', `g32')
define(`temp_y_0', temp_y)
-define(`temp_y_1', `g15')
+define(`temp_y_1', `g33')
/* when loading x/y, use these to hold them in UW format */
define(`temp_x_uw', temp_x)
define(`temp_y_uw', temp_y)
/* compute source and mask u/v to this pair to send to sampler */
-define(`src_u', `m1')
-define(`src_v', `m3')
-define(`mask_u', src_u)
-define(`mask_v', src_v)
-define(`src_w', `g16')
-define(`src_w_0', src_w)
-define(`src_w_1', `g17')
-define(`mask_w', src_w)
-define(`mask_w_0', src_w_0)
-define(`mask_w_1', src_w_1)
+define(`src_msg', `m1')
+define(`src_msg_ind',`1')
+define(`src_u', `m2')
+define(`src_v', `m4')
+define(`src_w', `g12')
+define(`src_w_0', `g12')
+define(`src_w_1', `g13')
+
+define(`mask_msg', `m7')
+define(`mask_msg_ind',`7')
+define(`mask_u', `m8')
+define(`mask_v', `m10')
+define(`mask_w', `g14')
+define(`mask_w_0', `g14')
+define(`mask_w_1', `g15')
/* sample src to these registers */
-define(`src_sample0', `g18')
-define(`src_sample1', `g19')
-define(`src_sample2', `g20')
-define(`src_sample3', `g21')
-define(`src_sample4', `g22')
-define(`src_sample5', `g23')
-define(`src_sample6', `g24')
-define(`src_sample7', `g25')
+define(`src_sample0', `g16')
+define(`src_sample1', `g17')
+define(`src_sample2', `g18')
+define(`src_sample3', `g19')
+define(`src_sample4', `g20')
+define(`src_sample5', `g21')
+define(`src_sample6', `g22')
+define(`src_sample7', `g23')
/* sample mask to these registers */
-define(`mask_sample0', `g26')
-define(`mask_sample1', `g27')
-define(`mask_sample2', `g28')
-define(`mask_sample3', `g29')
-define(`mask_sample4', `g30')
-define(`mask_sample5', `g31')
-define(`mask_sample6', `g32')
-define(`mask_sample7', `g33')
+define(`mask_sample0', `g24')
+define(`mask_sample1', `g25')
+define(`mask_sample2', `g26')
+define(`mask_sample3', `g27')
+define(`mask_sample4', `g28')
+define(`mask_sample5', `g29')
+define(`mask_sample6', `g30')
+define(`mask_sample7', `g31')
diff --git a/src/exa_wm_affine.g4i b/src/exa_wm_affine.g4i
index 8fc6450b..e72656b6 100644
--- a/src/exa_wm_affine.g4i
+++ b/src/exa_wm_affine.g4i
@@ -42,4 +42,3 @@ mul (16) temp_x<1>F dst_x<8,8,1>F dv_dx { compr align1 };
mul (16) temp_y<1>F dst_y<8,8,1>F dv_dy { compr align1 };
add (16) temp_x<1>F temp_x<8,8,1>F temp_y<8,8,1>F { compr align1 };
add (16) v<1>F temp_x<8,8,1>F vo { compr align1 };
-
diff --git a/src/exa_wm_ca.g4b b/src/exa_wm_ca.g4b
index d0f3519b..28bd6c6b 100644
--- a/src/exa_wm_ca.g4b
+++ b/src/exa_wm_ca.g4b
@@ -1,4 +1,4 @@
+ { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 },
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 },
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
{ 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d03c0 },
- { 0x00802041, 0x230077bd, 0x008d0300, 0x008d0400 },
diff --git a/src/exa_wm_ca_srcalpha.g4a b/src/exa_wm_ca_srcalpha.g4a
index a1be28e4..e252e19b 100644
--- a/src/exa_wm_ca_srcalpha.g4a
+++ b/src/exa_wm_ca_srcalpha.g4a
@@ -31,8 +31,7 @@
include(`exa_wm.g4i')
-/* mul mask rgba channels to src */
-mul (16) src_sample0<1>F src_sample0<8,8,1>F src_sample6<8,8,1>F { compr align1 };
-mul (16) src_sample2<1>F src_sample2<8,8,1>F src_sample6<8,8,1>F { compr align1 };
-mul (16) src_sample4<1>F src_sample4<8,8,1>F src_sample6<8,8,1>F { compr align1 };
-mul (16) src_sample6<1>F src_sample6<8,8,1>F src_sample6<8,8,1>F { compr align1 };
+mul (16) src_sample0<1>F mask_sample0<8,8,1>F src_sample6<8,8,1>F { compr align1 };
+mul (16) src_sample2<1>F mask_sample2<8,8,1>F src_sample6<8,8,1>F { compr align1 };
+mul (16) src_sample4<1>F mask_sample4<8,8,1>F src_sample6<8,8,1>F { compr align1 };
+mul (16) src_sample6<1>F mask_sample6<8,8,1>F src_sample6<8,8,1>F { compr align1 };
diff --git a/src/exa_wm_ca_srcalpha.g4b b/src/exa_wm_ca_srcalpha.g4b
index 780e704b..94f15163 100644
--- a/src/exa_wm_ca_srcalpha.g4b
+++ b/src/exa_wm_ca_srcalpha.g4b
@@ -1,4 +1,4 @@
- { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0300 },
- { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0300 },
- { 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d0300 },
- { 0x00802041, 0x230077bd, 0x008d0300, 0x008d0300 },
+ { 0x00802041, 0x220077bd, 0x008d0300, 0x008d02c0 },
+ { 0x00802041, 0x224077bd, 0x008d0340, 0x008d02c0 },
+ { 0x00802041, 0x228077bd, 0x008d0380, 0x008d02c0 },
+ { 0x00802041, 0x22c077bd, 0x008d03c0, 0x008d02c0 },
diff --git a/src/exa_wm_mask_affine.g4a b/src/exa_wm_mask_affine.g4a
index 4c096cbb..9c52d2f9 100644
--- a/src/exa_wm_mask_affine.g4a
+++ b/src/exa_wm_mask_affine.g4a
@@ -26,12 +26,16 @@
*/
include(`exa_wm.g4i')
+
define(`du_dx', `mask_du_dx')
define(`du_dy', `mask_du_dy')
define(`uo', `mask_uo')
+
define(`dv_dx', `mask_dv_dx')
define(`dv_dy', `mask_dv_dy')
define(`vo', `mask_vo')
+
define(`u', `mask_u')
define(`v', `mask_v')
+
include(`exa_wm_affine.g4i')
diff --git a/src/exa_wm_mask_affine.g4b b/src/exa_wm_mask_affine.g4b
index 62b46e0a..35dec6fd 100644
--- a/src/exa_wm_mask_affine.g4b
+++ b/src/exa_wm_mask_affine.g4b
@@ -1,8 +1,8 @@
- { 0x00802041, 0x218077bd, 0x008d0100, 0x00000090 },
- { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000094 },
- { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
- { 0x00802040, 0x202077be, 0x008d0180, 0x0000009c },
- { 0x00802041, 0x218077bd, 0x008d0100, 0x000000a0 },
- { 0x00802041, 0x21c077bd, 0x008d0140, 0x000000a4 },
- { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
- { 0x00802040, 0x206077be, 0x008d0180, 0x000000ac },
+ { 0x00802041, 0x244077bd, 0x008d0100, 0x000000a0 },
+ { 0x00802041, 0x240077bd, 0x008d0140, 0x000000a4 },
+ { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+ { 0x00802040, 0x210077be, 0x008d0440, 0x000000ac },
+ { 0x00802041, 0x244077bd, 0x008d0100, 0x000000b0 },
+ { 0x00802041, 0x240077bd, 0x008d0140, 0x000000b4 },
+ { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+ { 0x00802040, 0x214077be, 0x008d0440, 0x000000bc },
diff --git a/src/exa_wm_mask_projective.g4a b/src/exa_wm_mask_projective.g4a
index 464f6c51..9acaaced 100644
--- a/src/exa_wm_mask_projective.g4a
+++ b/src/exa_wm_mask_projective.g4a
@@ -42,6 +42,11 @@ define(`wo', `mask_wo')
define(`u', `mask_u')
define(`v', `mask_v')
define(`w', `mask_w')
+
+define(`u_0', `mask_u_0')
+define(`v_0', `mask_v_0')
+define(`u_1', `mask_u_1')
+define(`v_1', `mask_v_1')
define(`w_0', `mask_w_0')
define(`w_1', `mask_w_1')
diff --git a/src/exa_wm_mask_projective.g4b b/src/exa_wm_mask_projective.g4b
index ac4faa3e..06848823 100644
--- a/src/exa_wm_mask_projective.g4b
+++ b/src/exa_wm_mask_projective.g4b
@@ -1,16 +1,16 @@
- { 0x00802041, 0x218077bd, 0x008d0100, 0x000000b0 },
- { 0x00802041, 0x21c077bd, 0x008d0140, 0x000000b4 },
- { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
- { 0x00802040, 0x218077bd, 0x008d0180, 0x000000bc },
- { 0x00600031, 0x22001fbd, 0x008d0180, 0x01110001 },
- { 0x00600031, 0x22201fbd, 0x008d01a0, 0x01110001 },
- { 0x00802041, 0x218077bd, 0x008d0100, 0x00000090 },
- { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000094 },
- { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
- { 0x00802040, 0x218077bd, 0x008d0180, 0x0000009c },
- { 0x00802041, 0x202077be, 0x008d0180, 0x008d0200 },
- { 0x00802041, 0x218077bd, 0x008d0100, 0x000000a0 },
- { 0x00802041, 0x21c077bd, 0x008d0140, 0x000000a4 },
- { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
- { 0x00802040, 0x218077bd, 0x008d0180, 0x000000ac },
- { 0x00802041, 0x206077be, 0x008d0180, 0x008d0200 },
+ { 0x00802041, 0x244077bd, 0x008d0100, 0x000000c0 },
+ { 0x00802041, 0x240077bd, 0x008d0140, 0x000000c4 },
+ { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+ { 0x00802040, 0x244077bd, 0x008d0440, 0x000000cc },
+ { 0x00600031, 0x21c01fbd, 0x008d0440, 0x01110001 },
+ { 0x00600031, 0x21e01fbd, 0x008d0460, 0x01110001 },
+ { 0x00802041, 0x244077bd, 0x008d0100, 0x000000a0 },
+ { 0x00802041, 0x240077bd, 0x008d0140, 0x000000a4 },
+ { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+ { 0x00802040, 0x244077bd, 0x008d0440, 0x000000ac },
+ { 0x00802041, 0x210077be, 0x008d0440, 0x008d01c0 },
+ { 0x00802041, 0x244077bd, 0x008d0100, 0x000000b0 },
+ { 0x00802041, 0x240077bd, 0x008d0140, 0x000000b4 },
+ { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+ { 0x00802040, 0x244077bd, 0x008d0440, 0x000000bc },
+ { 0x00802041, 0x214077be, 0x008d0440, 0x008d01c0 },
diff --git a/src/exa_wm_mask_sample.g4b b/src/exa_wm_mask_sample.g4b
deleted file mode 100644
index 45f7ead1..00000000
--- a/src/exa_wm_mask_sample.g4b
+++ /dev/null
@@ -1 +0,0 @@
- { 0x00800031, 0x23401d29, 0x008d0000, 0x02580001 },
diff --git a/src/exa_wm_mask_sample_a.g4a b/src/exa_wm_mask_sample_a.g4a
new file mode 100644
index 00000000..c06611d5
--- /dev/null
+++ b/src/exa_wm_mask_sample_a.g4a
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/* Sample the mask surface */
+
+include(`exa_wm.g4i')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load only alpha */
+mov (1) g0.8<1>UD 0x00007000UD { align1 mask_disable };
+
+/* mask_msg will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) mask_msg_ind /* msg reg index */
+ mask_sample6<1>UW /* readback */
+ g0<8,8,1>UW /* copy to msg start reg*/
+ sampler (2,1,F) /* sampler message description, (binding_table,sampler_index,datatype) */
+ /* here (mask->dst) we use the mask sampler and mask surface */
+ mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 2 */
+
diff --git a/src/exa_wm_mask_sample_a.g4b b/src/exa_wm_mask_sample_a.g4b
new file mode 100644
index 00000000..01fc8d5e
--- /dev/null
+++ b/src/exa_wm_mask_sample_a.g4b
@@ -0,0 +1,2 @@
+ { 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
+ { 0x07800031, 0x23c01d29, 0x008d0000, 0x02520102 },
diff --git a/src/exa_wm_mask_sample.g4a b/src/exa_wm_mask_sample_argb.g4a
index 45dc3c4f..7f0815f2 100644
--- a/src/exa_wm_mask_sample.g4a
+++ b/src/exa_wm_mask_sample_argb.g4a
@@ -34,16 +34,15 @@ include(`exa_wm.g4i')
/* use simd16 sampler, param 0 is u, param 1 is v. */
/* 'payload' loading, assuming tex coord start from g4 */
-/* m0 will be copied with g0, as it contains send desc */
+/* load argb */
+mov (1) g0.8<1>UD 0x00000000UD { align1 mask_disable };
+
+/* mask_msg will be copied with g0, as it contains send desc */
/* emit sampler 'send' cmd */
-send (16) 0 /* msg reg index */
+send (16) mask_msg_ind /* msg reg index */
mask_sample0<1>UW /* readback */
g0<8,8,1>UW /* copy to msg start reg*/
- sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
+ sampler (2,1,F) /* sampler message description, (binding_table,sampler_index,datatype)
/* here(src->dst) we should use src_sampler and src_surface */
mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
-// mov (8) mask_sample7<1>UD mask_sample7<8,8,1>UD { align1 }; /* wait sampler return */
-
-/* if we set up read-back reg correctly, emit dataport write 'send' cmd with EOT */
-
diff --git a/src/exa_wm_mask_sample_argb.g4b b/src/exa_wm_mask_sample_argb.g4b
new file mode 100644
index 00000000..97d3803c
--- /dev/null
+++ b/src/exa_wm_mask_sample_argb.g4b
@@ -0,0 +1,2 @@
+ { 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
+ { 0x07800031, 0x23001d29, 0x008d0000, 0x02580102 },
diff --git a/src/exa_wm_noca.g4b b/src/exa_wm_noca.g4b
index ba01d1a5..1c9d9486 100644
--- a/src/exa_wm_noca.g4b
+++ b/src/exa_wm_noca.g4b
@@ -1,4 +1,4 @@
- { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0400 },
- { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0400 },
- { 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d0400 },
- { 0x00802041, 0x230077bd, 0x008d0300, 0x008d0400 },
+ { 0x00802041, 0x220077bd, 0x008d0200, 0x008d03c0 },
+ { 0x00802041, 0x224077bd, 0x008d0240, 0x008d03c0 },
+ { 0x00802041, 0x228077bd, 0x008d0280, 0x008d03c0 },
+ { 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d03c0 },
diff --git a/src/exa_wm_nomask.g4a b/src/exa_wm_nomask.g4a
index 97426ec1..eb535fe3 100644
--- a/src/exa_wm_nomask.g4a
+++ b/src/exa_wm_nomask.g4a
@@ -119,7 +119,7 @@ mov (8) m8<1>F g17<8,8,1>F { align1 };
mov (8) m9<1>F g19<8,8,1>F { align1 };
/* m0, m1 are all direct passed by PS thread payload */
-mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
+mov (8) m1<1>UD g1<8,8,1>UD { align1 };
/* write */
send (16) 0 acc0<1>UW g0<8,8,1>UW write (
diff --git a/src/exa_wm_src_affine.g4a b/src/exa_wm_src_affine.g4a
index 3bf87179..3194b5a6 100644
--- a/src/exa_wm_src_affine.g4a
+++ b/src/exa_wm_src_affine.g4a
@@ -30,12 +30,16 @@
*/
include(`exa_wm.g4i')
+
define(`du_dx', `src_du_dx')
define(`du_dy', `src_du_dy')
define(`uo', `src_uo')
+
define(`dv_dx', `src_dv_dx')
define(`dv_dy', `src_dv_dy')
define(`vo', `src_vo')
+
define(`u', `src_u')
define(`v', `src_v')
+
include(`exa_wm_affine.g4i')
diff --git a/src/exa_wm_src_affine.g4b b/src/exa_wm_src_affine.g4b
index f18ea1ee..9fef62c1 100644
--- a/src/exa_wm_src_affine.g4b
+++ b/src/exa_wm_src_affine.g4b
@@ -1,8 +1,8 @@
- { 0x00802041, 0x218077bd, 0x008d0100, 0x00000060 },
- { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000064 },
- { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
- { 0x00802040, 0x202077be, 0x008d0180, 0x0000006c },
- { 0x00802041, 0x218077bd, 0x008d0100, 0x00000070 },
- { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000074 },
- { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
- { 0x00802040, 0x206077be, 0x008d0180, 0x0000007c },
+ { 0x00802041, 0x244077bd, 0x008d0100, 0x00000060 },
+ { 0x00802041, 0x240077bd, 0x008d0140, 0x00000064 },
+ { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+ { 0x00802040, 0x204077be, 0x008d0440, 0x0000006c },
+ { 0x00802041, 0x244077bd, 0x008d0100, 0x00000070 },
+ { 0x00802041, 0x240077bd, 0x008d0140, 0x00000074 },
+ { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+ { 0x00802040, 0x208077be, 0x008d0440, 0x0000007c },
diff --git a/src/exa_wm_src_projective.g4a b/src/exa_wm_src_projective.g4a
index 6bd2d6a4..16c9cd56 100644
--- a/src/exa_wm_src_projective.g4a
+++ b/src/exa_wm_src_projective.g4a
@@ -39,6 +39,10 @@ define(`wo', `src_wo')
define(`u', `src_u')
define(`v', `src_v')
define(`w', `src_w')
+define(`u_0', `src_u_0')
+define(`v_0', `src_v_0')
+define(`u_1', `src_u_1')
+define(`v_1', `src_v_1')
define(`w_0', `src_w_0')
define(`w_1', `src_w_1')
diff --git a/src/exa_wm_src_projective.g4b b/src/exa_wm_src_projective.g4b
index 68bfc920..2d203955 100644
--- a/src/exa_wm_src_projective.g4b
+++ b/src/exa_wm_src_projective.g4b
@@ -1,16 +1,16 @@
- { 0x00802041, 0x218077bd, 0x008d0100, 0x00000080 },
- { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000084 },
- { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
- { 0x00802040, 0x218077bd, 0x008d0180, 0x0000008c },
- { 0x00600031, 0x22001fbd, 0x008d0180, 0x01110001 },
- { 0x00600031, 0x22201fbd, 0x008d01a0, 0x01110001 },
- { 0x00802041, 0x218077bd, 0x008d0100, 0x00000060 },
- { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000064 },
- { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
- { 0x00802040, 0x218077bd, 0x008d0180, 0x0000006c },
- { 0x00802041, 0x202077be, 0x008d0180, 0x008d0200 },
- { 0x00802041, 0x218077bd, 0x008d0100, 0x00000070 },
- { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000074 },
- { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
- { 0x00802040, 0x218077bd, 0x008d0180, 0x0000007c },
- { 0x00802041, 0x206077be, 0x008d0180, 0x008d0200 },
+ { 0x00802041, 0x244077bd, 0x008d0100, 0x00000080 },
+ { 0x00802041, 0x240077bd, 0x008d0140, 0x00000084 },
+ { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+ { 0x00802040, 0x244077bd, 0x008d0440, 0x0000008c },
+ { 0x00600031, 0x21801fbd, 0x008d0440, 0x01110001 },
+ { 0x00600031, 0x21a01fbd, 0x008d0460, 0x01110001 },
+ { 0x00802041, 0x244077bd, 0x008d0100, 0x00000060 },
+ { 0x00802041, 0x240077bd, 0x008d0140, 0x00000064 },
+ { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+ { 0x00802040, 0x244077bd, 0x008d0440, 0x0000006c },
+ { 0x00802041, 0x204077be, 0x008d0440, 0x008d0180 },
+ { 0x00802041, 0x244077bd, 0x008d0100, 0x00000070 },
+ { 0x00802041, 0x240077bd, 0x008d0140, 0x00000074 },
+ { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
+ { 0x00802040, 0x244077bd, 0x008d0440, 0x0000007c },
+ { 0x00802041, 0x208077be, 0x008d0440, 0x008d0180 },
diff --git a/src/exa_wm_src_sample.g4b b/src/exa_wm_src_sample.g4b
deleted file mode 100644
index 5ca33f5a..00000000
--- a/src/exa_wm_src_sample.g4b
+++ /dev/null
@@ -1 +0,0 @@
- { 0x00800031, 0x22401d29, 0x008d0000, 0x02580001 },
diff --git a/src/exa_wm_src_sample_a.g4a b/src/exa_wm_src_sample_a.g4a
new file mode 100644
index 00000000..803c358a
--- /dev/null
+++ b/src/exa_wm_src_sample_a.g4a
@@ -0,0 +1,47 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/* Sample the src surface */
+
+include(`exa_wm.g4i')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load alpha */
+mov (1) g0.8<1>UD 0x00007000UD { align1 mask_disable };
+
+/* src_msg will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) src_msg_ind /* msg reg index */
+ src_sample6<1>UW /* readback */
+ g0<8,8,1>UW /* copy to msg start reg*/
+ sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
+ /* here(src->dst) we should use src_sampler and src_surface */
+ mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 2 */
diff --git a/src/exa_wm_src_sample_a.g4b b/src/exa_wm_src_sample_a.g4b
new file mode 100644
index 00000000..85057575
--- /dev/null
+++ b/src/exa_wm_src_sample_a.g4b
@@ -0,0 +1,2 @@
+ { 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
+ { 0x01800031, 0x22c01d29, 0x008d0000, 0x02520001 },
diff --git a/src/exa_wm_src_sample.g4a b/src/exa_wm_src_sample_argb.g4a
index 04cd3e3d..4fcf276c 100644
--- a/src/exa_wm_src_sample.g4a
+++ b/src/exa_wm_src_sample_argb.g4a
@@ -34,16 +34,14 @@ include(`exa_wm.g4i')
/* use simd16 sampler, param 0 is u, param 1 is v. */
/* 'payload' loading, assuming tex coord start from g4 */
-/* m0 will be copied with g0, as it contains send desc */
+/* load argb */
+mov (1) g0.8<1>UD 0x00000000UD { align1 mask_disable };
+
+/* src_msg will be copied with g0, as it contains send desc */
/* emit sampler 'send' cmd */
-send (16) 0 /* msg reg index */
+send (16) src_msg_ind /* msg reg index */
src_sample0<1>UW /* readback */
g0<8,8,1>UW /* copy to msg start reg*/
sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
/* here(src->dst) we should use src_sampler and src_surface */
mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
-
-// mov (8) src_sample7<1>UD src_sample7<8,8,1>UD { align1 }; /* wait sampler return */
-
-/* if we set up read-back reg correctly, emit dataport write 'send' cmd with EOT */
-
diff --git a/src/exa_wm_src_sample_argb.g4b b/src/exa_wm_src_sample_argb.g4b
new file mode 100644
index 00000000..1d4a7304
--- /dev/null
+++ b/src/exa_wm_src_sample_argb.g4b
@@ -0,0 +1,2 @@
+ { 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
+ { 0x01800031, 0x22001d29, 0x008d0000, 0x02580001 },
diff --git a/src/exa_wm_write.g4a b/src/exa_wm_write.g4a
index 9a821d72..5d3e6b1e 100644
--- a/src/exa_wm_write.g4a
+++ b/src/exa_wm_write.g4a
@@ -31,9 +31,6 @@
include(`exa_wm.g4i')
-/* m0, m1 are all direct passed by PS thread payload */
-mov (8) m1<1>F g1<8,8,1>F { align1 };
-
/* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */
/* src_sample0 -> m2
src_sample1 -> m6
@@ -55,7 +52,7 @@ mov (8) m8<1>F src_sample5<8,8,1>F { align1 };
mov (8) m9<1>F src_sample7<8,8,1>F { align1 };
/* m0, m1 are all direct passed by PS thread payload */
-mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
+mov (8) m1<1>UD g1<8,8,1>UD { align1 };
/* write */
send (16) 0 acc0<1>UW g0<8,8,1>UW write (
@@ -76,5 +73,4 @@ nop;
nop;
nop;
nop;
-nop;
diff --git a/src/exa_wm_write.g4b b/src/exa_wm_write.g4b
index dd266a3e..b7421c21 100644
--- a/src/exa_wm_write.g4b
+++ b/src/exa_wm_write.g4b
@@ -1,13 +1,12 @@
- { 0x00600001, 0x202003be, 0x008d0020, 0x00000000 },
- { 0x00600001, 0x204003be, 0x008d0240, 0x00000000 },
- { 0x00600001, 0x206003be, 0x008d0280, 0x00000000 },
- { 0x00600001, 0x208003be, 0x008d02c0, 0x00000000 },
- { 0x00600001, 0x20a003be, 0x008d0300, 0x00000000 },
- { 0x00600001, 0x20c003be, 0x008d0260, 0x00000000 },
- { 0x00600001, 0x20e003be, 0x008d02a0, 0x00000000 },
- { 0x00600001, 0x210003be, 0x008d02e0, 0x00000000 },
- { 0x00600001, 0x212003be, 0x008d0320, 0x00000000 },
- { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0200, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d0280, 0x00000000 },
+ { 0x00600001, 0x20a003be, 0x008d02c0, 0x00000000 },
+ { 0x00600001, 0x20c003be, 0x008d0220, 0x00000000 },
+ { 0x00600001, 0x20e003be, 0x008d0260, 0x00000000 },
+ { 0x00600001, 0x210003be, 0x008d02a0, 0x00000000 },
+ { 0x00600001, 0x212003be, 0x008d02e0, 0x00000000 },
+ { 0x00600001, 0x20200022, 0x008d0020, 0x00000000 },
{ 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
@@ -17,4 +16,3 @@
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_xy.g4b b/src/exa_wm_xy.g4b
index 7784a3d1..c5620cdd 100644
--- a/src/exa_wm_xy.g4b
+++ b/src/exa_wm_xy.g4b
@@ -1,4 +1,4 @@
- { 0x00800040, 0x21806d29, 0x00480028, 0x10101010 },
- { 0x00800040, 0x21c06d29, 0x0048002a, 0x11001100 },
- { 0x00802040, 0x2100753d, 0x008d0180, 0x00004020 },
- { 0x00802040, 0x2140753d, 0x008d01c0, 0x00004024 },
+ { 0x00800040, 0x24406d29, 0x00480028, 0x10101010 },
+ { 0x00800040, 0x24006d29, 0x0048002a, 0x11001100 },
+ { 0x00802040, 0x2100753d, 0x008d0440, 0x00004020 },
+ { 0x00802040, 0x2140753d, 0x008d0400, 0x00004024 },
diff --git a/src/i810_reg.h b/src/i810_reg.h
index d799e77f..834b948c 100644
--- a/src/i810_reg.h
+++ b/src/i810_reg.h
@@ -2322,6 +2322,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define MI_FLUSH (0x04<<23)
#define MI_WRITE_DIRTY_STATE (1<<4)
#define MI_END_SCENE (1<<3)
+#define MI_GLOBAL_SNAPSHOT_COUNT_RESET (1<<3)
#define MI_INHIBIT_RENDER_CACHE_FLUSH (1<<2)
#define MI_STATE_INSTRUCTION_CACHE_FLUSH (1<<1)
#define MI_INVALIDATE_MAP_CACHE (1<<0)
diff --git a/src/i965_render.c b/src/i965_render.c
index e348c2b0..c2260eba 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -285,7 +285,7 @@ static int next_offset, total_state_size;
static char *state_base;
static int state_base_offset;
static float *vb;
-static int vb_size = (6 * 4) * 4 ; /* 6 DWORDS per vertex - and mask*/
+static int vb_size = (2 + 3 + 3) * 3 * 4; /* (2 dst + 3 src + 3 mask) floats per vertex, 3 vertices, 4 bytes per float */
static uint32_t src_blend, dst_blend;
@@ -318,7 +318,7 @@ static const uint32_t sip_kernel_static[][4] = {
*/
#define SF_KERNEL_NUM_GRF 16
-#define SF_MAX_THREADS 2
+#define SF_MAX_THREADS 1
static const uint32_t sf_kernel_static[][4] = {
#include "exa_sf.g4b"
@@ -329,29 +329,31 @@ static const uint32_t sf_kernel_static_mask[][4] = {
};
/* ps kernels */
-#define PS_KERNEL_NUM_GRF 32
-#define PS_MAX_THREADS 32
+#define PS_KERNEL_NUM_GRF 48
+#define PS_MAX_THREADS 32
+#define PS_SCRATCH_SPACE 2048
+#define PS_SCRATCH_SPACE_LOG 1 /* log2 (PS_SCRATCH_SPACE) - 10 (1024 is 0, 2048 is 1) */
static const uint32_t ps_kernel_static_nomask_affine [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
-#include "exa_wm_src_sample.g4b"
+#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"
};
static const uint32_t ps_kernel_static_nomask_projective [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
-#include "exa_wm_src_sample.g4b"
+#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"
};
static const uint32_t ps_kernel_static_maskca [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
-#include "exa_wm_src_sample.g4b"
+#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_affine.g4b"
-#include "exa_wm_mask_sample.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca.g4b"
#include "exa_wm_write.g4b"
};
@@ -359,9 +361,9 @@ static const uint32_t ps_kernel_static_maskca [][4] = {
static const uint32_t ps_kernel_static_maskca_srcalpha [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
-#include "exa_wm_src_sample.g4b"
+#include "exa_wm_src_sample_a.g4b"
#include "exa_wm_mask_affine.g4b"
-#include "exa_wm_mask_sample.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca_srcalpha.g4b"
#include "exa_wm_write.g4b"
};
@@ -369,9 +371,9 @@ static const uint32_t ps_kernel_static_maskca_srcalpha [][4] = {
static const uint32_t ps_kernel_static_masknoca [][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
-#include "exa_wm_src_sample.g4b"
+#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_affine.g4b"
-#include "exa_wm_mask_sample.g4b"
+#include "exa_wm_mask_sample_a.g4b"
#include "exa_wm_noca.g4b"
#include "exa_wm_write.g4b"
};
@@ -432,21 +434,21 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
pI830->transform[0] = pSrcPicture->transform;
is_affine_src = i830_transform_is_affine (pI830->transform[0]);
- is_affine_mask = i830_transform_is_affine (pI830->transform[1]);
- is_affine = is_affine_src && is_affine_mask;
if (!pMask) {
pI830->transform[1] = NULL;
pI830->scale_units[1][0] = -1;
pI830->scale_units[1][1] = -1;
+ is_affine_mask = TRUE;
} else {
pI830->transform[1] = pMaskPicture->transform;
- if (pI830->transform[1])
- I830FALLBACK("i965 mask transform not implemented!\n");
pI830->scale_units[1][0] = pMask->drawable.width;
pI830->scale_units[1][1] = pMask->drawable.height;
+ is_affine_mask = i830_transform_is_affine (pI830->transform[1]);
}
+ is_affine = is_affine_src && is_affine_mask;
+
/* setup 3d pipeline state */
binding_table_entries = 2; /* default no mask */
@@ -463,7 +465,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
next_offset = wm_offset + sizeof(*wm_state);
wm_scratch_offset = ALIGN(next_offset, 1024);
- next_offset = wm_scratch_offset + 1024 * PS_MAX_THREADS;
+ next_offset = wm_scratch_offset + PS_SCRATCH_SPACE * PS_MAX_THREADS;
cc_offset = ALIGN(next_offset, 32);
next_offset = cc_offset + sizeof(*cc_state);
@@ -782,6 +784,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
I830FALLBACK("Bad filter 0x%x\n", pMaskPicture->filter);
}
+ mask_sampler_state->ss0.default_color_mode = 0; /* GL mode */
if (!pMaskPicture->repeat) {
mask_sampler_state->ss1.r_wrap_mode =
BRW_TEXCOORDMODE_CLAMP_BORDER;
@@ -885,7 +888,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
wm_state->thread0.kernel_start_pointer =
(state_base_offset + ps_kernel_offset) >> 6;
wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
- wm_state->thread1.single_program_flow = 0;
+ wm_state->thread1.single_program_flow = 1;
if (!pMask)
wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
else
@@ -893,7 +896,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
wm_state->thread2.scratch_space_base_pointer = (state_base_offset +
wm_scratch_offset)>>10;
- wm_state->thread2.per_thread_scratch_space = 0;
+ wm_state->thread2.per_thread_scratch_space = PS_SCRATCH_SPACE_LOG;
wm_state->thread3.const_urb_entry_read_length = 0;
wm_state->thread3.const_urb_entry_read_offset = 0;
/* Each pair of attributes (src/mask coords) is one URB entry */
@@ -1044,12 +1047,12 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
if (is_affine)
{
src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
- w_component = BRW_VFCOMPONENT_NOSTORE;
+ w_component = BRW_VFCOMPONENT_STORE_1_FLT;
}
else
{
src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
- w_component = BRW_VFCOMPONENT_NOSTORE;
+ w_component = BRW_VFCOMPONENT_STORE_SRC;
}
BEGIN_BATCH(pMask?12:10);
/* Set up the pointer to our (single) vertex buffer */
@@ -1083,7 +1086,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
(BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
(w_component << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
/* u1, v1, w1 */
if (pMask) {
@@ -1095,15 +1098,15 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
(BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
(w_component << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) |
- ((4 + 2 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+ ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
}
ADVANCE_BATCH();
}
#ifdef I830DEBUG
- ErrorF("try to sync to show any errors...");
+ ErrorF("try to sync to show any errors...\n");
I830Sync(pScrn);
#endif
return TRUE;
@@ -1119,7 +1122,6 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
Bool is_affine_src, is_affine_mask, is_affine;
float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
int i;
- int per_vertex = 2; /* dst x/y */
is_affine_src = i830_transform_is_affine (pI830->transform[0]);
is_affine_mask = i830_transform_is_affine (pI830->transform[1]);
@@ -1139,7 +1141,6 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
pI830->transform[0],
&src_x[2], &src_y[2]))
return;
- per_vertex += 2; /* src u/v */
}
else
{
@@ -1158,14 +1159,13 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
&src_x[2], &src_y[2],
&src_w[2]))
return;
- per_vertex += 3; /* src u/v/w */
}
if (pI830->scale_units[1][0] == -1 || pI830->scale_units[1][1] == -1) {
has_mask = FALSE;
} else {
has_mask = TRUE;
- if (is_affine_mask) {
+ if (is_affine) {
if (!i830_get_transformed_coordinates(maskX, maskY,
pI830->transform[1],
&mask_x[0], &mask_y[0]))
@@ -1178,7 +1178,6 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
pI830->transform[1],
&mask_x[2], &mask_y[2]))
return;
- per_vertex += 2; /* mask u/v */
} else {
if (!i830_get_transformed_coordinates_3d(maskX, maskY,
pI830->transform[1],
@@ -1195,10 +1194,17 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
&mask_x[2], &mask_y[2],
&mask_w[2]))
return;
- per_vertex += 3; /* mask u/v/w */
}
}
+ {
+ BEGIN_BATCH(2);
+ OUT_BATCH(MI_FLUSH |
+ MI_STATE_INSTRUCTION_CACHE_FLUSH |
+ BRW_MI_GLOBAL_SNAPSHOT_RESET);
+ OUT_BATCH(MI_NOOP);
+ ADVANCE_BATCH();
+ }
/* Wait for any existing composite rectangles to land before we overwrite
* the VB with the next one.
*/
@@ -1246,6 +1252,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
if (!is_affine)
vb[i++] = mask_w[0];
}
+ assert (i * 4 <= vb_size);
{
BEGIN_BATCH(6);
@@ -1262,7 +1269,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
ADVANCE_BATCH();
}
#ifdef I830DEBUG
- ErrorF("sync after 3dprimitive");
+ ErrorF("sync after 3dprimitive\n");
I830Sync(pScrn);
#endif
/* we must be sure that the pipeline is flushed before next exa draw,
diff --git a/src/i965_video.c b/src/i965_video.c
index 41f56a9d..1d2c3f54 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -78,7 +78,7 @@ static const uint32_t sip_kernel_static[][4] = {
#define SF_MAX_THREADS 1
static const uint32_t sf_kernel_static[][4] = {
-#include "sf_prog.h"
+#include "packed_yuv_sf.g4b"
};
/*
@@ -94,7 +94,7 @@ static const uint32_t sf_kernel_static[][4] = {
#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
static const uint32_t ps_kernel_static[][4] = {
-#include "wm_prog.h"
+#include "packed_yuv_wm.g4b"
};
#define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1))