summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.am96
-rw-r--r--src/exa_sf.g4b15
-rw-r--r--src/exa_sf_mask.g4a104
-rw-r--r--src/exa_sf_mask.g4b25
-rw-r--r--src/exa_wm.g4i119
-rw-r--r--src/exa_wm_affine.g4i45
-rw-r--r--src/exa_wm_ca.g4a38
-rw-r--r--src/exa_wm_ca.g4b4
-rw-r--r--src/exa_wm_ca_srcalpha.g4a38
-rw-r--r--src/exa_wm_ca_srcalpha.g4b4
-rw-r--r--src/exa_wm_mask_affine.g4a37
-rw-r--r--src/exa_wm_mask_affine.g4b8
-rw-r--r--src/exa_wm_mask_projective.g4a48
-rw-r--r--src/exa_wm_mask_projective.g4b16
-rw-r--r--src/exa_wm_mask_sample.g4a49
-rw-r--r--src/exa_wm_mask_sample.g4b1
-rw-r--r--src/exa_wm_maskca.g4a32
-rw-r--r--src/exa_wm_maskca.g4b95
-rw-r--r--src/exa_wm_maskca_srcalpha.g4a32
-rw-r--r--src/exa_wm_maskca_srcalpha.g4b95
-rw-r--r--src/exa_wm_masknoca.g4a32
-rw-r--r--src/exa_wm_masknoca.g4b95
-rw-r--r--src/exa_wm_noca.g4a38
-rw-r--r--src/exa_wm_noca.g4b4
-rw-r--r--src/exa_wm_projective.g4i51
-rw-r--r--src/exa_wm_src_affine.g4a41
-rw-r--r--src/exa_wm_src_affine.g4b8
-rw-r--r--src/exa_wm_src_projective.g4a45
-rw-r--r--src/exa_wm_src_projective.g4b16
-rw-r--r--src/exa_wm_src_sample.g4a49
-rw-r--r--src/exa_wm_src_sample.g4b1
-rw-r--r--src/exa_wm_write.g4a80
-rw-r--r--src/exa_wm_write.g4b20
-rw-r--r--src/exa_wm_xy.g4a52
-rw-r--r--src/exa_wm_xy.g4b4
-rw-r--r--src/i965_render.c265
-rw-r--r--src/packed_yuv_sf.g4b17
-rw-r--r--src/packed_yuv_wm.g4a32
-rw-r--r--src/packed_yuv_wm.g4b82
39 files changed, 1592 insertions, 241 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 7df69b61..81d9596a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -136,58 +136,66 @@ INTEL_G4A = \
exa_wm_maskca_srcalpha.g4a \
exa_wm_masknoca.g4a \
exa_wm_nomask.g4a \
- exa_wm_rotation.g4a
-
-INTEL_G4H = \
- sf_prog.h \
- wm_prog.h \
- exa_sf_mask_prog.h \
- exa_sf_prog.h \
- exa_sf_rotation_prog.h \
- exa_wm_maskca_prog.h \
- exa_wm_maskca_srcalpha_prog.h \
- exa_wm_masknoca_prog.h \
- exa_wm_nomask_prog.h \
- exa_wm_rotation_prog.h
-
+ exa_wm_rotation.g4a \
+ exa_wm_src_affine.g4a \
+ exa_wm_src_projective.g4a \
+ exa_wm_src_sample.g4a \
+ exa_wm_mask_affine.g4a \
+ exa_wm_mask_projective.g4a \
+ exa_wm_mask_sample.g4a \
+ exa_wm_noca.g4a \
+ exa_wm_ca.g4a \
+ exa_wm_ca_srcalpha.g4a \
+ exa_wm_write.g4a \
+ exa_wm_xy.g4a
+
+INTEL_G4I = \
+ exa_wm.g4i \
+ exa_wm_affine.g4i \
+ exa_wm_projective.g4i
+
+# Assembled .g4b outputs of the .g4a programs; distributed through EXTRA_DIST.
+# Each file is listed exactly once.
+INTEL_G4B = \
+ packed_yuv_sf.g4b \
+ packed_yuv_wm.g4b \
+ exa_sf_mask.g4b \
+ exa_sf.g4b \
+ exa_sf_rotation.g4b \
+ exa_wm_maskca.g4b \
+ exa_wm_maskca_srcalpha.g4b \
+ exa_wm_masknoca.g4b \
+ exa_wm_nomask.g4b \
+ exa_wm_rotation.g4b \
+ exa_wm_src_affine.g4b \
+ exa_wm_src_projective.g4b \
+ exa_wm_src_sample.g4b \
+ exa_wm_mask_affine.g4b \
+ exa_wm_mask_projective.g4b \
+ exa_wm_mask_sample.g4b \
+ exa_wm_noca.g4b \
+ exa_wm_ca.g4b \
+ exa_wm_ca_srcalpha.g4b \
+ exa_wm_write.g4b \
+ exa_wm_xy.g4b
+
EXTRA_DIST = \
$(XMODE_SRCS) \
$(INTEL_G4A) \
- $(INTEL_G4H) \
+ $(INTEL_G4I) \
+ $(INTEL_G4B) \
$(INTEL_DRI_SRCS) \
$(INTEL_XVMC_SRCS)
if HAVE_GEN4ASM
-sf_prog.h: packed_yuv_sf.g4a
- intel-gen4asm -o sf_prog.h packed_yuv_sf.g4a
-
-wm_prog.h: packed_yuv_wm.g4a
- intel-gen4asm -o wm_prog.h packed_yuv_wm.g4a
-
-exa_sf_mask_prog.h: exa_sf_mask.g4a
- intel-gen4asm -o exa_sf_mask_prog.h exa_sf_mask.g4a
-
-exa_sf_prog.h: exa_sf.g4a
- intel-gen4asm -o exa_sf_prog.h exa_sf.g4a
-
-exa_sf_rotation_prog.h: exa_sf_rotation.g4a
- intel-gen4asm -o exa_sf_rotation_prog.h exa_sf_rotation.g4a
-
-exa_wm_maskca_prog.h: exa_wm_maskca.g4a
- intel-gen4asm -o exa_wm_maskca_prog.h exa_wm_maskca.g4a
-
-exa_wm_maskca_srcalpha_prog.h: exa_wm_maskca_srcalpha.g4a
- intel-gen4asm -o exa_wm_maskca_srcalpha_prog.h exa_wm_maskca_srcalpha.g4a
-
-exa_wm_masknoca_prog.h: exa_wm_masknoca.g4a
- intel-gen4asm -o exa_wm_masknoca_prog.h exa_wm_masknoca.g4a
-
-exa_wm_nomask_prog.h: exa_wm_nomask.g4a
- intel-gen4asm -o exa_wm_nomask_prog.h exa_wm_nomask.g4a
-
-exa_wm_rotation_prog.h: exa_wm_rotation.g4a
- intel-gen4asm -o exa_wm_rotation_prog.h exa_wm_rotation.g4a
+SUFFIXES = .g4a .g4b
+# Run each .g4a through m4, then assemble the expanded text into the .g4b.
+# $< and -I$(srcdir) keep this working when the build directory differs from
+# the source directory: $< is the located .g4a, and -I lets include() find
+# the .g4i fragments. The .g4m scratch file is removed once assembly succeeds.
+# NOTE(review): the .g4i fragments are not prerequisites here, so editing one
+# does not by itself trigger reassembly.
+.g4a.g4b:
+	m4 -I$(srcdir) -s $< > $*.g4m
+	intel-gen4asm -o $@ $*.g4m && rm $*.g4m
endif
diff --git a/src/exa_sf.g4b b/src/exa_sf.g4b
new file mode 100644
index 00000000..223c9c9a
--- /dev/null
+++ b/src/exa_sf.g4b
@@ -0,0 +1,15 @@
+ { 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 },
+ { 0x00400001, 0x206003be, 0x00690060, 0x00000000 },
+ { 0x00400040, 0x20e077bd, 0x00690080, 0x006940a0 },
+ { 0x00400041, 0x202077be, 0x006900e0, 0x000000c0 },
+ { 0x00400040, 0x20e077bd, 0x006900a0, 0x00694060 },
+ { 0x00400041, 0x204077be, 0x006900e0, 0x000000c8 },
+ { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_sf_mask.g4a b/src/exa_sf_mask.g4a
index c830fd86..a0d6efc4 100644
--- a/src/exa_sf_mask.g4a
+++ b/src/exa_sf_mask.g4a
@@ -21,82 +21,52 @@
* IN THE SOFTWARE.
*
* Authors:
- * Keith Packard <keithp@keithp.com>
- * Eric Anholt <eric@anholt.net>
* Wang Zhenyu <zhenyu.z.wang@intel.com>
*/
+/* FIXME: how to set up the second coefficient for the mask tex coord */
-/*
- * Inputs (note all sub-register addresses are bytes, not float indices)
- *
- * Note that the vertices will have been reordered:
- *
- * V0 is topmost (leftmost among topmost) (upper left)
- * V1 is next clockwise (lower right)
- * V2 is remaining (lower left)
- *
- * V0 ...................... XX
- * | .
- * | .
- * | .
- * V2------------------------V1
- *
- * G0 thread state -- just pass along
- *
- * G1 and G2 are fixed by SF spec
- *
- * G1.0 reserved
- * G1.4 Provoking vertex
- * G1.8 Determinant
- * G1.12 X1 - X0
- * G1.16 X2 - X0
- * G1.20 Y1 - Y0
- * G1.24 Y2 - Y0
- * G1.30 reserved
- *
- * G2.0 Z0
- * G2.4 1/W0
- * G2.8 Z1
- * G2.12 1/W1
- * G2.16 Z2
- * G2.20 1/W2
- * G2.24 reserved
- * G2.30 reserved
- *
- * G3 is V0 Vertex Attribute Data from URB (upper left)
- *
- * G3.0 u0
- * G3.4 v0
- *
- * G4 is V1 Vertex Attribute Data from URB (lower right)
- *
- * G4.0 u1
- * G4.4 v1
- *
- * G5 is V2 Vertex Attribute Data from URB (lower left)
- *
+/*
+ g3 (v0) { u0, v0, 1.0, 1.0 } ==> {u0, v0, 1.0, 1.0, mu0, mv0, 1.0, 1.0} Co[0](u0) Co[1](v0) Co[2](mu0) Co[3](mv0)
+ g4 (v1) { u1, v1, 1.0, 1.0 } ==> {u1, v1, 1.0, 1.0, mu1, mv1, 1.0, 1.0}
+ g5 (v2) { u2, v2 } ==> {u2, v2, mu2, mv2}
+ g6 { 1/(x1-x0), 1/(y1-y0) }
+ g7 { u1-u0, v1-v0, 0, 0} ==>{u1-u0, v1-v0,0, 0, mu1-mu0, mv1-mv0, 0, 0}
+ -> { (u1-u0)/(x1-x0), (v1-v0)/(y1-y0) } ==>{(u1-u0)/(x1-x0), (v1-v0)/(y1-y0),(mu1-mu0)/(x1-x0), (mv1-mv0)/(y1-y0)}
+ Cx, Cy Cx[0], Cy[0], Cx[1], Cy[1]
*/
-/* Compute inverses of the input deltas */
-send (4) 0 g6<1>F g1.12<4,4,1>F math inv mlen 1 rlen 1 { align1 };
+/* assign Cx[0], Cx[1] to src, same to Cy, Co
+ Cx[2], Cx[3] to mask, same to Cy, Co */
-/* texture location at V0 */
-mov (8) m3<1>F g3<8,8,1>F { align1 };
+send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+add (8) g7<1>F g4<8,8,1>F -g3<8,8,1>F { align1 };
+/* Cx[0] */
+mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 };
+/* Cy[0] */
+mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 };
+/* Cx[2] */
+mul (1) g7.8<1>F g7.8<0,1,0>F g6<0,1,0>F { align1 };
+/* Cy[2] */
+mul (1) g7.12<1>F g7.12<0,1,0>F g6.4<0,1,0>F { align1 };
-/* compute V1 - V2 (motion in X) for texture coordinates */
-add (8) g7<1>F g4<8,8,1>F -g5<8,8,1>F { align1 };
-
-/* multiply by 1/dx */
-mul (8) m1<1>F g7<8,8,1>F g6.0<0,1,0>F { align1 };
-
-/* Compute V2 - V0 (motion in Y) for texture coordinates */
-add (8) g7<1>F g5<8,8,1>F -g3<8,8,1>F { align1 };
-
-/* multiply by 1/dy */
-mul (8) m2<1>F g7<8,8,1>F g6.8<0,1,0>F {align1 };
+/* src Cx[0], Cx[1] */
+mov (8) m1<1>F g7<0,1,0>F { align1 };
+/* mask Cx[2], Cx[3] */
+mov (1) m1.8<1>F g7.8<0,1,0>F { align1 };
+mov (1) m1.12<1>F g7.8<0,1,0>F { align1 };
+/* src Cy[0], Cy[1] */
+mov (8) m2<1>F g7.4<0,1,0>F { align1 };
+/* mask Cy[2], Cy[3] */
+mov (1) m2.8<1>F g7.12<0,1,0>F { align1 };
+mov (1) m2.12<1>F g7.12<0,1,0>F { align1 };
+/* src Co[0], Co[1] */
+mov (8) m3<1>F g3<8,8,1>F { align1 };
+/* mask Co[2], Co[3] */
+mov (1) m3.8<1>F g3.8<0,1,0>F { align1 };
+mov (1) m3.12<1>F g3.12<0,1,0>F { align1 };
-/* and we're done */
send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT };
nop;
nop;
diff --git a/src/exa_sf_mask.g4b b/src/exa_sf_mask.g4b
new file mode 100644
index 00000000..4e9114d6
--- /dev/null
+++ b/src/exa_sf_mask.g4b
@@ -0,0 +1,25 @@
+ { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
+ { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
+ { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 },
+ { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 },
+ { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 },
+ { 0x00000041, 0x20e877bd, 0x000000e8, 0x000000c0 },
+ { 0x00000041, 0x20ec77bd, 0x000000ec, 0x000000c4 },
+ { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
+ { 0x00000001, 0x202803be, 0x000000e8, 0x00000000 },
+ { 0x00000001, 0x202c03be, 0x000000e8, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
+ { 0x00000001, 0x204803be, 0x000000ec, 0x00000000 },
+ { 0x00000001, 0x204c03be, 0x000000ec, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
+ { 0x00000001, 0x206803be, 0x00000068, 0x00000000 },
+ { 0x00000001, 0x206c03be, 0x0000006c, 0x00000000 },
+ { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm.g4i b/src/exa_wm.g4i
new file mode 100644
index 00000000..c7ecb09d
--- /dev/null
+++ b/src/exa_wm.g4i
@@ -0,0 +1,119 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/*
+ * Input parameters
+ */
+
+define(`quote', `ifelse(`$#', `0', `', ``$*'')')
+
+/* Destination X/Y */
+define(`dst_x_uw', `g1.8<2,4,0>UW')
+define(`dst_y_uw', `g1.10<2,4,0>UW')
+define(`screen_x0', `g1.0<0,1,0>F')
+define(`screen_y0', `g1.4<0,1,0>F')
+
+/* Source transformation parameters */
+define(`src_du_dx', `g3.0<0,1,0>F')
+define(`src_du_dy', `g3.4<0,1,0>F')
+define(`src_uo', `g3.12<0,1,0>F')
+define(`src_dv_dx', `g3.16<0,1,0>F')
+define(`src_dv_dy', `g3.20<0,1,0>F')
+define(`src_vo', `g3.28<0,1,0>F')
+define(`src_dw_dx', `g4.0<0,1,0>F')
+define(`src_dw_dy', `g4.4<0,1,0>F')
+define(`src_wo', `g4.12<0,1,0>F')
+
+define(`mask_du_dx', `g4.16<0,1,0>F')
+define(`mask_du_dy', `g4.20<0,1,0>F')
+define(`mask_uo', `g4.28<0,1,0>F')
+define(`mask_dv_dx', `g5.0<0,1,0>F')
+define(`mask_dv_dy', `g5.4<0,1,0>F')
+define(`mask_vo', `g5.12<0,1,0>F')
+define(`mask_dw_dx', `g5.16<0,1,0>F')
+define(`mask_dw_dy', `g5.20<0,1,0>F')
+define(`mask_wo', `g5.28<0,1,0>F')
+
+/*
+ * Local variables
+ */
+
+/* this holds the X dest coordinates */
+define(`dst_x', `g8')
+define(`dst_x_0', `dst_x')
+define(`dst_x_1', `g9')
+
+/* this holds the Y dest coordinates */
+define(`dst_y', `g10')
+define(`dst_y_0', `dst_y')
+define(`dst_y_1', `g11')
+
+/* When computing x * dn/dx, use this */
+define(`temp_x', `g12')
+define(`temp_x_0', `temp_x')
+define(`temp_x_1', `g13')
+
+/* When computing y * dn/dy, use this */
+define(`temp_y', `g14')
+define(`temp_y_0', temp_y)
+define(`temp_y_1', `g15')
+
+/* when loading x/y, use these to hold them in UW format */
+define(`temp_x_uw', temp_x)
+define(`temp_y_uw', temp_y)
+
+/* compute source and mask u/v to this pair to send to sampler */
+define(`src_u', `m1')
+define(`src_v', `m3')
+define(`mask_u', src_u)
+define(`mask_v', src_v)
+define(`src_w', `g16')
+define(`src_w_0', src_w)
+define(`src_w_1', `g17')
+define(`mask_w', src_w)
+define(`mask_w_0', src_w_0)
+define(`mask_w_1', src_w_1)
+
+/* sample src to these registers */
+define(`src_sample0', `g18')
+define(`src_sample1', `g19')
+define(`src_sample2', `g20')
+define(`src_sample3', `g21')
+define(`src_sample4', `g22')
+define(`src_sample5', `g23')
+define(`src_sample6', `g24')
+define(`src_sample7', `g25')
+
+/* sample mask to these registers */
+define(`mask_sample0', `g26')
+define(`mask_sample1', `g27')
+define(`mask_sample2', `g28')
+define(`mask_sample3', `g29')
+define(`mask_sample4', `g30')
+define(`mask_sample5', `g31')
+define(`mask_sample6', `g32')
+define(`mask_sample7', `g33')
diff --git a/src/exa_wm_affine.g4i b/src/exa_wm_affine.g4i
new file mode 100644
index 00000000..8fc6450b
--- /dev/null
+++ b/src/exa_wm_affine.g4i
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/*
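+ * Fragment to compute u/v values under an affine transform.
+ * The including file must define du_dx, du_dy, uo, dv_dx, dv_dy, vo, u and v
+ * (as the src or the mask parameters) before including this fragment.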
+ */
+
+/********** Compute u *************/
+
+mul (16) temp_x<1>F dst_x<8,8,1>F du_dx { compr align1 };
+mul (16) temp_y<1>F dst_y<8,8,1>F du_dy { compr align1 };
+add (16) temp_x<1>F temp_x<8,8,1>F temp_y<8,8,1>F { compr align1 };
+add (16) u<1>F temp_x<8,8,1>F uo { compr align1 };
+
+/********** Compute v *************/
+
+mul (16) temp_x<1>F dst_x<8,8,1>F dv_dx { compr align1 };
+mul (16) temp_y<1>F dst_y<8,8,1>F dv_dy { compr align1 };
+add (16) temp_x<1>F temp_x<8,8,1>F temp_y<8,8,1>F { compr align1 };
+add (16) v<1>F temp_x<8,8,1>F vo { compr align1 };
+
diff --git a/src/exa_wm_ca.g4a b/src/exa_wm_ca.g4a
new file mode 100644
index 00000000..955c68c2
--- /dev/null
+++ b/src/exa_wm_ca.g4a
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/*
+ * Composite src and mask together, with component alpha
+ * (each src channel is multiplied by the matching mask channel)
+ */
+
+include(`exa_wm.g4i')
+
+/* mul mask rgba channels to src */
+mul (16) src_sample0<1>F src_sample0<8,8,1>F mask_sample0<8,8,1>F { compr align1 };
+mul (16) src_sample2<1>F src_sample2<8,8,1>F mask_sample2<8,8,1>F { compr align1 };
+mul (16) src_sample4<1>F src_sample4<8,8,1>F mask_sample4<8,8,1>F { compr align1 };
+mul (16) src_sample6<1>F src_sample6<8,8,1>F mask_sample6<8,8,1>F { compr align1 };
diff --git a/src/exa_wm_ca.g4b b/src/exa_wm_ca.g4b
new file mode 100644
index 00000000..d0f3519b
--- /dev/null
+++ b/src/exa_wm_ca.g4b
@@ -0,0 +1,4 @@
+ { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 },
+ { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
+ { 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d03c0 },
+ { 0x00802041, 0x230077bd, 0x008d0300, 0x008d0400 },
diff --git a/src/exa_wm_ca_srcalpha.g4a b/src/exa_wm_ca_srcalpha.g4a
new file mode 100644
index 00000000..a1be28e4
--- /dev/null
+++ b/src/exa_wm_ca_srcalpha.g4a
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/*
+ * Component alpha, source-alpha variant: every src channel is scaled by
+ * src_sample6 (presumably the src alpha channel -- TODO confirm)
+ */
+
+include(`exa_wm.g4i')
+
+/* multiply each src channel by src_sample6; NOTE(review): no mask_sample register is read here */
+mul (16) src_sample0<1>F src_sample0<8,8,1>F src_sample6<8,8,1>F { compr align1 };
+mul (16) src_sample2<1>F src_sample2<8,8,1>F src_sample6<8,8,1>F { compr align1 };
+mul (16) src_sample4<1>F src_sample4<8,8,1>F src_sample6<8,8,1>F { compr align1 };
+mul (16) src_sample6<1>F src_sample6<8,8,1>F src_sample6<8,8,1>F { compr align1 };
diff --git a/src/exa_wm_ca_srcalpha.g4b b/src/exa_wm_ca_srcalpha.g4b
new file mode 100644
index 00000000..780e704b
--- /dev/null
+++ b/src/exa_wm_ca_srcalpha.g4b
@@ -0,0 +1,4 @@
+ { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0300 },
+ { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0300 },
+ { 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d0300 },
+ { 0x00802041, 0x230077bd, 0x008d0300, 0x008d0300 },
diff --git a/src/exa_wm_mask_affine.g4a b/src/exa_wm_mask_affine.g4a
new file mode 100644
index 00000000..4c096cbb
--- /dev/null
+++ b/src/exa_wm_mask_affine.g4a
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+include(`exa_wm.g4i')
+define(`du_dx', `mask_du_dx')
+define(`du_dy', `mask_du_dy')
+define(`uo', `mask_uo')
+define(`dv_dx', `mask_dv_dx')
+define(`dv_dy', `mask_dv_dy')
+define(`vo', `mask_vo')
+define(`u', `mask_u')
+define(`v', `mask_v')
+include(`exa_wm_affine.g4i')
diff --git a/src/exa_wm_mask_affine.g4b b/src/exa_wm_mask_affine.g4b
new file mode 100644
index 00000000..62b46e0a
--- /dev/null
+++ b/src/exa_wm_mask_affine.g4b
@@ -0,0 +1,8 @@
+ { 0x00802041, 0x218077bd, 0x008d0100, 0x00000090 },
+ { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000094 },
+ { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
+ { 0x00802040, 0x202077be, 0x008d0180, 0x0000009c },
+ { 0x00802041, 0x218077bd, 0x008d0100, 0x000000a0 },
+ { 0x00802041, 0x21c077bd, 0x008d0140, 0x000000a4 },
+ { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
+ { 0x00802040, 0x206077be, 0x008d0180, 0x000000ac },
diff --git a/src/exa_wm_mask_projective.g4a b/src/exa_wm_mask_projective.g4a
new file mode 100644
index 00000000..464f6c51
--- /dev/null
+++ b/src/exa_wm_mask_projective.g4a
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+include(`exa_wm.g4i')
+
+define(`du_dx', `mask_du_dx')
+define(`du_dy', `mask_du_dy')
+define(`uo', `mask_uo')
+
+define(`dv_dx', `mask_dv_dx')
+define(`dv_dy', `mask_dv_dy')
+define(`vo', `mask_vo')
+
+define(`dw_dx', `mask_dw_dx')
+define(`dw_dy', `mask_dw_dy')
+define(`wo', `mask_wo')
+
+define(`u', `mask_u')
+define(`v', `mask_v')
+define(`w', `mask_w')
+define(`w_0', `mask_w_0')
+define(`w_1', `mask_w_1')
+
+include(`exa_wm_projective.g4i')
diff --git a/src/exa_wm_mask_projective.g4b b/src/exa_wm_mask_projective.g4b
new file mode 100644
index 00000000..ac4faa3e
--- /dev/null
+++ b/src/exa_wm_mask_projective.g4b
@@ -0,0 +1,16 @@
+ { 0x00802041, 0x218077bd, 0x008d0100, 0x000000b0 },
+ { 0x00802041, 0x21c077bd, 0x008d0140, 0x000000b4 },
+ { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
+ { 0x00802040, 0x218077bd, 0x008d0180, 0x000000bc },
+ { 0x00600031, 0x22001fbd, 0x008d0180, 0x01110001 },
+ { 0x00600031, 0x22201fbd, 0x008d01a0, 0x01110001 },
+ { 0x00802041, 0x218077bd, 0x008d0100, 0x00000090 },
+ { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000094 },
+ { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
+ { 0x00802040, 0x218077bd, 0x008d0180, 0x0000009c },
+ { 0x00802041, 0x202077be, 0x008d0180, 0x008d0200 },
+ { 0x00802041, 0x218077bd, 0x008d0100, 0x000000a0 },
+ { 0x00802041, 0x21c077bd, 0x008d0140, 0x000000a4 },
+ { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
+ { 0x00802040, 0x218077bd, 0x008d0180, 0x000000ac },
+ { 0x00802041, 0x206077be, 0x008d0180, 0x008d0200 },
diff --git a/src/exa_wm_mask_sample.g4a b/src/exa_wm_mask_sample.g4a
new file mode 100644
index 00000000..45dc3c4f
--- /dev/null
+++ b/src/exa_wm_mask_sample.g4a
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/* Sample the mask surface */
+
+include(`exa_wm.g4i')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* m0 will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) 0 /* msg reg index */
+ mask_sample0<1>UW /* readback */
+ g0<8,8,1>UW /* copy to msg start reg*/
+ sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) */
+ /* here(src->dst) we should use src_sampler and src_surface */
+ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
+
+// mov (8) mask_sample7<1>UD mask_sample7<8,8,1>UD { align1 }; /* wait sampler return */
+
+/* if we set up read-back reg correctly, emit dataport write 'send' cmd with EOT */
+
diff --git a/src/exa_wm_mask_sample.g4b b/src/exa_wm_mask_sample.g4b
new file mode 100644
index 00000000..45f7ead1
--- /dev/null
+++ b/src/exa_wm_mask_sample.g4b
@@ -0,0 +1 @@
+ { 0x00800031, 0x23401d29, 0x008d0000, 0x02580001 },
diff --git a/src/exa_wm_maskca.g4a b/src/exa_wm_maskca.g4a
index 0e96aa04..d0304673 100644
--- a/src/exa_wm_maskca.g4a
+++ b/src/exa_wm_maskca.g4a
@@ -58,44 +58,44 @@
/* Set up ss0.x coordinates*/
mov (1) g6<1>F g1.8<0,1,0>UW { align1 };
-add (1) g6.4<1>F g1.8<0,1,0>UW 1UB { align1 };
+add (1) g6.4<1>F g1.8<0,1,0>UW 1UD { align1 };
mov (1) g6.8<1>F g1.8<0,1,0>UW { align1 };
-add (1) g6.12<1>F g1.8<0,1,0>UW 1UB { align1 };
+add (1) g6.12<1>F g1.8<0,1,0>UW 1UD { align1 };
/* Set up ss0.y coordinates */
mov (1) g8<1>F g1.10<0,1,0>UW { align1 };
mov (1) g8.4<1>F g1.10<0,1,0>UW { align1 };
-add (1) g8.8<1>F g1.10<0,1,0>UW 1UB { align1 };
-add (1) g8.12<1>F g1.10<0,1,0>UW 1UB { align1 };
+add (1) g8.8<1>F g1.10<0,1,0>UW 1UD { align1 };
+add (1) g8.12<1>F g1.10<0,1,0>UW 1UD { align1 };
/* set up ss1.x coordinates */
mov (1) g6.16<1>F g1.12<0,1,0>UW { align1 };
-add (1) g6.20<1>F g1.12<0,1,0>UW 1UB { align1 };
+add (1) g6.20<1>F g1.12<0,1,0>UW 1UD { align1 };
mov (1) g6.24<1>F g1.12<0,1,0>UW { align1 };
-add (1) g6.28<1>F g1.12<0,1,0>UW 1UB { align1 };
+add (1) g6.28<1>F g1.12<0,1,0>UW 1UD { align1 };
/* set up ss1.y coordinates */
mov (1) g8.16<1>F g1.14<0,1,0>UW { align1 };
mov (1) g8.20<1>F g1.14<0,1,0>UW { align1 };
-add (1) g8.24<1>F g1.14<0,1,0>UW 1UB { align1 };
-add (1) g8.28<1>F g1.14<0,1,0>UW 1UB { align1 };
+add (1) g8.24<1>F g1.14<0,1,0>UW 1UD { align1 };
+add (1) g8.28<1>F g1.14<0,1,0>UW 1UD { align1 };
/* Set up ss2.x coordinates */
mov (1) g7<1>F g1.16<0,1,0>UW { align1 };
-add (1) g7.4<1>F g1.16<0,1,0>UW 1UB { align1 };
+add (1) g7.4<1>F g1.16<0,1,0>UW 1UD { align1 };
mov (1) g7.8<1>F g1.16<0,1,0>UW { align1 };
-add (1) g7.12<1>F g1.16<0,1,0>UW 1UB { align1 };
+add (1) g7.12<1>F g1.16<0,1,0>UW 1UD { align1 };
/* Set up ss2.y coordinates */
mov (1) g9<1>F g1.18<0,1,0>UW { align1 };
mov (1) g9.4<1>F g1.18<0,1,0>UW { align1 };
-add (1) g9.8<1>F g1.18<0,1,0>UW 1UB { align1 };
-add (1) g9.12<1>F g1.18<0,1,0>UW 1UB { align1 };
+add (1) g9.8<1>F g1.18<0,1,0>UW 1UD { align1 };
+add (1) g9.12<1>F g1.18<0,1,0>UW 1UD { align1 };
/* Set up ss3.x coordinates */
mov (1) g7.16<1>F g1.20<0,1,0>UW { align1 };
-add (1) g7.20<1>F g1.20<0,1,0>UW 1UB { align1 };
+add (1) g7.20<1>F g1.20<0,1,0>UW 1UD { align1 };
mov (1) g7.24<1>F g1.20<0,1,0>UW { align1 };
-add (1) g7.28<1>F g1.20<0,1,0>UW 1UB { align1 };
+add (1) g7.28<1>F g1.20<0,1,0>UW 1UD { align1 };
/* Set up ss3.y coordinates */
mov (1) g9.16<1>F g1.22<0,1,0>UW { align1 };
mov (1) g9.20<1>F g1.22<0,1,0>UW { align1 };
-add (1) g9.24<1>F g1.22<0,1,0>UW 1UB { align1 };
-add (1) g9.28<1>F g1.22<0,1,0>UW 1UB { align1 };
+add (1) g9.24<1>F g1.22<0,1,0>UW 1UD { align1 };
+add (1) g9.28<1>F g1.22<0,1,0>UW 1UD { align1 };
/* Now, map these screen space coordinates into texture coordinates. */
/* This is for src texture */
diff --git a/src/exa_wm_maskca.g4b b/src/exa_wm_maskca.g4b
new file mode 100644
index 00000000..d936412c
--- /dev/null
+++ b/src/exa_wm_maskca.g4b
@@ -0,0 +1,95 @@
+ { 0x00000001, 0x20c0013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20c40d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x20c8013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20cc0d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x2100013d, 0x0000002a, 0x00000000 },
+ { 0x00000001, 0x2104013d, 0x0000002a, 0x00000000 },
+ { 0x00000040, 0x21080d3d, 0x0000002a, 0x00000001 },
+ { 0x00000040, 0x210c0d3d, 0x0000002a, 0x00000001 },
+ { 0x00000001, 0x20d0013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20d40d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x20d8013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20dc0d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x2110013d, 0x0000002e, 0x00000000 },
+ { 0x00000001, 0x2114013d, 0x0000002e, 0x00000000 },
+ { 0x00000040, 0x21180d3d, 0x0000002e, 0x00000001 },
+ { 0x00000040, 0x211c0d3d, 0x0000002e, 0x00000001 },
+ { 0x00000001, 0x20e0013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20e40d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20e8013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20ec0d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x2120013d, 0x00000032, 0x00000000 },
+ { 0x00000001, 0x2124013d, 0x00000032, 0x00000000 },
+ { 0x00000040, 0x21280d3d, 0x00000032, 0x00000001 },
+ { 0x00000040, 0x212c0d3d, 0x00000032, 0x00000001 },
+ { 0x00000001, 0x20f0013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20f40d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20f8013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20fc0d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x2130013d, 0x00000036, 0x00000000 },
+ { 0x00000001, 0x2134013d, 0x00000036, 0x00000000 },
+ { 0x00000040, 0x21380d3d, 0x00000036, 0x00000001 },
+ { 0x00000040, 0x213c0d3d, 0x00000036, 0x00000001 },
+ { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
+ { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
+ { 0x00600041, 0x214077bd, 0x008d0140, 0x00000060 },
+ { 0x00600041, 0x216077bd, 0x008d0160, 0x00000060 },
+ { 0x00600040, 0x214077bd, 0x008d0140, 0x0000006c },
+ { 0x00600040, 0x216077bd, 0x008d0160, 0x0000006c },
+ { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
+ { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
+ { 0x00600041, 0x218077bd, 0x008d0180, 0x00000064 },
+ { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000064 },
+ { 0x00600040, 0x218077bd, 0x008d0180, 0x0000007c },
+ { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000007c },
+ { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
+ { 0x00800031, 0x21c01d29, 0x008d0000, 0x02580001 },
+ { 0x00600001, 0x22a00021, 0x008d02a0, 0x00000000 },
+ { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
+ { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
+ { 0x00600041, 0x214077bd, 0x008d0140, 0x00000080 },
+ { 0x00600041, 0x216077bd, 0x008d0160, 0x00000080 },
+ { 0x00600040, 0x214077bd, 0x008d0140, 0x0000008c },
+ { 0x00600040, 0x216077bd, 0x008d0160, 0x0000008c },
+ { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
+ { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
+ { 0x00600041, 0x218077bd, 0x008d0180, 0x00000084 },
+ { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000084 },
+ { 0x00600040, 0x218077bd, 0x008d0180, 0x0000009c },
+ { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000009c },
+ { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
+ { 0x00800031, 0x22c01d29, 0x008d0000, 0x02580102 },
+ { 0x00600001, 0x23a00021, 0x008d03a0, 0x00000000 },
+ { 0x00600041, 0x21c077bd, 0x008d01c0, 0x008d02c0 },
+ { 0x00600041, 0x21e077bd, 0x008d01e0, 0x008d02e0 },
+ { 0x00600041, 0x220077bd, 0x008d0200, 0x008d0300 },
+ { 0x00600041, 0x222077bd, 0x008d0220, 0x008d0320 },
+ { 0x00600041, 0x224077bd, 0x008d0240, 0x008d0340 },
+ { 0x00600041, 0x226077bd, 0x008d0260, 0x008d0360 },
+ { 0x00600041, 0x228077bd, 0x008d0280, 0x008d0380 },
+ { 0x00600041, 0x22a077bd, 0x008d02a0, 0x008d03a0 },
+ { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
+ { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
+ { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
+ { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
+ { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
+ { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_maskca_srcalpha.g4a b/src/exa_wm_maskca_srcalpha.g4a
index a92c9e4f..133c9f0b 100644
--- a/src/exa_wm_maskca_srcalpha.g4a
+++ b/src/exa_wm_maskca_srcalpha.g4a
@@ -58,44 +58,44 @@
/* Set up ss0.x coordinates*/
mov (1) g6<1>F g1.8<0,1,0>UW { align1 };
-add (1) g6.4<1>F g1.8<0,1,0>UW 1UB { align1 };
+add (1) g6.4<1>F g1.8<0,1,0>UW 1UD { align1 };
mov (1) g6.8<1>F g1.8<0,1,0>UW { align1 };
-add (1) g6.12<1>F g1.8<0,1,0>UW 1UB { align1 };
+add (1) g6.12<1>F g1.8<0,1,0>UW 1UD { align1 };
/* Set up ss0.y coordinates */
mov (1) g8<1>F g1.10<0,1,0>UW { align1 };
mov (1) g8.4<1>F g1.10<0,1,0>UW { align1 };
-add (1) g8.8<1>F g1.10<0,1,0>UW 1UB { align1 };
-add (1) g8.12<1>F g1.10<0,1,0>UW 1UB { align1 };
+add (1) g8.8<1>F g1.10<0,1,0>UW 1UD { align1 };
+add (1) g8.12<1>F g1.10<0,1,0>UW 1UD { align1 };
/* set up ss1.x coordinates */
mov (1) g6.16<1>F g1.12<0,1,0>UW { align1 };
-add (1) g6.20<1>F g1.12<0,1,0>UW 1UB { align1 };
+add (1) g6.20<1>F g1.12<0,1,0>UW 1UD { align1 };
mov (1) g6.24<1>F g1.12<0,1,0>UW { align1 };
-add (1) g6.28<1>F g1.12<0,1,0>UW 1UB { align1 };
+add (1) g6.28<1>F g1.12<0,1,0>UW 1UD { align1 };
/* set up ss1.y coordinates */
mov (1) g8.16<1>F g1.14<0,1,0>UW { align1 };
mov (1) g8.20<1>F g1.14<0,1,0>UW { align1 };
-add (1) g8.24<1>F g1.14<0,1,0>UW 1UB { align1 };
-add (1) g8.28<1>F g1.14<0,1,0>UW 1UB { align1 };
+add (1) g8.24<1>F g1.14<0,1,0>UW 1UD { align1 };
+add (1) g8.28<1>F g1.14<0,1,0>UW 1UD { align1 };
/* Set up ss2.x coordinates */
mov (1) g7<1>F g1.16<0,1,0>UW { align1 };
-add (1) g7.4<1>F g1.16<0,1,0>UW 1UB { align1 };
+add (1) g7.4<1>F g1.16<0,1,0>UW 1UD { align1 };
mov (1) g7.8<1>F g1.16<0,1,0>UW { align1 };
-add (1) g7.12<1>F g1.16<0,1,0>UW 1UB { align1 };
+add (1) g7.12<1>F g1.16<0,1,0>UW 1UD { align1 };
/* Set up ss2.y coordinates */
mov (1) g9<1>F g1.18<0,1,0>UW { align1 };
mov (1) g9.4<1>F g1.18<0,1,0>UW { align1 };
-add (1) g9.8<1>F g1.18<0,1,0>UW 1UB { align1 };
-add (1) g9.12<1>F g1.18<0,1,0>UW 1UB { align1 };
+add (1) g9.8<1>F g1.18<0,1,0>UW 1UD { align1 };
+add (1) g9.12<1>F g1.18<0,1,0>UW 1UD { align1 };
/* Set up ss3.x coordinates */
mov (1) g7.16<1>F g1.20<0,1,0>UW { align1 };
-add (1) g7.20<1>F g1.20<0,1,0>UW 1UB { align1 };
+add (1) g7.20<1>F g1.20<0,1,0>UW 1UD { align1 };
mov (1) g7.24<1>F g1.20<0,1,0>UW { align1 };
-add (1) g7.28<1>F g1.20<0,1,0>UW 1UB { align1 };
+add (1) g7.28<1>F g1.20<0,1,0>UW 1UD { align1 };
/* Set up ss3.y coordinates */
mov (1) g9.16<1>F g1.22<0,1,0>UW { align1 };
mov (1) g9.20<1>F g1.22<0,1,0>UW { align1 };
-add (1) g9.24<1>F g1.22<0,1,0>UW 1UB { align1 };
-add (1) g9.28<1>F g1.22<0,1,0>UW 1UB { align1 };
+add (1) g9.24<1>F g1.22<0,1,0>UW 1UD { align1 };
+add (1) g9.28<1>F g1.22<0,1,0>UW 1UD { align1 };
/* Now, map these screen space coordinates into texture coordinates. */
/* This is for src texture */
diff --git a/src/exa_wm_maskca_srcalpha.g4b b/src/exa_wm_maskca_srcalpha.g4b
new file mode 100644
index 00000000..d83b119f
--- /dev/null
+++ b/src/exa_wm_maskca_srcalpha.g4b
@@ -0,0 +1,95 @@
+ { 0x00000001, 0x20c0013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20c40d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x20c8013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20cc0d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x2100013d, 0x0000002a, 0x00000000 },
+ { 0x00000001, 0x2104013d, 0x0000002a, 0x00000000 },
+ { 0x00000040, 0x21080d3d, 0x0000002a, 0x00000001 },
+ { 0x00000040, 0x210c0d3d, 0x0000002a, 0x00000001 },
+ { 0x00000001, 0x20d0013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20d40d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x20d8013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20dc0d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x2110013d, 0x0000002e, 0x00000000 },
+ { 0x00000001, 0x2114013d, 0x0000002e, 0x00000000 },
+ { 0x00000040, 0x21180d3d, 0x0000002e, 0x00000001 },
+ { 0x00000040, 0x211c0d3d, 0x0000002e, 0x00000001 },
+ { 0x00000001, 0x20e0013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20e40d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20e8013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20ec0d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x2120013d, 0x00000032, 0x00000000 },
+ { 0x00000001, 0x2124013d, 0x00000032, 0x00000000 },
+ { 0x00000040, 0x21280d3d, 0x00000032, 0x00000001 },
+ { 0x00000040, 0x212c0d3d, 0x00000032, 0x00000001 },
+ { 0x00000001, 0x20f0013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20f40d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20f8013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20fc0d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x2130013d, 0x00000036, 0x00000000 },
+ { 0x00000001, 0x2134013d, 0x00000036, 0x00000000 },
+ { 0x00000040, 0x21380d3d, 0x00000036, 0x00000001 },
+ { 0x00000040, 0x213c0d3d, 0x00000036, 0x00000001 },
+ { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
+ { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
+ { 0x00600041, 0x214077bd, 0x008d0140, 0x00000060 },
+ { 0x00600041, 0x216077bd, 0x008d0160, 0x00000060 },
+ { 0x00600040, 0x214077bd, 0x008d0140, 0x0000006c },
+ { 0x00600040, 0x216077bd, 0x008d0160, 0x0000006c },
+ { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
+ { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
+ { 0x00600041, 0x218077bd, 0x008d0180, 0x00000064 },
+ { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000064 },
+ { 0x00600040, 0x218077bd, 0x008d0180, 0x0000007c },
+ { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000007c },
+ { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
+ { 0x00800031, 0x21c01d29, 0x008d0000, 0x02580001 },
+ { 0x00600001, 0x22a00021, 0x008d02a0, 0x00000000 },
+ { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
+ { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
+ { 0x00600041, 0x214077bd, 0x008d0140, 0x00000080 },
+ { 0x00600041, 0x216077bd, 0x008d0160, 0x00000080 },
+ { 0x00600040, 0x214077bd, 0x008d0140, 0x0000008c },
+ { 0x00600040, 0x216077bd, 0x008d0160, 0x0000008c },
+ { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
+ { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
+ { 0x00600041, 0x218077bd, 0x008d0180, 0x00000084 },
+ { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000084 },
+ { 0x00600040, 0x218077bd, 0x008d0180, 0x0000009c },
+ { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000009c },
+ { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
+ { 0x00800031, 0x22c01d29, 0x008d0000, 0x02580102 },
+ { 0x00600001, 0x23a00021, 0x008d03a0, 0x00000000 },
+ { 0x00600041, 0x21c077bd, 0x008d02c0, 0x008d0280 },
+ { 0x00600041, 0x21e077bd, 0x008d02e0, 0x008d02a0 },
+ { 0x00600041, 0x220077bd, 0x008d0300, 0x008d0280 },
+ { 0x00600041, 0x222077bd, 0x008d0320, 0x008d02a0 },
+ { 0x00600041, 0x224077bd, 0x008d0340, 0x008d0280 },
+ { 0x00600041, 0x226077bd, 0x008d0360, 0x008d02a0 },
+ { 0x00600041, 0x228077bd, 0x008d0380, 0x008d0280 },
+ { 0x00600041, 0x22a077bd, 0x008d03a0, 0x008d02a0 },
+ { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
+ { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
+ { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
+ { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
+ { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
+ { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_masknoca.g4a b/src/exa_wm_masknoca.g4a
index 2e9e3c9a..44f69539 100644
--- a/src/exa_wm_masknoca.g4a
+++ b/src/exa_wm_masknoca.g4a
@@ -58,44 +58,44 @@
/* Set up ss0.x coordinates*/
mov (1) g6<1>F g1.8<0,1,0>UW { align1 };
-add (1) g6.4<1>F g1.8<0,1,0>UW 1UB { align1 };
+add (1) g6.4<1>F g1.8<0,1,0>UW 1UD { align1 };
mov (1) g6.8<1>F g1.8<0,1,0>UW { align1 };
-add (1) g6.12<1>F g1.8<0,1,0>UW 1UB { align1 };
+add (1) g6.12<1>F g1.8<0,1,0>UW 1UD { align1 };
/* Set up ss0.y coordinates */
mov (1) g8<1>F g1.10<0,1,0>UW { align1 };
mov (1) g8.4<1>F g1.10<0,1,0>UW { align1 };
-add (1) g8.8<1>F g1.10<0,1,0>UW 1UB { align1 };
-add (1) g8.12<1>F g1.10<0,1,0>UW 1UB { align1 };
+add (1) g8.8<1>F g1.10<0,1,0>UW 1UD { align1 };
+add (1) g8.12<1>F g1.10<0,1,0>UW 1UD { align1 };
/* set up ss1.x coordinates */
mov (1) g6.16<1>F g1.12<0,1,0>UW { align1 };
-add (1) g6.20<1>F g1.12<0,1,0>UW 1UB { align1 };
+add (1) g6.20<1>F g1.12<0,1,0>UW 1UD { align1 };
mov (1) g6.24<1>F g1.12<0,1,0>UW { align1 };
-add (1) g6.28<1>F g1.12<0,1,0>UW 1UB { align1 };
+add (1) g6.28<1>F g1.12<0,1,0>UW 1UD { align1 };
/* set up ss1.y coordinates */
mov (1) g8.16<1>F g1.14<0,1,0>UW { align1 };
mov (1) g8.20<1>F g1.14<0,1,0>UW { align1 };
-add (1) g8.24<1>F g1.14<0,1,0>UW 1UB { align1 };
-add (1) g8.28<1>F g1.14<0,1,0>UW 1UB { align1 };
+add (1) g8.24<1>F g1.14<0,1,0>UW 1UD { align1 };
+add (1) g8.28<1>F g1.14<0,1,0>UW 1UD { align1 };
/* Set up ss2.x coordinates */
mov (1) g7<1>F g1.16<0,1,0>UW { align1 };
-add (1) g7.4<1>F g1.16<0,1,0>UW 1UB { align1 };
+add (1) g7.4<1>F g1.16<0,1,0>UW 1UD { align1 };
mov (1) g7.8<1>F g1.16<0,1,0>UW { align1 };
-add (1) g7.12<1>F g1.16<0,1,0>UW 1UB { align1 };
+add (1) g7.12<1>F g1.16<0,1,0>UW 1UD { align1 };
/* Set up ss2.y coordinates */
mov (1) g9<1>F g1.18<0,1,0>UW { align1 };
mov (1) g9.4<1>F g1.18<0,1,0>UW { align1 };
-add (1) g9.8<1>F g1.18<0,1,0>UW 1UB { align1 };
-add (1) g9.12<1>F g1.18<0,1,0>UW 1UB { align1 };
+add (1) g9.8<1>F g1.18<0,1,0>UW 1UD { align1 };
+add (1) g9.12<1>F g1.18<0,1,0>UW 1UD { align1 };
/* Set up ss3.x coordinates */
mov (1) g7.16<1>F g1.20<0,1,0>UW { align1 };
-add (1) g7.20<1>F g1.20<0,1,0>UW 1UB { align1 };
+add (1) g7.20<1>F g1.20<0,1,0>UW 1UD { align1 };
mov (1) g7.24<1>F g1.20<0,1,0>UW { align1 };
-add (1) g7.28<1>F g1.20<0,1,0>UW 1UB { align1 };
+add (1) g7.28<1>F g1.20<0,1,0>UW 1UD { align1 };
/* Set up ss3.y coordinates */
mov (1) g9.16<1>F g1.22<0,1,0>UW { align1 };
mov (1) g9.20<1>F g1.22<0,1,0>UW { align1 };
-add (1) g9.24<1>F g1.22<0,1,0>UW 1UB { align1 };
-add (1) g9.28<1>F g1.22<0,1,0>UW 1UB { align1 };
+add (1) g9.24<1>F g1.22<0,1,0>UW 1UD { align1 };
+add (1) g9.28<1>F g1.22<0,1,0>UW 1UD { align1 };
/* Now, map these screen space coordinates into texture coordinates. */
/* This is for src texture */
diff --git a/src/exa_wm_masknoca.g4b b/src/exa_wm_masknoca.g4b
new file mode 100644
index 00000000..5fcf3b52
--- /dev/null
+++ b/src/exa_wm_masknoca.g4b
@@ -0,0 +1,95 @@
+ { 0x00000001, 0x20c0013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20c40d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x20c8013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20cc0d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x2100013d, 0x0000002a, 0x00000000 },
+ { 0x00000001, 0x2104013d, 0x0000002a, 0x00000000 },
+ { 0x00000040, 0x21080d3d, 0x0000002a, 0x00000001 },
+ { 0x00000040, 0x210c0d3d, 0x0000002a, 0x00000001 },
+ { 0x00000001, 0x20d0013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20d40d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x20d8013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20dc0d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x2110013d, 0x0000002e, 0x00000000 },
+ { 0x00000001, 0x2114013d, 0x0000002e, 0x00000000 },
+ { 0x00000040, 0x21180d3d, 0x0000002e, 0x00000001 },
+ { 0x00000040, 0x211c0d3d, 0x0000002e, 0x00000001 },
+ { 0x00000001, 0x20e0013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20e40d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20e8013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20ec0d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x2120013d, 0x00000032, 0x00000000 },
+ { 0x00000001, 0x2124013d, 0x00000032, 0x00000000 },
+ { 0x00000040, 0x21280d3d, 0x00000032, 0x00000001 },
+ { 0x00000040, 0x212c0d3d, 0x00000032, 0x00000001 },
+ { 0x00000001, 0x20f0013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20f40d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20f8013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20fc0d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x2130013d, 0x00000036, 0x00000000 },
+ { 0x00000001, 0x2134013d, 0x00000036, 0x00000000 },
+ { 0x00000040, 0x21380d3d, 0x00000036, 0x00000001 },
+ { 0x00000040, 0x213c0d3d, 0x00000036, 0x00000001 },
+ { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
+ { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
+ { 0x00600041, 0x214077bd, 0x008d0140, 0x00000060 },
+ { 0x00600041, 0x216077bd, 0x008d0160, 0x00000060 },
+ { 0x00600040, 0x214077bd, 0x008d0140, 0x0000006c },
+ { 0x00600040, 0x216077bd, 0x008d0160, 0x0000006c },
+ { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
+ { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
+ { 0x00600041, 0x218077bd, 0x008d0180, 0x00000064 },
+ { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000064 },
+ { 0x00600040, 0x218077bd, 0x008d0180, 0x0000007c },
+ { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000007c },
+ { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
+ { 0x00800031, 0x21c01d29, 0x008d0000, 0x02580001 },
+ { 0x00600001, 0x22a00021, 0x008d02a0, 0x00000000 },
+ { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
+ { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
+ { 0x00600041, 0x214077bd, 0x008d0140, 0x00000080 },
+ { 0x00600041, 0x216077bd, 0x008d0160, 0x00000080 },
+ { 0x00600040, 0x214077bd, 0x008d0140, 0x0000008c },
+ { 0x00600040, 0x216077bd, 0x008d0160, 0x0000008c },
+ { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
+ { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
+ { 0x00600041, 0x218077bd, 0x008d0180, 0x00000084 },
+ { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000084 },
+ { 0x00600040, 0x218077bd, 0x008d0180, 0x0000009c },
+ { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000009c },
+ { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
+ { 0x00800031, 0x22c01d29, 0x008d0000, 0x02580102 },
+ { 0x00600001, 0x23a00021, 0x008d03a0, 0x00000000 },
+ { 0x00600041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
+ { 0x00600041, 0x21e077bd, 0x008d01e0, 0x008d03a0 },
+ { 0x00600041, 0x220077bd, 0x008d0200, 0x008d0380 },
+ { 0x00600041, 0x222077bd, 0x008d0220, 0x008d03a0 },
+ { 0x00600041, 0x224077bd, 0x008d0240, 0x008d0380 },
+ { 0x00600041, 0x226077bd, 0x008d0260, 0x008d03a0 },
+ { 0x00600041, 0x228077bd, 0x008d0280, 0x008d0380 },
+ { 0x00600041, 0x22a077bd, 0x008d02a0, 0x008d03a0 },
+ { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
+ { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
+ { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
+ { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
+ { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
+ { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_noca.g4a b/src/exa_wm_noca.g4a
new file mode 100644
index 00000000..7dd12247
--- /dev/null
+++ b/src/exa_wm_noca.g4a
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/*
+ * Composite src and mask together, no component alpha
+ */
+
+include(`exa_wm.g4i')
+/* Scale each src sample register pair by mask_sample6 (the mask's alpha channel); results are written back over the src samples */
+
+mul (16) src_sample0<1>F src_sample0<8,8,1>F mask_sample6<8,8,1>F { compr align1 };
+mul (16) src_sample2<1>F src_sample2<8,8,1>F mask_sample6<8,8,1>F { compr align1 };
+mul (16) src_sample4<1>F src_sample4<8,8,1>F mask_sample6<8,8,1>F { compr align1 };
+mul (16) src_sample6<1>F src_sample6<8,8,1>F mask_sample6<8,8,1>F { compr align1 };
diff --git a/src/exa_wm_noca.g4b b/src/exa_wm_noca.g4b
new file mode 100644
index 00000000..ba01d1a5
--- /dev/null
+++ b/src/exa_wm_noca.g4b
@@ -0,0 +1,4 @@
+ { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0400 },
+ { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0400 },
+ { 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d0400 },
+ { 0x00802041, 0x230077bd, 0x008d0300, 0x008d0400 },
diff --git a/src/exa_wm_projective.g4i b/src/exa_wm_projective.g4i
new file mode 100644
index 00000000..13da99c8
--- /dev/null
+++ b/src/exa_wm_projective.g4i
@@ -0,0 +1,51 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/********** Compute w = 1 / (dst_x * dw_dx + dst_y * dw_dy + wo) *************/
+
+mul (16) temp_x<1>F dst_x<8,8,1>F dw_dx { compr align1 };
+mul (16) temp_y<1>F dst_y<8,8,1>F dw_dy { compr align1 };
+add (16) temp_x<1>F temp_x<8,8,1>F temp_y<8,8,1>F { compr align1 };
+add (16) temp_x<1>F temp_x<8,8,1>F wo { compr align1 };
+send (8) 0 w_0<1>F temp_x_0<8,8,1>F math inv mlen 1 rlen 1 { compr align1 }; /* math "inv" is issued 8 wide: first from temp_x_0 into w_0 */
+send (8) 0 w_1<1>F temp_x_1<8,8,1>F math inv mlen 1 rlen 1 { compr align1 }; /* then from temp_x_1 into w_1 */
+
+/********** Compute u = (dst_x * du_dx + dst_y * du_dy + uo) * w *************/
+
+mul (16) temp_x<1>F dst_x<8,8,1>F du_dx { compr align1 };
+mul (16) temp_y<1>F dst_y<8,8,1>F du_dy { compr align1 };
+add (16) temp_x<1>F temp_x<8,8,1>F temp_y<8,8,1>F { compr align1 };
+add (16) temp_x<1>F temp_x<8,8,1>F uo { compr align1 };
+mul (16) u<1>F temp_x<8,8,1>F w<8,8,1>F { compr align1 };
+
+/********** Compute v = (dst_x * dv_dx + dst_y * dv_dy + vo) * w *************/
+
+mul (16) temp_x<1>F dst_x<8,8,1>F dv_dx { compr align1 };
+mul (16) temp_y<1>F dst_y<8,8,1>F dv_dy { compr align1 };
+add (16) temp_x<1>F temp_x<8,8,1>F temp_y<8,8,1>F { compr align1 };
+add (16) temp_x<1>F temp_x<8,8,1>F vo { compr align1 };
+mul (16) v<1>F temp_x<8,8,1>F w<8,8,1>F { compr align1 };
diff --git a/src/exa_wm_src_affine.g4a b/src/exa_wm_src_affine.g4a
new file mode 100644
index 00000000..3bf87179
--- /dev/null
+++ b/src/exa_wm_src_affine.g4a
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/*
+ * Fragment to compute src u/v values under an affine transform.  The generic coefficient and result names are bound to their src_* counterparts before exa_wm_affine.g4i is pulled in (presumably that fragment is written against the generic names -- confirm there).
+ */
+
+include(`exa_wm.g4i')
+define(`du_dx', `src_du_dx')
+define(`du_dy', `src_du_dy')
+define(`uo', `src_uo')
+define(`dv_dx', `src_dv_dx')
+define(`dv_dy', `src_dv_dy')
+define(`vo', `src_vo')
+define(`u', `src_u')
+define(`v', `src_v')
+include(`exa_wm_affine.g4i')
diff --git a/src/exa_wm_src_affine.g4b b/src/exa_wm_src_affine.g4b
new file mode 100644
index 00000000..f18ea1ee
--- /dev/null
+++ b/src/exa_wm_src_affine.g4b
@@ -0,0 +1,8 @@
+ { 0x00802041, 0x218077bd, 0x008d0100, 0x00000060 },
+ { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000064 },
+ { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
+ { 0x00802040, 0x202077be, 0x008d0180, 0x0000006c },
+ { 0x00802041, 0x218077bd, 0x008d0100, 0x00000070 },
+ { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000074 },
+ { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
+ { 0x00802040, 0x206077be, 0x008d0180, 0x0000007c },
diff --git a/src/exa_wm_src_projective.g4a b/src/exa_wm_src_projective.g4a
new file mode 100644
index 00000000..6bd2d6a4
--- /dev/null
+++ b/src/exa_wm_src_projective.g4a
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/* Fragment to compute src u/v values under a projective transform */
+include(`exa_wm.g4i')
+define(`du_dx', `src_du_dx')
+define(`du_dy', `src_du_dy')
+define(`uo', `src_uo')
+define(`dv_dx', `src_dv_dx')
+define(`dv_dy', `src_dv_dy')
+define(`vo', `src_vo')
+define(`dw_dx', `src_dw_dx')
+define(`dw_dy', `src_dw_dy')
+define(`wo', `src_wo')
+define(`u', `src_u')
+define(`v', `src_v')
+define(`w', `src_w')
+define(`w_0', `src_w_0')
+define(`w_1', `src_w_1')
+
+include(`exa_wm_projective.g4i')
diff --git a/src/exa_wm_src_projective.g4b b/src/exa_wm_src_projective.g4b
new file mode 100644
index 00000000..68bfc920
--- /dev/null
+++ b/src/exa_wm_src_projective.g4b
@@ -0,0 +1,16 @@
+ { 0x00802041, 0x218077bd, 0x008d0100, 0x00000080 },
+ { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000084 },
+ { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
+ { 0x00802040, 0x218077bd, 0x008d0180, 0x0000008c },
+ { 0x00600031, 0x22001fbd, 0x008d0180, 0x01110001 },
+ { 0x00600031, 0x22201fbd, 0x008d01a0, 0x01110001 },
+ { 0x00802041, 0x218077bd, 0x008d0100, 0x00000060 },
+ { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000064 },
+ { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
+ { 0x00802040, 0x218077bd, 0x008d0180, 0x0000006c },
+ { 0x00802041, 0x202077be, 0x008d0180, 0x008d0200 },
+ { 0x00802041, 0x218077bd, 0x008d0100, 0x00000070 },
+ { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000074 },
+ { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 },
+ { 0x00802040, 0x218077bd, 0x008d0180, 0x0000007c },
+ { 0x00802041, 0x206077be, 0x008d0180, 0x008d0200 },
diff --git a/src/exa_wm_src_sample.g4a b/src/exa_wm_src_sample.g4a
new file mode 100644
index 00000000..04cd3e3d
--- /dev/null
+++ b/src/exa_wm_src_sample.g4a
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/* Fragment: sample the src surface */
+
+include(`exa_wm.g4i')
+
+/* the sampler reply is read back starting at src_sample0; a later fragment writes it to the output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* message payload is 5 registers long; presumably m0 plus u in m1/m2 and v in m3/m4 -- confirm against the coordinate fragments */
+
+/* g0 is given as the send source and is copied to m0, as it contains the send desc */
+/* emit sampler 'send' cmd */
+send (16) 0 /* msg reg index */
+ src_sample0<1>UW /* readback */
+ g0<8,8,1>UW /* copy to msg start reg*/
+ sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) */
+ /* for this src fetch the binding table entry and sampler index must be the src ones */
+ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
+
+// mov (8) src_sample7<1>UD src_sample7<8,8,1>UD { align1 }; /* wait sampler return */
+
+/* once the read-back registers are set up, the dataport write 'send' cmd with EOT can be emitted */
+
diff --git a/src/exa_wm_src_sample.g4b b/src/exa_wm_src_sample.g4b
new file mode 100644
index 00000000..5ca33f5a
--- /dev/null
+++ b/src/exa_wm_src_sample.g4b
@@ -0,0 +1 @@
+ { 0x00800031, 0x22401d29, 0x008d0000, 0x02580001 },
diff --git a/src/exa_wm_write.g4a b/src/exa_wm_write.g4a
new file mode 100644
index 00000000..9a821d72
--- /dev/null
+++ b/src/exa_wm_write.g4a
@@ -0,0 +1,80 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/*
+ * Fragment: once the sampled data are ready, write them to the destination
+ */
+
+include(`exa_wm.g4i')
+
+/* m1 is a copy of g1 from the PS thread payload; NOTE(review): it is copied again below with mask_disable, so this first copy looks redundant */
+mov (8) m1<1>F g1<8,8,1>F { align1 };
+
+/* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */
+/* src_sample0 -> m2
+ src_sample1 -> m6
+ src_sample2 -> m3
+ src_sample3 -> m7
+ src_sample4 -> m4
+ src_sample5 -> m8
+ src_sample6 -> m5
+ src_sample7 -> m9
+*/
+
+mov (8) m2<1>F src_sample0<8,8,1>F { align1 };
+mov (8) m3<1>F src_sample2<8,8,1>F { align1 };
+mov (8) m4<1>F src_sample4<8,8,1>F { align1 };
+mov (8) m5<1>F src_sample6<8,8,1>F { align1 };
+mov (8) m6<1>F src_sample1<8,8,1>F { align1 };
+mov (8) m7<1>F src_sample3<8,8,1>F { align1 };
+mov (8) m8<1>F src_sample5<8,8,1>F { align1 };
+mov (8) m9<1>F src_sample7<8,8,1>F { align1 };
+
+/* m0, m1 are passed straight through from the PS thread payload (g0, g1); this copy of g1 is done with mask_disable */
+mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
+
+/* write: the message is 10 registers long (mlen 10), nothing is read back (rlen 0), and the send carries EOT */
+send (16) 0 acc0<1>UW g0<8,8,1>UW write (
+ 0, /* binding_table */
+ 8, /* pixel scoreboard clear, msg type simd16 single source */
+ 4, /* render target write */
+ 0 /* no write commit message */
+ )
+ mlen 10
+ rlen 0
+ { align1 EOT };
+
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+
diff --git a/src/exa_wm_write.g4b b/src/exa_wm_write.g4b
new file mode 100644
index 00000000..dd266a3e
--- /dev/null
+++ b/src/exa_wm_write.g4b
@@ -0,0 +1,20 @@
+ { 0x00600001, 0x202003be, 0x008d0020, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0280, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d02c0, 0x00000000 },
+ { 0x00600001, 0x20a003be, 0x008d0300, 0x00000000 },
+ { 0x00600001, 0x20c003be, 0x008d0260, 0x00000000 },
+ { 0x00600001, 0x20e003be, 0x008d02a0, 0x00000000 },
+ { 0x00600001, 0x210003be, 0x008d02e0, 0x00000000 },
+ { 0x00600001, 0x212003be, 0x008d0320, 0x00000000 },
+ { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
+ { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_xy.g4a b/src/exa_wm_xy.g4a
new file mode 100644
index 00000000..e99f5ac1
--- /dev/null
+++ b/src/exa_wm_xy.g4a
@@ -0,0 +1,52 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Keith Packard <keithp@keithp.com>
+ */
+
+/*
+ * Register assignments (NOTE(review): taken from the assembled .g4b
+ * encodings; the symbolic names themselves come from exa_wm.g4i):
+ * x g8/g9
+ * y g10/g11
+ *
+ * temp x g12/g13
+ * temp y g14/g15
+ *
+ * src w g16/g17
+ * src u m1/m2
+ * src v m3/m4
+ */
+
+/* Fragment to compute per-pixel XY values */
+
+include(`exa_wm.g4i')
+
+ /* Take the payload X and Y values and add the packed per-pixel offsets (0,1,0,1,... in x; 0,0,1,1,... in y) */
+add (16) temp_x_uw<1>UW dst_x_uw 0x10101010V { align1 };
+add (16) temp_y_uw<1>UW dst_y_uw 0x11001100V { align1 };
+
+ /* convert to float and subtract the screen-space origin of vertex 0 */
+add (16) dst_x<1>F temp_x_uw<8,8,1>UW -screen_x0 { compr align1 };
+add (16) dst_y<1>F temp_y_uw<8,8,1>UW -screen_y0 { compr align1 };
diff --git a/src/exa_wm_xy.g4b b/src/exa_wm_xy.g4b
new file mode 100644
index 00000000..7784a3d1
--- /dev/null
+++ b/src/exa_wm_xy.g4b
@@ -0,0 +1,4 @@
+ { 0x00800040, 0x21806d29, 0x00480028, 0x10101010 },
+ { 0x00800040, 0x21c06d29, 0x0048002a, 0x11001100 },
+ { 0x00802040, 0x2100753d, 0x008d0180, 0x00004020 },
+ { 0x00802040, 0x2140753d, 0x008d01c0, 0x00004024 },
diff --git a/src/i965_render.c b/src/i965_render.c
index 26c06aa4..76687795 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -321,31 +321,68 @@ static const uint32_t sip_kernel_static[][4] = {
#define SF_MAX_THREADS 2
static const uint32_t sf_kernel_static[][4] = {
-#include "exa_sf_prog.h"
+#include "exa_sf.g4b"
};
static const uint32_t sf_kernel_static_mask[][4] = {
-#include "exa_sf_mask_prog.h"
+#include "exa_sf_mask.g4b"
};
/* ps kernels */
#define PS_KERNEL_NUM_GRF 32
#define PS_MAX_THREADS 32
-static const uint32_t ps_kernel_static_nomask [][4] = {
-#include "exa_wm_nomask_prog.h"
+static const uint32_t ps_kernel_static_nomask_affine [][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_static_nomask_projective [][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample.g4b"
+#include "exa_wm_write.g4b"
};
static const uint32_t ps_kernel_static_maskca [][4] = {
-#include "exa_wm_maskca_prog.h"
+#include "exa_wm_maskca.g4b"
+#if 0
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample.g4b"
+#include "exa_wm_mask_affine.g4b"
+#include "exa_wm_mask_sample.g4b"
+#include "exa_wm_ca.g4b"
+#include "exa_wm_write.g4b"
+#endif
};
static const uint32_t ps_kernel_static_maskca_srcalpha [][4] = {
-#include "exa_wm_maskca_srcalpha_prog.h"
+#include "exa_wm_maskca_srcalpha.g4b"
+#if 0
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample.g4b"
+#include "exa_wm_mask_affine.g4b"
+#include "exa_wm_mask_sample.g4b"
+#include "exa_wm_ca_srcalpha.g4b"
+#include "exa_wm_write.g4b"
+#endif
};
static const uint32_t ps_kernel_static_masknoca [][4] = {
-#include "exa_wm_masknoca_prog.h"
+#include "exa_wm_masknoca.g4b"
+#if 0
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample.g4b"
+#include "exa_wm_mask_affine.g4b"
+#include "exa_wm_mask_sample.g4b"
+#include "exa_wm_noca.g4b"
+#include "exa_wm_write.g4b"
+#endif
};
static uint32_t
@@ -374,6 +411,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
mask_tiled = 0;
uint32_t dst_format, dst_offset, dst_pitch, dst_tile_format = 0,
dst_tiled = 0;
+ Bool is_affine_src, is_affine_mask, is_affine;
IntelEmitInvarientState(pScrn);
*pI830->last_3d = LAST_3D_RENDER;
@@ -402,6 +440,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
pI830->scale_units[0][1] = pSrc->drawable.height;
pI830->transform[0] = pSrcPicture->transform;
+ /* transform[1] is only assigned further down; read the mask picture directly */
+ is_affine_src = i830_transform_is_affine (pI830->transform[0]);
+ is_affine_mask = i830_transform_is_affine (pMask ? pMaskPicture->transform : NULL);
+ is_affine = is_affine_src && is_affine_mask;
if (!pMask) {
pI830->transform[1] = NULL;
@@ -460,7 +501,10 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
next_offset = ps_kernel_offset +
sizeof(ps_kernel_static_masknoca);
} else {
- next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask);
+ if (is_affine)
+ next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask_affine);
+ else
+ next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask_projective);
}
sip_kernel_offset = ALIGN(next_offset, 64);
@@ -837,8 +881,12 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
memcpy(ps_kernel, ps_kernel_static_masknoca,
sizeof (ps_kernel_static_masknoca));
} else {
- memcpy(ps_kernel, ps_kernel_static_nomask,
- sizeof (ps_kernel_static_nomask));
+ if (is_affine)
+ memcpy(ps_kernel, ps_kernel_static_nomask_affine,
+ sizeof (ps_kernel_static_nomask_affine));
+ else
+ memcpy(ps_kernel, ps_kernel_static_nomask_projective,
+ sizeof (ps_kernel_static_nomask_projective));
}
wm_state = &wm_state_local;
@@ -989,51 +1037,75 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
ADVANCE_BATCH();
}
{
- int nelem = pMask ? 3: 2;
+ /*
+ * number of extra parameter sets per vertex: src only, or src + mask
+ */
+ int nelem = pMask ? 2: 1;
+ /*
+ * floats in each extra parameter set:
+ * 3 for homogeneous (u, v, w)
+ * 2 for cartesian (u, v)
+ */
+ int selem = is_affine ? 2 : 3;
+ uint32_t w_component;
+ uint32_t src_format;
+
+ if (is_affine)
+ {
+ src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
+ w_component = BRW_VFCOMPONENT_NOSTORE; /* no w in the vertex data */
+ }
+ else
+ {
+ src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
+ w_component = BRW_VFCOMPONENT_STORE_SRC; /* w must be stored for the projective kernel */
+ }
BEGIN_BATCH(pMask?12:10);
- /* Set up the pointer to our vertex buffer */
+ /* Set up the pointer to our (single) vertex buffer */
OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);
OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
VB0_VERTEXDATA |
- ((4 * 2 * nelem) << VB0_BUFFER_PITCH_SHIFT));
+ ((4 * (2 + nelem * selem)) << VB0_BUFFER_PITCH_SHIFT));
OUT_BATCH(state_base_offset + vb_offset);
OUT_BATCH(3);
OUT_BATCH(0); // ignore for VERTEXDATA, but still there
/* Set up our vertex elements, sourced from the single vertex buffer.
*/
- OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * nelem) - 1));
- /* vertex coordinates */
- OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
- VE0_VALID |
- (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
- (0 << VE0_OFFSET_SHIFT));
- OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
- (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
- /* u0, v0 */
+
+ OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * (1 + nelem)) - 1));
+ /* x,y */
OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
VE0_VALID |
(BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
- (8 << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
- OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) |
- (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
- /* u1, v1 */
+ (0 << VE0_OFFSET_SHIFT));
+ OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+ (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+ /* u0, v0, w0 */
+ OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ VE0_VALID |
+ (src_format << VE0_FORMAT_SHIFT) |
+ ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
+ OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) |
+ ((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
+ /* u1, v1, w1 */
if (pMask) {
- OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
- VE0_VALID |
- (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
- (16 << VE0_OFFSET_SHIFT));
- OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) |
- (10 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+ OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ VE0_VALID |
+ (src_format << VE0_FORMAT_SHIFT) |
+ (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */
+
+ OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) |
+ ((4 + 2 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
}
ADVANCE_BATCH();
@@ -1053,38 +1125,87 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
I830Ptr pI830 = I830PTR(pScrn);
Bool has_mask;
- float src_x[3], src_y[3], mask_x[3], mask_y[3];
+ Bool is_affine_src, is_affine_mask, is_affine;
+ float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
int i;
+ int per_vertex = 2; /* dst x/y */
- if (!i830_get_transformed_coordinates(srcX, srcY,
- pI830->transform[0],
- &src_x[0], &src_y[0]))
- return;
- if (!i830_get_transformed_coordinates(srcX, srcY + h,
- pI830->transform[0],
- &src_x[1], &src_y[1]))
- return;
- if (!i830_get_transformed_coordinates(srcX + w, srcY + h,
- pI830->transform[0],
- &src_x[2], &src_y[2]))
- return;
+ is_affine_src = i830_transform_is_affine (pI830->transform[0]);
+ is_affine_mask = i830_transform_is_affine (pI830->transform[1]);
+ is_affine = is_affine_src && is_affine_mask;
+
+ if (is_affine)
+ {
+ if (!i830_get_transformed_coordinates(srcX, srcY,
+ pI830->transform[0],
+ &src_x[0], &src_y[0]))
+ return;
+ if (!i830_get_transformed_coordinates(srcX, srcY + h,
+ pI830->transform[0],
+ &src_x[1], &src_y[1]))
+ return;
+ if (!i830_get_transformed_coordinates(srcX + w, srcY + h,
+ pI830->transform[0],
+ &src_x[2], &src_y[2]))
+ return;
+ per_vertex += 2; /* src u/v */
+ }
+ else
+ {
+ if (!i830_get_transformed_coordinates_3d(srcX, srcY,
+ pI830->transform[0],
+ &src_x[0], &src_y[0],
+ &src_w[0]))
+ return;
+ if (!i830_get_transformed_coordinates_3d(srcX, srcY + h,
+ pI830->transform[0],
+ &src_x[1], &src_y[1],
+ &src_w[1]))
+ return;
+ if (!i830_get_transformed_coordinates_3d(srcX + w, srcY + h,
+ pI830->transform[0],
+ &src_x[2], &src_y[2],
+ &src_w[2]))
+ return;
+ per_vertex += 3; /* src u/v/w */
+ }
if (pI830->scale_units[1][0] == -1 || pI830->scale_units[1][1] == -1) {
has_mask = FALSE;
} else {
has_mask = TRUE;
- if (!i830_get_transformed_coordinates(maskX, maskY,
- pI830->transform[1],
- &mask_x[0], &mask_y[0]))
- return;
- if (!i830_get_transformed_coordinates(maskX, maskY + h,
- pI830->transform[1],
- &mask_x[1], &mask_y[1]))
- return;
- if (!i830_get_transformed_coordinates(maskX + w, maskY + h,
- pI830->transform[1],
- &mask_x[2], &mask_y[2]))
- return;
+ /*
+ * Pick the 2d or 3d path from the combined is_affine flag, not from
+ * is_affine_mask: the vertex data written below includes mask_w[]
+ * whenever !is_affine, so an affine mask paired with a projective
+ * source must still have its w computed here.
+ */
+ if (is_affine) {
+ if (!i830_get_transformed_coordinates(maskX, maskY, pI830->transform[1],
+ &mask_x[0], &mask_y[0]))
+ return;
+ if (!i830_get_transformed_coordinates(maskX, maskY + h, pI830->transform[1],
+ &mask_x[1], &mask_y[1]))
+ return;
+ if (!i830_get_transformed_coordinates(maskX + w, maskY + h, pI830->transform[1],
+ &mask_x[2], &mask_y[2]))
+ return;
+ per_vertex += 2; /* mask u/v */
+ } else {
+ if (!i830_get_transformed_coordinates_3d(maskX, maskY,
+ pI830->transform[1],
+ &mask_x[0], &mask_y[0], &mask_w[0]))
+ return;
+ if (!i830_get_transformed_coordinates_3d(maskX, maskY + h,
+ pI830->transform[1],
+ &mask_x[1], &mask_y[1], &mask_w[1]))
+ return;
+ if (!i830_get_transformed_coordinates_3d(maskX + w, maskY + h,
+ pI830->transform[1],
+ &mask_x[2], &mask_y[2], &mask_w[2]))
+ return;
+ per_vertex += 3; /* mask u/v/w */
+ }
}
/* Wait for any existing composite rectangles to land before we overwrite
@@ -1098,9 +1219,13 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
vb[i++] = (float)(dstY + h);
vb[i++] = src_x[2] / pI830->scale_units[0][0];
vb[i++] = src_y[2] / pI830->scale_units[0][1];
+ if (!is_affine)
+ vb[i++] = src_w[2];
if (has_mask) {
vb[i++] = mask_x[2] / pI830->scale_units[1][0];
vb[i++] = mask_y[2] / pI830->scale_units[1][1];
+ if (!is_affine)
+ vb[i++] = mask_w[2];
}
/* rect (x1,y2) */
@@ -1108,9 +1233,13 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
vb[i++] = (float)(dstY + h);
vb[i++] = src_x[1] / pI830->scale_units[0][0];
vb[i++] = src_y[1] / pI830->scale_units[0][1];
+ if (!is_affine)
+ vb[i++] = src_w[1];
if (has_mask) {
vb[i++] = mask_x[1] / pI830->scale_units[1][0];
vb[i++] = mask_y[1] / pI830->scale_units[1][1];
+ if (!is_affine)
+ vb[i++] = mask_w[1];
}
/* rect (x1,y1) */
@@ -1118,9 +1247,13 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
vb[i++] = (float)dstY;
vb[i++] = src_x[0] / pI830->scale_units[0][0];
vb[i++] = src_y[0] / pI830->scale_units[0][1];
+ if (!is_affine)
+ vb[i++] = src_w[0];
if (has_mask) {
vb[i++] = mask_x[0] / pI830->scale_units[1][0];
vb[i++] = mask_y[0] / pI830->scale_units[1][1];
+ if (!is_affine)
+ vb[i++] = mask_w[0];
}
{
diff --git a/src/packed_yuv_sf.g4b b/src/packed_yuv_sf.g4b
new file mode 100644
index 00000000..830d1760
--- /dev/null
+++ b/src/packed_yuv_sf.g4b
@@ -0,0 +1,17 @@
+ { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
+ { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
+ { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 },
+ { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 },
+ { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 },
+ { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
+ { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/packed_yuv_wm.g4a b/src/packed_yuv_wm.g4a
index 5e31f106..9e635ba8 100644
--- a/src/packed_yuv_wm.g4a
+++ b/src/packed_yuv_wm.g4a
@@ -49,44 +49,44 @@
/* Set up ss0.x coordinates*/
mov (1) g4<1>F g1.8<0,1,0>UW { align1 };
-add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 };
+add (1) g4.4<1>F g1.8<0,1,0>UW 1UD { align1 };
mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 };
-add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 };
+add (1) g4.12<1>F g1.8<0,1,0>UW 1UD { align1 };
/* Set up ss0.y coordinates */
mov (1) g6<1>F g1.10<0,1,0>UW { align1 };
mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 };
-add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 };
-add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 };
+add (1) g6.8<1>F g1.10<0,1,0>UW 1UD { align1 };
+add (1) g6.12<1>F g1.10<0,1,0>UW 1UD { align1 };
/* set up ss1.x coordinates */
mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 };
-add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 };
+add (1) g4.20<1>F g1.12<0,1,0>UW 1UD { align1 };
mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 };
-add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 };
+add (1) g4.28<1>F g1.12<0,1,0>UW 1UD { align1 };
/* set up ss1.y coordinates */
mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 };
mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 };
-add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { align1 };
-add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 };
+add (1) g6.24<1>F g1.14<0,1,0>UW 1UD { align1 };
+add (1) g6.28<1>F g1.14<0,1,0>UW 1UD { align1 };
/* Set up ss2.x coordinates */
mov (1) g5<1>F g1.16<0,1,0>UW { align1 };
-add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 };
+add (1) g5.4<1>F g1.16<0,1,0>UW 1UD { align1 };
mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 };
-add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 };
+add (1) g5.12<1>F g1.16<0,1,0>UW 1UD { align1 };
/* Set up ss2.y coordinates */
mov (1) g7<1>F g1.18<0,1,0>UW { align1 };
mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 };
-add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 };
-add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 };
+add (1) g7.8<1>F g1.18<0,1,0>UW 1UD { align1 };
+add (1) g7.12<1>F g1.18<0,1,0>UW 1UD { align1 };
/* Set up ss3.x coordinates */
mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 };
-add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 };
+add (1) g5.20<1>F g1.20<0,1,0>UW 1UD { align1 };
mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 };
-add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 };
+add (1) g5.28<1>F g1.20<0,1,0>UW 1UD { align1 };
/* Set up ss3.y coordinates */
mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 };
mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 };
-add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 };
-add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 };
+add (1) g7.24<1>F g1.22<0,1,0>UW 1UD { align1 };
+add (1) g7.28<1>F g1.22<0,1,0>UW 1UD { align1 };
/* Now, map these screen space coordinates into texture coordinates. */
/* subtract screen-space X origin of vertex 0. */
diff --git a/src/packed_yuv_wm.g4b b/src/packed_yuv_wm.g4b
new file mode 100644
index 00000000..d72c6510
--- /dev/null
+++ b/src/packed_yuv_wm.g4b
@@ -0,0 +1,82 @@
+ { 0x00000001, 0x2080013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20840d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x2088013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x208c0d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x20c0013d, 0x0000002a, 0x00000000 },
+ { 0x00000001, 0x20c4013d, 0x0000002a, 0x00000000 },
+ { 0x00000040, 0x20c80d3d, 0x0000002a, 0x00000001 },
+ { 0x00000040, 0x20cc0d3d, 0x0000002a, 0x00000001 },
+ { 0x00000001, 0x2090013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20940d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x2098013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x209c0d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x20d0013d, 0x0000002e, 0x00000000 },
+ { 0x00000001, 0x20d4013d, 0x0000002e, 0x00000000 },
+ { 0x00000040, 0x20d80d3d, 0x0000002e, 0x00000001 },
+ { 0x00000040, 0x20dc0d3d, 0x0000002e, 0x00000001 },
+ { 0x00000001, 0x20a0013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20a40d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20a8013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20ac0d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20e0013d, 0x00000032, 0x00000000 },
+ { 0x00000001, 0x20e4013d, 0x00000032, 0x00000000 },
+ { 0x00000040, 0x20e80d3d, 0x00000032, 0x00000001 },
+ { 0x00000040, 0x20ec0d3d, 0x00000032, 0x00000001 },
+ { 0x00000001, 0x20b0013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20b40d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20b8013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20bc0d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20f0013d, 0x00000036, 0x00000000 },
+ { 0x00000001, 0x20f4013d, 0x00000036, 0x00000000 },
+ { 0x00000040, 0x20f80d3d, 0x00000036, 0x00000001 },
+ { 0x00000040, 0x20fc0d3d, 0x00000036, 0x00000001 },
+ { 0x00600040, 0x208077bd, 0x008d0080, 0x00004020 },
+ { 0x00600040, 0x20a077bd, 0x008d00a0, 0x00004020 },
+ { 0x00600041, 0x208077bd, 0x008d0080, 0x00000060 },
+ { 0x00600041, 0x20a077bd, 0x008d00a0, 0x00000060 },
+ { 0x00600040, 0x208077bd, 0x008d0080, 0x0000006c },
+ { 0x00600040, 0x20a077bd, 0x008d00a0, 0x0000006c },
+ { 0x00600040, 0x20c077bd, 0x008d00c0, 0x00004024 },
+ { 0x00600040, 0x20e077bd, 0x008d00e0, 0x00004024 },
+ { 0x00600041, 0x20c077bd, 0x008d00c0, 0x00000074 },
+ { 0x00600041, 0x20e077bd, 0x008d00e0, 0x00000074 },
+ { 0x00600040, 0x20c077bd, 0x008d00c0, 0x0000007c },
+ { 0x00600040, 0x20e077bd, 0x008d00e0, 0x0000007c },
+ { 0x00600001, 0x202003be, 0x008d0080, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d00a0, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d00c0, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d00e0, 0x00000000 },
+ { 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 },
+ { 0x00600001, 0x22600129, 0x008d0260, 0x00000000 },
+ { 0x00600040, 0x21c07fbd, 0x008d01c0, 0xbd808081 },
+ { 0x00600040, 0x21807fbd, 0x008d0180, 0xbf008084 },
+ { 0x00600040, 0x22007fbd, 0x008d0200, 0xbf008084 },
+ { 0x00600041, 0x21c07fbd, 0x008d01c0, 0x3f94fdf4 },
+ { 0x00600041, 0x20007fbc, 0x008d0180, 0x3fcc49ba },
+ { 0x80600048, 0x20407fbe, 0x008d01c0, 0x3f800000 },
+ { 0x00600041, 0x20007fbc, 0x008d0180, 0xbf5020c5 },
+ { 0x00600048, 0x20007fbc, 0x008d0200, 0xbec8b439 },
+ { 0x80600048, 0x20607fbe, 0x008d01c0, 0x3f800000 },
+ { 0x00600041, 0x20007fbc, 0x008d0200, 0x40011687 },
+ { 0x80600048, 0x20807fbe, 0x008d01c0, 0x3f800000 },
+ { 0x00600040, 0x21e07fbd, 0x008d01e0, 0xbd808081 },
+ { 0x00600040, 0x21a07fbd, 0x008d01a0, 0xbf008084 },
+ { 0x00600040, 0x22207fbd, 0x008d0220, 0xbf008084 },
+ { 0x00600041, 0x21e07fbd, 0x008d01e0, 0x3f94fdf4 },
+ { 0x00600041, 0x20007fbc, 0x008d01a0, 0x3fcc49ba },
+ { 0x80600048, 0x20c07fbe, 0x008d01e0, 0x3f800000 },
+ { 0x00600041, 0x20007fbc, 0x008d01a0, 0xbf5020c5 },
+ { 0x00600048, 0x20007fbc, 0x008d0220, 0xbec8b439 },
+ { 0x80600048, 0x20e07fbe, 0x008d01e0, 0x3f800000 },
+ { 0x00600041, 0x20007fbc, 0x008d0220, 0x40011687 },
+ { 0x80600048, 0x21007fbe, 0x008d01e0, 0x3f800000 },
+ { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
+ { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },