summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWang Zhenyu <zhenyu.z.wang@intel.com>2007-03-21 14:50:45 +0800
committerWang Zhenyu <zhenyu.z.wang@intel.com>2007-03-21 14:50:45 +0800
commit3025fa0fb2bf5ace7076796e45e2560fe8410e8d (patch)
treecc4ff8d73dd734b85711cc39a6b393ad9e6afc7f
parent223944878cf38f86580df5a7d3102d86cfc061b9 (diff)
EXA: try to enable rotation for G965
The new sf/wm should handle the texture sampling only in rotated case. Also fix possible hole in VUE slot.
-rw-r--r--src/Makefile.am14
-rw-r--r--src/exa_sf_rotation.g4a29
-rw-r--r--src/exa_sf_rotation_prog.h20
-rw-r--r--src/exa_wm_rotation.g4a158
-rw-r--r--src/exa_wm_rotation_prog.h70
-rw-r--r--src/i965_render.c48
6 files changed, 333 insertions, 6 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index c65c1e73..bd64f503 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -120,20 +120,24 @@ INTEL_G4A = \
packed_yuv_wm.g4a \
exa_sf.g4a \
exa_sf_mask.g4a \
+ exa_sf_rotation.g4a \
exa_wm_maskca.g4a \
exa_wm_maskca_srcalpha.g4a \
exa_wm_masknoca.g4a \
- exa_wm_nomask.g4a
+ exa_wm_nomask.g4a \
+ exa_wm_rotation.g4a
INTEL_G4H = \
sf_prog.h \
wm_prog.h \
exa_sf_mask_prog.h \
exa_sf_prog.h \
+ exa_sf_rotation_prog.h \
exa_wm_maskca_prog.h \
exa_wm_maskca_srcalpha_prog.h \
exa_wm_masknoca_prog.h \
- exa_wm_nomask_prog.h
+ exa_wm_nomask_prog.h \
+ exa_wm_rotation_prog.h
EXTRA_DIST = \
$(XMODE_SRCS) \
@@ -154,6 +158,9 @@ exa_sf_mask_prog.h: exa_sf_mask.g4a
exa_sf_prog.h: exa_sf.g4a
intel-gen4asm -o exa_sf_prog.h exa_sf.g4a
+
+exa_sf_rotation_prog.h: exa_sf_rotation.g4a
+ intel-gen4asm -o exa_sf_rotation_prog.h exa_sf_rotation.g4a
exa_wm_maskca_prog.h: exa_wm_maskca.g4a
intel-gen4asm -o exa_wm_maskca_prog.h exa_wm_maskca.g4a
@@ -166,6 +173,9 @@ exa_wm_masknoca_prog.h: exa_wm_masknoca.g4a
exa_wm_nomask_prog.h: exa_wm_nomask.g4a
intel-gen4asm -o exa_wm_nomask_prog.h exa_wm_nomask.g4a
+
+exa_wm_rotation_prog.h: exa_wm_rotation.g4a
+ intel-gen4asm -o exa_wm_rotation_prog.h exa_wm_rotation.g4a
endif
diff --git a/src/exa_sf_rotation.g4a b/src/exa_sf_rotation.g4a
new file mode 100644
index 00000000..4c93553c
--- /dev/null
+++ b/src/exa_sf_rotation.g4a
@@ -0,0 +1,29 @@
+/* SF kernel for the rotated EXA source. 1/dx: g6.0 = reciprocal of g1.12 via the math unit */
+send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+/* 1/dy: g6.4 = reciprocal of g1.20 via the math unit */
+send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+/* du, dv: g7.0 = g4.0 - g3.0 and g7.4 = g4.4 - g3.4, done as negate (mul by -1.0) then add */
+mul (1) g7<1>F g3<0,1,0>F -1.0F { align1 };
+mul (1) g7.4<1>F g3.4<0,1,0>F -1.0F { align1 };
+add (1) g7<1>F g4<0,1,0>F g7<0,1,0>F { align1 };
+add (1) g7.4<1>F g4.4<0,1,0>F g7.4<0,1,0>F { align1 };
+
+/* du/dy: g7.0 = du * g6.4 (the 1/dy reciprocal computed above) */
+mul (1) g7<1>F g7<0,1,0>F g6.4<0,1,0>F { align1 };
+/* dv/dx: g7.4 = dv * g6.0 (the 1/dx reciprocal computed above) */
+mul (1) g7.4<1>F g7.4<0,1,0>F g6<0,1,0>F { align1 };
+/* Cx: replicate the scalar g7.0 (du/dy) across the 8 channels of m1 */
+mov (8) m1<1>F g7<0,1,0>F { align1 };
+/* Cy: replicate the scalar g7.4 (dv/dx) across the 8 channels of m2 */
+mov (8) m2<1>F g7.4<0,1,0>F { align1 };
+/* Co: copy all 8 floats of g3 unchanged into m3 */
+mov (8) m3<1>F g3<8,8,1>F { align1 };
+send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT };
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/src/exa_sf_rotation_prog.h b/src/exa_sf_rotation_prog.h
new file mode 100644
index 00000000..95891305
--- /dev/null
+++ b/src/exa_sf_rotation_prog.h
@@ -0,0 +1,20 @@
+ { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
+ { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
+ { 0x00000041, 0x20e07fbd, 0x00000060, 0xbf800000 },
+ { 0x00000041, 0x20e47fbd, 0x00000064, 0xbf800000 },
+ { 0x00000040, 0x20e077bd, 0x00000080, 0x000000e0 },
+ { 0x00000040, 0x20e477bd, 0x00000084, 0x000000e4 },
+ { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c4 },
+ { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c0 },
+ { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
+ { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_rotation.g4a b/src/exa_wm_rotation.g4a
new file mode 100644
index 00000000..b12f81c1
--- /dev/null
+++ b/src/exa_wm_rotation.g4a
@@ -0,0 +1,158 @@
+/*
+ * This is for the EXA composite operation in the no-mask picture case.
+ * The simplest case is just sending what src picture has to dst picture.
+ */
+
+/* I think this should be the same as the g4a program for texture video,
+ as we also use 16-pixel dispatch, and the SF scale in g3 is useful for us. */
+
+/* The initial payload of the thread is always g0.
+ * WM_URB (incoming URB entries) is g3
+ * X0_R is g4
+ * X1_R is g5
+ * Y0_R is g6
+ * Y1_R is g7
+ */
+
+ /* Set up the X/Y screen coordinates of the pixels in our 4 subspans. Each
+ * subspan is a 2x2 rectangle, and the screen x/y of the upper left of each
+ * subspan are given in GRF register 1.2 through 1.5 (which, with the word
+ * addressing below, are 1.4 through 1.11).
+ *
+ * The result is WM_X*_R and WM_Y*_R being:
+ *
+ * X0: {ss0.x, ss0.x+1, ss0.x, ss0.x+1, ss1.x, ss1.x+1, ss1.x, ss1.x+1}
+ * Y0: {ss0.y, ss0.y, ss0.y+1, ss0.y+1, ss1.y, ss1.y, ss1.y+1, ss1.y+1}
+ * X1: {ss2.x, ss2.x+1, ss2.x, ss2.x+1, ss3.x, ss3.x+1, ss3.x, ss3.x+1}
+ * Y1: {ss2.y, ss2.y, ss2.y+1, ss2.y+1, ss3.y, ss3.y, ss3.y+1, ss3.y+1}
+ */
+ /* Set up ss0.x coordinates*/
+mov (1) g4<1>F g1.8<0,1,0>UW { align1 };
+add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 };
+mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 };
+add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 };
+ /* Set up ss0.y coordinates */
+mov (1) g6<1>F g1.10<0,1,0>UW { align1 };
+mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 };
+add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 };
+add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 };
+ /* set up ss1.x coordinates */
+mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 };
+add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 };
+mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 };
+add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 };
+ /* set up ss1.y coordinates */
+mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 };
+mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 };
+add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { align1 };
+add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.x coordinates */
+mov (1) g5<1>F g1.16<0,1,0>UW { align1 };
+add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 };
+mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 };
+add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.y coordinates */
+mov (1) g7<1>F g1.18<0,1,0>UW { align1 };
+mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 };
+add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 };
+add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.x coordinates */
+mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 };
+add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 };
+mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 };
+add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.y coordinates */
+mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 };
+mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 };
+add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 };
+add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 };
+
+ /* Now, map these screen space coordinates into texture coordinates. */
+ /* subtract screen-space X origin of vertex 0. */
+/* for rotation, texture y is from ssX.x, so g4,g5 will be Y */
+add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 };
+add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 };
+ /* scale by texture X increment */
+mul (8) g4<1>F g4<8,8,1>F g3.20<0,1,0>F { align1 };
+mul (8) g5<1>F g5<8,8,1>F g3.20<0,1,0>F { align1 };
+ /* add in texture X offset */
+add (8) g4<1>F g4<8,8,1>F g3.28<0,1,0>F { align1 };
+add (8) g5<1>F g5<8,8,1>F g3.28<0,1,0>F { align1 };
+
+/* texture X is from ssX.y, so g6,g7 will be X */
+ /* subtract screen-space Y origin of vertex 0. */
+add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 };
+add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 };
+ /* scale by texture Y increment */
+mul (8) g6<1>F g6<8,8,1>F g3.16<0,1,0>F { align1 };
+mul (8) g7<1>F g7<8,8,1>F g3.16<0,1,0>F { align1 };
+ /* add in texture Y offset */
+add (8) g6<1>F g6<8,8,1>F g3.12<0,1,0>F { align1 };
+add (8) g7<1>F g7<8,8,1>F g3.12<0,1,0>F { align1 };
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+mov (8) m1<1>F g6<8,8,1>F { align1 };
+mov (8) m2<1>F g7<8,8,1>F { align1 };
+mov (8) m3<1>F g4<8,8,1>F { align1 };
+mov (8) m4<1>F g5<8,8,1>F { align1 };
+
+/* m0 will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) 0 /* msg reg index */
+ g12<1>UW /* readback */
+ g0<8,8,1>UW /* copy to msg start reg*/
+ sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
+ /* here(src->dst) we should use src_sampler and src_surface */
+ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
+
+mov (8) g19<1>UD g19<8,8,1>UD { align1 }; /* wait sampler return */
+/* if we set up read-back reg correctly, emit dataport write 'send' cmd with EOT */
+
+/* m0 and m1 are passed through directly from the PS thread payload */
+mov (8) m1<1>F g1<8,8,1>F { align1 };
+
+/* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */
+/* g12 -> m2
+ g13 -> m6
+ g14 -> m3
+ g15 -> m7
+ g16 -> m4
+ g17 -> m8
+ g18 -> m5
+ g19 -> m9
+*/
+mov (8) m2<1>F g12<8,8,1>F { align1 };
+mov (8) m3<1>F g14<8,8,1>F { align1 };
+mov (8) m4<1>F g16<8,8,1>F { align1 };
+mov (8) m5<1>F g18<8,8,1>F { align1 };
+mov (8) m6<1>F g13<8,8,1>F { align1 };
+mov (8) m7<1>F g15<8,8,1>F { align1 };
+mov (8) m8<1>F g17<8,8,1>F { align1 };
+mov (8) m9<1>F g19<8,8,1>F { align1 };
+
+/* m0 and m1 are passed through directly from the PS thread payload */
+mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
+
+/* write */
+send (16) 0 acc0<1>UW g0<8,8,1>UW write (
+ 0, /* binding_table */
+ 8, /* pixel scoreboard clear, msg type simd16 single source */
+ 4, /* render target write */
+ 0 /* no write commit message */
+ )
+ mlen 10
+ rlen 0
+ { align1 EOT };
+
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/src/exa_wm_rotation_prog.h b/src/exa_wm_rotation_prog.h
new file mode 100644
index 00000000..890d2cf0
--- /dev/null
+++ b/src/exa_wm_rotation_prog.h
@@ -0,0 +1,70 @@
+ { 0x00000001, 0x2080013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20840d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x2088013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x208c0d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x20c0013d, 0x0000002a, 0x00000000 },
+ { 0x00000001, 0x20c4013d, 0x0000002a, 0x00000000 },
+ { 0x00000040, 0x20c80d3d, 0x0000002a, 0x00000001 },
+ { 0x00000040, 0x20cc0d3d, 0x0000002a, 0x00000001 },
+ { 0x00000001, 0x2090013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20940d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x2098013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x209c0d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x20d0013d, 0x0000002e, 0x00000000 },
+ { 0x00000001, 0x20d4013d, 0x0000002e, 0x00000000 },
+ { 0x00000040, 0x20d80d3d, 0x0000002e, 0x00000001 },
+ { 0x00000040, 0x20dc0d3d, 0x0000002e, 0x00000001 },
+ { 0x00000001, 0x20a0013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20a40d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20a8013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20ac0d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20e0013d, 0x00000032, 0x00000000 },
+ { 0x00000001, 0x20e4013d, 0x00000032, 0x00000000 },
+ { 0x00000040, 0x20e80d3d, 0x00000032, 0x00000001 },
+ { 0x00000040, 0x20ec0d3d, 0x00000032, 0x00000001 },
+ { 0x00000001, 0x20b0013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20b40d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20b8013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20bc0d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20f0013d, 0x00000036, 0x00000000 },
+ { 0x00000001, 0x20f4013d, 0x00000036, 0x00000000 },
+ { 0x00000040, 0x20f80d3d, 0x00000036, 0x00000001 },
+ { 0x00000040, 0x20fc0d3d, 0x00000036, 0x00000001 },
+ { 0x00600040, 0x208077bd, 0x008d0080, 0x00004020 },
+ { 0x00600040, 0x20a077bd, 0x008d00a0, 0x00004020 },
+ { 0x00600041, 0x208077bd, 0x008d0080, 0x00000074 },
+ { 0x00600041, 0x20a077bd, 0x008d00a0, 0x00000074 },
+ { 0x00600040, 0x208077bd, 0x008d0080, 0x0000007c },
+ { 0x00600040, 0x20a077bd, 0x008d00a0, 0x0000007c },
+ { 0x00600040, 0x20c077bd, 0x008d00c0, 0x00004024 },
+ { 0x00600040, 0x20e077bd, 0x008d00e0, 0x00004024 },
+ { 0x00600041, 0x20c077bd, 0x008d00c0, 0x00000070 },
+ { 0x00600041, 0x20e077bd, 0x008d00e0, 0x00000070 },
+ { 0x00600040, 0x20c077bd, 0x008d00c0, 0x0000006c },
+ { 0x00600040, 0x20e077bd, 0x008d00e0, 0x0000006c },
+ { 0x00600001, 0x202003be, 0x008d00c0, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d00e0, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0080, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d00a0, 0x00000000 },
+ { 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 },
+ { 0x00600001, 0x22600021, 0x008d0260, 0x00000000 },
+ { 0x00600001, 0x202003be, 0x008d0020, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
+ { 0x00600001, 0x20a003be, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x20c003be, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x20e003be, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x210003be, 0x008d0220, 0x00000000 },
+ { 0x00600001, 0x212003be, 0x008d0260, 0x00000000 },
+ { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
+ { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/i965_render.c b/src/i965_render.c
index 8d06c228..3cf694f4 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -335,6 +335,10 @@ static const CARD32 sf_kernel_static_mask[][4] = {
#include "exa_sf_mask_prog.h"
};
+static const CARD32 sf_kernel_static_rotation[][4] = {
+#include "exa_sf_rotation_prog.h"
+};
+
/* ps kernels */
#define PS_KERNEL_NUM_GRF 32
#define PS_MAX_THREADS 32
@@ -355,7 +359,12 @@ static const CARD32 ps_kernel_static_masknoca [][4] = {
#include "exa_wm_masknoca_prog.h"
};
-static CARD32 i965_get_card_format(PicturePtr pPict)
+static const CARD32 ps_kernel_static_rotation [][4] = {
+#include "exa_wm_rotation_prog.h"
+};
+
+static CARD32
+i965_get_card_format(PicturePtr pPict)
{
int i;
@@ -368,6 +377,21 @@ static CARD32 i965_get_card_format(PicturePtr pPict)
return i965_tex_formats[i].card_fmt;
}
+/**
+ * Report whether a picture transform is an exact quarter-turn rotation
+ * (90 or 270 degrees), i.e. one the dedicated rotation SF/WM kernels are
+ * meant to sample.
+ *
+ * The 2x2 linear part must be exactly
+ *
+ *     [ 0 -1 ]        [  0  1 ]
+ *     [ 1  0 ]   or   [ -1  0 ]
+ *
+ * in 16.16 fixed point.  The entries are compared as raw xFixed values
+ * rather than through xFixedToInt(): that macro discards the fractional
+ * bits, so values such as -0.5 or 1.5 would be mistaken for -1 and 1.
+ * The diagonal must be zero as well, because the rotation SF kernel only
+ * sets up the du/dy and dv/dx gradients.  The translation column and the
+ * bottom row of the matrix are not examined.
+ *
+ * @param t  transform to test; NULL is treated as "not a rotation".
+ * @return TRUE for an exact quarter-turn rotation, FALSE otherwise.
+ */
+static Bool
+i965_check_rotation_transform(PictTransformPtr t)
+{
+    const xFixed one = IntToxFixed(1);
+    xFixed a, b;
+
+    if (!t)
+        return FALSE;
+
+    /* Any x->u or y->v contribution means this is not a pure quarter turn. */
+    if (t->matrix[0][0] != 0 || t->matrix[1][1] != 0)
+        return FALSE;
+
+    a = t->matrix[0][1];
+    b = t->matrix[1][0];
+
+    if (a == -one && b == one)
+        return TRUE;
+    if (a == one && b == -one)
+        return TRUE;
+    return FALSE;
+}
+
Bool
i965_prepare_composite(int op, PicturePtr pSrcPicture,
PicturePtr pMaskPicture, PicturePtr pDstPicture,
@@ -378,6 +402,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
CARD32 src_offset, src_pitch;
CARD32 mask_offset = 0, mask_pitch = 0;
CARD32 dst_format, dst_offset, dst_pitch;
+ Bool rotation_program = FALSE;
#ifdef XF86DRI
if (pI830->directRenderingEnabled) {
@@ -406,6 +431,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
pI830->transform[1] = NULL;
pI830->scale_units[1][0] = -1;
pI830->scale_units[1][1] = -1;
+ if (pI830->transform[0] &&
+ i965_check_rotation_transform(pI830->transform[0]))
+ rotation_program = TRUE;
} else {
pI830->transform[1] = pMaskPicture->transform;
pI830->scale_units[1][0] = pMask->drawable.width;
@@ -442,7 +470,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
sf_kernel_offset = ALIGN(next_offset, 64);
if (pMask)
next_offset = sf_kernel_offset + sizeof (sf_kernel_static_mask);
- else
+ else if (rotation_program)
+ next_offset = sf_kernel_offset + sizeof (sf_kernel_static_rotation);
+ else
next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
ps_kernel_offset = ALIGN(next_offset, 64);
@@ -459,6 +489,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
} else
next_offset = ps_kernel_offset +
sizeof(ps_kernel_static_masknoca);
+ } else if (rotation_program) {
+ next_offset = ps_kernel_offset + sizeof (ps_kernel_static_rotation);
} else {
next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask);
}
@@ -762,6 +794,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
*/
if (pMask)
memcpy(sf_kernel, sf_kernel_static_mask, sizeof (sf_kernel_static));
+ else if (rotation_program)
+ memcpy(sf_kernel, sf_kernel_static_rotation,
+ sizeof (sf_kernel_static_rotation));
else
memcpy(sf_kernel, sf_kernel_static, sizeof (sf_kernel_static));
@@ -808,6 +843,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
} else
memcpy(ps_kernel, ps_kernel_static_masknoca,
sizeof (ps_kernel_static_masknoca));
+ } else if (rotation_program) {
+ memcpy(ps_kernel, ps_kernel_static_rotation,
+ sizeof (ps_kernel_static_rotation));
} else {
memcpy(ps_kernel, ps_kernel_static_nomask,
sizeof (ps_kernel_static_nomask));
@@ -973,8 +1011,10 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
(0 << VE0_OFFSET_SHIFT));
OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
(BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) |
+ ((pMask ? BRW_VFCOMPONENT_NOSTORE: BRW_VFCOMPONENT_STORE_1_FLT)
+ << VE1_VFCOMPONENT_2_SHIFT) |
+ ((pMask ? BRW_VFCOMPONENT_NOSTORE: BRW_VFCOMPONENT_STORE_1_FLT)
+ << VE1_VFCOMPONENT_3_SHIFT) |
(0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
if (pMask) {
OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |