diff options
author | Wang Zhenyu <zhenyu.z.wang@intel.com> | 2007-03-21 14:50:45 +0800 |
---|---|---|
committer | Wang Zhenyu <zhenyu.z.wang@intel.com> | 2007-03-21 14:50:45 +0800 |
commit | 3025fa0fb2bf5ace7076796e45e2560fe8410e8d (patch) | |
tree | cc4ff8d73dd734b85711cc39a6b393ad9e6afc7f | |
parent | 223944878cf38f86580df5a7d3102d86cfc061b9 (diff) |
EXA: try to enable rotation for G965
The new sf/wm programs should handle the texture sampling only in
the rotated case. Also fix a possible hole in the VUE slot.
-rw-r--r-- | src/Makefile.am | 14 | ||||
-rw-r--r-- | src/exa_sf_rotation.g4a | 29 | ||||
-rw-r--r-- | src/exa_sf_rotation_prog.h | 20 | ||||
-rw-r--r-- | src/exa_wm_rotation.g4a | 158 | ||||
-rw-r--r-- | src/exa_wm_rotation_prog.h | 70 | ||||
-rw-r--r-- | src/i965_render.c | 48 |
6 files changed, 333 insertions, 6 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index c65c1e73..bd64f503 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -120,20 +120,24 @@ INTEL_G4A = \ packed_yuv_wm.g4a \ exa_sf.g4a \ exa_sf_mask.g4a \ + exa_sf_rotation.g4a \ exa_wm_maskca.g4a \ exa_wm_maskca_srcalpha.g4a \ exa_wm_masknoca.g4a \ - exa_wm_nomask.g4a + exa_wm_nomask.g4a \ + exa_wm_rotation.g4a INTEL_G4H = \ sf_prog.h \ wm_prog.h \ exa_sf_mask_prog.h \ exa_sf_prog.h \ + exa_sf_rotation_prog.h \ exa_wm_maskca_prog.h \ exa_wm_maskca_srcalpha_prog.h \ exa_wm_masknoca_prog.h \ - exa_wm_nomask_prog.h + exa_wm_nomask_prog.h \ + exa_wm_rotation_prog.h EXTRA_DIST = \ $(XMODE_SRCS) \ @@ -154,6 +158,9 @@ exa_sf_mask_prog.h: exa_sf_mask.g4a exa_sf_prog.h: exa_sf.g4a intel-gen4asm -o exa_sf_prog.h exa_sf.g4a + +exa_sf_rotation_prog.h: exa_sf_rotation.g4a + intel-gen4asm -o exa_sf_rotation_prog.h exa_sf_rotation.g4a exa_wm_maskca_prog.h: exa_wm_maskca.g4a intel-gen4asm -o exa_wm_maskca_prog.h exa_wm_maskca.g4a @@ -166,6 +173,9 @@ exa_wm_masknoca_prog.h: exa_wm_masknoca.g4a exa_wm_nomask_prog.h: exa_wm_nomask.g4a intel-gen4asm -o exa_wm_nomask_prog.h exa_wm_nomask.g4a + +exa_wm_rotation_prog.h: exa_wm_rotation.g4a + intel-gen4asm -o exa_wm_rotation_prog.h exa_wm_rotation.g4a endif diff --git a/src/exa_sf_rotation.g4a b/src/exa_sf_rotation.g4a new file mode 100644 index 00000000..4c93553c --- /dev/null +++ b/src/exa_sf_rotation.g4a @@ -0,0 +1,29 @@ +/* 1/dx */ +send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 }; +/* 1/dy */ +send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 }; +/* du, dv */ +mul (1) g7<1>F g3<0,1,0>F -1.0F { align1 }; +mul (1) g7.4<1>F g3.4<0,1,0>F -1.0F { align1 }; +add (1) g7<1>F g4<0,1,0>F g7<0,1,0>F { align1 }; +add (1) g7.4<1>F g4.4<0,1,0>F g7.4<0,1,0>F { align1 }; + +/* du/dy */ +mul (1) g7<1>F g7<0,1,0>F g6.4<0,1,0>F { align1 }; +/* dv/dx */ +mul (1) g7.4<1>F g7.4<0,1,0>F g6<0,1,0>F { align1 }; +/* Cx */ +mov (8) m1<1>F g7<0,1,0>F { align1 
}; +/* Cy */ +mov (8) m2<1>F g7.4<0,1,0>F { align1 }; +/* Co */ +mov (8) m3<1>F g3<8,8,1>F { align1 }; +send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT }; +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; diff --git a/src/exa_sf_rotation_prog.h b/src/exa_sf_rotation_prog.h new file mode 100644 index 00000000..95891305 --- /dev/null +++ b/src/exa_sf_rotation_prog.h @@ -0,0 +1,20 @@ + { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 }, + { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 }, + { 0x00000041, 0x20e07fbd, 0x00000060, 0xbf800000 }, + { 0x00000041, 0x20e47fbd, 0x00000064, 0xbf800000 }, + { 0x00000040, 0x20e077bd, 0x00000080, 0x000000e0 }, + { 0x00000040, 0x20e477bd, 0x00000084, 0x000000e4 }, + { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c4 }, + { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c0 }, + { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 }, + { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 }, + { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 }, + { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/exa_wm_rotation.g4a b/src/exa_wm_rotation.g4a new file mode 100644 index 00000000..b12f81c1 --- /dev/null +++ b/src/exa_wm_rotation.g4a @@ -0,0 +1,158 @@ +/* + * This's for exa composite operation in no mask picture case. + * The simplest case is just sending what src picture has to dst picture. + */ + +/* I think this should be same as in g4a program for texture video, + as we also use 16-pixel dispatch. and SF scale in g3 is useful for us. 
*/ + +/* The initial payload of the thread is always g0. + * WM_URB (incoming URB entries) is g3 + * X0_R is g4 + * X1_R is g5 + * Y0_R is g6 + * Y1_R is g7 + */ + + /* Set up the X/Y screen coordinates of the pixels in our 4 subspans. Each + * subspan is a 2x2 rectangle, and the screen x/y of the upper left of each + * subspan are given in GRF register 1.2 through 1.5 (which, with the word + * addressing below, are 1.4 through 1.11). + * + * The result is WM_X*_R and WM_Y*_R being: + * + * X0: {ss0.x, ss0.x+1, ss0.x, ss0.x+1, ss1.x, ss1.x+1, ss1.x, ss1.x+1} + * Y0: {ss0.y, ss0.y, ss0.y+1, ss0.y+1, ss1.y, ss1.y, ss1.y+1, ss1.y+1} + * X1: {ss2.x, ss2.x+1, ss2.x, ss2.x+1, ss3.x, ss3.x+1, ss3.x, ss3.x+1} + * Y1: {ss2.y, ss2.y, ss2.y+1, ss2.y+1, ss3.y, ss3.y, ss3.y+1, ss3.y+1} + */ + /* Set up ss0.x coordinates*/ +mov (1) g4<1>F g1.8<0,1,0>UW { align1 }; +add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 }; +mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 }; +add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 }; + /* Set up ss0.y coordinates */ +mov (1) g6<1>F g1.10<0,1,0>UW { align1 }; +mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 }; +add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 }; +add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 }; + /* set up ss1.x coordinates */ +mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 }; +add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 }; +mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 }; +add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 }; + /* set up ss1.y coordinates */ +mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 }; +mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 }; +add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { align1 }; +add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 }; + /* Set up ss2.x coordinates */ +mov (1) g5<1>F g1.16<0,1,0>UW { align1 }; +add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 }; +mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 }; +add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 }; + /* Set up ss2.y coordinates */ +mov (1) g7<1>F g1.18<0,1,0>UW { align1 }; +mov (1) g7.4<1>F 
g1.18<0,1,0>UW { align1 }; +add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 }; +add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 }; + /* Set up ss3.x coordinates */ +mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 }; +add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 }; +mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 }; +add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 }; + /* Set up ss3.y coordinates */ +mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 }; +mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 }; +add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 }; +add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 }; + + /* Now, map these screen space coordinates into texture coordinates. */ + /* subtract screen-space X origin of vertex 0. */ +/* for rotation, texture y is from ssX.x, so g4,g5 will be Y */ +add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 }; +add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 }; + /* scale by texture X increment */ +mul (8) g4<1>F g4<8,8,1>F g3.20<0,1,0>F { align1 }; +mul (8) g5<1>F g5<8,8,1>F g3.20<0,1,0>F { align1 }; + /* add in texture X offset */ +add (8) g4<1>F g4<8,8,1>F g3.28<0,1,0>F { align1 }; +add (8) g5<1>F g5<8,8,1>F g3.28<0,1,0>F { align1 }; + +/* for rotation, texture x is from ssX.y, so g6,g7 will be X */ + /* subtract screen-space Y origin of vertex 0. */ +add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 }; +add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 }; + /* scale by texture Y increment */ +mul (8) g6<1>F g6<8,8,1>F g3.16<0,1,0>F { align1 }; +mul (8) g7<1>F g7<8,8,1>F g3.16<0,1,0>F { align1 }; + /* add in texture Y offset */ +add (8) g6<1>F g6<8,8,1>F g3.12<0,1,0>F { align1 }; +add (8) g7<1>F g7<8,8,1>F g3.12<0,1,0>F { align1 }; + +/* prepare sampler read back gX register, which would be written back to output */ + +/* use simd16 sampler, param 0 is u, param 1 is v. 
*/ +/* 'payload' loading, assuming tex coord start from g4 */ +mov (8) m1<1>F g6<8,8,1>F { align1 }; +mov (8) m2<1>F g7<8,8,1>F { align1 }; +mov (8) m3<1>F g4<8,8,1>F { align1 }; +mov (8) m4<1>F g5<8,8,1>F { align1 }; + +/* m0 will be copied with g0, as it contains send desc */ +/* emit sampler 'send' cmd */ +send (16) 0 /* msg reg index */ + g12<1>UW /* readback */ + g0<8,8,1>UW /* copy to msg start reg*/ + sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) */ + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ + +mov (8) g19<1>UD g19<8,8,1>UD { align1 }; /* wait sampler return */ +/* if we set up read-back reg correctly, emit dataport write 'send' cmd with EOT */ + +/* m0, m1 are all direct passed by PS thread payload */ +mov (8) m1<1>F g1<8,8,1>F { align1 }; + +/* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */ +/* g12 -> m2 + g13 -> m6 + g14 -> m3 + g15 -> m7 + g16 -> m4 + g17 -> m8 + g18 -> m5 + g19 -> m9 +*/ +mov (8) m2<1>F g12<8,8,1>F { align1 }; +mov (8) m3<1>F g14<8,8,1>F { align1 }; +mov (8) m4<1>F g16<8,8,1>F { align1 }; +mov (8) m5<1>F g18<8,8,1>F { align1 }; +mov (8) m6<1>F g13<8,8,1>F { align1 }; +mov (8) m7<1>F g15<8,8,1>F { align1 }; +mov (8) m8<1>F g17<8,8,1>F { align1 }; +mov (8) m9<1>F g19<8,8,1>F { align1 }; + +/* m0, m1 are all direct passed by PS thread payload */ +mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable }; + +/* write */ +send (16) 0 acc0<1>UW g0<8,8,1>UW write ( + 0, /* binding_table */ + 8, /* pixel scoreboard clear, msg type simd16 single source */ + 4, /* render target write */ + 0 /* no write commit message */ + ) + mlen 10 + rlen 0 + { align1 EOT }; + +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; diff --git a/src/exa_wm_rotation_prog.h b/src/exa_wm_rotation_prog.h new file mode 100644 index 00000000..890d2cf0 --- /dev/null +++ b/src/exa_wm_rotation_prog.h @@ 
-0,0 +1,70 @@ + { 0x00000001, 0x2080013d, 0x00000028, 0x00000000 }, + { 0x00000040, 0x20840d3d, 0x00000028, 0x00000001 }, + { 0x00000001, 0x2088013d, 0x00000028, 0x00000000 }, + { 0x00000040, 0x208c0d3d, 0x00000028, 0x00000001 }, + { 0x00000001, 0x20c0013d, 0x0000002a, 0x00000000 }, + { 0x00000001, 0x20c4013d, 0x0000002a, 0x00000000 }, + { 0x00000040, 0x20c80d3d, 0x0000002a, 0x00000001 }, + { 0x00000040, 0x20cc0d3d, 0x0000002a, 0x00000001 }, + { 0x00000001, 0x2090013d, 0x0000002c, 0x00000000 }, + { 0x00000040, 0x20940d3d, 0x0000002c, 0x00000001 }, + { 0x00000001, 0x2098013d, 0x0000002c, 0x00000000 }, + { 0x00000040, 0x209c0d3d, 0x0000002c, 0x00000001 }, + { 0x00000001, 0x20d0013d, 0x0000002e, 0x00000000 }, + { 0x00000001, 0x20d4013d, 0x0000002e, 0x00000000 }, + { 0x00000040, 0x20d80d3d, 0x0000002e, 0x00000001 }, + { 0x00000040, 0x20dc0d3d, 0x0000002e, 0x00000001 }, + { 0x00000001, 0x20a0013d, 0x00000030, 0x00000000 }, + { 0x00000040, 0x20a40d3d, 0x00000030, 0x00000001 }, + { 0x00000001, 0x20a8013d, 0x00000030, 0x00000000 }, + { 0x00000040, 0x20ac0d3d, 0x00000030, 0x00000001 }, + { 0x00000001, 0x20e0013d, 0x00000032, 0x00000000 }, + { 0x00000001, 0x20e4013d, 0x00000032, 0x00000000 }, + { 0x00000040, 0x20e80d3d, 0x00000032, 0x00000001 }, + { 0x00000040, 0x20ec0d3d, 0x00000032, 0x00000001 }, + { 0x00000001, 0x20b0013d, 0x00000034, 0x00000000 }, + { 0x00000040, 0x20b40d3d, 0x00000034, 0x00000001 }, + { 0x00000001, 0x20b8013d, 0x00000034, 0x00000000 }, + { 0x00000040, 0x20bc0d3d, 0x00000034, 0x00000001 }, + { 0x00000001, 0x20f0013d, 0x00000036, 0x00000000 }, + { 0x00000001, 0x20f4013d, 0x00000036, 0x00000000 }, + { 0x00000040, 0x20f80d3d, 0x00000036, 0x00000001 }, + { 0x00000040, 0x20fc0d3d, 0x00000036, 0x00000001 }, + { 0x00600040, 0x208077bd, 0x008d0080, 0x00004020 }, + { 0x00600040, 0x20a077bd, 0x008d00a0, 0x00004020 }, + { 0x00600041, 0x208077bd, 0x008d0080, 0x00000074 }, + { 0x00600041, 0x20a077bd, 0x008d00a0, 0x00000074 }, + { 0x00600040, 0x208077bd, 0x008d0080, 
0x0000007c }, + { 0x00600040, 0x20a077bd, 0x008d00a0, 0x0000007c }, + { 0x00600040, 0x20c077bd, 0x008d00c0, 0x00004024 }, + { 0x00600040, 0x20e077bd, 0x008d00e0, 0x00004024 }, + { 0x00600041, 0x20c077bd, 0x008d00c0, 0x00000070 }, + { 0x00600041, 0x20e077bd, 0x008d00e0, 0x00000070 }, + { 0x00600040, 0x20c077bd, 0x008d00c0, 0x0000006c }, + { 0x00600040, 0x20e077bd, 0x008d00e0, 0x0000006c }, + { 0x00600001, 0x202003be, 0x008d00c0, 0x00000000 }, + { 0x00600001, 0x204003be, 0x008d00e0, 0x00000000 }, + { 0x00600001, 0x206003be, 0x008d0080, 0x00000000 }, + { 0x00600001, 0x208003be, 0x008d00a0, 0x00000000 }, + { 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 }, + { 0x00600001, 0x22600021, 0x008d0260, 0x00000000 }, + { 0x00600001, 0x202003be, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x204003be, 0x008d0180, 0x00000000 }, + { 0x00600001, 0x206003be, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x208003be, 0x008d0200, 0x00000000 }, + { 0x00600001, 0x20a003be, 0x008d0240, 0x00000000 }, + { 0x00600001, 0x20c003be, 0x008d01a0, 0x00000000 }, + { 0x00600001, 0x20e003be, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x210003be, 0x008d0220, 0x00000000 }, + { 0x00600001, 0x212003be, 0x008d0260, 0x00000000 }, + { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 }, + { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/i965_render.c b/src/i965_render.c index 8d06c228..3cf694f4 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -335,6 +335,10 @@ static const CARD32 sf_kernel_static_mask[][4] = { 
#include "exa_sf_mask_prog.h" }; +static const CARD32 sf_kernel_static_rotation[][4] = { +#include "exa_sf_rotation_prog.h" +}; + /* ps kernels */ #define PS_KERNEL_NUM_GRF 32 #define PS_MAX_THREADS 32 @@ -355,7 +359,12 @@ static const CARD32 ps_kernel_static_masknoca [][4] = { #include "exa_wm_masknoca_prog.h" }; -static CARD32 i965_get_card_format(PicturePtr pPict) +static const CARD32 ps_kernel_static_rotation [][4] = { +#include "exa_wm_rotation_prog.h" +}; + +static CARD32 +i965_get_card_format(PicturePtr pPict) { int i; @@ -368,6 +377,21 @@ static CARD32 i965_get_card_format(PicturePtr pPict) return i965_tex_formats[i].card_fmt; } +static Bool +i965_check_rotation_transform(PictTransformPtr t) +{ + /* XXX this is arbitrary */ + int a, b; + a = xFixedToInt(t->matrix[0][1]); + b = xFixedToInt(t->matrix[1][0]); + if (a == -1 && b == 1) + return TRUE; + else if (a == 1 && b == -1) + return TRUE; + else + return FALSE; +} + Bool i965_prepare_composite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, PicturePtr pDstPicture, @@ -378,6 +402,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, CARD32 src_offset, src_pitch; CARD32 mask_offset = 0, mask_pitch = 0; CARD32 dst_format, dst_offset, dst_pitch; + Bool rotation_program = FALSE; #ifdef XF86DRI if (pI830->directRenderingEnabled) { @@ -406,6 +431,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, pI830->transform[1] = NULL; pI830->scale_units[1][0] = -1; pI830->scale_units[1][1] = -1; + if (pI830->transform[0] && + i965_check_rotation_transform(pI830->transform[0])) + rotation_program = TRUE; } else { pI830->transform[1] = pMaskPicture->transform; pI830->scale_units[1][0] = pMask->drawable.width; @@ -442,7 +470,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, sf_kernel_offset = ALIGN(next_offset, 64); if (pMask) next_offset = sf_kernel_offset + sizeof (sf_kernel_static_mask); - else + else if (rotation_program) + next_offset = sf_kernel_offset + sizeof 
(sf_kernel_static_rotation); + else next_offset = sf_kernel_offset + sizeof (sf_kernel_static); ps_kernel_offset = ALIGN(next_offset, 64); @@ -459,6 +489,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, } else next_offset = ps_kernel_offset + sizeof(ps_kernel_static_masknoca); + } else if (rotation_program) { + next_offset = ps_kernel_offset + sizeof (ps_kernel_static_rotation); } else { next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask); } @@ -762,6 +794,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, */ if (pMask) memcpy(sf_kernel, sf_kernel_static_mask, sizeof (sf_kernel_static)); + else if (rotation_program) + memcpy(sf_kernel, sf_kernel_static_rotation, + sizeof (sf_kernel_static_rotation)); else memcpy(sf_kernel, sf_kernel_static, sizeof (sf_kernel_static)); @@ -808,6 +843,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, } else memcpy(ps_kernel, ps_kernel_static_masknoca, sizeof (ps_kernel_static_masknoca)); + } else if (rotation_program) { + memcpy(ps_kernel, ps_kernel_static_rotation, + sizeof (ps_kernel_static_rotation)); } else { memcpy(ps_kernel, ps_kernel_static_nomask, sizeof (ps_kernel_static_nomask)); @@ -973,8 +1011,10 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, (0 << VE0_OFFSET_SHIFT)); OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | - (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) | - (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) | + ((pMask ? BRW_VFCOMPONENT_NOSTORE: BRW_VFCOMPONENT_STORE_1_FLT) + << VE1_VFCOMPONENT_2_SHIFT) | + ((pMask ? BRW_VFCOMPONENT_NOSTORE: BRW_VFCOMPONENT_STORE_1_FLT) + << VE1_VFCOMPONENT_3_SHIFT) | (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); if (pMask) { OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | |