diff options
author | Keith Packard <keithp@keithp.com> | 2008-03-31 23:50:20 -0700 |
---|---|---|
committer | Keith Packard <keithp@keithp.com> | 2008-03-31 23:50:20 -0700 |
commit | a6492661ae07310128eb73c3ef037c42ce7ab184 (patch) | |
tree | 0d72faeb2195da4618bf0d1f8fce460e80c99a74 /src | |
parent | f8081178eb6fda0e405967cbacad532561619262 (diff) |
Fix composite with mask using new compositing thread code
Clean up register allocation to never overlap
Always write 4 values for each texture vertex.
Diffstat (limited to 'src')
32 files changed, 300 insertions, 189 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 81d9596a..9b5d653a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -131,18 +131,14 @@ INTEL_G4A = \ packed_yuv_wm.g4a \ exa_sf.g4a \ exa_sf_mask.g4a \ - exa_sf_rotation.g4a \ - exa_wm_maskca.g4a \ - exa_wm_maskca_srcalpha.g4a \ - exa_wm_masknoca.g4a \ - exa_wm_nomask.g4a \ - exa_wm_rotation.g4a \ exa_wm_src_affine.g4a \ exa_wm_src_projective.g4a \ - exa_wm_src_sample.g4a \ + exa_wm_src_sample_argb.g4a \ + exa_wm_src_sample_a.g4a \ exa_wm_mask_affine.g4a \ exa_wm_mask_projective.g4a \ - exa_wm_mask_sample.g4a \ + exa_wm_mask_sample_argb.g4a \ + exa_wm_mask_sample_a.g4a \ exa_wm_noca.g4a \ exa_wm_ca.g4a \ exa_wm_ca_srcalpha.g4a \ @@ -153,29 +149,21 @@ INTEL_G4I = \ exa_wm.g4i \ exa_wm_affine.g4i \ exa_wm_projective.g4i + INTEL_G4B = \ packed_yuv_sf.g4b \ - packed_yuv_wm.g4b \ - exa_sf_mask.g4b \ + packed_yuv_wm.g4b \ exa_sf.g4b \ - exa_sf_rotation.g4b \ - exa_wm_maskca.g4b \ - exa_wm_maskca_srcalpha.g4b \ - exa_wm_masknoca.g4b \ - exa_wm_nomask.g4b \ - exa_wm_rotation.g4b \ - exa_wm_maskca.g4b \ - exa_wm_maskca_srcalpha.g4b \ - exa_wm_masknoca.g4b \ - exa_wm_nomask.g4b \ - exa_wm_rotation.g4b \ + exa_sf_mask.g4b \ exa_wm_src_affine.g4b \ exa_wm_src_projective.g4b \ - exa_wm_src_sample.g4b \ + exa_wm_src_sample_argb.g4b \ + exa_wm_src_sample_a.g4b \ exa_wm_mask_affine.g4b \ exa_wm_mask_projective.g4b \ - exa_wm_mask_sample.g4b \ + exa_wm_mask_sample_argb.g4b \ + exa_wm_mask_sample_a.g4b \ exa_wm_noca.g4b \ exa_wm_ca.g4b \ exa_wm_ca_srcalpha.g4b \ @@ -194,8 +182,11 @@ if HAVE_GEN4ASM SUFFIXES = .g4a .g4b .g4a.g4b: - m4 -s $*.g4a > $*.g4m - intel-gen4asm -o $@ $*.g4m && rm $*.g4m + m4 -s $*.g4a > $*.g4m && intel-gen4asm -o $@ $*.g4m && rm $*.g4m + +$(INTEL_G4B): $(INTEL_G4I) + +BUILT_SOURCES= $(INTEL_G4B) endif diff --git a/src/exa_wm.g4i b/src/exa_wm.g4i index 1be40e70..724ef2b5 100644 --- a/src/exa_wm.g4i +++ b/src/exa_wm.g4i @@ -71,47 +71,52 @@ define(`dst_y_0', `dst_y') define(`dst_y_1', `g11') /* When 
computing x * dn/dx, use this */ -define(`temp_x', `g12') +define(`temp_x', `g34') define(`temp_x_0', `temp_x') -define(`temp_x_1', `g13') +define(`temp_x_1', `g35') /* When computing y * dn/dy, use this */ -define(`temp_y', `g14') +define(`temp_y', `g32') define(`temp_y_0', temp_y) -define(`temp_y_1', `g15') +define(`temp_y_1', `g33') /* when loading x/y, use these to hold them in UW format */ define(`temp_x_uw', temp_x) define(`temp_y_uw', temp_y) /* compute source and mask u/v to this pair to send to sampler */ -define(`src_u', `m1') -define(`src_v', `m3') -define(`mask_u', src_u) -define(`mask_v', src_v) -define(`src_w', `g16') -define(`src_w_0', src_w) -define(`src_w_1', `g17') -define(`mask_w', src_w) -define(`mask_w_0', src_w_0) -define(`mask_w_1', src_w_1) +define(`src_msg', `m1') +define(`src_msg_ind',`1') +define(`src_u', `m2') +define(`src_v', `m4') +define(`src_w', `g12') +define(`src_w_0', `g12') +define(`src_w_1', `g13') + +define(`mask_msg', `m7') +define(`mask_msg_ind',`7') +define(`mask_u', `m8') +define(`mask_v', `m10') +define(`mask_w', `g14') +define(`mask_w_0', `g14') +define(`mask_w_1', `g15') /* sample src to these registers */ -define(`src_sample0', `g18') -define(`src_sample1', `g19') -define(`src_sample2', `g20') -define(`src_sample3', `g21') -define(`src_sample4', `g22') -define(`src_sample5', `g23') -define(`src_sample6', `g24') -define(`src_sample7', `g25') +define(`src_sample0', `g16') +define(`src_sample1', `g17') +define(`src_sample2', `g18') +define(`src_sample3', `g19') +define(`src_sample4', `g20') +define(`src_sample5', `g21') +define(`src_sample6', `g22') +define(`src_sample7', `g23') /* sample mask to these registers */ -define(`mask_sample0', `g26') -define(`mask_sample1', `g27') -define(`mask_sample2', `g28') -define(`mask_sample3', `g29') -define(`mask_sample4', `g30') -define(`mask_sample5', `g31') -define(`mask_sample6', `g32') -define(`mask_sample7', `g33') +define(`mask_sample0', `g24') +define(`mask_sample1', `g25') 
+define(`mask_sample2', `g26') +define(`mask_sample3', `g27') +define(`mask_sample4', `g28') +define(`mask_sample5', `g29') +define(`mask_sample6', `g30') +define(`mask_sample7', `g31') diff --git a/src/exa_wm_affine.g4i b/src/exa_wm_affine.g4i index 8fc6450b..e72656b6 100644 --- a/src/exa_wm_affine.g4i +++ b/src/exa_wm_affine.g4i @@ -42,4 +42,3 @@ mul (16) temp_x<1>F dst_x<8,8,1>F dv_dx { compr align1 }; mul (16) temp_y<1>F dst_y<8,8,1>F dv_dy { compr align1 }; add (16) temp_x<1>F temp_x<8,8,1>F temp_y<8,8,1>F { compr align1 }; add (16) v<1>F temp_x<8,8,1>F vo { compr align1 }; - diff --git a/src/exa_wm_ca.g4b b/src/exa_wm_ca.g4b index d0f3519b..28bd6c6b 100644 --- a/src/exa_wm_ca.g4b +++ b/src/exa_wm_ca.g4b @@ -1,4 +1,4 @@ + { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 }, { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 }, { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 }, { 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d03c0 }, - { 0x00802041, 0x230077bd, 0x008d0300, 0x008d0400 }, diff --git a/src/exa_wm_ca_srcalpha.g4a b/src/exa_wm_ca_srcalpha.g4a index a1be28e4..e252e19b 100644 --- a/src/exa_wm_ca_srcalpha.g4a +++ b/src/exa_wm_ca_srcalpha.g4a @@ -31,8 +31,7 @@ include(`exa_wm.g4i') -/* mul mask rgba channels to src */ -mul (16) src_sample0<1>F src_sample0<8,8,1>F src_sample6<8,8,1>F { compr align1 }; -mul (16) src_sample2<1>F src_sample2<8,8,1>F src_sample6<8,8,1>F { compr align1 }; -mul (16) src_sample4<1>F src_sample4<8,8,1>F src_sample6<8,8,1>F { compr align1 }; -mul (16) src_sample6<1>F src_sample6<8,8,1>F src_sample6<8,8,1>F { compr align1 }; +mul (16) src_sample0<1>F mask_sample0<8,8,1>F src_sample6<8,8,1>F { compr align1 }; +mul (16) src_sample2<1>F mask_sample2<8,8,1>F src_sample6<8,8,1>F { compr align1 }; +mul (16) src_sample4<1>F mask_sample4<8,8,1>F src_sample6<8,8,1>F { compr align1 }; +mul (16) src_sample6<1>F mask_sample6<8,8,1>F src_sample6<8,8,1>F { compr align1 }; diff --git a/src/exa_wm_ca_srcalpha.g4b b/src/exa_wm_ca_srcalpha.g4b index 
780e704b..94f15163 100644 --- a/src/exa_wm_ca_srcalpha.g4b +++ b/src/exa_wm_ca_srcalpha.g4b @@ -1,4 +1,4 @@ - { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0300 }, - { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0300 }, - { 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d0300 }, - { 0x00802041, 0x230077bd, 0x008d0300, 0x008d0300 }, + { 0x00802041, 0x220077bd, 0x008d0300, 0x008d02c0 }, + { 0x00802041, 0x224077bd, 0x008d0340, 0x008d02c0 }, + { 0x00802041, 0x228077bd, 0x008d0380, 0x008d02c0 }, + { 0x00802041, 0x22c077bd, 0x008d03c0, 0x008d02c0 }, diff --git a/src/exa_wm_mask_affine.g4a b/src/exa_wm_mask_affine.g4a index 4c096cbb..9c52d2f9 100644 --- a/src/exa_wm_mask_affine.g4a +++ b/src/exa_wm_mask_affine.g4a @@ -26,12 +26,16 @@ */ include(`exa_wm.g4i') + define(`du_dx', `mask_du_dx') define(`du_dy', `mask_du_dy') define(`uo', `mask_uo') + define(`dv_dx', `mask_dv_dx') define(`dv_dy', `mask_dv_dy') define(`vo', `mask_vo') + define(`u', `mask_u') define(`v', `mask_v') + include(`exa_wm_affine.g4i') diff --git a/src/exa_wm_mask_affine.g4b b/src/exa_wm_mask_affine.g4b index 62b46e0a..35dec6fd 100644 --- a/src/exa_wm_mask_affine.g4b +++ b/src/exa_wm_mask_affine.g4b @@ -1,8 +1,8 @@ - { 0x00802041, 0x218077bd, 0x008d0100, 0x00000090 }, - { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000094 }, - { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 }, - { 0x00802040, 0x202077be, 0x008d0180, 0x0000009c }, - { 0x00802041, 0x218077bd, 0x008d0100, 0x000000a0 }, - { 0x00802041, 0x21c077bd, 0x008d0140, 0x000000a4 }, - { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 }, - { 0x00802040, 0x206077be, 0x008d0180, 0x000000ac }, + { 0x00802041, 0x244077bd, 0x008d0100, 0x000000a0 }, + { 0x00802041, 0x240077bd, 0x008d0140, 0x000000a4 }, + { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 }, + { 0x00802040, 0x210077be, 0x008d0440, 0x000000ac }, + { 0x00802041, 0x244077bd, 0x008d0100, 0x000000b0 }, + { 0x00802041, 0x240077bd, 0x008d0140, 0x000000b4 }, + { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 }, + { 
0x00802040, 0x214077be, 0x008d0440, 0x000000bc }, diff --git a/src/exa_wm_mask_projective.g4a b/src/exa_wm_mask_projective.g4a index 464f6c51..9acaaced 100644 --- a/src/exa_wm_mask_projective.g4a +++ b/src/exa_wm_mask_projective.g4a @@ -42,6 +42,11 @@ define(`wo', `mask_wo') define(`u', `mask_u') define(`v', `mask_v') define(`w', `mask_w') + +define(`u_0', `mask_u_0') +define(`v_0', `mask_v_0') +define(`u_1', `mask_u_1') +define(`v_1', `mask_v_1') define(`w_0', `mask_w_0') define(`w_1', `mask_w_1') diff --git a/src/exa_wm_mask_projective.g4b b/src/exa_wm_mask_projective.g4b index ac4faa3e..06848823 100644 --- a/src/exa_wm_mask_projective.g4b +++ b/src/exa_wm_mask_projective.g4b @@ -1,16 +1,16 @@ - { 0x00802041, 0x218077bd, 0x008d0100, 0x000000b0 }, - { 0x00802041, 0x21c077bd, 0x008d0140, 0x000000b4 }, - { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 }, - { 0x00802040, 0x218077bd, 0x008d0180, 0x000000bc }, - { 0x00600031, 0x22001fbd, 0x008d0180, 0x01110001 }, - { 0x00600031, 0x22201fbd, 0x008d01a0, 0x01110001 }, - { 0x00802041, 0x218077bd, 0x008d0100, 0x00000090 }, - { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000094 }, - { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 }, - { 0x00802040, 0x218077bd, 0x008d0180, 0x0000009c }, - { 0x00802041, 0x202077be, 0x008d0180, 0x008d0200 }, - { 0x00802041, 0x218077bd, 0x008d0100, 0x000000a0 }, - { 0x00802041, 0x21c077bd, 0x008d0140, 0x000000a4 }, - { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 }, - { 0x00802040, 0x218077bd, 0x008d0180, 0x000000ac }, - { 0x00802041, 0x206077be, 0x008d0180, 0x008d0200 }, + { 0x00802041, 0x244077bd, 0x008d0100, 0x000000c0 }, + { 0x00802041, 0x240077bd, 0x008d0140, 0x000000c4 }, + { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 }, + { 0x00802040, 0x244077bd, 0x008d0440, 0x000000cc }, + { 0x00600031, 0x21c01fbd, 0x008d0440, 0x01110001 }, + { 0x00600031, 0x21e01fbd, 0x008d0460, 0x01110001 }, + { 0x00802041, 0x244077bd, 0x008d0100, 0x000000a0 }, + { 0x00802041, 0x240077bd, 0x008d0140, 0x000000a4 
}, + { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 }, + { 0x00802040, 0x244077bd, 0x008d0440, 0x000000ac }, + { 0x00802041, 0x210077be, 0x008d0440, 0x008d01c0 }, + { 0x00802041, 0x244077bd, 0x008d0100, 0x000000b0 }, + { 0x00802041, 0x240077bd, 0x008d0140, 0x000000b4 }, + { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 }, + { 0x00802040, 0x244077bd, 0x008d0440, 0x000000bc }, + { 0x00802041, 0x214077be, 0x008d0440, 0x008d01c0 }, diff --git a/src/exa_wm_mask_sample.g4b b/src/exa_wm_mask_sample.g4b deleted file mode 100644 index 45f7ead1..00000000 --- a/src/exa_wm_mask_sample.g4b +++ /dev/null @@ -1 +0,0 @@ - { 0x00800031, 0x23401d29, 0x008d0000, 0x02580001 }, diff --git a/src/exa_wm_mask_sample_a.g4a b/src/exa_wm_mask_sample_a.g4a new file mode 100644 index 00000000..c06611d5 --- /dev/null +++ b/src/exa_wm_mask_sample_a.g4a @@ -0,0 +1,48 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* Sample the mask surface */ + +include(`exa_wm.g4i') + +/* prepare sampler read back gX register, which would be written back to output */ + +/* use simd16 sampler, param 0 is u, param 1 is v. */ +/* 'payload' loading, assuming tex coord start from g4 */ + +/* load only alpha */ +mov (1) g0.8<1>UD 0x00007000UD { align1 mask_disable }; + +/* mask_msg will be copied with g0, as it contains send desc */ +/* emit sampler 'send' cmd */ +send (16) mask_msg_ind /* msg reg index */ + mask_sample6<1>UW /* readback */ + g0<8,8,1>UW /* copy to msg start reg*/ + sampler (2,1,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here (mask) we use the mask sampler and mask surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 2 (alpha only) */ + diff --git a/src/exa_wm_mask_sample_a.g4b b/src/exa_wm_mask_sample_a.g4b new file mode 100644 index 00000000..01fc8d5e --- /dev/null +++ b/src/exa_wm_mask_sample_a.g4b @@ -0,0 +1,2 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00007000 }, + { 0x07800031, 0x23c01d29, 0x008d0000, 0x02520102 }, diff --git a/src/exa_wm_mask_sample.g4a b/src/exa_wm_mask_sample_argb.g4a index 45dc3c4f..7f0815f2 100644 --- a/src/exa_wm_mask_sample.g4a +++ b/src/exa_wm_mask_sample_argb.g4a @@ -34,16 +34,15 @@ include(`exa_wm.g4i') /* use simd16 sampler, param 0 is u, param 1 is v.
*/ /* 'payload' loading, assuming tex coord start from g4 */ -/* m0 will be copied with g0, as it contains send desc */ +/* load argb */ +mov (1) g0.8<1>UD 0x00000000UD { align1 mask_disable }; + +/* mask_msg will be copied with g0, as it contains send desc */ /* emit sampler 'send' cmd */ -send (16) 0 /* msg reg index */ +send (16) mask_msg_ind /* msg reg index */ mask_sample0<1>UW /* readback */ g0<8,8,1>UW /* copy to msg start reg*/ - sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) + sampler (2,1,F) /* sampler message description, (binding_table,sampler_index,datatype) /* here(src->dst) we should use src_sampler and src_surface */ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ -// mov (8) mask_sample7<1>UD mask_sample7<8,8,1>UD { align1 }; /* wait sampler return */ - -/* if we set up read-back reg correctly, emit dataport write 'send' cmd with EOT */ - diff --git a/src/exa_wm_mask_sample_argb.g4b b/src/exa_wm_mask_sample_argb.g4b new file mode 100644 index 00000000..97d3803c --- /dev/null +++ b/src/exa_wm_mask_sample_argb.g4b @@ -0,0 +1,2 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x07800031, 0x23001d29, 0x008d0000, 0x02580102 }, diff --git a/src/exa_wm_noca.g4b b/src/exa_wm_noca.g4b index ba01d1a5..1c9d9486 100644 --- a/src/exa_wm_noca.g4b +++ b/src/exa_wm_noca.g4b @@ -1,4 +1,4 @@ - { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0400 }, - { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0400 }, - { 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d0400 }, - { 0x00802041, 0x230077bd, 0x008d0300, 0x008d0400 }, + { 0x00802041, 0x220077bd, 0x008d0200, 0x008d03c0 }, + { 0x00802041, 0x224077bd, 0x008d0240, 0x008d03c0 }, + { 0x00802041, 0x228077bd, 0x008d0280, 0x008d03c0 }, + { 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d03c0 }, diff --git a/src/exa_wm_nomask.g4a b/src/exa_wm_nomask.g4a index 97426ec1..eb535fe3 100644 --- a/src/exa_wm_nomask.g4a +++ b/src/exa_wm_nomask.g4a @@ -119,7 +119,7 @@ mov (8) 
m8<1>F g17<8,8,1>F { align1 }; mov (8) m9<1>F g19<8,8,1>F { align1 }; /* m0, m1 are all direct passed by PS thread payload */ -mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable }; +mov (8) m1<1>UD g1<8,8,1>UD { align1 }; /* write */ send (16) 0 acc0<1>UW g0<8,8,1>UW write ( diff --git a/src/exa_wm_src_affine.g4a b/src/exa_wm_src_affine.g4a index 3bf87179..3194b5a6 100644 --- a/src/exa_wm_src_affine.g4a +++ b/src/exa_wm_src_affine.g4a @@ -30,12 +30,16 @@ */ include(`exa_wm.g4i') + define(`du_dx', `src_du_dx') define(`du_dy', `src_du_dy') define(`uo', `src_uo') + define(`dv_dx', `src_dv_dx') define(`dv_dy', `src_dv_dy') define(`vo', `src_vo') + define(`u', `src_u') define(`v', `src_v') + include(`exa_wm_affine.g4i') diff --git a/src/exa_wm_src_affine.g4b b/src/exa_wm_src_affine.g4b index f18ea1ee..9fef62c1 100644 --- a/src/exa_wm_src_affine.g4b +++ b/src/exa_wm_src_affine.g4b @@ -1,8 +1,8 @@ - { 0x00802041, 0x218077bd, 0x008d0100, 0x00000060 }, - { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000064 }, - { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 }, - { 0x00802040, 0x202077be, 0x008d0180, 0x0000006c }, - { 0x00802041, 0x218077bd, 0x008d0100, 0x00000070 }, - { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000074 }, - { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 }, - { 0x00802040, 0x206077be, 0x008d0180, 0x0000007c }, + { 0x00802041, 0x244077bd, 0x008d0100, 0x00000060 }, + { 0x00802041, 0x240077bd, 0x008d0140, 0x00000064 }, + { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 }, + { 0x00802040, 0x204077be, 0x008d0440, 0x0000006c }, + { 0x00802041, 0x244077bd, 0x008d0100, 0x00000070 }, + { 0x00802041, 0x240077bd, 0x008d0140, 0x00000074 }, + { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 }, + { 0x00802040, 0x208077be, 0x008d0440, 0x0000007c }, diff --git a/src/exa_wm_src_projective.g4a b/src/exa_wm_src_projective.g4a index 6bd2d6a4..16c9cd56 100644 --- a/src/exa_wm_src_projective.g4a +++ b/src/exa_wm_src_projective.g4a @@ -39,6 +39,10 @@ define(`wo', `src_wo') define(`u', 
`src_u') define(`v', `src_v') define(`w', `src_w') +define(`u_0', `src_u_0') +define(`v_0', `src_v_0') +define(`u_1', `src_u_1') +define(`v_1', `src_v_1') define(`w_0', `src_w_0') define(`w_1', `src_w_1') diff --git a/src/exa_wm_src_projective.g4b b/src/exa_wm_src_projective.g4b index 68bfc920..2d203955 100644 --- a/src/exa_wm_src_projective.g4b +++ b/src/exa_wm_src_projective.g4b @@ -1,16 +1,16 @@ - { 0x00802041, 0x218077bd, 0x008d0100, 0x00000080 }, - { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000084 }, - { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 }, - { 0x00802040, 0x218077bd, 0x008d0180, 0x0000008c }, - { 0x00600031, 0x22001fbd, 0x008d0180, 0x01110001 }, - { 0x00600031, 0x22201fbd, 0x008d01a0, 0x01110001 }, - { 0x00802041, 0x218077bd, 0x008d0100, 0x00000060 }, - { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000064 }, - { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 }, - { 0x00802040, 0x218077bd, 0x008d0180, 0x0000006c }, - { 0x00802041, 0x202077be, 0x008d0180, 0x008d0200 }, - { 0x00802041, 0x218077bd, 0x008d0100, 0x00000070 }, - { 0x00802041, 0x21c077bd, 0x008d0140, 0x00000074 }, - { 0x00802040, 0x218077bd, 0x008d0180, 0x008d01c0 }, - { 0x00802040, 0x218077bd, 0x008d0180, 0x0000007c }, - { 0x00802041, 0x206077be, 0x008d0180, 0x008d0200 }, + { 0x00802041, 0x244077bd, 0x008d0100, 0x00000080 }, + { 0x00802041, 0x240077bd, 0x008d0140, 0x00000084 }, + { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 }, + { 0x00802040, 0x244077bd, 0x008d0440, 0x0000008c }, + { 0x00600031, 0x21801fbd, 0x008d0440, 0x01110001 }, + { 0x00600031, 0x21a01fbd, 0x008d0460, 0x01110001 }, + { 0x00802041, 0x244077bd, 0x008d0100, 0x00000060 }, + { 0x00802041, 0x240077bd, 0x008d0140, 0x00000064 }, + { 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 }, + { 0x00802040, 0x244077bd, 0x008d0440, 0x0000006c }, + { 0x00802041, 0x204077be, 0x008d0440, 0x008d0180 }, + { 0x00802041, 0x244077bd, 0x008d0100, 0x00000070 }, + { 0x00802041, 0x240077bd, 0x008d0140, 0x00000074 }, + { 0x00802040, 0x244077bd, 
0x008d0440, 0x008d0400 }, + { 0x00802040, 0x244077bd, 0x008d0440, 0x0000007c }, + { 0x00802041, 0x208077be, 0x008d0440, 0x008d0180 }, diff --git a/src/exa_wm_src_sample.g4b b/src/exa_wm_src_sample.g4b deleted file mode 100644 index 5ca33f5a..00000000 --- a/src/exa_wm_src_sample.g4b +++ /dev/null @@ -1 +0,0 @@ - { 0x00800031, 0x22401d29, 0x008d0000, 0x02580001 }, diff --git a/src/exa_wm_src_sample_a.g4a b/src/exa_wm_src_sample_a.g4a new file mode 100644 index 00000000..803c358a --- /dev/null +++ b/src/exa_wm_src_sample_a.g4a @@ -0,0 +1,47 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* Sample the src surface */ + +include(`exa_wm.g4i') + +/* prepare sampler read back gX register, which would be written back to output */ + +/* use simd16 sampler, param 0 is u, param 1 is v. */ +/* 'payload' loading, assuming tex coord start from g4 */ + +/* load alpha */ +mov (1) g0.8<1>UD 0x00007000UD { align1 mask_disable }; + +/* src_msg will be copied with g0, as it contains send desc */ +/* emit sampler 'send' cmd */ +send (16) src_msg_ind /* msg reg index */ + src_sample6<1>UW /* readback */ + g0<8,8,1>UW /* copy to msg start reg*/ + sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 2 (alpha only) */ diff --git a/src/exa_wm_src_sample_a.g4b b/src/exa_wm_src_sample_a.g4b new file mode 100644 index 00000000..85057575 --- /dev/null +++ b/src/exa_wm_src_sample_a.g4b @@ -0,0 +1,2 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00007000 }, + { 0x01800031, 0x22c01d29, 0x008d0000, 0x02520001 }, diff --git a/src/exa_wm_src_sample.g4a b/src/exa_wm_src_sample_argb.g4a index 04cd3e3d..4fcf276c 100644 --- a/src/exa_wm_src_sample.g4a +++ b/src/exa_wm_src_sample_argb.g4a @@ -34,16 +34,14 @@ include(`exa_wm.g4i') /* use simd16 sampler, param 0 is u, param 1 is v.
*/ /* 'payload' loading, assuming tex coord start from g4 */ -/* m0 will be copied with g0, as it contains send desc */ +/* load argb */ +mov (1) g0.8<1>UD 0x00000000UD { align1 mask_disable }; + +/* src_msg will be copied with g0, as it contains send desc */ /* emit sampler 'send' cmd */ -send (16) 0 /* msg reg index */ +send (16) src_msg_ind /* msg reg index */ src_sample0<1>UW /* readback */ g0<8,8,1>UW /* copy to msg start reg*/ sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) /* here(src->dst) we should use src_sampler and src_surface */ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ - -// mov (8) src_sample7<1>UD src_sample7<8,8,1>UD { align1 }; /* wait sampler return */ - -/* if we set up read-back reg correctly, emit dataport write 'send' cmd with EOT */ - diff --git a/src/exa_wm_src_sample_argb.g4b b/src/exa_wm_src_sample_argb.g4b new file mode 100644 index 00000000..1d4a7304 --- /dev/null +++ b/src/exa_wm_src_sample_argb.g4b @@ -0,0 +1,2 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x01800031, 0x22001d29, 0x008d0000, 0x02580001 }, diff --git a/src/exa_wm_write.g4a b/src/exa_wm_write.g4a index 9a821d72..5d3e6b1e 100644 --- a/src/exa_wm_write.g4a +++ b/src/exa_wm_write.g4a @@ -31,9 +31,6 @@ include(`exa_wm.g4i') -/* m0, m1 are all direct passed by PS thread payload */ -mov (8) m1<1>F g1<8,8,1>F { align1 }; - /* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */ /* src_sample0 -> m2 src_sample1 -> m6 @@ -55,7 +52,7 @@ mov (8) m8<1>F src_sample5<8,8,1>F { align1 }; mov (8) m9<1>F src_sample7<8,8,1>F { align1 }; /* m0, m1 are all direct passed by PS thread payload */ -mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable }; +mov (8) m1<1>UD g1<8,8,1>UD { align1 }; /* write */ send (16) 0 acc0<1>UW g0<8,8,1>UW write ( @@ -76,5 +73,4 @@ nop; nop; nop; nop; -nop; diff --git a/src/exa_wm_write.g4b b/src/exa_wm_write.g4b index dd266a3e..b7421c21 100644 
--- a/src/exa_wm_write.g4b +++ b/src/exa_wm_write.g4b @@ -1,13 +1,12 @@ - { 0x00600001, 0x202003be, 0x008d0020, 0x00000000 }, - { 0x00600001, 0x204003be, 0x008d0240, 0x00000000 }, - { 0x00600001, 0x206003be, 0x008d0280, 0x00000000 }, - { 0x00600001, 0x208003be, 0x008d02c0, 0x00000000 }, - { 0x00600001, 0x20a003be, 0x008d0300, 0x00000000 }, - { 0x00600001, 0x20c003be, 0x008d0260, 0x00000000 }, - { 0x00600001, 0x20e003be, 0x008d02a0, 0x00000000 }, - { 0x00600001, 0x210003be, 0x008d02e0, 0x00000000 }, - { 0x00600001, 0x212003be, 0x008d0320, 0x00000000 }, - { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x204003be, 0x008d0200, 0x00000000 }, + { 0x00600001, 0x206003be, 0x008d0240, 0x00000000 }, + { 0x00600001, 0x208003be, 0x008d0280, 0x00000000 }, + { 0x00600001, 0x20a003be, 0x008d02c0, 0x00000000 }, + { 0x00600001, 0x20c003be, 0x008d0220, 0x00000000 }, + { 0x00600001, 0x20e003be, 0x008d0260, 0x00000000 }, + { 0x00600001, 0x210003be, 0x008d02a0, 0x00000000 }, + { 0x00600001, 0x212003be, 0x008d02e0, 0x00000000 }, + { 0x00600001, 0x20200022, 0x008d0020, 0x00000000 }, { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, @@ -17,4 +16,3 @@ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, - { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/exa_wm_xy.g4b b/src/exa_wm_xy.g4b index 7784a3d1..c5620cdd 100644 --- a/src/exa_wm_xy.g4b +++ b/src/exa_wm_xy.g4b @@ -1,4 +1,4 @@ - { 0x00800040, 0x21806d29, 0x00480028, 0x10101010 }, - { 0x00800040, 0x21c06d29, 0x0048002a, 0x11001100 }, - { 0x00802040, 0x2100753d, 0x008d0180, 0x00004020 }, - { 0x00802040, 0x2140753d, 0x008d01c0, 0x00004024 }, + { 0x00800040, 0x24406d29, 0x00480028, 0x10101010 }, + { 0x00800040, 0x24006d29, 0x0048002a, 0x11001100 }, + { 0x00802040, 0x2100753d, 0x008d0440, 
0x00004020 }, + { 0x00802040, 0x2140753d, 0x008d0400, 0x00004024 }, diff --git a/src/i810_reg.h b/src/i810_reg.h index d799e77f..834b948c 100644 --- a/src/i810_reg.h +++ b/src/i810_reg.h @@ -2322,6 +2322,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define MI_FLUSH (0x04<<23) #define MI_WRITE_DIRTY_STATE (1<<4) #define MI_END_SCENE (1<<3) +#define MI_GLOBAL_SNAPSHOT_COUNT_RESET (1<<3) #define MI_INHIBIT_RENDER_CACHE_FLUSH (1<<2) #define MI_STATE_INSTRUCTION_CACHE_FLUSH (1<<1) #define MI_INVALIDATE_MAP_CACHE (1<<0) diff --git a/src/i965_render.c b/src/i965_render.c index e348c2b0..c2260eba 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -285,7 +285,7 @@ static int next_offset, total_state_size; static char *state_base; static int state_base_offset; static float *vb; -static int vb_size = (6 * 4) * 4 ; /* 6 DWORDS per vertex - and mask*/ +static int vb_size = (2 + 3 + 3) * 3 * 4; /* (dst, src, mask) 3 vertices, 4 bytes */ static uint32_t src_blend, dst_blend; @@ -318,7 +318,7 @@ static const uint32_t sip_kernel_static[][4] = { */ #define SF_KERNEL_NUM_GRF 16 -#define SF_MAX_THREADS 2 +#define SF_MAX_THREADS 1 static const uint32_t sf_kernel_static[][4] = { #include "exa_sf.g4b" @@ -329,29 +329,31 @@ static const uint32_t sf_kernel_static_mask[][4] = { }; /* ps kernels */ -#define PS_KERNEL_NUM_GRF 32 -#define PS_MAX_THREADS 32 +#define PS_KERNEL_NUM_GRF 48 +#define PS_MAX_THREADS 32 +#define PS_SCRATCH_SPACE 2048 +#define PS_SCRATCH_SPACE_LOG 1 /* log2 (PS_SCRATCH_SPACE) - 10 (1024 is 0, 2048 is 1) */ static const uint32_t ps_kernel_static_nomask_affine [][4] = { #include "exa_wm_xy.g4b" #include "exa_wm_src_affine.g4b" -#include "exa_wm_src_sample.g4b" +#include "exa_wm_src_sample_argb.g4b" #include "exa_wm_write.g4b" }; static const uint32_t ps_kernel_static_nomask_projective [][4] = { #include "exa_wm_xy.g4b" #include "exa_wm_src_projective.g4b" -#include "exa_wm_src_sample.g4b" +#include "exa_wm_src_sample_argb.g4b" #include 
"exa_wm_write.g4b" }; static const uint32_t ps_kernel_static_maskca [][4] = { #include "exa_wm_xy.g4b" #include "exa_wm_src_affine.g4b" -#include "exa_wm_src_sample.g4b" +#include "exa_wm_src_sample_argb.g4b" #include "exa_wm_mask_affine.g4b" -#include "exa_wm_mask_sample.g4b" +#include "exa_wm_mask_sample_argb.g4b" #include "exa_wm_ca.g4b" #include "exa_wm_write.g4b" }; @@ -359,9 +361,9 @@ static const uint32_t ps_kernel_static_maskca [][4] = { static const uint32_t ps_kernel_static_maskca_srcalpha [][4] = { #include "exa_wm_xy.g4b" #include "exa_wm_src_affine.g4b" -#include "exa_wm_src_sample.g4b" +#include "exa_wm_src_sample_a.g4b" #include "exa_wm_mask_affine.g4b" -#include "exa_wm_mask_sample.g4b" +#include "exa_wm_mask_sample_argb.g4b" #include "exa_wm_ca_srcalpha.g4b" #include "exa_wm_write.g4b" }; @@ -369,9 +371,9 @@ static const uint32_t ps_kernel_static_maskca_srcalpha [][4] = { static const uint32_t ps_kernel_static_masknoca [][4] = { #include "exa_wm_xy.g4b" #include "exa_wm_src_affine.g4b" -#include "exa_wm_src_sample.g4b" +#include "exa_wm_src_sample_argb.g4b" #include "exa_wm_mask_affine.g4b" -#include "exa_wm_mask_sample.g4b" +#include "exa_wm_mask_sample_a.g4b" #include "exa_wm_noca.g4b" #include "exa_wm_write.g4b" }; @@ -432,21 +434,21 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, pI830->transform[0] = pSrcPicture->transform; is_affine_src = i830_transform_is_affine (pI830->transform[0]); - is_affine_mask = i830_transform_is_affine (pI830->transform[1]); - is_affine = is_affine_src && is_affine_mask; if (!pMask) { pI830->transform[1] = NULL; pI830->scale_units[1][0] = -1; pI830->scale_units[1][1] = -1; + is_affine_mask = TRUE; } else { pI830->transform[1] = pMaskPicture->transform; - if (pI830->transform[1]) - I830FALLBACK("i965 mask transform not implemented!\n"); pI830->scale_units[1][0] = pMask->drawable.width; pI830->scale_units[1][1] = pMask->drawable.height; + is_affine_mask = i830_transform_is_affine (pI830->transform[1]); } + 
is_affine = is_affine_src && is_affine_mask; + /* setup 3d pipeline state */ binding_table_entries = 2; /* default no mask */ @@ -463,7 +465,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, next_offset = wm_offset + sizeof(*wm_state); wm_scratch_offset = ALIGN(next_offset, 1024); - next_offset = wm_scratch_offset + 1024 * PS_MAX_THREADS; + next_offset = wm_scratch_offset + PS_SCRATCH_SPACE * PS_MAX_THREADS; cc_offset = ALIGN(next_offset, 32); next_offset = cc_offset + sizeof(*cc_state); @@ -782,6 +784,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, I830FALLBACK("Bad filter 0x%x\n", pMaskPicture->filter); } + mask_sampler_state->ss0.default_color_mode = 0; /* GL mode */ if (!pMaskPicture->repeat) { mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; @@ -885,7 +888,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, wm_state->thread0.kernel_start_pointer = (state_base_offset + ps_kernel_offset) >> 6; wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); - wm_state->thread1.single_program_flow = 0; + wm_state->thread1.single_program_flow = 1; if (!pMask) wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ else @@ -893,7 +896,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, wm_state->thread2.scratch_space_base_pointer = (state_base_offset + wm_scratch_offset)>>10; - wm_state->thread2.per_thread_scratch_space = 0; + wm_state->thread2.per_thread_scratch_space = PS_SCRATCH_SPACE_LOG; wm_state->thread3.const_urb_entry_read_length = 0; wm_state->thread3.const_urb_entry_read_offset = 0; /* Each pair of attributes (src/mask coords) is one URB entry */ @@ -1044,12 +1047,12 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, if (is_affine) { src_format = BRW_SURFACEFORMAT_R32G32_FLOAT; - w_component = BRW_VFCOMPONENT_NOSTORE; + w_component = BRW_VFCOMPONENT_STORE_1_FLT; } else { src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; - w_component = BRW_VFCOMPONENT_NOSTORE; + w_component = 
BRW_VFCOMPONENT_STORE_SRC; } BEGIN_BATCH(pMask?12:10); /* Set up the pointer to our (single) vertex buffer */ @@ -1083,7 +1086,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | (w_component << VE1_VFCOMPONENT_2_SHIFT) | - (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | ((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ /* u1, v1, w1 */ if (pMask) { @@ -1095,15 +1098,15 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | (w_component << VE1_VFCOMPONENT_2_SHIFT) | - (BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) | - ((4 + 2 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | + ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ } ADVANCE_BATCH(); } #ifdef I830DEBUG - ErrorF("try to sync to show any errors..."); + ErrorF("try to sync to show any errors...\n"); I830Sync(pScrn); #endif return TRUE; @@ -1119,7 +1122,6 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, Bool is_affine_src, is_affine_mask, is_affine; float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; int i; - int per_vertex = 2; /* dst x/y */ is_affine_src = i830_transform_is_affine (pI830->transform[0]); is_affine_mask = i830_transform_is_affine (pI830->transform[1]); @@ -1139,7 +1141,6 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, pI830->transform[0], &src_x[2], &src_y[2])) return; - per_vertex += 2; /* src u/v */ } else { @@ -1158,14 +1159,13 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, &src_x[2], &src_y[2], &src_w[2])) return; - 
per_vertex += 3; /* src u/v/w */ } if (pI830->scale_units[1][0] == -1 || pI830->scale_units[1][1] == -1) { has_mask = FALSE; } else { has_mask = TRUE; - if (is_affine_mask) { + if (is_affine) { if (!i830_get_transformed_coordinates(maskX, maskY, pI830->transform[1], &mask_x[0], &mask_y[0])) @@ -1178,7 +1178,6 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, pI830->transform[1], &mask_x[2], &mask_y[2])) return; - per_vertex += 2; /* mask u/v */ } else { if (!i830_get_transformed_coordinates_3d(maskX, maskY, pI830->transform[1], @@ -1195,10 +1194,17 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, &mask_x[2], &mask_y[2], &mask_w[2])) return; - per_vertex += 3; /* mask u/v/w */ } } + { + BEGIN_BATCH(2); + OUT_BATCH(MI_FLUSH | + MI_STATE_INSTRUCTION_CACHE_FLUSH | + BRW_MI_GLOBAL_SNAPSHOT_RESET); + OUT_BATCH(MI_NOOP); + ADVANCE_BATCH(); + } /* Wait for any existing composite rectangles to land before we overwrite * the VB with the next one. 
*/ @@ -1246,6 +1252,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, if (!is_affine) vb[i++] = mask_w[0]; } + assert (i * 4 <= vb_size); { BEGIN_BATCH(6); @@ -1262,7 +1269,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, ADVANCE_BATCH(); } #ifdef I830DEBUG - ErrorF("sync after 3dprimitive"); + ErrorF("sync after 3dprimitive\n"); I830Sync(pScrn); #endif /* we must be sure that the pipeline is flushed before next exa draw, diff --git a/src/i965_video.c b/src/i965_video.c index 41f56a9d..1d2c3f54 100644 --- a/src/i965_video.c +++ b/src/i965_video.c @@ -78,7 +78,7 @@ static const uint32_t sip_kernel_static[][4] = { #define SF_MAX_THREADS 1 static const uint32_t sf_kernel_static[][4] = { -#include "sf_prog.h" +#include "packed_yuv_sf.g4b" }; /* @@ -94,7 +94,7 @@ static const uint32_t sf_kernel_static[][4] = { #define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) static const uint32_t ps_kernel_static[][4] = { -#include "wm_prog.h" +#include "packed_yuv_wm.g4b" }; #define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1)) |