diff options
author | Zhenyu Wang <zhenyuw@linux.intel.com> | 2009-06-25 14:05:40 +0800 |
---|---|---|
committer | Zhenyu Wang <zhenyuw@linux.intel.com> | 2009-06-30 11:12:12 +0800 |
commit | 488acc4595bb7f40130afcb8bcb05656ff3ae82c (patch) | |
tree | 39c65836fd73a03f84c5672b6c684f07abd06b69 /src/render_program | |
parent | 170cae0c8d58fc141de1d8a2f17a4328d39c1263 (diff) |
Move shader programs under its own subdirectory
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Diffstat (limited to 'src/render_program')
43 files changed, 1952 insertions, 0 deletions
diff --git a/src/render_program/Makefile.am b/src/render_program/Makefile.am new file mode 100644 index 00000000..820303fc --- /dev/null +++ b/src/render_program/Makefile.am @@ -0,0 +1,66 @@ +INTEL_G4A = \ + packed_yuv_sf.g4a \ + packed_yuv_wm.g4a \ + exa_sf.g4a \ + exa_sf_mask.g4a \ + exa_wm_src_affine.g4a \ + exa_wm_src_projective.g4a \ + exa_wm_src_sample_argb.g4a \ + exa_wm_src_sample_a.g4a \ + exa_wm_src_sample_planar.g4a \ + exa_wm_mask_affine.g4a \ + exa_wm_mask_projective.g4a \ + exa_wm_mask_sample_argb.g4a \ + exa_wm_mask_sample_a.g4a \ + exa_wm_noca.g4a \ + exa_wm_ca.g4a \ + exa_wm_ca_srcalpha.g4a \ + exa_wm_write.g4a \ + exa_wm_yuv_rgb.g4a \ + exa_wm_xy.g4a + +INTEL_G4I = \ + exa_wm.g4i \ + exa_wm_affine.g4i \ + exa_wm_projective.g4i + +INTEL_G4B = \ + packed_yuv_sf.g4b \ + packed_yuv_wm.g4b \ + exa_sf.g4b \ + exa_sf_mask.g4b \ + exa_wm_src_affine.g4b \ + exa_wm_src_projective.g4b \ + exa_wm_src_sample_argb.g4b \ + exa_wm_src_sample_a.g4b \ + exa_wm_src_sample_planar.g4b \ + exa_wm_mask_affine.g4b \ + exa_wm_mask_projective.g4b \ + exa_wm_mask_sample_argb.g4b \ + exa_wm_mask_sample_a.g4b \ + exa_wm_noca.g4b \ + exa_wm_ca.g4b \ + exa_wm_ca_srcalpha.g4b \ + exa_wm_write.g4b \ + exa_wm_yuv_rgb.g4b \ + exa_wm_xy.g4b + + +EXTRA_DIST = \ + $(INTEL_G4A) \ + $(INTEL_G4I) \ + $(INTEL_G4B) + +if HAVE_GEN4ASM + +SUFFIXES = .g4a .g4b +.g4a.g4b: + m4 -I$(srcdir) -s $< > $*.g4m && intel-gen4asm -o $@ $*.g4m && rm $*.g4m + +$(INTEL_G4B): $(INTEL_G4I) + +BUILT_SOURCES= $(INTEL_G4B) + +clean-local: + -rm -f $(INTEL_G4B) +endif diff --git a/src/render_program/exa_sf.g4a b/src/render_program/exa_sf.g4a new file mode 100644 index 00000000..3e660ac2 --- /dev/null +++ b/src/render_program/exa_sf.g4a @@ -0,0 +1,107 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Keith Packard <keithp@keithp.com> + * Eric Anholt <eric@anholt.net> + * + */ + +/* + * Inputs (note all sub-register addresses are bytes, not float indices) + * + * Note that the vertices will have been reordered: + * + * V0 is topmost (leftmost among topmost) (upper left) + * V1 is next clockwise (lower right) + * V2 is remaining (lower left) + * + * V0 ...................... XX + * | . + * | . + * | . + * V2------------------------V1 + * + * G0 thread state -- just pass along + * + * G1 and G2 are fixed by SF spec + * + * G1.0 reserved + * G1.4 Provoking vertex + * G1.8 Determinant + * G1.12 X1 - X0 + * G1.16 X2 - X0 + * G1.20 Y1 - Y0 + * G1.24 Y2 - Y0 + * G1.30 reserved + * + * G2.0 Z0 + * G2.4 1/W0 + * G2.8 Z1 + * G2.12 1/W1 + * G2.16 Z2 + * G2.20 1/W2 + * G2.24 reserved + * G2.30 reserved + * + * G3 is V0 Vertex Attribute Data from URB (upper left) + * + * G3.0 u0 + * G3.4 v0 + * + * G4 is V1 Vertex Attribute Data from URB (lower right) + * + * G4.0 u1 + * G4.4 v1 + * + * G5 is V2 Vertex Attribute Data from URB (lower left) + * + */ + +/* Compute inverses of the input deltas */ +send (4) 0 g6<1>F g1.12<4,4,1>F math inv mlen 1 rlen 1 { align1 }; + +/* texture location at V0 */ +mov (4) m3<1>F g3<4,4,1>F { align1 }; + +/* compute V1 - V2 (motion in X) for texture coordinates */ +add (4) g7<1>F g4<4,4,1>F -g5<4,4,1>F { align1 }; + +/* multiply by 1/dx */ +mul (4) m1<1>F g7<4,4,1>F g6.0<0,1,0>F { align1 }; + +/* Compute V2 - V0 (motion in Y) for texture coordinates */ +add (4) g7<1>F g5<4,4,1>F -g3<4,4,1>F { align1 }; + +/* multiply by 1/dy */ +mul (4) m2<1>F g7<4,4,1>F g6.8<0,1,0>F {align1 }; + +/* and we're done */ +send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT }; +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; diff --git a/src/render_program/exa_sf.g4b b/src/render_program/exa_sf.g4b new file mode 100644 index 00000000..223c9c9a --- /dev/null +++ b/src/render_program/exa_sf.g4b @@ -0,0 +1,15 @@ + { 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 }, + { 0x00400001, 0x206003be, 0x00690060, 0x00000000 }, + { 0x00400040, 0x20e077bd, 0x00690080, 0x006940a0 }, + { 0x00400041, 0x202077be, 0x006900e0, 0x000000c0 }, + { 0x00400040, 0x20e077bd, 0x006900a0, 0x00694060 }, + { 0x00400041, 0x204077be, 0x006900e0, 0x000000c8 }, + { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/render_program/exa_sf_mask.g4a b/src/render_program/exa_sf_mask.g4a new file mode 100644 index 00000000..5078d014 --- /dev/null +++ b/src/render_program/exa_sf_mask.g4a @@ -0,0 +1,107 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Keith Packard <keithp@keithp.com> + * Eric Anholt <eric@anholt.net> + * Wang Zhenyu <zhenyu.z.wang@intel.com> + */ + +/* + * Inputs (note all sub-register addresses are bytes, not float indices) + * + * Note that the vertices will have been reordered: + * + * V0 is topmost (leftmost among topmost) (upper left) + * V1 is next clockwise (lower right) + * V2 is remaining (lower left) + * + * V0 ...................... XX + * | . + * | . + * | . + * V2------------------------V1 + * + * G0 thread state -- just pass along + * + * G1 and G2 are fixed by SF spec + * + * G1.0 reserved + * G1.4 Provoking vertex + * G1.8 Determinant + * G1.12 X1 - X0 + * G1.16 X2 - X0 + * G1.20 Y1 - Y0 + * G1.24 Y2 - Y0 + * G1.30 reserved + * + * G2.0 Z0 + * G2.4 1/W0 + * G2.8 Z1 + * G2.12 1/W1 + * G2.16 Z2 + * G2.20 1/W2 + * G2.24 reserved + * G2.30 reserved + * + * G3 is V0 Vertex Attribute Data from URB (upper left) + * + * G3.0 u0 + * G3.4 v0 + * + * G4 is V1 Vertex Attribute Data from URB (lower right) + * + * G4.0 u1 + * G4.4 v1 + * + * G5 is V2 Vertex Attribute Data from URB (lower left) + * + */ + +/* Compute inverses of the input deltas */ +send (4) 0 g6<1>F g1.12<4,4,1>F math inv mlen 1 rlen 1 { align1 }; + +/* texture location at V0 */ +mov (8) m3<1>F g3<8,8,1>F { align1 }; + +/* compute V1 - V2 (motion in X) for texture coordinates */ +add (8) g7<1>F g4<8,8,1>F -g5<8,8,1>F { align1 }; + +/* multiply by 1/dx */ +mul (8) m1<1>F g7<8,8,1>F g6.0<0,1,0>F { align1 }; + +/* Compute V2 - V0 (motion in Y) for texture coordinates */ +add (8) g7<1>F g5<8,8,1>F -g3<8,8,1>F { align1 }; + +/* multiply by 1/dy */ +mul (8) m2<1>F g7<8,8,1>F g6.8<0,1,0>F {align1 }; + +/* and we're done */ +send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT }; +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; diff --git a/src/render_program/exa_sf_mask.g4b b/src/render_program/exa_sf_mask.g4b new file mode 100644 index 00000000..be0a77b0 --- /dev/null +++ b/src/render_program/exa_sf_mask.g4b @@ -0,0 +1,15 @@ + { 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 }, + { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 }, + { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d40a0 }, + { 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 }, + { 0x00600040, 0x20e077bd, 0x008d00a0, 0x008d4060 }, + { 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 }, + { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/render_program/exa_wm.g4i b/src/render_program/exa_wm.g4i new file mode 100644 index 00000000..5d3d45b1 --- /dev/null +++ b/src/render_program/exa_wm.g4i @@ -0,0 +1,156 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* + * Input parameters + */ + +/* Destination X/Y */ +define(`dst_x_uw', `g1.8<2,4,0>UW') +define(`dst_y_uw', `g1.10<2,4,0>UW') +define(`screen_x0', `g1.0<0,1,0>F') +define(`screen_y0', `g1.4<0,1,0>F') + +/* Source transformation parameters */ +define(`src_du_dx', `g3.0<0,1,0>F') +define(`src_du_dy', `g3.4<0,1,0>F') +define(`src_uo', `g3.12<0,1,0>F') +define(`src_dv_dx', `g3.16<0,1,0>F') +define(`src_dv_dy', `g3.20<0,1,0>F') +define(`src_vo', `g3.28<0,1,0>F') +define(`src_dw_dx', `g4.0<0,1,0>F') +define(`src_dw_dy', `g4.4<0,1,0>F') +define(`src_wo', `g4.12<0,1,0>F') + +define(`mask_du_dx', `g5.0<0,1,0>F') +define(`mask_du_dy', `g5.4<0,1,0>F') +define(`mask_uo', `g5.12<0,1,0>F') +define(`mask_dv_dx', `g5.16<0,1,0>F') +define(`mask_dv_dy', `g5.20<0,1,0>F') +define(`mask_vo', `g5.28<0,1,0>F') +define(`mask_dw_dx', `g6.0<0,1,0>F') +define(`mask_dw_dy', `g6.4<0,1,0>F') +define(`mask_wo', `g6.12<0,1,0>F') + +/* + * Local variables. Pairs must be aligned on even reg boundry + */ + +/* this holds the X dest coordinates */ +define(`dst_x', `g8') +define(`dst_x_0', `dst_x') +define(`dst_x_1', `g9') + +/* this holds the Y dest coordinates */ +define(`dst_y', `g10') +define(`dst_y_0', `dst_y') +define(`dst_y_1', `g11') + +/* When computing x * dn/dx, use this */ +define(`temp_x', `g30') +define(`temp_x_0', `temp_x') +define(`temp_x_1', `g31') + +/* When computing y * dn/dy, use this */ +define(`temp_y', `g28') +define(`temp_y_0', temp_y) +define(`temp_y_1', `g29') + +/* when loading x/y, use these to hold them in UW format */ +define(`temp_x_uw', temp_x) +define(`temp_y_uw', temp_y) + +/* compute source and mask u/v to this pair to send to sampler */ +define(`src_msg', `m1') +define(`src_msg_ind',`1') +define(`src_u', `m2') +define(`src_v', `m4') +define(`src_w', `g12') +define(`src_w_0', `src_w') +define(`src_w_1', `g13') + +define(`mask_msg', `m7') +define(`mask_msg_ind',`7') +define(`mask_u', `m8') +define(`mask_v', `m10') +define(`mask_w', `src_w') +define(`mask_w_0', `src_w_0') +define(`mask_w_1', `src_w_1') + +/* sample src to these registers */ +define(`src_sample_base', `g14') + +define(`src_sample_r', `g14') +define(`src_sample_r_01', `g14') +define(`src_sample_r_23', `g15') + +define(`src_sample_g', `g16') +define(`src_sample_g_01', `g16') +define(`src_sample_g_23', `g17') + +define(`src_sample_b', `g18') +define(`src_sample_b_01', `g18') +define(`src_sample_b_23', `g19') + +define(`src_sample_a', `g20') +define(`src_sample_a_01', `g20') +define(`src_sample_a_23', `g21') + +/* sample mask to these registers */ +define(`mask_sample_base', `g22') + +define(`mask_sample_r', `g22') +define(`mask_sample_r_01', `g22') +define(`mask_sample_r_23', `g23') + +define(`mask_sample_g', `g24') +define(`mask_sample_g_01', `g24') +define(`mask_sample_g_23', `g25') + +define(`mask_sample_b', `g26') +define(`mask_sample_b_01', `g26') +define(`mask_sample_b_23', `g27') + +define(`mask_sample_a', `g28') +define(`mask_sample_a_01', `g28') +define(`mask_sample_a_23', `g29') + +/* data port SIMD16 send registers */ + +define(`data_port_msg_0', `m0') +define(`data_port_msg_0_ind', `0') +define(`data_port_msg_1', `m1') +define(`data_port_r_01', `m2') +define(`data_port_g_01', `m3') +define(`data_port_b_01', `m4') +define(`data_port_a_01', `m5') + +define(`data_port_r_23', `m6') +define(`data_port_g_23', `m7') +define(`data_port_b_23', `m8') +define(`data_port_a_23', `m9') + diff --git a/src/render_program/exa_wm_affine.g4i b/src/render_program/exa_wm_affine.g4i new file mode 100644 index 00000000..e72656b6 --- /dev/null +++ b/src/render_program/exa_wm_affine.g4i @@ -0,0 +1,44 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* + * Fragment to compute src u/v values under an affine transform + */ + +/********** Compute u *************/ + +mul (16) temp_x<1>F dst_x<8,8,1>F du_dx { compr align1 }; +mul (16) temp_y<1>F dst_y<8,8,1>F du_dy { compr align1 }; +add (16) temp_x<1>F temp_x<8,8,1>F temp_y<8,8,1>F { compr align1 }; +add (16) u<1>F temp_x<8,8,1>F uo { compr align1 }; + +/********** Compute v *************/ + +mul (16) temp_x<1>F dst_x<8,8,1>F dv_dx { compr align1 }; +mul (16) temp_y<1>F dst_y<8,8,1>F dv_dy { compr align1 }; +add (16) temp_x<1>F temp_x<8,8,1>F temp_y<8,8,1>F { compr align1 }; +add (16) v<1>F temp_x<8,8,1>F vo { compr align1 }; diff --git a/src/render_program/exa_wm_ca.g4a b/src/render_program/exa_wm_ca.g4a new file mode 100644 index 00000000..5d982b38 --- /dev/null +++ b/src/render_program/exa_wm_ca.g4a @@ -0,0 +1,38 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* + * Composite src and mask together, no component alpha + */ + +include(`exa_wm.g4i') + +/* mul mask rgba channels to src */ +mul (16) src_sample_r_01<1>F src_sample_r_01<8,8,1>F mask_sample_r_01<8,8,1>F { compr align1 }; +mul (16) src_sample_g_01<1>F src_sample_g_01<8,8,1>F mask_sample_g_01<8,8,1>F { compr align1 }; +mul (16) src_sample_b_01<1>F src_sample_b_01<8,8,1>F mask_sample_b_01<8,8,1>F { compr align1 }; +mul (16) src_sample_a_01<1>F src_sample_a_01<8,8,1>F mask_sample_a_01<8,8,1>F { compr align1 }; diff --git a/src/render_program/exa_wm_ca.g4b b/src/render_program/exa_wm_ca.g4b new file mode 100644 index 00000000..372e8b26 --- /dev/null +++ b/src/render_program/exa_wm_ca.g4b @@ -0,0 +1,4 @@ + { 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d02c0 }, + { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 }, + { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 }, + { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 }, diff --git a/src/render_program/exa_wm_ca_srcalpha.g4a b/src/render_program/exa_wm_ca_srcalpha.g4a new file mode 100644 index 00000000..d1f847fd --- /dev/null +++ b/src/render_program/exa_wm_ca_srcalpha.g4a @@ -0,0 +1,37 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* + * Composite src and mask together, no component alpha + */ + +include(`exa_wm.g4i') + +mul (16) src_sample_r_01<1>F mask_sample_r_01<8,8,1>F src_sample_a_01<8,8,1>F { compr align1 }; +mul (16) src_sample_g_01<1>F mask_sample_g_01<8,8,1>F src_sample_a_01<8,8,1>F { compr align1 }; +mul (16) src_sample_b_01<1>F mask_sample_b_01<8,8,1>F src_sample_a_01<8,8,1>F { compr align1 }; +mul (16) src_sample_a_01<1>F mask_sample_a_01<8,8,1>F src_sample_a_01<8,8,1>F { compr align1 }; diff --git a/src/render_program/exa_wm_ca_srcalpha.g4b b/src/render_program/exa_wm_ca_srcalpha.g4b new file mode 100644 index 00000000..963d6760 --- /dev/null +++ b/src/render_program/exa_wm_ca_srcalpha.g4b @@ -0,0 +1,4 @@ + { 0x00802041, 0x21c077bd, 0x008d02c0, 0x008d0280 }, + { 0x00802041, 0x220077bd, 0x008d0300, 0x008d0280 }, + { 0x00802041, 0x224077bd, 0x008d0340, 0x008d0280 }, + { 0x00802041, 0x228077bd, 0x008d0380, 0x008d0280 }, diff --git a/src/render_program/exa_wm_mask_affine.g4a b/src/render_program/exa_wm_mask_affine.g4a new file mode 100644 index 00000000..9c52d2f9 --- /dev/null +++ b/src/render_program/exa_wm_mask_affine.g4a @@ -0,0 +1,41 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +include(`exa_wm.g4i') + +define(`du_dx', `mask_du_dx') +define(`du_dy', `mask_du_dy') +define(`uo', `mask_uo') + +define(`dv_dx', `mask_dv_dx') +define(`dv_dy', `mask_dv_dy') +define(`vo', `mask_vo') + +define(`u', `mask_u') +define(`v', `mask_v') + +include(`exa_wm_affine.g4i') diff --git a/src/render_program/exa_wm_mask_affine.g4b b/src/render_program/exa_wm_mask_affine.g4b new file mode 100644 index 00000000..14a54517 --- /dev/null +++ b/src/render_program/exa_wm_mask_affine.g4b @@ -0,0 +1,8 @@ + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x210077be, 0x008d03c0, 0x000000ac }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x214077be, 0x008d03c0, 0x000000bc }, diff --git a/src/render_program/exa_wm_mask_projective.g4a b/src/render_program/exa_wm_mask_projective.g4a new file mode 100644 index 00000000..9acaaced --- /dev/null +++ b/src/render_program/exa_wm_mask_projective.g4a @@ -0,0 +1,53 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +include(`exa_wm.g4i') + +define(`du_dx', `mask_du_dx') +define(`du_dy', `mask_du_dy') +define(`uo', `mask_uo') + +define(`dv_dx', `mask_dv_dx') +define(`dv_dy', `mask_dv_dy') +define(`vo', `mask_vo') + +define(`dw_dx', `mask_dw_dx') +define(`dw_dy', `mask_dw_dy') +define(`wo', `mask_wo') + +define(`u', `mask_u') +define(`v', `mask_v') +define(`w', `mask_w') + +define(`u_0', `mask_u_0') +define(`v_0', `mask_v_0') +define(`u_1', `mask_u_1') +define(`v_1', `mask_v_1') +define(`w_0', `mask_w_0') +define(`w_1', `mask_w_1') + +include(`exa_wm_projective.g4i') diff --git a/src/render_program/exa_wm_mask_projective.g4b b/src/render_program/exa_wm_mask_projective.g4b new file mode 100644 index 00000000..78cb9aef --- /dev/null +++ b/src/render_program/exa_wm_mask_projective.g4b @@ -0,0 +1,16 @@ + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000c0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000c4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000cc }, + { 0x00600031, 0x21801fbd, 0x008d03c0, 0x01110001 }, + { 0x00600031, 0x21a01fbd, 0x008d03e0, 0x01110001 }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000ac }, + { 0x00802041, 0x210077be, 0x008d03c0, 0x008d0180 }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000bc }, + { 0x00802041, 0x214077be, 0x008d03c0, 0x008d0180 }, diff --git a/src/render_program/exa_wm_mask_sample_a.g4a b/src/render_program/exa_wm_mask_sample_a.g4a new file mode 100644 index 00000000..bbb19d7a --- /dev/null +++ b/src/render_program/exa_wm_mask_sample_a.g4a @@ -0,0 +1,48 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* Sample the mask surface */ + +include(`exa_wm.g4i') + +/* prepare sampler read back gX register, which would be written back to output */ + +/* use simd16 sampler, param 0 is u, param 1 is v. */ +/* 'payload' loading, assuming tex coord start from g4 */ + +/* load only alpha */ +mov (1) g0.8<1>UD 0x00007000UD { align1 mask_disable }; + +/* mask_msg will be copied with g0, as it contains send desc */ +/* emit sampler 'send' cmd */ +send (16) mask_msg_ind /* msg reg index */ + mask_sample_a_01<1>UW /* readback */ + g0<8,8,1>UW /* copy to msg start reg*/ + sampler (2,1,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */ + diff --git a/src/render_program/exa_wm_mask_sample_a.g4b b/src/render_program/exa_wm_mask_sample_a.g4b new file mode 100644 index 00000000..018bd36a --- /dev/null +++ b/src/render_program/exa_wm_mask_sample_a.g4b @@ -0,0 +1,2 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00007000 }, + { 0x07800031, 0x23801d29, 0x008d0000, 0x02520102 }, diff --git a/src/render_program/exa_wm_mask_sample_argb.g4a b/src/render_program/exa_wm_mask_sample_argb.g4a new file mode 100644 index 00000000..def4cfe4 --- /dev/null +++ b/src/render_program/exa_wm_mask_sample_argb.g4a @@ -0,0 +1,48 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* Sample the mask surface */ + +include(`exa_wm.g4i') + +/* prepare sampler read back gX register, which would be written back to output */ + +/* use simd16 sampler, param 0 is u, param 1 is v. */ +/* 'payload' loading, assuming tex coord start from g4 */ + +/* load argb */ +mov (1) g0.8<1>UD 0x00000000UD { align1 mask_disable }; + +/* mask_msg will be copied with g0, as it contains send desc */ +/* emit sampler 'send' cmd */ +send (16) mask_msg_ind /* msg reg index */ + mask_sample_base<1>UW /* readback */ + g0<8,8,1>UW /* copy to msg start reg*/ + sampler (2,1,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ + diff --git a/src/render_program/exa_wm_mask_sample_argb.g4b b/src/render_program/exa_wm_mask_sample_argb.g4b new file mode 100644 index 00000000..b159cbaa --- /dev/null +++ b/src/render_program/exa_wm_mask_sample_argb.g4b @@ -0,0 +1,2 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x07800031, 0x22c01d29, 0x008d0000, 0x02580102 }, diff --git a/src/render_program/exa_wm_noca.g4a b/src/render_program/exa_wm_noca.g4a new file mode 100644 index 00000000..d0d60faa --- /dev/null +++ b/src/render_program/exa_wm_noca.g4a @@ -0,0 +1,38 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* + * Composite src and mask together, no component alpha + */ + +include(`exa_wm.g4i') +/* mul mask's alpha channel to src */ + +mul (16) src_sample_r_01<1>F src_sample_r_01<8,8,1>F mask_sample_a_01<8,8,1>F { compr align1 }; +mul (16) src_sample_g_01<1>F src_sample_g_01<8,8,1>F mask_sample_a_01<8,8,1>F { compr align1 }; +mul (16) src_sample_b_01<1>F src_sample_b_01<8,8,1>F mask_sample_a_01<8,8,1>F { compr align1 }; +mul (16) src_sample_a_01<1>F src_sample_a_01<8,8,1>F mask_sample_a_01<8,8,1>F { compr align1 }; diff --git a/src/render_program/exa_wm_noca.g4b b/src/render_program/exa_wm_noca.g4b new file mode 100644 index 00000000..15063341 --- /dev/null +++ b/src/render_program/exa_wm_noca.g4b @@ -0,0 +1,4 @@ + { 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d0380 }, + { 0x00802041, 0x220077bd, 0x008d0200, 0x008d0380 }, + { 0x00802041, 0x224077bd, 0x008d0240, 0x008d0380 }, + { 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 }, diff --git a/src/render_program/exa_wm_nomask.g4a b/src/render_program/exa_wm_nomask.g4a new file mode 100644 index 00000000..eb535fe3 --- /dev/null +++ b/src/render_program/exa_wm_nomask.g4a @@ -0,0 +1,143 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + */ + +/* + * This's for exa composite operation in no mask picture case. + * The simplest case is just sending what src picture has to dst picture. + */ + +/* I think this should be same as in g4a program for texture video, + as we also use 16-pixel dispatch. and SF scale in g3 is useful for us. */ + +/* The initial payload of the thread is always g0. + * WM_URB (incoming URB entries) is g3 + * X0_R is g4 + * X1_R is g5 + * Y0_R is g6 + * Y1_R is g7 + */ + + +/* Load X and Y coordinates and compute per-pixel coordinates */ +add (16) g4<1>UW g1.8<2,4,0>UW 0x10101010V { align1 }; +add (16) g6<1>UW g1.10<2,4,0>UW 0x11001100V { align1 }; + + /* Now, map these screen space coordinates into texture coordinates. */ + + /* subtract screen-space X origin of vertex 0. */ +add (16) g12<1>F g4<8,8,1>UW -g1.0<0,1,0>F { compr align1 }; + + /* subtract screen-space Y origin of vertex 0. */ +add (16) g16<1>F g6<8,8,1>UW -g1.4<0,1,0>F { compr align1 }; + + /* g8/g9 = X * du/dx */ +mul (16) g8<1>F g12<8,8,1>F g3.0<0,1,0>F { compr align1 }; + + /* g10/g11 = Y * du/dy */ +mul (16) g10<1>F g16<8,8,1>F g3.4<0,1,0>F { compr align1 }; + + /* g8/g9 = X du/dx + Y du/dy */ +add (16) g8<1>F g8<8,8,1>F g10<8,8,1>F { compr align1 }; + + /* m1/m2 = g8/g9 + uo */ +add (16) m1<1>F g8<8,8,1>F g3.12<0,1,0>F { compr align1 }; + + + /* g8/g9 = X * dv/dx */ +mul (16) g8<1>F g12<8,8,1>F g3.16<0,1,0>F { compr align1 }; + + /* g10/g11 = Y * du/dy */ +mul (16) g10<1>F g16<8,8,1>F g3.20<0,1,0>F { compr align1 }; + + /* g8/g9 = X du/dx + Y du/dy */ +add (16) g8<1>F g8<8,8,1>F g10<8,8,1>F { compr align1 }; + + /* m3/m4 = g8/g9 + vo */ +add (16) m3<1>F g8<8,8,1>F g3.28<0,1,0>F { compr align1 }; + + +/* prepare sampler read back gX register, which would be written back to output */ + +/* use simd16 sampler, param 0 is u, param 1 is v. */ +/* 'payload' loading, assuming tex coord start from g4 */ + +/* m0 will be copied with g0, as it contains send desc */ +/* emit sampler 'send' cmd */ +send (16) 0 /* msg reg index */ + g12<1>UW /* readback */ + g0<8,8,1>UW /* copy to msg start reg*/ + sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ + +mov (8) g19<1>UD g19<8,8,1>UD { align1 }; /* wait sampler return */ +/* if we set up read-back reg correctly, emit dataport write 'send' cmd with EOT */ + +/* m0, m1 are all direct passed by PS thread payload */ +mov (8) m1<1>F g1<8,8,1>F { align1 }; + +/* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */ +/* g12 -> m2 + g13 -> m6 + g14 -> m3 + g15 -> m7 + g16 -> m4 + g17 -> m8 + g18 -> m5 + g19 -> m9 +*/ +mov (8) m2<1>F g12<8,8,1>F { align1 }; +mov (8) m3<1>F g14<8,8,1>F { align1 }; +mov (8) m4<1>F g16<8,8,1>F { align1 }; +mov (8) m5<1>F g18<8,8,1>F { align1 }; +mov (8) m6<1>F g13<8,8,1>F { align1 }; +mov (8) m7<1>F g15<8,8,1>F { align1 }; +mov (8) m8<1>F g17<8,8,1>F { align1 }; +mov (8) m9<1>F g19<8,8,1>F { align1 }; + +/* m0, m1 are all direct passed by PS thread payload */ +mov (8) m1<1>UD g1<8,8,1>UD { align1 }; + +/* write */ +send (16) 0 acc0<1>UW g0<8,8,1>UW write ( + 0, /* binding_table */ + 8, /* pixel scordboard clear, msg type simd16 single source */ + 4, /* render target write */ + 0 /* no write commit message */ + ) + mlen 10 + rlen 0 + { align1 EOT }; + +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; diff --git a/src/render_program/exa_wm_projective.g4i b/src/render_program/exa_wm_projective.g4i new file mode 100644 index 00000000..7e2e0a82 --- /dev/null +++ b/src/render_program/exa_wm_projective.g4i @@ -0,0 +1,51 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/********** Compute w *************/ + +mul (16) temp_x<1>F dst_x<8,8,1>F dw_dx { compr align1 }; +mul (16) temp_y<1>F dst_y<8,8,1>F dw_dy { compr align1 }; +add (16) temp_x<1>F temp_x<8,8,1>F temp_y<8,8,1>F { compr align1 }; +add (16) temp_x<1>F temp_x<8,8,1>F wo { compr align1 }; +send (8) 0 w_0<1>F temp_x_0<8,8,1>F math inv mlen 1 rlen 1 { align1 }; +send (8) 0 w_1<1>F temp_x_1<8,8,1>F math inv mlen 1 rlen 1 { sechalf align1 }; + +/********** Compute u *************/ + +mul (16) temp_x<1>F dst_x<8,8,1>F du_dx { compr align1 }; +mul (16) temp_y<1>F dst_y<8,8,1>F du_dy { compr align1 }; +add (16) temp_x<1>F temp_x<8,8,1>F temp_y<8,8,1>F { compr align1 }; +add (16) temp_x<1>F temp_x<8,8,1>F uo { compr align1 }; +mul (16) u<1>F temp_x<8,8,1>F w<8,8,1>F { compr align1 }; + +/********** Compute v *************/ + +mul (16) temp_x<1>F dst_x<8,8,1>F dv_dx { compr align1 }; +mul (16) temp_y<1>F dst_y<8,8,1>F dv_dy { compr align1 }; +add (16) temp_x<1>F temp_x<8,8,1>F temp_y<8,8,1>F { compr align1 }; +add (16) temp_x<1>F temp_x<8,8,1>F vo { compr align1 }; +mul (16) v<1>F temp_x<8,8,1>F w<8,8,1>F { compr align1 }; diff --git a/src/render_program/exa_wm_src_affine.g4a b/src/render_program/exa_wm_src_affine.g4a new file mode 100644 index 00000000..3194b5a6 --- /dev/null +++ b/src/render_program/exa_wm_src_affine.g4a @@ -0,0 +1,45 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* + * Fragment to compute src u/v values under an affine transform + */ + +include(`exa_wm.g4i') + +define(`du_dx', `src_du_dx') +define(`du_dy', `src_du_dy') +define(`uo', `src_uo') + +define(`dv_dx', `src_dv_dx') +define(`dv_dy', `src_dv_dy') +define(`vo', `src_vo') + +define(`u', `src_u') +define(`v', `src_v') + +include(`exa_wm_affine.g4i') diff --git a/src/render_program/exa_wm_src_affine.g4b b/src/render_program/exa_wm_src_affine.g4b new file mode 100644 index 00000000..d30da873 --- /dev/null +++ b/src/render_program/exa_wm_src_affine.g4b @@ -0,0 +1,8 @@ + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x204077be, 0x008d03c0, 0x0000006c }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x208077be, 0x008d03c0, 0x0000007c }, diff --git a/src/render_program/exa_wm_src_projective.g4a b/src/render_program/exa_wm_src_projective.g4a new file mode 100644 index 00000000..16c9cd56 --- /dev/null +++ b/src/render_program/exa_wm_src_projective.g4a @@ -0,0 +1,49 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + + +include(`exa_wm.g4i') +define(`du_dx', `src_du_dx') +define(`du_dy', `src_du_dy') +define(`uo', `src_uo') +define(`dv_dx', `src_dv_dx') +define(`dv_dy', `src_dv_dy') +define(`vo', `src_vo') +define(`dw_dx', `src_dw_dx') +define(`dw_dy', `src_dw_dy') +define(`wo', `src_wo') +define(`u', `src_u') +define(`v', `src_v') +define(`w', `src_w') +define(`u_0', `src_u_0') +define(`v_0', `src_v_0') +define(`u_1', `src_u_1') +define(`v_1', `src_v_1') +define(`w_0', `src_w_0') +define(`w_1', `src_w_1') + +include(`exa_wm_projective.g4i') diff --git a/src/render_program/exa_wm_src_projective.g4b b/src/render_program/exa_wm_src_projective.g4b new file mode 100644 index 00000000..198bab3e --- /dev/null +++ b/src/render_program/exa_wm_src_projective.g4b @@ -0,0 +1,16 @@ + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000080 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000084 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000008c }, + { 0x00600031, 0x21801fbd, 0x008d03c0, 0x01110001 }, + { 0x00600031, 0x21a01fbd, 0x008d03e0, 0x01110001 }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000006c }, + { 0x00802041, 0x204077be, 0x008d03c0, 0x008d0180 }, + { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 }, + { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 }, + { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000007c }, + { 0x00802041, 0x208077be, 0x008d03c0, 0x008d0180 }, diff --git a/src/render_program/exa_wm_src_sample_a.g4a b/src/render_program/exa_wm_src_sample_a.g4a new file mode 100644 index 00000000..552aaeeb --- /dev/null +++ b/src/render_program/exa_wm_src_sample_a.g4a @@ -0,0 +1,47 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* Sample the src surface */ + +include(`exa_wm.g4i') + +/* prepare sampler read back gX register, which would be written back to output */ + +/* use simd16 sampler, param 0 is u, param 1 is v. */ +/* 'payload' loading, assuming tex coord start from g4 */ + +/* load alpha */ +mov (1) g0.8<1>UD 0x00007000UD { align1 mask_disable }; + +/* src_msg will be copied with g0, as it contains send desc */ +/* emit sampler 'send' cmd */ +send (16) src_msg_ind /* msg reg index */ + src_sample_a_01<1>UW /* readback */ + g0<8,8,1>UW /* copy to msg start reg*/ + sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */ diff --git a/src/render_program/exa_wm_src_sample_a.g4b b/src/render_program/exa_wm_src_sample_a.g4b new file mode 100644 index 00000000..ce8650a0 --- /dev/null +++ b/src/render_program/exa_wm_src_sample_a.g4b @@ -0,0 +1,2 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00007000 }, + { 0x01800031, 0x22801d29, 0x008d0000, 0x02520001 }, diff --git a/src/render_program/exa_wm_src_sample_argb.g4a b/src/render_program/exa_wm_src_sample_argb.g4a new file mode 100644 index 00000000..c20f53f2 --- /dev/null +++ b/src/render_program/exa_wm_src_sample_argb.g4a @@ -0,0 +1,47 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* Sample the src surface */ + +include(`exa_wm.g4i') + +/* prepare sampler read back gX register, which would be written back to output */ + +/* use simd16 sampler, param 0 is u, param 1 is v. */ +/* 'payload' loading, assuming tex coord start from g4 */ + +/* load argb */ +mov (1) g0.8<1>UD 0x00000000UD { align1 mask_disable }; + +/* src_msg will be copied with g0, as it contains send desc */ +/* emit sampler 'send' cmd */ +send (16) src_msg_ind /* msg reg index */ + src_sample_base<1>UW /* readback */ + g0<8,8,1>UW /* copy to msg start reg*/ + sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ diff --git a/src/render_program/exa_wm_src_sample_argb.g4b b/src/render_program/exa_wm_src_sample_argb.g4b new file mode 100644 index 00000000..c5b92740 --- /dev/null +++ b/src/render_program/exa_wm_src_sample_argb.g4b @@ -0,0 +1,2 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x00000000 }, + { 0x01800031, 0x21c01d29, 0x008d0000, 0x02580001 }, diff --git a/src/render_program/exa_wm_src_sample_planar.g4a b/src/render_program/exa_wm_src_sample_planar.g4a new file mode 100644 index 00000000..ca77b484 --- /dev/null +++ b/src/render_program/exa_wm_src_sample_planar.g4a @@ -0,0 +1,65 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* Sample the src surface in planar format */ + +include(`exa_wm.g4i') + +/* prepare sampler read back gX register, which would be written back to output */ + +/* use simd16 sampler, param 0 is u, param 1 is v. */ +/* 'payload' loading, assuming tex coord start from g4 */ + +/* load r */ +mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable }; + +/* src_msg will be copied with g0, as it contains send desc */ +/* emit sampler 'send' cmd */ + +/* sample Y */ +send (16) src_msg_ind /* msg reg index */ + src_sample_g<1>UW /* readback */ + g0<8,8,1>UW /* copy to msg start reg*/ + sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */ + +/* sample U (Cr) */ +send (16) src_msg_ind /* msg reg index */ + src_sample_r<1>UW /* readback */ + g0<8,8,1>UW /* copy to msg start reg*/ + sampler (3,2,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */ + +/* sample V (Cb) */ +send (16) src_msg_ind /* msg reg index */ + src_sample_b<1>UW /* readback */ + g0<8,8,1>UW /* copy to msg start reg*/ + sampler (5,4,F) /* sampler message description, (binding_table,sampler_index,datatype) + /* here(src->dst) we should use src_sampler and src_surface */ + mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */ diff --git a/src/render_program/exa_wm_src_sample_planar.g4b b/src/render_program/exa_wm_src_sample_planar.g4b new file mode 100644 index 00000000..77a5c234 --- /dev/null +++ b/src/render_program/exa_wm_src_sample_planar.g4b @@ -0,0 +1,4 @@ + { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 }, + { 0x01800031, 0x22001d29, 0x008d0000, 0x02520001 }, + { 0x01800031, 0x21c01d29, 0x008d0000, 0x02520203 }, + { 0x01800031, 0x22401d29, 0x008d0000, 0x02520405 }, diff --git a/src/render_program/exa_wm_write.g4a b/src/render_program/exa_wm_write.g4a new file mode 100644 index 00000000..faee80b3 --- /dev/null +++ b/src/render_program/exa_wm_write.g4a @@ -0,0 +1,74 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +include(`exa_wm.g4i') + +/* + * Prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), + * + * Note that the SIMD16 write message takes data for the first + * two sub-spans followed by the data for the second two sub-spans + * instead of having the two sub-spans interleaved by channel. Weird. + */ + +mov (8) data_port_r_01<1>F src_sample_r_01<8,8,1>F { align1 }; +mov (8) data_port_g_01<1>F src_sample_g_01<8,8,1>F { align1 }; +mov (8) data_port_b_01<1>F src_sample_b_01<8,8,1>F { align1 }; +mov (8) data_port_a_01<1>F src_sample_a_01<8,8,1>F { align1 }; + +mov (8) data_port_r_23<1>F src_sample_r_23<8,8,1>F { sechalf align1 }; +mov (8) data_port_g_23<1>F src_sample_g_23<8,8,1>F { sechalf align1 }; +mov (8) data_port_b_23<1>F src_sample_b_23<8,8,1>F { sechalf align1 }; +mov (8) data_port_a_23<1>F src_sample_a_23<8,8,1>F { sechalf align1 }; + +/* m0, m1 are all direct passed by PS thread payload */ +mov (8) data_port_msg_1<1>UD g1<8,8,1>UD { mask_disable align1 }; + +/* write */ +send (16) + data_port_msg_0_ind + acc0<1>UW + g0<8,8,1>UW + write ( + 0, /* binding_table */ + 8, /* pixel scordboard clear, msg type simd16 single source */ + 4, /* render target write */ + 0 /* no write commit message */ + ) + mlen 10 + rlen 0 + { align1 EOT }; + +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; + diff --git a/src/render_program/exa_wm_write.g4b b/src/render_program/exa_wm_write.g4b new file mode 100644 index 00000000..92e7b248 --- /dev/null +++ b/src/render_program/exa_wm_write.g4b @@ -0,0 +1,18 @@ + { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 }, + { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 }, + { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 }, + { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 }, + { 0x00601001, 0x20c003be, 0x008d01e0, 0x00000000 }, + { 0x00601001, 0x20e003be, 0x008d0220, 0x00000000 }, + { 0x00601001, 0x210003be, 0x008d0260, 0x00000000 }, + { 0x00601001, 0x212003be, 0x008d02a0, 0x00000000 }, + { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 }, + { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/render_program/exa_wm_xy.g4a b/src/render_program/exa_wm_xy.g4a new file mode 100644 index 00000000..e99f5ac1 --- /dev/null +++ b/src/render_program/exa_wm_xy.g4a @@ -0,0 +1,52 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Keith Packard <keithp@keithp.com> + */ + +/* + * Register assignments: + * + * x g6/g7 + * y g8/g9 + * + * temp x g10/g11 + * temp y g12/g13 + * + * src w g14/g15 + * src u m1/m2 + * src v m3/m4 + */ + +/* Fragment to compute per-pixel XY values */ + +include(`exa_wm.g4i') + + /* Load X and Y coordinates and compute per-pixel coordinates */ +add (16) temp_x_uw<1>UW dst_x_uw 0x10101010V { align1 }; +add (16) temp_y_uw<1>UW dst_y_uw 0x11001100V { align1 }; + + /* subtract screen-space origin of vertex 0 */ +add (16) dst_x<1>F temp_x_uw<8,8,1>UW -screen_x0 { compr align1 }; +add (16) dst_y<1>F temp_y_uw<8,8,1>UW -screen_y0 { compr align1 }; diff --git a/src/render_program/exa_wm_xy.g4b b/src/render_program/exa_wm_xy.g4b new file mode 100644 index 00000000..327fc29c --- /dev/null +++ b/src/render_program/exa_wm_xy.g4b @@ -0,0 +1,4 @@ + { 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 }, + { 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 }, + { 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 }, + { 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 }, diff --git a/src/render_program/exa_wm_yuv_rgb.g4a b/src/render_program/exa_wm_yuv_rgb.g4a new file mode 100644 index 00000000..4fb2576a --- /dev/null +++ b/src/render_program/exa_wm_yuv_rgb.g4a @@ -0,0 +1,98 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Keith Packard <keithp@keithp.com> + * Eric Anholt <eric@anholt.net> + * + */ + +include(`exa_wm.g4i') + +define(`YCbCr_base', `src_sample_base') + +define(`Cr', `src_sample_r') +define(`Cr_01', `src_sample_r_01') +define(`Cr_23', `src_sample_r_23') + +define(`Y', `src_sample_g') +define(`Y_01', `src_sample_g_01') +define(`Y_23', `src_sample_g_23') + +define(`Cb', `src_sample_b') +define(`Cb_01', `src_sample_b_01') +define(`Cb_23', `src_sample_b_23') + +define(`Crn', `mask_sample_r') +define(`Crn_01', `mask_sample_r_01') +define(`Crn_23', `mask_sample_r_23') + +define(`Yn', `mask_sample_g') +define(`Yn_01', `mask_sample_g_01') +define(`Yn_23', `mask_sample_g_23') + +define(`Cbn', `mask_sample_b') +define(`Cbn_01', `mask_sample_b_01') +define(`Cbn_23', `mask_sample_b_23') + + /* color space conversion function: + * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1) + * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1) + * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1) + */ + + /* Normalize Y, Cb and Cr: + * + * Yn = (Y - 16/255) * 1.164 + * Crn = Cr - 128 / 255 + * Cbn = Cb - 128 / 255 + */ +add (16) Yn<1>F Y<8,8,1>F -0.0627451F { compr align1 }; +mul (16) Yn<1>F Yn<8,8,1>F 1.164F { compr align1 }; + +add (16) Crn<1>F Cr<8,8,1>F -0.501961F { compr align1 }; + +add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 }; + + /* + * R = Y + Cr * 1.596 + */ +mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; +mac.sat(16) src_sample_r<1>F Crn<8,8,1>F 1.596F { compr align1 }; + + /* + * G = Crn * -0.813 + Cbn * -0.392 + Y + */ +mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; +mac (16) acc0<1>F Crn<8,8,1>F -0.813F { compr align1 }; +mac.sat(16) src_sample_g<1>F Cbn<8,8,1>F -0.392F { compr align1 }; + + /* + * B = Cbn * 2.017 + Y + */ +mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; +mac.sat(16) src_sample_b<1>F Cbn<8,8,1>F 2.017F { compr align1 }; + + /* + * A = 1.0 + */ +mov (16) src_sample_a<1>F 1.0F { compr align1 }; diff --git a/src/render_program/exa_wm_yuv_rgb.g4b b/src/render_program/exa_wm_yuv_rgb.g4b new file mode 100644 index 00000000..01f6e2b2 --- /dev/null +++ b/src/render_program/exa_wm_yuv_rgb.g4b @@ -0,0 +1,12 @@ + { 0x00802040, 0x23007fbd, 0x008d0200, 0xbd808081 }, + { 0x00802041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 }, + { 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbf008084 }, + { 0x00802040, 0x23407fbd, 0x008d0240, 0xbf008084 }, + { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x80802048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba }, + { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x00802048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 }, + { 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 }, + { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 }, + { 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 }, diff --git a/src/render_program/packed_yuv_sf.g4a b/src/render_program/packed_yuv_sf.g4a new file mode 100644 index 00000000..5a023992 --- /dev/null +++ b/src/render_program/packed_yuv_sf.g4a @@ -0,0 +1,45 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Keith Packard <keithp@keithp.com> + * Eric Anholt <eric@anholt.net> + * + */ + +send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 }; +send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 }; +add (8) g7<1>F g4<8,8,1>F -g3<8,8,1>F { align1 }; +mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 }; +mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 }; +mov (8) m1<1>F g7<0,1,0>F { align1 }; +mov (8) m2<1>F g7.4<0,1,0>F { align1 }; +mov (8) m3<1>F g3<8,8,1>F { align1 }; +send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT }; +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; diff --git a/src/render_program/packed_yuv_sf.g4b b/src/render_program/packed_yuv_sf.g4b new file mode 100644 index 00000000..830d1760 --- /dev/null +++ b/src/render_program/packed_yuv_sf.g4b @@ -0,0 +1,17 @@ + { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 }, + { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 }, + { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 }, + { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 }, + { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 }, + { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 }, + { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 }, + { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 }, + { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/render_program/packed_yuv_wm.g4a b/src/render_program/packed_yuv_wm.g4a new file mode 100644 index 00000000..2be52b5f --- /dev/null +++ b/src/render_program/packed_yuv_wm.g4a @@ -0,0 +1,221 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Keith Packard <keithp@keithp.com> + * Eric Anholt <eric@anholt.net> + * + */ + +include(`exa_wm.g4i') + +define(`YCbCr_base', `g12') +define(`Cr', `g12') +define(`Cr_01', `g12') +define(`Cr_23', `g13') +define(`Y', `g14') +define(`Y_01', `g14') +define(`Y_23', `g15') +define(`Cb', `g16') +define(`Cb_01', `g16') +define(`Cb_23', `g17') + +/* The initial payload of the thread is always g0. + * WM_URB (incoming URB entries) is g3 + * X0_R is g4 + * X1_R is g5 + * Y0_R is g6 + * Y1_R is g7 + */ + + /* Set up the X/Y screen coordinates of the pixels in our 4 subspans. Each + * subspan is a 2x2 rectangle, and the screen x/y of the upper left of each + * subspan are given in GRF register 1.2 through 1.5 (which, with the word + * addressing below, are 1.4 through 1.11). + * + * The result is WM_X*_R and WM_Y*R being: + * + * X0: {ss0.x, ss0.x+1, ss0.x, ss0.x+1, ss1.x, ss1.x+1, ss1.x, ss1.x+y} + * Y0: {ss0.y, ss0.y, ss0.y+1, ss0.y+1, ss1.y, ss1.y, ss1.y+1, ss1.y+1} + * X1: {ss2.x, ss2.x+1, ss2.x, ss2.x+1, ss3.x, ss3.x+1, ss3.x, ss3.x+y} + * Y1: {ss2.y, ss2.y, ss2.y+1, ss2.y+1, ss3.y, ss3.y, ss3.y+1, ss3.y+1} + */ + + /* Set up ss0.x coordinates*/ +mov (1) g4<1>F g1.8<0,1,0>UW { align1 }; +add (1) g4.4<1>F g1.8<0,1,0>UW 1UD { align1 }; +mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 }; +add (1) g4.12<1>F g1.8<0,1,0>UW 1UD { align1 }; + /* Set up ss0.y coordinates */ +mov (1) g6<1>F g1.10<0,1,0>UW { align1 }; +mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 }; +add (1) g6.8<1>F g1.10<0,1,0>UW 1UD { align1 }; +add (1) g6.12<1>F g1.10<0,1,0>UW 1UD { align1 }; + /* set up ss1.x coordinates */ +mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 }; +add (1) g4.20<1>F g1.12<0,1,0>UW 1UD { align1 }; +mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 }; +add (1) g4.28<1>F g1.12<0,1,0>UW 1UD { align1 }; + /* set up ss1.y coordinates */ +mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 }; +mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 }; +add (1) g6.24<1>F g1.14<0,1,0>UW 1UD { align1 }; +add (1) g6.28<1>F g1.14<0,1,0>UW 1UD { align1 }; + /* Set up ss2.x coordinates */ +mov (1) g5<1>F g1.16<0,1,0>UW { align1 }; +add (1) g5.4<1>F g1.16<0,1,0>UW 1UD { align1 }; +mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 }; +add (1) g5.12<1>F g1.16<0,1,0>UW 1UD { align1 }; + /* Set up ss2.y coordinates */ +mov (1) g7<1>F g1.18<0,1,0>UW { align1 }; +mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 }; +add (1) g7.8<1>F g1.18<0,1,0>UW 1UD { align1 }; +add (1) g7.12<1>F g1.18<0,1,0>UW 1UD { align1 }; + /* Set up ss3.x coordinates */ +mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 }; +add (1) g5.20<1>F g1.20<0,1,0>UW 1UD { align1 }; +mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 }; +add (1) g5.28<1>F g1.20<0,1,0>UW 1UD { align1 }; + /* Set up ss3.y coordinates */ +mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 }; +mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 }; +add (1) g7.24<1>F g1.22<0,1,0>UW 1UD { align1 }; +add (1) g7.28<1>F g1.22<0,1,0>UW 1UD { align1 }; + + /* Now, map these screen space coordinates into texture coordinates. */ + /* subtract screen-space X origin of vertex 0. */ +add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 }; +add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 }; + /* scale by texture X increment */ +mul (8) g4<1>F g4<8,8,1>F g3<0,1,0>F { align1 }; +mul (8) g5<1>F g5<8,8,1>F g3<0,1,0>F { align1 }; + /* add in texture X offset */ +add (8) g4<1>F g4<8,8,1>F g3.12<0,1,0>F { align1 }; +add (8) g5<1>F g5<8,8,1>F g3.12<0,1,0>F { align1 }; + /* subtract screen-space Y origin of vertex 0. */ +add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 }; +add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 }; + /* scale by texture Y increment */ +mul (8) g6<1>F g6<8,8,1>F g3.20<0,1,0>F { align1 }; +mul (8) g7<1>F g7<8,8,1>F g3.20<0,1,0>F { align1 }; + /* add in texture Y offset */ +add (8) g6<1>F g6<8,8,1>F g3.28<0,1,0>F { align1 }; +add (8) g7<1>F g7<8,8,1>F g3.28<0,1,0>F { align1 }; + /* sampler */ +mov (8) m1<1>F g4<8,8,1>F { align1 }; +mov (8) m2<1>F g5<8,8,1>F { align1 }; +mov (8) m3<1>F g6<8,8,1>F { align1 }; +mov (8) m4<1>F g7<8,8,1>F { align1 }; + + /* + * g0 holds the PS thread payload, which (oddly) contains + * precisely what the sampler wants to see in m0 + */ +send (16) + 0 /* load g0 to m0 */ + YCbCr_base<1>UW + g0<8,8,1>UW + sampler (1,0,F) + mlen 5 rlen 8 { align1 }; + + /* color space conversion function: + * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1) + * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1) + * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1) + * + * Y is g14, g15. + * Cr is g12, g13. + * Cb is g16, g17. + * + * R is g2, g6. + * G is g3, g7. + * B is g4, g8. + */ + + /* Normalize Y, Cb and Cr: + * + * Y = (Y - 16/255) * 1.164 + * Cr = Cr - 128 / 255 + * Cb = Cb - 128 / 255 + */ +add (16) Y<1>F Y<8,8,1>F -0.0627451F { compr align1 }; +mul (16) Y<1>F Y<8,8,1>F 1.164F { compr align1 }; + +add (16) Cr<1>F Cr<8,8,1>F -0.501961F { compr align1 }; + +add (16) Cb<1>F Cb<8,8,1>F -0.501961F { compr align1 }; + + /* + * R = Y + Cr * 1.596 + */ +mul (8) null Cr_01<8,8,1>F 1.596F { align1 }; +mac.sat (8) data_port_r_01<1>F Y_01<8,8,1>F 1F { align1 }; +mul (8) null Cr_23<8,8,1>F 1.596F { align1 }; +mac.sat (8) data_port_r_23<1>F Y_23<8,8,1>F 1F { align1 }; + + /* + * G = Cr * -0.813 + Cb * -0.392 + Y + */ +mul (8) null Cr_01<8,8,1>F -0.813F { align1 }; +mac (8) null Cb_01<8,8,1>F -0.392F { align1 }; +mac.sat (8) data_port_g_01<1>F Y_01<8,8,1>F 1F { align1 }; +mul (8) null Cr_23<8,8,1>F -0.813F { align1 }; +mac (8) null Cb_23<8,8,1>F -0.392F { align1 }; +mac.sat (8) data_port_g_23<1>F Y_23<8,8,1>F 1F { align1 }; + + /* + * B = Cb * 2.017 + Y + */ +mul (8) null Cb_01<8,8,1>F 2.017F { align1 }; +mac.sat (8) data_port_b_01<1>F Y_01<8,8,1>F 1F { align1 }; +mul (8) null Cb_23<8,8,1>F 2.017F { align1 }; +mac.sat (8) data_port_b_23<1>F Y_23<8,8,1>F 1F { align1 }; + + /* + * A = 1.0 + */ +mov (8) data_port_a_01<1>F 1.0F { align1 }; +mov (8) data_port_a_23<1>F 1.0F { align1 }; + + /* + * Pass through control information: + */ +mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable }; + + /* + * Send framebuffer write message: XXX: acc0? + */ +send (16) 0 acc0<1>UW g0<8,8,1>UW write ( + 0, /* binding table index 0 */ + 8, /* pixel scoreboard clear */ + 4, /* render target write */ + 0 /* no write commit message */ + ) mlen 10 rlen 0 { align1 EOT }; + /* padding */ +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; diff --git a/src/render_program/packed_yuv_wm.g4b b/src/render_program/packed_yuv_wm.g4b new file mode 100644 index 00000000..f2e650a3 --- /dev/null +++ b/src/render_program/packed_yuv_wm.g4b @@ -0,0 +1,79 @@ + { 0x00000001, 0x2080013d, 0x00000028, 0x00000000 }, + { 0x00000040, 0x20840d3d, 0x00000028, 0x00000001 }, + { 0x00000001, 0x2088013d, 0x00000028, 0x00000000 }, + { 0x00000040, 0x208c0d3d, 0x00000028, 0x00000001 }, + { 0x00000001, 0x20c0013d, 0x0000002a, 0x00000000 }, + { 0x00000001, 0x20c4013d, 0x0000002a, 0x00000000 }, + { 0x00000040, 0x20c80d3d, 0x0000002a, 0x00000001 }, + { 0x00000040, 0x20cc0d3d, 0x0000002a, 0x00000001 }, + { 0x00000001, 0x2090013d, 0x0000002c, 0x00000000 }, + { 0x00000040, 0x20940d3d, 0x0000002c, 0x00000001 }, + { 0x00000001, 0x2098013d, 0x0000002c, 0x00000000 }, + { 0x00000040, 0x209c0d3d, 0x0000002c, 0x00000001 }, + { 0x00000001, 0x20d0013d, 0x0000002e, 0x00000000 }, + { 0x00000001, 0x20d4013d, 0x0000002e, 0x00000000 }, + { 0x00000040, 0x20d80d3d, 0x0000002e, 0x00000001 }, + { 0x00000040, 0x20dc0d3d, 0x0000002e, 0x00000001 }, + { 0x00000001, 0x20a0013d, 0x00000030, 0x00000000 }, + { 0x00000040, 0x20a40d3d, 0x00000030, 0x00000001 }, + { 0x00000001, 0x20a8013d, 0x00000030, 0x00000000 }, + { 0x00000040, 0x20ac0d3d, 0x00000030, 0x00000001 }, + { 0x00000001, 0x20e0013d, 0x00000032, 0x00000000 }, + { 0x00000001, 0x20e4013d, 0x00000032, 0x00000000 }, + { 0x00000040, 0x20e80d3d, 0x00000032, 0x00000001 }, + { 0x00000040, 0x20ec0d3d, 0x00000032, 0x00000001 }, + { 0x00000001, 0x20b0013d, 0x00000034, 0x00000000 }, + { 0x00000040, 0x20b40d3d, 0x00000034, 0x00000001 }, + { 0x00000001, 0x20b8013d, 0x00000034, 0x00000000 }, + { 0x00000040, 0x20bc0d3d, 0x00000034, 0x00000001 }, + { 0x00000001, 0x20f0013d, 0x00000036, 0x00000000 }, + { 0x00000001, 0x20f4013d, 0x00000036, 0x00000000 }, + { 0x00000040, 0x20f80d3d, 0x00000036, 0x00000001 }, + { 0x00000040, 0x20fc0d3d, 0x00000036, 0x00000001 }, + { 0x00600040, 0x208077bd, 0x008d0080, 0x00004020 }, + { 0x00600040, 0x20a077bd, 0x008d00a0, 0x00004020 }, + { 0x00600041, 0x208077bd, 0x008d0080, 0x00000060 }, + { 0x00600041, 0x20a077bd, 0x008d00a0, 0x00000060 }, + { 0x00600040, 0x208077bd, 0x008d0080, 0x0000006c }, + { 0x00600040, 0x20a077bd, 0x008d00a0, 0x0000006c }, + { 0x00600040, 0x20c077bd, 0x008d00c0, 0x00004024 }, + { 0x00600040, 0x20e077bd, 0x008d00e0, 0x00004024 }, + { 0x00600041, 0x20c077bd, 0x008d00c0, 0x00000074 }, + { 0x00600041, 0x20e077bd, 0x008d00e0, 0x00000074 }, + { 0x00600040, 0x20c077bd, 0x008d00c0, 0x0000007c }, + { 0x00600040, 0x20e077bd, 0x008d00e0, 0x0000007c }, + { 0x00600001, 0x202003be, 0x008d0080, 0x00000000 }, + { 0x00600001, 0x204003be, 0x008d00a0, 0x00000000 }, + { 0x00600001, 0x206003be, 0x008d00c0, 0x00000000 }, + { 0x00600001, 0x208003be, 0x008d00e0, 0x00000000 }, + { 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 }, + { 0x00802040, 0x21c07fbd, 0x008d01c0, 0xbd808081 }, + { 0x00802041, 0x21c07fbd, 0x008d01c0, 0x3f94fdf4 }, + { 0x00802040, 0x21807fbd, 0x008d0180, 0xbf008084 }, + { 0x00802040, 0x22007fbd, 0x008d0200, 0xbf008084 }, + { 0x00600041, 0x20007fbc, 0x008d0180, 0x3fcc49ba }, + { 0x80600048, 0x20407fbe, 0x008d01c0, 0x3f800000 }, + { 0x00600041, 0x20007fbc, 0x008d01a0, 0x3fcc49ba }, + { 0x80600048, 0x20c07fbe, 0x008d01e0, 0x3f800000 }, + { 0x00600041, 0x20007fbc, 0x008d0180, 0xbf5020c5 }, + { 0x00600048, 0x20007fbc, 0x008d0200, 0xbec8b439 }, + { 0x80600048, 0x20607fbe, 0x008d01c0, 0x3f800000 }, + { 0x00600041, 0x20007fbc, 0x008d01a0, 0xbf5020c5 }, + { 0x00600048, 0x20007fbc, 0x008d0220, 0xbec8b439 }, + { 0x80600048, 0x20e07fbe, 0x008d01e0, 0x3f800000 }, + { 0x00600041, 0x20007fbc, 0x008d0200, 0x40011687 }, + { 0x80600048, 0x20807fbe, 0x008d01c0, 0x3f800000 }, + { 0x00600041, 0x20007fbc, 0x008d0220, 0x40011687 }, + { 0x80600048, 0x21007fbe, 0x008d01e0, 0x3f800000 }, + { 0x00600001, 0x20a003fe, 0x00000000, 0x3f800000 }, + { 0x00600001, 0x212003fe, 0x00000000, 0x3f800000 }, + { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 }, + { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, |