diff options
author | Keith Packard <keithp@keithp.com> | 2008-03-30 00:54:51 -0700 |
---|---|---|
committer | Keith Packard <keithp@keithp.com> | 2008-03-30 18:05:32 -0700 |
commit | 6304b38423f99190a5e54f1a7dcaa75adfad4f2a (patch) | |
tree | 1291c442de6aabe6b2124f502392f768f257fc54 /src/exa_wm_nomask.g4a | |
parent | 771a56b1ed0df69345c723cb62a73b6842cd8227 (diff) |
Reimplement wm program for nomask case to handle affine transforms
This involves correctly computing u/v locations based on x/y vectors and
line constants computed in new sf program.
Also, use fewer instructions to make this go a bit faster (2X for 500x500
composite).
Diffstat (limited to 'src/exa_wm_nomask.g4a')
-rw-r--r-- | src/exa_wm_nomask.g4a | 94 |
1 files changed, 34 insertions, 60 deletions
diff --git a/src/exa_wm_nomask.g4a b/src/exa_wm_nomask.g4a index f92dc1a6..97426ec1 100644 --- a/src/exa_wm_nomask.g4a +++ b/src/exa_wm_nomask.g4a @@ -40,75 +40,49 @@ * Y1_R is g7 */ - /* Set up ss0.x coordinates*/ -mov (1) g4<1>F g1.8<0,1,0>UW { align1 }; -add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 }; -mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 }; -add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 }; - /* Set up ss0.y coordinates */ -mov (1) g6<1>F g1.10<0,1,0>UW { align1 }; -mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 }; -add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 }; -add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 }; - /* set up ss1.x coordinates */ -mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 }; -add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 }; -mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 }; -add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 }; - /* set up ss1.y coordinates */ -mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 }; -mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 }; -add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { align1 }; -add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 }; - /* Set up ss2.x coordinates */ -mov (1) g5<1>F g1.16<0,1,0>UW { align1 }; -add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 }; -mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 }; -add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 }; - /* Set up ss2.y coordinates */ -mov (1) g7<1>F g1.18<0,1,0>UW { align1 }; -mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 }; -add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 }; -add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 }; - /* Set up ss3.x coordinates */ -mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 }; -add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 }; -mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 }; -add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 }; - /* Set up ss3.y coordinates */ -mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 }; -mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 }; -add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 }; -add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 }; + +/* Load X and Y coordinates and 
compute per-pixel coordinates */ +add (16) g4<1>UW g1.8<2,4,0>UW 0x10101010V { align1 }; +add (16) g6<1>UW g1.10<2,4,0>UW 0x11001100V { align1 }; /* Now, map these screen space coordinates into texture coordinates. */ + /* subtract screen-space X origin of vertex 0. */ -add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 }; -add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 }; - /* scale by texture X increment */ -mul (8) g4<1>F g4<8,8,1>F g3<0,1,0>F { align1 }; -mul (8) g5<1>F g5<8,8,1>F g3<0,1,0>F { align1 }; - /* add in texture X offset */ -add (8) g4<1>F g4<8,8,1>F g3.12<0,1,0>F { align1 }; -add (8) g5<1>F g5<8,8,1>F g3.12<0,1,0>F { align1 }; +add (16) g12<1>F g4<8,8,1>UW -g1.0<0,1,0>F { compr align1 }; + /* subtract screen-space Y origin of vertex 0. */ -add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 }; -add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 }; - /* scale by texture Y increment */ -mul (8) g6<1>F g6<8,8,1>F g3.20<0,1,0>F { align1 }; -mul (8) g7<1>F g7<8,8,1>F g3.20<0,1,0>F { align1 }; - /* add in texture Y offset */ -add (8) g6<1>F g6<8,8,1>F g3.28<0,1,0>F { align1 }; -add (8) g7<1>F g7<8,8,1>F g3.28<0,1,0>F { align1 }; +add (16) g16<1>F g6<8,8,1>UW -g1.4<0,1,0>F { compr align1 }; + + /* g8/g9 = X * du/dx */ +mul (16) g8<1>F g12<8,8,1>F g3.0<0,1,0>F { compr align1 }; + + /* g10/g11 = Y * du/dy */ +mul (16) g10<1>F g16<8,8,1>F g3.4<0,1,0>F { compr align1 }; + + /* g8/g9 = X du/dx + Y du/dy */ +add (16) g8<1>F g8<8,8,1>F g10<8,8,1>F { compr align1 }; + + /* m1/m2 = g8/g9 + uo */ +add (16) m1<1>F g8<8,8,1>F g3.12<0,1,0>F { compr align1 }; + + + /* g8/g9 = X * dv/dx */ +mul (16) g8<1>F g12<8,8,1>F g3.16<0,1,0>F { compr align1 }; + + /* g10/g11 = Y * dv/dy */ +mul (16) g10<1>F g16<8,8,1>F g3.20<0,1,0>F { compr align1 }; + + /* g8/g9 = X dv/dx + Y dv/dy */ +add (16) g8<1>F g8<8,8,1>F g10<8,8,1>F { compr align1 }; + + /* m3/m4 = g8/g9 + vo */ +add (16) m3<1>F g8<8,8,1>F g3.28<0,1,0>F { compr align1 }; + /* prepare sampler read back gX register, which would be 
written back to output */ /* use simd16 sampler, param 0 is u, param 1 is v. */ /* 'payload' loading, assuming tex coord start from g4 */ -mov (8) m1<1>F g4<8,8,1>F { align1 }; -mov (8) m2<1>F g5<8,8,1>F { align1 }; /* param 0 u in m1, m2 */ -mov (8) m3<1>F g6<8,8,1>F { align1 }; -mov (8) m4<1>F g7<8,8,1>F { align1 }; /* param 1 v in m3, m4 */ /* m0 will be copied with g0, as it contains send desc */ /* emit sampler 'send' cmd */ |