diff options
author | Keith Packard <keithp@keithp.com> | 2008-03-30 00:54:51 -0700 |
---|---|---|
committer | Keith Packard <keithp@keithp.com> | 2008-03-30 18:05:32 -0700 |
commit | 6304b38423f99190a5e54f1a7dcaa75adfad4f2a (patch) | |
tree | 1291c442de6aabe6b2124f502392f768f257fc54 | |
parent | 771a56b1ed0df69345c723cb62a73b6842cd8227 (diff) |
Reimplement wm program for nomask case to handle affine transforms
This involves correctly computing u/v locations based on x/y vectors and
line constants computed in new sf program.
Also, use fewer instructions to make this go a bit faster (2X for 500x500
composite).
-rw-r--r-- | src/exa_wm_nomask.g4a | 94 | ||||
-rw-r--r-- | src/exa_wm_nomask_prog.h | 60 | ||||
-rw-r--r-- | src/i965_render.c | 41 |
3 files changed, 48 insertions, 147 deletions
diff --git a/src/exa_wm_nomask.g4a b/src/exa_wm_nomask.g4a index f92dc1a6..97426ec1 100644 --- a/src/exa_wm_nomask.g4a +++ b/src/exa_wm_nomask.g4a @@ -40,75 +40,49 @@ * Y1_R is g7 */ - /* Set up ss0.x coordinates*/ -mov (1) g4<1>F g1.8<0,1,0>UW { align1 }; -add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 }; -mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 }; -add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 }; - /* Set up ss0.y coordinates */ -mov (1) g6<1>F g1.10<0,1,0>UW { align1 }; -mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 }; -add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 }; -add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 }; - /* set up ss1.x coordinates */ -mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 }; -add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 }; -mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 }; -add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 }; - /* set up ss1.y coordinates */ -mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 }; -mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 }; -add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { align1 }; -add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 }; - /* Set up ss2.x coordinates */ -mov (1) g5<1>F g1.16<0,1,0>UW { align1 }; -add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 }; -mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 }; -add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 }; - /* Set up ss2.y coordinates */ -mov (1) g7<1>F g1.18<0,1,0>UW { align1 }; -mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 }; -add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 }; -add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 }; - /* Set up ss3.x coordinates */ -mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 }; -add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 }; -mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 }; -add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 }; - /* Set up ss3.y coordinates */ -mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 }; -mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 }; -add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 }; -add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 }; + +/* Load X and Y coordinates and 
compute per-pixel coordinates */ +add (16) g4<1>UW g1.8<2,4,0>UW 0x10101010V { align1 }; +add (16) g6<1>UW g1.10<2,4,0>UW 0x11001100V { align1 }; /* Now, map these screen space coordinates into texture coordinates. */ + /* subtract screen-space X origin of vertex 0. */ -add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 }; -add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 }; - /* scale by texture X increment */ -mul (8) g4<1>F g4<8,8,1>F g3<0,1,0>F { align1 }; -mul (8) g5<1>F g5<8,8,1>F g3<0,1,0>F { align1 }; - /* add in texture X offset */ -add (8) g4<1>F g4<8,8,1>F g3.12<0,1,0>F { align1 }; -add (8) g5<1>F g5<8,8,1>F g3.12<0,1,0>F { align1 }; +add (16) g12<1>F g4<8,8,1>UW -g1.0<0,1,0>F { compr align1 }; + /* subtract screen-space Y origin of vertex 0. */ -add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 }; -add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 }; - /* scale by texture Y increment */ -mul (8) g6<1>F g6<8,8,1>F g3.20<0,1,0>F { align1 }; -mul (8) g7<1>F g7<8,8,1>F g3.20<0,1,0>F { align1 }; - /* add in texture Y offset */ -add (8) g6<1>F g6<8,8,1>F g3.28<0,1,0>F { align1 }; -add (8) g7<1>F g7<8,8,1>F g3.28<0,1,0>F { align1 }; +add (16) g16<1>F g6<8,8,1>UW -g1.4<0,1,0>F { compr align1 }; + + /* g8/g9 = X * du/dx */ +mul (16) g8<1>F g12<8,8,1>F g3.0<0,1,0>F { compr align1 }; + + /* g10/g11 = Y * du/dy */ +mul (16) g10<1>F g16<8,8,1>F g3.4<0,1,0>F { compr align1 }; + + /* g8/g9 = X du/dx + Y du/dy */ +add (16) g8<1>F g8<8,8,1>F g10<8,8,1>F { compr align1 }; + + /* m1/m2 = g8/g9 + uo */ +add (16) m1<1>F g8<8,8,1>F g3.12<0,1,0>F { compr align1 }; + + + /* g8/g9 = X * dv/dx */ +mul (16) g8<1>F g12<8,8,1>F g3.16<0,1,0>F { compr align1 }; + + /* g10/g11 = Y * dv/dy */ +mul (16) g10<1>F g16<8,8,1>F g3.20<0,1,0>F { compr align1 }; + + /* g8/g9 = X dv/dx + Y dv/dy */ +add (16) g8<1>F g8<8,8,1>F g10<8,8,1>F { compr align1 }; + + /* m3/m4 = g8/g9 + vo */ +add (16) m3<1>F g8<8,8,1>F g3.28<0,1,0>F { compr align1 }; + /* prepare sampler read back gX register, which would be 
written back to output */ /* use simd16 sampler, param 0 is u, param 1 is v. */ /* 'payload' loading, assuming tex coord start from g4 */ -mov (8) m1<1>F g4<8,8,1>F { align1 }; -mov (8) m2<1>F g5<8,8,1>F { align1 }; /* param 0 u in m1, m2 */ -mov (8) m3<1>F g6<8,8,1>F { align1 }; -mov (8) m4<1>F g7<8,8,1>F { align1 }; /* param 1 v in m3, m4 */ /* m0 will be copied with g0, as it contains send desc */ /* emit sampler 'send' cmd */ diff --git a/src/exa_wm_nomask_prog.h b/src/exa_wm_nomask_prog.h index 7870b3b7..c73bdbcd 100644 --- a/src/exa_wm_nomask_prog.h +++ b/src/exa_wm_nomask_prog.h @@ -1,51 +1,15 @@ - { 0x00000001, 0x2080013d, 0x00000028, 0x00000000 }, - { 0x00000040, 0x20840d3d, 0x00000028, 0x00000001 }, - { 0x00000001, 0x2088013d, 0x00000028, 0x00000000 }, - { 0x00000040, 0x208c0d3d, 0x00000028, 0x00000001 }, - { 0x00000001, 0x20c0013d, 0x0000002a, 0x00000000 }, - { 0x00000001, 0x20c4013d, 0x0000002a, 0x00000000 }, - { 0x00000040, 0x20c80d3d, 0x0000002a, 0x00000001 }, - { 0x00000040, 0x20cc0d3d, 0x0000002a, 0x00000001 }, - { 0x00000001, 0x2090013d, 0x0000002c, 0x00000000 }, - { 0x00000040, 0x20940d3d, 0x0000002c, 0x00000001 }, - { 0x00000001, 0x2098013d, 0x0000002c, 0x00000000 }, - { 0x00000040, 0x209c0d3d, 0x0000002c, 0x00000001 }, - { 0x00000001, 0x20d0013d, 0x0000002e, 0x00000000 }, - { 0x00000001, 0x20d4013d, 0x0000002e, 0x00000000 }, - { 0x00000040, 0x20d80d3d, 0x0000002e, 0x00000001 }, - { 0x00000040, 0x20dc0d3d, 0x0000002e, 0x00000001 }, - { 0x00000001, 0x20a0013d, 0x00000030, 0x00000000 }, - { 0x00000040, 0x20a40d3d, 0x00000030, 0x00000001 }, - { 0x00000001, 0x20a8013d, 0x00000030, 0x00000000 }, - { 0x00000040, 0x20ac0d3d, 0x00000030, 0x00000001 }, - { 0x00000001, 0x20e0013d, 0x00000032, 0x00000000 }, - { 0x00000001, 0x20e4013d, 0x00000032, 0x00000000 }, - { 0x00000040, 0x20e80d3d, 0x00000032, 0x00000001 }, - { 0x00000040, 0x20ec0d3d, 0x00000032, 0x00000001 }, - { 0x00000001, 0x20b0013d, 0x00000034, 0x00000000 }, - { 0x00000040, 0x20b40d3d, 
0x00000034, 0x00000001 }, - { 0x00000001, 0x20b8013d, 0x00000034, 0x00000000 }, - { 0x00000040, 0x20bc0d3d, 0x00000034, 0x00000001 }, - { 0x00000001, 0x20f0013d, 0x00000036, 0x00000000 }, - { 0x00000001, 0x20f4013d, 0x00000036, 0x00000000 }, - { 0x00000040, 0x20f80d3d, 0x00000036, 0x00000001 }, - { 0x00000040, 0x20fc0d3d, 0x00000036, 0x00000001 }, - { 0x00600040, 0x208077bd, 0x008d0080, 0x00004020 }, - { 0x00600040, 0x20a077bd, 0x008d00a0, 0x00004020 }, - { 0x00600041, 0x208077bd, 0x008d0080, 0x00000060 }, - { 0x00600041, 0x20a077bd, 0x008d00a0, 0x00000060 }, - { 0x00600040, 0x208077bd, 0x008d0080, 0x0000006c }, - { 0x00600040, 0x20a077bd, 0x008d00a0, 0x0000006c }, - { 0x00600040, 0x20c077bd, 0x008d00c0, 0x00004024 }, - { 0x00600040, 0x20e077bd, 0x008d00e0, 0x00004024 }, - { 0x00600041, 0x20c077bd, 0x008d00c0, 0x00000074 }, - { 0x00600041, 0x20e077bd, 0x008d00e0, 0x00000074 }, - { 0x00600040, 0x20c077bd, 0x008d00c0, 0x0000007c }, - { 0x00600040, 0x20e077bd, 0x008d00e0, 0x0000007c }, - { 0x00600001, 0x202003be, 0x008d0080, 0x00000000 }, - { 0x00600001, 0x204003be, 0x008d00a0, 0x00000000 }, - { 0x00600001, 0x206003be, 0x008d00c0, 0x00000000 }, - { 0x00600001, 0x208003be, 0x008d00e0, 0x00000000 }, + { 0x00800040, 0x20806d29, 0x00480028, 0x10101010 }, + { 0x00800040, 0x20c06d29, 0x0048002a, 0x11001100 }, + { 0x00802040, 0x2180753d, 0x008d0080, 0x00004020 }, + { 0x00802040, 0x2200753d, 0x008d00c0, 0x00004024 }, + { 0x00802041, 0x210077bd, 0x008d0180, 0x00000060 }, + { 0x00802041, 0x214077bd, 0x008d0200, 0x00000064 }, + { 0x00802040, 0x210077bd, 0x008d0100, 0x008d0140 }, + { 0x00802040, 0x202077be, 0x008d0100, 0x0000006c }, + { 0x00802041, 0x210077bd, 0x008d0180, 0x00000070 }, + { 0x00802041, 0x214077bd, 0x008d0200, 0x00000074 }, + { 0x00802040, 0x210077bd, 0x008d0100, 0x008d0140 }, + { 0x00802040, 0x206077be, 0x008d0100, 0x0000007c }, { 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 }, { 0x00600001, 0x22600021, 0x008d0260, 0x00000000 }, { 0x00600001, 0x202003be, 
0x008d0020, 0x00000000 }, diff --git a/src/i965_render.c b/src/i965_render.c index 93583b0a..26c06aa4 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -318,7 +318,7 @@ static const uint32_t sip_kernel_static[][4] = { */ #define SF_KERNEL_NUM_GRF 16 -#define SF_MAX_THREADS 1 +#define SF_MAX_THREADS 2 static const uint32_t sf_kernel_static[][4] = { #include "exa_sf_prog.h" @@ -328,10 +328,6 @@ static const uint32_t sf_kernel_static_mask[][4] = { #include "exa_sf_mask_prog.h" }; -static const uint32_t sf_kernel_static_rotation[][4] = { -#include "exa_sf_rotation_prog.h" -}; - /* ps kernels */ #define PS_KERNEL_NUM_GRF 32 #define PS_MAX_THREADS 32 @@ -352,10 +348,6 @@ static const uint32_t ps_kernel_static_masknoca [][4] = { #include "exa_wm_masknoca_prog.h" }; -static const uint32_t ps_kernel_static_rotation [][4] = { -#include "exa_wm_rotation_prog.h" -}; - static uint32_t i965_get_card_format(PicturePtr pPict) { @@ -370,21 +362,6 @@ i965_get_card_format(PicturePtr pPict) return i965_tex_formats[i].card_fmt; } -static Bool -i965_check_rotation_transform(PictTransformPtr t) -{ - /* XXX this is arbitrary */ - int a, b; - a = xFixedToInt(t->matrix[0][1]); - b = xFixedToInt(t->matrix[1][0]); - if (a == -1 && b == 1) - return TRUE; - else if (a == 1 && b == -1) - return TRUE; - else - return FALSE; -} - Bool i965_prepare_composite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, PicturePtr pDstPicture, @@ -397,7 +374,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, mask_tiled = 0; uint32_t dst_format, dst_offset, dst_pitch, dst_tile_format = 0, dst_tiled = 0; - Bool rotation_program = FALSE; IntelEmitInvarientState(pScrn); *pI830->last_3d = LAST_3D_RENDER; @@ -431,9 +407,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, pI830->transform[1] = NULL; pI830->scale_units[1][0] = -1; pI830->scale_units[1][1] = -1; - if (pI830->transform[0] && - i965_check_rotation_transform(pI830->transform[0])) - rotation_program = TRUE; } else { 
pI830->transform[1] = pMaskPicture->transform; if (pI830->transform[1]) @@ -469,8 +442,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, sf_kernel_offset = ALIGN(next_offset, 64); if (pMask) next_offset = sf_kernel_offset + sizeof (sf_kernel_static_mask); - else if (rotation_program) - next_offset = sf_kernel_offset + sizeof (sf_kernel_static_rotation); else next_offset = sf_kernel_offset + sizeof (sf_kernel_static); @@ -488,8 +459,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, } else next_offset = ps_kernel_offset + sizeof(ps_kernel_static_masknoca); - } else if (rotation_program) { - next_offset = ps_kernel_offset + sizeof (ps_kernel_static_rotation); } else { next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask); } @@ -816,9 +785,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, if (pMask) memcpy(sf_kernel, sf_kernel_static_mask, sizeof (sf_kernel_static_mask)); - else if (rotation_program) - memcpy(sf_kernel, sf_kernel_static_rotation, - sizeof (sf_kernel_static_rotation)); else memcpy(sf_kernel, sf_kernel_static, sizeof (sf_kernel_static)); @@ -870,9 +836,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, } else memcpy(ps_kernel, ps_kernel_static_masknoca, sizeof (ps_kernel_static_masknoca)); - } else if (rotation_program) { - memcpy(ps_kernel, ps_kernel_static_rotation, - sizeof (ps_kernel_static_rotation)); } else { memcpy(ps_kernel, ps_kernel_static_nomask, sizeof (ps_kernel_static_nomask)); @@ -883,7 +846,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, wm_state->thread0.kernel_start_pointer = (state_base_offset + ps_kernel_offset) >> 6; wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); - wm_state->thread1.single_program_flow = 1; + wm_state->thread1.single_program_flow = 0; if (!pMask) wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ else |