summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2006-09-01 16:47:17 -0700
committerEric Anholt <eric@anholt.net>2006-09-01 16:47:17 -0700
commit1e6e288b8826789f3b2520d12426ff7852a67ccd (patch)
treefd129aaac4b418f11ec56c94171f4c6fec692bde
parent1feb733eb8b09a8b07b7a6987add5149c53b0157 (diff)
Add a check for intel-gen4asm, and rules to compile {wm,sf}_prog.h using it.
This adds assembly source for the two programs used in the textured video implementation on the 965, which should make them easier to modify in the future. The compiled versions are also included, so that intel-gen4asm isn't a build requirement for people that aren't modifying these programs. There are minor differences in the compiled versions of these programs compared to their previous versions which were compiled with a different tool. I believe the changes should be harmless, and video continues to work on my system.
-rw-r--r--configure.ac3
-rw-r--r--src/Makefile.am12
-rw-r--r--src/i830_video.c35
-rw-r--r--src/packed_yuv_sf.g4a17
-rw-r--r--src/packed_yuv_wm.g4a161
-rw-r--r--src/sf_prog.h17
-rw-r--r--src/wm_prog.h110
7 files changed, 224 insertions, 131 deletions
diff --git a/configure.ac b/configure.ac
index 436d7e5c..a27822e5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -49,6 +49,9 @@ AC_DISABLE_STATIC
AC_PROG_LIBTOOL
AC_PROG_CC
+AC_CHECK_PROG(gen4asm, [intel-gen4asm], yes, no)
+AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes)
+
AH_TOP([#include "xorg-server.h"])
AC_ARG_WITH(xorg-module-dir,
diff --git a/src/Makefile.am b/src/Makefile.am
index 63370ab9..ce7b40e4 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -33,6 +33,7 @@ i810_drv_ladir = @moduledir@/drivers
i810_drv_la_SOURCES = \
brw_defines.h \
brw_structs.h \
+ sf_prog.h \
wm_prog.h \
common.h \
i810_accel.c \
@@ -66,6 +67,13 @@ i810_drv_la_SOURCES = \
i915_reg.h \
i915_video.c
+if HAVE_GEN4ASM
+sf_prog.h: packed_yuv_sf.g4a
+ intel-gen4asm -o sf_prog.h packed_yuv_sf.g4a
+wm_prog.h: packed_yuv_wm.g4a
+ intel-gen4asm -o wm_prog.h packed_yuv_wm.g4a
+endif
+
if DRI
i810_drv_la_SOURCES += \
i810_dri.c \
@@ -74,3 +82,7 @@ i810_drv_la_SOURCES += \
i810_hwmc.c \
i830_dri.h
endif
+
+EXTRA_DIST = \
+ packed_yuv_sf.g4a \
+ packed_yuv_wm.g4a
diff --git a/src/i830_video.c b/src/i830_video.c
index 7f6eb80d..bbf1df76 100644
--- a/src/i830_video.c
+++ b/src/i830_video.c
@@ -2128,40 +2128,7 @@ static const CARD32 sip_kernel_static[][4] = {
#define SF_MAX_THREADS 1
static const CARD32 sf_kernel_static[][4] = {
-/* send 0 (1) g6<1>F g1.12<0,1,0>F math mlen 1 rlen 1 { align1 + } */
- { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
-/* send 0 (1) g6.4<1>F g1.20<0,1,0>F math mlen 1 rlen 1 { align1 + } */
- { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
-/* add (8) g7<1>F g4<8,8,1>F g3<8,8,1>F { align1 + } */
- { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 },
-/* mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 + } */
- { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 },
-/* mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 + } */
- { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 },
-/* mov (8) m1<1>F g7<0,1,0>F { align1 + } */
- { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
-/* mov (8) m2<1>F g7.4<0,1,0>F { align1 + } */
- { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
-/* mov (8) m3<1>F g3<8,8,1>F { align1 + } */
- { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
-/* send 0 (8) a0<1>F g0<8,8,1>F urb mlen 4 rlen 0 write +0 transpose used complete EOT{ align1 + } */
- { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+#include "sf_prog.h"
};
/*
diff --git a/src/packed_yuv_sf.g4a b/src/packed_yuv_sf.g4a
new file mode 100644
index 00000000..8c1398f4
--- /dev/null
+++ b/src/packed_yuv_sf.g4a
@@ -0,0 +1,17 @@
+send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+add (8) g7<1>F g4<8,8,1>F -g3<8,8,1>F { align1 };
+mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 };
+mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 };
+mov (8) m1<1>F g7<0,1,0>F { align1 };
+mov (8) m2<1>F g7.4<0,1,0>F { align1 };
+mov (8) m3<1>F g3<8,8,1>F { align1 };
+send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT };
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/src/packed_yuv_wm.g4a b/src/packed_yuv_wm.g4a
new file mode 100644
index 00000000..d312d170
--- /dev/null
+++ b/src/packed_yuv_wm.g4a
@@ -0,0 +1,161 @@
+/* The initial payload of the thread is always g0.
+ * WM_URB (incoming URB entries) is g3
+ * X0_R is g4
+ * X1_R is g5
+ * Y0_R is g6
+ * Y1_R is g7
+ */
+
+ /* Set up the X/Y screen coordinates of the pixels in our 4 subspans. Each
+ * subspan is a 2x2 rectangle, and the screen x/y of the upper left of each
+ * subspan are given in GRF register 1.2 through 1.5 (which, with the byte
+ * offsets used below, are g1.8 through g1.22).
+ *
+ * The result is WM_X*_R and WM_Y*_R being:
+ *
+ * X0: {ss0.x, ss0.x+1, ss0.x, ss0.x+1, ss1.x, ss1.x+1, ss1.x, ss1.x+1}
+ * Y0: {ss0.y, ss0.y, ss0.y+1, ss0.y+1, ss1.y, ss1.y, ss1.y+1, ss1.y+1}
+ * X1: {ss2.x, ss2.x+1, ss2.x, ss2.x+1, ss3.x, ss3.x+1, ss3.x, ss3.x+1}
+ * Y1: {ss2.y, ss2.y, ss2.y+1, ss2.y+1, ss3.y, ss3.y, ss3.y+1, ss3.y+1}
+ */
+
+ /* Set up ss0.x coordinates*/
+mov (1) g4<1>F g1.8<0,1,0>UW { align1 };
+add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 };
+mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 };
+add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 };
+ /* Set up ss0.y coordinates */
+mov (1) g6<1>F g1.10<0,1,0>UW { align1 };
+mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 };
+add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 };
+add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 };
+ /* set up ss1.x coordinates */
+mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 };
+add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 };
+mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 };
+add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 };
+ /* set up ss1.y coordinates */
+mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 };
+mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 };
+add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { align1 };
+add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.x coordinates */
+mov (1) g5<1>F g1.16<0,1,0>UW { align1 };
+add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 };
+mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 };
+add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.y coordinates */
+mov (1) g7<1>F g1.18<0,1,0>UW { align1 };
+mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 };
+add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 };
+add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.x coordinates */
+mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 };
+add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 };
+mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 };
+add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.y coordinates */
+mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 };
+mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 };
+add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 };
+add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 };
+
+ /* Now, map these screen space coordinates into texture coordinates. */
+ /* subtract screen-space X origin of vertex 0. */
+add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 };
+add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 };
+ /* scale by texture X increment */
+mul (8) g4<1>F g4<8,8,1>F g3<0,1,0>F { align1 };
+mul (8) g5<1>F g5<8,8,1>F g3<0,1,0>F { align1 };
+ /* add in texture X offset */
+add (8) g4<1>F g4<8,8,1>F g3.12<0,1,0>F { align1 };
+add (8) g5<1>F g5<8,8,1>F g3.12<0,1,0>F { align1 };
+ /* subtract screen-space Y origin of vertex 0. */
+add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 };
+add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 };
+ /* scale by texture Y increment */
+mul (8) g6<1>F g6<8,8,1>F g3.20<0,1,0>F { align1 };
+mul (8) g7<1>F g7<8,8,1>F g3.20<0,1,0>F { align1 };
+ /* add in texture Y offset */
+add (8) g6<1>F g6<8,8,1>F g3.28<0,1,0>F { align1 };
+add (8) g7<1>F g7<8,8,1>F g3.28<0,1,0>F { align1 };
+ /* sampler */
+mov (8) m1<1>F g4<8,8,1>F { align1 };
+mov (8) m2<1>F g5<8,8,1>F { align1 };
+mov (8) m3<1>F g6<8,8,1>F { align1 };
+mov (8) m4<1>F g7<8,8,1>F { align1 };
+
+ /*
+ * g0 holds the PS thread payload, which (oddly) contains
+ * precisely what the sampler wants to see in m0
+ */
+send (16) 0 g12<1>UW g0<8,8,1>UW sampler (1,0,F) mlen 5 rlen 8 { align1 };
+mov (8) g19<1>UW g19<8,8,1>UW { align1 };
+
+ /* color space conversion function:
+ * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1)
+ * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1)
+ * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1)
+ *
+ * Y is g14, g15.
+ * Cr is g12, g13.
+ * Cb is g16, g17.
+ *
+ * R is m2, m6.
+ * G is m3, m7.
+ * B is m4, m8.
+ */
+ /* Y = Y - 16/255 */
+add (8) g14<1>F g14<8,8,1>F -0.0627451F { align1 };
+ /* Cr = Cr - 128/255 */
+add (8) g12<1>F g12<8,8,1>F -0.501961F { align1 };
+ /* Cb = Cb - 128 / 255 */
+add (8) g16<1>F g16<8,8,1>F -0.501961F { align1 };
+ /* Y = Y * 1.164 */
+mul (8) g14<1>F g14<8,8,1>F 1.164F { align1 };
+ /* acc = 1.596 * Cr */
+mul (8) null g12<8,8,1>F 1.596F { align1 };
+ /* R = acc + Y */
+mac.sat (8) m2<1>F g14<8,8,1>F 1F { align1 };
+ /* acc = Cr * -0.813 */
+mul (8) null g12<8,8,1>F -0.813F { align1 };
+ /* acc += Cb * -0.392 */
+mac (8) null g16<8,8,1>F -0.392F { align1 };
+ /* G = acc + Y */
+mac.sat (8) m3<1>F g14<8,8,1>F 1F { align1 };
+ /* acc = Cb * 2.017 */
+mul (8) null g16<8,8,1>F 2.017F { align1 };
+ /* B = acc + Y */
+mac.sat (8) m4<1>F g14<8,8,1>F 1F { align1 };
+ /* and do it again */
+add (8) g15<1>F g15<8,8,1>F -0.0627451F { align1 };
+add (8) g13<1>F g13<8,8,1>F -0.501961F { align1 };
+add (8) g17<1>F g17<8,8,1>F -0.501961F { align1 };
+mul (8) g15<1>F g15<8,8,1>F 1.164F { align1 };
+mul (8) null g13<8,8,1>F 1.596F { align1 };
+mac.sat (8) m6<1>F g15<8,8,1>F 1F { align1 };
+mul (8) null g13<8,8,1>F -0.813F { align1 };
+mac (8) null g17<8,8,1>F -0.392F { align1 };
+mac.sat (8) m7<1>F g15<8,8,1>F 1F { align1 };
+mul (8) null g17<8,8,1>F 2.017F { align1 };
+mac.sat (8) m8<1>F g15<8,8,1>F 1F { align1 };
+
+ /* Pass through control information:
+ */
+mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
+ /* Send framebuffer write message: XXX: acc0? */
+send (16) 0 null g0<8,8,1>UW write (
+ 0, /* binding table index 0 */
+ 8, /* pixel scoreboard clear */
+ 4, /* render target write */
+ 0 /* no write commit message */
+ ) mlen 10 rlen 0 { align1 EOT };
+ /* padding */
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/src/sf_prog.h b/src/sf_prog.h
new file mode 100644
index 00000000..830d1760
--- /dev/null
+++ b/src/sf_prog.h
@@ -0,0 +1,17 @@
+ { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
+ { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
+ { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 },
+ { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 },
+ { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 },
+ { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
+ { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/wm_prog.h b/src/wm_prog.h
index 297ddcbf..708e6eb6 100644
--- a/src/wm_prog.h
+++ b/src/wm_prog.h
@@ -1,166 +1,82 @@
-/* wm_program */
-/* mov (1) g4<1>F g1.8<0,1,0>UW { align1 + } */
{ 0x00000001, 0x2080013d, 0x00000028, 0x00000000 },
-/* add (1) g4.4<1>F g1.8<0,1,0>UW 1 { align1 + } */
{ 0x00000040, 0x20840d3d, 0x00000028, 0x00000001 },
-/* mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 + } */
{ 0x00000001, 0x2088013d, 0x00000028, 0x00000000 },
-/* add (1) g4.12<1>F g1.8<0,1,0>UW 1 { align1 + } */
{ 0x00000040, 0x208c0d3d, 0x00000028, 0x00000001 },
-/* mov (1) g6<1>F g1.10<0,1,0>UW { align1 + } */
{ 0x00000001, 0x20c0013d, 0x0000002a, 0x00000000 },
-/* mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 + } */
{ 0x00000001, 0x20c4013d, 0x0000002a, 0x00000000 },
-/* add (1) g6.8<1>F g1.10<0,1,0>UW 1 { align1 + } */
{ 0x00000040, 0x20c80d3d, 0x0000002a, 0x00000001 },
-/* add (1) g6.12<1>F g1.10<0,1,0>UW 1 { align1 + } */
{ 0x00000040, 0x20cc0d3d, 0x0000002a, 0x00000001 },
-/* mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 + } */
{ 0x00000001, 0x2090013d, 0x0000002c, 0x00000000 },
-/* add (1) g4.20<1>F g1.12<0,1,0>UW 1 { align1 + } */
{ 0x00000040, 0x20940d3d, 0x0000002c, 0x00000001 },
-/* mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 + } */
{ 0x00000001, 0x2098013d, 0x0000002c, 0x00000000 },
-/* add (1) g4.28<1>F g1.12<0,1,0>UW 1 { align1 + } */
{ 0x00000040, 0x209c0d3d, 0x0000002c, 0x00000001 },
-/* mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 + } */
{ 0x00000001, 0x20d0013d, 0x0000002e, 0x00000000 },
-/* mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 + } */
{ 0x00000001, 0x20d4013d, 0x0000002e, 0x00000000 },
-/* add (1) g6.24<1>F g1.14<0,1,0>UW 1 { align1 + } */
{ 0x00000040, 0x20d80d3d, 0x0000002e, 0x00000001 },
-/* add (1) g6.28<1>F g1.14<0,1,0>UW 1 { align1 + } */
{ 0x00000040, 0x20dc0d3d, 0x0000002e, 0x00000001 },
-/* mov (1) g5<1>F g1.16<0,1,0>UW { align1 + } */
{ 0x00000001, 0x20a0013d, 0x00000030, 0x00000000 },
-/* add (1) g5.4<1>F g1.16<0,1,0>UW 1 { align1 + } */
{ 0x00000040, 0x20a40d3d, 0x00000030, 0x00000001 },
-/* mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 + } */
{ 0x00000001, 0x20a8013d, 0x00000030, 0x00000000 },
-/* add (1) g5.12<1>F g1.16<0,1,0>UW 1 { align1 + } */
{ 0x00000040, 0x20ac0d3d, 0x00000030, 0x00000001 },
-/* mov (1) g7<1>F g1.18<0,1,0>UW { align1 + } */
{ 0x00000001, 0x20e0013d, 0x00000032, 0x00000000 },
-/* mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 + } */
{ 0x00000001, 0x20e4013d, 0x00000032, 0x00000000 },
-/* add (1) g7.8<1>F g1.18<0,1,0>UW 1 { align1 + } */
{ 0x00000040, 0x20e80d3d, 0x00000032, 0x00000001 },
-/* add (1) g7.12<1>F g1.18<0,1,0>UW 1 { align1 + } */
{ 0x00000040, 0x20ec0d3d, 0x00000032, 0x00000001 },
-/* mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 + } */
{ 0x00000001, 0x20b0013d, 0x00000034, 0x00000000 },
-/* add (1) g5.20<1>F g1.20<0,1,0>UW 1 { align1 + } */
{ 0x00000040, 0x20b40d3d, 0x00000034, 0x00000001 },
-/* mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 + } */
{ 0x00000001, 0x20b8013d, 0x00000034, 0x00000000 },
-/* add (1) g5.28<1>F g1.20<0,1,0>UW 1 { align1 + } */
{ 0x00000040, 0x20bc0d3d, 0x00000034, 0x00000001 },
-/* mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 + } */
{ 0x00000001, 0x20f0013d, 0x00000036, 0x00000000 },
-/* mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 + } */
{ 0x00000001, 0x20f4013d, 0x00000036, 0x00000000 },
-/* add (1) g7.24<1>F g1.22<0,1,0>UW 1 { align1 + } */
{ 0x00000040, 0x20f80d3d, 0x00000036, 0x00000001 },
-/* add (1) g7.28<1>F g1.22<0,1,0>UW 1 { align1 + } */
{ 0x00000040, 0x20fc0d3d, 0x00000036, 0x00000001 },
-/* add (8) g4<1>F g4<8,8,1>F g1<0,1,0>F { align1 + } */
{ 0x00600040, 0x208077bd, 0x008d0080, 0x00004020 },
-/* add (8) g5<1>F g5<8,8,1>F g1<0,1,0>F { align1 + } */
{ 0x00600040, 0x20a077bd, 0x008d00a0, 0x00004020 },
-/* mul (8) g4<1>F g4<8,8,1>F g3<0,1,0>F { align1 + } */
{ 0x00600041, 0x208077bd, 0x008d0080, 0x00000060 },
-/* mul (8) g5<1>F g5<8,8,1>F g3<0,1,0>F { align1 + } */
{ 0x00600041, 0x20a077bd, 0x008d00a0, 0x00000060 },
-/* add (8) g4<1>F g4<8,8,1>F g3.12<0,1,0>F { align1 + } */
{ 0x00600040, 0x208077bd, 0x008d0080, 0x0000006c },
-/* add (8) g5<1>F g5<8,8,1>F g3.12<0,1,0>F { align1 + } */
{ 0x00600040, 0x20a077bd, 0x008d00a0, 0x0000006c },
-/* add (8) g6<1>F g6<8,8,1>F g1.4<0,1,0>F { align1 + } */
{ 0x00600040, 0x20c077bd, 0x008d00c0, 0x00004024 },
-/* add (8) g7<1>F g7<8,8,1>F g1.4<0,1,0>F { align1 + } */
{ 0x00600040, 0x20e077bd, 0x008d00e0, 0x00004024 },
-/* mul (8) g6<1>F g6<8,8,1>F g3.20<0,1,0>F { align1 + } */
{ 0x00600041, 0x20c077bd, 0x008d00c0, 0x00000074 },
-/* mul (8) g7<1>F g7<8,8,1>F g3.20<0,1,0>F { align1 + } */
{ 0x00600041, 0x20e077bd, 0x008d00e0, 0x00000074 },
-/* add (8) g6<1>F g6<8,8,1>F g3.28<0,1,0>F { align1 + } */
{ 0x00600040, 0x20c077bd, 0x008d00c0, 0x0000007c },
-/* add (8) g7<1>F g7<8,8,1>F g3.28<0,1,0>F { align1 + } */
{ 0x00600040, 0x20e077bd, 0x008d00e0, 0x0000007c },
-/* mov (8) m1<1>F g4<8,8,1>F { align1 + } */
{ 0x00600001, 0x202003be, 0x008d0080, 0x00000000 },
-/* mov (8) m2<1>F g5<8,8,1>F { align1 + } */
{ 0x00600001, 0x204003be, 0x008d00a0, 0x00000000 },
-/* mov (8) m3<1>F g6<8,8,1>F { align1 + } */
{ 0x00600001, 0x206003be, 0x008d00c0, 0x00000000 },
-/* mov (8) m4<1>F g7<8,8,1>F { align1 + } */
{ 0x00600001, 0x208003be, 0x008d00e0, 0x00000000 },
-/* send 0 (16) g12<1>UW g0<8,8,1>UW sampler mlen 5 rlen 8 { align1 + } */
{ 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 },
-/* mov (8) g19<1>UW g19<8,8,1>UW { align1 + } */
{ 0x00600001, 0x22600129, 0x008d0260, 0x00000000 },
-/* add (8) g14<1>F g14<8,8,1>F -0.0627451{ align1 + } */
{ 0x00600040, 0x21c07fbd, 0x008d01c0, 0xbd808081 },
-/* add (8) g12<1>F g12<8,8,1>F -0.501961{ align1 + } */
- { 0x00600040, 0x21807fbd, 0x008d0180, 0xbf008081 },
-/* add (8) g16<1>F g16<8,8,1>F -0.501961{ align1 + } */
- { 0x00600040, 0x22007fbd, 0x008d0200, 0xbf008081 },
-/* mul (8) g14<1>F g14<8,8,1>F 1.164{ align1 + } */
+ { 0x00600040, 0x21807fbd, 0x008d0180, 0xbf008084 },
+ { 0x00600040, 0x22007fbd, 0x008d0200, 0xbf008084 },
{ 0x00600041, 0x21c07fbd, 0x008d01c0, 0x3f94fdf4 },
-/* mul (8) a0<1>F g12<8,8,1>F 1.596{ align1 + } */
{ 0x00600041, 0x20007fbc, 0x008d0180, 0x3fcc49ba },
-/* mac (8) m2<1>F g14<8,8,1>F 1{ align1 + Saturate } */
{ 0x80600048, 0x20407fbe, 0x008d01c0, 0x3f800000 },
-/* mul (8) a0<1>F g12<8,8,1>F -0.813{ align1 + } */
{ 0x00600041, 0x20007fbc, 0x008d0180, 0xbf5020c5 },
-/* mac (8) a0<1>F g16<8,8,1>F -0.392{ align1 + } */
{ 0x00600048, 0x20007fbc, 0x008d0200, 0xbec8b439 },
-/* mac (8) m3<1>F g14<8,8,1>F 1{ align1 + Saturate } */
{ 0x80600048, 0x20607fbe, 0x008d01c0, 0x3f800000 },
-/* mul (8) a0<1>F g16<8,8,1>F 2.017{ align1 + } */
{ 0x00600041, 0x20007fbc, 0x008d0200, 0x40011687 },
-/* mac (8) m4<1>F g14<8,8,1>F 1{ align1 + Saturate } */
{ 0x80600048, 0x20807fbe, 0x008d01c0, 0x3f800000 },
-/* add (8) g15<1>F g15<8,8,1>F -0.0627451{ align1 + } */
{ 0x00600040, 0x21e07fbd, 0x008d01e0, 0xbd808081 },
-/* add (8) g13<1>F g13<8,8,1>F -0.501961{ align1 + } */
- { 0x00600040, 0x21a07fbd, 0x008d01a0, 0xbf008081 },
-/* add (8) g17<1>F g17<8,8,1>F -0.501961{ align1 + } */
- { 0x00600040, 0x22207fbd, 0x008d0220, 0xbf008081 },
-/* mul (8) g15<1>F g15<8,8,1>F 1.164{ align1 + } */
+ { 0x00600040, 0x21a07fbd, 0x008d01a0, 0xbf008084 },
+ { 0x00600040, 0x22207fbd, 0x008d0220, 0xbf008084 },
{ 0x00600041, 0x21e07fbd, 0x008d01e0, 0x3f94fdf4 },
-/* mul (8) a0<1>F g13<8,8,1>F 1.596{ align1 + } */
{ 0x00600041, 0x20007fbc, 0x008d01a0, 0x3fcc49ba },
-/* mac (8) m6<1>F g15<8,8,1>F 1{ align1 + Saturate } */
{ 0x80600048, 0x20c07fbe, 0x008d01e0, 0x3f800000 },
-/* mul (8) a0<1>F g13<8,8,1>F -0.813{ align1 + } */
{ 0x00600041, 0x20007fbc, 0x008d01a0, 0xbf5020c5 },
-/* mac (8) a0<1>F g17<8,8,1>F -0.392{ align1 + } */
{ 0x00600048, 0x20007fbc, 0x008d0220, 0xbec8b439 },
-/* mac (8) m7<1>F g15<8,8,1>F 1{ align1 + Saturate } */
{ 0x80600048, 0x20e07fbe, 0x008d01e0, 0x3f800000 },
-/* mul (8) a0<1>F g17<8,8,1>F 2.017{ align1 + } */
{ 0x00600041, 0x20007fbc, 0x008d0220, 0x40011687 },
-/* mac (8) m8<1>F g15<8,8,1>F 1{ align1 + Saturate } */
{ 0x80600048, 0x21007fbe, 0x008d01e0, 0x3f800000 },
-/* mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable + } */
{ 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
-/* send 0 (16) a0<1>UW g0<8,8,1>UW write mlen 10 rlen 0 EOT{ align1 + } */
- { 0x00800031, 0x20001d28, 0x008d0000, 0x85a04800 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-
+ { 0x00800031, 0x20001d3c, 0x008d0000, 0x85a04800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },