summaryrefslogtreecommitdiff
path: root/src/r600_shader.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/r600_shader.c')
-rw-r--r--src/r600_shader.c440
1 files changed, 138 insertions, 302 deletions
diff --git a/src/r600_shader.c b/src/r600_shader.c
index addba36f..0a820cf3 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -560,41 +560,12 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
return i;
}
-/*
- * ; xv ps planar
- * 00 TEX: ADDR(20) CNT(3) NO_BARRIER
- * 0 SAMPLE R1.x__1, R0.xy01, t0, s0
- * 1 SAMPLE R1.__x_, R0.xy01, t1, s1
- * 2 SAMPLE R1._x__, R0.xy01, t2, s2
- * 01 TEX: ADDR(28) CNT(2) NO_BARRIER
- * 0 SAMPLE R1.x__1, R0.xy01, t0, s0
- * 1 SAMPLE R1._xy_, R0.xy01, t1, s1
- * 02 ALU: ADDR(4) CNT(16)
- * 3 x: MULADD R1.x, R1.x, C3.x, C3.y CLAMP
- * y: MULADD R1.y, R1.y, C3.z, C3.w
- * z: MULADD R1.z, R1.z, C3.z, C3.w
- * w: MOV R1.w, 0.0f
- * 4 x: DOT4 R2.x, R1.x, C0.x CLAMP VEC_102
- * y: DOT4 ____, R1.y, C0.y CLAMP VEC_102
- * z: DOT4 ____, R1.z, C0.z CLAMP VEC_102
- * w: DOT4 ____, R1.w, C0.w CLAMP VEC_021
- * 5 x: DOT4 ____, R1.x, C1.x CLAMP VEC_102
- * y: DOT4 R2.y, R1.y, C1.y CLAMP VEC_102
- * z: DOT4 ____, R1.z, C1.z CLAMP VEC_102
- * w: DOT4 ____, R1.w, C1.w CLAMP VEC_021
- * 6 x: DOT4 ____, R1.x, C2.x CLAMP VEC_102
- * y: DOT4 ____, R1.y, C2.y CLAMP VEC_102
- * z: DOT4 R2.z, R1.z, C2.z CLAMP VEC_102
- * w: DOT4 ____, R1.w, C2.w CLAMP VEC_021
- * 03 EXP_DONE: PIX0, R2
- * END_OF_PROGRAM
- */
int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
{
int i = 0;
/* 0 */
- shader[i++] = CF_DWORD0(ADDR(20));
+ shader[i++] = CF_DWORD0(ADDR(16));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_BOOL),
@@ -606,7 +577,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(0));
/* 1 */
- shader[i++] = CF_DWORD0(ADDR(28));
+ shader[i++] = CF_DWORD0(ADDR(24));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_NOT_BOOL),
@@ -625,7 +596,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
- I_COUNT(16),
+ I_COUNT(12),
USES_WATERFALL(0),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
@@ -648,73 +619,74 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 4 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ /* 4,5,6,7 */
+ /* r2.x = MAD(c0.w, r1.x, c0.x) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(259),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Y),
+ SRC2_ELEM(ELEM_X),
SRC2_NEG(0),
ALU_INST(SQ_OP3_INST_MULADD),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
- CLAMP(1));
- /* 5 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ CLAMP(0));
+ /* r2.y = MAD(c0.w, r1.x, c0.y) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(259),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_W),
+ SRC2_ELEM(ELEM_Y),
SRC2_NEG(0),
ALU_INST(SQ_OP3_INST_MULADD),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 6 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ /* r2.z = MAD(c0.w, r1.x, c0.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Z),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(259),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_W),
+ SRC2_ELEM(ELEM_Z),
SRC2_NEG(0),
ALU_INST(SQ_OP3_INST_MULADD),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 7 */
+ /* r2.w = MAD(0, 0, 1) */
shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -726,334 +698,198 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MOV),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 8 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+
+ /* 8,9,10,11 */
+ /* r2.x = MAD(c1.x, r1.y, pv.x) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(257),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
- CLAMP(1));
- /* 9 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ CLAMP(0));
+ /* r2.y = MAD(c1.y, r1.y, pv.y) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(257),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
- CLAMP(1));
- /* 10 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ CLAMP(0));
+ /* r2.z = MAD(c1.z, r1.y, pv.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(257),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(256),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Z),
- CLAMP(1));
- /* 11 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
- SRC0_NEG(0),
- SRC1_SEL(256),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_021),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_W),
- CLAMP(1));
- /* 12 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(257),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(1));
- /* 13 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
DST_GPR(2),
DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(1));
- /* 14 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Z),
- SRC0_NEG(0),
- SRC1_SEL(257),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
- CLAMP(1));
- /* 15 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ CLAMP(0));
+ /* r2.w = MAD(0, 0, 1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
+ SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(SQ_ALU_SRC_0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_021),
- DST_GPR(0),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
- CLAMP(1));
- /* 16 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ CLAMP(0));
+ /* 12,13,14,15 */
+ /* r2.x = MAD(c2.x, r1.z, pv.x) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(258),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(1));
- /* 17 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ /* r2.y = MAD(c2.y, r1.z, pv.y) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(258),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(1));
- /* 18 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ /* r2.z = MAD(c2.z, r1.z, pv.z) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(258),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(1));
- /* 19 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ /* r2.w = MAD(0, 0, 1) */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
+ SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(SQ_ALU_SRC_0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_021),
- DST_GPR(0),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_X),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(1));
- /* 20 */
- shader[i++] = CF_DWORD0(ADDR(22));
+
+ /* 16 */
+ shader[i++] = CF_DWORD0(ADDR(18));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -1064,7 +900,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_TEX),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 21 */
+ /* 17 */
shader[i++] = CF_DWORD0(ADDR(0));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -1076,7 +912,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 22/23 */
+ /* 18/19 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -1104,7 +940,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 24/25 */
+ /* 20/21 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -1132,7 +968,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 26/27 */
+ /* 22/23 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -1160,8 +996,8 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 28 */
- shader[i++] = CF_DWORD0(ADDR(30));
+ /* 24 */
+ shader[i++] = CF_DWORD0(ADDR(26));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -1172,7 +1008,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_TEX),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 29 */
+ /* 25 */
shader[i++] = CF_DWORD0(ADDR(0));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -1184,7 +1020,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 30/31 */
+ /* 26/27 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -1212,7 +1048,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 32/33 */
+ /* 28/29 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),