diff options
Diffstat (limited to 'src/r600_shader.c')
-rw-r--r-- | src/r600_shader.c | 440 |
1 files changed, 138 insertions, 302 deletions
diff --git a/src/r600_shader.c b/src/r600_shader.c index addba36f..0a820cf3 100644 --- a/src/r600_shader.c +++ b/src/r600_shader.c @@ -560,41 +560,12 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) return i; } -/* - * ; xv ps planar - * 00 TEX: ADDR(20) CNT(3) NO_BARRIER - * 0 SAMPLE R1.x__1, R0.xy01, t0, s0 - * 1 SAMPLE R1.__x_, R0.xy01, t1, s1 - * 2 SAMPLE R1._x__, R0.xy01, t2, s2 - * 01 TEX: ADDR(28) CNT(2) NO_BARRIER - * 0 SAMPLE R1.x__1, R0.xy01, t0, s0 - * 1 SAMPLE R1._xy_, R0.xy01, t1, s1 - * 02 ALU: ADDR(4) CNT(16) - * 3 x: MULADD R1.x, R1.x, C3.x, C3.y CLAMP - * y: MULADD R1.y, R1.y, C3.z, C3.w - * z: MULADD R1.z, R1.z, C3.z, C3.w - * w: MOV R1.w, 0.0f - * 4 x: DOT4 R2.x, R1.x, C0.x CLAMP VEC_102 - * y: DOT4 ____, R1.y, C0.y CLAMP VEC_102 - * z: DOT4 ____, R1.z, C0.z CLAMP VEC_102 - * w: DOT4 ____, R1.w, C0.w CLAMP VEC_021 - * 5 x: DOT4 ____, R1.x, C1.x CLAMP VEC_102 - * y: DOT4 R2.y, R1.y, C1.y CLAMP VEC_102 - * z: DOT4 ____, R1.z, C1.z CLAMP VEC_102 - * w: DOT4 ____, R1.w, C1.w CLAMP VEC_021 - * 6 x: DOT4 ____, R1.x, C2.x CLAMP VEC_102 - * y: DOT4 ____, R1.y, C2.y CLAMP VEC_102 - * z: DOT4 R2.z, R1.z, C2.z CLAMP VEC_102 - * w: DOT4 ____, R1.w, C2.w CLAMP VEC_021 - * 03 EXP_DONE: PIX0, R2 - * END_OF_PROGRAM - */ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) { int i = 0; /* 0 */ - shader[i++] = CF_DWORD0(ADDR(20)); + shader[i++] = CF_DWORD0(ADDR(16)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_BOOL), @@ -606,7 +577,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(0)); /* 1 */ - shader[i++] = CF_DWORD0(ADDR(28)); + shader[i++] = CF_DWORD0(ADDR(24)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_NOT_BOOL), @@ -625,7 +596,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), KCACHE_ADDR0(0), KCACHE_ADDR1(0), - I_COUNT(16), + I_COUNT(12), USES_WATERFALL(0), CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), @@ -648,73 +619,74 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 4 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + /* 4,5,6,7 */ + /* r2.x = MAD(c0.w, r1.x, c0.x) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(256), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), + SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(259), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Y), + SRC2_ELEM(ELEM_X), SRC2_NEG(0), ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(1), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), - CLAMP(1)); - /* 5 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + CLAMP(0)); + /* r2.y = MAD(c0.w, r1.x, c0.y) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(256), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), + SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(259), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_W), + SRC2_ELEM(ELEM_Y), SRC2_NEG(0), ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(1), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(0)); - /* 6 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + /* r2.z = MAD(c0.w, r1.x, c0.z) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(256), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Z), + SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(259), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_W), + SRC2_ELEM(ELEM_Z), SRC2_NEG(0), ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(1), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(0)); - /* 7 */ + /* r2.w = MAD(0, 0, 1) */ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), @@ -726,334 +698,198 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MOV), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(1), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(0)); - /* 8 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + + /* 8,9,10,11 */ + /* r2.x = MAD(c1.x, r1.y, pv.x) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(257), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(256), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_Y), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), - CLAMP(1)); - /* 9 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + CLAMP(0)); + /* r2.y = MAD(c1.y, r1.y, pv.y) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(257), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(256), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Y), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), - DST_GPR(0), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Y), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), - CLAMP(1)); - /* 10 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + CLAMP(0)); + /* r2.z = MAD(c1.z, r1.y, pv.z) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(257), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(256), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), - DST_GPR(0), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Z), - CLAMP(1)); - /* 11 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), - SRC0_NEG(0), - SRC1_SEL(256), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_021), - DST_GPR(0), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_W), - CLAMP(1)); - /* 12 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), - SRC0_NEG(0), - SRC1_SEL(257), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), - DST_GPR(0), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_X), - CLAMP(1)); - /* 13 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), - SRC0_NEG(0), - SRC1_SEL(257), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Y), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), DST_GPR(2), DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), - CLAMP(1)); - /* 14 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Z), - SRC0_NEG(0), - SRC1_SEL(257), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), - DST_GPR(0), - DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), - CLAMP(1)); - /* 15 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + CLAMP(0)); + /* r2.w = MAD(0, 0, 1) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), + SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(257), + SRC1_SEL(SQ_ALU_SRC_0), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_021), - DST_GPR(0), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_W), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), - CLAMP(1)); - /* 16 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + CLAMP(0)); + /* 12,13,14,15 */ + /* r2.x = MAD(c2.x, r1.z, pv.x) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(258), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(258), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_Z), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), - DST_GPR(0), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(1)); - /* 17 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + /* r2.y = MAD(c2.y, r1.z, pv.y) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(258), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(258), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), + SRC1_ELEM(ELEM_Z), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), - DST_GPR(0), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Y), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(1)); - /* 18 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + /* r2.z = MAD(c2.z, r1.z, pv.z) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(258), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(258), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Z), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(1)); - /* 19 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + /* r2.w = MAD(0, 0, 1) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), + SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(258), + SRC1_SEL(SQ_ALU_SRC_0), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_021), - DST_GPR(0), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(1)); - /* 20 */ - shader[i++] = CF_DWORD0(ADDR(22)); + + /* 16 */ + shader[i++] = CF_DWORD0(ADDR(18)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -1064,7 +900,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_TEX), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 21 */ + /* 17 */ shader[i++] = CF_DWORD0(ADDR(0)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -1076,7 +912,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 22/23 */ + /* 18/19 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1104,7 +940,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 24/25 */ + /* 20/21 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1132,7 +968,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 26/27 */ + /* 22/23 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1160,8 +996,8 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 28 */ - shader[i++] = CF_DWORD0(ADDR(30)); + /* 24 */ + shader[i++] = CF_DWORD0(ADDR(26)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -1172,7 +1008,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_TEX), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 29 */ + /* 25 */ shader[i++] = CF_DWORD0(ADDR(0)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -1184,7 +1020,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 30/31 */ + /* 26/27 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1212,7 +1048,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 32/33 */ + /* 28/29 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), |