diff options
author | Matthieu Herrb <matthieu@cvs.openbsd.org> | 2012-02-06 22:53:17 +0000 |
---|---|---|
committer | Matthieu Herrb <matthieu@cvs.openbsd.org> | 2012-02-06 22:53:17 +0000 |
commit | eb2ee4ddbc560aaa5b74172c2c72851785efb40a (patch) | |
tree | a01988fa211bdbff64da3bea425a7f69f725987b /driver/xf86-video-ati/src/r600_shader.c | |
parent | 36f7476dfa8dae71728fafaf1b27e51683bb0e69 (diff) |
Revert the update to xf86-video-ati 6.14.3. Requested by espie@
who experiemnts regressions with this driver.
Diffstat (limited to 'driver/xf86-video-ati/src/r600_shader.c')
-rw-r--r-- | driver/xf86-video-ati/src/r600_shader.c | 2166 |
1 files changed, 666 insertions, 1500 deletions
diff --git a/driver/xf86-video-ati/src/r600_shader.c b/driver/xf86-video-ati/src/r600_shader.c index ab2f4850f..addba36f3 100644 --- a/driver/xf86-video-ati/src/r600_shader.c +++ b/driver/xf86-video-ati/src/r600_shader.c @@ -106,16 +106,12 @@ int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1), USE_CONST_FIELDS(0), - DATA_FORMAT(FMT_32_32_FLOAT), - NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), - FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(0), -#if X_BYTE_ORDER == X_BIG_ENDIAN - ENDIAN_SWAP(SQ_ENDIAN_8IN32), -#else - ENDIAN_SWAP(SQ_ENDIAN_NONE), -#endif + ENDIAN_SWAP(ENDIAN_NONE), CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; @@ -161,11 +157,11 @@ int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) BARRIER(1)); /* 2 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), + shader[i++] = ALU_DWORD0(SRC0_SEL(256), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_SEL(0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -187,11 +183,11 @@ int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_ELEM(ELEM_X), CLAMP(1)); /* 3 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), + shader[i++] = ALU_DWORD0(SRC0_SEL(256), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_SEL(0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Y), SRC1_NEG(0), @@ -213,11 +209,11 @@ int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_ELEM(ELEM_Y), CLAMP(1)); /* 4 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), + shader[i++] = ALU_DWORD0(SRC0_SEL(256), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_SEL(0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Z), SRC1_NEG(0), @@ -239,11 +235,11 @@ int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_ELEM(ELEM_Z), CLAMP(1)); /* 5 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), + shader[i++] = ALU_DWORD0(SRC0_SEL(256), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_SEL(0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_W), SRC1_NEG(0), @@ -340,16 +336,12 @@ int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1), USE_CONST_FIELDS(0), - DATA_FORMAT(FMT_32_32_FLOAT), - NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), - FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(0), -#if X_BYTE_ORDER == X_BIG_ENDIAN - ENDIAN_SWAP(SQ_ENDIAN_8IN32), -#else - ENDIAN_SWAP(SQ_ENDIAN_NONE), -#endif + ENDIAN_SWAP(ENDIAN_NONE), CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; @@ -369,16 +361,12 @@ int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1), USE_CONST_FIELDS(0), - DATA_FORMAT(FMT_32_32_FLOAT), - NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), - FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(8), -#if X_BYTE_ORDER == X_BIG_ENDIAN - ENDIAN_SWAP(SQ_ENDIAN_8IN32), -#else - ENDIAN_SWAP(SQ_ENDIAN_NONE), -#endif + ENDIAN_SWAP(ENDIAN_NONE), CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; @@ -469,7 +457,7 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) int i = 0; /* 0 */ - shader[i++] = CF_DWORD0(ADDR(6)); + shader[i++] = CF_DWORD0(ADDR(4)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -480,22 +468,7 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_VTX), WHOLE_QUAD_MODE(0), BARRIER(1)); - - /* 1 - ALU */ - shader[i++] = CF_ALU_DWORD0(ADDR(4), - KCACHE_BANK0(0), - KCACHE_BANK1(0), - KCACHE_MODE0(SQ_CF_KCACHE_NOP)); - shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), - KCACHE_ADDR0(0), - KCACHE_ADDR1(0), - I_COUNT(2), - USES_WATERFALL(0), - CF_INST(SQ_CF_INST_ALU), - WHOLE_QUAD_MODE(0), - BARRIER(1)); - - /* 2 */ + /* 1 */ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), TYPE(SQ_EXPORT_POS), RW_GPR(1), @@ -513,7 +486,7 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 3 */ + /* 2 */ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), TYPE(SQ_EXPORT_PARAM), RW_GPR(0), @@ -531,63 +504,9 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(0)); - - - /* 4 texX / w */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 0), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(0), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_X), - CLAMP(0)); - - /* 5 texY / h */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 0), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(0), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), - CLAMP(0)); - - /* 6/7 */ + shader[i++] = 0x00000000; + shader[i++] = 0x00000000; + /* 4/5 */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -604,19 +523,15 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_SEL_W(SQ_SEL_1), USE_CONST_FIELDS(0), DATA_FORMAT(FMT_32_32_FLOAT), - NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(0), -#if X_BYTE_ORDER == X_BIG_ENDIAN - ENDIAN_SWAP(SQ_ENDIAN_8IN32), -#else - ENDIAN_SWAP(SQ_ENDIAN_NONE), -#endif + ENDIAN_SWAP(ENDIAN_NONE), CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; - /* 8/9 */ + /* 6/7 */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -633,15 +548,11 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_SEL_W(SQ_SEL_1), USE_CONST_FIELDS(0), DATA_FORMAT(FMT_32_32_FLOAT), - NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(8), -#if X_BYTE_ORDER == X_BIG_ENDIAN - ENDIAN_SWAP(SQ_ENDIAN_8IN32), -#else - ENDIAN_SWAP(SQ_ENDIAN_NONE), -#endif + ENDIAN_SWAP(ENDIAN_NONE), CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; @@ -649,12 +560,41 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) return i; } +/* + * ; xv ps planar + * 00 TEX: ADDR(20) CNT(3) NO_BARRIER + * 0 SAMPLE R1.x__1, R0.xy01, t0, s0 + * 1 SAMPLE R1.__x_, R0.xy01, t1, s1 + * 2 SAMPLE R1._x__, R0.xy01, t2, s2 + * 01 TEX: ADDR(28) CNT(2) NO_BARRIER + * 0 SAMPLE R1.x__1, R0.xy01, t0, s0 + * 1 SAMPLE R1._xy_, R0.xy01, t1, s1 + * 02 ALU: ADDR(4) CNT(16) + * 3 x: MULADD R1.x, R1.x, C3.x, C3.y CLAMP + * y: MULADD R1.y, R1.y, C3.z, C3.w + * z: MULADD R1.z, R1.z, C3.z, C3.w + * w: MOV R1.w, 0.0f + * 4 x: DOT4 R2.x, R1.x, C0.x CLAMP VEC_102 + * y: DOT4 ____, R1.y, C0.y CLAMP VEC_102 + * z: DOT4 ____, R1.z, C0.z CLAMP VEC_102 + * w: DOT4 ____, R1.w, C0.w CLAMP VEC_021 + * 5 x: DOT4 ____, R1.x, C1.x CLAMP VEC_102 + * y: DOT4 R2.y, R1.y, C1.y CLAMP VEC_102 + * z: DOT4 ____, R1.z, C1.z CLAMP VEC_102 + * w: DOT4 ____, R1.w, C1.w CLAMP VEC_021 + * 6 x: DOT4 ____, R1.x, C2.x CLAMP VEC_102 + * y: DOT4 ____, R1.y, C2.y CLAMP VEC_102 + * z: DOT4 R2.z, R1.z, C2.z CLAMP VEC_102 + * w: DOT4 ____, R1.w, C2.w CLAMP VEC_021 + * 03 EXP_DONE: PIX0, R2 + * END_OF_PROGRAM + */ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) { int i = 0; /* 0 */ - shader[i++] = CF_DWORD0(ADDR(16)); + shader[i++] = CF_DWORD0(ADDR(20)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_BOOL), @@ -666,7 +606,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(0)); /* 1 */ - shader[i++] = CF_DWORD0(ADDR(24)); + shader[i++] = CF_DWORD0(ADDR(28)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_NOT_BOOL), @@ -685,7 +625,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), KCACHE_ADDR0(0), KCACHE_ADDR1(0), - I_COUNT(12), + I_COUNT(16), USES_WATERFALL(0), CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), @@ -708,74 +648,73 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 4,5,6,7 */ - /* r2.x = MAD(c0.w, r1.x, c0.x) */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), + /* 4 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), + SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(259), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_X), + SRC2_ELEM(ELEM_Y), SRC2_NEG(0), ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), - CLAMP(0)); - /* r2.y = MAD(c0.w, r1.x, c0.y) */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), + CLAMP(1)); + /* 5 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), + SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(259), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_Z), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Y), + SRC2_ELEM(ELEM_W), SRC2_NEG(0), ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(0)); - /* r2.z = MAD(c0.w, r1.x, c0.z) */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), + /* 6 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), + SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(259), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_Z), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Z), + SRC2_ELEM(ELEM_W), SRC2_NEG(0), ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(0)); - /* r2.w = MAD(0, 0, 1) */ + /* 7 */ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), @@ -787,198 +726,334 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_X), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(0)); - - /* 8,9,10,11 */ - /* r2.x = MAD(c1.x, r1.y, pv.x) */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), + /* 8 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(256), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_X), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), - BANK_SWIZZLE(SQ_ALU_VEC_012), + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), - CLAMP(0)); - /* r2.y = MAD(c1.y, r1.y, pv.y) */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), + CLAMP(1)); + /* 9 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(256), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Y), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Y), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), - CLAMP(0)); - /* r2.z = MAD(c1.z, r1.y, pv.z) */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), + CLAMP(1)); + /* 10 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(256), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), + SRC1_ELEM(ELEM_Z), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Z), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), - CLAMP(0)); - /* r2.w = MAD(0, 0, 1) */ - shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), + CLAMP(1)); + /* 11 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), + SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(SQ_ALU_SRC_0), + SRC1_SEL(256), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_W), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_W), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_021), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), - CLAMP(0)); - /* 12,13,14,15 */ - /* r2.x = MAD(c2.x, r1.z, pv.x) */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2), + CLAMP(1)); + /* 12 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(257), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_X), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(1)); - /* r2.y = MAD(c2.y, r1.z, pv.y) */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2), + /* 13 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(257), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), + SRC1_ELEM(ELEM_Y), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Y), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), - BANK_SWIZZLE(SQ_ALU_VEC_012), + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(1)); - /* r2.z = MAD(c2.z, r1.z, pv.z) */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2), + /* 14 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(257), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Z), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Z), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(1)); - /* r2.w = MAD(0, 0, 1) */ - shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), + /* 15 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(257), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_021), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + /* 16 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(SQ_ALU_SRC_0), + SRC1_SEL(258), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_X), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), - BANK_SWIZZLE(SQ_ALU_VEC_012), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + /* 17 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(258), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + /* 18 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(258), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), DST_GPR(2), DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + /* 19 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(258), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_021), + DST_GPR(0), + DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(1)); - - /* 16 */ - shader[i++] = CF_DWORD0(ADDR(18)); + /* 20 */ + shader[i++] = CF_DWORD0(ADDR(22)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -989,7 +1064,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_TEX), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 17 */ + /* 21 */ shader[i++] = CF_DWORD0(ADDR(0)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -1001,7 +1076,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 18/19 */ + /* 22/23 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1029,7 +1104,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 20/21 */ + /* 24/25 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1057,7 +1132,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 22/23 */ + /* 26/27 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1085,8 +1160,8 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 24 */ - shader[i++] = CF_DWORD0(ADDR(26)); + /* 28 */ + shader[i++] = CF_DWORD0(ADDR(30)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -1097,7 +1172,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_TEX), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 25 */ + /* 29 */ shader[i++] = CF_DWORD0(ADDR(0)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -1109,7 +1184,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 26/27 */ + /* 30/31 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1137,7 +1212,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 28/29 */ + /* 32/33 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1169,6 +1244,230 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) return i; } +/* comp mask ps --------------------------------------- */ +int R600_comp_mask_ps(RADEONChipFamily ChipSet, uint32_t* shader) +{ + int i = 0; + + /* 0 */ + shader[i++] = CF_DWORD0(ADDR(8)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_TEX), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 1 */ + shader[i++] = CF_ALU_DWORD0(ADDR(3), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + USES_WATERFALL(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 2 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(2), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(1), + END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 3 - alu 0 */ + /* MUL gpr[2].x gpr[1].x gpr[0].x */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + /* 4 - alu 1 */ + /* MUL gpr[2].y gpr[1].y gpr[0].y */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + /* 5 - alu 2 */ + /* MUL gpr[2].z gpr[1].z gpr[0].z */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + /* 6 - alu 3 */ + /* MUL gpr[2].w gpr[1].w gpr[0].w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + /* 7 */ + shader[i++] = 0x00000000; + shader[i++] = 0x00000000; + + /* 8/9 - src */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + BC_FRAC_MODE(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + R7xx_ALT_CONST(0)); + shader[i++] = TEX_DWORD1(DST_GPR(0), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_Z), + DST_SEL_W(SQ_SEL_W), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + /* 10/11 - mask */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + BC_FRAC_MODE(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(1), + SRC_GPR(1), + SRC_REL(ABSOLUTE), + R7xx_ALT_CONST(0)); + shader[i++] = TEX_DWORD1(DST_GPR(1), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_Z), + DST_SEL_W(SQ_SEL_W), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(1), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + + return i; +} + /* comp vs --------------------------------------- */ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) { @@ -1187,7 +1486,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(0)); /* 1 */ - shader[i++] = CF_DWORD0(ADDR(9)); + shader[i++] = CF_DWORD0(ADDR(14)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_NOT_BOOL), @@ -1199,7 +1498,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(0)); /* 2 */ - shader[i++] = CF_DWORD0(ADDR(0)); + shader[i++] = CF_DWORD0(0); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -1211,7 +1510,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(1)); /* 3 - mask sub */ - shader[i++] = CF_DWORD0(ADDR(44)); + shader[i++] = CF_DWORD0(ADDR(8)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -1222,22 +1521,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_VTX), WHOLE_QUAD_MODE(0), BARRIER(1)); - - /* 4 - ALU */ - shader[i++] = CF_ALU_DWORD0(ADDR(14), - KCACHE_BANK0(0), - KCACHE_BANK1(0), - KCACHE_MODE0(SQ_CF_KCACHE_NOP)); - shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), - KCACHE_ADDR0(0), - KCACHE_ADDR1(0), - I_COUNT(20), - USES_WATERFALL(0), - CF_INST(SQ_CF_INST_ALU), - WHOLE_QUAD_MODE(0), - BARRIER(1)); - - /* 5 - dst */ + /* 4 - dst */ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), TYPE(SQ_EXPORT_POS), RW_GPR(2), @@ -1246,8 +1530,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) ELEM_SIZE(0)); shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), R6xx_ELEM_LOOP(0), BURST_COUNT(1), END_OF_PROGRAM(0), @@ -1255,7 +1539,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 6 - src */ + /* 5 - src */ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), TYPE(SQ_EXPORT_PARAM), RW_GPR(1), @@ -1264,8 +1548,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) ELEM_SIZE(0)); shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), R6xx_ELEM_LOOP(0), BURST_COUNT(1), END_OF_PROGRAM(0), @@ -1273,7 +1557,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_EXPORT), WHOLE_QUAD_MODE(0), BARRIER(0)); - /* 7 - mask */ + /* 6 - mask */ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), TYPE(SQ_EXPORT_PARAM), RW_GPR(0), @@ -1282,8 +1566,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) ELEM_SIZE(0)); shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), R6xx_ELEM_LOOP(0), BURST_COUNT(1), END_OF_PROGRAM(0), @@ -1291,82 +1575,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(0)); - /* 8 */ - shader[i++] = CF_DWORD0(ADDR(0)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), - I_COUNT(0), - CALL_COUNT(0), - END_OF_PROGRAM(0), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_RETURN), - WHOLE_QUAD_MODE(0), - BARRIER(1)); - /* 9 - non-mask sub */ - shader[i++] = CF_DWORD0(ADDR(50)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), - I_COUNT(2), - CALL_COUNT(0), - END_OF_PROGRAM(0), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_VTX), - WHOLE_QUAD_MODE(0), - BARRIER(1)); - - /* 10 - ALU */ - shader[i++] = CF_ALU_DWORD0(ADDR(34), - KCACHE_BANK0(0), - KCACHE_BANK1(0), - KCACHE_MODE0(SQ_CF_KCACHE_NOP)); - shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), - KCACHE_ADDR0(0), - KCACHE_ADDR1(0), - I_COUNT(10), - USES_WATERFALL(0), - CF_INST(SQ_CF_INST_ALU), - WHOLE_QUAD_MODE(0), - BARRIER(1)); - - /* 11 - dst */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), - TYPE(SQ_EXPORT_POS), - RW_GPR(1), - RW_REL(ABSOLUTE), - INDEX_GPR(0), - ELEM_SIZE(0)); - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1), - R6xx_ELEM_LOOP(0), - BURST_COUNT(0), - END_OF_PROGRAM(0), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_EXPORT_DONE), - WHOLE_QUAD_MODE(0), - BARRIER(1)); - /* 12 - src */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), - TYPE(SQ_EXPORT_PARAM), - RW_GPR(0), - RW_REL(ABSOLUTE), - INDEX_GPR(0), - ELEM_SIZE(0)); - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1), - R6xx_ELEM_LOOP(0), - BURST_COUNT(0), - END_OF_PROGRAM(0), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_EXPORT_DONE), - WHOLE_QUAD_MODE(0), - BARRIER(0)); - /* 13 */ + /* 7 */ shader[i++] = CF_DWORD0(ADDR(0)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -1378,819 +1587,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1)); - - - /* 14 srcX.x DOT4 - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 0), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(3), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_X), - CLAMP(0)); - - /* 15 srcX.y DOT4 - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 0), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(3), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), - CLAMP(0)); - - /* 16 srcX.z DOT4 - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Z), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 0), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(3), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Z), - CLAMP(0)); - - /* 17 srcX.w DOT4 - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 0), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(3), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_W), - CLAMP(0)); - - /* 18 srcY.x DOT4 - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 1), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(3), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_X), - CLAMP(0)); - - /* 19 srcY.y DOT4 - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 1), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(3), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), - CLAMP(0)); - - /* 20 srcY.z DOT4 - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Z), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 1), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(3), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Z), - CLAMP(0)); - - /* 21 srcY.w DOT4 - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 1), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(3), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_W), - CLAMP(0)); - - /* 22 maskX.x DOT4 - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 2), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(4), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_X), - CLAMP(0)); - - /* 23 maskX.y DOT4 - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 2), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(4), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), - CLAMP(0)); - - /* 24 maskX.z DOT4 - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Z), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 2), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(4), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Z), - CLAMP(0)); - - /* 25 maskX.w DOT4 - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 2), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(4), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_W), - CLAMP(0)); - - /* 26 maskY.x DOT4 - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 3), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(4), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_X), - CLAMP(0)); - - /* 27 maskY.y DOT4 - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 3), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(4), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), - CLAMP(0)); - - /* 28 maskY.z DOT4 - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Z), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 3), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(4), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Z), - CLAMP(0)); - - /* 29 maskY.w DOT4 - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 3), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(4), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_W), - CLAMP(0)); - - /* 30 srcX / w */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 0), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(1), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_X), - CLAMP(0)); - - /* 31 srcY / h */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 1), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(1), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), - CLAMP(0)); - - /* 32 maskX / w */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 2), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(0), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_X), - CLAMP(0)); - - /* 33 maskY / h */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 3), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(0), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), - CLAMP(0)); - - /* 34 srcX.x DOT4 - non-mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 0), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_X), - CLAMP(0)); - - /* 35 srcX.y DOT4 - non-mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 0), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), - CLAMP(0)); - - /* 36 srcX.z DOT4 - non-mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Z), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 0), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Z), - CLAMP(0)); - - /* 37 srcX.w DOT4 - non-mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 0), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_W), - CLAMP(0)); - - /* 38 srcY.x DOT4 - non-mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 1), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_X), - CLAMP(0)); - - /* 39 srcY.y DOT4 - non-mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 1), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), - CLAMP(0)); - - /* 40 srcY.z DOT4 - non-mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Z), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 1), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Z), - CLAMP(0)); - - /* 41 srcY.w DOT4 - non-mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 1), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_W), - CLAMP(0)); - - /* 42 srcX / w */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 0), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(0), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_X), - CLAMP(0)); - - /* 43 srcY / h */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_CFILE_BASE + 1), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(0), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), - CLAMP(0)); - - /* 44/45 - dst - mask */ + /* 8/9 - dst */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -2206,20 +1603,16 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1), USE_CONST_FIELDS(0), - DATA_FORMAT(FMT_32_32_FLOAT), - NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), - FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(0), -#if X_BYTE_ORDER == X_BIG_ENDIAN - ENDIAN_SWAP(SQ_ENDIAN_8IN32), -#else - ENDIAN_SWAP(SQ_ENDIAN_NONE), -#endif + ENDIAN_SWAP(ENDIAN_NONE), CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; - /* 46/47 - src */ + /* 10/11 - src */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -2232,23 +1625,19 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_REL(0), DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_1), - DST_SEL_W(SQ_SEL_0), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), USE_CONST_FIELDS(0), - DATA_FORMAT(FMT_32_32_FLOAT), - NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), - FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(8), -#if X_BYTE_ORDER == X_BIG_ENDIAN - ENDIAN_SWAP(SQ_ENDIAN_8IN32), -#else - ENDIAN_SWAP(SQ_ENDIAN_NONE), -#endif + ENDIAN_SWAP(ENDIAN_NONE), CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; - /* 48/49 - mask */ + /* 12/13 - mask */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -2261,24 +1650,80 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_REL(0), DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_1), - DST_SEL_W(SQ_SEL_0), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), USE_CONST_FIELDS(0), - DATA_FORMAT(FMT_32_32_FLOAT), - NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), - FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(16), -#if X_BYTE_ORDER == X_BIG_ENDIAN - ENDIAN_SWAP(SQ_ENDIAN_8IN32), -#else - ENDIAN_SWAP(SQ_ENDIAN_NONE), -#endif + ENDIAN_SWAP(ENDIAN_NONE), CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; - /* 50/51 - dst - non-mask */ + /* 14 - non-mask sub */ + shader[i++] = CF_DWORD0(ADDR(18)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(2), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_VTX), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 15 - dst */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 16 - src */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 17 */ + shader[i++] = CF_DWORD0(ADDR(0)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 18/19 - dst */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -2294,20 +1739,16 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_SEL_Z(SQ_SEL_0), DST_SEL_W(SQ_SEL_1), USE_CONST_FIELDS(0), - DATA_FORMAT(FMT_32_32_FLOAT), - NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), - FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(0), -#if X_BYTE_ORDER == X_BIG_ENDIAN - ENDIAN_SWAP(SQ_ENDIAN_8IN32), -#else - ENDIAN_SWAP(SQ_ENDIAN_NONE), -#endif + ENDIAN_SWAP(ENDIAN_NONE), CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; - /* 52/53 - src */ + /* 20/21 - src */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -2320,19 +1761,15 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_REL(0), DST_SEL_X(SQ_SEL_X), DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_1), - DST_SEL_W(SQ_SEL_0), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), USE_CONST_FIELDS(0), - DATA_FORMAT(FMT_32_32_FLOAT), - NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED), - FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), + DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(8), -#if X_BYTE_ORDER == X_BIG_ENDIAN - ENDIAN_SWAP(SQ_ENDIAN_8IN32), -#else - ENDIAN_SWAP(SQ_ENDIAN_NONE), -#endif + ENDIAN_SWAP(ENDIAN_NONE), CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; @@ -2346,102 +1783,7 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) int i = 0; /* 0 */ - shader[i++] = CF_DWORD0(ADDR(3)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_BOOL), - I_COUNT(0), - CALL_COUNT(0), - END_OF_PROGRAM(0), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_CALL), - WHOLE_QUAD_MODE(0), - BARRIER(0)); - /* 1 */ - shader[i++] = CF_DWORD0(ADDR(7)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_NOT_BOOL), - I_COUNT(0), - CALL_COUNT(0), - END_OF_PROGRAM(0), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_CALL), - WHOLE_QUAD_MODE(0), - BARRIER(0)); - /* 2 */ - shader[i++] = CF_DWORD0(ADDR(0)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), - I_COUNT(0), - CALL_COUNT(0), - END_OF_PROGRAM(1), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_NOP), - WHOLE_QUAD_MODE(0), - BARRIER(1)); - - /* 3 - mask sub */ - shader[i++] = CF_DWORD0(ADDR(14)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), - I_COUNT(2), - CALL_COUNT(0), - END_OF_PROGRAM(0), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_TEX), - WHOLE_QUAD_MODE(0), - BARRIER(1)); - - /* 4 */ - shader[i++] = CF_ALU_DWORD0(ADDR(10), - KCACHE_BANK0(0), - KCACHE_BANK1(0), - KCACHE_MODE0(SQ_CF_KCACHE_NOP)); - shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), - KCACHE_ADDR0(0), - KCACHE_ADDR1(0), - I_COUNT(4), - USES_WATERFALL(0), - CF_INST(SQ_CF_INST_ALU), - WHOLE_QUAD_MODE(0), - BARRIER(1)); - - /* 5 */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), - TYPE(SQ_EXPORT_PIXEL), - RW_GPR(2), - RW_REL(ABSOLUTE), - INDEX_GPR(0), - ELEM_SIZE(1)); - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), - R6xx_ELEM_LOOP(0), - BURST_COUNT(1), - END_OF_PROGRAM(0), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_EXPORT_DONE), - WHOLE_QUAD_MODE(0), - BARRIER(1)); - /* 6 */ - shader[i++] = CF_DWORD0(ADDR(0)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), - I_COUNT(0), - CALL_COUNT(0), - END_OF_PROGRAM(0), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_RETURN), - WHOLE_QUAD_MODE(0), - BARRIER(1)); - - /* 7 non-mask sub */ - shader[i++] = CF_DWORD0(ADDR(18)); + shader[i++] = CF_DWORD0(ADDR(2)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -2452,204 +1794,28 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_TEX), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 8 */ + /* 1 */ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), TYPE(SQ_EXPORT_PIXEL), RW_GPR(0), RW_REL(ABSOLUTE), INDEX_GPR(0), ELEM_SIZE(1)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y), SRC_SEL_Z(SQ_SEL_Z), SRC_SEL_W(SQ_SEL_W), R6xx_ELEM_LOOP(0), BURST_COUNT(1), - END_OF_PROGRAM(0), + END_OF_PROGRAM(1), VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 9 */ - shader[i++] = CF_DWORD0(ADDR(0)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), - I_COUNT(0), - CALL_COUNT(0), - END_OF_PROGRAM(0), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_RETURN), - WHOLE_QUAD_MODE(0), - BARRIER(1)); - /* 10 - alu 0 */ - /* MUL gpr[2].x gpr[1].x gpr[0].x */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 0), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_X), - CLAMP(1)); - /* 11 - alu 1 */ - /* MUL gpr[2].y gpr[1].y gpr[0].y */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 0), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), - CLAMP(1)); - /* 12 - alu 2 */ - /* MUL gpr[2].z gpr[1].z gpr[0].z */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Z), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 0), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Z), - CLAMP(1)); - /* 13 - alu 3 */ - /* MUL gpr[2].w gpr[1].w gpr[0].w */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), - SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 0), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_W), - CLAMP(1)); - - /* 14/15 - src - mask */ - shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), - BC_FRAC_MODE(0), - FETCH_WHOLE_QUAD(0), - RESOURCE_ID(0), - SRC_GPR(0), - SRC_REL(ABSOLUTE), - R7xx_ALT_CONST(0)); - shader[i++] = TEX_DWORD1(DST_GPR(0), - DST_REL(ABSOLUTE), - DST_SEL_X(SQ_SEL_X), - DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_Z), - DST_SEL_W(SQ_SEL_W), - LOD_BIAS(0), - COORD_TYPE_X(TEX_NORMALIZED), - COORD_TYPE_Y(TEX_NORMALIZED), - COORD_TYPE_Z(TEX_NORMALIZED), - COORD_TYPE_W(TEX_NORMALIZED)); - shader[i++] = TEX_DWORD2(OFFSET_X(0), - OFFSET_Y(0), - OFFSET_Z(0), - SAMPLER_ID(0), - SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1)); - shader[i++] = TEX_DWORD_PAD; - /* 16/17 - mask */ - shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), - BC_FRAC_MODE(0), - FETCH_WHOLE_QUAD(0), - RESOURCE_ID(1), - SRC_GPR(1), - SRC_REL(ABSOLUTE), - R7xx_ALT_CONST(0)); - shader[i++] = TEX_DWORD1(DST_GPR(1), - DST_REL(ABSOLUTE), - DST_SEL_X(SQ_SEL_X), - DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_Z), - DST_SEL_W(SQ_SEL_W), - LOD_BIAS(0), - COORD_TYPE_X(TEX_NORMALIZED), - COORD_TYPE_Y(TEX_NORMALIZED), - COORD_TYPE_Z(TEX_NORMALIZED), - COORD_TYPE_W(TEX_NORMALIZED)); - shader[i++] = TEX_DWORD2(OFFSET_X(0), - OFFSET_Y(0), - OFFSET_Z(0), - SAMPLER_ID(1), - SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1)); - shader[i++] = TEX_DWORD_PAD; - /* 18/19 - src - non-mask */ + /* 2/3 - src */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), |