summaryrefslogtreecommitdiff
path: root/driver/xf86-video-ati/src/r600_shader.c
diff options
context:
space:
mode:
authorMatthieu Herrb <matthieu@cvs.openbsd.org>2012-02-06 22:53:17 +0000
committerMatthieu Herrb <matthieu@cvs.openbsd.org>2012-02-06 22:53:17 +0000
commiteb2ee4ddbc560aaa5b74172c2c72851785efb40a (patch)
treea01988fa211bdbff64da3bea425a7f69f725987b /driver/xf86-video-ati/src/r600_shader.c
parent36f7476dfa8dae71728fafaf1b27e51683bb0e69 (diff)
Revert the update to xf86-video-ati 6.14.3. Requested by espie@
who experiences regressions with this driver.
Diffstat (limited to 'driver/xf86-video-ati/src/r600_shader.c')
-rw-r--r--driver/xf86-video-ati/src/r600_shader.c2166
1 files changed, 666 insertions, 1500 deletions
diff --git a/driver/xf86-video-ati/src/r600_shader.c b/driver/xf86-video-ati/src/r600_shader.c
index ab2f4850f..addba36f3 100644
--- a/driver/xf86-video-ati/src/r600_shader.c
+++ b/driver/xf86-video-ati/src/r600_shader.c
@@ -106,16 +106,12 @@ int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_SEL_Z(SQ_SEL_0),
DST_SEL_W(SQ_SEL_1),
USE_CONST_FIELDS(0),
- DATA_FORMAT(FMT_32_32_FLOAT),
- NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
- FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(0),
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- ENDIAN_SWAP(SQ_ENDIAN_8IN32),
-#else
- ENDIAN_SWAP(SQ_ENDIAN_NONE),
-#endif
+ ENDIAN_SWAP(ENDIAN_NONE),
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1));
shader[i++] = VTX_DWORD_PAD;
@@ -161,11 +157,11 @@ int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
BARRIER(1));
/* 2 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -187,11 +183,11 @@ int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(1));
/* 3 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -213,11 +209,11 @@ int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(1));
/* 4 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
@@ -239,11 +235,11 @@ int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Z),
CLAMP(1));
/* 5 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -340,16 +336,12 @@ int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_SEL_Z(SQ_SEL_0),
DST_SEL_W(SQ_SEL_1),
USE_CONST_FIELDS(0),
- DATA_FORMAT(FMT_32_32_FLOAT),
- NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
- FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(0),
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- ENDIAN_SWAP(SQ_ENDIAN_8IN32),
-#else
- ENDIAN_SWAP(SQ_ENDIAN_NONE),
-#endif
+ ENDIAN_SWAP(ENDIAN_NONE),
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1));
shader[i++] = VTX_DWORD_PAD;
@@ -369,16 +361,12 @@ int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_SEL_Z(SQ_SEL_0),
DST_SEL_W(SQ_SEL_1),
USE_CONST_FIELDS(0),
- DATA_FORMAT(FMT_32_32_FLOAT),
- NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
- FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(8),
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- ENDIAN_SWAP(SQ_ENDIAN_8IN32),
-#else
- ENDIAN_SWAP(SQ_ENDIAN_NONE),
-#endif
+ ENDIAN_SWAP(ENDIAN_NONE),
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(0));
shader[i++] = VTX_DWORD_PAD;
@@ -469,7 +457,7 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
int i = 0;
/* 0 */
- shader[i++] = CF_DWORD0(ADDR(6));
+ shader[i++] = CF_DWORD0(ADDR(4));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -480,22 +468,7 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_VTX),
WHOLE_QUAD_MODE(0),
BARRIER(1));
-
- /* 1 - ALU */
- shader[i++] = CF_ALU_DWORD0(ADDR(4),
- KCACHE_BANK0(0),
- KCACHE_BANK1(0),
- KCACHE_MODE0(SQ_CF_KCACHE_NOP));
- shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
- KCACHE_ADDR0(0),
- KCACHE_ADDR1(0),
- I_COUNT(2),
- USES_WATERFALL(0),
- CF_INST(SQ_CF_INST_ALU),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
-
- /* 2 */
+ /* 1 */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
TYPE(SQ_EXPORT_POS),
RW_GPR(1),
@@ -513,7 +486,7 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 3 */
+ /* 2 */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
TYPE(SQ_EXPORT_PARAM),
RW_GPR(0),
@@ -531,63 +504,9 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(0));
-
-
- /* 4 texX / w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(0));
-
- /* 5 texY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(0));
-
- /* 6/7 */
+ shader[i++] = 0x00000000;
+ shader[i++] = 0x00000000;
+ /* 4/5 */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -604,19 +523,15 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_SEL_W(SQ_SEL_1),
USE_CONST_FIELDS(0),
DATA_FORMAT(FMT_32_32_FLOAT),
- NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(0),
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- ENDIAN_SWAP(SQ_ENDIAN_8IN32),
-#else
- ENDIAN_SWAP(SQ_ENDIAN_NONE),
-#endif
+ ENDIAN_SWAP(ENDIAN_NONE),
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1));
shader[i++] = VTX_DWORD_PAD;
- /* 8/9 */
+ /* 6/7 */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -633,15 +548,11 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_SEL_W(SQ_SEL_1),
USE_CONST_FIELDS(0),
DATA_FORMAT(FMT_32_32_FLOAT),
- NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(8),
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- ENDIAN_SWAP(SQ_ENDIAN_8IN32),
-#else
- ENDIAN_SWAP(SQ_ENDIAN_NONE),
-#endif
+ ENDIAN_SWAP(ENDIAN_NONE),
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(0));
shader[i++] = VTX_DWORD_PAD;
@@ -649,12 +560,41 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
return i;
}
+/*
+ * ; xv ps planar
+ * 00 TEX: ADDR(20) CNT(3) NO_BARRIER
+ * 0 SAMPLE R1.x__1, R0.xy01, t0, s0
+ * 1 SAMPLE R1.__x_, R0.xy01, t1, s1
+ * 2 SAMPLE R1._x__, R0.xy01, t2, s2
+ * 01 TEX: ADDR(28) CNT(2) NO_BARRIER
+ * 0 SAMPLE R1.x__1, R0.xy01, t0, s0
+ * 1 SAMPLE R1._xy_, R0.xy01, t1, s1
+ * 02 ALU: ADDR(4) CNT(16)
+ * 3 x: MULADD R1.x, R1.x, C3.x, C3.y CLAMP
+ * y: MULADD R1.y, R1.y, C3.z, C3.w
+ * z: MULADD R1.z, R1.z, C3.z, C3.w
+ * w: MOV R1.w, 0.0f
+ * 4 x: DOT4 R2.x, R1.x, C0.x CLAMP VEC_102
+ * y: DOT4 ____, R1.y, C0.y CLAMP VEC_102
+ * z: DOT4 ____, R1.z, C0.z CLAMP VEC_102
+ * w: DOT4 ____, R1.w, C0.w CLAMP VEC_021
+ * 5 x: DOT4 ____, R1.x, C1.x CLAMP VEC_102
+ * y: DOT4 R2.y, R1.y, C1.y CLAMP VEC_102
+ * z: DOT4 ____, R1.z, C1.z CLAMP VEC_102
+ * w: DOT4 ____, R1.w, C1.w CLAMP VEC_021
+ * 6 x: DOT4 ____, R1.x, C2.x CLAMP VEC_102
+ * y: DOT4 ____, R1.y, C2.y CLAMP VEC_102
+ * z: DOT4 R2.z, R1.z, C2.z CLAMP VEC_102
+ * w: DOT4 ____, R1.w, C2.w CLAMP VEC_021
+ * 03 EXP_DONE: PIX0, R2
+ * END_OF_PROGRAM
+ */
int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
{
int i = 0;
/* 0 */
- shader[i++] = CF_DWORD0(ADDR(16));
+ shader[i++] = CF_DWORD0(ADDR(20));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_BOOL),
@@ -666,7 +606,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(0));
/* 1 */
- shader[i++] = CF_DWORD0(ADDR(24));
+ shader[i++] = CF_DWORD0(ADDR(28));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_NOT_BOOL),
@@ -685,7 +625,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
- I_COUNT(12),
+ I_COUNT(16),
USES_WATERFALL(0),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
@@ -708,74 +648,73 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 4,5,6,7 */
- /* r2.x = MAD(c0.w, r1.x, c0.x) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+ /* 4 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
+ SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(259),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_X),
+ SRC2_ELEM(ELEM_Y),
SRC2_NEG(0),
ALU_INST(SQ_OP3_INST_MULADD),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
- CLAMP(0));
- /* r2.y = MAD(c0.w, r1.x, c0.y) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+ CLAMP(1));
+ /* 5 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
+ SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(259),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Y),
+ SRC2_ELEM(ELEM_W),
SRC2_NEG(0),
ALU_INST(SQ_OP3_INST_MULADD),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* r2.z = MAD(c0.w, r1.x, c0.z) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0),
+ /* 6 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
+ SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(259),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 0),
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
+ SRC2_ELEM(ELEM_W),
SRC2_NEG(0),
ALU_INST(SQ_OP3_INST_MULADD),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* r2.w = MAD(0, 0, 1) */
+ /* 7 */
shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -787,198 +726,334 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_X),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
-
- /* 8,9,10,11 */
- /* r2.x = MAD(c1.x, r1.y, pv.x) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
+ /* 8 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(256),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_X),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
- CLAMP(0));
- /* r2.y = MAD(c1.y, r1.y, pv.y) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
+ CLAMP(1));
+ /* 9 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(256),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Y),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
- CLAMP(0));
- /* r2.z = MAD(c1.z, r1.y, pv.z) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1),
+ CLAMP(1));
+ /* 10 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(256),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
- CLAMP(0));
- /* r2.w = MAD(0, 0, 1) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ CLAMP(1));
+ /* 11 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_SEL(256),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_W),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_021),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
- CLAMP(0));
- /* 12,13,14,15 */
- /* r2.x = MAD(c2.x, r1.z, pv.x) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
+ CLAMP(1));
+ /* 12 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(257),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_X),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(1));
- /* r2.y = MAD(c2.y, r1.z, pv.y) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
+ /* 13 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(257),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
+ SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Y),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(1));
- /* r2.z = MAD(c2.z, r1.z, pv.z) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2),
+ /* 14 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(257),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(1));
- /* r2.w = MAD(0, 0, 1) */
- shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+ /* 15 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_021),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+ /* 16 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(SQ_ALU_SRC_0),
+ SRC1_SEL(258),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_X),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ /* 17 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ /* 18 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
DST_GPR(2),
DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ /* 19 */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_021),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(1));
-
- /* 16 */
- shader[i++] = CF_DWORD0(ADDR(18));
+ /* 20 */
+ shader[i++] = CF_DWORD0(ADDR(22));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -989,7 +1064,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_TEX),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 17 */
+ /* 21 */
shader[i++] = CF_DWORD0(ADDR(0));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -1001,7 +1076,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 18/19 */
+ /* 22/23 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -1029,7 +1104,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 20/21 */
+ /* 24/25 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -1057,7 +1132,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 22/23 */
+ /* 26/27 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -1085,8 +1160,8 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 24 */
- shader[i++] = CF_DWORD0(ADDR(26));
+ /* 28 */
+ shader[i++] = CF_DWORD0(ADDR(30));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -1097,7 +1172,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_TEX),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 25 */
+ /* 29 */
shader[i++] = CF_DWORD0(ADDR(0));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -1109,7 +1184,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 26/27 */
+ /* 30/31 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -1137,7 +1212,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 28/29 */
+ /* 32/33 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -1169,6 +1244,230 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
return i;
}
+/* comp mask ps: samples src (resource 0 / sampler 0) and mask (resource 1 / sampler 1), multiplies them per channel with CLAMP set, exports gpr[2] to CF_PIXEL_MRT0; returns the number of dwords written to shader[] */
+int R600_comp_mask_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+ int i = 0;
+
+ /* 0 - TEX clause: points at the two SAMPLE instructions at slot 8 (src, then mask) */
+ shader[i++] = CF_DWORD0(ADDR(8));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TEX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 1 - ALU clause: points at the four MUL instructions starting at slot 3 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(3),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 2 - pixel export of gpr[2] (xyzw) to CF_PIXEL_MRT0; EXPORT_DONE with END_OF_PROGRAM set */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 3 - alu 0 (first of the four MULs addressed by CF instruction 1) */
+ /* MUL gpr[2].x gpr[1].x gpr[0].x, CLAMP set */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ /* 4 - alu 1 */
+ /* MUL gpr[2].y gpr[1].y gpr[0].y, CLAMP set */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ /* 5 - alu 2 */
+ /* MUL gpr[2].z gpr[1].z gpr[0].z, CLAMP set */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ /* 6 - alu 3 (the only one of the four with LAST(1)) */
+ /* MUL gpr[2].w gpr[1].w gpr[0].w, CLAMP set */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+ /* 7 - two zero dwords; presumably padding so the TEX instructions begin at slot 8 - TODO confirm the alignment requirement */
+ shader[i++] = 0x00000000;
+ shader[i++] = 0x00000000;
+
+ /* 8/9 - src: SAMPLE resource 0 / sampler 0, coords from gpr[0] (x, y, 0, 1), result xyzw into gpr[0] */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ shader[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 10/11 - mask: SAMPLE resource 1 / sampler 1, coords from gpr[1] (x, y, 0, 1), result xyzw into gpr[1] */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(1),
+ SRC_GPR(1),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(1),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+
+ return i;
+}
+
/* comp vs --------------------------------------- */
int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
{
@@ -1187,7 +1486,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(0));
/* 1 */
- shader[i++] = CF_DWORD0(ADDR(9));
+ shader[i++] = CF_DWORD0(ADDR(14));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_NOT_BOOL),
@@ -1199,7 +1498,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(0));
/* 2 */
- shader[i++] = CF_DWORD0(ADDR(0));
+ shader[i++] = CF_DWORD0(0);
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -1211,7 +1510,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(1));
/* 3 - mask sub */
- shader[i++] = CF_DWORD0(ADDR(44));
+ shader[i++] = CF_DWORD0(ADDR(8));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -1222,22 +1521,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_VTX),
WHOLE_QUAD_MODE(0),
BARRIER(1));
-
- /* 4 - ALU */
- shader[i++] = CF_ALU_DWORD0(ADDR(14),
- KCACHE_BANK0(0),
- KCACHE_BANK1(0),
- KCACHE_MODE0(SQ_CF_KCACHE_NOP));
- shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
- KCACHE_ADDR0(0),
- KCACHE_ADDR1(0),
- I_COUNT(20),
- USES_WATERFALL(0),
- CF_INST(SQ_CF_INST_ALU),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
-
- /* 5 - dst */
+ /* 4 - dst */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
TYPE(SQ_EXPORT_POS),
RW_GPR(2),
@@ -1246,8 +1530,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ELEM_SIZE(0));
shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
R6xx_ELEM_LOOP(0),
BURST_COUNT(1),
END_OF_PROGRAM(0),
@@ -1255,7 +1539,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 6 - src */
+ /* 5 - src */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
TYPE(SQ_EXPORT_PARAM),
RW_GPR(1),
@@ -1264,8 +1548,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ELEM_SIZE(0));
shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
R6xx_ELEM_LOOP(0),
BURST_COUNT(1),
END_OF_PROGRAM(0),
@@ -1273,7 +1557,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT),
WHOLE_QUAD_MODE(0),
BARRIER(0));
- /* 7 - mask */
+ /* 6 - mask */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
TYPE(SQ_EXPORT_PARAM),
RW_GPR(0),
@@ -1282,8 +1566,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ELEM_SIZE(0));
shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
R6xx_ELEM_LOOP(0),
BURST_COUNT(1),
END_OF_PROGRAM(0),
@@ -1291,82 +1575,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(0));
- /* 8 */
- shader[i++] = CF_DWORD0(ADDR(0));
- shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
- COND(SQ_CF_COND_ACTIVE),
- I_COUNT(0),
- CALL_COUNT(0),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_RETURN),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
- /* 9 - non-mask sub */
- shader[i++] = CF_DWORD0(ADDR(50));
- shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
- COND(SQ_CF_COND_ACTIVE),
- I_COUNT(2),
- CALL_COUNT(0),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_VTX),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
-
- /* 10 - ALU */
- shader[i++] = CF_ALU_DWORD0(ADDR(34),
- KCACHE_BANK0(0),
- KCACHE_BANK1(0),
- KCACHE_MODE0(SQ_CF_KCACHE_NOP));
- shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
- KCACHE_ADDR0(0),
- KCACHE_ADDR1(0),
- I_COUNT(10),
- USES_WATERFALL(0),
- CF_INST(SQ_CF_INST_ALU),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
-
- /* 11 - dst */
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
- TYPE(SQ_EXPORT_POS),
- RW_GPR(1),
- RW_REL(ABSOLUTE),
- INDEX_GPR(0),
- ELEM_SIZE(0));
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1),
- R6xx_ELEM_LOOP(0),
- BURST_COUNT(0),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
- /* 12 - src */
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
- TYPE(SQ_EXPORT_PARAM),
- RW_GPR(0),
- RW_REL(ABSOLUTE),
- INDEX_GPR(0),
- ELEM_SIZE(0));
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1),
- R6xx_ELEM_LOOP(0),
- BURST_COUNT(0),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
- WHOLE_QUAD_MODE(0),
- BARRIER(0));
- /* 13 */
+ /* 7 */
shader[i++] = CF_DWORD0(ADDR(0));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -1378,819 +1587,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
BARRIER(1));
-
-
- /* 14 srcX.x DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(3),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(0));
-
- /* 15 srcX.y DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(3),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(0));
-
- /* 16 srcX.z DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Z),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(3),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Z),
- CLAMP(0));
-
- /* 17 srcX.w DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(3),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_W),
- CLAMP(0));
-
- /* 18 srcY.x DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(3),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(0));
-
- /* 19 srcY.y DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(3),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(0));
-
- /* 20 srcY.z DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Z),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(3),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Z),
- CLAMP(0));
-
- /* 21 srcY.w DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(3),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_W),
- CLAMP(0));
-
- /* 22 maskX.x DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(4),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(0));
-
- /* 23 maskX.y DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(4),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(0));
-
- /* 24 maskX.z DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Z),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(4),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Z),
- CLAMP(0));
-
- /* 25 maskX.w DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(4),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_W),
- CLAMP(0));
-
- /* 26 maskY.x DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(4),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(0));
-
- /* 27 maskY.y DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(4),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(0));
-
- /* 28 maskY.z DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Z),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(4),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Z),
- CLAMP(0));
-
- /* 29 maskY.w DOT4 - mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(4),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_W),
- CLAMP(0));
-
- /* 30 srcX / w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(0));
-
- /* 31 srcY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(0));
-
- /* 32 maskX / w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 2),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(0));
-
- /* 33 maskY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 3),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(0));
-
- /* 34 srcX.x DOT4 - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(0));
-
- /* 35 srcX.y DOT4 - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(0));
-
- /* 36 srcX.z DOT4 - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Z),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Z),
- CLAMP(0));
-
- /* 37 srcX.w DOT4 - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_W),
- CLAMP(0));
-
- /* 38 srcY.x DOT4 - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(0));
-
- /* 39 srcY.y DOT4 - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(0));
-
- /* 40 srcY.z DOT4 - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Z),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Z),
- CLAMP(0));
-
- /* 41 srcY.w DOT4 - non-mask */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_W),
- CLAMP(0));
-
- /* 42 srcX / w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(0));
-
- /* 43 srcY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_CFILE_BASE + 1),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(0));
-
- /* 44/45 - dst - mask */
+ /* 8/9 - dst */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -2206,20 +1603,16 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_SEL_Z(SQ_SEL_0),
DST_SEL_W(SQ_SEL_1),
USE_CONST_FIELDS(0),
- DATA_FORMAT(FMT_32_32_FLOAT),
- NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
- FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(0),
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- ENDIAN_SWAP(SQ_ENDIAN_8IN32),
-#else
- ENDIAN_SWAP(SQ_ENDIAN_NONE),
-#endif
+ ENDIAN_SWAP(ENDIAN_NONE),
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1));
shader[i++] = VTX_DWORD_PAD;
- /* 46/47 - src */
+ /* 10/11 - src */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -2232,23 +1625,19 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(0),
DST_SEL_X(SQ_SEL_X),
DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_1),
- DST_SEL_W(SQ_SEL_0),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
USE_CONST_FIELDS(0),
- DATA_FORMAT(FMT_32_32_FLOAT),
- NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
- FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(8),
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- ENDIAN_SWAP(SQ_ENDIAN_8IN32),
-#else
- ENDIAN_SWAP(SQ_ENDIAN_NONE),
-#endif
+ ENDIAN_SWAP(ENDIAN_NONE),
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(0));
shader[i++] = VTX_DWORD_PAD;
- /* 48/49 - mask */
+ /* 12/13 - mask */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -2261,24 +1650,80 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(0),
DST_SEL_X(SQ_SEL_X),
DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_1),
- DST_SEL_W(SQ_SEL_0),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
USE_CONST_FIELDS(0),
- DATA_FORMAT(FMT_32_32_FLOAT),
- NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
- FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(16),
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- ENDIAN_SWAP(SQ_ENDIAN_8IN32),
-#else
- ENDIAN_SWAP(SQ_ENDIAN_NONE),
-#endif
+ ENDIAN_SWAP(ENDIAN_NONE),
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(0));
shader[i++] = VTX_DWORD_PAD;
- /* 50/51 - dst - non-mask */
+ /* 14 - non-mask sub */
+ shader[i++] = CF_DWORD0(ADDR(18));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_VTX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 15 - dst */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 16 - src */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 17 */
+ shader[i++] = CF_DWORD0(ADDR(0));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 18/19 - dst */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -2294,20 +1739,16 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_SEL_Z(SQ_SEL_0),
DST_SEL_W(SQ_SEL_1),
USE_CONST_FIELDS(0),
- DATA_FORMAT(FMT_32_32_FLOAT),
- NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
- FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(0),
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- ENDIAN_SWAP(SQ_ENDIAN_8IN32),
-#else
- ENDIAN_SWAP(SQ_ENDIAN_NONE),
-#endif
+ ENDIAN_SWAP(ENDIAN_NONE),
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1));
shader[i++] = VTX_DWORD_PAD;
- /* 52/53 - src */
+ /* 20/21 - src */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -2320,19 +1761,15 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(0),
DST_SEL_X(SQ_SEL_X),
DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_1),
- DST_SEL_W(SQ_SEL_0),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
USE_CONST_FIELDS(0),
- DATA_FORMAT(FMT_32_32_FLOAT),
- NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
- FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
shader[i++] = VTX_DWORD2(OFFSET(8),
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- ENDIAN_SWAP(SQ_ENDIAN_8IN32),
-#else
- ENDIAN_SWAP(SQ_ENDIAN_NONE),
-#endif
+ ENDIAN_SWAP(ENDIAN_NONE),
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(0));
shader[i++] = VTX_DWORD_PAD;
@@ -2346,102 +1783,7 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
int i = 0;
/* 0 */
- shader[i++] = CF_DWORD0(ADDR(3));
- shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
- COND(SQ_CF_COND_BOOL),
- I_COUNT(0),
- CALL_COUNT(0),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_CALL),
- WHOLE_QUAD_MODE(0),
- BARRIER(0));
- /* 1 */
- shader[i++] = CF_DWORD0(ADDR(7));
- shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
- COND(SQ_CF_COND_NOT_BOOL),
- I_COUNT(0),
- CALL_COUNT(0),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_CALL),
- WHOLE_QUAD_MODE(0),
- BARRIER(0));
- /* 2 */
- shader[i++] = CF_DWORD0(ADDR(0));
- shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
- COND(SQ_CF_COND_ACTIVE),
- I_COUNT(0),
- CALL_COUNT(0),
- END_OF_PROGRAM(1),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_NOP),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
-
- /* 3 - mask sub */
- shader[i++] = CF_DWORD0(ADDR(14));
- shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
- COND(SQ_CF_COND_ACTIVE),
- I_COUNT(2),
- CALL_COUNT(0),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_TEX),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
-
- /* 4 */
- shader[i++] = CF_ALU_DWORD0(ADDR(10),
- KCACHE_BANK0(0),
- KCACHE_BANK1(0),
- KCACHE_MODE0(SQ_CF_KCACHE_NOP));
- shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
- KCACHE_ADDR0(0),
- KCACHE_ADDR1(0),
- I_COUNT(4),
- USES_WATERFALL(0),
- CF_INST(SQ_CF_INST_ALU),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
-
- /* 5 */
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
- TYPE(SQ_EXPORT_PIXEL),
- RW_GPR(2),
- RW_REL(ABSOLUTE),
- INDEX_GPR(0),
- ELEM_SIZE(1));
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
- R6xx_ELEM_LOOP(0),
- BURST_COUNT(1),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
- /* 6 */
- shader[i++] = CF_DWORD0(ADDR(0));
- shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
- COND(SQ_CF_COND_ACTIVE),
- I_COUNT(0),
- CALL_COUNT(0),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_RETURN),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
-
- /* 7 non-mask sub */
- shader[i++] = CF_DWORD0(ADDR(18));
+ shader[i++] = CF_DWORD0(ADDR(2));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -2452,204 +1794,28 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_TEX),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 8 */
+ /* 1 */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
TYPE(SQ_EXPORT_PIXEL),
RW_GPR(0),
RW_REL(ABSOLUTE),
INDEX_GPR(0),
ELEM_SIZE(1));
+
shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
SRC_SEL_Z(SQ_SEL_Z),
SRC_SEL_W(SQ_SEL_W),
R6xx_ELEM_LOOP(0),
BURST_COUNT(1),
- END_OF_PROGRAM(0),
+ END_OF_PROGRAM(1),
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 9 */
- shader[i++] = CF_DWORD0(ADDR(0));
- shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
- COND(SQ_CF_COND_ACTIVE),
- I_COUNT(0),
- CALL_COUNT(0),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_RETURN),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
- /* 10 - alu 0 */
- /* MUL gpr[2].x gpr[1].x gpr[0].x */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(1));
- /* 11 - alu 1 */
- /* MUL gpr[2].y gpr[1].y gpr[0].y */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(1));
- /* 12 - alu 2 */
- /* MUL gpr[2].z gpr[1].z gpr[0].z */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Z),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Z),
- CLAMP(1));
- /* 13 - alu 3 */
- /* MUL gpr[2].w gpr[1].w gpr[0].w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
- SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_W),
- CLAMP(1));
-
- /* 14/15 - src - mask */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- BC_FRAC_MODE(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(0),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- R7xx_ALT_CONST(0));
- shader[i++] = TEX_DWORD1(DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_Z),
- DST_SEL_W(SQ_SEL_W),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
- SAMPLER_ID(0),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
- /* 16/17 - mask */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- BC_FRAC_MODE(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(1),
- SRC_GPR(1),
- SRC_REL(ABSOLUTE),
- R7xx_ALT_CONST(0));
- shader[i++] = TEX_DWORD1(DST_GPR(1),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_Z),
- DST_SEL_W(SQ_SEL_W),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
- SAMPLER_ID(1),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
- /* 18/19 - src - non-mask */
+ /* 2/3 - src */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),