diff options
author | Alex Deucher <alexdeucher@gmail.com> | 2010-11-29 17:23:30 -0500 |
---|---|---|
committer | Alex Deucher <alexdeucher@gmail.com> | 2010-11-29 18:10:13 -0500 |
commit | d9bcac516f2a810acb300b29169e56a2df0b47ac (patch) | |
tree | 3752af315dcc2197ea576b8c04022ee3fc105949 /src | |
parent | 5d3f33729be0639cef17372345b2dab6127e39d9 (diff) |
r6xx/r7xx use dot4 for transforms
Diffstat (limited to 'src')
-rw-r--r-- | src/r600_exa.c | 2 | ||||
-rw-r--r-- | src/r600_shader.c | 670 |
2 files changed, 525 insertions, 147 deletions
diff --git a/src/r600_exa.c b/src/r600_exa.c index a04d66ac..f6cde1db 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -1358,7 +1358,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, /* Shader */ vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.shader_size = accel_state->vs_size; - vs_conf.num_gprs = 3; + vs_conf.num_gprs = 5; vs_conf.stack_size = 1; vs_conf.bo = accel_state->shaders_bo; r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM); diff --git a/src/r600_shader.c b/src/r600_shader.c index e2a41637..b42690c9 100644 --- a/src/r600_shader.c +++ b/src/r600_shader.c @@ -1191,7 +1191,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(1)); /* 3 - mask sub */ - shader[i++] = CF_DWORD0(ADDR(32)); + shader[i++] = CF_DWORD0(ADDR(44)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -1211,7 +1211,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), KCACHE_ADDR0(0), KCACHE_ADDR1(0), - I_COUNT(12), + I_COUNT(20), USES_WATERFALL(0), CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), @@ -1284,7 +1284,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(1)); /* 9 - non-mask sub */ - shader[i++] = CF_DWORD0(ADDR(38)); + shader[i++] = CF_DWORD0(ADDR(50)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -1297,14 +1297,14 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) BARRIER(1)); /* 10 - ALU */ - shader[i++] = CF_ALU_DWORD0(ADDR(26), + shader[i++] = CF_ALU_DWORD0(ADDR(34), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_NOP)); shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), KCACHE_ADDR0(0), KCACHE_ADDR1(0), - I_COUNT(6), + I_COUNT(10), USES_WATERFALL(0), CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), @@ -1360,189 +1360,440 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) BARRIER(1)); - /* 14 srcX MAD - mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(256), + /* 14 srcX.x DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(256), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + + /* 15 srcX.y DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(1), + SRC1_SEL(256), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Y), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Z), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(1), + DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 16 srcX.z DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(256), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(3), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(0)); - /* 15 srcY MAD */ - shader[i++] = ALU_DWORD0(SRC0_SEL(257), + + /* 17 srcX.w DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), + SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(1), + SRC1_SEL(256), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), + SRC1_ELEM(ELEM_W), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Z), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(1), + DST_GPR(3), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(0)); - /* 16 srcX MAD */ - shader[i++] = ALU_DWORD0(SRC0_SEL(256), + /* 18 srcY.x DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(1), + SRC1_SEL(257), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Z), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(1), + DST_GPR(3), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(0)); - /* 17 srcY MAD */ - shader[i++] = ALU_DWORD0(SRC0_SEL(257), + + /* 19 srcY.y DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(257), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 20 srcY.z DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(257), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + + /* 21 srcY.w DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(257), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(3), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 22 maskX.x DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(1), + SRC1_SEL(258), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_W), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(1), + DST_GPR(4), DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), + DST_ELEM(ELEM_X), CLAMP(0)); - /* 18 maskX MAD */ - shader[i++] = ALU_DWORD0(SRC0_SEL(258), + /* 23 maskX.y DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(0), + SRC1_SEL(258), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Y), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(258), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Z), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(0), + DST_GPR(4), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 24 maskX.z DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(258), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(4), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(0)); - /* 19 maskY MAD */ - shader[i++] = ALU_DWORD0(SRC0_SEL(259), + /* 25 maskX.w DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), + SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(0), + SRC1_SEL(258), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), + SRC1_ELEM(ELEM_W), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Z), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(0), + DST_GPR(4), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(0)); - /* 20 srcX MAD */ - shader[i++] = ALU_DWORD0(SRC0_SEL(258), + /* 26 maskY.x DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(0), + SRC1_SEL(259), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Z), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(0), + DST_GPR(4), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(0)); - /* 21 srcY MAD */ - shader[i++] = ALU_DWORD0(SRC0_SEL(259), + + /* 27 maskY.y DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), + SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(0), + SRC1_SEL(259), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_Y), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_W), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(0), + DST_GPR(4), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(0)); - /* 22 srcX / w */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + /* 28 maskY.z DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(259), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(4), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + + /* 29 maskY.w DOT4 - mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(259), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(4), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 30 srcX / w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(3), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), @@ -1568,8 +1819,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_ELEM(ELEM_X), CLAMP(0)); - /* 23 srcY / h */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + /* 31 srcY / h */ + shader[i++] = ALU_DWORD0(SRC0_SEL(3), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), @@ -1595,8 +1846,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_ELEM(ELEM_Y), CLAMP(0)); - /* 24 maskX / w */ - shader[i++] = ALU_DWORD0(SRC0_SEL(0), + /* 32 maskX / w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(4), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), @@ -1622,8 +1873,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_ELEM(ELEM_X), CLAMP(0)); - /* 25 maskY / h */ - shader[i++] = ALU_DWORD0(SRC0_SEL(0), + /* 33 maskY / h */ + shader[i++] = ALU_DWORD0(SRC0_SEL(4), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), @@ -1649,98 +1900,225 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_ELEM(ELEM_Y), CLAMP(0)); - /* 26 srcX MAD - non-mask */ - shader[i++] = ALU_DWORD0(SRC0_SEL(256), + /* 34 srcX.x DOT4 - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(256), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + + /* 35 srcX.y DOT4 - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(0), + SRC1_SEL(256), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Y), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Z), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(0), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + + /* 36 srcX.z DOT4 - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(256), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(0)); - /* 27 srcY MAD */ - shader[i++] = ALU_DWORD0(SRC0_SEL(257), + + /* 37 srcX.w DOT4 - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), + SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(0), + SRC1_SEL(256), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), + SRC1_ELEM(ELEM_W), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Z), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(0), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(0)); - /* 28 srcX MAD */ - shader[i++] = ALU_DWORD0(SRC0_SEL(256), + /* 38 srcY.x DOT4 - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(0), + SRC1_SEL(257), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Z), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(0), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(0)); - /* 29 srcY MAD */ - shader[i++] = ALU_DWORD0(SRC0_SEL(257), + + /* 39 srcY.y DOT4 - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), + SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(0), + SRC1_SEL(257), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_Y), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0), - SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_W), - SRC2_NEG(0), - ALU_INST(SQ_OP3_INST_MULADD), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(0), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(0)); - /* 30 srcX / w */ + + /* 40 srcY.z DOT4 - non-mask */ shader[i++] = ALU_DWORD0(SRC0_SEL(0), SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(257), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + + /* 41 srcY.w DOT4 - non-mask */ + shader[i++] = ALU_DWORD0(SRC0_SEL(0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(257), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(0), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + + /* 42 srcX / w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(2), + SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), SRC1_SEL(256), @@ -1765,8 +2143,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_ELEM(ELEM_X), CLAMP(0)); - /* 31 srcY / h */ - shader[i++] = ALU_DWORD0(SRC0_SEL(0), + /* 43 srcY / h */ + shader[i++] = ALU_DWORD0(SRC0_SEL(2), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), @@ -1792,7 +2170,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) DST_ELEM(ELEM_Y), CLAMP(0)); - /* 32/33 - dst - mask */ + /* 44/45 - dst - mask */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -1817,7 +2195,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; - /* 34/35 - src */ + /* 46/47 - src */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -1842,7 +2220,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; - /* 36/37 - mask */ + /* 48/49 - mask */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -1868,7 +2246,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; - /* 38/39 - dst - non-mask */ + /* 50/51 - dst - non-mask */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -1893,7 +2271,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; - /* 40/41 - src */ + /* 52/53 - src */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), |