summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Alex Deucher <alexdeucher@gmail.com> 2010-11-29 18:09:05 -0500
committer: Alex Deucher <alexdeucher@gmail.com> 2010-11-29 18:10:29 -0500
commit: 90f831361844f1b80b3f6bb718ff5ac584d73d48 (patch)
tree: 8061a10e67ad4e1e0766841d28e1ef01cf7ce9b0 /src
parent: 3cae361d0448b6e231c80f53d64bdbbdd74dc4cf (diff)
evergreen: use dot4 for transforms
Diffstat (limited to 'src')
-rw-r--r-- src/evergreen_exa.c 2
-rw-r--r-- src/evergreen_shader.c 622
2 files changed, 477 insertions, 147 deletions
diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index 1c027525..89afaff5 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -1318,7 +1318,7 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
/* Shader */
vs_conf.shader_addr = accel_state->vs_mc_addr;
vs_conf.shader_size = accel_state->vs_size;
- vs_conf.num_gprs = 3;
+ vs_conf.num_gprs = 5;
vs_conf.stack_size = 1;
vs_conf.bo = accel_state->shaders_bo;
evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
diff --git a/src/evergreen_shader.c b/src/evergreen_shader.c
index 42cea7ab..ef56d2d4 100644
--- a/src/evergreen_shader.c
+++ b/src/evergreen_shader.c
@@ -1410,7 +1410,7 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(1));
/* 3 - mask sub */
- shader[i++] = CF_DWORD0(ADDR(32),
+ shader[i++] = CF_DWORD0(ADDR(44),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -1430,7 +1430,7 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
- I_COUNT(12),
+ I_COUNT(20),
ALT_CONST(0),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
@@ -1500,7 +1500,7 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(1));
/* 9 - non-mask sub */
- shader[i++] = CF_DWORD0(ADDR(38),
+ shader[i++] = CF_DWORD0(ADDR(50),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -1513,14 +1513,14 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
BARRIER(1));
/* 10 - ALU */
- shader[i++] = CF_ALU_DWORD0(ADDR(26),
+ shader[i++] = CF_ALU_DWORD0(ADDR(34),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
- I_COUNT(6),
+ I_COUNT(10),
ALT_CONST(0),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
@@ -1573,189 +1573,408 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* mask alu - 14 srcX MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ /* 14 srcX.x DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 15 srcX.y DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 16 srcX.z DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 15 srcY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+
+ /* 17 srcX.w DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 1),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(3),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 16 srcX MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ /* 18 srcY.x DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 1),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(3),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 17 srcY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+
+ /* 19 srcY.y DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 20 srcY.z DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 21 srcY.w DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 22 maskX.x DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 1),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_W),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
+ DST_GPR(4),
DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
+ DST_ELEM(ELEM_X),
CLAMP(0));
- /* 18 maskX MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ /* 23 maskX.y DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 2),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 24 maskX.z DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 19 maskY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ /* 25 maskX.w DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 3),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(4),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 20 srcX MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+ /* 26 maskY.x DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(4),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 21 srcY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 3),
+
+ /* 27 maskY.y DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_W),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(4),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 22 srcX / w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ /* 28 maskY.z DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 29 maskY.w DOT4 - mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(4),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 30 srcX / w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
@@ -1779,8 +1998,8 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 23 srcY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+ /* 31 srcY / h */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
@@ -1804,8 +2023,8 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 24 maskX / w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ /* 32 maskX / w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
@@ -1829,8 +2048,8 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 25 maskY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ /* 33 maskY / h */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
@@ -1854,98 +2073,209 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* no mask alu - 26 srcX MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ /* 34 srcX.x DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+
+ /* 35 srcX.y DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 36 srcX.z DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 27 srcY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+
+ /* 37 srcX.w DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 1),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 28 srcX MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+ /* 38 srcY.x DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Z),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 29 srcY MAD */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+
+ /* 39 srcY.y DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_W),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(0),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 30 srcX / w */
+
+ /* 40 srcY.z DOT4 - non-mask */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 41 srcY.w DOT4 - non-mask */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 42 srcX / w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
+ SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
@@ -1968,8 +2298,8 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 31 srcY / h */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ /* 43 srcY / h */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
@@ -1993,7 +2323,7 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* mask vfetch - 32/33 - dst */
+ /* mask vfetch - 44/45 - dst */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -2020,7 +2350,7 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ALT_CONST(0),
BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
shader[i++] = VTX_DWORD_PAD;
- /* 34/35 - src */
+ /* 46/47 - src */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -2047,7 +2377,7 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ALT_CONST(0),
BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
shader[i++] = VTX_DWORD_PAD;
- /* 36/37 - mask */
+ /* 48/49 - mask */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -2075,7 +2405,7 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
shader[i++] = VTX_DWORD_PAD;
- /* no mask vfetch - 38/39 - dst */
+ /* no mask vfetch - 50/51 - dst */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -2102,7 +2432,7 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ALT_CONST(0),
BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
shader[i++] = VTX_DWORD_PAD;
- /* 40/41 - src */
+ /* 52/53 - src */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),