summaryrefslogtreecommitdiff
path: root/src/cayman_shader.c
diff options
context:
space:
mode:
author: Grigori Goronzy <greg@chown.ath.cx> 2013-07-18 16:06:23 +0200
committer: Grigori Goronzy <greg@chown.ath.cx> 2013-07-22 05:05:48 +0200
commit: 94d0d14914a025525a0766669b556eaa6681def7 (patch)
tree: 0c945be3a24f486974500aff80fc9626100b8385 /src/cayman_shader.c
parent: 5bb04351c43a91a1d60348b7293544da05d75e72 (diff)
EXA/evergreen/ni: fast solid pixmap support
Solid pixmaps are currently implemented with scratch pixmaps, which is slow. This replaces the hack with a proper implementation. The Composite shader can now either sample a src/mask or use a constant value.
Diffstat (limited to 'src/cayman_shader.c')
-rw-r--r--src/cayman_shader.c590
1 files changed, 386 insertions, 204 deletions
diff --git a/src/cayman_shader.c b/src/cayman_shader.c
index 2a6d6b1b..59f41776 100644
--- a/src/cayman_shader.c
+++ b/src/cayman_shader.c
@@ -2495,17 +2495,44 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
int i = 0;
/* 0 */
- shader[i++] = CF_DWORD0(ADDR(3),
+ /* call interp-fetch-mask if boolean1 == true */
+ shader[i++] = CF_DWORD0(ADDR(12),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
+ CF_CONST(1),
COND(SQ_CF_COND_BOOL),
I_COUNT(0),
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_CALL),
BARRIER(0));
+
/* 1 */
- shader[i++] = CF_DWORD0(ADDR(8),
+ /* call read-constant-mask if boolean1 == false */
+ shader[i++] = CF_DWORD0(ADDR(15),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(1),
+ COND(SQ_CF_COND_NOT_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_CALL),
+ BARRIER(0));
+
+ /* 2 */
+ /* call interp-fetch-src if boolean0 == true */
+ shader[i++] = CF_DWORD0(ADDR(7),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_CALL),
+ BARRIER(0));
+
+ /* 3 */
+ /* call read-constant-src if boolean0 == false */
+ shader[i++] = CF_DWORD0(ADDR(10),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -2514,7 +2541,41 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_CALL),
BARRIER(0));
- /* 2 - end */
+ /* 4 */
+ /* src IN mask (GPR0 := GPR0 .* GPR1) */
+ shader[i++] = CF_ALU_DWORD0(ADDR(17),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 5 */
+ /* export pixel data */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+
+ /* 6 */
+ /* end of program */
shader[i++] = CF_DWORD0(ADDR(0),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
@@ -2524,33 +2585,53 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_END),
BARRIER(1));
- /* 3 - mask sub */
- shader[i++] = CF_ALU_DWORD0(ADDR(12),
+
+ /* subroutine interp-fetch-src */
+
+ /* 7 */
+ /* interpolate src */
+ shader[i++] = CF_ALU_DWORD0(ADDR(21),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_NOP));
shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
- I_COUNT(8),
+ I_COUNT(4),
ALT_CONST(0),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 4 */
- shader[i++] = CF_DWORD0(ADDR(28),
+ /* 8 */
+ /* texture fetch src into GPR0 */
+ shader[i++] = CF_DWORD0(ADDR(26),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
- I_COUNT(2),
+ I_COUNT(1),
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_TC),
BARRIER(1));
- /* 5 */
- shader[i++] = CF_ALU_DWORD0(ADDR(20),
+ /* 9 */
+ /* return */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ BARRIER(0));
+
+ /* subroutine read-constant-src */
+
+ /* 10 */
+ /* read constants into GPR0 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(28),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
@@ -2558,29 +2639,13 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
I_COUNT(4),
- ALT_CONST(0),
+ ALT_CONST(1),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 6 */
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
- TYPE(SQ_EXPORT_PIXEL),
- RW_GPR(2),
- RW_REL(ABSOLUTE),
- INDEX_GPR(0),
- ELEM_SIZE(1));
-
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
- BURST_COUNT(1),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
- MARK(0),
- BARRIER(1));
- /* 7 */
+ /* 11 */
+ /* return */
shader[i++] = CF_DWORD0(ADDR(0),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
@@ -2589,10 +2654,13 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
I_COUNT(0),
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_RETURN),
- BARRIER(1));
+ BARRIER(0));
- /* 8 - non-mask sub */
- shader[i++] = CF_ALU_DWORD0(ADDR(24),
+ /* subroutine interp-fetch-mask */
+
+ /* 12 */
+ /* interpolate mask */
+ shader[i++] = CF_ALU_DWORD0(ADDR(32),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_NOP));
@@ -2604,8 +2672,10 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 9 */
- shader[i++] = CF_DWORD0(ADDR(32),
+
+ /* 13 */
+ /* texture fetch mask into GPR1 */
+ shader[i++] = CF_DWORD0(ADDR(36),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -2615,24 +2685,37 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_TC),
BARRIER(1));
- /* 10 */
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
- TYPE(SQ_EXPORT_PIXEL),
- RW_GPR(0),
- RW_REL(ABSOLUTE),
- INDEX_GPR(0),
- ELEM_SIZE(1));
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
- BURST_COUNT(1),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
- MARK(0),
- BARRIER(1));
+ /* 14 */
+ /* return */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ BARRIER(0));
- /* 11 */
+ /* subroutine read-constant-mask */
+
+ /* 15 */
+ /* read constants into GPR1 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(38),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(1),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 16 */
+ /* return */
shader[i++] = CF_DWORD0(ADDR(0),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
@@ -2641,18 +2724,21 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
I_COUNT(0),
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_RETURN),
- BARRIER(1));
+ BARRIER(0));
+
+ /* ALU clauses */
- /* 12 interpolate src tex coords - mask */
+ /* 17 */
+ /* MUL gpr[0].x gpr[0].x gpr[1].x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2661,22 +2747,24 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_INTERP_XY),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(1),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
- CLAMP(0));
- /* 13 */
+ CLAMP(1));
+
+ /* 18 */
+ /* MUL gpr[0].y gpr[0].y gpr[1].y */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2685,67 +2773,70 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_INTERP_XY),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(1),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
- CLAMP(0));
- /* 14 */
+ CLAMP(1));
+ /* 19 */
+ /* MUL gpr[0].z gpr[0].z gpr[1].z */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(0),
+ WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_INTERP_XY),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(1),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
- CLAMP(0));
- /* 15 */
+ CLAMP(1));
+ /* 20 */
+ /* MUL gpr[0].w gpr[0].w gpr[1].w */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(0),
+ WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_INTERP_XY),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(1),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
- CLAMP(0));
+ CLAMP(1));
- /* 16 interpolate mask tex coords */
+ /* 21 */
+ /* INTERP_XY GPR0.x, GPR0.y PARAM0.x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2764,12 +2855,13 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 17 */
+ /* 22 */
+ /* INTERP_XY GPR0.y, GPR0.x PARAM0.x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2788,12 +2880,13 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 18 */
+ /* 23 */
+ /* INTERP_XY GPR0.z, GPR0.y PARAM0.x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2812,12 +2905,14 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 19 */
+
+ /* 24 */
+ /* INTERP_XY GPR0.w, GPR0.x PARAM0.x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2837,17 +2932,53 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 20 - alu 0 */
- /* MUL gpr[2].x gpr[0].x gpr[1].x */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ /* 25 */
+ shader[i++] = 0;
+ shader[i++] = 0;
+
+ /* 26/27 */
+ /* SAMPLE RID=0 GPR0, GPR0 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+
+ /* 28 */
+ /* MOV GPR0.x, KC4.x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
+ INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2856,23 +2987,24 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(1));
- /* 21 - alu 1 */
- /* MUL gpr[2].y gpr[0].y gpr[1].y */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+
+ /* 29 */
+ /* MOV GPR0.y, KC4.y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
+ INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2881,23 +3013,24 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(1));
- /* 22 - alu 2 */
- /* MUL gpr[2].z gpr[0].z gpr[1].z */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+
+ /* 30 */
+ /* MOV GPR0.z, KC4.z */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
+ INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2906,23 +3039,24 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(1));
- /* 23 - alu 3 */
- /* MUL gpr[2].w gpr[0].w gpr[1].w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+
+ /* 31 */
+ /* MOV GPR0.w, KC4.w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
+ INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2931,19 +3065,20 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(1));
- /* 24 - interpolate tex coords - non-mask */
+ /* 32 */
+ /* INTERP_XY GPR1.x, PARAM1 */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2958,16 +3093,17 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_INTERP_XY),
BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(0),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 25 */
+ /* 33 */
+ /* INTERP_XY GPR1.y, PARAM1 */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2982,16 +3118,17 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_INTERP_XY),
BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(0),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 26 */
+ /* 34 */
+ /* INTERP_XY GPR1.z, PARAM1 */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -3006,16 +3143,17 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_INTERP_XY),
BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(0),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 27 */
+ /* 35 */
+ /* INTERP_XY GPR1.w, PARAM1 */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -3030,16 +3168,17 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_INTERP_XY),
BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(0),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 28/29 - src - mask */
+ /* 36/37 */
+ /* SAMPLE RID=1 GPR1, GPR1 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
INST_MOD(0),
FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(0),
+ RESOURCE_ID(1),
SRC_GPR(1),
SRC_REL(ABSOLUTE),
ALT_CONST(0),
@@ -3059,36 +3198,6 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = TEX_DWORD2(OFFSET_X(0),
OFFSET_Y(0),
OFFSET_Z(0),
- SAMPLER_ID(0),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
- /* 30/31 - mask */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- INST_MOD(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(1),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- ALT_CONST(0),
- RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
- SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
- shader[i++] = TEX_DWORD1(DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_Z),
- DST_SEL_W(SQ_SEL_W),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
SAMPLER_ID(1),
SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
@@ -3096,36 +3205,109 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 32/33 - src - non-mask */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- INST_MOD(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(0),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- ALT_CONST(0),
- RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
- SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
- shader[i++] = TEX_DWORD1(DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_Z),
- DST_SEL_W(SQ_SEL_W),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
- SAMPLER_ID(0),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
+ /* 38 */
+ /* MOV GPR1.x, KC5.x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+
+ /* 39 */
+ /* MOV GPR1.y, KC5.y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+
+ /* 40 */
+ /* MOV GPR1.z, KC5.z */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+
+ /* 41 */
+ /* MOV GPR1.w, KC5.w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
return i;
}