summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Grigori Goronzy <greg@chown.ath.cx> 2013-07-18 16:06:23 +0200
committer Grigori Goronzy <greg@chown.ath.cx> 2013-07-22 05:05:48 +0200
commit 94d0d14914a025525a0766669b556eaa6681def7 (patch)
tree 0c945be3a24f486974500aff80fc9626100b8385 /src
parent 5bb04351c43a91a1d60348b7293544da05d75e72 (diff)
EXA/evergreen/ni: fast solid pixmap support
Solid pixmaps are currently implemented with scratch pixmaps, which is slow. This replaces the hack with a proper implementation. The Composite shader can now either sample a src/mask or use a constant value.
Diffstat (limited to 'src')
-rw-r--r-- src/cayman_shader.c 590
-rw-r--r-- src/evergreen_exa.c 258
-rw-r--r-- src/evergreen_shader.c 596
3 files changed, 962 insertions, 482 deletions
diff --git a/src/cayman_shader.c b/src/cayman_shader.c
index 2a6d6b1b..59f41776 100644
--- a/src/cayman_shader.c
+++ b/src/cayman_shader.c
@@ -2495,17 +2495,44 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
int i = 0;
/* 0 */
- shader[i++] = CF_DWORD0(ADDR(3),
+ /* call interp-fetch-mask if boolean1 == true */
+ shader[i++] = CF_DWORD0(ADDR(12),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
+ CF_CONST(1),
COND(SQ_CF_COND_BOOL),
I_COUNT(0),
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_CALL),
BARRIER(0));
+
/* 1 */
- shader[i++] = CF_DWORD0(ADDR(8),
+ /* call read-constant-mask if boolean1 == false */
+ shader[i++] = CF_DWORD0(ADDR(15),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(1),
+ COND(SQ_CF_COND_NOT_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_CALL),
+ BARRIER(0));
+
+ /* 2 */
+ /* call interp-fetch-src if boolean0 == true */
+ shader[i++] = CF_DWORD0(ADDR(7),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_CALL),
+ BARRIER(0));
+
+ /* 3 */
+ /* call read-constant-src if boolean0 == false */
+ shader[i++] = CF_DWORD0(ADDR(10),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -2514,7 +2541,41 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_CALL),
BARRIER(0));
- /* 2 - end */
+ /* 4 */
+ /* src IN mask (GPR0 := GPR0 .* GPR1) */
+ shader[i++] = CF_ALU_DWORD0(ADDR(17),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 5 */
+ /* export pixel data */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+
+ /* 6 */
+ /* end of program */
shader[i++] = CF_DWORD0(ADDR(0),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
@@ -2524,33 +2585,53 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_END),
BARRIER(1));
- /* 3 - mask sub */
- shader[i++] = CF_ALU_DWORD0(ADDR(12),
+
+ /* subroutine interp-fetch-src */
+
+ /* 7 */
+ /* interpolate src */
+ shader[i++] = CF_ALU_DWORD0(ADDR(21),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_NOP));
shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
- I_COUNT(8),
+ I_COUNT(4),
ALT_CONST(0),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 4 */
- shader[i++] = CF_DWORD0(ADDR(28),
+ /* 8 */
+ /* texture fetch src into GPR0 */
+ shader[i++] = CF_DWORD0(ADDR(26),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
- I_COUNT(2),
+ I_COUNT(1),
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_TC),
BARRIER(1));
- /* 5 */
- shader[i++] = CF_ALU_DWORD0(ADDR(20),
+ /* 9 */
+ /* return */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ BARRIER(0));
+
+ /* subroutine read-constant-src */
+
+ /* 10 */
+ /* read constants into GPR0 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(28),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
@@ -2558,29 +2639,13 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
I_COUNT(4),
- ALT_CONST(0),
+ ALT_CONST(1),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 6 */
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
- TYPE(SQ_EXPORT_PIXEL),
- RW_GPR(2),
- RW_REL(ABSOLUTE),
- INDEX_GPR(0),
- ELEM_SIZE(1));
-
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
- BURST_COUNT(1),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
- MARK(0),
- BARRIER(1));
- /* 7 */
+ /* 11 */
+ /* return */
shader[i++] = CF_DWORD0(ADDR(0),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
@@ -2589,10 +2654,13 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
I_COUNT(0),
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_RETURN),
- BARRIER(1));
+ BARRIER(0));
- /* 8 - non-mask sub */
- shader[i++] = CF_ALU_DWORD0(ADDR(24),
+ /* subroutine interp-fetch-mask */
+
+ /* 12 */
+ /* interpolate mask */
+ shader[i++] = CF_ALU_DWORD0(ADDR(32),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_NOP));
@@ -2604,8 +2672,10 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 9 */
- shader[i++] = CF_DWORD0(ADDR(32),
+
+ /* 13 */
+ /* texture fetch mask into GPR1 */
+ shader[i++] = CF_DWORD0(ADDR(36),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -2615,24 +2685,37 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_TC),
BARRIER(1));
- /* 10 */
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
- TYPE(SQ_EXPORT_PIXEL),
- RW_GPR(0),
- RW_REL(ABSOLUTE),
- INDEX_GPR(0),
- ELEM_SIZE(1));
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
- BURST_COUNT(1),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
- MARK(0),
- BARRIER(1));
+ /* 14 */
+ /* return */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ BARRIER(0));
- /* 11 */
+ /* subroutine read-constant-mask */
+
+ /* 15 */
+ /* read constants into GPR1 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(38),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(1),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 16 */
+ /* return */
shader[i++] = CF_DWORD0(ADDR(0),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
@@ -2641,18 +2724,21 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
I_COUNT(0),
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_RETURN),
- BARRIER(1));
+ BARRIER(0));
+
+ /* ALU clauses */
- /* 12 interpolate src tex coords - mask */
+ /* 17 */
+ /* MUL gpr[0].x gpr[0].x gpr[1].x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2661,22 +2747,24 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_INTERP_XY),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(1),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
- CLAMP(0));
- /* 13 */
+ CLAMP(1));
+
+ /* 18 */
+ /* MUL gpr[0].y gpr[0].y gpr[1].y */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2685,67 +2773,70 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_INTERP_XY),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(1),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
- CLAMP(0));
- /* 14 */
+ CLAMP(1));
+ /* 19 */
+ /* MUL gpr[0].z gpr[0].z gpr[1].z */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(0),
+ WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_INTERP_XY),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(1),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
- CLAMP(0));
- /* 15 */
+ CLAMP(1));
+ /* 20 */
+ /* MUL gpr[0].w gpr[0].w gpr[1].w */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(0),
+ WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_INTERP_XY),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(1),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
- CLAMP(0));
+ CLAMP(1));
- /* 16 interpolate mask tex coords */
+ /* 21 */
+ /* INTERP_XY GPR0.x, GPR0.y PARAM0.x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2764,12 +2855,13 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 17 */
+ /* 22 */
+ /* INTERP_XY GPR0.y, GPR0.x PARAM0.x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2788,12 +2880,13 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 18 */
+ /* 23 */
+ /* INTERP_XY GPR0.z, GPR0.y PARAM0.x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2812,12 +2905,14 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 19 */
+
+ /* 24 */
+ /* INTERP_XY GPR0.w, GPR0.x PARAM0.x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2837,17 +2932,53 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 20 - alu 0 */
- /* MUL gpr[2].x gpr[0].x gpr[1].x */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ /* 25 */
+ shader[i++] = 0;
+ shader[i++] = 0;
+
+ /* 26/27 */
+ /* SAMPLE RID=0 GPR0, GPR0 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+
+ /* 28 */
+ /* MOV GPR0.x, KC4.x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
+ INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2856,23 +2987,24 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(1));
- /* 21 - alu 1 */
- /* MUL gpr[2].y gpr[0].y gpr[1].y */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+
+ /* 29 */
+ /* MOV GPR0.y, KC4.y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
+ INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2881,23 +3013,24 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(1));
- /* 22 - alu 2 */
- /* MUL gpr[2].z gpr[0].z gpr[1].z */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+
+ /* 30 */
+ /* MOV GPR0.z, KC4.z */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
+ INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2906,23 +3039,24 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(1));
- /* 23 - alu 3 */
- /* MUL gpr[2].w gpr[0].w gpr[1].w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+
+ /* 31 */
+ /* MOV GPR0.w, KC4.w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
+ INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2931,19 +3065,20 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(1));
- /* 24 - interpolate tex coords - non-mask */
+ /* 32 */
+ /* INTERP_XY GPR1.x, PARAM1 */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2958,16 +3093,17 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_INTERP_XY),
BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(0),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 25 */
+ /* 33 */
+ /* INTERP_XY GPR1.y, PARAM1 */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2982,16 +3118,17 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_INTERP_XY),
BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(0),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 26 */
+ /* 34 */
+ /* INTERP_XY GPR1.z, PARAM1 */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -3006,16 +3143,17 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_INTERP_XY),
BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(0),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 27 */
+ /* 35 */
+ /* INTERP_XY GPR1.w, PARAM1 */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -3030,16 +3168,17 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_INTERP_XY),
BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(0),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 28/29 - src - mask */
+ /* 36/37 */
+ /* SAMPLE RID=1 GPR1, GPR1 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
INST_MOD(0),
FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(0),
+ RESOURCE_ID(1),
SRC_GPR(1),
SRC_REL(ABSOLUTE),
ALT_CONST(0),
@@ -3059,36 +3198,6 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = TEX_DWORD2(OFFSET_X(0),
OFFSET_Y(0),
OFFSET_Z(0),
- SAMPLER_ID(0),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
- /* 30/31 - mask */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- INST_MOD(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(1),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- ALT_CONST(0),
- RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
- SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
- shader[i++] = TEX_DWORD1(DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_Z),
- DST_SEL_W(SQ_SEL_W),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
SAMPLER_ID(1),
SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
@@ -3096,36 +3205,109 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 32/33 - src - non-mask */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- INST_MOD(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(0),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- ALT_CONST(0),
- RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
- SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
- shader[i++] = TEX_DWORD1(DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_Z),
- DST_SEL_W(SQ_SEL_W),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
- SAMPLER_ID(0),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
+ /* 38 */
+ /* MOV GPR1.x, KC5.x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+
+ /* 39 */
+ /* MOV GPR1.y, KC5.y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+
+ /* 40 */
+ /* MOV GPR1.z, KC5.z */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+
+ /* 41 */
+ /* MOV GPR1.w, KC5.w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
return i;
}
diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index 2cdce0f5..5b8a6311 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -777,10 +777,9 @@ static Bool EVERGREENCheckCompositeTexture(PicturePtr pPict,
return TRUE;
}
-static void EVERGREENXFormSetup(PicturePtr pPict, PixmapPtr pPix,
+static void EVERGREENXFormSetup(PicturePtr pPict, ScrnInfoPtr pScrn,
int unit, float *vs_alu_consts)
{
- ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen);
RADEONInfoPtr info = RADEONPTR(pScrn);
struct radeon_accel_state *accel_state = info->accel_state;
int const_offset = unit * 8;
@@ -1118,6 +1117,134 @@ static Bool EVERGREENCheckComposite(int op, PicturePtr pSrcPicture,
}
+static void EVERGREENSetSolidConsts(ScrnInfoPtr pScrn, float *buf, int format, uint32_t fg, int unit)
+{ /* Writes buf[0..3] = R, G, B, A floats derived from the packed colour fg, read according to picture format `format`; `unit` selects which adjustment rules below apply (PrepareComposite passes 0 for the source picture and 1 for the mask picture). */
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ struct radeon_accel_state *accel_state = info->accel_state;
+ float pix_r = 0, pix_g = 0, pix_b = 0, pix_a = 0; /* result channels; stay 0 unless assigned below */
+
+ uint32_t w = (fg >> 24) & 0xff; /* bits 31:24 of fg */
+ uint32_t z = (fg >> 16) & 0xff; /* bits 23:16 of fg */
+ uint32_t y = (fg >> 8) & 0xff; /* bits 15:8 of fg */
+ uint32_t x = (fg >> 0) & 0xff; /* bits 7:0 of fg */
+ float xf = (float)x / 255; /* byte 0 scaled to [0,1]: B for ARGB/xRGB, R for ABGR/xBGR, A for BGRA and a8 */
+ float yf = (float)y / 255; /* byte 1 scaled to [0,1]: G for ARGB/xRGB and ABGR/xBGR, R for BGRA/BGRx */
+ float zf = (float)z / 255; /* byte 2 scaled to [0,1]: R for ARGB/xRGB, B for ABGR/xBGR, G for BGRA/BGRx */
+ float wf = (float)w / 255; /* byte 3 scaled to [0,1]: A for ARGB and ABGR, B for BGRA/BGRx */
+
+ /* component swizzles: choose which byte of fg feeds each output channel for this format */
+ switch (format) {
+ case PICT_a1r5g5b5:
+ case PICT_a8r8g8b8:
+ pix_r = zf; /* R */
+ pix_g = yf; /* G */
+ pix_b = xf; /* B */
+ pix_a = wf; /* A */
+ break;
+ case PICT_a8b8g8r8:
+ pix_r = xf; /* R */
+ pix_g = yf; /* G */
+ pix_b = zf; /* B */
+ pix_a = wf; /* A */
+ break;
+ case PICT_x8b8g8r8:
+ pix_r = xf; /* R */
+ pix_g = yf; /* G */
+ pix_b = zf; /* B */
+ pix_a = 1.0; /* A */
+ break;
+ case PICT_b8g8r8a8:
+ pix_r = yf; /* R */
+ pix_g = zf; /* G */
+ pix_b = wf; /* B */
+ pix_a = xf; /* A */
+ break;
+ case PICT_b8g8r8x8:
+ pix_r = yf; /* R */
+ pix_g = zf; /* G */
+ pix_b = wf; /* B */
+ pix_a = 1.0; /* A */
+ break;
+ case PICT_x1r5g5b5:
+ case PICT_x8r8g8b8:
+ case PICT_r5g6b5:
+ pix_r = zf; /* R */
+ pix_g = yf; /* G */
+ pix_b = xf; /* B */
+ pix_a = 1.0; /* A */
+ break;
+ case PICT_a8:
+ pix_r = 0.0; /* R */
+ pix_g = 0.0; /* G */
+ pix_b = 0.0; /* B */
+ pix_a = xf; /* A */
+ break;
+ default:
+ ErrorF("Bad format 0x%x\n", format); /* only logs; execution continues with all four channels still 0 */
+ }
+
+ if (unit == 0) { /* unit 0: the value PrepareComposite passes for the source picture */
+ if (!accel_state->msk_pic) { /* accel state records no mask picture */
+ if (PICT_FORMAT_RGB(format) == 0) { /* presumably: format carries no colour bits - TODO confirm macro semantics */
+ pix_r = 0.0;
+ pix_g = 0.0;
+ pix_b = 0.0;
+ }
+
+ if (PICT_FORMAT_A(format) == 0) /* presumably: format carries no alpha bits - TODO confirm macro semantics */
+ pix_a = 1.0;
+ } else {
+ if (accel_state->component_alpha) {
+ if (accel_state->src_alpha) {
+ if (PICT_FORMAT_A(format) == 0) {
+ pix_r = 1.0;
+ pix_g = 1.0;
+ pix_b = 1.0;
+ pix_a = 1.0;
+ } else { /* replicate alpha into the three colour channels */
+ pix_r = pix_a;
+ pix_g = pix_a;
+ pix_b = pix_a;
+ }
+ } else {
+ if (PICT_FORMAT_A(format) == 0)
+ pix_a = 1.0;
+ }
+ } else { /* mask present, no component alpha: same rules as the no-mask branch above */
+ if (PICT_FORMAT_RGB(format) == 0) {
+ pix_r = 0;
+ pix_g = 0;
+ pix_b = 0;
+ }
+
+ if (PICT_FORMAT_A(format) == 0)
+ pix_a = 1.0;
+ }
+ }
+ } else { /* any other unit: PrepareComposite passes 1 here for the mask picture */
+ if (accel_state->component_alpha) {
+ if (PICT_FORMAT_A(format) == 0)
+ pix_a = 1.0;
+ } else {
+ if (PICT_FORMAT_A(format) == 0) {
+ pix_r = 1.0;
+ pix_g = 1.0;
+ pix_b = 1.0;
+ pix_a = 1.0;
+ } else { /* replicate alpha into the three colour channels */
+ pix_r = pix_a;
+ pix_g = pix_a;
+ pix_b = pix_a;
+ }
+ }
+ }
+
+ buf[0] = pix_r; /* output order is R, G, B, A */
+ buf[1] = pix_g;
+ buf[2] = pix_b;
+ buf[3] = pix_a;
+}
+
static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
PicturePtr pMaskPicture, PicturePtr pDstPicture,
PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
@@ -1132,30 +1259,26 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
const_config_t vs_const_conf;
struct r600_accel_object src_obj, mask_obj, dst_obj;
float *cbuf;
+ uint32_t ps_bool_consts = 0;
if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8))
return FALSE;
- if (!pSrc) {
- pSrc = RADEONSolidPixmap(pScreen, pSrcPicture->pSourcePict->solidFill.color);
- if (!pSrc)
- RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
+ if (pSrc) {
+ src_obj.bo = radeon_get_pixmap_bo(pSrc);
+ src_obj.surface = radeon_get_pixmap_surface(pSrc);
+ src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
+ src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
+ src_obj.width = pSrc->drawable.width;
+ src_obj.height = pSrc->drawable.height;
+ src_obj.bpp = pSrc->drawable.bitsPerPixel;
+ src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
}
dst_obj.bo = radeon_get_pixmap_bo(pDst);
- src_obj.bo = radeon_get_pixmap_bo(pSrc);
dst_obj.surface = radeon_get_pixmap_surface(pDst);
- src_obj.surface = radeon_get_pixmap_surface(pSrc);
dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
- src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
- src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
-
- src_obj.width = pSrc->drawable.width;
- src_obj.height = pSrc->drawable.height;
- src_obj.bpp = pSrc->drawable.bitsPerPixel;
- src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
-
dst_obj.width = pDst->drawable.width;
dst_obj.height = pDst->drawable.height;
dst_obj.bpp = pDst->drawable.bitsPerPixel;
@@ -1165,30 +1288,16 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
if (pMaskPicture) {
- if (!pMask) {
- pMask = RADEONSolidPixmap(pScreen, pMaskPicture->pSourcePict->solidFill.color);
- if (!pMask) {
- if (!pSrcPicture->pDrawable)
- pScreen->DestroyPixmap(pSrc);
- RADEON_FALLBACK(("Failed to create solid scratch pixmap\n"));
- }
+ if (pMask) {
+ mask_obj.bo = radeon_get_pixmap_bo(pMask);
+ mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
+ mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
+ mask_obj.surface = radeon_get_pixmap_surface(pMask);
+ mask_obj.width = pMask->drawable.width;
+ mask_obj.height = pMask->drawable.height;
+ mask_obj.bpp = pMask->drawable.bitsPerPixel;
+ mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
}
- mask_obj.bo = radeon_get_pixmap_bo(pMask);
- mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
- mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
- mask_obj.surface = radeon_get_pixmap_surface(pMask);
- mask_obj.width = pMask->drawable.width;
- mask_obj.height = pMask->drawable.height;
- mask_obj.bpp = pMask->drawable.bitsPerPixel;
- mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
-
- if (!R600SetAccelState(pScrn,
- &src_obj,
- &mask_obj,
- &dst_obj,
- accel_state->comp_vs_offset, accel_state->comp_ps_offset,
- 3, 0xffffffff))
- return FALSE;
accel_state->msk_pic = pMaskPicture;
if (pMaskPicture->componentAlpha) {
@@ -1202,19 +1311,19 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
accel_state->src_alpha = FALSE;
}
} else {
- if (!R600SetAccelState(pScrn,
- &src_obj,
- NULL,
- &dst_obj,
- accel_state->comp_vs_offset, accel_state->comp_ps_offset,
- 3, 0xffffffff))
- return FALSE;
-
accel_state->msk_pic = NULL;
accel_state->component_alpha = FALSE;
accel_state->src_alpha = FALSE;
}
+ if (!R600SetAccelState(pScrn,
+ pSrc ? &src_obj : NULL,
+ (pMaskPicture && pMask) ? &mask_obj : NULL,
+ &dst_obj,
+ accel_state->comp_vs_offset, accel_state->comp_ps_offset,
+ 3, 0xffffffff))
+ return FALSE;
+
if (!EVERGREENGetDestFormat(pDstPicture, &dst_format))
return FALSE;
@@ -1238,11 +1347,14 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
- if (!EVERGREENTextureSetup(pSrcPicture, pSrc, 0)) {
- radeon_ib_discard(pScrn);
- radeon_cs_flush_indirect(pScrn);
- return FALSE;
- }
+ if (pSrc) {
+ if (!EVERGREENTextureSetup(pSrcPicture, pSrc, 0)) {
+ radeon_ib_discard(pScrn);
+ radeon_cs_flush_indirect(pScrn);
+ return FALSE;
+ }
+ } else
+ accel_state->is_transform[0] = FALSE;
if (pMask) {
if (!EVERGREENTextureSetup(pMaskPicture, pMask, 1)) {
@@ -1253,12 +1365,16 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
} else
accel_state->is_transform[1] = FALSE;
+ if (pSrc)
+ ps_bool_consts |= (1 << 0);
+ if (pMask)
+ ps_bool_consts |= (1 << 1);
+ evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, ps_bool_consts);
+
if (pMask) {
evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0));
- evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
} else {
evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0));
- evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
}
/* Shader */
@@ -1271,7 +1387,7 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
ps_conf.shader_addr = accel_state->ps_mc_addr;
ps_conf.shader_size = accel_state->ps_size;
- ps_conf.num_gprs = 3;
+ ps_conf.num_gprs = 2;
ps_conf.stack_size = 1;
ps_conf.clamp_consts = 0;
ps_conf.export_mode = 2;
@@ -1346,9 +1462,27 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
vs_const_conf.const_addr = accel_state->cbuf.vb_offset;
vs_const_conf.cpu_ptr = (uint32_t *)(char *)cbuf;
- EVERGREENXFormSetup(pSrcPicture, pSrc, 0, cbuf);
+ EVERGREENXFormSetup(pSrcPicture, pScrn, 0, cbuf);
if (pMask)
- EVERGREENXFormSetup(pMaskPicture, pMask, 1, cbuf);
+ EVERGREENXFormSetup(pMaskPicture, pScrn, 1, cbuf);
+
+ if (!pSrc) {
+ /* solid src color */
+ EVERGREENSetSolidConsts(pScrn, &cbuf[16], pSrcPicture->format,
+ pSrcPicture->pSourcePict->solidFill.color, 0);
+ }
+
+ if (!pMaskPicture) {
+ /* use identity constant if there is no mask */
+ cbuf[20] = 1.0;
+ cbuf[21] = 1.0;
+ cbuf[22] = 1.0;
+ cbuf[23] = 1.0;
+ } else if (!pMask) {
+ /* solid mask color */
+ EVERGREENSetSolidConsts(pScrn, &cbuf[20], pMaskPicture->format,
+ pMaskPicture->pSourcePict->solidFill.color, 1);
+ }
radeon_vbo_commit(pScrn, &accel_state->cbuf);
evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);
@@ -1377,7 +1511,7 @@ static void EVERGREENFinishComposite(ScrnInfoPtr pScrn, PixmapPtr pDst,
accel_state->vline_y1,
accel_state->vline_y2);
- vtx_size = accel_state->msk_pic ? 24 : 16;
+ vtx_size = accel_state->msk_pix ? 24 : 16;
evergreen_finish_op(pScrn, vtx_size);
}
@@ -1390,12 +1524,6 @@ static void EVERGREENDoneComposite(PixmapPtr pDst)
struct radeon_accel_state *accel_state = info->accel_state;
EVERGREENFinishComposite(pScrn, pDst, accel_state);
-
- if (!accel_state->src_pic->pDrawable)
- pScreen->DestroyPixmap(accel_state->src_pix);
-
- if (accel_state->msk_pic && !accel_state->msk_pic->pDrawable)
- pScreen->DestroyPixmap(accel_state->msk_pix);
}
static void EVERGREENComposite(PixmapPtr pDst,
@@ -1424,7 +1552,7 @@ static void EVERGREENComposite(PixmapPtr pDst,
if (accel_state->vsync)
RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
- if (accel_state->msk_pic) {
+ if (accel_state->msk_pix) {
vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
diff --git a/src/evergreen_shader.c b/src/evergreen_shader.c
index ebc58f21..4852578e 100644
--- a/src/evergreen_shader.c
+++ b/src/evergreen_shader.c
@@ -2472,15 +2472,16 @@ int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
}
/* comp ps --------------------------------------- */
-int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t *shader)
{
int i = 0;
/* 0 */
- shader[i++] = CF_DWORD0(ADDR(3),
+ /* call interp-fetch-mask if boolean1 == true */
+ shader[i++] = CF_DWORD0(ADDR(11),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
+ CF_CONST(1),
COND(SQ_CF_COND_BOOL),
I_COUNT(0),
VALID_PIXEL_MODE(0),
@@ -2488,11 +2489,13 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_CALL),
WHOLE_QUAD_MODE(0),
BARRIER(0));
+
/* 1 */
- shader[i++] = CF_DWORD0(ADDR(8),
+ /* call read-constant-mask if boolean1 == false */
+ shader[i++] = CF_DWORD0(ADDR(14),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
+ CF_CONST(1),
COND(SQ_CF_COND_NOT_BOOL),
I_COUNT(0),
VALID_PIXEL_MODE(0),
@@ -2500,48 +2503,118 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_CALL),
WHOLE_QUAD_MODE(0),
BARRIER(0));
+
/* 2 */
- shader[i++] = CF_DWORD0(ADDR(0),
- JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ /* call interp-fetch-src if boolean0 == true */
+ shader[i++] = CF_DWORD0(ADDR(6),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
- COND(SQ_CF_COND_ACTIVE),
+ COND(SQ_CF_COND_BOOL),
I_COUNT(0),
VALID_PIXEL_MODE(0),
- END_OF_PROGRAM(1),
- CF_INST(SQ_CF_INST_NOP),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_CALL),
WHOLE_QUAD_MODE(0),
- BARRIER(1));
+ BARRIER(0));
- /* 3 - mask sub */
- shader[i++] = CF_ALU_DWORD0(ADDR(12),
+ /* 3 */
+ /* call read-constant-src if boolean0 == false */
+ shader[i++] = CF_DWORD0(ADDR(9),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_NOT_BOOL),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 4 */
+ /* src IN mask (GPR0 := GPR0 .* GPR1) */
+ shader[i++] = CF_ALU_DWORD0(ADDR(16),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_NOP));
shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
- I_COUNT(8),
+ I_COUNT(4),
ALT_CONST(0),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 4 */
- shader[i++] = CF_DWORD0(ADDR(28),
+ /* 5 */
+ /* export pixel data */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ BURST_COUNT(1),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(1),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ MARK(0),
+ BARRIER(1));
+
+ /* subroutine interp-fetch-src */
+
+ /* 6 */
+ /* interpolate src */
+ shader[i++] = CF_ALU_DWORD0(ADDR(20),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 7 */
+ /* texture fetch src into GPR0 */
+ shader[i++] = CF_DWORD0(ADDR(24),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
- I_COUNT(2),
+ I_COUNT(1),
VALID_PIXEL_MODE(0),
END_OF_PROGRAM(0),
CF_INST(SQ_CF_INST_TC),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 5 */
- shader[i++] = CF_ALU_DWORD0(ADDR(20),
+ /* 8 */
+ /* return */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+
+ /* subroutine read-constant-src */
+
+ /* 9 */
+ /* read constants into GPR0 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(26),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
@@ -2549,30 +2622,13 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
KCACHE_ADDR0(0),
KCACHE_ADDR1(0),
I_COUNT(4),
- ALT_CONST(0),
+ ALT_CONST(1),
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 6 */
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
- TYPE(SQ_EXPORT_PIXEL),
- RW_GPR(2),
- RW_REL(ABSOLUTE),
- INDEX_GPR(0),
- ELEM_SIZE(1));
-
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
- BURST_COUNT(1),
- VALID_PIXEL_MODE(0),
- END_OF_PROGRAM(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
- MARK(0),
- BARRIER(1));
- /* 7 */
+ /* 10 */
+ /* return */
shader[i++] = CF_DWORD0(ADDR(0),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
@@ -2583,10 +2639,13 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
END_OF_PROGRAM(0),
CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
- BARRIER(1));
+ BARRIER(0));
+
+ /* subroutine interp-fetch-mask */
- /* 8 - non-mask sub */
- shader[i++] = CF_ALU_DWORD0(ADDR(24),
+ /* 11 */
+ /* interpolate mask */
+ shader[i++] = CF_ALU_DWORD0(ADDR(30),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_NOP));
@@ -2598,8 +2657,10 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 9 */
- shader[i++] = CF_DWORD0(ADDR(32),
+
+ /* 12 */
+ /* texture fetch mask into GPR1 */
+ shader[i++] = CF_DWORD0(ADDR(34),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -2611,25 +2672,39 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 10 */
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
- TYPE(SQ_EXPORT_PIXEL),
- RW_GPR(0),
- RW_REL(ABSOLUTE),
- INDEX_GPR(0),
- ELEM_SIZE(1));
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
- BURST_COUNT(1),
- VALID_PIXEL_MODE(0),
- END_OF_PROGRAM(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
- MARK(0),
- BARRIER(1));
+ /* 13 */
+ /* return */
+ shader[i++] = CF_DWORD0(ADDR(0),
+ JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ VALID_PIXEL_MODE(0),
+ END_OF_PROGRAM(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
- /* 11 */
+ /* subroutine read-constant-mask */
+
+ /* 14 */
+ /* read constants into GPR1 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(36),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ ALT_CONST(1),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 15 */
+ /* return */
shader[i++] = CF_DWORD0(ADDR(0),
JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
shader[i++] = CF_DWORD1(POP_COUNT(0),
@@ -2640,18 +2715,21 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
END_OF_PROGRAM(0),
CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
- BARRIER(1));
+ BARRIER(0));
- /* 12 interpolate src tex coords - mask */
+ /* ALU clauses */
+
+ /* 16 */
+ /* MUL gpr[0].x gpr[0].x gpr[1].x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2660,22 +2738,24 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_INTERP_XY),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(1),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
- CLAMP(0));
- /* 13 */
+ CLAMP(1));
+
+ /* 17 */
+ /* MUL gpr[0].y gpr[0].y gpr[1].y */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2684,67 +2764,70 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_INTERP_XY),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(1),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
- CLAMP(0));
- /* 14 */
+ CLAMP(1));
+ /* 18 */
+ /* MUL gpr[0].z gpr[0].z gpr[1].z */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
+ SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(0),
+ WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_INTERP_XY),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(1),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
- CLAMP(0));
- /* 15 */
+ CLAMP(1));
+ /* 19 */
+ /* MUL gpr[0].w gpr[0].w gpr[1].w */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
+ SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 1),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_AR_X),
+ INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(0),
+ WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_INTERP_XY),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(1),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
- CLAMP(0));
+ CLAMP(1));
- /* 16 interpolate mask tex coords */
+ /* 20 */
+ /* INTERP_XY GPR0.x, GPR0.y PARAM0.x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2763,12 +2846,13 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 17 */
+ /* 21 */
+ /* INTERP_XY GPR0.y, GPR0.x PARAM0.x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2787,12 +2871,13 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 18 */
+ /* 22 */
+ /* INTERP_XY GPR0.z, GPR0.y PARAM0.x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2811,12 +2896,14 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 19 */
+
+ /* 23 */
+ /* INTERP_XY GPR0.w, GPR0.x PARAM0.x */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2836,17 +2923,49 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 20 - alu 0 */
- /* MUL gpr[2].x gpr[0].x gpr[1].x */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+ /* 24/25 */
+ /* SAMPLE RID=0 GPR0, GPR0 */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ INST_MOD(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ ALT_CONST(0),
+ RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+ SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+ shader[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+
+ /* 26 */
+ /* MOV GPR0.x, KC4.x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
+ INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2855,23 +2974,24 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(1));
- /* 21 - alu 1 */
- /* MUL gpr[2].y gpr[0].y gpr[1].y */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+
+ /* 27 */
+ /* MOV GPR0.y, KC4.y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
+ INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2880,23 +3000,24 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(1));
- /* 22 - alu 2 */
- /* MUL gpr[2].z gpr[0].z gpr[1].z */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+
+ /* 28 */
+ /* MOV GPR0.z, KC4.z */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
+ INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2905,23 +3026,24 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(1));
- /* 23 - alu 3 */
- /* MUL gpr[2].w gpr[0].w gpr[1].w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+
+ /* 29 */
+ /* MOV GPR0.w, KC4.w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
+ INDEX_MODE(SQ_INDEX_AR_X),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(1));
shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
@@ -2930,19 +3052,20 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
UPDATE_PRED(0),
WRITE_MASK(1),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
+ ALU_INST(SQ_OP2_INST_MOV),
BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
+ DST_GPR(0),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(1));
- /* 24 - interpolate tex coords - non-mask */
+ /* 30 */
+ /* INTERP_XY GPR1.x, PARAM1 */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2957,16 +3080,17 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_INTERP_XY),
BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(0),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 25 */
+ /* 31 */
+ /* INTERP_XY GPR1.y, PARAM1 */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2981,16 +3105,17 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_INTERP_XY),
BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(0),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 26 */
+ /* 32 */
+ /* INTERP_XY GPR1.z, PARAM1 */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -3005,16 +3130,17 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_INTERP_XY),
BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(0),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 27 */
+ /* 33 */
+ /* INTERP_XY GPR1.w, PARAM1 */
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+ SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -3029,16 +3155,17 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_INTERP_XY),
BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(0),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 28/29 - src - mask */
+ /* 34/35 */
+ /* SAMPLE RID=1 GPR1, GPR1 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
INST_MOD(0),
FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(0),
+ RESOURCE_ID(1),
SRC_GPR(1),
SRC_REL(ABSOLUTE),
ALT_CONST(0),
@@ -3058,36 +3185,6 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = TEX_DWORD2(OFFSET_X(0),
OFFSET_Y(0),
OFFSET_Z(0),
- SAMPLER_ID(0),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
- /* 30/31 - mask */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- INST_MOD(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(1),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- ALT_CONST(0),
- RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
- SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
- shader[i++] = TEX_DWORD1(DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_Z),
- DST_SEL_W(SQ_SEL_W),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
SAMPLER_ID(1),
SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
@@ -3095,36 +3192,109 @@ int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 32/33 - src - non-mask */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- INST_MOD(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(0),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- ALT_CONST(0),
- RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
- SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
- shader[i++] = TEX_DWORD1(DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_Z),
- DST_SEL_W(SQ_SEL_W),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
- SAMPLER_ID(0),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
+ /* 36 */
+ /* MOV GPR1.x, KC5.x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+
+ /* 37 */
+ /* MOV GPR1.y, KC5.y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+
+ /* 38 */
+ /* MOV GPR1.z, KC5.z */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+
+ /* 39 */
+ /* MOV GPR1.w, KC5.w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
return i;
}