diff options
author | Grigori Goronzy <greg@chown.ath.cx> | 2013-07-18 16:06:23 +0200 |
---|---|---|
committer | Grigori Goronzy <greg@chown.ath.cx> | 2013-07-22 05:05:48 +0200 |
commit | 94d0d14914a025525a0766669b556eaa6681def7 (patch) | |
tree | 0c945be3a24f486974500aff80fc9626100b8385 /src/cayman_shader.c | |
parent | 5bb04351c43a91a1d60348b7293544da05d75e72 (diff) |
EXA/evergreen/ni: fast solid pixmap support
Solid pixmaps are currently implemented with scratch pixmaps, which
is slow. This replaces the hack with a proper implementation. The
Composite shader can now either sample a src/mask or use a constant
value.
Diffstat (limited to 'src/cayman_shader.c')
-rw-r--r-- | src/cayman_shader.c | 590 |
1 files changed, 386 insertions, 204 deletions
diff --git a/src/cayman_shader.c b/src/cayman_shader.c index 2a6d6b1b..59f41776 100644 --- a/src/cayman_shader.c +++ b/src/cayman_shader.c @@ -2495,17 +2495,44 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) int i = 0; /* 0 */ - shader[i++] = CF_DWORD0(ADDR(3), + /* call interp-fetch-mask if boolean1 == true */ + shader[i++] = CF_DWORD0(ADDR(12), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), + CF_CONST(1), COND(SQ_CF_COND_BOOL), I_COUNT(0), VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_CALL), BARRIER(0)); + /* 1 */ - shader[i++] = CF_DWORD0(ADDR(8), + /* call read-constant-mask if boolean1 == false */ + shader[i++] = CF_DWORD0(ADDR(15), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(1), + COND(SQ_CF_COND_NOT_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_CALL), + BARRIER(0)); + + /* 2 */ + /* call interp-fetch-src if boolean0 == true */ + shader[i++] = CF_DWORD0(ADDR(7), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_CALL), + BARRIER(0)); + + /* 3 */ + /* call read-constant-src if boolean0 == false */ + shader[i++] = CF_DWORD0(ADDR(10), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -2514,7 +2541,41 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_CALL), BARRIER(0)); - /* 2 - end */ + /* 4 */ + /* src IN mask (GPR0 := GPR0 .* GPR1) */ + shader[i++] = CF_ALU_DWORD0(ADDR(17), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 5 */ + /* export pixel data */ + shader[i++] = 
CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + + /* 6 */ + /* end of program */ shader[i++] = CF_DWORD0(ADDR(0), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), @@ -2524,33 +2585,53 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_END), BARRIER(1)); - /* 3 - mask sub */ - shader[i++] = CF_ALU_DWORD0(ADDR(12), + + /* subroutine interp-fetch-src */ + + /* 7 */ + /* interpolate src */ + shader[i++] = CF_ALU_DWORD0(ADDR(21), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_NOP)); shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), KCACHE_ADDR0(0), KCACHE_ADDR1(0), - I_COUNT(8), + I_COUNT(4), ALT_CONST(0), CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 4 */ - shader[i++] = CF_DWORD0(ADDR(28), + /* 8 */ + /* texture fetch src into GPR0 */ + shader[i++] = CF_DWORD0(ADDR(26), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), - I_COUNT(2), + I_COUNT(1), VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_TC), BARRIER(1)); - /* 5 */ - shader[i++] = CF_ALU_DWORD0(ADDR(20), + /* 9 */ + /* return */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_RETURN), + BARRIER(0)); + + /* subroutine read-constant-src */ + + /* 10 */ + /* read constants into GPR0 */ + shader[i++] = CF_ALU_DWORD0(ADDR(28), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); @@ -2558,29 +2639,13 @@ int 
cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) KCACHE_ADDR0(0), KCACHE_ADDR1(0), I_COUNT(4), - ALT_CONST(0), + ALT_CONST(1), CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 6 */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), - TYPE(SQ_EXPORT_PIXEL), - RW_GPR(2), - RW_REL(ABSOLUTE), - INDEX_GPR(0), - ELEM_SIZE(1)); - - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), - BURST_COUNT(1), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_EXPORT_DONE), - MARK(0), - BARRIER(1)); - /* 7 */ + /* 11 */ + /* return */ shader[i++] = CF_DWORD0(ADDR(0), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), @@ -2589,10 +2654,13 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) I_COUNT(0), VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_RETURN), - BARRIER(1)); + BARRIER(0)); - /* 8 - non-mask sub */ - shader[i++] = CF_ALU_DWORD0(ADDR(24), + /* subroutine interp-fetch-mask */ + + /* 12 */ + /* interpolate mask */ + shader[i++] = CF_ALU_DWORD0(ADDR(32), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_NOP)); @@ -2604,8 +2672,10 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 9 */ - shader[i++] = CF_DWORD0(ADDR(32), + + /* 13 */ + /* texture fetch mask into GPR1 */ + shader[i++] = CF_DWORD0(ADDR(36), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -2615,24 +2685,37 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_TC), BARRIER(1)); - /* 10 */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), - TYPE(SQ_EXPORT_PIXEL), - RW_GPR(0), - RW_REL(ABSOLUTE), - INDEX_GPR(0), - ELEM_SIZE(1)); - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), - BURST_COUNT(1), - 
VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_EXPORT_DONE), - MARK(0), - BARRIER(1)); + /* 14 */ + /* return */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_RETURN), + BARRIER(0)); - /* 11 */ + /* subroutine read-constant-mask */ + + /* 15 */ + /* read constants into GPR1 */ + shader[i++] = CF_ALU_DWORD0(ADDR(38), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(1), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 16 */ + /* return */ shader[i++] = CF_DWORD0(ADDR(0), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), @@ -2641,18 +2724,21 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) I_COUNT(0), VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_RETURN), - BARRIER(1)); + BARRIER(0)); + + /* ALU clauses */ - /* 12 interpolate src tex coords - mask */ + /* 17 */ + /* MUL gpr[0].x gpr[0].x gpr[1].x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), + SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), + INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2661,22 +2747,24 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_INTERP_XY), - BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(1), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), - CLAMP(0)); - /* 13 */ + CLAMP(1)); + + /* 18 */ + /* MUL gpr[0].y gpr[0].y 
gpr[1].y */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), + SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_Y), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), + INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2685,67 +2773,70 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_INTERP_XY), - BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(1), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), - CLAMP(0)); - /* 14 */ + CLAMP(1)); + /* 19 */ + /* MUL gpr[0].z gpr[0].z gpr[1].z */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), + SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_Z), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), + INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0), - WRITE_MASK(0), + WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_INTERP_XY), - BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(1), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), - CLAMP(0)); - /* 15 */ + CLAMP(1)); + /* 20 */ + /* MUL gpr[0].w gpr[0].w gpr[1].w */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), + SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_W), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), + INDEX_MODE(SQ_INDEX_LOOP), 
PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0), - WRITE_MASK(0), + WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_INTERP_XY), - BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(1), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), - CLAMP(0)); + CLAMP(1)); - /* 16 interpolate mask tex coords */ + /* 21 */ + /* INTERP_XY GPR0.x, GPR0.y PARAM0.x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2764,12 +2855,13 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(0)); - /* 17 */ + /* 22 */ + /* INTERP_XY GPR0.y, GPR0.x PARAM0.x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2788,12 +2880,13 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(0)); - /* 18 */ + /* 23 */ + /* INTERP_XY GPR0.z, GPR0.y PARAM0.x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2812,12 +2905,14 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(0)); - /* 19 */ + + /* 24 */ + /* INTERP_XY GPR0.w, GPR0.x PARAM0.x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), SRC1_REL(ABSOLUTE), 
SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2837,17 +2932,53 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_ELEM(ELEM_W), CLAMP(0)); - /* 20 - alu 0 */ - /* MUL gpr[2].x gpr[0].x gpr[1].x */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + /* 25 */ + shader[i++] = 0; + shader[i++] = 0; + + /* 26/27 */ + /* SAMPLE RID=0 GPR0, GPR0 */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(0), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_Z), + DST_SEL_W(SQ_SEL_W), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + + /* 28 */ + /* MOV GPR0.x, KC4.x */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), + INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2856,23 +2987,24 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(1)); - /* 21 - alu 1 */ - /* MUL gpr[2].y gpr[0].y gpr[1].y */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + + /* 29 */ + /* MOV GPR0.y, KC4.y */ + 
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), + INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2881,23 +3013,24 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(1)); - /* 22 - alu 2 */ - /* MUL gpr[2].z gpr[0].z gpr[1].z */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + + /* 30 */ + /* MOV GPR0.z, KC4.z */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), + INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2906,23 +3039,24 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(1)); - /* 23 - alu 3 */ - /* MUL gpr[2].w gpr[0].w gpr[1].w */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + + /* 31 */ + /* MOV GPR0.w, KC4.w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), + 
INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2931,19 +3065,20 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(1)); - /* 24 - interpolate tex coords - non-mask */ + /* 32 */ + /* INTERP_XY GPR1.x, PARAM1 */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2958,16 +3093,17 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_INTERP_XY), BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(0), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(0)); - /* 25 */ + /* 33 */ + /* INTERP_XY GPR1.y, PARAM1 */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2982,16 +3118,17 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_INTERP_XY), BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(0), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(0)); - /* 26 */ + /* 34 */ + /* INTERP_XY GPR1.z, PARAM1 */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -3006,16 +3143,17 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_INTERP_XY), BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(0), + 
DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(0)); - /* 27 */ + /* 35 */ + /* INTERP_XY GPR1.w, PARAM1 */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -3030,16 +3168,17 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_INTERP_XY), BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(0), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(0)); - /* 28/29 - src - mask */ + /* 36/37 */ + /* SAMPLE RID=1 GPR1, GPR1 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), INST_MOD(0), FETCH_WHOLE_QUAD(0), - RESOURCE_ID(0), + RESOURCE_ID(1), SRC_GPR(1), SRC_REL(ABSOLUTE), ALT_CONST(0), @@ -3059,36 +3198,6 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) shader[i++] = TEX_DWORD2(OFFSET_X(0), OFFSET_Y(0), OFFSET_Z(0), - SAMPLER_ID(0), - SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1)); - shader[i++] = TEX_DWORD_PAD; - /* 30/31 - mask */ - shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), - INST_MOD(0), - FETCH_WHOLE_QUAD(0), - RESOURCE_ID(1), - SRC_GPR(0), - SRC_REL(ABSOLUTE), - ALT_CONST(0), - RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), - SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); - shader[i++] = TEX_DWORD1(DST_GPR(0), - DST_REL(ABSOLUTE), - DST_SEL_X(SQ_SEL_X), - DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_Z), - DST_SEL_W(SQ_SEL_W), - LOD_BIAS(0), - COORD_TYPE_X(TEX_NORMALIZED), - COORD_TYPE_Y(TEX_NORMALIZED), - COORD_TYPE_Z(TEX_NORMALIZED), - COORD_TYPE_W(TEX_NORMALIZED)); - shader[i++] = TEX_DWORD2(OFFSET_X(0), - OFFSET_Y(0), - OFFSET_Z(0), SAMPLER_ID(1), SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y), @@ -3096,36 +3205,109 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 32/33 - src - non-mask */ - 
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), - INST_MOD(0), - FETCH_WHOLE_QUAD(0), - RESOURCE_ID(0), - SRC_GPR(0), - SRC_REL(ABSOLUTE), - ALT_CONST(0), - RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), - SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); - shader[i++] = TEX_DWORD1(DST_GPR(0), - DST_REL(ABSOLUTE), - DST_SEL_X(SQ_SEL_X), - DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_Z), - DST_SEL_W(SQ_SEL_W), - LOD_BIAS(0), - COORD_TYPE_X(TEX_NORMALIZED), - COORD_TYPE_Y(TEX_NORMALIZED), - COORD_TYPE_Z(TEX_NORMALIZED), - COORD_TYPE_W(TEX_NORMALIZED)); - shader[i++] = TEX_DWORD2(OFFSET_X(0), - OFFSET_Y(0), - OFFSET_Z(0), - SAMPLER_ID(0), - SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1)); - shader[i++] = TEX_DWORD_PAD; + /* 38 */ + /* MOV GPR1.x, KC5.x */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + + /* 39 */ + /* MOV GPR1.y, KC5.y */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + + /* 40 */ + /* MOV GPR1.z, KC5.z */ + shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + + /* 41 */ + /* MOV GPR1.w, KC5.w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); return i; } |