diff options
Diffstat (limited to 'src/r600_shader.c')
-rw-r--r-- | src/r600_shader.c | 418 |
1 files changed, 333 insertions, 85 deletions
diff --git a/src/r600_shader.c b/src/r600_shader.c index 4cb2fc89..26a6ab64 100644 --- a/src/r600_shader.c +++ b/src/r600_shader.c @@ -2318,9 +2318,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) int i = 0; /* 0 */ - shader[i++] = CF_DWORD0(ADDR(3)); + /* call fetch-mask if boolean1 == true */ + shader[i++] = CF_DWORD0(ADDR(10)); shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), + CF_CONST(1), COND(SQ_CF_COND_BOOL), I_COUNT(0), CALL_COUNT(0), @@ -2330,9 +2331,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(0)); /* 1 */ - shader[i++] = CF_DWORD0(ADDR(7)); + /* call read-constant-mask if boolean1 == false */ + shader[i++] = CF_DWORD0(ADDR(12)); shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), + CF_CONST(1), COND(SQ_CF_COND_NOT_BOOL), I_COUNT(0), CALL_COUNT(0), @@ -2342,33 +2344,36 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(0)); /* 2 */ - shader[i++] = CF_DWORD0(ADDR(0)); + /* call fetch-src if boolean0 == true */ + shader[i++] = CF_DWORD0(ADDR(6)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), + COND(SQ_CF_COND_BOOL), I_COUNT(0), CALL_COUNT(0), - END_OF_PROGRAM(1), + END_OF_PROGRAM(0), VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_NOP), + CF_INST(SQ_CF_INST_CALL), WHOLE_QUAD_MODE(0), - BARRIER(1)); + BARRIER(0)); - /* 3 - mask sub */ - shader[i++] = CF_DWORD0(ADDR(14)); + /* 3 */ + /* call read-constant-src if boolean0 == false */ + shader[i++] = CF_DWORD0(ADDR(8)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), - I_COUNT(2), + COND(SQ_CF_COND_NOT_BOOL), + I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(0), VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_TEX), + CF_INST(SQ_CF_INST_CALL), WHOLE_QUAD_MODE(0), - BARRIER(1)); + BARRIER(0)); /* 4 */ - shader[i++] = CF_ALU_DWORD0(ADDR(10), + /* src IN mask (GPR0 := GPR1 .* GPR0) */ + shader[i++] = CF_ALU_DWORD0(ADDR(14), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_NOP)); @@ -2382,9 +2387,10 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) BARRIER(1)); /* 5 */ + /* export pixel data */ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), TYPE(SQ_EXPORT_PIXEL), - RW_GPR(2), + RW_GPR(0), RW_REL(ABSOLUTE), INDEX_GPR(0), ELEM_SIZE(1)); @@ -2394,55 +2400,57 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_W(SQ_SEL_W), R6xx_ELEM_LOOP(0), BURST_COUNT(1), - END_OF_PROGRAM(0), + END_OF_PROGRAM(1), VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(1)); + /* subroutine fetch src */ /* 6 */ - shader[i++] = CF_DWORD0(ADDR(0)); + /* fetch src into GPR0*/ + shader[i++] = CF_DWORD0(ADDR(26)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), - I_COUNT(0), + I_COUNT(1), CALL_COUNT(0), END_OF_PROGRAM(0), VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_RETURN), + CF_INST(SQ_CF_INST_TEX), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 7 non-mask sub */ - shader[i++] = CF_DWORD0(ADDR(18)); + /* 7 */ + /* return */ + shader[i++] = CF_DWORD0(ADDR(0)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), - I_COUNT(1), + I_COUNT(0), CALL_COUNT(0), END_OF_PROGRAM(0), VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_TEX), + CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1)); + + /* subroutine read-constant-src*/ /* 8 */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), - TYPE(SQ_EXPORT_PIXEL), - RW_GPR(0), - RW_REL(ABSOLUTE), - INDEX_GPR(0), - ELEM_SIZE(1)); - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), - R6xx_ELEM_LOOP(0), - BURST_COUNT(1), - END_OF_PROGRAM(0), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_EXPORT_DONE), - WHOLE_QUAD_MODE(0), - BARRIER(1)); + /* read constants into GPR0 */ + shader[i++] = CF_ALU_DWORD0(ADDR(18), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + USES_WATERFALL(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); /* 9 */ + /* return */ shader[i++] = CF_DWORD0(ADDR(0)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -2455,8 +2463,67 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 10 - alu 0 */ - /* MUL gpr[2].x gpr[1].x gpr[0].x */ + /* subroutine fetch mask */ + /* 10 */ + /* fetch mask into GPR1*/ + shader[i++] = CF_DWORD0(ADDR(28)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(1), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_TEX), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 11 */ + /* return */ + shader[i++] = CF_DWORD0(ADDR(0)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* subroutine read-constant-mask*/ + /* 12 */ + /* read constants into GPR1 */ + shader[i++] = CF_ALU_DWORD0(ADDR(22), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + USES_WATERFALL(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 13 */ + /* return */ + shader[i++] = CF_DWORD0(ADDR(0)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* ALU clauses */ + + /* 14 - alu 0 */ + /* MUL gpr[0].x gpr[1].x gpr[0].x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), @@ -2478,12 +2545,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MUL), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(1)); - /* 11 - alu 1 */ - /* MUL gpr[2].y gpr[1].y gpr[0].y */ + /* 15 - alu 1 */ + /* MUL gpr[0].y gpr[1].y gpr[0].y */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), @@ -2505,12 +2572,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MUL), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(1)); - /* 12 - alu 2 */ - /* MUL gpr[2].z gpr[1].z gpr[0].z */ + /* 16 - alu 2 */ + /* MUL gpr[0].z gpr[1].z gpr[0].z */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Z), @@ -2532,12 +2599,12 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MUL), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(1)); - /* 13 - alu 3 */ - /* MUL gpr[2].w gpr[1].w gpr[0].w */ + /* 17 - alu 3 */ + /* MUL gpr[0].w gpr[1].w gpr[0].w */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_W), @@ -2559,12 +2626,222 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_MUL), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + + /* 18 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + /* 19 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + /* 20 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + /* 21 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(1)); - /* 14/15 - src - mask */ + /* 22 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + /* 23 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + /* 24 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + /* 25 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_W), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + + /* 26/27 - src */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -2592,7 +2869,7 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 16/17 - mask */ + /* 28/29 - mask */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -2621,34 +2898,5 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 18/19 - src - non-mask */ - shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), - BC_FRAC_MODE(0), - FETCH_WHOLE_QUAD(0), - RESOURCE_ID(0), - SRC_GPR(0), - SRC_REL(ABSOLUTE), - R7xx_ALT_CONST(0)); - shader[i++] = TEX_DWORD1(DST_GPR(0), - DST_REL(ABSOLUTE), - DST_SEL_X(SQ_SEL_X), - DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_Z), - DST_SEL_W(SQ_SEL_W), - LOD_BIAS(0), - COORD_TYPE_X(TEX_NORMALIZED), - COORD_TYPE_Y(TEX_NORMALIZED), - COORD_TYPE_Z(TEX_NORMALIZED), - COORD_TYPE_W(TEX_NORMALIZED)); - shader[i++] = TEX_DWORD2(OFFSET_X(0), - OFFSET_Y(0), - OFFSET_Z(0), - SAMPLER_ID(0), - SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1)); - shader[i++] = TEX_DWORD_PAD; - return i; } |