diff options
author | Alex Deucher <alexdeucher@gmail.com> | 2009-03-02 04:08:09 -0500 |
---|---|---|
committer | Alex Deucher <alexdeucher@gmail.com> | 2009-03-02 04:08:09 -0500 |
commit | b7164ac4ad55e5d0fc474df8ae762b469b91ba30 (patch) | |
tree | 5c2914edcf594868e395cc3098d118a5d7fb7d95 | |
parent | fa98f424de739be2c6005b740a74bbf1ee968a8b (diff) |
R6xx/R7xx EXA: combine composite mask/non-mask VS
Also fix set_bool_const():
the CF bool consts are not contiguous by shader type.
There are 96 boolean constants (32 each for PS, VS, GS), and
they are ordered as follows:
ps, vs, gs, ..., ps, vs, gs
-rw-r--r-- | src/r600_exa.c | 26 | ||||
-rw-r--r-- | src/r600_reg_r6xx.h | 9 | ||||
-rw-r--r-- | src/r600_shader.c | 367 | ||||
-rw-r--r-- | src/r600_state.h | 4 | ||||
-rw-r--r-- | src/r600_textured_videofuncs.c | 10 | ||||
-rw-r--r-- | src/r6xx_accel.c | 22 | ||||
-rw-r--r-- | src/radeon.h | 1 |
7 files changed, 227 insertions, 212 deletions
diff --git a/src/r600_exa.c b/src/r600_exa.c index 70c59b22..f4066308 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -1496,12 +1496,14 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, accel_state->is_transform[1] = FALSE; } - if (pMask != NULL) - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->comp_mask_vs_offset; + /* VS bool constant */ + if (pMask) + set_bool_const(pScrn, accel_state->ib, 1, 1); else - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->comp_vs_offset; + set_bool_const(pScrn, accel_state->ib, 1, 0); + + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->comp_vs_offset; memcpy ((char *)accel_state->ib->address + (accel_state->ib->total / 2) - 256, ps, sizeof(ps)); accel_state->ps_mc_addr = info->gartLocation + info->dri->bufStart + @@ -1518,7 +1520,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 3; - vs_conf.stack_size = 0; + vs_conf.stack_size = 1; vs_setup (pScrn, accel_state->ib, &vs_conf); /* flush SQ cache */ @@ -1980,7 +1982,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) RADEONChipFamily ChipSet = info->ChipFamily; uint32_t *shader; /* 512 bytes per shader for now */ - int size = 512 * 11; + int size = 512 * 9; accel_state->shaders = NULL; @@ -2016,20 +2018,16 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) accel_state->comp_ps_offset = 2560; /* not yet */ - /* comp mask vs --------------------------------------- */ - accel_state->comp_mask_vs_offset = 3072; - R600_comp_mask_vs(ChipSet, shader + accel_state->comp_mask_vs_offset / 4); - /* comp mask ps --------------------------------------- */ - accel_state->comp_mask_ps_offset = 3584; + accel_state->comp_mask_ps_offset = 3072; /* not yet */ /* xv vs 
--------------------------------------- */ - accel_state->xv_vs_offset = 4096; + accel_state->xv_vs_offset = 3584; R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4); /* xv ps --------------------------------------- */ - accel_state->xv_ps_offset = 4608; + accel_state->xv_ps_offset = 4096; R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4); return TRUE; diff --git a/src/r600_reg_r6xx.h b/src/r600_reg_r6xx.h index 2c9113ea..2e7dfa94 100644 --- a/src/r600_reg_r6xx.h +++ b/src/r600_reg_r6xx.h @@ -488,15 +488,6 @@ enum { SQ_LOOP_CONST_ps = 0, SQ_LOOP_CONST_vs = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num, SQ_LOOP_CONST_gs = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num, - SQ_BOOL_CONST = SQ_BOOL_CONST_0, /* 32 per PS, VS, GS */ - SQ_BOOL_CONST_ps_num = 32, - SQ_BOOL_CONST_vs_num = 32, - SQ_BOOL_CONST_gs_num = 32, - SQ_BOOL_CONST_all_num = 96, - SQ_BOOL_CONST_offset = 4, - SQ_BOOL_CONST_ps = 0, - SQ_BOOL_CONST_vs = SQ_BOOL_CONST_ps + SQ_BOOL_CONST_ps_num, - SQ_BOOL_CONST_gs = SQ_BOOL_CONST_vs + SQ_BOOL_CONST_vs_num, } ; diff --git a/src/r600_shader.c b/src/r600_shader.c index c5522f95..21c4c682 100644 --- a/src/r600_shader.c +++ b/src/r600_shader.c @@ -1244,156 +1244,6 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) return i; } -/* comp mask vs --------------------------------------- */ -int R600_comp_mask_vs(RADEONChipFamily ChipSet, uint32_t* shader) -{ - int i = 0; - - /* 0 */ - shader[i++] = CF_DWORD0(ADDR(4)); - shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), - I_COUNT(3), - CALL_COUNT(0), - END_OF_PROGRAM(0), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_VTX), - WHOLE_QUAD_MODE(0), - BARRIER(1)); - /* 1 - dst */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), - TYPE(SQ_EXPORT_POS), - RW_GPR(2), - RW_REL(ABSOLUTE), - INDEX_GPR(0), - ELEM_SIZE(0)); - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), - R6xx_ELEM_LOOP(0), 
- BURST_COUNT(1), - END_OF_PROGRAM(0), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_EXPORT_DONE), - WHOLE_QUAD_MODE(0), - BARRIER(1)); - /* 2 - src */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), - TYPE(SQ_EXPORT_PARAM), - RW_GPR(1), - RW_REL(ABSOLUTE), - INDEX_GPR(0), - ELEM_SIZE(0)); - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), - R6xx_ELEM_LOOP(0), - BURST_COUNT(1), - END_OF_PROGRAM(0), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_EXPORT), - WHOLE_QUAD_MODE(0), - BARRIER(0)); - /* 3 - mask */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), - TYPE(SQ_EXPORT_PARAM), - RW_GPR(0), - RW_REL(ABSOLUTE), - INDEX_GPR(0), - ELEM_SIZE(0)); - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), - R6xx_ELEM_LOOP(0), - BURST_COUNT(1), - END_OF_PROGRAM(1), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_EXPORT_DONE), - WHOLE_QUAD_MODE(0), - BARRIER(0)); - /* 4/5 - dst */ - shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), - FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), - FETCH_WHOLE_QUAD(0), - BUFFER_ID(0), - SRC_GPR(0), - SRC_REL(ABSOLUTE), - SRC_SEL_X(SQ_SEL_X), - MEGA_FETCH_COUNT(24)); - shader[i++] = VTX_DWORD1_GPR(DST_GPR(2), - DST_REL(0), - DST_SEL_X(SQ_SEL_X), - DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_0), - DST_SEL_W(SQ_SEL_1), - USE_CONST_FIELDS(0), - DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ - NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ - FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ - SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); - shader[i++] = VTX_DWORD2(OFFSET(0), - ENDIAN_SWAP(ENDIAN_NONE), - CONST_BUF_NO_STRIDE(0), - MEGA_FETCH(1)); - shader[i++] = VTX_DWORD_PAD; - /* 6/7 - src */ - shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), - FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), - FETCH_WHOLE_QUAD(0), - BUFFER_ID(0), - SRC_GPR(0), - SRC_REL(ABSOLUTE), - SRC_SEL_X(SQ_SEL_X), - 
MEGA_FETCH_COUNT(8)); - shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), - DST_REL(0), - DST_SEL_X(SQ_SEL_X), - DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_0), - DST_SEL_W(SQ_SEL_1), - USE_CONST_FIELDS(0), - DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ - NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ - FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ - SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); - shader[i++] = VTX_DWORD2(OFFSET(8), - ENDIAN_SWAP(ENDIAN_NONE), - CONST_BUF_NO_STRIDE(0), - MEGA_FETCH(0)); - shader[i++] = VTX_DWORD_PAD; - /* 8/9 - mask */ - shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), - FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), - FETCH_WHOLE_QUAD(0), - BUFFER_ID(0), - SRC_GPR(0), - SRC_REL(ABSOLUTE), - SRC_SEL_X(SQ_SEL_X), - MEGA_FETCH_COUNT(8)); - shader[i++] = VTX_DWORD1_GPR(DST_GPR(0), - DST_REL(0), - DST_SEL_X(SQ_SEL_X), - DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_0), - DST_SEL_W(SQ_SEL_1), - USE_CONST_FIELDS(0), - DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ - NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ - FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ - SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); - shader[i++] = VTX_DWORD2(OFFSET(16), - ENDIAN_SWAP(ENDIAN_NONE), - CONST_BUF_NO_STRIDE(0), - MEGA_FETCH(0)); - shader[i++] = VTX_DWORD_PAD; - - return i; -} - /* comp mask ps --------------------------------------- */ int R600_comp_mask_ps(RADEONChipFamily ChipSet, uint32_t* shader, @@ -1627,7 +1477,197 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) int i = 0; /* 0 */ - shader[i++] = CF_DWORD0(ADDR(4)); + shader[i++] = CF_DWORD0(ADDR(3)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_BOOL), + I_COUNT(0), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_CALL), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 1 */ + shader[i++] = CF_DWORD0(ADDR(14)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_NOT_BOOL), + I_COUNT(0), + CALL_COUNT(0), + END_OF_PROGRAM(0), + 
VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_CALL), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 2 */ + shader[i++] = CF_DWORD0(0); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + CALL_COUNT(0), + END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_NOP), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 3 - mask sub */ + shader[i++] = CF_DWORD0(ADDR(8)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(3), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_VTX), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 4 - dst */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), + TYPE(SQ_EXPORT_POS), + RW_GPR(2), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(1), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 5 - src */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(1), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(1), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 6 - mask */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1), + TYPE(SQ_EXPORT_PARAM), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(0)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(1), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 7 */ + 
shader[i++] = CF_DWORD0(ADDR(0)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 8/9 - dst */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(24)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(2), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(0), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(1)); + shader[i++] = VTX_DWORD_PAD; + /* 10/11 - src */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + shader[i++] = VTX_DWORD1_GPR(DST_GPR(1), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(8), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0)); + shader[i++] = VTX_DWORD_PAD; + /* 12/13 - mask */ + shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), + FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), + FETCH_WHOLE_QUAD(0), + BUFFER_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + SRC_SEL_X(SQ_SEL_X), + MEGA_FETCH_COUNT(8)); + shader[i++] = 
VTX_DWORD1_GPR(DST_GPR(0), + DST_REL(0), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_0), + DST_SEL_W(SQ_SEL_1), + USE_CONST_FIELDS(0), + DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */ + NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */ + FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */ + SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); + shader[i++] = VTX_DWORD2(OFFSET(16), + ENDIAN_SWAP(ENDIAN_NONE), + CONST_BUF_NO_STRIDE(0), + MEGA_FETCH(0)); + shader[i++] = VTX_DWORD_PAD; + + /* 14 - non-mask sub */ + shader[i++] = CF_DWORD0(ADDR(18)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -1638,7 +1678,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_VTX), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 1 - dst */ + /* 15 - dst */ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0), TYPE(SQ_EXPORT_POS), RW_GPR(1), @@ -1656,7 +1696,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 2 - src */ + /* 16 - src */ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0), TYPE(SQ_EXPORT_PARAM), RW_GPR(0), @@ -1669,15 +1709,24 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_W(SQ_SEL_W), R6xx_ELEM_LOOP(0), BURST_COUNT(0), - END_OF_PROGRAM(1), + END_OF_PROGRAM(0), VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(0)); - /* 3 */ - shader[i++] = 0x00000000; - shader[i++] = 0x00000000; - /* 4/5 - dst */ + /* 17 */ + shader[i++] = CF_DWORD0(ADDR(0)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 18/19 - dst */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), @@ -1702,7 +1751,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* 
shader) CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; - /* 6/7 - src */ + /* 20/21 - src */ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH), FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA), FETCH_WHOLE_QUAD(0), diff --git a/src/r600_state.h b/src/r600_state.h index 8e7334dc..6621420b 100644 --- a/src/r600_state.h +++ b/src/r600_state.h @@ -255,9 +255,7 @@ ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf); void set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf); void -set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, uint32_t *const_buf); -void -set_loop_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, uint32_t *const_buf); +set_bool_const(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val); void set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res); void diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index d208b076..56adc6d5 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -115,7 +115,6 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_sampler_t tex_samp; shader_config_t vs_conf, ps_conf; int uv_offset; - uint32_t bool_consts[1] = { 0 }; static float ps_alu_consts[] = { 1.0, 0.0, 1.4020, 0, /* r - c[0] */ 1.0, -0.34414, -0.71414, 0, /* g - c[1] */ @@ -169,15 +168,16 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + accel_state->xv_ps_offset; + /* PS bool constant */ switch(pPriv->id) { case FOURCC_YV12: case FOURCC_I420: - bool_consts[0] = 1; + set_bool_const(pScrn, accel_state->ib, 0, 1); break; case FOURCC_UYVY: case FOURCC_YUY2: default: - bool_consts[0] = 0; + set_bool_const(pScrn, accel_state->ib, 0, 0); break; } @@ -211,10 +211,6 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) set_alu_consts(pScrn, 
accel_state->ib, SQ_ALU_CONSTANT_ps, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); - /* CF bool constants */ - set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, - sizeof(bool_consts) / SQ_BOOL_CONST_offset, bool_consts); - /* Texture */ switch(pPriv->id) { case FOURCC_YV12: diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c index aa2ab86b..114ccf50 100644 --- a/src/r6xx_accel.c +++ b/src/r6xx_accel.c @@ -433,26 +433,10 @@ set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *co } void -set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, uint32_t *const_buf) +set_bool_const(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val) { - int i; - const int countreg = count * (SQ_BOOL_CONST_offset >> 2); - - PACK0(ib, SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, countreg); - for (i = 0; i < countreg; i++) - E32(ib, const_buf[i]); - -} - -void -set_loop_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, uint32_t *const_buf) -{ - int i; - const int countreg = count * (SQ_LOOP_CONST_offset >> 2); - - PACK0(ib, SQ_LOOP_CONST + offset * SQ_LOOP_CONST_offset, countreg); - for (i = 0; i < countreg; i++) - E32(ib, const_buf[i]); + /* bool order is: ps, vs, gs, ps, vs, gs, ... */ + EREG(ib, SQ_BOOL_CONST_0 + (offset << 2), val); } void diff --git a/src/radeon.h b/src/radeon.h index 6fcc36ae..355a9494 100644 --- a/src/radeon.h +++ b/src/radeon.h @@ -623,7 +623,6 @@ struct radeon_accel_state { uint32_t copy_ps_offset; uint32_t comp_vs_offset; uint32_t comp_ps_offset; - uint32_t comp_mask_vs_offset; uint32_t comp_mask_ps_offset; uint32_t xv_vs_offset; uint32_t xv_ps_offset; |