summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Alex Deucher <alexdeucher@gmail.com> 2009-03-02 02:28:57 -0500
committer Alex Deucher <alexdeucher@gmail.com> 2009-03-02 02:28:57 -0500
commit fa98f424de739be2c6005b740a74bbf1ee968a8b (patch)
tree 70af76e5cac9e4b6909b19d514dea93c05419a89 /src
parent ccde35c3eda3fff0de29eb8c6fdc392629724a34 (diff)
R6xx/R7xx Xv: combine packed and planar shaders
use a bool const to select the tex fetch routine
Diffstat (limited to 'src')
-rw-r--r-- src/r600_exa.c 13
-rw-r--r-- src/r600_reg_r6xx.h 9
-rw-r--r-- src/r600_shader.c 651
-rw-r--r-- src/r600_shader.h 3
-rw-r--r-- src/r600_state.h 4
-rw-r--r-- src/r600_textured_videofuncs.c 20
-rw-r--r-- src/r6xx_accel.c 23
-rw-r--r-- src/radeon.h 3
8 files changed, 171 insertions, 555 deletions
diff --git a/src/r600_exa.c b/src/r600_exa.c
index a44b6118..70c59b22 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -246,7 +246,8 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
ps_alu_consts[2] = (float)b / 255; /* B */
ps_alu_consts[3] = (float)a / 255; /* A */
}
- set_alu_consts(pScrn, accel_state->ib, 0, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
+ set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
+ sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
accel_state->vb_index = 0;
@@ -2027,13 +2028,9 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
accel_state->xv_vs_offset = 4096;
R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
- /* xv ps packed --------------------------------------- */
- accel_state->xv_ps_offset_packed = 4608;
- R600_xv_ps_packet(ChipSet, shader + accel_state->xv_ps_offset_packed / 4);
-
- /* xv ps planar ---------------------------------- */
- accel_state->xv_ps_offset_planar = 5120;
- R600_xv_ps_planar(ChipSet, shader + accel_state->xv_ps_offset_planar / 4);
+ /* xv ps --------------------------------------- */
+ accel_state->xv_ps_offset = 4608;
+ R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
return TRUE;
}
diff --git a/src/r600_reg_r6xx.h b/src/r600_reg_r6xx.h
index 2e7dfa94..2c9113ea 100644
--- a/src/r600_reg_r6xx.h
+++ b/src/r600_reg_r6xx.h
@@ -488,6 +488,15 @@ enum {
SQ_LOOP_CONST_ps = 0,
SQ_LOOP_CONST_vs = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num,
SQ_LOOP_CONST_gs = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num,
+ SQ_BOOL_CONST = SQ_BOOL_CONST_0, /* 32 per PS, VS, GS */
+ SQ_BOOL_CONST_ps_num = 32,
+ SQ_BOOL_CONST_vs_num = 32,
+ SQ_BOOL_CONST_gs_num = 32,
+ SQ_BOOL_CONST_all_num = 96,
+ SQ_BOOL_CONST_offset = 4,
+ SQ_BOOL_CONST_ps = 0,
+ SQ_BOOL_CONST_vs = SQ_BOOL_CONST_ps + SQ_BOOL_CONST_ps_num,
+ SQ_BOOL_CONST_gs = SQ_BOOL_CONST_vs + SQ_BOOL_CONST_vs_num,
} ;
diff --git a/src/r600_shader.c b/src/r600_shader.c
index ba716da3..c5522f95 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -561,31 +561,35 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
}
/*
- * ; xv ps packed
- * 00 TEX: ADDR(20) CNT(2) NO_BARRIER
+ * ; xv ps (planar and packed combined; a bool const selects the tex fetch routine)
+ * 00 TEX: ADDR(20) CNT(3) NO_BARRIER
+ * 0 SAMPLE R1.x__1, R0.xy01, t0, s0
+ * 1 SAMPLE R1.__x_, R0.xy01, t1, s1
+ * 2 SAMPLE R1._x__, R0.xy01, t2, s2
+ * 01 TEX: ADDR(28) CNT(2) NO_BARRIER
* 0 SAMPLE R1.x__1, R0.xy01, t0, s0
* 1 SAMPLE R1._xy_, R0.xy01, t1, s1
- * 01 ALU: ADDR(3) CNT(16)
- * 2 x: MULADD R1.x, R1.x, C3.x, C3.y CLAMP
+ * 02 ALU: ADDR(4) CNT(16)
+ * 3 x: MULADD R1.x, R1.x, C3.x, C3.y CLAMP
* y: MULADD R1.y, R1.y, C3.z, C3.w
* z: MULADD R1.z, R1.z, C3.z, C3.w
- * w: MOV R1.w, 0.0f
- * 3 x: DOT4 R2.x, R1.x, C0.x CLAMP VEC_102
+ * w: MOV R1.w, 0.0f
+ * 4 x: DOT4 R2.x, R1.x, C0.x CLAMP VEC_102
* y: DOT4 ____, R1.y, C0.y CLAMP VEC_102
* z: DOT4 ____, R1.z, C0.z CLAMP VEC_102
* w: DOT4 ____, R1.w, C0.w CLAMP VEC_021
- * 4 x: DOT4 ____, R1.x, C1.x CLAMP VEC_102
+ * 5 x: DOT4 ____, R1.x, C1.x CLAMP VEC_102
* y: DOT4 R2.y, R1.y, C1.y CLAMP VEC_102
* z: DOT4 ____, R1.z, C1.z CLAMP VEC_102
* w: DOT4 ____, R1.w, C1.w CLAMP VEC_021
- * 5 x: DOT4 ____, R1.x, C2.x CLAMP VEC_102
+ * 6 x: DOT4 ____, R1.x, C2.x CLAMP VEC_102
* y: DOT4 ____, R1.y, C2.y CLAMP VEC_102
* z: DOT4 R2.z, R1.z, C2.z CLAMP VEC_102
* w: DOT4 ____, R1.w, C2.w CLAMP VEC_021
- * 02 EXP_DONE: PIX0, R2
+ * 03 EXP_DONE: PIX0, R2
* END_OF_PROGRAM
*/
-int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
+int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
{
int i = 0;
@@ -593,16 +597,28 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = CF_DWORD0(ADDR(20));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
- COND(SQ_CF_COND_ACTIVE),
- I_COUNT(2),
+ COND(SQ_CF_COND_BOOL),
+ I_COUNT(0),
CALL_COUNT(0),
END_OF_PROGRAM(0),
VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_TEX),
+ CF_INST(SQ_CF_INST_CALL),
WHOLE_QUAD_MODE(0),
BARRIER(0));
/* 1 */
- shader[i++] = CF_ALU_DWORD0(ADDR(3),
+ shader[i++] = CF_DWORD0(ADDR(28));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_NOT_BOOL),
+ I_COUNT(0),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 2 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(4),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_NOP));
@@ -614,7 +630,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_ALU),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 2 */
+ /* 3 */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
TYPE(SQ_EXPORT_PIXEL),
RW_GPR(2),
@@ -632,7 +648,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 3 */
+ /* 4 */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -654,7 +670,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(1));
- /* 4 */
+ /* 5 */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
@@ -676,7 +692,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 5 */
+ /* 6 */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
@@ -698,7 +714,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 6 */
+ /* 7 */
shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -724,7 +740,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 7 */
+ /* 8 */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -750,7 +766,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(1));
- /* 8 */
+ /* 9 */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
@@ -776,7 +792,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(1));
- /* 9 */
+ /* 10 */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
@@ -802,7 +818,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(1));
- /* 10 */
+ /* 11 */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
@@ -828,7 +844,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(1));
- /* 11 */
+ /* 12 */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -854,7 +870,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(1));
- /* 12 */
+ /* 13 */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
@@ -880,7 +896,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(1));
- /* 13 */
+ /* 14 */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
@@ -906,7 +922,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(1));
- /* 14 */
+ /* 15 */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
@@ -932,7 +948,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(1));
- /* 15 */
+ /* 16 */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -958,7 +974,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(1));
- /* 16 */
+ /* 17 */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
@@ -984,7 +1000,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(1));
- /* 17 */
+ /* 18 */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
@@ -1010,7 +1026,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(1));
- /* 18 */
+ /* 19 */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
@@ -1036,9 +1052,31 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
CLAMP(1));
- shader[i++] = 0x00000000;
- shader[i++] = 0x00000000;
- /* 20/21 */
+ /* 20 */
+ shader[i++] = CF_DWORD0(ADDR(22));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(3),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TEX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 21 */
+ shader[i++] = CF_DWORD0(ADDR(0));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 22/23 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -1066,7 +1104,7 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 22/23 */
+ /* 24/25 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
@@ -1077,8 +1115,8 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = TEX_DWORD1(DST_GPR(1),
DST_REL(ABSOLUTE),
DST_SEL_X(SQ_SEL_MASK),
- DST_SEL_Y(SQ_SEL_X),
- DST_SEL_Z(SQ_SEL_Y),
+ DST_SEL_Y(SQ_SEL_MASK),
+ DST_SEL_Z(SQ_SEL_X),
DST_SEL_W(SQ_SEL_MASK),
LOD_BIAS(0),
COORD_TYPE_X(TEX_NORMALIZED),
@@ -1094,503 +1132,20 @@ int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
-
- return i;
-}
-
-/*
- * ; xv ps planar
- * 00 TEX: ADDR(20) CNT(3) NO_BARRIER
- * 0 SAMPLE R1.x__1, R0.xy01, t0, s0
- * 1 SAMPLE R1.__x_, R0.xy01, t1, s1
- * 2 SAMPLE R1._x__, R0.xy01, t2, s2
- * 01 ALU: ADDR(3) CNT(16)
- * 3 x: MULADD R1.x, R1.x, C3.x, C3.y CLAMP
- * y: MULADD R1.y, R1.y, C3.z, C3.w
- * z: MULADD R1.z, R1.z, C3.z, C3.w
- * w: MOV R1.w, 0.0f
- * 4 x: DOT4 R2.x, R1.x, C0.x CLAMP VEC_102
- * y: DOT4 ____, R1.y, C0.y CLAMP VEC_102
- * z: DOT4 ____, R1.z, C0.z CLAMP VEC_102
- * w: DOT4 ____, R1.w, C0.w CLAMP VEC_021
- * 5 x: DOT4 ____, R1.x, C1.x CLAMP VEC_102
- * y: DOT4 R2.y, R1.y, C1.y CLAMP VEC_102
- * z: DOT4 ____, R1.z, C1.z CLAMP VEC_102
- * w: DOT4 ____, R1.w, C1.w CLAMP VEC_021
- * 6 x: DOT4 ____, R1.x, C2.x CLAMP VEC_102
- * y: DOT4 ____, R1.y, C2.y CLAMP VEC_102
- * z: DOT4 R2.z, R1.z, C2.z CLAMP VEC_102
- * w: DOT4 ____, R1.w, C2.w CLAMP VEC_021
- * 02 EXP_DONE: PIX0, R2
- * END_OF_PROGRAM
- */
-int R600_xv_ps_planar(RADEONChipFamily ChipSet, uint32_t* shader)
-{
- int i=0;
-
- /* 0 */
- shader[i++] = CF_DWORD0(ADDR(20));
- shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
- COND(SQ_CF_COND_ACTIVE),
- I_COUNT(3),
- CALL_COUNT(0),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_TEX),
- WHOLE_QUAD_MODE(0),
- BARRIER(0));
- /* 1 */
- shader[i++] = CF_ALU_DWORD0(ADDR(3),
- KCACHE_BANK0(0),
- KCACHE_BANK1(0),
- KCACHE_MODE0(SQ_CF_KCACHE_NOP));
- shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
- KCACHE_ADDR0(0),
- KCACHE_ADDR1(0),
- I_COUNT(16),
- USES_WATERFALL(0),
- CF_INST(SQ_CF_INST_ALU),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
- /* 2 */
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
- TYPE(SQ_EXPORT_PIXEL),
- RW_GPR(2),
- RW_REL(ABSOLUTE),
- INDEX_GPR(0),
- ELEM_SIZE(3));
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
- R6xx_ELEM_LOOP(0),
- BURST_COUNT(1),
- END_OF_PROGRAM(1),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
- /* 3 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(259),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_Y),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(1));
- /* 4 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(259),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_W),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(0));
- /* 5 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Z),
- SRC0_NEG(0),
- SRC1_SEL(259),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
- SRC2_REL(ABSOLUTE),
- SRC2_ELEM(ELEM_W),
- SRC2_NEG(0),
- ALU_INST(SQ_OP3_INST_MULADD),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Z),
- CLAMP(0));
- /* 6 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(SQ_ALU_SRC_0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MOV),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(1),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_W),
- CLAMP(0));
- /* 7 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(256),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(1));
- /* 8 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(256),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(1));
- /* 9 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Z),
- SRC0_NEG(0),
- SRC1_SEL(256),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Z),
- CLAMP(1));
- /* 10 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
- SRC0_NEG(0),
- SRC1_SEL(256),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_021),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_W),
- CLAMP(1));
- /* 11 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(257),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(1));
- /* 12 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(257),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(1));
- /* 13 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Z),
- SRC0_NEG(0),
- SRC1_SEL(257),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Z),
- CLAMP(1));
- /* 14 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
- SRC0_NEG(0),
- SRC1_SEL(257),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_021),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_W),
- CLAMP(1));
- /* 15 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(258),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(1));
- /* 16 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(258),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(1));
- /* 17 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Z),
- SRC0_NEG(0),
- SRC1_SEL(258),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Z),
- CLAMP(1));
- /* 18 */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
- SRC0_NEG(0),
- SRC1_SEL(258),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_021),
- DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_W),
- CLAMP(1));
- shader[i++] = 0x00000000;
- shader[i++] = 0x00000000;
- /* 20/21 */
+ /* 26/27 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(0),
+ RESOURCE_ID(2),
SRC_GPR(0),
SRC_REL(ABSOLUTE),
R7xx_ALT_CONST(0));
shader[i++] = TEX_DWORD1(DST_GPR(1),
DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_MASK),
+ DST_SEL_X(SQ_SEL_MASK),
+ DST_SEL_Y(SQ_SEL_X),
DST_SEL_Z(SQ_SEL_MASK),
- DST_SEL_W(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_MASK),
LOD_BIAS(0),
COORD_TYPE_X(TEX_NORMALIZED),
COORD_TYPE_Y(TEX_NORMALIZED),
@@ -1599,26 +1154,50 @@ int R600_xv_ps_planar(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = TEX_DWORD2(OFFSET_X(0),
OFFSET_Y(0),
OFFSET_Z(0),
- SAMPLER_ID(0),
+ SAMPLER_ID(2),
SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 22/23 */
+ /* 28 */
+ shader[i++] = CF_DWORD0(ADDR(30));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TEX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 29 */
+ shader[i++] = CF_DWORD0(ADDR(0));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 30/31 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(1),
+ RESOURCE_ID(0),
SRC_GPR(0),
SRC_REL(ABSOLUTE),
R7xx_ALT_CONST(0));
shader[i++] = TEX_DWORD1(DST_GPR(1),
DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_MASK),
+ DST_SEL_X(SQ_SEL_X),
DST_SEL_Y(SQ_SEL_MASK),
- DST_SEL_Z(SQ_SEL_X),
- DST_SEL_W(SQ_SEL_MASK),
+ DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_W(SQ_SEL_1),
LOD_BIAS(0),
COORD_TYPE_X(TEX_NORMALIZED),
COORD_TYPE_Y(TEX_NORMALIZED),
@@ -1627,17 +1206,17 @@ int R600_xv_ps_planar(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = TEX_DWORD2(OFFSET_X(0),
OFFSET_Y(0),
OFFSET_Z(0),
- SAMPLER_ID(1),
+ SAMPLER_ID(0),
SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 24/25 */
+ /* 32/33 */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(2),
+ RESOURCE_ID(1),
SRC_GPR(0),
SRC_REL(ABSOLUTE),
R7xx_ALT_CONST(0));
@@ -1645,7 +1224,7 @@ int R600_xv_ps_planar(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_SEL_X(SQ_SEL_MASK),
DST_SEL_Y(SQ_SEL_X),
- DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_Z(SQ_SEL_Y),
DST_SEL_W(SQ_SEL_MASK),
LOD_BIAS(0),
COORD_TYPE_X(TEX_NORMALIZED),
@@ -1655,7 +1234,7 @@ int R600_xv_ps_planar(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = TEX_DWORD2(OFFSET_X(0),
OFFSET_Y(0),
OFFSET_Z(0),
- SAMPLER_ID(2),
+ SAMPLER_ID(1),
SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
SRC_SEL_Z(SQ_SEL_0),
diff --git a/src/r600_shader.h b/src/r600_shader.h
index 7333d0ba..ffb50c34 100644
--- a/src/r600_shader.h
+++ b/src/r600_shader.h
@@ -350,8 +350,7 @@ extern int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* vs);
extern int R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* ps);
extern int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader);
-extern int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader);
-extern int R600_xv_ps_planar(RADEONChipFamily ChipSet, uint32_t* shader);
+extern int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader);
extern int R600_comp_mask_vs(RADEONChipFamily ChipSet, uint32_t* vs);
extern int R600_comp_mask_ps(RADEONChipFamily ChipSet,
diff --git a/src/r600_state.h b/src/r600_state.h
index e3f491bd..8e7334dc 100644
--- a/src/r600_state.h
+++ b/src/r600_state.h
@@ -255,6 +255,10 @@ ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf);
void
set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf);
void
+set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, uint32_t *const_buf);
+void
+set_loop_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, uint32_t *const_buf);
+void
set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res);
void
set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res);
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index bf98ec78..d208b076 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -115,7 +115,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
tex_sampler_t tex_samp;
shader_config_t vs_conf, ps_conf;
int uv_offset;
-
+ uint32_t bool_consts[1] = { 0 };
static float ps_alu_consts[] = {
1.0, 0.0, 1.4020, 0, /* r - c[0] */
1.0, -0.34414, -0.71414, 0, /* g - c[1] */
@@ -166,17 +166,18 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
accel_state->xv_vs_offset;
+ accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
+ accel_state->xv_ps_offset;
+
switch(pPriv->id) {
case FOURCC_YV12:
case FOURCC_I420:
- accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
- accel_state->xv_ps_offset_planar;
+ bool_consts[0] = 1;
break;
case FOURCC_UYVY:
case FOURCC_YUY2:
default:
- accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
- accel_state->xv_ps_offset_packed;
+ bool_consts[0] = 0;
break;
}
@@ -200,14 +201,19 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
ps_conf.shader_addr = accel_state->ps_mc_addr;
ps_conf.num_gprs = 3;
- ps_conf.stack_size = 0;
+ ps_conf.stack_size = 1;
ps_conf.uncached_first_inst = 1;
ps_conf.clamp_consts = 0;
ps_conf.export_mode = 2;
ps_setup (pScrn, accel_state->ib, &ps_conf);
/* PS alu constants */
- set_alu_consts(pScrn, accel_state->ib, 0, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
+ set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
+ sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
+
+ /* CF bool constants */
+ set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps,
+ sizeof(bool_consts) / SQ_BOOL_CONST_offset, bool_consts);
/* Texture */
switch(pPriv->id) {
diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c
index 93c3ae37..aa2ab86b 100644
--- a/src/r6xx_accel.c
+++ b/src/r6xx_accel.c
@@ -433,6 +433,29 @@ set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *co
}
void
+set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, uint32_t *const_buf) /* Emit `count` bool-constant entries from const_buf into ib, starting at entry index `offset`. */
+{
+ int i;
+ const int countreg = count * (SQ_BOOL_CONST_offset >> 2); /* number of 32-bit words to emit; SQ_BOOL_CONST_offset is 4 in r600_reg_r6xx.h, so this equals count */
+
+ PACK0(ib, SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, countreg); /* NOTE(review): presumably a packet header giving the first register address and the word count - confirm against the PACK0 macro */
+ for (i = 0; i < countreg; i++)
+ E32(ib, const_buf[i]); /* NOTE(review): presumably appends one 32-bit payload word to ib - confirm; const_buf must hold at least countreg words */
+
+}
+
+void
+set_loop_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, uint32_t *const_buf) /* Emit `count` loop-constant entries from const_buf into ib, starting at entry index `offset`. */
+{
+ int i;
+ const int countreg = count * (SQ_LOOP_CONST_offset >> 2); /* number of 32-bit words to emit; NOTE(review): SQ_LOOP_CONST_offset is defined outside this view, presumably a byte stride per entry - confirm */
+
+ PACK0(ib, SQ_LOOP_CONST + offset * SQ_LOOP_CONST_offset, countreg); /* NOTE(review): presumably a packet header giving the first register address and the word count - confirm against the PACK0 macro */
+ for (i = 0; i < countreg; i++)
+ E32(ib, const_buf[i]); /* NOTE(review): presumably appends one 32-bit payload word to ib - confirm; const_buf must hold at least countreg words */
+}
+
+void
set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res)
{
uint32_t sq_vtx_constant_word2;
diff --git a/src/radeon.h b/src/radeon.h
index 0eea7c13..6fcc36ae 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -626,8 +626,7 @@ struct radeon_accel_state {
uint32_t comp_mask_vs_offset;
uint32_t comp_mask_ps_offset;
uint32_t xv_vs_offset;
- uint32_t xv_ps_offset_packed;
- uint32_t xv_ps_offset_planar;
+ uint32_t xv_ps_offset;
//size/addr stuff
uint32_t src_size[2];