summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/r600_exa.c 204
-rw-r--r-- src/r600_textured_videofuncs.c 2
2 files changed, 99 insertions, 107 deletions
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 34e67d83..542d42d7 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -2841,7 +2841,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
// 2
ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
TYPE(SQ_EXPORT_PIXEL),
- RW_GPR(3),
+ RW_GPR(2),
RW_REL(ABSOLUTE),
INDEX_GPR(0),
ELEM_SIZE(3));
@@ -2856,96 +2856,88 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(1));
+ /* Undo scaling of Y'CbCr values
+ * Y' is scaled from 16:235
+ * Cb/Cr are scaled from 16:240
+ */
// 3 - alu 0
- // DP4 gpr[2].x gpr[1].x c[0].x
+ // MULADD gpr[1].x gpr[1].x c[3].x c[3].y
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(259),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(2),
+ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(1));
// 4 - alu 1
- // DP4 gpr[2].y gpr[1].y c[0].y
+ // MULADD gpr[1].y gpr[1].y c[3].z c[3].w
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(259),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(2),
+ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
- CLAMP(1));
+ CLAMP(0));
// 5 - alu 2
- // DP4 gpr[2].z gpr[1].z c[0].z
+ // MULADD gpr[1].z gpr[1].z c[3].z c[3].w
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(259),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(2),
+ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
- CLAMP(1));
+ CLAMP(0));
// 6 - alu 3
- // DP4 gpr[2].w gpr[1].w c[0].w
- ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ // MOV gpr[1].w 0.0
+ ps[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
+ SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(SQ_ALU_SRC_0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
@@ -2955,22 +2947,22 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(0),
+ WRITE_MASK(1),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_021),
- DST_GPR(2),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
- CLAMP(1));
+ CLAMP(0));
// 7 - alu 4
- // DP4 gpr[2].x gpr[1].x c[1].x
+ // DP4 gpr[2].x gpr[1].x c[0].x
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(256),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -2982,7 +2974,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(0),
+ WRITE_MASK(1),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_DOT4),
@@ -2992,12 +2984,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
DST_ELEM(ELEM_X),
CLAMP(1));
// 8 - alu 5
- // DP4 gpr[2].y gpr[1].y c[1].y
+ // DP4 gpr[2].y gpr[1].y c[0].y
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(256),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -3009,7 +3001,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(1),
+ WRITE_MASK(0),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_DOT4),
@@ -3019,12 +3011,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
DST_ELEM(ELEM_Y),
CLAMP(1));
// 9 - alu 6
- // DP4 gpr[2].z gpr[1].z c[1].z
+ // DP4 gpr[2].z gpr[1].z c[0].z
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(256),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
@@ -3046,12 +3038,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
DST_ELEM(ELEM_Z),
CLAMP(1));
// 10 - alu 7
- // DP4 gpr[2].w gpr[1].w c[1].w
+ // DP4 gpr[2].w gpr[1].w c[0].w
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(256),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -3073,12 +3065,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
DST_ELEM(ELEM_W),
CLAMP(1));
// 11 - alu 8
- // DP4 gpr[2].x gpr[1].x c[2].x
+ // DP4 gpr[2].x gpr[1].x c[1].x
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(257),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -3100,12 +3092,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
DST_ELEM(ELEM_X),
CLAMP(1));
// 12 - alu 9
- // DP4 gpr[2].y gpr[1].y c[2].y
+ // DP4 gpr[2].y gpr[1].y c[1].y
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(257),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -3117,7 +3109,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(0),
+ WRITE_MASK(1),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_DOT4),
@@ -3127,12 +3119,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
DST_ELEM(ELEM_Y),
CLAMP(1));
// 13 - alu 10
- // DP4 gpr[2].z gpr[1].z c[2].z
+ // DP4 gpr[2].z gpr[1].z c[1].z
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(257),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
@@ -3144,7 +3136,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(1),
+ WRITE_MASK(0),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_DOT4),
@@ -3154,12 +3146,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
DST_ELEM(ELEM_Z),
CLAMP(1));
// 14 - alu 11
- // DP4 gpr[2].w gpr[1].w c[2].w
+ // DP4 gpr[2].w gpr[1].w c[1].w
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(257),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -3181,12 +3173,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
DST_ELEM(ELEM_W),
CLAMP(1));
// 15 - alu 12
- // MOV gpr[3].x gpr[2].x
- ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+ // DP4 gpr[2].x gpr[1].x c[2].x
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(258),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -3198,24 +3190,24 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(1),
+ WRITE_MASK(0),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MOV),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(3),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
- CLAMP(0));
+ CLAMP(1));
// 16 - alu 13
- // MOV gpr[3].y gpr[2].y
- ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+ // DP4 gpr[2].y gpr[1].y c[2].y
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(258),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
@@ -3225,24 +3217,24 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(1),
+ WRITE_MASK(0),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MOV),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(3),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
- CLAMP(0));
+ CLAMP(1));
// 17 - alu 14
- // MOV gpr[3].z gpr[2].z
- ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+ // DP4 gpr[2].z gpr[1].z c[2].z
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(258),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
@@ -3255,21 +3247,21 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
WRITE_MASK(1),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MOV),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(3),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
- CLAMP(0));
+ CLAMP(1));
// 18 - alu 15
- // MOV gpr[3].w gpr[2].w
- ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+ // DP4 gpr[2].w gpr[1].w c[2].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(258),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
@@ -3279,15 +3271,15 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(1),
+ WRITE_MASK(0),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MOV),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(3),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_021),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
- CLAMP(0));
+ CLAMP(1));
// 19 - alignment
ps[i++] = 0x00000000;
ps[i++] = 0x00000000;
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 4a7391c8..c06512a5 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -202,7 +202,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
accel_state->ps_size, accel_state->ps_mc_addr);
ps_conf.shader_addr = accel_state->ps_mc_addr;
- ps_conf.num_gprs = 4;
+ ps_conf.num_gprs = 3;
ps_conf.stack_size = 0;
ps_conf.uncached_first_inst = 1;
ps_conf.clamp_consts = 0;