diff options
-rw-r--r-- | src/r600_exa.c | 204 |
-rw-r--r-- | src/r600_textured_videofuncs.c | 2 |
2 files changed, 99 insertions, 107 deletions
diff --git a/src/r600_exa.c b/src/r600_exa.c index 34e67d83..542d42d7 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -2841,7 +2841,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) // 2 ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), TYPE(SQ_EXPORT_PIXEL), - RW_GPR(3), + RW_GPR(2), RW_REL(ABSOLUTE), INDEX_GPR(0), ELEM_SIZE(3)); @@ -2856,96 +2856,88 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(1)); + /* Undo scaling of Y'CbCr values + * Y' is scaled from 16:235 + * Cb/Cr are scaled from 16:240 + */ // 3 - alu 0 - // DP4 gpr[2].x gpr[1].x c[0].x + // MULADD gpr[1].x gpr[1].x c[3].x c[3].y ps[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(256), + SRC1_SEL(259), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), - DST_GPR(2), + ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Y), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(1)); // 4 - alu 1 - // DP4 gpr[2].y gpr[1].y c[0].y + // MULADD gpr[1].y gpr[1].y c[3].z c[3].w ps[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(256), + SRC1_SEL(259), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), + SRC1_ELEM(ELEM_Z), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), - 
DST_GPR(2), + ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_W), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), - CLAMP(1)); + CLAMP(0)); // 5 - alu 2 - // DP4 gpr[2].z gpr[1].z c[0].z + // MULADD gpr[1].z gpr[1].z c[3].z c[3].w ps[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(256), + SRC1_SEL(259), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Z), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - ps[i++] = ALU_DWORD1_OP2(info->ChipFamily, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), - DST_GPR(2), + ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_W), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), - CLAMP(1)); + CLAMP(0)); // 6 - alu 3 - // DP4 gpr[2].w gpr[1].w c[0].w - ps[i++] = ALU_DWORD0(SRC0_SEL(1), + // MOV gpr[1].w 0.0 + ps[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), + SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(256), + SRC1_SEL(SQ_ALU_SRC_0), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), @@ -2955,22 +2947,22 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0), - WRITE_MASK(0), + WRITE_MASK(1), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_021), - DST_GPR(2), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), - CLAMP(1)); + CLAMP(0)); // 7 - alu 4 - // DP4 gpr[2].x gpr[1].x c[1].x + // DP4 gpr[2].x gpr[1].x c[0].x ps[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), 
SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(257), + SRC1_SEL(256), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2982,7 +2974,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0), - WRITE_MASK(0), + WRITE_MASK(1), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_DOT4), @@ -2992,12 +2984,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) DST_ELEM(ELEM_X), CLAMP(1)); // 8 - alu 5 - // DP4 gpr[2].y gpr[1].y c[1].y + // DP4 gpr[2].y gpr[1].y c[0].y ps[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(257), + SRC1_SEL(256), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Y), SRC1_NEG(0), @@ -3009,7 +3001,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0), - WRITE_MASK(1), + WRITE_MASK(0), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_DOT4), @@ -3019,12 +3011,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) DST_ELEM(ELEM_Y), CLAMP(1)); // 9 - alu 6 - // DP4 gpr[2].z gpr[1].z c[1].z + // DP4 gpr[2].z gpr[1].z c[0].z ps[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(257), + SRC1_SEL(256), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Z), SRC1_NEG(0), @@ -3046,12 +3038,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) DST_ELEM(ELEM_Z), CLAMP(1)); // 10 - alu 7 - // DP4 gpr[2].w gpr[1].w c[1].w + // DP4 gpr[2].w gpr[1].w c[0].w ps[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(257), + SRC1_SEL(256), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_W), SRC1_NEG(0), @@ -3073,12 +3065,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) DST_ELEM(ELEM_W), CLAMP(1)); // 11 - alu 8 - // DP4 gpr[2].x gpr[1].x c[2].x + // DP4 gpr[2].x gpr[1].x c[1].x ps[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(258), + SRC1_SEL(257), SRC1_REL(ABSOLUTE), 
SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -3100,12 +3092,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) DST_ELEM(ELEM_X), CLAMP(1)); // 12 - alu 9 - // DP4 gpr[2].y gpr[1].y c[2].y + // DP4 gpr[2].y gpr[1].y c[1].y ps[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(258), + SRC1_SEL(257), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Y), SRC1_NEG(0), @@ -3117,7 +3109,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0), - WRITE_MASK(0), + WRITE_MASK(1), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_DOT4), @@ -3127,12 +3119,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) DST_ELEM(ELEM_Y), CLAMP(1)); // 13 - alu 10 - // DP4 gpr[2].z gpr[1].z c[2].z + // DP4 gpr[2].z gpr[1].z c[1].z ps[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(258), + SRC1_SEL(257), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Z), SRC1_NEG(0), @@ -3144,7 +3136,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0), - WRITE_MASK(1), + WRITE_MASK(0), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_DOT4), @@ -3154,12 +3146,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) DST_ELEM(ELEM_Z), CLAMP(1)); // 14 - alu 11 - // DP4 gpr[2].w gpr[1].w c[2].w + // DP4 gpr[2].w gpr[1].w c[1].w ps[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(258), + SRC1_SEL(257), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_W), SRC1_NEG(0), @@ -3181,12 +3173,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) DST_ELEM(ELEM_W), CLAMP(1)); // 15 - alu 12 - // MOV gpr[3].x gpr[2].x - ps[i++] = ALU_DWORD0(SRC0_SEL(2), + // DP4 gpr[2].x gpr[1].x c[2].x + ps[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(0), + SRC1_SEL(258), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -3198,24 +3190,24 @@ 
R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0), - WRITE_MASK(1), + WRITE_MASK(0), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MOV), - BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(3), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), - CLAMP(0)); + CLAMP(1)); // 16 - alu 13 - // MOV gpr[3].y gpr[2].y - ps[i++] = ALU_DWORD0(SRC0_SEL(2), + // DP4 gpr[2].y gpr[1].y c[2].y + ps[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(0), + SRC1_SEL(258), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_Y), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), @@ -3225,24 +3217,24 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0), - WRITE_MASK(1), + WRITE_MASK(0), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MOV), - BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(3), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), - CLAMP(0)); + CLAMP(1)); // 17 - alu 14 - // MOV gpr[3].z gpr[2].z - ps[i++] = ALU_DWORD0(SRC0_SEL(2), + // DP4 gpr[2].z gpr[1].z c[2].z + ps[i++] = ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(0), + SRC1_SEL(258), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_Z), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), @@ -3255,21 +3247,21 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) WRITE_MASK(1), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MOV), - BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(3), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_102), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), - CLAMP(0)); + CLAMP(1)); // 18 - alu 15 - // MOV gpr[3].w gpr[2].w - ps[i++] = ALU_DWORD0(SRC0_SEL(2), + // DP4 gpr[2].w gpr[1].w c[2].w + ps[i++] = 
ALU_DWORD0(SRC0_SEL(1), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(0), + SRC1_SEL(258), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_W), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), @@ -3279,15 +3271,15 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0), - WRITE_MASK(1), + WRITE_MASK(0), FOG_MERGE(0), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MOV), - BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(3), + ALU_INST(SQ_OP2_INST_DOT4), + BANK_SWIZZLE(SQ_ALU_VEC_021), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), - CLAMP(0)); + CLAMP(1)); // 19 - alignment ps[i++] = 0x00000000; ps[i++] = 0x00000000; diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index 4a7391c8..c06512a5 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -202,7 +202,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) accel_state->ps_size, accel_state->ps_mc_addr); ps_conf.shader_addr = accel_state->ps_mc_addr; - ps_conf.num_gprs = 4; + ps_conf.num_gprs = 3; ps_conf.stack_size = 0; ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; |