From adf0912006b4f1597784dbfcc563d5c6d1c5667d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 14 Apr 2009 16:16:33 -0400 Subject: R6xx/R7xx: implement Xv attributes - brightness, contrast, hue, etc. - TODO: implement gamma --- src/r600_shader.c | 440 +++++++++++++---------------------------- src/r600_textured_videofuncs.c | 106 ++++++++-- src/radeon_textured_video.c | 13 +- src/radeon_video.c | 6 - src/radeon_video.h | 6 + 5 files changed, 244 insertions(+), 327 deletions(-) (limited to 'src') diff --git a/src/r600_shader.c b/src/r600_shader.c index addba36f..0a820cf3 100644 --- a/src/r600_shader.c +++ b/src/r600_shader.c @@ -560,41 +560,12 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader) return i; } -/* - * ; xv ps planar - * 00 TEX: ADDR(20) CNT(3) NO_BARRIER - * 0 SAMPLE R1.x__1, R0.xy01, t0, s0 - * 1 SAMPLE R1.__x_, R0.xy01, t1, s1 - * 2 SAMPLE R1._x__, R0.xy01, t2, s2 - * 01 TEX: ADDR(28) CNT(2) NO_BARRIER - * 0 SAMPLE R1.x__1, R0.xy01, t0, s0 - * 1 SAMPLE R1._xy_, R0.xy01, t1, s1 - * 02 ALU: ADDR(4) CNT(16) - * 3 x: MULADD R1.x, R1.x, C3.x, C3.y CLAMP - * y: MULADD R1.y, R1.y, C3.z, C3.w - * z: MULADD R1.z, R1.z, C3.z, C3.w - * w: MOV R1.w, 0.0f - * 4 x: DOT4 R2.x, R1.x, C0.x CLAMP VEC_102 - * y: DOT4 ____, R1.y, C0.y CLAMP VEC_102 - * z: DOT4 ____, R1.z, C0.z CLAMP VEC_102 - * w: DOT4 ____, R1.w, C0.w CLAMP VEC_021 - * 5 x: DOT4 ____, R1.x, C1.x CLAMP VEC_102 - * y: DOT4 R2.y, R1.y, C1.y CLAMP VEC_102 - * z: DOT4 ____, R1.z, C1.z CLAMP VEC_102 - * w: DOT4 ____, R1.w, C1.w CLAMP VEC_021 - * 6 x: DOT4 ____, R1.x, C2.x CLAMP VEC_102 - * y: DOT4 ____, R1.y, C2.y CLAMP VEC_102 - * z: DOT4 R2.z, R1.z, C2.z CLAMP VEC_102 - * w: DOT4 ____, R1.w, C2.w CLAMP VEC_021 - * 03 EXP_DONE: PIX0, R2 - * END_OF_PROGRAM - */ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) { int i = 0; /* 0 */ - shader[i++] = CF_DWORD0(ADDR(20)); + shader[i++] = CF_DWORD0(ADDR(16)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_BOOL), @@ -606,7 +577,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(0)); /* 1 */ - shader[i++] = CF_DWORD0(ADDR(28)); + shader[i++] = CF_DWORD0(ADDR(24)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_NOT_BOOL), @@ -625,7 +596,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), KCACHE_ADDR0(0), KCACHE_ADDR1(0), - I_COUNT(16), + I_COUNT(12), USES_WATERFALL(0), CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), @@ -648,73 +619,74 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 4 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + /* 4,5,6,7 */ + /* r2.x = MAD(c0.w, r1.x, c0.x) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(256), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), + SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(259), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_Y), + SRC2_ELEM(ELEM_X), SRC2_NEG(0), ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(1), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), - CLAMP(1)); - /* 5 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + CLAMP(0)); + /* r2.y = MAD(c0.w, r1.x, c0.y) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(256), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), + SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(259), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_W), + SRC2_ELEM(ELEM_Y), SRC2_NEG(0), ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(1), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(0)); - /* 6 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + /* r2.z = MAD(c0.w, r1.x, c0.z) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(256), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Z), + SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(259), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256), SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_W), + SRC2_ELEM(ELEM_Z), SRC2_NEG(0), ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(1), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(0)); - /* 7 */ + /* r2.w = MAD(0, 0, 1) */ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), @@ -726,334 +698,198 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MOV), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(1), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(0)); - /* 8 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + + /* 8,9,10,11 */ + /* r2.x = MAD(c1.x, r1.y, pv.x) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(257), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(256), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_Y), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), - CLAMP(1)); - /* 9 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + CLAMP(0)); + /* r2.y = MAD(c1.y, r1.y, pv.y) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(257), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(256), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Y), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), - DST_GPR(0), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Y), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), - CLAMP(1)); - /* 10 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + CLAMP(0)); + /* r2.z = MAD(c1.z, r1.y, pv.z) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(257), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(256), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), - DST_GPR(0), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Z), - CLAMP(1)); - /* 11 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), - SRC0_NEG(0), - SRC1_SEL(256), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_021), - DST_GPR(0), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_W), - CLAMP(1)); - /* 12 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), - SRC0_NEG(0), - SRC1_SEL(257), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), - DST_GPR(0), - DST_REL(ABSOLUTE), - DST_ELEM(ELEM_X), - CLAMP(1)); - /* 13 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), - SRC0_NEG(0), - SRC1_SEL(257), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Y), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), DST_GPR(2), DST_REL(ABSOLUTE), - DST_ELEM(ELEM_Y), - CLAMP(1)); - /* 14 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), - SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Z), - SRC0_NEG(0), - SRC1_SEL(257), - SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), - SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), - PRED_SEL(SQ_PRED_SEL_OFF), - LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), - DST_GPR(0), - DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), - CLAMP(1)); - /* 15 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + CLAMP(0)); + /* r2.w = MAD(0, 0, 1) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), + SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(257), + SRC1_SEL(SQ_ALU_SRC_0), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_021), - DST_GPR(0), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_W), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), - CLAMP(1)); - /* 16 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + CLAMP(0)); + /* 12,13,14,15 */ + /* r2.x = MAD(c2.x, r1.z, pv.x) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(258), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(258), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_Z), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), - DST_GPR(0), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(1)); - /* 17 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + /* r2.y = MAD(c2.y, r1.z, pv.y) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(258), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(258), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), + SRC1_ELEM(ELEM_Z), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), - DST_GPR(0), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Y), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(1)); - /* 18 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + /* r2.z = MAD(c2.z, r1.z, pv.z) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(258), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(258), + SRC1_SEL(1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_Z), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(1), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_102), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(1)); - /* 19 */ - shader[i++] = ALU_DWORD0(SRC0_SEL(1), + /* r2.w = MAD(0, 0, 1) */ + shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_W), + SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(258), + SRC1_SEL(SQ_ALU_SRC_0), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); - shader[i++] = ALU_DWORD1_OP2(ChipSet, - SRC0_ABS(0), - SRC1_ABS(0), - UPDATE_EXECUTE_MASK(0), - UPDATE_PRED(0), - WRITE_MASK(0), - FOG_MERGE(0), - OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_DOT4), - BANK_SWIZZLE(SQ_ALU_VEC_021), - DST_GPR(0), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(2), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(1)); - /* 20 */ - shader[i++] = CF_DWORD0(ADDR(22)); + + /* 16 */ + shader[i++] = CF_DWORD0(ADDR(18)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -1064,7 +900,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_TEX), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 21 */ + /* 17 */ shader[i++] = CF_DWORD0(ADDR(0)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -1076,7 +912,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 22/23 */ + /* 18/19 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1104,7 +940,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 24/25 */ + /* 20/21 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1132,7 +968,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 26/27 */ + /* 22/23 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1160,8 +996,8 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 28 */ - shader[i++] = CF_DWORD0(ADDR(30)); + /* 24 */ + shader[i++] = CF_DWORD0(ADDR(26)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -1172,7 +1008,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_TEX), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 29 */ + /* 25 */ shader[i++] = CF_DWORD0(ADDR(0)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -1184,7 +1020,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 30/31 */ + /* 26/27 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1212,7 +1048,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 32/33 */ + /* 28/29 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index 88745d5c..600262b3 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -45,6 +45,15 @@ #include "damage.h" +/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces + note the difference to the parameters used in overlay are due + to 10bit vs. float calcs */ +static REF_TRANSFORM trans[2] = +{ + {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */ + {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0} /* BT.709 */ +}; + static void R600DoneTexturedVideo(ScrnInfoPtr pScrn) { @@ -115,18 +124,91 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_sampler_t tex_samp; shader_config_t vs_conf, ps_conf; int uv_offset; - static float ps_alu_consts[] = { - 1.0, 0.0, 1.4020, 0, /* r - c[0] */ - 1.0, -0.34414, -0.71414, 0, /* g - c[1] */ - 1.0, 1.7720, 0.0, 0, /* b - c[2] */ - /* Constants for undoing Y'CbCr scaling - * - Y' is scaled from 16:235 - * - Cb/Cr are scaled from 16:240 - * Unscaled value N' = N * N_mul + N_shift (N' in range [-0.5, 0.5]) - * Vector is [Y_mul, Y_shfit, C_mul, C_shift] - */ - 256.0/219.0, -16.0/219.0, 256.0/224.0, -128.0/224.0, - }; + /* + * y' = y - .0625 + * u' = u - .5 + * v' = v - .5; + * + * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' + * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' + * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' + * + * DP3 might look like the straightforward solution + * but we'd need to move the texture yuv values in + * the same reg for this to work. Therefore use MADs. + * Brightness just adds to the off constant. + * Contrast is multiplication of luminance. + * Saturation and hue change the u and v coeffs. + * Default values (before adjustments - depend on colorspace): + * yco = 1.1643 + * uco = 0, -0.39173, 2.017 + * vco = 1.5958, -0.8129, 0 + * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], + * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], + * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], + * + * temp = MAD(yco, yuv.yyyy, off) + * temp = MAD(uco, yuv.uuuu, temp) + * result = MAD(vco, yuv.vvvv, temp) + */ + /* TODO: calc consts in the shader */ + const float Loff = -0.0627; + const float Coff = -0.502; + float uvcosf, uvsinf; + float yco; + float uco[3], vco[3], off[3]; + float bright, cont, gamma; + int ref = pPriv->transform_index; + Bool needgamma = FALSE; + float ps_alu_consts[12]; + + cont = RTFContrast(pPriv->contrast); + bright = RTFBrightness(pPriv->brightness); + gamma = (float)pPriv->gamma / 1000.0; + uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); + uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); + /* overlay video also does pre-gamma contrast/sat adjust, should we? */ + + yco = trans[ref].RefLuma * cont; + uco[0] = -trans[ref].RefRCr * uvsinf; + uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; + uco[2] = trans[ref].RefBCb * uvcosf; + vco[0] = trans[ref].RefRCr * uvcosf; + vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; + vco[2] = trans[ref].RefBCb * uvsinf; + off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; + off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; + off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; + + // XXX + gamma = 1.0; + + if (gamma != 1.0) { + needgamma = TRUE; + /* note: gamma correction is out = in ^ gamma; + gpu can only do LG2/EX2 therefore we transform into + in ^ gamma = 2 ^ (log2(in) * gamma). + Lots of scalar ops, unfortunately (better solution?) - + without gamma that's 3 inst, with gamma it's 10... + could use different gamma factors per channel, + if that's of any use. */ + } + + /* setup the ps consts */ + ps_alu_consts[0] = off[0]; + ps_alu_consts[1] = off[1]; + ps_alu_consts[2] = off[2]; + ps_alu_consts[3] = yco; + + ps_alu_consts[4] = uco[0]; + ps_alu_consts[5] = uco[1]; + ps_alu_consts[6] = uco[2]; + ps_alu_consts[7] = gamma; + + ps_alu_consts[8] = vco[0]; + ps_alu_consts[9] = vco[1]; + ps_alu_consts[10] = vco[2]; + ps_alu_consts[11] = 0.0; CLEAR (cb_conf); CLEAR (tex_res); diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c index 82675d9f..542fc2ae 100644 --- a/src/radeon_textured_video.c +++ b/src/radeon_textured_video.c @@ -137,12 +137,6 @@ static REF_TRANSFORM trans[2] = {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0} /* BT.709 */ }; - -#define RTFSaturation(a) (1.0 + ((a)*1.0)/1000.0) -#define RTFBrightness(a) (((a)*1.0)/2000.0) -#define RTFContrast(a) (1.0 + ((a)*1.0)/1000.0) -#define RTFHue(a) (((a)*3.1416)/1000.0) - #define ACCEL_MMIO #define ACCEL_PREAMBLE() unsigned char *RADEONMMIO = info->MMIO #define BEGIN_ACCEL(n) RADEONWaitForFifo(pScrn, (n)) @@ -712,11 +706,16 @@ static XF86AttributeRec Attributes_r500[NUM_ATTRIBUTES_R500+1] = {0, 0, 0, NULL} }; -#define NUM_ATTRIBUTES_R600 1 +#define NUM_ATTRIBUTES_R600 6 static XF86AttributeRec Attributes_r600[NUM_ATTRIBUTES_R600+1] = { {XvSettable | XvGettable, 0, 1, "XV_VSYNC"}, + {XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS"}, + {XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST"}, + {XvSettable | XvGettable, -1000, 1000, "XV_SATURATION"}, + {XvSettable | XvGettable, -1000, 1000, "XV_HUE"}, + {XvSettable | XvGettable, 100, 10000, "XV_COLORSPACE"}, {0, 0, 0, NULL} }; diff --git a/src/radeon_video.c b/src/radeon_video.c index 84791608..a14f44c9 100644 --- a/src/radeon_video.c +++ b/src/radeon_video.c @@ -1704,12 +1704,6 @@ RADEONSetPortAttribute(ScrnInfoPtr pScrn, RADEON_SYNC(info, pScrn); -#define RTFSaturation(a) (1.0 + ((a)*1.0)/1000.0) -#define RTFBrightness(a) (((a)*1.0)/2000.0) -#define RTFIntensity(a) (((a)*1.0)/2000.0) -#define RTFContrast(a) (1.0 + ((a)*1.0)/1000.0) -#define RTFHue(a) (((a)*3.1416)/1000.0) - if(attribute == xvAutopaintColorkey) { pPriv->autopaint_colorkey = ClipValue (value, 0, 1); diff --git a/src/radeon_video.h b/src/radeon_video.h index be338715..0f8342a3 100644 --- a/src/radeon_video.h +++ b/src/radeon_video.h @@ -135,6 +135,12 @@ typedef struct tagREF_TRANSFORM float RefBCr; } REF_TRANSFORM; +#define RTFSaturation(a) (1.0 + ((a)*1.0)/1000.0) +#define RTFBrightness(a) (((a)*1.0)/2000.0) +#define RTFIntensity(a) (((a)*1.0)/2000.0) +#define RTFContrast(a) (1.0 + ((a)*1.0)/1000.0) +#define RTFHue(a) (((a)*3.1416)/1000.0) + xf86CrtcPtr radeon_xv_pick_best_crtc(ScrnInfoPtr pScrn, int x1, int x2, int y1, int y2); -- cgit v1.2.3