summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlex Deucher <alexdeucher@gmail.com>2009-02-09 13:02:27 -0500
committerAlex Deucher <alexdeucher@gmail.com>2009-02-09 13:02:27 -0500
commit231aee18a73805be2f6c962e94a8345dd89fd0df (patch)
tree5a112519741fdcbbc6b6f91eadfc4071b84074b9 /src
parent6c76bfe8105e3cf4e7e6ea1bfe1235be2079110f (diff)
R6xx/R7xx Xv: implement native shader for planar formats
Diffstat (limited to 'src')
-rw-r--r--src/r600_exa.c574
-rw-r--r--src/r600_textured_videofuncs.c262
-rw-r--r--src/radeon.h3
-rw-r--r--src/radeon_textured_video.c67
4 files changed, 803 insertions, 103 deletions
diff --git a/src/r600_exa.c b/src/r600_exa.c
index a38469af..950e6ac8 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -2263,7 +2263,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
uint32_t *vs;
uint32_t *ps;
// 512 bytes per shader for now
- int size = 512 * 10;
+ int size = 512 * 11;
int i;
accel_state->shaders = NULL;
@@ -2285,7 +2285,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
accel_state->comp_mask_vs_offset = 3072;
accel_state->comp_mask_ps_offset = 3584;
accel_state->xv_vs_offset = 4096;
- accel_state->xv_ps_offset = 4608;
+ accel_state->xv_ps_offset_nv12 = 4608;
+ accel_state->xv_ps_offset_planar = 5120;
// solid vs ---------------------------------------
i = accel_state->solid_vs_offset / 4;
@@ -2774,8 +2775,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
MEGA_FETCH(0));
vs[i++] = VTX_DWORD_PAD;
- // xv ps ---------------------------------------
- i = accel_state->xv_ps_offset / 4;
+ // xv ps nv12 ----------------------------------
+ i = accel_state->xv_ps_offset_nv12 / 4;
// 0
ps[i++] = CF_DWORD0(ADDR(20));
ps[i++] = CF_DWORD1(POP_COUNT(0),
@@ -3311,6 +3312,571 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC_SEL_W(SQ_SEL_1));
ps[i++] = TEX_DWORD_PAD;
+ // xv ps planar ----------------------------------
+ i = accel_state->xv_ps_offset_planar / 4;
+ // 0
+ ps[i++] = CF_DWORD0(ADDR(20));
+ ps[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(3),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TEX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ // 1
+ ps[i++] = CF_ALU_DWORD0(ADDR(3),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ ps[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(16),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ // 2
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(3),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(3));
+ ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ // 3 - alu 0
+ // DP4 gpr[2].x gpr[1].x c[0].x
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ // 4 - alu 1
+ // DP4 gpr[2].y gpr[1].y c[0].y
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ // 5 - alu 2
+ // DP4 gpr[2].z gpr[1].z c[0].z
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ // 6 - alu 3
+ // DP4 gpr[2].w gpr[1].w c[0].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_021),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+ // 7 - alu 4
+ // DP4 gpr[2].x gpr[1].x c[1].x
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ // 8 - alu 5
+ // DP4 gpr[2].y gpr[1].y c[1].y
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ // 9 - alu 6
+ // DP4 gpr[2].z gpr[1].z c[1].z
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ // 10 - alu 7
+ // DP4 gpr[2].w gpr[1].w c[1].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_021),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+ // 11 - alu 8
+ // DP4 gpr[2].x gpr[1].x c[2].x
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ // 12 - alu 9
+ // DP4 gpr[2].y gpr[1].y c[2].y
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ // 13 - alu 10
+ // DP4 gpr[2].z gpr[1].z c[2].z
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ // 14 - alu 11
+ // DP4 gpr[2].w gpr[1].w c[2].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(0),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_021),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+ // 15 - alu 12
+ // MOV gpr[3].x gpr[2].x
+ ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ // 16 - alu 13
+ // MOV gpr[3].y gpr[2].y
+ ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ // 17 - alu 14
+ // MOV gpr[3].z gpr[2].z
+ ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_210),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ // 18 - alu 15
+ // MOV gpr[3].w gpr[2].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MOV),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(3),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+ // 19 - alignment
+ ps[i++] = 0x00000000;
+ ps[i++] = 0x00000000;
+ // 20/21 - tex 0
+ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ ps[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X), //R
+ DST_SEL_Y(SQ_SEL_MASK), //G
+ DST_SEL_Z(SQ_SEL_MASK), //B
+ DST_SEL_W(SQ_SEL_1), //A
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ ps[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ ps[i++] = TEX_DWORD_PAD;
+ // 22/23 - tex 1
+ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(1),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ ps[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_MASK), //R
+ DST_SEL_Y(SQ_SEL_MASK), //G
+ DST_SEL_Z(SQ_SEL_X), //B
+ DST_SEL_W(SQ_SEL_MASK), //A
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ ps[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(1),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ ps[i++] = TEX_DWORD_PAD;
+ // 24/25 - tex 2
+ ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(2),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ ps[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_MASK), //R
+ DST_SEL_Y(SQ_SEL_X), //G
+ DST_SEL_Z(SQ_SEL_MASK), //B
+ DST_SEL_W(SQ_SEL_MASK), //A
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ ps[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(2),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ ps[i++] = TEX_DWORD_PAD;
+
// comp mask vs ---------------------------------------
i = accel_state->comp_mask_vs_offset / 4;
//0
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 222740e5..42a5d68d 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -108,8 +108,20 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
accel_state->xv_vs_offset;
- accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
- accel_state->xv_ps_offset;
+
+ switch(pPriv->id) {
+ case FOURCC_YV12:
+ case FOURCC_I420:
+ accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
+ accel_state->xv_ps_offset_planar;
+ break;
+ case FOURCC_UYVY:
+ case FOURCC_YUY2:
+ default:
+ accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset +
+ accel_state->xv_ps_offset_nv12;
+ break;
+ }
accel_state->vs_size = 512;
accel_state->ps_size = 512;
@@ -141,76 +153,182 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
set_alu_consts(pScrn, accel_state->ib, 0, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
/* Texture */
- accel_state->src_mc_addr[0] = pPriv->src_offset;
- accel_state->src_size[0] = exaGetPixmapPitch(pPixmap) * pPriv->w;
-
- /* flush texture cache */
- cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0],
- accel_state->src_mc_addr[0]);
-
- // Y texture
- tex_res.id = 0;
- tex_res.w = pPriv->w;
- tex_res.h = pPriv->h;
- tex_res.pitch = accel_state->src_pitch[0];
- tex_res.depth = 0;
- tex_res.dim = SQ_TEX_DIM_2D;
- tex_res.base = accel_state->src_mc_addr[0];
- tex_res.mip_base = accel_state->src_mc_addr[0];
-
- tex_res.format = FMT_8;
- tex_res.dst_sel_x = SQ_SEL_X; //Y
- tex_res.dst_sel_y = SQ_SEL_1;
- tex_res.dst_sel_z = SQ_SEL_1;
- tex_res.dst_sel_w = SQ_SEL_1;
-
- tex_res.request_size = 1;
- tex_res.base_level = 0;
- tex_res.last_level = 0;
- tex_res.perf_modulation = 0;
- tex_res.interlaced = 0;
- set_tex_resource (pScrn, accel_state->ib, &tex_res);
-
- // UV texture
- uv_offset = accel_state->src_pitch[0] * pPriv->h;
- uv_offset = (uv_offset + 255) & ~255;
-
- cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
- accel_state->src_size[0] / 2,
- accel_state->src_mc_addr[0] + uv_offset);
-
- tex_res.id = 1;
- tex_res.format = FMT_8_8;
- tex_res.w = pPriv->w >> 1;
- tex_res.h = pPriv->h >> 1;
- tex_res.pitch = accel_state->src_pitch[0] >> 1;
- tex_res.dst_sel_x = SQ_SEL_Y; //V
- tex_res.dst_sel_y = SQ_SEL_X; //U
- tex_res.dst_sel_z = SQ_SEL_1;
- tex_res.dst_sel_w = SQ_SEL_1;
- tex_res.interlaced = 0;
- // XXX tex bases need to be 256B aligned
- tex_res.base = accel_state->src_mc_addr[0] + uv_offset;
- tex_res.mip_base = accel_state->src_mc_addr[0] + uv_offset;
- set_tex_resource (pScrn, accel_state->ib, &tex_res);
-
- // Y sampler
- tex_samp.id = 0;
- tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
- tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
- tex_samp.clamp_z = SQ_TEX_WRAP;
-
- // xxx: switch to bicubic
- tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR;
- tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR;
-
- tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
- tex_samp.mip_filter = 0; /* no mipmap */
- set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
-
- // UV sampler
- tex_samp.id = 1;
- set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+ switch(pPriv->id) {
+ case FOURCC_YV12:
+ case FOURCC_I420:
+ accel_state->src_mc_addr[0] = pPriv->src_offset;
+ accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h;
+
+ /* flush texture cache */
+ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0],
+ accel_state->src_mc_addr[0]);
+
+ // Y texture
+ tex_res.id = 0;
+ tex_res.w = pPriv->w;
+ tex_res.h = pPriv->h;
+ tex_res.pitch = accel_state->src_pitch[0];
+ tex_res.depth = 0;
+ tex_res.dim = SQ_TEX_DIM_2D;
+ tex_res.base = accel_state->src_mc_addr[0];
+ tex_res.mip_base = accel_state->src_mc_addr[0];
+
+ tex_res.format = FMT_8;
+ tex_res.dst_sel_x = SQ_SEL_X; //Y
+ tex_res.dst_sel_y = SQ_SEL_1;
+ tex_res.dst_sel_z = SQ_SEL_1;
+ tex_res.dst_sel_w = SQ_SEL_1;
+
+ tex_res.request_size = 1;
+ tex_res.base_level = 0;
+ tex_res.last_level = 0;
+ tex_res.perf_modulation = 0;
+ tex_res.interlaced = 0;
+ set_tex_resource (pScrn, accel_state->ib, &tex_res);
+
+ // Y sampler
+ tex_samp.id = 0;
+ tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
+ tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
+ tex_samp.clamp_z = SQ_TEX_WRAP;
+
+ // xxx: switch to bicubic
+ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR;
+ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR;
+
+ tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
+ tex_samp.mip_filter = 0; /* no mipmap */
+ set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+
+ // U or V texture
+ uv_offset = accel_state->src_pitch[0] * pPriv->h;
+ uv_offset = (uv_offset + 255) & ~255;
+
+ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
+ accel_state->src_size[0] / 2,
+ accel_state->src_mc_addr[0] + uv_offset);
+
+ tex_res.id = 1;
+ tex_res.format = FMT_8;
+ tex_res.w = pPriv->w >> 1;
+ tex_res.h = pPriv->h >> 1;
+ tex_res.pitch = accel_state->src_pitch[0] >> 1;
+ tex_res.dst_sel_x = SQ_SEL_X; //V or U
+ tex_res.dst_sel_y = SQ_SEL_1;
+ tex_res.dst_sel_z = SQ_SEL_1;
+ tex_res.dst_sel_w = SQ_SEL_1;
+ tex_res.interlaced = 0;
+ // XXX tex bases need to be 256B aligned
+ tex_res.base = accel_state->src_mc_addr[0] + uv_offset;
+ tex_res.mip_base = accel_state->src_mc_addr[0] + uv_offset;
+ set_tex_resource (pScrn, accel_state->ib, &tex_res);
+
+ // U or V sampler
+ tex_samp.id = 1;
+ set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+
+ // U or V texture
+ uv_offset += ((accel_state->src_pitch[0] >> 1) * (pPriv->h >> 1));
+ uv_offset = (uv_offset + 255) & ~255;
+
+ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
+ accel_state->src_size[0] / 2,
+ accel_state->src_mc_addr[0] + uv_offset);
+
+ tex_res.id = 2;
+ tex_res.format = FMT_8;
+ tex_res.w = pPriv->w >> 1;
+ tex_res.h = pPriv->h >> 1;
+ tex_res.pitch = accel_state->src_pitch[0] >> 1;
+ tex_res.dst_sel_x = SQ_SEL_X; //V or U
+ tex_res.dst_sel_y = SQ_SEL_1;
+ tex_res.dst_sel_z = SQ_SEL_1;
+ tex_res.dst_sel_w = SQ_SEL_1;
+ tex_res.interlaced = 0;
+ // XXX tex bases need to be 256B aligned
+ tex_res.base = accel_state->src_mc_addr[0] + uv_offset;
+ tex_res.mip_base = accel_state->src_mc_addr[0] + uv_offset;
+ set_tex_resource (pScrn, accel_state->ib, &tex_res);
+
+ // UV sampler
+ tex_samp.id = 2;
+ set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+ break;
+ case FOURCC_UYVY:
+ case FOURCC_YUY2:
+ default:
+ accel_state->src_mc_addr[0] = pPriv->src_offset;
+ accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h;
+
+ /* flush texture cache */
+ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0],
+ accel_state->src_mc_addr[0]);
+
+ // Y texture
+ tex_res.id = 0;
+ tex_res.w = pPriv->w;
+ tex_res.h = pPriv->h;
+ tex_res.pitch = accel_state->src_pitch[0];
+ tex_res.depth = 0;
+ tex_res.dim = SQ_TEX_DIM_2D;
+ tex_res.base = accel_state->src_mc_addr[0];
+ tex_res.mip_base = accel_state->src_mc_addr[0];
+
+ tex_res.format = FMT_8;
+ tex_res.dst_sel_x = SQ_SEL_X; //Y
+ tex_res.dst_sel_y = SQ_SEL_1;
+ tex_res.dst_sel_z = SQ_SEL_1;
+ tex_res.dst_sel_w = SQ_SEL_1;
+
+ tex_res.request_size = 1;
+ tex_res.base_level = 0;
+ tex_res.last_level = 0;
+ tex_res.perf_modulation = 0;
+ tex_res.interlaced = 0;
+ set_tex_resource (pScrn, accel_state->ib, &tex_res);
+
+ // Y sampler
+ tex_samp.id = 0;
+ tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
+ tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
+ tex_samp.clamp_z = SQ_TEX_WRAP;
+
+ // xxx: switch to bicubic
+ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR;
+ tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR;
+
+ tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE;
+ tex_samp.mip_filter = 0; /* no mipmap */
+ set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+
+ // UV texture
+ uv_offset = accel_state->src_pitch[0] * pPriv->h;
+ uv_offset = (uv_offset + 255) & ~255;
+
+ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit,
+ accel_state->src_size[0] / 2,
+ accel_state->src_mc_addr[0] + uv_offset);
+
+ tex_res.id = 1;
+ tex_res.format = FMT_8_8;
+ tex_res.w = pPriv->w >> 1;
+ tex_res.h = pPriv->h >> 1;
+ tex_res.pitch = accel_state->src_pitch[0] >> 1;
+ tex_res.dst_sel_x = SQ_SEL_Y; //V
+ tex_res.dst_sel_y = SQ_SEL_X; //U
+ tex_res.dst_sel_z = SQ_SEL_1;
+ tex_res.dst_sel_w = SQ_SEL_1;
+ tex_res.interlaced = 0;
+ // XXX tex bases need to be 256B aligned
+ tex_res.base = accel_state->src_mc_addr[0] + uv_offset;
+ tex_res.mip_base = accel_state->src_mc_addr[0] + uv_offset;
+ set_tex_resource (pScrn, accel_state->ib, &tex_res);
+
+ // UV sampler
+ tex_samp.id = 1;
+ set_tex_sampler (pScrn, accel_state->ib, &tex_samp);
+ break;
+ }
/* Render setup */
ereg (accel_state->ib, CB_SHADER_MASK, (0x0f << OUTPUT0_ENABLE_shift));
diff --git a/src/radeon.h b/src/radeon.h
index 629e1ffa..2974cdf4 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -658,7 +658,8 @@ struct radeon_accel_state {
uint32_t comp_mask_vs_offset;
uint32_t comp_mask_ps_offset;
uint32_t xv_vs_offset;
- uint32_t xv_ps_offset;
+ uint32_t xv_ps_offset_nv12;
+ uint32_t xv_ps_offset_planar;
//size/addr stuff
uint32_t src_size[2];
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index 16b2c829..22e7d174 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -154,12 +154,14 @@ static __inline__ uint32_t F_TO_24(float val)
#endif /* XF86DRI */
static void
-CopyPlanartoNV12(unsigned char *y_src, unsigned char *u_src, unsigned char *v_src,
- unsigned char *dst,
- int srcPitch, int srcPitch2, int dstPitch,
- int w, int h)
+R600CopyPlanar(unsigned char *y_src, unsigned char *u_src, unsigned char *v_src,
+ unsigned char *dst,
+ int srcPitch, int srcPitch2, int dstPitch,
+ int w, int h)
{
- int i, j;
+ int i;
+ int dstPitch2 = dstPitch >> 1;
+ int h2 = h >> 1;
/* Y */
if (srcPitch == dstPitch) {
@@ -177,21 +179,34 @@ CopyPlanartoNV12(unsigned char *y_src, unsigned char *u_src, unsigned char *v_sr
if (h & 1)
dst += dstPitch;
- /* UV */
- for (i = 0; i < (h >> 1); i++) {
- unsigned char *u = u_src;
- unsigned char *v = v_src;
- unsigned char *uv = dst;
+ /* V */
+ if (srcPitch2 == dstPitch2) {
+ memcpy(dst, v_src, srcPitch2 * h2);
+ dst += (dstPitch2 * h2);
+ } else {
+ for (i = 0; i < h2; i++) {
+ memcpy(dst, v_src, srcPitch2);
+ v_src += srcPitch2;
+ dst += dstPitch2;
+ }
+ }
- for (j = 0; j < w; j++) {
- uv[0] = v[j];
- uv[1] = u[j];
- uv += 2;
- }
- dst += dstPitch;
- u_src += srcPitch2;
- v_src += srcPitch2;
+ /* tex base need 256B alignment */
+ if (h2 & 1)
+ dst += dstPitch2;
+
+ /* U */
+ if (srcPitch2 == dstPitch2) {
+ memcpy(dst, u_src, srcPitch2 * h2);
+ dst += (dstPitch2 * h2);
+ } else {
+ for (i = 0; i < h2; i++) {
+ memcpy(dst, u_src, srcPitch2);
+ u_src += srcPitch2;
+ dst += dstPitch2;
+ }
}
+
}
static void
@@ -392,15 +407,15 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
s2offset = srcPitch * height;
s3offset = (srcPitch2 * (height >> 1)) + s2offset;
if (id == FOURCC_YV12)
- CopyPlanartoNV12(buf, buf + s3offset, buf + s2offset,
- pPriv->src_addr,
- srcPitch, srcPitch2, pPriv->src_pitch,
- width, height);
+ R600CopyPlanar(buf, buf + s3offset, buf + s2offset,
+ pPriv->src_addr,
+ srcPitch, srcPitch2, pPriv->src_pitch,
+ width, height);
else
- CopyPlanartoNV12(buf, buf + s2offset, buf + s3offset,
- pPriv->src_addr,
- srcPitch, srcPitch2, pPriv->src_pitch,
- width, height);
+ R600CopyPlanar(buf, buf + s2offset, buf + s3offset,
+ pPriv->src_addr,
+ srcPitch, srcPitch2, pPriv->src_pitch,
+ width, height);
} else {
top &= ~1;