summaryrefslogtreecommitdiff
path: root/src/radeon_textured_videofuncs.c
diff options
context:
space:
mode:
authorRoland Scheidegger <sroland@tungstengraphics.com>2009-02-21 04:46:31 +0100
committerRoland Scheidegger <sroland@tungstengraphics.com>2009-03-24 19:57:19 +0100
commit58530bf4912800f9e09ebaea42a13cff8a80c19e (patch)
tree49689b0e5c1940f9d52c5b25827868fcda0f7222 /src/radeon_textured_videofuncs.c
parent97e19d96ba65a3df2fa3bbf73cfcc01b6dc3e796 (diff)
don't convert planar yuv to packed for r300
uses 3 textures for planar yuv and does yuv->rgb conversion in the shader. small performance advantage, but manual texture cache setting is necessary otherwise it may be measurably slower (but probably not relevant) in some cases. Unlike some other drivers, using MADs instead of DP3s, since this requires less instructions due to no MOVs are required, the end result is the same though the constants need to be different. Use of this is user settable for now (XV_HWPLANAR attrib).
Diffstat (limited to 'src/radeon_textured_videofuncs.c')
-rw-r--r--src/radeon_textured_videofuncs.c226
1 files changed, 215 insertions, 11 deletions
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index f55ae12f..aa5d4108 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -97,6 +97,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
uint32_t dst_offset, dst_pitch, dst_format;
uint32_t txenable, colorpitch;
uint32_t blendcntl;
+ Bool isplanar = FALSE;
int dstxoff, dstyoff, pixel_shift, vtx_count;
BoxPtr pBox = REGION_RECTS(&pPriv->clip);
int nBox = REGION_NUM_RECTS(&pPriv->clip);
@@ -181,16 +182,29 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
if (RADEONTilingEnabled(pScrn, pPixmap))
colorpitch |= R300_COLORTILE;
- if (pPriv->id == FOURCC_UYVY)
- txformat1 = R300_TX_FORMAT_YVYU422;
- else
- txformat1 = R300_TX_FORMAT_VYUY422;
+ if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+ isplanar = TRUE;
+ }
- txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
+ if (isplanar) {
+ txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0;
+ txpitch = pPriv->src_pitch;
+ } else {
+ if (pPriv->id == FOURCC_UYVY)
+ txformat1 = R300_TX_FORMAT_YVYU422;
+ else
+ txformat1 = R300_TX_FORMAT_VYUY422;
+
+ txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
+
+ /* pitch is in pixels */
+ txpitch = pPriv->src_pitch / 2;
+ }
+ txpitch -= 1;
txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
- (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
- R300_TXPITCH_EN);
+ (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
info->accel_state->texW[0] = pPriv->w;
info->accel_state->texH[0] = pPriv->h;
@@ -201,9 +215,6 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
R300_TX_MIN_FILTER_LINEAR |
(0 << R300_TX_ID_SHIFT));
- /* pitch is in pixels */
- txpitch = pPriv->src_pitch / 2;
- txpitch -= 1;
if (IS_R500_3D && ((pPriv->w - 1) & 0x800))
txpitch |= R500_TXWIDTH_11;
@@ -224,6 +235,34 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
txenable = R300_TEX_0_ENABLE;
+ if (isplanar) {
+ txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
+ (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
+ txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
+ txpitch -= 1;
+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_MIN_FILTER_LINEAR |
+ R300_TX_MAG_FILTER_LINEAR);
+
+ BEGIN_ACCEL(12);
+ OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT));
+ OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset);
+ OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
+ OUT_ACCEL_REG(R300_TX_FILTER1_2, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset);
+ FINISH_ACCEL();
+ txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE;
+ }
+
if (pPriv->bicubic_enabled) {
/* Size is 128x1 */
txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) |
@@ -691,6 +730,171 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0);
FINISH_ACCEL();
+ } else if (isplanar) {
+ /*
+ * y' = y - .0625
+ * u' = u - .5
+ * v' = v - .5;
+ *
+ * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
+ * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
+ * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
+ *
+ * DP3 might look like the straightforward solution
+ * but we'd need to move the texture yuv values in
+ * the same reg for this to work. Therefore use MADs.
+ * Without changing the shader at all (only the constants)
+ * could also provide hue/saturation/brightness/contrast control.
+ *
+ * yco = 1.1643
+ * uco = 0, -0.39173, 2.017
+ * vco = 1.5958, -0.8129, 0
+ * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
+ * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
+ * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
+ *
+ * temp = MAD(yco, yuv.yyyy, off)
+ * temp = MAD(uco, yuv.uuuu, temp)
+ * result = MAD(vco, yuv.vvvv, temp)
+ */
+ float yco = 1.1643;
+ float uco[3] = {0.0, -0.39173, 2.017};
+ float vco[3] = {1.5958, -0.8129, 0.0};
+ float off[3] = {-0.0625 * yco + -0.5 * uco[0] + -0.5 * vco[0],
+ -0.0625 * yco + -0.5 * uco[1] + -0.5 * vco[1],
+ -0.0625 * yco + -0.5 * uco[2] + -0.5 * vco[2]};
+
+ BEGIN_ACCEL(33);
+ /* 2 components: same 2 for tex0/1/2 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */
+
+ /* Indirection levels */
+ OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
+ R300_FIRST_TEX));
+
+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
+ R300_ALU_CODE_SIZE(3) |
+ R300_TEX_CODE_OFFSET(0) |
+ R300_TEX_CODE_SIZE(3)));
+
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
+ R300_ALU_SIZE(2) |
+ R300_TEX_START(0) |
+ R300_TEX_SIZE(2) |
+ R300_RGBA_OUT));
+
+ /* tex inst */
+ OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(0) |
+ R300_TEX_ID(0) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+ OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(1) |
+ R300_TEX_ID(1) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+ OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(2) |
+ R300_TEX_ID(2) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+
+ /* ALU inst */
+ /* MAD temp0, const0.a, temp0, const0.rgb */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop, but need to set up alpha source for rgb usage */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
+ R300_ALU_ALPHA_ADDR1(0) |
+ R300_ALU_ALPHA_ADDR2(0) |
+ R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* MAD const1, temp1, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR1(1) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* MAD result, const2, temp2, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
+ R300_ALU_RGB_ADDR1(2) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
+ R300_ALU_RGB_CLAMP));
+ /* write alpha 1 */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
+ R300_ALU_ALPHA_TARGET_A));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
+
+ /* Shader constants. */
+ /* constant 0: off, yco */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco));
+ /* constant 1: uco */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(0.0));
+ /* constant 2: vco */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0));
+
+ FINISH_ACCEL();
+
} else {
BEGIN_ACCEL(11);
/* 2 components: 2 for tex0 */
@@ -760,7 +964,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
R300_ALU_ALPHA_CLAMP));
FINISH_ACCEL();
- }
+ }
} else {
if (pPriv->bicubic_enabled) {
BEGIN_ACCEL(7);