summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Roland Scheidegger <sroland@tungstengraphics.com> 2009-02-21 04:46:31 +0100
committer: Roland Scheidegger <sroland@tungstengraphics.com> 2009-03-24 19:57:19 +0100
commit: 58530bf4912800f9e09ebaea42a13cff8a80c19e (patch)
tree: 49689b0e5c1940f9d52c5b25827868fcda0f7222
parent: 97e19d96ba65a3df2fa3bbf73cfcc01b6dc3e796 (diff)
don't convert planar yuv to packed for r300
Uses 3 textures for planar YUV and does the YUV->RGB conversion in the shader. This gives a small performance advantage, but manual texture cache setting is necessary; otherwise it may be measurably slower in some cases (though this is probably not relevant). Unlike some other drivers, this uses MADs instead of DP3s, since that requires fewer instructions because no MOVs are needed; the end result is the same, though the constants need to be different. Use of this is user-settable for now (XV_HWPLANAR attribute).
-rw-r--r-- src/radeon_reg.h 15
-rw-r--r-- src/radeon_textured_video.c 89
-rw-r--r-- src/radeon_textured_videofuncs.c 226
-rw-r--r-- src/radeon_video.h 5
4 files changed, 305 insertions, 30 deletions
diff --git a/src/radeon_reg.h b/src/radeon_reg.h
index 0af88597..247a0e7d 100644
--- a/src/radeon_reg.h
+++ b/src/radeon_reg.h
@@ -4406,6 +4406,7 @@
#define R300_TX_INVALTAGS 0x4100
#define R300_TX_FILTER0_0 0x4400
#define R300_TX_FILTER0_1 0x4404
+#define R300_TX_FILTER0_2 0x4408
# define R300_TX_CLAMP_S(x) ((x) << 0)
# define R300_TX_CLAMP_T(x) ((x) << 3)
# define R300_TX_CLAMP_R(x) ((x) << 6)
@@ -4424,8 +4425,10 @@
# define R300_TX_ID_SHIFT 28
#define R300_TX_FILTER1_0 0x4440
#define R300_TX_FILTER1_1 0x4444
+#define R300_TX_FILTER1_2 0x4448
#define R300_TX_FORMAT0_0 0x4480
#define R300_TX_FORMAT0_1 0x4484
+#define R300_TX_FORMAT0_2 0x4488
# define R300_TXWIDTH_SHIFT 0
# define R300_TXHEIGHT_SHIFT 11
# define R300_NUM_LEVELS_SHIFT 26
@@ -4434,6 +4437,7 @@
# define R300_TXPITCH_EN (1 << 31)
#define R300_TX_FORMAT1_0 0x44c0
#define R300_TX_FORMAT1_1 0x44c4
+#define R300_TX_FORMAT1_2 0x44c8
# define R300_TX_FORMAT_X8 0x0
# define R300_TX_FORMAT_X16 0x1
# define R300_TX_FORMAT_Y4X4 0x2
@@ -4506,13 +4510,23 @@
# define R300_TX_FORMAT_YUV_TO_RGB_NO_CLAMP (2 << 22)
# define R300_TX_FORMAT_SWAP_YUV (1 << 24)
+# define R300_TX_FORMAT_CACHE_WHOLE (0 << 27)
+# define R300_TX_FORMAT_CACHE_HALF_REGION_0 (2 << 27)
+# define R300_TX_FORMAT_CACHE_HALF_REGION_1 (3 << 27)
+# define R300_TX_FORMAT_CACHE_FOURTH_REGION_0 (4 << 27)
+# define R300_TX_FORMAT_CACHE_FOURTH_REGION_1 (5 << 27)
+# define R300_TX_FORMAT_CACHE_FOURTH_REGION_2 (6 << 27)
+# define R300_TX_FORMAT_CACHE_FOURTH_REGION_3 (7 << 27)
+
#define R300_TX_FORMAT2_0 0x4500
#define R300_TX_FORMAT2_1 0x4504
+#define R300_TX_FORMAT2_2 0x4508
# define R500_TXWIDTH_11 (1 << 15)
# define R500_TXHEIGHT_11 (1 << 16)
#define R300_TX_OFFSET_0 0x4540
#define R300_TX_OFFSET_1 0x4544
+#define R300_TX_OFFSET_2 0x4548
# define R300_ENDIAN_SWAP_16_BIT (1 << 0)
# define R300_ENDIAN_SWAP_32_BIT (2 << 0)
# define R300_ENDIAN_SWAP_HALF_DWORD (3 << 0)
@@ -4523,6 +4537,7 @@
#define R300_TX_ENABLE 0x4104
# define R300_TEX_0_ENABLE (1 << 0)
# define R300_TEX_1_ENABLE (1 << 1)
+# define R300_TEX_2_ENABLE (1 << 2)
#define R300_US_W_FMT 0x46b4
#define R300_US_OUT_FMT_1 0x46a8
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index f72f2c59..ed4dd3e0 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -304,8 +304,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
RADEONInfoPtr info = RADEONPTR(pScrn);
RADEONPortPrivPtr pPriv = (RADEONPortPrivPtr)data;
INT32 x1, x2, y1, y2;
- int srcPitch, srcPitch2, dstPitch;
+ int srcPitch, srcPitch2, dstPitch, dstPitch2 = 0;
int s2offset, s3offset, tmp;
+ int d2line, d3line;
int top, left, npixels, nlines, size;
BoxRec dstBox;
int dst_width = width, dst_height = height;
@@ -335,18 +336,45 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
if ((x1 >= x2) || (y1 >= y2))
return Success;
+ /* Bicubic filter setup */
+ pPriv->bicubic_enabled = (pPriv->bicubic_state != BICUBIC_OFF);
+ if (!(IS_R300_3D || IS_R500_3D || IS_R600_3D))
+ pPriv->bicubic_enabled = FALSE;
+ if (pPriv->bicubic_enabled && (pPriv->bicubic_state == BICUBIC_AUTO)) {
+ /*
+ * Applying the bicubic filter with a scale of less than 200%
+ * results in a blurred picture, so disable the filter.
+ */
+ if ((src_w > drw_w / 2) || (src_h > drw_h / 2))
+ pPriv->bicubic_enabled = FALSE;
+ }
+
+ pPriv->planar_hw = pPriv->planar_state;
+ if (pPriv->bicubic_enabled || !( IS_R300_3D ))
+ pPriv->planar_hw = 0;
+
switch(id) {
case FOURCC_YV12:
case FOURCC_I420:
- dstPitch = ((dst_width << 1) + 15) & ~15;
srcPitch = (width + 3) & ~3;
srcPitch2 = ((width >> 1) + 3) & ~3;
- size = dstPitch * dst_height;
+ if (pPriv->planar_hw) {
+ dstPitch = (dst_width + 15) & ~15;
+ dstPitch = (dstPitch + 63) & ~63;
+ dstPitch2 = ((dst_width >> 1) + 15) & ~15;
+ dstPitch2 = (dstPitch2 + 63) & ~63;
+ size = dstPitch * dst_height + 2 * dstPitch2 * ((dst_height + 1) >> 1);
+ } else {
+ dstPitch = ((dst_width << 1) + 15) & ~15;
+ dstPitch = (dstPitch + 63) & ~63;
+ size = dstPitch * dst_height;
+ }
break;
case FOURCC_UYVY:
case FOURCC_YUY2:
default:
dstPitch = ((dst_width << 1) + 15) & ~15;
+ dstPitch = (dstPitch + 63) & ~63;
srcPitch = (width << 1);
srcPitch2 = 0;
size = dstPitch * dst_height;
@@ -355,8 +383,7 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
if (info->ChipFamily >= CHIP_FAMILY_R600)
dstPitch = (dstPitch + 255) & ~255;
- else
- dstPitch = (dstPitch + 63) & ~63;
+ /* FIXME: size calc (adjust dstPitch earlier) */
if (pPriv->video_memory != NULL && size != pPriv->size) {
radeon_legacy_free_memory(pScrn, pPriv->video_memory);
@@ -376,19 +403,6 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
return BadAlloc;
}
- /* Bicubic filter setup */
- pPriv->bicubic_enabled = (pPriv->bicubic_state != BICUBIC_OFF);
- if (!(IS_R300_3D || IS_R500_3D || IS_R600_3D))
- pPriv->bicubic_enabled = FALSE;
- if (pPriv->bicubic_enabled && (pPriv->bicubic_state == BICUBIC_AUTO)) {
- /*
- * Applying the bicubic filter with a scale of less than 200%
- * results in a blurred picture, so disable the filter.
- */
- if ((src_w > drw_w / 2) || (src_h > drw_h / 2))
- pPriv->bicubic_enabled = FALSE;
- }
-
/* Bicubic filter loading */
if (pPriv->bicubic_memory == NULL && pPriv->bicubic_enabled) {
pPriv->bicubic_offset = radeon_legacy_allocate_memory(pScrn,
@@ -432,10 +446,16 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
else
pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset + (top * dstPitch));
pPriv->src_pitch = dstPitch;
+ pPriv->planeu_offset = dstPitch * dst_height;
+ pPriv->planev_offset = pPriv->planeu_offset + dstPitch2 * ((dst_height + 1) >> 1);
pPriv->size = size;
pPriv->pDraw = pDraw;
+
#if 0
+ ErrorF("planeu_offset: 0x%x\n", pPriv->planeu_offset);
+ ErrorF("planev_offset: 0x%x\n", pPriv->planev_offset);
+ ErrorF("dstPitch2: 0x%x\n", dstPitch2);
ErrorF("src_offset: 0x%x\n", pPriv->src_offset);
ErrorF("src_addr: 0x%x\n", pPriv->src_addr);
ErrorF("src_pitch: 0x%x\n", pPriv->src_pitch);
@@ -470,6 +490,29 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
srcPitch, srcPitch2, pPriv->src_pitch,
width, height);
}
+ }
+ else if (pPriv->planar_hw) {
+ top &= ~1;
+ s2offset = srcPitch * ((height + 1) & ~1);
+ s3offset = s2offset + srcPitch2 * ((height + 1) >> 1);
+ s2offset += (top >> 1) * srcPitch2 + (left >> 1);
+ s3offset += (top >> 1) * srcPitch2 + (left >> 1);
+ d2line = pPriv->planeu_offset;
+ d3line = pPriv->planev_offset;
+ d2line += (top >> 1) * dstPitch2 - (top * dstPitch);
+ d3line += (top >> 1) * dstPitch2 - (top * dstPitch);
+ nlines = ((y2 + 0xffff) >> 16) - top;
+ if(id == FOURCC_YV12) {
+ tmp = s2offset;
+ s2offset = s3offset;
+ s3offset = tmp;
+ }
+ RADEONCopyData(pScrn, buf + (top * srcPitch) + left, pPriv->src_addr + left,
+ srcPitch, dstPitch, nlines, npixels, 1);
+ RADEONCopyData(pScrn, buf + s2offset, pPriv->src_addr + d2line + (left >> 1),
+ srcPitch2, dstPitch2, (nlines + 1) >> 1, npixels >> 1, 1);
+ RADEONCopyData(pScrn, buf + s3offset, pPriv->src_addr + d3line + (left >> 1),
+ srcPitch2, dstPitch2, (nlines + 1) >> 1, npixels >> 1, 1);
} else {
top &= ~1;
nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
@@ -590,17 +633,19 @@ static XF86AttributeRec Attributes[NUM_ATTRIBUTES+1] =
{0, 0, 0, NULL}
};
-#define NUM_ATTRIBUTES_R300 2
+#define NUM_ATTRIBUTES_R300 3
static XF86AttributeRec Attributes_r300[NUM_ATTRIBUTES_R300+1] =
{
{XvSettable | XvGettable, 0, 2, "XV_BICUBIC"},
{XvSettable | XvGettable, 0, 1, "XV_VSYNC"},
+ {XvSettable | XvGettable, 0, 1, "XV_HWPLANAR"},
{0, 0, 0, NULL}
};
static Atom xvBicubic;
static Atom xvVSync;
+static Atom xvHWPlanar;
#define NUM_IMAGES 4
@@ -627,6 +672,8 @@ RADEONGetTexPortAttribute(ScrnInfoPtr pScrn,
*value = pPriv->bicubic_state;
else if (attribute == xvVSync)
*value = pPriv->vsync;
+ else if (attribute == xvHWPlanar)
+ *value = pPriv->planar_state;
else
return BadMatch;
@@ -648,6 +695,8 @@ RADEONSetTexPortAttribute(ScrnInfoPtr pScrn,
pPriv->bicubic_state = ClipValue (value, 0, 2);
else if (attribute == xvVSync)
pPriv->vsync = ClipValue (value, 0, 1);
+ else if (attribute == xvHWPlanar)
+ pPriv->planar_state = ClipValue (value, 0, 1);
else
return BadMatch;
@@ -671,6 +720,7 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen)
xvBicubic = MAKE_ATOM("XV_BICUBIC");
xvVSync = MAKE_ATOM("XV_VSYNC");
+ xvHWPlanar = MAKE_ATOM("XV_HWPLANAR");
adapt->type = XvWindowMask | XvInputMask | XvImageMask;
adapt->flags = 0;
@@ -720,6 +770,7 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen)
pPriv->doubleBuffer = 0;
pPriv->bicubic_state = BICUBIC_AUTO;
pPriv->vsync = TRUE;
+ pPriv->planar_state = 1;
/* gotta uninit this someplace, XXX: shouldn't be necessary for textured */
REGION_NULL(pScreen, &pPriv->clip);
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index f55ae12f..aa5d4108 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -97,6 +97,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
uint32_t dst_offset, dst_pitch, dst_format;
uint32_t txenable, colorpitch;
uint32_t blendcntl;
+ Bool isplanar = FALSE;
int dstxoff, dstyoff, pixel_shift, vtx_count;
BoxPtr pBox = REGION_RECTS(&pPriv->clip);
int nBox = REGION_NUM_RECTS(&pPriv->clip);
@@ -181,16 +182,29 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
if (RADEONTilingEnabled(pScrn, pPixmap))
colorpitch |= R300_COLORTILE;
- if (pPriv->id == FOURCC_UYVY)
- txformat1 = R300_TX_FORMAT_YVYU422;
- else
- txformat1 = R300_TX_FORMAT_VYUY422;
+ if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+ isplanar = TRUE;
+ }
- txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
+ if (isplanar) {
+ txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0;
+ txpitch = pPriv->src_pitch;
+ } else {
+ if (pPriv->id == FOURCC_UYVY)
+ txformat1 = R300_TX_FORMAT_YVYU422;
+ else
+ txformat1 = R300_TX_FORMAT_VYUY422;
+
+ txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
+
+ /* pitch is in pixels */
+ txpitch = pPriv->src_pitch / 2;
+ }
+ txpitch -= 1;
txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
- (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
- R300_TXPITCH_EN);
+ (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
info->accel_state->texW[0] = pPriv->w;
info->accel_state->texH[0] = pPriv->h;
@@ -201,9 +215,6 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
R300_TX_MIN_FILTER_LINEAR |
(0 << R300_TX_ID_SHIFT));
- /* pitch is in pixels */
- txpitch = pPriv->src_pitch / 2;
- txpitch -= 1;
if (IS_R500_3D && ((pPriv->w - 1) & 0x800))
txpitch |= R500_TXWIDTH_11;
@@ -224,6 +235,34 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
txenable = R300_TEX_0_ENABLE;
+ if (isplanar) {
+ txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
+ (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
+ txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
+ txpitch -= 1;
+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_MIN_FILTER_LINEAR |
+ R300_TX_MAG_FILTER_LINEAR);
+
+ BEGIN_ACCEL(12);
+ OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT));
+ OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset);
+ OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
+ OUT_ACCEL_REG(R300_TX_FILTER1_2, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset);
+ FINISH_ACCEL();
+ txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE;
+ }
+
if (pPriv->bicubic_enabled) {
/* Size is 128x1 */
txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) |
@@ -691,6 +730,171 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0);
FINISH_ACCEL();
+ } else if (isplanar) {
+ /*
+ * y' = y - .0625
+ * u' = u - .5
+ * v' = v - .5;
+ *
+ * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
+ * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
+ * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
+ *
+ * DP3 might look like the straightforward solution
+ * but we'd need to move the texture yuv values in
+ * the same reg for this to work. Therefore use MADs.
+ * Without changing the shader at all (only the constants)
+ * could also provide hue/saturation/brightness/contrast control.
+ *
+ * yco = 1.1643
+ * uco = 0, -0.39173, 2.017
+ * vco = 1.5958, -0.8129, 0
+ * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
+ * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
+ * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
+ *
+ * temp = MAD(yco, yuv.yyyy, off)
+ * temp = MAD(uco, yuv.uuuu, temp)
+ * result = MAD(vco, yuv.vvvv, temp)
+ */
+ float yco = 1.1643;
+ float uco[3] = {0.0, -0.39173, 2.017};
+ float vco[3] = {1.5958, -0.8129, 0.0};
+ float off[3] = {-0.0625 * yco + -0.5 * uco[0] + -0.5 * vco[0],
+ -0.0625 * yco + -0.5 * uco[1] + -0.5 * vco[1],
+ -0.0625 * yco + -0.5 * uco[2] + -0.5 * vco[2]};
+
+ BEGIN_ACCEL(33);
+ /* 2 components: same 2 for tex0/1/2 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */
+
+ /* Indirection levels */
+ OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
+ R300_FIRST_TEX));
+
+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
+ R300_ALU_CODE_SIZE(3) |
+ R300_TEX_CODE_OFFSET(0) |
+ R300_TEX_CODE_SIZE(3)));
+
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
+ R300_ALU_SIZE(2) |
+ R300_TEX_START(0) |
+ R300_TEX_SIZE(2) |
+ R300_RGBA_OUT));
+
+ /* tex inst */
+ OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(0) |
+ R300_TEX_ID(0) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+ OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(1) |
+ R300_TEX_ID(1) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+ OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(2) |
+ R300_TEX_ID(2) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+
+ /* ALU inst */
+ /* MAD temp0, const0.a, temp0, const0.rgb */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop, but need to set up alpha source for rgb usage */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
+ R300_ALU_ALPHA_ADDR1(0) |
+ R300_ALU_ALPHA_ADDR2(0) |
+ R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* MAD temp0, const1, temp1, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR1(1) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* MAD result, const2, temp2, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
+ R300_ALU_RGB_ADDR1(2) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
+ R300_ALU_RGB_CLAMP));
+ /* write alpha 1 */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
+ R300_ALU_ALPHA_TARGET_A));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
+
+ /* Shader constants. */
+ /* constant 0: off, yco */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco));
+ /* constant 1: uco */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(0.0));
+ /* constant 2: vco */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0));
+
+ FINISH_ACCEL();
+
} else {
BEGIN_ACCEL(11);
/* 2 components: 2 for tex0 */
@@ -760,7 +964,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
R300_ALU_ALPHA_CLAMP));
FINISH_ACCEL();
- }
+ }
} else {
if (pPriv->bicubic_enabled) {
BEGIN_ACCEL(7);
diff --git a/src/radeon_video.h b/src/radeon_video.h
index 7f1891e4..34fb07f0 100644
--- a/src/radeon_video.h
+++ b/src/radeon_video.h
@@ -90,6 +90,11 @@ typedef struct {
void *video_memory;
int video_offset;
+ Bool planar_hw;
+ Bool planar_state;
+ int planeu_offset;
+ int planev_offset;
+
/* bicubic filtering */
void *bicubic_memory;
int bicubic_offset;