summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Alex Deucher <alexdeucher@gmail.com> 2009-04-13 17:04:31 -0400
committer Alex Deucher <alexdeucher@gmail.com> 2009-04-17 10:24:58 -0400
commit 12839fc17a2cca4ac14b9757bdaa63ba4679f96f (patch)
tree 8914fdb9b745d8653b0bf122653c0fac98d2b8c6 /src
parent a30737b337edb31528174b483c9094941a5d41bb (diff)
Tex vid: split by family
Diffstat (limited to 'src')
-rw-r--r-- src/radeon_textured_video.c 28
-rw-r--r-- src/radeon_textured_videofuncs.c 4874
2 files changed, 2804 insertions, 2098 deletions
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index bf8a2763..f64da025 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -598,13 +598,29 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
pPriv->h = height;
#ifdef XF86DRI
- if (IS_R600_3D)
- R600DisplayTexturedVideo(pScrn, pPriv);
- else if (info->directRenderingEnabled)
- RADEONDisplayTexturedVideoCP(pScrn, pPriv);
- else
+ if (info->directRenderingEnabled) {
+ if (IS_R600_3D)
+ R600DisplayTexturedVideo(pScrn, pPriv);
+ else if (IS_R500_3D)
+ R500DisplayTexturedVideoCP(pScrn, pPriv);
+ else if (IS_R300_3D)
+ R300DisplayTexturedVideoCP(pScrn, pPriv);
+ else if (IS_R200_3D)
+ R200DisplayTexturedVideoCP(pScrn, pPriv);
+ else
+ RADEONDisplayTexturedVideoCP(pScrn, pPriv);
+ } else
#endif
- RADEONDisplayTexturedVideoMMIO(pScrn, pPriv);
+ {
+ if (IS_R500_3D)
+ R500DisplayTexturedVideoMMIO(pScrn, pPriv);
+ else if (IS_R300_3D)
+ R300DisplayTexturedVideoMMIO(pScrn, pPriv);
+ else if (IS_R200_3D)
+ R200DisplayTexturedVideoMMIO(pScrn, pPriv);
+ else
+ RADEONDisplayTexturedVideoMMIO(pScrn, pPriv);
+ }
return Success;
}
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 3c4289f0..9361f07f 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -93,10 +93,8 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
RADEONInfoPtr info = RADEONPTR(pScrn);
PixmapPtr pPixmap = pPriv->pPixmap;
uint32_t txformat;
- uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
uint32_t dst_offset, dst_pitch, dst_format;
- uint32_t txenable, colorpitch;
- uint32_t blendcntl;
+ uint32_t colorpitch;
Bool isplanar = FALSE;
int dstxoff, dstyoff, pixel_shift, vtx_count;
BoxPtr pBox = REGION_RECTS(&pPriv->clip);
@@ -132,10 +130,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
#endif
{
BEGIN_ACCEL(2);
- if (IS_R300_3D || IS_R500_3D)
- OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
- else
- OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH);
+ OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH);
/* We must wait for 3d to idle, in case source was just written as a dest. */
OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
RADEON_WAIT_HOST_IDLECLEAN |
@@ -148,2057 +143,1835 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
RADEONInit3DEngine(pScrn);
}
- if (pPriv->bicubic_enabled)
- vtx_count = 6;
- else
- vtx_count = 4;
+ vtx_count = 4;
- if (IS_R300_3D || IS_R500_3D) {
- uint32_t output_fmt;
-
- switch (pPixmap->drawable.bitsPerPixel) {
- case 16:
- if (pPixmap->drawable.depth == 15)
- dst_format = R300_COLORFORMAT_ARGB1555;
- else
- dst_format = R300_COLORFORMAT_RGB565;
- break;
- case 32:
- dst_format = R300_COLORFORMAT_ARGB8888;
- break;
- default:
- return;
+ /* Same for R100/R200 */
+ switch (pPixmap->drawable.bitsPerPixel) {
+ case 16:
+ if (pPixmap->drawable.depth == 15)
+ dst_format = RADEON_COLOR_FORMAT_ARGB1555;
+ else
+ dst_format = RADEON_COLOR_FORMAT_RGB565;
+ break;
+ case 32:
+ dst_format = RADEON_COLOR_FORMAT_ARGB8888;
+ break;
+ default:
+ return;
+ }
+
+ if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+ isplanar = TRUE;
+ }
+
+ if (isplanar) {
+ txformat = RADEON_TXFORMAT_I8;
+ } else {
+ if (pPriv->id == FOURCC_UYVY)
+ txformat = RADEON_TXFORMAT_YVYU422;
+ else
+ txformat = RADEON_TXFORMAT_VYUY422;
+ }
+
+ txformat |= RADEON_TXFORMAT_NON_POWER2;
+
+ colorpitch = dst_pitch >> pixel_shift;
+
+ if (RADEONTilingEnabled(pScrn, pPixmap))
+ colorpitch |= RADEON_COLOR_TILE_ENABLE;
+
+ BEGIN_ACCEL(4);
+
+ OUT_ACCEL_REG(RADEON_RB3D_CNTL,
+ dst_format /*| RADEON_ALPHA_BLEND_ENABLE*/);
+ OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
+
+ OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
+
+ OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL,
+ RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
+
+ FINISH_ACCEL();
+
+
+ info->accel_state->texW[0] = 1;
+ info->accel_state->texH[0] = 1;
+
+ BEGIN_ACCEL(9);
+
+ OUT_ACCEL_REG(RADEON_PP_CNTL,
+ RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
+
+ OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
+ RADEON_SE_VTX_FMT_ST0));
+
+ OUT_ACCEL_REG(RADEON_PP_TXFILTER_0,
+ RADEON_MAG_FILTER_LINEAR |
+ RADEON_MIN_FILTER_LINEAR |
+ RADEON_CLAMP_S_CLAMP_LAST |
+ RADEON_CLAMP_T_CLAMP_LAST |
+ RADEON_YUV_TO_RGB);
+ OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
+ OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, pPriv->src_offset);
+ OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0,
+ RADEON_COLOR_ARG_A_ZERO |
+ RADEON_COLOR_ARG_B_ZERO |
+ RADEON_COLOR_ARG_C_T0_COLOR |
+ RADEON_BLEND_CTL_ADD |
+ RADEON_CLAMP_TX);
+ OUT_ACCEL_REG(RADEON_PP_TXABLEND_0,
+ RADEON_ALPHA_ARG_A_ZERO |
+ RADEON_ALPHA_ARG_B_ZERO |
+ RADEON_ALPHA_ARG_C_T0_ALPHA |
+ RADEON_BLEND_CTL_ADD |
+ RADEON_CLAMP_TX);
+
+ OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
+ (pPriv->w - 1) |
+ ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
+ OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0,
+ pPriv->src_pitch - 32);
+ FINISH_ACCEL();
+
+ if (pPriv->vsync) {
+ xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn,
+ pPriv->drw_x,
+ pPriv->drw_x + pPriv->dst_w,
+ pPriv->drw_y,
+ pPriv->drw_y + pPriv->dst_h);
+ if (crtc) {
+ RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private;
+
+ FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap,
+ radeon_crtc->crtc_id,
+ pPriv->drw_y - crtc->y,
+ (pPriv->drw_y - crtc->y) + pPriv->dst_h);
}
+ }
+ /*
+ * Rendering of the actual polygon is done in two different
+ * ways depending on chip generation:
+ *
+ * < R300:
+ *
+ * These chips can render a rectangle in one pass, so
+ * handling is pretty straight-forward.
+ *
+ * >= R300:
+ *
+ * These chips can accept a quad, but will render it as
+ * two triangles which results in a diagonal tear. Instead
+ * We render a single, large triangle and use the scissor
+ * functionality to restrict it to the desired rectangle.
+ * Due to guardband limits on r3xx/r4xx, we can only use
+ * the single triangle up to 2880 pixels; above that we
+ * render as a quad.
+ */
+
+ while (nBox--) {
+ int srcX, srcY, srcw, srch;
+ int dstX, dstY, dstw, dsth;
+ dstX = pBox->x1 + dstxoff;
+ dstY = pBox->y1 + dstyoff;
+ dstw = pBox->x2 - pBox->x1;
+ dsth = pBox->y2 - pBox->y1;
+
+ srcX = ((pBox->x1 - pPriv->drw_x) *
+ pPriv->src_w) / pPriv->dst_w;
+ srcY = ((pBox->y1 - pPriv->drw_y) *
+ pPriv->src_h) / pPriv->dst_h;
+
+ srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
+ srch = (pPriv->src_h * dsth) / pPriv->dst_h;
+
+#ifdef ACCEL_CP
+ BEGIN_RING(3 * vtx_count + 3);
+ OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
+ 3 * vtx_count + 1));
+ OUT_RING(RADEON_CP_VC_FRMT_XY |
+ RADEON_CP_VC_FRMT_ST0);
+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
+ RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+ RADEON_CP_VC_CNTL_MAOS_ENABLE |
+ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+ (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+#else /* ACCEL_CP */
+ BEGIN_ACCEL(1 + vtx_count * 3);
+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
+ RADEON_VF_PRIM_WALK_DATA |
+ RADEON_VF_RADEON_MODE |
+ (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
+#endif
+ /*
+ * Just render a rect (using three coords).
+ */
+ VTX_OUT((float)dstX, (float)(dstY + dsth),
+ (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth),
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT((float)(dstX + dstw), (float)dstY,
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
+
+#ifdef ACCEL_CP
+ ADVANCE_RING();
+#else
+ FINISH_ACCEL();
+#endif /* !ACCEL_CP */
+
+ pBox++;
+ }
- output_fmt = (R300_OUT_FMT_C4_8 |
- R300_OUT_FMT_C0_SEL_BLUE |
- R300_OUT_FMT_C1_SEL_GREEN |
- R300_OUT_FMT_C2_SEL_RED |
- R300_OUT_FMT_C3_SEL_ALPHA);
+ BEGIN_ACCEL(1);
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+ FINISH_ACCEL();
- colorpitch = dst_pitch >> pixel_shift;
- colorpitch |= dst_format;
+ DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
+}
- if (RADEONTilingEnabled(pScrn, pPixmap))
- colorpitch |= R300_COLORTILE;
+static void
+FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
+{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ PixmapPtr pPixmap = pPriv->pPixmap;
+ uint32_t txformat;
+ uint32_t txfilter, txformat0, txpitch;
+ uint32_t dst_offset, dst_pitch, dst_format;
+ uint32_t colorpitch;
+ Bool isplanar = FALSE;
+ int dstxoff, dstyoff, pixel_shift, vtx_count;
+ BoxPtr pBox = REGION_RECTS(&pPriv->clip);
+ int nBox = REGION_NUM_RECTS(&pPriv->clip);
+ ACCEL_PREAMBLE();
- if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
- isplanar = TRUE;
+ pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
+
+#ifdef USE_EXA
+ if (info->useEXA) {
+ dst_offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
+ dst_pitch = exaGetPixmapPitch(pPixmap);
+ } else
+#endif
+ {
+ dst_offset = (pPixmap->devPrivate.ptr - info->FB) +
+ info->fbLocation + pScrn->fbOffset;
+ dst_pitch = pPixmap->devKind;
}
- if (isplanar) {
- txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0;
- txpitch = pPriv->src_pitch;
- } else {
- if (pPriv->id == FOURCC_UYVY)
- txformat1 = R300_TX_FORMAT_YVYU422;
- else
- txformat1 = R300_TX_FORMAT_VYUY422;
+#ifdef COMPOSITE
+ dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
+ dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
+#else
+ dstxoff = 0;
+ dstyoff = 0;
+#endif
- txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
+#ifdef USE_EXA
+ if (info->useEXA) {
+ RADEON_SWITCH_TO_3D();
+ } else
+#endif
+ {
+ BEGIN_ACCEL(2);
+ OUT_ACCEL_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH);
+ /* We must wait for 3d to idle, in case source was just written as a dest. */
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
+ RADEON_WAIT_HOST_IDLECLEAN |
+ RADEON_WAIT_2D_IDLECLEAN |
+ RADEON_WAIT_3D_IDLECLEAN |
+ RADEON_WAIT_DMA_GUI_IDLE);
+ FINISH_ACCEL();
- /* pitch is in pixels */
- txpitch = pPriv->src_pitch / 2;
+ if (!info->accel_state->XInited3D)
+ RADEONInit3DEngine(pScrn);
}
- txpitch -= 1;
- txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
- (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
- R300_TXPITCH_EN);
+ vtx_count = 4;
- info->accel_state->texW[0] = pPriv->w;
- info->accel_state->texH[0] = pPriv->h;
+ /* Same for R100/R200 */
+ switch (pPixmap->drawable.bitsPerPixel) {
+ case 16:
+ if (pPixmap->drawable.depth == 15)
+ dst_format = RADEON_COLOR_FORMAT_ARGB1555;
+ else
+ dst_format = RADEON_COLOR_FORMAT_RGB565;
+ break;
+ case 32:
+ dst_format = RADEON_COLOR_FORMAT_ARGB8888;
+ break;
+ default:
+ return;
+ }
- txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
- R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
- R300_TX_MAG_FILTER_LINEAR |
- R300_TX_MIN_FILTER_LINEAR |
- (0 << R300_TX_ID_SHIFT));
+ if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+ isplanar = TRUE;
+ }
+ if (isplanar) {
+ txformat = RADEON_TXFORMAT_I8;
+ } else {
+ if (pPriv->id == FOURCC_UYVY)
+ txformat = RADEON_TXFORMAT_YVYU422;
+ else
+ txformat = RADEON_TXFORMAT_VYUY422;
+ }
- if (IS_R500_3D && ((pPriv->w - 1) & 0x800))
- txpitch |= R500_TXWIDTH_11;
+ txformat |= RADEON_TXFORMAT_NON_POWER2;
- if (IS_R500_3D && ((pPriv->h - 1) & 0x800))
- txpitch |= R500_TXHEIGHT_11;
+ colorpitch = dst_pitch >> pixel_shift;
- txoffset = pPriv->src_offset;
+ if (RADEONTilingEnabled(pScrn, pPixmap))
+ colorpitch |= RADEON_COLOR_TILE_ENABLE;
- BEGIN_ACCEL(6);
- OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter);
- OUT_ACCEL_REG(R300_TX_FILTER1_0, 0);
- OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0);
- OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1);
- OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch);
- OUT_ACCEL_REG(R300_TX_OFFSET_0, txoffset);
- FINISH_ACCEL();
+ BEGIN_ACCEL(4);
- txenable = R300_TEX_0_ENABLE;
+ OUT_ACCEL_REG(RADEON_RB3D_CNTL,
+ dst_format /*| RADEON_ALPHA_BLEND_ENABLE*/);
+ OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
- if (isplanar) {
- txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
- (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
- R300_TXPITCH_EN);
- txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
- txpitch -= 1;
- txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
- R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
- R300_TX_MIN_FILTER_LINEAR |
- R300_TX_MAG_FILTER_LINEAR);
-
- BEGIN_ACCEL(12);
- OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT));
- OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
- OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
- OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2);
- OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
- OUT_ACCEL_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset);
- OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
- OUT_ACCEL_REG(R300_TX_FILTER1_2, 0);
- OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0);
- OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3);
- OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch);
- OUT_ACCEL_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset);
- FINISH_ACCEL();
- txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE;
+ OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
+
+ OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL,
+ RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
+
+ FINISH_ACCEL();
+
+ info->accel_state->texW[0] = pPriv->w;
+ info->accel_state->texH[0] = pPriv->h;
+
+ if (isplanar) {
+ /* note: in contrast to r300, use input biasing on uv components */
+ const float Loff = -0.0627;
+ float uvcosf, uvsinf;
+ float yco, yoff;
+ float uco[3], vco[3];
+ float bright, cont, sat;
+ int ref = pPriv->transform_index;
+ float ucscale = 0.25, vcscale = 0.25;
+ Bool needux8 = FALSE, needvx8 = FALSE;
+
+ /* contrast can cause constant overflow, clamp */
+ cont = RTFContrast(pPriv->contrast);
+ if (cont * trans[ref].RefLuma > 2.0)
+ cont = 2.0 / trans[ref].RefLuma;
+ /* brightness only ranges from -0.5 to 0.5, so it should be safe */
+ bright = RTFBrightness(pPriv->brightness);
+ /* saturation can also cause overflow, clamp */
+ sat = RTFSaturation(pPriv->saturation);
+ if (sat * trans[ref].RefBCb > 4.0)
+ sat = 4.0 / trans[ref].RefBCb;
+ uvcosf = sat * cos(RTFHue(pPriv->hue));
+ uvsinf = sat * sin(RTFHue(pPriv->hue));
+
+ yco = trans[ref].RefLuma * cont;
+ uco[0] = -trans[ref].RefRCr * uvsinf;
+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
+ uco[2] = trans[ref].RefBCb * uvcosf;
+ vco[0] = trans[ref].RefRCr * uvcosf;
+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
+ vco[2] = trans[ref].RefBCb * uvsinf;
+ yoff = Loff * yco + bright;
+
+ if ((uco[0] > 2.0) || (uco[2] > 2.0)) {
+ needux8 = TRUE;
+ ucscale = 0.125;
+ }
+ if ((vco[0] > 2.0) || (vco[2] > 2.0)) {
+ needvx8 = TRUE;
+ vcscale = 0.125;
}
- if (pPriv->bicubic_enabled) {
- /* Size is 128x1 */
- txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) |
- (0x0 << R300_TXHEIGHT_SHIFT) |
- R300_TXPITCH_EN);
- /* Format is 32-bit floats, 4bpp */
- txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16);
- /* Pitch is 127 (128-1) */
- txpitch = 0x7f;
- /* Tex filter */
- txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) |
- R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) |
- R300_TX_MIN_FILTER_NEAREST |
- R300_TX_MAG_FILTER_NEAREST |
- (1 << R300_TX_ID_SHIFT));
-
- BEGIN_ACCEL(6);
- OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter);
- OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
- OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
- OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1);
- OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
- OUT_ACCEL_REG(R300_TX_OFFSET_1, pPriv->bicubic_src_offset);
- FINISH_ACCEL();
-
- /* Enable tex 1 */
- txenable |= R300_TEX_1_ENABLE;
+ /* need 2 texcoord sets (even though they are identical) due
+ to denormalization! hw apparently can't premultiply
+ same coord set by different texture size */
+ vtx_count = 6;
+
+ txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
+ (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
+ txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
+ txpitch -= 32;
+ txfilter = R200_MAG_FILTER_LINEAR |
+ R200_MIN_FILTER_LINEAR |
+ R200_CLAMP_S_CLAMP_LAST |
+ R200_CLAMP_T_CLAMP_LAST;
+
+ BEGIN_ACCEL(36);
+
+ OUT_ACCEL_REG(RADEON_PP_CNTL,
+ RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE |
+ RADEON_TEX_BLEND_0_ENABLE |
+ RADEON_TEX_BLEND_1_ENABLE |
+ RADEON_TEX_BLEND_2_ENABLE);
+
+ OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
+ OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
+ (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
+ (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
+
+ OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
+ OUT_ACCEL_REG(R200_PP_TXSIZE_0,
+ (pPriv->w - 1) |
+ ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
+ OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
+ OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
+
+ OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
+ OUT_ACCEL_REG(R200_PP_TXSIZE_1, txformat0);
+ OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch);
+ OUT_ACCEL_REG(R200_PP_TXOFFSET_1, pPriv->src_offset + pPriv->planeu_offset);
+
+ OUT_ACCEL_REG(R200_PP_TXFILTER_2, txfilter);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_X_2, 0);
+ OUT_ACCEL_REG(R200_PP_TXSIZE_2, txformat0);
+ OUT_ACCEL_REG(R200_PP_TXPITCH_2, txpitch);
+ OUT_ACCEL_REG(R200_PP_TXOFFSET_2, pPriv->src_offset + pPriv->planev_offset);
+
+ /* similar to r300 code. Note the big problem is that hardware constants
+ * are 8 bits only, representing 0.0-1.0. We can get that up (using bias
+ * + scale) to -1.0-1.0 (but precision will suffer). AFAIK the hw actually
+ * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but
+ * the constants do not. To get a larger range we can use output scale, but for
+ * that 2.018 value we need a total scale by 8, which means the constants
+ * really have no accuracy whatsoever (5 fractional bits only).
+ * The only direct way to get high precision "constants" into the fragment
+ * pipe I know of is to use the texcoord interpolator (not color, this one
+ * is 8 bit only too), which seems a bit expensive. We're lucky though it
+ * seems the values we need seem to fit better than worst case (get about
+ * 6 fractional bits for this instead of 5, at least when not correcting for
+ * hue/saturation/contrast/brightness, which is the same as for vco - yco and
+ * yoff get 8 fractional bits). Try to preserve as much accuracy as possible
+ * even with non-default saturation/hue/contrast/brightness adjustments,
+ * it gets a little crazy and ultimately precision might still be lacking.
+ *
+ * A higher precision (8 fractional bits) version might just put uco into
+ * a texcoord, and calculate a new vcoconst in the shader, like so:
+ * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable
+ * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0}
+ * vcocalc = ADD temp, bias/scale(cohelper), vco
+ * would in total use 4 tex units, 4 instructions which seems fairly
+ * balanced for this architecture (instead of 3 + 3 for the solution here)
+ *
+ * temp = MAD(yco, yuv.yyyy, yoff)
+ * temp = MAD(uco, yuv.uuuu, temp)
+ * result = MAD(vco, yuv.vvvv, temp)
+ *
+ * note first mad produces actually scalar, hence we transform
+ * it into a dp2a to get 8 bit precision of yco instead of 7 -
+ * That's assuming hw correctly expands consts to internal precision.
+ * (y * 1 + y * (yco - 1) + yoff)
+ * temp = DP2A / 2 (yco, yuv.yyyy, yoff)
+ * temp = MAD (uco / 4, yuv.uuuu * 2, temp)
+ * result = MAD x2 (vco / 2, yuv.vvvv, temp)
+ *
+ * vco, uco need bias (and hence scale too)
+ *
+ */
+
+ /* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */
+ OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
+ R200_TXC_ARG_A_TFACTOR_COLOR |
+ R200_TXC_ARG_B_R0_COLOR |
+ R200_TXC_ARG_C_TFACTOR_COLOR |
+ (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) |
+ R200_TXC_OP_DOT2_ADD);
+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
+ (0 << R200_TXC_TFACTOR_SEL_SHIFT) |
+ R200_TXC_SCALE_INV2 |
+ R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0);
+ OUT_ACCEL_REG(R200_PP_TXABLEND_0,
+ R200_TXA_ARG_A_ZERO |
+ R200_TXA_ARG_B_ZERO |
+ R200_TXA_ARG_C_ZERO |
+ R200_TXA_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
+ R200_TXA_OUTPUT_REG_NONE);
+
+ /* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */
+ OUT_ACCEL_REG(R200_PP_TXCBLEND_1,
+ R200_TXC_ARG_A_TFACTOR_COLOR |
+ R200_TXC_BIAS_ARG_A |
+ R200_TXC_SCALE_ARG_A |
+ R200_TXC_ARG_B_R1_COLOR |
+ R200_TXC_BIAS_ARG_B |
+ (needux8 ? R200_TXC_SCALE_ARG_B : 0) |
+ R200_TXC_ARG_C_R0_COLOR |
+ R200_TXC_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_1,
+ (1 << R200_TXC_TFACTOR_SEL_SHIFT) |
+ R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0);
+ OUT_ACCEL_REG(R200_PP_TXABLEND_1,
+ R200_TXA_ARG_A_ZERO |
+ R200_TXA_ARG_B_ZERO |
+ R200_TXA_ARG_C_ZERO |
+ R200_TXA_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXABLEND2_1,
+ R200_TXA_OUTPUT_REG_NONE);
+
+ /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */
+ OUT_ACCEL_REG(R200_PP_TXCBLEND_2,
+ R200_TXC_ARG_A_TFACTOR_COLOR |
+ R200_TXC_BIAS_ARG_A |
+ R200_TXC_SCALE_ARG_A |
+ R200_TXC_ARG_B_R2_COLOR |
+ R200_TXC_BIAS_ARG_B |
+ (needvx8 ? R200_TXC_SCALE_ARG_B : 0) |
+ R200_TXC_ARG_C_R0_COLOR |
+ R200_TXC_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_2,
+ (2 << R200_TXC_TFACTOR_SEL_SHIFT) |
+ R200_TXC_SCALE_2X |
+ R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
+ OUT_ACCEL_REG(R200_PP_TXABLEND_2,
+ R200_TXA_ARG_A_ZERO |
+ R200_TXA_ARG_B_ZERO |
+ R200_TXA_ARG_C_ZERO |
+ R200_TXA_COMP_ARG_C |
+ R200_TXA_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXABLEND2_2,
+ R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
+
+ /* shader constants */
+ OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */
+ yco > 1.0 ? yco - 1.0: yco,
+ yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */
+ 0.0));
+ OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */
+ uco[1] * ucscale + 0.5, /* or [-2, 2] */
+ uco[2] * ucscale + 0.5,
+ 0.0));
+ OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */
+ vco[1] * vcscale + 0.5, /* or [-4, 4] */
+ vco[2] * vcscale + 0.5,
+ 0.0));
+
+ FINISH_ACCEL();
+ } else if (info->ChipFamily == CHIP_FAMILY_RV250) {
+ /* fix up broken packed yuv - shader same as above except
+ yuv components are all in same reg */
+ /* note: in contrast to r300, use input biasing on uv components */
+ const float Loff = -0.0627;
+ float uvcosf, uvsinf;
+ float yco, yoff;
+ float uco[3], vco[3];
+ float bright, cont, sat;
+ int ref = pPriv->transform_index;
+ float ucscale = 0.25, vcscale = 0.25;
+ Bool needux8 = FALSE, needvx8 = FALSE;
+
+ /* contrast can cause constant overflow, clamp */
+ cont = RTFContrast(pPriv->contrast);
+ if (cont * trans[ref].RefLuma > 2.0)
+ cont = 2.0 / trans[ref].RefLuma;
+ /* brightness only ranges from -0.5 to 0.5, so it should be safe */
+ bright = RTFBrightness(pPriv->brightness);
+ /* saturation can also cause overflow, clamp */
+ sat = RTFSaturation(pPriv->saturation);
+ if (sat * trans[ref].RefBCb > 4.0)
+ sat = 4.0 / trans[ref].RefBCb;
+ uvcosf = sat * cos(RTFHue(pPriv->hue));
+ uvsinf = sat * sin(RTFHue(pPriv->hue));
+
+ yco = trans[ref].RefLuma * cont;
+ uco[0] = -trans[ref].RefRCr * uvsinf;
+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
+ uco[2] = trans[ref].RefBCb * uvcosf;
+ vco[0] = trans[ref].RefRCr * uvcosf;
+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
+ vco[2] = trans[ref].RefBCb * uvsinf;
+ yoff = Loff * yco + bright;
+
+ if ((uco[0] > 2.0) || (uco[2] > 2.0)) {
+ needux8 = TRUE;
+ ucscale = 0.125;
+ }
+ if ((vco[0] > 2.0) || (vco[2] > 2.0)) {
+ needvx8 = TRUE;
+ vcscale = 0.125;
}
- /* setup the VAP */
- if (info->accel_state->has_tcl) {
- if (pPriv->bicubic_enabled)
- BEGIN_ACCEL(7);
- else
- BEGIN_ACCEL(6);
- } else {
- if (pPriv->bicubic_enabled)
- BEGIN_ACCEL(5);
- else
- BEGIN_ACCEL(4);
+ txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
+ (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
+ txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
+ txpitch -= 32;
+ txfilter = R200_MAG_FILTER_LINEAR |
+ R200_MIN_FILTER_LINEAR |
+ R200_CLAMP_S_CLAMP_LAST |
+ R200_CLAMP_T_CLAMP_LAST;
+
+ BEGIN_ACCEL(24);
+
+ OUT_ACCEL_REG(RADEON_PP_CNTL,
+ RADEON_TEX_0_ENABLE |
+ RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE |
+ RADEON_TEX_BLEND_2_ENABLE);
+
+ OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
+ OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
+ (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
+
+ OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
+ OUT_ACCEL_REG(R200_PP_TXSIZE_0,
+ (pPriv->w - 1) |
+ ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
+ OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
+ OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
+
+ /* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */
+ OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
+ R200_TXC_ARG_A_TFACTOR_COLOR |
+ R200_TXC_ARG_B_R0_COLOR |
+ R200_TXC_ARG_C_TFACTOR_COLOR |
+ (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) |
+ R200_TXC_OP_DOT2_ADD);
+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
+ (0 << R200_TXC_TFACTOR_SEL_SHIFT) |
+ R200_TXC_SCALE_INV2 |
+ (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) |
+ R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1);
+ OUT_ACCEL_REG(R200_PP_TXABLEND_0,
+ R200_TXA_ARG_A_ZERO |
+ R200_TXA_ARG_B_ZERO |
+ R200_TXA_ARG_C_ZERO |
+ R200_TXA_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
+ R200_TXA_OUTPUT_REG_NONE);
+
+ /* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */
+ OUT_ACCEL_REG(R200_PP_TXCBLEND_1,
+ R200_TXC_ARG_A_TFACTOR_COLOR |
+ R200_TXC_BIAS_ARG_A |
+ R200_TXC_SCALE_ARG_A |
+ R200_TXC_ARG_B_R0_COLOR |
+ R200_TXC_BIAS_ARG_B |
+ (needux8 ? R200_TXC_SCALE_ARG_B : 0) |
+ R200_TXC_ARG_C_R1_COLOR |
+ R200_TXC_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_1,
+ (1 << R200_TXC_TFACTOR_SEL_SHIFT) |
+ (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) |
+ R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1);
+ OUT_ACCEL_REG(R200_PP_TXABLEND_1,
+ R200_TXA_ARG_A_ZERO |
+ R200_TXA_ARG_B_ZERO |
+ R200_TXA_ARG_C_ZERO |
+ R200_TXA_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXABLEND2_1,
+ R200_TXA_OUTPUT_REG_NONE);
+
+ /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */
+ OUT_ACCEL_REG(R200_PP_TXCBLEND_2,
+ R200_TXC_ARG_A_TFACTOR_COLOR |
+ R200_TXC_BIAS_ARG_A |
+ R200_TXC_SCALE_ARG_A |
+ R200_TXC_ARG_B_R0_COLOR |
+ R200_TXC_BIAS_ARG_B |
+ (needvx8 ? R200_TXC_SCALE_ARG_B : 0) |
+ R200_TXC_ARG_C_R1_COLOR |
+ R200_TXC_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_2,
+ (2 << R200_TXC_TFACTOR_SEL_SHIFT) |
+ R200_TXC_SCALE_2X |
+ (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) |
+ R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
+ OUT_ACCEL_REG(R200_PP_TXABLEND_2,
+ R200_TXA_ARG_A_ZERO |
+ R200_TXA_ARG_B_ZERO |
+ R200_TXA_ARG_C_ZERO |
+ R200_TXA_COMP_ARG_C |
+ R200_TXA_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXABLEND2_2,
+ R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
+
+ /* shader constants */
+ OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */
+ yco > 1.0 ? yco - 1.0: yco,
+ yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */
+ 0.0));
+ OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */
+ uco[1] * ucscale + 0.5, /* or [-2, 2] */
+ uco[2] * ucscale + 0.5,
+ 0.0));
+ OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */
+ vco[1] * vcscale + 0.5, /* or [-4, 4] */
+ vco[2] * vcscale + 0.5,
+ 0.0));
+
+ FINISH_ACCEL();
+ } else {
+ BEGIN_ACCEL(13);
+ OUT_ACCEL_REG(RADEON_PP_CNTL,
+ RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
+
+ OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
+ OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
+ (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
+
+ OUT_ACCEL_REG(R200_PP_TXFILTER_0,
+ R200_MAG_FILTER_LINEAR |
+ R200_MIN_FILTER_LINEAR |
+ R200_CLAMP_S_CLAMP_LAST |
+ R200_CLAMP_T_CLAMP_LAST |
+ R200_YUV_TO_RGB);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
+ OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
+ OUT_ACCEL_REG(R200_PP_TXSIZE_0,
+ (pPriv->w - 1) |
+ ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
+ OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
+
+ OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
+
+ OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
+ R200_TXC_ARG_A_ZERO |
+ R200_TXC_ARG_B_ZERO |
+ R200_TXC_ARG_C_R0_COLOR |
+ R200_TXC_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
+ R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
+ OUT_ACCEL_REG(R200_PP_TXABLEND_0,
+ R200_TXA_ARG_A_ZERO |
+ R200_TXA_ARG_B_ZERO |
+ R200_TXA_ARG_C_R0_ALPHA |
+ R200_TXA_OP_MADD);
+ OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
+ R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
+ FINISH_ACCEL();
+ }
+
+ if (pPriv->vsync) {
+ xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn,
+ pPriv->drw_x,
+ pPriv->drw_x + pPriv->dst_w,
+ pPriv->drw_y,
+ pPriv->drw_y + pPriv->dst_h);
+ if (crtc) {
+ RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private;
+
+ FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap,
+ radeon_crtc->crtc_id,
+ pPriv->drw_y - crtc->y,
+ (pPriv->drw_y - crtc->y) + pPriv->dst_h);
}
+ }
+ /*
+ * Rendering of the actual polygon is done in two different
+ * ways depending on chip generation:
+ *
+ * < R300:
+ *
+ * These chips can render a rectangle in one pass, so
+ * handling is pretty straight-forward.
+ *
+ * >= R300:
+ *
+ * These chips can accept a quad, but will render it as
+ * two triangles which results in a diagonal tear. Instead,
+ * we render a single, large triangle and use the scissor
+ * functionality to restrict it to the desired rectangle.
+ * Due to guardband limits on r3xx/r4xx, we can only use
+ * the single triangle up to 2880 pixels; above that we
+ * render as a quad.
+ */
- /* These registers define the number, type, and location of data submitted
- * to the PVS unit of GA input (when PVS is disabled)
- * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
- * enabled. This memory provides the imputs to the vertex shader program
- * and ordering is not important. When PVS/TCL is disabled, this field maps
- * directly to the GA input memory and the order is signifigant. In
- * PVS_BYPASS mode the order is as follows:
- * Position
- * Point Size
- * Color 0-3
- * Textures 0-7
- * Fog
- */
- if (pPriv->bicubic_enabled) {
- OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
- ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
- (0 << R300_SKIP_DWORDS_0_SHIFT) |
- (0 << R300_DST_VEC_LOC_0_SHIFT) |
- R300_SIGNED_0 |
- (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
- (0 << R300_SKIP_DWORDS_1_SHIFT) |
- (6 << R300_DST_VEC_LOC_1_SHIFT) |
- R300_SIGNED_1));
- OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
- ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
- (0 << R300_SKIP_DWORDS_2_SHIFT) |
- (7 << R300_DST_VEC_LOC_2_SHIFT) |
- R300_LAST_VEC_2 |
- R300_SIGNED_2));
+ while (nBox--) {
+ int srcX, srcY, srcw, srch;
+ int dstX, dstY, dstw, dsth;
+ dstX = pBox->x1 + dstxoff;
+ dstY = pBox->y1 + dstyoff;
+ dstw = pBox->x2 - pBox->x1;
+ dsth = pBox->y2 - pBox->y1;
+
+ srcX = ((pBox->x1 - pPriv->drw_x) *
+ pPriv->src_w) / pPriv->dst_w;
+ srcY = ((pBox->y1 - pPriv->drw_y) *
+ pPriv->src_h) / pPriv->dst_h;
+
+ srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
+ srch = (pPriv->src_h * dsth) / pPriv->dst_h;
+
+#ifdef ACCEL_CP
+ BEGIN_RING(3 * vtx_count + 2);
+ OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
+ 3 * vtx_count));
+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
+ RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+ (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+#else /* ACCEL_CP */
+ BEGIN_ACCEL(1 + vtx_count * 3);
+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
+ RADEON_VF_PRIM_WALK_DATA |
+ (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
+
+#endif
+ if (isplanar) {
+ /*
+ * Just render a rect (using three coords).
+ * Filter is a bit of a misnomer, it's just texcoords...
+ */
+ VTX_OUT_FILTER((float)dstX, (float)(dstY + dsth),
+ (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
+ (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT_FILTER((float)(dstX + dstw), (float)(dstY + dsth),
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT_FILTER((float)(dstX + dstw), (float)dstY,
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
} else {
- OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
- ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
- (0 << R300_SKIP_DWORDS_0_SHIFT) |
- (0 << R300_DST_VEC_LOC_0_SHIFT) |
- R300_SIGNED_0 |
- (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
- (0 << R300_SKIP_DWORDS_1_SHIFT) |
- (6 << R300_DST_VEC_LOC_1_SHIFT) |
- R300_LAST_VEC_1 |
- R300_SIGNED_1));
+ /*
+ * Just render a rect (using three coords).
+ */
+ VTX_OUT((float)dstX, (float)(dstY + dsth),
+ (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth),
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
+ VTX_OUT((float)(dstX + dstw), (float)dstY,
+ (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
}
- /* load the vertex shader
- * We pre-load vertex programs in RADEONInit3DEngine():
- * - exa mask/Xv bicubic
- * - exa no mask
- * - Xv
- * Here we select the offset of the vertex program we want to use
- */
- if (info->accel_state->has_tcl) {
- if (pPriv->bicubic_enabled) {
- OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
- ((0 << R300_PVS_FIRST_INST_SHIFT) |
- (2 << R300_PVS_XYZW_VALID_INST_SHIFT) |
- (2 << R300_PVS_LAST_INST_SHIFT)));
- OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
- (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
- } else {
- OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
- ((5 << R300_PVS_FIRST_INST_SHIFT) |
- (6 << R300_PVS_XYZW_VALID_INST_SHIFT) |
- (6 << R300_PVS_LAST_INST_SHIFT)));
- OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
- (6 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
- }
+#ifdef ACCEL_CP
+ ADVANCE_RING();
+#else
+ FINISH_ACCEL();
+#endif /* !ACCEL_CP */
+
+ pBox++;
+ }
+
+ BEGIN_ACCEL(1);
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+ FINISH_ACCEL();
+
+ DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
+}
+
+static void
+FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
+{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ PixmapPtr pPixmap = pPriv->pPixmap;
+ uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
+ uint32_t dst_offset, dst_pitch, dst_format;
+ uint32_t txenable, colorpitch;
+ uint32_t output_fmt;
+ Bool isplanar = FALSE;
+ int dstxoff, dstyoff, pixel_shift, vtx_count;
+ BoxPtr pBox = REGION_RECTS(&pPriv->clip);
+ int nBox = REGION_NUM_RECTS(&pPriv->clip);
+ ACCEL_PREAMBLE();
+
+ pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
+
+#ifdef USE_EXA
+ if (info->useEXA) {
+ dst_offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
+ dst_pitch = exaGetPixmapPitch(pPixmap);
+ } else
+#endif
+ {
+ dst_offset = (pPixmap->devPrivate.ptr - info->FB) +
+ info->fbLocation + pScrn->fbOffset;
+ dst_pitch = pPixmap->devKind;
}
- /* Position and one set of 2 texture coordinates */
- OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
- if (pPriv->bicubic_enabled)
- OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
- (2 << R300_TEX_1_COMP_CNT_SHIFT)));
+#ifdef COMPOSITE
+ dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
+ dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
+#else
+ dstxoff = 0;
+ dstyoff = 0;
+#endif
+
+#ifdef USE_EXA
+ if (info->useEXA) {
+ RADEON_SWITCH_TO_3D();
+ } else
+#endif
+ {
+ BEGIN_ACCEL(2);
+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+ /* We must wait for 3d to idle, in case source was just written as a dest. */
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
+ RADEON_WAIT_HOST_IDLECLEAN |
+ RADEON_WAIT_2D_IDLECLEAN |
+ RADEON_WAIT_3D_IDLECLEAN |
+ RADEON_WAIT_DMA_GUI_IDLE);
+ FINISH_ACCEL();
+
+ if (!info->accel_state->XInited3D)
+ RADEONInit3DEngine(pScrn);
+ }
+
+ if (pPriv->bicubic_enabled)
+ vtx_count = 6;
+ else
+ vtx_count = 4;
+
+ switch (pPixmap->drawable.bitsPerPixel) {
+ case 16:
+ if (pPixmap->drawable.depth == 15)
+ dst_format = R300_COLORFORMAT_ARGB1555;
else
- OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT));
+ dst_format = R300_COLORFORMAT_RGB565;
+ break;
+ case 32:
+ dst_format = R300_COLORFORMAT_ARGB8888;
+ break;
+ default:
+ return;
+ }
- OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
- FINISH_ACCEL();
+ output_fmt = (R300_OUT_FMT_C4_8 |
+ R300_OUT_FMT_C0_SEL_BLUE |
+ R300_OUT_FMT_C1_SEL_GREEN |
+ R300_OUT_FMT_C2_SEL_RED |
+ R300_OUT_FMT_C3_SEL_ALPHA);
- /* setup pixel shader */
- if (IS_R300_3D) {
- if (pPriv->bicubic_enabled) {
- BEGIN_ACCEL(79);
-
- /* 4 components: 2 for tex0 and 2 for tex1 */
- OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
-
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
-
- /* Pixel stack frame size. */
- OUT_ACCEL_REG(R300_US_PIXSIZE, 5);
-
- /* Indirection levels */
- OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) |
- R300_FIRST_TEX));
-
- /* Set nodes. */
- OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
- R300_ALU_CODE_SIZE(14) |
- R300_TEX_CODE_OFFSET(0) |
- R300_TEX_CODE_SIZE(6)));
-
- /* Nodes are allocated highest first, but executed lowest first */
- OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0);
- OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) |
- R300_ALU_SIZE(0) |
- R300_TEX_START(0) |
- R300_TEX_SIZE(0)));
- OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) |
- R300_ALU_SIZE(9) |
- R300_TEX_START(1) |
- R300_TEX_SIZE(0)));
- OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) |
- R300_ALU_SIZE(2) |
- R300_TEX_START(2) |
- R300_TEX_SIZE(3) |
- R300_RGBA_OUT));
-
- /* ** BICUBIC FP ** */
-
- /* texcoord0 => temp0
- * texcoord1 => temp1 */
-
- // first node
- /* TEX temp2, temp1.rrr0, tex1, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(1) |
- R300_TEX_SRC_ADDR(1) |
- R300_TEX_DST_ADDR(2)));
-
- /* MOV temp1.r, temp1.ggg0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) |
- R300_ALU_RGB_ADDRD(1) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ colorpitch = dst_pitch >> pixel_shift;
+ colorpitch |= dst_format;
+ if (RADEONTilingEnabled(pScrn, pPixmap))
+ colorpitch |= R300_COLORTILE;
- // second node
- /* TEX temp1, temp1, tex1, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(1) |
- R300_TEX_SRC_ADDR(1) |
- R300_TEX_DST_ADDR(1)));
-
- /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
- R300_ALU_RGB_ADDRD(3) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+ isplanar = TRUE;
+ }
+ if (isplanar) {
+ txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0;
+ txpitch = pPriv->src_pitch;
+ } else {
+ if (pPriv->id == FOURCC_UYVY)
+ txformat1 = R300_TX_FORMAT_YVYU422;
+ else
+ txformat1 = R300_TX_FORMAT_VYUY422;
- /* MUL temp2.rg, temp2.rrr0, const0.rgb */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
- R300_ALU_RGB_ADDRD(2) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
- /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
- R300_ALU_RGB_ADDR2(3) |
- R300_ALU_RGB_ADDRD(4) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ /* pitch is in pixels */
+ txpitch = pPriv->src_pitch / 2;
+ }
+ txpitch -= 1;
+
+ txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
+ (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
+
+ info->accel_state->texW[0] = pPriv->w;
+ info->accel_state->texH[0] = pPriv->h;
+
+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_MAG_FILTER_LINEAR |
+ R300_TX_MIN_FILTER_LINEAR |
+ (0 << R300_TX_ID_SHIFT));
+
+ txoffset = pPriv->src_offset;
+
+ BEGIN_ACCEL(6);
+ OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter);
+ OUT_ACCEL_REG(R300_TX_FILTER1_0, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_0, txoffset);
+ FINISH_ACCEL();
- /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
- R300_ALU_RGB_ADDR2(2) |
- R300_ALU_RGB_ADDRD(5) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ txenable = R300_TEX_0_ENABLE;
- /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
- R300_ALU_RGB_ADDR2(3) |
- R300_ALU_RGB_ADDRD(3) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ if (isplanar) {
+ txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
+ (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
+ txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
+ txpitch -= 1;
+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_MIN_FILTER_LINEAR |
+ R300_TX_MAG_FILTER_LINEAR);
+
+ BEGIN_ACCEL(12);
+ OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT));
+ OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset);
+ OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
+ OUT_ACCEL_REG(R300_TX_FILTER1_2, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset);
+ FINISH_ACCEL();
+ txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE;
+ }
- /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) |
- R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
- R300_ALU_RGB_ADDR2(2) |
- R300_ALU_RGB_ADDRD(1) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ if (pPriv->bicubic_enabled) {
+ /* Size is 128x1 */
+ txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) |
+ (0x0 << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
+ /* Format is 32-bit floats, 4bpp */
+ txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16);
+ /* Pitch is 127 (128-1) */
+ txpitch = 0x7f;
+ /* Tex filter */
+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) |
+ R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) |
+ R300_TX_MIN_FILTER_NEAREST |
+ R300_TX_MAG_FILTER_NEAREST |
+ (1 << R300_TX_ID_SHIFT));
- /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR2(1) |
- R300_ALU_RGB_ADDRD(1) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ BEGIN_ACCEL(6);
+ OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter);
+ OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_1, pPriv->bicubic_src_offset);
+ FINISH_ACCEL();
- /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR2(3) |
- R300_ALU_RGB_ADDRD(2) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ /* Enable tex 1 */
+ txenable |= R300_TEX_1_ENABLE;
+ }
- /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR2(5) |
- R300_ALU_RGB_ADDRD(3) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ /* setup the VAP */
+ if (info->accel_state->has_tcl) {
+ if (pPriv->bicubic_enabled)
+ BEGIN_ACCEL(7);
+ else
+ BEGIN_ACCEL(6);
+ } else {
+ if (pPriv->bicubic_enabled)
+ BEGIN_ACCEL(5);
+ else
+ BEGIN_ACCEL(4);
+ }
- /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR2(4) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ /* These registers define the number, type, and location of data submitted
+ * to the PVS unit of GA input (when PVS is disabled)
+ * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
+ * enabled. This memory provides the inputs to the vertex shader program
+ * and ordering is not important. When PVS/TCL is disabled, this field maps
+ * directly to the GA input memory and the order is significant. In
+ * PVS_BYPASS mode the order is as follows:
+ * Position
+ * Point Size
+ * Color 0-3
+ * Textures 0-7
+ * Fog
+ */
+ if (pPriv->bicubic_enabled) {
+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+ (0 << R300_SKIP_DWORDS_0_SHIFT) |
+ (0 << R300_DST_VEC_LOC_0_SHIFT) |
+ R300_SIGNED_0 |
+ (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+ (0 << R300_SKIP_DWORDS_1_SHIFT) |
+ (6 << R300_DST_VEC_LOC_1_SHIFT) |
+ R300_SIGNED_1));
+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
+ (0 << R300_SKIP_DWORDS_2_SHIFT) |
+ (7 << R300_DST_VEC_LOC_2_SHIFT) |
+ R300_LAST_VEC_2 |
+ R300_SIGNED_2));
+ } else {
+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+ (0 << R300_SKIP_DWORDS_0_SHIFT) |
+ (0 << R300_DST_VEC_LOC_0_SHIFT) |
+ R300_SIGNED_0 |
+ (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+ (0 << R300_SKIP_DWORDS_1_SHIFT) |
+ (6 << R300_DST_VEC_LOC_1_SHIFT) |
+ R300_LAST_VEC_1 |
+ R300_SIGNED_1));
+ }
+
+ /* load the vertex shader
+ * We pre-load vertex programs in RADEONInit3DEngine():
+ * - exa mask/Xv bicubic
+ * - exa no mask
+ * - Xv
+ * Here we select the offset of the vertex program we want to use
+ */
+ if (info->accel_state->has_tcl) {
+ if (pPriv->bicubic_enabled) {
+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
+ ((0 << R300_PVS_FIRST_INST_SHIFT) |
+ (2 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (2 << R300_PVS_LAST_INST_SHIFT)));
+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
+ (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+ } else {
+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
+ ((5 << R300_PVS_FIRST_INST_SHIFT) |
+ (6 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (6 << R300_PVS_LAST_INST_SHIFT)));
+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
+ (6 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+ }
+ }
+
+ /* Position and one set of 2 texture coordinates */
+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
+ if (pPriv->bicubic_enabled)
+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
+ (2 << R300_TEX_1_COMP_CNT_SHIFT)));
+ else
+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT));
+
+ OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
+ FINISH_ACCEL();
+
+ /* setup pixel shader */
+ if (pPriv->bicubic_enabled) {
+ BEGIN_ACCEL(79);
+
+ /* 4 components: 2 for tex0 and 2 for tex1 */
+ OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
+
+ /* Pixel stack frame size. */
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 5);
+
+ /* Indirection levels */
+ OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) |
+ R300_FIRST_TEX));
+
+ /* Set nodes. */
+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
+ R300_ALU_CODE_SIZE(14) |
+ R300_TEX_CODE_OFFSET(0) |
+ R300_TEX_CODE_SIZE(6)));
+
+ /* Nodes are allocated highest first, but executed lowest first */
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0);
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) |
+ R300_ALU_SIZE(0) |
+ R300_TEX_START(0) |
+ R300_TEX_SIZE(0)));
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) |
+ R300_ALU_SIZE(9) |
+ R300_TEX_START(1) |
+ R300_TEX_SIZE(0)));
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) |
+ R300_ALU_SIZE(2) |
+ R300_TEX_START(2) |
+ R300_TEX_SIZE(3) |
+ R300_RGBA_OUT));
+
+ /* ** BICUBIC FP ** */
+
+ /* texcoord0 => temp0
+ * texcoord1 => temp1 */
+
+ // first node
+ /* TEX temp2, temp1.rrr0, tex1, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(1) |
+ R300_TEX_SRC_ADDR(1) |
+ R300_TEX_DST_ADDR(2)));
+
+ /* MOV temp1.r, temp1.ggg0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) |
+ R300_ALU_RGB_ADDRD(1) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+
+ // second node
+ /* TEX temp1, temp1, tex1, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(1) |
+ R300_TEX_SRC_ADDR(1) |
+ R300_TEX_DST_ADDR(1)));
+
+ /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
+ R300_ALU_RGB_ADDRD(3) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+
+ /* MUL temp2.rg, temp2.rrr0, const0.rgb */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
+ R300_ALU_RGB_ADDRD(2) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR2(3) |
+ R300_ALU_RGB_ADDRD(4) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR2(2) |
+ R300_ALU_RGB_ADDRD(5) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR2(3) |
+ R300_ALU_RGB_ADDRD(3) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) |
+ R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR2(2) |
+ R300_ALU_RGB_ADDRD(1) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR2(1) |
+ R300_ALU_RGB_ADDRD(1) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR2(3) |
+ R300_ALU_RGB_ADDRD(2) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR2(5) |
+ R300_ALU_RGB_ADDRD(3) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR2(4) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) |
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- // third node
- /* TEX temp4, temp1.rg--, tex0, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(0) |
- R300_TEX_SRC_ADDR(1) |
- R300_TEX_DST_ADDR(4)));
-
- /* TEX temp3, temp3.rg--, tex0, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(0) |
- R300_TEX_SRC_ADDR(3) |
- R300_TEX_DST_ADDR(3)));
-
- /* TEX temp5, temp2.rg--, tex0, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(0) |
- R300_TEX_SRC_ADDR(2) |
- R300_TEX_DST_ADDR(5)));
-
- /* TEX temp0, temp0.rg--, tex0, 1D */
- OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) |
- R300_TEX_ID(0) |
- R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(0)));
-
- /* LRP temp3, temp1.bbbb, temp4, temp3 ->
- * - PRESUB temps, temp4 - temp3
- * - MAD temp3, temp1.bbbb, temps, temp3 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) |
- R300_ALU_RGB_ADDR1(4) |
- R300_ALU_RGB_ADDR2(1) |
- R300_ALU_RGB_ADDRD(3) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ // third node
+ /* TEX temp4, temp1.rg--, tex0, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(0) |
+ R300_TEX_SRC_ADDR(1) |
+ R300_TEX_DST_ADDR(4)));
+
+ /* TEX temp3, temp3.rg--, tex0, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(0) |
+ R300_TEX_SRC_ADDR(3) |
+ R300_TEX_DST_ADDR(3)));
+
+ /* TEX temp5, temp2.rg--, tex0, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(0) |
+ R300_TEX_SRC_ADDR(2) |
+ R300_TEX_DST_ADDR(5)));
+
+ /* TEX temp0, temp0.rg--, tex0, 1D */
+ OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(0) |
+ R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(0)));
+
+ /* LRP temp3, temp1.bbbb, temp4, temp3 ->
+ * - PRESUB temps, temp4 - temp3
+ * - MAD temp3, temp1.bbbb, temps, temp3 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) |
+ R300_ALU_RGB_ADDR1(4) |
+ R300_ALU_RGB_ADDR2(1) |
+ R300_ALU_RGB_ADDRD(3) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) |
R300_ALU_ALPHA_ADDR1(4) |
R300_ALU_ALPHA_ADDR2(1) |
R300_ALU_ALPHA_ADDRD(3) |
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A)));
- /* LRP temp0, temp1.bbbb, temp5, temp0 ->
- * - PRESUB temps, temp5 - temp0
- * - MAD temp0, temp1.bbbb, temps, temp0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) |
- R300_ALU_RGB_INSERT_NOP));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR1(5) |
- R300_ALU_RGB_ADDR2(1) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ /* LRP temp0, temp1.bbbb, temp5, temp0 ->
+ * - PRESUB temps, temp5 - temp0
+ * - MAD temp0, temp1.bbbb, temps, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) |
+ R300_ALU_RGB_INSERT_NOP));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(5) |
+ R300_ALU_RGB_ADDR2(1) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) |
R300_ALU_ALPHA_ADDR1(5) |
R300_ALU_ALPHA_ADDR2(1) |
R300_ALU_ALPHA_ADDRD(0) |
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A)));
- /* LRP output, temp2.bbbb, temp3, temp0 ->
- * - PRESUB temps, temp3 - temp0
- * - MAD output, temp2.bbbb, temps, temp0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR1(3) |
- R300_ALU_RGB_ADDR2(2) |
- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ /* LRP output, temp2.bbbb, temp3, temp0 ->
+ * - PRESUB temps, temp3 - temp0
+ * - MAD output, temp2.bbbb, temps, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(3) |
+ R300_ALU_RGB_ADDR2(2) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) |
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) |
R300_ALU_ALPHA_ADDR1(3) |
R300_ALU_ALPHA_ADDR2(2) |
R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A)));
- /* Shader constants. */
- OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w));
- OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0);
- OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0);
- OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0);
-
- OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0);
- OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h));
- OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0);
- OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0);
-
- FINISH_ACCEL();
- } else if (isplanar) {
- /*
- * y' = y - .0625
- * u' = u - .5
- * v' = v - .5;
- *
- * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
- * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
- * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
- *
- * DP3 might look like the straightforward solution
- * but we'd need to move the texture yuv values in
- * the same reg for this to work. Therefore use MADs.
- * Brightness just adds to the off constant.
- * Contrast is multiplication of luminance.
- * Saturation and hue change the u and v coeffs.
- * Default values (before adjustments - depend on colorspace):
- * yco = 1.1643
- * uco = 0, -0.39173, 2.017
- * vco = 1.5958, -0.8129, 0
- * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
- * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
- * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
- *
- * temp = MAD(yco, yuv.yyyy, off)
- * temp = MAD(uco, yuv.uuuu, temp)
- * result = MAD(vco, yuv.vvvv, temp)
- */
- /* TODO: don't recalc consts always */
- const float Loff = -0.0627;
- const float Coff = -0.502;
- float uvcosf, uvsinf;
- float yco;
- float uco[3], vco[3], off[3];
- float bright, cont, gamma;
- int ref = pPriv->transform_index;
- Bool needgamma = FALSE;
-
- cont = RTFContrast(pPriv->contrast);
- bright = RTFBrightness(pPriv->brightness);
- gamma = (float)pPriv->gamma / 1000.0;
- uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
- uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
- /* overlay video also does pre-gamma contrast/sat adjust, should we? */
-
- yco = trans[ref].RefLuma * cont;
- uco[0] = -trans[ref].RefRCr * uvsinf;
- uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
- uco[2] = trans[ref].RefBCb * uvcosf;
- vco[0] = trans[ref].RefRCr * uvcosf;
- vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
- vco[2] = trans[ref].RefBCb * uvsinf;
- off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
- off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
- off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
-
- if (gamma != 1.0) {
- needgamma = TRUE;
- /* note: gamma correction is out = in ^ gamma;
- gpu can only do LG2/EX2 therefore we transform into
- in ^ gamma = 2 ^ (log2(in) * gamma).
- Lots of scalar ops, unfortunately (better solution?) -
- without gamma that's 3 inst, with gamma it's 10...
- could use different gamma factors per channel,
- if that's of any use. */
- }
-
- BEGIN_ACCEL(needgamma ? 28 + 33 : 33);
- /* 2 components: same 2 for tex0/1/2 */
- OUT_ACCEL_REG(R300_RS_COUNT,
- ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
-
- OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */
-
- /* Indirection levels */
- OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
- R300_FIRST_TEX));
-
- OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
- R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) |
- R300_TEX_CODE_OFFSET(0) |
- R300_TEX_CODE_SIZE(3)));
-
- OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
- R300_ALU_SIZE(needgamma ? 7 + 2 : 2) |
- R300_TEX_START(0) |
- R300_TEX_SIZE(2) |
- R300_RGBA_OUT));
-
- /* tex inst */
- OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(0) |
- R300_TEX_ID(0) |
- R300_TEX_INST(R300_TEX_INST_LD)));
- OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(1) |
- R300_TEX_ID(1) |
- R300_TEX_INST(R300_TEX_INST_LD)));
- OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(2) |
- R300_TEX_ID(2) |
- R300_TEX_INST(R300_TEX_INST_LD)));
-
- /* ALU inst */
- /* MAD temp0, const0.a, temp0, const0.rgb */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
- R300_ALU_RGB_ADDR1(0) |
- R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
- /* alpha nop, but need to set up alpha source for rgb usage */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
- R300_ALU_ALPHA_ADDR1(0) |
- R300_ALU_ALPHA_ADDR2(0) |
- R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* MAD const1, temp1, temp0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
- R300_ALU_RGB_ADDR1(1) |
- R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
- /* alpha nop */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* MAD result, const2, temp2, temp0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
- R300_ALU_RGB_ADDR1(2) |
- R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
- (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
- R300_ALU_RGB_CLAMP));
- /* write alpha 1 */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
- R300_ALU_ALPHA_TARGET_A));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
-
- if (needgamma) {
- /* rgb temp0.r = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha lg2 temp0, temp0.r */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* rgb temp0.g = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha lg2 temp0, temp0.g */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ /* Shader constants. */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0);
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0);
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0);
- /* rgb temp0.b = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha lg2 temp0, temp0.b */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0);
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0);
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0);
- /* MUL const1, temp1, temp0 */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR1(0) |
- R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
- /* alpha nop, but set up const1 */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* rgb out0.r = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) |
- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha ex2 temp0, temp0.r */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* rgb out0.g = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) |
- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha ex2 temp0, temp0.g */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
-
- /* rgb out0.b = op_sop, set up src0 reg */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) |
- R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B)));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9),
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
- /* alpha ex2 temp0, temp0.b */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
- R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
- }
-
- /* Shader constants. */
- /* constant 0: off, yco */
- OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco));
- /* constant 1: uco */
- OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma));
- /* constant 2: vco */
- OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2]));
- OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0));
-
- FINISH_ACCEL();
-
- } else {
- BEGIN_ACCEL(11);
- /* 2 components: 2 for tex0 */
- OUT_ACCEL_REG(R300_RS_COUNT,
- ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
-
- OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */
-
- /* Indirection levels */
- OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
- R300_FIRST_TEX));
-
- OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
- R300_ALU_CODE_SIZE(1) |
- R300_TEX_CODE_OFFSET(0) |
- R300_TEX_CODE_SIZE(1)));
-
- OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
- R300_ALU_SIZE(0) |
- R300_TEX_START(0) |
- R300_TEX_SIZE(0) |
- R300_RGBA_OUT));
-
- /* tex inst */
- OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
- R300_TEX_DST_ADDR(0) |
- R300_TEX_ID(0) |
- R300_TEX_INST(R300_TEX_INST_LD)));
-
- /* ALU inst */
- /* RGB */
- OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR1(0) |
- R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
- R300_ALU_RGB_MASK_G |
- R300_ALU_RGB_MASK_B)) |
- R300_ALU_RGB_TARGET_A));
- OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
- R300_ALU_RGB_CLAMP));
- /* Alpha */
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) |
- R300_ALU_ALPHA_ADDR1(0) |
- R300_ALU_ALPHA_ADDR2(0) |
- R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
- R300_ALU_ALPHA_TARGET_A |
- R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
- OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) |
- R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) |
- R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
- R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
- R300_ALU_ALPHA_CLAMP));
- FINISH_ACCEL();
- }
- } else {
- if (pPriv->bicubic_enabled) {
- BEGIN_ACCEL(7);
-
- /* 4 components: 2 for tex0 and 2 for tex1 */
- OUT_ACCEL_REG(R300_RS_COUNT,
- ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
-
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
-
- /* Pixel stack frame size. */
- OUT_ACCEL_REG(R300_US_PIXSIZE, 5);
-
- /* FP length. */
- OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
- R500_US_CODE_END_ADDR(13)));
- OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
- R500_US_CODE_RANGE_SIZE(13)));
-
- /* Prepare for FP emission. */
- OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
- FINISH_ACCEL();
-
- BEGIN_ACCEL(89);
- /* Pixel shader.
- * I've gone ahead and annotated each instruction, since this
- * thing is MASSIVE. :3
- * Note: In order to avoid buggies with temps and multiple
- * inputs, all temps are offset by 2. temp0 -> register2. */
-
- /* TEX temp2, input1.xxxx, tex1, 1D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
- R500_TEX_INST_LD |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
- R500_TEX_SRC_S_SWIZ_R |
- R500_TEX_SRC_T_SWIZ_R |
- R500_TEX_SRC_R_SWIZ_R |
- R500_TEX_SRC_Q_SWIZ_R |
- R500_TEX_DST_ADDR(2) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* TEX temp5, input1.yyyy, tex1, 1D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
- R500_TEX_INST_LD |
- R500_TEX_SEM_ACQUIRE |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
- R500_TEX_SRC_S_SWIZ_G |
- R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_SRC_R_SWIZ_G |
- R500_TEX_SRC_Q_SWIZ_G |
- R500_TEX_DST_ADDR(5) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* MUL temp4, const0.x0x0, temp2.yyxx */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_ADDR0_CONST |
- R500_RGB_ADDR1(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_ADDR0_CONST |
- R500_ALPHA_ADDR1(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_R |
- R500_ALU_RGB_G_SWIZ_A_0 |
- R500_ALU_RGB_B_SWIZ_A_R |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_G |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC0 |
- R500_ALPHA_SWIZ_A_0 |
- R500_ALPHA_SEL_B_SRC1 |
- R500_ALPHA_SWIZ_B_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_R_SWIZ_0 |
- R500_ALU_RGBA_G_SWIZ_0 |
- R500_ALU_RGBA_B_SWIZ_0 |
- R500_ALU_RGBA_A_SWIZ_0));
-
- /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_ADDR0_CONST |
- R500_RGB_ADDR1(5) |
- R500_RGB_ADDR2(4)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_ADDR0_CONST |
- R500_ALPHA_ADDR1(5) |
- R500_ALPHA_ADDR2(4)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_0 |
- R500_ALU_RGB_G_SWIZ_A_G |
- R500_ALU_RGB_B_SWIZ_A_0 |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_R |
- R500_ALU_RGB_B_SWIZ_B_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC0 |
- R500_ALPHA_SWIZ_A_G |
- R500_ALPHA_SEL_B_SRC1 |
- R500_ALPHA_SWIZ_B_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_A_SWIZ_A));
-
- /* ADD temp3, temp3, input0.xyxy */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) |
- R500_RGB_ADDR2(0)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) |
- R500_ALPHA_ADDR2(0)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 |
- R500_ALU_RGB_G_SWIZ_A_1 |
- R500_ALU_RGB_B_SWIZ_A_1 |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SWIZ_A_1 |
- R500_ALPHA_SEL_B_SRC1 |
- R500_ALPHA_SWIZ_B_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_R |
- R500_ALU_RGBA_A_SWIZ_G));
-
- /* TEX temp1, temp3.zwxy, tex0, 2D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) |
- R500_TEX_SRC_S_SWIZ_B |
- R500_TEX_SRC_T_SWIZ_A |
- R500_TEX_SRC_R_SWIZ_R |
- R500_TEX_SRC_Q_SWIZ_G |
- R500_TEX_DST_ADDR(1) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* TEX temp3, temp3.xyzw, tex0, 2D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_SEM_ACQUIRE |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) |
- R500_TEX_SRC_S_SWIZ_R |
- R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_SRC_R_SWIZ_B |
- R500_TEX_SRC_Q_SWIZ_A |
- R500_TEX_DST_ADDR(3) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_ADDR0_CONST |
- R500_RGB_ADDR1(5) |
- R500_RGB_ADDR2(4)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_ADDR0_CONST |
- R500_ALPHA_ADDR1(5) |
- R500_ALPHA_ADDR2(4)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_0 |
- R500_ALU_RGB_G_SWIZ_A_G |
- R500_ALU_RGB_B_SWIZ_A_0 |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_G |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_G));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC0 |
- R500_ALPHA_SWIZ_A_G |
- R500_ALPHA_SEL_B_SRC1 |
- R500_ALPHA_SWIZ_B_G));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_A_SWIZ_A));
-
- /* ADD temp0, temp4, input0.xyxy */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) |
- R500_RGB_ADDR2(0)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) |
- R500_ALPHA_ADDR2(0)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 |
- R500_ALU_RGB_G_SWIZ_A_1 |
- R500_ALU_RGB_B_SWIZ_A_1 |
- R500_ALU_RGB_SEL_B_SRC1 |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SWIZ_A_1 |
- R500_ALPHA_SEL_B_SRC1 |
- R500_ALPHA_SWIZ_B_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC2 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_R |
- R500_ALU_RGBA_A_SWIZ_G));
-
- /* TEX temp4, temp0.zwzw, tex0, 2D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
- R500_TEX_SRC_S_SWIZ_B |
- R500_TEX_SRC_T_SWIZ_A |
- R500_TEX_SRC_R_SWIZ_B |
- R500_TEX_SRC_Q_SWIZ_A |
- R500_TEX_DST_ADDR(4) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* TEX temp0, temp0.xyzw, tex0, 2D */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_SEM_ACQUIRE |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
- R500_TEX_SRC_S_SWIZ_R |
- R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_SRC_R_SWIZ_B |
- R500_TEX_SRC_Q_SWIZ_A |
- R500_TEX_DST_ADDR(0) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* LRP temp3, temp2.zzzz, temp1, temp3 ->
- * - PRESUB temps, temp1 - temp3
- * - MAD temp2.zzzz, temps, temp3 */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) |
- R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
- R500_RGB_ADDR1(1) |
- R500_RGB_ADDR2(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) |
- R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
- R500_ALPHA_ADDR1(1) |
- R500_ALPHA_ADDR2(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
- R500_ALU_RGB_R_SWIZ_A_B |
- R500_ALU_RGB_G_SWIZ_A_B |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRCP |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC2 |
- R500_ALPHA_SWIZ_A_B |
- R500_ALPHA_SEL_B_SRCP |
- R500_ALPHA_SWIZ_B_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC0 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_A_SWIZ_A));
-
- /* LRP temp0, temp2.zzzz, temp4, temp0 ->
- * - PRESUB temps, temp4 - temp1
- * - MAD temp2.zzzz, temps, temp0 */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
- R500_RGB_ADDR1(4) |
- R500_RGB_ADDR2(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
- R500_ALPHA_ADDR1(4) |
- R500_ALPHA_ADDR2(2)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
- R500_ALU_RGB_R_SWIZ_A_B |
- R500_ALU_RGB_G_SWIZ_A_B |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRCP |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC2 |
- R500_ALPHA_SWIZ_A_B |
- R500_ALPHA_SEL_B_SRCP |
- R500_ALPHA_SWIZ_B_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC0 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_A_SWIZ_A));
-
- /* LRP output, temp5.zzzz, temp3, temp0 ->
- * - PRESUB temps, temp3 - temp0
- * - MAD temp5.zzzz, temps, temp0 */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
- R500_INST_LAST |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK |
- R500_INST_RGB_OMASK_R |
- R500_INST_RGB_OMASK_G |
- R500_INST_RGB_OMASK_B |
- R500_INST_ALPHA_OMASK));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
- R500_RGB_ADDR1(3) |
- R500_RGB_ADDR2(5)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
- R500_ALPHA_ADDR1(3) |
- R500_ALPHA_ADDR2(5)));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
- R500_ALU_RGB_R_SWIZ_A_B |
- R500_ALU_RGB_G_SWIZ_A_B |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRCP |
- R500_ALU_RGB_R_SWIZ_B_R |
- R500_ALU_RGB_G_SWIZ_B_G |
- R500_ALU_RGB_B_SWIZ_B_B));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
- R500_ALPHA_OP_MAD |
- R500_ALPHA_SEL_A_SRC2 |
- R500_ALPHA_SWIZ_A_B |
- R500_ALPHA_SEL_B_SRCP |
- R500_ALPHA_SWIZ_B_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
- R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_SEL_C_SRC0 |
- R500_ALU_RGBA_R_SWIZ_R |
- R500_ALU_RGBA_G_SWIZ_G |
- R500_ALU_RGBA_B_SWIZ_B |
- R500_ALU_RGBA_A_SWIZ_A));
-
- /* Shader constants. */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0));
-
- /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w));
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h));
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0);
- OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0);
-
- FINISH_ACCEL();
-
- } else {
- BEGIN_ACCEL(19);
- /* 2 components: 2 for tex0 */
- OUT_ACCEL_REG(R300_RS_COUNT,
- ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
-
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
-
- /* Pixel stack frame size. */
- OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */
-
- /* FP length. */
- OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
- R500_US_CODE_END_ADDR(1)));
- OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
- R500_US_CODE_RANGE_SIZE(1)));
-
- /* Prepare for FP emission. */
- OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
-
- /* tex inst */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK |
- R500_INST_RGB_CLAMP |
- R500_INST_ALPHA_CLAMP));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_SEM_ACQUIRE |
- R500_TEX_IGNORE_UNCOVERED));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
- R500_TEX_SRC_S_SWIZ_R |
- R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_DST_ADDR(0) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
- R500_DX_S_SWIZ_R |
- R500_DX_T_SWIZ_R |
- R500_DX_R_SWIZ_R |
- R500_DX_Q_SWIZ_R |
- R500_DY_ADDR(0) |
- R500_DY_S_SWIZ_R |
- R500_DY_T_SWIZ_R |
- R500_DY_R_SWIZ_R |
- R500_DY_Q_SWIZ_R));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* ALU inst */
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_LAST |
- R500_INST_RGB_OMASK_R |
- R500_INST_RGB_OMASK_G |
- R500_INST_RGB_OMASK_B |
- R500_INST_ALPHA_OMASK |
- R500_INST_RGB_CLAMP |
- R500_INST_ALPHA_CLAMP));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_ADDR1(0) |
- R500_RGB_ADDR1_CONST |
- R500_RGB_ADDR2(0) |
- R500_RGB_ADDR2_CONST));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_ADDR1(0) |
- R500_ALPHA_ADDR1_CONST |
- R500_ALPHA_ADDR2(0) |
- R500_ALPHA_ADDR2_CONST));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_R |
- R500_ALU_RGB_G_SWIZ_A_G |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRC0 |
- R500_ALU_RGB_R_SWIZ_B_1 |
- R500_ALU_RGB_B_SWIZ_B_1 |
- R500_ALU_RGB_G_SWIZ_B_1));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
- R500_ALPHA_SWIZ_A_A |
- R500_ALPHA_SWIZ_B_1));
- OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_R_SWIZ_0 |
- R500_ALU_RGBA_G_SWIZ_0 |
- R500_ALU_RGBA_B_SWIZ_0 |
- R500_ALU_RGBA_A_SWIZ_0));
- FINISH_ACCEL();
- }
- }
-
- BEGIN_ACCEL(6);
- OUT_ACCEL_REG(R300_TX_INVALTAGS, 0);
- OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
-
- OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset);
- OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch);
-
- blendcntl = RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO;
- /* no need to enable blending */
- OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, blendcntl);
-
- OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count);
FINISH_ACCEL();
-
- } else {
-
- /* Same for R100/R200 */
- switch (pPixmap->drawable.bitsPerPixel) {
- case 16:
- if (pPixmap->drawable.depth == 15)
- dst_format = RADEON_COLOR_FORMAT_ARGB1555;
- else
- dst_format = RADEON_COLOR_FORMAT_RGB565;
- break;
- case 32:
- dst_format = RADEON_COLOR_FORMAT_ARGB8888;
- break;
- default:
- return;
- }
-
- if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
- isplanar = TRUE;
+ } else if (isplanar) {
+ /*
+ * y' = y - .0625
+ * u' = u - .5
+ * v' = v - .5;
+ *
+ * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
+ * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
+ * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
+ *
+ * DP3 might look like the straightforward solution
+ * but we'd need to move the texture yuv values in
+ * the same reg for this to work. Therefore use MADs.
+ * Brightness just adds to the off constant.
+ * Contrast is multiplication of luminance.
+ * Saturation and hue change the u and v coeffs.
+ * Default values (before adjustments - depend on colorspace):
+ * yco = 1.1643
+ * uco = 0, -0.39173, 2.017
+ * vco = 1.5958, -0.8129, 0
+ * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
+ * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
+ * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
+ *
+ * temp = MAD(yco, yuv.yyyy, off)
+ * temp = MAD(uco, yuv.uuuu, temp)
+ * result = MAD(vco, yuv.vvvv, temp)
+ */
+ /* TODO: don't recalc consts always */
+ const float Loff = -0.0627;
+ const float Coff = -0.502;
+ float uvcosf, uvsinf;
+ float yco;
+ float uco[3], vco[3], off[3];
+ float bright, cont, gamma;
+ int ref = pPriv->transform_index;
+ Bool needgamma = FALSE;
+
+ cont = RTFContrast(pPriv->contrast);
+ bright = RTFBrightness(pPriv->brightness);
+ gamma = (float)pPriv->gamma / 1000.0;
+ uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
+ uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
+ /* overlay video also does pre-gamma contrast/sat adjust, should we? */
+
+ yco = trans[ref].RefLuma * cont;
+ uco[0] = -trans[ref].RefRCr * uvsinf;
+ uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
+ uco[2] = trans[ref].RefBCb * uvcosf;
+ vco[0] = trans[ref].RefRCr * uvcosf;
+ vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
+ vco[2] = trans[ref].RefBCb * uvsinf;
+ off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
+ off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
+ off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
+
+ if (gamma != 1.0) {
+ needgamma = TRUE;
+ /* note: gamma correction is out = in ^ gamma;
+ gpu can only do LG2/EX2 therefore we transform into
+ in ^ gamma = 2 ^ (log2(in) * gamma).
+ Lots of scalar ops, unfortunately (better solution?) -
+ without gamma that's 3 inst, with gamma it's 10...
+ could use different gamma factors per channel,
+ if that's of any use. */
}
- if (isplanar) {
- txformat = RADEON_TXFORMAT_I8;
- } else {
- if (pPriv->id == FOURCC_UYVY)
- txformat = RADEON_TXFORMAT_YVYU422;
- else
- txformat = RADEON_TXFORMAT_VYUY422;
+ BEGIN_ACCEL(needgamma ? 28 + 33 : 33);
+ /* 2 components: same 2 for tex0/1/2 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */
+
+ /* Indirection levels */
+ OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
+ R300_FIRST_TEX));
+
+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
+ R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) |
+ R300_TEX_CODE_OFFSET(0) |
+ R300_TEX_CODE_SIZE(3)));
+
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
+ R300_ALU_SIZE(needgamma ? 7 + 2 : 2) |
+ R300_TEX_START(0) |
+ R300_TEX_SIZE(2) |
+ R300_RGBA_OUT));
+
+ /* tex inst */
+ OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(0) |
+ R300_TEX_ID(0) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+ OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(1) |
+ R300_TEX_ID(1) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+ OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(2) |
+ R300_TEX_ID(2) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+
+ /* ALU inst */
+ /* MAD temp0, const0.a, temp0, const0.rgb */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop, but need to set up alpha source for rgb usage */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) |
+ R300_ALU_ALPHA_ADDR1(0) |
+ R300_ALU_ALPHA_ADDR2(0) |
+ R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* MAD temp0, const1, temp1, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR1(1) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* MAD result, const2, temp2, temp0 */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) |
+ R300_ALU_RGB_ADDR1(2) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) |
+ (needgamma ? 0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
+ R300_ALU_RGB_CLAMP));
+ /* write alpha 1 */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
+ R300_ALU_ALPHA_TARGET_A));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
+
+ if (needgamma) {
+ /* rgb temp0.r = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.r */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb temp0.g = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.g */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb temp0.b = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha lg2 temp0, temp0.b */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* MUL temp0, temp0, const1.a (gamma) - done as MAD with a zero addend */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+ /* alpha nop, but set up const1 */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.r = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.r */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.g = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.g */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+ /* rgb out0.b = op_sop, set up src0 reg */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) |
+ R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B)));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9),
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE));
+ /* alpha ex2 temp0, temp0.b */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) |
+ R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
}
- txformat |= RADEON_TXFORMAT_NON_POWER2;
-
- colorpitch = dst_pitch >> pixel_shift;
+ /* Shader constants. */
+ /* constant 0: off, yco */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco));
+ /* constant 1: uco */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma));
+ /* constant 2: vco */
+ OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2]));
+ OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0));
- if (RADEONTilingEnabled(pScrn, pPixmap))
- colorpitch |= RADEON_COLOR_TILE_ENABLE;
-
- BEGIN_ACCEL(4);
-
- OUT_ACCEL_REG(RADEON_RB3D_CNTL,
- dst_format /*| RADEON_ALPHA_BLEND_ENABLE*/);
- OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
-
- OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
-
- OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL,
- RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
+ FINISH_ACCEL();
+ } else {
+ BEGIN_ACCEL(11);
+ /* 2 components: 2 for tex0 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */
+
+ /* Indirection levels */
+ OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
+ R300_FIRST_TEX));
+
+ OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
+ R300_ALU_CODE_SIZE(1) |
+ R300_TEX_CODE_OFFSET(0) |
+ R300_TEX_CODE_SIZE(1)));
+
+ OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
+ R300_ALU_SIZE(0) |
+ R300_TEX_START(0) |
+ R300_TEX_SIZE(0) |
+ R300_RGBA_OUT));
+
+ /* tex inst */
+ OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(0) |
+ R300_TEX_ID(0) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+
+ /* ALU inst */
+ /* RGB */
+ OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+ R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
+ R300_ALU_RGB_MASK_G |
+ R300_ALU_RGB_MASK_B)) |
+ R300_ALU_RGB_TARGET_A));
+ OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+ R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+ R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
+ R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+ R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+ R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
+ R300_ALU_RGB_CLAMP));
+ /* Alpha */
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) |
+ R300_ALU_ALPHA_ADDR1(0) |
+ R300_ALU_ALPHA_ADDR2(0) |
+ R300_ALU_ALPHA_ADDRD(0) |
+ R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
+ R300_ALU_ALPHA_TARGET_A |
+ R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) |
+ R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
+ R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) |
+ R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
+ R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
+ R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+ R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
+ R300_ALU_ALPHA_CLAMP));
FINISH_ACCEL();
+ }
+ BEGIN_ACCEL(6);
+ OUT_ACCEL_REG(R300_TX_INVALTAGS, 0);
+ OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
- if (IS_R200_3D) {
-
- info->accel_state->texW[0] = pPriv->w;
- info->accel_state->texH[0] = pPriv->h;
-
- if (isplanar) {
- /* note: in contrast to r300, use input biasing on uv components */
- const float Loff = -0.0627;
- float uvcosf, uvsinf;
- float yco, yoff;
- float uco[3], vco[3];
- float bright, cont, sat;
- int ref = pPriv->transform_index;
- float ucscale = 0.25, vcscale = 0.25;
- Bool needux8 = FALSE, needvx8 = FALSE;
-
- /* contrast can cause constant overflow, clamp */
- cont = RTFContrast(pPriv->contrast);
- if (cont * trans[ref].RefLuma > 2.0)
- cont = 2.0 / trans[ref].RefLuma;
- /* brightness is only from -0.5 to 0.5 should be safe */
- bright = RTFBrightness(pPriv->brightness);
- /* saturation can also cause overflow, clamp */
- sat = RTFSaturation(pPriv->saturation);
- if (sat * trans[ref].RefBCb > 4.0)
- sat = 4.0 / trans[ref].RefBCb;
- uvcosf = sat * cos(RTFHue(pPriv->hue));
- uvsinf = sat * sin(RTFHue(pPriv->hue));
-
- yco = trans[ref].RefLuma * cont;
- uco[0] = -trans[ref].RefRCr * uvsinf;
- uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
- uco[2] = trans[ref].RefBCb * uvcosf;
- vco[0] = trans[ref].RefRCr * uvcosf;
- vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
- vco[2] = trans[ref].RefBCb * uvsinf;
- yoff = Loff * yco + bright;
-
- if ((uco[0] > 2.0) || (uco[2] > 2.0)) {
- needux8 = TRUE;
- ucscale = 0.125;
- }
- if ((vco[0] > 2.0) || (vco[2] > 2.0)) {
- needvx8 = TRUE;
- vcscale = 0.125;
- }
-
- /* need 2 texcoord sets (even though they are identical) due
- to denormalization! hw apparently can't premultiply
- same coord set by different texture size */
- vtx_count = 6;
-
- txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
- (((((pPriv->h + 1 ) >> 1) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
- txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
- txpitch -= 32;
- txfilter = R200_MAG_FILTER_LINEAR |
- R200_MIN_FILTER_LINEAR |
- R200_CLAMP_S_CLAMP_LAST |
- R200_CLAMP_T_CLAMP_LAST;
-
- BEGIN_ACCEL(36);
-
- OUT_ACCEL_REG(RADEON_PP_CNTL,
- RADEON_TEX_0_ENABLE | RADEON_TEX_1_ENABLE | RADEON_TEX_2_ENABLE |
- RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE |
- RADEON_TEX_BLEND_2_ENABLE);
-
- OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
- OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
- (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
- (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
-
- OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
- OUT_ACCEL_REG(R200_PP_TXSIZE_0,
- (pPriv->w - 1) |
- ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
- OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
- OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
-
- OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
- OUT_ACCEL_REG(R200_PP_TXSIZE_1, txformat0);
- OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch);
- OUT_ACCEL_REG(R200_PP_TXOFFSET_1, pPriv->src_offset + pPriv->planeu_offset);
-
- OUT_ACCEL_REG(R200_PP_TXFILTER_2, txfilter);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_X_2, 0);
- OUT_ACCEL_REG(R200_PP_TXSIZE_2, txformat0);
- OUT_ACCEL_REG(R200_PP_TXPITCH_2, txpitch);
- OUT_ACCEL_REG(R200_PP_TXOFFSET_2, pPriv->src_offset + pPriv->planev_offset);
-
- /* similar to r300 code. Note the big problem is that hardware constants
- * are 8 bits only, representing 0.0-1.0. We can get that up (using bias
- * + scale) to -1.0-1.0 (but precision will suffer). AFAIK the hw actually
- * has 12 bits fractional precision (plus 1 sign bit, 3 range bits) but
- * the constants not. To get larger range can use output scale, but for
- * that 2.018 value we need a total scale by 8, which means the constants
- * really have no accuracy whatsoever (5 fractional bits only).
- * The only direct way to get high precision "constants" into the fragment
- * pipe I know of is to use the texcoord interpolator (not color, this one
- * is 8 bit only too), which seems a bit expensive. We're lucky though it
- * seems the values we need seem to fit better than worst case (get about
- * 6 fractional bits for this instead of 5, at least when not correcting for
- * hue/saturation/contrast/brightness, which is the same as for vco - yco and
- * yoff get 8 fractional bits). Try to preserve as much accuracy as possible
- * even with non-default saturation/hue/contrast/brightness adjustments,
- * it gets a little crazy and ultimately precision might still be lacking.
- *
- * A higher precision (8 fractional bits) version might just put uco into
- * a texcoord, and calculate a new vcoconst in the shader, like so:
- * cohelper = {1.0, 0.0, 0.0} - shouldn't use 0.5 since not exactly representable
- * vco = {1.5958 - 1.0, -0.8129 + 1.0, 1.0}
- * vcocalc = ADD temp, bias/scale(cohelper), vco
- * would in total use 4 tex units, 4 instructions which seems fairly
- * balanced for this architecture (instead of 3 + 3 for the solution here)
- *
- * temp = MAD(yco, yuv.yyyy, yoff)
- * temp = MAD(uco, yuv.uuuu, temp)
- * result = MAD(vco, yuv.vvvv, temp)
- *
- * note first mad produces actually scalar, hence we transform
- * it into a dp2a to get 8 bit precision of yco instead of 7 -
- * That's assuming hw correctly expands consts to internal precision.
- * (y * 1 + y * (yco - 1) + yoff)
- * temp = DP2A / 2 (yco, yuv.yyyy, yoff)
- * temp = MAD (uco / 4, yuv.uuuu * 2, temp)
- * result = MAD x2 (vco / 2, yuv.vvvv, temp)
- *
- * vco, uco need bias (and hence scale too)
- *
- */
+ OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset);
+ OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch);
- /* MAD temp0 / 2, const0.a * 2, temp0, -const0.rgb */
- OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
- R200_TXC_ARG_A_TFACTOR_COLOR |
- R200_TXC_ARG_B_R0_COLOR |
- R200_TXC_ARG_C_TFACTOR_COLOR |
- (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) |
- R200_TXC_OP_DOT2_ADD);
- OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
- (0 << R200_TXC_TFACTOR_SEL_SHIFT) |
- R200_TXC_SCALE_INV2 |
- R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0);
- OUT_ACCEL_REG(R200_PP_TXABLEND_0,
- R200_TXA_ARG_A_ZERO |
- R200_TXA_ARG_B_ZERO |
- R200_TXA_ARG_C_ZERO |
- R200_TXA_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
- R200_TXA_OUTPUT_REG_NONE);
-
- /* MAD temp0, (const1 - 0.5) * 2, (temp1 - 0.5) * 2, temp0 */
- OUT_ACCEL_REG(R200_PP_TXCBLEND_1,
- R200_TXC_ARG_A_TFACTOR_COLOR |
- R200_TXC_BIAS_ARG_A |
- R200_TXC_SCALE_ARG_A |
- R200_TXC_ARG_B_R1_COLOR |
- R200_TXC_BIAS_ARG_B |
- (needux8 ? R200_TXC_SCALE_ARG_B : 0) |
- R200_TXC_ARG_C_R0_COLOR |
- R200_TXC_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXCBLEND2_1,
- (1 << R200_TXC_TFACTOR_SEL_SHIFT) |
- R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R0);
- OUT_ACCEL_REG(R200_PP_TXABLEND_1,
- R200_TXA_ARG_A_ZERO |
- R200_TXA_ARG_B_ZERO |
- R200_TXA_ARG_C_ZERO |
- R200_TXA_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXABLEND2_1,
- R200_TXA_OUTPUT_REG_NONE);
-
- /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp2 - 0.5), temp0 */
- OUT_ACCEL_REG(R200_PP_TXCBLEND_2,
- R200_TXC_ARG_A_TFACTOR_COLOR |
- R200_TXC_BIAS_ARG_A |
- R200_TXC_SCALE_ARG_A |
- R200_TXC_ARG_B_R2_COLOR |
- R200_TXC_BIAS_ARG_B |
- (needvx8 ? R200_TXC_SCALE_ARG_B : 0) |
- R200_TXC_ARG_C_R0_COLOR |
- R200_TXC_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXCBLEND2_2,
- (2 << R200_TXC_TFACTOR_SEL_SHIFT) |
- R200_TXC_SCALE_2X |
- R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
- OUT_ACCEL_REG(R200_PP_TXABLEND_2,
- R200_TXA_ARG_A_ZERO |
- R200_TXA_ARG_B_ZERO |
- R200_TXA_ARG_C_ZERO |
- R200_TXA_COMP_ARG_C |
- R200_TXA_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXABLEND2_2,
- R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
-
- /* shader constants */
- OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */
- yco > 1.0 ? yco - 1.0: yco,
- yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */
- 0.0));
- OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */
- uco[1] * ucscale + 0.5, /* or [-2, 2] */
- uco[2] * ucscale + 0.5,
- 0.0));
- OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */
- vco[1] * vcscale + 0.5, /* or [-4, 4] */
- vco[2] * vcscale + 0.5,
- 0.0));
-
- FINISH_ACCEL();
- }
- else if (info->ChipFamily == CHIP_FAMILY_RV250) {
- /* fix up broken packed yuv - shader same as above except
- yuv components are all in same reg */
- /* note: in contrast to r300, use input biasing on uv components */
- const float Loff = -0.0627;
- float uvcosf, uvsinf;
- float yco, yoff;
- float uco[3], vco[3];
- float bright, cont, sat;
- int ref = pPriv->transform_index;
- float ucscale = 0.25, vcscale = 0.25;
- Bool needux8 = FALSE, needvx8 = FALSE;
-
- /* contrast can cause constant overflow, clamp */
- cont = RTFContrast(pPriv->contrast);
- if (cont * trans[ref].RefLuma > 2.0)
- cont = 2.0 / trans[ref].RefLuma;
- /* brightness is only from -0.5 to 0.5 should be safe */
- bright = RTFBrightness(pPriv->brightness);
- /* saturation can also cause overflow, clamp */
- sat = RTFSaturation(pPriv->saturation);
- if (sat * trans[ref].RefBCb > 4.0)
- sat = 4.0 / trans[ref].RefBCb;
- uvcosf = sat * cos(RTFHue(pPriv->hue));
- uvsinf = sat * sin(RTFHue(pPriv->hue));
-
- yco = trans[ref].RefLuma * cont;
- uco[0] = -trans[ref].RefRCr * uvsinf;
- uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
- uco[2] = trans[ref].RefBCb * uvcosf;
- vco[0] = trans[ref].RefRCr * uvcosf;
- vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
- vco[2] = trans[ref].RefBCb * uvsinf;
- yoff = Loff * yco + bright;
-
- if ((uco[0] > 2.0) || (uco[2] > 2.0)) {
- needux8 = TRUE;
- ucscale = 0.125;
- }
- if ((vco[0] > 2.0) || (vco[2] > 2.0)) {
- needvx8 = TRUE;
- vcscale = 0.125;
- }
-
- txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) |
- (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT));
- txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
- txpitch -= 32;
- txfilter = R200_MAG_FILTER_LINEAR |
- R200_MIN_FILTER_LINEAR |
- R200_CLAMP_S_CLAMP_LAST |
- R200_CLAMP_T_CLAMP_LAST;
-
- BEGIN_ACCEL(24);
-
- OUT_ACCEL_REG(RADEON_PP_CNTL,
- RADEON_TEX_0_ENABLE |
- RADEON_TEX_BLEND_0_ENABLE | RADEON_TEX_BLEND_1_ENABLE |
- RADEON_TEX_BLEND_2_ENABLE);
-
- OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
- OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
- (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
-
- OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
- OUT_ACCEL_REG(R200_PP_TXSIZE_0,
- (pPriv->w - 1) |
- ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
- OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
- OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
-
- /* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */
- OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
- R200_TXC_ARG_A_TFACTOR_COLOR |
- R200_TXC_ARG_B_R0_COLOR |
- R200_TXC_ARG_C_TFACTOR_COLOR |
- (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) |
- R200_TXC_OP_DOT2_ADD);
- OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
- (0 << R200_TXC_TFACTOR_SEL_SHIFT) |
- R200_TXC_SCALE_INV2 |
- (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_B_SHIFT) |
- R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1);
- OUT_ACCEL_REG(R200_PP_TXABLEND_0,
- R200_TXA_ARG_A_ZERO |
- R200_TXA_ARG_B_ZERO |
- R200_TXA_ARG_C_ZERO |
- R200_TXA_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
- R200_TXA_OUTPUT_REG_NONE);
-
- /* MAD temp1, (const1 - 0.5) * 2, (temp0.rrr - 0.5) * 2, temp1 */
- OUT_ACCEL_REG(R200_PP_TXCBLEND_1,
- R200_TXC_ARG_A_TFACTOR_COLOR |
- R200_TXC_BIAS_ARG_A |
- R200_TXC_SCALE_ARG_A |
- R200_TXC_ARG_B_R0_COLOR |
- R200_TXC_BIAS_ARG_B |
- (needux8 ? R200_TXC_SCALE_ARG_B : 0) |
- R200_TXC_ARG_C_R1_COLOR |
- R200_TXC_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXCBLEND2_1,
- (1 << R200_TXC_TFACTOR_SEL_SHIFT) |
- (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_B_SHIFT) |
- R200_TXC_CLAMP_8_8 | R200_TXC_OUTPUT_REG_R1);
- OUT_ACCEL_REG(R200_PP_TXABLEND_1,
- R200_TXA_ARG_A_ZERO |
- R200_TXA_ARG_B_ZERO |
- R200_TXA_ARG_C_ZERO |
- R200_TXA_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXABLEND2_1,
- R200_TXA_OUTPUT_REG_NONE);
-
- /* MAD temp0 x 2, (const2 - 0.5) * 2, (temp0.bbb - 0.5), temp1 */
- OUT_ACCEL_REG(R200_PP_TXCBLEND_2,
- R200_TXC_ARG_A_TFACTOR_COLOR |
- R200_TXC_BIAS_ARG_A |
- R200_TXC_SCALE_ARG_A |
- R200_TXC_ARG_B_R0_COLOR |
- R200_TXC_BIAS_ARG_B |
- (needvx8 ? R200_TXC_SCALE_ARG_B : 0) |
- R200_TXC_ARG_C_R1_COLOR |
- R200_TXC_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXCBLEND2_2,
- (2 << R200_TXC_TFACTOR_SEL_SHIFT) |
- R200_TXC_SCALE_2X |
- (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_B_SHIFT) |
- R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
- OUT_ACCEL_REG(R200_PP_TXABLEND_2,
- R200_TXA_ARG_A_ZERO |
- R200_TXA_ARG_B_ZERO |
- R200_TXA_ARG_C_ZERO |
- R200_TXA_COMP_ARG_C |
- R200_TXA_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXABLEND2_2,
- R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
-
- /* shader constants */
- OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */
- yco > 1.0 ? yco - 1.0: yco,
- yoff < 0 ? -yoff : yoff, /* range special [-1, 1] */
- 0.0));
- OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */
- uco[1] * ucscale + 0.5, /* or [-2, 2] */
- uco[2] * ucscale + 0.5,
- 0.0));
- OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */
- vco[1] * vcscale + 0.5, /* or [-4, 4] */
- vco[2] * vcscale + 0.5,
- 0.0));
-
- FINISH_ACCEL();
- }
- else {
- BEGIN_ACCEL(13);
- OUT_ACCEL_REG(RADEON_PP_CNTL,
- RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
-
- OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
- OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
- (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
-
- OUT_ACCEL_REG(R200_PP_TXFILTER_0,
- R200_MAG_FILTER_LINEAR |
- R200_MIN_FILTER_LINEAR |
- R200_CLAMP_S_CLAMP_LAST |
- R200_CLAMP_T_CLAMP_LAST |
- R200_YUV_TO_RGB);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
- OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
- OUT_ACCEL_REG(R200_PP_TXSIZE_0,
- (pPriv->w - 1) |
- ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
- OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
-
- OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
-
- OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
- R200_TXC_ARG_A_ZERO |
- R200_TXC_ARG_B_ZERO |
- R200_TXC_ARG_C_R0_COLOR |
- R200_TXC_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
- R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
- OUT_ACCEL_REG(R200_PP_TXABLEND_0,
- R200_TXA_ARG_A_ZERO |
- R200_TXA_ARG_B_ZERO |
- R200_TXA_ARG_C_R0_ALPHA |
- R200_TXA_OP_MADD);
- OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
- R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
- FINISH_ACCEL();
- }
- } else {
+ /* no need to enable blending */
+ OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
- info->accel_state->texW[0] = 1;
- info->accel_state->texH[0] = 1;
-
- BEGIN_ACCEL(9);
-
- OUT_ACCEL_REG(RADEON_PP_CNTL,
- RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
-
- OUT_ACCEL_REG(RADEON_SE_VTX_FMT, (RADEON_SE_VTX_FMT_XY |
- RADEON_SE_VTX_FMT_ST0));
-
- OUT_ACCEL_REG(RADEON_PP_TXFILTER_0,
- RADEON_MAG_FILTER_LINEAR |
- RADEON_MIN_FILTER_LINEAR |
- RADEON_CLAMP_S_CLAMP_LAST |
- RADEON_CLAMP_T_CLAMP_LAST |
- RADEON_YUV_TO_RGB);
- OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
- OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, pPriv->src_offset);
- OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0,
- RADEON_COLOR_ARG_A_ZERO |
- RADEON_COLOR_ARG_B_ZERO |
- RADEON_COLOR_ARG_C_T0_COLOR |
- RADEON_BLEND_CTL_ADD |
- RADEON_CLAMP_TX);
- OUT_ACCEL_REG(RADEON_PP_TXABLEND_0,
- RADEON_ALPHA_ARG_A_ZERO |
- RADEON_ALPHA_ARG_B_ZERO |
- RADEON_ALPHA_ARG_C_T0_ALPHA |
- RADEON_BLEND_CTL_ADD |
- RADEON_CLAMP_TX);
-
- OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
- (pPriv->w - 1) |
- ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
- OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0,
- pPriv->src_pitch - 32);
- FINISH_ACCEL();
- }
- }
+ OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count);
+ FINISH_ACCEL();
if (pPriv->vsync) {
xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn,
@@ -2257,92 +2030,49 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
ErrorF("src: %d, %d, %d, %d\n", srcX, srcY, srcw, srch);
#endif
- if (IS_R300_3D || IS_R500_3D) {
- if (IS_R300_3D && ((dstw+dsth) > 2880))
- use_quad = TRUE;
- /*
- * Set up the scissor area to that of the output size.
- */
- BEGIN_ACCEL(2);
- if (IS_R300_3D) {
- /* R300 has an offset */
- OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX + 1088) << R300_SCISSOR_X_SHIFT) |
- ((dstY + 1088) << R300_SCISSOR_Y_SHIFT)));
- OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw + 1088 - 1) << R300_SCISSOR_X_SHIFT) |
- ((dstY + dsth + 1088 - 1) << R300_SCISSOR_Y_SHIFT)));
- } else {
- OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX) << R300_SCISSOR_X_SHIFT) |
- ((dstY) << R300_SCISSOR_Y_SHIFT)));
- OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw - 1) << R300_SCISSOR_X_SHIFT) |
- ((dstY + dsth - 1) << R300_SCISSOR_Y_SHIFT)));
- }
- FINISH_ACCEL();
- }
+ if (IS_R300_3D && ((dstw+dsth) > 2880))
+ use_quad = TRUE;
+ /*
+ * Set up the scissor area to that of the output size.
+ */
+ BEGIN_ACCEL(2);
+ /* R300 has an offset */
+ OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX + 1088) << R300_SCISSOR_X_SHIFT) |
+ ((dstY + 1088) << R300_SCISSOR_Y_SHIFT)));
+ OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw + 1088 - 1) << R300_SCISSOR_X_SHIFT) |
+ ((dstY + dsth + 1088 - 1) << R300_SCISSOR_Y_SHIFT)));
+ FINISH_ACCEL();
#ifdef ACCEL_CP
- if (info->ChipFamily < CHIP_FAMILY_R200) {
- BEGIN_RING(3 * vtx_count + 3);
- OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
- 3 * vtx_count + 1));
- OUT_RING(RADEON_CP_VC_FRMT_XY |
- RADEON_CP_VC_FRMT_ST0);
- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
+ if (use_quad) {
+ BEGIN_RING(4 * vtx_count + 4);
+ OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
+ 4 * vtx_count));
+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
RADEON_CP_VC_CNTL_PRIM_WALK_RING |
- RADEON_CP_VC_CNTL_MAOS_ENABLE |
- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
- (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
- } else if (IS_R300_3D || IS_R500_3D) {
- if (use_quad) {
- BEGIN_RING(4 * vtx_count + 4);
- OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
- 4 * vtx_count));
- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
- RADEON_CP_VC_CNTL_PRIM_WALK_RING |
- (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
- } else {
- BEGIN_RING(3 * vtx_count + 4);
- OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
- 3 * vtx_count));
- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST |
- RADEON_CP_VC_CNTL_PRIM_WALK_RING |
- (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
- }
+ (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
} else {
- BEGIN_RING(3 * vtx_count + 2);
+ BEGIN_RING(3 * vtx_count + 4);
OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
3 * vtx_count));
- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST |
RADEON_CP_VC_CNTL_PRIM_WALK_RING |
(3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
}
#else /* ACCEL_CP */
- if (IS_R300_3D || IS_R500_3D) {
- if (use_quad)
- BEGIN_ACCEL(2 + vtx_count * 4);
- else
- BEGIN_ACCEL(2 + vtx_count * 3);
- } else
- BEGIN_ACCEL(1 + vtx_count * 3);
-
- if (info->ChipFamily < CHIP_FAMILY_R200)
- OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
+ if (use_quad)
+ BEGIN_ACCEL(2 + vtx_count * 4);
+ else
+ BEGIN_ACCEL(2 + vtx_count * 3);
+
+ if (use_quad)
+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
RADEON_VF_PRIM_WALK_DATA |
- RADEON_VF_RADEON_MODE |
- (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
- else if (IS_R300_3D || IS_R500_3D) {
- if (use_quad)
- OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
- RADEON_VF_PRIM_WALK_DATA |
- (4 << RADEON_VF_NUM_VERTICES_SHIFT)));
- else
- OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST |
- RADEON_VF_PRIM_WALK_DATA |
- (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
- } else
- OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
+ (4 << RADEON_VF_NUM_VERTICES_SHIFT)));
+ else
+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST |
RADEON_VF_PRIM_WALK_DATA |
(3 << RADEON_VF_NUM_VERTICES_SHIFT)));
-
#endif
if (pPriv->bicubic_enabled) {
/*
@@ -2376,61 +2106,33 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
(float)srcY + 0.5);
}
} else {
- if (IS_R300_3D || IS_R500_3D) {
- if (use_quad) {
- VTX_OUT((float)dstX, (float)dstY,
- (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
- VTX_OUT((float)dstX, (float)(dstY + dsth),
- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
- VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth),
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
- VTX_OUT((float)(dstX + dstw), (float)dstY,
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
- } else {
- /*
- * Render a big, scissored triangle. This means
- * increasing the triangle size and adjusting
- * texture coordinates.
- */
- VTX_OUT((float)dstX, (float)dstY,
- (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
- VTX_OUT((float)dstX, (float)(dstY + dsth + dstw),
- (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0]);
-
- VTX_OUT((float)(dstX + dstw + dsth), (float)dstY,
- ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
- (float)srcY / info->accel_state->texH[0]);
- }
- } else if (isplanar) {
- /*
- * Just render a rect (using three coords).
- * Filter is a bit a misnomer, it's just texcoords...
- */
- VTX_OUT_FILTER((float)dstX, (float)(dstY + dsth),
- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
- (float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
- VTX_OUT_FILTER((float)(dstX + dstw), (float)(dstY + dsth),
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0],
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
- VTX_OUT_FILTER((float)(dstX + dstw), (float)dstY,
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
- (float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
- } else {
- /*
- * Just render a rect (using three coords).
- */
+ if (use_quad) {
+ VTX_OUT((float)dstX, (float)dstY,
+ (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
VTX_OUT((float)dstX, (float)(dstY + dsth),
(float)srcX / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
VTX_OUT((float)(dstX + dstw), (float)(dstY + dsth),
(float)(srcX + srcw) / info->accel_state->texW[0], (float)(srcY + srch) / info->accel_state->texH[0]);
VTX_OUT((float)(dstX + dstw), (float)dstY,
(float)(srcX + srcw) / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
+ } else {
+ /*
+ * Render a big, scissored triangle. This means
+ * increasing the triangle size and adjusting
+ * texture coordinates.
+ */
+ VTX_OUT((float)dstX, (float)dstY,
+ (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
+ VTX_OUT((float)dstX, (float)(dstY + dsth + dstw),
+ (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0]);
+ VTX_OUT((float)(dstX + dstw + dsth), (float)dstY,
+ ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
+ (float)srcY / info->accel_state->texH[0]);
}
}
- if (IS_R300_3D || IS_R500_3D)
- /* flushing is pipelined, free/finish is not */
- OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+ /* flushing is pipelined, free/finish is not */
+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
#ifdef ACCEL_CP
ADVANCE_RING();
@@ -2441,12 +2143,1000 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
pBox++;
}
- if (IS_R300_3D || IS_R500_3D) {
- BEGIN_ACCEL(3);
- OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
- OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
+ BEGIN_ACCEL(3);
+ OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+ FINISH_ACCEL();
+
+ DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
+}
+
+static void
+FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
+{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ PixmapPtr pPixmap = pPriv->pPixmap;
+ uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
+ uint32_t dst_offset, dst_pitch, dst_format;
+ uint32_t txenable, colorpitch;
+ uint32_t output_fmt;
+ Bool isplanar = FALSE;
+ int dstxoff, dstyoff, pixel_shift, vtx_count;
+ BoxPtr pBox = REGION_RECTS(&pPriv->clip);
+ int nBox = REGION_NUM_RECTS(&pPriv->clip);
+ ACCEL_PREAMBLE();
+
+ pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
+
+#ifdef USE_EXA
+ if (info->useEXA) {
+ dst_offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
+ dst_pitch = exaGetPixmapPitch(pPixmap);
} else
- BEGIN_ACCEL(1);
+#endif
+ {
+ dst_offset = (pPixmap->devPrivate.ptr - info->FB) +
+ info->fbLocation + pScrn->fbOffset;
+ dst_pitch = pPixmap->devKind;
+ }
+
+#ifdef COMPOSITE
+ dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
+ dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
+#else
+ dstxoff = 0;
+ dstyoff = 0;
+#endif
+
+#ifdef USE_EXA
+ if (info->useEXA) {
+ RADEON_SWITCH_TO_3D();
+ } else
+#endif
+ {
+ BEGIN_ACCEL(2);
+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+ /* We must wait for 3d to idle, in case source was just written as a dest. */
+ OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
+ RADEON_WAIT_HOST_IDLECLEAN |
+ RADEON_WAIT_2D_IDLECLEAN |
+ RADEON_WAIT_3D_IDLECLEAN |
+ RADEON_WAIT_DMA_GUI_IDLE);
+ FINISH_ACCEL();
+
+ if (!info->accel_state->XInited3D)
+ RADEONInit3DEngine(pScrn);
+ }
+
+ if (pPriv->bicubic_enabled)
+ vtx_count = 6;
+ else
+ vtx_count = 4;
+
+ switch (pPixmap->drawable.bitsPerPixel) {
+ case 16:
+ if (pPixmap->drawable.depth == 15)
+ dst_format = R300_COLORFORMAT_ARGB1555;
+ else
+ dst_format = R300_COLORFORMAT_RGB565;
+ break;
+ case 32:
+ dst_format = R300_COLORFORMAT_ARGB8888;
+ break;
+ default:
+ return;
+ }
+
+ output_fmt = (R300_OUT_FMT_C4_8 |
+ R300_OUT_FMT_C0_SEL_BLUE |
+ R300_OUT_FMT_C1_SEL_GREEN |
+ R300_OUT_FMT_C2_SEL_RED |
+ R300_OUT_FMT_C3_SEL_ALPHA);
+
+ colorpitch = dst_pitch >> pixel_shift;
+ colorpitch |= dst_format;
+
+ if (RADEONTilingEnabled(pScrn, pPixmap))
+ colorpitch |= R300_COLORTILE;
+
+ if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+ isplanar = TRUE;
+ }
+
+ if (isplanar) {
+ txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0;
+ txpitch = pPriv->src_pitch;
+ } else {
+ if (pPriv->id == FOURCC_UYVY)
+ txformat1 = R300_TX_FORMAT_YVYU422;
+ else
+ txformat1 = R300_TX_FORMAT_VYUY422;
+
+ txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
+
+ /* pitch is in pixels */
+ txpitch = pPriv->src_pitch / 2;
+ }
+ txpitch -= 1;
+
+ txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
+ (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
+
+ info->accel_state->texW[0] = pPriv->w;
+ info->accel_state->texH[0] = pPriv->h;
+
+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_MAG_FILTER_LINEAR |
+ R300_TX_MIN_FILTER_LINEAR |
+ (0 << R300_TX_ID_SHIFT));
+
+
+ if ((pPriv->w - 1) & 0x800)
+ txpitch |= R500_TXWIDTH_11;
+
+ if ((pPriv->h - 1) & 0x800)
+ txpitch |= R500_TXHEIGHT_11;
+
+ txoffset = pPriv->src_offset;
+
+ BEGIN_ACCEL(6);
+ OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter);
+ OUT_ACCEL_REG(R300_TX_FILTER1_0, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_0, txoffset);
+ FINISH_ACCEL();
+
+ txenable = R300_TEX_0_ENABLE;
+
+ if (isplanar) {
+ txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
+ (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
+ txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;
+ txpitch -= 1;
+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_MIN_FILTER_LINEAR |
+ R300_TX_MAG_FILTER_LINEAR);
+
+ BEGIN_ACCEL(12);
+ OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT));
+ OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset);
+ OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
+ OUT_ACCEL_REG(R300_TX_FILTER1_2, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset);
+ FINISH_ACCEL();
+ txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE;
+ }
+
+ if (pPriv->bicubic_enabled) {
+ /* Size is 128x1 */
+ txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) |
+ (0x0 << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
+ /* Format is 16-bit floats, four channels per texel (FL_R16G16B16A16) */
+ txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16);
+ /* Pitch is 127 (128-1) */
+ txpitch = 0x7f;
+ /* Tex filter */
+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) |
+ R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) |
+ R300_TX_MIN_FILTER_NEAREST |
+ R300_TX_MAG_FILTER_NEAREST |
+ (1 << R300_TX_ID_SHIFT));
+
+ BEGIN_ACCEL(6);
+ OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter);
+ OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
+ OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
+ OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1);
+ OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
+ OUT_ACCEL_REG(R300_TX_OFFSET_1, pPriv->bicubic_src_offset);
+ FINISH_ACCEL();
+
+ /* Enable tex 1 */
+ txenable |= R300_TEX_1_ENABLE;
+ }
+
+ /* setup the VAP */
+ if (info->accel_state->has_tcl) {
+ if (pPriv->bicubic_enabled)
+ BEGIN_ACCEL(7);
+ else
+ BEGIN_ACCEL(6);
+ } else {
+ if (pPriv->bicubic_enabled)
+ BEGIN_ACCEL(5);
+ else
+ BEGIN_ACCEL(4);
+ }
+
+ /* These registers define the number, type, and location of data submitted
+ * to the PVS unit or GA input (when PVS is disabled)
+ * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
+ * enabled. This memory provides the inputs to the vertex shader program
+ * and ordering is not important. When PVS/TCL is disabled, this field maps
+ * directly to the GA input memory and the order is significant. In
+ * PVS_BYPASS mode the order is as follows:
+ * Position
+ * Point Size
+ * Color 0-3
+ * Textures 0-7
+ * Fog
+ */
+ if (pPriv->bicubic_enabled) {
+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+ (0 << R300_SKIP_DWORDS_0_SHIFT) |
+ (0 << R300_DST_VEC_LOC_0_SHIFT) |
+ R300_SIGNED_0 |
+ (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+ (0 << R300_SKIP_DWORDS_1_SHIFT) |
+ (6 << R300_DST_VEC_LOC_1_SHIFT) |
+ R300_SIGNED_1));
+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
+ (0 << R300_SKIP_DWORDS_2_SHIFT) |
+ (7 << R300_DST_VEC_LOC_2_SHIFT) |
+ R300_LAST_VEC_2 |
+ R300_SIGNED_2));
+ } else {
+ OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+ (0 << R300_SKIP_DWORDS_0_SHIFT) |
+ (0 << R300_DST_VEC_LOC_0_SHIFT) |
+ R300_SIGNED_0 |
+ (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+ (0 << R300_SKIP_DWORDS_1_SHIFT) |
+ (6 << R300_DST_VEC_LOC_1_SHIFT) |
+ R300_LAST_VEC_1 |
+ R300_SIGNED_1));
+ }
+
+ /* load the vertex shader
+ * We pre-load vertex programs in RADEONInit3DEngine():
+ * - exa mask/Xv bicubic
+ * - exa no mask
+ * - Xv
+ * Here we select the offset of the vertex program we want to use
+ */
+ if (info->accel_state->has_tcl) {
+ if (pPriv->bicubic_enabled) {
+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
+ ((0 << R300_PVS_FIRST_INST_SHIFT) |
+ (2 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (2 << R300_PVS_LAST_INST_SHIFT)));
+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
+ (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+ } else {
+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
+ ((5 << R300_PVS_FIRST_INST_SHIFT) |
+ (6 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (6 << R300_PVS_LAST_INST_SHIFT)));
+ OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
+ (6 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+ }
+ }
+
+ /* Position and one set of 2 texture coordinates (two sets when bicubic is enabled) */
+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
+ if (pPriv->bicubic_enabled)
+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
+ (2 << R300_TEX_1_COMP_CNT_SHIFT)));
+ else
+ OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT));
+
+ OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt);
+ FINISH_ACCEL();
+
+ /* setup pixel shader */
+ if (pPriv->bicubic_enabled) {
+ BEGIN_ACCEL(7);
+
+ /* 4 components: 2 for tex0 and 2 for tex1 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
+
+ /* Pixel stack frame size. */
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 5);
+
+ /* FP length. */
+ OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
+ R500_US_CODE_END_ADDR(13)));
+ OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
+ R500_US_CODE_RANGE_SIZE(13)));
+
+ /* Prepare for FP emission. */
+ OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
+ FINISH_ACCEL();
+
+ BEGIN_ACCEL(89);
+ /* Pixel shader.
+ * I've gone ahead and annotated each instruction, since this
+ * thing is MASSIVE. :3
+ * Note: In order to avoid buggies with temps and multiple
+ * inputs, all temps are offset by 2. temp0 -> register2. */
+
+ /* TEX temp2, input1.xxxx, tex1, 1D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
+ R500_TEX_INST_LD |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_R |
+ R500_TEX_SRC_R_SWIZ_R |
+ R500_TEX_SRC_Q_SWIZ_R |
+ R500_TEX_DST_ADDR(2) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* TEX temp5, input1.yyyy, tex1, 1D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(1) |
+ R500_TEX_INST_LD |
+ R500_TEX_SEM_ACQUIRE |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(1) |
+ R500_TEX_SRC_S_SWIZ_G |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_SRC_R_SWIZ_G |
+ R500_TEX_SRC_Q_SWIZ_G |
+ R500_TEX_DST_ADDR(5) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* MUL temp4, const0.x0x0, temp2.yyxx */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_0 |
+ R500_ALU_RGB_B_SWIZ_A_R |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_G |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC0 |
+ R500_ALPHA_SWIZ_A_0 |
+ R500_ALPHA_SEL_B_SRC1 |
+ R500_ALPHA_SWIZ_B_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_R_SWIZ_0 |
+ R500_ALU_RGBA_G_SWIZ_0 |
+ R500_ALU_RGBA_B_SWIZ_0 |
+ R500_ALU_RGBA_A_SWIZ_0));
+
+ /* MAD temp3, const0.0y0y, temp5.xxxx, temp4 */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(5) |
+ R500_RGB_ADDR2(4)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(5) |
+ R500_ALPHA_ADDR2(4)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_0 |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_0 |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_R |
+ R500_ALU_RGB_B_SWIZ_B_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC0 |
+ R500_ALPHA_SWIZ_A_G |
+ R500_ALPHA_SEL_B_SRC1 |
+ R500_ALPHA_SWIZ_B_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_A_SWIZ_A));
+
+ /* ADD temp3, temp3, input0.xyxy */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(3) |
+ R500_RGB_ADDR2(0)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(3) |
+ R500_ALPHA_ADDR2(0)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 |
+ R500_ALU_RGB_G_SWIZ_A_1 |
+ R500_ALU_RGB_B_SWIZ_A_1 |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SWIZ_A_1 |
+ R500_ALPHA_SEL_B_SRC1 |
+ R500_ALPHA_SWIZ_B_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_R |
+ R500_ALU_RGBA_A_SWIZ_G));
+
+ /* TEX temp1, temp3.zwxy, tex0, 2D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) |
+ R500_TEX_SRC_S_SWIZ_B |
+ R500_TEX_SRC_T_SWIZ_A |
+ R500_TEX_SRC_R_SWIZ_R |
+ R500_TEX_SRC_Q_SWIZ_G |
+ R500_TEX_DST_ADDR(1) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* TEX temp3, temp3.xyzw, tex0, 2D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_SEM_ACQUIRE |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(3) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_SRC_R_SWIZ_B |
+ R500_TEX_SRC_Q_SWIZ_A |
+ R500_TEX_DST_ADDR(3) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* MAD temp4, const0.0y0y, temp5.yyyy, temp4 */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR0_CONST |
+ R500_RGB_ADDR1(5) |
+ R500_RGB_ADDR2(4)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR0_CONST |
+ R500_ALPHA_ADDR1(5) |
+ R500_ALPHA_ADDR2(4)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_0 |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_0 |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_G |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_G));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(4) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC0 |
+ R500_ALPHA_SWIZ_A_G |
+ R500_ALPHA_SEL_B_SRC1 |
+ R500_ALPHA_SWIZ_B_G));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(4) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_A_SWIZ_A));
+
+ /* ADD temp0, temp4, input0.xyxy */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR1(4) |
+ R500_RGB_ADDR2(0)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR1(4) |
+ R500_ALPHA_ADDR2(0)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_R_SWIZ_A_1 |
+ R500_ALU_RGB_G_SWIZ_A_1 |
+ R500_ALU_RGB_B_SWIZ_A_1 |
+ R500_ALU_RGB_SEL_B_SRC1 |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SWIZ_A_1 |
+ R500_ALPHA_SEL_B_SRC1 |
+ R500_ALPHA_SWIZ_B_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC2 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_R |
+ R500_ALU_RGBA_A_SWIZ_G));
+
+ /* TEX temp4, temp0.zwzw, tex0, 2D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_B |
+ R500_TEX_SRC_T_SWIZ_A |
+ R500_TEX_SRC_R_SWIZ_B |
+ R500_TEX_SRC_Q_SWIZ_A |
+ R500_TEX_DST_ADDR(4) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* TEX temp0, temp0.xyzw, tex0, 2D */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_SEM_ACQUIRE |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_SRC_R_SWIZ_B |
+ R500_TEX_SRC_Q_SWIZ_A |
+ R500_TEX_DST_ADDR(0) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* LRP temp3, temp2.zzzz, temp1, temp3 ->
+ * - PRESUB temps, temp1 - temp3
+ * - MAD temp2.zzzz, temps, temp3 */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(3) |
+ R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
+ R500_RGB_ADDR1(1) |
+ R500_RGB_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(3) |
+ R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
+ R500_ALPHA_ADDR1(1) |
+ R500_ALPHA_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
+ R500_ALU_RGB_R_SWIZ_A_B |
+ R500_ALU_RGB_G_SWIZ_A_B |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRCP |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(3) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC2 |
+ R500_ALPHA_SWIZ_A_B |
+ R500_ALPHA_SEL_B_SRCP |
+ R500_ALPHA_SWIZ_B_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(3) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC0 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_A_SWIZ_A));
+
+ /* LRP temp0, temp2.zzzz, temp4, temp0 ->
+ * - PRESUB temps, temp4 - temp0
+ * - MAD temp2.zzzz, temps, temp0 */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_ALU |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
+ R500_RGB_ADDR1(4) |
+ R500_RGB_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
+ R500_ALPHA_ADDR1(4) |
+ R500_ALPHA_ADDR2(2)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
+ R500_ALU_RGB_R_SWIZ_A_B |
+ R500_ALU_RGB_G_SWIZ_A_B |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRCP |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC2 |
+ R500_ALPHA_SWIZ_A_B |
+ R500_ALPHA_SEL_B_SRCP |
+ R500_ALPHA_SWIZ_B_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC0 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_A_SWIZ_A));
+
+ /* LRP output, temp5.zzzz, temp3, temp0 ->
+ * - PRESUB temps, temp3 - temp0
+ * - MAD temp5.zzzz, temps, temp0 */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
+ R500_INST_LAST |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK |
+ R500_INST_RGB_OMASK_R |
+ R500_INST_RGB_OMASK_G |
+ R500_INST_RGB_OMASK_B |
+ R500_INST_ALPHA_OMASK));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 |
+ R500_RGB_ADDR1(3) |
+ R500_RGB_ADDR2(5)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_SRCP_OP_A1_MINUS_A0 |
+ R500_ALPHA_ADDR1(3) |
+ R500_ALPHA_ADDR2(5)));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC2 |
+ R500_ALU_RGB_R_SWIZ_A_B |
+ R500_ALU_RGB_G_SWIZ_A_B |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRCP |
+ R500_ALU_RGB_R_SWIZ_B_R |
+ R500_ALU_RGB_G_SWIZ_B_G |
+ R500_ALU_RGB_B_SWIZ_B_B));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDRD(0) |
+ R500_ALPHA_OP_MAD |
+ R500_ALPHA_SEL_A_SRC2 |
+ R500_ALPHA_SWIZ_A_B |
+ R500_ALPHA_SEL_B_SRCP |
+ R500_ALPHA_SWIZ_B_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_ADDRD(0) |
+ R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_SEL_C_SRC0 |
+ R500_ALU_RGBA_R_SWIZ_R |
+ R500_ALU_RGBA_G_SWIZ_G |
+ R500_ALU_RGBA_B_SWIZ_B |
+ R500_ALU_RGBA_A_SWIZ_A));
+
+ /* Shader constants. */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_CONST_INDEX(0));
+
+ /* const0 = {1 / texture[0].width, 1 / texture[0].height, 0, 0} */
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->w));
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, (1.0/(float)pPriv->h));
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0);
+ OUT_ACCEL_REG_F(R500_GA_US_VECTOR_DATA, 0x0);
+
+ FINISH_ACCEL();
+
+ } else {
+ BEGIN_ACCEL(19);
+ /* 2 components: 2 for tex0 */
+ OUT_ACCEL_REG(R300_RS_COUNT,
+ ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+ /* Pixel stack frame size. */
+ OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */
+
+ /* FP length. */
+ OUT_ACCEL_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
+ R500_US_CODE_END_ADDR(1)));
+ OUT_ACCEL_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
+ R500_US_CODE_RANGE_SIZE(1)));
+
+ /* Prepare for FP emission. */
+ OUT_ACCEL_REG(R500_US_CODE_OFFSET, 0);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, R500_US_VECTOR_INST_INDEX(0));
+
+ /* tex inst */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_RGB_WMASK_R |
+ R500_INST_RGB_WMASK_G |
+ R500_INST_RGB_WMASK_B |
+ R500_INST_ALPHA_WMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
+ R500_TEX_INST_LD |
+ R500_TEX_SEM_ACQUIRE |
+ R500_TEX_IGNORE_UNCOVERED));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
+ R500_TEX_SRC_S_SWIZ_R |
+ R500_TEX_SRC_T_SWIZ_G |
+ R500_TEX_DST_ADDR(0) |
+ R500_TEX_DST_R_SWIZ_R |
+ R500_TEX_DST_G_SWIZ_G |
+ R500_TEX_DST_B_SWIZ_B |
+ R500_TEX_DST_A_SWIZ_A));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
+ R500_DX_S_SWIZ_R |
+ R500_DX_T_SWIZ_R |
+ R500_DX_R_SWIZ_R |
+ R500_DX_Q_SWIZ_R |
+ R500_DY_ADDR(0) |
+ R500_DY_S_SWIZ_R |
+ R500_DY_T_SWIZ_R |
+ R500_DY_R_SWIZ_R |
+ R500_DY_Q_SWIZ_R));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+
+ /* ALU inst */
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
+ R500_INST_TEX_SEM_WAIT |
+ R500_INST_LAST |
+ R500_INST_RGB_OMASK_R |
+ R500_INST_RGB_OMASK_G |
+ R500_INST_RGB_OMASK_B |
+ R500_INST_ALPHA_OMASK |
+ R500_INST_RGB_CLAMP |
+ R500_INST_ALPHA_CLAMP));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
+ R500_RGB_ADDR1(0) |
+ R500_RGB_ADDR1_CONST |
+ R500_RGB_ADDR2(0) |
+ R500_RGB_ADDR2_CONST));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
+ R500_ALPHA_ADDR1(0) |
+ R500_ALPHA_ADDR1_CONST |
+ R500_ALPHA_ADDR2(0) |
+ R500_ALPHA_ADDR2_CONST));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
+ R500_ALU_RGB_R_SWIZ_A_R |
+ R500_ALU_RGB_G_SWIZ_A_G |
+ R500_ALU_RGB_B_SWIZ_A_B |
+ R500_ALU_RGB_SEL_B_SRC0 |
+ R500_ALU_RGB_R_SWIZ_B_1 |
+ R500_ALU_RGB_B_SWIZ_B_1 |
+ R500_ALU_RGB_G_SWIZ_B_1));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
+ R500_ALPHA_SWIZ_A_A |
+ R500_ALPHA_SWIZ_B_1));
+ OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
+ R500_ALU_RGBA_R_SWIZ_0 |
+ R500_ALU_RGBA_G_SWIZ_0 |
+ R500_ALU_RGBA_B_SWIZ_0 |
+ R500_ALU_RGBA_A_SWIZ_0));
+ FINISH_ACCEL();
+ }
+
+ BEGIN_ACCEL(6);
+ OUT_ACCEL_REG(R300_TX_INVALTAGS, 0);
+ OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
+
+ OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset);
+ OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch);
+
+ /* no need to enable blending */
+ OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
+
+ OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count);
+ FINISH_ACCEL();
+
+ if (pPriv->vsync) {
+ xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn,
+ pPriv->drw_x,
+ pPriv->drw_x + pPriv->dst_w,
+ pPriv->drw_y,
+ pPriv->drw_y + pPriv->dst_h);
+ if (crtc) {
+ RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private;
+
+ FUNC_NAME(RADEONWaitForVLine)(pScrn, pPixmap,
+ radeon_crtc->crtc_id,
+ pPriv->drw_y - crtc->y,
+ (pPriv->drw_y - crtc->y) + pPriv->dst_h);
+ }
+ }
+ /*
+ * Rendering of the actual polygon is done in two different
+ * ways depending on chip generation:
+ *
+ * < R300:
+ *
+ * These chips can render a rectangle in one pass, so
+ * handling is pretty straight-forward.
+ *
+ * >= R300:
+ *
+ * These chips can accept a quad, but will render it as
+ * two triangles which results in a diagonal tear. Instead
+ * We render a single, large triangle and use the scissor
+ * functionality to restrict it to the desired rectangle.
+ * Due to guardband limits on r3xx/r4xx, we can only use
+ * the single triangle up to 2880 pixels; above that we
+ * render as a quad.
+ */
+
+ while (nBox--) {
+ int srcX, srcY, srcw, srch;
+ int dstX, dstY, dstw, dsth;
+ dstX = pBox->x1 + dstxoff;
+ dstY = pBox->y1 + dstyoff;
+ dstw = pBox->x2 - pBox->x1;
+ dsth = pBox->y2 - pBox->y1;
+
+ srcX = ((pBox->x1 - pPriv->drw_x) *
+ pPriv->src_w) / pPriv->dst_w;
+ srcY = ((pBox->y1 - pPriv->drw_y) *
+ pPriv->src_h) / pPriv->dst_h;
+
+ srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
+ srch = (pPriv->src_h * dsth) / pPriv->dst_h;
+
+ BEGIN_ACCEL(2);
+ OUT_ACCEL_REG(R300_SC_SCISSOR0, (((dstX) << R300_SCISSOR_X_SHIFT) |
+ ((dstY) << R300_SCISSOR_Y_SHIFT)));
+ OUT_ACCEL_REG(R300_SC_SCISSOR1, (((dstX + dstw - 1) << R300_SCISSOR_X_SHIFT) |
+ ((dstY + dsth - 1) << R300_SCISSOR_Y_SHIFT)));
+ FINISH_ACCEL();
+
+#ifdef ACCEL_CP
+ BEGIN_RING(3 * vtx_count + 4);
+ OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
+ 3 * vtx_count));
+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST |
+ RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+ (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+#else /* ACCEL_CP */
+ BEGIN_ACCEL(2 + vtx_count * 3);
+ OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_LIST |
+ RADEON_VF_PRIM_WALK_DATA |
+ (3 << RADEON_VF_NUM_VERTICES_SHIFT)));
+#endif
+ if (pPriv->bicubic_enabled) {
+ VTX_OUT_FILTER((float)dstX, (float)dstY,
+ (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0],
+ (float)srcX + 0.5, (float)srcY + 0.5);
+ VTX_OUT_FILTER((float)dstX, (float)(dstY + dstw + dsth),
+ (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0],
+ (float)srcX + 0.5, (float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0) + 0.5);
+ VTX_OUT_FILTER((float)(dstX + dstw + dsth), (float)dstY,
+ ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
+ (float)srcY / info->accel_state->texH[0],
+ (float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0) + 0.5,
+ (float)srcY + 0.5);
+ } else {
+ /*
+ * Render a big, scissored triangle. This means
+ * increasing the triangle size and adjusting
+ * texture coordinates.
+ */
+ VTX_OUT((float)dstX, (float)dstY,
+ (float)srcX / info->accel_state->texW[0], (float)srcY / info->accel_state->texH[0]);
+ VTX_OUT((float)dstX, (float)(dstY + dsth + dstw),
+ (float)srcX / info->accel_state->texW[0], ((float)srcY + (float)srch * (((float)dstw / (float)dsth) + 1.0)) / info->accel_state->texH[0]);
+ VTX_OUT((float)(dstX + dstw + dsth), (float)dstY,
+ ((float)srcX + (float)srcw * (((float)dsth / (float)dstw) + 1.0)) / info->accel_state->texW[0],
+ (float)srcY / info->accel_state->texH[0]);
+ }
+
+ /* flushing is pipelined, free/finish is not */
+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+
+#ifdef ACCEL_CP
+ ADVANCE_RING();
+#else
+ FINISH_ACCEL();
+#endif /* !ACCEL_CP */
+
+ pBox++;
+ }
+
+ BEGIN_ACCEL(3);
+ OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
+ OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
FINISH_ACCEL();