summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichel Daenzer <michel@daenzer.net>2004-12-06 06:27:53 +0000
committerMichel Daenzer <michel@daenzer.net>2004-12-06 06:27:53 +0000
commit2b8ab42b05b32710fa4f71a25e2f84192e637fbd (patch)
tree9ca947b192ecd009d6e7a08ccde60d95b931bce8
parent960d15ae3a44efe7c02a3d6d0acbabe63e07be74 (diff)
When direct rendering is enabled, use hostdata blits to transfer data from
system memory to video RAM, which should reduce CPU usage especially with larger videos. Can be disabled via Option "DMAForXv" if there should be any stability issues, but it's been stable for me during a week of testing. Based on a patch by Nikolaus Meine <meine@tnt.uni-hannover.de>. Probably fix endianness issues in some newer XVideo code, untested. Also use hostdata blits to transfer RENDER image data to video RAM to avoid idling the accelerator engine. Increases RENDER performance significantly for me. These changes were only tested on an M9 in a Titanium PowerBook but should work with all Radeons where direct rendering is supported.
-rw-r--r--man/radeon.man6
-rw-r--r--src/radeon.h11
-rw-r--r--src/radeon_accel.c125
-rw-r--r--src/radeon_driver.c10
-rw-r--r--src/radeon_render.c66
-rw-r--r--src/radeon_video.c285
6 files changed, 431 insertions, 72 deletions
diff --git a/man/radeon.man b/man/radeon.man
index cdd3e1f..4d66fe3 100644
--- a/man/radeon.man
+++ b/man/radeon.man
@@ -461,6 +461,12 @@ unsupported). The default is to
.B enable
Render acceleration.
.TP
+.BI "Option \*qDMAForXv\*q \*q" boolean \*q
+Try or don't try to use DMA for Xv image transfers. This will reduce CPU
+usage when playing big videos like DVDs, but may cause instabilities.
+Default:
+.B on.
+.TP
.BI "Option \*qSubPixelOrder\*q \*q" "string" \*q
Force subpixel order to specified order.
Subpixel order is used for subpixel decimation on flat panels.
diff --git a/src/radeon.h b/src/radeon.h
index f22e09e..098627d 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -540,6 +540,8 @@ typedef struct {
int irq;
+ Bool DMAForXv;
+
#ifdef PER_CONTEXT_SAREA
int perctx_sarea_size;
#endif
@@ -671,6 +673,15 @@ extern void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard);
extern void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn);
extern int RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info);
+extern CARD8* RADEONHostDataBlit(ScrnInfoPtr pScrn, unsigned int bpp,
+ unsigned int w, CARD32 dstPitch,
+ CARD32 *bufPitch, CARD8 **dst,
+ unsigned int *h, unsigned int *hpass);
+extern void RADEONHostDataBlitCopyPass(CARD8 *dst, CARD8 *src,
+ unsigned int hpass,
+ unsigned int dstPitch,
+ unsigned int srcPitch);
+
extern Bool RADEONGetBIOSInfo(ScrnInfoPtr pScrn, xf86Int10InfoPtr pInt10);
extern Bool RADEONGetConnectorInfoFromBIOS (ScrnInfoPtr pScrn);
extern Bool RADEONGetClockInfoFromBIOS (ScrnInfoPtr pScrn);
diff --git a/src/radeon_accel.c b/src/radeon_accel.c
index 62ce879..ca7124b 100644
--- a/src/radeon_accel.c
+++ b/src/radeon_accel.c
@@ -578,6 +578,131 @@ void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn)
drmCommandWriteRead(info->drmFD, DRM_RADEON_INDIRECT,
&indirect, sizeof(drmRadeonIndirect));
}
+
+
+/* Set up a hostdata blit to transfer data from system memory to the
+ * framebuffer. Returns the address where the data can be written to and sets
+ * the dstPitch and hpass variables as required.
+ */
+CARD8*
+RADEONHostDataBlit(
+ ScrnInfoPtr pScrn,
+ unsigned int bpp,
+ unsigned int w,
+ CARD32 dstPitch,
+ CARD32 *bufPitch,
+ CARD8* *dst,
+ unsigned int *h,
+ unsigned int *hpass
+){
+ RADEONInfoPtr info = RADEONPTR( pScrn );
+ CARD32 format, dst_offs, dwords, x, y;
+ CARD8 *ret;
+ RING_LOCALS;
+
+ if ( *h == 0 )
+ {
+ return NULL;
+ }
+
+ switch ( bpp )
+ {
+ case 4:
+ format = RADEON_GMC_DST_32BPP;
+ *bufPitch = 4 * w;
+ break;
+ case 2:
+ format = RADEON_GMC_DST_16BPP;
+ w = (w + 1) & ~1;
+ *bufPitch = 2 * w;
+ break;
+ case 1:
+ format = RADEON_GMC_DST_8BPP_CI;
+ w = (w + 3) & ~3;
+ *bufPitch = w;
+ break;
+ default:
+ xf86DrvMsg( pScrn->scrnIndex, X_ERROR,
+ "%s: Unsupported bpp %d!\n", __func__, bpp );
+ return NULL;
+ }
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ BEGIN_RING(2);
+ if (bpp == 2)
+ OUT_RING_REG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_16BIT);
+ else if (bpp == 1)
+ OUT_RING_REG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT);
+ else
+ OUT_RING_REG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_NONE);
+ ADVANCE_RING();
+#endif
+
+ /*RADEON_PURGE_CACHE();
+ RADEON_WAIT_UNTIL_IDLE();*/
+
+ dst_offs = *dst - info->FB + info->fbLocation;
+ *hpass = min( *h, ( ( RADEON_BUFFER_SIZE - 8 * 4 ) / *bufPitch ) );
+ dwords = *hpass * *bufPitch / 4;
+
+ y = ( dst_offs & 1023 ) / dstPitch;
+ x = ( ( dst_offs & 1023 ) - ( y * dstPitch ) ) / bpp;
+
+ BEGIN_RING( dwords + 8 );
+ OUT_RING( CP_PACKET3( RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT, dwords + 8 - 2 ) );
+ OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL
+ | RADEON_GMC_BRUSH_NONE
+ | format
+ | RADEON_GMC_SRC_DATATYPE_COLOR
+ | RADEON_ROP3_S
+ | RADEON_DP_SRC_SOURCE_HOST_DATA
+ | RADEON_GMC_CLR_CMP_CNTL_DIS
+ | RADEON_GMC_WR_MSK_DIS );
+ OUT_RING( dstPitch << 16 | dst_offs >> 10 );
+ OUT_RING( 0xffffffff );
+ OUT_RING( 0xffffffff );
+ OUT_RING( y << 16 | x );
+ OUT_RING( *hpass << 16 | w );
+ OUT_RING( dwords );
+
+ ret = ( CARD8* )&__head[__count];
+
+ __count += dwords;
+ ADVANCE_RING();
+
+ *dst += *hpass * dstPitch;
+ *h -= *hpass;
+
+ return ret;
+}
+
+/* Copies a single pass worth of data for a hostdata blit set up by
+ * RADEONHostDataBlit().
+ */
+void
+RADEONHostDataBlitCopyPass(
+ CARD8 *dst,
+ CARD8 *src,
+ unsigned int hpass,
+ unsigned int dstPitch,
+ unsigned int srcPitch
+){
+ if ( dstPitch == srcPitch )
+ {
+ memcpy( dst, src, hpass * dstPitch );
+ }
+ else
+ {
+ unsigned int minPitch = min( dstPitch, srcPitch );
+ while ( hpass-- )
+ {
+ memcpy( dst, src, minPitch );
+ src += srcPitch;
+ dst += dstPitch;
+ }
+ }
+}
+
#endif
/* Initialize XAA for supported acceleration and also initialize the
diff --git a/src/radeon_driver.c b/src/radeon_driver.c
index 68323ca..f937464 100644
--- a/src/radeon_driver.c
+++ b/src/radeon_driver.c
@@ -141,6 +141,7 @@ typedef enum {
OPTION_DEPTH_MOVE,
OPTION_PAGE_FLIP,
OPTION_NO_BACKBUFFER,
+ OPTION_XV_DMA,
#endif
OPTION_PANEL_OFF,
OPTION_DDC_MODE,
@@ -192,6 +193,7 @@ static const OptionInfoRec RADEONOptions[] = {
{ OPTION_DEPTH_MOVE, "EnableDepthMoves", OPTV_BOOLEAN, {0}, FALSE },
{ OPTION_PAGE_FLIP, "EnablePageFlip", OPTV_BOOLEAN, {0}, FALSE },
{ OPTION_NO_BACKBUFFER, "NoBackBuffer", OPTV_BOOLEAN, {0}, FALSE },
+ { OPTION_XV_DMA, "DMAForXv", OPTV_BOOLEAN, {0}, FALSE },
#endif
{ OPTION_PANEL_OFF, "PanelOff", OPTV_BOOLEAN, {0}, FALSE },
{ OPTION_DDC_MODE, "DDCMode", OPTV_BOOLEAN, {0}, FALSE },
@@ -3896,6 +3898,7 @@ static Bool RADEONPreInitInt10(ScrnInfoPtr pScrn, xf86Int10InfoPtr *ppInt10)
static Bool RADEONPreInitDRI(ScrnInfoPtr pScrn)
{
RADEONInfoPtr info = RADEONPTR(pScrn);
+ MessageType from;
if (xf86ReturnOptValBool(info->Options, OPTION_CP_PIO, FALSE)) {
xf86DrvMsg(pScrn->scrnIndex, X_CONFIG, "Forcing CP into PIO mode\n");
@@ -4032,6 +4035,13 @@ static Bool RADEONPreInitDRI(ScrnInfoPtr pScrn)
xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Page flipping %sabled\n",
info->allowPageFlip ? "en" : "dis");
+ info->DMAForXv = TRUE;
+ from = xf86GetOptValBool(info->Options, OPTION_XV_DMA, &info->DMAForXv)
+ ? X_CONFIG : X_INFO;
+ xf86DrvMsg(pScrn->scrnIndex, from,
+ "Will %stry to use DMA for Xv image transfers\n",
+ info->DMAForXv ? "" : "not ");
+
return TRUE;
}
#endif
diff --git a/src/radeon_render.c b/src/radeon_render.c
index 7c85ca7..29ef966 100644
--- a/src/radeon_render.c
+++ b/src/radeon_render.c
@@ -422,6 +422,10 @@ static Bool FUNC_NAME(R100SetupTexture)(
CARD8 *dst;
CARD32 tex_size = 0, txformat;
int dst_pitch, offset, size, i, tex_bytepp;
+#ifdef ACCEL_CP
+ CARD32 buf_pitch;
+ unsigned int hpass;
+#endif
ACCEL_PREAMBLE();
if ((width > 2048) || (height > 2048))
@@ -430,6 +434,8 @@ static Bool FUNC_NAME(R100SetupTexture)(
txformat = RadeonGetTextureFormat(format);
tex_bytepp = PICT_FORMAT_BPP(format) >> 3;
+#ifndef ACCEL_CP
+
#if X_BYTE_ORDER == X_BIG_ENDIAN
if (!RADEONSetupRenderByteswap(pScrn, tex_bytepp)) {
xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: RADEONSetupRenderByteswap() "
@@ -438,7 +444,9 @@ static Bool FUNC_NAME(R100SetupTexture)(
}
#endif
- dst_pitch = (width * tex_bytepp + 31) & ~31;
+#endif
+
+ dst_pitch = (width * tex_bytepp + 63) & ~63;
size = dst_pitch * height;
if (!AllocateLinear(pScrn, size))
@@ -453,10 +461,27 @@ static Bool FUNC_NAME(R100SetupTexture)(
}
offset = info->RenderTex->offset * pScrn->bitsPerPixel / 8;
+ dst = (CARD8*)(info->FB + offset);
+
+ /* Upload texture to card. */
+
+#ifdef ACCEL_CP
+
+ while ( height )
+ {
+ RADEONHostDataBlitCopyPass( RADEONHostDataBlit( pScrn, tex_bytepp, width,
+ dst_pitch, &buf_pitch,
+ &dst, &height, &hpass ),
+ src, hpass, buf_pitch, src_pitch );
+ src += hpass * src_pitch;
+ }
+
+ RADEON_PURGE_CACHE();
+ RADEON_WAIT_UNTIL_IDLE();
+
+#else
- /* Upload texture to card. Should use ImageWrite to avoid syncing. */
i = height;
- dst = (CARD8*)(info->FB + offset);
if (info->accel->NeedToSync)
info->accel->Sync(pScrn);
@@ -471,6 +496,8 @@ static Bool FUNC_NAME(R100SetupTexture)(
RADEONRestoreByteswap(info);
#endif
+#endif /* ACCEL_CP */
+
BEGIN_ACCEL(5);
OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, tex_size);
@@ -713,6 +740,10 @@ static Bool FUNC_NAME(R200SetupTexture)(
CARD8 *dst;
CARD32 tex_size = 0, txformat;
int dst_pitch, offset, size, i, tex_bytepp;
+#ifdef ACCEL_CP
+ CARD32 buf_pitch;
+ unsigned int hpass;
+#endif
ACCEL_PREAMBLE();
if ((width > 2048) || (height > 2048))
@@ -721,6 +752,8 @@ static Bool FUNC_NAME(R200SetupTexture)(
txformat = RadeonGetTextureFormat(format);
tex_bytepp = PICT_FORMAT_BPP(format) >> 3;
+#ifndef ACCEL_CP
+
#if X_BYTE_ORDER == X_BIG_ENDIAN
if (!RADEONSetupRenderByteswap(pScrn, tex_bytepp)) {
xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: RADEONSetupRenderByteswap() "
@@ -729,7 +762,9 @@ static Bool FUNC_NAME(R200SetupTexture)(
}
#endif
- dst_pitch = (width * tex_bytepp + 31) & ~31;
+#endif
+
+ dst_pitch = (width * tex_bytepp + 63) & ~63;
size = dst_pitch * height;
if (!AllocateLinear(pScrn, size))
@@ -744,10 +779,27 @@ static Bool FUNC_NAME(R200SetupTexture)(
}
offset = info->RenderTex->offset * pScrn->bitsPerPixel / 8;
+ dst = (CARD8*)(info->FB + offset);
+
+ /* Upload texture to card. */
+
+#ifdef ACCEL_CP
+
+ while ( height )
+ {
+ RADEONHostDataBlitCopyPass( RADEONHostDataBlit( pScrn, tex_bytepp, width,
+ dst_pitch, &buf_pitch,
+ &dst, &height, &hpass ),
+ src, hpass, buf_pitch, src_pitch );
+ src += hpass * src_pitch;
+ }
+
+ RADEON_PURGE_CACHE();
+ RADEON_WAIT_UNTIL_IDLE();
+
+#else
- /* Upload texture to card. Should use ImageWrite to avoid syncing. */
i = height;
- dst = (CARD8*)(info->FB + offset);
if (info->accel->NeedToSync)
info->accel->Sync(pScrn);
@@ -761,6 +813,8 @@ static Bool FUNC_NAME(R200SetupTexture)(
RADEONRestoreByteswap(info);
#endif
+#endif /* ACCEL_CP */
+
BEGIN_ACCEL(6);
OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
diff --git a/src/radeon_video.c b/src/radeon_video.c
index bdabc31..f21e3af 100644
--- a/src/radeon_video.c
+++ b/src/radeon_video.c
@@ -1941,23 +1941,96 @@ static struct {
static void
RADEONCopyData(
+ ScrnInfoPtr pScrn,
unsigned char *src,
unsigned char *dst,
int srcPitch,
int dstPitch,
int h,
- int w
+ int w,
+ int bpp
){
- w <<= 1;
- while(h--) {
- memcpy(dst, src, w);
- src += srcPitch;
- dst += dstPitch;
+#ifdef XF86DRI
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+
+ if ( info->directRenderingEnabled && info->DMAForXv )
+ {
+ CARD8 *buf;
+ CARD32 bufPitch;
+ unsigned int hpass;
+
+ /* Get the byte-swapping right for big endian systems */
+ if ( bpp == 2 )
+ {
+ w *= 2;
+ bpp = 1;
+ }
+
+ while ( buf = RADEONHostDataBlit( pScrn, bpp, w, dstPitch,
+ &bufPitch, &dst, &h, &hpass ) )
+ {
+ RADEONHostDataBlitCopyPass( buf, src, hpass, bufPitch, srcPitch );
+ src += hpass * srcPitch;
+ }
+
+ FLUSH_RING();
+
+ return;
+ }
+ else
+#endif /* XF86DRI */
+ {
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ unsigned char *RADEONMMIO = info->MMIO;
+ if ( bpp == 2 )
+ {
+ OUTREG(RADEON_SURFACE_CNTL, info->ModeReg.surface_cntl
+ & ~(RADEON_NONSURF_AP0_SWP_32BPP
+ | RADEON_NONSURF_AP0_SWP_16BPP));
+ }
+ else /* bpp == 4 */
+ {
+ OUTREG(RADEON_SURFACE_CNTL, (info->ModeReg.surface_cntl
+ | RADEON_NONSURF_AP0_SWP_32BPP)
+ & ~RADEON_NONSURF_AP0_SWP_16BPP);
+ }
+#endif
+
+ w *= 2;
+
+ while (h--)
+ {
+ memcpy(dst, src, w);
+ src += srcPitch;
+ dst += dstPitch;
+ }
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ /* restore byte swapping */
+ OUTREG(RADEON_SURFACE_CNTL, info->ModeReg.surface_cntl);
+#endif
}
}
+static void RADEON_420_422(
+ unsigned int *d,
+ unsigned char *s1,
+ unsigned char *s2,
+ unsigned char *s3,
+ unsigned int n
+)
+{
+ while ( n ) {
+ *(d++) = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
+ s1+=2; s2++; s3++;
+ n--;
+ }
+}
+
+
static void
RADEONCopyRGB24Data(
+ ScrnInfoPtr pScrn,
unsigned char *src,
unsigned char *dst,
int srcPitch,
@@ -1968,22 +2041,68 @@ RADEONCopyRGB24Data(
CARD32 *dptr;
CARD8 *sptr;
int i,j;
-
+#ifdef XF86DRI
+ RADEONInfoPtr info = RADEONPTR(pScrn);
- for(j=0;j<h;j++){
- dptr=(CARD32 *)(dst+j*dstPitch);
- sptr=src+j*srcPitch;
-
- for(i=w;i>0;i--){
- dptr[0]=((sptr[0])<<24)|((sptr[1])<<16)|(sptr[2]);
- dptr++;
- sptr+=3;
+ if ( info->directRenderingEnabled && info->DMAForXv )
+ {
+ CARD32 bufPitch;
+ unsigned int hpass;
+
+ while ( dptr = ( CARD32* )RADEONHostDataBlit( pScrn, 4, w, dstPitch,
+ &bufPitch, &dst, &h,
+ &hpass ) )
+ {
+ for( j = 0; j < hpass; j++ )
+ {
+ sptr = src;
+
+ for ( i = 0 ; i < w; i++ )
+ {
+ *dptr++ = ( ( *sptr++ ) << 24 ) | ( ( *sptr++ ) << 16 ) |
+ ( *sptr++ );
+ }
+
+ src += hpass * srcPitch;
+ dptr += hpass * bufPitch;
+ }
+ }
+
+ FLUSH_RING();
+
+ return;
+ }
+ else
+#endif /* XF86DRI */
+ {
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ unsigned char *RADEONMMIO = info->MMIO;
+ OUTREG(RADEON_SURFACE_CNTL, (info->ModeReg.surface_cntl
+ | RADEON_NONSURF_AP0_SWP_32BPP)
+ & ~RADEON_NONSURF_AP0_SWP_16BPP);
+#endif
+
+ for(j=0;j<h;j++){
+ dptr=(CARD32 *)(dst+j*dstPitch);
+ sptr=src+j*srcPitch;
+
+ for(i=w;i>0;i--){
+ dptr[0]=((sptr[0])<<24)|((sptr[1])<<16)|(sptr[2]);
+ dptr++;
+ sptr+=3;
+ }
}
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ /* restore byte swapping */
+ OUTREG(RADEON_SURFACE_CNTL, info->ModeReg.surface_cntl);
+#endif
}
}
static void
RADEONCopyMungedData(
+ ScrnInfoPtr pScrn,
unsigned char *src1,
unsigned char *src2,
unsigned char *src3,
@@ -1994,37 +2113,86 @@ RADEONCopyMungedData(
int h,
int w
){
- CARD32 *dst;
- CARD8 *s1, *s2, *s3;
- int i, j;
-
- w >>= 1;
-
- for(j = 0; j < h; j++) {
- dst = (pointer)dst1;
- s1 = src1; s2 = src2; s3 = src3;
- i = w;
- while(i > 4) {
- dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
- dst[1] = s1[2] | (s1[3] << 16) | (s3[1] << 8) | (s2[1] << 24);
- dst[2] = s1[4] | (s1[5] << 16) | (s3[2] << 8) | (s2[2] << 24);
- dst[3] = s1[6] | (s1[7] << 16) | (s3[3] << 8) | (s2[3] << 24);
- dst += 4; s2 += 4; s3 += 4; s1 += 8;
- i -= 4;
- }
- while(i--) {
- dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
- dst++; s2++; s3++;
- s1 += 2;
+#ifdef XF86DRI
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+
+ if ( info->directRenderingEnabled && info->DMAForXv )
+ {
+ CARD8 *buf;
+ CARD32 y = 0, bufPitch;
+ unsigned int hpass;
+
+ while ( buf = RADEONHostDataBlit( pScrn, 4, w/2, dstPitch,
+ &bufPitch, &dst1, &h, &hpass ) )
+ {
+ while ( hpass-- )
+ {
+ RADEON_420_422( (unsigned int *) buf, src1, src2, src3,
+ bufPitch / 4 );
+ src1 += srcPitch;
+ if ( y & 1 )
+ {
+ src2 += srcPitch2;
+ src3 += srcPitch2;
+ }
+ buf += bufPitch;
+ y++;
+ }
}
- dst1 += dstPitch;
- src1 += srcPitch;
- if(j & 1) {
- src2 += srcPitch2;
- src3 += srcPitch2;
+ FLUSH_RING();
+ }
+ else
+#endif /* XF86DRI */
+ {
+ CARD32 *dst;
+ CARD8 *s1, *s2, *s3;
+ int i, j;
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ unsigned char *RADEONMMIO = info->MMIO;
+ OUTREG(RADEON_SURFACE_CNTL, (info->ModeReg.surface_cntl
+ | RADEON_NONSURF_AP0_SWP_32BPP)
+ & ~RADEON_NONSURF_AP0_SWP_16BPP);
+#endif
+
+ w /= 2;
+
+ for( j = 0; j < h; j++ )
+ {
+ dst = (pointer)dst1;
+ s1 = src1; s2 = src2; s3 = src3;
+ i = w;
+ while( i > 4 )
+ {
+ dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
+ dst[1] = s1[2] | (s1[3] << 16) | (s3[1] << 8) | (s2[1] << 24);
+ dst[2] = s1[4] | (s1[5] << 16) | (s3[2] << 8) | (s2[2] << 24);
+ dst[3] = s1[6] | (s1[7] << 16) | (s3[3] << 8) | (s2[3] << 24);
+ dst += 4; s2 += 4; s3 += 4; s1 += 8;
+ i -= 4;
+ }
+ while( i-- )
+ {
+ dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
+ dst++; s2++; s3++;
+ s1 += 2;
+ }
+
+ dst1 += dstPitch;
+ src1 += srcPitch;
+ if( j & 1 )
+ {
+ src2 += srcPitch2;
+ src3 += srcPitch2;
+ }
}
- }
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ /* restore byte swapping */
+ OUTREG(RADEON_SURFACE_CNTL, info->ModeReg.surface_cntl);
+#endif
+ }
}
@@ -2472,7 +2640,7 @@ RADEONPutImage(
break;
case FOURCC_YV12:
case FOURCC_I420:
- dstPitch = ((width << 1) + 15) & ~15;
+ dstPitch = ((width << 1) + 63) & ~63;
new_size = ((dstPitch * height) + bpp - 1) / bpp;
srcPitch = (width + 3) & ~3;
s2offset = srcPitch * height;
@@ -2482,7 +2650,7 @@ RADEONPutImage(
case FOURCC_UYVY:
case FOURCC_YUY2:
default:
- dstPitch = ((width << 1) + 15) & ~15;
+ dstPitch = ((width << 1) + 63) & ~63;
new_size = ((dstPitch * height) + bpp - 1) / bpp;
srcPitch = (width << 1);
break;
@@ -2521,14 +2689,9 @@ RADEONPutImage(
s3offset = tmp;
}
nlines = ((((yb + 0xffff) >> 16) + 1) & ~1) - top;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- OUTREG(RADEON_SURFACE_CNTL, (info->ModeReg.surface_cntl |
- RADEON_NONSURF_AP0_SWP_32BPP)
- & ~RADEON_NONSURF_AP0_SWP_16BPP);
-#endif
- RADEONCopyMungedData(buf + (top * srcPitch) + left, buf + s2offset,
- buf + s3offset, dst_start, srcPitch, srcPitch2,
- dstPitch, nlines, npixels);
+ RADEONCopyMungedData(pScrn, buf + (top * srcPitch) + left,
+ buf + s2offset, buf + s3offset, dst_start,
+ srcPitch, srcPitch2, dstPitch, nlines, npixels);
break;
case FOURCC_RGBT16:
case FOURCC_RGB16:
@@ -2539,32 +2702,22 @@ RADEONPutImage(
buf += (top * srcPitch) + left;
nlines = ((yb + 0xffff) >> 16) - top;
dst_start += left;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- OUTREG(RADEON_SURFACE_CNTL, info->ModeReg.surface_cntl &
- ~(RADEON_NONSURF_AP0_SWP_32BPP
- | RADEON_NONSURF_AP0_SWP_16BPP));
-#endif
- RADEONCopyData(buf, dst_start, srcPitch, dstPitch, nlines, npixels);
+ RADEONCopyData(pScrn, buf, dst_start, srcPitch, dstPitch, nlines, npixels, 2);
break;
case FOURCC_RGBA32:
buf += (top * srcPitch) + left*4;
nlines = ((yb + 0xffff) >> 16) - top;
dst_start += left*4;
- RADEONCopyData(buf, dst_start, srcPitch, dstPitch, nlines, 2*npixels);
+ RADEONCopyData(pScrn, buf, dst_start, srcPitch, dstPitch, nlines, npixels, 4);
break;
case FOURCC_RGB24:
buf += (top * srcPitch) + left*3;
nlines = ((yb + 0xffff) >> 16) - top;
dst_start += left*4;
- RADEONCopyRGB24Data(buf, dst_start, srcPitch, dstPitch, nlines, npixels);
+ RADEONCopyRGB24Data(pScrn, buf, dst_start, srcPitch, dstPitch, nlines, npixels);
break;
}
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- /* restore byte swapping */
- OUTREG(RADEON_SURFACE_CNTL, info->ModeReg.surface_cntl);
-#endif
-
/* update cliplist */
if(!REGION_EQUAL(pScrn->pScreen, &pPriv->clip, clipBoxes))
{