diff options
| author | Michel Dänzer <michel@tungstengraphics.com> | 2006-06-18 17:39:09 +0200 |
|---|---|---|
| committer | Michel Dänzer <michel@tungstengraphics.com> | 2006-06-18 19:00:57 +0200 |
| commit | 79d1c573133e64ddf4446b6bb0eaab4a5a6c73ab (patch) | |
| tree | 723276ecb25aba2ba2810f94a1ad6d04f8939811 | |
| parent | 2bcdc7cfd013bd457d35f927390e5cc3544f30f1 (diff) | |
Implement accelerated EXA DownloadFromScreen hook.
x11perf -getimage numbers only increase by about 20-30% on my PowerBook with an
M9, but by about 100 times(!) with a PCIe X550. I suspect the former could
perform better with PCI as opposed to AGP transfers, which would also remove
the need to disable this by default with AGP.
-rw-r--r-- | man/radeon.man | 9 |
-rw-r--r-- | src/radeon.h | 6 |
-rw-r--r-- | src/radeon_driver.c | 65 |
-rw-r--r-- | src/radeon_exa_funcs.c | 124 |
4 files changed, 187 insertions, 17 deletions
diff --git a/man/radeon.man b/man/radeon.man index ba2ae84..9dc75fc 100644 --- a/man/radeon.man +++ b/man/radeon.man @@ -506,6 +506,15 @@ the Render and Composite extensions, but the rendering code for it is newer and possibly unstable. The default is .B XAA. .TP +.BI "Option \*qAccelDFS\*q \*q" boolean \*q +Use or don't use accelerated EXA DownloadFromScreen hook when possible (only +when Direct Rendering is enabled, e.g.). +Default: +.B off +with AGP due to issues with GPU->host transfers with some AGP bridges, +.B on +otherwise. +.TP .BI "Option \*qFBTexPercent\*q \*q" integer \*q Amount of video RAM to reserve for OpenGL textures, in percent. With EXA, the remainder of video RAM is reserved for EXA offscreen management. Specifying 0 diff --git a/src/radeon.h b/src/radeon.h index 06b3409..8e77313 100644 --- a/src/radeon.h +++ b/src/radeon.h @@ -334,7 +334,8 @@ typedef struct { unsigned long LinearAddr; /* Frame buffer physical address */ unsigned long MMIOAddr; /* MMIO region physical address */ unsigned long BIOSAddr; /* BIOS physical address */ - unsigned int fbLocation; + CARD32 fbLocation; + CARD32 gartLocation; CARD32 mc_fb_location; CARD32 mc_agp_location; @@ -416,6 +417,9 @@ typedef struct { #define EXA_ENGINEMODE_UNKNOWN 0 #define EXA_ENGINEMODE_2D 1 #define EXA_ENGINEMODE_3D 2 +#ifdef XF86DRI + Bool accelDFS; +#endif #endif #ifdef USE_XAA XAAInfoRecPtr accel; diff --git a/src/radeon_driver.c b/src/radeon_driver.c index ff2dba7..5a20ac6 100644 --- a/src/radeon_driver.c +++ b/src/radeon_driver.c @@ -162,6 +162,9 @@ typedef enum { OPTION_XV_DMA, OPTION_FBTEX_PERCENT, OPTION_DEPTH_BITS, +#ifdef USE_EXA + OPTION_ACCEL_DFS, +#endif #endif OPTION_PANEL_OFF, OPTION_DDC_MODE, @@ -227,6 +230,9 @@ static const OptionInfoRec RADEONOptions[] = { { OPTION_XV_DMA, "DMAForXv", OPTV_BOOLEAN, {0}, FALSE }, { OPTION_FBTEX_PERCENT, "FBTexPercent", OPTV_INTEGER, {0}, FALSE }, { OPTION_DEPTH_BITS, "DepthBits", OPTV_INTEGER, {0}, FALSE }, +#ifdef USE_EXA + { OPTION_ACCEL_DFS, 
"AccelDFS", OPTV_BOOLEAN, {0}, FALSE }, +#endif #endif { OPTION_PANEL_OFF, "PanelOff", OPTV_BOOLEAN, {0}, FALSE }, { OPTION_DDC_MODE, "DDCMode", OPTV_BOOLEAN, {0}, FALSE }, @@ -5941,21 +5947,36 @@ _X_EXPORT Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, #ifdef USE_EXA if (info->useEXA) { #ifdef XF86DRI - /* Reserve approx. half of offscreen memory for local textures by - * default, can be overridden with Option "FBTexPercent". - * Round down to a whole number of texture regions. - */ - info->textureSize = 50; + MessageType from = X_DEFAULT; - if (xf86GetOptValInteger(info->Options, OPTION_FBTEX_PERCENT, - &(info->textureSize))) { - if (info->textureSize < 0 || info->textureSize > 100) { - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, - "Illegal texture memory percentage: %dx, setting to default 50%%\n", - info->textureSize); - info->textureSize = 50; + if (hasDRI) { + info->accelDFS = info->cardType != CARD_AGP; + + if (xf86GetOptValInteger(info->Options, OPTION_ACCEL_DFS, + &info->accelDFS)) { + from = X_CONFIG; + } + + /* Reserve approx. half of offscreen memory for local textures by + * default, can be overridden with Option "FBTexPercent". + * Round down to a whole number of texture regions. + */ + info->textureSize = 50; + + if (xf86GetOptValInteger(info->Options, OPTION_FBTEX_PERCENT, + &(info->textureSize))) { + if (info->textureSize < 0 || info->textureSize > 100) { + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, + "Illegal texture memory percentage: %dx, setting to default 50%%\n", + info->textureSize); + info->textureSize = 50; + } } } + + xf86DrvMsg(pScrn->scrnIndex, from, + "%ssing accelerated EXA DownloadFromScreen hook\n", + info->accelDFS ? 
"U" : "Not u"); #endif /* XF86DRI */ if (!RADEONSetupMemEXA(pScreen)) @@ -6350,6 +6371,26 @@ static void RADEONAdjustMemMapRegisters(ScrnInfoPtr pScrn, RADEONSavePtr save) if (fb_loc_changed) RADEONRestoreMemMapRegisters(pScrn, save); } + + if (info->accelDFS) + { + drmRadeonGetParam gp; + int gart_base; + + memset(&gp, 0, sizeof(gp)); + gp.param = RADEON_PARAM_GART_BASE; + gp.value = &gart_base; + + if (drmCommandWriteRead(info->drmFD, DRM_RADEON_GETPARAM, &gp, + sizeof(gp)) < 0) { + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, + "Failed to determine GART area MC location, not using " + "accelerated DownloadFromScreen hook!\n"); + info->accelDFS = FALSE; + } else { + info->gartLocation = gart_base; + } + } } /* Write common registers */ diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c index 77cffcf..bc22147 100644 --- a/src/radeon_exa_funcs.c +++ b/src/radeon_exa_funcs.c @@ -1,6 +1,7 @@ /* * Copyright 2005 Eric Anholt * Copyright 2005 Benjamin Herrenschmidt + * Copyright 2006 Tungsten Graphics, Inc. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -26,6 +27,7 @@ * Eric Anholt <anholt@FreeBSD.org> * Zack Rusin <zrusin@trolltech.com> * Benjamin Herrenschmidt <benh@kernel.crashing.org> + * Michel Dänzer <michel@tungstengraphics.com> * */ @@ -287,27 +289,141 @@ FUNC_NAME(RADEONUploadToScreen)(PixmapPtr pDst, int x, int y, int w, int h, return TRUE; } +#ifdef ACCEL_CP +/* Emit blit with arbitrary source and destination offsets and pitches */ +static void +RADEONBlitChunk(ScrnInfoPtr pScrn, CARD32 datatype, CARD32 src_pitch_offset, + CARD32 dst_pitch_offset, int srcX, int srcY, int dstX, int dstY, + int w, int h) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + ACCEL_PREAMBLE(); + + BEGIN_ACCEL(6); + OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, + RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_SRC_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_NONE | + (datatype << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP3_S | + RADEON_DP_SRC_SOURCE_MEMORY | + RADEON_GMC_CLR_CMP_CNTL_DIS | + RADEON_GMC_WR_MSK_DIS); + OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, src_pitch_offset); + OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset); + OUT_ACCEL_REG(RADEON_SRC_Y_X, (srcY << 16) | srcX); + OUT_ACCEL_REG(RADEON_DST_Y_X, (dstY << 16) | dstX); + OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w); + FINISH_ACCEL(); +} +#endif + static Bool FUNC_NAME(RADEONDownloadFromScreen)(PixmapPtr pSrc, int x, int y, int w, int h, char *dst, int dst_pitch) { -#if X_BYTE_ORDER == X_BIG_ENDIAN +#if defined(ACCEL_CP) || X_BYTE_ORDER == X_BIG_ENDIAN RINFO_FROM_SCREEN(pSrc->drawable.pScreen); +#endif +#if X_BYTE_ORDER == X_BIG_ENDIAN unsigned char *RADEONMMIO = info->MMIO; unsigned int swapper = info->ModeReg.surface_cntl & ~(RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP | RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP); #endif - unsigned char *src = pSrc->devPrivate.ptr; + CARD8 *src = pSrc->devPrivate.ptr; int src_pitch = exaGetPixmapPitch(pSrc); 
int bpp = pSrc->drawable.bitsPerPixel; +#ifdef ACCEL_CP + CARD32 datatype, src_pitch_offset, scratch_pitch = (w * bpp/8 + 63) & ~63, scratch_off = 0; + drmBufPtr scratch; +#endif TRACE; +#ifdef ACCEL_CP /* - * This is currently done without DMA until I have ironed out the - * various endian issues with R300 among others + * Try to accelerate download. Use an indirect buffer as scratch space, + * blitting the bits to one half while copying them out of the other one and + * then swapping the halves. */ + if (info->accelDFS && bpp != 24 && RADEONGetDatatypeBpp(bpp, &datatype) && + RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset) && + (scratch = RADEONCPGetBuffer(pScrn))) + { + int swap = RADEON_HOST_DATA_SWAP_NONE, wpass = w * bpp / 8; + int hpass = min(h, scratch->total/2 / scratch_pitch); + CARD32 scratch_pitch_offset = scratch_pitch << 16 + | (info->gartLocation + info->bufStart + + scratch->idx * scratch->total) >> 10; + drmRadeonIndirect indirect; + ACCEL_PREAMBLE(); + + RADEON_SWITCH_TO_2D(); + + /* Kick the first blit as early as possible */ + RADEONBlitChunk(pScrn, datatype, src_pitch_offset, scratch_pitch_offset, + x, y, 0, 0, w, hpass); + FLUSH_RING(); + +#if X_BYTE_ORDER == X_BIG_ENDIAN + switch (bpp) { + case 16: + swap = RADEON_HOST_DATA_SWAP_16BIT; + break; + case 32: + swap = RADEON_HOST_DATA_SWAP_32BIT; + break; + } +#endif + + while (h) { + int oldhpass = hpass; + + src = (CARD8*)scratch->address + scratch_off; + + y += oldhpass; + h -= oldhpass; + hpass = min(h, scratch->total/2 / scratch_pitch); + + /* Prepare next blit if anything's left */ + if (hpass) { + scratch_off = scratch->total/2 - scratch_off; + RADEONBlitChunk(pScrn, datatype, src_pitch_offset, scratch_pitch_offset + (scratch_off >> 10), + x, y, 0, 0, w, hpass); + } + + /* Wait for previous blit to complete */ + RADEONWaitForIdleMMIO(pScrn); + + /* Kick next blit */ + if (hpass) + FLUSH_RING(); + + /* Copy out data from previous blit */ + if (wpass == scratch_pitch && wpass == 
dst_pitch) { + RADEONCopySwap((CARD8*)dst, src, wpass * oldhpass, swap); + dst += dst_pitch * oldhpass; + } else while (oldhpass--) { + RADEONCopySwap((CARD8*)dst, src, wpass, swap); + src += scratch_pitch; + dst += dst_pitch; + } + } + + indirect.idx = scratch->idx; + indirect.start = indirect.end = 0; + indirect.discard = 1; + + drmCommandWriteRead(info->drmFD, DRM_RADEON_INDIRECT, + &indirect, sizeof(drmRadeonIndirect)); + + return TRUE; + } +#endif + + /* Can't accelerate download */ exaWaitSync(pSrc->drawable.pScreen); #if X_BYTE_ORDER == X_BIG_ENDIAN |