summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michel Dänzer <michel@tungstengraphics.com> 2006-06-18 17:39:09 +0200
committer: Michel Dänzer <michel@tungstengraphics.com> 2006-06-18 19:00:57 +0200
commit: 79d1c573133e64ddf4446b6bb0eaab4a5a6c73ab (patch)
tree: 723276ecb25aba2ba2810f94a1ad6d04f8939811
parent: 2bcdc7cfd013bd457d35f927390e5cc3544f30f1 (diff)
Implement accelerated EXA DownloadFromScreen hook.
x11perf -getimage numbers only increase by about 20-30% on my PowerBook with an M9, but by about 100 times(!) with a PCIe X550. I suspect the former could perform better with PCI as opposed to AGP transfers, which would also remove the need to disable this by default with AGP.
-rw-r--r-- man/radeon.man 9
-rw-r--r-- src/radeon.h 6
-rw-r--r-- src/radeon_driver.c 65
-rw-r--r-- src/radeon_exa_funcs.c 124
4 files changed, 187 insertions, 17 deletions
diff --git a/man/radeon.man b/man/radeon.man
index ba2ae84..9dc75fc 100644
--- a/man/radeon.man
+++ b/man/radeon.man
@@ -506,6 +506,15 @@ the Render and Composite extensions, but the rendering code for it is newer and
possibly unstable. The default is
.B XAA.
.TP
+.BI "Option \*qAccelDFS\*q \*q" boolean \*q
+Use or don't use accelerated EXA DownloadFromScreen hook when possible (only
+when Direct Rendering is enabled, e.g.).
+Default:
+.B off
+with AGP due to issues with GPU->host transfers with some AGP bridges,
+.B on
+otherwise.
+.TP
.BI "Option \*qFBTexPercent\*q \*q" integer \*q
Amount of video RAM to reserve for OpenGL textures, in percent. With EXA, the
remainder of video RAM is reserved for EXA offscreen management. Specifying 0
diff --git a/src/radeon.h b/src/radeon.h
index 06b3409..8e77313 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -334,7 +334,8 @@ typedef struct {
unsigned long LinearAddr; /* Frame buffer physical address */
unsigned long MMIOAddr; /* MMIO region physical address */
unsigned long BIOSAddr; /* BIOS physical address */
- unsigned int fbLocation;
+ CARD32 fbLocation;
+ CARD32 gartLocation;
CARD32 mc_fb_location;
CARD32 mc_agp_location;
@@ -416,6 +417,9 @@ typedef struct {
#define EXA_ENGINEMODE_UNKNOWN 0
#define EXA_ENGINEMODE_2D 1
#define EXA_ENGINEMODE_3D 2
+#ifdef XF86DRI
+ Bool accelDFS;
+#endif
#endif
#ifdef USE_XAA
XAAInfoRecPtr accel;
diff --git a/src/radeon_driver.c b/src/radeon_driver.c
index ff2dba7..5a20ac6 100644
--- a/src/radeon_driver.c
+++ b/src/radeon_driver.c
@@ -162,6 +162,9 @@ typedef enum {
OPTION_XV_DMA,
OPTION_FBTEX_PERCENT,
OPTION_DEPTH_BITS,
+#ifdef USE_EXA
+ OPTION_ACCEL_DFS,
+#endif
#endif
OPTION_PANEL_OFF,
OPTION_DDC_MODE,
@@ -227,6 +230,9 @@ static const OptionInfoRec RADEONOptions[] = {
{ OPTION_XV_DMA, "DMAForXv", OPTV_BOOLEAN, {0}, FALSE },
{ OPTION_FBTEX_PERCENT, "FBTexPercent", OPTV_INTEGER, {0}, FALSE },
{ OPTION_DEPTH_BITS, "DepthBits", OPTV_INTEGER, {0}, FALSE },
+#ifdef USE_EXA
+ { OPTION_ACCEL_DFS, "AccelDFS", OPTV_BOOLEAN, {0}, FALSE },
+#endif
#endif
{ OPTION_PANEL_OFF, "PanelOff", OPTV_BOOLEAN, {0}, FALSE },
{ OPTION_DDC_MODE, "DDCMode", OPTV_BOOLEAN, {0}, FALSE },
@@ -5941,21 +5947,36 @@ _X_EXPORT Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen,
#ifdef USE_EXA
if (info->useEXA) {
#ifdef XF86DRI
- /* Reserve approx. half of offscreen memory for local textures by
- * default, can be overridden with Option "FBTexPercent".
- * Round down to a whole number of texture regions.
- */
- info->textureSize = 50;
+ MessageType from = X_DEFAULT;
- if (xf86GetOptValInteger(info->Options, OPTION_FBTEX_PERCENT,
- &(info->textureSize))) {
- if (info->textureSize < 0 || info->textureSize > 100) {
- xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
- "Illegal texture memory percentage: %dx, setting to default 50%%\n",
- info->textureSize);
- info->textureSize = 50;
+ if (hasDRI) {
+ info->accelDFS = info->cardType != CARD_AGP;
+
+ if (xf86GetOptValInteger(info->Options, OPTION_ACCEL_DFS,
+ &info->accelDFS)) {
+ from = X_CONFIG;
+ }
+
+ /* Reserve approx. half of offscreen memory for local textures by
+ * default, can be overridden with Option "FBTexPercent".
+ * Round down to a whole number of texture regions.
+ */
+ info->textureSize = 50;
+
+ if (xf86GetOptValInteger(info->Options, OPTION_FBTEX_PERCENT,
+ &(info->textureSize))) {
+ if (info->textureSize < 0 || info->textureSize > 100) {
+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+ "Illegal texture memory percentage: %dx, setting to default 50%%\n",
+ info->textureSize);
+ info->textureSize = 50;
+ }
}
}
+
+ xf86DrvMsg(pScrn->scrnIndex, from,
+ "%ssing accelerated EXA DownloadFromScreen hook\n",
+ info->accelDFS ? "U" : "Not u");
#endif /* XF86DRI */
if (!RADEONSetupMemEXA(pScreen))
@@ -6350,6 +6371,26 @@ static void RADEONAdjustMemMapRegisters(ScrnInfoPtr pScrn, RADEONSavePtr save)
if (fb_loc_changed)
RADEONRestoreMemMapRegisters(pScrn, save);
}
+
+ if (info->accelDFS)
+ {
+ drmRadeonGetParam gp;
+ int gart_base;
+
+ memset(&gp, 0, sizeof(gp));
+ gp.param = RADEON_PARAM_GART_BASE;
+ gp.value = &gart_base;
+
+ if (drmCommandWriteRead(info->drmFD, DRM_RADEON_GETPARAM, &gp,
+ sizeof(gp)) < 0) {
+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+ "Failed to determine GART area MC location, not using "
+ "accelerated DownloadFromScreen hook!\n");
+ info->accelDFS = FALSE;
+ } else {
+ info->gartLocation = gart_base;
+ }
+ }
}
/* Write common registers */
diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
index 77cffcf..bc22147 100644
--- a/src/radeon_exa_funcs.c
+++ b/src/radeon_exa_funcs.c
@@ -1,6 +1,7 @@
/*
* Copyright 2005 Eric Anholt
* Copyright 2005 Benjamin Herrenschmidt
+ * Copyright 2006 Tungsten Graphics, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -26,6 +27,7 @@
* Eric Anholt <anholt@FreeBSD.org>
* Zack Rusin <zrusin@trolltech.com>
* Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ * Michel Dänzer <michel@tungstengraphics.com>
*
*/
@@ -287,27 +289,141 @@ FUNC_NAME(RADEONUploadToScreen)(PixmapPtr pDst, int x, int y, int w, int h,
return TRUE;
}
+#ifdef ACCEL_CP
+/*
+ * Emit blit with arbitrary source and destination offsets and pitches.
+ *
+ * Queues exactly six register writes between BEGIN_ACCEL(6) and
+ * FINISH_ACCEL(): one DP_GUI_MASTER_CNTL setup word, the source and
+ * destination pitch/offset words, and the source origin, destination origin
+ * and rectangle size.
+ *
+ * pScrn             screen whose driver-private RADEONInfoPtr is looked up
+ * datatype          value shifted left by 8 and OR'ed into
+ *                   RADEON_DP_GUI_MASTER_CNTL
+ * src_pitch_offset  written unmodified to RADEON_SRC_PITCH_OFFSET
+ * dst_pitch_offset  written unmodified to RADEON_DST_PITCH_OFFSET
+ * srcX, srcY        source origin, packed as (srcY << 16) | srcX
+ * dstX, dstY        destination origin, packed as (dstY << 16) | dstX
+ * w, h              rectangle size, packed as (h << 16) | w
+ *
+ * This function does not flush the ring or wait for the engine itself; the
+ * caller visible in this file (RADEONDownloadFromScreen) issues FLUSH_RING()
+ * and RADEONWaitForIdleMMIO() separately.
+ *
+ * NOTE(review): the coordinates and sizes are packed without masking, so this
+ * assumes each is non-negative and fits in 16 bits -- confirm against callers.
+ * NOTE(review): `info` is not referenced directly below; presumably the
+ * ACCEL_* macros use it -- confirm against their definitions.
+ */
+static void
+RADEONBlitChunk(ScrnInfoPtr pScrn, CARD32 datatype, CARD32 src_pitch_offset,
+ CARD32 dst_pitch_offset, int srcX, int srcY, int dstX, int dstY,
+ int w, int h)
+{
+ RADEONInfoPtr info = RADEONPTR(pScrn);
+ ACCEL_PREAMBLE();
+
+ BEGIN_ACCEL(6); /* count must match the six OUT_ACCEL_REG() writes below */
+ OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL,
+ RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+ RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+ RADEON_GMC_BRUSH_NONE |
+ (datatype << 8) | /* caller-supplied datatype placed at bit 8 */
+ RADEON_GMC_SRC_DATATYPE_COLOR |
+ RADEON_ROP3_S | /* presumably a plain source copy -- confirm in register docs */
+ RADEON_DP_SRC_SOURCE_MEMORY |
+ RADEON_GMC_CLR_CMP_CNTL_DIS |
+ RADEON_GMC_WR_MSK_DIS);
+ OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, src_pitch_offset);
+ OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset);
+ OUT_ACCEL_REG(RADEON_SRC_Y_X, (srcY << 16) | srcX); /* Y in high half, X in low half */
+ OUT_ACCEL_REG(RADEON_DST_Y_X, (dstY << 16) | dstX);
+ OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w); /* height high, width low */
+ FINISH_ACCEL();
+}
+#endif
+
static Bool
FUNC_NAME(RADEONDownloadFromScreen)(PixmapPtr pSrc, int x, int y, int w, int h,
char *dst, int dst_pitch)
{
-#if X_BYTE_ORDER == X_BIG_ENDIAN
+#if defined(ACCEL_CP) || X_BYTE_ORDER == X_BIG_ENDIAN
RINFO_FROM_SCREEN(pSrc->drawable.pScreen);
+#endif
+#if X_BYTE_ORDER == X_BIG_ENDIAN
unsigned char *RADEONMMIO = info->MMIO;
unsigned int swapper = info->ModeReg.surface_cntl &
~(RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP |
RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP);
#endif
- unsigned char *src = pSrc->devPrivate.ptr;
+ CARD8 *src = pSrc->devPrivate.ptr;
int src_pitch = exaGetPixmapPitch(pSrc);
int bpp = pSrc->drawable.bitsPerPixel;
+#ifdef ACCEL_CP
+ CARD32 datatype, src_pitch_offset, scratch_pitch = (w * bpp/8 + 63) & ~63, scratch_off = 0;
+ drmBufPtr scratch;
+#endif
TRACE;
+#ifdef ACCEL_CP
/*
- * This is currently done without DMA until I have ironed out the
- * various endian issues with R300 among others
+ * Try to accelerate download. Use an indirect buffer as scratch space,
+ * blitting the bits to one half while copying them out of the other one and
+ * then swapping the halves.
*/
+ if (info->accelDFS && bpp != 24 && RADEONGetDatatypeBpp(bpp, &datatype) &&
+ RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset) &&
+ (scratch = RADEONCPGetBuffer(pScrn)))
+ {
+ int swap = RADEON_HOST_DATA_SWAP_NONE, wpass = w * bpp / 8;
+ int hpass = min(h, scratch->total/2 / scratch_pitch);
+ CARD32 scratch_pitch_offset = scratch_pitch << 16
+ | (info->gartLocation + info->bufStart
+ + scratch->idx * scratch->total) >> 10;
+ drmRadeonIndirect indirect;
+ ACCEL_PREAMBLE();
+
+ RADEON_SWITCH_TO_2D();
+
+ /* Kick the first blit as early as possible */
+ RADEONBlitChunk(pScrn, datatype, src_pitch_offset, scratch_pitch_offset,
+ x, y, 0, 0, w, hpass);
+ FLUSH_RING();
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ switch (bpp) {
+ case 16:
+ swap = RADEON_HOST_DATA_SWAP_16BIT;
+ break;
+ case 32:
+ swap = RADEON_HOST_DATA_SWAP_32BIT;
+ break;
+ }
+#endif
+
+ while (h) {
+ int oldhpass = hpass;
+
+ src = (CARD8*)scratch->address + scratch_off;
+
+ y += oldhpass;
+ h -= oldhpass;
+ hpass = min(h, scratch->total/2 / scratch_pitch);
+
+ /* Prepare next blit if anything's left */
+ if (hpass) {
+ scratch_off = scratch->total/2 - scratch_off;
+ RADEONBlitChunk(pScrn, datatype, src_pitch_offset, scratch_pitch_offset + (scratch_off >> 10),
+ x, y, 0, 0, w, hpass);
+ }
+
+ /* Wait for previous blit to complete */
+ RADEONWaitForIdleMMIO(pScrn);
+
+ /* Kick next blit */
+ if (hpass)
+ FLUSH_RING();
+
+ /* Copy out data from previous blit */
+ if (wpass == scratch_pitch && wpass == dst_pitch) {
+ RADEONCopySwap((CARD8*)dst, src, wpass * oldhpass, swap);
+ dst += dst_pitch * oldhpass;
+ } else while (oldhpass--) {
+ RADEONCopySwap((CARD8*)dst, src, wpass, swap);
+ src += scratch_pitch;
+ dst += dst_pitch;
+ }
+ }
+
+ indirect.idx = scratch->idx;
+ indirect.start = indirect.end = 0;
+ indirect.discard = 1;
+
+ drmCommandWriteRead(info->drmFD, DRM_RADEON_INDIRECT,
+ &indirect, sizeof(drmRadeonIndirect));
+
+ return TRUE;
+ }
+#endif
+
+ /* Can't accelerate download */
exaWaitSync(pSrc->drawable.pScreen);
#if X_BYTE_ORDER == X_BIG_ENDIAN