summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Deucher <alexdeucher@gmail.com>2009-02-06 11:04:59 -0500
committerAlex Deucher <alexdeucher@gmail.com>2009-02-06 11:04:59 -0500
commitdef317e22d072405cd95ddb19d17eacd784ffd9e (patch)
treec63b37818becbe87fa87262a8e45ff7f5587b197
parent0dfadc1843e0d14b9cc1ee19a72f4fd60a2c495b (diff)
R6xx/R7xx EXA: add accelerated UTS/DFS hooks
I'm not sure how much of a win these are. I need to do some benchmarking.
-rw-r--r--src/r600_exa.c134
1 files changed, 114 insertions, 20 deletions
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 6f158e9e..88a9d913 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -2028,23 +2028,69 @@ R600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
{
ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
RADEONInfoPtr info = RADEONPTR(pScrn);
-// struct radeon_accel_state *accel_state = info->accel_state;
- uint8_t *dst = (pointer)((char *)info->FB + exaGetPixmapOffset(pDst));
- int dst_pitch = exaGetPixmapPitch(pDst);
+ uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
+ uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
+ uint32_t dst_height = pDst->drawable.height;
int bpp = pDst->drawable.bitsPerPixel;
+ uint32_t scratch_mc_addr;
+ int wpass = w * (bpp/8);
+ int scratch_pitch_bytes = (wpass + 255) & ~255;
+ uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
+ int scratch_offset = 0, hpass, temph;
+ char *dst;
+ drmBufPtr scratch;
+
+ if (dst_pitch & 7)
+ return FALSE;
+ if (dst_mc_addr & 0xff)
+ return FALSE;
- //return FALSE;
+ scratch = RADEONCPGetBuffer(pScrn);
+ if (scratch == NULL)
+ return FALSE;
- dst += (x * bpp / 8) + (y * dst_pitch);
- w *= bpp / 8;
+ scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
+ temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
+ dst = (char *)scratch->address;
- while (h--) {
- memcpy(dst, src, w);
+ //memcopy from sys to scratch
+ while (temph--) {
+ memcpy (dst, src, wpass);
src += src_pitch;
- dst += dst_pitch;
+ dst += scratch_pitch_bytes;
+ }
+
+ while (h) {
+ uint32_t offset = scratch_mc_addr + scratch_offset;
+ int oldhpass = hpass;
+ h -= oldhpass;
+ temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
+
+ if (hpass) {
+ scratch_offset = scratch->total/2 - scratch_offset;
+ dst = (char *)scratch->address + scratch_offset;
+ // wait for the engine to be idle
+ R600WaitforIdlePoll(pScrn);
+ //memcopy from sys to scratch
+ while (temph--) {
+ memcpy (dst, src, wpass);
+ src += src_pitch;
+ dst += scratch_pitch_bytes;
+ }
+ }
+ //blit from scratch to vram
+ R600DoPrepareCopy(pScrn,
+ scratch_pitch, w, oldhpass, offset, bpp,
+ dst_pitch, dst_height, dst_mc_addr, bpp,
+ 3, 0xffffffff);
+ R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass);
+ R600DoCopy(pScrn);
+ y += oldhpass;
}
+ R600IBDiscard(pScrn, scratch);
+
return TRUE;
}
@@ -2054,23 +2100,68 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
{
ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
RADEONInfoPtr info = RADEONPTR(pScrn);
-// struct radeon_accel_state *accel_state = info->accel_state;
- uint8_t *src = (pointer)((char *)info->FB + exaGetPixmapOffset(pSrc));
- int src_pitch = exaGetPixmapPitch(pSrc);
- int bpp = pSrc->drawable.bitsPerPixel;
+ uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
+ uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset;
+ uint32_t src_width = pSrc->drawable.width;
+ uint32_t src_height = pSrc->drawable.height;
+ int bpp = pSrc->drawable.bitsPerPixel;
+ uint32_t scratch_mc_addr;
+ int scratch_pitch_bytes = (dst_pitch + 255) & ~255;
+ int scratch_offset = 0, hpass;
+ uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8);
+ int wpass = w * (bpp/8);
+ drmBufPtr scratch;
+
+ if (src_pitch & 7)
+ return FALSE;
- //return FALSE;
+ scratch = RADEONCPGetBuffer(pScrn);
+ if (scratch == NULL)
+ return FALSE;
- src += (x * bpp / 8) + (y * src_pitch);
- w *= bpp / 8;
+ scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
+ hpass = min(h, scratch->total/2 / scratch_pitch);
+
+ //blit from vram to scratch
+ R600DoPrepareCopy(pScrn,
+ src_pitch, src_width, src_height, src_mc_addr, bpp,
+ scratch_pitch, hpass, scratch_mc_addr, bpp,
+ 3, 0xffffffff);
+ R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
+ R600DoCopy(pScrn);
+
+ while (h) {
+ char *src = (char *)scratch->address + scratch_offset;
+ int oldhpass = hpass;
+ h -= oldhpass;
+ y += oldhpass;
+ hpass = min(h, scratch->total/2 / scratch_pitch);
+
+ if (hpass) {
+ scratch_offset = scratch->total/2 - scratch_offset;
+ //blit from vram to scratch
+ R600DoPrepareCopy(pScrn,
+ src_pitch, src_width, src_height, src_mc_addr, bpp,
+ scratch_pitch, hpass, scratch_mc_addr + scratch_offset, bpp,
+ 3, 0xffffffff);
+ R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass);
+ R600DoCopy(pScrn);
+ }
- while (h--) {
- memcpy(dst, src, w);
- src += src_pitch;
- dst += dst_pitch;
+ // wait for the engine to be idle
+ R600WaitforIdlePoll(pScrn);
+ //memcopy from scratch to sys
+ while (oldhpass--) {
+ memcpy (dst, src, wpass);
+ dst += dst_pitch;
+ src += scratch_pitch_bytes;
+ }
}
+ R600IBDiscard(pScrn, scratch);
+
return TRUE;
+
}
static int
@@ -3466,6 +3557,9 @@ R600DrawInit(ScreenPtr pScreen)
info->accel_state->exa->PrepareAccess = R600PrepareAccess;
info->accel_state->exa->FinishAccess = R600FinishAccess;
+ info->accel_state->exa->UploadToScreen = R600UploadToScreen;
+ info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen;
+
info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
info->accel_state->exa->pixmapOffsetAlign = 256;
info->accel_state->exa->pixmapPitchAlign = 256;