diff options
author | Alex Deucher <alexdeucher@gmail.com> | 2009-02-06 11:04:59 -0500 |
---|---|---|
committer | Alex Deucher <alexdeucher@gmail.com> | 2009-02-06 11:04:59 -0500 |
commit | def317e22d072405cd95ddb19d17eacd784ffd9e (patch) | |
tree | c63b37818becbe87fa87262a8e45ff7f5587b197 | |
parent | 0dfadc1843e0d14b9cc1ee19a72f4fd60a2c495b (diff) |
R6xx/R7xx EXA: add accelerated UTS/DFS hooks
I'm not sure how much of a win these are. I need to
do some benchmarking.
-rw-r--r-- | src/r600_exa.c | 134 |
1 files changed, 114 insertions, 20 deletions
diff --git a/src/r600_exa.c b/src/r600_exa.c index 6f158e9e..88a9d913 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -2028,23 +2028,69 @@ R600UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, { ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; RADEONInfoPtr info = RADEONPTR(pScrn); -// struct radeon_accel_state *accel_state = info->accel_state; - uint8_t *dst = (pointer)((char *)info->FB + exaGetPixmapOffset(pDst)); - int dst_pitch = exaGetPixmapPitch(pDst); + uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); + uint32_t dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + uint32_t dst_height = pDst->drawable.height; int bpp = pDst->drawable.bitsPerPixel; + uint32_t scratch_mc_addr; + int wpass = w * (bpp/8); + int scratch_pitch_bytes = (wpass + 255) & ~255; + uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8); + int scratch_offset = 0, hpass, temph; + char *dst; + drmBufPtr scratch; + + if (dst_pitch & 7) + return FALSE; + if (dst_mc_addr & 0xff) + return FALSE; - //return FALSE; + scratch = RADEONCPGetBuffer(pScrn); + if (scratch == NULL) + return FALSE; - dst += (x * bpp / 8) + (y * dst_pitch); - w *= bpp / 8; + scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total); + temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes); + dst = (char *)scratch->address; - while (h--) { - memcpy(dst, src, w); + //memcopy from sys to scratch + while (temph--) { + memcpy (dst, src, wpass); src += src_pitch; - dst += dst_pitch; + dst += scratch_pitch_bytes; + } + + while (h) { + uint32_t offset = scratch_mc_addr + scratch_offset; + int oldhpass = hpass; + h -= oldhpass; + temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes); + + if (hpass) { + scratch_offset = scratch->total/2 - scratch_offset; + dst = (char *)scratch->address + scratch_offset; + // wait for the engine to be idle + R600WaitforIdlePoll(pScrn); + //memcopy from sys to scratch + while (temph--) { + memcpy (dst, src, wpass); + src += src_pitch; + dst += scratch_pitch_bytes; + } + } + //blit from scratch to vram + R600DoPrepareCopy(pScrn, + scratch_pitch, w, oldhpass, offset, bpp, + dst_pitch, dst_height, dst_mc_addr, bpp, + 3, 0xffffffff); + R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass); + R600DoCopy(pScrn); + y += oldhpass; } + R600IBDiscard(pScrn, scratch); + return TRUE; } @@ -2054,23 +2100,68 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, { ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; RADEONInfoPtr info = RADEONPTR(pScrn); -// struct radeon_accel_state *accel_state = info->accel_state; - uint8_t *src = (pointer)((char *)info->FB + exaGetPixmapOffset(pSrc)); - int src_pitch = exaGetPixmapPitch(pSrc); - int bpp = pSrc->drawable.bitsPerPixel; + uint32_t src_pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); + uint32_t src_mc_addr = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; + uint32_t src_width = pSrc->drawable.width; + uint32_t src_height = pSrc->drawable.height; + int bpp = pSrc->drawable.bitsPerPixel; + uint32_t scratch_mc_addr; + int scratch_pitch_bytes = (dst_pitch + 255) & ~255; + int scratch_offset = 0, hpass; + uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8); + int wpass = w * (bpp/8); + drmBufPtr scratch; + + if (src_pitch & 7) + return FALSE; - //return FALSE; + scratch = RADEONCPGetBuffer(pScrn); + if (scratch == NULL) + return FALSE; - src += (x * bpp / 8) + (y * src_pitch); - w *= bpp / 8; + scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total); + hpass = min(h, scratch->total/2 / scratch_pitch); + + //blit from vram to scratch + R600DoPrepareCopy(pScrn, + src_pitch, src_width, src_height, src_mc_addr, bpp, + scratch_pitch, hpass, scratch_mc_addr, bpp, + 3, 0xffffffff); + R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); + R600DoCopy(pScrn); + + while (h) { + char *src = (char *)scratch->address + scratch_offset; + int oldhpass = hpass; + h -= oldhpass; + y += oldhpass; + hpass = min(h, scratch->total/2 / scratch_pitch); + + if (hpass) { + scratch_offset = scratch->total/2 - scratch_offset; + //blit from vram to scratch + R600DoPrepareCopy(pScrn, + src_pitch, src_width, src_height, src_mc_addr, bpp, + scratch_pitch, hpass, scratch_mc_addr + scratch_offset, bpp, + 3, 0xffffffff); + R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); + R600DoCopy(pScrn); + } - while (h--) { - memcpy(dst, src, w); - src += src_pitch; - dst += dst_pitch; + // wait for the engine to be idle + R600WaitforIdlePoll(pScrn); + //memcopy from scratch to sys + while (oldhpass--) { + memcpy (dst, src, wpass); + dst += dst_pitch; + src += scratch_pitch_bytes; + } } + R600IBDiscard(pScrn, scratch); + return TRUE; + } static int @@ -3466,6 +3557,9 @@ R600DrawInit(ScreenPtr pScreen) info->accel_state->exa->PrepareAccess = R600PrepareAccess; info->accel_state->exa->FinishAccess = R600FinishAccess; + info->accel_state->exa->UploadToScreen = R600UploadToScreen; + info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen; + info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS; info->accel_state->exa->pixmapOffsetAlign = 256; info->accel_state->exa->pixmapPitchAlign = 256; |