diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2009-11-29 21:16:49 +0000 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2009-11-30 00:36:49 +0000 |
commit | 19d8c0cf50e98909c533ebfce3a0dd3f72b755c1 (patch) | |
tree | 5295b56f47662855923c6e65946c63f38d330ff6 | |
parent | f7540f06090753cba1190aa9e8cdea05a9512077 (diff) |
uxa: PutImage acceleration
Avoid waiting on a dirty buffer object by streaming the upload to a fresh
buffer that is not GPU-hot and then blitting it to the destination.
This should help to redress the regression reported in bug 18075:
[UXA] XPutImage performance regression
https://bugs.freedesktop.org/show_bug.cgi?id=18075
Using the particular synthetic benchmark in question on a g45:
Before:
9542.910448 Ops/s; put composition (!); 15x15
5623.271889 Ops/s; put composition (!); 75x75
1685.520362 Ops/s; put composition (!); 250x250
After:
40173.865300 Ops/s; put composition (!); 15x15
28670.280612 Ops/s; put composition (!); 75x75
4794.368601 Ops/s; put composition (!); 250x250
which, while not stellar performance, is at least an improvement. As
anticipated, this has little impact on the non-fallback RENDER paths; for
instance, the current cairo-xlib backend is unaffected by this change.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- | src/i830_uxa.c | 205 |
1 files changed, 205 insertions, 0 deletions
diff --git a/src/i830_uxa.c b/src/i830_uxa.c
index b11f2f75..5f3d5059 100644
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@@ -627,6 +627,208 @@ static void i830_uxa_finish_access(PixmapPtr pixmap)
 	}
 }
 
+/*
+ * Upload an image covering the whole of pixmap, without waiting on the GPU.
+ *
+ * If the pixmap has pending batch reads or its bo is busy, a fresh bo is
+ * allocated and installed in the pixmap in place of the old one; otherwise
+ * the current bo is reused. The image is then copied in through a GTT
+ * mapping of that bo.
+ *
+ * src points at the first source row and src_pitch is the distance in
+ * bytes between consecutive source rows.
+ *
+ * Returns TRUE once the image has been copied, FALSE if the replacement bo
+ * would be larger than max_gtt_map_size, cannot be allocated, or if the bo
+ * cannot be mapped.
+ */
+static Bool
+i830_uxa_pixmap_swap_bo_with_image(PixmapPtr pixmap,
+				   char *src, int src_pitch)
+{
+	ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct intel_pixmap *priv;
+	dri_bo *bo;
+	uint32_t tiling = I915_TILING_X;
+	int stride;
+	int w = pixmap->drawable.width;
+	int h = pixmap->drawable.height;
+
+	priv = i830_get_pixmap_intel(pixmap);
+
+	if (priv->batch_read_domains || drm_intel_bo_busy(priv->bo)) {
+		unsigned int size;
+
+		/* The current bo is in use: upload into a fresh one instead. */
+		size = i830_uxa_pixmap_compute_size (pixmap, w, h,
+						     &tiling, &stride);
+		if (size > intel->max_gtt_map_size)
+			return FALSE;
+
+		bo = drm_intel_bo_alloc(intel->bufmgr, "pixmap", size, 0);
+		if (bo == NULL)
+			return FALSE;
+
+		if (tiling != I915_TILING_NONE)
+			drm_intel_bo_set_tiling(bo, &tiling, stride);
+
+		/* Drop the old bo and clear the tracking state tied to it. */
+		dri_bo_unreference(priv->bo);
+		priv->bo = bo;
+		priv->tiling = tiling;
+		priv->batch_read_domains = priv->batch_write_domain = 0;
+		priv->flush_read_domains = priv->flush_write_domain = 0;
+		list_del(&priv->batch);
+		list_del(&priv->flush);
+		pixmap->drawable.pScreen->ModifyPixmapHeader(pixmap,
+							     w, h,
+							     0, 0,
+							     stride, NULL);
+	} else {
+		bo = priv->bo;
+		stride = i830_pixmap_pitch(pixmap);
+	}
+
+	if (drm_intel_gem_bo_map_gtt(bo)) {
+		xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+			   "%s: bo map failed\n", __FUNCTION__);
+		return FALSE;
+	}
+
+	if (src_pitch == stride) {
+		memcpy (bo->virtual, src, src_pitch * h);
+	} else {
+		char *dst = bo->virtual;
+
+		/* Pitches differ: copy only the pixel bytes of each row. */
+		w *= pixmap->drawable.bitsPerPixel/8;
+		while (h--) {
+			memcpy (dst, src, w);
+			src += src_pitch;
+			dst += stride;
+		}
+	}
+
+	drm_intel_gem_bo_unmap_gtt(bo);
+
+	return TRUE;
+}
+
+/*
+ * UXA put_image hook: upload a w x h image to position (x, y) of pixmap.
+ *
+ * An upload that covers the whole pixmap is handed to
+ * i830_uxa_pixmap_swap_bo_with_image(). Otherwise the image is transferred
+ * with CopyArea from a scratch pixmap: when the destination is busy the
+ * data is first copied into the bo of a newly created pixmap; when it is
+ * idle a scratch pixmap header is pointed directly at src.
+ *
+ * src points at the first source row and src_pitch is the distance in
+ * bytes between consecutive source rows.
+ *
+ * Returns TRUE if the upload was performed, FALSE if a scratch pixmap, its
+ * bo mapping, or a scratch GC could not be obtained.
+ */
+static Bool i830_uxa_put_image(PixmapPtr pixmap,
+			       int x, int y,
+			       int w, int h,
+			       char *src, int src_pitch)
+{
+	ScreenPtr screen = pixmap->drawable.pScreen;
+	ScrnInfoPtr scrn = xf86Screens[screen->myNum];
+	PixmapPtr scratch;
+	struct intel_pixmap *priv;
+	Bool scratch_is_header;
+	GCPtr gc;
+	Bool ret;
+
+	if (x == 0 && y == 0 &&
+	    w == pixmap->drawable.width &&
+	    h == pixmap->drawable.height)
+	{
+		/* Replace GPU hot bo with new CPU data. */
+		return i830_uxa_pixmap_swap_bo_with_image(pixmap,
+							  src, src_pitch);
+	}
+
+	priv = i830_get_pixmap_intel(pixmap);
+	if (priv->batch_read_domains || drm_intel_bo_busy(priv->bo)) {
+		dri_bo *bo;
+		int stride;
+
+		/* Partial replacement, copy incoming image to a bo and blit. */
+		scratch = (*screen->CreatePixmap)(screen, w, h,
+						  pixmap->drawable.depth,
+						  UXA_CREATE_PIXMAP_FOR_MAP);
+		if (!scratch)
+			return FALSE;
+
+		bo = i830_get_pixmap_bo(scratch);
+		/* Guard against a scratch pixmap that has no bo to map. */
+		if (bo == NULL) {
+			(*screen->DestroyPixmap) (scratch);
+			return FALSE;
+		}
+
+		if (drm_intel_gem_bo_map_gtt(bo)) {
+			(*screen->DestroyPixmap) (scratch);
+			xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+				   "%s: bo map failed\n", __FUNCTION__);
+			return FALSE;
+		}
+
+		stride = i830_pixmap_pitch(scratch);
+		if (src_pitch == stride) {
+			memcpy (bo->virtual, src, stride * h);
+		} else {
+			char *dst = bo->virtual;
+			int row_length = w * pixmap->drawable.bitsPerPixel/8;
+			int num_rows = h;
+			while (num_rows--) {
+				memcpy (dst, src, row_length);
+				src += src_pitch;
+				dst += stride;
+			}
+		}
+
+		drm_intel_gem_bo_unmap_gtt(bo);
+		scratch_is_header = FALSE;
+	} else {
+		/* bo is not busy so can be mapped without a stall, upload in-place. */
+		scratch = GetScratchPixmapHeader(screen, w, h,
+						 pixmap->drawable.depth,
+						 pixmap->drawable.bitsPerPixel,
+						 src_pitch, src);
+		/* GetScratchPixmapHeader() returns NULL on failure. */
+		if (!scratch)
+			return FALSE;
+
+		scratch_is_header = TRUE;
+	}
+
+	ret = FALSE;
+	gc = GetScratchGC(pixmap->drawable.depth, screen);
+	if (gc) {
+		ValidateGC(&pixmap->drawable, gc);
+
+		(*gc->ops->CopyArea)(&scratch->drawable,
+				     &pixmap->drawable,
+				     gc, 0, 0, w, h, x, y);
+
+		FreeScratchGC(gc);
+		ret = TRUE;
+	}
+
+	/* A header only wraps src; a created pixmap must be destroyed. */
+	if (scratch_is_header)
+		FreeScratchPixmapHeader(scratch);
+	else
+		(*screen->DestroyPixmap)(scratch);
+
+	return ret;
+}
+
 void i830_uxa_block_handler(ScreenPtr screen)
 {
 	ScrnInfoPtr scrn = xf86Screens[screen->myNum];
@@ -797,6 +999,9 @@ Bool i830_uxa_init(ScreenPtr screen)
 		intel->uxa_driver->done_composite = i830_done_composite;
 	}
 
+	/* PutImage */
+	intel->uxa_driver->put_image = i830_uxa_put_image;
+
 	intel->uxa_driver->prepare_access = i830_uxa_prepare_access;
 	intel->uxa_driver->finish_access = i830_uxa_finish_access;
 	intel->uxa_driver->pixmap_is_offscreen = i830_uxa_pixmap_is_offscreen;