diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2013-06-27 10:45:22 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2013-06-27 15:27:48 +0100 |
commit | b615ce97ec43ea8fe02e995244c757138abcb2de (patch) | |
tree | 3403b4a44691ef206cadb142f786633e33180ddb | |
parent | 6493c8c65f93ad2554c2512a07ba640e966fd026 (diff) |
sna: Add a fast path for reading back from tiled X bo
This is lower latency than the double copy incurred for first moving the
bo to the CPU and then copying it back - but due to the less efficient
tiled memcpy, it has lower throughput. So x11perf -shmget500 suffers
(by about 30%) but real-world applications improve by about 2x.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- | src/sna/sna_accel.c | 62 |
1 file changed, 60 insertions(+), 2 deletions(-)
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c index 3783933c..46e383d1 100644 --- a/src/sna/sna_accel.c +++ b/src/sna/sna_accel.c @@ -4028,7 +4028,7 @@ try_upload_tiled_x(PixmapPtr pixmap, RegionRec *region, if (__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)) return false; - dst = __kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); + dst = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); if (dst == NULL) return false; @@ -4048,7 +4048,6 @@ try_upload_tiled_x(PixmapPtr pixmap, RegionRec *region, box->x2 - box->x1, box->y2 - box->y1); box++; } while (--n); - __kgem_bo_unmap__cpu(&sna->kgem, priv->gpu_bo, dst); if (!DAMAGE_IS_ALL(priv->gpu_damage)) { if (replaces) { @@ -14345,6 +14344,62 @@ sna_get_image_blt(DrawablePtr drawable, return ok; } +static bool +sna_get_image_tiled(DrawablePtr drawable, + RegionPtr region, + char *dst, + unsigned flags) +{ + PixmapPtr pixmap = get_drawable_pixmap(drawable); + struct sna_pixmap *priv = sna_pixmap(pixmap); + struct sna *sna = to_sna_from_pixmap(pixmap); + char *src; + + if (!sna->kgem.memcpy_from_tiled_x) + return false; + + if (flags & MOVE_INPLACE_HINT) + return false; + + if (priv == NULL || priv->gpu_bo == NULL) + return false; + + if (priv->gpu_bo->tiling != I915_TILING_X) + return false; + + if (priv->gpu_bo->scanout) + return false; + + if (!sna->kgem.has_llc && priv->gpu_bo->domain != DOMAIN_CPU) + return false; + + if (priv->gpu_damage == NULL || + !(DAMAGE_IS_ALL(priv->gpu_damage) || + sna_damage_contains_box__no_reduce(priv->gpu_damage, + ®ion->extents))) + return false; + + src = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); + if (src == NULL) + return false; + + DBG(("%s: download through a tiled CPU map\n", __FUNCTION__)); + + kgem_bo_sync__cpu_full(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC); + + memcpy_from_tiled_x(&sna->kgem, src, dst, + pixmap->drawable.bitsPerPixel, + priv->gpu_bo->pitch, + PixmapBytePad(region->extents.x2 - region->extents.x1, + drawable->depth), + region->extents.x1, region->extents.y1, + 0, 0, + 
region->extents.x2 - region->extents.x1, + region->extents.y2 - region->extents.y1); + + return true; +} + static void sna_get_image(DrawablePtr drawable, int x, int y, int w, int h, @@ -14379,6 +14434,9 @@ sna_get_image(DrawablePtr drawable, if (can_blt && sna_get_image_blt(drawable, ®ion, dst, flags)) return; + if (can_blt && sna_get_image_tiled(drawable, ®ion, dst, flags)) + return; + if (!sna_drawable_move_region_to_cpu(drawable, ®ion, flags)) return; |