summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2013-06-27 10:45:22 +0100
committerChris Wilson <chris@chris-wilson.co.uk>2013-06-27 15:27:48 +0100
commitb615ce97ec43ea8fe02e995244c757138abcb2de (patch)
tree3403b4a44691ef206cadb142f786633e33180ddb
parent6493c8c65f93ad2554c2512a07ba640e966fd026 (diff)
sna: Add a fast path for reading back from tiled X bo
This is lower latency than the double copy incurred for first moving the bo to the CPU and then copying it back - but due to the less efficient tiled memcpy, it has lower throughput. So x11perf -shmget500 suffers (by about 30%) but real world applications improve by about 2x.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r--  src/sna/sna_accel.c | 62
1 file changed, 60 insertions, 2 deletions
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 3783933c..46e383d1 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4028,7 +4028,7 @@ try_upload_tiled_x(PixmapPtr pixmap, RegionRec *region,
if (__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))
return false;
- dst = __kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo);
+ dst = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo);
if (dst == NULL)
return false;
@@ -4048,7 +4048,6 @@ try_upload_tiled_x(PixmapPtr pixmap, RegionRec *region,
box->x2 - box->x1, box->y2 - box->y1);
box++;
} while (--n);
- __kgem_bo_unmap__cpu(&sna->kgem, priv->gpu_bo, dst);
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
if (replaces) {
@@ -14345,6 +14344,62 @@ sna_get_image_blt(DrawablePtr drawable,
return ok;
}
+static bool
+sna_get_image_tiled(DrawablePtr drawable,
+ RegionPtr region,
+ char *dst,
+ unsigned flags)
+{
+ PixmapPtr pixmap = get_drawable_pixmap(drawable);
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ struct sna *sna = to_sna_from_pixmap(pixmap);
+ char *src;
+
+ if (!sna->kgem.memcpy_from_tiled_x)
+ return false;
+
+ if (flags & MOVE_INPLACE_HINT)
+ return false;
+
+ if (priv == NULL || priv->gpu_bo == NULL)
+ return false;
+
+ if (priv->gpu_bo->tiling != I915_TILING_X)
+ return false;
+
+ if (priv->gpu_bo->scanout)
+ return false;
+
+ if (!sna->kgem.has_llc && priv->gpu_bo->domain != DOMAIN_CPU)
+ return false;
+
+ if (priv->gpu_damage == NULL ||
+ !(DAMAGE_IS_ALL(priv->gpu_damage) ||
+ sna_damage_contains_box__no_reduce(priv->gpu_damage,
+ &region->extents)))
+ return false;
+
+ src = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo);
+ if (src == NULL)
+ return false;
+
+ DBG(("%s: download through a tiled CPU map\n", __FUNCTION__));
+
+ kgem_bo_sync__cpu_full(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC);
+
+ memcpy_from_tiled_x(&sna->kgem, src, dst,
+ pixmap->drawable.bitsPerPixel,
+ priv->gpu_bo->pitch,
+ PixmapBytePad(region->extents.x2 - region->extents.x1,
+ drawable->depth),
+ region->extents.x1, region->extents.y1,
+ 0, 0,
+ region->extents.x2 - region->extents.x1,
+ region->extents.y2 - region->extents.y1);
+
+ return true;
+}
+
static void
sna_get_image(DrawablePtr drawable,
int x, int y, int w, int h,
@@ -14379,6 +14434,9 @@ sna_get_image(DrawablePtr drawable,
if (can_blt && sna_get_image_blt(drawable, &region, dst, flags))
return;
+ if (can_blt && sna_get_image_tiled(drawable, &region, dst, flags))
+ return;
+
if (!sna_drawable_move_region_to_cpu(drawable, &region, flags))
return;