author    Chris Wilson <chris@chris-wilson.co.uk>  2012-09-11 21:48:24 +0100
committer Chris Wilson <chris@chris-wilson.co.uk>  2012-09-21 11:56:16 +0100
commit    0be1d964713ca407f029278a8256d02d925dc9da (patch)
tree      d360eb12a9eed2b0938df9a5c5475da2bf82c1b2 /src/sna/sna_io.c
parent    d853064e7eebc5719645c12605782f995131a6fe (diff)
sna: Use inplace X tiling for LLC uploads
Based on a suggestion by Chad Versace (taken from a patch for mesa).

This allows for a faster upload of pixel data through a ShmImage, or for
complete replacement of a GPU bo.

Using a modified version of x11perf to upload to a pixmap rather than
scanout on an IVB i7-3720qm:

Before:
  40000000 trep @   0.0007 msec (1410000.0/sec): ShmPutImage 10x10 square
   4000000 trep @   0.0110 msec (  90700.0/sec): ShmPutImage 100x100 square
    160000 trep @   0.1689 msec (   5920.0/sec): ShmPutImage 500x500 square

After:
  40000000 trep @   0.0007 msec (1450000.0/sec): ShmPutImage 10x10 square
   6000000 trep @   0.0061 msec ( 164000.0/sec): ShmPutImage 100x100 square
    400000 trep @   0.1126 msec (   8880.0/sec): ShmPutImage 500x500 square

However, the real takeaway from this is that the overheads for
ShmPutImage are substantial, only hitting around 70% expected efficiency,
and overshadowed by PutImage, which for reference is

  60000000 trep @   0.0006 msec (1800000.0/sec): PutImage 10x10 square

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
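For readers skimming the patch, here is a minimal sketch of the addressing scheme an inplace upload into an X-tiled bo has to follow. It is an illustration only, assuming the classic Intel X-tile geometry (4KiB tiles laid out as 512 bytes wide by 8 rows tall) and ignoring swizzling; the helpers x_tiled_offset and copy_row_to_x_tiled are invented for this sketch and are not the memcpy_to_tiled_x that the patch actually calls:

/*
 * Illustration only: byte addressing inside an Intel X-tiled surface,
 * assuming 4KiB tiles of 512 bytes x 8 rows and no swizzling.
 */
#include <stdint.h>
#include <string.h>

#define TILE_WIDTH  512u	/* bytes per tile row */
#define TILE_HEIGHT 8u		/* rows per tile */
#define TILE_SIZE   (TILE_WIDTH * TILE_HEIGHT)

/* Byte offset of (x, y) in a surface whose pitch (in bytes) is a
 * multiple of TILE_WIDTH: tiles are stored whole, row-major, and each
 * tile is itself 8 consecutive 512-byte rows. */
static uint32_t x_tiled_offset(uint32_t x, uint32_t y, uint32_t pitch)
{
	uint32_t tile = (y / TILE_HEIGHT) * (pitch / TILE_WIDTH) + x / TILE_WIDTH;
	return tile * TILE_SIZE + (y % TILE_HEIGHT) * TILE_WIDTH + x % TILE_WIDTH;
}

/* Copy one linear scanline into the tiled destination, splitting the
 * row at tile boundaries so each memcpy stays inside a single tile.
 * x and len are in bytes, i.e. pixel coordinates pre-multiplied by
 * the bytes per pixel. */
static void copy_row_to_x_tiled(uint8_t *dst, const uint8_t *src,
				uint32_t x, uint32_t y,
				uint32_t len, uint32_t pitch)
{
	while (len) {
		uint32_t span = TILE_WIDTH - x % TILE_WIDTH;
		if (span > len)
			span = len;
		memcpy(dst + x_tiled_offset(x, y, pitch), src, span);
		src += span;
		x += span;
		len -= span;
	}
}

The real path must additionally fold in the per-bo swizzle reported by the kernel (hence the kgem_bo_get_swizzling call below), and it refuses pre-gen5 hardware outright because bit17 swizzling there depends on physical page addresses that userspace cannot see.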
Diffstat (limited to 'src/sna/sna_io.c')
-rw-r--r--  src/sna/sna_io.c | 95
1 file changed, 90 insertions(+), 5 deletions(-)
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index a466f558..cdaadc01 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -482,6 +482,49 @@ fallback:
 	sna->blt_state.fill_bo = 0;
 }
 
+static bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo)
+{
+	if (kgem->gen < 50) /* bit17 swizzling :( */
+		return false;
+
+	if (bo->tiling != I915_TILING_X)
+		return false;
+
+	if (bo->scanout)
+		return false;
+
+	return bo->domain == DOMAIN_CPU || kgem->has_llc;
+}
+
+static bool
+write_boxes_inplace__tiled(struct kgem *kgem,
+			   const uint8_t *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
+			   struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
+			   const BoxRec *box, int n)
+{
+	uint8_t *dst;
+	int swizzle;
+
+	assert(bo->tiling == I915_TILING_X);
+
+	dst = __kgem_bo_map__cpu(kgem, bo);
+	if (dst == NULL)
+		return false;
+
+	kgem_bo_sync__cpu(kgem, bo);
+	swizzle = kgem_bo_get_swizzling(kgem, bo);
+	do {
+		memcpy_to_tiled_x(src, dst, bpp, swizzle, stride, bo->pitch,
+				  box->x1 + src_dx, box->y1 + src_dy,
+				  box->x1 + dst_dx, box->y1 + dst_dy,
+				  box->x2 - box->x1, box->y2 - box->y1);
+		box++;
+	} while (--n);
+	__kgem_bo_unmap__cpu(kgem, bo, dst);
+
+	return true;
+}
+
 static bool write_boxes_inplace(struct kgem *kgem,
 				const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
 				struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
@@ -492,6 +535,11 @@ static bool write_boxes_inplace(struct kgem *kgem,
 	DBG(("%s x %d, handle=%d, tiling=%d\n",
 	     __FUNCTION__, n, bo->handle, bo->tiling));
 
+	if (upload_inplace__tiled(kgem, bo) &&
+	    write_boxes_inplace__tiled(kgem, src, stride, bpp, src_dx, src_dy,
+				       bo, dst_dx, dst_dy, box, n))
+		return true;
+
 	if (!kgem_bo_can_map(kgem, bo))
 		return false;
 
@@ -539,7 +587,7 @@ static bool upload_inplace(struct kgem *kgem,
 {
 	unsigned int bytes;
 
-	if (!kgem_bo_can_map(kgem, bo))
+	if (!kgem_bo_can_map(kgem, bo) && !upload_inplace__tiled(kgem, bo))
 		return false;
 
 	if (FORCE_INPLACE)
@@ -871,8 +919,6 @@ write_boxes_inplace__xor(struct kgem *kgem,
 			 const BoxRec *box, int n,
 			 uint32_t and, uint32_t or)
 {
-	int dst_pitch = bo->pitch;
-	int src_pitch = stride;
 	void *dst;
 
 	DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));
@@ -888,10 +934,22 @@ write_boxes_inplace__xor(struct kgem *kgem,
 		     box->x1 + src_dx, box->y1 + src_dy,
 		     box->x1 + dst_dx, box->y1 + dst_dy,
 		     box->x2 - box->x1, box->y2 - box->y1,
-		     bpp, src_pitch, dst_pitch));
+		     bpp, stride, bo->pitch));
+
+		assert(box->x2 > box->x1);
+		assert(box->y2 > box->y1);
+
+		assert(box->x1 + dst_dx >= 0);
+		assert((box->x2 + dst_dx)*bpp <= 8*bo->pitch);
+		assert(box->y1 + dst_dy >= 0);
+		assert((box->y2 + dst_dy)*bo->pitch <= kgem_bo_size(bo));
+
+		assert(box->x1 + src_dx >= 0);
+		assert((box->x2 + src_dx)*bpp <= 8*stride);
+		assert(box->y1 + src_dy >= 0);
 
 		memcpy_xor(src, dst, bpp,
-			   src_pitch, dst_pitch,
+			   stride, bo->pitch,
 			   box->x1 + src_dx, box->y1 + src_dy,
 			   box->x1 + dst_dx, box->y1 + dst_dy,
 			   box->x2 - box->x1, box->y2 - box->y1,
@@ -1282,6 +1340,19 @@ bool sna_replace(struct sna *sna,
 	     pixmap->drawable.bitsPerPixel,
 	     bo->tiling, busy));
 
+	if (!busy && upload_inplace__tiled(kgem, bo)) {
+		BoxRec box;
+
+		box.x1 = box.y1 = 0;
+		box.x2 = pixmap->drawable.width;
+		box.y2 = pixmap->drawable.height;
+
+		if (write_boxes_inplace__tiled(kgem, src,
+					       stride, pixmap->drawable.bitsPerPixel, 0, 0,
+					       bo, 0, 0, &box, 1))
+			return true;
+	}
+
 	if ((busy || !kgem_bo_can_map(kgem, bo)) &&
 	    indirect_replace(sna, pixmap, bo, src, stride))
 		return true;
@@ -1304,6 +1375,19 @@ bool sna_replace(struct sna *sna,
 				  (pixmap->drawable.height-1)*stride + pixmap->drawable.width*pixmap->drawable.bitsPerPixel/8))
 			goto err;
 	} else {
+		if (upload_inplace__tiled(kgem, bo)) {
+			BoxRec box;
+
+			box.x1 = box.y1 = 0;
+			box.x2 = pixmap->drawable.width;
+			box.y2 = pixmap->drawable.height;
+
+			if (write_boxes_inplace__tiled(kgem, src,
+						       stride, pixmap->drawable.bitsPerPixel, 0, 0,
+						       bo, 0, 0, &box, 1))
+				goto done;
+		}
+
 		if (kgem_bo_is_mappable(kgem, bo)) {
 			dst = kgem_bo_map(kgem, bo);
 			if (!dst)
@@ -1330,6 +1414,7 @@ bool sna_replace(struct sna *sna,
 		}
 	}
 
+done:
 	if (bo != *_bo)
 		kgem_bo_destroy(kgem, *_bo);
 	*_bo = bo;