summaryrefslogtreecommitdiff
path: root/src/sna/kgem.h
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2012-09-11 21:48:24 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2012-09-21 11:56:16 +0100
commit: 0be1d964713ca407f029278a8256d02d925dc9da (patch)
tree: d360eb12a9eed2b0938df9a5c5475da2bf82c1b2 /src/sna/kgem.h
parent: d853064e7eebc5719645c12605782f995131a6fe (diff)
sna: Use inplace X tiling for LLC uploads
Based on a suggestion by Chad Versace (taken from a patch for mesa).

This allows for a faster upload of pixel data through a ShmImage, or for
complete replacement of a GPU bo.

Using a modified version of x11perf to upload to a pixmap rather than
scanout on an IVB i7-3720qm:

Before:
  40000000 trep @ 0.0007 msec (1410000.0/sec): ShmPutImage 10x10 square
   4000000 trep @ 0.0110 msec (  90700.0/sec): ShmPutImage 100x100 square
    160000 trep @ 0.1689 msec (   5920.0/sec): ShmPutImage 500x500 square

After:
  40000000 trep @ 0.0007 msec (1450000.0/sec): ShmPutImage 10x10 square
   6000000 trep @ 0.0061 msec ( 164000.0/sec): ShmPutImage 100x100 square
    400000 trep @ 0.1126 msec (   8880.0/sec): ShmPutImage 500x500 square

However, the real takeaway from this is that the overheads for ShmPutImage
are substantial, only hitting around 70% expected efficiency, and
overshadowed by PutImage, which for reference is

  60000000 trep @ 0.0006 msec (1800000.0/sec): PutImage 10x10 square

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src/sna/kgem.h')
-rw-r--r-- src/sna/kgem.h 10
1 files changed, 8 insertions, 2 deletions
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 832b3f06..cdbb7cbf 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -262,6 +262,7 @@ struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format);
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset);
+int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo);
bool kgem_retire(struct kgem *kgem);
@@ -419,6 +420,8 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo);
+void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
+void __kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr);
uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo);
bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
@@ -494,7 +497,7 @@ static inline bool kgem_bo_is_mappable(struct kgem *kgem,
return bo->presumed_offset + kgem_bo_size(bo) <= kgem->aperture_mappable;
}
-static inline bool kgem_bo_mapped(struct kgem_bo *bo)
+static inline bool kgem_bo_mapped(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: map=%p, tiling=%d, domain=%d\n",
__FUNCTION__, bo->map, bo->tiling, bo->domain));
@@ -502,12 +505,15 @@ static inline bool kgem_bo_mapped(struct kgem_bo *bo)
if (bo->map == NULL)
return bo->tiling == I915_TILING_NONE && bo->domain == DOMAIN_CPU;
+ if (bo->tiling == I915_TILING_X && !bo->scanout && kgem->has_llc)
+ return IS_CPU_MAP(bo->map);
+
return IS_CPU_MAP(bo->map) == !bo->tiling;
}
static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo)
{
- if (kgem_bo_mapped(bo))
+ if (kgem_bo_mapped(kgem, bo))
return true;
if (!bo->tiling && kgem->has_llc)