diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2012-09-11 21:48:24 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2012-09-21 11:56:16 +0100 |
commit | 0be1d964713ca407f029278a8256d02d925dc9da (patch) | |
tree | d360eb12a9eed2b0938df9a5c5475da2bf82c1b2 /src/sna/kgem.h | |
parent | d853064e7eebc5719645c12605782f995131a6fe (diff) |
sna: Use inplace X tiling for LLC uploads
Based on a suggestion by Chad Versace (taken from a patch for mesa).
This allows for a faster upload of pixel data through a ShmImage, or for
complete replacement of a GPU bo.
Measured using a modified version of x11perf that uploads to a pixmap rather
than to the scanout, on an IvyBridge (IVB) i7-3720qm:
Before:
40000000 trep @ 0.0007 msec (1410000.0/sec): ShmPutImage 10x10 square
4000000 trep @ 0.0110 msec ( 90700.0/sec): ShmPutImage 100x100 square
160000 trep @ 0.1689 msec ( 5920.0/sec): ShmPutImage 500x500 square
After:
40000000 trep @ 0.0007 msec (1450000.0/sec): ShmPutImage 10x10 square
6000000 trep @ 0.0061 msec ( 164000.0/sec): ShmPutImage 100x100 square
400000 trep @ 0.1126 msec ( 8880.0/sec): ShmPutImage 500x500 square
However, the real takeaway from this is that the overheads for
ShmPutImage are substantial: it reaches only around 70% of the expected
efficiency and is overshadowed by PutImage, which for reference achieves
60000000 trep @ 0.0006 msec (1800000.0/sec): PutImage 10x10 square
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src/sna/kgem.h')
-rw-r--r-- | src/sna/kgem.h | 10 |
1 files changed, 8 insertions, 2 deletions
diff --git a/src/sna/kgem.h b/src/sna/kgem.h index 832b3f06..cdbb7cbf 100644 --- a/src/sna/kgem.h +++ b/src/sna/kgem.h @@ -262,6 +262,7 @@ struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem, uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format); void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset); +int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo); void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo); bool kgem_retire(struct kgem *kgem); @@ -419,6 +420,8 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo); void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo); void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo); void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo); +void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo); +void __kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr); uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo); bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo, @@ -494,7 +497,7 @@ static inline bool kgem_bo_is_mappable(struct kgem *kgem, return bo->presumed_offset + kgem_bo_size(bo) <= kgem->aperture_mappable; } -static inline bool kgem_bo_mapped(struct kgem_bo *bo) +static inline bool kgem_bo_mapped(struct kgem *kgem, struct kgem_bo *bo) { DBG(("%s: map=%p, tiling=%d, domain=%d\n", __FUNCTION__, bo->map, bo->tiling, bo->domain)); @@ -502,12 +505,15 @@ static inline bool kgem_bo_mapped(struct kgem_bo *bo) if (bo->map == NULL) return bo->tiling == I915_TILING_NONE && bo->domain == DOMAIN_CPU; + if (bo->tiling == I915_TILING_X && !bo->scanout && kgem->has_llc) + return IS_CPU_MAP(bo->map); + return IS_CPU_MAP(bo->map) == !bo->tiling; } static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo) { - if (kgem_bo_mapped(bo)) + if (kgem_bo_mapped(kgem, bo)) return true; if (!bo->tiling && kgem->has_llc) |