author    | Chris Wilson <chris@chris-wilson.co.uk> | 2014-10-23 10:34:44 +0100
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2014-11-06 16:55:46 +0000
commit    | 87802b3402ec4a829c6507b4b78fcd03e30b6fe1 (patch)
tree      | b88ed62dc659cfb80fd92257cb9847323c71764e
parent    | 33764685cbcd01548b6a23c217c7b511c1639694 (diff)
sna: Experimental support for write-combining mmaps
If we have a linear buffer, we can request the kernel mmap it directly
with write-combining without having to pin it into the GTT. This allows
us to efficiently upload very large buffers, and can avoid the dreaded
aperture thrashing.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
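For readers skimming the diff below, here is a minimal, self-contained sketch of the write-combining mmap request this patch relies on. It mirrors the `local_i915_gem_mmap2` structure and `LOCAL_IOCTL_I915_GEM_MMAP_v2` definition added to kgem.c; the `drm_fd` and `bo_handle` parameters and the plain `ioctl()` call are placeholders for the driver's own helpers, and the headers are assumed to resolve via libdrm's include path.

```c
/* Sketch: ask the kernel for a write-combining CPU mapping of a GEM
 * object via the extended MMAP ioctl, instead of binding the object
 * into the mappable GTT aperture.  Mirrors the definitions added in
 * src/sna/kgem.c by this patch.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm.h>
#include <i915_drm.h>

struct local_i915_gem_mmap2 {
	uint32_t handle;
	uint32_t pad;
	uint64_t offset;	/* offset into the object, not the GTT */
	uint64_t size;
	uint64_t addr_ptr;	/* out: CPU address of the new mapping */
	uint64_t flags;
#define I915_MMAP_WC 0x1
};
#define LOCAL_IOCTL_I915_GEM_MMAP_v2 \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap2)

static void *gem_mmap__wc(int drm_fd, uint32_t bo_handle, uint64_t size)
{
	struct local_i915_gem_mmap2 wc;

	memset(&wc, 0, sizeof(wc));
	wc.handle = bo_handle;
	wc.offset = 0;
	wc.size = size;
	wc.flags = I915_MMAP_WC;

	/* Fails on kernels without the extension, or for unsuitable objects. */
	if (ioctl(drm_fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc))
		return NULL;

	return (void *)(uintptr_t)wc.addr_ptr;
}
```

On older kernels the extended argument layout is not understood, which is why the driver also checks a GETPARAM version before taking this path (see the probe sketched after the diffstat).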
-rw-r--r-- | configure.ac          |  10
-rw-r--r-- | src/sna/kgem.c        | 275
-rw-r--r-- | src/sna/kgem.h        |  13
-rw-r--r-- | src/sna/sna.h         |   3
-rw-r--r-- | src/sna/sna_accel.c   |  68
-rw-r--r-- | src/sna/sna_display.c |   2
-rw-r--r-- | src/sna/sna_io.c      |  21
7 files changed, 266 insertions, 126 deletions
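The new path is off by default: it must be enabled at build time with `./configure --enable-wc-mmap` (which defines `USE_WC_MMAP`), and kgem additionally probes the running kernel before using it. Below is a hedged sketch of that runtime check, following `test_has_wc_mmap()` in the kgem.c hunk; it reuses the struct and ioctl definitions sketched above (plus `<stdbool.h>`), and `gem_param()`, `gem_create()` and `gem_close()` stand in for the driver's existing ioctl wrappers rather than being defined here.

```c
/* Runtime probe: the kernel advertises the extended MMAP ioctl through
 * a new GETPARAM value (29); if that looks present, confirm it by
 * WC-mapping a throwaway one-page object, as test_has_wc_mmap() does.
 */
#define LOCAL_I915_PARAM_MMAP_VERSION 29

static bool has_wc_mmap(int drm_fd)
{
	struct local_i915_gem_mmap2 wc;
	bool ret;

	if (gem_param(drm_fd, LOCAL_I915_PARAM_MMAP_VERSION) < 1)
		return false;

	memset(&wc, 0, sizeof(wc));
	wc.handle = gem_create(drm_fd, 4096);	/* assumed helper: one-page object */
	wc.size = 4096;
	wc.flags = I915_MMAP_WC;
	ret = ioctl(drm_fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc) == 0;
	gem_close(drm_fd, wc.handle);

	return ret;
}
```

Note that even when WC mmaps are available, tiled buffers still take the GTT mapping path: `__kgem_bo_map__gtt_or_wc()` only selects the WC vma for untiled objects, presumably because the WC mapping is linear and does not go through a detiling fence.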
diff --git a/configure.ac b/configure.ac index 8b34c773..75fd3b6f 100644 --- a/configure.ac +++ b/configure.ac @@ -760,6 +760,16 @@ if test "x$RENDERNODE" = "xyes"; then xp_msg="$xp_msg rendernode" fi +AC_ARG_ENABLE(wc-mmap, + AS_HELP_STRING([--enable-wc-map], + [Enable use of WriteCombining mmaps [default=no]]), + [WC_MMAP="$enableval"], + [WC_MMAP="no"]) +if test "x$WC_MMAP" = "xyes"; then + AC_DEFINE(USE_WC_MMAP,1,[Enable use of WriteCombining mmaps]) + xp_msg="$xp_msg mmap(wc)" +fi + AC_ARG_ENABLE(create2, AS_HELP_STRING([--enable-create2], [Enable use of create2 ioctl (experimental) [default=no]]), diff --git a/src/sna/kgem.c b/src/sna/kgem.c index 90474377..4f793014 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -83,6 +83,7 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define DBG_NO_FAST_RELOC 0 #define DBG_NO_HANDLE_LUT 0 #define DBG_NO_WT 0 +#define DBG_NO_WC_MMAP 0 #define DBG_DUMP 0 #define DBG_NO_MALLOC_CACHE 0 @@ -95,6 +96,11 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define SHOW_BATCH_BEFORE 0 #define SHOW_BATCH_AFTER 0 +#if !USE_WC_MMAP +#undef DBG_NO_WC_MMAP +#define DBG_NO_WC_MMAP 1 +#endif + #if 0 #define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__)) #define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__)) @@ -127,12 +133,14 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define LOCAL_I915_PARAM_HAS_BLT 11 #define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12 #define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15 +#define LOCAL_I915_PARAM_HAS_LLC 17 #define LOCAL_I915_PARAM_HAS_SEMAPHORES 20 #define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23 #define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24 #define LOCAL_I915_PARAM_HAS_NO_RELOC 25 #define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26 #define LOCAL_I915_PARAM_HAS_WT 27 +#define LOCAL_I915_PARAM_MMAP_VERSION 29 #define LOCAL_I915_EXEC_IS_PINNED (1<<10) #define LOCAL_I915_EXEC_NO_RELOC (1<<11) @@ -179,6 +187,17 @@ struct local_i915_gem_caching { #define LOCAL_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_SET_CACHING, struct local_i915_gem_caching) #define LOCAL_IOCTL_I915_GEM_GET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_GET_CACHING, struct local_i915_gem_caching) +struct local_i915_gem_mmap2 { + uint32_t handle; + uint32_t pad; + uint64_t offset; + uint64_t size; + uint64_t addr_ptr; + uint64_t flags; +#define I915_MMAP_WC 0x1 +}; +#define LOCAL_IOCTL_I915_GEM_MMAP_v2 DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap2) + struct kgem_buffer { struct kgem_bo base; void *mem; @@ -411,7 +430,7 @@ static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags) static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) { - struct drm_i915_gem_mmap_gtt mmap_arg; + struct drm_i915_gem_mmap_gtt gtt; void *ptr; int err; @@ -419,12 +438,13 @@ static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) bo->handle, bytes(bo))); assert(bo->proxy == NULL); assert(!bo->snoop); - assert(num_pages(bo) <= kgem->aperture_mappable / 4); + assert(num_pages(bo) <= kgem->aperture_mappable / 2); + assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y); + VG_CLEAR(gtt); retry_gtt: - VG_CLEAR(mmap_arg); - mmap_arg.handle = bo->handle; - if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg))) { + gtt.handle = bo->handle; + if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, >t))) { 
assert(err != EINVAL); (void)__kgem_throttle_retire(kgem, 0); @@ -441,7 +461,7 @@ retry_gtt: retry_mmap: ptr = mmap(0, bytes(bo), PROT_READ | PROT_WRITE, MAP_SHARED, - kgem->fd, mmap_arg.offset); + kgem->fd, gtt.offset); if (ptr == MAP_FAILED) { err = errno; assert(err != EINVAL); @@ -457,7 +477,50 @@ retry_mmap: ptr = NULL; } - return ptr; + /* Cache this mapping to avoid the overhead of an + * excruciatingly slow GTT pagefault. This is more an + * issue with compositing managers which need to + * frequently flush CPU damage to their GPU bo. + */ + return bo->map__gtt = ptr; +} + +static void *__kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo) +{ + struct local_i915_gem_mmap2 wc; + int err; + + DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, + bo->handle, bytes(bo))); + assert(bo->proxy == NULL); + assert(!bo->snoop); + assert(kgem->has_wc_mmap); + + VG_CLEAR(wc); + +retry_wc: + wc.handle = bo->handle; + wc.offset = 0; + wc.size = bytes(bo); + wc.flags = I915_MMAP_WC; + if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc))) { + assert(err != EINVAL); + + if (__kgem_throttle_retire(kgem, 0)) + goto retry_wc; + + if (kgem_cleanup_cache(kgem)) + goto retry_wc; + + ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU(wc) domain: %d\n", + __FUNCTION__, bo->handle, bytes(bo), -err)); + return NULL; + } + + VG(VALGRIND_MAKE_MEM_DEFINED(wc.addr_ptr, bytes(bo))); + + DBG(("%s: caching CPU(wc) vma for %d\n", __FUNCTION__, bo->handle)); + return bo->map__wc = (void *)(uintptr_t)wc.addr_ptr; } static int gem_write(int fd, uint32_t handle, @@ -1029,9 +1092,7 @@ static bool test_has_llc(struct kgem *kgem) if (DBG_NO_LLC) return false; -#if defined(I915_PARAM_HAS_LLC) /* Expected in libdrm-2.4.31 */ - has_llc = gem_param(kgem, I915_PARAM_HAS_LLC); -#endif + has_llc = gem_param(kgem, LOCAL_I915_PARAM_HAS_LLC); if (has_llc == -1) { DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__)); has_llc = kgem->gen >= 060; @@ -1040,6 +1101,28 @@ static bool test_has_llc(struct kgem *kgem) return has_llc; } +static bool test_has_wc_mmap(struct kgem *kgem) +{ + struct local_i915_gem_mmap2 wc; + bool ret; + + if (DBG_NO_WC_MMAP) + return false; + + if (gem_param(kgem, LOCAL_I915_PARAM_MMAP_VERSION) < 1) + return false; + + VG_CLEAR(wc); + wc.handle = gem_create(kgem->fd, 1); + wc.offset = 0; + wc.size = 4096; + wc.flags = I915_MMAP_WC; + ret = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc) == 0; + gem_close(kgem->fd, wc.handle); + + return ret; +} + static bool test_has_caching(struct kgem *kgem) { uint32_t handle; @@ -1428,6 +1511,10 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) DBG(("%s: has write-through caching for scanouts? %d\n", __FUNCTION__, kgem->has_wt)); + kgem->has_wc_mmap = test_has_wc_mmap(kgem); + DBG(("%s: has wc-mmapping? %d\n", __FUNCTION__, + kgem->has_wc_mmap)); + kgem->has_caching = test_has_caching(kgem); DBG(("%s: has set-cache-level? %d\n", __FUNCTION__, kgem->has_caching)); @@ -1975,17 +2062,23 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo) DBG(("%s: releasing %p:%p vma for handle=%d, count=%d\n", __FUNCTION__, bo->map__gtt, bo->map__cpu, - bo->handle, list_is_empty(&bo->vma) ? 0 : kgem->vma[bo->map__gtt == NULL].count)); + bo->handle, list_is_empty(&bo->vma) ? 
0 : kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count)); if (!list_is_empty(&bo->vma)) { _list_del(&bo->vma); - kgem->vma[bo->map__gtt == NULL].count--; + kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count--; } if (bo->map__gtt) - munmap(MAP(bo->map__gtt), bytes(bo)); - if (bo->map__cpu) + munmap(bo->map__gtt, bytes(bo)); + if (bo->map__wc) { + VG(VALGRIND_MAKE_MEM_NOACCESS(bo->map__wc, bytes(bo))); + munmap(bo->map__wc, bytes(bo)); + } + if (bo->map__cpu) { + VG(VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map__cpu), bytes(bo))); munmap(MAP(bo->map__cpu), bytes(bo)); + } _list_del(&bo->list); _list_del(&bo->request); @@ -2021,25 +2114,24 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem, if (bucket(bo) >= NUM_CACHE_BUCKETS) { if (bo->map__gtt) { - munmap(MAP(bo->map__gtt), bytes(bo)); + munmap(bo->map__gtt, bytes(bo)); bo->map__gtt = NULL; } list_move(&bo->list, &kgem->large_inactive); } else { assert(bo->flush == false); + assert(list_is_empty(&bo->vma)); list_move(&bo->list, &kgem->inactive[bucket(bo)]); - if (bo->map__gtt) { - if (!kgem_bo_can_map(kgem, bo)) { - munmap(MAP(bo->map__gtt), bytes(bo)); - bo->map__gtt = NULL; - } - if (bo->map__gtt) { - list_add(&bo->vma, &kgem->vma[0].inactive[bucket(bo)]); - kgem->vma[0].count++; - } + if (bo->map__gtt && !kgem_bo_can_map(kgem, bo)) { + munmap(bo->map__gtt, bytes(bo)); + bo->map__gtt = NULL; } - if (bo->map__cpu && !bo->map__gtt) { + if (bo->map__gtt || (bo->map__wc && !bo->tiling)) { + list_add(&bo->vma, &kgem->vma[0].inactive[bucket(bo)]); + kgem->vma[0].count++; + } + if (bo->map__cpu && list_is_empty(&bo->vma)) { list_add(&bo->vma, &kgem->vma[1].inactive[bucket(bo)]); kgem->vma[1].count++; } @@ -2087,9 +2179,9 @@ inline static void kgem_bo_remove_from_inactive(struct kgem *kgem, assert(bo->rq == NULL); assert(bo->exec == NULL); if (!list_is_empty(&bo->vma)) { - assert(bo->map__gtt || bo->map__cpu); + assert(bo->map__gtt || bo->map__wc || bo->map__cpu); list_del(&bo->vma); - kgem->vma[bo->map__gtt == NULL].count--; + kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count--; } } @@ -2824,6 +2916,7 @@ static void kgem_commit(struct kgem *kgem) assert(list_is_empty(&rq->buffers)); assert(rq->bo->map__gtt == NULL); + assert(rq->bo->map__wc == NULL); assert(rq->bo->map__cpu == NULL); gem_close(kgem->fd, rq->bo->handle); kgem_cleanup_cache(kgem); @@ -3985,7 +4078,7 @@ discard: __FUNCTION__, for_cpu ? "cpu" : "gtt")); cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)]; list_for_each_entry(bo, cache, vma) { - assert(for_cpu ? bo->map__cpu : bo->map__gtt); + assert(for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc)); assert(bucket(bo) == cache_bucket(num_pages)); assert(bo->proxy == NULL); assert(bo->rq == NULL); @@ -4067,10 +4160,10 @@ discard: bo->pitch = 0; } - if (bo->map__gtt || bo->map__cpu) { + if (bo->map__gtt || bo->map__wc || bo->map__cpu) { if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { int for_cpu = !!(flags & CREATE_CPU_MAP); - if (for_cpu ? bo->map__cpu : bo->map__gtt){ + if (for_cpu ? 
!!bo->map__cpu : (bo->map__gtt || bo->map__wc)){ if (first != NULL) break; @@ -4477,7 +4570,7 @@ unsigned kgem_can_create_2d(struct kgem *kgem, flags |= KGEM_CAN_CREATE_CPU; if (size > 4096 && size <= kgem->max_gpu_size) flags |= KGEM_CAN_CREATE_GPU; - if (size <= PAGE_SIZE*kgem->aperture_mappable/4) + if (size <= PAGE_SIZE*kgem->aperture_mappable/4 || kgem->has_wc_mmap) flags |= KGEM_CAN_CREATE_GTT; if (size > kgem->large_object_size) flags |= KGEM_CAN_CREATE_LARGE; @@ -4903,7 +4996,7 @@ large_inactive: assert(bucket(bo) == bucket); assert(bo->refcnt == 0); assert(!bo->scanout); - assert(for_cpu ? bo->map__cpu : bo->map__gtt); + assert(for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc)); assert(bo->rq == NULL); assert(bo->exec == NULL); assert(list_is_empty(&bo->request)); @@ -5996,7 +6089,6 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket) i = 0; while (kgem->vma[type].count > 0) { struct kgem_bo *bo = NULL; - void **ptr; for (j = 0; bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive); @@ -6011,12 +6103,23 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket) DBG(("%s: discarding inactive %s vma cache for %d\n", __FUNCTION__, type ? "CPU" : "GTT", bo->handle)); - ptr = type ? &bo->map__cpu : &bo->map__gtt; assert(bo->rq == NULL); + if (type) { + VG(VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map__cpu), bytes(bo))); + munmap(MAP(bo->map__cpu), bytes(bo)); + bo->map__cpu = NULL; + } else { + if (bo->map__wc) { + VG(VALGRIND_MAKE_MEM_NOACCESS(bo->map__wc, bytes(bo))); + munmap(bo->map__wc, bytes(bo)); + bo->map__wc = NULL; + } + if (bo->map__gtt) { + munmap(bo->map__gtt, bytes(bo)); + bo->map__gtt = NULL; + } + } - VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(*ptr), bytes(bo))); - munmap(MAP(*ptr), bytes(bo)); - *ptr = NULL; list_del(&bo->vma); kgem->vma[type].count--; @@ -6028,10 +6131,28 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket) } } -void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo) +static void *__kgem_bo_map__gtt_or_wc(struct kgem *kgem, struct kgem_bo *bo) { void *ptr; + DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); + kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); + + if (bo->tiling || !kgem->has_wc_mmap) { + ptr = bo->map__gtt; + if (ptr == NULL) + ptr = __kgem_bo_map__gtt(kgem, bo); + } else { + ptr = bo->map__wc; + if (ptr == NULL) + ptr = __kgem_bo_map__wc(kgem, bo); + } + + return ptr; +} + +void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo) +{ DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__, bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain)); @@ -6046,26 +6167,7 @@ void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo) return kgem_bo_map__cpu(kgem, bo); } - ptr = MAP(bo->map__gtt); - if (ptr == NULL) { - assert(num_pages(bo) <= kgem->aperture_mappable / 2); - - kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); - - ptr = __kgem_bo_map__gtt(kgem, bo); - if (ptr == NULL) - return NULL; - - /* Cache this mapping to avoid the overhead of an - * excruciatingly slow GTT pagefault. This is more an - * issue with compositing managers which need to frequently - * flush CPU damage to their GPU bo. 
- */ - bo->map__gtt = ptr; - DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle)); - } - - return ptr; + return __kgem_bo_map__gtt_or_wc(kgem, bo); } void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) @@ -6091,25 +6193,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) return ptr; } - ptr = MAP(bo->map__gtt); - if (ptr == NULL) { - assert(num_pages(bo) <= kgem->aperture_mappable / 2); - assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y); - - kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); - - ptr = __kgem_bo_map__gtt(kgem, bo); - if (ptr == NULL) - return NULL; - - /* Cache this mapping to avoid the overhead of an - * excruciatingly slow GTT pagefault. This is more an - * issue with compositing managers which need to frequently - * flush CPU damage to their GPU bo. - */ - bo->map__gtt = ptr; - DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle)); - } + ptr = __kgem_bo_map__gtt_or_wc(kgem, bo); if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) { struct drm_i915_gem_set_domain set_domain; @@ -6137,8 +6221,6 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) { - void *ptr; - DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__, bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain)); @@ -6148,26 +6230,24 @@ void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) assert_tiling(kgem, bo); assert(!bo->purged || bo->reusable); - ptr = MAP(bo->map__gtt); - if (ptr == NULL) { - assert(num_pages(bo) <= kgem->aperture_mappable / 4); + return __kgem_bo_map__gtt_or_wc(kgem, bo); +} - kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); +void *kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo) +{ + DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__, + bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain)); - ptr = __kgem_bo_map__gtt(kgem, bo); - if (ptr == NULL) - return NULL; + assert(bo->proxy == NULL); + assert(bo->exec == NULL); + assert(list_is_empty(&bo->list)); + assert_tiling(kgem, bo); + assert(!bo->purged || bo->reusable); - /* Cache this mapping to avoid the overhead of an - * excruciatingly slow GTT pagefault. This is more an - * issue with compositing managers which need to frequently - * flush CPU damage to their GPU bo. 
- */ - bo->map__gtt = ptr; - DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle)); - } + if (bo->map__wc) + return bo->map__wc; - return ptr; + return __kgem_bo_map__wc(kgem, bo); } void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo) @@ -6512,6 +6592,7 @@ init_buffer_from_bo(struct kgem_buffer *bo, struct kgem_bo *old) __FUNCTION__, old->handle)); assert(old->proxy == NULL); + assert(list_is_empty(&old->list)); memcpy(&bo->base, old, sizeof(*old)); if (old->rq) @@ -6817,7 +6898,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, assert(alloc); alloc /= PAGE_SIZE; - if (alloc > kgem->aperture_mappable / 4) + if (alloc > kgem->aperture_mappable / 4 && !kgem->has_wc_mmap) flags &= ~KGEM_BUFFER_INPLACE; if (kgem->has_llc && @@ -7049,7 +7130,7 @@ init: assert(!bo->need_io || !bo->base.needs_flush); assert(!bo->need_io || bo->base.domain != DOMAIN_GPU); assert(bo->mem); - assert(bo->mmapped != MMAPPED_GTT || MAP(bo->base.map__gtt) == bo->mem); + assert(bo->mmapped != MMAPPED_GTT || bo->base.map__gtt == bo->mem || bo->base.map__wc == bo->mem); assert(bo->mmapped != MMAPPED_CPU || MAP(bo->base.map__cpu) == bo->mem); bo->used = size; diff --git a/src/sna/kgem.h b/src/sna/kgem.h index 7ac81cba..6adae3bf 100644 --- a/src/sna/kgem.h +++ b/src/sna/kgem.h @@ -62,6 +62,7 @@ struct kgem_bo { void *map__cpu; void *map__gtt; + void *map__wc; #define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3)) struct kgem_bo_binding { @@ -192,6 +193,7 @@ struct kgem { uint32_t has_wt :1; uint32_t has_no_reloc :1; uint32_t has_handle_lut :1; + uint32_t has_wc_mmap :1; uint32_t can_blt_cpu :1; uint32_t can_render_y :1; @@ -504,6 +506,7 @@ uint64_t kgem_add_reloc64(struct kgem *kgem, void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo); void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo); void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo); +void *kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo); void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo); void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo); void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo); @@ -715,13 +718,16 @@ static inline bool kgem_bo_mapped(struct kgem *kgem, struct kgem_bo *bo) if (bo->tiling == I915_TILING_NONE && (bo->domain == DOMAIN_CPU || kgem->has_llc)) return bo->map__cpu != NULL; + if (bo->tiling == I915_TILING_NONE && bo->map__wc) + return true; + return bo->map__gtt != NULL; } static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo) { - DBG(("%s: handle=%d, map=%p:%p, tiling=%d, domain=%d, offset=%ld\n", - __FUNCTION__, bo->handle, bo->map__gtt, bo->map__cpu, bo->tiling, bo->domain, (long)bo->presumed_offset)); + DBG(("%s: handle=%d, map=%p:%p:%p, tiling=%d, domain=%d, offset=%ld\n", + __FUNCTION__, bo->handle, bo->map__gtt, bo->map__wc, bo->map__cpu, bo->tiling, bo->domain, (long)bo->presumed_offset)); if (!bo->tiling && (kgem->has_llc || bo->domain == DOMAIN_CPU)) return true; @@ -734,6 +740,9 @@ static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo) if (kgem->gen == 021 && bo->tiling == I915_TILING_Y) return false; + if (!bo->tiling && kgem->has_wc_mmap) + return true; + return __kgem_bo_num_pages(bo) <= kgem->aperture_mappable / 4; } diff --git a/src/sna/sna.h b/src/sna/sna.h index 30b0292b..9d139ede 100644 --- a/src/sna/sna.h +++ b/src/sna/sna.h @@ -675,7 +675,8 @@ void sna_pixmap_destroy(PixmapPtr pixmap); #define assert_pixmap_map(pixmap, priv) do { \ assert(priv->mapped != MAPPED_NONE || pixmap->devPrivate.ptr == 
PTR(priv->ptr)); \ - assert(priv->mapped == MAPPED_NONE || pixmap->devPrivate.ptr == (priv->mapped == MAPPED_CPU ? MAP(priv->gpu_bo->map__cpu) : MAP(priv->gpu_bo->map__gtt))); \ + assert(priv->mapped != MAPPED_CPU || pixmap->devPrivate.ptr == MAP(priv->gpu_bo->map__cpu)); \ + assert(priv->mapped != MAPPED_GTT || pixmap->devPrivate.ptr == priv->gpu_bo->map__gtt || pixmap->devPrivate.ptr == priv->gpu_bo->map__wc); \ } while (0) static inline void sna_pixmap_unmap(PixmapPtr pixmap, struct sna_pixmap *priv) diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c index cd0b9072..0ef56fb9 100644 --- a/src/sna/sna_accel.c +++ b/src/sna/sna_accel.c @@ -4588,7 +4588,7 @@ can_create_upload_tiled_x(struct sna *sna, if (sna->kgem.has_llc) return true; - if (sna_pixmap_default_tiling(sna, pixmap)) + if (!sna->kgem.has_wc_mmap && sna_pixmap_default_tiling(sna, pixmap)) return false; return true; @@ -4608,13 +4608,21 @@ create_upload_tiled_x(struct sna *sna, assert(priv->gpu_bo == NULL); assert(priv->gpu_damage == NULL); - create = CREATE_CPU_MAP | CREATE_INACTIVE; - if (!sna->kgem.has_llc) - create |= CREATE_CACHED; + if (sna->kgem.has_llc) + create = CREATE_CPU_MAP | CREATE_INACTIVE; + else if (sna->kgem.has_wc_mmap) + create = CREATE_GTT_MAP | CREATE_INACTIVE; + else + create = CREATE_CPU_MAP | CREATE_INACTIVE | CREATE_CACHED; return sna_pixmap_alloc_gpu(sna, pixmap, priv, create); } +static bool can_upload__tiled_x(struct kgem *kgem, struct kgem_bo *bo) +{ + return kgem_bo_can_map__cpu(kgem, bo, true) || kgem->has_wc_mmap; +} + static bool try_upload__tiled_x(PixmapPtr pixmap, RegionRec *region, int x, int y, int w, int h, char *bits, int stride) @@ -4625,7 +4633,7 @@ try_upload__tiled_x(PixmapPtr pixmap, RegionRec *region, uint8_t *dst; int n; - if (!kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, true)) { + if (!can_upload__tiled_x(&sna->kgem, priv->gpu_bo)) { DBG(("%s: no, cannot map through the CPU\n", __FUNCTION__)); return false; } @@ -4638,11 +4646,19 @@ try_upload__tiled_x(PixmapPtr pixmap, RegionRec *region, __kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)) return false; - dst = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); - if (dst == NULL) - return false; + if (kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, true)) { + dst = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); + if (dst == NULL) + return false; - kgem_bo_sync__cpu(&sna->kgem, priv->gpu_bo); + kgem_bo_sync__cpu(&sna->kgem, priv->gpu_bo); + } else { + dst = kgem_bo_map__wc(&sna->kgem, priv->gpu_bo); + if (dst == NULL) + return false; + + kgem_bo_sync__gtt(&sna->kgem, priv->gpu_bo); + } box = region_rects(region); n = region_num_rects(region); @@ -4712,12 +4728,14 @@ try_upload__tiled_x(PixmapPtr pixmap, RegionRec *region, } while (--n); if (!priv->shm) { - assert(dst == MAP(priv->gpu_bo->map__cpu)); pixmap->devPrivate.ptr = dst; pixmap->devKind = priv->gpu_bo->pitch; - priv->mapped = MAPPED_CPU; + if (dst == MAP(priv->gpu_bo->map__cpu)) { + priv->mapped = MAPPED_CPU; + priv->cpu = true; + } else + priv->mapped = MAPPED_GTT; assert_pixmap_map(pixmap, priv); - priv->cpu = true; } } @@ -6129,7 +6147,7 @@ upload_inplace: return false; } - if (!kgem_bo_can_map__cpu(&sna->kgem, dst_priv->gpu_bo, true) || + if (!can_upload__tiled_x(&sna->kgem, dst_priv->gpu_bo) || __kgem_bo_is_busy(&sna->kgem, dst_priv->gpu_bo)) { if (replaces && !dst_priv->pinned) { unsigned create; @@ -6156,7 +6174,7 @@ upload_inplace: return false; } - if (!kgem_bo_can_map__cpu(&sna->kgem, dst_priv->gpu_bo, true)) { + if (!can_upload__tiled_x(&sna->kgem, dst_priv->gpu_bo)) { 
DBG(("%s - no, cannot map dst for reads into the CPU\n", __FUNCTION__)); return false; } @@ -6169,13 +6187,23 @@ upload_inplace: return false; } - ptr = kgem_bo_map__cpu(&sna->kgem, dst_priv->gpu_bo); - if (ptr == NULL) { - DBG(("%s - no, map failed\n", __FUNCTION__)); - return false; - } + if (kgem_bo_can_map__cpu(&sna->kgem, dst_priv->gpu_bo, true)) { + ptr = kgem_bo_map__cpu(&sna->kgem, dst_priv->gpu_bo); + if (ptr == NULL) { + DBG(("%s - no, map failed\n", __FUNCTION__)); + return false; + } + + kgem_bo_sync__cpu(&sna->kgem, dst_priv->gpu_bo); + } else { + ptr = kgem_bo_map__wc(&sna->kgem, dst_priv->gpu_bo); + if (ptr == NULL) { + DBG(("%s - no, map failed\n", __FUNCTION__)); + return false; + } - kgem_bo_sync__cpu(&sna->kgem, dst_priv->gpu_bo); + kgem_bo_sync__gtt(&sna->kgem, dst_priv->gpu_bo); + } if (!DAMAGE_IS_ALL(dst_priv->gpu_damage)) { assert(!dst_priv->clear); diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c index 0889f9dc..f16a46cb 100644 --- a/src/sna/sna_display.c +++ b/src/sna/sna_display.c @@ -5032,7 +5032,7 @@ sna_cursor_pre_init(struct sna *sna) #define DRM_CAP_CURSOR_WIDTH 8 #define DRM_CAP_CURSOR_HEIGHT 9 -#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29 +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 30 sna->cursor.max_size = 64; diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c index 0181838d..d6aa1294 100644 --- a/src/sna/sna_io.c +++ b/src/sna/sna_io.c @@ -652,6 +652,9 @@ static bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo) break; } + if (kgem->has_wc_mmap) + return true; + return kgem_bo_can_map__cpu(kgem, bo, true); } @@ -663,14 +666,22 @@ write_boxes_inplace__tiled(struct kgem *kgem, { uint8_t *dst; - assert(kgem_bo_can_map__cpu(kgem, bo, true)); + assert(kgem->has_wc_mmap || kgem_bo_can_map__cpu(kgem, bo, true)); assert(bo->tiling != I915_TILING_Y); - dst = kgem_bo_map__cpu(kgem, bo); - if (dst == NULL) - return false; + if (kgem_bo_can_map__cpu(kgem, bo, true)) { + dst = kgem_bo_map__cpu(kgem, bo); + if (dst == NULL) + return false; - kgem_bo_sync__cpu(kgem, bo); + kgem_bo_sync__cpu(kgem, bo); + } else { + dst = kgem_bo_map__wc(kgem, bo); + if (dst == NULL) + return false; + + kgem_bo_sync__gtt(kgem, bo); + } if (sigtrap_get()) return false; |