summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2014-10-23 10:34:44 +0100
committer Chris Wilson <chris@chris-wilson.co.uk> 2014-11-06 16:55:46 +0000
commit 87802b3402ec4a829c6507b4b78fcd03e30b6fe1 (patch)
tree b88ed62dc659cfb80fd92257cb9847323c71764e
parent 33764685cbcd01548b6a23c217c7b511c1639694 (diff)
sna: Experimental support for write-combining mmaps
If we have a linear buffer, we can request the kernel mmap it directly with write-combining without having to pin it into the GTT. This allows us to efficiently upload very large buffers, and can avoid the dreaded aperture thrashing.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- configure.ac 10
-rw-r--r-- src/sna/kgem.c 275
-rw-r--r-- src/sna/kgem.h 13
-rw-r--r-- src/sna/sna.h 3
-rw-r--r-- src/sna/sna_accel.c 68
-rw-r--r-- src/sna/sna_display.c 2
-rw-r--r-- src/sna/sna_io.c 21
7 files changed, 266 insertions, 126 deletions
diff --git a/configure.ac b/configure.ac
index 8b34c773..75fd3b6f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -760,6 +760,16 @@ if test "x$RENDERNODE" = "xyes"; then
xp_msg="$xp_msg rendernode"
fi
+AC_ARG_ENABLE(wc-mmap,
+ AS_HELP_STRING([--enable-wc-mmap],
+ [Enable use of WriteCombining mmaps [default=no]]),
+ [WC_MMAP="$enableval"],
+ [WC_MMAP="no"])
+if test "x$WC_MMAP" = "xyes"; then # opt-in only: define USE_WC_MMAP and append a note to xp_msg
+ AC_DEFINE(USE_WC_MMAP,1,[Enable use of WriteCombining mmaps])
+ xp_msg="$xp_msg mmap(wc)"
+fi
+
AC_ARG_ENABLE(create2,
AS_HELP_STRING([--enable-create2],
[Enable use of create2 ioctl (experimental) [default=no]]),
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 90474377..4f793014 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -83,6 +83,7 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define DBG_NO_FAST_RELOC 0
#define DBG_NO_HANDLE_LUT 0
#define DBG_NO_WT 0
+#define DBG_NO_WC_MMAP 0
#define DBG_DUMP 0
#define DBG_NO_MALLOC_CACHE 0
@@ -95,6 +96,11 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define SHOW_BATCH_BEFORE 0
#define SHOW_BATCH_AFTER 0
+#if !USE_WC_MMAP
+#undef DBG_NO_WC_MMAP
+#define DBG_NO_WC_MMAP 1
+#endif
+
#if 0
#define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__))
#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__))
@@ -127,12 +133,14 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define LOCAL_I915_PARAM_HAS_BLT 11
#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12
#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15
+#define LOCAL_I915_PARAM_HAS_LLC 17
#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23
#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24
#define LOCAL_I915_PARAM_HAS_NO_RELOC 25
#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26
#define LOCAL_I915_PARAM_HAS_WT 27
+#define LOCAL_I915_PARAM_MMAP_VERSION 29
#define LOCAL_I915_EXEC_IS_PINNED (1<<10)
#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
@@ -179,6 +187,17 @@ struct local_i915_gem_caching {
#define LOCAL_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_SET_CACHING, struct local_i915_gem_caching)
#define LOCAL_IOCTL_I915_GEM_GET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_GET_CACHING, struct local_i915_gem_caching)
+struct local_i915_gem_mmap2 { /* Local definition of the argument block passed to LOCAL_IOCTL_I915_GEM_MMAP_v2. */
+ uint32_t handle; /* GEM handle of the buffer to map */
+ uint32_t pad;
+ uint64_t offset; /* every caller in this patch passes 0 */
+ uint64_t size; /* number of bytes to map */
+ uint64_t addr_ptr; /* read back by callers after a successful ioctl and used as the CPU pointer */
+ uint64_t flags; /* callers in this patch set I915_MMAP_WC */
+#define I915_MMAP_WC 0x1
+};
+#define LOCAL_IOCTL_I915_GEM_MMAP_v2 DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap2)
+
struct kgem_buffer {
struct kgem_bo base;
void *mem;
@@ -411,7 +430,7 @@ static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags)
static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
- struct drm_i915_gem_mmap_gtt mmap_arg;
+ struct drm_i915_gem_mmap_gtt gtt;
void *ptr;
int err;
@@ -419,12 +438,13 @@ static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
bo->handle, bytes(bo)));
assert(bo->proxy == NULL);
assert(!bo->snoop);
- assert(num_pages(bo) <= kgem->aperture_mappable / 4);
+ assert(num_pages(bo) <= kgem->aperture_mappable / 2);
+ assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);
+ VG_CLEAR(gtt);
retry_gtt:
- VG_CLEAR(mmap_arg);
- mmap_arg.handle = bo->handle;
- if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg))) {
+ gtt.handle = bo->handle;
+ if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gtt))) {
assert(err != EINVAL);
(void)__kgem_throttle_retire(kgem, 0);
@@ -441,7 +461,7 @@ retry_gtt:
retry_mmap:
ptr = mmap(0, bytes(bo), PROT_READ | PROT_WRITE, MAP_SHARED,
- kgem->fd, mmap_arg.offset);
+ kgem->fd, gtt.offset);
if (ptr == MAP_FAILED) {
err = errno;
assert(err != EINVAL);
@@ -457,7 +477,50 @@ retry_mmap:
ptr = NULL;
}
- return ptr;
+ /* Cache this mapping to avoid the overhead of an
+ * excruciatingly slow GTT pagefault. This is more an
+ * issue with compositing managers which need to
+ * frequently flush CPU damage to their GPU bo.
+ */
+ return bo->map__gtt = ptr;
+}
+
+static void *__kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo) /* Map bo through the mmap-v2 ioctl with I915_MMAP_WC, store the pointer in bo->map__wc and return it; returns NULL if the ioctl keeps failing. */
+{
+ struct local_i915_gem_mmap2 wc;
+ int err;
+
+ DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
+ bo->handle, bytes(bo)));
+ assert(bo->proxy == NULL);
+ assert(!bo->snoop);
+ assert(kgem->has_wc_mmap); /* flag is set in kgem_init() from test_has_wc_mmap() */
+
+ VG_CLEAR(wc);
+
+retry_wc: /* every input field is refilled on each attempt */
+ wc.handle = bo->handle;
+ wc.offset = 0;
+ wc.size = bytes(bo);
+ wc.flags = I915_MMAP_WC;
+ if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc))) {
+ assert(err != EINVAL);
+
+ if (__kgem_throttle_retire(kgem, 0)) /* retry when the retire helper returns true */
+ goto retry_wc;
+
+ if (kgem_cleanup_cache(kgem)) /* otherwise retry when the cache cleanup returns true */
+ goto retry_wc;
+
+ ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU(wc) domain: %d\n",
+ __FUNCTION__, bo->handle, bytes(bo), -err));
+ return NULL; /* bo->map__wc is left untouched on failure */
+ }
+
+ VG(VALGRIND_MAKE_MEM_DEFINED(wc.addr_ptr, bytes(bo)));
+
+ DBG(("%s: caching CPU(wc) vma for %d\n", __FUNCTION__, bo->handle));
+ return bo->map__wc = (void *)(uintptr_t)wc.addr_ptr; /* cache the mapping on the bo and hand it back */
}
static int gem_write(int fd, uint32_t handle,
@@ -1029,9 +1092,7 @@ static bool test_has_llc(struct kgem *kgem)
if (DBG_NO_LLC)
return false;
-#if defined(I915_PARAM_HAS_LLC) /* Expected in libdrm-2.4.31 */
- has_llc = gem_param(kgem, I915_PARAM_HAS_LLC);
-#endif
+ has_llc = gem_param(kgem, LOCAL_I915_PARAM_HAS_LLC);
if (has_llc == -1) {
DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__));
has_llc = kgem->gen >= 060;
@@ -1040,6 +1101,28 @@ static bool test_has_llc(struct kgem *kgem)
return has_llc;
}
+static bool test_has_wc_mmap(struct kgem *kgem) /* Probe for write-combining mmap support: true only if the param check passes and a trial WC mmap ioctl succeeds. */
+{
+ struct local_i915_gem_mmap2 wc;
+ bool ret;
+
+ if (DBG_NO_WC_MMAP) /* forced to 1 earlier in this file when USE_WC_MMAP is not set */
+ return false;
+
+ if (gem_param(kgem, LOCAL_I915_PARAM_MMAP_VERSION) < 1) /* also rejects a negative result from gem_param */
+ return false;
+
+ VG_CLEAR(wc);
+ wc.handle = gem_create(kgem->fd, 1); /* NOTE(review): result is not checked before use - confirm gem_create cannot fail here */
+ wc.offset = 0;
+ wc.size = 4096;
+ wc.flags = I915_MMAP_WC;
+ ret = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc) == 0; /* NOTE(review): on success wc.addr_ptr is never munmap()ed in this function - confirm the leftover mapping is intended */
+ gem_close(kgem->fd, wc.handle);
+
+ return ret;
+}
+
static bool test_has_caching(struct kgem *kgem)
{
uint32_t handle;
@@ -1428,6 +1511,10 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
DBG(("%s: has write-through caching for scanouts? %d\n", __FUNCTION__,
kgem->has_wt));
+ kgem->has_wc_mmap = test_has_wc_mmap(kgem);
+ DBG(("%s: has wc-mmapping? %d\n", __FUNCTION__,
+ kgem->has_wc_mmap));
+
kgem->has_caching = test_has_caching(kgem);
DBG(("%s: has set-cache-level? %d\n", __FUNCTION__,
kgem->has_caching));
@@ -1975,17 +2062,23 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
DBG(("%s: releasing %p:%p vma for handle=%d, count=%d\n",
__FUNCTION__, bo->map__gtt, bo->map__cpu,
- bo->handle, list_is_empty(&bo->vma) ? 0 : kgem->vma[bo->map__gtt == NULL].count));
+ bo->handle, list_is_empty(&bo->vma) ? 0 : kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count));
if (!list_is_empty(&bo->vma)) {
_list_del(&bo->vma);
- kgem->vma[bo->map__gtt == NULL].count--;
+ kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count--;
}
if (bo->map__gtt)
- munmap(MAP(bo->map__gtt), bytes(bo));
- if (bo->map__cpu)
+ munmap(bo->map__gtt, bytes(bo));
+ if (bo->map__wc) {
+ VG(VALGRIND_MAKE_MEM_NOACCESS(bo->map__wc, bytes(bo)));
+ munmap(bo->map__wc, bytes(bo));
+ }
+ if (bo->map__cpu) {
+ VG(VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map__cpu), bytes(bo)));
munmap(MAP(bo->map__cpu), bytes(bo));
+ }
_list_del(&bo->list);
_list_del(&bo->request);
@@ -2021,25 +2114,24 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
if (bucket(bo) >= NUM_CACHE_BUCKETS) {
if (bo->map__gtt) {
- munmap(MAP(bo->map__gtt), bytes(bo));
+ munmap(bo->map__gtt, bytes(bo));
bo->map__gtt = NULL;
}
list_move(&bo->list, &kgem->large_inactive);
} else {
assert(bo->flush == false);
+ assert(list_is_empty(&bo->vma));
list_move(&bo->list, &kgem->inactive[bucket(bo)]);
- if (bo->map__gtt) {
- if (!kgem_bo_can_map(kgem, bo)) {
- munmap(MAP(bo->map__gtt), bytes(bo));
- bo->map__gtt = NULL;
- }
- if (bo->map__gtt) {
- list_add(&bo->vma, &kgem->vma[0].inactive[bucket(bo)]);
- kgem->vma[0].count++;
- }
+ if (bo->map__gtt && !kgem_bo_can_map(kgem, bo)) {
+ munmap(bo->map__gtt, bytes(bo));
+ bo->map__gtt = NULL;
}
- if (bo->map__cpu && !bo->map__gtt) {
+ if (bo->map__gtt || (bo->map__wc && !bo->tiling)) {
+ list_add(&bo->vma, &kgem->vma[0].inactive[bucket(bo)]);
+ kgem->vma[0].count++;
+ }
+ if (bo->map__cpu && list_is_empty(&bo->vma)) {
list_add(&bo->vma, &kgem->vma[1].inactive[bucket(bo)]);
kgem->vma[1].count++;
}
@@ -2087,9 +2179,9 @@ inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
assert(bo->rq == NULL);
assert(bo->exec == NULL);
if (!list_is_empty(&bo->vma)) {
- assert(bo->map__gtt || bo->map__cpu);
+ assert(bo->map__gtt || bo->map__wc || bo->map__cpu);
list_del(&bo->vma);
- kgem->vma[bo->map__gtt == NULL].count--;
+ kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count--;
}
}
@@ -2824,6 +2916,7 @@ static void kgem_commit(struct kgem *kgem)
assert(list_is_empty(&rq->buffers));
assert(rq->bo->map__gtt == NULL);
+ assert(rq->bo->map__wc == NULL);
assert(rq->bo->map__cpu == NULL);
gem_close(kgem->fd, rq->bo->handle);
kgem_cleanup_cache(kgem);
@@ -3985,7 +4078,7 @@ discard:
__FUNCTION__, for_cpu ? "cpu" : "gtt"));
cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)];
list_for_each_entry(bo, cache, vma) {
- assert(for_cpu ? bo->map__cpu : bo->map__gtt);
+ assert(for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc));
assert(bucket(bo) == cache_bucket(num_pages));
assert(bo->proxy == NULL);
assert(bo->rq == NULL);
@@ -4067,10 +4160,10 @@ discard:
bo->pitch = 0;
}
- if (bo->map__gtt || bo->map__cpu) {
+ if (bo->map__gtt || bo->map__wc || bo->map__cpu) {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
- if (for_cpu ? bo->map__cpu : bo->map__gtt){
+ if (for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc)){
if (first != NULL)
break;
@@ -4477,7 +4570,7 @@ unsigned kgem_can_create_2d(struct kgem *kgem,
flags |= KGEM_CAN_CREATE_CPU;
if (size > 4096 && size <= kgem->max_gpu_size)
flags |= KGEM_CAN_CREATE_GPU;
- if (size <= PAGE_SIZE*kgem->aperture_mappable/4)
+ if (size <= PAGE_SIZE*kgem->aperture_mappable/4 || kgem->has_wc_mmap)
flags |= KGEM_CAN_CREATE_GTT;
if (size > kgem->large_object_size)
flags |= KGEM_CAN_CREATE_LARGE;
@@ -4903,7 +4996,7 @@ large_inactive:
assert(bucket(bo) == bucket);
assert(bo->refcnt == 0);
assert(!bo->scanout);
- assert(for_cpu ? bo->map__cpu : bo->map__gtt);
+ assert(for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc));
assert(bo->rq == NULL);
assert(bo->exec == NULL);
assert(list_is_empty(&bo->request));
@@ -5996,7 +6089,6 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
i = 0;
while (kgem->vma[type].count > 0) {
struct kgem_bo *bo = NULL;
- void **ptr;
for (j = 0;
bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive);
@@ -6011,12 +6103,23 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
DBG(("%s: discarding inactive %s vma cache for %d\n",
__FUNCTION__, type ? "CPU" : "GTT", bo->handle));
- ptr = type ? &bo->map__cpu : &bo->map__gtt;
assert(bo->rq == NULL);
+ if (type) {
+ VG(VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map__cpu), bytes(bo)));
+ munmap(MAP(bo->map__cpu), bytes(bo));
+ bo->map__cpu = NULL;
+ } else {
+ if (bo->map__wc) {
+ VG(VALGRIND_MAKE_MEM_NOACCESS(bo->map__wc, bytes(bo)));
+ munmap(bo->map__wc, bytes(bo));
+ bo->map__wc = NULL;
+ }
+ if (bo->map__gtt) {
+ munmap(bo->map__gtt, bytes(bo));
+ bo->map__gtt = NULL;
+ }
+ }
- VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(*ptr), bytes(bo)));
- munmap(MAP(*ptr), bytes(bo));
- *ptr = NULL;
list_del(&bo->vma);
kgem->vma[type].count--;
@@ -6028,10 +6131,28 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
}
}
-void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
+static void *__kgem_bo_map__gtt_or_wc(struct kgem *kgem, struct kgem_bo *bo) /* Return the bo's cached GTT or WC mapping, creating it if absent; result may be NULL if the underlying map helper fails. */
{
void *ptr;
+ DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
+ kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); /* NOTE(review): runs on every call, even with a cached mapping; the code this replaces trimmed only before creating one */
+
+ if (bo->tiling || !kgem->has_wc_mmap) { /* tiled bo, or no WC mmap support: use the GTT mapping */
+ ptr = bo->map__gtt;
+ if (ptr == NULL)
+ ptr = __kgem_bo_map__gtt(kgem, bo);
+ } else { /* untiled bo with WC mmap available: use the WC mapping */
+ ptr = bo->map__wc;
+ if (ptr == NULL)
+ ptr = __kgem_bo_map__wc(kgem, bo);
+ }
+
+ return ptr;
+}
+
+void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
+{
DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
@@ -6046,26 +6167,7 @@ void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
return kgem_bo_map__cpu(kgem, bo);
}
- ptr = MAP(bo->map__gtt);
- if (ptr == NULL) {
- assert(num_pages(bo) <= kgem->aperture_mappable / 2);
-
- kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
-
- ptr = __kgem_bo_map__gtt(kgem, bo);
- if (ptr == NULL)
- return NULL;
-
- /* Cache this mapping to avoid the overhead of an
- * excruciatingly slow GTT pagefault. This is more an
- * issue with compositing managers which need to frequently
- * flush CPU damage to their GPU bo.
- */
- bo->map__gtt = ptr;
- DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
- }
-
- return ptr;
+ return __kgem_bo_map__gtt_or_wc(kgem, bo);
}
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
@@ -6091,25 +6193,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
return ptr;
}
- ptr = MAP(bo->map__gtt);
- if (ptr == NULL) {
- assert(num_pages(bo) <= kgem->aperture_mappable / 2);
- assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);
-
- kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
-
- ptr = __kgem_bo_map__gtt(kgem, bo);
- if (ptr == NULL)
- return NULL;
-
- /* Cache this mapping to avoid the overhead of an
- * excruciatingly slow GTT pagefault. This is more an
- * issue with compositing managers which need to frequently
- * flush CPU damage to their GPU bo.
- */
- bo->map__gtt = ptr;
- DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
- }
+ ptr = __kgem_bo_map__gtt_or_wc(kgem, bo);
if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
struct drm_i915_gem_set_domain set_domain;
@@ -6137,8 +6221,6 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
- void *ptr;
-
DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
@@ -6148,26 +6230,24 @@ void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
assert_tiling(kgem, bo);
assert(!bo->purged || bo->reusable);
- ptr = MAP(bo->map__gtt);
- if (ptr == NULL) {
- assert(num_pages(bo) <= kgem->aperture_mappable / 4);
+ return __kgem_bo_map__gtt_or_wc(kgem, bo);
+}
- kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
+void *kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo)
+{
+ DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
+ bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
- ptr = __kgem_bo_map__gtt(kgem, bo);
- if (ptr == NULL)
- return NULL;
+ assert(bo->proxy == NULL);
+ assert(bo->exec == NULL);
+ assert(list_is_empty(&bo->list));
+ assert_tiling(kgem, bo);
+ assert(!bo->purged || bo->reusable);
- /* Cache this mapping to avoid the overhead of an
- * excruciatingly slow GTT pagefault. This is more an
- * issue with compositing managers which need to frequently
- * flush CPU damage to their GPU bo.
- */
- bo->map__gtt = ptr;
- DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
- }
+ if (bo->map__wc)
+ return bo->map__wc;
- return ptr;
+ return __kgem_bo_map__wc(kgem, bo);
}
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo)
@@ -6512,6 +6592,7 @@ init_buffer_from_bo(struct kgem_buffer *bo, struct kgem_bo *old)
__FUNCTION__, old->handle));
assert(old->proxy == NULL);
+ assert(list_is_empty(&old->list));
memcpy(&bo->base, old, sizeof(*old));
if (old->rq)
@@ -6817,7 +6898,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
assert(alloc);
alloc /= PAGE_SIZE;
- if (alloc > kgem->aperture_mappable / 4)
+ if (alloc > kgem->aperture_mappable / 4 && !kgem->has_wc_mmap)
flags &= ~KGEM_BUFFER_INPLACE;
if (kgem->has_llc &&
@@ -7049,7 +7130,7 @@ init:
assert(!bo->need_io || !bo->base.needs_flush);
assert(!bo->need_io || bo->base.domain != DOMAIN_GPU);
assert(bo->mem);
- assert(bo->mmapped != MMAPPED_GTT || MAP(bo->base.map__gtt) == bo->mem);
+ assert(bo->mmapped != MMAPPED_GTT || bo->base.map__gtt == bo->mem || bo->base.map__wc == bo->mem);
assert(bo->mmapped != MMAPPED_CPU || MAP(bo->base.map__cpu) == bo->mem);
bo->used = size;
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 7ac81cba..6adae3bf 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -62,6 +62,7 @@ struct kgem_bo {
void *map__cpu;
void *map__gtt;
+ void *map__wc;
#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3))
struct kgem_bo_binding {
@@ -192,6 +193,7 @@ struct kgem {
uint32_t has_wt :1;
uint32_t has_no_reloc :1;
uint32_t has_handle_lut :1;
+ uint32_t has_wc_mmap :1;
uint32_t can_blt_cpu :1;
uint32_t can_render_y :1;
@@ -504,6 +506,7 @@ uint64_t kgem_add_reloc64(struct kgem *kgem,
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo);
+void *kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
@@ -715,13 +718,16 @@ static inline bool kgem_bo_mapped(struct kgem *kgem, struct kgem_bo *bo)
if (bo->tiling == I915_TILING_NONE && (bo->domain == DOMAIN_CPU || kgem->has_llc))
return bo->map__cpu != NULL;
+ if (bo->tiling == I915_TILING_NONE && bo->map__wc)
+ return true;
+
return bo->map__gtt != NULL;
}
static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo)
{
- DBG(("%s: handle=%d, map=%p:%p, tiling=%d, domain=%d, offset=%ld\n",
- __FUNCTION__, bo->handle, bo->map__gtt, bo->map__cpu, bo->tiling, bo->domain, (long)bo->presumed_offset));
+ DBG(("%s: handle=%d, map=%p:%p:%p, tiling=%d, domain=%d, offset=%ld\n",
+ __FUNCTION__, bo->handle, bo->map__gtt, bo->map__wc, bo->map__cpu, bo->tiling, bo->domain, (long)bo->presumed_offset));
if (!bo->tiling && (kgem->has_llc || bo->domain == DOMAIN_CPU))
return true;
@@ -734,6 +740,9 @@ static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo)
if (kgem->gen == 021 && bo->tiling == I915_TILING_Y)
return false;
+ if (!bo->tiling && kgem->has_wc_mmap)
+ return true;
+
return __kgem_bo_num_pages(bo) <= kgem->aperture_mappable / 4;
}
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 30b0292b..9d139ede 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -675,7 +675,8 @@ void sna_pixmap_destroy(PixmapPtr pixmap);
#define assert_pixmap_map(pixmap, priv) do { \
assert(priv->mapped != MAPPED_NONE || pixmap->devPrivate.ptr == PTR(priv->ptr)); \
- assert(priv->mapped == MAPPED_NONE || pixmap->devPrivate.ptr == (priv->mapped == MAPPED_CPU ? MAP(priv->gpu_bo->map__cpu) : MAP(priv->gpu_bo->map__gtt))); \
+ assert(priv->mapped != MAPPED_CPU || pixmap->devPrivate.ptr == MAP(priv->gpu_bo->map__cpu)); \
+ assert(priv->mapped != MAPPED_GTT || pixmap->devPrivate.ptr == priv->gpu_bo->map__gtt || pixmap->devPrivate.ptr == priv->gpu_bo->map__wc); \
} while (0)
static inline void sna_pixmap_unmap(PixmapPtr pixmap, struct sna_pixmap *priv)
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index cd0b9072..0ef56fb9 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4588,7 +4588,7 @@ can_create_upload_tiled_x(struct sna *sna,
if (sna->kgem.has_llc)
return true;
- if (sna_pixmap_default_tiling(sna, pixmap))
+ if (!sna->kgem.has_wc_mmap && sna_pixmap_default_tiling(sna, pixmap))
return false;
return true;
@@ -4608,13 +4608,21 @@ create_upload_tiled_x(struct sna *sna,
assert(priv->gpu_bo == NULL);
assert(priv->gpu_damage == NULL);
- create = CREATE_CPU_MAP | CREATE_INACTIVE;
- if (!sna->kgem.has_llc)
- create |= CREATE_CACHED;
+ if (sna->kgem.has_llc)
+ create = CREATE_CPU_MAP | CREATE_INACTIVE;
+ else if (sna->kgem.has_wc_mmap)
+ create = CREATE_GTT_MAP | CREATE_INACTIVE;
+ else
+ create = CREATE_CPU_MAP | CREATE_INACTIVE | CREATE_CACHED;
return sna_pixmap_alloc_gpu(sna, pixmap, priv, create);
}
+static bool can_upload__tiled_x(struct kgem *kgem, struct kgem_bo *bo) /* True if the in-place upload paths can map bo: either via the CPU or via a WC mmap. */
+{
+ return kgem_bo_can_map__cpu(kgem, bo, true) || kgem->has_wc_mmap; /* has_wc_mmap is device-wide; bo itself is not examined in that case */
+}
+
static bool
try_upload__tiled_x(PixmapPtr pixmap, RegionRec *region,
int x, int y, int w, int h, char *bits, int stride)
@@ -4625,7 +4633,7 @@ try_upload__tiled_x(PixmapPtr pixmap, RegionRec *region,
uint8_t *dst;
int n;
- if (!kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, true)) {
+ if (!can_upload__tiled_x(&sna->kgem, priv->gpu_bo)) {
DBG(("%s: no, cannot map through the CPU\n", __FUNCTION__));
return false;
}
@@ -4638,11 +4646,19 @@ try_upload__tiled_x(PixmapPtr pixmap, RegionRec *region,
__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))
return false;
- dst = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo);
- if (dst == NULL)
- return false;
+ if (kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, true)) {
+ dst = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo);
+ if (dst == NULL)
+ return false;
- kgem_bo_sync__cpu(&sna->kgem, priv->gpu_bo);
+ kgem_bo_sync__cpu(&sna->kgem, priv->gpu_bo);
+ } else {
+ dst = kgem_bo_map__wc(&sna->kgem, priv->gpu_bo);
+ if (dst == NULL)
+ return false;
+
+ kgem_bo_sync__gtt(&sna->kgem, priv->gpu_bo);
+ }
box = region_rects(region);
n = region_num_rects(region);
@@ -4712,12 +4728,14 @@ try_upload__tiled_x(PixmapPtr pixmap, RegionRec *region,
} while (--n);
if (!priv->shm) {
- assert(dst == MAP(priv->gpu_bo->map__cpu));
pixmap->devPrivate.ptr = dst;
pixmap->devKind = priv->gpu_bo->pitch;
- priv->mapped = MAPPED_CPU;
+ if (dst == MAP(priv->gpu_bo->map__cpu)) {
+ priv->mapped = MAPPED_CPU;
+ priv->cpu = true;
+ } else
+ priv->mapped = MAPPED_GTT;
assert_pixmap_map(pixmap, priv);
- priv->cpu = true;
}
}
@@ -6129,7 +6147,7 @@ upload_inplace:
return false;
}
- if (!kgem_bo_can_map__cpu(&sna->kgem, dst_priv->gpu_bo, true) ||
+ if (!can_upload__tiled_x(&sna->kgem, dst_priv->gpu_bo) ||
__kgem_bo_is_busy(&sna->kgem, dst_priv->gpu_bo)) {
if (replaces && !dst_priv->pinned) {
unsigned create;
@@ -6156,7 +6174,7 @@ upload_inplace:
return false;
}
- if (!kgem_bo_can_map__cpu(&sna->kgem, dst_priv->gpu_bo, true)) {
+ if (!can_upload__tiled_x(&sna->kgem, dst_priv->gpu_bo)) {
DBG(("%s - no, cannot map dst for reads into the CPU\n", __FUNCTION__));
return false;
}
@@ -6169,13 +6187,23 @@ upload_inplace:
return false;
}
- ptr = kgem_bo_map__cpu(&sna->kgem, dst_priv->gpu_bo);
- if (ptr == NULL) {
- DBG(("%s - no, map failed\n", __FUNCTION__));
- return false;
- }
+ if (kgem_bo_can_map__cpu(&sna->kgem, dst_priv->gpu_bo, true)) {
+ ptr = kgem_bo_map__cpu(&sna->kgem, dst_priv->gpu_bo);
+ if (ptr == NULL) {
+ DBG(("%s - no, map failed\n", __FUNCTION__));
+ return false;
+ }
+
+ kgem_bo_sync__cpu(&sna->kgem, dst_priv->gpu_bo);
+ } else {
+ ptr = kgem_bo_map__wc(&sna->kgem, dst_priv->gpu_bo);
+ if (ptr == NULL) {
+ DBG(("%s - no, map failed\n", __FUNCTION__));
+ return false;
+ }
- kgem_bo_sync__cpu(&sna->kgem, dst_priv->gpu_bo);
+ kgem_bo_sync__gtt(&sna->kgem, dst_priv->gpu_bo);
+ }
if (!DAMAGE_IS_ALL(dst_priv->gpu_damage)) {
assert(!dst_priv->clear);
diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index 0889f9dc..f16a46cb 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -5032,7 +5032,7 @@ sna_cursor_pre_init(struct sna *sna)
#define DRM_CAP_CURSOR_WIDTH 8
#define DRM_CAP_CURSOR_HEIGHT 9
-#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
+#define I915_PARAM_HAS_COHERENT_PHYS_GTT 30
sna->cursor.max_size = 64;
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index 0181838d..d6aa1294 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -652,6 +652,9 @@ static bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo)
break;
}
+ if (kgem->has_wc_mmap)
+ return true;
+
return kgem_bo_can_map__cpu(kgem, bo, true);
}
@@ -663,14 +666,22 @@ write_boxes_inplace__tiled(struct kgem *kgem,
{
uint8_t *dst;
- assert(kgem_bo_can_map__cpu(kgem, bo, true));
+ assert(kgem->has_wc_mmap || kgem_bo_can_map__cpu(kgem, bo, true));
assert(bo->tiling != I915_TILING_Y);
- dst = kgem_bo_map__cpu(kgem, bo);
- if (dst == NULL)
- return false;
+ if (kgem_bo_can_map__cpu(kgem, bo, true)) {
+ dst = kgem_bo_map__cpu(kgem, bo);
+ if (dst == NULL)
+ return false;
- kgem_bo_sync__cpu(kgem, bo);
+ kgem_bo_sync__cpu(kgem, bo);
+ } else {
+ dst = kgem_bo_map__wc(kgem, bo);
+ if (dst == NULL)
+ return false;
+
+ kgem_bo_sync__gtt(kgem, bo);
+ }
if (sigtrap_get())
return false;