-rw-r--r--  src/sna/kgem.c              90
-rw-r--r--  src/sna/kgem.h               5
-rw-r--r--  src/sna/kgem_debug_gen3.c    4
-rw-r--r--  src/sna/kgem_debug_gen4.c    8
-rw-r--r--  src/sna/kgem_debug_gen5.c    8
-rw-r--r--  src/sna/kgem_debug_gen6.c   10
-rw-r--r--  src/sna/kgem_debug_gen7.c   10
-rw-r--r--  src/sna/sna_accel.c          9
-rw-r--r--  src/sna/sna_io.c             5
-rw-r--r--  src/sna/sna_video.c          1
10 files changed, 107 insertions, 43 deletions
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 68a1831b..3609a6f3 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -45,6 +45,12 @@ static inline void list_move(struct list *list, struct list *head)
list_add(list, head);
}
+static inline void list_move_tail(struct list *list, struct list *head)
+{
+ __list_del(list->prev, list->next);
+ list_add_tail(list, head);
+}
+
static inline void list_replace(struct list *old,
struct list *new)
{
@@ -75,6 +81,7 @@ static inline void list_replace(struct list *old,
#endif
#define PAGE_SIZE 4096
+#define MAX_VMA_CACHE 128
struct kgem_partial_bo {
struct kgem_bo base;
@@ -125,7 +132,6 @@ static int gem_set_tiling(int fd, uint32_t handle, int tiling, int stride)
static void *gem_mmap(int fd, uint32_t handle, int size, int prot)
{
struct drm_i915_gem_mmap_gtt mmap_arg;
- struct drm_i915_gem_set_domain set_domain;
void *ptr;
DBG(("%s(handle=%d, size=%d, prot=%s)\n", __FUNCTION__,
@@ -144,12 +150,6 @@ static void *gem_mmap(int fd, uint32_t handle, int size, int prot)
ptr = NULL;
}
- VG_CLEAR(set_domain);
- set_domain.handle = handle;
- set_domain.read_domains = I915_GEM_DOMAIN_GTT;
- set_domain.write_domain = prot & PROT_WRITE ? I915_GEM_DOMAIN_GTT : 0;
- drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
-
return ptr;
}
@@ -274,6 +274,7 @@ static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
bo->cpu_write = true;
list_init(&bo->request);
list_init(&bo->list);
+ list_init(&bo->vma);
return bo;
}
@@ -352,6 +353,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
list_init(&kgem->partial);
list_init(&kgem->requests);
list_init(&kgem->flushing);
+ list_init(&kgem->vma_cache);
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
list_init(&kgem->inactive[i]);
for (i = 0; i < ARRAY_SIZE(kgem->active); i++)
@@ -594,6 +596,12 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
b = next;
}
+ if (bo->map) {
+ munmap(bo->map, bo->size);
+ list_del(&bo->vma);
+ kgem->vma_count--;
+ }
+
list_del(&bo->list);
list_del(&bo->request);
gem_close(kgem->fd, bo->handle);
@@ -620,6 +628,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
base->reusable = true;
list_init(&base->list);
list_replace(&bo->request, &base->request);
+ list_replace(&bo->vma, &base->vma);
free(bo);
bo = base;
}
@@ -1814,19 +1823,76 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
{
void *ptr;
- ptr = gem_mmap(kgem->fd, bo->handle, bo->size, prot);
- if (ptr == NULL)
- return NULL;
+ ptr = bo->map;
+ if (ptr == NULL) {
+ /* vma are limited on a per-process basis to around 64k.
+ * This includes all malloc arenas as well as other file
+ * mappings. In order to be fair and not hog the cache,
+ * and more importantly not to exhaust that limit and to
+ * start failing mappings, we keep the number of vma we
+ * hold open within a conservative limit.
+ */
+ while (kgem->vma_count > MAX_VMA_CACHE) {
+ struct kgem_bo *old;
+
+ old = list_first_entry(&kgem->vma_cache,
+ struct kgem_bo,
+ vma);
+ DBG(("%s: discarding vma cache for %d\n",
+ __FUNCTION__, old->handle));
+ munmap(old->map, old->size);
+ old->map = NULL;
+ list_del(&old->vma);
+ kgem->vma_count--;
+ }
+
+ ptr = gem_mmap(kgem->fd, bo->handle, bo->size,
+ PROT_READ | PROT_WRITE);
+ if (ptr == NULL)
+ return NULL;
+
+ /* Cache this mapping to avoid the overhead of an
+ * excruciatingly slow GTT pagefault. This is more an
+ * issue with compositing managers which need to frequently
+ * flush CPU damage to their GPU bo.
+ */
+ bo->map = ptr;
+ kgem->vma_count++;
+
+ DBG(("%s: caching vma for %d\n",
+ __FUNCTION__, bo->handle));
+ }
+
+ if (bo->needs_flush | bo->gpu) {
+ struct drm_i915_gem_set_domain set_domain;
+
+ VG_CLEAR(set_domain);
+ set_domain.handle = bo->handle;
+ set_domain.read_domains = I915_GEM_DOMAIN_GTT;
+ set_domain.write_domain = prot & PROT_WRITE ? I915_GEM_DOMAIN_GTT : 0;
+ drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
- if (prot & PROT_WRITE) {
bo->needs_flush = false;
if (bo->gpu)
kgem_retire(kgem);
}
+ list_move_tail(&bo->vma, &kgem->vma_cache);
+
return ptr;
}
+void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo)
+{
+ assert(bo->map);
+
+ munmap(bo->map, bo->size);
+ bo->map = NULL;
+
+ list_del(&bo->vma);
+ kgem->vma_count--;
+}
+
uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_gem_flink flink;
@@ -2151,6 +2217,8 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
&bo->base.request);
else
list_init(&bo->base.request);
+ list_replace(&old->vma,
+ &bo->base.vma);
free(old);
bo->base.refcnt = 1;
} else {
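
The two comments added to kgem_bo_map() above describe the scheme: GTT mappings are now remembered in bo->map and kept on an LRU list, kgem->vma_cache, which is trimmed once vma_count exceeds MAX_VMA_CACHE, so repeated map calls neither re-issue the mmap ioctl nor take the excruciatingly slow GTT pagefaults again. Internally the mapping is always created PROT_READ | PROT_WRITE; the prot argument now only selects the write domain for the set-domain ioctl. A minimal caller-side sketch of the intended usage follows; it is illustrative only, not part of the commit, and the upload_bytes() helper is a name invented here.

#include <string.h>
#include <sys/mman.h>
#include "kgem.h"

/* Hypothetical helper: shows how the cached GTT mapping is meant to be
 * consumed after this change. */
static void upload_bytes(struct kgem *kgem, struct kgem_bo *bo,
                         const void *src, size_t len)
{
	/* The first call performs the GTT mmap and stashes the pointer in
	 * bo->map; later calls reuse it and at most update the domain. */
	void *dst = kgem_bo_map(kgem, bo, PROT_WRITE);
	if (dst == NULL)
		return;

	memcpy(dst, src, len);

	/* No munmap() here: the mapping stays on kgem->vma_cache until the
	 * cache grows past MAX_VMA_CACHE entries, the bo is freed, or
	 * kgem_bo_unmap() is called explicitly. */
}
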
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index e9e7cdcb..0d85f643 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -47,7 +47,9 @@ struct kgem_bo {
struct list list;
struct list request;
+ struct list vma;
+ void *map;
struct kgem_request *rq;
struct drm_i915_gem_exec_object2 *exec;
@@ -103,6 +105,7 @@ struct kgem {
struct list flushing, active[16], inactive[16];
struct list partial;
struct list requests;
+ struct list vma_cache;
struct kgem_request *next_request;
uint16_t nbatch;
@@ -110,6 +113,7 @@ struct kgem {
uint16_t nexec;
uint16_t nreloc;
uint16_t nfence;
+ uint16_t vma_count;
uint32_t flush:1;
uint32_t sync:1;
@@ -314,6 +318,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
uint32_t delta);
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot);
+void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo);
uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo);
Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
diff --git a/src/sna/kgem_debug_gen3.c b/src/sna/kgem_debug_gen3.c
index d152b608..0238b734 100644
--- a/src/sna/kgem_debug_gen3.c
+++ b/src/sna/kgem_debug_gen3.c
@@ -102,7 +102,7 @@ static void gen3_update_vertex_buffer_addr(struct kgem *kgem,
ptr = (char *)base + kgem->reloc[i].delta;
if (state.vb.current)
- munmap(state.vb.base, state.vb.current->size);
+ kgem_bo_unmap(kgem, state.vb.current);
state.vb.current = bo;
state.vb.base = base;
@@ -1613,7 +1613,7 @@ int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset)
void kgem_gen3_finish_state(struct kgem *kgem)
{
if (state.vb.current)
- munmap(state.vb.base, state.vb.current->size);
+ kgem_bo_unmap(kgem, state.vb.current);
memset(&state, 0, sizeof(state));
}
diff --git a/src/sna/kgem_debug_gen4.c b/src/sna/kgem_debug_gen4.c
index d736cbd9..0f91d29a 100644
--- a/src/sna/kgem_debug_gen4.c
+++ b/src/sna/kgem_debug_gen4.c
@@ -90,7 +90,7 @@ static void gen4_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
i = data[0] >> 27;
if (state.vb[i].current)
- munmap(state.vb[i].base, state.vb[i].current->size);
+ kgem_bo_unmap(kgem, state.vb[i].current);
state.vb[i].current = bo;
state.vb[i].base = base;
@@ -420,7 +420,7 @@ static void
put_reloc(struct kgem *kgem, struct reloc *r)
{
if (r->bo != NULL)
- munmap(r->base, r->bo->size);
+ kgem_bo_unmap(kgem, r->bo);
}
#endif
@@ -697,7 +697,7 @@ static void finish_vertex_buffers(struct kgem *kgem)
for (i = 0; i < ARRAY_SIZE(state.vb); i++)
if (state.vb[i].current)
- munmap(state.vb[i].base, state.vb[i].current->size);
+ kgem_bo_unmap(kgem, state.vb[i].current);
}
void kgem_gen4_finish_state(struct kgem *kgem)
@@ -705,7 +705,7 @@ void kgem_gen4_finish_state(struct kgem *kgem)
finish_vertex_buffers(kgem);
if (state.dynamic_state.current)
- munmap(state.dynamic_state.base, state.dynamic_state.current->size);
+ kgem_bo_unmap(kgem, state.dynamic_state.current);
memset(&state, 0, sizeof(state));
}
diff --git a/src/sna/kgem_debug_gen5.c b/src/sna/kgem_debug_gen5.c
index 78ba4432..c4f5df15 100644
--- a/src/sna/kgem_debug_gen5.c
+++ b/src/sna/kgem_debug_gen5.c
@@ -85,7 +85,7 @@ static void gen5_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
i = data[0] >> 27;
if (state.vb[i].current)
- munmap(state.vb[i].base, state.vb[i].current->size);
+ kgem_bo_unmap(kgem, state.vb[i].current);
state.vb[i].handle = reloc->target_handle;
state.vb[i].current = bo;
@@ -394,7 +394,7 @@ static void
put_reloc(struct kgem *kgem, struct reloc *r)
{
if (r->bo != NULL)
- munmap(r->base, r->bo->size);
+ kgem_bo_unmap(kgem, r->bo);
}
#endif
@@ -673,7 +673,7 @@ static void finish_vertex_buffers(struct kgem *kgem)
for (i = 0; i < ARRAY_SIZE(state.vb); i++)
if (state.vb[i].current)
- munmap(state.vb[i].base, state.vb[i].current->size);
+ kgem_bo_unmap(kgem, state.vb[i].current);
}
void kgem_gen5_finish_state(struct kgem *kgem)
@@ -681,7 +681,7 @@ void kgem_gen5_finish_state(struct kgem *kgem)
finish_vertex_buffers(kgem);
if (state.dynamic_state.current)
- munmap(state.dynamic_state.base, state.dynamic_state.current->size);
+ kgem_bo_unmap(kgem, state.dynamic_state.current);
memset(&state, 0, sizeof(state));
}
diff --git a/src/sna/kgem_debug_gen6.c b/src/sna/kgem_debug_gen6.c
index d441b536..5bcd85dc 100644
--- a/src/sna/kgem_debug_gen6.c
+++ b/src/sna/kgem_debug_gen6.c
@@ -89,7 +89,7 @@ static void gen6_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
i = data[0] >> 26;
if (state.vb[i].current)
- munmap(state.vb[i].base, state.vb[i].current->size);
+ kgem_bo_unmap(kgem, state.vb[i].current);
state.vb[i].current = bo;
state.vb[i].base = base;
@@ -130,7 +130,7 @@ static void gen6_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset)
}
if (state.dynamic_state.current)
- munmap(state.dynamic_state.base, state.dynamic_state.current->size);
+ kgem_bo_unmap(kgem, state.dynamic_state.current);
state.dynamic_state.current = bo;
state.dynamic_state.base = base;
@@ -306,7 +306,7 @@ static void finish_vertex_buffers(struct kgem *kgem)
for (i = 0; i < ARRAY_SIZE(state.vb); i++)
if (state.vb[i].current)
- munmap(state.vb[i].base, state.vb[i].current->size);
+ kgem_bo_unmap(kgem, state.vb[i].current);
}
static void finish_state(struct kgem *kgem)
@@ -314,7 +314,7 @@ static void finish_state(struct kgem *kgem)
finish_vertex_buffers(kgem);
if (state.dynamic_state.current)
- munmap(state.dynamic_state.base, state.dynamic_state.current->size);
+ kgem_bo_unmap(kgem, state.dynamic_state.current);
memset(&state, 0, sizeof(state));
}
@@ -482,7 +482,7 @@ static void
put_reloc(struct kgem *kgem, struct reloc *r)
{
if (r->bo != NULL)
- munmap(r->base, r->bo->size);
+ kgem_bo_unmap(kgem, r->bo);
}
static const char *
diff --git a/src/sna/kgem_debug_gen7.c b/src/sna/kgem_debug_gen7.c
index f6a49752..a33a918d 100644
--- a/src/sna/kgem_debug_gen7.c
+++ b/src/sna/kgem_debug_gen7.c
@@ -89,7 +89,7 @@ static void gen7_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
i = data[0] >> 26;
if (state.vb[i].current)
- munmap(state.vb[i].base, state.vb[i].current->size);
+ kgem_bo_unmap(kgem, state.vb[i].current);
state.vb[i].current = bo;
state.vb[i].base = base;
@@ -130,7 +130,7 @@ static void gen7_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset)
}
if (state.dynamic_state.current)
- munmap(state.dynamic_state.base, state.dynamic_state.current->size);
+ kgem_bo_unmap(kgem, state.dynamic_state.current);
state.dynamic_state.current = bo;
state.dynamic_state.base = base;
@@ -306,7 +306,7 @@ static void finish_vertex_buffers(struct kgem *kgem)
for (i = 0; i < ARRAY_SIZE(state.vb); i++)
if (state.vb[i].current)
- munmap(state.vb[i].base, state.vb[i].current->size);
+ kgem_bo_unmap(kgem, state.vb[i].current);
}
static void finish_state(struct kgem *kgem)
@@ -314,7 +314,7 @@ static void finish_state(struct kgem *kgem)
finish_vertex_buffers(kgem);
if (state.dynamic_state.current)
- munmap(state.dynamic_state.base, state.dynamic_state.current->size);
+ kgem_bo_unmap(kgem, state.dynamic_state.current);
memset(&state, 0, sizeof(state));
}
@@ -482,7 +482,7 @@ static void
put_reloc(struct kgem *kgem, struct reloc *r)
{
if (r->bo != NULL)
- munmap(r->base, r->bo->size);
+ kgem_bo_unmap(kgem, r->bo);
}
static const char *
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index bb52770b..44580be1 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -187,9 +187,6 @@ static Bool sna_destroy_private(PixmapPtr pixmap, struct sna_pixmap *priv)
sna_damage_destroy(&priv->gpu_damage);
sna_damage_destroy(&priv->cpu_damage);
- if (priv->mapped)
- munmap(pixmap->devPrivate.ptr, priv->gpu_bo->size);
-
/* Always release the gpu bo back to the lower levels of caching */
if (priv->gpu_bo)
kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
@@ -1407,9 +1404,10 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
/* XXX performing the upload inplace is currently about 20x slower
* for putimage10 on gen6 -- mostly due to slow page faulting in kernel.
+ * So we try again, now with vma caching, and only for pixmaps that
+ * will be flushed immediately...
*/
-#if 0
- if (priv->gpu_bo->rq == NULL &&
+ if (priv->flush &&
sna_put_image_upload_blt(drawable, gc, region,
x, y, w, h, bits, stride)) {
if (region_subsumes_drawable(region, &pixmap->drawable)) {
@@ -1425,7 +1423,6 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
return true;
}
-#endif
if (priv->cpu_bo)
kgem_bo_sync(&sna->kgem, priv->cpu_bo, true);
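
The hunk above replaces the '#if 0' guard with a check on priv->flush: only pixmaps that are already flushed to the GPU every frame (the compositing-manager case called out in the kgem.c comment) take the inplace GTT write, where the cached vma keeps the pagefault cost from recurring; everything else still syncs priv->cpu_bo and copies on the CPU as before. A hedged sketch of that decision follows; the wrapper, its parameter types and the Bool return of sna_put_image_upload_blt() are assumptions for illustration, while the condition and call mirror the diff.

#include "sna.h"	/* assumed compilation context of sna_accel.c */

/* Hypothetical wrapper restating the gating logic of the hunk above. */
static Bool try_put_zpixmap_inplace(DrawablePtr drawable, GCPtr gc,
                                    RegionPtr region,
                                    struct sna_pixmap *priv,
                                    int x, int y, int w, int h,
                                    char *bits, int stride)
{
	/* Only pixmaps marked for immediate flushing are worth writing
	 * through the GTT; their mapping stays hot in the vma cache. */
	if (!priv->flush)
		return FALSE;

	/* Blit straight into the GPU bo via its cached mapping. */
	return sna_put_image_upload_blt(drawable, gc, region,
					x, y, w, h, bits, stride);
}

On success the caller records the region as gpu_damage and drops the overlapping cpu_damage, exactly as the remainder of the hunk shows.
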
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index aba636cc..767824fa 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -80,8 +80,6 @@ static void read_boxes_inplace(struct kgem *kgem,
box->x2 - box->x1, box->y2 - box->y1);
box++;
} while (--n);
-
- munmap(src, bo->size);
}
void sna_read_boxes(struct sna *sna,
@@ -283,8 +281,6 @@ static void write_boxes_inplace(struct kgem *kgem,
box->x2 - box->x1, box->y2 - box->y1);
box++;
} while (--n);
-
- munmap(dst, bo->size);
}
void sna_write_boxes(struct sna *sna,
@@ -464,7 +460,6 @@ struct kgem_bo *sna_replace(struct sna *sna,
0, 0,
pixmap->drawable.width,
pixmap->drawable.height);
- munmap(dst, bo->size);
}
}
diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
index bd5ff14a..d6d56f40 100644
--- a/src/sna/sna_video.c
+++ b/src/sna/sna_video.c
@@ -481,7 +481,6 @@ sna_video_copy_data(struct sna *sna,
else
sna_copy_packed_data(video, frame, buf, dst);
- munmap(dst, frame->bo->size);
return TRUE;
}