 src/sna/kgem.c            | 90
 src/sna/kgem.h            |  5
 src/sna/kgem_debug_gen3.c |  4
 src/sna/kgem_debug_gen4.c |  8
 src/sna/kgem_debug_gen5.c |  8
 src/sna/kgem_debug_gen6.c | 10
 src/sna/kgem_debug_gen7.c | 10
 src/sna/sna_accel.c       |  9
 src/sna/sna_io.c          |  5
 src/sna/sna_video.c       |  1
 10 files changed, 107 insertions(+), 43 deletions(-)
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 68a1831b..3609a6f3 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -45,6 +45,12 @@ static inline void list_move(struct list *list, struct list *head)
 	list_add(list, head);
 }
 
+static inline void list_move_tail(struct list *list, struct list *head)
+{
+	__list_del(list->prev, list->next);
+	list_add_tail(list, head);
+}
+
 static inline void list_replace(struct list *old,
 				struct list *new)
 {
@@ -75,6 +81,7 @@ static inline void list_replace(struct list *old,
 #endif
 
 #define PAGE_SIZE 4096
+#define MAX_VMA_CACHE 128
 
 struct kgem_partial_bo {
 	struct kgem_bo base;
@@ -125,7 +132,6 @@ static int gem_set_tiling(int fd, uint32_t handle, int tiling, int stride)
 static void *gem_mmap(int fd, uint32_t handle, int size, int prot)
 {
 	struct drm_i915_gem_mmap_gtt mmap_arg;
-	struct drm_i915_gem_set_domain set_domain;
 	void *ptr;
 
 	DBG(("%s(handle=%d, size=%d, prot=%s)\n", __FUNCTION__,
@@ -144,12 +150,6 @@ static void *gem_mmap(int fd, uint32_t handle, int size, int prot)
 		ptr = NULL;
 	}
 
-	VG_CLEAR(set_domain);
-	set_domain.handle = handle;
-	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
-	set_domain.write_domain = prot & PROT_WRITE ? I915_GEM_DOMAIN_GTT : 0;
-	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
-
 	return ptr;
 }
 
@@ -274,6 +274,7 @@ static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
 	bo->cpu_write = true;
 	list_init(&bo->request);
 	list_init(&bo->list);
+	list_init(&bo->vma);
 
 	return bo;
 }
@@ -352,6 +353,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 	list_init(&kgem->partial);
 	list_init(&kgem->requests);
 	list_init(&kgem->flushing);
+	list_init(&kgem->vma_cache);
 	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
 		list_init(&kgem->inactive[i]);
 	for (i = 0; i < ARRAY_SIZE(kgem->active); i++)
@@ -594,6 +596,12 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
 		b = next;
 	}
 
+	if (bo->map) {
+		munmap(bo->map, bo->size);
+		list_del(&bo->vma);
+		kgem->vma_count--;
+	}
+
 	list_del(&bo->list);
 	list_del(&bo->request);
 	gem_close(kgem->fd, bo->handle);
@@ -620,6 +628,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 		base->reusable = true;
 		list_init(&base->list);
 		list_replace(&bo->request, &base->request);
+		list_replace(&bo->vma, &base->vma);
 		free(bo);
 		bo = base;
 	}
@@ -1814,19 +1823,76 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
 {
 	void *ptr;
 
-	ptr = gem_mmap(kgem->fd, bo->handle, bo->size, prot);
-	if (ptr == NULL)
-		return NULL;
+	ptr = bo->map;
+	if (ptr == NULL) {
+		/* vma are limited on a per-process basis to around 64k.
+		 * This includes all malloc arenas as well as other file
+		 * mappings. In order to be fair and not hog the cache,
+		 * and more importantly not to exhaust that limit and to
+		 * start failing mappings, we keep our own number of open
+		 * vma to within a conservative value.
+		 */
+		while (kgem->vma_count > MAX_VMA_CACHE) {
+			struct kgem_bo *old;
+
+			old = list_first_entry(&kgem->vma_cache,
+					       struct kgem_bo,
+					       vma);
+			DBG(("%s: discarding vma cache for %d\n",
+			     __FUNCTION__, old->handle));
+			munmap(old->map, old->size);
+			old->map = NULL;
+			list_del(&old->vma);
+			kgem->vma_count--;
+		}
+
+		ptr = gem_mmap(kgem->fd, bo->handle, bo->size,
+			       PROT_READ | PROT_WRITE);
+		if (ptr == NULL)
+			return NULL;
+
+		/* Cache this mapping to avoid the overhead of an
+		 * excruciatingly slow GTT pagefault. This is more an
+		 * issue with compositing managers which need to
+		 * frequently flush CPU damage to their GPU bo.
+		 */
+		bo->map = ptr;
+		kgem->vma_count++;
+
+		DBG(("%s: caching vma for %d\n",
+		     __FUNCTION__, bo->handle));
+	}
+
+	if (bo->needs_flush | bo->gpu) {
+		struct drm_i915_gem_set_domain set_domain;
+
+		VG_CLEAR(set_domain);
+		set_domain.handle = bo->handle;
+		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
+		set_domain.write_domain = prot & PROT_WRITE ? I915_GEM_DOMAIN_GTT : 0;
+		drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
 
-	if (prot & PROT_WRITE) {
 		bo->needs_flush = false;
 		if (bo->gpu)
 			kgem_retire(kgem);
 	}
 
+	list_move_tail(&bo->vma, &kgem->vma_cache);
+
 	return ptr;
 }
 
+void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo)
+{
+	assert(bo->map);
+
+	munmap(bo->map, bo->size);
+	bo->map = NULL;
+
+	list_del(&bo->vma);
+	kgem->vma_count--;
+}
+
 uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo)
 {
 	struct drm_gem_flink flink;
@@ -2151,6 +2217,8 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 				     &bo->base.request);
 		else
 			list_init(&bo->base.request);
+		list_replace(&old->vma,
+			     &bo->base.vma);
 		free(old);
 		bo->base.refcnt = 1;
 	} else {
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index e9e7cdcb..0d85f643 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -47,7 +47,9 @@ struct kgem_bo {
 
 	struct list list;
 	struct list request;
+	struct list vma;
 
+	void *map;
 	struct kgem_request *rq;
 	struct drm_i915_gem_exec_object2 *exec;
 
@@ -103,6 +105,7 @@ struct kgem {
 	struct list flushing, active[16], inactive[16];
 	struct list partial;
 	struct list requests;
+	struct list vma_cache;
 	struct kgem_request *next_request;
 
 	uint16_t nbatch;
@@ -110,6 +113,7 @@ struct kgem {
 	uint16_t nexec;
 	uint16_t nreloc;
 	uint16_t nfence;
+	uint16_t vma_count;
 
 	uint32_t flush:1;
 	uint32_t sync:1;
@@ -314,6 +318,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
 			uint32_t delta);
 
 void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot);
+void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo);
 uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo);
 
 Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
diff --git a/src/sna/kgem_debug_gen3.c b/src/sna/kgem_debug_gen3.c
index d152b608..0238b734 100644
--- a/src/sna/kgem_debug_gen3.c
+++ b/src/sna/kgem_debug_gen3.c
@@ -102,7 +102,7 @@ static void gen3_update_vertex_buffer_addr(struct kgem *kgem,
 	ptr = (char *)base + kgem->reloc[i].delta;
 
 	if (state.vb.current)
-		munmap(state.vb.base, state.vb.current->size);
+		kgem_bo_unmap(kgem, state.vb.current);
 
 	state.vb.current = bo;
 	state.vb.base = base;
@@ -1613,7 +1613,7 @@ int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset)
 void kgem_gen3_finish_state(struct kgem *kgem)
 {
 	if (state.vb.current)
-		munmap(state.vb.base, state.vb.current->size);
+		kgem_bo_unmap(kgem, state.vb.current);
 
 	memset(&state, 0, sizeof(state));
 }
diff --git a/src/sna/kgem_debug_gen4.c b/src/sna/kgem_debug_gen4.c
index d736cbd9..0f91d29a 100644
--- a/src/sna/kgem_debug_gen4.c
+++ b/src/sna/kgem_debug_gen4.c
@@ -90,7 +90,7 @@ static void gen4_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
 	i = data[0] >> 27;
 
 	if (state.vb[i].current)
-		munmap(state.vb[i].base, state.vb[i].current->size);
+		kgem_bo_unmap(kgem, state.vb[i].current);
 
 	state.vb[i].current = bo;
 	state.vb[i].base = base;
@@ -420,7 +420,7 @@ static void
 put_reloc(struct kgem *kgem, struct reloc *r)
 {
 	if (r->bo != NULL)
-		munmap(r->base, r->bo->size);
+		kgem_bo_unmap(kgem, r->bo);
 }
 #endif
@@ -697,7 +697,7 @@ static void finish_vertex_buffers(struct kgem *kgem)
 
 	for (i = 0; i < ARRAY_SIZE(state.vb); i++)
 		if (state.vb[i].current)
-			munmap(state.vb[i].base, state.vb[i].current->size);
+			kgem_bo_unmap(kgem, state.vb[i].current);
 }
 
 void kgem_gen4_finish_state(struct kgem *kgem)
@@ -705,7 +705,7 @@ void kgem_gen4_finish_state(struct kgem *kgem)
 	finish_vertex_buffers(kgem);
 
 	if (state.dynamic_state.current)
-		munmap(state.dynamic_state.base, state.dynamic_state.current->size);
+		kgem_bo_unmap(kgem, state.dynamic_state.current);
 
 	memset(&state, 0, sizeof(state));
 }
diff --git a/src/sna/kgem_debug_gen5.c b/src/sna/kgem_debug_gen5.c
index 78ba4432..c4f5df15 100644
--- a/src/sna/kgem_debug_gen5.c
+++ b/src/sna/kgem_debug_gen5.c
@@ -85,7 +85,7 @@ static void gen5_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
 	i = data[0] >> 27;
 
 	if (state.vb[i].current)
-		munmap(state.vb[i].base, state.vb[i].current->size);
+		kgem_bo_unmap(kgem, state.vb[i].current);
 
 	state.vb[i].handle = reloc->target_handle;
 	state.vb[i].current = bo;
@@ -394,7 +394,7 @@ static void
 put_reloc(struct kgem *kgem, struct reloc *r)
 {
 	if (r->bo != NULL)
-		munmap(r->base, r->bo->size);
+		kgem_bo_unmap(kgem, r->bo);
 }
 #endif
@@ -673,7 +673,7 @@ static void finish_vertex_buffers(struct kgem *kgem)
 
 	for (i = 0; i < ARRAY_SIZE(state.vb); i++)
 		if (state.vb[i].current)
-			munmap(state.vb[i].base, state.vb[i].current->size);
+			kgem_bo_unmap(kgem, state.vb[i].current);
 }
 
 void kgem_gen5_finish_state(struct kgem *kgem)
@@ -681,7 +681,7 @@ void kgem_gen5_finish_state(struct kgem *kgem)
 	finish_vertex_buffers(kgem);
 
 	if (state.dynamic_state.current)
-		munmap(state.dynamic_state.base, state.dynamic_state.current->size);
+		kgem_bo_unmap(kgem, state.dynamic_state.current);
 
 	memset(&state, 0, sizeof(state));
 }
diff --git a/src/sna/kgem_debug_gen6.c b/src/sna/kgem_debug_gen6.c
index d441b536..5bcd85dc 100644
--- a/src/sna/kgem_debug_gen6.c
+++ b/src/sna/kgem_debug_gen6.c
@@ -89,7 +89,7 @@ static void gen6_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
 	i = data[0] >> 26;
 
 	if (state.vb[i].current)
-		munmap(state.vb[i].base, state.vb[i].current->size);
+		kgem_bo_unmap(kgem, state.vb[i].current);
 
 	state.vb[i].current = bo;
 	state.vb[i].base = base;
@@ -130,7 +130,7 @@ static void gen6_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset)
 	}
 
 	if (state.dynamic_state.current)
-		munmap(state.dynamic_state.base, state.dynamic_state.current->size);
+		kgem_bo_unmap(kgem, state.dynamic_state.current);
 
 	state.dynamic_state.current = bo;
 	state.dynamic_state.base = base;
@@ -306,7 +306,7 @@ static void finish_vertex_buffers(struct kgem *kgem)
 
 	for (i = 0; i < ARRAY_SIZE(state.vb); i++)
 		if (state.vb[i].current)
-			munmap(state.vb[i].base, state.vb[i].current->size);
+			kgem_bo_unmap(kgem, state.vb[i].current);
 }
 
 static void finish_state(struct kgem *kgem)
@@ -314,7 +314,7 @@ static void finish_state(struct kgem *kgem)
 	finish_vertex_buffers(kgem);
 
 	if (state.dynamic_state.current)
-		munmap(state.dynamic_state.base, state.dynamic_state.current->size);
+		kgem_bo_unmap(kgem, state.dynamic_state.current);
 
 	memset(&state, 0, sizeof(state));
 }
@@ -482,7 +482,7 @@ static void
 put_reloc(struct kgem *kgem, struct reloc *r)
 {
 	if (r->bo != NULL)
-		munmap(r->base, r->bo->size);
+		kgem_bo_unmap(kgem, r->bo);
 }
 
 static const char *
diff --git a/src/sna/kgem_debug_gen7.c b/src/sna/kgem_debug_gen7.c
index f6a49752..a33a918d 100644
--- a/src/sna/kgem_debug_gen7.c
+++ b/src/sna/kgem_debug_gen7.c
@@ -89,7 +89,7 @@ static void gen7_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
 	i = data[0] >> 26;
 
 	if (state.vb[i].current)
-		munmap(state.vb[i].base, state.vb[i].current->size);
+		kgem_bo_unmap(kgem, state.vb[i].current);
 
 	state.vb[i].current = bo;
 	state.vb[i].base = base;
@@ -130,7 +130,7 @@ static void gen7_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset)
 	}
 
 	if (state.dynamic_state.current)
-		munmap(state.dynamic_state.base, state.dynamic_state.current->size);
+		kgem_bo_unmap(kgem, state.dynamic_state.current);
 
 	state.dynamic_state.current = bo;
 	state.dynamic_state.base = base;
@@ -306,7 +306,7 @@ static void finish_vertex_buffers(struct kgem *kgem)
 
 	for (i = 0; i < ARRAY_SIZE(state.vb); i++)
 		if (state.vb[i].current)
-			munmap(state.vb[i].base, state.vb[i].current->size);
+			kgem_bo_unmap(kgem, state.vb[i].current);
 }
 
 static void finish_state(struct kgem *kgem)
@@ -314,7 +314,7 @@ static void finish_state(struct kgem *kgem)
 	finish_vertex_buffers(kgem);
 
 	if (state.dynamic_state.current)
-		munmap(state.dynamic_state.base, state.dynamic_state.current->size);
+		kgem_bo_unmap(kgem, state.dynamic_state.current);
 
 	memset(&state, 0, sizeof(state));
 }
@@ -482,7 +482,7 @@ static void
 put_reloc(struct kgem *kgem, struct reloc *r)
 {
 	if (r->bo != NULL)
-		munmap(r->base, r->bo->size);
+		kgem_bo_unmap(kgem, r->bo);
 }
 
 static const char *
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index bb52770b..44580be1 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -187,9 +187,6 @@ static Bool sna_destroy_private(PixmapPtr pixmap, struct sna_pixmap *priv)
 	sna_damage_destroy(&priv->gpu_damage);
 	sna_damage_destroy(&priv->cpu_damage);
 
-	if (priv->mapped)
-		munmap(pixmap->devPrivate.ptr, priv->gpu_bo->size);
-
 	/* Always release the gpu bo back to the lower levels of caching */
 	if (priv->gpu_bo)
 		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
@@ -1407,9 +1404,10 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 
 	/* XXX performing the upload inplace is currently about 20x slower
 	 * for putimage10 on gen6 -- mostly due to slow page faulting in kernel.
+	 * So we try again with vma caching and only for pixmaps that will be
+	 * immediately flushed...
 	 */
-#if 0
-	if (priv->gpu_bo->rq == NULL &&
+	if (priv->flush &&
 	    sna_put_image_upload_blt(drawable, gc, region,
 				     x, y, w, h, bits, stride)) {
 		if (region_subsumes_drawable(region, &pixmap->drawable)) {
@@ -1425,7 +1423,6 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 
 		return true;
 	}
-#endif
 
 	if (priv->cpu_bo)
 		kgem_bo_sync(&sna->kgem, priv->cpu_bo, true);
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index aba636cc..767824fa 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -80,8 +80,6 @@ static void read_boxes_inplace(struct kgem *kgem,
 			   box->x2 - box->x1, box->y2 - box->y1);
 		box++;
 	} while (--n);
-
-	munmap(src, bo->size);
 }
 
 void sna_read_boxes(struct sna *sna,
@@ -283,8 +281,6 @@ static void write_boxes_inplace(struct kgem *kgem,
 			   box->x2 - box->x1, box->y2 - box->y1);
 		box++;
 	} while (--n);
-
-	munmap(dst, bo->size);
 }
 
 void sna_write_boxes(struct sna *sna,
@@ -464,7 +460,6 @@ struct kgem_bo *sna_replace(struct sna *sna,
 			   0, 0,
 			   pixmap->drawable.width,
 			   pixmap->drawable.height);
-		munmap(dst, bo->size);
 	}
 }
diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
index bd5ff14a..d6d56f40 100644
--- a/src/sna/sna_video.c
+++ b/src/sna/sna_video.c
@@ -481,7 +481,6 @@ sna_video_copy_data(struct sna *sna,
 	else
 		sna_copy_packed_data(video, frame, buf, dst);
 
-	munmap(dst, frame->bo->size);
 	return TRUE;
 }
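
Editor's note: the heart of this patch is the capped, LRU-ordered cache of GTT mappings in kgem_bo_map(): a mapping now survives across map/unmap cycles, every use moves the bo to the tail of kgem->vma_cache, and once vma_count exceeds MAX_VMA_CACHE (128) the coldest mapping at the head of the list is munmap'ed. The sketch below is a minimal standalone rendering of that policy; struct bo, the open-coded list helpers, and the vma_cache_* names are simplified stand-ins for the driver's kgem types, not the real API, and error handling is elided.

/* Standalone sketch of the LRU eviction policy used by the vma cache. */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/mman.h>

#define MAX_VMA_CACHE 128

struct list { struct list *prev, *next; };

static void list_init(struct list *l) { l->prev = l->next = l; }

static void list_add_tail(struct list *e, struct list *head)
{
	e->prev = head->prev;
	e->next = head;
	head->prev->next = e;
	head->prev = e;
}

static void list_del(struct list *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
	list_init(e);		/* re-init so a second del is harmless */
}

struct bo {
	void *map;		/* cached mapping, NULL when unmapped */
	size_t size;
	struct list vma;	/* link in the LRU list */
};

static struct list vma_cache;	/* head = least, tail = most recently used */
static int vma_count;

/* Unmap least-recently-used entries until we are back under the cap. */
static void vma_cache_trim(void)
{
	while (vma_count > MAX_VMA_CACHE) {
		struct bo *old = (struct bo *)
			((char *)vma_cache.next - offsetof(struct bo, vma));
		munmap(old->map, old->size);
		old->map = NULL;
		list_del(&old->vma);
		vma_count--;
	}
}

/* Record a use of bo: fresh mappings enter the cache (evicting cold
 * ones first), and every use moves the bo to the tail so the coldest
 * mapping is always the next one discarded. */
static void vma_cache_touch(struct bo *bo, void *fresh_map)
{
	if (bo->map == NULL) {
		vma_cache_trim();
		bo->map = fresh_map;
		vma_count++;
	}
	list_del(&bo->vma);
	list_add_tail(&bo->vma, &vma_cache);
}

int main(void)
{
	static struct bo bos[MAX_VMA_CACHE + 2];
	int i;

	list_init(&vma_cache);
	for (i = 0; i < MAX_VMA_CACHE + 2; i++) {
		bos[i].size = 4096;
		list_init(&bos[i].vma);
		vma_cache_touch(&bos[i],
				mmap(NULL, 4096, PROT_READ | PROT_WRITE,
				     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
	}

	assert(bos[0].map == NULL);	/* coldest mapping was evicted */
	printf("cached mappings: %d\n", vma_count);
	return 0;
}

This move-to-tail-on-use ordering is exactly what list_move_tail() provides at the end of kgem_bo_map(), which is why that helper is added at the top of kgem.c.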
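
A second consequence of caching the mapping: the set-domain ioctl moves out of gem_mmap() and becomes conditional, so the kernel round trip is only paid when the bo may have been written by the GPU since the CPU last touched it (bo->needs_flush | bo->gpu). Below is a hedged sketch of that check; the ioctl, struct, and flags are the stock libdrm/i915 interface, but struct bo and bo_prepare_access() are simplified stand-ins, and a valid DRM fd and GEM handle are assumed.

/* Sketch of the deferred domain flush enabled by cached mappings. */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>

#include <xf86drm.h>
#include <i915_drm.h>

struct bo {
	uint32_t handle;
	bool needs_flush;	/* pending GPU writes */
	bool gpu;		/* still referenced by an unretired batch */
};

/* Move the bo into the GTT domain only when the GPU may have touched
 * it; a clean, idle bo can reuse its cached mapping with no ioctl. */
static int bo_prepare_access(int fd, struct bo *bo, int prot)
{
	struct drm_i915_gem_set_domain arg;

	if (!bo->needs_flush && !bo->gpu)
		return 0;	/* mapping is already coherent */

	memset(&arg, 0, sizeof(arg));
	arg.handle = bo->handle;
	arg.read_domains = I915_GEM_DOMAIN_GTT;
	arg.write_domain = (prot & PROT_WRITE) ? I915_GEM_DOMAIN_GTT : 0;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg))
		return -1;

	bo->needs_flush = false;
	return 0;
}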