| author | Chris Wilson <chris@chris-wilson.co.uk> | 2012-01-08 08:58:41 +0000 |
|---|---|---|
| committer | Chris Wilson <chris@chris-wilson.co.uk> | 2012-01-08 17:34:48 +0000 |
| commit | c037b4f542a7b21cbaecedec259da3589db10039 (patch) | |
| tree | 8ec7790314ad2a0adfabb09d48074b810046a354 /src/sna | |
| parent | 26042b2660d87044e1920a1267d9984c00c9566a (diff) | |
sna: Tune cache size for cpu bo cache
This helps SNB on cairo-traces that utilize lots of temporary uploads
(rasterised sources and masks, for instance), but comes at the cost of
regressing others...

To counter the regression from increasing the GTT cache size, the CPU
and GTT vma caches are split and accounted separately.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
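The accounting trick behind that split is visible in the diff below: each vma cache's counter is initialised to minus its capacity, so a single signed `int16_t` both tracks occupancy and signals (by going positive) that trimming is due. A minimal standalone sketch of that scheme, using the patch's constants; the `vma_init`/`vma_cache_add` helpers are illustrative names, not functions from the driver:

```c
#include <stdint.h>

#define MAX_GTT_VMA_CACHE 512
#define MAX_CPU_VMA_CACHE INT16_MAX

enum { MAP_GTT = 0, MAP_CPU, NUM_MAP_TYPES };

/* One cache per mapping type, mirroring the patch's struct kgem. */
static struct {
	int16_t count; /* biased: starts at -capacity, positive means over budget */
} vma[NUM_MAP_TYPES];

static void vma_init(void)
{
	vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
	vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;
}

/* Called when a mapping is added to an inactive vma cache;
 * returns non-zero once the cache exceeds its capacity. */
static int vma_cache_add(int type)
{
	return ++vma[type].count > 0;
}
```

This is why `kgem_trim_vma_cache()` in the diff can simply bail out with `if (kgem->vma[type].count <= 0) return;` rather than comparing against a per-type limit.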
Diffstat (limited to 'src/sna')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | src/sna/kgem.c | 435 |
| -rw-r--r-- | src/sna/kgem.h | 24 |
| -rw-r--r-- | src/sna/kgem_debug_gen3.c | 6 |
| -rw-r--r-- | src/sna/kgem_debug_gen4.c | 23 |
| -rw-r--r-- | src/sna/kgem_debug_gen5.c | 23 |
| -rw-r--r-- | src/sna/kgem_debug_gen6.c | 30 |
| -rw-r--r-- | src/sna/kgem_debug_gen7.c | 30 |
| -rw-r--r-- | src/sna/sna_accel.c | 2 |
8 files changed, 236 insertions, 337 deletions
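All eight files revolve around two structural changes: buffers are binned into power-of-two size buckets (`cache_bucket()`, built on a bsr-based `__fls`), and the single vma cache becomes a per-type, per-bucket array. A portable sketch of the bucketing, substituting a compiler builtin (assumed GCC/Clang) for the patch's inline assembly:

```c
#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE 4096
#define NUM_CACHE_BUCKETS 16

/* Index of the most significant set bit; undefined for word == 0,
 * matching the bsr instruction the patch uses. */
static inline unsigned long __fls(unsigned long word)
{
	return 8 * sizeof(word) - 1 - __builtin_clzl(word);
}

/* Bucket = floor(log2(size in pages)).  The 4-bit bucket field and the
 * 28-bit size field added to struct kgem_bo agree: size < 1 << 28
 * implies bucket <= 15. */
static int cache_bucket(int size)
{
	uint32_t order = __fls(size / PAGE_SIZE);
	assert(order < NUM_CACHE_BUCKETS);
	return (int)order;
}
```

The assert added to `__kgem_bo_init()` below, `bo->size < 1 << (12 + bo->bucket + 1)`, is the same invariant stated the other way around: a bucket holds objects up to twice its base size of `2^(12 + bucket)` bytes.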
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 0c1b2b1d..643771fd 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -94,7 +94,9 @@ static inline void list_replace(struct list *old,
 #endif
 
 #define PAGE_SIZE 4096
-#define MAX_VMA_CACHE 256
+#define MAX_GTT_VMA_CACHE 512
+#define MAX_CPU_VMA_CACHE INT16_MAX
+#define MAP_PRESERVE_TIME 10
 
 #define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1)
 #define CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) & ~1))
@@ -200,8 +202,8 @@ static void *gem_mmap(int fd, uint32_t handle, int size, int prot)
 }
 
 static int __gem_write(int fd, uint32_t handle,
-		int offset, int length,
-		const void *src)
+		       int offset, int length,
+		       const void *src)
 {
 	struct drm_i915_gem_pwrite pwrite;
@@ -371,7 +373,7 @@ kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo)
 	madv.handle = bo->handle;
 	madv.madv = I915_MADV_WILLNEED;
 	if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
-		bo->purged = 0;
+		bo->purged = !madv.retained;
 		return madv.retained;
 	}
@@ -388,14 +390,32 @@ static void gem_close(int fd, uint32_t handle)
 	(void)drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
 }
 
+static inline unsigned long __fls(unsigned long word)
+{
+	asm("bsr %1,%0"
+	    : "=r" (word)
+	    : "rm" (word));
+	return word;
+}
+
+constant inline static int cache_bucket(int size)
+{
+	uint32_t order = __fls(size / PAGE_SIZE);
+	assert(order < NUM_CACHE_BUCKETS);
+	return order;
+}
+
 static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
 				      int handle, int size)
 {
+	assert(size);
 	memset(bo, 0, sizeof(*bo));
 
 	bo->refcnt = 1;
 	bo->handle = handle;
 	bo->size = size;
+	bo->bucket = cache_bucket(size);
+	assert(bo->size < 1 << (12 + bo->bucket + 1));
 	bo->reusable = true;
 	bo->domain = DOMAIN_CPU;
 	list_init(&bo->request);
@@ -436,30 +456,14 @@ static struct kgem_request *__kgem_request_alloc(void)
 	return rq;
 }
 
-static inline unsigned long __fls(unsigned long word)
-{
-	asm("bsr %1,%0"
-	    : "=r" (word)
-	    : "rm" (word));
-	return word;
-}
-
-static struct list *inactive(struct kgem *kgem,
-			     int size)
+static struct list *inactive(struct kgem *kgem, int size)
 {
-	uint32_t order = __fls(size / PAGE_SIZE);
-	if (order >= ARRAY_SIZE(kgem->inactive))
-		order = ARRAY_SIZE(kgem->inactive)-1;
-	return &kgem->inactive[order];
+	return &kgem->inactive[cache_bucket(size)];
 }
 
-static struct list *active(struct kgem *kgem,
-			   int size)
+static struct list *active(struct kgem *kgem, int size)
 {
-	uint32_t order = __fls(size / PAGE_SIZE);
-	if (order >= ARRAY_SIZE(kgem->active))
-		order = ARRAY_SIZE(kgem->active)-1;
-	return &kgem->active[order];
+	return &kgem->active[cache_bucket(size)];
 }
 
 static size_t
@@ -508,7 +512,7 @@ static int gem_param(struct kgem *kgem, int name)
 void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 {
 	struct drm_i915_gem_get_aperture aperture;
-	unsigned int i;
+	unsigned int i, j;
 
 	memset(kgem, 0, sizeof(*kgem));
@@ -527,12 +531,16 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 	list_init(&kgem->partial);
 	list_init(&kgem->requests);
 	list_init(&kgem->flushing);
-	list_init(&kgem->vma_cache);
-	list_init(&kgem->vma_inactive);
 	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
 		list_init(&kgem->inactive[i]);
 	for (i = 0; i < ARRAY_SIZE(kgem->active); i++)
 		list_init(&kgem->active[i]);
+	for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) {
+		for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++)
+			list_init(&kgem->vma[i].inactive[j]);
+	}
+	kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
+	kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;
 
 	kgem->next_request = __kgem_request_alloc();
@@ -572,6 +580,8 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 	kgem->max_object_size = kgem->aperture_mappable / 2;
 	if (kgem->max_object_size > kgem->aperture_low)
 		kgem->max_object_size = kgem->aperture_low;
+	if (kgem->max_object_size > MAX_OBJECT_SIZE)
+		kgem->max_object_size = MAX_OBJECT_SIZE;
 	DBG(("%s: max object size %d\n", __FUNCTION__, kgem->max_object_size));
 
 	kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
@@ -765,6 +775,21 @@ static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo)
 	}
 }
 
+static void kgem_bo_release_map(struct kgem *kgem, struct kgem_bo *bo)
+{
+	int type = IS_CPU_MAP(bo->map);
+
+	DBG(("%s: releasing %s vma for handle=%d, count=%d\n",
+	     __FUNCTION__, type ? "CPU" : "GTT",
+	     bo->handle, kgem->vma[type].count));
+
+	munmap(CPU_MAP(bo->map), bo->size);
+	bo->map = NULL;
+
+	list_del(&bo->vma);
+	kgem->vma[type].count--;
+}
+
 static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
 {
 	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
@@ -773,14 +798,8 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
 
 	kgem_bo_binding_free(kgem, bo);
 
-	if (bo->map) {
-		DBG(("%s: releasing %s vma for handle=%d, count=%d\n",
-		     __FUNCTION__, IS_CPU_MAP(bo->map) ? "CPU" : "GTT",
-		     bo->handle, kgem->vma_count-1));
-		munmap(CPU_MAP(bo->map), bo->size);
-		list_del(&bo->vma);
-		kgem->vma_count--;
-	}
+	if (bo->map)
+		kgem_bo_release_map(kgem, bo);
 	assert(list_is_empty(&bo->vma));
 
 	_list_del(&bo->list);
@@ -799,6 +818,44 @@ static bool is_mmaped_buffer(struct kgem_partial_bo *bo)
 	return bo->mem != bo+1;
 }
 
+inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
+					    struct kgem_bo *bo)
+{
+	assert(!kgem_busy(kgem, bo->handle));
+	assert(!bo->proxy);
+	assert(!bo->io);
+
+	list_move(&bo->list, &kgem->inactive[bo->bucket]);
+	if (bo->map) {
+		int type = IS_CPU_MAP(bo->map);
+		list_move_tail(&bo->vma, &kgem->vma[type].inactive[bo->bucket]);
+		kgem->vma[type].count++;
+	}
+
+	kgem->need_expire = true;
+}
+
+inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
+						struct kgem_bo *bo)
+{
+	list_del(&bo->list);
+	assert(bo->rq == NULL);
+	if (bo->map) {
+		assert(!list_is_empty(&bo->vma));
+		list_del(&bo->vma);
+		kgem->vma[IS_CPU_MAP(bo->map)].count--;
+	}
+}
+
+inline static void kgem_bo_remove_from_active(struct kgem *kgem,
+					      struct kgem_bo *bo)
+{
+	list_del(&bo->list);
+	if (bo->rq == &_kgem_static_request)
+		list_del(&bo->request);
+	assert(list_is_empty(&bo->vma));
+}
+
 static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 {
 	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
@@ -812,12 +869,8 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 		goto destroy;
 
 	if (bo->io) {
-		struct kgem_partial_bo *io = (struct kgem_partial_bo *)bo;
 		struct kgem_bo *base;
 
-		if (is_mmaped_buffer(io))
-			kgem_bo_unmap__cpu(kgem, bo, io->mem);
-
 		base = malloc(sizeof(*base));
 		if (base) {
 			DBG(("%s: transferring io handle=%d to bo\n",
@@ -843,14 +896,15 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 	assert(bo->vmap == false && bo->sync == false);
 	bo->scanout = bo->flush = false;
 
+	assert(list_is_empty(&bo->vma));
 	if (bo->rq) {
 		DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
-		list_move(&bo->list, active(kgem, bo->size));
+		list_move(&bo->list, &kgem->active[bo->bucket]);
 	} else if (bo->needs_flush) {
 		DBG(("%s: handle=%d -> flushing\n", __FUNCTION__, bo->handle));
 		assert(list_is_empty(&bo->request));
 		list_add(&bo->request, &kgem->flushing);
-		list_move(&bo->list, active(kgem, bo->size));
+		list_move(&bo->list, &kgem->active[bo->bucket]);
 		bo->rq = &_kgem_static_request;
 	} else {
 		assert(bo->exec == NULL);
@@ -864,10 +918,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 	}
 
 	DBG(("%s: handle=%d -> inactive\n", __FUNCTION__, bo->handle));
-	assert(!kgem_busy(kgem, bo->handle));
-	list_move(&bo->list, inactive(kgem, bo->size));
-	if (bo->map)
-		list_move(&bo->vma, &kgem->vma_inactive);
+	kgem_bo_move_to_inactive(kgem, bo);
 	kgem->need_expire = true;
 }
 
@@ -905,7 +956,7 @@ bool kgem_retire(struct kgem *kgem)
 			bo->needs_flush = false;
 			bo->domain = DOMAIN_NONE;
 			bo->rq = NULL;
-			list_move(&bo->list, inactive(kgem, bo->size));
+			kgem_bo_move_to_inactive(kgem, bo);
 			list_del(&bo->request);
 		} else
 			kgem_bo_free(kgem, bo);
@@ -948,8 +999,7 @@ bool kgem_retire(struct kgem *kgem)
 			} else if(kgem_bo_set_purgeable(kgem, bo)) {
 				DBG(("%s: moving %d to inactive\n",
 				     __FUNCTION__, bo->handle));
-				list_move(&bo->list,
-					  inactive(kgem, bo->size));
+				kgem_bo_move_to_inactive(kgem, bo);
 				retired = true;
 			} else {
 				DBG(("%s: closing %d\n",
@@ -969,7 +1019,7 @@ bool kgem_retire(struct kgem *kgem)
 		if (kgem_bo_set_purgeable(kgem, rq->bo)) {
 			assert(rq->bo->rq == NULL);
 			assert(list_is_empty(&rq->bo->request));
-			list_move(&rq->bo->list, inactive(kgem, rq->bo->size));
+			kgem_bo_move_to_inactive(kgem, rq->bo);
 			retired = true;
 		} else {
 			kgem->need_purge = 1;
@@ -1483,6 +1533,9 @@ bool kgem_expire_cache(struct kgem *kgem)
 	idle = true;
 	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
+		struct list preserve;
+
+		list_init(&preserve);
 		while (!list_is_empty(&kgem->inactive[i])) {
 			bo = list_last_entry(&kgem->inactive[i],
 					     struct kgem_bo, list);
@@ -1492,10 +1545,20 @@ bool kgem_expire_cache(struct kgem *kgem)
 				break;
 			}
 
-			count++;
-			size += bo->size;
-
-			kgem_bo_free(kgem, bo);
+			if (bo->map && bo->delta + MAP_PRESERVE_TIME > expire) {
+				idle = false;
+				list_move_tail(&bo->list, &preserve);
+			} else {
+				count++;
+				size += bo->size;
+				kgem_bo_free(kgem, bo);
+			}
+		}
+		if (!list_is_empty(&preserve)) {
+			preserve.prev->next = kgem->inactive[i].next;
+			kgem->inactive[i].next->prev = preserve.prev;
+			kgem->inactive[i].next = preserve.next;
+			preserve.next->prev = &kgem->inactive[i];
 		}
 	}
@@ -1552,15 +1615,24 @@ search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags)
 	bool use_active = (flags & CREATE_INACTIVE) == 0;
 	struct list *cache;
 
-	if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
-		int for_cpu = !!(flags & CREATE_CPU_MAP);
-		assert(for_cpu || use_active == false);
-		list_for_each_entry(bo, &kgem->vma_inactive, vma) {
-			if (IS_CPU_MAP(bo->map) != for_cpu)
-				continue;
+	if (!use_active &&
+	    list_is_empty(inactive(kgem, size)) &&
+	    !list_is_empty(active(kgem, size)) &&
+	    !kgem_retire(kgem))
+		return NULL;
 
-			if (size > bo->size || 2*size < bo->size)
+	if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
+		int for_cpu = !!(flags & CREATE_CPU_MAP);
+		cache = &kgem->vma[for_cpu].inactive[cache_bucket(size)];
+		list_for_each_entry(bo, cache, vma) {
+			assert(IS_CPU_MAP(bo->map) == for_cpu);
+			assert(bo->bucket == cache_bucket(size));
+
+			if (size > bo->size) {
+				DBG(("inactive too small: %d < %d\n",
+				     bo->size, size));
 				continue;
+			}
 
 			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
 				kgem->need_purge |= bo->domain == DOMAIN_GPU;
@@ -1573,10 +1645,7 @@ search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags)
 			    I915_TILING_NONE, 0) != I915_TILING_NONE)
 				continue;
 
-			list_del(&bo->list);
-			if (bo->rq == &_kgem_static_request)
-				list_del(&bo->request);
-			list_move_tail(&bo->vma, &kgem->vma_cache);
+			kgem_bo_remove_from_inactive(kgem, bo);
 
 			bo->tiling = I915_TILING_NONE;
 			bo->pitch = 0;
@@ -1590,7 +1659,7 @@ search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags)
 		}
 	}
 
-	cache = use_active ? active(kgem, size): inactive(kgem, size);
+	cache = use_active ? active(kgem, size) : inactive(kgem, size);
 	list_for_each_entry_safe(bo, next, cache, list) {
 		assert(bo->refcnt == 0);
 		assert(bo->reusable);
@@ -1644,13 +1713,10 @@ search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags)
 			continue;
 		}
 
-		list_del(&bo->list);
-		if (bo->rq == &_kgem_static_request)
-			list_del(&bo->request);
-		if (bo->map) {
-			assert(!list_is_empty(&bo->vma));
-			list_move_tail(&bo->vma, &kgem->vma_cache);
-		}
+		if (use_active)
+			kgem_bo_remove_from_active(kgem, bo);
+		else
+			kgem_bo_remove_from_inactive(kgem, bo);
 
 		bo->tiling = I915_TILING_NONE;
 		bo->pitch = 0;
@@ -1673,22 +1739,14 @@ search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags)
 		    I915_TILING_NONE, 0) != I915_TILING_NONE)
 			return NULL;
 
-		if (first->map) {
-			munmap(CPU_MAP(first->map), first->size);
-			first->map = NULL;
-
-			list_del(&first->vma);
-			kgem->vma_count--;
-		}
+		if (first->map)
+			kgem_bo_release_map(kgem, first);
 	}
 
-	list_del(&first->list);
-	if (first->rq == &_kgem_static_request)
-		list_del(&first->request);
-	if (first->map) {
-		assert(!list_is_empty(&first->vma));
-		list_move_tail(&first->vma, &kgem->vma_cache);
-	}
+	if (use_active)
+		kgem_bo_remove_from_active(kgem, first);
+	else
+		kgem_bo_remove_from_inactive(kgem, first);
 
 	first->tiling = I915_TILING_NONE;
 	first->pitch = 0;
@@ -1718,7 +1776,7 @@ struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name)
 		return NULL;
 
 	DBG(("%s: new handle=%d\n", __FUNCTION__, open_arg.handle));
-	bo = __kgem_bo_alloc(open_arg.handle, 0);
+	bo = __kgem_bo_alloc(open_arg.handle, open_arg.size);
 	if (bo == NULL) {
 		gem_close(kgem->fd, open_arg.handle);
 		return NULL;
@@ -1740,14 +1798,6 @@ struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size)
 	if (bo)
 		return kgem_bo_reference(bo);
 
-	if (!list_is_empty(&kgem->requests)) {
-		if (kgem_retire(kgem)) {
-			bo = search_linear_cache(kgem, size, CREATE_INACTIVE);
-			if (bo)
-				return kgem_bo_reference(bo);
-		}
-	}
-
 	handle = gem_create(kgem->fd, size);
 	if (handle == 0)
 		return NULL;
@@ -1877,11 +1927,11 @@ static bool _kgem_can_create_2d(struct kgem *kgem,
 	size = kgem_surface_size(kgem, false, false,
 				 width, height, bpp,
 				 tiling, &pitch);
-	if (size == 0 || size > kgem->max_object_size)
+	if (size == 0 || size >= kgem->max_object_size)
 		size = kgem_surface_size(kgem, false, false,
 					 width, height, bpp,
 					 I915_TILING_NONE, &pitch);
-	return size > 0 && size <= kgem->max_object_size;
+	return size > 0 && size < kgem->max_object_size;
 }
 
 #if DEBUG_KGEM
@@ -1954,18 +2004,18 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 		/* We presume that we will need to upload to this bo,
 		 * and so would prefer to have an active VMA.
 		 */
+		cache = &kgem->vma[for_cpu].inactive[cache_bucket(size)];
 		do {
-			list_for_each_entry(bo, &kgem->vma_inactive, vma) {
+			list_for_each_entry(bo, cache, vma) {
+				assert(bo->bucket == cache_bucket(size));
 				assert(bo->refcnt == 0);
 				assert(bo->map);
+				assert(IS_CPU_MAP(bo->map) == for_cpu);
 				assert(bo->rq == NULL);
 				assert(list_is_empty(&bo->request));
 
-				if (IS_CPU_MAP(bo->map) != for_cpu)
-					continue;
-
-				if (size > bo->size || 2*size < bo->size) {
-					DBG(("inactive vma too small/large: %d < %d\n",
+				if (size > bo->size) {
+					DBG(("inactive too small: %d < %d\n",
 					     bo->size, size));
 					continue;
 				}
@@ -1977,25 +2027,24 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 					continue;
 				}
 
-				bo->pitch = pitch;
-				list_del(&bo->list);
-
 				if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
 					kgem_bo_free(kgem, bo);
 					break;
 				}
 
+				bo->pitch = pitch;
 				bo->delta = 0;
 				bo->unique_id = kgem_get_unique_id(kgem);
-				list_move_tail(&bo->vma, &kgem->vma_cache);
-				assert(bo->pitch);
+
+				kgem_bo_remove_from_inactive(kgem, bo);
+
 				DBG(("  from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
 				     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
 				assert(bo->reusable);
 				assert(bo->domain != DOMAIN_GPU && !kgem_busy(kgem, bo->handle));
 				return kgem_bo_reference(bo);
 			}
-		} while (!list_is_empty(&kgem->vma_cache) && kgem_retire(kgem));
+		} while (!list_is_empty(cache) && kgem_retire(kgem));
 	}
 
 	if (flags & CREATE_INACTIVE)
@@ -2013,6 +2062,8 @@ search_active: /* Best active match first */
 	list_for_each_entry(bo, cache, list) {
 		uint32_t s;
 
+		assert(bo->bucket == cache_bucket(size));
+
 		if (bo->tiling) {
 			if (bo->pitch < pitch) {
 				DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
@@ -2031,10 +2082,6 @@ search_active: /* Best active match first */
 			continue;
 		}
 
-		list_del(&bo->list);
-		if (bo->rq == &_kgem_static_request)
-			list_del(&bo->request);
-
 		if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
 			kgem->need_purge |= bo->domain == DOMAIN_GPU;
 			kgem_bo_free(kgem, bo);
@@ -2042,6 +2089,8 @@ search_active: /* Best active match first */
 			goto search_active;
 		}
 
+		kgem_bo_remove_from_active(kgem, bo);
+
 		bo->unique_id = kgem_get_unique_id(kgem);
 		bo->delta = 0;
 		DBG(("  1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
@@ -2061,6 +2110,8 @@ search_active: /* Best active match first */
 			kgem->need_purge |= next->domain == DOMAIN_GPU;
 			kgem_bo_free(kgem, next);
 		} else {
+			kgem_bo_remove_from_active(kgem, next);
+
 			next->unique_id = kgem_get_unique_id(kgem);
 			next->delta = 0;
 			DBG(("  2:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
@@ -2075,6 +2126,8 @@ skip_active_search:
 	/* Now just look for a close match and prefer any currently active */
 	cache = inactive(kgem, size);
 	list_for_each_entry_safe(bo, next, cache, list) {
+		assert(bo->bucket == cache_bucket(size));
+
 		if (size > bo->size) {
 			DBG(("inactive too small: %d < %d\n",
 			     bo->size, size));
@@ -2090,29 +2143,20 @@ skip_active_search:
 			continue;
 		}
 
-		if (bo->map) {
-			munmap(CPU_MAP(bo->map), bo->size);
-			bo->map = NULL;
-
-			list_del(&bo->vma);
-			kgem->vma_count--;
-		}
+		if (bo->map)
+			kgem_bo_release_map(kgem, bo);
 		}
 
-		bo->pitch = pitch;
-		bo->tiling = tiling;
-
-		list_del(&bo->list);
-		assert(list_is_empty(&bo->request));
-
 		if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
 			kgem->need_purge |= bo->domain == DOMAIN_GPU;
 			kgem_bo_free(kgem, bo);
 			continue;
 		}
 
-		if (bo->map)
-			list_move_tail(&bo->vma, &kgem->vma_cache);
+		kgem_bo_remove_from_inactive(kgem, bo);
+
+		bo->pitch = pitch;
+		bo->tiling = tiling;
 
 		bo->delta = 0;
 		bo->unique_id = kgem_get_unique_id(kgem);
@@ -2372,9 +2416,14 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
 	return delta;
 }
 
-static void kgem_trim_vma_cache(struct kgem *kgem)
+static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
 {
-	if (kgem->vma_count > MAX_VMA_CACHE && kgem->need_purge)
+	int i, j;
+
+	if (kgem->vma[type].count <= 0)
+		return;
+
+	if (kgem->need_purge)
 		kgem_purge_cache(kgem);
 
 	/* vma are limited on a per-process basis to around 64k.
@@ -2384,33 +2433,36 @@ static void kgem_trim_vma_cache(struct kgem *kgem)
 	 * start failing mappings, we keep our own number of open
 	 * vma to within a conservative value.
 	 */
-	while (kgem->vma_count > MAX_VMA_CACHE) {
-		struct kgem_bo *old;
-
-		if (list_is_empty(&kgem->vma_inactive)) {
-			old = list_first_entry(&kgem->vma_cache,
-					       struct kgem_bo,
-					       vma);
-		} else {
-			old = list_last_entry(&kgem->vma_inactive,
-					      struct kgem_bo,
-					      vma);
+	i = 0;
+	while (kgem->vma[type].count > 0) {
+		struct kgem_bo *bo = NULL;
+
+		for (j = 0;
+		     bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive);
+		     j++) {
+			struct list *head = &kgem->vma[type].inactive[i++%ARRAY_SIZE(kgem->vma[type].inactive)];
+			if (!list_is_empty(head))
+				bo = list_first_entry(head,
+						      struct kgem_bo,
+						      vma);
 		}
-		DBG(("%s: discarding %s %s vma cache for %d\n",
+		if (bo == NULL)
+			break;
+
+		DBG(("%s: discarding inactive %s vma cache for %d\n",
 		     __FUNCTION__,
-		     list_is_empty(&kgem->vma_inactive) ? "cached" : "inactive",
-		     IS_CPU_MAP(old->map) ? "CPU" : "GTT", old->handle));
-		assert(old->map);
-		munmap(CPU_MAP(old->map), old->size);
-		old->map = NULL;
-		list_del(&old->vma);
-		kgem->vma_count--;
-
-		if (old->rq == NULL && old->refcnt == 0) {
-			DBG(("%s: discarding unused vma bo handle=%d\n",
-			     __FUNCTION__, old->handle));
-			kgem_bo_free(kgem, old);
-		}
+		     IS_CPU_MAP(bo->map) ? "CPU" : "GTT", bo->handle));
+		assert(IS_CPU_MAP(bo->map) == type);
+		assert(bo->map);
+		assert(bo->rq == NULL);
+
+		munmap(CPU_MAP(bo->map), bo->size);
+		bo->map = NULL;
+		list_del(&bo->vma);
+		kgem->vma[type].count--;
+
+		if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo))
+			kgem_bo_free(kgem, bo);
 	}
 }
@@ -2421,18 +2473,12 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
 	assert(bo->refcnt || bo->exec); /* allow for debugging purposes */
 	assert(!bo->purged);
 
-	if (IS_CPU_MAP(bo->map)) {
-		DBG(("%s: discarding CPU vma cache for %d\n",
-		     __FUNCTION__, bo->handle));
-		munmap(CPU_MAP(bo->map), bo->size);
-		bo->map = NULL;
-		list_del(&bo->vma);
-		kgem->vma_count--;
-	}
+	if (IS_CPU_MAP(bo->map))
+		kgem_bo_release_map(kgem, bo);
 
 	ptr = bo->map;
 	if (ptr == NULL) {
-		kgem_trim_vma_cache(kgem);
+		kgem_trim_vma_cache(kgem, MAP_GTT, bo->bucket);
 
 		ptr = gem_mmap(kgem->fd, bo->handle, bo->size,
 			       PROT_READ | PROT_WRITE);
@@ -2445,10 +2491,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
 		 * flush CPU damage to their GPU bo.
 		 */
 		bo->map = ptr;
-		kgem->vma_count++;
-
-		DBG(("%s: caching vma for %d, count=%d\n",
-		     __FUNCTION__, bo->handle, kgem->vma_count));
+		DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
 	}
 
 	if (bo->domain != DOMAIN_GTT) {
@@ -2473,8 +2516,6 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
 		bo->domain = DOMAIN_GTT;
 	}
 
-	list_move_tail(&bo->vma, &kgem->vma_cache);
-
 	return ptr;
 }
 
@@ -2486,25 +2527,13 @@ void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
 	assert(bo->refcnt);
 	assert(!bo->purged);
 
-	if (IS_CPU_MAP(bo->map)) {
-		void *ptr = CPU_MAP(bo->map);
-		list_del(&bo->vma);
-		kgem->vma_count--;
-		bo->map = NULL;
-		VG(VALGRIND_MALLOCLIKE_BLOCK(ptr, bo->size, 0, 1));
-		return ptr;
-	}
+	if (IS_CPU_MAP(bo->map))
+		return CPU_MAP(bo->map);
 
-	if (bo->map) {
-		DBG(("%s: discarding GTT vma cache for %d\n",
-		     __FUNCTION__, bo->handle));
-		munmap(CPU_MAP(bo->map), bo->size);
-		bo->map = NULL;
-		list_del(&bo->vma);
-		kgem->vma_count--;
-	}
+	if (bo->map)
+		kgem_bo_release_map(kgem, bo);
 
-	kgem_trim_vma_cache(kgem);
+	kgem_trim_vma_cache(kgem, MAP_CPU, bo->bucket);
 
 	VG_CLEAR(mmap_arg);
 	mmap_arg.handle = bo->handle;
@@ -2515,38 +2544,11 @@ void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
 		return NULL;
 	}
 
-	VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, bo->size, 0, 1));
+	DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
+	bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr);
 	return (void *)(uintptr_t)mmap_arg.addr_ptr;
 }
 
-void kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr)
-{
-	assert(bo->map == NULL);
-	assert(ptr != NULL);
-
-	bo->map = MAKE_CPU_MAP(ptr);
-	list_move(&bo->vma, &kgem->vma_cache);
-	kgem->vma_count++;
-
-	VG(VALGRIND_FREELIKE_BLOCK(ptr, 0));
-}
-
-void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo)
-{
-	if (bo->map == NULL)
-		return;
-
-	DBG(("%s: (debug) releasing vma for handle=%d, count=%d\n",
-	     __FUNCTION__, bo->handle, kgem->vma_count-1));
-	assert(!IS_CPU_MAP(bo->map));
-
-	munmap(CPU_MAP(bo->map), bo->size);
-	bo->map = NULL;
-
-	list_del(&bo->vma);
-	kgem->vma_count--;
-}
-
 uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo)
 {
 	struct drm_gem_flink flink;
@@ -2876,6 +2878,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 
 	bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
 	if (bo->mem == NULL) {
+		bo->base.refcnt = 0; /* for valgrind */
 		kgem_bo_free(kgem, &bo->base);
 		return NULL;
 	}
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 57ac6478..1bc0d9b4 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -63,8 +63,10 @@ struct kgem_bo {
 	uint32_t refcnt;
 	uint32_t handle;
 	uint32_t presumed_offset;
-	uint32_t size;
 	uint32_t delta;
+	uint32_t size:28;
+	uint32_t bucket:4;
+#define MAX_OBJECT_SIZE (1 << 28)
 
 	uint32_t pitch : 18; /* max 128k */
 	uint32_t tiling : 2;
@@ -90,6 +92,14 @@ struct kgem_request {
 	struct list buffers;
 };
 
+enum {
+	MAP_GTT = 0,
+	MAP_CPU,
+	NUM_MAP_TYPES,
+};
+
+#define NUM_CACHE_BUCKETS 16
+
 struct kgem {
 	int fd;
 	int wedged;
@@ -105,20 +115,22 @@ struct kgem {
 		KGEM_BLT,
 	} mode, ring;
 
-	struct list flushing, active[16], inactive[16];
+	struct list flushing, active[NUM_CACHE_BUCKETS], inactive[NUM_CACHE_BUCKETS];
 	struct list partial;
 	struct list requests;
-	struct list vma_cache;
-	struct list vma_inactive;
 	struct kgem_request *next_request;
 
+	struct {
+		struct list inactive[NUM_CACHE_BUCKETS];
+		int16_t count;
+	} vma[NUM_MAP_TYPES];
+
 	uint16_t nbatch;
 	uint16_t surface;
 	uint16_t nexec;
 	uint16_t nreloc;
 	uint16_t nfence;
 	uint16_t max_batch_size;
-	uint16_t vma_count;
 
 	uint32_t flush:1;
 	uint32_t sync:1;
@@ -332,10 +344,8 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
 			uint32_t delta);
 
 void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot);
-void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo);
 void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
 void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo);
-void kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr);
 
 uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo);
 
 Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
diff --git a/src/sna/kgem_debug_gen3.c b/src/sna/kgem_debug_gen3.c
index 0238b734..213c69f6 100644
--- a/src/sna/kgem_debug_gen3.c
+++ b/src/sna/kgem_debug_gen3.c
@@ -101,9 +101,6 @@ static void gen3_update_vertex_buffer_addr(struct kgem *kgem,
 	}
 	ptr = (char *)base + kgem->reloc[i].delta;
 
-	if (state.vb.current)
-		kgem_bo_unmap(kgem, state.vb.current);
-
 	state.vb.current = bo;
 	state.vb.base = base;
 	state.vb.ptr = ptr;
@@ -1612,8 +1609,5 @@ int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset)
 
 void kgem_gen3_finish_state(struct kgem *kgem)
 {
-	if (state.vb.current)
-		kgem_bo_unmap(kgem, state.vb.current);
-
 	memset(&state, 0, sizeof(state));
 }
diff --git a/src/sna/kgem_debug_gen4.c b/src/sna/kgem_debug_gen4.c
index 0f91d29a..0004ecf5 100644
--- a/src/sna/kgem_debug_gen4.c
+++ b/src/sna/kgem_debug_gen4.c
@@ -89,8 +89,6 @@ static void gen4_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
 	ptr = (char *)base + kgem->reloc[i].delta;
 
 	i = data[0] >> 27;
-	if (state.vb[i].current)
-		kgem_bo_unmap(kgem, state.vb[i].current);
 
 	state.vb[i].current = bo;
 	state.vb[i].base = base;
@@ -415,13 +413,6 @@ get_reloc(struct kgem *kgem,
 
 	return (char *)base + delta;
 }
-
-static void
-put_reloc(struct kgem *kgem, struct reloc *r)
-{
-	if (r->bo != NULL)
-		kgem_bo_unmap(kgem, r->bo);
-}
 #endif
 
 int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset)
@@ -691,21 +682,7 @@ int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset)
 	return len;
 }
 
-static void finish_vertex_buffers(struct kgem *kgem)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(state.vb); i++)
-		if (state.vb[i].current)
-			kgem_bo_unmap(kgem, state.vb[i].current);
-}
-
 void kgem_gen4_finish_state(struct kgem *kgem)
 {
-	finish_vertex_buffers(kgem);
-
-	if (state.dynamic_state.current)
-		kgem_bo_unmap(kgem, state.dynamic_state.base);
-
 	memset(&state, 0, sizeof(state));
 }
diff --git a/src/sna/kgem_debug_gen5.c b/src/sna/kgem_debug_gen5.c
index c4f5df15..7912cc91 100644
--- a/src/sna/kgem_debug_gen5.c
+++ b/src/sna/kgem_debug_gen5.c
@@ -84,8 +84,6 @@ static void gen5_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
 	ptr = (char *)base + reloc->delta;
 
 	i = data[0] >> 27;
-	if (state.vb[i].current)
-		kgem_bo_unmap(kgem, state.vb[i].current);
 
 	state.vb[i].handle = reloc->target_handle;
 	state.vb[i].current = bo;
@@ -389,13 +387,6 @@ get_reloc(struct kgem *kgem,
 
 	return (char *)base + delta;
 }
-
-static void
-put_reloc(struct kgem *kgem, struct reloc *r)
-{
-	if (r->bo != NULL)
-		kgem_bo_umap(kgem, r->bo);
-}
 #endif
 
 int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset)
@@ -667,21 +658,7 @@ int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset)
 	return len;
 }
 
-static void finish_vertex_buffers(struct kgem *kgem)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(state.vb); i++)
-		if (state.vb[i].current)
-			kgem_bo_unmap(kgem, state.vb[i].current);
-}
-
 void kgem_gen5_finish_state(struct kgem *kgem)
 {
-	finish_vertex_buffers(kgem);
-
-	if (state.dynamic_state.current)
-		kgem_bo_unmap(kgem, state.dynamic_state.current);
-
 	memset(&state, 0, sizeof(state));
 }
diff --git a/src/sna/kgem_debug_gen6.c b/src/sna/kgem_debug_gen6.c
index 5bcd85dc..d23e2d93 100644
--- a/src/sna/kgem_debug_gen6.c
+++ b/src/sna/kgem_debug_gen6.c
@@ -88,8 +88,6 @@ static void gen6_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
 	ptr = (char *)base + kgem->reloc[i].delta;
 
 	i = data[0] >> 26;
-	if (state.vb[i].current)
-		kgem_bo_unmap(kgem, state.vb[i].current);
 
 	state.vb[i].current = bo;
 	state.vb[i].base = base;
@@ -129,9 +127,6 @@ static void gen6_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset)
 		ptr = NULL;
 	}
 
-	if (state.dynamic_state.current)
-		kgem_bo_unmap(kgem, state.dynamic_state.current);
-
 	state.dynamic_state.current = bo;
 	state.dynamic_state.base = base;
 	state.dynamic_state.ptr = ptr;
@@ -300,22 +295,8 @@ static void primitive_out(struct kgem *kgem, uint32_t *data)
 	}
 }
 
-static void finish_vertex_buffers(struct kgem *kgem)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(state.vb); i++)
-		if (state.vb[i].current)
-			kgem_bo_unmap(kgem, state.vb[i].current);
-}
-
 static void finish_state(struct kgem *kgem)
 {
-	finish_vertex_buffers(kgem);
-
-	if (state.dynamic_state.current)
-		kgem_bo_unmap(kgem, state.dynamic_state.base);
-
 	memset(&state, 0, sizeof(state));
 }
 
@@ -478,13 +459,6 @@ get_reloc(struct kgem *kgem,
 	return (char *)base + (delta & ~3);
 }
 
-static void
-put_reloc(struct kgem *kgem, struct reloc *r)
-{
-	if (r->bo != NULL)
-		kgem_bo_unmap(kgem, r->bo);
-}
-
 static const char *
 gen6_filter_to_string(uint32_t filter)
 {
@@ -539,8 +513,6 @@ gen6_decode_sampler_state(struct kgem *kgem, const uint32_t *reloc)
 	ErrorF("    Sampler 1:\n");
 	ErrorF("      filter: min=%s, mag=%s\n", min, mag);
 	ErrorF("      wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap);
-
-	put_reloc(kgem, &r);
 }
 
 static const char *
@@ -604,8 +576,6 @@ gen6_decode_blend(struct kgem *kgem, const uint32_t *reloc)
 	ErrorF("    Blend (%s): function %s, src=%s, dst=%s\n",
"enabled" : "disabled", func, src, dst); - - put_reloc(kgem, &r); } int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset) diff --git a/src/sna/kgem_debug_gen7.c b/src/sna/kgem_debug_gen7.c index a33a918d..c13e96f2 100644 --- a/src/sna/kgem_debug_gen7.c +++ b/src/sna/kgem_debug_gen7.c @@ -88,8 +88,6 @@ static void gen7_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) ptr = (char *)base + kgem->reloc[i].delta; i = data[0] >> 26; - if (state.vb[i].current) - kgem_bo_unmap(kgem, state.vb[i].base); state.vb[i].current = bo; state.vb[i].base = base; @@ -129,9 +127,6 @@ static void gen7_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset) ptr = NULL; } - if (state.dynamic_state.current) - kgem_bo_unmap(kgem, state.dynamic_state.base); - state.dynamic_state.current = bo; state.dynamic_state.base = base; state.dynamic_state.ptr = ptr; @@ -300,22 +295,8 @@ static void primitive_out(struct kgem *kgem, uint32_t *data) } } -static void finish_vertex_buffers(struct kgem *kgem) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(state.vb); i++) - if (state.vb[i].current) - kgem_bo_unmap(kgem, state.vb[i].current); -} - static void finish_state(struct kgem *kgem) { - finish_vertex_buffers(kgem); - - if (state.dynamic_state.current) - kgem_bo_unmap(kgem, state.dynamic_state.base); - memset(&state, 0, sizeof(state)); } @@ -478,13 +459,6 @@ get_reloc(struct kgem *kgem, return (char *)base + (delta & ~3); } -static void -put_reloc(struct kgem *kgem, struct reloc *r) -{ - if (r->bo != NULL) - kgem_bo_unmap(kgem, r->bo); -} - static const char * gen7_filter_to_string(uint32_t filter) { @@ -539,8 +513,6 @@ gen7_decode_sampler_state(struct kgem *kgem, const uint32_t *reloc) ErrorF(" Sampler 1:\n"); ErrorF(" filter: min=%s, mag=%s\n", min, mag); ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap); - - put_reloc(kgem, &r); } static const char * @@ -604,8 +576,6 @@ gen7_decode_blend(struct kgem *kgem, const uint32_t *reloc) ErrorF(" Blend (%s): function %s, src=%s, dst=%s\n", blend->blend0.blend_enable ? "enabled" : "disabled", func, src, dst); - - put_reloc(kgem, &r); } int kgem_gen7_decode_3d(struct kgem *kgem, uint32_t offset) diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c index 4e42c6db..6b69a6e9 100644 --- a/src/sna/sna_accel.c +++ b/src/sna/sna_accel.c @@ -238,9 +238,7 @@ static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv) DBG(("%s: discarding CPU buffer, handle=%d, size=%d\n", __FUNCTION__, priv->cpu_bo->handle, priv->cpu_bo->size)); - kgem_bo_unmap__cpu(&sna->kgem, priv->cpu_bo, priv->ptr); kgem_bo_destroy(&sna->kgem, priv->cpu_bo); - priv->cpu_bo = NULL; } else free(priv->ptr); |