author    Chris Wilson <chris@chris-wilson.co.uk>  2012-01-08 08:58:41 +0000
committer Chris Wilson <chris@chris-wilson.co.uk>  2012-01-08 17:34:48 +0000
commit    c037b4f542a7b21cbaecedec259da3589db10039 (patch)
tree      8ec7790314ad2a0adfabb09d48074b810046a354 /src
parent    26042b2660d87044e1920a1267d9984c00c9566a (diff)
sna: Tune cache size for cpu bo cache

This helps SNB on cairo-traces that utilize lots of temporary uploads
(rasterised sources and masks, for instance), but comes at the cost of
regressing others. To counter the regression from increasing the GTT
cache size, the CPU and GTT vma caches are split and accounted
separately.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
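Before the diff itself, a sketch of the scheme it introduces: the single vma_cache/vma_inactive pair is replaced by per-type (GTT vs CPU) inactive lists, bucketed by buffer size, plus a signed counter seeded at minus the cache limit so a simple sign test decides when trimming is needed. The following is a minimal, self-contained illustration with the driver's list machinery elided; the names mirror the patch, but it is not the driver source:

    #include <assert.h>
    #include <stdint.h>

    #define PAGE_SIZE          4096
    #define NUM_CACHE_BUCKETS  16
    #define MAX_GTT_VMA_CACHE  512
    #define MAX_CPU_VMA_CACHE  INT16_MAX

    enum { MAP_GTT = 0, MAP_CPU, NUM_MAP_TYPES };

    static struct {
            int16_t count;  /* seeded negative; > 0 means over budget */
    } vma[NUM_MAP_TYPES];

    /* Bucket index = floor(log2(size in pages)). The patch open-codes
     * this with a bsr-based __fls(); __builtin_clzl() is a portable
     * stand-in here. Assumes size >= PAGE_SIZE, since buffer sizes are
     * page-aligned. */
    static inline int cache_bucket(int size)
    {
            int order = (int)(8 * sizeof(long) - 1)
                      - __builtin_clzl((unsigned long)size / PAGE_SIZE);
            assert(order < NUM_CACHE_BUCKETS);
            return order;
    }

    static void vma_cache_init(void)
    {
            /* Starting at -LIMIT lets the map/unmap paths increment or
             * decrement and test the sign, rather than compare against
             * a limit, as kgem_init()/kgem_trim_vma_cache() do below. */
            vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
            vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;
    }

    static int vma_cache_needs_trim(int type)
    {
            return vma[type].count > 0;
    }

With MAX_CPU_VMA_CACHE set to INT16_MAX against an int16_t counter, the CPU budget is effectively unbounded, so the count-based trimming bites only on GTT mappings; that fits the stated aim of absorbing the larger GTT cache while favouring CPU uploads.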
Diffstat (limited to 'src')
-rw-r--r--  src/sna/kgem.c             435
-rw-r--r--  src/sna/kgem.h              24
-rw-r--r--  src/sna/kgem_debug_gen3.c    6
-rw-r--r--  src/sna/kgem_debug_gen4.c   23
-rw-r--r--  src/sna/kgem_debug_gen5.c   23
-rw-r--r--  src/sna/kgem_debug_gen6.c   30
-rw-r--r--  src/sna/kgem_debug_gen7.c   30
-rw-r--r--  src/sna/sna_accel.c          2
8 files changed, 236 insertions(+), 337 deletions(-)
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 0c1b2b1d..643771fd 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -94,7 +94,9 @@ static inline void list_replace(struct list *old,
#endif
#define PAGE_SIZE 4096
-#define MAX_VMA_CACHE 256
+#define MAX_GTT_VMA_CACHE 512
+#define MAX_CPU_VMA_CACHE INT16_MAX
+#define MAP_PRESERVE_TIME 10
#define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1)
#define CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) & ~1))
@@ -200,8 +202,8 @@ static void *gem_mmap(int fd, uint32_t handle, int size, int prot)
}
static int __gem_write(int fd, uint32_t handle,
- int offset, int length,
- const void *src)
+ int offset, int length,
+ const void *src)
{
struct drm_i915_gem_pwrite pwrite;
@@ -371,7 +373,7 @@ kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo)
madv.handle = bo->handle;
madv.madv = I915_MADV_WILLNEED;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
- bo->purged = 0;
+ bo->purged = !madv.retained;
return madv.retained;
}
@@ -388,14 +390,32 @@ static void gem_close(int fd, uint32_t handle)
(void)drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
}
+static inline unsigned long __fls(unsigned long word)
+{
+ asm("bsr %1,%0"
+ : "=r" (word)
+ : "rm" (word));
+ return word;
+}
+
+constant inline static int cache_bucket(int size)
+{
+ uint32_t order = __fls(size / PAGE_SIZE);
+ assert(order < NUM_CACHE_BUCKETS);
+ return order;
+}
+
static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
int handle, int size)
{
+ assert(size);
memset(bo, 0, sizeof(*bo));
bo->refcnt = 1;
bo->handle = handle;
bo->size = size;
+ bo->bucket = cache_bucket(size);
+ assert(bo->size < 1 << (12 + bo->bucket + 1));
bo->reusable = true;
bo->domain = DOMAIN_CPU;
list_init(&bo->request);
@@ -436,30 +456,14 @@ static struct kgem_request *__kgem_request_alloc(void)
return rq;
}
-static inline unsigned long __fls(unsigned long word)
-{
- asm("bsr %1,%0"
- : "=r" (word)
- : "rm" (word));
- return word;
-}
-
-static struct list *inactive(struct kgem *kgem,
- int size)
+static struct list *inactive(struct kgem *kgem, int size)
{
- uint32_t order = __fls(size / PAGE_SIZE);
- if (order >= ARRAY_SIZE(kgem->inactive))
- order = ARRAY_SIZE(kgem->inactive)-1;
- return &kgem->inactive[order];
+ return &kgem->inactive[cache_bucket(size)];
}
-static struct list *active(struct kgem *kgem,
- int size)
+static struct list *active(struct kgem *kgem, int size)
{
- uint32_t order = __fls(size / PAGE_SIZE);
- if (order >= ARRAY_SIZE(kgem->active))
- order = ARRAY_SIZE(kgem->active)-1;
- return &kgem->active[order];
+ return &kgem->active[cache_bucket(size)];
}
static size_t
@@ -508,7 +512,7 @@ static int gem_param(struct kgem *kgem, int name)
void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
{
struct drm_i915_gem_get_aperture aperture;
- unsigned int i;
+ unsigned int i, j;
memset(kgem, 0, sizeof(*kgem));
@@ -527,12 +531,16 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
list_init(&kgem->partial);
list_init(&kgem->requests);
list_init(&kgem->flushing);
- list_init(&kgem->vma_cache);
- list_init(&kgem->vma_inactive);
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
list_init(&kgem->inactive[i]);
for (i = 0; i < ARRAY_SIZE(kgem->active); i++)
list_init(&kgem->active[i]);
+ for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) {
+ for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++)
+ list_init(&kgem->vma[i].inactive[j]);
+ }
+ kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
+ kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;
kgem->next_request = __kgem_request_alloc();
@@ -572,6 +580,8 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
kgem->max_object_size = kgem->aperture_mappable / 2;
if (kgem->max_object_size > kgem->aperture_low)
kgem->max_object_size = kgem->aperture_low;
+ if (kgem->max_object_size > MAX_OBJECT_SIZE)
+ kgem->max_object_size = MAX_OBJECT_SIZE;
DBG(("%s: max object size %d\n", __FUNCTION__, kgem->max_object_size));
kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
@@ -765,6 +775,21 @@ static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo)
}
}
+static void kgem_bo_release_map(struct kgem *kgem, struct kgem_bo *bo)
+{
+ int type = IS_CPU_MAP(bo->map);
+
+ DBG(("%s: releasing %s vma for handle=%d, count=%d\n",
+ __FUNCTION__, type ? "CPU" : "GTT",
+ bo->handle, kgem->vma[type].count));
+
+ munmap(CPU_MAP(bo->map), bo->size);
+ bo->map = NULL;
+
+ list_del(&bo->vma);
+ kgem->vma[type].count--;
+}
+
static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
@@ -773,14 +798,8 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
kgem_bo_binding_free(kgem, bo);
- if (bo->map) {
- DBG(("%s: releasing %s vma for handle=%d, count=%d\n",
- __FUNCTION__, IS_CPU_MAP(bo->map) ? "CPU" : "GTT",
- bo->handle, kgem->vma_count-1));
- munmap(CPU_MAP(bo->map), bo->size);
- list_del(&bo->vma);
- kgem->vma_count--;
- }
+ if (bo->map)
+ kgem_bo_release_map(kgem, bo);
assert(list_is_empty(&bo->vma));
_list_del(&bo->list);
@@ -799,6 +818,44 @@ static bool is_mmaped_buffer(struct kgem_partial_bo *bo)
return bo->mem != bo+1;
}
+inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
+ struct kgem_bo *bo)
+{
+ assert(!kgem_busy(kgem, bo->handle));
+ assert(!bo->proxy);
+ assert(!bo->io);
+
+ list_move(&bo->list, &kgem->inactive[bo->bucket]);
+ if (bo->map) {
+ int type = IS_CPU_MAP(bo->map);
+ list_move_tail(&bo->vma, &kgem->vma[type].inactive[bo->bucket]);
+ kgem->vma[type].count++;
+ }
+
+ kgem->need_expire = true;
+}
+
+inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
+ struct kgem_bo *bo)
+{
+ list_del(&bo->list);
+ assert(bo->rq == NULL);
+ if (bo->map) {
+ assert(!list_is_empty(&bo->vma));
+ list_del(&bo->vma);
+ kgem->vma[IS_CPU_MAP(bo->map)].count--;
+ }
+}
+
+inline static void kgem_bo_remove_from_active(struct kgem *kgem,
+ struct kgem_bo *bo)
+{
+ list_del(&bo->list);
+ if (bo->rq == &_kgem_static_request)
+ list_del(&bo->request);
+ assert(list_is_empty(&bo->vma));
+}
+
static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
@@ -812,12 +869,8 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
goto destroy;
if (bo->io) {
- struct kgem_partial_bo *io = (struct kgem_partial_bo *)bo;
struct kgem_bo *base;
- if (is_mmaped_buffer(io))
- kgem_bo_unmap__cpu(kgem, bo, io->mem);
-
base = malloc(sizeof(*base));
if (base) {
DBG(("%s: transferring io handle=%d to bo\n",
@@ -843,14 +896,15 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
assert(bo->vmap == false && bo->sync == false);
bo->scanout = bo->flush = false;
+ assert(list_is_empty(&bo->vma));
if (bo->rq) {
DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
- list_move(&bo->list, active(kgem, bo->size));
+ list_move(&bo->list, &kgem->active[bo->bucket]);
} else if (bo->needs_flush) {
DBG(("%s: handle=%d -> flushing\n", __FUNCTION__, bo->handle));
assert(list_is_empty(&bo->request));
list_add(&bo->request, &kgem->flushing);
- list_move(&bo->list, active(kgem, bo->size));
+ list_move(&bo->list, &kgem->active[bo->bucket]);
bo->rq = &_kgem_static_request;
} else {
assert(bo->exec == NULL);
@@ -864,10 +918,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
}
DBG(("%s: handle=%d -> inactive\n", __FUNCTION__, bo->handle));
- assert(!kgem_busy(kgem, bo->handle));
- list_move(&bo->list, inactive(kgem, bo->size));
- if (bo->map)
- list_move(&bo->vma, &kgem->vma_inactive);
+ kgem_bo_move_to_inactive(kgem, bo);
kgem->need_expire = true;
}
@@ -905,7 +956,7 @@ bool kgem_retire(struct kgem *kgem)
bo->needs_flush = false;
bo->domain = DOMAIN_NONE;
bo->rq = NULL;
- list_move(&bo->list, inactive(kgem, bo->size));
+ kgem_bo_move_to_inactive(kgem, bo);
list_del(&bo->request);
} else
kgem_bo_free(kgem, bo);
@@ -948,8 +999,7 @@ bool kgem_retire(struct kgem *kgem)
} else if(kgem_bo_set_purgeable(kgem, bo)) {
DBG(("%s: moving %d to inactive\n",
__FUNCTION__, bo->handle));
- list_move(&bo->list,
- inactive(kgem, bo->size));
+ kgem_bo_move_to_inactive(kgem, bo);
retired = true;
} else {
DBG(("%s: closing %d\n",
@@ -969,7 +1019,7 @@ bool kgem_retire(struct kgem *kgem)
if (kgem_bo_set_purgeable(kgem, rq->bo)) {
assert(rq->bo->rq == NULL);
assert(list_is_empty(&rq->bo->request));
- list_move(&rq->bo->list, inactive(kgem, rq->bo->size));
+ kgem_bo_move_to_inactive(kgem, rq->bo);
retired = true;
} else {
kgem->need_purge = 1;
@@ -1483,6 +1533,9 @@ bool kgem_expire_cache(struct kgem *kgem)
idle = true;
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
+ struct list preserve;
+
+ list_init(&preserve);
while (!list_is_empty(&kgem->inactive[i])) {
bo = list_last_entry(&kgem->inactive[i],
struct kgem_bo, list);
@@ -1492,10 +1545,20 @@ bool kgem_expire_cache(struct kgem *kgem)
break;
}
- count++;
- size += bo->size;
-
- kgem_bo_free(kgem, bo);
+ if (bo->map && bo->delta + MAP_PRESERVE_TIME > expire) {
+ idle = false;
+ list_move_tail(&bo->list, &preserve);
+ } else {
+ count++;
+ size += bo->size;
+ kgem_bo_free(kgem, bo);
+ }
+ }
+ if (!list_is_empty(&preserve)) {
+ preserve.prev->next = kgem->inactive[i].next;
+ kgem->inactive[i].next->prev = preserve.prev;
+ kgem->inactive[i].next = preserve.next;
+ preserve.next->prev = &kgem->inactive[i];
}
}
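A note on the kgem_expire_cache() hunk above: the four pointer assignments in the !list_is_empty(&preserve) branch are an open-coded splice, pushing the preserved (recently mapped) bo back onto the front of the inactive bucket so they become the last eviction candidates. A self-contained sketch of what those four lines do, assuming the same circular doubly-linked list with a dummy head used throughout kgem:

    struct list {
            struct list *next, *prev;
    };

    /* Move every entry of 'from' to the front of 'head'. This is,
     * line for line, the four assignments in the hunk above; 'from'
     * is left stale afterwards, which is harmless there because
     * 'preserve' is re-initialised for each bucket. */
    static void list_splice_front(struct list *from, struct list *head)
    {
            if (from->next == from)         /* empty: nothing to move */
                    return;
            from->prev->next = head->next;  /* old tail -> old first entry */
            head->next->prev = from->prev;
            head->next = from->next;        /* head -> new first entry */
            from->next->prev = head;
    }

Preserved entries also force idle = false, so a later expiry pass will reconsider them once MAP_PRESERVE_TIME has elapsed.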
@@ -1552,15 +1615,24 @@ search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags)
bool use_active = (flags & CREATE_INACTIVE) == 0;
struct list *cache;
- if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
- int for_cpu = !!(flags & CREATE_CPU_MAP);
- assert(for_cpu || use_active == false);
- list_for_each_entry(bo, &kgem->vma_inactive, vma) {
- if (IS_CPU_MAP(bo->map) != for_cpu)
- continue;
+ if (!use_active &&
+ list_is_empty(inactive(kgem, size)) &&
+ !list_is_empty(active(kgem, size)) &&
+ !kgem_retire(kgem))
+ return NULL;
- if (size > bo->size || 2*size < bo->size)
+ if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
+ int for_cpu = !!(flags & CREATE_CPU_MAP);
+ cache = &kgem->vma[for_cpu].inactive[cache_bucket(size)];
+ list_for_each_entry(bo, cache, vma) {
+ assert(IS_CPU_MAP(bo->map) == for_cpu);
+ assert(bo->bucket == cache_bucket(size));
+
+ if (size > bo->size) {
+ DBG(("inactive too small: %d < %d\n",
+ bo->size, size));
continue;
+ }
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem->need_purge |= bo->domain == DOMAIN_GPU;
@@ -1573,10 +1645,7 @@ search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags)
I915_TILING_NONE, 0) != I915_TILING_NONE)
continue;
- list_del(&bo->list);
- if (bo->rq == &_kgem_static_request)
- list_del(&bo->request);
- list_move_tail(&bo->vma, &kgem->vma_cache);
+ kgem_bo_remove_from_inactive(kgem, bo);
bo->tiling = I915_TILING_NONE;
bo->pitch = 0;
@@ -1590,7 +1659,7 @@ search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags)
}
}
- cache = use_active ? active(kgem, size): inactive(kgem, size);
+ cache = use_active ? active(kgem, size) : inactive(kgem, size);
list_for_each_entry_safe(bo, next, cache, list) {
assert(bo->refcnt == 0);
assert(bo->reusable);
@@ -1644,13 +1713,10 @@ search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags)
continue;
}
- list_del(&bo->list);
- if (bo->rq == &_kgem_static_request)
- list_del(&bo->request);
- if (bo->map) {
- assert(!list_is_empty(&bo->vma));
- list_move_tail(&bo->vma, &kgem->vma_cache);
- }
+ if (use_active)
+ kgem_bo_remove_from_active(kgem, bo);
+ else
+ kgem_bo_remove_from_inactive(kgem, bo);
bo->tiling = I915_TILING_NONE;
bo->pitch = 0;
@@ -1673,22 +1739,14 @@ search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags)
I915_TILING_NONE, 0) != I915_TILING_NONE)
return NULL;
- if (first->map) {
- munmap(CPU_MAP(first->map), first->size);
- first->map = NULL;
-
- list_del(&first->vma);
- kgem->vma_count--;
- }
+ if (first->map)
+ kgem_bo_release_map(kgem, first);
}
- list_del(&first->list);
- if (first->rq == &_kgem_static_request)
- list_del(&first->request);
- if (first->map) {
- assert(!list_is_empty(&first->vma));
- list_move_tail(&first->vma, &kgem->vma_cache);
- }
+ if (use_active)
+ kgem_bo_remove_from_active(kgem, first);
+ else
+ kgem_bo_remove_from_inactive(kgem, first);
first->tiling = I915_TILING_NONE;
first->pitch = 0;
@@ -1718,7 +1776,7 @@ struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name)
return NULL;
DBG(("%s: new handle=%d\n", __FUNCTION__, open_arg.handle));
- bo = __kgem_bo_alloc(open_arg.handle, 0);
+ bo = __kgem_bo_alloc(open_arg.handle, open_arg.size);
if (bo == NULL) {
gem_close(kgem->fd, open_arg.handle);
return NULL;
@@ -1740,14 +1798,6 @@ struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size)
if (bo)
return kgem_bo_reference(bo);
- if (!list_is_empty(&kgem->requests)) {
- if (kgem_retire(kgem)) {
- bo = search_linear_cache(kgem, size, CREATE_INACTIVE);
- if (bo)
- return kgem_bo_reference(bo);
- }
- }
-
handle = gem_create(kgem->fd, size);
if (handle == 0)
return NULL;
@@ -1877,11 +1927,11 @@ static bool _kgem_can_create_2d(struct kgem *kgem,
size = kgem_surface_size(kgem, false, false,
width, height, bpp, tiling, &pitch);
- if (size == 0 || size > kgem->max_object_size)
+ if (size == 0 || size >= kgem->max_object_size)
size = kgem_surface_size(kgem, false, false,
width, height, bpp,
I915_TILING_NONE, &pitch);
- return size > 0 && size <= kgem->max_object_size;
+ return size > 0 && size < kgem->max_object_size;
}
#if DEBUG_KGEM
@@ -1954,18 +2004,18 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
/* We presume that we will need to upload to this bo,
* and so would prefer to have an active VMA.
*/
+ cache = &kgem->vma[for_cpu].inactive[cache_bucket(size)];
do {
- list_for_each_entry(bo, &kgem->vma_inactive, vma) {
+ list_for_each_entry(bo, cache, vma) {
+ assert(bo->bucket == cache_bucket(size));
assert(bo->refcnt == 0);
assert(bo->map);
+ assert(IS_CPU_MAP(bo->map) == for_cpu);
assert(bo->rq == NULL);
assert(list_is_empty(&bo->request));
- if (IS_CPU_MAP(bo->map) != for_cpu)
- continue;
-
- if (size > bo->size || 2*size < bo->size) {
- DBG(("inactive vma too small/large: %d < %d\n",
+ if (size > bo->size) {
+ DBG(("inactive too small: %d < %d\n",
bo->size, size));
continue;
}
@@ -1977,25 +2027,24 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
continue;
}
- bo->pitch = pitch;
- list_del(&bo->list);
-
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
+ bo->pitch = pitch;
bo->delta = 0;
bo->unique_id = kgem_get_unique_id(kgem);
- list_move_tail(&bo->vma, &kgem->vma_cache);
- assert(bo->pitch);
+
+ kgem_bo_remove_from_inactive(kgem, bo);
+
DBG((" from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->reusable);
assert(bo->domain != DOMAIN_GPU && !kgem_busy(kgem, bo->handle));
return kgem_bo_reference(bo);
}
- } while (!list_is_empty(&kgem->vma_cache) && kgem_retire(kgem));
+ } while (!list_is_empty(cache) && kgem_retire(kgem));
}
if (flags & CREATE_INACTIVE)
@@ -2013,6 +2062,8 @@ search_active: /* Best active match first */
list_for_each_entry(bo, cache, list) {
uint32_t s;
+ assert(bo->bucket == cache_bucket(size));
+
if (bo->tiling) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
@@ -2031,10 +2082,6 @@ search_active: /* Best active match first */
continue;
}
- list_del(&bo->list);
- if (bo->rq == &_kgem_static_request)
- list_del(&bo->request);
-
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem->need_purge |= bo->domain == DOMAIN_GPU;
kgem_bo_free(kgem, bo);
@@ -2042,6 +2089,8 @@ search_active: /* Best active match first */
goto search_active;
}
+ kgem_bo_remove_from_active(kgem, bo);
+
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
@@ -2061,6 +2110,8 @@ search_active: /* Best active match first */
kgem->need_purge |= next->domain == DOMAIN_GPU;
kgem_bo_free(kgem, next);
} else {
+ kgem_bo_remove_from_active(kgem, next);
+
next->unique_id = kgem_get_unique_id(kgem);
next->delta = 0;
DBG((" 2:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
@@ -2075,6 +2126,8 @@ skip_active_search:
/* Now just look for a close match and prefer any currently active */
cache = inactive(kgem, size);
list_for_each_entry_safe(bo, next, cache, list) {
+ assert(bo->bucket == cache_bucket(size));
+
if (size > bo->size) {
DBG(("inactive too small: %d < %d\n",
bo->size, size));
@@ -2090,29 +2143,20 @@ skip_active_search:
continue;
}
- if (bo->map) {
- munmap(CPU_MAP(bo->map), bo->size);
- bo->map = NULL;
-
- list_del(&bo->vma);
- kgem->vma_count--;
- }
+ if (bo->map)
+ kgem_bo_release_map(kgem, bo);
}
- bo->pitch = pitch;
- bo->tiling = tiling;
-
- list_del(&bo->list);
- assert(list_is_empty(&bo->request));
-
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem->need_purge |= bo->domain == DOMAIN_GPU;
kgem_bo_free(kgem, bo);
continue;
}
- if (bo->map)
- list_move_tail(&bo->vma, &kgem->vma_cache);
+ kgem_bo_remove_from_inactive(kgem, bo);
+
+ bo->pitch = pitch;
+ bo->tiling = tiling;
bo->delta = 0;
bo->unique_id = kgem_get_unique_id(kgem);
@@ -2372,9 +2416,14 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
return delta;
}
-static void kgem_trim_vma_cache(struct kgem *kgem)
+static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
{
- if (kgem->vma_count > MAX_VMA_CACHE && kgem->need_purge)
+ int i, j;
+
+ if (kgem->vma[type].count <= 0)
+ return;
+
+ if (kgem->need_purge)
kgem_purge_cache(kgem);
/* vma are limited on a per-process basis to around 64k.
@@ -2384,33 +2433,36 @@ static void kgem_trim_vma_cache(struct kgem *kgem)
* start failing mappings, we keep our own number of open
* vma to within a conservative value.
*/
- while (kgem->vma_count > MAX_VMA_CACHE) {
- struct kgem_bo *old;
-
- if (list_is_empty(&kgem->vma_inactive)) {
- old = list_first_entry(&kgem->vma_cache,
- struct kgem_bo,
- vma);
- } else {
- old = list_last_entry(&kgem->vma_inactive,
- struct kgem_bo,
- vma);
+ i = 0;
+ while (kgem->vma[type].count > 0) {
+ struct kgem_bo *bo = NULL;
+
+ for (j = 0;
+ bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive);
+ j++) {
+ struct list *head = &kgem->vma[type].inactive[i++%ARRAY_SIZE(kgem->vma[type].inactive)];
+ if (!list_is_empty(head))
+ bo = list_first_entry(head,
+ struct kgem_bo,
+ vma);
}
- DBG(("%s: discarding %s %s vma cache for %d\n",
+ if (bo == NULL)
+ break;
+
+ DBG(("%s: discarding inactive %s vma cache for %d\n",
__FUNCTION__,
- list_is_empty(&kgem->vma_inactive) ? "cached" : "inactive",
- IS_CPU_MAP(old->map) ? "CPU" : "GTT", old->handle));
- assert(old->map);
- munmap(CPU_MAP(old->map), old->size);
- old->map = NULL;
- list_del(&old->vma);
- kgem->vma_count--;
-
- if (old->rq == NULL && old->refcnt == 0) {
- DBG(("%s: discarding unused vma bo handle=%d\n",
- __FUNCTION__, old->handle));
- kgem_bo_free(kgem, old);
-}
+ IS_CPU_MAP(bo->map) ? "CPU" : "GTT", bo->handle));
+ assert(IS_CPU_MAP(bo->map) == type);
+ assert(bo->map);
+ assert(bo->rq == NULL);
+
+ munmap(CPU_MAP(bo->map), bo->size);
+ bo->map = NULL;
+ list_del(&bo->vma);
+ kgem->vma[type].count--;
+
+ if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo))
+ kgem_bo_free(kgem, bo);
}
}
@@ -2421,18 +2473,12 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
assert(bo->refcnt || bo->exec); /* allow for debugging purposes */
assert(!bo->purged);
- if (IS_CPU_MAP(bo->map)) {
- DBG(("%s: discarding CPU vma cache for %d\n",
- __FUNCTION__, bo->handle));
- munmap(CPU_MAP(bo->map), bo->size);
- bo->map = NULL;
- list_del(&bo->vma);
- kgem->vma_count--;
- }
+ if (IS_CPU_MAP(bo->map))
+ kgem_bo_release_map(kgem, bo);
ptr = bo->map;
if (ptr == NULL) {
- kgem_trim_vma_cache(kgem);
+ kgem_trim_vma_cache(kgem, MAP_GTT, bo->bucket);
ptr = gem_mmap(kgem->fd, bo->handle, bo->size,
PROT_READ | PROT_WRITE);
@@ -2445,10 +2491,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
* flush CPU damage to their GPU bo.
*/
bo->map = ptr;
- kgem->vma_count++;
-
- DBG(("%s: caching vma for %d, count=%d\n",
- __FUNCTION__, bo->handle, kgem->vma_count));
+ DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
if (bo->domain != DOMAIN_GTT) {
@@ -2473,8 +2516,6 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
bo->domain = DOMAIN_GTT;
}
- list_move_tail(&bo->vma, &kgem->vma_cache);
-
return ptr;
}
@@ -2486,25 +2527,13 @@ void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
assert(bo->refcnt);
assert(!bo->purged);
- if (IS_CPU_MAP(bo->map)) {
- void *ptr = CPU_MAP(bo->map);
- list_del(&bo->vma);
- kgem->vma_count--;
- bo->map = NULL;
- VG(VALGRIND_MALLOCLIKE_BLOCK(ptr, bo->size, 0, 1));
- return ptr;
- }
+ if (IS_CPU_MAP(bo->map))
+ return CPU_MAP(bo->map);
- if (bo->map) {
- DBG(("%s: discarding GTT vma cache for %d\n",
- __FUNCTION__, bo->handle));
- munmap(CPU_MAP(bo->map), bo->size);
- bo->map = NULL;
- list_del(&bo->vma);
- kgem->vma_count--;
- }
+ if (bo->map)
+ kgem_bo_release_map(kgem, bo);
- kgem_trim_vma_cache(kgem);
+ kgem_trim_vma_cache(kgem, MAP_CPU, bo->bucket);
VG_CLEAR(mmap_arg);
mmap_arg.handle = bo->handle;
@@ -2515,38 +2544,11 @@ void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
return NULL;
}
- VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, bo->size, 0, 1));
+ DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
+ bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr);
return (void *)(uintptr_t)mmap_arg.addr_ptr;
}
-void kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr)
-{
- assert(bo->map == NULL);
- assert(ptr != NULL);
-
- bo->map = MAKE_CPU_MAP(ptr);
- list_move(&bo->vma, &kgem->vma_cache);
- kgem->vma_count++;
-
- VG(VALGRIND_FREELIKE_BLOCK(ptr, 0));
-}
-
-void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo)
-{
- if (bo->map == NULL)
- return;
-
- DBG(("%s: (debug) releasing vma for handle=%d, count=%d\n",
- __FUNCTION__, bo->handle, kgem->vma_count-1));
- assert(!IS_CPU_MAP(bo->map));
-
- munmap(CPU_MAP(bo->map), bo->size);
- bo->map = NULL;
-
- list_del(&bo->vma);
- kgem->vma_count--;
-}
-
uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_gem_flink flink;
@@ -2876,6 +2878,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
if (bo->mem == NULL) {
+ bo->base.refcnt = 0; /* for valgrind */
kgem_bo_free(kgem, &bo->base);
return NULL;
}
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 57ac6478..1bc0d9b4 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -63,8 +63,10 @@ struct kgem_bo {
uint32_t refcnt;
uint32_t handle;
uint32_t presumed_offset;
- uint32_t size;
uint32_t delta;
+ uint32_t size:28;
+ uint32_t bucket:4;
+#define MAX_OBJECT_SIZE (1 << 28)
uint32_t pitch : 18; /* max 128k */
uint32_t tiling : 2;
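Worth noting in the kgem_bo hunk above: size shrinks to a 28-bit field to make room for the 4-bit bucket index without growing the struct, MAX_OBJECT_SIZE pins the largest allocation to 1 << 28 (kgem_init() clamps max_object_size accordingly), and a bucket index below NUM_CACHE_BUCKETS = 16 always fits in 4 bits. A small self-contained check of those invariants, illustrative rather than driver code:

    #include <assert.h>
    #include <stdint.h>

    #define MAX_OBJECT_SIZE   (1 << 28)
    #define NUM_CACHE_BUCKETS 16

    struct bo_fields {              /* mirrors the kgem_bo bitfields above */
            uint32_t size : 28;     /* bytes; always < MAX_OBJECT_SIZE */
            uint32_t bucket : 4;    /* log2(size in pages); < 16 */
    };

    int main(void)
    {
            struct bo_fields f = {
                    .size = MAX_OBJECT_SIZE - 1,     /* largest legal size */
                    .bucket = NUM_CACHE_BUCKETS - 1, /* largest bucket */
            };
            assert(f.size == MAX_OBJECT_SIZE - 1);   /* no truncation */
            assert(f.bucket == NUM_CACHE_BUCKETS - 1);
            return 0;
    }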
@@ -90,6 +92,14 @@ struct kgem_request {
struct list buffers;
};
+enum {
+ MAP_GTT = 0,
+ MAP_CPU,
+ NUM_MAP_TYPES,
+};
+
+#define NUM_CACHE_BUCKETS 16
+
struct kgem {
int fd;
int wedged;
@@ -105,20 +115,22 @@ struct kgem {
KGEM_BLT,
} mode, ring;
- struct list flushing, active[16], inactive[16];
+ struct list flushing, active[NUM_CACHE_BUCKETS], inactive[NUM_CACHE_BUCKETS];
struct list partial;
struct list requests;
- struct list vma_cache;
- struct list vma_inactive;
struct kgem_request *next_request;
+ struct {
+ struct list inactive[NUM_CACHE_BUCKETS];
+ int16_t count;
+ } vma[NUM_MAP_TYPES];
+
uint16_t nbatch;
uint16_t surface;
uint16_t nexec;
uint16_t nreloc;
uint16_t nfence;
uint16_t max_batch_size;
- uint16_t vma_count;
uint32_t flush:1;
uint32_t sync:1;
@@ -332,10 +344,8 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
uint32_t delta);
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot);
-void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo);
-void kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr);
uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo);
Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
diff --git a/src/sna/kgem_debug_gen3.c b/src/sna/kgem_debug_gen3.c
index 0238b734..213c69f6 100644
--- a/src/sna/kgem_debug_gen3.c
+++ b/src/sna/kgem_debug_gen3.c
@@ -101,9 +101,6 @@ static void gen3_update_vertex_buffer_addr(struct kgem *kgem,
}
ptr = (char *)base + kgem->reloc[i].delta;
- if (state.vb.current)
- kgem_bo_unmap(kgem, state.vb.current);
-
state.vb.current = bo;
state.vb.base = base;
state.vb.ptr = ptr;
@@ -1612,8 +1609,5 @@ int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset)
void kgem_gen3_finish_state(struct kgem *kgem)
{
- if (state.vb.current)
- kgem_bo_unmap(kgem, state.vb.current);
-
memset(&state, 0, sizeof(state));
}
diff --git a/src/sna/kgem_debug_gen4.c b/src/sna/kgem_debug_gen4.c
index 0f91d29a..0004ecf5 100644
--- a/src/sna/kgem_debug_gen4.c
+++ b/src/sna/kgem_debug_gen4.c
@@ -89,8 +89,6 @@ static void gen4_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
ptr = (char *)base + kgem->reloc[i].delta;
i = data[0] >> 27;
- if (state.vb[i].current)
- kgem_bo_unmap(kgem, state.vb[i].current);
state.vb[i].current = bo;
state.vb[i].base = base;
@@ -415,13 +413,6 @@ get_reloc(struct kgem *kgem,
return (char *)base + delta;
}
-
-static void
-put_reloc(struct kgem *kgem, struct reloc *r)
-{
- if (r->bo != NULL)
- kgem_bo_unmap(kgem, r->bo);
-}
#endif
int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset)
@@ -691,21 +682,7 @@ int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset)
return len;
}
-static void finish_vertex_buffers(struct kgem *kgem)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(state.vb); i++)
- if (state.vb[i].current)
- kgem_bo_unmap(kgem, state.vb[i].current);
-}
-
void kgem_gen4_finish_state(struct kgem *kgem)
{
- finish_vertex_buffers(kgem);
-
- if (state.dynamic_state.current)
- kgem_bo_unmap(kgem, state.dynamic_state.base);
-
memset(&state, 0, sizeof(state));
}
diff --git a/src/sna/kgem_debug_gen5.c b/src/sna/kgem_debug_gen5.c
index c4f5df15..7912cc91 100644
--- a/src/sna/kgem_debug_gen5.c
+++ b/src/sna/kgem_debug_gen5.c
@@ -84,8 +84,6 @@ static void gen5_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
ptr = (char *)base + reloc->delta;
i = data[0] >> 27;
- if (state.vb[i].current)
- kgem_bo_unmap(kgem, state.vb[i].current);
state.vb[i].handle = reloc->target_handle;
state.vb[i].current = bo;
@@ -389,13 +387,6 @@ get_reloc(struct kgem *kgem,
return (char *)base + delta;
}
-
-static void
-put_reloc(struct kgem *kgem, struct reloc *r)
-{
- if (r->bo != NULL)
- kgem_bo_unmap(kgem, r->bo);
-}
#endif
int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset)
@@ -667,21 +658,7 @@ int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset)
return len;
}
-static void finish_vertex_buffers(struct kgem *kgem)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(state.vb); i++)
- if (state.vb[i].current)
- kgem_bo_unmap(kgem, state.vb[i].current);
-}
-
void kgem_gen5_finish_state(struct kgem *kgem)
{
- finish_vertex_buffers(kgem);
-
- if (state.dynamic_state.current)
- kgem_bo_unmap(kgem, state.dynamic_state.current);
-
memset(&state, 0, sizeof(state));
}
diff --git a/src/sna/kgem_debug_gen6.c b/src/sna/kgem_debug_gen6.c
index 5bcd85dc..d23e2d93 100644
--- a/src/sna/kgem_debug_gen6.c
+++ b/src/sna/kgem_debug_gen6.c
@@ -88,8 +88,6 @@ static void gen6_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
ptr = (char *)base + kgem->reloc[i].delta;
i = data[0] >> 26;
- if (state.vb[i].current)
- kgem_bo_unmap(kgem, state.vb[i].current);
state.vb[i].current = bo;
state.vb[i].base = base;
@@ -129,9 +127,6 @@ static void gen6_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset)
ptr = NULL;
}
- if (state.dynamic_state.current)
- kgem_bo_unmap(kgem, state.dynamic_state.current);
-
state.dynamic_state.current = bo;
state.dynamic_state.base = base;
state.dynamic_state.ptr = ptr;
@@ -300,22 +295,8 @@ static void primitive_out(struct kgem *kgem, uint32_t *data)
}
}
-static void finish_vertex_buffers(struct kgem *kgem)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(state.vb); i++)
- if (state.vb[i].current)
- kgem_bo_unmap(kgem, state.vb[i].current);
-}
-
static void finish_state(struct kgem *kgem)
{
- finish_vertex_buffers(kgem);
-
- if (state.dynamic_state.current)
- kgem_bo_unmap(kgem, state.dynamic_state.base);
-
memset(&state, 0, sizeof(state));
}
@@ -478,13 +459,6 @@ get_reloc(struct kgem *kgem,
return (char *)base + (delta & ~3);
}
-static void
-put_reloc(struct kgem *kgem, struct reloc *r)
-{
- if (r->bo != NULL)
- kgem_bo_unmap(kgem, r->bo);
-}
-
static const char *
gen6_filter_to_string(uint32_t filter)
{
@@ -539,8 +513,6 @@ gen6_decode_sampler_state(struct kgem *kgem, const uint32_t *reloc)
ErrorF(" Sampler 1:\n");
ErrorF(" filter: min=%s, mag=%s\n", min, mag);
ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap);
-
- put_reloc(kgem, &r);
}
static const char *
@@ -604,8 +576,6 @@ gen6_decode_blend(struct kgem *kgem, const uint32_t *reloc)
ErrorF(" Blend (%s): function %s, src=%s, dst=%s\n",
blend->blend0.blend_enable ? "enabled" : "disabled",
func, src, dst);
-
- put_reloc(kgem, &r);
}
int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset)
diff --git a/src/sna/kgem_debug_gen7.c b/src/sna/kgem_debug_gen7.c
index a33a918d..c13e96f2 100644
--- a/src/sna/kgem_debug_gen7.c
+++ b/src/sna/kgem_debug_gen7.c
@@ -88,8 +88,6 @@ static void gen7_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
ptr = (char *)base + kgem->reloc[i].delta;
i = data[0] >> 26;
- if (state.vb[i].current)
- kgem_bo_unmap(kgem, state.vb[i].base);
state.vb[i].current = bo;
state.vb[i].base = base;
@@ -129,9 +127,6 @@ static void gen7_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset)
ptr = NULL;
}
- if (state.dynamic_state.current)
- kgem_bo_unmap(kgem, state.dynamic_state.base);
-
state.dynamic_state.current = bo;
state.dynamic_state.base = base;
state.dynamic_state.ptr = ptr;
@@ -300,22 +295,8 @@ static void primitive_out(struct kgem *kgem, uint32_t *data)
}
}
-static void finish_vertex_buffers(struct kgem *kgem)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(state.vb); i++)
- if (state.vb[i].current)
- kgem_bo_unmap(kgem, state.vb[i].current);
-}
-
static void finish_state(struct kgem *kgem)
{
- finish_vertex_buffers(kgem);
-
- if (state.dynamic_state.current)
- kgem_bo_unmap(kgem, state.dynamic_state.base);
-
memset(&state, 0, sizeof(state));
}
@@ -478,13 +459,6 @@ get_reloc(struct kgem *kgem,
return (char *)base + (delta & ~3);
}
-static void
-put_reloc(struct kgem *kgem, struct reloc *r)
-{
- if (r->bo != NULL)
- kgem_bo_unmap(kgem, r->bo);
-}
-
static const char *
gen7_filter_to_string(uint32_t filter)
{
@@ -539,8 +513,6 @@ gen7_decode_sampler_state(struct kgem *kgem, const uint32_t *reloc)
ErrorF(" Sampler 1:\n");
ErrorF(" filter: min=%s, mag=%s\n", min, mag);
ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap);
-
- put_reloc(kgem, &r);
}
static const char *
@@ -604,8 +576,6 @@ gen7_decode_blend(struct kgem *kgem, const uint32_t *reloc)
ErrorF(" Blend (%s): function %s, src=%s, dst=%s\n",
blend->blend0.blend_enable ? "enabled" : "disabled",
func, src, dst);
-
- put_reloc(kgem, &r);
}
int kgem_gen7_decode_3d(struct kgem *kgem, uint32_t offset)
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 4e42c6db..6b69a6e9 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -238,9 +238,7 @@ static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv)
DBG(("%s: discarding CPU buffer, handle=%d, size=%d\n",
__FUNCTION__, priv->cpu_bo->handle, priv->cpu_bo->size));
- kgem_bo_unmap__cpu(&sna->kgem, priv->cpu_bo, priv->ptr);
kgem_bo_destroy(&sna->kgem, priv->cpu_bo);
-
priv->cpu_bo = NULL;
} else
free(priv->ptr);