-rw-r--r--  src/sna/blt.c                |   2
-rw-r--r--  src/sna/gen6_render.c        |  18
-rw-r--r--  src/sna/kgem.c               | 428
-rw-r--r--  src/sna/kgem.h               |  67
-rw-r--r--  src/sna/kgem_debug_gen5.c    |   2
-rw-r--r--  src/sna/sna.h                |  11
-rw-r--r--  src/sna/sna_accel.c          | 164
-rw-r--r--  src/sna/sna_blt.c            |  58
-rw-r--r--  src/sna/sna_io.c             | 447
-rw-r--r--  src/sna/sna_render.c         |  12
-rw-r--r--  src/sna/sna_video.c          |   2
-rw-r--r--  src/sna/sna_video_textured.c |   2
12 files changed, 822 insertions, 391 deletions
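
The bulk of this commit converts kgem's buffer-size bookkeeping from bytes to pages and routes oversized objects past the bucketed caches. The sketch below pulls together the new pieces as they appear in the kgem.h and kgem.c hunks that follow (the size union, the bucket()/num_pages() accessors and the log2 bucket selection); the struct is trimmed to the size field only, so treat it as an illustration of the scheme rather than the full kgem_bo definition.

/* Minimal sketch of the page-based size bookkeeping introduced below.
 * Only the size field of struct kgem_bo is shown; everything else is
 * omitted for brevity. */
#include <stdint.h>

#define PAGE_SIZE 4096
#define NUM_CACHE_BUCKETS 16
#define MAX_CACHE_SIZE (1 << (NUM_CACHE_BUCKETS + 12))

struct kgem_bo {
	union {
		struct {
			uint32_t count:27;	/* size in pages */
			uint32_t bucket:5;	/* cache bucket = log2(pages) */
		} pages;
		uint32_t bytes;			/* only for proxy/upload buffers */
	} size;
	/* ...remaining bo fields omitted in this sketch... */
};

#define bucket(B) ((B)->size.pages.bucket)
#define num_pages(B) ((B)->size.pages.count)

/* Bucket selection: floor(log2(num_pages)), via the x86 bsr instruction,
 * as in the kgem.c hunk. Buckets at or above NUM_CACHE_BUCKETS are treated
 * as "large" and kept on a separate list instead of the bucketed caches. */
static inline unsigned long __fls(unsigned long word)
{
	asm("bsr %1,%0" : "=r" (word) : "rm" (word));
	return word;
}

static inline int cache_bucket(int num_pages)
{
	return __fls(num_pages);
}

static inline int kgem_bo_size(struct kgem_bo *bo)
{
	return PAGE_SIZE * num_pages(bo);
}
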
diff --git a/src/sna/blt.c b/src/sna/blt.c index fb3dd35c..65d586cb 100644 --- a/src/sna/blt.c +++ b/src/sna/blt.c @@ -154,6 +154,8 @@ memcpy_blt(const void *src, void *dst, int bpp, uint8_t *dst_bytes; int byte_width; + assert(src); + assert(dst); assert(width && height); assert(bpp >= 8); diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c index d813d95a..41e05d0e 100644 --- a/src/sna/gen6_render.c +++ b/src/sna/gen6_render.c @@ -1193,6 +1193,7 @@ gen6_bind_bo(struct sna *sna, bo, domains, 0); ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT | (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT); + assert(bo->pitch <= (1 << 18)); ss[3] = (gen6_tiling_bits(bo->tiling) | (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT); ss[4] = 0; @@ -3136,10 +3137,19 @@ fallback: if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) return false; - return sna_blt_copy_boxes_fallback(sna, alu, - src, src_bo, src_dx, src_dy, - dst, dst_bo, dst_dx, dst_dy, - box, n); + if (sna_blt_copy_boxes_fallback(sna, alu, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n)) + return true; + + return false; +#if 0 + return sna_tiling_copy_boxes(sna, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n); +#endif } if (dst->drawable.depth == src->drawable.depth) { diff --git a/src/sna/kgem.c b/src/sna/kgem.c index 86a43729..208c8f27 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -45,7 +45,7 @@ #endif static struct kgem_bo * -search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags); +search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); static inline void _list_del(struct list *list) { @@ -99,7 +99,6 @@ static inline void list_replace(struct list *old, #define DBG(x) ErrorF x #endif -#define PAGE_SIZE 4096 #define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE) #define MAX_GTT_VMA_CACHE 512 #define MAX_CPU_VMA_CACHE INT16_MAX @@ -120,6 +119,14 @@ struct kgem_partial_bo { static struct kgem_bo *__kgem_freed_bo; static struct drm_i915_gem_exec_object2 _kgem_dummy_exec; +static inline int bytes(struct kgem_bo *bo) +{ + return kgem_bo_size(bo); +} + +#define bucket(B) (B)->size.pages.bucket +#define num_pages(B) (B)->size.pages.count + #ifndef NDEBUG static bool validate_partials(struct kgem *kgem) { @@ -128,10 +135,10 @@ static bool validate_partials(struct kgem *kgem) list_for_each_entry_safe(bo, next, &kgem->partial, base.list) { if (bo->base.list.next == &kgem->partial) return true; - if (bo->base.size - bo->used < next->base.size - next->used) { + if (bytes(&bo->base) - bo->used < bytes(&next->base) - next->used) { ErrorF("this rem: %d, next rem: %d\n", - bo->base.size - bo->used, - next->base.size - next->used); + bytes(&bo->base) - bo->used, + bytes(&next->base) - next->used); goto err; } } @@ -140,7 +147,7 @@ static bool validate_partials(struct kgem *kgem) err: list_for_each_entry(bo, &kgem->partial, base.list) ErrorF("bo: used=%d / %d, rem=%d\n", - bo->used, bo->base.size, bo->base.size - bo->used); + bo->used, bytes(&bo->base), bytes(&bo->base) - bo->used); return false; } #else @@ -312,7 +319,7 @@ Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo, assert(!bo->purged); assert(!kgem_busy(kgem, bo->handle)); - assert(length <= bo->size); + assert(length <= bytes(bo)); if (gem_write(kgem->fd, bo->handle, 0, length, data)) return FALSE; @@ -322,17 +329,13 @@ Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo, return TRUE; } -static uint32_t gem_create(int fd, int size) +static uint32_t gem_create(int fd, int num_pages) { struct 
drm_i915_gem_create create; -#if DEBUG_KGEM - assert((size & (PAGE_SIZE-1)) == 0); -#endif - VG_CLEAR(create); create.handle = 0; - create.size = size; + create.size = PAGE_SIZE * num_pages; (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create); return create.handle; @@ -415,7 +418,7 @@ static void gem_close(int fd, uint32_t handle) (void)drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close); } -static inline unsigned long __fls(unsigned long word) +constant inline static unsigned long __fls(unsigned long word) { asm("bsr %1,%0" : "=r" (word) @@ -423,24 +426,21 @@ static inline unsigned long __fls(unsigned long word) return word; } -constant inline static int cache_bucket(int size) +constant inline static int cache_bucket(int num_pages) { - uint32_t order = __fls(size / PAGE_SIZE); - assert(order < NUM_CACHE_BUCKETS); - return order; + return __fls(num_pages); } static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo, - int handle, int size) + int handle, int num_pages) { - assert(size); + assert(num_pages); memset(bo, 0, sizeof(*bo)); bo->refcnt = 1; bo->handle = handle; - bo->size = size; - bo->bucket = cache_bucket(size); - assert(bo->size < 1 << (12 + bo->bucket + 1)); + num_pages(bo) = num_pages; + bucket(bo) = cache_bucket(num_pages); bo->reusable = true; bo->domain = DOMAIN_CPU; list_init(&bo->request); @@ -450,7 +450,7 @@ static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo, return bo; } -static struct kgem_bo *__kgem_bo_alloc(int handle, int size) +static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages) { struct kgem_bo *bo; @@ -463,7 +463,7 @@ static struct kgem_bo *__kgem_bo_alloc(int handle, int size) return NULL; } - return __kgem_bo_init(bo, handle, size); + return __kgem_bo_init(bo, handle, num_pages); } static struct kgem_request _kgem_static_request; @@ -481,14 +481,14 @@ static struct kgem_request *__kgem_request_alloc(void) return rq; } -static struct list *inactive(struct kgem *kgem, int size) +static struct list *inactive(struct kgem *kgem, int num_pages) { - return &kgem->inactive[cache_bucket(size)]; + return &kgem->inactive[cache_bucket(num_pages)]; } -static struct list *active(struct kgem *kgem, int size, int tiling) +static struct list *active(struct kgem *kgem, int num_pages, int tiling) { - return &kgem->active[cache_bucket(size)][tiling]; + return &kgem->active[cache_bucket(num_pages)][tiling]; } static size_t @@ -575,6 +575,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) list_init(&kgem->partial); list_init(&kgem->requests); list_init(&kgem->flushing); + list_init(&kgem->large); for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) list_init(&kgem->inactive[i]); for (i = 0; i < ARRAY_SIZE(kgem->active); i++) { @@ -658,11 +659,9 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) * disable dual-stream mode */ kgem->min_alignment = 64; + kgem->max_object_size = kgem->aperture_total / 2; kgem->max_cpu_size = kgem->aperture_total / 2; - if (kgem->max_cpu_size > MAX_OBJECT_SIZE) - kgem->max_cpu_size = MAX_OBJECT_SIZE; - - kgem->max_gpu_size = -1; + kgem->max_gpu_size = MAX_CACHE_SIZE; if (gen < 40) { /* If we have to use fences for blitting, we have to make * sure we can fit them into the aperture. 
@@ -677,6 +676,10 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) DBG(("%s: max object size (tiled=%d, linear=%d)\n", __FUNCTION__, kgem->max_gpu_size, kgem->max_cpu_size)); + /* Convert the aperture thresholds to pages */ + kgem->aperture_low /= PAGE_SIZE; + kgem->aperture_high /= PAGE_SIZE; + kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2; if ((int)kgem->fence_max < 0) kgem->fence_max = 5; /* minimum safe value for all hw */ @@ -811,7 +814,7 @@ kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo) exec->handle = bo->handle; exec->offset = bo->presumed_offset; - kgem->aperture += bo->size; + kgem->aperture += num_pages(bo); return exec; } @@ -875,7 +878,7 @@ static void kgem_bo_release_map(struct kgem *kgem, struct kgem_bo *bo) bo->handle, kgem->vma[type].count)); VG(if (type) VALGRIND_FREELIKE_BLOCK(CPU_MAP(bo->map), 0)); - munmap(CPU_MAP(bo->map), bo->size); + munmap(CPU_MAP(bo->map), bytes(bo)); bo->map = NULL; if (!list_is_empty(&bo->vma)) { @@ -917,16 +920,22 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem, assert(bo->rq == NULL); assert(bo->domain != DOMAIN_GPU); - list_move(&bo->list, &kgem->inactive[bo->bucket]); + if (bucket(bo) >= NUM_CACHE_BUCKETS) { + kgem_bo_free(kgem, bo); + return; + } + + list_move(&bo->list, &kgem->inactive[bucket(bo)]); if (bo->map) { int type = IS_CPU_MAP(bo->map); - if (!type && !kgem_bo_is_mappable(kgem, bo)) { + if (bucket(bo) >= NUM_CACHE_BUCKETS || + (!type && !kgem_bo_is_mappable(kgem, bo))) { list_del(&bo->vma); - munmap(CPU_MAP(bo->map), bo->size); + munmap(CPU_MAP(bo->map), bytes(bo)); bo->map = NULL; } if (bo->map) { - list_move(&bo->vma, &kgem->vma[type].inactive[bo->bucket]); + list_move(&bo->vma, &kgem->vma[type].inactive[bucket(bo)]); kgem->vma[type].count++; } } @@ -1002,8 +1011,14 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) bo->scanout = bo->flush = false; if (bo->rq) { + struct list *cache; + DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle)); - list_add(&bo->list, &kgem->active[bo->bucket][bo->tiling]); + if (bucket(bo) < NUM_CACHE_BUCKETS) + cache = &kgem->active[bucket(bo)][bo->tiling]; + else + cache = &kgem->large; + list_add(&bo->list, cache); return; } @@ -1012,10 +1027,17 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) if (bo->needs_flush) { if ((bo->needs_flush = kgem_busy(kgem, bo->handle))) { + struct list *cache; + DBG(("%s: handle=%d -> flushing\n", __FUNCTION__, bo->handle)); + list_add(&bo->request, &kgem->flushing); - list_add(&bo->list, &kgem->active[bo->bucket][bo->tiling]); + if (bucket(bo) < NUM_CACHE_BUCKETS) + cache = &kgem->active[bucket(bo)][bo->tiling]; + else + cache = &kgem->large; + list_add(&bo->list, cache); bo->rq = &_kgem_static_request; return; } @@ -1231,7 +1253,7 @@ static void kgem_close_inactive(struct kgem *kgem) static void bubble_sort_partial(struct kgem *kgem, struct kgem_partial_bo *bo) { - int remain = bo->base.size - bo->used; + int remain = bytes(&bo->base) - bo->used; while (bo->base.list.prev != &kgem->partial) { struct kgem_partial_bo *p; @@ -1239,7 +1261,7 @@ static void bubble_sort_partial(struct kgem *kgem, struct kgem_partial_bo *bo) p = list_entry(bo->base.list.prev, struct kgem_partial_bo, base.list); - if (remain <= p->base.size - p->used) + if (remain <= bytes(&p->base) - p->used) break; assert(p->base.list.next == &bo->base.list); @@ -1282,7 +1304,7 @@ static void kgem_finish_partials(struct kgem *kgem) assert(bo->base.rq == kgem->next_request); if 
(bo->used && bo->need_io) { if (bo->base.refcnt == 1 && - bo->used < bo->base.size / 2) { + bo->used < bytes(&bo->base) / 2) { struct kgem_bo *shrink; shrink = search_linear_cache(kgem, @@ -1293,10 +1315,10 @@ static void kgem_finish_partials(struct kgem *kgem) DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n", __FUNCTION__, - bo->used, bo->base.size, shrink->size, + bo->used, bytes(&bo->base), bytes(shrink), bo->base.handle, shrink->handle)); - assert(bo->used <= shrink->size); + assert(bo->used <= bytes(shrink)); gem_write(kgem->fd, shrink->handle, 0, bo->used, bo->mem); @@ -1330,9 +1352,9 @@ static void kgem_finish_partials(struct kgem *kgem) } DBG(("%s: handle=%d, uploading %d/%d\n", - __FUNCTION__, bo->base.handle, bo->used, bo->base.size)); + __FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base))); assert(!kgem_busy(kgem, bo->base.handle)); - assert(bo->used <= bo->base.size); + assert(bo->used <= bytes(&bo->base)); gem_write(kgem->fd, bo->base.handle, 0, bo->used, bo->mem); bo->need_io = 0; @@ -1616,7 +1638,7 @@ void _kgem_submit(struct kgem *kgem) i, kgem->exec[i].handle, (int)kgem->exec[i].offset, - found ? found->size : -1, + found ? bytes(found) : -1, found ? found->tiling : -1, (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE), found ? found->purged : -1); @@ -1690,7 +1712,7 @@ static void kgem_expire_partial(struct kgem *kgem) continue; DBG(("%s: discarding unused partial buffer: %d/%d, write? %d\n", - __FUNCTION__, bo->used, bo->base.size, bo->write)); + __FUNCTION__, bo->used, bytes(&bo->base), bo->write)); list_del(&bo->base.list); kgem_bo_unref(kgem, &bo->base); } @@ -1773,7 +1795,7 @@ bool kgem_expire_cache(struct kgem *kgem) list_move_tail(&bo->list, &preserve); } else { count++; - size += bo->size; + size += bytes(bo); kgem_bo_free(kgem, bo); DBG(("%s: expiring %d\n", __FUNCTION__, bo->handle)); @@ -1834,28 +1856,31 @@ void kgem_cleanup_cache(struct kgem *kgem) } static struct kgem_bo * -search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags) +search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) { struct kgem_bo *bo, *first = NULL; bool use_active = (flags & CREATE_INACTIVE) == 0; struct list *cache; + if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE) + return NULL; + if (!use_active && - list_is_empty(inactive(kgem, size)) && - !list_is_empty(active(kgem, size, I915_TILING_NONE)) && + list_is_empty(inactive(kgem, num_pages)) && + !list_is_empty(active(kgem, num_pages, I915_TILING_NONE)) && !kgem_retire(kgem)) return NULL; if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { int for_cpu = !!(flags & CREATE_CPU_MAP); - cache = &kgem->vma[for_cpu].inactive[cache_bucket(size)]; + cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)]; list_for_each_entry(bo, cache, vma) { assert(IS_CPU_MAP(bo->map) == for_cpu); - assert(bo->bucket == cache_bucket(size)); + assert(bucket(bo) == cache_bucket(num_pages)); - if (size > bo->size) { + if (num_pages > num_pages(bo)) { DBG(("inactive too small: %d < %d\n", - bo->size, size)); + num_pages(bo), num_pages)); continue; } @@ -1874,8 +1899,8 @@ search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags) bo->tiling = I915_TILING_NONE; bo->pitch = 0; bo->delta = 0; - DBG((" %s: found handle=%d (size=%d) in linear vma cache\n", - __FUNCTION__, bo->handle, bo->size)); + DBG((" %s: found handle=%d (num_pages=%d) in linear vma cache\n", + __FUNCTION__, bo->handle, num_pages(bo))); assert(use_active || bo->domain != DOMAIN_GPU); assert(!bo->needs_flush); 
//assert(!kgem_busy(kgem, bo->handle)); @@ -1883,13 +1908,13 @@ search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags) } } - cache = use_active ? active(kgem, size, I915_TILING_NONE) : inactive(kgem, size); + cache = use_active ? active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages); list_for_each_entry(bo, cache, list) { assert(bo->refcnt == 0); assert(bo->reusable); assert(!!bo->rq == !!use_active); - if (size > bo->size) + if (num_pages > num_pages(bo)) continue; if (use_active && bo->tiling != I915_TILING_NONE) @@ -1946,8 +1971,8 @@ search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags) assert(bo->tiling == I915_TILING_NONE); bo->pitch = 0; bo->delta = 0; - DBG((" %s: found handle=%d (size=%d) in linear %s cache\n", - __FUNCTION__, bo->handle, bo->size, + DBG((" %s: found handle=%d (num_pages=%d) in linear %s cache\n", + __FUNCTION__, bo->handle, num_pages(bo), use_active ? "active" : "inactive")); assert(use_active || bo->domain != DOMAIN_GPU); assert(!bo->needs_flush || use_active); @@ -1965,8 +1990,8 @@ search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags) first->pitch = 0; first->delta = 0; - DBG((" %s: found handle=%d (size=%d) in linear %s cache\n", - __FUNCTION__, first->handle, first->size, + DBG((" %s: found handle=%d (num_pages=%d) in linear %s cache\n", + __FUNCTION__, first->handle, num_pages(first), use_active ? "active" : "inactive")); assert(use_active || first->domain != DOMAIN_GPU); assert(!first->needs_flush || use_active); @@ -1990,7 +2015,7 @@ struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name) return NULL; DBG(("%s: new handle=%d\n", __FUNCTION__, open_arg.handle)); - bo = __kgem_bo_alloc(open_arg.handle, open_arg.size); + bo = __kgem_bo_alloc(open_arg.handle, open_arg.size / PAGE_SIZE); if (bo == NULL) { gem_close(kgem->fd, open_arg.handle); return NULL; @@ -2007,7 +2032,7 @@ struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size) DBG(("%s(%d)\n", __FUNCTION__, size)); - size = PAGE_ALIGN(size); + size = (size + PAGE_SIZE - 1) / PAGE_SIZE; bo = search_linear_cache(kgem, size, CREATE_INACTIVE); if (bo) return kgem_bo_reference(bo); @@ -2019,7 +2044,7 @@ struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size) DBG(("%s: new handle=%d\n", __FUNCTION__, handle)); bo = __kgem_bo_alloc(handle, size); if (bo == NULL) { - gem_close(kgem->fd, size); + gem_close(kgem->fd, handle); return NULL; } @@ -2028,8 +2053,6 @@ struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size) int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int bpp) { - uint32_t pitch; - if (DBG_NO_TILING) return tiling < 0 ? 
tiling : I915_TILING_NONE; @@ -2058,17 +2081,6 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int } } - /* First check that we can fence the whole object */ - if (tiling && - kgem_surface_size(kgem, false, false, - width, height, bpp, tiling, - &pitch) > kgem->max_gpu_size) { - DBG(("%s: too large (%dx%d) to be fenced, discarding tiling\n", - __FUNCTION__, width, height)); - tiling = I915_TILING_NONE; - goto done; - } - if (tiling < 0) return tiling; @@ -2125,18 +2137,42 @@ done: return tiling; } -bool kgem_can_create_cpu(struct kgem *kgem, +bool kgem_can_create_2d(struct kgem *kgem, int width, int height, int depth) { + int bpp = BitsPerPixel(depth); uint32_t pitch, size; if (depth < 8 || kgem->wedged) return false; size = kgem_surface_size(kgem, false, false, - width, height, BitsPerPixel(depth), + width, height, bpp, + I915_TILING_X, &pitch); + if (size > 0 && size <= kgem->max_object_size) + return true; + + size = kgem_surface_size(kgem, false, false, + width, height, bpp, I915_TILING_NONE, &pitch); - return size > 0 && size < kgem->max_cpu_size; + if (size > 0 && size <= kgem->max_object_size) + return true; + + return false; +} + +bool kgem_can_create_cpu(struct kgem *kgem, + int width, int height, int bpp) +{ + uint32_t pitch, size; + + if (bpp < 8 || kgem->wedged) + return false; + + size = kgem_surface_size(kgem, false, false, + width, height, bpp, + I915_TILING_NONE, &pitch); + return size > 0 && size <= kgem->max_cpu_size; } static bool _kgem_can_create_gpu(struct kgem *kgem, @@ -2179,7 +2215,7 @@ inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo) size = 512 * 1024; else size = 1024 * 1024; - while (size < bo->size) + while (size < bytes(bo)) size *= 2; return size; @@ -2213,10 +2249,52 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, kgem->has_relaxed_fencing, flags & CREATE_SCANOUT, width, height, bpp, tiling, &pitch); - assert(size && size < kgem->max_cpu_size); - assert(tiling == I915_TILING_NONE || size < kgem->max_gpu_size); + assert(size && size <= kgem->max_object_size); + size /= PAGE_SIZE; bucket = cache_bucket(size); + if (bucket >= NUM_CACHE_BUCKETS) { + DBG(("%s: large bo num pages=%d, bucket=%d\n", + __FUNCTION__, size, bucket)); + + if (flags & CREATE_INACTIVE) + goto create; + + tiled_height = kgem_aligned_height(kgem, height, I915_TILING_Y); + untiled_pitch = kgem_untiled_pitch(kgem, + width, bpp, + flags & CREATE_SCANOUT); + + list_for_each_entry(bo, &kgem->large, list) { + assert(!bo->purged); + assert(bo->refcnt == 0); + assert(bo->reusable); + + if (bo->tiling) { + if (bo->pitch < pitch) { + DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n", + bo->tiling, tiling, + bo->pitch, pitch)); + continue; + } + } else + bo->pitch = untiled_pitch; + + if (bo->pitch * tiled_height > bytes(bo)) + continue; + + kgem_bo_remove_from_active(kgem, bo); + + bo->unique_id = kgem_get_unique_id(kgem); + bo->delta = 0; + DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", + bo->pitch, bo->tiling, bo->handle, bo->unique_id)); + return kgem_bo_reference(bo); + } + + goto create; + } + if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { int for_cpu = !!(flags & CREATE_CPU_MAP); if (kgem->has_llc && tiling == I915_TILING_NONE) @@ -2227,16 +2305,16 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, cache = &kgem->vma[for_cpu].inactive[bucket]; do { list_for_each_entry(bo, cache, vma) { - assert(bo->bucket == bucket); + assert(bucket(bo) == bucket); assert(bo->refcnt == 0); assert(bo->map); 
assert(IS_CPU_MAP(bo->map) == for_cpu); assert(bo->rq == NULL); assert(list_is_empty(&bo->request)); - if (size > bo->size) { + if (size > num_pages(bo)) { DBG(("inactive too small: %d < %d\n", - bo->size, size)); + num_pages(bo), size)); continue; } @@ -2275,13 +2353,14 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, if (retry > 3) retry = 3; search_again: + assert(bucket < NUM_CACHE_BUCKETS); cache = &kgem->active[bucket][tiling]; if (tiling) { tiled_height = kgem_aligned_height(kgem, height, tiling); list_for_each_entry(bo, cache, list) { assert(!bo->purged); assert(bo->refcnt == 0); - assert(bo->bucket == bucket); + assert(bucket(bo) == bucket); assert(bo->reusable); assert(bo->tiling == tiling); @@ -2292,7 +2371,7 @@ search_again: continue; } - if (bo->pitch * tiled_height > bo->size) + if (bo->pitch * tiled_height > bytes(bo)) continue; kgem_bo_remove_from_active(kgem, bo); @@ -2305,13 +2384,13 @@ search_again: } } else { list_for_each_entry(bo, cache, list) { - assert(bo->bucket == bucket); + assert(bucket(bo) == bucket); assert(!bo->purged); assert(bo->refcnt == 0); assert(bo->reusable); assert(bo->tiling == tiling); - if (bo->size < size) + if (num_pages(bo) < size) continue; kgem_bo_remove_from_active(kgem, bo); @@ -2340,7 +2419,7 @@ search_again: kgem->has_relaxed_fencing, flags & CREATE_SCANOUT, width, height, bpp, tiling, &pitch); - cache = active(kgem, tiled_height, i); + cache = active(kgem, tiled_height / PAGE_SIZE, i); tiled_height = kgem_aligned_height(kgem, height, i); list_for_each_entry(bo, cache, list) { assert(!bo->purged); @@ -2357,7 +2436,7 @@ search_again: } else bo->pitch = untiled_pitch; - if (bo->pitch * tiled_height > bo->size) + if (bo->pitch * tiled_height > bytes(bo)) continue; kgem_bo_remove_from_active(kgem, bo); @@ -2378,13 +2457,14 @@ skip_active_search: retry = 3; search_inactive: /* Now just look for a close match and prefer any currently active */ + assert(bucket < NUM_CACHE_BUCKETS); cache = &kgem->inactive[bucket]; list_for_each_entry_safe(bo, next, cache, list) { - assert(bo->bucket == bucket); + assert(bucket(bo) == bucket); - if (size > bo->size) { + if (size > num_pages(bo)) { DBG(("inactive too small: %d < %d\n", - bo->size, size)); + num_pages(bo), size)); continue; } @@ -2439,6 +2519,7 @@ search_inactive: goto search_inactive; } +create: handle = gem_create(kgem->fd, size); if (handle == 0) return NULL; @@ -2455,7 +2536,7 @@ search_inactive: if (tiling != I915_TILING_NONE) bo->tiling = gem_set_tiling(kgem->fd, handle, tiling, pitch); - assert(bo->size >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling)); + assert(bytes(bo) >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling)); DBG((" new pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); @@ -2470,9 +2551,9 @@ static void _kgem_bo_delete_partial(struct kgem *kgem, struct kgem_bo *bo) return; DBG(("%s: size=%d, offset=%d, parent used=%d\n", - __FUNCTION__, bo->size, bo->delta, io->used)); + __FUNCTION__, bo->size.bytes, bo->delta, io->used)); - if (bo->delta + bo->size == io->used) { + if (bo->delta + bo->size.bytes == io->used) { io->used = bo->delta; bubble_sort_partial(kgem, io); } @@ -2508,25 +2589,30 @@ bool kgem_check_bo(struct kgem *kgem, ...) 
va_list ap; struct kgem_bo *bo; int num_exec = 0; - int size = 0; + int num_pages = 0; va_start(ap, kgem); while ((bo = va_arg(ap, struct kgem_bo *))) { if (bo->exec) continue; - size += bo->size; + if (bo->proxy) { + bo = bo->proxy; + if (bo->exec) + continue; + } + num_pages += num_pages(bo); num_exec++; } va_end(ap); - if (!size) + if (!num_pages) return true; if (kgem->aperture > kgem->aperture_low) return false; - if (size + kgem->aperture > kgem->aperture_high) + if (num_pages + kgem->aperture > kgem->aperture_high) return false; if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) @@ -2541,11 +2627,13 @@ bool kgem_check_bo_fenced(struct kgem *kgem, ...) struct kgem_bo *bo; int num_fence = 0; int num_exec = 0; - int size = 0; + int num_pages = 0; int fenced_size = 0; va_start(ap, kgem); while ((bo = va_arg(ap, struct kgem_bo *))) { + if (bo->proxy) + bo = bo->proxy; if (bo->exec) { if (kgem->gen >= 40 || bo->tiling == I915_TILING_NONE) continue; @@ -2558,7 +2646,7 @@ bool kgem_check_bo_fenced(struct kgem *kgem, ...) continue; } - size += bo->size; + num_pages += num_pages(bo); num_exec++; if (kgem->gen < 40 && bo->tiling) { fenced_size += kgem_bo_fenced_size(kgem, bo); @@ -2573,13 +2661,13 @@ bool kgem_check_bo_fenced(struct kgem *kgem, ...) if (kgem->nfence + num_fence > kgem->fence_max) return false; - if (!size) + if (!num_pages) return true; if (kgem->aperture > kgem->aperture_low) return false; - if (size + kgem->aperture > kgem->aperture_high) + if (num_pages + kgem->aperture > kgem->aperture_high) return false; if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) @@ -2698,7 +2786,7 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket) assert(bo->rq == NULL); VG(if (type) VALGRIND_FREELIKE_BLOCK(CPU_MAP(bo->map), 0)); - munmap(CPU_MAP(bo->map), bo->size); + munmap(CPU_MAP(bo->map), bytes(bo)); bo->map = NULL; list_del(&bo->vma); kgem->vma[type].count--; @@ -2736,11 +2824,11 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) ptr = bo->map; if (ptr == NULL) { - assert(bo->size <= kgem->aperture_mappable / 4); + assert(bytes(bo) <= kgem->aperture_mappable / 4); - kgem_trim_vma_cache(kgem, MAP_GTT, bo->bucket); + kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); - ptr = gem_mmap(kgem->fd, bo->handle, bo->size, + ptr = gem_mmap(kgem->fd, bo->handle, bytes(bo), PROT_READ | PROT_WRITE); if (ptr == NULL) return NULL; @@ -2780,8 +2868,8 @@ void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo) if (bo->map) return bo->map; - kgem_trim_vma_cache(kgem, MAP_GTT, bo->bucket); - return bo->map = gem_mmap(kgem->fd, bo->handle, bo->size, + kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); + return bo->map = gem_mmap(kgem->fd, bo->handle, bytes(bo), PROT_READ | PROT_WRITE); } @@ -2789,7 +2877,7 @@ void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo) { struct drm_i915_gem_mmap mmap_arg; - DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, bo->handle, bo->size)); + DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, bo->handle, bytes(bo))); assert(!bo->purged); assert(list_is_empty(&bo->list)); @@ -2799,18 +2887,19 @@ void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo) if (bo->map) kgem_bo_release_map(kgem, bo); - kgem_trim_vma_cache(kgem, MAP_CPU, bo->bucket); + kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo)); VG_CLEAR(mmap_arg); mmap_arg.handle = bo->handle; mmap_arg.offset = 0; - mmap_arg.size = bo->size; + mmap_arg.size = bytes(bo); if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) { - assert(0); + ErrorF("%s: failed to mmap %d, %d bytes, 
into CPU domain\n", + __FUNCTION__, bo->handle, bytes(bo)); return NULL; } - VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, bo->size, 0, 1)); + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, bytes(bo), 0, 1)); DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle)); bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr); @@ -2876,6 +2965,9 @@ struct kgem_bo *kgem_create_map(struct kgem *kgem, if (!kgem->has_vmap) return NULL; + if (size >= MAX_CACHE_SIZE) + return NULL; + handle = gem_vmap(kgem->fd, ptr, size, read_only); if (handle == 0) return NULL; @@ -2972,6 +3064,7 @@ struct kgem_bo *kgem_create_proxy(struct kgem_bo *target, if (bo == NULL) return NULL; + bo->size.bytes = length; bo->io = target->io; bo->dirty = target->dirty; bo->tiling = target->tiling; @@ -2982,11 +3075,11 @@ struct kgem_bo *kgem_create_proxy(struct kgem_bo *target, return bo; } -static struct kgem_partial_bo *partial_bo_alloc(int size) +static struct kgem_partial_bo *partial_bo_alloc(int num_pages) { struct kgem_partial_bo *bo; - bo = malloc(sizeof(*bo) + 128 + size); + bo = malloc(sizeof(*bo) + 128 + num_pages * PAGE_SIZE); if (bo) { bo->mem = (void *)ALIGN((uintptr_t)bo + sizeof(*bo), 64); bo->mmapped = false; @@ -3010,20 +3103,20 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, !!(flags & KGEM_BUFFER_LAST))); assert(size); /* we should never be asked to create anything TOO large */ - assert(size < kgem->max_cpu_size); + assert(size <= kgem->max_cpu_size); list_for_each_entry(bo, &kgem->partial, base.list) { if (flags == KGEM_BUFFER_LAST && bo->write) { /* We can reuse any write buffer which we can fit */ - if (size <= bo->base.size) { + if (size <= bytes(&bo->base)) { if (bo->base.refcnt == 1 && bo->base.exec) { DBG(("%s: reusing write buffer for read of %d bytes? used=%d, total=%d\n", - __FUNCTION__, size, bo->used, bo->base.size)); + __FUNCTION__, size, bo->used, bytes(&bo->base))); offset = 0; goto done; - } else if (bo->used + size <= bo->base.size) { + } else if (bo->used + size <= bytes(&bo->base)) { DBG(("%s: reusing unfinished write buffer for read of %d bytes? used=%d, total=%d\n", - __FUNCTION__, size, bo->used, bo->base.size)); + __FUNCTION__, size, bo->used, bytes(&bo->base))); offset = bo->used; goto done; } @@ -3037,24 +3130,25 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, continue; } - if (bo->used + size <= bo->base.size) { + if (bo->used + size <= bytes(&bo->base)) { DBG(("%s: reusing partial buffer? 
used=%d + size=%d, total=%d\n", - __FUNCTION__, bo->used, size, bo->base.size)); + __FUNCTION__, bo->used, size, bytes(&bo->base))); offset = bo->used; bo->used += size; goto done; } DBG(("%s: too small (%d < %d)\n", - __FUNCTION__, bo->base.size - bo->used, size)); + __FUNCTION__, bytes(&bo->base) - bo->used, size)); break; } #if !DBG_NO_MAP_UPLOAD /* Be a little more generous and hope to hold fewer mmappings */ alloc = ALIGN(2*size, kgem->partial_buffer_size); - if (alloc >= kgem->max_cpu_size) + if (alloc > kgem->max_gpu_size) alloc = PAGE_ALIGN(size); + alloc /= PAGE_SIZE; if (kgem->has_cpu_bo) { bo = malloc(sizeof(*bo)); if (bo == NULL) @@ -3098,7 +3192,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, bo->base.io = true; bo->mmapped = true; - alloc = bo->base.size; + alloc = num_pages(&bo->base); goto init; } else { bo->base.refcnt = 0; /* for valgrind */ @@ -3107,7 +3201,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, } } - if (alloc > kgem->aperture_mappable / 4) + if (PAGE_SIZE * alloc > kgem->aperture_mappable / 4) flags &= ~KGEM_BUFFER_INPLACE; if ((flags & KGEM_BUFFER_WRITE_INPLACE) == KGEM_BUFFER_WRITE_INPLACE) { @@ -3164,7 +3258,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, bo->mmapped = true; bo->base.refcnt = 1; - alloc = bo->base.size; + alloc = num_pages(&bo->base); goto init; } else { kgem_bo_free(kgem, &bo->base); @@ -3173,11 +3267,11 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, } } #else - alloc = ALIGN(size, 64*1024); + alloc = ALIGN(size, 64*1024) / PAGE_SIZE; #endif /* Be more parsimonious with pwrite/pread buffers */ if ((flags & KGEM_BUFFER_INPLACE) == 0) - alloc = PAGE_ALIGN(size); + alloc = PAGE_ALIGN(size) / PAGE_SIZE; flags &= ~KGEM_BUFFER_INPLACE; old = NULL; @@ -3188,7 +3282,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, if (old) { DBG(("%s: reusing ordinary handle %d for io\n", __FUNCTION__, old->handle)); - alloc = old->size; + alloc = num_pages(old); bo = partial_bo_alloc(alloc); if (bo == NULL) return NULL; @@ -3240,21 +3334,40 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, } bo->mem = kgem_bo_map__cpu(kgem, &bo->base); - if (bo->mem == NULL) { - kgem_bo_free(kgem, &bo->base); + if (bo->mem != NULL) { + if (flags & KGEM_BUFFER_WRITE) + kgem_bo_sync__cpu(kgem, &bo->base); + + bo->need_io = false; + bo->base.io = true; + bo->mmapped = true; + goto init; + } + + DBG(("%s: failing back to new pwrite buffer\n", __FUNCTION__)); + old = &bo->base; + bo = partial_bo_alloc(alloc); + if (bo == NULL) { + free(old); return NULL; } - if (flags & KGEM_BUFFER_WRITE) - kgem_bo_sync__cpu(kgem, &bo->base); + memcpy(&bo->base, old, sizeof(*old)); + free(old); + + assert(bo->mem); + assert(!bo->mmapped); - bo->need_io = false; + list_init(&bo->base.request); + list_init(&bo->base.vma); + list_init(&bo->base.list); + bo->base.refcnt = 1; + bo->need_io = flags & KGEM_BUFFER_WRITE; bo->base.io = true; - bo->mmapped = true; } init: bo->base.reusable = false; - assert(bo->base.size == alloc); + assert(num_pages(&bo->base) == alloc); assert(!bo->need_io || !bo->base.needs_flush); assert(!bo->need_io || bo->base.domain != DOMAIN_GPU); @@ -3263,12 +3376,12 @@ init: offset = 0; list_add(&bo->base.list, &kgem->partial); - DBG(("%s(size=%d) new handle=%d\n", + DBG(("%s(pages=%d) new handle=%d\n", __FUNCTION__, alloc, bo->base.handle)); done: /* adjust the position within the list to maintain decreasing order */ - alloc = bo->base.size - bo->used; + alloc = bytes(&bo->base) - bo->used; { struct kgem_partial_bo *p, 
*first; @@ -3276,9 +3389,9 @@ done: struct kgem_partial_bo, base.list); while (&p->base.list != &kgem->partial && - alloc < p->base.size - p->used) { + alloc < bytes(&p->base) - p->used) { DBG(("%s: this=%d, right=%d\n", - __FUNCTION__, alloc, p->base.size -p->used)); + __FUNCTION__, alloc, bytes(&p->base) -p->used)); p = list_first_entry(&p->base.list, struct kgem_partial_bo, base.list); @@ -3287,6 +3400,7 @@ done: list_move_tail(&bo->base.list, &p->base.list); assert(validate_partials(kgem)); } + assert(bo->mem); *ret = (char *)bo->mem + offset; return kgem_create_proxy(&bo->base, offset, size); } @@ -3300,6 +3414,7 @@ struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem, int stride; assert(width > 0 && height > 0); + assert(ret != NULL); stride = ALIGN(width, 2) * bpp >> 3; stride = ALIGN(stride, kgem->min_alignment); @@ -3307,8 +3422,12 @@ struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem, __FUNCTION__, width, height, bpp, stride)); bo = kgem_create_buffer(kgem, stride * ALIGN(height, 2), flags, ret); - if (bo == NULL) + if (bo == NULL) { + DBG(("%s: allocation failure for upload buffer\n", + __FUNCTION__)); return NULL; + } + assert(*ret != NULL); if (height & 1) { struct kgem_partial_bo *io = (struct kgem_partial_bo *)bo->proxy; @@ -3319,7 +3438,7 @@ struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem, */ if (io->used) io->used -= stride; - bo->size -= stride; + bo->size.bytes -= stride; bubble_sort_partial(kgem, io); } @@ -3357,8 +3476,9 @@ struct kgem_bo *kgem_upload_source_image(struct kgem *kgem, void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo) { struct kgem_partial_bo *bo; - uint32_t offset = _bo->delta, length = _bo->size; + uint32_t offset = _bo->delta, length = _bo->size.bytes; + assert(_bo->io); assert(_bo->exec == NULL); if (_bo->proxy) _bo = _bo->proxy; @@ -3461,7 +3581,7 @@ kgem_replace_bo(struct kgem *kgem, assert(src->tiling == I915_TILING_NONE); size = height * pitch; - size = PAGE_ALIGN(size); + size = PAGE_ALIGN(size) / PAGE_SIZE; dst = search_linear_cache(kgem, size, 0); if (dst == NULL) diff --git a/src/sna/kgem.h b/src/sna/kgem.h index 0dc67dac..2631e818 100644 --- a/src/sna/kgem.h +++ b/src/sna/kgem.h @@ -66,10 +66,16 @@ struct kgem_bo { uint32_t handle; uint32_t presumed_offset; uint32_t delta; - uint32_t size:28; - uint32_t bucket:4; -#define MAX_OBJECT_SIZE (1 << 28) - + union { + struct { + uint32_t count:27; +#define PAGE_SIZE 4096 + uint32_t bucket:5; +#define NUM_CACHE_BUCKETS 16 +#define MAX_CACHE_SIZE (1 << (NUM_CACHE_BUCKETS+12)) + } pages; + uint32_t bytes; + } size; uint32_t pitch : 18; /* max 128k */ uint32_t tiling : 2; uint32_t reusable : 1; @@ -100,8 +106,6 @@ enum { NUM_MAP_TYPES, }; -#define NUM_CACHE_BUCKETS 16 - struct kgem { int fd; int wedged; @@ -117,7 +121,10 @@ struct kgem { KGEM_BLT, } mode, ring; - struct list flushing, active[NUM_CACHE_BUCKETS][3], inactive[NUM_CACHE_BUCKETS]; + struct list flushing; + struct list large; + struct list active[NUM_CACHE_BUCKETS][3]; + struct list inactive[NUM_CACHE_BUCKETS]; struct list partial; struct list requests; struct kgem_request *next_request; @@ -154,7 +161,7 @@ struct kgem { uint32_t aperture_total, aperture_high, aperture_low, aperture_mappable; uint32_t aperture, aperture_fenced; uint32_t min_alignment; - uint32_t max_gpu_size, max_cpu_size; + uint32_t max_gpu_size, max_cpu_size, max_object_size; uint32_t partial_buffer_size; void (*context_switch)(struct kgem *kgem, int new_mode); @@ -194,8 +201,9 @@ struct kgem_bo *kgem_upload_source_image(struct kgem *kgem, 
int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int bpp); +bool kgem_can_create_2d(struct kgem *kgem, int width, int height, int depth); bool kgem_can_create_gpu(struct kgem *kgem, int width, int height, int bpp); -bool kgem_can_create_cpu(struct kgem *kgem, int width, int height, int depth); +bool kgem_can_create_cpu(struct kgem *kgem, int width, int height, int bpp); struct kgem_bo * kgem_replace_bo(struct kgem *kgem, @@ -354,11 +362,46 @@ Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo, int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo); +static inline int kgem_bo_size(struct kgem_bo *bo) +{ + assert(!(bo->proxy && bo->io)); + return PAGE_SIZE * bo->size.pages.count; +} + +static inline int kgem_buffer_size(struct kgem_bo *bo) +{ + assert(bo->proxy && bo->io); + return bo->size.bytes; +} + +static inline bool kgem_bo_can_blt(struct kgem *kgem, + struct kgem_bo *bo) +{ + int pitch; + + if (bo->tiling == I915_TILING_Y) { + DBG(("%s: can not blt to handle=%d, tiling=Y\n", + __FUNCTION__, bo->handle)); + return false; + } + + pitch = bo->pitch; + if (kgem->gen >= 40 && bo->tiling) + pitch /= 4; + if (pitch > MAXSHORT) { + DBG(("%s: can not blt to handle=%d, adjusted pitch=%d\n", + __FUNCTION__, pitch)); + return false; + } + + return true; +} + static inline bool kgem_bo_is_mappable(struct kgem *kgem, struct kgem_bo *bo) { DBG_HDR(("%s: domain=%d, offset: %d size: %d\n", - __FUNCTION__, bo->domain, bo->presumed_offset, bo->size)); + __FUNCTION__, bo->domain, bo->presumed_offset, kgem_bo_size(bo))); if (bo->domain == DOMAIN_GTT) return true; @@ -371,9 +414,9 @@ static inline bool kgem_bo_is_mappable(struct kgem *kgem, return false; if (!bo->presumed_offset) - return bo->size <= kgem->aperture_mappable / 4; + return kgem_bo_size(bo) <= kgem->aperture_mappable / 4; - return bo->presumed_offset + bo->size <= kgem->aperture_mappable; + return bo->presumed_offset + kgem_bo_size(bo) <= kgem->aperture_mappable; } static inline bool kgem_bo_mapped(struct kgem_bo *bo) diff --git a/src/sna/kgem_debug_gen5.c b/src/sna/kgem_debug_gen5.c index f21220f3..9e7360af 100644 --- a/src/sna/kgem_debug_gen5.c +++ b/src/sna/kgem_debug_gen5.c @@ -79,7 +79,7 @@ static void gen5_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) } else { bo = kgem_debug_get_bo_for_reloc_entry(kgem, reloc); base = kgem_bo_map__debug(kgem, bo); - size = bo->size; + size = kgem_bo_size(bo); } ptr = (char *)base + reloc->delta; diff --git a/src/sna/sna.h b/src/sna/sna.h index 5910daf2..d9ba7735 100644 --- a/src/sna/sna.h +++ b/src/sna/sna.h @@ -92,7 +92,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define DEBUG_NO_RENDER 0 #define DEBUG_NO_BLT 0 -#define DEBUG_NO_IO 0 #define DEBUG_FLUSH_BATCH 0 #define DEBUG_FLUSH_SYNC 0 @@ -647,7 +646,7 @@ void sna_read_boxes(struct sna *sna, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, PixmapPtr dst, int16_t dst_dx, int16_t dst_dy, const BoxRec *box, int n); -void sna_write_boxes(struct sna *sna, PixmapPtr dst, +bool sna_write_boxes(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, const void *src, int stride, int16_t src_dx, int16_t src_dy, const BoxRec *box, int n); @@ -657,10 +656,10 @@ void sna_write_boxes__xor(struct sna *sna, PixmapPtr dst, const BoxRec *box, int nbox, uint32_t and, uint32_t or); -struct kgem_bo *sna_replace(struct sna *sna, - PixmapPtr pixmap, - struct kgem_bo *bo, - const void *src, int stride); +bool sna_replace(struct sna *sna, + PixmapPtr pixmap, + struct kgem_bo **bo, + const void *src, int stride); struct kgem_bo *sna_replace__xor(struct sna *sna, PixmapPtr pixmap, struct kgem_bo *bo, diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c index f2997d02..ce351131 100644 --- a/src/sna/sna_accel.c +++ b/src/sna/sna_accel.c @@ -243,9 +243,14 @@ sna_pixmap_alloc_cpu(struct sna *sna, if (priv->ptr) goto done; + DBG(("%s: pixmap=%ld\n", __FUNCTION__, pixmap->drawable.serialNumber)); assert(priv->stride); - if (sna->kgem.has_cpu_bo || !priv->gpu) { + if ((sna->kgem.has_cpu_bo || !priv->gpu) && + kgem_can_create_cpu(&sna->kgem, + pixmap->drawable.width, + pixmap->drawable.height, + pixmap->drawable.bitsPerPixel)) { DBG(("%s: allocating CPU buffer (%dx%d)\n", __FUNCTION__, pixmap->drawable.width, pixmap->drawable.height)); @@ -270,8 +275,11 @@ sna_pixmap_alloc_cpu(struct sna *sna, } } - if (priv->ptr == NULL) + if (priv->ptr == NULL) { + DBG(("%s: allocating ordinary memory for shadow pixels [%d bytes]\n", + __FUNCTION__, priv->stride * pixmap->drawable.height)); priv->ptr = malloc(priv->stride * pixmap->drawable.height); + } assert(priv->ptr); done: @@ -289,7 +297,7 @@ static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv) if (priv->cpu_bo) { DBG(("%s: discarding CPU buffer, handle=%d, size=%d\n", - __FUNCTION__, priv->cpu_bo->handle, priv->cpu_bo->size)); + __FUNCTION__, priv->cpu_bo->handle, kgem_bo_size(priv->cpu_bo))); kgem_bo_destroy(&sna->kgem, priv->cpu_bo); priv->cpu_bo = NULL; @@ -515,10 +523,10 @@ struct sna_pixmap *_sna_pixmap_attach(PixmapPtr pixmap) break; default: - if (!kgem_can_create_gpu(&sna->kgem, - pixmap->drawable.width, - pixmap->drawable.height, - pixmap->drawable.bitsPerPixel)) + if (!kgem_can_create_2d(&sna->kgem, + pixmap->drawable.width, + pixmap->drawable.height, + pixmap->drawable.depth)) return NULL; break; } @@ -669,8 +677,11 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen, DBG(("%s(%d, %d, %d, usage=%x)\n", __FUNCTION__, width, height, depth, usage)); - if (!kgem_can_create_cpu(&sna->kgem, width, height, depth)) + if (!kgem_can_create_2d(&sna->kgem, width, height, depth)) { + DBG(("%s: can not use GPU, just creating shadow\n", + __FUNCTION__)); return create_pixmap(sna, screen, width, height, depth, usage); + } if (!sna->have_render) return create_pixmap(sna, screen, @@ -704,6 +715,8 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen, pad = PixmapBytePad(width, depth); if (pad * height <= 4096) { + DBG(("%s: small buffer [%d], attaching to shadow pixmap\n", + __FUNCTION__, pad * height)); pixmap = create_pixmap(sna, screen, width, height, depth, usage); if (pixmap == NullPixmap) @@ -713,6 +726,9 @@ static PixmapPtr 
sna_create_pixmap(ScreenPtr screen, } else { struct sna_pixmap *priv; + DBG(("%s: creating GPU pixmap %dx%d, stride=%d\n", + __FUNCTION__, width, height, pad)); + pixmap = create_pixmap(sna, screen, 0, 0, depth, usage); if (pixmap == NullPixmap) return NullPixmap; @@ -1609,19 +1625,20 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, BoxPtr box, unsigned int flags) box->x1 <= 0 && box->y1 <= 0 && box->x2 >= pixmap->drawable.width && box->y2 >= pixmap->drawable.height) { - priv->gpu_bo = - sna_replace(sna, pixmap, - priv->gpu_bo, - pixmap->devPrivate.ptr, - pixmap->devKind); + ok = sna_replace(sna, pixmap, + &priv->gpu_bo, + pixmap->devPrivate.ptr, + pixmap->devKind); } else { - sna_write_boxes(sna, pixmap, - priv->gpu_bo, 0, 0, - pixmap->devPrivate.ptr, - pixmap->devKind, - 0, 0, - box, n); + ok = sna_write_boxes(sna, pixmap, + priv->gpu_bo, 0, 0, + pixmap->devPrivate.ptr, + pixmap->devKind, + 0, 0, + box, n); } + if (!ok) + return false; } } @@ -1637,12 +1654,14 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, BoxPtr box, unsigned int flags) pixmap, priv->gpu_bo, 0, 0, box, 1); if (!ok) - sna_write_boxes(sna, pixmap, - priv->gpu_bo, 0, 0, - pixmap->devPrivate.ptr, - pixmap->devKind, - 0, 0, - box, 1); + ok = sna_write_boxes(sna, pixmap, + priv->gpu_bo, 0, 0, + pixmap->devPrivate.ptr, + pixmap->devKind, + 0, 0, + box, 1); + if (!ok) + return false; sna_damage_subtract(&priv->cpu_damage, &r); priv->undamaged = true; @@ -1658,12 +1677,14 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, BoxPtr box, unsigned int flags) pixmap, priv->gpu_bo, 0, 0, box, n); if (!ok) - sna_write_boxes(sna, pixmap, - priv->gpu_bo, 0, 0, - pixmap->devPrivate.ptr, - pixmap->devKind, - 0, 0, - box, n); + ok = sna_write_boxes(sna, pixmap, + priv->gpu_bo, 0, 0, + pixmap->devPrivate.ptr, + pixmap->devKind, + 0, 0, + box, n); + if (!ok) + return false; sna_damage_subtract(&priv->cpu_damage, &r); priv->undamaged = true; @@ -1671,7 +1692,7 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, BoxPtr box, unsigned int flags) } done: - if (!priv->pinned) + if (!priv->pinned && priv->gpu) list_move(&priv->inactive, &sna->active_pixmaps); priv->clear = false; return true; @@ -1811,7 +1832,7 @@ done: use_gpu_bo: priv->clear = false; - if (!priv->pinned) + if (!priv->pinned && priv->gpu) list_move(&priv->inactive, &to_sna_from_pixmap(pixmap)->active_pixmaps); *damage = NULL; @@ -1978,6 +1999,17 @@ sna_pixmap_force_to_gpu(PixmapPtr pixmap, unsigned flags) if (!sna_pixmap_move_to_gpu(pixmap, flags)) return NULL; + /* For large bo, try to keep only a single copy around */ + if (!priv->gpu && priv->ptr) { + sna_damage_all(&priv->gpu_damage, + pixmap->drawable.width, + pixmap->drawable.height); + sna_damage_destroy(&priv->cpu_damage); + priv->undamaged = false; + list_del(&priv->list); + sna_pixmap_free_cpu(to_sna_from_pixmap(pixmap), priv); + } + return priv; } @@ -2070,19 +2102,20 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) if (n == 1 && !priv->pinned && (box->x2 - box->x1) >= pixmap->drawable.width && (box->y2 - box->y1) >= pixmap->drawable.height) { - priv->gpu_bo = - sna_replace(sna, pixmap, - priv->gpu_bo, - pixmap->devPrivate.ptr, - pixmap->devKind); + ok = sna_replace(sna, pixmap, + &priv->gpu_bo, + pixmap->devPrivate.ptr, + pixmap->devKind); } else { - sna_write_boxes(sna, pixmap, + ok = sna_write_boxes(sna, pixmap, priv->gpu_bo, 0, 0, pixmap->devPrivate.ptr, pixmap->devKind, 0, 0, box, n); } + if (!ok) + return NULL; } } @@ -2098,7 +2131,7 @@ done: if (DAMAGE_IS_ALL(priv->gpu_damage)) priv->undamaged = false; 
active: - if (!priv->pinned) + if (!priv->pinned && priv->gpu) list_move(&priv->inactive, &sna->active_pixmaps); priv->clear = false; return priv; @@ -2321,11 +2354,8 @@ sna_put_image_upload_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, !priv->pinned && nbox == 1 && box->x1 <= 0 && box->y1 <= 0 && box->x2 >= pixmap->drawable.width && - box->y2 >= pixmap->drawable.height) { - priv->gpu_bo = - sna_replace(sna, pixmap, priv->gpu_bo, bits, stride); - return TRUE; - } + box->y2 >= pixmap->drawable.height) + return sna_replace(sna, pixmap, &priv->gpu_bo, bits, stride); get_drawable_deltas(drawable, pixmap, &dx, &dy); x += dx + drawable->x; @@ -2341,15 +2371,13 @@ sna_put_image_upload_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, kgem_bo_destroy(&sna->kgem, src_bo); } - if (!ok && gc->alu == GXcopy) { - sna_write_boxes(sna, pixmap, - priv->gpu_bo, 0, 0, - bits, - stride, - -x, -y, - box, nbox); - ok = TRUE; - } + if (!ok && gc->alu == GXcopy) + ok = sna_write_boxes(sna, pixmap, + priv->gpu_bo, 0, 0, + bits, + stride, + -x, -y, + box, nbox); return ok; } @@ -3213,7 +3241,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, } } else { dst_priv->clear = false; - if (!dst_priv->pinned) + if (!dst_priv->pinned && dst_priv->gpu) list_move(&dst_priv->inactive, &sna->active_pixmaps); } @@ -3400,10 +3428,10 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, assert(src_dy + box->y1 + dst_pixmap->drawable.height <= src_pixmap->drawable.height); assert(src_dx + box->x1 + dst_pixmap->drawable.width <= src_pixmap->drawable.width); - dst_priv->gpu_bo = - sna_replace(sna, dst_pixmap, - dst_priv->gpu_bo, - bits, stride); + if (!sna_replace(sna, dst_pixmap, + &dst_priv->gpu_bo, + bits, stride)) + goto fallback; if (!DAMAGE_IS_ALL(dst_priv->gpu_damage)) { sna_damage_destroy(&dst_priv->cpu_damage); @@ -3416,12 +3444,13 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, } else { DBG(("%s: dst is on the GPU, src is on the CPU, uploading into dst\n", __FUNCTION__)); - sna_write_boxes(sna, dst_pixmap, - dst_priv->gpu_bo, dst_dx, dst_dy, - src_pixmap->devPrivate.ptr, - src_pixmap->devKind, - src_dx, src_dy, - box, n); + if (!sna_write_boxes(sna, dst_pixmap, + dst_priv->gpu_bo, dst_dx, dst_dy, + src_pixmap->devPrivate.ptr, + src_pixmap->devKind, + src_dx, src_dy, + box, n)) + goto fallback; if (!DAMAGE_IS_ALL(dst_priv->gpu_damage)) { RegionTranslate(®ion, dst_dx, dst_dy); @@ -11502,7 +11531,7 @@ static void sna_accel_inactive(struct sna *sna) count = bytes = 0; list_for_each_entry(priv, &sna->inactive_clock[1], inactive) if (!priv->pinned) - count++, bytes += priv->gpu_bo->size; + count++, bytes += kgem_bo_size(priv->gpu_bo); DBG(("%s: trimming %d inactive GPU buffers, %d bytes\n", __FUNCTION__, count, bytes)); @@ -11528,6 +11557,9 @@ static void sna_accel_inactive(struct sna *sna) priv = list_first_entry(&sna->inactive_clock[1], struct sna_pixmap, inactive); + assert(priv->gpu); + assert(priv->gpu_bo); + /* XXX Rather than discarding the GPU buffer here, we * could mark it purgeable and allow the shrinker to * reap its storage only under memory pressure. 
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 535628c0..7efbcf90 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -235,7 +235,7 @@ inline static void sna_blt_fill_one(struct sna *sna,
 	assert(x >= 0);
 	assert(y >= 0);
-	assert((y+height) * blt->bo[0]->pitch <= blt->bo[0]->size);
+	assert((y+height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
 
 	if (!kgem_check_batch(kgem, 3))
 		sna_blt_fill_begin(sna, blt);
@@ -358,10 +358,10 @@ static void sna_blt_alpha_fixup_one(struct sna *sna,
 	assert(src_x >= 0);
 	assert(src_y >= 0);
-	assert((src_y + height) * blt->bo[0]->pitch <= blt->bo[0]->size);
+	assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
 	assert(dst_x >= 0);
 	assert(dst_y >= 0);
-	assert((dst_y + height) * blt->bo[1]->pitch <= blt->bo[1]->size);
+	assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1]));
 	assert(width > 0);
 	assert(height > 0);
@@ -409,10 +409,10 @@ static void sna_blt_copy_one(struct sna *sna,
 	assert(src_x >= 0);
 	assert(src_y >= 0);
-	assert((src_y + height) * blt->bo[0]->pitch <= blt->bo[0]->size);
+	assert((src_y + height) * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
 	assert(dst_x >= 0);
 	assert(dst_y >= 0);
-	assert((dst_y + height) * blt->bo[1]->pitch <= blt->bo[1]->size);
+	assert((dst_y + height) * blt->bo[1]->pitch <= kgem_bo_size(blt->bo[1]));
 	assert(width > 0);
 	assert(height > 0);
@@ -787,7 +787,7 @@ inline static void _sna_blt_fill_box(struct sna *sna,
 	assert(box->x1 >= 0);
 	assert(box->y1 >= 0);
-	assert(box->y2 * blt->bo[0]->pitch <= blt->bo[0]->size);
+	assert(box->y2 * blt->bo[0]->pitch <= kgem_bo_size(blt->bo[0]));
 
 	if (!kgem_check_batch(kgem, 3))
 		sna_blt_fill_begin(sna, blt);
@@ -1106,7 +1106,7 @@ prepare_blt_copy(struct sna *sna,
 	PixmapPtr src = op->u.blt.src_pixmap;
 	struct sna_pixmap *priv = sna_pixmap(src);
 
-	if (priv->gpu_bo->tiling == I915_TILING_Y)
+	if (!kgem_bo_can_blt(&sna->kgem, priv->gpu_bo))
 		return FALSE;
 
 	if (!kgem_check_bo_fenced(&sna->kgem, priv->gpu_bo, NULL)) {
@@ -1176,9 +1176,8 @@ blt_put_composite(struct sna *sna,
 		data += (src_x - dst_x) * bpp / 8;
 		data += (src_y - dst_y) * pitch;
 
-		dst_priv->gpu_bo =
-			sna_replace(sna, op->dst.pixmap, dst_priv->gpu_bo,
-				    data, pitch);
+		sna_replace(sna, op->dst.pixmap, &dst_priv->gpu_bo,
+			    data, pitch);
 	} else {
 		BoxRec box;
 
@@ -1215,9 +1214,8 @@ fastcall static void blt_put_composite_box(struct sna *sna,
 		data += (box->y1 + op->u.blt.sy) * pitch;
 		data += (box->x1 + op->u.blt.sx) * bpp;
 
-		dst_priv->gpu_bo =
-			sna_replace(sna, op->dst.pixmap, op->dst.bo,
-				    data, pitch);
+		sna_replace(sna, op->dst.pixmap, &dst_priv->gpu_bo,
+			    data, pitch);
 	} else {
 		sna_write_boxes(sna, op->dst.pixmap, op->dst.bo,
 				op->dst.x, op->dst.y,
@@ -1250,9 +1248,8 @@ static void blt_put_composite_boxes(struct sna *sna,
 		data += (box->y1 + op->u.blt.sy) * pitch;
 		data += (box->x1 + op->u.blt.sx) * bpp;
 
-		dst_priv->gpu_bo =
-			sna_replace(sna, op->dst.pixmap, op->dst.bo,
-				    data, pitch);
+		sna_replace(sna, op->dst.pixmap, &dst_priv->gpu_bo,
+			    data, pitch);
 	} else {
 		sna_write_boxes(sna, op->dst.pixmap, op->dst.bo,
 				op->dst.x, op->dst.y,
@@ -1573,9 +1570,13 @@ sna_blt_composite(struct sna *sna,
 	tmp->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
 	priv = sna_pixmap_move_to_gpu(tmp->dst.pixmap, MOVE_WRITE | MOVE_READ);
 
-	if (priv == NULL || priv->gpu_bo->tiling == I915_TILING_Y) {
-		DBG(("%s: dst not on the gpu or using Y-tiling\n",
-		     __FUNCTION__));
+	if (priv == NULL) {
+		DBG(("%s: dst not attached\n", __FUNCTION__));
+		return FALSE;
+	}
+	if (!kgem_bo_can_blt(&sna->kgem, priv->gpu_bo)) {
+		DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
+		     __FUNCTION__, priv->gpu_bo->tiling, priv->gpu_bo->pitch));
 		return FALSE;
 	}
 
@@ -1747,7 +1748,7 @@ bool sna_blt_fill(struct sna *sna, uint8_t alu,
 	DBG(("%s(alu=%d, pixel=%x, bpp=%d)\n",
 	     __FUNCTION__, alu, pixel, bpp));
 
-	if (bo->tiling == I915_TILING_Y) {
+	if (!kgem_bo_can_blt(&sna->kgem, bo)) {
 		DBG(("%s: rejected due to incompatible Y-tiling\n",
 		     __FUNCTION__));
 		return FALSE;
@@ -1797,10 +1798,10 @@ bool sna_blt_copy(struct sna *sna, uint8_t alu,
 		return FALSE;
 #endif
 
-	if (src->tiling == I915_TILING_Y)
+	if (!kgem_bo_can_blt(&sna->kgem, src))
 		return FALSE;
 
-	if (dst->tiling == I915_TILING_Y)
+	if (!kgem_bo_can_blt(&sna->kgem, dst))
 		return FALSE;
 
 	if (!sna_blt_copy_init(sna, &op->base.u.blt,
@@ -1926,7 +1927,7 @@ Bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
 	DBG(("%s (%d, %08x, %d) x %d\n",
 	     __FUNCTION__, bpp, pixel, alu, nbox));
 
-	if (bo->tiling == I915_TILING_Y) {
+	if (!kgem_bo_can_blt(kgem, bo)) {
 		DBG(("%s: fallback -- dst uses Y-tiling\n", __FUNCTION__));
 		return FALSE;
 	}
@@ -2020,7 +2021,7 @@ Bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
 			assert(box->x1 >= 0);
 			assert(box->y1 >= 0);
-			assert(box->y2 * bo->pitch <= bo->size);
+			assert(box->y2 * bo->pitch <= kgem_bo_size(bo));
 
 			b = kgem->batch + kgem->nbatch;
 			kgem->nbatch += 3;
@@ -2075,8 +2076,13 @@ Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
 	    src_bo->tiling, dst_bo->tiling,
 	    src_bo->pitch, dst_bo->pitch));
 
-	if (src_bo->tiling == I915_TILING_Y || dst_bo->tiling == I915_TILING_Y)
+	if (!kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
+		DBG(("%s: cannot blt to src? %d or dst? %d\n",
+		     __FUNCTION__,
+		     kgem_bo_can_blt(kgem, src_bo),
+		     kgem_bo_can_blt(kgem, dst_bo)));
 		return FALSE;
+	}
 
 	cmd = XY_SRC_COPY_BLT_CMD;
 	if (bpp == 32)
@@ -2087,7 +2093,7 @@ Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
 		cmd |= BLT_SRC_TILED;
 		src_pitch >>= 2;
 	}
-	assert(src_pitch < MAXSHORT);
+	assert(src_pitch <= MAXSHORT);
 
 	br13 = dst_bo->pitch;
 	if (kgem->gen >= 40 && dst_bo->tiling) {
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index f3ca212c..14a79013 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -44,6 +44,27 @@
 
 /* XXX Need to avoid using GTT fenced access for I915_TILING_Y on 855GM */
 
+static Bool
+box_intersect(BoxPtr a, const BoxRec *b)
+{
+	if (a->x1 < b->x1)
+		a->x1 = b->x1;
+	if (a->x2 > b->x2)
+		a->x2 = b->x2;
+	if (a->y1 < b->y1)
+		a->y1 = b->y1;
+	if (a->y2 > b->y2)
+		a->y2 = b->y2;
+
+	return a->x1 < a->x2 && a->y1 < a->y2;
+}
+
+static inline bool must_tile(struct sna *sna, int width, int height)
+{
+	return (width > sna->render.max_3d_size ||
+		height > sna->render.max_3d_size);
+}
+
 static void read_boxes_inplace(struct kgem *kgem,
 			       struct kgem_bo *bo, int16_t src_dx, int16_t src_dy,
 			       PixmapPtr pixmap, int16_t dst_dx, int16_t dst_dy,
@@ -105,13 +126,13 @@ void sna_read_boxes(struct sna *sna,
 	for (n = 0; n < nbox; n++) {
 		if (box[n].x1 + src_dx < 0 || box[n].y1 + src_dy < 0 ||
 		    (box[n].x2 + src_dx) * dst->drawable.bitsPerPixel/8 > src_bo->pitch ||
-		    (box[n].y2 + src_dy) * src_bo->pitch > src_bo->size)
+		    (box[n].y2 + src_dy) * src_bo->pitch > kgem_bo_size(src_bo))
 		{
 			FatalError("source out-of-bounds box[%d]=(%d, %d), (%d, %d) + (%d, %d), pitch=%d, size=%d\n",
 				   n, box[n].x1, box[n].y1,
 				   box[n].x2, box[n].y2,
 				   src_dx, src_dy,
-				   src_bo->pitch, src_bo->size);
+				   src_bo->pitch, kgem_bo_size(src_bo));
 		}
 	}
 #endif
@@ -132,7 +153,6 @@ fallback:
 		return;
 	}
 
-	/* Is it worth detiling? */
 	extents = box[0];
 	for (n = 1; n < nbox; n++) {
 		if (box[n].x1 < extents.x1)
@@ -145,11 +165,16 @@ fallback:
 		if (box[n].y2 > extents.y2)
 			extents.y2 = box[n].y2;
 	}
-	if ((extents.y2 - extents.y1) * src_bo->pitch < 4096)
-		goto fallback;
+	if (kgem_bo_is_mappable(kgem, src_bo)) {
+		/* Is it worth detiling? */
+		if ((extents.y2 - extents.y1) * src_bo->pitch < 4096)
+			goto fallback;
+	}
 
 	/* Try to avoid switching rings... */
-	if (src_bo->tiling == I915_TILING_Y || kgem->ring == KGEM_RENDER) {
+	if (kgem->ring == KGEM_RENDER ||
+	    !kgem_bo_can_blt(kgem, src_bo) ||
+	    must_tile(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
 		PixmapRec tmp;
 
 		tmp.drawable.width = extents.x2 - extents.x1;
@@ -161,38 +186,124 @@ fallback:
 		assert(tmp.drawable.width);
 		assert(tmp.drawable.height);
 
-		dst_bo = kgem_create_buffer_2d(kgem,
-					       tmp.drawable.width,
-					       tmp.drawable.height,
-					       tmp.drawable.bitsPerPixel,
-					       KGEM_BUFFER_LAST,
-					       &ptr);
-		if (!dst_bo)
-			goto fallback;
+		if (must_tile(sna, tmp.drawable.width, tmp.drawable.height)) {
+			BoxRec tile, stack[64], *clipped, *c;
+			int step;
+
+			if (n > ARRAY_SIZE(stack)) {
+				clipped = malloc(sizeof(BoxRec) * n);
+				if (clipped == NULL)
+					goto fallback;
+			} else
+				clipped = stack;
+
+			step = MIN(sna->render.max_3d_size,
+				   8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel);
+			DBG(("%s: tiling download, using %dx%d tiles\n",
+			     __FUNCTION__, step, step));
+
+			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
+				tile.y2 = tile.y1 + step;
+				if (tile.y2 > extents.y2)
+					tile.y2 = extents.y2;
+
+				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
+					tile.x2 = tile.x1 + step;
+					if (tile.x2 > extents.x2)
+						tile.x2 = extents.x2;
+
+					tmp.drawable.width = tile.x2 - tile.x1;
+					tmp.drawable.height = tile.y2 - tile.y1;
+
+					c = clipped;
+					for (n = 0; n < nbox; n++) {
+						*c = box[n];
+						if (!box_intersect(c, &tile))
+							continue;
+
+						DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
+						     __FUNCTION__,
+						     c->x1, c->y1,
+						     c->x2, c->y2,
+						     src_dx, src_dy,
+						     c->x1 - tile.x1,
+						     c->y1 - tile.y1));
+						c++;
+					}
+					if (c == clipped)
+						continue;
+
+					dst_bo = kgem_create_buffer_2d(kgem,
+								       tmp.drawable.width,
+								       tmp.drawable.height,
+								       tmp.drawable.bitsPerPixel,
+								       KGEM_BUFFER_LAST,
+								       &ptr);
+					if (!dst_bo)
+						goto fallback;
+
+					if (!sna->render.copy_boxes(sna, GXcopy,
+								    dst, src_bo, src_dx, src_dy,
+								    &tmp, dst_bo, -tile.x1, -tile.y1,
+								    clipped, c-clipped)) {
+						kgem_bo_destroy(&sna->kgem, dst_bo);
+						goto fallback;
+					}
+
+					kgem_bo_submit(&sna->kgem, dst_bo);
+					kgem_buffer_read_sync(kgem, dst_bo);
+
+					while (c-- != clipped) {
+						memcpy_blt(ptr, dst->devPrivate.ptr, tmp.drawable.bitsPerPixel,
+							   dst_bo->pitch, dst->devKind,
+							   c->x1 - tile.x1,
+							   c->y1 - tile.y1,
+							   c->x1 + dst_dx,
+							   c->y1 + dst_dy,
+							   c->x2 - c->x1,
+							   c->y2 - c->y1);
+					}
+
+					kgem_bo_destroy(&sna->kgem, dst_bo);
+				}
+			}
 
-		if (!sna->render.copy_boxes(sna, GXcopy,
-					    dst, src_bo, src_dx, src_dy,
-					    &tmp, dst_bo, -extents.x1, -extents.y1,
-					    box, nbox)) {
-			kgem_bo_destroy(&sna->kgem, dst_bo);
-			goto fallback;
-		}
+			if (clipped != stack)
+				free(clipped);
+		} else {
+			dst_bo = kgem_create_buffer_2d(kgem,
						       tmp.drawable.width,
						       tmp.drawable.height,
						       tmp.drawable.bitsPerPixel,
						       KGEM_BUFFER_LAST,
						       &ptr);
+			if (!dst_bo)
+				goto fallback;
+
+			if (!sna->render.copy_boxes(sna, GXcopy,
						    dst, src_bo, src_dx, src_dy,
						    &tmp, dst_bo, -extents.x1, -extents.y1,
						    box, nbox)) {
+				kgem_bo_destroy(&sna->kgem, dst_bo);
+				goto fallback;
+			}
 
-		kgem_bo_submit(&sna->kgem, dst_bo);
-		kgem_buffer_read_sync(kgem, dst_bo);
+			kgem_bo_submit(&sna->kgem, dst_bo);
+			kgem_buffer_read_sync(kgem, dst_bo);
+
+			for (n = 0; n < nbox; n++) {
+				memcpy_blt(ptr, dst->devPrivate.ptr, tmp.drawable.bitsPerPixel,
+					   dst_bo->pitch, dst->devKind,
+					   box[n].x1 - extents.x1,
+					   box[n].y1 - extents.y1,
+					   box[n].x1 + dst_dx,
+					   box[n].y1 + dst_dy,
+					   box[n].x2 - box[n].x1,
+					   box[n].y2 - box[n].y1);
+			}
 
-		for (n = 0; n < nbox; n++) {
-			memcpy_blt(ptr, dst->devPrivate.ptr, tmp.drawable.bitsPerPixel,
-				   dst_bo->pitch, dst->devKind,
-				   box[n].x1 - extents.x1,
-				   box[n].y1 - extents.y1,
-				   box[n].x1 + dst_dx,
-				   box[n].y1 + dst_dy,
-				   box[n].x2 - box[n].x1,
-				   box[n].y2 - box[n].y1);
+			kgem_bo_destroy(&sna->kgem, dst_bo);
 		}
-
-		kgem_bo_destroy(&sna->kgem, dst_bo);
 		return;
 	}
 
@@ -270,7 +381,7 @@ fallback:
 			assert(tmp_box[n].x1 + src_dx >= 0);
 			assert((tmp_box[n].x2 + src_dx) * dst->drawable.bitsPerPixel/8 <= src_bo->pitch);
 			assert(tmp_box[n].y1 + src_dy >= 0);
-			assert((tmp_box[n].y2 + src_dy) * src_bo->pitch <= src_bo->size);
+			assert((tmp_box[n].y2 + src_dy) * src_bo->pitch <= kgem_bo_size(src_bo));
 
 			b[0] = cmd;
 			b[1] = br13 | pitch;
@@ -299,7 +410,7 @@ fallback:
 		_kgem_set_mode(kgem, KGEM_BLT);
 		tmp_box += nbox_this_time;
 	} while (1);
-	assert(offset == dst_bo->size);
+	assert(offset == kgem_buffer_size(dst_bo));
 
 	kgem_buffer_read_sync(kgem, dst_bo);
 
@@ -331,12 +442,12 @@ fallback:
 		src += pitch * height;
 	} while (--nbox);
-	assert(src - (char *)ptr == dst_bo->size);
+	assert(src - (char *)ptr == kgem_buffer_size(dst_bo));
 
 	kgem_bo_destroy(kgem, dst_bo);
 	sna->blt_state.fill_bo = 0;
 }
 
-static void write_boxes_inplace(struct kgem *kgem,
+static bool write_boxes_inplace(struct kgem *kgem,
 				const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
 				struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
 				const BoxRec *box, int n)
@@ -346,11 +457,14 @@ static void write_boxes_inplace(struct kgem *kgem,
 	DBG(("%s x %d, handle=%d, tiling=%d\n",
 	     __FUNCTION__, n, bo->handle, bo->tiling));
 
+	if (!kgem_bo_is_mappable(kgem, bo))
+		return false;
+
 	kgem_bo_submit(kgem, bo);
 
 	dst = kgem_bo_map(kgem, bo);
 	if (dst == NULL)
-		return;
+		return false;
 
 	assert(dst != src);
 
@@ -364,7 +478,7 @@ static void write_boxes_inplace(struct kgem *kgem,
 		assert(box->x1 + dst_dx >= 0);
 		assert((box->x2 + dst_dx)*bpp <= 8*bo->pitch);
 		assert(box->y1 + dst_dy >= 0);
-		assert((box->y2 + dst_dy)*bo->pitch <= bo->size);
+		assert((box->y2 + dst_dy)*bo->pitch <= kgem_bo_size(bo));
 
 		assert(box->x1 + src_dx >= 0);
 		assert((box->x2 + src_dx)*bpp <= 8*stride);
@@ -377,6 +491,7 @@ static void write_boxes_inplace(struct kgem *kgem,
 			   box->x2 - box->x1, box->y2 - box->y1);
 		box++;
 	} while (--n);
+	return true;
 }
 
 static bool upload_inplace(struct kgem *kgem,
@@ -384,9 +499,6 @@ static bool upload_inplace(struct kgem *kgem,
 			   const BoxRec *box,
 			   int n, int bpp)
 {
-	if (DEBUG_NO_IO)
-		return kgem_bo_is_mappable(kgem, bo);
-
 	/* If we are writing through the GTT, check first if we might be
 	 * able to almagamate a series of small writes into a single
 	 * operation.
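[Note: the tiled download path added above chops the damage extents into step x step chunks so that each temporary buffer stays within both the 3D pipeline's maximum surface size and the blitter's signed 16-bit pitch field (8*(MAXSHORT&~63)/bpp pixels). The following is a minimal, self-contained sketch of just that traversal, not code from the patch: process_tile() is a hypothetical stand-in for the per-tile copy_boxes/memcpy_blt work, and SHRT_MAX is used in place of MAXSHORT.]

#include <limits.h>
#include <stdio.h>

typedef struct { int x1, y1, x2, y2; } BoxRec;

#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Hypothetical stand-in for the per-tile download/upload work. */
static void process_tile(const BoxRec *tile)
{
	printf("tile (%d, %d)-(%d, %d)\n",
	       tile->x1, tile->y1, tile->x2, tile->y2);
}

/* Walk the extents in step x step tiles, clamping the final row and
 * column, rows outer and columns inner, as in the patch. */
static void for_each_tile(const BoxRec *extents, int max_3d_size, int bpp)
{
	/* Largest tile honouring both the render and blitter limits. */
	int step = MIN(max_3d_size, 8 * (SHRT_MAX & ~63) / bpp);
	BoxRec tile;

	for (tile.y1 = extents->y1; tile.y1 < extents->y2; tile.y1 = tile.y2) {
		tile.y2 = MIN(tile.y1 + step, extents->y2);

		for (tile.x1 = extents->x1; tile.x1 < extents->x2; tile.x1 = tile.x2) {
			tile.x2 = MIN(tile.x1 + step, extents->x2);
			process_tile(&tile);
		}
	}
}

int main(void)
{
	BoxRec extents = { 0, 0, 10000, 9000 };
	for_each_tile(&extents, 8192, 32);
	return 0;
}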
@@ -404,13 +516,14 @@ static bool upload_inplace(struct kgem *kgem,
 	return !kgem_bo_map_will_stall(kgem, bo);
 }
 
-void sna_write_boxes(struct sna *sna, PixmapPtr dst,
+bool sna_write_boxes(struct sna *sna, PixmapPtr dst,
 		     struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
 		     const void *src, int stride, int16_t src_dx, int16_t src_dy,
 		     const BoxRec *box, int nbox)
 {
 	struct kgem *kgem = &sna->kgem;
 	struct kgem_bo *src_bo;
+	BoxRec extents;
 	void *ptr;
 	int offset;
 	int n, cmd, br13;
@@ -419,30 +532,30 @@ void sna_write_boxes(struct sna *sna, PixmapPtr dst,
 
 	if (upload_inplace(kgem, dst_bo, box, nbox, dst->drawable.bitsPerPixel)) {
 fallback:
-		write_boxes_inplace(kgem,
-				    src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
-				    dst_bo, dst_dx, dst_dy,
-				    box, nbox);
-		return;
+		return write_boxes_inplace(kgem,
+					   src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
+					   dst_bo, dst_dx, dst_dy,
+					   box, nbox);
 	}
 
-	/* Try to avoid switching rings... */
-	if (dst_bo->tiling == I915_TILING_Y || kgem->ring == KGEM_RENDER) {
-		PixmapRec tmp;
-		BoxRec extents;
+	extents = box[0];
+	for (n = 1; n < nbox; n++) {
+		if (box[n].x1 < extents.x1)
+			extents.x1 = box[n].x1;
+		if (box[n].x2 > extents.x2)
+			extents.x2 = box[n].x2;
 
-		extents = box[0];
-		for (n = 1; n < nbox; n++) {
-			if (box[n].x1 < extents.x1)
-				extents.x1 = box[n].x1;
-			if (box[n].x2 > extents.x2)
-				extents.x2 = box[n].x2;
+		if (box[n].y1 < extents.y1)
+			extents.y1 = box[n].y1;
+		if (box[n].y2 > extents.y2)
+			extents.y2 = box[n].y2;
+	}
 
-			if (box[n].y1 < extents.y1)
-				extents.y1 = box[n].y1;
-			if (box[n].y2 > extents.y2)
-				extents.y2 = box[n].y2;
-		}
+	/* Try to avoid switching rings... */
+	if (kgem->ring == KGEM_RENDER ||
+	    !kgem_bo_can_blt(kgem, dst_bo) ||
+	    must_tile(sna, extents.x2 - extents.x1, extents.y2 - extents.y1)) {
+		PixmapRec tmp;
 
 		tmp.drawable.width = extents.x2 - extents.x1;
 		tmp.drawable.height = extents.y2 - extents.y1;
@@ -453,37 +566,130 @@ fallback:
 		assert(tmp.drawable.width);
 		assert(tmp.drawable.height);
 
-		src_bo = kgem_create_buffer_2d(kgem,
-					       tmp.drawable.width,
-					       tmp.drawable.height,
-					       tmp.drawable.bitsPerPixel,
-					       KGEM_BUFFER_WRITE_INPLACE,
-					       &ptr);
-		if (!src_bo)
-			goto fallback;
+		DBG(("%s: upload (%d, %d)x(%d, %d), max %dx%d\n",
+		     __FUNCTION__,
+		     extents.x1, extents.y1,
+		     tmp.drawable.width, tmp.drawable.height,
+		     sna->render.max_3d_size, sna->render.max_3d_size));
+		if (must_tile(sna, tmp.drawable.width, tmp.drawable.height)) {
+			BoxRec tile, stack[64], *clipped, *c;
+			int step;
+
+			step = MIN(sna->render.max_3d_size,
+				   8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel);
+			DBG(("%s: tiling upload, using %dx%d tiles\n",
+			     __FUNCTION__, step, step));
+
+			if (n > ARRAY_SIZE(stack)) {
+				clipped = malloc(sizeof(BoxRec) * n);
+				if (clipped == NULL)
+					goto fallback;
+			} else
+				clipped = stack;
+
+			for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
+				tile.y2 = tile.y1 + step;
+				if (tile.y2 > extents.y2)
+					tile.y2 = extents.y2;
+
+				for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
+					tile.x2 = tile.x1 + step;
+					if (tile.x2 > extents.x2)
+						tile.x2 = extents.x2;
+
+					tmp.drawable.width = tile.x2 - tile.x1;
+					tmp.drawable.height = tile.y2 - tile.y1;
+
+					src_bo = kgem_create_buffer_2d(kgem,
								       tmp.drawable.width,
								       tmp.drawable.height,
								       tmp.drawable.bitsPerPixel,
								       KGEM_BUFFER_WRITE_INPLACE,
								       &ptr);
+					if (!src_bo)
+						goto fallback;
+
+					c = clipped;
+					for (n = 0; n < nbox; n++) {
+						*c = box[n];
+						if (!box_intersect(c, &tile))
+							continue;
+
+						DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
+						     __FUNCTION__,
+						     c->x1, c->y1,
+						     c->x2, c->y2,
+						     src_dx, src_dy,
+						     c->x1 - tile.x1,
+						     c->y1 - tile.y1));
+						memcpy_blt(src, ptr, tmp.drawable.bitsPerPixel,
							   stride, src_bo->pitch,
							   c->x1 + src_dx,
							   c->y1 + src_dy,
							   c->x1 - tile.x1,
							   c->y1 - tile.y1,
							   c->x2 - c->x1,
							   c->y2 - c->y1);
+						c++;
+					}
+
+					if (c != clipped)
+						n = sna->render.copy_boxes(sna, GXcopy,
									   &tmp, src_bo, -tile.x1, -tile.y1,
									   dst, dst_bo, dst_dx, dst_dy,
									   clipped, c - clipped);
+					else
+						n = 1;
+
+					kgem_bo_destroy(&sna->kgem, src_bo);
+
+					if (!n)
+						goto fallback;
+				}
+			}
 
-		for (n = 0; n < nbox; n++) {
-			memcpy_blt(src, ptr, tmp.drawable.bitsPerPixel,
-				   stride, src_bo->pitch,
-				   box[n].x1 + src_dx,
-				   box[n].y1 + src_dy,
-				   box[n].x1 - extents.x1,
-				   box[n].y1 - extents.y1,
-				   box[n].x2 - box[n].x1,
-				   box[n].y2 - box[n].y1);
-		}
+			if (clipped != stack)
+				free(clipped);
+		} else {
+			src_bo = kgem_create_buffer_2d(kgem,
						       tmp.drawable.width,
						       tmp.drawable.height,
						       tmp.drawable.bitsPerPixel,
						       KGEM_BUFFER_WRITE_INPLACE,
						       &ptr);
+			if (!src_bo)
+				goto fallback;
+
+			for (n = 0; n < nbox; n++) {
+				DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
				     __FUNCTION__,
				     box[n].x1, box[n].y1,
				     box[n].x2, box[n].y2,
				     src_dx, src_dy,
				     box[n].x1 - extents.x1,
				     box[n].y1 - extents.y1));
+				memcpy_blt(src, ptr, tmp.drawable.bitsPerPixel,
					   stride, src_bo->pitch,
					   box[n].x1 + src_dx,
					   box[n].y1 + src_dy,
					   box[n].x1 - extents.x1,
					   box[n].y1 - extents.y1,
					   box[n].x2 - box[n].x1,
					   box[n].y2 - box[n].y1);
+			}
 
-		n = sna->render.copy_boxes(sna, GXcopy,
-					   &tmp, src_bo, -extents.x1, -extents.y1,
-					   dst, dst_bo, dst_dx, dst_dy,
-					   box, nbox);
+			n = sna->render.copy_boxes(sna, GXcopy,
						   &tmp, src_bo, -extents.x1, -extents.y1,
						   dst, dst_bo, dst_dx, dst_dy,
						   box, nbox);
 
-		kgem_bo_destroy(&sna->kgem, src_bo);
+			kgem_bo_destroy(&sna->kgem, src_bo);
 
-		if (!n)
-			goto fallback;
+			if (!n)
+				goto fallback;
+		}
 
-		return;
+		return true;
 	}
 
 	cmd = XY_SRC_COPY_BLT_CMD;
@@ -586,7 +792,7 @@ fallback:
 		box++;
 		offset += pitch * height;
 	} while (--nbox_this_time);
-	assert(offset == src_bo->size);
+	assert(offset == kgem_buffer_size(src_bo));
 
 	if (nbox) {
 		_kgem_submit(kgem);
@@ -597,6 +803,7 @@ fallback:
 	} while (nbox);
 
 	sna->blt_state.fill_bo = 0;
+	return true;
 }
 
 static void
@@ -823,7 +1030,7 @@ fallback:
 		box++;
 		offset += pitch * height;
 	} while (--nbox_this_time);
-	assert(offset == src_bo->size);
+	assert(offset == kgem_buffer_size(src_bo));
 
 	if (nbox) {
 		_kgem_submit(kgem);
@@ -951,11 +1158,12 @@ indirect_replace(struct sna *sna,
 	return ret;
 }
 
-struct kgem_bo *sna_replace(struct sna *sna,
-			    PixmapPtr pixmap,
-			    struct kgem_bo *bo,
-			    const void *src, int stride)
+bool sna_replace(struct sna *sna,
+		 PixmapPtr pixmap,
+		 struct kgem_bo **_bo,
+		 const void *src, int stride)
 {
+	struct kgem_bo *bo = *_bo;
 	struct kgem *kgem = &sna->kgem;
 	void *dst;
 
@@ -968,7 +1176,7 @@ struct kgem_bo *sna_replace(struct sna *sna,
 
 	if ((!kgem_bo_mapped(bo) || bo->rq) &&
 	    indirect_replace(sna, pixmap, bo, src, stride))
-		return bo;
+		return true;
 
 	if (kgem_bo_is_busy(bo)) {
 		struct kgem_bo *new_bo;
@@ -979,26 +1187,26 @@ struct kgem_bo *sna_replace(struct sna *sna,
 					pixmap->drawable.bitsPerPixel,
 					bo->tiling,
 					CREATE_GTT_MAP | CREATE_INACTIVE);
-		if (new_bo) {
-			kgem_bo_destroy(kgem, bo);
+		if (new_bo)
 			bo = new_bo;
-		}
 	}
 
 	if (bo->tiling == I915_TILING_NONE && bo->pitch == stride) {
-		kgem_bo_write(kgem, bo, src,
-			      (pixmap->drawable.height-1)*stride + pixmap->drawable.width*pixmap->drawable.bitsPerPixel/8);
+		if (!kgem_bo_write(kgem, bo, src,
				   (pixmap->drawable.height-1)*stride + pixmap->drawable.width*pixmap->drawable.bitsPerPixel/8))
+			goto err;
 	} else {
 		if (kgem_bo_is_mappable(kgem, bo)) {
 			dst = kgem_bo_map(kgem, bo);
-			if (dst) {
-				memcpy_blt(src, dst, pixmap->drawable.bitsPerPixel,
-					   stride, bo->pitch,
-					   0, 0,
-					   0, 0,
-					   pixmap->drawable.width,
-					   pixmap->drawable.height);
-			}
+			if (!dst)
+				goto err;
+
+			memcpy_blt(src, dst, pixmap->drawable.bitsPerPixel,
				   stride, bo->pitch,
				   0, 0,
				   0, 0,
				   pixmap->drawable.width,
				   pixmap->drawable.height);
 		} else {
 			BoxRec box;
 
@@ -1006,14 +1214,23 @@ struct kgem_bo *sna_replace(struct sna *sna,
 			box.x2 = pixmap->drawable.width;
 			box.y2 = pixmap->drawable.height;
 
-			sna_write_boxes(sna, pixmap,
-					bo, 0, 0,
-					src, stride, 0, 0,
-					&box, 1);
+			if (!sna_write_boxes(sna, pixmap,
					     bo, 0, 0,
					     src, stride, 0, 0,
					     &box, 1))
+				goto err;
 		}
 	}
 
-	return bo;
+	if (bo != *_bo)
+		kgem_bo_destroy(kgem, *_bo);
+	*_bo = bo;
+	return true;
+
+err:
+	if (bo != *_bo)
+		kgem_bo_destroy(kgem, bo);
+	return false;
 }
 
 struct kgem_bo *sna_replace__xor(struct sna *sna,
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index f9151e08..7077f363 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -696,6 +696,11 @@ static int sna_render_picture_downsample(struct sna *sna,
 	DBG(("%s: creating temporary GPU bo %dx%d\n",
 	     __FUNCTION__, width, height));
 
+	if (!sna_pixmap_force_to_gpu(pixmap, MOVE_READ))
+		return sna_render_picture_fixup(sna, picture, channel,
						x, y, ow, oh,
						dst_x, dst_y);
+
 	tmp = screen->CreatePixmap(screen,
 				   width, height,
 				   pixmap->drawable.depth,
@@ -1306,9 +1311,6 @@ do_fixup:
 		return 0;
 	}
 
-	/* XXX Convolution filter? */
-	memset(ptr, 0, channel->bo->size);
-
 	/* Composite in the original format to preserve idiosyncracies */
 	if (picture->format == channel->pict_format)
 		dst = pixman_image_create_bits(picture->format,
@@ -1354,7 +1356,7 @@ do_fixup:
 				      w, h);
 		pixman_image_unref(src);
 	} else {
-		memset(ptr, 0, channel->bo->size);
+		memset(ptr, 0, kgem_buffer_size(channel->bo));
 		dst = src;
 	}
 }
@@ -1528,7 +1530,7 @@ sna_render_composite_redirect(struct sna *sna,
 	if (op->dst.pixmap->drawable.width <= sna->render.max_3d_size) {
 		int y1, y2;
 
-		assert(op->dst.pixmap.drawable.height > sna->render.max_3d_size);
+		assert(op->dst.pixmap->drawable.height > sna->render.max_3d_size);
 		y1 = y + op->dst.y;
 		y2 = y1 + height;
 		y1 &= y1 & (64 - 1);
diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
index 7b759a7e..cec04733 100644
--- a/src/sna/sna_video.c
+++ b/src/sna/sna_video.c
@@ -100,7 +100,7 @@ sna_video_buffer(struct sna *sna,
 		 struct sna_video_frame *frame)
 {
 	/* Free the current buffer if we're going to have to reallocate */
-	if (video->buf && video->buf->size < frame->size)
+	if (video->buf && kgem_bo_size(video->buf) < frame->size)
 		sna_video_free_buffers(sna, video);
 
 	if (video->buf == NULL)
diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c
index d99f8847..1aaf9723 100644
--- a/src/sna/sna_video_textured.c
+++ b/src/sna/sna_video_textured.c
@@ -271,7 +271,7 @@ sna_video_textured_put_image(ScrnInfoPtr scrn,
 			return BadAlloc;
 		}
 
-		assert(frame.bo->size >= frame.size);
+		assert(kgem_bo_size(frame.bo) >= frame.size);
 	} else {
 		frame.bo = kgem_create_linear(&sna->kgem, frame.size);
 		if (frame.bo == NULL) {
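[Note: throughout this patch, direct bo->size reads become kgem_bo_size()/kgem_buffer_size() and open-coded tiling == I915_TILING_Y tests become kgem_bo_can_blt(). The helpers below only illustrate that pattern under two assumptions -- that a bo now records its backing store as a page count, and that the blitter rejects Y-tiling as well as pitches beyond its signed 16-bit field (expressed in dwords for tiled surfaces on gen4+). They are a sketch, not the definitions from kgem.h.]

#include <stdbool.h>
#include <stdint.h>

#define PAGE_SIZE 4096
#define MAXSHORT  0x7fff

enum { I915_TILING_NONE, I915_TILING_X, I915_TILING_Y };

struct kgem { int gen; };		/* reduced: only the generation matters here */

struct kgem_bo {			/* reduced: only the fields the checks read */
	uint32_t pitch;
	unsigned tiling;
	struct { struct { unsigned count; } pages; } size;
};

/* Size in bytes recovered from the page count carried by the bo. */
static inline uint32_t kgem_bo_size(const struct kgem_bo *bo)
{
	return PAGE_SIZE * bo->size.pages.count;
}

/* The blitter cannot touch Y-tiled surfaces, and its pitch is a signed
 * 16-bit field (in dwords for tiled surfaces on gen4+), so both limits
 * are folded into a single predicate. */
static inline bool kgem_bo_can_blt(const struct kgem *kgem,
				   const struct kgem_bo *bo)
{
	uint32_t pitch;

	if (bo->tiling == I915_TILING_Y)
		return false;

	pitch = bo->pitch;
	if (kgem->gen >= 40 && bo->tiling)
		pitch >>= 2;

	return pitch <= MAXSHORT;
}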
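[Note: sna_replace() now takes a struct kgem_bo ** and returns a bool, so callers such as blt_put_composite no longer reassign dst_priv->gpu_bo themselves. The shape is commit-on-success: the caller's pointer is only swapped (and the old bo destroyed) once every step has succeeded, while on error the unused replacement is discarded and the caller's buffer is left untouched. A distilled sketch of that ownership pattern, with hypothetical bo_create()/bo_upload() helpers standing in for the kgem calls:]

#include <stdbool.h>
#include <stdlib.h>

struct bo { int dummy; };

static struct bo *bo_create(void) { return calloc(1, sizeof(struct bo)); }
static void bo_destroy(struct bo *bo) { free(bo); }
static bool bo_upload(struct bo *bo) { (void)bo; return true; }

/* Commit-on-success replacement: *_bo changes only if everything worked. */
static bool replace(struct bo **_bo)
{
	struct bo *bo = *_bo;
	struct bo *fresh;

	/* Prefer an idle replacement buffer if we can get one. */
	fresh = bo_create();
	if (fresh)
		bo = fresh;

	if (!bo_upload(bo))
		goto err;

	if (bo != *_bo)
		bo_destroy(*_bo);	/* retire the old buffer */
	*_bo = bo;
	return true;

err:
	if (bo != *_bo)
		bo_destroy(bo);		/* discard the unused replacement */
	return false;
}

int main(void)
{
	struct bo *bo = bo_create();
	bool ok = replace(&bo);
	bo_destroy(bo);
	return ok ? 0 : 1;
}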