author     Chris Wilson <chris@chris-wilson.co.uk>  2014-10-21 10:38:22 +0100
committer  Chris Wilson <chris@chris-wilson.co.uk>  2014-10-21 20:30:21 +0100
commit     b4872f74c05a92c91d7b57146eb35ed2ecc86d97 (patch)
tree       4d281d1093c42e0b303e296d233fe157ae480762 /src/sna
parent     83a3d9147308f4777324abdea859ac0c108f03c6 (diff)
sna: Avoid pwriting large batches
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
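
Background for the change: the batch was previously assembled in malloc()ed memory and copied into a GEM buffer with pwrite at submission time, so a large batch paid for a full extra copy on every submit. With this patch kgem keeps the batch in a CPU-mapped buffer object (created uncached/snooped when the GPU has no LLC) and emits commands directly into it; pwrite remains only as the fallback for the shrunk, compacted batch on the non-LLC path.

The fragment below is an illustrative sketch only, not driver code: it contrasts the two upload strategies using the stock i915 GEM ioctls. The helper names (upload_batch_pwrite, map_batch_cpu) are invented for the example and error handling is minimal.

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>	/* kernel uAPI header; include path varies by setup */

/* Old scheme: commands live in ordinary memory and are copied into the
 * GEM object with pwrite just before execbuffer. */
static int upload_batch_pwrite(int fd, uint32_t handle,
			       const uint32_t *cmds, uint64_t len)
{
	struct drm_i915_gem_pwrite pwrite;

	memset(&pwrite, 0, sizeof(pwrite));
	pwrite.handle = handle;
	pwrite.offset = 0;
	pwrite.size = len;
	pwrite.data_ptr = (uintptr_t)cmds;
	return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
}

/* New scheme: map the batch bo once through the CPU and write commands
 * in place, so submission does not have to copy the batch at all. */
static uint32_t *map_batch_cpu(int fd, uint32_t handle, uint64_t size)
{
	struct drm_i915_gem_mmap mmap_arg;

	memset(&mmap_arg, 0, sizeof(mmap_arg));
	mmap_arg.handle = handle;
	mmap_arg.offset = 0;
	mmap_arg.size = size;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))
		return NULL;	/* caller falls back to the pwrite path */
	return (uint32_t *)(uintptr_t)mmap_arg.addr_ptr;
}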
Diffstat (limited to 'src/sna')
-rw-r--r--   src/sna/kgem.c   381
-rw-r--r--   src/sna/kgem.h    28
2 files changed, 239 insertions, 170 deletions
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 234d1d06..25ed0ada 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1252,6 +1252,99 @@ out:
 	gem_close(kgem->fd, tiling.handle);
 }
 
+static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink)
+{
+	int n;
+
+	DBG(("%s: shrinking by %d\n", __FUNCTION__, shrink));
+
+	bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
+
+	assert(kgem->nreloc__self <= 256);
+	if (kgem->nreloc__self) {
+		for (n = 0; n < kgem->nreloc__self; n++) {
+			int i = kgem->reloc__self[n];
+			assert(kgem->reloc[i].target_handle == ~0U);
+			kgem->reloc[i].target_handle = bo->target_handle;
+			kgem->reloc[i].presumed_offset = bo->presumed_offset;
+			kgem->batch[kgem->reloc[i].offset/sizeof(kgem->batch[0])] =
+				kgem->reloc[i].delta + bo->presumed_offset - shrink;
+		}
+
+		if (n == 256) {
+			for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
+				if (kgem->reloc[n].target_handle == ~0U) {
+					kgem->reloc[n].target_handle = bo->target_handle;
+					kgem->reloc[n].presumed_offset = bo->presumed_offset;
+					kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
+						kgem->reloc[n].delta + bo->presumed_offset - shrink;
+				}
+			}
+		}
+	}
+
+	if (shrink) {
+		for (n = 0; n < kgem->nreloc; n++) {
+			if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION &&
+			    kgem->reloc[n].target_handle == bo->target_handle)
+				kgem->reloc[n].delta -= shrink;
+
+			if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
+				kgem->reloc[n].offset -= shrink;
+		}
+	}
+}
+
+static struct kgem_bo *kgem_new_batch(struct kgem *kgem)
+{
+	struct kgem_bo *last;
+	unsigned flags;
+
+	last = kgem->batch_bo;
+	if (last) {
+		kgem_fixup_relocs(kgem, last, 0);
+		kgem->batch = NULL;
+	}
+
+	if (kgem->batch) {
+		assert(last == NULL);
+		return NULL;
+	}
+
+	flags = CREATE_CPU_MAP | CREATE_NO_THROTTLE;
+	if (!kgem->has_llc)
+		flags |= CREATE_UNCACHED;
+
+	kgem->batch_bo = kgem_create_linear(kgem,
+					    sizeof(uint32_t)*kgem->batch_size,
+					    flags);
+	if (kgem->batch_bo)
+		kgem->batch = kgem_bo_map__cpu(kgem, kgem->batch_bo);
+	if (kgem->batch == NULL) {
+		DBG(("%s: unable to map batch bo, mallocing(size=%d)\n",
+		     __FUNCTION__,
+		     sizeof(uint32_t)*kgem->batch_size));
+		if (kgem->batch_bo) {
+			kgem_bo_destroy(kgem, kgem->batch_bo);
+			kgem->batch_bo = NULL;
+		}
+
+		if (posix_memalign((void **)&kgem->batch, PAGE_SIZE,
+				   ALIGN(sizeof(uint32_t) * kgem->batch_size, PAGE_SIZE))) {
+			ERR(("%s: batch allocation failed, disabling acceleration\n", __FUNCTION__));
+			__kgem_set_wedged(kgem);
+		}
+	} else {
+		DBG(("%s: allocated and mapped batch handle=%d [size=%d]\n",
+		     __FUNCTION__, kgem->batch_bo->handle,
+		     sizeof(uint32_t)*kgem->batch_size));
+		kgem_bo_sync__cpu(kgem, kgem->batch_bo);
+	}
+
+	DBG(("%s: using last batch handle=%d\n",
+	     __FUNCTION__, last ? last->handle : 0));
+	return last;
+}
+
 void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
 {
@@ -1362,7 +1455,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
 		kgem->wedged = 1;
 	}
 
-	kgem->batch_size = ARRAY_SIZE(kgem->batch);
+	kgem->batch_size = UINT16_MAX & ~7;
 	if (gen == 020 && !kgem->has_pinned_batches)
 		/* Limited to what we can pin */
 		kgem->batch_size = 4*1024;
@@ -1382,6 +1475,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
 
 	DBG(("%s: maximum batch size? %d\n", __FUNCTION__, kgem->batch_size));
+	kgem_new_batch(kgem);
 
 	kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
 	DBG(("%s: last-level cache size: %d bytes, threshold in pages: %d\n",
@@ -1803,36 +1897,6 @@ static uint32_t kgem_end_batch(struct kgem *kgem)
 	return kgem->nbatch;
 }
 
-static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo)
-{
-	int n;
-
-	assert(kgem->nreloc__self <= 256);
-	if (kgem->nreloc__self == 0)
-		return;
-
-	for (n = 0; n < kgem->nreloc__self; n++) {
-		int i = kgem->reloc__self[n];
-		assert(kgem->reloc[i].target_handle == ~0U);
-		kgem->reloc[i].target_handle = bo->target_handle;
-		kgem->reloc[i].presumed_offset = bo->presumed_offset;
-		kgem->batch[kgem->reloc[i].offset/sizeof(kgem->batch[0])] =
-			kgem->reloc[i].delta + bo->presumed_offset;
-	}
-
-	if (n == 256) {
-		for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
-			if (kgem->reloc[n].target_handle == ~0U) {
-				kgem->reloc[n].target_handle = bo->target_handle;
-				kgem->reloc[n].presumed_offset = bo->presumed_offset;
-				kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
-					kgem->reloc[n].delta + bo->presumed_offset;
-			}
-		}
-
-	}
-}
-
 static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo)
 {
 	struct kgem_bo_binding *b;
@@ -3041,15 +3105,16 @@ void kgem_reset(struct kgem *kgem)
 	kgem->needs_reservation = false;
 	kgem->flush = 0;
 	kgem->batch_flags = kgem->batch_flags_base;
+	assert(kgem->batch);
 
 	kgem->next_request = __kgem_request_alloc(kgem);
 
 	kgem_sna_reset(kgem);
 }
 
-static int compact_batch_surface(struct kgem *kgem)
+static int compact_batch_surface(struct kgem *kgem, int *shrink)
 {
-	int size, shrink, n;
+	int size, n;
 
 	if (!kgem->has_relaxed_delta)
 		return kgem->batch_size * sizeof(uint32_t);
@@ -3059,29 +3124,22 @@ static int compact_batch_surface(struct kgem *kgem)
 	size = n - kgem->surface + kgem->nbatch;
 	size = ALIGN(size, 1024);
 
-	shrink = n - size;
-	if (shrink) {
-		DBG(("shrinking from %d to %d\n", kgem->batch_size, size));
-
-		shrink *= sizeof(uint32_t);
-		for (n = 0; n < kgem->nreloc; n++) {
-			if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION &&
-			    kgem->reloc[n].target_handle == ~0U)
-				kgem->reloc[n].delta -= shrink;
-
-			if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
-				kgem->reloc[n].offset -= shrink;
-		}
-	}
-
+	*shrink = (n - size) * sizeof(uint32_t);
 	return size * sizeof(uint32_t);
 }
 
 static struct kgem_bo *
-kgem_create_batch(struct kgem *kgem, int size)
+kgem_create_batch(struct kgem *kgem)
 {
 	struct drm_i915_gem_set_domain set_domain;
 	struct kgem_bo *bo;
+	int shrink = 0;
+	int size;
+
+	if (kgem->surface != kgem->batch_size)
+		size = compact_batch_surface(kgem, &shrink);
+	else
+		size = kgem->nbatch * sizeof(uint32_t);
 
 	if (size <= 4096) {
 		bo = list_first_entry(&kgem->pinned_batches[0],
@@ -3091,7 +3149,8 @@ out_4096:
 			assert(bo->refcnt > 0);
 			list_move_tail(&bo->list, &kgem->pinned_batches[0]);
-			return kgem_bo_reference(bo);
+			bo = kgem_bo_reference(bo);
+			goto write;
 		}
 
 		if (!__kgem_busy(kgem, bo->handle)) {
@@ -3109,7 +3168,8 @@ out_4096:
 out_16384:
 			assert(bo->refcnt > 0);
 			list_move_tail(&bo->list, &kgem->pinned_batches[1]);
-			return kgem_bo_reference(bo);
+			bo = kgem_bo_reference(bo);
+			goto write;
 		}
 
 		if (!__kgem_busy(kgem, bo->handle)) {
@@ -3121,14 +3181,14 @@ out_16384:
 	if (kgem->gen == 020) {
 		bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY);
 		if (bo)
-			return bo;
+			goto write;
 
 		/* Nothing available for reuse, rely on the kernel wa */
 		if (kgem->has_pinned_batches) {
 			bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY);
 			if (bo) {
 				kgem->batch_flags &= ~LOCAL_I915_EXEC_IS_PINNED;
-				return bo;
+				goto write;
 			}
 		}
 
@@ -3152,11 +3212,26 @@ out_16384:
 			kgem_retire(kgem);
 			assert(bo->rq == NULL);
 
-			return kgem_bo_reference(bo);
+			bo = kgem_bo_reference(bo);
+			goto write;
 		}
 	}
 
-	return kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
+	bo = NULL;
+	if (!kgem->has_llc) {
+		bo = kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
+		if (bo) {
+write:
+			kgem_fixup_relocs(kgem, bo, shrink);
+			if (kgem_batch_write(kgem, bo->handle, size)) {
+				kgem_bo_destroy(kgem, bo);
+				return NULL;
+			}
+		}
+	}
+	if (bo == NULL)
+		bo = kgem_new_batch(kgem);
+	return bo;
 }
 
 #if !NDEBUG
@@ -3253,7 +3328,6 @@ void _kgem_submit(struct kgem *kgem)
 {
 	struct kgem_request *rq;
 	uint32_t batch_end;
-	int size;
 
 	assert(!DBG_NO_HW);
 	assert(!kgem->wedged);
@@ -3282,17 +3356,14 @@ void _kgem_submit(struct kgem *kgem)
 	__kgem_batch_debug(kgem, batch_end);
 #endif
 
-	if (kgem->surface != kgem->batch_size)
-		size = compact_batch_surface(kgem);
-	else
-		size = kgem->nbatch * sizeof(kgem->batch[0]);
-
 	rq = kgem->next_request;
 	assert(rq->bo == NULL);
-	rq->bo = kgem_create_batch(kgem, size);
+
+	rq->bo = kgem_create_batch(kgem);
 	if (rq->bo) {
+		struct drm_i915_gem_execbuffer2 execbuf;
 		uint32_t handle = rq->bo->handle;
-		int i;
+		int i, ret;
 
 		assert(!rq->bo->needs_flush);
@@ -3306,113 +3377,105 @@ void _kgem_submit(struct kgem *kgem)
 		kgem->exec[i].rsvd1 = 0;
 		kgem->exec[i].rsvd2 = 0;
 
-		rq->bo->target_handle = kgem->has_handle_lut ? i : handle;
 		rq->bo->exec = &kgem->exec[i];
 		rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */
 		list_add(&rq->bo->request, &rq->buffers);
 		rq->ring = kgem->ring == KGEM_BLT;
 
-		kgem_fixup_self_relocs(kgem, rq->bo);
-
-		if (kgem_batch_write(kgem, handle, size) == 0) {
-			struct drm_i915_gem_execbuffer2 execbuf;
-			int ret;
-
-			memset(&execbuf, 0, sizeof(execbuf));
-			execbuf.buffers_ptr = (uintptr_t)kgem->exec;
-			execbuf.buffer_count = kgem->nexec;
-			execbuf.batch_len = batch_end*sizeof(uint32_t);
-			execbuf.flags = kgem->ring | kgem->batch_flags;
-
-			if (DBG_DUMP) {
-				int fd = open("/tmp/i915-batchbuffers.dump",
-					      O_WRONLY | O_CREAT | O_APPEND,
-					      0666);
-				if (fd != -1) {
-					ret = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
-					fd = close(fd);
-				}
+		memset(&execbuf, 0, sizeof(execbuf));
+		execbuf.buffers_ptr = (uintptr_t)kgem->exec;
+		execbuf.buffer_count = kgem->nexec;
+		execbuf.batch_len = batch_end*sizeof(uint32_t);
+		execbuf.flags = kgem->ring | kgem->batch_flags;
+
+		if (DBG_DUMP) {
+			int fd = open("/tmp/i915-batchbuffers.dump",
+				      O_WRONLY | O_CREAT | O_APPEND,
+				      0666);
+			if (fd != -1) {
+				ret = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
+				fd = close(fd);
 			}
+		}
 
-			ret = do_execbuf(kgem, &execbuf);
-			if (DEBUG_SYNC && ret == 0) {
-				struct drm_i915_gem_set_domain set_domain;
+		ret = do_execbuf(kgem, &execbuf);
+		if (DEBUG_SYNC && ret == 0) {
+			struct drm_i915_gem_set_domain set_domain;
 
-				VG_CLEAR(set_domain);
-				set_domain.handle = handle;
-				set_domain.read_domains = I915_GEM_DOMAIN_GTT;
-				set_domain.write_domain = I915_GEM_DOMAIN_GTT;
+			VG_CLEAR(set_domain);
+			set_domain.handle = handle;
+			set_domain.read_domains = I915_GEM_DOMAIN_GTT;
+			set_domain.write_domain = I915_GEM_DOMAIN_GTT;
 
-				ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
+			ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
+		}
+		if (ret < 0) {
+			kgem_throttle(kgem);
+			if (!kgem->wedged) {
+				xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
+					   "Failed to submit rendering commands, disabling acceleration.\n");
+				__kgem_set_wedged(kgem);
 			}
-			if (ret < 0) {
-				kgem_throttle(kgem);
-				if (!kgem->wedged) {
-					xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
-						   "Failed to submit rendering commands, disabling acceleration.\n");
-					__kgem_set_wedged(kgem);
-				}
 
 #if !NDEBUG
-				ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n",
-				       kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
-				       kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret);
+			ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n",
+			       kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
+			       kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret);
 
-				for (i = 0; i < kgem->nexec; i++) {
-					struct kgem_bo *bo, *found = NULL;
+			for (i = 0; i < kgem->nexec; i++) {
+				struct kgem_bo *bo, *found = NULL;
 
-					list_for_each_entry(bo, &kgem->next_request->buffers, request) {
-						if (bo->handle == kgem->exec[i].handle) {
-							found = bo;
-							break;
-						}
+				list_for_each_entry(bo, &kgem->next_request->buffers, request) {
+					if (bo->handle == kgem->exec[i].handle) {
+						found = bo;
+						break;
 					}
-					ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
-					       i,
-					       kgem->exec[i].handle,
-					       (int)kgem->exec[i].offset,
-					       found ? kgem_bo_size(found) : -1,
-					       found ? found->tiling : -1,
-					       (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
-					       found ? found->snoop : -1,
-					       found ? found->purged : -1);
-				}
-				for (i = 0; i < kgem->nreloc; i++) {
-					ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
-					       i,
-					       (int)kgem->reloc[i].offset,
-					       kgem->reloc[i].target_handle,
-					       kgem->reloc[i].delta,
-					       kgem->reloc[i].read_domains,
-					       kgem->reloc[i].write_domain,
-					       (int)kgem->reloc[i].presumed_offset);
 				}
+				ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
+				       i,
+				       kgem->exec[i].handle,
+				       (int)kgem->exec[i].offset,
+				       found ? kgem_bo_size(found) : -1,
+				       found ? found->tiling : -1,
+				       (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
+				       found ? found->snoop : -1,
+				       found ? found->purged : -1);
+			}
+			for (i = 0; i < kgem->nreloc; i++) {
+				ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
+				       i,
+				       (int)kgem->reloc[i].offset,
+				       kgem->reloc[i].target_handle,
+				       kgem->reloc[i].delta,
+				       kgem->reloc[i].read_domains,
+				       kgem->reloc[i].write_domain,
+				       (int)kgem->reloc[i].presumed_offset);
+			}
 
-				{
-					struct drm_i915_gem_get_aperture aperture;
-					if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0)
-						ErrorF("Aperture size %lld, available %lld\n",
-						       (long long)aperture.aper_size,
-						       (long long)aperture.aper_available_size);
-				}
+			{
+				struct drm_i915_gem_get_aperture aperture;
+				if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0)
+					ErrorF("Aperture size %lld, available %lld\n",
+					       (long long)aperture.aper_size,
+					       (long long)aperture.aper_available_size);
+			}
 
-				if (ret == -ENOSPC)
-					dump_gtt_info(kgem);
-				if (ret == -EDEADLK)
-					dump_fence_regs(kgem);
-
-				if (DEBUG_SYNC) {
-					int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
-					if (fd != -1) {
-						int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
-						assert(ignored == batch_end*sizeof(uint32_t));
-						close(fd);
-					}
+			if (ret == -ENOSPC)
+				dump_gtt_info(kgem);
+			if (ret == -EDEADLK)
+				dump_fence_regs(kgem);
 
-					FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret);
+			if (DEBUG_SYNC) {
+				int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
+				if (fd != -1) {
+					int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
+					assert(ignored == batch_end*sizeof(uint32_t));
+					close(fd);
 				}
-#endif
+
+				FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret);
 			}
+#endif
 		}
 	}
 
 #if SHOW_BATCH_AFTER
@@ -4167,16 +4230,18 @@ struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags)
 	}
 
 	size = NUM_PAGES(size);
-	bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
-	if (bo) {
-		assert(bo->domain != DOMAIN_GPU);
-		ASSERT_IDLE(kgem, bo->handle);
-		bo->refcnt = 1;
-		return bo;
-	}
+	if ((flags & CREATE_UNCACHED) == 0) {
+		bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
+		if (bo) {
+			assert(bo->domain != DOMAIN_GPU);
+			ASSERT_IDLE(kgem, bo->handle);
+			bo->refcnt = 1;
+			return bo;
+		}
 
-	if (flags & CREATE_CACHED)
-		return NULL;
+		if (flags & CREATE_CACHED)
+			return NULL;
+	}
 
 	handle = gem_create(kgem->fd, size);
 	if (handle == 0)
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 8bd57156..4a74f2ef 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -120,12 +120,22 @@ enum {
 };
 
 struct kgem {
-	int fd;
 	unsigned wedged;
+	int fd;
 	unsigned gen;
 	uint32_t unique_id;
 
+	uint16_t nbatch;
+	uint16_t surface;
+	uint16_t nexec;
+	uint16_t nreloc;
+	uint16_t nreloc__self;
+	uint16_t nfence;
+	uint16_t batch_size;
+
+	uint32_t *batch;
+
 	enum kgem_mode {
 		/* order matches I915_EXEC_RING ordering */
 		KGEM_NONE = 0,
@@ -158,14 +168,6 @@ struct kgem {
 #define I915_EXEC_SECURE (1<<9)
 #define LOCAL_EXEC_OBJECT_WRITE (1<<2)
 
-	uint16_t nbatch;
-	uint16_t surface;
-	uint16_t nexec;
-	uint16_t nreloc;
-	uint16_t nreloc__self;
-	uint16_t nfence;
-	uint16_t batch_size;
-
 	uint32_t flush:1;
 	uint32_t need_expire:1;
 	uint32_t need_purge:1;
@@ -217,8 +219,9 @@ struct kgem {
 			   int16_t dst_x, int16_t dst_y, uint16_t width, uint16_t height);
 
+	struct kgem_bo *batch_bo;
+
 	uint16_t reloc__self[256];
-	uint32_t batch[64*1024-8] page_aligned;
 	struct drm_i915_gem_exec_object2 exec[384] page_aligned;
 	struct drm_i915_gem_relocation_entry reloc[8192] page_aligned;
 
@@ -299,8 +302,9 @@ enum {
 	CREATE_PRIME = 0x20,
 	CREATE_TEMPORARY = 0x40,
 	CREATE_CACHED = 0x80,
-	CREATE_NO_RETIRE = 0x100,
-	CREATE_NO_THROTTLE = 0x200,
+	CREATE_UNCACHED = 0x100,
+	CREATE_NO_RETIRE = 0x200,
+	CREATE_NO_THROTTLE = 0x400,
 };
 
 struct kgem_bo *kgem_create_2d(struct kgem *kgem, int width,
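
For reference, the relocation fix-up that replaces the old in-place shrinking from compact_batch_surface() can be pictured with a toy model: once the unused gap between the command stream and the surface state is cut out of the batch, every relocation that points back into the batch (instruction domain, targeting the batch bo) has its delta reduced by the removed byte count, and every relocation stored in the moved tail has its offset slid down by the same amount. The snippet below is a self-contained illustration with made-up numbers, not driver code; the struct merely mimics drm_i915_gem_relocation_entry and the helper name fixup_shrink is invented for the example.

#include <stdint.h>
#include <stdio.h>

struct toy_reloc {
	uint64_t offset;	/* where the reloc is written inside the batch */
	uint64_t delta;		/* offset added to the target's presumed address */
	int	 self;		/* stand-in for the INSTRUCTION-domain/self-target test */
};

/* Mirror of the shrink handling in kgem_fixup_relocs(): slide self-referencing
 * deltas, and relocations stored past the end of the command stream, down by
 * the number of bytes removed from the batch. */
static void fixup_shrink(struct toy_reloc *r, int n,
			 uint32_t batch_used_bytes, uint32_t shrink)
{
	for (int i = 0; i < n; i++) {
		if (r[i].self)
			r[i].delta -= shrink;
		if (r[i].offset >= batch_used_bytes)
			r[i].offset -= shrink;
	}
}

int main(void)
{
	/* 4096 bytes of commands, surface state originally near the top of a
	 * 64KiB batch, 32768 unused bytes removed in between. */
	struct toy_reloc r[] = {
		{ .offset = 64,    .delta = 0,     .self = 0 },	/* stays put */
		{ .offset = 40000, .delta = 40960, .self = 1 },	/* slides down */
	};

	fixup_shrink(r, 2, 4096, 32768);
	for (int i = 0; i < 2; i++)
		printf("reloc[%d]: offset=%llu delta=%llu\n", i,
		       (unsigned long long)r[i].offset,
		       (unsigned long long)r[i].delta);
	return 0;
}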