diff options
Diffstat (limited to 'sys')
-rw-r--r-- | sys/dev/pci/drm/i915_drv.c | 18 | ||||
-rw-r--r-- | sys/dev/pci/drm/i915_drv.h | 4 | ||||
-rw-r--r-- | sys/dev/pci/drm/i915_gem.c | 204 | ||||
-rw-r--r-- | sys/dev/pci/drm/i915_gem_execbuffer.c | 123 |
4 files changed, 136 insertions, 213 deletions
diff --git a/sys/dev/pci/drm/i915_drv.c b/sys/dev/pci/drm/i915_drv.c index 0c3da5965b5..515d3f2fce4 100644 --- a/sys/dev/pci/drm/i915_drv.c +++ b/sys/dev/pci/drm/i915_drv.c @@ -465,6 +465,24 @@ inteldrm_probe(struct device *parent, void *match, void *aux) inteldrm_pciidlist)); } +bool +i915_semaphore_is_enabled(struct drm_device *dev) +{ + if (INTEL_INFO(dev)->gen < 6) + return 0; + + if (i915_semaphores >= 0) + return i915_semaphores; + +#ifdef CONFIG_INTEL_IOMMU + /* Enable semaphores on SNB when IO remapping is off */ + if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) + return false; +#endif + + return 1; +} + int i915_drm_freeze(struct drm_device *dev) { diff --git a/sys/dev/pci/drm/i915_drv.h b/sys/dev/pci/drm/i915_drv.h index 314c2f4ac73..d7d0f17f3bf 100644 --- a/sys/dev/pci/drm/i915_drv.h +++ b/sys/dev/pci/drm/i915_drv.h @@ -1060,7 +1060,6 @@ int i915_gem_object_pin_and_relocate(struct drm_obj *, struct drm_i915_gem_relocation_entry *); int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *, bus_size_t); -int i915_gem_flush_ring(struct intel_ring_buffer *, uint32_t, uint32_t); struct drm_obj *i915_gem_find_inactive_object(struct inteldrm_softc *, size_t); @@ -1148,6 +1147,7 @@ void inteldrm_chipset_flush(struct inteldrm_softc *); int intel_gpu_reset(struct drm_device *); int i915_reset(struct drm_device *); void inteldrm_timeout(void *); +bool i915_semaphore_is_enabled(struct drm_device *); /* i915_gem_evict.c */ int i915_gem_evict_everything(struct inteldrm_softc *); @@ -1197,6 +1197,8 @@ int i915_gem_object_put_fence(struct drm_i915_gem_object *); void i915_gem_reset(struct drm_device *); void i915_gem_clflush_object(struct drm_i915_gem_object *); void i915_gem_release(struct drm_device *, struct drm_file *); +int i915_gem_object_sync(struct drm_i915_gem_object *, + struct intel_ring_buffer *); /* intel_opregion.c */ int intel_opregion_setup(struct drm_device *dev); diff --git a/sys/dev/pci/drm/i915_gem.c b/sys/dev/pci/drm/i915_gem.c index 49e957f937e..3ff807421ee 100644 --- a/sys/dev/pci/drm/i915_gem.c +++ b/sys/dev/pci/drm/i915_gem.c @@ -74,6 +74,8 @@ void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *); int i915_gem_gtt_rebind_object(struct drm_i915_gem_object *, enum i915_cache_level); void i915_gem_request_remove_from_client(struct drm_i915_gem_request *); +int i915_gem_object_flush_active(struct drm_i915_gem_object *); +int i915_gem_check_olr(struct intel_ring_buffer *, u32); extern int ticks; @@ -406,7 +408,24 @@ i915_gem_check_wedge(struct inteldrm_softc *dev_priv, return 0; } -// i915_gem_check_olr +/* + * Compare seqno against outstanding lazy request. Emit a request if they are + * equal. + */ +int +i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) +{ + int ret; + +// BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex)); + + ret = 0; + if (seqno == ring->outstanding_lazy_request) + ret = i915_add_request(ring, NULL, NULL); + + return ret; +} + // __wait_seqno /** @@ -1112,7 +1131,6 @@ i915_gem_reset_ring_lists(drm_i915_private_t *dev_priv, struct drm_i915_gem_object, ring_list); - obj->base.write_domain = 0; i915_gem_object_move_to_inactive(obj); } } @@ -1217,10 +1235,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) if (!i915_seqno_passed(seqno, obj->last_read_seqno)) break; - if (obj->base.write_domain != 0) - i915_gem_object_move_off_active(obj); - else - i915_gem_object_move_to_inactive(obj); + i915_gem_object_move_to_inactive(obj); } } @@ -1235,9 +1250,73 @@ i915_gem_retire_requests(struct inteldrm_softc *dev_priv) } // i915_gem_retire_work_handler -// i915_gem_object_flush_active + +/** + * Ensures that an object will eventually get non-busy by flushing any required + * write domains, emitting any outstanding lazy request and retiring and + * completed requests. + */ +int +i915_gem_object_flush_active(struct drm_i915_gem_object *obj) +{ + int ret; + + if (obj->active) { + ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno); + if (ret) + return ret; + + i915_gem_retire_requests_ring(obj->ring); + } + + return 0; +} + // i915_gem_wait_ioctl -// i915_gem_object_sync + +/** + * i915_gem_object_sync - sync an object to a ring. + * + * @obj: object which may be in use on another ring. + * @to: ring we wish to use the object on. May be NULL. + * + * This code is meant to abstract object synchronization with the GPU. + * Calling with NULL implies synchronizing the object with the CPU + * rather than a particular GPU ring. + * + * Returns 0 if successful, else propagates up the lower layer error. + */ +int +i915_gem_object_sync(struct drm_i915_gem_object *obj, + struct intel_ring_buffer *to) +{ + struct intel_ring_buffer *from = obj->ring; + u32 seqno; + int ret, idx; + + if (from == NULL || to == from) + return 0; + + if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) + return i915_gem_object_wait_rendering(obj, false); + + idx = intel_ring_sync_index(from, to); + + seqno = obj->last_read_seqno; + if (seqno <= from->sync_seqno[idx]) + return 0; + + if (seqno == from->outstanding_lazy_request) { + ret = i915_add_request(from, NULL, &seqno); + if (ret) { + return ret; + } + } + + from->sync_seqno[idx] = seqno; + + return to->sync_to(to, from, seqno); +} void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) @@ -1302,22 +1381,6 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) i915_gem_object_finish_gtt(obj); - /* Move the object to the CPU domain to ensure that - * any possible CPU writes while it's not in the GTT - * are flushed when we go to remap it. - */ - if (ret == 0) - ret = i915_gem_object_set_to_cpu_domain(obj, 1); - if (ret == ERESTART || ret == EINTR) - return ret; - if (ret) { - /* In the event of a disaster, abandon all caches and - * hope for the best. - */ - i915_gem_clflush_object(obj); - obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; - } - /* release the fence reg _after_ flushing */ ret = i915_gem_object_put_fence(obj); if (ret == ERESTART || ret == EINTR) @@ -1356,25 +1419,6 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) } int -i915_gem_flush_ring(struct intel_ring_buffer *ring, - uint32_t invalidate_domains, - uint32_t flush_domains) -{ - int ret; - - if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0) - return 0; - -// trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains); - - ret = ring->flush(ring, invalidate_domains, flush_domains); - if (ret) - return ret; - - return 0; -} - -int i915_gpu_idle(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; @@ -1571,19 +1615,8 @@ i915_gem_object_update_fence(struct drm_i915_gem_object *obj, int i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) { - int ret; - - if (obj->fenced_gpu_access) { - if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { - ret = i915_gem_flush_ring(obj->ring, 0, - obj->base.write_domain); - if (ret) - return ret; - } - } - if (obj->last_fenced_seqno) { - ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); + int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); if (ret) return ret; @@ -1901,17 +1934,6 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) #endif } -/** Flushes any GPU write domain for the object if it's dirty. */ -int -i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj) -{ - if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) - return 0; - - /* Queue the GPU write cache flushing we need. */ - return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); -} - /** Flushes the CPU write domain for the object if it's dirty. */ void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) @@ -1955,10 +1977,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, int write) if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) return 0; - ret = i915_gem_object_flush_gpu_write_domain(obj); - if (ret) - return ret; - ret = i915_gem_object_wait_rendering(obj, !write); if (ret) return ret; @@ -2078,13 +2096,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, // u32 old_read_domains, old_write_domain; int ret; - ret = i915_gem_object_flush_gpu_write_domain(obj); - if (ret) - return ret; - if (pipelined != obj->ring) { - ret = i915_gem_object_wait_rendering(obj, false); - if (ret == -ERESTART || ret == -EINTR) + ret = i915_gem_object_sync(obj, pipelined); + if (ret) return (ret); } @@ -2117,7 +2131,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ - BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); + obj->base.write_domain = 0; obj->base.read_domains |= I915_GEM_DOMAIN_GTT; // trace_i915_gem_object_change_domain(obj, @@ -2135,12 +2149,6 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) return 0; - if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { - ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); - if (ret) - return ret; - } - ret = i915_gem_object_wait_rendering(obj, false); if (ret) return ret; @@ -2167,11 +2175,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, int write) if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) return 0; - ret = i915_gem_object_flush_gpu_write_domain(obj); - if (ret) - return ret; - - ret = i915_gem_object_wait_rendering(obj, false); + ret = i915_gem_object_wait_rendering(obj, !write); if (ret) return ret; @@ -2475,30 +2479,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, return (EBADF); } + ret = i915_gem_object_flush_active(obj); args->busy = obj->active; - if (args->busy) { - /* - * Unconditionally flush objects write domain if they are - * busy. The fact userland is calling this ioctl means that - * it wants to use this buffer sooner rather than later, so - * flushing now shoul reduce latency. - */ - if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { - ret = i915_gem_flush_ring(obj->ring, - 0, obj->base.write_domain); - } else if (obj->ring->outstanding_lazy_request == - obj->last_read_seqno) { - i915_add_request(obj->ring, NULL, NULL); - } - - /* - * Update the active list after the flush otherwise this is - * only updated on a delayed timer. Updating now reduces - * working set size. - */ - i915_gem_retire_requests_ring(obj->ring); - args->busy = obj->active; - } drm_gem_object_unreference(&obj->base); return ret; diff --git a/sys/dev/pci/drm/i915_gem_execbuffer.c b/sys/dev/pci/drm/i915_gem_execbuffer.c index a63236f88b0..791084db2e5 100644 --- a/sys/dev/pci/drm/i915_gem_execbuffer.c +++ b/sys/dev/pci/drm/i915_gem_execbuffer.c @@ -65,11 +65,6 @@ struct change_domains { int i915_reset_gen7_sol_offsets(struct drm_device *, struct intel_ring_buffer *); -int i915_gem_execbuffer_flush(struct drm_device *, uint32_t, uint32_t, - uint32_t); -bool intel_enable_semaphores(struct drm_device *); -int i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *, - struct intel_ring_buffer *); int i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *, u32); int i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *, struct drm_obj **, int); @@ -266,83 +261,6 @@ i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj, // i915_gem_execbuffer_relocate_slow int -i915_gem_execbuffer_flush(struct drm_device *dev, - uint32_t invalidate_domains, - uint32_t flush_domains, - uint32_t flush_rings) -{ - drm_i915_private_t *dev_priv = dev->dev_private; - int i, ret; - - if (flush_domains & I915_GEM_DOMAIN_CPU) - inteldrm_chipset_flush(dev_priv); - - if (flush_domains & I915_GEM_DOMAIN_GTT) - DRM_WRITEMEMORYBARRIER(); - - if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) { - for (i = 0; i < I915_NUM_RINGS; i++) - if (flush_rings & (1 << i)) { - ret = i915_gem_flush_ring(&dev_priv->rings[i], - invalidate_domains, - flush_domains); - if (ret) - return ret; - } - } - - return 0; -} - -bool -intel_enable_semaphores(struct drm_device *dev) -{ - if (INTEL_INFO(dev)->gen < 6) - return 0; - - if (i915_semaphores >= 0) - return i915_semaphores; - - /* Disable semaphores on SNB */ - if (INTEL_INFO(dev)->gen == 6) - return 0; - - return 1; -} - -int -i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj, - struct intel_ring_buffer *to) -{ - struct intel_ring_buffer *from = obj->ring; - u32 seqno; - int ret, idx; - - if (from == NULL || to == from) - return 0; - - /* XXX gpu semaphores are implicated in various hard hangs on SNB */ - if (!intel_enable_semaphores(obj->base.dev)) - return i915_gem_object_wait_rendering(obj, false); - idx = intel_ring_sync_index(from, to); - - seqno = obj->last_read_seqno; - if (seqno <= from->sync_seqno[idx]) - return 0; - - if (seqno == from->outstanding_lazy_request) { - ret = i915_add_request(from, NULL, &seqno); - if (ret) { - return ret; - } - } - - from->sync_seqno[idx] = seqno; - - return to->sync_to(to, from, seqno); -} - -int i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips) { u32 plane, flip_mask; @@ -379,38 +297,41 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, struct drm_obj **object_list, int buffer_count) { struct drm_i915_gem_object *obj; - struct change_domains cd; + uint32_t flush_domains = 0; + uint32_t flips = 0; int ret, i; - memset(&cd, 0, sizeof(cd)); for (i = 0; i < buffer_count; i++) { obj = to_intel_bo(object_list[i]); - i915_gem_object_set_to_gpu_domain(obj, ring, &cd); - } - - if (cd.invalidate_domains | cd.flush_domains) { - ret = i915_gem_execbuffer_flush(ring->dev, - cd.invalidate_domains, - cd.flush_domains, - cd.flush_rings); + ret = i915_gem_object_sync(obj, ring); if (ret) return ret; - } - if (cd.flips) { - ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips); - if (ret) - return ret; + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) + i915_gem_clflush_object(obj); + + if (obj->base.pending_write_domain) + flips |= atomic_read(&obj->pending_flip); + + flush_domains |= obj->base.write_domain; } - for (i = 0; i < buffer_count; i++) { - obj = to_intel_bo(object_list[i]); - ret = i915_gem_execbuffer_sync_rings(obj, ring); + if (flips) { + ret = i915_gem_execbuffer_wait_for_flips(ring, flips); if (ret) return ret; } - return 0; + if (flush_domains & I915_GEM_DOMAIN_CPU) + inteldrm_chipset_flush(ring->dev->dev_private); + + if (flush_domains & I915_GEM_DOMAIN_GTT) + DRM_WRITEMEMORYBARRIER(); + + /* Unconditionally invalidate gpu caches and ensure that we do flush + * any residual writes from the previous batch. + */ + return intel_ring_invalidate_all_caches(ring); } // i915_gem_check_execbuffer |