From 3a4143d6f2489c761e3a0e00941567f7703c53e2 Mon Sep 17 00:00:00 2001
From: Jonathan Gray
Date: Mon, 11 Mar 2013 13:15:09 +1100
Subject: handle error completions the same way as FreeBSD

---
Review note: in i915_gem_check_wedge() the posted version computed
recovery_complete as "(&dev_priv->error_completion) > 0", i.e. it tested
the address of the counter rather than its value.  That test is
effectively always true, so interruptible callers always received -EIO
and the -EAGAIN path could never be taken.  The counter's value is
compared here instead.  No lines were added or removed, so the hunk
headers and the diffstat below are unchanged.

 sys/dev/pci/drm/i915_drv.c |  2 ++
 sys/dev/pci/drm/i915_drv.h |  3 +++
 sys/dev/pci/drm/i915_gem.c | 64 +++++++++++++++++++++++++++++++++++++++++-----
 sys/dev/pci/drm/i915_irq.c |  5 +++-
 4 files changed, 66 insertions(+), 8 deletions(-)

diff --git a/sys/dev/pci/drm/i915_drv.c b/sys/dev/pci/drm/i915_drv.c
index 89a6b71776e..d3681b07293 100644
--- a/sys/dev/pci/drm/i915_drv.c
+++ b/sys/dev/pci/drm/i915_drv.c
@@ -1000,6 +1000,7 @@ inteldrm_attach(struct device *parent, struct device *self, void *aux)
 	timeout_set(&dev_priv->hangcheck_timer, i915_hangcheck_elapsed, dev_priv);
 	dev_priv->next_seqno = 1;
 	dev_priv->mm.suspended = 1;
+	dev_priv->error_completion = 0;
 
 	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
 	if (IS_GEN3(dev)) {
@@ -1068,6 +1069,7 @@ inteldrm_attach(struct device *parent, struct device *self, void *aux)
 	mtx_init(&dev_priv->rps.lock, IPL_NONE);
 	mtx_init(&dev_priv->dpio_lock, IPL_NONE);
 	mtx_init(&mchdev_lock, IPL_NONE);
+	mtx_init(&dev_priv->error_completion_lock, IPL_NONE);
 
 	rw_init(&dev_priv->rps.hw_lock, "rpshw");
 
diff --git a/sys/dev/pci/drm/i915_drv.h b/sys/dev/pci/drm/i915_drv.h
index 2eaf075a67b..c10f5e59c81 100644
--- a/sys/dev/pci/drm/i915_drv.h
+++ b/sys/dev/pci/drm/i915_drv.h
@@ -776,6 +776,9 @@ struct inteldrm_softc {
 
 	unsigned int fsb_freq, mem_freq, is_ddr3;
 
+	int error_completion;
+	struct mutex error_completion_lock;
+
 	time_t last_gpu_reset;
 
 	struct intel_fbdev *fbdev;
diff --git a/sys/dev/pci/drm/i915_gem.c b/sys/dev/pci/drm/i915_gem.c
index 1dc1f4c2e2d..ef34c19a734 100644
--- a/sys/dev/pci/drm/i915_gem.c
+++ b/sys/dev/pci/drm/i915_gem.c
@@ -81,6 +81,7 @@ int i915_gem_check_olr(struct intel_ring_buffer *, u32);
 void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
 int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
     unsigned alignment, bool map_and_fenceable);
+int i915_gem_wait_for_error(struct drm_device *);
 
 extern int ticks;
 
@@ -99,18 +100,48 @@ i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
 
 // i915_gem_info_add_obj
 // i915_gem_info_remove_obj
-// i915_gem_wait_for_error
+
+int
+i915_gem_wait_for_error(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv= dev->dev_private;
+	int ret;
+
+	if (!atomic_read(&dev_priv->mm.wedged))
+		return (0);
+
+	/*
+	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
+	 * userspace. If it takes that long something really bad is going on and
+	 * we should simply try to bail out and fail as gracefully as possible.
+	 */
+	mtx_enter(&dev_priv->error_completion_lock);
+	while (dev_priv->error_completion == 0) {
+		ret = -msleep(&dev_priv->error_completion,
+		    &dev_priv->error_completion_lock, PCATCH, "915wco", 10*hz);
+		if (ret != 0) {
+			mtx_leave(&dev_priv->error_completion_lock);
+			return (ret);
+		}
+	}
+	mtx_leave(&dev_priv->error_completion_lock);
+
+	if (atomic_read(&dev_priv->mm.wedged)) {
+		mtx_enter(&dev_priv->error_completion_lock);
+		dev_priv->error_completion++;
+		mtx_leave(&dev_priv->error_completion_lock);
+	}
+	return (0);
+}
 
 int
 i915_mutex_lock_interruptible(struct drm_device *dev)
 {
 	int ret;
 
-#ifdef notyet
 	ret = i915_gem_wait_for_error(dev);
 	if (ret)
 		return ret;
-#endif
 
 	ret = rw_enter(&dev->dev_lock, RW_WRITE | RW_INTR);
 	if (ret)
@@ -426,8 +457,26 @@ int
 i915_gem_check_wedge(struct inteldrm_softc *dev_priv,
     bool interruptible)
 {
-	if (dev_priv->mm.wedged)
-		return (EIO);
+	if (atomic_read(&dev_priv->mm.wedged) != 0) {
+		bool recovery_complete;
+
+		/* Give the error handler a chance to run. */
+		mtx_enter(&dev_priv->error_completion_lock);
+		recovery_complete = dev_priv->error_completion > 0;
+		mtx_leave(&dev_priv->error_completion_lock);
+
+		/* Non-interruptible callers can't handle -EAGAIN, hence return
+		 * -EIO unconditionally for these. */
+		if (!interruptible)
+			return -EIO;
+
+		/* Recovery complete, but still wedged means reset failure. */
+		if (recovery_complete)
+			return -EIO;
+
+		return -EAGAIN;
+	}
+
 	return 0;
 }
 
@@ -465,8 +514,9 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
 	int ret = 0;
 
 	/* Check first because poking a wedged chip is bad. */
-	if (dev_priv->mm.wedged)
-		return (EIO);
+	ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
+	if (ret)
+		return (ret);
 
 	ret = i915_gem_check_olr(ring, seqno);
 	if (ret)
diff --git a/sys/dev/pci/drm/i915_irq.c b/sys/dev/pci/drm/i915_irq.c
index 8f4a86438fb..b670483851f 100644
--- a/sys/dev/pci/drm/i915_irq.c
+++ b/sys/dev/pci/drm/i915_irq.c
@@ -943,7 +943,10 @@ i915_error_work_func(void *arg1, void *arg2)
 			atomic_set(&dev_priv->mm.wedged, 0);
 //			kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event);
 		}
-//		complete_all(&dev_priv->error_completion);
+		mtx_enter(&dev_priv->error_completion_lock);
+		dev_priv->error_completion++;
+		wakeup(&dev_priv->error_completion);
+		mtx_leave(&dev_priv->error_completion_lock);
 	}
 }
 
-- 
cgit v1.2.3