summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jonathan Gray <jsg@jsg.id.au> 2013-03-11 13:15:09 +1100
committer Jonathan Gray <jsg@jsg.id.au> 2013-03-11 13:15:09 +1100
commit 3a4143d6f2489c761e3a0e00941567f7703c53e2 (patch)
tree a181fd1f2d4529f77d22ef69f6928ae66f77c681
parent 1251b11c93a9ab8eda018c9a6750ee989e6b9ed3 (diff)
handle error completions the same way as FreeBSD
-rw-r--r-- sys/dev/pci/drm/i915_drv.c 2
-rw-r--r-- sys/dev/pci/drm/i915_drv.h 3
-rw-r--r-- sys/dev/pci/drm/i915_gem.c 64
-rw-r--r-- sys/dev/pci/drm/i915_irq.c 5
4 files changed, 66 insertions, 8 deletions
diff --git a/sys/dev/pci/drm/i915_drv.c b/sys/dev/pci/drm/i915_drv.c
index 89a6b71776e..d3681b07293 100644
--- a/sys/dev/pci/drm/i915_drv.c
+++ b/sys/dev/pci/drm/i915_drv.c
@@ -1000,6 +1000,7 @@ inteldrm_attach(struct device *parent, struct device *self, void *aux)
timeout_set(&dev_priv->hangcheck_timer, i915_hangcheck_elapsed, dev_priv);
dev_priv->next_seqno = 1;
dev_priv->mm.suspended = 1;
+ dev_priv->error_completion = 0;
/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
if (IS_GEN3(dev)) {
@@ -1068,6 +1069,7 @@ inteldrm_attach(struct device *parent, struct device *self, void *aux)
mtx_init(&dev_priv->rps.lock, IPL_NONE);
mtx_init(&dev_priv->dpio_lock, IPL_NONE);
mtx_init(&mchdev_lock, IPL_NONE);
+ mtx_init(&dev_priv->error_completion_lock, IPL_NONE);
rw_init(&dev_priv->rps.hw_lock, "rpshw");
diff --git a/sys/dev/pci/drm/i915_drv.h b/sys/dev/pci/drm/i915_drv.h
index 2eaf075a67b..c10f5e59c81 100644
--- a/sys/dev/pci/drm/i915_drv.h
+++ b/sys/dev/pci/drm/i915_drv.h
@@ -776,6 +776,9 @@ struct inteldrm_softc {
unsigned int fsb_freq, mem_freq, is_ddr3;
+ int error_completion;
+ struct mutex error_completion_lock;
+
time_t last_gpu_reset;
struct intel_fbdev *fbdev;
diff --git a/sys/dev/pci/drm/i915_gem.c b/sys/dev/pci/drm/i915_gem.c
index 1dc1f4c2e2d..ef34c19a734 100644
--- a/sys/dev/pci/drm/i915_gem.c
+++ b/sys/dev/pci/drm/i915_gem.c
@@ -81,6 +81,7 @@ int i915_gem_check_olr(struct intel_ring_buffer *, u32);
void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
unsigned alignment, bool map_and_fenceable);
+int i915_gem_wait_for_error(struct drm_device *);
extern int ticks;
@@ -99,18 +100,48 @@ i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
// i915_gem_info_add_obj
// i915_gem_info_remove_obj
-// i915_gem_wait_for_error
+
+int
+i915_gem_wait_for_error(struct drm_device *dev)
+{ /* Returns 0, or the negated msleep() result if the sleep below fails. */
+ drm_i915_private_t *dev_priv= dev->dev_private;
+ int ret;
+
+ if (!atomic_read(&dev_priv->mm.wedged)) /* not wedged: nothing to wait for */
+ return (0);
+
+ /*
+ * Only wait 10 seconds for the gpu reset to complete to avoid hanging
+ * userspace. If it takes that long something really bad is going on and
+ * we should simply try to bail out and fail as gracefully as possible.
+ */
+ mtx_enter(&dev_priv->error_completion_lock);
+ while (dev_priv->error_completion == 0) { /* the 10*hz timeout is passed to each msleep() call */
+ ret = -msleep(&dev_priv->error_completion,
+ &dev_priv->error_completion_lock, PCATCH, "915wco", 10*hz);
+ if (ret != 0) { /* any non-zero msleep() result ends the wait */
+ mtx_leave(&dev_priv->error_completion_lock);
+ return (ret);
+ }
+ }
+ mtx_leave(&dev_priv->error_completion_lock);
+
+ if (atomic_read(&dev_priv->mm.wedged)) { /* still wedged after the wait */
+ mtx_enter(&dev_priv->error_completion_lock);
+ dev_priv->error_completion++; /* non-zero count makes later calls skip the sleep loop */
+ mtx_leave(&dev_priv->error_completion_lock);
+ }
+ return (0);
+}
int
i915_mutex_lock_interruptible(struct drm_device *dev)
{
int ret;
-#ifdef notyet
ret = i915_gem_wait_for_error(dev);
if (ret)
return ret;
-#endif
ret = rw_enter(&dev->dev_lock, RW_WRITE | RW_INTR);
if (ret)
@@ -426,8 +457,26 @@ int
i915_gem_check_wedge(struct inteldrm_softc *dev_priv,
bool interruptible)
{
- if (dev_priv->mm.wedged)
- return (EIO);
+ if (atomic_read(&dev_priv->mm.wedged) != 0) { /* wedged: report -EIO or -EAGAIN */
+ bool recovery_complete;
+
+ /* Give the error handler a chance to run. */
+ mtx_enter(&dev_priv->error_completion_lock);
+ recovery_complete = dev_priv->error_completion > 0; /* test the count, not its address */
+ mtx_leave(&dev_priv->error_completion_lock);
+
+ /* Non-interruptible callers can't handle -EAGAIN, hence return
+ * -EIO unconditionally for these. */
+ if (!interruptible)
+ return -EIO;
+
+ /* Recovery complete, but still wedged means reset failure. */
+ if (recovery_complete)
+ return -EIO;
+
+ return -EAGAIN;
+ }
+
return 0;
}
@@ -465,8 +514,9 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
int ret = 0;
/* Check first because poking a wedged chip is bad. */
- if (dev_priv->mm.wedged)
- return (EIO);
+ ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
+ if (ret)
+ return (ret);
ret = i915_gem_check_olr(ring, seqno);
if (ret)
diff --git a/sys/dev/pci/drm/i915_irq.c b/sys/dev/pci/drm/i915_irq.c
index 8f4a86438fb..b670483851f 100644
--- a/sys/dev/pci/drm/i915_irq.c
+++ b/sys/dev/pci/drm/i915_irq.c
@@ -943,7 +943,10 @@ i915_error_work_func(void *arg1, void *arg2)
atomic_set(&dev_priv->mm.wedged, 0);
// kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event);
}
-// complete_all(&dev_priv->error_completion);
+ mtx_enter(&dev_priv->error_completion_lock);
+ dev_priv->error_completion++;
+ wakeup(&dev_priv->error_completion);
+ mtx_leave(&dev_priv->error_completion_lock);
}
}