summaryrefslogtreecommitdiff
path: root/sys/dev/pci
diff options
context:
space:
mode:
authorJonathan Gray <jsg@cvs.openbsd.org>2023-02-10 14:32:04 +0000
committerJonathan Gray <jsg@cvs.openbsd.org>2023-02-10 14:32:04 +0000
commit629bddf921aeae634900f530b96c09d87fe0141e (patch)
treedc06ee1a3830678c524dadf19297b1b6199e69c9 /sys/dev/pci
parent6d44def05d51916ed8098d384eea652322c14d2d (diff)
drm/i915: Fix up locking around dumping requests lists
From John Harrison 04dcff26490cc8dedbfcf44cfb3e3e7a08622fd0 in linux-6.1.y/6.1.11 5bc4b43d5c6c9692ddc7b96116650cdf9406f3da in mainline linux
Diffstat (limited to 'sys/dev/pci')
-rw-r--r--sys/dev/pci/drm/i915/gt/intel_engine.h4
-rw-r--r--sys/dev/pci/drm/i915/gt/intel_engine_cs.c74
-rw-r--r--sys/dev/pci/drm/i915/gt/intel_execlists_submission.c27
-rw-r--r--sys/dev/pci/drm/i915/gt/intel_execlists_submission.h4
-rw-r--r--sys/dev/pci/drm/i915/i915_gpu_error.c26
5 files changed, 73 insertions, 62 deletions
diff --git a/sys/dev/pci/drm/i915/gt/intel_engine.h b/sys/dev/pci/drm/i915/gt/intel_engine.h
index cbc8b857d5f..7a4504ea35c 100644
--- a/sys/dev/pci/drm/i915/gt/intel_engine.h
+++ b/sys/dev/pci/drm/i915/gt/intel_engine.h
@@ -248,8 +248,8 @@ void intel_engine_dump_active_requests(struct list_head *requests,
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine,
ktime_t *now);
-struct i915_request *
-intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine);
+void intel_engine_get_hung_entity(struct intel_engine_cs *engine,
+ struct intel_context **ce, struct i915_request **rq);
u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
struct intel_context *
diff --git a/sys/dev/pci/drm/i915/gt/intel_engine_cs.c b/sys/dev/pci/drm/i915/gt/intel_engine_cs.c
index 5ca6a8214fd..c8ae8682156 100644
--- a/sys/dev/pci/drm/i915/gt/intel_engine_cs.c
+++ b/sys/dev/pci/drm/i915/gt/intel_engine_cs.c
@@ -2085,17 +2085,6 @@ static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
}
}
-static unsigned long list_count(struct list_head *list)
-{
- struct list_head *pos;
- unsigned long count = 0;
-
- list_for_each(pos, list)
- count++;
-
- return count;
-}
-
static unsigned long read_ul(void *p, size_t x)
{
return *(unsigned long *)(p + x);
@@ -2187,11 +2176,11 @@ void intel_engine_dump_active_requests(struct list_head *requests,
}
}
-static void engine_dump_active_requests(struct intel_engine_cs *engine, struct drm_printer *m)
+static void engine_dump_active_requests(struct intel_engine_cs *engine,
+ struct drm_printer *m)
{
+ struct intel_context *hung_ce = NULL;
struct i915_request *hung_rq = NULL;
- struct intel_context *ce;
- bool guc;
/*
* No need for an engine->irq_seqno_barrier() before the seqno reads.
@@ -2200,29 +2189,20 @@ static void engine_dump_active_requests(struct intel_engine_cs *engine, struct d
* But the intention here is just to report an instantaneous snapshot
* so that's fine.
*/
- lockdep_assert_held(&engine->sched_engine->lock);
+ intel_engine_get_hung_entity(engine, &hung_ce, &hung_rq);
drm_printf(m, "\tRequests:\n");
- guc = intel_uc_uses_guc_submission(&engine->gt->uc);
- if (guc) {
- ce = intel_engine_get_hung_context(engine);
- if (ce)
- hung_rq = intel_context_get_active_request(ce);
- } else {
- hung_rq = intel_engine_execlist_find_hung_request(engine);
- if (hung_rq)
- hung_rq = i915_request_get_rcu(hung_rq);
- }
-
if (hung_rq)
engine_dump_request(hung_rq, m, "\t\thung");
+ else if (hung_ce)
+ drm_printf(m, "\t\tGot hung ce but no hung rq!\n");
- if (guc)
+ if (intel_uc_uses_guc_submission(&engine->gt->uc))
intel_guc_dump_active_requests(engine, hung_rq, m);
else
- intel_engine_dump_active_requests(&engine->sched_engine->requests,
- hung_rq, m);
+ intel_execlists_dump_active_requests(engine, hung_rq, m);
+
if (hung_rq)
i915_request_put(hung_rq);
}
@@ -2234,7 +2214,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
struct i915_gpu_error * const error = &engine->i915->gpu_error;
struct i915_request *rq;
intel_wakeref_t wakeref;
- unsigned long flags;
ktime_t dummy;
if (header) {
@@ -2271,13 +2250,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
i915_reset_count(error));
print_properties(engine, m);
- spin_lock_irqsave(&engine->sched_engine->lock, flags);
engine_dump_active_requests(engine, m);
- drm_printf(m, "\tOn hold?: %lu\n",
- list_count(&engine->sched_engine->hold));
- spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
-
drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base);
wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
if (wakeref) {
@@ -2323,8 +2297,7 @@ intel_engine_create_virtual(struct intel_engine_cs **siblings,
return siblings[0]->cops->create_virtual(siblings, count, flags);
}
-struct i915_request *
-intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine)
+static struct i915_request *engine_execlist_find_hung_request(struct intel_engine_cs *engine)
{
struct i915_request *request, *active = NULL;
@@ -2376,6 +2349,33 @@ intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine)
return active;
}
+void intel_engine_get_hung_entity(struct intel_engine_cs *engine,
+ struct intel_context **ce, struct i915_request **rq)
+{
+ unsigned long flags;
+
+ *ce = intel_engine_get_hung_context(engine);
+ if (*ce) {
+ intel_engine_clear_hung_context(engine);
+
+ *rq = intel_context_get_active_request(*ce);
+ return;
+ }
+
+ /*
+ * Getting here with GuC enabled means it is a forced error capture
+ * with no actual hang. So, no need to attempt the execlist search.
+ */
+ if (intel_uc_uses_guc_submission(&engine->gt->uc))
+ return;
+
+ spin_lock_irqsave(&engine->sched_engine->lock, flags);
+ *rq = engine_execlist_find_hung_request(engine);
+ if (*rq)
+ *rq = i915_request_get_rcu(*rq);
+ spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
+}
+
void xehp_enable_ccs_engines(struct intel_engine_cs *engine)
{
/*
diff --git a/sys/dev/pci/drm/i915/gt/intel_execlists_submission.c b/sys/dev/pci/drm/i915/gt/intel_execlists_submission.c
index 18116f1c7c9..682f9a6a6b1 100644
--- a/sys/dev/pci/drm/i915/gt/intel_execlists_submission.c
+++ b/sys/dev/pci/drm/i915/gt/intel_execlists_submission.c
@@ -4156,6 +4156,33 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
+static unsigned long list_count(struct list_head *list)
+{
+ struct list_head *pos;
+ unsigned long count = 0;
+
+ list_for_each(pos, list)
+ count++;
+
+ return count;
+}
+
+void intel_execlists_dump_active_requests(struct intel_engine_cs *engine,
+ struct i915_request *hung_rq,
+ struct drm_printer *m)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&engine->sched_engine->lock, flags);
+
+ intel_engine_dump_active_requests(&engine->sched_engine->requests, hung_rq, m);
+
+ drm_printf(m, "\tOn hold?: %lu\n",
+ list_count(&engine->sched_engine->hold));
+
+ spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_execlists.c"
#endif
diff --git a/sys/dev/pci/drm/i915/gt/intel_execlists_submission.h b/sys/dev/pci/drm/i915/gt/intel_execlists_submission.h
index a1aa92c983a..d2c7d45ea06 100644
--- a/sys/dev/pci/drm/i915/gt/intel_execlists_submission.h
+++ b/sys/dev/pci/drm/i915/gt/intel_execlists_submission.h
@@ -32,6 +32,10 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
int indent),
unsigned int max);
+void intel_execlists_dump_active_requests(struct intel_engine_cs *engine,
+ struct i915_request *hung_rq,
+ struct drm_printer *m);
+
bool
intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
diff --git a/sys/dev/pci/drm/i915/i915_gpu_error.c b/sys/dev/pci/drm/i915/i915_gpu_error.c
index 0dbc91c37ef..b11a4dfea5b 100644
--- a/sys/dev/pci/drm/i915/i915_gpu_error.c
+++ b/sys/dev/pci/drm/i915/i915_gpu_error.c
@@ -1643,35 +1643,15 @@ capture_engine(struct intel_engine_cs *engine,
{
struct intel_engine_capture_vma *capture = NULL;
struct intel_engine_coredump *ee;
- struct intel_context *ce;
+ struct intel_context *ce = NULL;
struct i915_request *rq = NULL;
- unsigned long flags;
ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL, dump_flags);
if (!ee)
return NULL;
- ce = intel_engine_get_hung_context(engine);
- if (ce) {
- intel_engine_clear_hung_context(engine);
- rq = intel_context_get_active_request(ce);
- if (!rq || !i915_request_started(rq))
- goto no_request_capture;
- } else {
- /*
- * Getting here with GuC enabled means it is a forced error capture
- * with no actual hang. So, no need to attempt the execlist search.
- */
- if (!intel_uc_uses_guc_submission(&engine->gt->uc)) {
- spin_lock_irqsave(&engine->sched_engine->lock, flags);
- rq = intel_engine_execlist_find_hung_request(engine);
- if (rq)
- rq = i915_request_get_rcu(rq);
- spin_unlock_irqrestore(&engine->sched_engine->lock,
- flags);
- }
- }
- if (!rq)
+ intel_engine_get_hung_entity(engine, &ce, &rq);
+ if (!rq || !i915_request_started(rq))
goto no_request_capture;
capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);