summaryrefslogtreecommitdiff
path: root/sys/dev/pci
diff options
context:
space:
mode:
authorJonathan Gray <jsg@cvs.openbsd.org>2022-01-29 12:37:10 +0000
committerJonathan Gray <jsg@cvs.openbsd.org>2022-01-29 12:37:10 +0000
commit8b38623022f047e7a4d07eb514da91747f63a638 (patch)
tree03105ce76e15b92a3861d0038129684148d24ce4 /sys/dev/pci
parentf2d367c551ae4a32b81dbf3bb4589b6050d16c99 (diff)
drm/i915: Flush TLBs before releasing backing store
From Tvrtko Ursulin 8a17a077e7e9ecce25c95dbdb27843d2d6c2f0f7 in linux 5.15.y/5.15.18 7938d61591d33394a21bdd7797a245b65428f44c in mainline linux
Diffstat (limited to 'sys/dev/pci')
-rw-r--r--sys/dev/pci/drm/i915/gem/i915_gem_object_types.h1
-rw-r--r--sys/dev/pci/drm/i915/gem/i915_gem_pages.c10
-rw-r--r--sys/dev/pci/drm/i915/gt/intel_gt.c102
-rw-r--r--sys/dev/pci/drm/i915/gt/intel_gt.h2
-rw-r--r--sys/dev/pci/drm/i915/gt/intel_gt_types.h2
-rw-r--r--sys/dev/pci/drm/i915/i915_reg.h11
-rw-r--r--sys/dev/pci/drm/i915/i915_vma.c3
-rw-r--r--sys/dev/pci/drm/i915/intel_uncore.c26
-rw-r--r--sys/dev/pci/drm/i915/intel_uncore.h2
9 files changed, 155 insertions, 4 deletions
diff --git a/sys/dev/pci/drm/i915/gem/i915_gem_object_types.h b/sys/dev/pci/drm/i915/gem/i915_gem_object_types.h
index 913b6db8e90..182e4fa258c 100644
--- a/sys/dev/pci/drm/i915/gem/i915_gem_object_types.h
+++ b/sys/dev/pci/drm/i915/gem/i915_gem_object_types.h
@@ -298,6 +298,7 @@ struct drm_i915_gem_object {
I915_BO_ALLOC_USER)
#define I915_BO_READONLY BIT(4)
#define I915_TILING_QUIRK_BIT 5 /* unknown swizzling; do not release! */
+#define I915_BO_WAS_BOUND_BIT 6
/**
* @mem_flags - Mutable placement-related flags
diff --git a/sys/dev/pci/drm/i915/gem/i915_gem_pages.c b/sys/dev/pci/drm/i915/gem/i915_gem_pages.c
index 1a10d9a7acb..c3eda20c9b5 100644
--- a/sys/dev/pci/drm/i915/gem/i915_gem_pages.c
+++ b/sys/dev/pci/drm/i915/gem/i915_gem_pages.c
@@ -10,6 +10,8 @@
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
+#include "gt/intel_gt.h"
+
void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages,
unsigned int sg_page_sizes)
@@ -218,6 +220,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
__i915_gem_object_reset_page_iter(obj);
obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+ if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
+ intel_gt_invalidate_tlbs(&i915->gt);
+ }
+
return pages;
}
diff --git a/sys/dev/pci/drm/i915/gt/intel_gt.c b/sys/dev/pci/drm/i915/gt/intel_gt.c
index dd9e779cf66..6411b019c55 100644
--- a/sys/dev/pci/drm/i915/gt/intel_gt.c
+++ b/sys/dev/pci/drm/i915/gt/intel_gt.c
@@ -29,6 +29,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
mtx_init(&gt->irq_lock, IPL_TTY);
+ rw_init(&gt->tlb_invalidate_lock, "itlbinv");
+
INIT_LIST_HEAD(&gt->closed_vma);
mtx_init(&gt->closed_lock, IPL_TTY);
@@ -899,3 +901,103 @@ void intel_gt_info_print(const struct intel_gt_info *info,
intel_sseu_dump(&info->sseu, p);
}
+
+struct reg_and_bit {
+ i915_reg_t reg;
+ u32 bit;
+};
+
+static struct reg_and_bit
+get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
+ const i915_reg_t *regs, const unsigned int num)
+{
+ const unsigned int class = engine->class;
+ struct reg_and_bit rb = { };
+
+ if (drm_WARN_ON_ONCE(&engine->i915->drm,
+ class >= num || !regs[class].reg))
+ return rb;
+
+ rb.reg = regs[class];
+ if (gen8 && class == VIDEO_DECODE_CLASS)
+ rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
+ else
+ rb.bit = engine->instance;
+
+ rb.bit = BIT(rb.bit);
+
+ return rb;
+}
+
+void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+{
+ static const i915_reg_t gen8_regs[] = {
+ [RENDER_CLASS] = GEN8_RTCR,
+ [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */
+ [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR,
+ [COPY_ENGINE_CLASS] = GEN8_BTCR,
+ };
+ static const i915_reg_t gen12_regs[] = {
+ [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR,
+ [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR,
+ [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR,
+ [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
+ };
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ const i915_reg_t *regs;
+ unsigned int num = 0;
+
+ if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+ return;
+
+ if (GRAPHICS_VER(i915) == 12) {
+ regs = gen12_regs;
+ num = ARRAY_SIZE(gen12_regs);
+ } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
+ regs = gen8_regs;
+ num = ARRAY_SIZE(gen8_regs);
+ } else if (GRAPHICS_VER(i915) < 8) {
+ return;
+ }
+
+ if (drm_WARN_ONCE(&i915->drm, !num,
+ "Platform does not implement TLB invalidation!"))
+ return;
+
+ GEM_TRACE("\n");
+
+ assert_rpm_wakelock_held(&i915->runtime_pm);
+
+ mutex_lock(&gt->tlb_invalidate_lock);
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ for_each_engine(engine, gt, id) {
+ /*
+ * HW architecture suggest typical invalidation time at 40us,
+ * with pessimistic cases up to 100us and a recommendation to
+ * cap at 1ms. We go a bit higher just in case.
+ */
+ const unsigned int timeout_us = 100;
+ const unsigned int timeout_ms = 4;
+ struct reg_and_bit rb;
+
+ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+ if (!i915_mmio_reg_offset(rb.reg))
+ continue;
+
+ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ if (__intel_wait_for_register_fw(uncore,
+ rb.reg, rb.bit, 0,
+ timeout_us, timeout_ms,
+ NULL))
+ drm_err_ratelimited(&gt->i915->drm,
+ "%s TLB invalidation did not complete in %ums!\n",
+ engine->name, timeout_ms);
+ }
+
+ intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
+ mutex_unlock(&gt->tlb_invalidate_lock);
+}
diff --git a/sys/dev/pci/drm/i915/gt/intel_gt.h b/sys/dev/pci/drm/i915/gt/intel_gt.h
index 74e771871a9..c0169d6017c 100644
--- a/sys/dev/pci/drm/i915/gt/intel_gt.h
+++ b/sys/dev/pci/drm/i915/gt/intel_gt.h
@@ -90,4 +90,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,
void intel_gt_watchdog_work(struct work_struct *work);
+void intel_gt_invalidate_tlbs(struct intel_gt *gt);
+
#endif /* __INTEL_GT_H__ */
diff --git a/sys/dev/pci/drm/i915/gt/intel_gt_types.h b/sys/dev/pci/drm/i915/gt/intel_gt_types.h
index a81e21bf1bd..02c241bdaeb 100644
--- a/sys/dev/pci/drm/i915/gt/intel_gt_types.h
+++ b/sys/dev/pci/drm/i915/gt/intel_gt_types.h
@@ -72,6 +72,8 @@ struct intel_gt {
struct intel_uc uc;
+ struct rwlock tlb_invalidate_lock;
+
struct intel_gt_timelines {
spinlock_t lock; /* protects active_list */
struct list_head active_list;
diff --git a/sys/dev/pci/drm/i915/i915_reg.h b/sys/dev/pci/drm/i915/i915_reg.h
index 9023d4ecf3b..c65473fc909 100644
--- a/sys/dev/pci/drm/i915/i915_reg.h
+++ b/sys/dev/pci/drm/i915/i915_reg.h
@@ -2669,6 +2669,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1 << 28)
#define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1 << 24)
+#define GEN8_RTCR _MMIO(0x4260)
+#define GEN8_M1TCR _MMIO(0x4264)
+#define GEN8_M2TCR _MMIO(0x4268)
+#define GEN8_BTCR _MMIO(0x426c)
+#define GEN8_VTCR _MMIO(0x4270)
+
#if 0
#define PRB0_TAIL _MMIO(0x2030)
#define PRB0_HEAD _MMIO(0x2034)
@@ -2763,6 +2769,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define FAULT_VA_HIGH_BITS (0xf << 0)
#define FAULT_GTT_SEL (1 << 4)
+#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8)
+#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc)
+#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0)
+#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4)
+
#define GEN12_AUX_ERR_DBG _MMIO(0x43f4)
#define FPGA_DBG _MMIO(0x42300)
diff --git a/sys/dev/pci/drm/i915/i915_vma.c b/sys/dev/pci/drm/i915/i915_vma.c
index 0d457a60f08..8a20291f385 100644
--- a/sys/dev/pci/drm/i915/i915_vma.c
+++ b/sys/dev/pci/drm/i915/i915_vma.c
@@ -446,6 +446,9 @@ int i915_vma_bind(struct i915_vma *vma,
vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
}
+ if (vma->obj)
+ set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
+
atomic_or(bind_flags, &vma->flags);
return 0;
}
diff --git a/sys/dev/pci/drm/i915/intel_uncore.c b/sys/dev/pci/drm/i915/intel_uncore.c
index 4e83bd99781..f3690ab49b5 100644
--- a/sys/dev/pci/drm/i915/intel_uncore.c
+++ b/sys/dev/pci/drm/i915/intel_uncore.c
@@ -751,7 +751,8 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore,
}
static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
- enum forcewake_domains fw_domains)
+ enum forcewake_domains fw_domains,
+ bool delayed)
{
struct intel_uncore_forcewake_domain *domain;
unsigned int tmp;
@@ -766,7 +767,11 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
continue;
}
- uncore->funcs.force_wake_put(uncore, domain->mask);
+ if (delayed &&
+ !(domain->uncore->fw_domains_timer & domain->mask))
+ fw_domain_arm_timer(domain);
+ else
+ uncore->funcs.force_wake_put(uncore, domain->mask);
}
}
@@ -787,7 +792,20 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore,
return;
spin_lock_irqsave(&uncore->lock, irqflags);
- __intel_uncore_forcewake_put(uncore, fw_domains);
+ __intel_uncore_forcewake_put(uncore, fw_domains, false);
+ spin_unlock_irqrestore(&uncore->lock, irqflags);
+}
+
+void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
+ enum forcewake_domains fw_domains)
+{
+ unsigned long irqflags;
+
+ if (!uncore->funcs.force_wake_put)
+ return;
+
+ spin_lock_irqsave(&uncore->lock, irqflags);
+ __intel_uncore_forcewake_put(uncore, fw_domains, true);
spin_unlock_irqrestore(&uncore->lock, irqflags);
}
@@ -829,7 +847,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore *uncore,
if (!uncore->funcs.force_wake_put)
return;
- __intel_uncore_forcewake_put(uncore, fw_domains);
+ __intel_uncore_forcewake_put(uncore, fw_domains, false);
}
void assert_forcewakes_inactive(struct intel_uncore *uncore)
diff --git a/sys/dev/pci/drm/i915/intel_uncore.h b/sys/dev/pci/drm/i915/intel_uncore.h
index f4a18ac14c1..9e46fff2a5c 100644
--- a/sys/dev/pci/drm/i915/intel_uncore.h
+++ b/sys/dev/pci/drm/i915/intel_uncore.h
@@ -229,6 +229,8 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore,
enum forcewake_domains domains);
void intel_uncore_forcewake_put(struct intel_uncore *uncore,
enum forcewake_domains domains);
+void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
+ enum forcewake_domains domains);
void intel_uncore_forcewake_flush(struct intel_uncore *uncore,
enum forcewake_domains fw_domains);