summary | refs | log | tree | commit | diff
path: root/sys/dev/pci/drm/i915
diff options
context:
space:
mode:
author: Jonathan Gray <jsg@cvs.openbsd.org> 2020-06-26 05:38:57 +0000
committer: Jonathan Gray <jsg@cvs.openbsd.org> 2020-06-26 05:38:57 +0000
commit: e67935e29b79ee1e49f150a15d797f6fd8343c1c (patch)
tree: 4b421019b453413de82feccda659a08a5052aefe /sys/dev/pci/drm/i915
parent: 3f3288cd331ca451024b1cd5838afce6064d2c13 (diff)
drm/i915/gt: Move hsw GT workarounds from init_clock_gating to workarounds
From Chris Wilson
24fa6c758204c83904881e78658f5cff7980938c in linux 5.7.y/5.7.6
ef50fa9bd17d13d0611e39e13b37bbd3e1ea50bf in mainline linux
Diffstat (limited to 'sys/dev/pci/drm/i915')
-rw-r--r-- sys/dev/pci/drm/i915/gt/intel_workarounds.c 48
-rw-r--r-- sys/dev/pci/drm/i915/intel_pm.c 39
2 files changed, 50 insertions, 37 deletions
diff --git a/sys/dev/pci/drm/i915/gt/intel_workarounds.c b/sys/dev/pci/drm/i915/gt/intel_workarounds.c
index 1d30fb21a02..b5b8b73ec32 100644
--- a/sys/dev/pci/drm/i915/gt/intel_workarounds.c
+++ b/sys/dev/pci/drm/i915/gt/intel_workarounds.c
@@ -179,6 +179,12 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
}
static void
+wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
+{
+ wa_write_masked_or(wal, reg, clr, 0);
+}
+
+static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
@@ -698,6 +704,46 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
}
static void
+hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ /* L3 caching of data atomics doesn't work -- disable it. */
+ wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
+
+ wa_add(wal,
+ HSW_ROW_CHICKEN3, 0,
+ _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
+ 0 /* XXX does this reg exist? */);
+
+ /* WaVSRefCountFullforceMissDisable:hsw */
+ wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
+
+ wa_masked_dis(wal,
+ CACHE_MODE_0_GEN7,
+ /* WaDisable_RenderCache_OperationalFlush:hsw */
+ RC_OP_FLUSH_ENABLE |
+ /* enable HiZ Raw Stall Optimization */
+ HIZ_RAW_STALL_OPT_DISABLE);
+
+ /* WaDisable4x2SubspanOptimization:hsw */
+ wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ */
+ wa_add(wal, GEN7_GT_MODE, 0,
+ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
+ GEN6_WIZ_HASHING_16x4);
+
+ /* WaSampleCChickenBitEnable:hsw */
+ wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
+}
+
+static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
/* WaDisableKillLogic:bxt,skl,kbl */
@@ -974,6 +1020,8 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
bxt_gt_workarounds_init(i915, wal);
else if (IS_SKYLAKE(i915))
skl_gt_workarounds_init(i915, wal);
+ else if (IS_HASWELL(i915))
+ hsw_gt_workarounds_init(i915, wal);
else if (INTEL_GEN(i915) <= 8)
return;
else
diff --git a/sys/dev/pci/drm/i915/intel_pm.c b/sys/dev/pci/drm/i915/intel_pm.c
index 3132715bab1..a8793c728a0 100644
--- a/sys/dev/pci/drm/i915/intel_pm.c
+++ b/sys/dev/pci/drm/i915/intel_pm.c
@@ -6994,45 +6994,10 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
{
- /* L3 caching of data atomics doesn't work -- disable it. */
- I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
- I915_WRITE(HSW_ROW_CHICKEN3,
- _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
-
/* This is required by WaCatErrorRejectionIssue:hsw */
I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
- I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
- GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
-
- /* WaVSRefCountFullforceMissDisable:hsw */
- I915_WRITE(GEN7_FF_THREAD_MODE,
- I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
-
- /* WaDisable_RenderCache_OperationalFlush:hsw */
- I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
- /* enable HiZ Raw Stall Optimization */
- I915_WRITE(CACHE_MODE_0_GEN7,
- _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
-
- /* WaDisable4x2SubspanOptimization:hsw */
- I915_WRITE(CACHE_MODE_1,
- _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
-
- /*
- * BSpec recommends 8x4 when MSAA is used,
- * however in practice 16x4 seems fastest.
- *
- * Note that PS/WM thread counts depend on the WIZ hashing
- * disable bit, which we don't touch here, but it's good
- * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
- */
- I915_WRITE(GEN7_GT_MODE,
- _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
-
- /* WaSampleCChickenBitEnable:hsw */
- I915_WRITE(HALF_SLICE_CHICKEN3,
- _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
+ I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
+ GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
/* WaSwitchSolVfFArbitrationPriority:hsw */
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);