sna/gen6: Bump the WM thread count to 80

Note that we should only do this when "WiZ Hashing" is disabled. So we should be checking the GT_MODE register (bring on i915_read!) to be sure that it is safe to do so. However, it gives a big boost to the performance of render copies... It also causes perf benchmarks to hit thermal limits much quicker. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
author: Chris Wilson <chris@chris-wilson.co.uk> 2012-07-19 16:58:34 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2012-07-19 17:55:00 +0100
commit: 6f60f89588caa70e7d8ed53ba453bbe8c2094a95 (patch)
tree: c6dbfc88a8f2920e412d9cbd81fc68a24a7b84e8 /src
parent: fc39d4b5cb105d269c5349e479daf112f5d93580 (diff)
3 files changed, 33 insertions, 3 deletions
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 20a09d00..044a9f99 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -56,6 +56,31 @@
 
 #define GEN6_MAX_SIZE 8192
 
+struct gt_info {
+	int max_vs_threads;
+	int max_gs_threads;
+	int max_wm_threads;
+	struct {
+		int size;
+		int max_vs_entries;
+		int max_gs_entries;
+	} urb;
+};
+
+static const struct gt_info gt1_info = {
+	.max_vs_threads = 24,
+	.max_gs_threads = 21,
+	.max_wm_threads = 40,
+	.urb = { 32, 256, 256 },
+};
+
+static const struct gt_info gt2_info = {
+	.max_vs_threads = 60,
+	.max_gs_threads = 60,
+	.max_wm_threads = 80,
+	.urb = { 64, 256, 256 },
+};
+
 static const uint32_t ps_kernel_nomask_affine[][4] = {
 #include "exa_wm_src_affine.g6b"
 #include "exa_wm_src_sample_argb.g6b"
@@ -422,7 +447,7 @@ gen6_emit_urb(struct sna *sna)
 {
 	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
 	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
-		  (256 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
+		  (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
 	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
 		  (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
 }
@@ -665,7 +690,7 @@ gen6_emit_wm(struct sna *sna, unsigned int kernel)
 		  wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
 	OUT_BATCH(0);
 	OUT_BATCH(6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT); /* DW4 */
-	OUT_BATCH((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
+	OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
 		  GEN6_3DSTATE_WM_DISPATCH_ENABLE |
 		  GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
 	OUT_BATCH(wm_kernels[kernel].num_inputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
@@ -4198,6 +4223,10 @@ static bool gen6_render_setup(struct sna *sna)
 	struct gen6_sampler_state *ss;
 	int i, j, k, l, m;
 
+	state->info = &gt1_info;
+	if (DEVICE_ID(sna->PciInfo) & 0x20)
+		state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */
+
 	sna_static_stream_init(&general);
 
 	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 18ba826d..c041d666 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -4268,7 +4268,7 @@ static bool gen7_render_setup(struct sna *sna)
 
 	state->info = &gt1_info;
 	if (DEVICE_ID(sna->PciInfo) & 0x20)
-		state->info = &gt2_info;
+		state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */
 
 	sna_static_stream_init(&general);
 
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index f0f4a2d1..5662a791 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -388,6 +388,7 @@ enum {
 };
 
 struct gen6_render_state {
+	const struct gt_info *info;
 	struct kgem_bo *general_bo;
 
 	uint32_t vs_state;
author	Chris Wilson <chris@chris-wilson.co.uk>	2012-07-19 16:58:34 +0100
committer	Chris Wilson <chris@chris-wilson.co.uk>	2012-07-19 17:55:00 +0100
commit	6f60f89588caa70e7d8ed53ba453bbe8c2094a95 (patch)
tree	c6dbfc88a8f2920e412d9cbd81fc68a24a7b84e8 /src
parent	fc39d4b5cb105d269c5349e479daf112f5d93580 (diff)