diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2012-07-19 16:58:34 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2012-07-19 17:55:00 +0100 |
commit | 6f60f89588caa70e7d8ed53ba453bbe8c2094a95 (patch) | |
tree | c6dbfc88a8f2920e412d9cbd81fc68a24a7b84e8 | |
parent | fc39d4b5cb105d269c5349e479daf112f5d93580 (diff) |
sna/gen6: Bump the WM thread count to 80
Note that we should only do this when "WiZ Hashing" is disabled. So we
should be checking the GT_MODE register (bring on i915_read!) to be sure
that it is safe to do so. However, it gives a big boost to performance of
render copies... It also causes perf benchmarks to hit thermal limits
much quicker.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- | src/sna/gen6_render.c | 33 | ||||
-rw-r--r-- | src/sna/gen7_render.c | 2 | ||||
-rw-r--r-- | src/sna/sna_render.h | 1 |
3 files changed, 33 insertions, 3 deletions
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c index 20a09d00..044a9f99 100644 --- a/src/sna/gen6_render.c +++ b/src/sna/gen6_render.c @@ -56,6 +56,31 @@ #define GEN6_MAX_SIZE 8192 +struct gt_info { + int max_vs_threads; + int max_gs_threads; + int max_wm_threads; + struct { + int size; + int max_vs_entries; + int max_gs_entries; + } urb; +}; + +static const struct gt_info gt1_info = { + .max_vs_threads = 24, + .max_gs_threads = 21, + .max_wm_threads = 40, + .urb = { 32, 256, 256 }, +}; + +static const struct gt_info gt2_info = { + .max_vs_threads = 60, + .max_gs_threads = 60, + .max_wm_threads = 80, + .urb = { 64, 256, 256 }, +}; + static const uint32_t ps_kernel_nomask_affine[][4] = { #include "exa_wm_src_affine.g6b" #include "exa_wm_src_sample_argb.g6b" @@ -422,7 +447,7 @@ gen6_emit_urb(struct sna *sna) { OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2)); OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) | - (256 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */ + (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */ OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) | (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */ } @@ -665,7 +690,7 @@ gen6_emit_wm(struct sna *sna, unsigned int kernel) wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT); OUT_BATCH(0); OUT_BATCH(6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT); /* DW4 */ - OUT_BATCH((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT | + OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT | GEN6_3DSTATE_WM_DISPATCH_ENABLE | GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); OUT_BATCH(wm_kernels[kernel].num_inputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT | @@ -4198,6 +4223,10 @@ static bool gen6_render_setup(struct sna *sna) struct gen6_sampler_state *ss; int i, j, k, l, m; + state->info = &gt1_info; + if (DEVICE_ID(sna->PciInfo) & 0x20) + state->info = &gt2_info; /* XXX 
requires GT_MODE WiZ disabled */ + sna_static_stream_init(&general); /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index 18ba826d..c041d666 100644 --- a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -4268,7 +4268,7 @@ static bool gen7_render_setup(struct sna *sna) state->info = &gt1_info; if (DEVICE_ID(sna->PciInfo) & 0x20) - state->info = &gt2_info; + state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */ sna_static_stream_init(&general); diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h index f0f4a2d1..5662a791 100644 --- a/src/sna/sna_render.h +++ b/src/sna/sna_render.h @@ -388,6 +388,7 @@ enum { }; struct gen6_render_state { + const struct gt_info *info; struct kgem_bo *general_bo; uint32_t vs_state; |