diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2012-07-31 10:20:51 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2012-07-31 19:11:54 +0100 |
commit | fd3a1236051265fab700aad689a171de47d7338f (patch) | |
tree | 6641adf9c792b6084d1f7a2f26dedff2e77b8e0e | |
parent | 8922b804bc9ed27957c81f7cda4812ab4ecbd4de (diff) |
sna/gen6: Enable 8 pixel dispatch

This gives a small performance increase when operating with rectangles,
which is reasonably frequent.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- | src/sna/gen6_render.c | 60 |
-rw-r--r-- | src/sna/gen6_render.h | 5 |
-rw-r--r-- | src/sna/gen7_render.c | 2 |
-rw-r--r-- | src/sna/sna_render.h | 2 |
4 files changed, 50 insertions, 19 deletions
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c index e3a103cb..78baa3ac 100644 --- a/src/sna/gen6_render.c +++ b/src/sna/gen6_render.c @@ -55,6 +55,14 @@ #define NO_RING_SWITCH 0 #define PREFER_RENDER 0 +#define USE_8_PIXEL_DISPATCH 1 +#define USE_16_PIXEL_DISPATCH 1 +#define USE_32_PIXEL_DISPATCH 0 + +#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH +#error "Must select at least 8, 16 or 32 pixel dispatch" +#endif + #define GEN6_MAX_SIZE 8192 struct gt_info { @@ -612,29 +620,36 @@ gen6_emit_sf(struct sna *sna, bool has_mask) static void gen6_emit_wm(struct sna *sna, unsigned int kernel) { + const uint32_t *kernels; + if (sna->render_state.gen6.kernel == kernel) return; sna->render_state.gen6.kernel = kernel; + kernels = sna->render_state.gen6.wm_kernel[kernel]; - DBG(("%s: switching to %s, num_surfaces=%d\n", + DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n", __FUNCTION__, - wm_kernels[kernel].name, - wm_kernels[kernel].num_surfaces)); + wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces, + kernels[0], kernels[1], kernels[2])); OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); - OUT_BATCH(sna->render_state.gen6.wm_kernel[kernel]); + OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]); OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT | wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT); - OUT_BATCH(0); - OUT_BATCH(6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT); /* DW4 */ + OUT_BATCH(0); /* scratch space */ + OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT | + 8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT | + 6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT); OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT | - GEN6_3DSTATE_WM_DISPATCH_ENABLE | - GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); + (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) | + (kernels[1] ? 
GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) | + (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) | + GEN6_3DSTATE_WM_DISPATCH_ENABLE); OUT_BATCH(wm_kernels[kernel].num_inputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT | GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); - OUT_BATCH(0); - OUT_BATCH(0); + OUT_BATCH(kernels[2]); + OUT_BATCH(kernels[1]); } static bool @@ -4156,18 +4171,31 @@ static bool gen6_render_setup(struct sna *sna) for (m = 0; m < GEN6_KERNEL_COUNT; m++) { if (wm_kernels[m].size) { - state->wm_kernel[m] = + state->wm_kernel[m][1] = sna_static_stream_add(&general, wm_kernels[m].data, wm_kernels[m].size, 64); } else { - state->wm_kernel[m] = - sna_static_stream_compile_wm(sna, &general, - wm_kernels[m].data, - 16); + if (USE_8_PIXEL_DISPATCH) { + state->wm_kernel[m][0] = + sna_static_stream_compile_wm(sna, &general, + wm_kernels[m].data, 8); + } + + if (USE_16_PIXEL_DISPATCH) { + state->wm_kernel[m][1] = + sna_static_stream_compile_wm(sna, &general, + wm_kernels[m].data, 16); + } + + if (USE_32_PIXEL_DISPATCH) { + state->wm_kernel[m][2] = + sna_static_stream_compile_wm(sna, &general, + wm_kernels[m].data, 32); + } } - assert(state->wm_kernel[m]); + assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]); } ss = sna_static_stream_map(&general, diff --git a/src/sna/gen6_render.h b/src/sna/gen6_render.h index 9cc8e14e..2201a627 100644 --- a/src/sna/gen6_render.h +++ b/src/sna/gen6_render.h @@ -97,10 +97,13 @@ # define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT 27 # define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 /* DW4 */ -# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16 +# define GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT 16 +# define GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT 8 +# define GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT 0 /* DW5 */ # define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25 # define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_3DSTATE_WM_32_DISPATCH_ENABLE (1 << 2) # define 
GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1) # define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0) /* DW6 */ diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index 6381ccfb..8c64016d 100644 --- a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -4277,7 +4277,7 @@ static bool gen7_render_setup(struct sna *sna) wm_kernels[m].data, 32); } } - assert(state->wm_kernel[m][1]); + assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]); } ss = sna_static_stream_map(&general, diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h index 943c248b..142f2224 100644 --- a/src/sna/sna_render.h +++ b/src/sna/sna_render.h @@ -396,7 +396,7 @@ struct gen6_render_state { uint32_t sf_state; uint32_t sf_mask_state; uint32_t wm_state; - uint32_t wm_kernel[GEN6_KERNEL_COUNT]; + uint32_t wm_kernel[GEN6_KERNEL_COUNT][3]; uint32_t cc_vp; uint32_t cc_blend; |