summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2012-07-31 10:20:51 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2012-07-31 19:11:54 +0100
commit: fd3a1236051265fab700aad689a171de47d7338f (patch)
tree: 6641adf9c792b6084d1f7a2f26dedff2e77b8e0e
parent: 8922b804bc9ed27957c81f7cda4812ab4ecbd4de (diff)
sna/gen6: Enable 8 pixel dispatch
This gives a small performance increase when operating with rectangles, which is reasonably frequent.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- src/sna/gen6_render.c 60
-rw-r--r-- src/sna/gen6_render.h 5
-rw-r--r-- src/sna/gen7_render.c 2
-rw-r--r-- src/sna/sna_render.h 2
4 files changed, 50 insertions, 19 deletions
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index e3a103cb..78baa3ac 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -55,6 +55,14 @@
#define NO_RING_SWITCH 0
#define PREFER_RENDER 0
+#define USE_8_PIXEL_DISPATCH 1
+#define USE_16_PIXEL_DISPATCH 1
+#define USE_32_PIXEL_DISPATCH 0
+
+#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
+#error "Must select at least 8, 16 or 32 pixel dispatch"
+#endif
+
#define GEN6_MAX_SIZE 8192
struct gt_info {
@@ -612,29 +620,36 @@ gen6_emit_sf(struct sna *sna, bool has_mask)
static void
gen6_emit_wm(struct sna *sna, unsigned int kernel)
{
+ const uint32_t *kernels;
+
if (sna->render_state.gen6.kernel == kernel)
return;
sna->render_state.gen6.kernel = kernel;
+ kernels = sna->render_state.gen6.wm_kernel[kernel];
- DBG(("%s: switching to %s, num_surfaces=%d\n",
+ DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n",
__FUNCTION__,
- wm_kernels[kernel].name,
- wm_kernels[kernel].num_surfaces));
+ wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces,
+ kernels[0], kernels[1], kernels[2]));
OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
- OUT_BATCH(sna->render_state.gen6.wm_kernel[kernel]);
+ OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
- OUT_BATCH(0);
- OUT_BATCH(6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT); /* DW4 */
+ OUT_BATCH(0); /* scratch space */
+ OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT |
+ 8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT |
+ 6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT);
OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
- GEN6_3DSTATE_WM_DISPATCH_ENABLE |
- GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
+ (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) |
+ (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) |
+ (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) |
+ GEN6_3DSTATE_WM_DISPATCH_ENABLE);
OUT_BATCH(wm_kernels[kernel].num_inputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
- OUT_BATCH(0);
- OUT_BATCH(0);
+ OUT_BATCH(kernels[2]);
+ OUT_BATCH(kernels[1]);
}
static bool
@@ -4156,18 +4171,31 @@ static bool gen6_render_setup(struct sna *sna)
for (m = 0; m < GEN6_KERNEL_COUNT; m++) {
if (wm_kernels[m].size) {
- state->wm_kernel[m] =
+ state->wm_kernel[m][1] =
sna_static_stream_add(&general,
wm_kernels[m].data,
wm_kernels[m].size,
64);
} else {
- state->wm_kernel[m] =
- sna_static_stream_compile_wm(sna, &general,
- wm_kernels[m].data,
- 16);
+ if (USE_8_PIXEL_DISPATCH) {
+ state->wm_kernel[m][0] =
+ sna_static_stream_compile_wm(sna, &general,
+ wm_kernels[m].data, 8);
+ }
+
+ if (USE_16_PIXEL_DISPATCH) {
+ state->wm_kernel[m][1] =
+ sna_static_stream_compile_wm(sna, &general,
+ wm_kernels[m].data, 16);
+ }
+
+ if (USE_32_PIXEL_DISPATCH) {
+ state->wm_kernel[m][2] =
+ sna_static_stream_compile_wm(sna, &general,
+ wm_kernels[m].data, 32);
+ }
}
- assert(state->wm_kernel[m]);
+ assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
}
ss = sna_static_stream_map(&general,
diff --git a/src/sna/gen6_render.h b/src/sna/gen6_render.h
index 9cc8e14e..2201a627 100644
--- a/src/sna/gen6_render.h
+++ b/src/sna/gen6_render.h
@@ -97,10 +97,13 @@
# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT 27
# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
/* DW4 */
-# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16
+# define GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT 16
+# define GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT 8
+# define GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT 0
/* DW5 */
# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
+# define GEN6_3DSTATE_WM_32_DISPATCH_ENABLE (1 << 2)
# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
/* DW6 */
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 6381ccfb..8c64016d 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -4277,7 +4277,7 @@ static bool gen7_render_setup(struct sna *sna)
wm_kernels[m].data, 32);
}
}
- assert(state->wm_kernel[m][1]);
+ assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
}
ss = sna_static_stream_map(&general,
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 943c248b..142f2224 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -396,7 +396,7 @@ struct gen6_render_state {
uint32_t sf_state;
uint32_t sf_mask_state;
uint32_t wm_state;
- uint32_t wm_kernel[GEN6_KERNEL_COUNT];
+ uint32_t wm_kernel[GEN6_KERNEL_COUNT][3];
uint32_t cc_vp;
uint32_t cc_blend;