diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2012-07-29 09:51:16 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2012-07-30 13:12:04 +0100 |
commit | 33d6afda6cec124494f49b74152768da8a3fbdb5 (patch) | |
tree | 00bbe35eb9f7d908c45597a1ef84705724e8809c | |
parent | eba8d3b3e14a5a16cea6cb8a89f12d3feb8f3d99 (diff) |
sna/gen7: Compile basic kernels at runtime
-rw-r--r-- | src/sna/gen7_render.c | 151 | ||||
-rw-r--r-- | src/sna/sna_render.h | 10 |
2 files changed, 68 insertions, 93 deletions
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index ded22d5f..aba58112 100644 --- a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -40,6 +40,7 @@ #include "sna_render_inline.h" #include "sna_video.h" +#include "brw/brw.h" #include "gen7_render.h" #define NO_COMPOSITE 0 @@ -53,6 +54,14 @@ #define NO_RING_SWITCH 0 +#define USE_8_PIXEL_DISPATCH 0 +#define USE_16_PIXEL_DISPATCH 1 +#define USE_32_PIXEL_DISPATCH 0 + +#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH +#error "Must select at least 8, 16 or 32 pixel dispatch" +#endif + #define GEN7_MAX_SIZE 16384 /* XXX Todo @@ -88,72 +97,6 @@ static const struct gt_info gt2_info = { .urb = { 256, 704, 320 }, }; -static const uint32_t ps_kernel_nomask_affine[][4] = { -#include "exa_wm_src_affine.g7b" -#include "exa_wm_src_sample_argb.g7b" -#include "exa_wm_write.g7b" -}; - -static const uint32_t ps_kernel_nomask_projective[][4] = { -#include "exa_wm_src_projective.g7b" -#include "exa_wm_src_sample_argb.g7b" -#include "exa_wm_write.g7b" -}; - -static const uint32_t ps_kernel_maskca_affine[][4] = { -#include "exa_wm_src_affine.g7b" -#include "exa_wm_src_sample_argb.g7b" -#include "exa_wm_mask_affine.g7b" -#include "exa_wm_mask_sample_argb.g7b" -#include "exa_wm_ca.g6b" //#include "exa_wm_ca.g7b" -#include "exa_wm_write.g7b" -}; - -static const uint32_t ps_kernel_maskca_projective[][4] = { -#include "exa_wm_src_projective.g7b" -#include "exa_wm_src_sample_argb.g7b" -#include "exa_wm_mask_projective.g7b" -#include "exa_wm_mask_sample_argb.g7b" -#include "exa_wm_ca.g6b" //#include "exa_wm_ca.g7b" -#include "exa_wm_write.g7b" -}; - -static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = { -#include "exa_wm_src_affine.g7b" -#include "exa_wm_src_sample_a.g7b" -#include "exa_wm_mask_affine.g7b" -#include "exa_wm_mask_sample_argb.g7b" -#include "exa_wm_ca_srcalpha.g6b" //#include "exa_wm_ca_srcalpha.g7b" -#include "exa_wm_write.g7b" -}; - -static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = { -#include "exa_wm_src_projective.g7b" -#include "exa_wm_src_sample_a.g7b" -#include "exa_wm_mask_projective.g7b" -#include "exa_wm_mask_sample_argb.g7b" -#include "exa_wm_ca_srcalpha.g6b" //#include "exa_wm_ca_srcalpha.g7b" -#include "exa_wm_write.g7b" -}; - -static const uint32_t ps_kernel_masknoca_affine[][4] = { -#include "exa_wm_src_affine.g7b" -#include "exa_wm_src_sample_argb.g7b" -#include "exa_wm_mask_affine.g7b" -#include "exa_wm_mask_sample_a.g7b" -#include "exa_wm_noca.g6b"// #include "exa_wm_noca.g7b" -#include "exa_wm_write.g7b" -}; - -static const uint32_t ps_kernel_masknoca_projective[][4] = { -#include "exa_wm_src_projective.g7b" -#include "exa_wm_src_sample_argb.g7b" -#include "exa_wm_mask_projective.g7b" -#include "exa_wm_mask_sample_a.g7b" -#include "exa_wm_noca.g6b" //#include "exa_wm_noca.g7b" -#include "exa_wm_write.g7b" -}; - static const uint32_t ps_kernel_packed[][4] = { #include "exa_wm_src_affine.g7b" #include "exa_wm_src_sample_argb.g7b" @@ -170,23 +113,25 @@ static const uint32_t ps_kernel_planar[][4] = { #define KERNEL(kernel_enum, kernel, num_surfaces) \ [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces} +#define NOKERNEL(kernel_enum, func, num_surfaces) \ + [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces} static const struct wm_kernel_info { const char *name; const void *data; unsigned int size; int num_surfaces; } wm_kernels[] = { - KERNEL(NOMASK, ps_kernel_nomask_affine, 2), - KERNEL(NOMASK_PROJECTIVE, ps_kernel_nomask_projective, 2), + NOKERNEL(NOMASK, brw_wm_kernel__affine, 2), + NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2), - KERNEL(MASK, ps_kernel_masknoca_affine, 3), - KERNEL(MASK_PROJECTIVE, ps_kernel_masknoca_projective, 3), + NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3), + NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3), - KERNEL(MASKCA, ps_kernel_maskca_affine, 3), - KERNEL(MASKCA_PROJECTIVE, ps_kernel_maskca_projective, 3), + NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3), + NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3), - KERNEL(MASKSA, ps_kernel_maskca_srcalpha_affine, 3), - KERNEL(MASKSA_PROJECTIVE, ps_kernel_maskca_srcalpha_projective, 3), + NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3), + NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3), KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), @@ -818,27 +763,35 @@ gen7_emit_sf(struct sna *sna, bool has_mask) static void gen7_emit_wm(struct sna *sna, int kernel) { + const uint32_t *kernels; + if (sna->render_state.gen7.kernel == kernel) return; sna->render_state.gen7.kernel = kernel; + kernels = sna->render_state.gen7.wm_kernel[kernel]; - DBG(("%s: switching to %s, num_surfaces=%d\n", + DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n", __FUNCTION__, wm_kernels[kernel].name, - wm_kernels[kernel].num_surfaces)); + wm_kernels[kernel].num_surfaces, + kernels[0], kernels[1], kernels[2])); OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2)); - OUT_BATCH(sna->render_state.gen7.wm_kernel[kernel]); + OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]); OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT | wm_kernels[kernel].num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT); OUT_BATCH(0); /* scratch address */ OUT_BATCH(sna->render_state.gen7.info->max_wm_threads | - GEN7_PS_ATTRIBUTE_ENABLE | - GEN7_PS_16_DISPATCH_ENABLE); - OUT_BATCH(6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0); - OUT_BATCH(0); /* kernel 1 */ - OUT_BATCH(0); /* kernel 2 */ + (kernels[0] ? GEN7_PS_8_DISPATCH_ENABLE : 0) | + (kernels[1] ? GEN7_PS_16_DISPATCH_ENABLE : 0) | + (kernels[2] ? GEN7_PS_32_DISPATCH_ENABLE : 0) | + GEN7_PS_ATTRIBUTE_ENABLE); + OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 | + 8 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 | + 6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2); + OUT_BATCH(kernels[2]); + OUT_BATCH(kernels[1]); } static bool @@ -4285,12 +4238,34 @@ static bool gen7_render_setup(struct sna *sna) */ null_create(&general); - for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++) - state->wm_kernel[m] = - sna_static_stream_add(&general, - wm_kernels[m].data, - wm_kernels[m].size, - 64); + for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++) { + if (wm_kernels[m].size) { + state->wm_kernel[m][1] = + sna_static_stream_add(&general, + wm_kernels[m].data, + wm_kernels[m].size, + 64); + } else { + if (USE_8_PIXEL_DISPATCH) { + state->wm_kernel[m][0] = + sna_static_stream_compile_wm(sna, &general, + wm_kernels[m].data, 8); + } + + if (USE_16_PIXEL_DISPATCH) { + state->wm_kernel[m][1] = + sna_static_stream_compile_wm(sna, &general, + wm_kernels[m].data, 16); + } + + if (USE_32_PIXEL_DISPATCH) { + state->wm_kernel[m][2] = + sna_static_stream_compile_wm(sna, &general, + wm_kernels[m].data, 32); + } + } + assert(state->wm_kernel[m][1]); + } ss = sna_static_stream_map(&general, 2 * sizeof(*ss) * diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h index f4fabad8..011b1b78 100644 --- a/src/sna/sna_render.h +++ b/src/sna/sna_render.h @@ -421,16 +421,16 @@ struct gen6_render_state { enum { GEN7_WM_KERNEL_NOMASK = 0, - GEN7_WM_KERNEL_NOMASK_PROJECTIVE, + GEN7_WM_KERNEL_NOMASK_P, GEN7_WM_KERNEL_MASK, - GEN7_WM_KERNEL_MASK_PROJECTIVE, + GEN7_WM_KERNEL_MASK_P, GEN7_WM_KERNEL_MASKCA, - GEN7_WM_KERNEL_MASKCA_PROJECTIVE, + GEN7_WM_KERNEL_MASKCA_P, GEN7_WM_KERNEL_MASKSA, - GEN7_WM_KERNEL_MASKSA_PROJECTIVE, + GEN7_WM_KERNEL_MASKSA_P, GEN7_WM_KERNEL_VIDEO_PLANAR, GEN7_WM_KERNEL_VIDEO_PACKED, @@ -445,7 +445,7 @@ struct gen7_render_state { uint32_t sf_state; uint32_t sf_mask_state; uint32_t wm_state; - uint32_t wm_kernel[GEN7_WM_KERNEL_COUNT]; + uint32_t wm_kernel[GEN7_WM_KERNEL_COUNT][3]; uint32_t cc_vp; uint32_t cc_blend; |