summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2012-07-29 09:51:16 +0100
committer Chris Wilson <chris@chris-wilson.co.uk> 2012-07-30 13:12:04 +0100
commit 33d6afda6cec124494f49b74152768da8a3fbdb5 (patch)
tree 00bbe35eb9f7d908c45597a1ef84705724e8809c
parent eba8d3b3e14a5a16cea6cb8a89f12d3feb8f3d99 (diff)
sna/gen7: Compile basic kernels at runtime
-rw-r--r-- src/sna/gen7_render.c 151
-rw-r--r-- src/sna/sna_render.h 10
2 files changed, 68 insertions, 93 deletions
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index ded22d5f..aba58112 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -40,6 +40,7 @@
#include "sna_render_inline.h"
#include "sna_video.h"
+#include "brw/brw.h"
#include "gen7_render.h"
#define NO_COMPOSITE 0
@@ -53,6 +54,14 @@
#define NO_RING_SWITCH 0
+#define USE_8_PIXEL_DISPATCH 0
+#define USE_16_PIXEL_DISPATCH 1
+#define USE_32_PIXEL_DISPATCH 0
+
+#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
+#error "Must select at least 8, 16 or 32 pixel dispatch"
+#endif
+
#define GEN7_MAX_SIZE 16384
/* XXX Todo
@@ -88,72 +97,6 @@ static const struct gt_info gt2_info = {
.urb = { 256, 704, 320 },
};
-static const uint32_t ps_kernel_nomask_affine[][4] = {
-#include "exa_wm_src_affine.g7b"
-#include "exa_wm_src_sample_argb.g7b"
-#include "exa_wm_write.g7b"
-};
-
-static const uint32_t ps_kernel_nomask_projective[][4] = {
-#include "exa_wm_src_projective.g7b"
-#include "exa_wm_src_sample_argb.g7b"
-#include "exa_wm_write.g7b"
-};
-
-static const uint32_t ps_kernel_maskca_affine[][4] = {
-#include "exa_wm_src_affine.g7b"
-#include "exa_wm_src_sample_argb.g7b"
-#include "exa_wm_mask_affine.g7b"
-#include "exa_wm_mask_sample_argb.g7b"
-#include "exa_wm_ca.g6b" //#include "exa_wm_ca.g7b"
-#include "exa_wm_write.g7b"
-};
-
-static const uint32_t ps_kernel_maskca_projective[][4] = {
-#include "exa_wm_src_projective.g7b"
-#include "exa_wm_src_sample_argb.g7b"
-#include "exa_wm_mask_projective.g7b"
-#include "exa_wm_mask_sample_argb.g7b"
-#include "exa_wm_ca.g6b" //#include "exa_wm_ca.g7b"
-#include "exa_wm_write.g7b"
-};
-
-static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = {
-#include "exa_wm_src_affine.g7b"
-#include "exa_wm_src_sample_a.g7b"
-#include "exa_wm_mask_affine.g7b"
-#include "exa_wm_mask_sample_argb.g7b"
-#include "exa_wm_ca_srcalpha.g6b" //#include "exa_wm_ca_srcalpha.g7b"
-#include "exa_wm_write.g7b"
-};
-
-static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = {
-#include "exa_wm_src_projective.g7b"
-#include "exa_wm_src_sample_a.g7b"
-#include "exa_wm_mask_projective.g7b"
-#include "exa_wm_mask_sample_argb.g7b"
-#include "exa_wm_ca_srcalpha.g6b" //#include "exa_wm_ca_srcalpha.g7b"
-#include "exa_wm_write.g7b"
-};
-
-static const uint32_t ps_kernel_masknoca_affine[][4] = {
-#include "exa_wm_src_affine.g7b"
-#include "exa_wm_src_sample_argb.g7b"
-#include "exa_wm_mask_affine.g7b"
-#include "exa_wm_mask_sample_a.g7b"
-#include "exa_wm_noca.g6b"// #include "exa_wm_noca.g7b"
-#include "exa_wm_write.g7b"
-};
-
-static const uint32_t ps_kernel_masknoca_projective[][4] = {
-#include "exa_wm_src_projective.g7b"
-#include "exa_wm_src_sample_argb.g7b"
-#include "exa_wm_mask_projective.g7b"
-#include "exa_wm_mask_sample_a.g7b"
-#include "exa_wm_noca.g6b" //#include "exa_wm_noca.g7b"
-#include "exa_wm_write.g7b"
-};
-
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
@@ -170,23 +113,25 @@ static const uint32_t ps_kernel_planar[][4] = {
#define KERNEL(kernel_enum, kernel, num_surfaces) \
[GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces}
+#define NOKERNEL(kernel_enum, func, num_surfaces) \
+ [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces}
static const struct wm_kernel_info {
const char *name;
const void *data;
unsigned int size;
int num_surfaces;
} wm_kernels[] = {
- KERNEL(NOMASK, ps_kernel_nomask_affine, 2),
- KERNEL(NOMASK_PROJECTIVE, ps_kernel_nomask_projective, 2),
+ NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
+ NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),
- KERNEL(MASK, ps_kernel_masknoca_affine, 3),
- KERNEL(MASK_PROJECTIVE, ps_kernel_masknoca_projective, 3),
+ NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
+ NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),
- KERNEL(MASKCA, ps_kernel_maskca_affine, 3),
- KERNEL(MASKCA_PROJECTIVE, ps_kernel_maskca_projective, 3),
+ NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
+ NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),
- KERNEL(MASKSA, ps_kernel_maskca_srcalpha_affine, 3),
- KERNEL(MASKSA_PROJECTIVE, ps_kernel_maskca_srcalpha_projective, 3),
+ NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
+ NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),
KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
@@ -818,27 +763,35 @@ gen7_emit_sf(struct sna *sna, bool has_mask)
static void
gen7_emit_wm(struct sna *sna, int kernel)
{
+ const uint32_t *kernels;
+
if (sna->render_state.gen7.kernel == kernel)
return;
sna->render_state.gen7.kernel = kernel;
+ kernels = sna->render_state.gen7.wm_kernel[kernel];
- DBG(("%s: switching to %s, num_surfaces=%d\n",
+ DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
__FUNCTION__,
wm_kernels[kernel].name,
- wm_kernels[kernel].num_surfaces));
+ wm_kernels[kernel].num_surfaces,
+ kernels[0], kernels[1], kernels[2]));
OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2));
- OUT_BATCH(sna->render_state.gen7.wm_kernel[kernel]);
+ OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT |
wm_kernels[kernel].num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
OUT_BATCH(0); /* scratch address */
OUT_BATCH(sna->render_state.gen7.info->max_wm_threads |
- GEN7_PS_ATTRIBUTE_ENABLE |
- GEN7_PS_16_DISPATCH_ENABLE);
- OUT_BATCH(6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
- OUT_BATCH(0); /* kernel 1 */
- OUT_BATCH(0); /* kernel 2 */
+ (kernels[0] ? GEN7_PS_8_DISPATCH_ENABLE : 0) |
+ (kernels[1] ? GEN7_PS_16_DISPATCH_ENABLE : 0) |
+ (kernels[2] ? GEN7_PS_32_DISPATCH_ENABLE : 0) |
+ GEN7_PS_ATTRIBUTE_ENABLE);
+ OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
+ 8 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
+ 6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
+ OUT_BATCH(kernels[2]);
+ OUT_BATCH(kernels[1]);
}
static bool
@@ -4285,12 +4238,34 @@ static bool gen7_render_setup(struct sna *sna)
*/
null_create(&general);
- for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++)
- state->wm_kernel[m] =
- sna_static_stream_add(&general,
- wm_kernels[m].data,
- wm_kernels[m].size,
- 64);
+ for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++) {
+ if (wm_kernels[m].size) {
+ state->wm_kernel[m][1] =
+ sna_static_stream_add(&general,
+ wm_kernels[m].data,
+ wm_kernels[m].size,
+ 64);
+ } else {
+ if (USE_8_PIXEL_DISPATCH) {
+ state->wm_kernel[m][0] =
+ sna_static_stream_compile_wm(sna, &general,
+ wm_kernels[m].data, 8);
+ }
+
+ if (USE_16_PIXEL_DISPATCH) {
+ state->wm_kernel[m][1] =
+ sna_static_stream_compile_wm(sna, &general,
+ wm_kernels[m].data, 16);
+ }
+
+ if (USE_32_PIXEL_DISPATCH) {
+ state->wm_kernel[m][2] =
+ sna_static_stream_compile_wm(sna, &general,
+ wm_kernels[m].data, 32);
+ }
+ }
+ assert(state->wm_kernel[m][1]);
+ }
ss = sna_static_stream_map(&general,
2 * sizeof(*ss) *
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index f4fabad8..011b1b78 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -421,16 +421,16 @@ struct gen6_render_state {
enum {
GEN7_WM_KERNEL_NOMASK = 0,
- GEN7_WM_KERNEL_NOMASK_PROJECTIVE,
+ GEN7_WM_KERNEL_NOMASK_P,
GEN7_WM_KERNEL_MASK,
- GEN7_WM_KERNEL_MASK_PROJECTIVE,
+ GEN7_WM_KERNEL_MASK_P,
GEN7_WM_KERNEL_MASKCA,
- GEN7_WM_KERNEL_MASKCA_PROJECTIVE,
+ GEN7_WM_KERNEL_MASKCA_P,
GEN7_WM_KERNEL_MASKSA,
- GEN7_WM_KERNEL_MASKSA_PROJECTIVE,
+ GEN7_WM_KERNEL_MASKSA_P,
GEN7_WM_KERNEL_VIDEO_PLANAR,
GEN7_WM_KERNEL_VIDEO_PACKED,
@@ -445,7 +445,7 @@ struct gen7_render_state {
uint32_t sf_state;
uint32_t sf_mask_state;
uint32_t wm_state;
- uint32_t wm_kernel[GEN7_WM_KERNEL_COUNT];
+ uint32_t wm_kernel[GEN7_WM_KERNEL_COUNT][3];
uint32_t cc_vp;
uint32_t cc_blend;