From 492093d04b1486dd34aafe2f109a77ddeb836f18 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 31 Jul 2012 18:39:17 +0100 Subject: sna: Generate shaders for SNB+ 8-pixel dispatch Not ideal yet, sampling an alpha-only surface using SIMD8 only seems to ever return 0... Signed-off-by: Chris Wilson --- src/sna/brw/brw.h | 20 ++--- src/sna/brw/brw_sf.c | 9 +- src/sna/brw/brw_wm.c | 229 ++++++++++++++++++++++++++++++++------------------- src/sna/sna_render.h | 4 +- src/sna/sna_stream.c | 16 +++- 5 files changed, 173 insertions(+), 105 deletions(-) diff --git a/src/sna/brw/brw.h b/src/sna/brw/brw.h index a39b253a..f0f3ac87 100644 --- a/src/sna/brw/brw.h +++ b/src/sna/brw/brw.h @@ -1,14 +1,14 @@ #include "brw_eu.h" -void brw_sf_kernel__nomask(struct brw_compile *p); -void brw_sf_kernel__mask(struct brw_compile *p); +bool brw_sf_kernel__nomask(struct brw_compile *p); +bool brw_sf_kernel__mask(struct brw_compile *p); -void brw_wm_kernel__affine(struct brw_compile *p, int dispatch_width); -void brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch_width); -void brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch_width); -void brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch_width); +bool brw_wm_kernel__affine(struct brw_compile *p, int dispatch_width); +bool brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch_width); +bool brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch_width); +bool brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch_width); -void brw_wm_kernel__projective(struct brw_compile *p, int dispatch_width); -void brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch_width); -void brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch_width); -void brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch_width); +bool brw_wm_kernel__projective(struct brw_compile *p, int dispatch_width); +bool brw_wm_kernel__projective_mask(struct 
brw_compile *p, int dispatch_width); +bool brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch_width); +bool brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch_width); diff --git a/src/sna/brw/brw_sf.c b/src/sna/brw/brw_sf.c index 0c69433d..6f821719 100644 --- a/src/sna/brw/brw_sf.c +++ b/src/sna/brw/brw_sf.c @@ -1,6 +1,6 @@ #include "brw.h" -void brw_sf_kernel__nomask(struct brw_compile *p) +bool brw_sf_kernel__nomask(struct brw_compile *p) { struct brw_reg inv, v0, v1, v2, delta; @@ -23,10 +23,11 @@ void brw_sf_kernel__nomask(struct brw_compile *p) brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0 ,0), false, true, 4, 0, true, true, 0, BRW_URB_SWIZZLE_TRANSPOSE); + + return true; } -void -brw_sf_kernel__mask(struct brw_compile *p) +bool brw_sf_kernel__mask(struct brw_compile *p) { struct brw_reg inv, v0, v1, v2; @@ -48,4 +49,6 @@ brw_sf_kernel__mask(struct brw_compile *p) brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0 ,0), false, true, 4, 0, true, true, 0, BRW_URB_SWIZZLE_TRANSPOSE); + + return true; } diff --git a/src/sna/brw/brw_wm.c b/src/sna/brw/brw_wm.c index 9a8af5f4..f96881af 100644 --- a/src/sna/brw/brw_wm.c +++ b/src/sna/brw/brw_wm.c @@ -34,7 +34,8 @@ static void brw_wm_xy(struct brw_compile *p, int dw) brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1))); } -static void brw_wm_affine_st(struct brw_compile *p, int dw, int channel, int msg) +static void brw_wm_affine_st(struct brw_compile *p, int dw, + int channel, int msg) { int uv; @@ -88,8 +89,8 @@ static inline struct brw_reg sample_result(int dw, int result) WRITEMASK_XYZW); } -static void brw_wm_sample(struct brw_compile *p, int dw, - int channel, int msg, int result) +static int brw_wm_sample(struct brw_compile *p, int dw, + int channel, int msg, int result) { struct brw_reg src0; bool header; @@ -107,15 +108,24 @@ static void brw_wm_sample(struct brw_compile *p, int dw, brw_SAMPLE(p, sample_result(dw, result), msg, src0, channel+1, 
channel, WRITEMASK_XYZW, 0, 2*len, len+header, header, simd(dw)); + return result; } -static void brw_wm_sample__alpha(struct brw_compile *p, int dw, - int channel, int msg, int result) +static int brw_wm_sample__alpha(struct brw_compile *p, int dw, + int channel, int msg, int result) { struct brw_reg src0; - int len; + int mlen, rlen; + + if (dw == 8) { + /* SIMD8 sample return is not masked */ + mlen = 3; + rlen = 4; + } else { + mlen = 5; + rlen = 2; + } - len = dw == 16 ? 4 : 2; if (p->gen >= 60) src0 = brw_message_reg(msg); else @@ -123,27 +133,31 @@ static void brw_wm_sample__alpha(struct brw_compile *p, int dw, brw_SAMPLE(p, sample_result(dw, result), msg, src0, channel+1, channel, WRITEMASK_W, 0, - len/2, len+1, true, simd(dw)); + rlen, mlen, true, simd(dw)); + + if (dw == 8) + result += 3; + + return result; } -static void brw_wm_affine(struct brw_compile *p, int dw, - int channel, int msg, int result) +static int brw_wm_affine(struct brw_compile *p, int dw, + int channel, int msg, int result) { brw_wm_affine_st(p, dw, channel, msg); - brw_wm_sample(p, dw, channel, msg, result); + return brw_wm_sample(p, dw, channel, msg, result); } -static void brw_wm_affine__alpha(struct brw_compile *p, int dw, - int channel, int msg, int result) +static int brw_wm_affine__alpha(struct brw_compile *p, int dw, + int channel, int msg, int result) { brw_wm_affine_st(p, dw, channel, msg); - brw_wm_sample__alpha(p, dw, channel, msg, result); + return brw_wm_sample__alpha(p, dw, channel, msg, result); } static inline struct brw_reg null_result(int dw) { - return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_NULL, 0, + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0, BRW_REGISTER_TYPE_UW, dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8, dw == 16 ? 
BRW_WIDTH_16 : BRW_WIDTH_8, @@ -185,8 +199,8 @@ static void brw_fb_write(struct brw_compile *p, int dw) insn->header.compression_control = BRW_COMPRESSION_NONE; if (p->gen >= 60) { - src0 = brw_message_reg(2); msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; + src0 = brw_message_reg(2); header = false; } else { insn->header.destreg__conditionalmod = 0; @@ -206,14 +220,19 @@ static void brw_wm_write(struct brw_compile *p, int dw, int src) { int n; - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); - if (dw == 8 && p->gen >= 60) { - brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src, 0)); + /* XXX pixel execution mask? */ + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src+0, 0)); + brw_MOV(p, brw_message_reg(3), brw_vec8_grf(src+1, 0)); brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0)); + brw_MOV(p, brw_message_reg(5), brw_vec8_grf(src+3, 0)); goto done; } + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + for (n = 0; n < 4; n++) { if (p->gen >= 60) { brw_MOV(p, @@ -242,38 +261,36 @@ done: brw_fb_write(p, dw); } -static inline struct brw_reg mask_a8(int nr) -{ - return brw_reg(BRW_GENERAL_REGISTER_FILE, - nr, 0, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_0, - BRW_WIDTH_8, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XYZW, - WRITEMASK_XYZW); -} - -static void brw_wm_write__mask(struct brw_compile *p, - int dw, +static void brw_wm_write__mask(struct brw_compile *p, int dw, int src, int mask) { int n; - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); - if (dw == 8 && p->gen >= 60) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MUL(p, brw_message_reg(2), - brw_vec8_grf(src, 0), - mask_a8(mask)); + brw_vec8_grf(src+0, 0), + brw_vec8_grf(mask, 0)); + brw_MUL(p, + brw_message_reg(3), + brw_vec8_grf(src+1, 0), + brw_vec8_grf(mask, 0)); brw_MUL(p, brw_message_reg(4), brw_vec8_grf(src+2, 0), - mask_a8(mask)); + brw_vec8_grf(mask, 0)); + brw_MUL(p, + 
brw_message_reg(5), + brw_vec8_grf(src+3, 0), + brw_vec8_grf(mask, 0)); + goto done; } + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + for (n = 0; n < 4; n++) { if (p->gen >= 60) { brw_MUL(p, @@ -306,25 +323,36 @@ done: brw_fb_write(p, dw); } -static void brw_wm_write__mask_ca(struct brw_compile *p, - int dw, int src, int mask) +static void brw_wm_write__mask_ca(struct brw_compile *p, int dw, + int src, int mask) { int n; - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); - if (dw == 8 && p->gen >= 60) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MUL(p, brw_message_reg(2), - brw_vec8_grf(src, 0), - brw_vec8_grf(mask, 0)); + brw_vec8_grf(src + 0, 0), + brw_vec8_grf(mask + 0, 0)); + brw_MUL(p, + brw_message_reg(3), + brw_vec8_grf(src + 1, 0), + brw_vec8_grf(mask + 1, 0)); brw_MUL(p, brw_message_reg(4), - brw_vec8_grf(src + 2, 0), + brw_vec8_grf(src + 2, 0), brw_vec8_grf(mask + 2, 0)); + brw_MUL(p, + brw_message_reg(5), + brw_vec8_grf(src + 3, 0), + brw_vec8_grf(mask + 3, 0)); + goto done; } + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + for (n = 0; n < 4; n++) { if (p->gen >= 60) { brw_MUL(p, @@ -357,56 +385,71 @@ done: brw_fb_write(p, dw); } -void +bool brw_wm_kernel__affine(struct brw_compile *p, int dispatch) { - int src = 12; - if (p->gen < 60) brw_wm_xy(p, dispatch); - brw_wm_affine(p, dispatch, 0, 1, src); - brw_wm_write(p, dispatch, src); + brw_wm_write(p, dispatch, brw_wm_affine(p, dispatch, 0, 1, 12)); + + return true; } -void +bool brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch) { - int src = 12, mask = 20; + int src, mask; + + if (dispatch == 8) + return false; /* XXX sampler alpha returns all 0 */ if (p->gen < 60) brw_wm_xy(p, dispatch); - brw_wm_affine(p, dispatch, 0, 1, src); - brw_wm_affine__alpha(p, dispatch, 1, 7, mask); + + src = brw_wm_affine(p, dispatch, 0, 1, 12); + mask = brw_wm_affine__alpha(p, dispatch, 1, 6, 20); brw_wm_write__mask(p, dispatch, src, mask); + + 
return true; } -void +bool brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch) { - int src = 12, mask = 20; + int src, mask; if (p->gen < 60) brw_wm_xy(p, dispatch); - brw_wm_affine(p, dispatch, 0, 1, src); - brw_wm_affine(p, dispatch, 1, 7, mask); + + src = brw_wm_affine(p, dispatch, 0, 1, 12); + mask = brw_wm_affine(p, dispatch, 1, 6, 20); brw_wm_write__mask_ca(p, dispatch, src, mask); + + return true; } -void +bool brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch) { - int src = 12, mask = 14; + int src, mask; + + if (dispatch == 8) + return false; /* XXX sampler alpha returns all 0 */ if (p->gen < 60) brw_wm_xy(p, dispatch); - brw_wm_affine__alpha(p, dispatch, 0, 1, src); - brw_wm_affine(p, dispatch, 1, 7, mask); + + src = brw_wm_affine__alpha(p, dispatch, 0, 1, 12); + mask = brw_wm_affine(p, dispatch, 1, 6, 16); brw_wm_write__mask(p, dispatch, mask, src); + + return true; } /* Projective variants */ -static void brw_wm_projective_st(struct brw_compile *p, int dw, int channel, int msg) +static void brw_wm_projective_st(struct brw_compile *p, int dw, + int channel, int msg) { int uv; @@ -480,63 +523,77 @@ static void brw_wm_projective_st(struct brw_compile *p, int dw, int channel, int } } -static void brw_wm_projective(struct brw_compile *p, int dw, - int channel, int msg, int result) +static int brw_wm_projective(struct brw_compile *p, int dw, + int channel, int msg, int result) { brw_wm_projective_st(p, dw, channel, msg); - brw_wm_sample(p, dw, channel, msg, result); + return brw_wm_sample(p, dw, channel, msg, result); } -static void brw_wm_projective__alpha(struct brw_compile *p, int dw, +static int brw_wm_projective__alpha(struct brw_compile *p, int dw, int channel, int msg, int result) { brw_wm_projective_st(p, dw, channel, msg); - brw_wm_sample__alpha(p, dw, channel, msg, result); + return brw_wm_sample__alpha(p, dw, channel, msg, result); } -void +bool brw_wm_kernel__projective(struct brw_compile *p, int dispatch) { - int src 
= 12; - if (p->gen < 60) brw_wm_xy(p, dispatch); - brw_wm_projective(p, dispatch, 0, 1, src); - brw_wm_write(p, dispatch, src); + brw_wm_write(p, dispatch, brw_wm_projective(p, dispatch, 0, 1, 12)); + + return true; } -void +bool brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch) { - int src = 12, mask = 20; + int src, mask; + + if (dispatch == 8) + return false; /* XXX sampler alpha returns all 0 */ if (p->gen < 60) brw_wm_xy(p, dispatch); - brw_wm_projective(p, dispatch, 0, 1, src); - brw_wm_projective__alpha(p, dispatch, 1, 7, mask); + + src = brw_wm_projective(p, dispatch, 0, 1, 12); + mask = brw_wm_projective__alpha(p, dispatch, 1, 6, 20); brw_wm_write__mask(p, dispatch, src, mask); + + return true; } -void +bool brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch) { - int src = 12, mask = 20; + int src, mask; if (p->gen < 60) brw_wm_xy(p, dispatch); - brw_wm_projective(p, dispatch, 0, 1, src); - brw_wm_projective(p, dispatch, 1,7, mask); + + src = brw_wm_projective(p, dispatch, 0, 1, 12); + mask = brw_wm_projective(p, dispatch, 1, 6, 20); brw_wm_write__mask_ca(p, dispatch, src, mask); + + return true; } -void +bool brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch) { - int src = 12, mask = 14; + int src, mask; + + if (dispatch == 8) + return false; /* XXX sampler alpha returns all 0 */ if (p->gen < 60) brw_wm_xy(p, dispatch); - brw_wm_projective__alpha(p, dispatch, 0, 1, src); - brw_wm_projective(p, dispatch, 1, 7, mask); + + src = brw_wm_projective__alpha(p, dispatch, 0, 1, 12); + mask = brw_wm_projective(p, dispatch, 1, 6, 16); brw_wm_write__mask(p, dispatch, mask, src); + + return true; } diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h index 011b1b78..943c248b 100644 --- a/src/sna/sna_render.h +++ b/src/sna/sna_render.h @@ -482,11 +482,11 @@ uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream, void *ptr); unsigned sna_static_stream_compile_sf(struct sna *sna, struct 
sna_static_stream *stream, - void (*compile)(struct brw_compile *)); + bool (*compile)(struct brw_compile *)); unsigned sna_static_stream_compile_wm(struct sna *sna, struct sna_static_stream *stream, - void (*compile)(struct brw_compile *, int), + bool (*compile)(struct brw_compile *, int), int width); struct kgem_bo *sna_static_stream_fini(struct sna *sna, struct sna_static_stream *stream); diff --git a/src/sna/sna_stream.c b/src/sna/sna_stream.c index 66a8c461..1a0a86b9 100644 --- a/src/sna/sna_stream.c +++ b/src/sna/sna_stream.c @@ -97,7 +97,7 @@ struct kgem_bo *sna_static_stream_fini(struct sna *sna, unsigned sna_static_stream_compile_sf(struct sna *sna, struct sna_static_stream *stream, - void (*compile)(struct brw_compile *)) + bool (*compile)(struct brw_compile *)) { struct brw_compile p; @@ -105,7 +105,11 @@ sna_static_stream_compile_sf(struct sna *sna, sna_static_stream_map(stream, 64*sizeof(uint32_t), 64)); - compile(&p); + if (!compile(&p)) { + stream->used -= 64*sizeof(uint32_t); + return 0; + } + assert(p.nr_insn*sizeof(struct brw_instruction) <= 64*sizeof(uint32_t)); stream->used -= 64*sizeof(uint32_t) - p.nr_insn*sizeof(struct brw_instruction); @@ -115,7 +119,7 @@ sna_static_stream_compile_sf(struct sna *sna, unsigned sna_static_stream_compile_wm(struct sna *sna, struct sna_static_stream *stream, - void (*compile)(struct brw_compile *, int), + bool (*compile)(struct brw_compile *, int), int dispatch_width) { struct brw_compile p; @@ -124,7 +128,11 @@ sna_static_stream_compile_wm(struct sna *sna, sna_static_stream_map(stream, 256*sizeof(uint32_t), 64)); - compile(&p, dispatch_width); + if (!compile(&p, dispatch_width)) { + stream->used -= 256*sizeof(uint32_t); + return 0; + } + assert(p.nr_insn*sizeof(struct brw_instruction) <= 256*sizeof(uint32_t)); stream->used -= 256*sizeof(uint32_t) - p.nr_insn*sizeof(struct brw_instruction); -- cgit v1.2.3