summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2012-07-31 18:39:17 +0100
committer Chris Wilson <chris@chris-wilson.co.uk> 2012-07-31 19:11:08 +0100
commit 492093d04b1486dd34aafe2f109a77ddeb836f18 (patch)
tree c6e9f1958e920a886089649d11175026188fa73f
parent 6a5ed88f9fab654c9c11c566b841d42150d26c5d (diff)
sna: Generate shaders for SNB+ 8-pixel dispatch
Not ideal yet: sampling an alpha-only surface using SIMD8 seems to only ever return 0...

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- src/sna/brw/brw.h 20
-rw-r--r-- src/sna/brw/brw_sf.c 9
-rw-r--r-- src/sna/brw/brw_wm.c 229
-rw-r--r-- src/sna/sna_render.h 4
-rw-r--r-- src/sna/sna_stream.c 16
5 files changed, 173 insertions, 105 deletions
diff --git a/src/sna/brw/brw.h b/src/sna/brw/brw.h
index a39b253a..f0f3ac87 100644
--- a/src/sna/brw/brw.h
+++ b/src/sna/brw/brw.h
@@ -1,14 +1,14 @@
#include "brw_eu.h"
-void brw_sf_kernel__nomask(struct brw_compile *p);
-void brw_sf_kernel__mask(struct brw_compile *p);
+bool brw_sf_kernel__nomask(struct brw_compile *p);
+bool brw_sf_kernel__mask(struct brw_compile *p);
-void brw_wm_kernel__affine(struct brw_compile *p, int dispatch_width);
-void brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch_width);
-void brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch_width);
-void brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__affine(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch_width);
-void brw_wm_kernel__projective(struct brw_compile *p, int dispatch_width);
-void brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch_width);
-void brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch_width);
-void brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch_width);
diff --git a/src/sna/brw/brw_sf.c b/src/sna/brw/brw_sf.c
index 0c69433d..6f821719 100644
--- a/src/sna/brw/brw_sf.c
+++ b/src/sna/brw/brw_sf.c
@@ -1,6 +1,6 @@
#include "brw.h"
-void brw_sf_kernel__nomask(struct brw_compile *p)
+bool brw_sf_kernel__nomask(struct brw_compile *p)
{
struct brw_reg inv, v0, v1, v2, delta;
@@ -23,10 +23,11 @@ void brw_sf_kernel__nomask(struct brw_compile *p)
brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0 ,0),
false, true, 4, 0, true, true, 0,
BRW_URB_SWIZZLE_TRANSPOSE);
+
+ return true;
}
-void
-brw_sf_kernel__mask(struct brw_compile *p)
+bool brw_sf_kernel__mask(struct brw_compile *p)
{
struct brw_reg inv, v0, v1, v2;
@@ -48,4 +49,6 @@ brw_sf_kernel__mask(struct brw_compile *p)
brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0 ,0),
false, true, 4, 0, true, true, 0,
BRW_URB_SWIZZLE_TRANSPOSE);
+
+ return true;
}
diff --git a/src/sna/brw/brw_wm.c b/src/sna/brw/brw_wm.c
index 9a8af5f4..f96881af 100644
--- a/src/sna/brw/brw_wm.c
+++ b/src/sna/brw/brw_wm.c
@@ -34,7 +34,8 @@ static void brw_wm_xy(struct brw_compile *p, int dw)
brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1)));
}
-static void brw_wm_affine_st(struct brw_compile *p, int dw, int channel, int msg)
+static void brw_wm_affine_st(struct brw_compile *p, int dw,
+ int channel, int msg)
{
int uv;
@@ -88,8 +89,8 @@ static inline struct brw_reg sample_result(int dw, int result)
WRITEMASK_XYZW);
}
-static void brw_wm_sample(struct brw_compile *p, int dw,
- int channel, int msg, int result)
+static int brw_wm_sample(struct brw_compile *p, int dw,
+ int channel, int msg, int result)
{
struct brw_reg src0;
bool header;
@@ -107,15 +108,24 @@ static void brw_wm_sample(struct brw_compile *p, int dw,
brw_SAMPLE(p, sample_result(dw, result), msg, src0,
channel+1, channel, WRITEMASK_XYZW, 0,
2*len, len+header, header, simd(dw));
+ return result;
}
-static void brw_wm_sample__alpha(struct brw_compile *p, int dw,
- int channel, int msg, int result)
+static int brw_wm_sample__alpha(struct brw_compile *p, int dw,
+ int channel, int msg, int result)
{
struct brw_reg src0;
- int len;
+ int mlen, rlen;
+
+ if (dw == 8) {
+ /* SIMD8 sample return is not masked */
+ mlen = 3;
+ rlen = 4;
+ } else {
+ mlen = 5;
+ rlen = 2;
+ }
- len = dw == 16 ? 4 : 2;
if (p->gen >= 60)
src0 = brw_message_reg(msg);
else
@@ -123,27 +133,31 @@ static void brw_wm_sample__alpha(struct brw_compile *p, int dw,
brw_SAMPLE(p, sample_result(dw, result), msg, src0,
channel+1, channel, WRITEMASK_W, 0,
- len/2, len+1, true, simd(dw));
+ rlen, mlen, true, simd(dw));
+
+ if (dw == 8)
+ result += 3;
+
+ return result;
}
-static void brw_wm_affine(struct brw_compile *p, int dw,
- int channel, int msg, int result)
+static int brw_wm_affine(struct brw_compile *p, int dw,
+ int channel, int msg, int result)
{
brw_wm_affine_st(p, dw, channel, msg);
- brw_wm_sample(p, dw, channel, msg, result);
+ return brw_wm_sample(p, dw, channel, msg, result);
}
-static void brw_wm_affine__alpha(struct brw_compile *p, int dw,
- int channel, int msg, int result)
+static int brw_wm_affine__alpha(struct brw_compile *p, int dw,
+ int channel, int msg, int result)
{
brw_wm_affine_st(p, dw, channel, msg);
- brw_wm_sample__alpha(p, dw, channel, msg, result);
+ return brw_wm_sample__alpha(p, dw, channel, msg, result);
}
static inline struct brw_reg null_result(int dw)
{
- return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
- BRW_ARF_NULL, 0,
+ return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0,
BRW_REGISTER_TYPE_UW,
dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8,
@@ -185,8 +199,8 @@ static void brw_fb_write(struct brw_compile *p, int dw)
insn->header.compression_control = BRW_COMPRESSION_NONE;
if (p->gen >= 60) {
- src0 = brw_message_reg(2);
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
+ src0 = brw_message_reg(2);
header = false;
} else {
insn->header.destreg__conditionalmod = 0;
@@ -206,14 +220,19 @@ static void brw_wm_write(struct brw_compile *p, int dw, int src)
{
int n;
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
-
if (dw == 8 && p->gen >= 60) {
- brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src, 0));
+ /* XXX pixel execution mask? */
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src+0, 0));
+ brw_MOV(p, brw_message_reg(3), brw_vec8_grf(src+1, 0));
brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0));
+ brw_MOV(p, brw_message_reg(5), brw_vec8_grf(src+3, 0));
goto done;
}
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
for (n = 0; n < 4; n++) {
if (p->gen >= 60) {
brw_MOV(p,
@@ -242,38 +261,36 @@ done:
brw_fb_write(p, dw);
}
-static inline struct brw_reg mask_a8(int nr)
-{
- return brw_reg(BRW_GENERAL_REGISTER_FILE,
- nr, 0,
- BRW_REGISTER_TYPE_F,
- BRW_VERTICAL_STRIDE_0,
- BRW_WIDTH_8,
- BRW_HORIZONTAL_STRIDE_1,
- BRW_SWIZZLE_XYZW,
- WRITEMASK_XYZW);
-}
-
-static void brw_wm_write__mask(struct brw_compile *p,
- int dw,
+static void brw_wm_write__mask(struct brw_compile *p, int dw,
int src, int mask)
{
int n;
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
-
if (dw == 8 && p->gen >= 60) {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
brw_MUL(p,
brw_message_reg(2),
- brw_vec8_grf(src, 0),
- mask_a8(mask));
+ brw_vec8_grf(src+0, 0),
+ brw_vec8_grf(mask, 0));
+ brw_MUL(p,
+ brw_message_reg(3),
+ brw_vec8_grf(src+1, 0),
+ brw_vec8_grf(mask, 0));
brw_MUL(p,
brw_message_reg(4),
brw_vec8_grf(src+2, 0),
- mask_a8(mask));
+ brw_vec8_grf(mask, 0));
+ brw_MUL(p,
+ brw_message_reg(5),
+ brw_vec8_grf(src+3, 0),
+ brw_vec8_grf(mask, 0));
+
goto done;
}
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
for (n = 0; n < 4; n++) {
if (p->gen >= 60) {
brw_MUL(p,
@@ -306,25 +323,36 @@ done:
brw_fb_write(p, dw);
}
-static void brw_wm_write__mask_ca(struct brw_compile *p,
- int dw, int src, int mask)
+static void brw_wm_write__mask_ca(struct brw_compile *p, int dw,
+ int src, int mask)
{
int n;
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
-
if (dw == 8 && p->gen >= 60) {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
brw_MUL(p,
brw_message_reg(2),
- brw_vec8_grf(src, 0),
- brw_vec8_grf(mask, 0));
+ brw_vec8_grf(src + 0, 0),
+ brw_vec8_grf(mask + 0, 0));
+ brw_MUL(p,
+ brw_message_reg(3),
+ brw_vec8_grf(src + 1, 0),
+ brw_vec8_grf(mask + 1, 0));
brw_MUL(p,
brw_message_reg(4),
- brw_vec8_grf(src + 2, 0),
+ brw_vec8_grf(src + 2, 0),
brw_vec8_grf(mask + 2, 0));
+ brw_MUL(p,
+ brw_message_reg(5),
+ brw_vec8_grf(src + 3, 0),
+ brw_vec8_grf(mask + 3, 0));
+
goto done;
}
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
for (n = 0; n < 4; n++) {
if (p->gen >= 60) {
brw_MUL(p,
@@ -357,56 +385,71 @@ done:
brw_fb_write(p, dw);
}
-void
+bool
brw_wm_kernel__affine(struct brw_compile *p, int dispatch)
{
- int src = 12;
-
if (p->gen < 60)
brw_wm_xy(p, dispatch);
- brw_wm_affine(p, dispatch, 0, 1, src);
- brw_wm_write(p, dispatch, src);
+ brw_wm_write(p, dispatch, brw_wm_affine(p, dispatch, 0, 1, 12));
+
+ return true;
}
-void
+bool
brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch)
{
- int src = 12, mask = 20;
+ int src, mask;
+
+ if (dispatch == 8)
+ return false; /* XXX sampler alpha retuns all 0 */
if (p->gen < 60)
brw_wm_xy(p, dispatch);
- brw_wm_affine(p, dispatch, 0, 1, src);
- brw_wm_affine__alpha(p, dispatch, 1, 7, mask);
+
+ src = brw_wm_affine(p, dispatch, 0, 1, 12);
+ mask = brw_wm_affine__alpha(p, dispatch, 1, 6, 20);
brw_wm_write__mask(p, dispatch, src, mask);
+
+ return true;
}
-void
+bool
brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch)
{
- int src = 12, mask = 20;
+ int src, mask;
if (p->gen < 60)
brw_wm_xy(p, dispatch);
- brw_wm_affine(p, dispatch, 0, 1, src);
- brw_wm_affine(p, dispatch, 1, 7, mask);
+
+ src = brw_wm_affine(p, dispatch, 0, 1, 12);
+ mask = brw_wm_affine(p, dispatch, 1, 6, 20);
brw_wm_write__mask_ca(p, dispatch, src, mask);
+
+ return true;
}
-void
+bool
brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch)
{
- int src = 12, mask = 14;
+ int src, mask;
+
+ if (dispatch == 8)
+ return false; /* XXX sampler alpha retuns all 0 */
if (p->gen < 60)
brw_wm_xy(p, dispatch);
- brw_wm_affine__alpha(p, dispatch, 0, 1, src);
- brw_wm_affine(p, dispatch, 1, 7, mask);
+
+ src = brw_wm_affine__alpha(p, dispatch, 0, 1, 12);
+ mask = brw_wm_affine(p, dispatch, 1, 6, 16);
brw_wm_write__mask(p, dispatch, mask, src);
+
+ return true;
}
/* Projective variants */
-static void brw_wm_projective_st(struct brw_compile *p, int dw, int channel, int msg)
+static void brw_wm_projective_st(struct brw_compile *p, int dw,
+ int channel, int msg)
{
int uv;
@@ -480,63 +523,77 @@ static void brw_wm_projective_st(struct brw_compile *p, int dw, int channel, int
}
}
-static void brw_wm_projective(struct brw_compile *p, int dw,
- int channel, int msg, int result)
+static int brw_wm_projective(struct brw_compile *p, int dw,
+ int channel, int msg, int result)
{
brw_wm_projective_st(p, dw, channel, msg);
- brw_wm_sample(p, dw, channel, msg, result);
+ return brw_wm_sample(p, dw, channel, msg, result);
}
-static void brw_wm_projective__alpha(struct brw_compile *p, int dw,
+static int brw_wm_projective__alpha(struct brw_compile *p, int dw,
int channel, int msg, int result)
{
brw_wm_projective_st(p, dw, channel, msg);
- brw_wm_sample__alpha(p, dw, channel, msg, result);
+ return brw_wm_sample__alpha(p, dw, channel, msg, result);
}
-void
+bool
brw_wm_kernel__projective(struct brw_compile *p, int dispatch)
{
- int src = 12;
-
if (p->gen < 60)
brw_wm_xy(p, dispatch);
- brw_wm_projective(p, dispatch, 0, 1, src);
- brw_wm_write(p, dispatch, src);
+ brw_wm_write(p, dispatch, brw_wm_projective(p, dispatch, 0, 1, 12));
+
+ return true;
}
-void
+bool
brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch)
{
- int src = 12, mask = 20;
+ int src, mask;
+
+ if (dispatch == 8)
+ return false; /* XXX sampler alpha retuns all 0 */
if (p->gen < 60)
brw_wm_xy(p, dispatch);
- brw_wm_projective(p, dispatch, 0, 1, src);
- brw_wm_projective__alpha(p, dispatch, 1, 7, mask);
+
+ src = brw_wm_projective(p, dispatch, 0, 1, 12);
+ mask = brw_wm_projective__alpha(p, dispatch, 1, 6, 20);
brw_wm_write__mask(p, dispatch, src, mask);
+
+ return true;
}
-void
+bool
brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch)
{
- int src = 12, mask = 20;
+ int src, mask;
if (p->gen < 60)
brw_wm_xy(p, dispatch);
- brw_wm_projective(p, dispatch, 0, 1, src);
- brw_wm_projective(p, dispatch, 1,7, mask);
+
+ src = brw_wm_projective(p, dispatch, 0, 1, 12);
+ mask = brw_wm_projective(p, dispatch, 1, 6, 20);
brw_wm_write__mask_ca(p, dispatch, src, mask);
+
+ return true;
}
-void
+bool
brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch)
{
- int src = 12, mask = 14;
+ int src, mask;
+
+ if (dispatch == 8)
+ return false; /* XXX sampler alpha retuns all 0 */
if (p->gen < 60)
brw_wm_xy(p, dispatch);
- brw_wm_projective__alpha(p, dispatch, 0, 1, src);
- brw_wm_projective(p, dispatch, 1, 7, mask);
+
+ src = brw_wm_projective__alpha(p, dispatch, 0, 1, 12);
+ mask = brw_wm_projective(p, dispatch, 1, 6, 16);
brw_wm_write__mask(p, dispatch, mask, src);
+
+ return true;
}
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 011b1b78..943c248b 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -482,11 +482,11 @@ uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream,
void *ptr);
unsigned sna_static_stream_compile_sf(struct sna *sna,
struct sna_static_stream *stream,
- void (*compile)(struct brw_compile *));
+ bool (*compile)(struct brw_compile *));
unsigned sna_static_stream_compile_wm(struct sna *sna,
struct sna_static_stream *stream,
- void (*compile)(struct brw_compile *, int),
+ bool (*compile)(struct brw_compile *, int),
int width);
struct kgem_bo *sna_static_stream_fini(struct sna *sna,
struct sna_static_stream *stream);
diff --git a/src/sna/sna_stream.c b/src/sna/sna_stream.c
index 66a8c461..1a0a86b9 100644
--- a/src/sna/sna_stream.c
+++ b/src/sna/sna_stream.c
@@ -97,7 +97,7 @@ struct kgem_bo *sna_static_stream_fini(struct sna *sna,
unsigned
sna_static_stream_compile_sf(struct sna *sna,
struct sna_static_stream *stream,
- void (*compile)(struct brw_compile *))
+ bool (*compile)(struct brw_compile *))
{
struct brw_compile p;
@@ -105,7 +105,11 @@ sna_static_stream_compile_sf(struct sna *sna,
sna_static_stream_map(stream,
64*sizeof(uint32_t), 64));
- compile(&p);
+ if (!compile(&p)) {
+ stream->used -= 64*sizeof(uint32_t);
+ return 0;
+ }
+
assert(p.nr_insn*sizeof(struct brw_instruction) <= 64*sizeof(uint32_t));
stream->used -= 64*sizeof(uint32_t) - p.nr_insn*sizeof(struct brw_instruction);
@@ -115,7 +119,7 @@ sna_static_stream_compile_sf(struct sna *sna,
unsigned
sna_static_stream_compile_wm(struct sna *sna,
struct sna_static_stream *stream,
- void (*compile)(struct brw_compile *, int),
+ bool (*compile)(struct brw_compile *, int),
int dispatch_width)
{
struct brw_compile p;
@@ -124,7 +128,11 @@ sna_static_stream_compile_wm(struct sna *sna,
sna_static_stream_map(stream,
256*sizeof(uint32_t), 64));
- compile(&p, dispatch_width);
+ if (!compile(&p, dispatch_width)) {
+ stream->used -= 256*sizeof(uint32_t);
+ return 0;
+ }
+
assert(p.nr_insn*sizeof(struct brw_instruction) <= 256*sizeof(uint32_t));
stream->used -= 256*sizeof(uint32_t) - p.nr_insn*sizeof(struct brw_instruction);