diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2012-08-01 00:01:15 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2012-08-01 10:32:37 +0100 |
commit | 9b2873d3d97b6780d878bd9b821fba0302470f9f (patch) | |
tree | 8c28ad966224af3f464c3acb788e00ec237cb1a4 /src/sna | |
parent | fd3a1236051265fab700aad689a171de47d7338f (diff) |
sna/gen4+: Implement an opacity shader
Avoid the cumbersome lookup through the alpha gradient texture and
simply multiply the source by the incoming opacity value. The next step
will be to reduce the number of floats required per vertex.
Now that we have removed the primary user of the alpha solid cache, it
may be time to retire that as well.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src/sna')
-rw-r--r-- | src/sna/brw/brw.h | 3 | ||||
-rw-r--r-- | src/sna/brw/brw_test_gen7.c | 13 | ||||
-rw-r--r-- | src/sna/brw/brw_wm.c | 96 | ||||
-rw-r--r-- | src/sna/gen4_render.c | 38 | ||||
-rw-r--r-- | src/sna/gen4_render.h | 3 | ||||
-rw-r--r-- | src/sna/gen5_render.c | 39 | ||||
-rw-r--r-- | src/sna/gen5_render.h | 3 | ||||
-rw-r--r-- | src/sna/gen6_render.c | 35 | ||||
-rw-r--r-- | src/sna/gen7_render.c | 35 | ||||
-rw-r--r-- | src/sna/sna_render.h | 6 |
10 files changed, 158 insertions, 113 deletions
diff --git a/src/sna/brw/brw.h b/src/sna/brw/brw.h index f0f3ac87..e5fa72f9 100644 --- a/src/sna/brw/brw.h +++ b/src/sna/brw/brw.h @@ -12,3 +12,6 @@ bool brw_wm_kernel__projective(struct brw_compile *p, int dispatch_width); bool brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch_width); bool brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch_width); bool brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch_width); + +bool brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch_width); +bool brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch_width); diff --git a/src/sna/brw/brw_test_gen7.c b/src/sna/brw/brw_test_gen7.c index c3f0e231..085b25cc 100644 --- a/src/sna/brw/brw_test_gen7.c +++ b/src/sna/brw/brw_test_gen7.c @@ -167,6 +167,17 @@ static void gen7_ps_nomask_projective(void) compare(ps_kernel_nomask_projective); } +static void gen7_ps_opacity(void) +{ + uint32_t store[1024]; + struct brw_compile p; + + brw_compile_init(&p, GEN, store); + brw_wm_kernel__affine_opacity(&p, 16); + + compare(ps_kernel_nomask_affine); +} + void brw_test_gen7(void) { gen7_ps_nomask_affine(); @@ -175,4 +186,6 @@ void brw_test_gen7(void) gen7_ps_masksa_affine(); gen7_ps_nomask_projective(); + + gen7_ps_opacity(); } diff --git a/src/sna/brw/brw_wm.c b/src/sna/brw/brw_wm.c index f96881af..bd4003dd 100644 --- a/src/sna/brw/brw_wm.c +++ b/src/sna/brw/brw_wm.c @@ -323,6 +323,68 @@ done: brw_fb_write(p, dw); } +static void brw_wm_write__opacity(struct brw_compile *p, int dw, + int src, int mask) +{ + int n; + + if (dw == 8 && p->gen >= 60) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MUL(p, + brw_message_reg(2), + brw_vec8_grf(src+0, 0), + brw_vec1_grf(mask, 3)); + brw_MUL(p, + brw_message_reg(3), + brw_vec8_grf(src+1, 0), + brw_vec1_grf(mask, 3)); + brw_MUL(p, + brw_message_reg(4), + brw_vec8_grf(src+2, 0), + brw_vec1_grf(mask, 3)); + brw_MUL(p, + brw_message_reg(5), + brw_vec8_grf(src+3, 
0), + brw_vec1_grf(mask, 3)); + + goto done; + } + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + for (n = 0; n < 4; n++) { + if (p->gen >= 60) { + brw_MUL(p, + brw_message_reg(2 + 2*n), + brw_vec8_grf(src + 2*n, 0), + brw_vec1_grf(mask, 3)); + } else if (p->gen >= 45 && dw == 16) { + brw_MUL(p, + brw_message_reg(2 + n + BRW_MRF_COMPR4), + brw_vec8_grf(src + 2*n, 0), + brw_vec1_grf(mask, 3)); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MUL(p, + brw_message_reg(2 + n), + brw_vec8_grf(src + 2*n, 0), + brw_vec1_grf(mask, 3)); + + if (dw == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MUL(p, + brw_message_reg(2 + n + 4), + brw_vec8_grf(src + 2*n+1, 0), + brw_vec1_grf(mask, 3)); + } + } + } + +done: + brw_fb_write(p, dw); +} + static void brw_wm_write__mask_ca(struct brw_compile *p, int dw, int src, int mask) { @@ -597,3 +659,37 @@ brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch) return true; } + +bool +brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) { + brw_wm_xy(p, dispatch); + mask = 4; + } else + mask = dispatch == 16 ? 8 : 6; + + src = brw_wm_affine(p, dispatch, 0, 1, 12); + brw_wm_write__opacity(p, dispatch, src, mask); + + return true; +} + +bool +brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch) +{ + int src, mask; + + if (p->gen < 60) { + brw_wm_xy(p, dispatch); + mask = 4; + } else + mask = dispatch == 16 ? 
8 : 6; + + src = brw_wm_projective(p, dispatch, 0, 1, 12); + brw_wm_write__opacity(p, dispatch, src, mask); + + return true; +} diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c index d1dbf5a8..d8beed97 100644 --- a/src/sna/gen4_render.c +++ b/src/sna/gen4_render.c @@ -149,6 +149,9 @@ static const struct wm_kernel_info { NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true), NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true), + NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true), + NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true), + KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false), KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false), }; @@ -2425,28 +2428,6 @@ cleanup_dst: /* A poor man's span interface. But better than nothing? */ #if !NO_COMPOSITE_SPANS -static bool -gen4_composite_alpha_gradient_init(struct sna *sna, - struct sna_composite_channel *channel) -{ - DBG(("%s\n", __FUNCTION__)); - - channel->filter = PictFilterNearest; - channel->repeat = RepeatPad; - channel->is_affine = true; - channel->is_solid = false; - channel->transform = NULL; - channel->width = 256; - channel->height = 1; - channel->card_format = GEN4_SURFACEFORMAT_B8G8R8A8_UNORM; - - channel->bo = sna_render_get_alpha_gradient(sna); - - channel->scale[0] = channel->scale[1] = 1; - channel->offset[0] = channel->offset[1] = 0; - return channel->bo != NULL; -} - inline static void gen4_emit_composite_texcoord(struct sna *sna, const struct sna_composite_channel *channel, @@ -2610,6 +2591,7 @@ gen4_render_composite_spans_done(struct sna *sna, DBG(("%s()\n", __FUNCTION__)); + kgem_bo_destroy(&sna->kgem, op->base.mask.bo); if (op->base.src.bo) kgem_bo_destroy(&sna->kgem, op->base.src.bo); @@ -2687,13 +2669,14 @@ gen4_render_composite_spans(struct sna *sna, break; } - tmp->base.mask.bo = NULL; + tmp->base.mask.bo = sna_render_get_solid(sna, 0); + if (tmp->base.mask.bo == NULL) + goto cleanup_src; + 
tmp->base.is_affine = tmp->base.src.is_affine; tmp->base.has_component_alpha = false; tmp->base.need_magic_ca_pass = false; - gen4_composite_alpha_gradient_init(sna, &tmp->base.mask); - tmp->prim_emit = gen4_emit_composite_spans_primitive; if (tmp->base.src.is_solid) tmp->prim_emit = gen4_emit_composite_spans_solid; @@ -2702,10 +2685,7 @@ gen4_render_composite_spans(struct sna *sna, tmp->base.floats_per_vertex = 5 + 2*!tmp->base.is_affine; tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex; - tmp->base.u.gen4.wm_kernel = - gen4_choose_composite_kernel(tmp->base.op, - true, false, - tmp->base.is_affine); + tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine; tmp->base.u.gen4.ve_id = 1 << 1 | tmp->base.is_affine; tmp->box = gen4_render_composite_spans_box; diff --git a/src/sna/gen4_render.h b/src/sna/gen4_render.h index 8e0cd746..49d232e8 100644 --- a/src/sna/gen4_render.h +++ b/src/sna/gen4_render.h @@ -2635,6 +2635,9 @@ typedef enum { WM_KERNEL_MASKSA, WM_KERNEL_MASKSA_P, + WM_KERNEL_OPACITY, + WM_KERNEL_OPACITY_P, + WM_KERNEL_VIDEO_PLANAR, WM_KERNEL_VIDEO_PACKED, KERNEL_COUNT diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c index 3d826c48..1e0ee108 100644 --- a/src/sna/gen5_render.c +++ b/src/sna/gen5_render.c @@ -120,6 +120,9 @@ static const struct wm_kernel_info { NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true), NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true), + NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true), + NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true), + KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false), KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false), }; @@ -2439,30 +2442,7 @@ cleanup_dst: return false; } -/* A poor man's span interface. But better than nothing? 
*/ #if !NO_COMPOSITE_SPANS -static bool -gen5_composite_alpha_gradient_init(struct sna *sna, - struct sna_composite_channel *channel) -{ - DBG(("%s\n", __FUNCTION__)); - - channel->filter = PictFilterNearest; - channel->repeat = RepeatPad; - channel->is_affine = true; - channel->is_solid = false; - channel->transform = NULL; - channel->width = 256; - channel->height = 1; - channel->card_format = GEN5_SURFACEFORMAT_B8G8R8A8_UNORM; - - channel->bo = sna_render_get_alpha_gradient(sna); - - channel->scale[0] = channel->scale[1] = 1; - channel->offset[0] = channel->offset[1] = 0; - return channel->bo != NULL; -} - inline static void gen5_emit_composite_texcoord(struct sna *sna, const struct sna_composite_channel *channel, @@ -2639,6 +2619,7 @@ gen5_render_composite_spans_done(struct sna *sna, DBG(("%s()\n", __FUNCTION__)); + kgem_bo_destroy(&sna->kgem, op->base.mask.bo); if (op->base.src.bo) kgem_bo_destroy(&sna->kgem, op->base.src.bo); @@ -2721,13 +2702,14 @@ gen5_render_composite_spans(struct sna *sna, break; } - tmp->base.mask.bo = NULL; + tmp->base.mask.bo = sna_render_get_solid(sna, 0); + if (tmp->base.mask.bo == NULL) + goto cleanup_src; + tmp->base.is_affine = tmp->base.src.is_affine; tmp->base.has_component_alpha = false; tmp->base.need_magic_ca_pass = false; - gen5_composite_alpha_gradient_init(sna, &tmp->base.mask); - tmp->prim_emit = gen5_emit_composite_spans_primitive; if (tmp->base.src.is_solid) tmp->prim_emit = gen5_emit_composite_spans_solid; @@ -2736,10 +2718,7 @@ gen5_render_composite_spans(struct sna *sna, tmp->base.floats_per_vertex = 5 + 2*!tmp->base.is_affine; tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex; - tmp->base.u.gen5.wm_kernel = - gen5_choose_composite_kernel(tmp->base.op, - true, false, - tmp->base.is_affine); + tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine; tmp->base.u.gen5.ve_id = 1 << 1 | tmp->base.is_affine; tmp->box = gen5_render_composite_spans_box; diff --git a/src/sna/gen5_render.h 
b/src/sna/gen5_render.h index 17708b53..b6e5b0c2 100644 --- a/src/sna/gen5_render.h +++ b/src/sna/gen5_render.h @@ -2770,6 +2770,9 @@ typedef enum { WM_KERNEL_MASKSA, WM_KERNEL_MASKSA_P, + WM_KERNEL_OPACITY, + WM_KERNEL_OPACITY_P, + WM_KERNEL_VIDEO_PLANAR, WM_KERNEL_VIDEO_PACKED, KERNEL_COUNT diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c index 78baa3ac..3dc0729c 100644 --- a/src/sna/gen6_render.c +++ b/src/sna/gen6_render.c @@ -128,6 +128,9 @@ static const struct wm_kernel_info { NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3, 2), NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3, 2), + NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2, 2), + NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2, 2), + KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7, 1), KERNEL(VIDEO_PACKED, ps_kernel_packed, 2, 1), }; @@ -2788,28 +2791,7 @@ cleanup_dst: return false; } -/* A poor man's span interface. But better than nothing? */ #if !NO_COMPOSITE_SPANS -static bool -gen6_composite_alpha_gradient_init(struct sna *sna, - struct sna_composite_channel *channel) -{ - DBG(("%s\n", __FUNCTION__)); - - channel->is_affine = true; - channel->is_solid = false; - channel->transform = NULL; - channel->width = 256; - channel->height = 1; - channel->card_format = GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; - - channel->bo = sna_render_get_alpha_gradient(sna); - - channel->scale[0] = channel->scale[1] = 1; - channel->offset[0] = channel->offset[1] = 0; - return channel->bo != NULL; -} - inline static void gen6_emit_composite_texcoord_affine(struct sna *sna, const struct sna_composite_channel *channel, @@ -3040,6 +3022,7 @@ gen6_render_composite_spans_done(struct sna *sna, if (sna->render_state.gen6.vertex_offset) gen6_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, op->base.mask.bo); if (op->base.src.bo) kgem_bo_destroy(&sna->kgem, op->base.src.bo); @@ -3120,13 +3103,13 @@ gen6_render_composite_spans(struct sna *sna, gen6_composite_channel_convert(&tmp->base.src); break; } + 
tmp->base.mask.bo = sna_render_get_solid(sna, 0); + if (tmp->base.mask.bo == NULL) + goto cleanup_src; tmp->base.is_affine = tmp->base.src.is_affine; tmp->base.need_magic_ca_pass = false; - if (!gen6_composite_alpha_gradient_init(sna, &tmp->base.mask)) - goto cleanup_src; - tmp->prim_emit = gen6_emit_composite_spans_primitive; if (tmp->base.src.is_solid) { tmp->prim_emit = gen6_emit_composite_spans_solid; @@ -3150,9 +3133,7 @@ gen6_render_composite_spans(struct sna *sna, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_PAD), gen6_get_blend(tmp->base.op, false, tmp->base.dst.format), - gen6_choose_composite_kernel(tmp->base.op, - true, false, - tmp->base.is_affine), + GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine, 1 << 1 | tmp->base.is_affine); tmp->box = gen6_render_composite_spans_box; diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index 8c64016d..a199307f 100644 --- a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -133,6 +133,9 @@ static const struct wm_kernel_info { NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3), NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3), + NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2), + NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2), + KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), }; @@ -2872,28 +2875,7 @@ cleanup_dst: return false; } -/* A poor man's span interface. But better than nothing? 
*/ #if !NO_COMPOSITE_SPANS -static bool -gen7_composite_alpha_gradient_init(struct sna *sna, - struct sna_composite_channel *channel) -{ - DBG(("%s\n", __FUNCTION__)); - - channel->is_affine = true; - channel->is_solid = false; - channel->transform = NULL; - channel->width = 256; - channel->height = 1; - channel->card_format = GEN7_SURFACEFORMAT_B8G8R8A8_UNORM; - - channel->bo = sna_render_get_alpha_gradient(sna); - - channel->scale[0] = channel->scale[1] = 1; - channel->offset[0] = channel->offset[1] = 0; - return channel->bo != NULL; -} - inline static void gen7_emit_composite_texcoord_affine(struct sna *sna, const struct sna_composite_channel *channel, @@ -3124,6 +3106,7 @@ gen7_render_composite_spans_done(struct sna *sna, DBG(("%s()\n", __FUNCTION__)); + kgem_bo_destroy(&sna->kgem, op->base.mask.bo); if (op->base.src.bo) kgem_bo_destroy(&sna->kgem, op->base.src.bo); @@ -3201,13 +3184,13 @@ gen7_render_composite_spans(struct sna *sna, gen7_composite_channel_convert(&tmp->base.src); break; } + tmp->base.mask.bo = sna_render_get_solid(sna, 0); + if (tmp->base.mask.bo == NULL) + goto cleanup_src; tmp->base.is_affine = tmp->base.src.is_affine; tmp->base.need_magic_ca_pass = false; - if (!gen7_composite_alpha_gradient_init(sna, &tmp->base.mask)) - goto cleanup_src; - tmp->prim_emit = gen7_emit_composite_spans_primitive; if (tmp->base.src.is_solid) { tmp->prim_emit = gen7_emit_composite_spans_solid; @@ -3231,9 +3214,7 @@ gen7_render_composite_spans(struct sna *sna, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_PAD), gen7_get_blend(tmp->base.op, false, tmp->base.dst.format), - gen7_choose_composite_kernel(tmp->base.op, - true, false, - tmp->base.is_affine), + GEN7_WM_KERNEL_OPACITY | !tmp->base.is_affine, 1 << 1 | tmp->base.is_affine); tmp->box = gen7_render_composite_spans_box; diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h index 142f2224..ddcafdbb 100644 --- a/src/sna/sna_render.h +++ b/src/sna/sna_render.h @@ -383,6 +383,9 @@ enum { GEN6_WM_KERNEL_MASKSA, 
GEN6_WM_KERNEL_MASKSA_P, + GEN6_WM_KERNEL_OPACITY, + GEN6_WM_KERNEL_OPACITY_P, + GEN6_WM_KERNEL_VIDEO_PLANAR, GEN6_WM_KERNEL_VIDEO_PACKED, GEN6_KERNEL_COUNT @@ -432,6 +435,9 @@ enum { GEN7_WM_KERNEL_MASKSA, GEN7_WM_KERNEL_MASKSA_P, + GEN7_WM_KERNEL_OPACITY, + GEN7_WM_KERNEL_OPACITY_P, + GEN7_WM_KERNEL_VIDEO_PLANAR, GEN7_WM_KERNEL_VIDEO_PACKED, GEN7_WM_KERNEL_COUNT |