diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2012-01-19 21:15:35 +0000 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2012-01-20 00:02:05 +0000 |
commit | fc9531fc2df12b896e6d9e8b3f0d7248e55bfcdd (patch) | |
tree | 28dd2b52ad55e75bed203467123a77e111b16b4e /src/sna | |
parent | 2e0a534a888c59b921fd4cad69b980c6eda72212 (diff) |
sna: Move the flush to the backends
This allows us to implement backend-specific workarounds and use the
more appropriate device-specific flushing for each generation.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src/sna')
-rw-r--r-- | src/sna/gen2_render.c | 59 | ||||
-rw-r--r-- | src/sna/gen3_render.c | 55 | ||||
-rw-r--r-- | src/sna/gen4_render.c | 23 | ||||
-rw-r--r-- | src/sna/gen5_render.c | 19 | ||||
-rw-r--r-- | src/sna/gen6_render.c | 140 | ||||
-rw-r--r-- | src/sna/gen6_render.h | 19 | ||||
-rw-r--r-- | src/sna/gen7_render.c | 85 | ||||
-rw-r--r-- | src/sna/gen7_render.h | 2 | ||||
-rw-r--r-- | src/sna/kgem.c | 35 | ||||
-rw-r--r-- | src/sna/kgem.h | 5 | ||||
-rw-r--r-- | src/sna/sna.h | 1 |
11 files changed, 185 insertions, 258 deletions
diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c index 0775b76c..6981576e 100644 --- a/src/sna/gen2_render.c +++ b/src/sna/gen2_render.c @@ -637,6 +637,17 @@ static void gen2_emit_composite_state(struct sna *sna, int tex; gen2_get_batch(sna); + + if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { + if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo) + BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE); + else + BATCH(_3DSTATE_MODES_5_CMD | + PIPELINE_FLUSH_RENDER_CACHE | + PIPELINE_FLUSH_TEXTURE_CACHE); + kgem_clear_dirty(&sna->kgem); + } + gen2_emit_target(sna, op); unwind = sna->kgem.nbatch; @@ -1657,8 +1668,6 @@ gen2_render_composite(struct sna *sna, int16_t width, int16_t height, struct sna_composite_op *tmp) { - bool need_flush; - DBG(("%s()\n", __FUNCTION__)); if (op >= ARRAY_SIZE(gen2_blend_op)) { @@ -1810,21 +1819,7 @@ gen2_render_composite(struct sna *sna, NULL)) kgem_submit(&sna->kgem); - need_flush = - kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo); gen2_emit_composite_state(sna, tmp); - if (need_flush) { - if (tmp->src.bo == tmp->dst.bo || tmp->mask.bo == tmp->dst.bo) { - kgem_emit_flush(&sna->kgem); - } else { - BATCH(_3DSTATE_MODES_5_CMD | - PIPELINE_FLUSH_RENDER_CACHE | - PIPELINE_FLUSH_TEXTURE_CACHE); - kgem_clear_dirty(&sna->kgem); - } - assert(sna->kgem.mode == KGEM_RENDER); - kgem_bo_mark_dirty(tmp->dst.bo); - } return TRUE; cleanup_src: @@ -2159,8 +2154,6 @@ gen2_render_composite_spans(struct sna *sna, unsigned flags, struct sna_composite_spans_op *tmp) { - bool need_flush; - DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height)); @@ -2236,20 +2229,7 @@ gen2_render_composite_spans(struct sna *sna, NULL)) kgem_submit(&sna->kgem); - need_flush = tmp->base.src.bo; gen2_emit_composite_spans_state(sna, tmp); - if (need_flush) { - if (tmp->base.src.bo == tmp->base.dst.bo) { - kgem_emit_flush(&sna->kgem); - } else { - BATCH(_3DSTATE_MODES_5_CMD | - 
PIPELINE_FLUSH_RENDER_CACHE | - PIPELINE_FLUSH_TEXTURE_CACHE); - kgem_clear_dirty(&sna->kgem); - } - assert(sna->kgem.mode == KGEM_RENDER); - kgem_bo_mark_dirty(tmp->base.dst.bo); - } return TRUE; cleanup_dst: @@ -2774,6 +2754,16 @@ static void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op uint32_t ls1, v; gen2_get_batch(sna); + + if (kgem_bo_is_dirty(op->src.bo)) { + if (op->src.bo == op->dst.bo) + BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE); + else + BATCH(_3DSTATE_MODES_5_CMD | + PIPELINE_FLUSH_RENDER_CACHE | + PIPELINE_FLUSH_TEXTURE_CACHE); + kgem_clear_dirty(&sna->kgem); + } gen2_emit_target(sna, op); ls1 = sna->kgem.nbatch; @@ -2845,9 +2835,6 @@ gen2_render_copy_boxes(struct sna *sna, uint8_t alu, if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) kgem_submit(&sna->kgem); - if (kgem_bo_is_dirty(src_bo)) - kgem_emit_flush(&sna->kgem); - memset(&tmp, 0, sizeof(tmp)); tmp.op = alu; @@ -2980,6 +2967,7 @@ gen2_render_copy(struct sna *sna, uint8_t alu, tmp->base.dst.bo = dst_bo; gen2_render_copy_setup_source(&tmp->base.src, src, src_bo); + tmp->base.mask.bo = NULL; tmp->base.floats_per_vertex = 4; tmp->base.floats_per_rect = 12; @@ -2987,9 +2975,6 @@ gen2_render_copy(struct sna *sna, uint8_t alu, if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) kgem_submit(&sna->kgem); - if (kgem_bo_is_dirty(src_bo)) - kgem_emit_flush(&sna->kgem); - tmp->blt = gen2_render_copy_blt; tmp->done = gen2_render_copy_done; diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c index a833526b..ad42fc78 100644 --- a/src/sna/gen3_render.c +++ b/src/sna/gen3_render.c @@ -1338,6 +1338,16 @@ static void gen3_emit_composite_state(struct sna *sna, gen3_get_batch(sna); + if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { + if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo) + OUT_BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE); + else + OUT_BATCH(_3DSTATE_MODES_5_CMD | + PIPELINE_FLUSH_RENDER_CACHE | + PIPELINE_FLUSH_TEXTURE_CACHE); + 
kgem_clear_dirty(&sna->kgem); + } + gen3_emit_target(sna, op->dst.bo, op->dst.width, @@ -2560,8 +2570,6 @@ gen3_render_composite(struct sna *sna, int16_t width, int16_t height, struct sna_composite_op *tmp) { - bool need_flush; - DBG(("%s()\n", __FUNCTION__)); if (op >= ARRAY_SIZE(gen3_blend_op)) { @@ -2802,22 +2810,7 @@ gen3_render_composite(struct sna *sna, NULL)) kgem_submit(&sna->kgem); - need_flush = - kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo); gen3_emit_composite_state(sna, tmp); - if (need_flush) { - if (tmp->src.bo == tmp->dst.bo || tmp->mask.bo == tmp->dst.bo) { - kgem_emit_flush(&sna->kgem); - } else { - OUT_BATCH(_3DSTATE_MODES_5_CMD | - PIPELINE_FLUSH_RENDER_CACHE | - PIPELINE_FLUSH_TEXTURE_CACHE); - kgem_clear_dirty(&sna->kgem); - } - assert(sna->kgem.mode == KGEM_RENDER); - kgem_bo_mark_dirty(tmp->dst.bo); - } - gen3_align_vertex(sna, tmp); return TRUE; @@ -3141,7 +3134,7 @@ gen3_render_composite_spans(struct sna *sna, unsigned flags, struct sna_composite_spans_op *tmp) { - bool no_offset, need_flush; + bool no_offset; DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height)); @@ -3230,6 +3223,8 @@ gen3_render_composite_spans(struct sna *sna, break; } + tmp->base.mask.bo = NULL; + tmp->base.floats_per_vertex = 2; if (!is_constant_ps(tmp->base.src.u.gen3.type)) tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 
2 : 3; @@ -3246,21 +3241,7 @@ gen3_render_composite_spans(struct sna *sna, NULL)) kgem_submit(&sna->kgem); - need_flush = kgem_bo_is_dirty(tmp->base.src.bo); gen3_emit_composite_state(sna, &tmp->base); - if (need_flush) { - if (tmp->base.src.bo == tmp->base.dst.bo) { - kgem_emit_flush(&sna->kgem); - } else { - OUT_BATCH(_3DSTATE_MODES_5_CMD | - PIPELINE_FLUSH_RENDER_CACHE | - PIPELINE_FLUSH_TEXTURE_CACHE); - kgem_clear_dirty(&sna->kgem); - } - assert(sna->kgem.mode == KGEM_RENDER); - kgem_bo_mark_dirty(tmp->base.dst.bo); - } - gen3_align_vertex(sna, &tmp->base); return TRUE; @@ -3811,9 +3792,6 @@ gen3_render_copy_boxes(struct sna *sna, uint8_t alu, if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) kgem_submit(&sna->kgem); - if (kgem_bo_is_dirty(src_bo)) - kgem_emit_flush(&sna->kgem); - memset(&tmp, 0, sizeof(tmp)); tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear; @@ -3827,6 +3805,7 @@ gen3_render_copy_boxes(struct sna *sna, uint8_t alu, tmp.floats_per_vertex = 4; tmp.floats_per_rect = 12; + tmp.mask.bo = NULL; tmp.mask.u.gen3.type = SHADER_NONE; gen3_emit_composite_state(sna, &tmp); @@ -3955,14 +3934,12 @@ gen3_render_copy(struct sna *sna, uint8_t alu, tmp->base.floats_per_vertex = 4; tmp->base.floats_per_rect = 12; + tmp->base.mask.bo = NULL; tmp->base.mask.u.gen3.type = SHADER_NONE; if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) kgem_submit(&sna->kgem); - if (kgem_bo_is_dirty(src_bo)) - kgem_emit_flush(&sna->kgem); - tmp->blt = gen3_render_copy_blt; tmp->done = gen3_render_copy_done; @@ -4111,6 +4088,7 @@ gen3_render_fill_boxes(struct sna *sna, tmp.need_magic_ca_pass = false; gen3_init_solid(&tmp.src, pixel); + tmp.mask.bo = NULL; tmp.mask.u.gen3.type = SHADER_NONE; tmp.u.gen3.num_constants = 0; @@ -4264,6 +4242,7 @@ gen3_render_fill(struct sna *sna, uint8_t alu, gen3_init_solid(&tmp->base.src, sna_rgba_for_color(color, dst->drawable.depth)); + tmp->base.mask.bo = NULL; tmp->base.mask.u.gen3.type = SHADER_NONE; tmp->base.u.gen3.num_constants = 0; diff 
--git a/src/sna/gen4_render.c b/src/sna/gen4_render.c index 6351fa98..5ffd4a70 100644 --- a/src/sna/gen4_render.c +++ b/src/sna/gen4_render.c @@ -1263,7 +1263,6 @@ gen4_emit_state_base_address(struct sna *sna) static void gen4_emit_invariant(struct sna *sna) { - OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); if (sna->kgem.gen >= 45) OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); else @@ -1465,6 +1464,12 @@ gen4_emit_state(struct sna *sna, gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel); gen4_emit_vertex_elements(sna, op); gen4_emit_drawing_rectangle(sna, op); + + if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { + OUT_BATCH(MI_FLUSH); + kgem_clear_dirty(&sna->kgem); + kgem_bo_mark_dirty(op->dst.bo); + } } static void @@ -1724,9 +1729,6 @@ gen4_render_video(struct sna *sna, if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) kgem_submit(&sna->kgem); - if (kgem_bo_is_dirty(frame->bo)) - kgem_emit_flush(&sna->kgem); - gen4_video_bind_surfaces(sna, &tmp, frame); gen4_align_vertex(sna, &tmp); @@ -2308,9 +2310,6 @@ gen4_render_composite(struct sna *sna, NULL)) kgem_submit(&sna->kgem); - if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo)) - kgem_emit_flush(&sna->kgem); - gen4_bind_surfaces(sna, tmp); gen4_align_vertex(sna, tmp); return TRUE; @@ -2454,6 +2453,8 @@ fallback: tmp.src.width = src->drawable.width; tmp.src.height = src->drawable.height; + tmp.mask.bo = NULL; + tmp.is_affine = TRUE; tmp.floats_per_vertex = 3; tmp.u.gen4.wm_kernel = WM_KERNEL; @@ -2462,9 +2463,6 @@ fallback: if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) kgem_submit(&sna->kgem); - if (kgem_bo_is_dirty(src_bo)) - kgem_emit_flush(&sna->kgem); - gen4_copy_bind_surfaces(sna, &tmp); gen4_align_vertex(sna, &tmp); @@ -2558,6 +2556,8 @@ fallback: op->base.src.filter = SAMPLER_FILTER_NEAREST; op->base.src.repeat = SAMPLER_EXTEND_NONE; + op->base.mask.bo = NULL; + op->base.is_affine = true; op->base.floats_per_vertex = 3; 
op->base.u.gen4.wm_kernel = WM_KERNEL; @@ -2566,9 +2566,6 @@ fallback: if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) kgem_submit(&sna->kgem); - if (kgem_bo_is_dirty(src_bo)) - kgem_emit_flush(&sna->kgem); - gen4_copy_bind_surfaces(sna, &op->base); gen4_align_vertex(sna, &op->base); diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c index 5104cb7e..768a5db0 100644 --- a/src/sna/gen5_render.c +++ b/src/sna/gen5_render.c @@ -1484,6 +1484,12 @@ gen5_emit_state(struct sna *sna, if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)) gen5_emit_urb(sna); gen5_emit_vertex_elements(sna, op); + + if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { + OUT_BATCH(MI_FLUSH); + kgem_clear_dirty(&sna->kgem); + kgem_bo_mark_dirty(op->dst.bo); + } } static void gen5_bind_surfaces(struct sna *sna, @@ -1744,6 +1750,8 @@ gen5_render_video(struct sna *sna, tmp.src.filter = SAMPLER_FILTER_BILINEAR; tmp.src.repeat = SAMPLER_EXTEND_PAD; + tmp.src.bo = frame->bo; + tmp.mask.bo = NULL; tmp.u.gen5.wm_kernel = is_planar_fourcc(frame->id) ? 
WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED; tmp.u.gen5.ve_id = 1; @@ -1754,9 +1762,6 @@ gen5_render_video(struct sna *sna, if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) kgem_submit(&sna->kgem); - if (kgem_bo_is_dirty(frame->bo)) - kgem_emit_flush(&sna->kgem); - gen5_video_bind_surfaces(sna, &tmp, frame); gen5_align_vertex(sna, &tmp); @@ -2353,7 +2358,6 @@ gen5_render_composite(struct sna *sna, kgem_bo_destroy(&sna->kgem, tmp->src.bo); return TRUE; } - kgem_emit_flush(&sna->kgem); } gen5_bind_surfaces(sna, tmp); @@ -2632,6 +2636,7 @@ gen5_render_composite_spans(struct sna *sna, break; } + tmp->base.mask.bo = NULL; tmp->base.is_affine = tmp->base.src.is_affine; tmp->base.has_component_alpha = FALSE; tmp->base.need_magic_ca_pass = FALSE; @@ -2661,9 +2666,6 @@ gen5_render_composite_spans(struct sna *sna, NULL)) kgem_submit(&sna->kgem); - if (kgem_bo_is_dirty(tmp->base.src.bo)) - kgem_emit_flush(&sna->kgem); - gen5_bind_surfaces(sna, &tmp->base); gen5_align_vertex(sna, &tmp->base); return TRUE; @@ -2797,8 +2799,6 @@ fallback: dst->drawable.bitsPerPixel, box, n)) return TRUE; - - kgem_emit_flush(&sna->kgem); } gen5_copy_bind_surfaces(sna, &tmp); @@ -2948,7 +2948,6 @@ fallback: dst->drawable.bitsPerPixel, op)) return TRUE; - kgem_emit_flush(&sna->kgem); } gen5_copy_bind_surfaces(sna, &op->base); diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c index 601bdaef..0967ea3b 100644 --- a/src/sna/gen6_render.c +++ b/src/sna/gen6_render.c @@ -556,7 +556,7 @@ gen6_emit_invariant(struct sna *sna) sna->render_state.gen6.needs_invariant = FALSE; } -static bool +static void gen6_emit_cc(struct sna *sna, int op, bool has_component_alpha, uint32_t dst_format) { @@ -570,7 +570,7 @@ gen6_emit_cc(struct sna *sna, op, has_component_alpha, dst_format, blend, render->blend)); if (render->blend == blend) - return false; + return; if (op == PictOpClear) { uint32_t src; @@ -580,7 +580,7 @@ gen6_emit_cc(struct sna *sna, */ src = BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, 
GEN6_BLENDFACTOR_ZERO); if (render->blend == src) - return false; + return; } OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2)); @@ -594,7 +594,6 @@ gen6_emit_cc(struct sna *sna, } render->blend = blend; - return true; } static void @@ -680,11 +679,11 @@ gen6_emit_wm(struct sna *sna, unsigned int kernel, int nr_surfaces, int nr_input OUT_BATCH(0); } -static bool +static void gen6_emit_binding_table(struct sna *sna, uint16_t offset) { if (sna->render_state.gen6.surface_table == offset) - return false; + return; /* Binding table pointers */ OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS | @@ -696,19 +695,27 @@ gen6_emit_binding_table(struct sna *sna, uint16_t offset) OUT_BATCH(offset*4); sna->render_state.gen6.surface_table = offset; - return true; +} + +static bool +gen6_need_drawing_rectangle(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); + uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; + + return (sna->render_state.gen6.drawrect_limit != limit || + sna->render_state.gen6.drawrect_offset != offset); } static void gen6_emit_drawing_rectangle(struct sna *sna, - const struct sna_composite_op *op, - bool force) + const struct sna_composite_op *op) { uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; - if (!force && - sna->render_state.gen6.drawrect_limit == limit && + if (sna->render_state.gen6.drawrect_limit == limit && sna->render_state.gen6.drawrect_offset == offset) return; @@ -800,28 +807,54 @@ gen6_emit_vertex_elements(struct sna *sna, } static void +gen6_emit_flush(struct sna *sna) +{ + OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH | + GEN6_PIPE_CONTROL_TC_FLUSH | + GEN6_PIPE_CONTROL_CS_STALL); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static void gen6_emit_state(struct sna *sna, const struct sna_composite_op *op, uint16_t wm_binding_table) - { - bool 
flushed = - (sna->kgem.batch[sna->kgem.nbatch-1] & (0xff<<23)) == MI_FLUSH; - bool need_flush; + /* [DevSNB-C+{W/A}] Before any depth stall flush (including those + * produced by non-pipelined state commands), software needs to first + * send a PIPE_CONTROL with no bits set except Post-Sync Operation != + * 0. + * + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * BEFORE the pipe-control with a post-sync op and no write-cache + * flushes. + */ + if (gen6_need_drawing_rectangle(sna, op)) { + OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL | + GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME); + OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, + sna->render_state.gen6.general_bo, + I915_GEM_DOMAIN_INSTRUCTION << 16 | + I915_GEM_DOMAIN_INSTRUCTION, + 64)); + OUT_BATCH(0); + } - need_flush = gen6_emit_cc(sna, - op->op, - op->has_component_alpha, - op->dst.format); + if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { + gen6_emit_flush(sna); + kgem_clear_dirty(&sna->kgem); + kgem_bo_mark_dirty(op->dst.bo); + } - DBG(("%s: sampler src=(%d, %d), mask=(%d, %d), offset=%d\n", - __FUNCTION__, - op->src.filter, op->src.repeat, - op->mask.filter, op->mask.repeat, - (int)SAMPLER_OFFSET(op->src.filter, - op->src.repeat, - op->mask.filter, - op->mask.repeat))); + gen6_emit_cc(sna, op->op, op->has_component_alpha, op->dst.format); gen6_emit_sampler(sna, SAMPLER_OFFSET(op->src.filter, op->src.repeat, @@ -833,20 +866,14 @@ gen6_emit_state(struct sna *sna, op->u.gen6.nr_surfaces, op->u.gen6.nr_inputs); gen6_emit_vertex_elements(sna, op); - - /* XXX updating the binding table requires a non-pipelined cmd? - * The '>' in KDE menus suggest that every binding table update - * requires a subsequent non-pipelined op, or maybe a pipelined flush? 
- */ - need_flush |= gen6_emit_binding_table(sna, wm_binding_table); - gen6_emit_drawing_rectangle(sna, op, need_flush & !flushed); + gen6_emit_binding_table(sna, wm_binding_table); + gen6_emit_drawing_rectangle(sna, op); } static void gen6_magic_ca_pass(struct sna *sna, const struct sna_composite_op *op) { struct gen6_render_state *state = &sna->render_state.gen6; - bool need_flush; if (!op->need_magic_ca_pass) return; @@ -854,19 +881,15 @@ static void gen6_magic_ca_pass(struct sna *sna, DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, sna->render.vertex_start, sna->render.vertex_index)); - need_flush = gen6_emit_cc(sna, PictOpAdd, TRUE, op->dst.format); + gen6_emit_flush(sna); + + gen6_emit_cc(sna, PictOpAdd, TRUE, op->dst.format); gen6_emit_wm(sna, gen6_choose_composite_kernel(PictOpAdd, TRUE, TRUE, op->is_affine), 3, 2); - /* XXX We apparently need a non-pipelined op to flush the - * pipeline before changing blend state. - */ - if (need_flush) - OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); - OUT_BATCH(GEN6_3DPRIMITIVE | GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT | @@ -1036,6 +1059,14 @@ static void null_create(struct sna_static_stream *stream) sna_static_stream_map(stream, 64, 64); } +static void scratch_create(struct sna_static_stream *stream) +{ + /* 64 bytes of scratch space for random writes, such as + * the pipe-control w/a. 
+ */ + sna_static_stream_map(stream, 64, 64); +} + static void sampler_state_init(struct gen6_sampler_state *sampler_state, sampler_filter_t filter, @@ -1912,9 +1943,12 @@ gen6_render_video(struct sna *sna, tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth); tmp.dst.bo = priv->gpu_bo; + tmp.src.bo = frame->bo; tmp.src.filter = SAMPLER_FILTER_BILINEAR; tmp.src.repeat = SAMPLER_EXTEND_PAD; + tmp.mask.bo = NULL; + tmp.is_affine = TRUE; tmp.floats_per_vertex = 3; tmp.floats_per_rect = 9; @@ -1935,9 +1969,6 @@ gen6_render_video(struct sna *sna, _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - if (kgem_bo_is_dirty(frame->bo)) - kgem_emit_flush(&sna->kgem); - gen6_emit_video_state(sna, &tmp, frame); gen6_align_vertex(sna, &tmp); @@ -2549,9 +2580,6 @@ gen6_render_composite(struct sna *sna, _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo)) - kgem_emit_flush(&sna->kgem); - gen6_emit_composite_state(sna, tmp); gen6_align_vertex(sna, tmp); return TRUE; @@ -2911,6 +2939,8 @@ gen6_render_composite_spans(struct sna *sna, break; } + tmp->base.mask.bo = NULL; + tmp->base.is_affine = tmp->base.src.is_affine; tmp->base.has_component_alpha = FALSE; tmp->base.need_magic_ca_pass = FALSE; @@ -2954,9 +2984,6 @@ gen6_render_composite_spans(struct sna *sna, _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - if (kgem_bo_is_dirty(tmp->base.src.bo)) - kgem_emit_flush(&sna->kgem); - gen6_emit_composite_state(sna, &tmp->base); gen6_align_vertex(sna, &tmp->base); return TRUE; @@ -3144,9 +3171,6 @@ fallback: _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - if (kgem_bo_is_dirty(src_bo)) - kgem_emit_flush(&sna->kgem); - gen6_emit_copy_state(sna, &tmp); gen6_align_vertex(sna, &tmp); @@ -3290,6 +3314,8 @@ fallback: op->base.src.filter = SAMPLER_FILTER_NEAREST; op->base.src.repeat = SAMPLER_EXTEND_NONE; + op->base.mask.bo = NULL; + op->base.is_affine = true; op->base.floats_per_vertex = 3; op->base.floats_per_rect = 9; @@ -3305,9 
+3331,6 @@ fallback: _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - if (kgem_bo_is_dirty(src_bo)) - kgem_emit_flush(&sna->kgem); - gen6_emit_copy_state(sna, &op->base); gen6_align_vertex(sna, &op->base); @@ -3442,6 +3465,8 @@ gen6_render_fill_boxes(struct sna *sna, tmp.src.filter = SAMPLER_FILTER_NEAREST; tmp.src.repeat = SAMPLER_EXTEND_REPEAT; + tmp.mask.bo = NULL; + tmp.is_affine = TRUE; tmp.floats_per_vertex = 3; tmp.floats_per_rect = 9; @@ -3927,6 +3952,7 @@ static Bool gen6_render_setup(struct sna *sna) * dumps, you know it points to zero. */ null_create(&general); + scratch_create(&general); for (m = 0; m < GEN6_KERNEL_COUNT; m++) state->wm_kernel[m] = diff --git a/src/sna/gen6_render.h b/src/sna/gen6_render.h index 42c5a6b9..eded2b7c 100644 --- a/src/sna/gen6_render.h +++ b/src/sna/gen6_render.h @@ -133,20 +133,6 @@ /* for GEN6_STATE_BASE_ADDRESS */ #define BASE_ADDRESS_MODIFY (1 << 0) -/* for GEN6_PIPE_CONTROL */ -#define GEN6_PIPE_CONTROL_NOWRITE (0 << 14) -#define GEN6_PIPE_CONTROL_WRITE_QWORD (1 << 14) -#define GEN6_PIPE_CONTROL_WRITE_DEPTH (2 << 14) -#define GEN6_PIPE_CONTROL_WRITE_TIME (3 << 14) -#define GEN6_PIPE_CONTROL_DEPTH_STALL (1 << 13) -#define GEN6_PIPE_CONTROL_WC_FLUSH (1 << 12) -#define GEN6_PIPE_CONTROL_IS_FLUSH (1 << 11) -#define GEN6_PIPE_CONTROL_TC_FLUSH (1 << 10) -#define GEN6_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) -#define GEN6_PIPE_CONTROL_GLOBAL_GTT (1 << 2) -#define GEN6_PIPE_CONTROL_LOCAL_PGTT (0 << 2) -#define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) - /* VERTEX_BUFFER_STATE Structure */ #define VB0_BUFFER_INDEX_SHIFT 26 #define VB0_VERTEXDATA (0 << 20) @@ -342,8 +328,6 @@ #define GEN6_3DSTATE_AA_LINE_PARAMS GEN6_3D(3, 1, 0xa) #define GEN6_3DSTATE_GS_SVB_INDEX GEN6_3D(3, 1, 0xb) -#define GEN6_PIPE_CONTROL GEN6_3D(3, 2, 0) - #define GEN6_3DPRIMITIVE GEN6_3D(3, 3, 0) #define GEN6_3DSTATE_CLEAR_PARAMS GEN6_3D(3, 1, 0x10) @@ -457,6 +441,8 @@ #define GEN6_CLIP_ENABLE 1 /* for GEN6_PIPE_CONTROL */ +#define GEN6_PIPE_CONTROL GEN6_3D(3, 2, 
0) +#define GEN6_PIPE_CONTROL_CS_STALL (1 << 20) #define GEN6_PIPE_CONTROL_NOWRITE (0 << 14) #define GEN6_PIPE_CONTROL_WRITE_QWORD (1 << 14) #define GEN6_PIPE_CONTROL_WRITE_DEPTH (2 << 14) @@ -468,6 +454,7 @@ #define GEN6_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) #define GEN6_PIPE_CONTROL_GLOBAL_GTT (1 << 2) #define GEN6_PIPE_CONTROL_LOCAL_PGTT (0 << 2) +#define GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) #define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) /* 3DPRIMITIVE bits */ diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index a7f1dc09..209d8a0d 100644 --- a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -705,13 +705,13 @@ gen7_emit_invariant(struct sna *sna) sna->render_state.gen7.needs_invariant = FALSE; } -static bool +static void gen7_emit_cc(struct sna *sna, uint32_t blend_offset) { struct gen7_render_state *render = &sna->render_state.gen7; if (render->blend == blend_offset) - return false; + return; /* XXX can have upto 8 blend states preload, selectable via * Render Target Index. What other side-effects of Render Target Index? 
@@ -722,7 +722,6 @@ gen7_emit_cc(struct sna *sna, uint32_t blend_offset) OUT_BATCH((render->cc_blend + blend_offset) | 1); render->blend = blend_offset; - return true; } static void @@ -797,11 +796,11 @@ gen7_emit_wm(struct sna *sna, unsigned int kernel, int nr_surfaces, int nr_input OUT_BATCH(0); /* kernel 2 */ } -static bool +static void gen7_emit_binding_table(struct sna *sna, uint16_t offset) { if (sna->render_state.gen7.surface_table == offset) - return false; + return; /* Binding table pointers */ assert(is_aligned(4*offset, 32)); @@ -809,19 +808,16 @@ gen7_emit_binding_table(struct sna *sna, uint16_t offset) OUT_BATCH(offset*4); sna->render_state.gen7.surface_table = offset; - return true; } static void gen7_emit_drawing_rectangle(struct sna *sna, - const struct sna_composite_op *op, - bool force) + const struct sna_composite_op *op) { uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; - if (!force && - sna->render_state.gen7.drawrect_limit == limit && + if (sna->render_state.gen7.drawrect_limit == limit && sna->render_state.gen7.drawrect_offset == offset) return; @@ -915,19 +911,32 @@ gen7_emit_vertex_elements(struct sna *sna, } static void +gen7_emit_flush(struct sna *sna) +{ + OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | + GEN7_PIPE_CONTROL_TC_FLUSH | + GEN7_PIPE_CONTROL_CS_STALL); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static void gen7_emit_state(struct sna *sna, const struct sna_composite_op *op, uint16_t wm_binding_table) { - bool flushed = - (sna->kgem.batch[sna->kgem.nbatch-1] & (0xff<<23)) == MI_FLUSH; - bool need_flush; + if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { + gen7_emit_flush(sna); + kgem_clear_dirty(&sna->kgem); + kgem_bo_mark_dirty(op->dst.bo); + } - need_flush = gen7_emit_cc(sna, - gen7_get_blend(op->op, - op->has_component_alpha, - op->dst.format)); + gen7_emit_cc(sna, + gen7_get_blend(op->op, + 
op->has_component_alpha, + op->dst.format)); DBG(("%s: sampler src=(%d, %d), mask=(%d, %d), offset=%d\n", __FUNCTION__, @@ -949,16 +958,14 @@ gen7_emit_state(struct sna *sna, op->u.gen7.nr_inputs); gen7_emit_vertex_elements(sna, op); - /* XXX updating the binding table requires a non-pipelined cmd? */ - need_flush |= gen7_emit_binding_table(sna, wm_binding_table); - gen7_emit_drawing_rectangle(sna, op, need_flush & !flushed); + gen7_emit_binding_table(sna, wm_binding_table); + gen7_emit_drawing_rectangle(sna, op); } static void gen7_magic_ca_pass(struct sna *sna, const struct sna_composite_op *op) { struct gen7_render_state *state = &sna->render_state.gen7; - bool need_flush; if (!op->need_magic_ca_pass) return; @@ -966,21 +973,15 @@ static void gen7_magic_ca_pass(struct sna *sna, DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, sna->render.vertex_start, sna->render.vertex_index)); - need_flush = - gen7_emit_cc(sna, - gen7_get_blend(PictOpAdd, TRUE, op->dst.format)); + gen7_emit_flush(sna); + + gen7_emit_cc(sna, gen7_get_blend(PictOpAdd, TRUE, op->dst.format)); gen7_emit_wm(sna, gen7_choose_composite_kernel(PictOpAdd, TRUE, TRUE, op->is_affine), 3, 2); - /* XXX We apparently need a non-pipelined op to flush the - * pipeline before changing blend state. 
- */ - if (need_flush) - OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); - OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2)); OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST); OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); @@ -2012,9 +2013,12 @@ gen7_render_video(struct sna *sna, tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth); tmp.dst.bo = priv->gpu_bo; + tmp.src.bo = frame->bo; tmp.src.filter = SAMPLER_FILTER_BILINEAR; tmp.src.repeat = SAMPLER_EXTEND_PAD; + tmp.mask.bo = NULL; + tmp.is_affine = TRUE; tmp.floats_per_vertex = 3; tmp.floats_per_rect = 9; @@ -2035,9 +2039,6 @@ gen7_render_video(struct sna *sna, _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - if (kgem_bo_is_dirty(frame->bo)) - kgem_emit_flush(&sna->kgem); - gen7_emit_video_state(sna, &tmp, frame); gen7_align_vertex(sna, &tmp); @@ -2650,9 +2651,6 @@ gen7_render_composite(struct sna *sna, _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo)) - kgem_emit_flush(&sna->kgem); - gen7_emit_composite_state(sna, tmp); gen7_align_vertex(sna, tmp); return TRUE; @@ -3011,6 +3009,8 @@ gen7_render_composite_spans(struct sna *sna, break; } + tmp->base.mask.bo = NULL; + tmp->base.is_affine = tmp->base.src.is_affine; tmp->base.has_component_alpha = FALSE; tmp->base.need_magic_ca_pass = FALSE; @@ -3054,9 +3054,6 @@ gen7_render_composite_spans(struct sna *sna, _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - if (kgem_bo_is_dirty(tmp->base.src.bo)) - kgem_emit_flush(&sna->kgem); - gen7_emit_composite_state(sna, &tmp->base); gen7_align_vertex(sna, &tmp->base); return TRUE; @@ -3244,9 +3241,6 @@ fallback: _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - if (kgem_bo_is_dirty(src_bo)) - kgem_emit_flush(&sna->kgem); - gen7_emit_copy_state(sna, &tmp); gen7_align_vertex(sna, &tmp); @@ -3390,6 +3384,8 @@ fallback: op->base.src.filter = SAMPLER_FILTER_NEAREST; op->base.src.repeat = SAMPLER_EXTEND_NONE; + op->base.mask.bo = NULL; + 
op->base.is_affine = true; op->base.floats_per_vertex = 3; op->base.floats_per_rect = 9; @@ -3405,9 +3401,6 @@ fallback: _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - if (kgem_bo_is_dirty(src_bo)) - kgem_emit_flush(&sna->kgem); - gen7_emit_copy_state(sna, &op->base); gen7_align_vertex(sna, &op->base); @@ -3937,7 +3930,7 @@ gen7_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) tmp.has_component_alpha = 0; tmp.need_magic_ca_pass = FALSE; - tmp.u.gen7.wm_kernel = GEN6_WM_KERNEL_NOMASK; + tmp.u.gen7.wm_kernel = GEN7_WM_KERNEL_NOMASK; tmp.u.gen7.nr_surfaces = 2; tmp.u.gen7.nr_inputs = 1; tmp.u.gen7.ve_id = 1; diff --git a/src/sna/gen7_render.h b/src/sna/gen7_render.h index 06ab0eeb..e2ca1f2f 100644 --- a/src/sna/gen7_render.h +++ b/src/sna/gen7_render.h @@ -123,6 +123,7 @@ /* for GEN7_PIPE_CONTROL */ #define GEN7_PIPE_CONTROL GEN7_3D(3, 2, 0) +#define GEN7_PIPE_CONTROL_CS_STALL (1 << 20) #define GEN7_PIPE_CONTROL_NOWRITE (0 << 14) #define GEN7_PIPE_CONTROL_WRITE_QWORD (1 << 14) #define GEN7_PIPE_CONTROL_WRITE_DEPTH (2 << 14) @@ -134,6 +135,7 @@ #define GEN7_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) #define GEN7_PIPE_CONTROL_GLOBAL_GTT (1 << 2) #define GEN7_PIPE_CONTROL_LOCAL_PGTT (0 << 2) +#define GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) #define GEN7_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) /* VERTEX_BUFFER_STATE Structure */ diff --git a/src/sna/kgem.c b/src/sna/kgem.c index b49f92a4..52e5bffd 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -2854,41 +2854,6 @@ void kgem_clear_dirty(struct kgem *kgem) bo->dirty = false; } -/* Flush the contents of the RenderCache and invalidate the TextureCache */ -void kgem_emit_flush(struct kgem *kgem) -{ - if (kgem->nbatch == 0) - return; - - if (!kgem_check_batch(kgem, 4)) { - _kgem_submit(kgem); - return; - } - - DBG(("%s()\n", __FUNCTION__)); - - if (kgem->ring == KGEM_BLT) { - kgem->batch[kgem->nbatch++] = MI_FLUSH_DW | 2; - kgem->batch[kgem->nbatch++] = 0; - kgem->batch[kgem->nbatch++] = 0; - 
kgem->batch[kgem->nbatch++] = 0; - } else if (kgem->gen >= 50 && 0) { - kgem->batch[kgem->nbatch++] = PIPE_CONTROL | 2; - kgem->batch[kgem->nbatch++] = - PIPE_CONTROL_WC_FLUSH | - PIPE_CONTROL_TC_FLUSH | - PIPE_CONTROL_NOWRITE; - kgem->batch[kgem->nbatch++] = 0; - kgem->batch[kgem->nbatch++] = 0; - } else { - if ((kgem->batch[kgem->nbatch-1] & (0xff<<23)) == MI_FLUSH) - kgem->nbatch--; - kgem->batch[kgem->nbatch++] = MI_FLUSH | MI_INVALIDATE_MAP_CACHE; - } - - kgem_clear_dirty(kgem); -} - struct kgem_bo *kgem_create_proxy(struct kgem_bo *target, int offset, int length) { diff --git a/src/sna/kgem.h b/src/sna/kgem.h index 92b7cd6b..b85625f9 100644 --- a/src/sna/kgem.h +++ b/src/sna/kgem.h @@ -269,17 +269,12 @@ static inline void kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) _kgem_bo_destroy(kgem, bo); } -void kgem_emit_flush(struct kgem *kgem); void kgem_clear_dirty(struct kgem *kgem); static inline void kgem_set_mode(struct kgem *kgem, enum kgem_mode mode) { assert(!kgem->wedged); -#if DEBUG_FLUSH_CACHE - kgem_emit_flush(kgem); -#endif - #if DEBUG_FLUSH_BATCH kgem_submit(kgem); #endif diff --git a/src/sna/sna.h b/src/sna/sna.h index 247cb46c..0c0cb397 100644 --- a/src/sna/sna.h +++ b/src/sna/sna.h @@ -94,7 +94,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define DEBUG_NO_BLT 0 #define DEBUG_NO_IO 0 -#define DEBUG_FLUSH_CACHE 0 #define DEBUG_FLUSH_BATCH 0 #define DEBUG_FLUSH_SYNC 0 |