summary | refs | log | tree | commit | diff
path: root/src/sna
diff options
context:
space:
mode:
Diffstat (limited to 'src/sna')
-rw-r--r-- src/sna/gen2_render.c 59
-rw-r--r-- src/sna/gen3_render.c 55
-rw-r--r-- src/sna/gen4_render.c 23
-rw-r--r-- src/sna/gen5_render.c 19
-rw-r--r-- src/sna/gen6_render.c 140
-rw-r--r-- src/sna/gen6_render.h 19
-rw-r--r-- src/sna/gen7_render.c 85
-rw-r--r-- src/sna/gen7_render.h 2
-rw-r--r-- src/sna/kgem.c 35
-rw-r--r-- src/sna/kgem.h 5
-rw-r--r-- src/sna/sna.h 1
11 files changed, 185 insertions, 258 deletions
diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 0775b76c..6981576e 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -637,6 +637,17 @@ static void gen2_emit_composite_state(struct sna *sna,
int tex;
gen2_get_batch(sna);
+
+ if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
+ if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
+ BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
+ else
+ BATCH(_3DSTATE_MODES_5_CMD |
+ PIPELINE_FLUSH_RENDER_CACHE |
+ PIPELINE_FLUSH_TEXTURE_CACHE);
+ kgem_clear_dirty(&sna->kgem);
+ }
+
gen2_emit_target(sna, op);
unwind = sna->kgem.nbatch;
@@ -1657,8 +1668,6 @@ gen2_render_composite(struct sna *sna,
int16_t width, int16_t height,
struct sna_composite_op *tmp)
{
- bool need_flush;
-
DBG(("%s()\n", __FUNCTION__));
if (op >= ARRAY_SIZE(gen2_blend_op)) {
@@ -1810,21 +1819,7 @@ gen2_render_composite(struct sna *sna,
NULL))
kgem_submit(&sna->kgem);
- need_flush =
- kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo);
gen2_emit_composite_state(sna, tmp);
- if (need_flush) {
- if (tmp->src.bo == tmp->dst.bo || tmp->mask.bo == tmp->dst.bo) {
- kgem_emit_flush(&sna->kgem);
- } else {
- BATCH(_3DSTATE_MODES_5_CMD |
- PIPELINE_FLUSH_RENDER_CACHE |
- PIPELINE_FLUSH_TEXTURE_CACHE);
- kgem_clear_dirty(&sna->kgem);
- }
- assert(sna->kgem.mode == KGEM_RENDER);
- kgem_bo_mark_dirty(tmp->dst.bo);
- }
return TRUE;
cleanup_src:
@@ -2159,8 +2154,6 @@ gen2_render_composite_spans(struct sna *sna,
unsigned flags,
struct sna_composite_spans_op *tmp)
{
- bool need_flush;
-
DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
src_x, src_y, dst_x, dst_y, width, height));
@@ -2236,20 +2229,7 @@ gen2_render_composite_spans(struct sna *sna,
NULL))
kgem_submit(&sna->kgem);
- need_flush = tmp->base.src.bo;
gen2_emit_composite_spans_state(sna, tmp);
- if (need_flush) {
- if (tmp->base.src.bo == tmp->base.dst.bo) {
- kgem_emit_flush(&sna->kgem);
- } else {
- BATCH(_3DSTATE_MODES_5_CMD |
- PIPELINE_FLUSH_RENDER_CACHE |
- PIPELINE_FLUSH_TEXTURE_CACHE);
- kgem_clear_dirty(&sna->kgem);
- }
- assert(sna->kgem.mode == KGEM_RENDER);
- kgem_bo_mark_dirty(tmp->base.dst.bo);
- }
return TRUE;
cleanup_dst:
@@ -2774,6 +2754,16 @@ static void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op
uint32_t ls1, v;
gen2_get_batch(sna);
+
+ if (kgem_bo_is_dirty(op->src.bo)) {
+ if (op->src.bo == op->dst.bo)
+ BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
+ else
+ BATCH(_3DSTATE_MODES_5_CMD |
+ PIPELINE_FLUSH_RENDER_CACHE |
+ PIPELINE_FLUSH_TEXTURE_CACHE);
+ kgem_clear_dirty(&sna->kgem);
+ }
gen2_emit_target(sna, op);
ls1 = sna->kgem.nbatch;
@@ -2845,9 +2835,6 @@ gen2_render_copy_boxes(struct sna *sna, uint8_t alu,
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
kgem_submit(&sna->kgem);
- if (kgem_bo_is_dirty(src_bo))
- kgem_emit_flush(&sna->kgem);
-
memset(&tmp, 0, sizeof(tmp));
tmp.op = alu;
@@ -2980,6 +2967,7 @@ gen2_render_copy(struct sna *sna, uint8_t alu,
tmp->base.dst.bo = dst_bo;
gen2_render_copy_setup_source(&tmp->base.src, src, src_bo);
+ tmp->base.mask.bo = NULL;
tmp->base.floats_per_vertex = 4;
tmp->base.floats_per_rect = 12;
@@ -2987,9 +2975,6 @@ gen2_render_copy(struct sna *sna, uint8_t alu,
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
kgem_submit(&sna->kgem);
- if (kgem_bo_is_dirty(src_bo))
- kgem_emit_flush(&sna->kgem);
-
tmp->blt = gen2_render_copy_blt;
tmp->done = gen2_render_copy_done;
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index a833526b..ad42fc78 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -1338,6 +1338,16 @@ static void gen3_emit_composite_state(struct sna *sna,
gen3_get_batch(sna);
+ if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
+ if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
+ OUT_BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
+ else
+ OUT_BATCH(_3DSTATE_MODES_5_CMD |
+ PIPELINE_FLUSH_RENDER_CACHE |
+ PIPELINE_FLUSH_TEXTURE_CACHE);
+ kgem_clear_dirty(&sna->kgem);
+ }
+
gen3_emit_target(sna,
op->dst.bo,
op->dst.width,
@@ -2560,8 +2570,6 @@ gen3_render_composite(struct sna *sna,
int16_t width, int16_t height,
struct sna_composite_op *tmp)
{
- bool need_flush;
-
DBG(("%s()\n", __FUNCTION__));
if (op >= ARRAY_SIZE(gen3_blend_op)) {
@@ -2802,22 +2810,7 @@ gen3_render_composite(struct sna *sna,
NULL))
kgem_submit(&sna->kgem);
- need_flush =
- kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo);
gen3_emit_composite_state(sna, tmp);
- if (need_flush) {
- if (tmp->src.bo == tmp->dst.bo || tmp->mask.bo == tmp->dst.bo) {
- kgem_emit_flush(&sna->kgem);
- } else {
- OUT_BATCH(_3DSTATE_MODES_5_CMD |
- PIPELINE_FLUSH_RENDER_CACHE |
- PIPELINE_FLUSH_TEXTURE_CACHE);
- kgem_clear_dirty(&sna->kgem);
- }
- assert(sna->kgem.mode == KGEM_RENDER);
- kgem_bo_mark_dirty(tmp->dst.bo);
- }
-
gen3_align_vertex(sna, tmp);
return TRUE;
@@ -3141,7 +3134,7 @@ gen3_render_composite_spans(struct sna *sna,
unsigned flags,
struct sna_composite_spans_op *tmp)
{
- bool no_offset, need_flush;
+ bool no_offset;
DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
src_x, src_y, dst_x, dst_y, width, height));
@@ -3230,6 +3223,8 @@ gen3_render_composite_spans(struct sna *sna,
break;
}
+ tmp->base.mask.bo = NULL;
+
tmp->base.floats_per_vertex = 2;
if (!is_constant_ps(tmp->base.src.u.gen3.type))
tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
@@ -3246,21 +3241,7 @@ gen3_render_composite_spans(struct sna *sna,
NULL))
kgem_submit(&sna->kgem);
- need_flush = kgem_bo_is_dirty(tmp->base.src.bo);
gen3_emit_composite_state(sna, &tmp->base);
- if (need_flush) {
- if (tmp->base.src.bo == tmp->base.dst.bo) {
- kgem_emit_flush(&sna->kgem);
- } else {
- OUT_BATCH(_3DSTATE_MODES_5_CMD |
- PIPELINE_FLUSH_RENDER_CACHE |
- PIPELINE_FLUSH_TEXTURE_CACHE);
- kgem_clear_dirty(&sna->kgem);
- }
- assert(sna->kgem.mode == KGEM_RENDER);
- kgem_bo_mark_dirty(tmp->base.dst.bo);
- }
-
gen3_align_vertex(sna, &tmp->base);
return TRUE;
@@ -3811,9 +3792,6 @@ gen3_render_copy_boxes(struct sna *sna, uint8_t alu,
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
kgem_submit(&sna->kgem);
- if (kgem_bo_is_dirty(src_bo))
- kgem_emit_flush(&sna->kgem);
-
memset(&tmp, 0, sizeof(tmp));
tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
@@ -3827,6 +3805,7 @@ gen3_render_copy_boxes(struct sna *sna, uint8_t alu,
tmp.floats_per_vertex = 4;
tmp.floats_per_rect = 12;
+ tmp.mask.bo = NULL;
tmp.mask.u.gen3.type = SHADER_NONE;
gen3_emit_composite_state(sna, &tmp);
@@ -3955,14 +3934,12 @@ gen3_render_copy(struct sna *sna, uint8_t alu,
tmp->base.floats_per_vertex = 4;
tmp->base.floats_per_rect = 12;
+ tmp->base.mask.bo = NULL;
tmp->base.mask.u.gen3.type = SHADER_NONE;
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
kgem_submit(&sna->kgem);
- if (kgem_bo_is_dirty(src_bo))
- kgem_emit_flush(&sna->kgem);
-
tmp->blt = gen3_render_copy_blt;
tmp->done = gen3_render_copy_done;
@@ -4111,6 +4088,7 @@ gen3_render_fill_boxes(struct sna *sna,
tmp.need_magic_ca_pass = false;
gen3_init_solid(&tmp.src, pixel);
+ tmp.mask.bo = NULL;
tmp.mask.u.gen3.type = SHADER_NONE;
tmp.u.gen3.num_constants = 0;
@@ -4264,6 +4242,7 @@ gen3_render_fill(struct sna *sna, uint8_t alu,
gen3_init_solid(&tmp->base.src,
sna_rgba_for_color(color, dst->drawable.depth));
+ tmp->base.mask.bo = NULL;
tmp->base.mask.u.gen3.type = SHADER_NONE;
tmp->base.u.gen3.num_constants = 0;
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 6351fa98..5ffd4a70 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -1263,7 +1263,6 @@ gen4_emit_state_base_address(struct sna *sna)
static void
gen4_emit_invariant(struct sna *sna)
{
- OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
if (sna->kgem.gen >= 45)
OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
else
@@ -1465,6 +1464,12 @@ gen4_emit_state(struct sna *sna,
gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
gen4_emit_vertex_elements(sna, op);
gen4_emit_drawing_rectangle(sna, op);
+
+ if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
+ OUT_BATCH(MI_FLUSH);
+ kgem_clear_dirty(&sna->kgem);
+ kgem_bo_mark_dirty(op->dst.bo);
+ }
}
static void
@@ -1724,9 +1729,6 @@ gen4_render_video(struct sna *sna,
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
kgem_submit(&sna->kgem);
- if (kgem_bo_is_dirty(frame->bo))
- kgem_emit_flush(&sna->kgem);
-
gen4_video_bind_surfaces(sna, &tmp, frame);
gen4_align_vertex(sna, &tmp);
@@ -2308,9 +2310,6 @@ gen4_render_composite(struct sna *sna,
NULL))
kgem_submit(&sna->kgem);
- if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo))
- kgem_emit_flush(&sna->kgem);
-
gen4_bind_surfaces(sna, tmp);
gen4_align_vertex(sna, tmp);
return TRUE;
@@ -2454,6 +2453,8 @@ fallback:
tmp.src.width = src->drawable.width;
tmp.src.height = src->drawable.height;
+ tmp.mask.bo = NULL;
+
tmp.is_affine = TRUE;
tmp.floats_per_vertex = 3;
tmp.u.gen4.wm_kernel = WM_KERNEL;
@@ -2462,9 +2463,6 @@ fallback:
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
kgem_submit(&sna->kgem);
- if (kgem_bo_is_dirty(src_bo))
- kgem_emit_flush(&sna->kgem);
-
gen4_copy_bind_surfaces(sna, &tmp);
gen4_align_vertex(sna, &tmp);
@@ -2558,6 +2556,8 @@ fallback:
op->base.src.filter = SAMPLER_FILTER_NEAREST;
op->base.src.repeat = SAMPLER_EXTEND_NONE;
+ op->base.mask.bo = NULL;
+
op->base.is_affine = true;
op->base.floats_per_vertex = 3;
op->base.u.gen4.wm_kernel = WM_KERNEL;
@@ -2566,9 +2566,6 @@ fallback:
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
kgem_submit(&sna->kgem);
- if (kgem_bo_is_dirty(src_bo))
- kgem_emit_flush(&sna->kgem);
-
gen4_copy_bind_surfaces(sna, &op->base);
gen4_align_vertex(sna, &op->base);
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 5104cb7e..768a5db0 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -1484,6 +1484,12 @@ gen5_emit_state(struct sna *sna,
if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel))
gen5_emit_urb(sna);
gen5_emit_vertex_elements(sna, op);
+
+ if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
+ OUT_BATCH(MI_FLUSH);
+ kgem_clear_dirty(&sna->kgem);
+ kgem_bo_mark_dirty(op->dst.bo);
+ }
}
static void gen5_bind_surfaces(struct sna *sna,
@@ -1744,6 +1750,8 @@ gen5_render_video(struct sna *sna,
tmp.src.filter = SAMPLER_FILTER_BILINEAR;
tmp.src.repeat = SAMPLER_EXTEND_PAD;
+ tmp.src.bo = frame->bo;
+ tmp.mask.bo = NULL;
tmp.u.gen5.wm_kernel =
is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
tmp.u.gen5.ve_id = 1;
@@ -1754,9 +1762,6 @@ gen5_render_video(struct sna *sna,
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
kgem_submit(&sna->kgem);
- if (kgem_bo_is_dirty(frame->bo))
- kgem_emit_flush(&sna->kgem);
-
gen5_video_bind_surfaces(sna, &tmp, frame);
gen5_align_vertex(sna, &tmp);
@@ -2353,7 +2358,6 @@ gen5_render_composite(struct sna *sna,
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
return TRUE;
}
- kgem_emit_flush(&sna->kgem);
}
gen5_bind_surfaces(sna, tmp);
@@ -2632,6 +2636,7 @@ gen5_render_composite_spans(struct sna *sna,
break;
}
+ tmp->base.mask.bo = NULL;
tmp->base.is_affine = tmp->base.src.is_affine;
tmp->base.has_component_alpha = FALSE;
tmp->base.need_magic_ca_pass = FALSE;
@@ -2661,9 +2666,6 @@ gen5_render_composite_spans(struct sna *sna,
NULL))
kgem_submit(&sna->kgem);
- if (kgem_bo_is_dirty(tmp->base.src.bo))
- kgem_emit_flush(&sna->kgem);
-
gen5_bind_surfaces(sna, &tmp->base);
gen5_align_vertex(sna, &tmp->base);
return TRUE;
@@ -2797,8 +2799,6 @@ fallback:
dst->drawable.bitsPerPixel,
box, n))
return TRUE;
-
- kgem_emit_flush(&sna->kgem);
}
gen5_copy_bind_surfaces(sna, &tmp);
@@ -2948,7 +2948,6 @@ fallback:
dst->drawable.bitsPerPixel,
op))
return TRUE;
- kgem_emit_flush(&sna->kgem);
}
gen5_copy_bind_surfaces(sna, &op->base);
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 601bdaef..0967ea3b 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -556,7 +556,7 @@ gen6_emit_invariant(struct sna *sna)
sna->render_state.gen6.needs_invariant = FALSE;
}
-static bool
+static void
gen6_emit_cc(struct sna *sna,
int op, bool has_component_alpha, uint32_t dst_format)
{
@@ -570,7 +570,7 @@ gen6_emit_cc(struct sna *sna,
op, has_component_alpha, dst_format,
blend, render->blend));
if (render->blend == blend)
- return false;
+ return;
if (op == PictOpClear) {
uint32_t src;
@@ -580,7 +580,7 @@ gen6_emit_cc(struct sna *sna,
*/
src = BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO);
if (render->blend == src)
- return false;
+ return;
}
OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
@@ -594,7 +594,6 @@ gen6_emit_cc(struct sna *sna,
}
render->blend = blend;
- return true;
}
static void
@@ -680,11 +679,11 @@ gen6_emit_wm(struct sna *sna, unsigned int kernel, int nr_surfaces, int nr_input
OUT_BATCH(0);
}
-static bool
+static void
gen6_emit_binding_table(struct sna *sna, uint16_t offset)
{
if (sna->render_state.gen6.surface_table == offset)
- return false;
+ return;
/* Binding table pointers */
OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS |
@@ -696,19 +695,27 @@ gen6_emit_binding_table(struct sna *sna, uint16_t offset)
OUT_BATCH(offset*4);
sna->render_state.gen6.surface_table = offset;
- return true;
+}
+
+static bool
+gen6_need_drawing_rectangle(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
+ uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
+
+ return (sna->render_state.gen6.drawrect_limit != limit ||
+ sna->render_state.gen6.drawrect_offset != offset);
}
static void
gen6_emit_drawing_rectangle(struct sna *sna,
- const struct sna_composite_op *op,
- bool force)
+ const struct sna_composite_op *op)
{
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
- if (!force &&
- sna->render_state.gen6.drawrect_limit == limit &&
+ if (sna->render_state.gen6.drawrect_limit == limit &&
sna->render_state.gen6.drawrect_offset == offset)
return;
@@ -800,28 +807,54 @@ gen6_emit_vertex_elements(struct sna *sna,
}
static void
+gen6_emit_flush(struct sna *sna)
+{
+ OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
+ OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH |
+ GEN6_PIPE_CONTROL_TC_FLUSH |
+ GEN6_PIPE_CONTROL_CS_STALL);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+}
+
+static void
gen6_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t wm_binding_table)
-
{
- bool flushed =
- (sna->kgem.batch[sna->kgem.nbatch-1] & (0xff<<23)) == MI_FLUSH;
- bool need_flush;
+ /* [DevSNB-C+{W/A}] Before any depth stall flush (including those
+ * produced by non-pipelined state commands), software needs to first
+ * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
+ * 0.
+ *
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * BEFORE the pipe-control with a post-sync op and no write-cache
+ * flushes.
+ */
+ if (gen6_need_drawing_rectangle(sna, op)) {
+ OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
+ OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
+ GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
+ OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME);
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ sna->render_state.gen6.general_bo,
+ I915_GEM_DOMAIN_INSTRUCTION << 16 |
+ I915_GEM_DOMAIN_INSTRUCTION,
+ 64));
+ OUT_BATCH(0);
+ }
- need_flush = gen6_emit_cc(sna,
- op->op,
- op->has_component_alpha,
- op->dst.format);
+ if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
+ gen6_emit_flush(sna);
+ kgem_clear_dirty(&sna->kgem);
+ kgem_bo_mark_dirty(op->dst.bo);
+ }
- DBG(("%s: sampler src=(%d, %d), mask=(%d, %d), offset=%d\n",
- __FUNCTION__,
- op->src.filter, op->src.repeat,
- op->mask.filter, op->mask.repeat,
- (int)SAMPLER_OFFSET(op->src.filter,
- op->src.repeat,
- op->mask.filter,
- op->mask.repeat)));
+ gen6_emit_cc(sna, op->op, op->has_component_alpha, op->dst.format);
gen6_emit_sampler(sna,
SAMPLER_OFFSET(op->src.filter,
op->src.repeat,
@@ -833,20 +866,14 @@ gen6_emit_state(struct sna *sna,
op->u.gen6.nr_surfaces,
op->u.gen6.nr_inputs);
gen6_emit_vertex_elements(sna, op);
-
- /* XXX updating the binding table requires a non-pipelined cmd?
- * The '>' in KDE menus suggest that every binding table update
- * requires a subsequent non-pipelined op, or maybe a pipelined flush?
- */
- need_flush |= gen6_emit_binding_table(sna, wm_binding_table);
- gen6_emit_drawing_rectangle(sna, op, need_flush & !flushed);
+ gen6_emit_binding_table(sna, wm_binding_table);
+ gen6_emit_drawing_rectangle(sna, op);
}
static void gen6_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen6_render_state *state = &sna->render_state.gen6;
- bool need_flush;
if (!op->need_magic_ca_pass)
return;
@@ -854,19 +881,15 @@ static void gen6_magic_ca_pass(struct sna *sna,
DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
sna->render.vertex_start, sna->render.vertex_index));
- need_flush = gen6_emit_cc(sna, PictOpAdd, TRUE, op->dst.format);
+ gen6_emit_flush(sna);
+
+ gen6_emit_cc(sna, PictOpAdd, TRUE, op->dst.format);
gen6_emit_wm(sna,
gen6_choose_composite_kernel(PictOpAdd,
TRUE, TRUE,
op->is_affine),
3, 2);
- /* XXX We apparently need a non-pipelined op to flush the
- * pipeline before changing blend state.
- */
- if (need_flush)
- OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
-
OUT_BATCH(GEN6_3DPRIMITIVE |
GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
_3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
@@ -1036,6 +1059,14 @@ static void null_create(struct sna_static_stream *stream)
sna_static_stream_map(stream, 64, 64);
}
+static void scratch_create(struct sna_static_stream *stream)
+{
+ /* 64 bytes of scratch space for random writes, such as
+ * the pipe-control w/a.
+ */
+ sna_static_stream_map(stream, 64, 64);
+}
+
static void
sampler_state_init(struct gen6_sampler_state *sampler_state,
sampler_filter_t filter,
@@ -1912,9 +1943,12 @@ gen6_render_video(struct sna *sna,
tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;
+ tmp.src.bo = frame->bo;
tmp.src.filter = SAMPLER_FILTER_BILINEAR;
tmp.src.repeat = SAMPLER_EXTEND_PAD;
+ tmp.mask.bo = NULL;
+
tmp.is_affine = TRUE;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
@@ -1935,9 +1969,6 @@ gen6_render_video(struct sna *sna,
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
- if (kgem_bo_is_dirty(frame->bo))
- kgem_emit_flush(&sna->kgem);
-
gen6_emit_video_state(sna, &tmp, frame);
gen6_align_vertex(sna, &tmp);
@@ -2549,9 +2580,6 @@ gen6_render_composite(struct sna *sna,
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
- if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo))
- kgem_emit_flush(&sna->kgem);
-
gen6_emit_composite_state(sna, tmp);
gen6_align_vertex(sna, tmp);
return TRUE;
@@ -2911,6 +2939,8 @@ gen6_render_composite_spans(struct sna *sna,
break;
}
+ tmp->base.mask.bo = NULL;
+
tmp->base.is_affine = tmp->base.src.is_affine;
tmp->base.has_component_alpha = FALSE;
tmp->base.need_magic_ca_pass = FALSE;
@@ -2954,9 +2984,6 @@ gen6_render_composite_spans(struct sna *sna,
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
- if (kgem_bo_is_dirty(tmp->base.src.bo))
- kgem_emit_flush(&sna->kgem);
-
gen6_emit_composite_state(sna, &tmp->base);
gen6_align_vertex(sna, &tmp->base);
return TRUE;
@@ -3144,9 +3171,6 @@ fallback:
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
- if (kgem_bo_is_dirty(src_bo))
- kgem_emit_flush(&sna->kgem);
-
gen6_emit_copy_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
@@ -3290,6 +3314,8 @@ fallback:
op->base.src.filter = SAMPLER_FILTER_NEAREST;
op->base.src.repeat = SAMPLER_EXTEND_NONE;
+ op->base.mask.bo = NULL;
+
op->base.is_affine = true;
op->base.floats_per_vertex = 3;
op->base.floats_per_rect = 9;
@@ -3305,9 +3331,6 @@ fallback:
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
- if (kgem_bo_is_dirty(src_bo))
- kgem_emit_flush(&sna->kgem);
-
gen6_emit_copy_state(sna, &op->base);
gen6_align_vertex(sna, &op->base);
@@ -3442,6 +3465,8 @@ gen6_render_fill_boxes(struct sna *sna,
tmp.src.filter = SAMPLER_FILTER_NEAREST;
tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
+ tmp.mask.bo = NULL;
+
tmp.is_affine = TRUE;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
@@ -3927,6 +3952,7 @@ static Bool gen6_render_setup(struct sna *sna)
* dumps, you know it points to zero.
*/
null_create(&general);
+ scratch_create(&general);
for (m = 0; m < GEN6_KERNEL_COUNT; m++)
state->wm_kernel[m] =
diff --git a/src/sna/gen6_render.h b/src/sna/gen6_render.h
index 42c5a6b9..eded2b7c 100644
--- a/src/sna/gen6_render.h
+++ b/src/sna/gen6_render.h
@@ -133,20 +133,6 @@
/* for GEN6_STATE_BASE_ADDRESS */
#define BASE_ADDRESS_MODIFY (1 << 0)
-/* for GEN6_PIPE_CONTROL */
-#define GEN6_PIPE_CONTROL_NOWRITE (0 << 14)
-#define GEN6_PIPE_CONTROL_WRITE_QWORD (1 << 14)
-#define GEN6_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
-#define GEN6_PIPE_CONTROL_WRITE_TIME (3 << 14)
-#define GEN6_PIPE_CONTROL_DEPTH_STALL (1 << 13)
-#define GEN6_PIPE_CONTROL_WC_FLUSH (1 << 12)
-#define GEN6_PIPE_CONTROL_IS_FLUSH (1 << 11)
-#define GEN6_PIPE_CONTROL_TC_FLUSH (1 << 10)
-#define GEN6_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
-#define GEN6_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
-#define GEN6_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
-#define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
-
/* VERTEX_BUFFER_STATE Structure */
#define VB0_BUFFER_INDEX_SHIFT 26
#define VB0_VERTEXDATA (0 << 20)
@@ -342,8 +328,6 @@
#define GEN6_3DSTATE_AA_LINE_PARAMS GEN6_3D(3, 1, 0xa)
#define GEN6_3DSTATE_GS_SVB_INDEX GEN6_3D(3, 1, 0xb)
-#define GEN6_PIPE_CONTROL GEN6_3D(3, 2, 0)
-
#define GEN6_3DPRIMITIVE GEN6_3D(3, 3, 0)
#define GEN6_3DSTATE_CLEAR_PARAMS GEN6_3D(3, 1, 0x10)
@@ -457,6 +441,8 @@
#define GEN6_CLIP_ENABLE 1
/* for GEN6_PIPE_CONTROL */
+#define GEN6_PIPE_CONTROL GEN6_3D(3, 2, 0)
+#define GEN6_PIPE_CONTROL_CS_STALL (1 << 20)
#define GEN6_PIPE_CONTROL_NOWRITE (0 << 14)
#define GEN6_PIPE_CONTROL_WRITE_QWORD (1 << 14)
#define GEN6_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
@@ -468,6 +454,7 @@
#define GEN6_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
#define GEN6_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define GEN6_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
+#define GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
#define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
/* 3DPRIMITIVE bits */
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index a7f1dc09..209d8a0d 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -705,13 +705,13 @@ gen7_emit_invariant(struct sna *sna)
sna->render_state.gen7.needs_invariant = FALSE;
}
-static bool
+static void
gen7_emit_cc(struct sna *sna, uint32_t blend_offset)
{
struct gen7_render_state *render = &sna->render_state.gen7;
if (render->blend == blend_offset)
- return false;
+ return;
/* XXX can have upto 8 blend states preload, selectable via
* Render Target Index. What other side-effects of Render Target Index?
@@ -722,7 +722,6 @@ gen7_emit_cc(struct sna *sna, uint32_t blend_offset)
OUT_BATCH((render->cc_blend + blend_offset) | 1);
render->blend = blend_offset;
- return true;
}
static void
@@ -797,11 +796,11 @@ gen7_emit_wm(struct sna *sna, unsigned int kernel, int nr_surfaces, int nr_input
OUT_BATCH(0); /* kernel 2 */
}
-static bool
+static void
gen7_emit_binding_table(struct sna *sna, uint16_t offset)
{
if (sna->render_state.gen7.surface_table == offset)
- return false;
+ return;
/* Binding table pointers */
assert(is_aligned(4*offset, 32));
@@ -809,19 +808,16 @@ gen7_emit_binding_table(struct sna *sna, uint16_t offset)
OUT_BATCH(offset*4);
sna->render_state.gen7.surface_table = offset;
- return true;
}
static void
gen7_emit_drawing_rectangle(struct sna *sna,
- const struct sna_composite_op *op,
- bool force)
+ const struct sna_composite_op *op)
{
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
- if (!force &&
- sna->render_state.gen7.drawrect_limit == limit &&
+ if (sna->render_state.gen7.drawrect_limit == limit &&
sna->render_state.gen7.drawrect_offset == offset)
return;
@@ -915,19 +911,32 @@ gen7_emit_vertex_elements(struct sna *sna,
}
static void
+gen7_emit_flush(struct sna *sna)
+{
+ OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
+ OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH |
+ GEN7_PIPE_CONTROL_TC_FLUSH |
+ GEN7_PIPE_CONTROL_CS_STALL);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+}
+
+static void
gen7_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t wm_binding_table)
{
- bool flushed =
- (sna->kgem.batch[sna->kgem.nbatch-1] & (0xff<<23)) == MI_FLUSH;
- bool need_flush;
+ if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
+ gen7_emit_flush(sna);
+ kgem_clear_dirty(&sna->kgem);
+ kgem_bo_mark_dirty(op->dst.bo);
+ }
- need_flush = gen7_emit_cc(sna,
- gen7_get_blend(op->op,
- op->has_component_alpha,
- op->dst.format));
+ gen7_emit_cc(sna,
+ gen7_get_blend(op->op,
+ op->has_component_alpha,
+ op->dst.format));
DBG(("%s: sampler src=(%d, %d), mask=(%d, %d), offset=%d\n",
__FUNCTION__,
@@ -949,16 +958,14 @@ gen7_emit_state(struct sna *sna,
op->u.gen7.nr_inputs);
gen7_emit_vertex_elements(sna, op);
- /* XXX updating the binding table requires a non-pipelined cmd? */
- need_flush |= gen7_emit_binding_table(sna, wm_binding_table);
- gen7_emit_drawing_rectangle(sna, op, need_flush & !flushed);
+ gen7_emit_binding_table(sna, wm_binding_table);
+ gen7_emit_drawing_rectangle(sna, op);
}
static void gen7_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen7_render_state *state = &sna->render_state.gen7;
- bool need_flush;
if (!op->need_magic_ca_pass)
return;
@@ -966,21 +973,15 @@ static void gen7_magic_ca_pass(struct sna *sna,
DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
sna->render.vertex_start, sna->render.vertex_index));
- need_flush =
- gen7_emit_cc(sna,
- gen7_get_blend(PictOpAdd, TRUE, op->dst.format));
+ gen7_emit_flush(sna);
+
+ gen7_emit_cc(sna, gen7_get_blend(PictOpAdd, TRUE, op->dst.format));
gen7_emit_wm(sna,
gen7_choose_composite_kernel(PictOpAdd,
TRUE, TRUE,
op->is_affine),
3, 2);
- /* XXX We apparently need a non-pipelined op to flush the
- * pipeline before changing blend state.
- */
- if (need_flush)
- OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
-
OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
@@ -2012,9 +2013,12 @@ gen7_render_video(struct sna *sna,
tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;
+ tmp.src.bo = frame->bo;
tmp.src.filter = SAMPLER_FILTER_BILINEAR;
tmp.src.repeat = SAMPLER_EXTEND_PAD;
+ tmp.mask.bo = NULL;
+
tmp.is_affine = TRUE;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
@@ -2035,9 +2039,6 @@ gen7_render_video(struct sna *sna,
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
- if (kgem_bo_is_dirty(frame->bo))
- kgem_emit_flush(&sna->kgem);
-
gen7_emit_video_state(sna, &tmp, frame);
gen7_align_vertex(sna, &tmp);
@@ -2650,9 +2651,6 @@ gen7_render_composite(struct sna *sna,
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
- if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo))
- kgem_emit_flush(&sna->kgem);
-
gen7_emit_composite_state(sna, tmp);
gen7_align_vertex(sna, tmp);
return TRUE;
@@ -3011,6 +3009,8 @@ gen7_render_composite_spans(struct sna *sna,
break;
}
+ tmp->base.mask.bo = NULL;
+
tmp->base.is_affine = tmp->base.src.is_affine;
tmp->base.has_component_alpha = FALSE;
tmp->base.need_magic_ca_pass = FALSE;
@@ -3054,9 +3054,6 @@ gen7_render_composite_spans(struct sna *sna,
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
- if (kgem_bo_is_dirty(tmp->base.src.bo))
- kgem_emit_flush(&sna->kgem);
-
gen7_emit_composite_state(sna, &tmp->base);
gen7_align_vertex(sna, &tmp->base);
return TRUE;
@@ -3244,9 +3241,6 @@ fallback:
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
- if (kgem_bo_is_dirty(src_bo))
- kgem_emit_flush(&sna->kgem);
-
gen7_emit_copy_state(sna, &tmp);
gen7_align_vertex(sna, &tmp);
@@ -3390,6 +3384,8 @@ fallback:
op->base.src.filter = SAMPLER_FILTER_NEAREST;
op->base.src.repeat = SAMPLER_EXTEND_NONE;
+ op->base.mask.bo = NULL;
+
op->base.is_affine = true;
op->base.floats_per_vertex = 3;
op->base.floats_per_rect = 9;
@@ -3405,9 +3401,6 @@ fallback:
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
- if (kgem_bo_is_dirty(src_bo))
- kgem_emit_flush(&sna->kgem);
-
gen7_emit_copy_state(sna, &op->base);
gen7_align_vertex(sna, &op->base);
@@ -3937,7 +3930,7 @@ gen7_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
tmp.has_component_alpha = 0;
tmp.need_magic_ca_pass = FALSE;
- tmp.u.gen7.wm_kernel = GEN6_WM_KERNEL_NOMASK;
+ tmp.u.gen7.wm_kernel = GEN7_WM_KERNEL_NOMASK;
tmp.u.gen7.nr_surfaces = 2;
tmp.u.gen7.nr_inputs = 1;
tmp.u.gen7.ve_id = 1;
diff --git a/src/sna/gen7_render.h b/src/sna/gen7_render.h
index 06ab0eeb..e2ca1f2f 100644
--- a/src/sna/gen7_render.h
+++ b/src/sna/gen7_render.h
@@ -123,6 +123,7 @@
/* for GEN7_PIPE_CONTROL */
#define GEN7_PIPE_CONTROL GEN7_3D(3, 2, 0)
+#define GEN7_PIPE_CONTROL_CS_STALL (1 << 20)
#define GEN7_PIPE_CONTROL_NOWRITE (0 << 14)
#define GEN7_PIPE_CONTROL_WRITE_QWORD (1 << 14)
#define GEN7_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
@@ -134,6 +135,7 @@
#define GEN7_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
#define GEN7_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define GEN7_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
+#define GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
#define GEN7_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
/* VERTEX_BUFFER_STATE Structure */
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index b49f92a4..52e5bffd 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -2854,41 +2854,6 @@ void kgem_clear_dirty(struct kgem *kgem)
bo->dirty = false;
}
-/* Flush the contents of the RenderCache and invalidate the TextureCache */
-void kgem_emit_flush(struct kgem *kgem)
-{
- if (kgem->nbatch == 0)
- return;
-
- if (!kgem_check_batch(kgem, 4)) {
- _kgem_submit(kgem);
- return;
- }
-
- DBG(("%s()\n", __FUNCTION__));
-
- if (kgem->ring == KGEM_BLT) {
- kgem->batch[kgem->nbatch++] = MI_FLUSH_DW | 2;
- kgem->batch[kgem->nbatch++] = 0;
- kgem->batch[kgem->nbatch++] = 0;
- kgem->batch[kgem->nbatch++] = 0;
- } else if (kgem->gen >= 50 && 0) {
- kgem->batch[kgem->nbatch++] = PIPE_CONTROL | 2;
- kgem->batch[kgem->nbatch++] =
- PIPE_CONTROL_WC_FLUSH |
- PIPE_CONTROL_TC_FLUSH |
- PIPE_CONTROL_NOWRITE;
- kgem->batch[kgem->nbatch++] = 0;
- kgem->batch[kgem->nbatch++] = 0;
- } else {
- if ((kgem->batch[kgem->nbatch-1] & (0xff<<23)) == MI_FLUSH)
- kgem->nbatch--;
- kgem->batch[kgem->nbatch++] = MI_FLUSH | MI_INVALIDATE_MAP_CACHE;
- }
-
- kgem_clear_dirty(kgem);
-}
-
struct kgem_bo *kgem_create_proxy(struct kgem_bo *target,
int offset, int length)
{
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 92b7cd6b..b85625f9 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -269,17 +269,12 @@ static inline void kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
_kgem_bo_destroy(kgem, bo);
}
-void kgem_emit_flush(struct kgem *kgem);
void kgem_clear_dirty(struct kgem *kgem);
static inline void kgem_set_mode(struct kgem *kgem, enum kgem_mode mode)
{
assert(!kgem->wedged);
-#if DEBUG_FLUSH_CACHE
- kgem_emit_flush(kgem);
-#endif
-
#if DEBUG_FLUSH_BATCH
kgem_submit(kgem);
#endif
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 247cb46c..0c0cb397 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -94,7 +94,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define DEBUG_NO_BLT 0
#define DEBUG_NO_IO 0
-#define DEBUG_FLUSH_CACHE 0
#define DEBUG_FLUSH_BATCH 0
#define DEBUG_FLUSH_SYNC 0