From 9a7bf70365980809d0f02190f2f620a957ff1ba8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 26 Jan 2013 23:03:33 +0000 Subject: sna: Enable threaded rasterisation for non-antialiased geometry Signed-off-by: Chris Wilson --- src/sna/gen3_render.c | 239 ++++++++++++++++++++++++++++++++++-- src/sna/gen4_render.c | 68 ++++++++++- src/sna/gen4_vertex.c | 237 ++++++++++++++++++++++++++++++++++++ src/sna/gen5_render.c | 68 ++++++++++- src/sna/gen6_render.c | 68 ++++++++++- src/sna/gen7_render.c | 68 ++++++++++- src/sna/sna_blt.c | 306 +++++++++++++++++++++++++++++++++++++++++++++-- src/sna/sna_render.h | 5 + src/sna/sna_trapezoids.c | 229 +++++++++++++++++++++++++++++++---- 9 files changed, 1234 insertions(+), 54 deletions(-) (limited to 'src') diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c index 3224d717..6c0ea6a6 100644 --- a/src/sna/gen3_render.c +++ b/src/sna/gen3_render.c @@ -430,6 +430,26 @@ gen3_emit_composite_primitive_constant(struct sna *sna, gen3_emit_composite_dstcoord(sna, dst_x, dst_y); } +fastcall static void +gen3_emit_composite_boxes_constant(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + v[0] = box->x2; + v[1] = box->y2; + + v[2] = box->x1; + v[3] = box->y2; + + v[4] = box->x1; + v[5] = box->y1; + + box++; + v += 6; + } while (--nbox); +} + fastcall static void gen3_emit_composite_primitive_identity_gradient(struct sna *sna, const struct sna_composite_op *op, @@ -456,6 +476,32 @@ gen3_emit_composite_primitive_identity_gradient(struct sna *sna, OUT_VERTEX(src_y); } +fastcall static void +gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + v[0] = box->x2; + v[1] = box->y2; + v[2] = box->x2 + op->src.offset[0]; + v[3] = box->y2 + op->src.offset[1]; + + v[4] = box->x1; + v[5] = box->y2; + v[6] = box->x1 + op->src.offset[0]; + v[7] = box->y2 + op->src.offset[1]; + + v[8] = box->x1; + v[9] = box->y1; + v[10] = 
box->x1 + op->src.offset[0]; + v[11] = box->y1 + op->src.offset[1]; + + v += 12; + box++; + } while (--nbox); +} + fastcall static void gen3_emit_composite_primitive_affine_gradient(struct sna *sna, const struct sna_composite_op *op, @@ -493,6 +539,40 @@ gen3_emit_composite_primitive_affine_gradient(struct sna *sna, OUT_VERTEX(sy); } +fastcall static void +gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + const PictTransform *transform = op->src.transform; + + do { + v[0] = box->x2; + v[1] = box->y2; + sna_get_transformed_coordinates(box->x2 + op->src.offset[0], + box->y2 + op->src.offset[1], + transform, + &v[2], &v[3]); + + v[4] = box->x1; + v[5] = box->y2; + sna_get_transformed_coordinates(box->x1 + op->src.offset[0], + box->y2 + op->src.offset[1], + transform, + &v[6], &v[7]); + + v[8] = box->x1; + v[9] = box->y1; + sna_get_transformed_coordinates(box->x1 + op->src.offset[0], + box->y1 + op->src.offset[1], + transform, + &v[10], &v[11]); + + box++; + v += 12; + } while (--nbox); +} + fastcall static void gen3_emit_composite_primitive_identity_source(struct sna *sna, const struct sna_composite_op *op, @@ -518,6 +598,28 @@ gen3_emit_composite_primitive_identity_source(struct sna *sna, v[7] = v[3] = v[11] + h * op->src.scale[1]; } +fastcall static void +gen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + v[0] = box->x2 + op->dst.x; + v[8] = v[4] = box->x1 + op->dst.x; + v[5] = v[1] = box->y2 + op->dst.y; + v[9] = box->y1 + op->dst.y; + + v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; + v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; + + v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; + v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; + + v += 12; + box++; + } while (--nbox); +} + fastcall static void 
gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna, const struct sna_composite_op *op, @@ -543,6 +645,28 @@ gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna, v[7] = v[3] = v[11] + h * op->src.scale[1]; } +fastcall static void +gen3_emit_composite_boxes_identity_source_no_offset(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + v[0] = box->x2; + v[8] = v[4] = box->x1; + v[5] = v[1] = box->y2; + v[9] = box->y1; + + v[10] = v[6] = box->x1 * op->src.scale[0]; + v[2] = box->x2 * op->src.scale[0]; + + v[11] = box->y1 * op->src.scale[1]; + v[7] = v[3] = box->y2 * op->src.scale[1]; + + v += 12; + box++; + } while (--nbox); +} + fastcall static void gen3_emit_composite_primitive_affine_source(struct sna *sna, const struct sna_composite_op *op, @@ -576,6 +700,39 @@ gen3_emit_composite_primitive_affine_source(struct sna *sna, &v[10], &v[11]); } +fastcall static void +gen3_emit_composite_boxes_affine_source(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + const PictTransform *transform = op->src.transform; + + do { + v[0] = box->x2; + v[5] = v[1] = box->y2; + v[8] = v[4] = box->x1; + v[9] = box->y1; + + _sna_get_transformed_scaled(box->x2 + op->src.offset[0], + box->y2 + op->src.offset[1], + transform, op->src.scale, + &v[2], &v[3]); + + _sna_get_transformed_scaled(box->x1 + op->src.offset[0], + box->y2 + op->src.offset[1], + transform, op->src.scale, + &v[6], &v[7]); + + _sna_get_transformed_scaled(box->x1 + op->src.offset[0], + box->y1 + op->src.offset[1], + transform, op->src.scale, + &v[10], &v[11]); + + v += 12; + box++; + } while (--nbox); +} + fastcall static void gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna, const struct sna_composite_op *op, @@ -1900,9 +2057,9 @@ gen3_render_composite_box(struct sna *sna, } static void -gen3_render_composite_boxes(struct sna *sna, - const struct sna_composite_op *op, - const BoxRec *box, int 
nbox) +gen3_render_composite_boxes__blt(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) { DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n", __FUNCTION__, nbox, @@ -1935,6 +2092,60 @@ gen3_render_composite_boxes(struct sna *sna, } while (nbox); } +static void +gen3_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen3_get_rectangles(sna, op, nbox); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + } while (nbox); +} + +static void +gen3_render_composite_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen3_get_rectangles(sna, op, nbox); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + static void gen3_render_composite_done(struct sna *sna, const struct sna_composite_op *op) @@ -2986,24 +3197,32 @@ gen3_render_composite(struct sna *sna, case SHADER_WHITE: case SHADER_CONSTANT: tmp->prim_emit = gen3_emit_composite_primitive_constant; + tmp->emit_boxes = gen3_emit_composite_boxes_constant; break; case SHADER_LINEAR: case SHADER_RADIAL: - if (tmp->src.transform == NULL) + if 
(tmp->src.transform == NULL) { tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient; - else if (tmp->src.is_affine) + tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient; + } else if (tmp->src.is_affine) { tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient; + tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient; + } break; case SHADER_TEXTURE: if (tmp->src.transform == NULL) { - if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) + if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) { tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset; - else + tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset; + } else { tmp->prim_emit = gen3_emit_composite_primitive_identity_source; + tmp->emit_boxes = gen3_emit_composite_boxes_identity_source; + } } else if (tmp->src.is_affine) { tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; tmp->prim_emit = gen3_emit_composite_primitive_affine_source; + tmp->emit_boxes = gen3_emit_composite_boxes_affine_source; } break; } @@ -3035,7 +3254,11 @@ gen3_render_composite(struct sna *sna, tmp->blt = gen3_render_composite_blt; tmp->box = gen3_render_composite_box; - tmp->boxes = gen3_render_composite_boxes; + tmp->boxes = gen3_render_composite_boxes__blt; + if (tmp->emit_boxes) { + tmp->boxes = gen3_render_composite_boxes; + tmp->thread_boxes = gen3_render_composite_boxes__thread; + } tmp->done = gen3_render_composite_done; if (!kgem_check_bo(&sna->kgem, diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c index 65016cd1..e4f5f594 100644 --- a/src/sna/gen4_render.c +++ b/src/sna/gen4_render.c @@ -1109,9 +1109,9 @@ gen4_render_composite_box(struct sna *sna, } static void -gen4_render_composite_boxes(struct sna *sna, - const struct sna_composite_op *op, - const BoxRec *box, int nbox) +gen4_render_composite_boxes__blt(struct sna *sna, + const struct 
sna_composite_op *op, + const BoxRec *box, int nbox) { DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n", __FUNCTION__, nbox, op->dst.x, op->dst.y, @@ -1145,6 +1145,62 @@ gen4_render_composite_boxes(struct sna *sna, } while (nbox); } +static void +gen4_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen4_get_rectangles(sna, op, nbox, + gen4_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + } while (nbox); +} + +static void +gen4_render_composite_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen4_get_rectangles(sna, op, nbox, + gen4_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + #ifndef MAX #define MAX(a,b) ((a) > (b) ? 
(a) : (b)) #endif @@ -1899,7 +1955,11 @@ gen4_render_composite(struct sna *sna, tmp->blt = gen4_render_composite_blt; tmp->box = gen4_render_composite_box; - tmp->boxes = gen4_render_composite_boxes; + tmp->boxes = gen4_render_composite_boxes__blt; + if (tmp->emit_boxes) { + tmp->boxes = gen4_render_composite_boxes; + tmp->thread_boxes = gen4_render_composite_boxes__thread; + } tmp->done = gen4_render_composite_done; if (!kgem_check_bo(&sna->kgem, diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c index cc679d38..e5131669 100644 --- a/src/sna/gen4_vertex.c +++ b/src/sna/gen4_vertex.c @@ -359,6 +359,31 @@ emit_primitive_solid(struct sna *sna, v[5] = v[3] = v[1] = .5; } +fastcall static void +emit_boxes_solid(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[2] = dst.f; + dst.p.y = box->y1; + v[4] = dst.f; + + v[5] = v[3] = v[1] = .5; + box++; + v += 6; + } while (--nbox); +} + fastcall static void emit_primitive_linear(struct sna *sna, const struct sna_composite_op *op, @@ -389,6 +414,34 @@ emit_primitive_linear(struct sna *sna, v[5] = compute_linear(&op->src, r->src.x, r->src.y); } +fastcall static void +emit_boxes_linear(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + union { + struct sna_coordinate p; + float f; + } dst; + + do { + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[2] = dst.f; + dst.p.y = box->y1; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, box->x2, box->y2); + v[3] = compute_linear(&op->src, box->x1, box->y2); + v[5] = compute_linear(&op->src, box->x1, box->y1); + + v += 6; + box++; + } while (--nbox); +} + fastcall static void emit_primitive_identity_source(struct sna *sna, const struct sna_composite_op *op, @@ -420,6 +473,36 @@ emit_primitive_identity_source(struct sna *sna, v[5] = v[2] 
= v[8] + r->height * op->src.scale[1]; } +fastcall static void +emit_boxes_identity_source(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[3] = dst.f; + dst.p.y = box->y1; + v[6] = dst.f; + + v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; + v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; + + v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; + v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; + + v += 9; + box++; + } while (--nbox); +} + fastcall static void emit_primitive_simple_source(struct sna *sna, const struct sna_composite_op *op, @@ -460,6 +543,45 @@ emit_primitive_simple_source(struct sna *sna, v[8] = ((r->src.y + ty) * yy + y0) * sy; } +fastcall static void +emit_boxes_simple_source(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = ((box->x2 + tx) * xx + x0) * sx; + v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; + + dst.p.x = box->x1; + v[3] = dst.f; + v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; + + dst.p.y = box->y1; + v[6] = dst.f; + v[8] = ((box->y1 + ty) * yy + y0) * sy; + + v += 9; + box++; + } while (--nbox); +} + fastcall static void emit_primitive_affine_source(struct sna *sna, const struct sna_composite_op *op, @@ -499,6 +621,43 @@ emit_primitive_affine_source(struct sna *sna, &v[7], &v[8]); } +fastcall static void +emit_boxes_affine_source(const 
struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + box->x2, + op->src.offset[1] + box->y2, + op->src.transform, op->src.scale, + &v[1], &v[2]); + + dst.p.x = box->x1; + v[3] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + box->x1, + op->src.offset[1] + box->y2, + op->src.transform, op->src.scale, + &v[4], &v[5]); + + dst.p.y = box->y1; + v[6] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + box->x1, + op->src.offset[1] + box->y1, + op->src.transform, op->src.scale, + &v[7], &v[8]); + box++; + v += 9; + } while (--nbox); +} + fastcall static void emit_primitive_identity_mask(struct sna *sna, const struct sna_composite_op *op, @@ -542,6 +701,40 @@ emit_primitive_identity_mask(struct sna *sna, v[9] = v[5] = v[1] = .5; } +fastcall static void +emit_boxes_identity_mask(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float msk_x = op->mask.offset[0]; + float msk_y = op->mask.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[2] = (msk_x + box->x2) * op->mask.scale[0]; + v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; + + dst.p.x = box->x1; + v[4] = dst.f; + v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; + + dst.p.y = box->y1; + v[8] = dst.f; + v[11] = (msk_y + box->y1) * op->mask.scale[1]; + + v[9] = v[5] = v[1] = .5; + v += 12; + box++; + } while (--nbox); +} + fastcall static void emit_primitive_linear_identity_mask(struct sna *sna, const struct sna_composite_op *op, @@ -587,6 +780,43 @@ emit_primitive_linear_identity_mask(struct sna *sna, v[9] = compute_linear(&op->src, r->src.x, r->src.y); } +fastcall static void +emit_boxes_linear_identity_mask(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float 
*v) +{ + float msk_x = op->mask.offset[0]; + float msk_y = op->mask.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[2] = (msk_x + box->x2) * op->mask.scale[0]; + v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; + + dst.p.x = box->x1; + v[4] = dst.f; + v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; + + dst.p.y = box->y1; + v[8] = dst.f; + v[11] = (msk_y + box->y1) * op->mask.scale[1]; + + v[1] = compute_linear(&op->src, box->x2, box->y2); + v[5] = compute_linear(&op->src, box->x1, box->y2); + v[9] = compute_linear(&op->src, box->x1, box->y1); + + v += 12; + box++; + } while (--nbox); +} + fastcall static void emit_primitive_identity_source_mask(struct sna *sna, const struct sna_composite_op *op, @@ -760,11 +990,13 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp) if (tmp->src.is_solid) { DBG(("%s: solid, identity mask\n", __FUNCTION__)); tmp->prim_emit = emit_primitive_identity_mask; + tmp->emit_boxes = emit_boxes_identity_mask; tmp->floats_per_vertex = 4; vb = 1 | 2 << 2; } else if (tmp->src.is_linear) { DBG(("%s: linear, identity mask\n", __FUNCTION__)); tmp->prim_emit = emit_primitive_linear_identity_mask; + tmp->emit_boxes = emit_boxes_linear_identity_mask; tmp->floats_per_vertex = 4; vb = 1 | 2 << 2; } else if (tmp->src.transform == NULL) { @@ -821,6 +1053,7 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp) if (tmp->src.is_solid) { DBG(("%s: solid, no mask\n", __FUNCTION__)); tmp->prim_emit = emit_primitive_solid; + tmp->emit_boxes = emit_boxes_solid; if (tmp->src.is_opaque && tmp->op == PictOpOver) tmp->op = PictOpSrc; tmp->floats_per_vertex = 2; @@ -828,11 +1061,13 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp) } else if (tmp->src.is_linear) { DBG(("%s: linear, no mask\n", __FUNCTION__)); tmp->prim_emit = emit_primitive_linear; + tmp->emit_boxes = emit_boxes_linear; tmp->floats_per_vertex 
= 2; vb = 1; } else if (tmp->src.transform == NULL) { DBG(("%s: identity src, no mask\n", __FUNCTION__)); tmp->prim_emit = emit_primitive_identity_source; + tmp->emit_boxes = emit_boxes_identity_source; tmp->floats_per_vertex = 3; vb = 2; } else if (tmp->src.is_affine) { @@ -841,9 +1076,11 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp) if (!sna_affine_transform_is_rotation(tmp->src.transform)) { DBG(("%s: simple src, no mask\n", __FUNCTION__)); tmp->prim_emit = emit_primitive_simple_source; + tmp->emit_boxes = emit_boxes_simple_source; } else { DBG(("%s: affine src, no mask\n", __FUNCTION__)); tmp->prim_emit = emit_primitive_affine_source; + tmp->emit_boxes = emit_boxes_affine_source; } tmp->floats_per_vertex = 3; vb = 2; diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c index 81e6635a..998d55e2 100644 --- a/src/sna/gen5_render.c +++ b/src/sna/gen5_render.c @@ -1095,9 +1095,9 @@ gen5_render_composite_box(struct sna *sna, } static void -gen5_render_composite_boxes(struct sna *sna, - const struct sna_composite_op *op, - const BoxRec *box, int nbox) +gen5_render_composite_boxes__blt(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) { DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n", __FUNCTION__, nbox, op->dst.x, op->dst.y, @@ -1131,6 +1131,62 @@ gen5_render_composite_boxes(struct sna *sna, } while (nbox); } +static void +gen5_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen5_get_rectangles(sna, op, nbox, + gen5_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + } while (nbox); +} + +static void 
+gen5_render_composite_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen5_get_rectangles(sna, op, nbox, + gen5_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + #ifndef MAX #define MAX(a,b) ((a) > (b) ? (a) : (b)) #endif @@ -1874,7 +1930,11 @@ gen5_render_composite(struct sna *sna, tmp->blt = gen5_render_composite_blt; tmp->box = gen5_render_composite_box; - tmp->boxes = gen5_render_composite_boxes; + tmp->boxes = gen5_render_composite_boxes__blt; + if (tmp->emit_boxes) { + tmp->boxes = gen5_render_composite_boxes; + tmp->thread_boxes = gen5_render_composite_boxes__thread; + } tmp->done = gen5_render_composite_done; if (!kgem_check_bo(&sna->kgem, diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c index 4ff1606d..4a9387a6 100644 --- a/src/sna/gen6_render.c +++ b/src/sna/gen6_render.c @@ -1340,9 +1340,9 @@ gen6_render_composite_box(struct sna *sna, } static void -gen6_render_composite_boxes(struct sna *sna, - const struct sna_composite_op *op, - const BoxRec *box, int nbox) +gen6_render_composite_boxes__blt(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) { DBG(("composite_boxes(%d)\n", nbox)); @@ -1372,6 +1372,62 @@ gen6_render_composite_boxes(struct sna *sna, } while (nbox); } +static void +gen6_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: 
nbox=%d\n", __FUNCTION__, nbox)); + + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen6_get_rectangles(sna, op, nbox, + gen6_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + } while (nbox); +} + +static void +gen6_render_composite_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen6_get_rectangles(sna, op, nbox, + gen6_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + #ifndef MAX #define MAX(a,b) ((a) > (b) ? 
(a) : (b)) #endif @@ -2214,7 +2270,11 @@ gen6_render_composite(struct sna *sna, tmp->blt = gen6_render_composite_blt; tmp->box = gen6_render_composite_box; - tmp->boxes = gen6_render_composite_boxes; + tmp->boxes = gen6_render_composite_boxes__blt; + if (tmp->emit_boxes) { + tmp->boxes = gen6_render_composite_boxes; + tmp->thread_boxes = gen6_render_composite_boxes__thread; + } tmp->done = gen6_render_composite_done; kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index 34ba252a..6eec4b45 100644 --- a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -1463,9 +1463,9 @@ gen7_render_composite_box(struct sna *sna, } static void -gen7_render_composite_boxes(struct sna *sna, - const struct sna_composite_op *op, - const BoxRec *box, int nbox) +gen7_render_composite_boxes__blt(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) { DBG(("composite_boxes(%d)\n", nbox)); @@ -1495,6 +1495,62 @@ gen7_render_composite_boxes(struct sna *sna, } while (nbox); } +static void +gen7_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen7_get_rectangles(sna, op, nbox, + gen7_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + } while (nbox); +} + +static void +gen7_render_composite_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen7_get_rectangles(sna, op, nbox, + gen7_emit_composite_state); + assert(nbox_this_time); + nbox -= 
nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + #ifndef MAX #define MAX(a,b) ((a) > (b) ? (a) : (b)) #endif @@ -2334,7 +2390,11 @@ gen7_render_composite(struct sna *sna, tmp->blt = gen7_render_composite_blt; tmp->box = gen7_render_composite_box; - tmp->boxes = gen7_render_composite_boxes; + tmp->boxes = gen7_render_composite_boxes__blt; + if (tmp->emit_boxes){ + tmp->boxes = gen7_render_composite_boxes; + tmp->thread_boxes = gen7_render_composite_boxes__thread; + } tmp->done = gen7_render_composite_done; kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c index 56025790..edfcb9ea 100644 --- a/src/sna/sna_blt.c +++ b/src/sna/sna_blt.c @@ -934,6 +934,76 @@ static void blt_composite_fill_boxes_no_offset(struct sna *sna, _sna_blt_fill_boxes(sna, &op->u.blt, box, n); } +static void blt_composite_fill_boxes_no_offset__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + struct kgem *kgem = &sna->kgem; + const struct sna_blt_state *blt = &op->u.blt; + uint32_t cmd = blt->cmd; + + DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox)); + + sna_vertex_lock(&sna->render); + if (!kgem_check_batch(kgem, 3)) { + sna_vertex_wait__locked(&sna->render); + sna_blt_fill_begin(sna, blt); + } + + do { + uint32_t *b = kgem->batch + kgem->nbatch; + int nbox_this_time; + + nbox_this_time = nbox; + if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3; + assert(nbox_this_time); + nbox -= nbox_this_time; + + kgem->nbatch += 
3 * nbox_this_time; + assert(kgem->nbatch < kgem->surface); + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + while (nbox_this_time >= 8) { + b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; + b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++; + b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++; + b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++; + b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++; + b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++; + b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++; + b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++; + b += 24; + nbox_this_time -= 8; + } + if (nbox_this_time & 4) { + b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; + b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++; + b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++; + b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++; + b += 12; + } + if (nbox_this_time & 2) { + b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; + b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++; + b += 6; + } + if (nbox_this_time & 1) { + b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++; + } + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + if (!nbox) + break; + + sna_vertex_wait__locked(&sna->render); + sna_blt_fill_begin(sna, blt); + } while (1); + sna_vertex_unlock(&sna->render); +} + fastcall static void blt_composite_fill_box(struct sna *sna, const struct sna_composite_op *op, const BoxRec *box) @@ -957,6 +1027,92 @@ static void blt_composite_fill_boxes(struct sna *sna, } while (--n); } +static inline uint64_t add4(const BoxRec *b, int16_t x, int16_t y) +{ + union { + uint64_t v; + int16_t i[4]; + } vi; + vi.v = *(uint64_t *)b; + vi.i[0] += x; + vi.i[1] += y; + vi.i[2] += x; + vi.i[3] += y; + return vi.v; +} + +static void blt_composite_fill_boxes__thread(struct sna *sna, + const struct 
sna_composite_op *op, + const BoxRec *box, int nbox) +{ + struct kgem *kgem = &sna->kgem; + const struct sna_blt_state *blt = &op->u.blt; + uint32_t cmd = blt->cmd; + int16_t dx = op->dst.x; + int16_t dy = op->dst.y; + + DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox)); + + sna_vertex_lock(&sna->render); + if (!kgem_check_batch(kgem, 3)) { + sna_vertex_wait__locked(&sna->render); + sna_blt_fill_begin(sna, blt); + } + + do { + uint32_t *b = kgem->batch + kgem->nbatch; + int nbox_this_time; + + nbox_this_time = nbox; + if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3; + assert(nbox_this_time); + nbox -= nbox_this_time; + + kgem->nbatch += 3 * nbox_this_time; + assert(kgem->nbatch < kgem->surface); + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + while (nbox_this_time >= 8) { + b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy); + b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy); + b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy); + b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy); + b[12] = cmd; *(uint64_t *)(b+13) = add4(box++, dx, dy); + b[15] = cmd; *(uint64_t *)(b+16) = add4(box++, dx, dy); + b[18] = cmd; *(uint64_t *)(b+19) = add4(box++, dx, dy); + b[21] = cmd; *(uint64_t *)(b+22) = add4(box++, dx, dy); + b += 24; + nbox_this_time -= 8; + } + if (nbox_this_time & 4) { + b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy); + b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy); + b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy); + b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy); + b += 12; + } + if (nbox_this_time & 2) { + b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy); + b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy); + b += 6; + } + if (nbox_this_time & 1) { + b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy); + } + + sna_vertex_lock(&sna->render); + 
sna_vertex_release__locked(&sna->render); + if (!nbox) + break; + + sna_vertex_wait__locked(&sna->render); + sna_blt_fill_begin(sna, blt); + } while (1); + sna_vertex_unlock(&sna->render); +} + fastcall static void blt_composite_nop(struct sna *sna, const struct sna_composite_op *op, @@ -1014,6 +1170,7 @@ prepare_blt_clear(struct sna *sna, op->blt = blt_composite_fill__cpu; op->box = blt_composite_fill_box__cpu; op->boxes = blt_composite_fill_boxes__cpu; + op->thread_boxes = blt_composite_fill_boxes__cpu; op->done = nop_done; op->u.blt.pixel = 0; return true; @@ -1023,9 +1180,11 @@ prepare_blt_clear(struct sna *sna, if (op->dst.x|op->dst.y) { op->box = blt_composite_fill_box; op->boxes = blt_composite_fill_boxes; + op->thread_boxes = blt_composite_fill_boxes__thread; } else { op->box = blt_composite_fill_box_no_offset; op->boxes = blt_composite_fill_boxes_no_offset; + op->thread_boxes = blt_composite_fill_boxes_no_offset__thread; } op->done = nop_done; @@ -1050,6 +1209,7 @@ prepare_blt_fill(struct sna *sna, op->blt = blt_composite_fill__cpu; op->box = blt_composite_fill_box__cpu; op->boxes = blt_composite_fill_boxes__cpu; + op->thread_boxes = blt_composite_fill_boxes__cpu; op->done = nop_done; return true; } @@ -1058,9 +1218,11 @@ prepare_blt_fill(struct sna *sna, if (op->dst.x|op->dst.y) { op->box = blt_composite_fill_box; op->boxes = blt_composite_fill_boxes; + op->thread_boxes = blt_composite_fill_boxes__thread; } else { op->box = blt_composite_fill_box_no_offset; op->boxes = blt_composite_fill_boxes_no_offset; + op->thread_boxes = blt_composite_fill_boxes_no_offset__thread; } op->done = nop_done; @@ -1151,6 +1313,141 @@ static void blt_composite_copy_boxes(struct sna *sna, } while(--nbox); } +static inline uint32_t add2(uint32_t v, int16_t x, int16_t y) +{ + x += v & 0xffff; + y += v >> 16; + return (uint16_t)y << 16 | x; +} + +static void blt_composite_copy_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + 
struct kgem *kgem = &sna->kgem; + int dst_dx = op->dst.x; + int dst_dy = op->dst.y; + int src_dx = op->src.offset[0]; + int src_dy = op->src.offset[1]; + uint32_t cmd = op->u.blt.cmd; + uint32_t br13 = op->u.blt.br13; + struct kgem_bo *src_bo = op->u.blt.bo[0]; + struct kgem_bo *dst_bo = op->u.blt.bo[1]; + int src_pitch = op->u.blt.pitch[0]; + + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + + if ((dst_dx | dst_dy) == 0) { + uint64_t hdr = (uint64_t)br13 << 32 | cmd; + do { + int nbox_this_time; + + nbox_this_time = nbox; + if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; + assert(nbox_this_time); + nbox -= nbox_this_time; + + do { + uint32_t *b = kgem->batch + kgem->nbatch; + + DBG((" %s: box=(%d, %d)x(%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, box->y2 - box->y1)); + + assert(box->x1 + src_dx >= 0); + assert(box->y1 + src_dy >= 0); + assert(box->x1 + src_dx <= INT16_MAX); + assert(box->y1 + src_dy <= INT16_MAX); + + assert(box->x1 >= 0); + assert(box->y1 >= 0); + + *(uint64_t *)&b[0] = hdr; + *(uint64_t *)&b[2] = *(const uint64_t *)box; + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = add2(b[2], src_dx, src_dy); + b[6] = src_pitch; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 8; + assert(kgem->nbatch < kgem->surface); + box++; + } while (--nbox_this_time); + + if (!nbox) + break; + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } while (1); + } else { + do { + int nbox_this_time; + + nbox_this_time = nbox; + if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + 
nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; + assert(nbox_this_time); + nbox -= nbox_this_time; + + do { + uint32_t *b = kgem->batch + kgem->nbatch; + + DBG((" %s: box=(%d, %d)x(%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, box->y2 - box->y1)); + + assert(box->x1 + src_dx >= 0); + assert(box->y1 + src_dy >= 0); + + assert(box->x1 + dst_dx >= 0); + assert(box->y1 + dst_dy >= 0); + + b[0] = cmd; + b[1] = br13; + b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx); + b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx); + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx); + b[6] = src_pitch; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 8; + assert(kgem->nbatch < kgem->surface); + box++; + } while (--nbox_this_time); + + if (!nbox) + break; + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } while (1); + } + sna_vertex_unlock(&sna->render); +} + fastcall static void blt_composite_copy_with_alpha(struct sna *sna, const struct sna_composite_op *op, @@ -1277,6 +1574,7 @@ prepare_blt_copy(struct sna *sna, op->blt = blt_composite_copy; op->box = blt_composite_copy_box; op->boxes = blt_composite_copy_boxes; + op->thread_boxes = blt_composite_copy_boxes__thread; if (!sna_blt_copy_init(sna, &op->u.blt, bo, op->dst.bo, src->drawable.bitsPerPixel, @@ -2075,6 +2373,7 @@ sna_blt_composite__convert(struct sna *sna, tmp->blt = blt_composite_copy; tmp->box = blt_composite_copy_box; tmp->boxes = blt_composite_copy_boxes; + tmp->thread_boxes = blt_composite_copy_boxes__thread; if (!sna_blt_copy_init(sna, &tmp->u.blt, tmp->src.bo, tmp->dst.bo, @@ -2446,13 +2745,6 
@@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, return true; } -static inline uint32_t add2(uint32_t v, int16_t x, int16_t y) -{ - x += v & 0xffff; - y += v >> 16; - return (uint16_t)y << 16 | x; -} - bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h index 6a0b1d8a..c953e50c 100644 --- a/src/sna/sna_render.h +++ b/src/sna/sna_render.h @@ -35,6 +35,8 @@ struct sna_composite_op { const BoxRec *box); void (*boxes)(struct sna *sna, const struct sna_composite_op *op, const BoxRec *box, int nbox); + void (*thread_boxes)(struct sna *sna, const struct sna_composite_op *op, + const BoxRec *box, int nbox); void (*done)(struct sna *sna, const struct sna_composite_op *op); struct sna_damage **damage; @@ -93,6 +95,9 @@ struct sna_composite_op { fastcall void (*prim_emit)(struct sna *sna, const struct sna_composite_op *op, const struct sna_composite_rectangles *r); + fastcall void (*emit_boxes)(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v); struct sna_composite_redirect { struct kgem_bo *real_bo; diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c index bf4816bb..7f7492ad 100644 --- a/src/sna/sna_trapezoids.c +++ b/src/sna/sna_trapezoids.c @@ -1876,7 +1876,7 @@ static void mono_add_line(struct mono *mono, int dst_x, int dst_y, xFixed top, xFixed bottom, - xPointFixed *p1, xPointFixed *p2, + const xPointFixed *p1, const xPointFixed *p2, int dir) { struct mono_polygon *polygon = &mono->polygon; @@ -1893,7 +1893,7 @@ mono_add_line(struct mono *mono, dir)); if (top > bottom) { - xPointFixed *t; + const xPointFixed *t; y = top; top = bottom; @@ -2150,6 +2150,60 @@ mono_span__fast(struct mono *c, int x1, int x2, BoxPtr box) c->op.box(c->sna, &c->op, box); } +struct mono_span_thread_boxes { + const struct sna_composite_op *op; +#define MONO_SPAN_MAX_BOXES 
(8192/sizeof(BoxRec)) + BoxRec boxes[MONO_SPAN_MAX_BOXES]; + int num_boxes; +}; + +inline static void +thread_mono_span_add_boxes(struct mono *c, const BoxRec *box, int count) +{ + struct mono_span_thread_boxes *b = c->op.priv; + + assert(count > 0 && count <= MONO_SPAN_MAX_BOXES); + if (b->num_boxes + count > MONO_SPAN_MAX_BOXES) { + b->op->thread_boxes(c->sna, b->op, b->boxes, b->num_boxes); + b->num_boxes = 0; + } + + memcpy(b->boxes + b->num_boxes, box, count*sizeof(BoxRec)); + b->num_boxes += count; + assert(b->num_boxes <= MONO_SPAN_MAX_BOXES); +} + +fastcall static void +thread_mono_span_clipped(struct mono *c, int x1, int x2, BoxPtr box) +{ + pixman_region16_t region; + + __DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2)); + + box->x1 = x1; + box->x2 = x2; + + assert(c->clip.data); + + pixman_region_init_rects(&region, box, 1); + RegionIntersect(&region, &region, &c->clip); + if (REGION_NUM_RECTS(&region)) + thread_mono_span_add_boxes(c, + REGION_RECTS(&region), + REGION_NUM_RECTS(&region)); + pixman_region_fini(&region); +} + +fastcall static void +thread_mono_span(struct mono *c, int x1, int x2, BoxPtr box) +{ + __DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2)); + + box->x1 = x1; + box->x2 = x2; + thread_mono_span_add_boxes(c, box, 1); +} + inline static void mono_row(struct mono *c, int16_t y, int16_t h) { @@ -2267,10 +2321,7 @@ mono_render(struct mono *mono) struct mono_polygon *polygon = &mono->polygon; int i, j, h = mono->clip.extents.y2 - mono->clip.extents.y1; - if (mono->clip.data == NULL && mono->op.damage == NULL) - mono->span = mono_span__fast; - else - mono->span = mono_span; + assert(mono->span); for (i = 0; i < h; i = j) { j = i + 1; @@ -4053,6 +4104,74 @@ choose_span(struct sna_composite_spans_op *tmp, return span; } +struct mono_span_thread { + struct sna *sna; + const xTrapezoid *traps; + const struct sna_composite_op *op; + RegionPtr clip; + int ntrap; + BoxRec extents; + int dx, dy; +}; + +static void +mono_span_thread(void *arg) +{ + struct mono_span_thread 
*thread = arg; + struct mono mono; + struct mono_span_thread_boxes boxes; + const xTrapezoid *t; + int n; + + mono.sna = thread->sna; + + mono.clip.extents = thread->extents; + mono.clip.data = NULL; + if (thread->clip->data) { + RegionIntersect(&mono.clip, &mono.clip, thread->clip); + if (RegionNil(&mono.clip)) + return; + } + + boxes.op = thread->op; + boxes.num_boxes = 0; + mono.op.priv = &boxes; + + if (!mono_init(&mono, 2*thread->ntrap)) { + RegionUninit(&mono.clip); + return; + } + + for (n = thread->ntrap, t = thread->traps; n--; t++) { + if (!xTrapezoidValid(t)) + continue; + + if (pixman_fixed_to_int(t->top) + thread->dy >= thread->extents.y2 || + pixman_fixed_to_int(t->bottom) + thread->dy <= thread->extents.y1) + continue; + + mono_add_line(&mono, thread->dx, thread->dy, + t->top, t->bottom, + &t->left.p1, &t->left.p2, 1); + mono_add_line(&mono, thread->dx, thread->dy, + t->top, t->bottom, + &t->right.p1, &t->right.p2, -1); + } + + if (mono.clip.data == NULL) + mono.span = thread_mono_span; + else + mono.span = thread_mono_span_clipped; + + mono_render(&mono); + mono_fini(&mono); + + if (boxes.num_boxes) + thread->op->thread_boxes(thread->sna, thread->op, + boxes.boxes, boxes.num_boxes); + RegionUninit(&mono.clip); +} + static bool mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, INT16 src_x, INT16 src_y, @@ -4062,8 +4181,8 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, BoxRec extents; int16_t dst_x, dst_y; int16_t dx, dy; - bool was_clear; - int n; + bool unbounded; + int num_threads, n; if (NO_SCAN_CONVERTER) return false; @@ -4102,11 +4221,69 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, src_x + mono.clip.extents.x1 - dst_x - dx, src_y + mono.clip.extents.y1 - dst_y - dy)); + unbounded = (!sna_drawable_is_clear(dst->pDrawable) && + !operator_is_bounded(op)); + mono.sna = to_sna_from_drawable(dst->pDrawable); - if (!mono_init(&mono, 2*ntrap)) + if 
(!mono.sna->render.composite(mono.sna, op, src, NULL, dst, + src_x + mono.clip.extents.x1 - dst_x - dx, + src_y + mono.clip.extents.y1 - dst_y - dy, + 0, 0, + mono.clip.extents.x1, mono.clip.extents.y1, + mono.clip.extents.x2 - mono.clip.extents.x1, + mono.clip.extents.y2 - mono.clip.extents.y1, + memset(&mono.op, 0, sizeof(mono.op)))) return false; - was_clear = sna_drawable_is_clear(dst->pDrawable); + num_threads = 1; + if (!NO_GPU_THREADS && + mono.op.thread_boxes && + mono.op.damage == NULL && + !unbounded) + num_threads = sna_use_threads(mono.clip.extents.x2 - mono.clip.extents.x1, + mono.clip.extents.y2 - mono.clip.extents.y1, + 16); + if (num_threads > 1) { + struct mono_span_thread threads[num_threads]; + int y, h; + + DBG(("%s: using %d threads for mono span compositing %dx%d\n", + __FUNCTION__, num_threads, + mono.clip.extents.x2 - mono.clip.extents.x1, + mono.clip.extents.y2 - mono.clip.extents.y1)); + + threads[0].sna = mono.sna; + threads[0].op = &mono.op; + threads[0].traps = traps; + threads[0].ntrap = ntrap; + threads[0].extents = mono.clip.extents; + threads[0].clip = &mono.clip; + threads[0].dx = dx; + threads[0].dy = dy; + + y = extents.y1; + h = extents.y2 - extents.y1; + h = (h + num_threads - 1) / num_threads; + + for (n = 1; n < num_threads; n++) { + threads[n] = threads[0]; + threads[n].extents.y1 = y; + threads[n].extents.y2 = y += h; + + sna_threads_run(mono_span_thread, &threads[n]); + } + + threads[0].extents.y1 = y; + threads[0].extents.y2 = extents.y2; + mono_span_thread(&threads[0]); + + sna_threads_wait(); + mono.op.done(mono.sna, &mono.op); + return true; + } + + if (!mono_init(&mono, 2*ntrap)) + return false; for (n = 0; n < ntrap; n++) { if (!xTrapezoidValid(&traps[n])) @@ -4124,23 +4301,16 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, &traps[n].right.p1, &traps[n].right.p2, -1); } - memset(&mono.op, 0, sizeof(mono.op)); - if (!mono.sna->render.composite(mono.sna, op, src, NULL, dst, - src_x + 
mono.clip.extents.x1 - dst_x - dx, - src_y + mono.clip.extents.y1 - dst_y - dy, - 0, 0, - mono.clip.extents.x1, mono.clip.extents.y1, - mono.clip.extents.x2 - mono.clip.extents.x1, - mono.clip.extents.y2 - mono.clip.extents.y1, - &mono.op)) { - mono_fini(&mono); - return false; - } + if (mono.clip.data == NULL && mono.op.damage == NULL) + mono.span = mono_span__fast; + else + mono.span = mono_span; + mono_render(&mono); mono.op.done(mono.sna, &mono.op); mono_fini(&mono); - if (!was_clear && !operator_is_bounded(op)) { + if (unbounded) { xPointFixed p1, p2; if (!mono_init(&mono, 2+2*ntrap)) @@ -5245,6 +5415,11 @@ unbounded_pass: mono.op.box = mono_inplace_composite_box; mono.op.boxes = mono_inplace_composite_boxes; } + + if (mono.clip.data == NULL && mono.op.damage == NULL) + mono.span = mono_span__fast; + else + mono.span = mono_span; mono_render(&mono); mono_fini(&mono); @@ -6850,6 +7025,10 @@ mono_triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, mono.clip.extents.x2 - mono.clip.extents.x1, mono.clip.extents.y2 - mono.clip.extents.y1, &mono.op)) { + if (mono.clip.data == NULL && mono.op.damage == NULL) + mono.span = mono_span__fast; + else + mono.span = mono_span; mono_render(&mono); mono.op.done(mono.sna, &mono.op); } @@ -6893,6 +7072,10 @@ mono_triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, mono.clip.extents.x2 - mono.clip.extents.x1, mono.clip.extents.y2 - mono.clip.extents.y1, &mono.op)) { + if (mono.clip.data == NULL && mono.op.damage == NULL) + mono.span = mono_span__fast; + else + mono.span = mono_span; mono_render(&mono); mono.op.done(mono.sna, &mono.op); } -- cgit v1.2.3