summaryrefslogtreecommitdiff
path: root/src/sna
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2013-01-26 23:03:33 +0000
committerChris Wilson <chris@chris-wilson.co.uk>2013-01-27 13:06:46 +0000
commit9a7bf70365980809d0f02190f2f620a957ff1ba8 (patch)
tree754fc42b19f596a697c41e8045abf0223fb6b5be /src/sna
parent8178cff5718e69e14d3953a7f754d7585a06838f (diff)
sna: Enable threaded rasterisation for non-antialiased geometry
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src/sna')
-rw-r--r--src/sna/gen3_render.c239
-rw-r--r--src/sna/gen4_render.c68
-rw-r--r--src/sna/gen4_vertex.c237
-rw-r--r--src/sna/gen5_render.c68
-rw-r--r--src/sna/gen6_render.c68
-rw-r--r--src/sna/gen7_render.c68
-rw-r--r--src/sna/sna_blt.c306
-rw-r--r--src/sna/sna_render.h5
-rw-r--r--src/sna/sna_trapezoids.c229
9 files changed, 1234 insertions, 54 deletions
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 3224d717..6c0ea6a6 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -431,6 +431,26 @@ gen3_emit_composite_primitive_constant(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_constant(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ v[0] = box->x2;
+ v[1] = box->y2;
+
+ v[2] = box->x1;
+ v[3] = box->y2;
+
+ v[4] = box->x1;
+ v[5] = box->y1;
+
+ box++;
+ v += 6;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -457,6 +477,32 @@ gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ v[0] = box->x2;
+ v[1] = box->y2;
+ v[2] = box->x2 + op->src.offset[0];
+ v[3] = box->y2 + op->src.offset[1];
+
+ v[4] = box->x1;
+ v[5] = box->y2;
+ v[6] = box->x1 + op->src.offset[0];
+ v[7] = box->y2 + op->src.offset[1];
+
+ v[8] = box->x1;
+ v[9] = box->y1;
+ v[10] = box->x1 + op->src.offset[0];
+ v[11] = box->y1 + op->src.offset[1];
+
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_affine_gradient(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -494,6 +540,40 @@ gen3_emit_composite_primitive_affine_gradient(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ const PictTransform *transform = op->src.transform;
+
+ do {
+ v[0] = box->x2;
+ v[1] = box->y2;
+ sna_get_transformed_coordinates(box->x2 + op->src.offset[0],
+ box->y2 + op->src.offset[1],
+ transform,
+ &v[2], &v[3]);
+
+ v[4] = box->x1;
+ v[5] = box->y2;
+ sna_get_transformed_coordinates(box->x1 + op->src.offset[0],
+ box->y2 + op->src.offset[1],
+ transform,
+ &v[6], &v[7]);
+
+ v[8] = box->x1;
+ v[9] = box->y1;
+ sna_get_transformed_coordinates(box->x1 + op->src.offset[0],
+ box->y1 + op->src.offset[1],
+ transform,
+ &v[10], &v[11]);
+
+ box++;
+ v += 12;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_identity_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -519,6 +599,28 @@ gen3_emit_composite_primitive_identity_source(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ v[0] = box->x2 + op->dst.x;
+ v[8] = v[4] = box->x1 + op->dst.x;
+ v[5] = v[1] = box->y2 + op->dst.y;
+ v[9] = box->y1 + op->dst.y;
+
+ v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
+ v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
+
+ v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
+ v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
+
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -544,6 +646,28 @@ gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_identity_source_no_offset(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ v[0] = box->x2;
+ v[8] = v[4] = box->x1;
+ v[5] = v[1] = box->y2;
+ v[9] = box->y1;
+
+ v[10] = v[6] = box->x1 * op->src.scale[0];
+ v[2] = box->x2 * op->src.scale[0];
+
+ v[11] = box->y1 * op->src.scale[1];
+ v[7] = v[3] = box->y2 * op->src.scale[1];
+
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_affine_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -577,6 +701,39 @@ gen3_emit_composite_primitive_affine_source(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_boxes_affine_source(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ const PictTransform *transform = op->src.transform;
+
+ do {
+ v[0] = box->x2;
+ v[5] = v[1] = box->y2;
+ v[8] = v[4] = box->x1;
+ v[9] = box->y1;
+
+ _sna_get_transformed_scaled(box->x2 + op->src.offset[0],
+ box->y2 + op->src.offset[1],
+ transform, op->src.scale,
+ &v[2], &v[3]);
+
+ _sna_get_transformed_scaled(box->x1 + op->src.offset[0],
+ box->y2 + op->src.offset[1],
+ transform, op->src.scale,
+ &v[6], &v[7]);
+
+ _sna_get_transformed_scaled(box->x1 + op->src.offset[0],
+ box->y1 + op->src.offset[1],
+ transform, op->src.scale,
+ &v[10], &v[11]);
+
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -1900,9 +2057,9 @@ gen3_render_composite_box(struct sna *sna,
}
static void
-gen3_render_composite_boxes(struct sna *sna,
- const struct sna_composite_op *op,
- const BoxRec *box, int nbox)
+gen3_render_composite_boxes__blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
__FUNCTION__, nbox,
@@ -1936,6 +2093,60 @@ gen3_render_composite_boxes(struct sna *sna,
}
static void
+gen3_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen3_get_rectangles(sna, op, nbox);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+ } while (nbox);
+}
+
+static void
+gen3_render_composite_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen3_get_rectangles(sna, op, nbox);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+static void
gen3_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
@@ -2986,24 +3197,32 @@ gen3_render_composite(struct sna *sna,
case SHADER_WHITE:
case SHADER_CONSTANT:
tmp->prim_emit = gen3_emit_composite_primitive_constant;
+ tmp->emit_boxes = gen3_emit_composite_boxes_constant;
break;
case SHADER_LINEAR:
case SHADER_RADIAL:
- if (tmp->src.transform == NULL)
+ if (tmp->src.transform == NULL) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
- else if (tmp->src.is_affine)
+ tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient;
+ } else if (tmp->src.is_affine) {
tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
+ tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient;
+ }
break;
case SHADER_TEXTURE:
if (tmp->src.transform == NULL) {
- if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0)
+ if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
- else
+ tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset;
+ } else {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
+ tmp->emit_boxes = gen3_emit_composite_boxes_identity_source;
+ }
} else if (tmp->src.is_affine) {
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
+ tmp->emit_boxes = gen3_emit_composite_boxes_affine_source;
}
break;
}
@@ -3035,7 +3254,11 @@ gen3_render_composite(struct sna *sna,
tmp->blt = gen3_render_composite_blt;
tmp->box = gen3_render_composite_box;
- tmp->boxes = gen3_render_composite_boxes;
+ tmp->boxes = gen3_render_composite_boxes__blt;
+ if (tmp->emit_boxes) {
+ tmp->boxes = gen3_render_composite_boxes;
+ tmp->thread_boxes = gen3_render_composite_boxes__thread;
+ }
tmp->done = gen3_render_composite_done;
if (!kgem_check_bo(&sna->kgem,
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 65016cd1..e4f5f594 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -1109,9 +1109,9 @@ gen4_render_composite_box(struct sna *sna,
}
static void
-gen4_render_composite_boxes(struct sna *sna,
- const struct sna_composite_op *op,
- const BoxRec *box, int nbox)
+gen4_render_composite_boxes__blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
{
DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
__FUNCTION__, nbox, op->dst.x, op->dst.y,
@@ -1145,6 +1145,62 @@ gen4_render_composite_boxes(struct sna *sna,
} while (nbox);
}
+static void
+gen4_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen4_get_rectangles(sna, op, nbox,
+ gen4_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+ } while (nbox);
+}
+
+static void
+gen4_render_composite_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen4_get_rectangles(sna, op, nbox,
+ gen4_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
@@ -1899,7 +1955,11 @@ gen4_render_composite(struct sna *sna,
tmp->blt = gen4_render_composite_blt;
tmp->box = gen4_render_composite_box;
- tmp->boxes = gen4_render_composite_boxes;
+ tmp->boxes = gen4_render_composite_boxes__blt;
+ if (tmp->emit_boxes) {
+ tmp->boxes = gen4_render_composite_boxes;
+ tmp->thread_boxes = gen4_render_composite_boxes__thread;
+ }
tmp->done = gen4_render_composite_done;
if (!kgem_check_bo(&sna->kgem,
diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
index cc679d38..e5131669 100644
--- a/src/sna/gen4_vertex.c
+++ b/src/sna/gen4_vertex.c
@@ -360,6 +360,31 @@ emit_primitive_solid(struct sna *sna,
}
fastcall static void
+emit_boxes_solid(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ dst.p.x = box->x1;
+ v[2] = dst.f;
+ dst.p.y = box->y1;
+ v[4] = dst.f;
+
+ v[5] = v[3] = v[1] = .5;
+ box++;
+ v += 6;
+ } while (--nbox);
+}
+
+fastcall static void
emit_primitive_linear(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -390,6 +415,34 @@ emit_primitive_linear(struct sna *sna,
}
fastcall static void
+emit_boxes_linear(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ do {
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ dst.p.x = box->x1;
+ v[2] = dst.f;
+ dst.p.y = box->y1;
+ v[4] = dst.f;
+
+ v[1] = compute_linear(&op->src, box->x2, box->y2);
+ v[3] = compute_linear(&op->src, box->x1, box->y2);
+ v[5] = compute_linear(&op->src, box->x1, box->y1);
+
+ v += 6;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
emit_primitive_identity_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -421,6 +474,36 @@ emit_primitive_identity_source(struct sna *sna,
}
fastcall static void
+emit_boxes_identity_source(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ dst.p.x = box->x1;
+ v[3] = dst.f;
+ dst.p.y = box->y1;
+ v[6] = dst.f;
+
+ v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
+ v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
+
+ v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
+ v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
+
+ v += 9;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
emit_primitive_simple_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -461,6 +544,45 @@ emit_primitive_simple_source(struct sna *sna,
}
fastcall static void
+emit_boxes_simple_source(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ float xx = op->src.transform->matrix[0][0];
+ float x0 = op->src.transform->matrix[0][2];
+ float yy = op->src.transform->matrix[1][1];
+ float y0 = op->src.transform->matrix[1][2];
+ float sx = op->src.scale[0];
+ float sy = op->src.scale[1];
+ int16_t tx = op->src.offset[0];
+ int16_t ty = op->src.offset[1];
+
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ v[1] = ((box->x2 + tx) * xx + x0) * sx;
+ v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
+
+ dst.p.x = box->x1;
+ v[3] = dst.f;
+ v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
+
+ dst.p.y = box->y1;
+ v[6] = dst.f;
+ v[8] = ((box->y1 + ty) * yy + y0) * sy;
+
+ v += 9;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
emit_primitive_affine_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -500,6 +622,43 @@ emit_primitive_affine_source(struct sna *sna,
}
fastcall static void
+emit_boxes_affine_source(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + box->x2,
+ op->src.offset[1] + box->y2,
+ op->src.transform, op->src.scale,
+ &v[1], &v[2]);
+
+ dst.p.x = box->x1;
+ v[3] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + box->x1,
+ op->src.offset[1] + box->y2,
+ op->src.transform, op->src.scale,
+ &v[4], &v[5]);
+
+ dst.p.y = box->y1;
+ v[6] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + box->x1,
+ op->src.offset[1] + box->y1,
+ op->src.transform, op->src.scale,
+ &v[7], &v[8]);
+ box++;
+ v += 9;
+ } while (--nbox);
+}
+
+fastcall static void
emit_primitive_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -543,6 +702,40 @@ emit_primitive_identity_mask(struct sna *sna,
}
fastcall static void
+emit_boxes_identity_mask(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ float msk_x = op->mask.offset[0];
+ float msk_y = op->mask.offset[1];
+
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ v[2] = (msk_x + box->x2) * op->mask.scale[0];
+ v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
+
+ dst.p.x = box->x1;
+ v[4] = dst.f;
+ v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
+
+ dst.p.y = box->y1;
+ v[8] = dst.f;
+ v[11] = (msk_y + box->y1) * op->mask.scale[1];
+
+ v[9] = v[5] = v[1] = .5;
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
emit_primitive_linear_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -588,6 +781,43 @@ emit_primitive_linear_identity_mask(struct sna *sna,
}
fastcall static void
+emit_boxes_linear_identity_mask(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ float msk_x = op->mask.offset[0];
+ float msk_y = op->mask.offset[1];
+
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ v[2] = (msk_x + box->x2) * op->mask.scale[0];
+ v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
+
+ dst.p.x = box->x1;
+ v[4] = dst.f;
+ v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
+
+ dst.p.y = box->y1;
+ v[8] = dst.f;
+ v[11] = (msk_y + box->y1) * op->mask.scale[1];
+
+ v[1] = compute_linear(&op->src, box->x2, box->y2);
+ v[5] = compute_linear(&op->src, box->x1, box->y2);
+ v[9] = compute_linear(&op->src, box->x1, box->y1);
+
+ v += 12;
+ box++;
+ } while (--nbox);
+}
+
+fastcall static void
emit_primitive_identity_source_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
@@ -760,11 +990,13 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp)
if (tmp->src.is_solid) {
DBG(("%s: solid, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_identity_mask;
+ tmp->emit_boxes = emit_boxes_identity_mask;
tmp->floats_per_vertex = 4;
vb = 1 | 2 << 2;
} else if (tmp->src.is_linear) {
DBG(("%s: linear, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_linear_identity_mask;
+ tmp->emit_boxes = emit_boxes_linear_identity_mask;
tmp->floats_per_vertex = 4;
vb = 1 | 2 << 2;
} else if (tmp->src.transform == NULL) {
@@ -821,6 +1053,7 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp)
if (tmp->src.is_solid) {
DBG(("%s: solid, no mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_solid;
+ tmp->emit_boxes = emit_boxes_solid;
if (tmp->src.is_opaque && tmp->op == PictOpOver)
tmp->op = PictOpSrc;
tmp->floats_per_vertex = 2;
@@ -828,11 +1061,13 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp)
} else if (tmp->src.is_linear) {
DBG(("%s: linear, no mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_linear;
+ tmp->emit_boxes = emit_boxes_linear;
tmp->floats_per_vertex = 2;
vb = 1;
} else if (tmp->src.transform == NULL) {
DBG(("%s: identity src, no mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_identity_source;
+ tmp->emit_boxes = emit_boxes_identity_source;
tmp->floats_per_vertex = 3;
vb = 2;
} else if (tmp->src.is_affine) {
@@ -841,9 +1076,11 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp)
if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
DBG(("%s: simple src, no mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_simple_source;
+ tmp->emit_boxes = emit_boxes_simple_source;
} else {
DBG(("%s: affine src, no mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_affine_source;
+ tmp->emit_boxes = emit_boxes_affine_source;
}
tmp->floats_per_vertex = 3;
vb = 2;
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 81e6635a..998d55e2 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -1095,9 +1095,9 @@ gen5_render_composite_box(struct sna *sna,
}
static void
-gen5_render_composite_boxes(struct sna *sna,
- const struct sna_composite_op *op,
- const BoxRec *box, int nbox)
+gen5_render_composite_boxes__blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
{
DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
__FUNCTION__, nbox, op->dst.x, op->dst.y,
@@ -1131,6 +1131,62 @@ gen5_render_composite_boxes(struct sna *sna,
} while (nbox);
}
+static void
+gen5_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen5_get_rectangles(sna, op, nbox,
+ gen5_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+ } while (nbox);
+}
+
+static void
+gen5_render_composite_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen5_get_rectangles(sna, op, nbox,
+ gen5_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
@@ -1874,7 +1930,11 @@ gen5_render_composite(struct sna *sna,
tmp->blt = gen5_render_composite_blt;
tmp->box = gen5_render_composite_box;
- tmp->boxes = gen5_render_composite_boxes;
+ tmp->boxes = gen5_render_composite_boxes__blt;
+ if (tmp->emit_boxes) {
+ tmp->boxes = gen5_render_composite_boxes;
+ tmp->thread_boxes = gen5_render_composite_boxes__thread;
+ }
tmp->done = gen5_render_composite_done;
if (!kgem_check_bo(&sna->kgem,
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 4ff1606d..4a9387a6 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -1340,9 +1340,9 @@ gen6_render_composite_box(struct sna *sna,
}
static void
-gen6_render_composite_boxes(struct sna *sna,
- const struct sna_composite_op *op,
- const BoxRec *box, int nbox)
+gen6_render_composite_boxes__blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
{
DBG(("composite_boxes(%d)\n", nbox));
@@ -1372,6 +1372,62 @@ gen6_render_composite_boxes(struct sna *sna,
} while (nbox);
}
+static void
+gen6_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen6_get_rectangles(sna, op, nbox,
+ gen6_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+ } while (nbox);
+}
+
+static void
+gen6_render_composite_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen6_get_rectangles(sna, op, nbox,
+ gen6_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
@@ -2214,7 +2270,11 @@ gen6_render_composite(struct sna *sna,
tmp->blt = gen6_render_composite_blt;
tmp->box = gen6_render_composite_box;
- tmp->boxes = gen6_render_composite_boxes;
+ tmp->boxes = gen6_render_composite_boxes__blt;
+ if (tmp->emit_boxes) {
+ tmp->boxes = gen6_render_composite_boxes;
+ tmp->thread_boxes = gen6_render_composite_boxes__thread;
+ }
tmp->done = gen6_render_composite_done;
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 34ba252a..6eec4b45 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -1463,9 +1463,9 @@ gen7_render_composite_box(struct sna *sna,
}
static void
-gen7_render_composite_boxes(struct sna *sna,
- const struct sna_composite_op *op,
- const BoxRec *box, int nbox)
+gen7_render_composite_boxes__blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
{
DBG(("composite_boxes(%d)\n", nbox));
@@ -1495,6 +1495,62 @@ gen7_render_composite_boxes(struct sna *sna,
} while (nbox);
}
+static void
+gen7_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen7_get_rectangles(sna, op, nbox,
+ gen7_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+ } while (nbox);
+}
+
+static void
+gen7_render_composite_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen7_get_rectangles(sna, op, nbox,
+ gen7_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
@@ -2334,7 +2390,11 @@ gen7_render_composite(struct sna *sna,
tmp->blt = gen7_render_composite_blt;
tmp->box = gen7_render_composite_box;
- tmp->boxes = gen7_render_composite_boxes;
+ tmp->boxes = gen7_render_composite_boxes__blt;
+	if (tmp->emit_boxes) {
+ tmp->boxes = gen7_render_composite_boxes;
+ tmp->thread_boxes = gen7_render_composite_boxes__thread;
+ }
tmp->done = gen7_render_composite_done;
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 56025790..edfcb9ea 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -934,6 +934,76 @@ static void blt_composite_fill_boxes_no_offset(struct sna *sna,
_sna_blt_fill_boxes(sna, &op->u.blt, box, n);
}
+static void blt_composite_fill_boxes_no_offset__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ struct kgem *kgem = &sna->kgem;
+ const struct sna_blt_state *blt = &op->u.blt;
+ uint32_t cmd = blt->cmd;
+
+ DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));
+
+ sna_vertex_lock(&sna->render);
+ if (!kgem_check_batch(kgem, 3)) {
+ sna_vertex_wait__locked(&sna->render);
+ sna_blt_fill_begin(sna, blt);
+ }
+
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ kgem->nbatch += 3 * nbox_this_time;
+ assert(kgem->nbatch < kgem->surface);
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ while (nbox_this_time >= 8) {
+ b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
+ b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
+ b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
+ b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
+ b[12] = cmd; *(uint64_t *)(b+13) = *(const uint64_t *)box++;
+ b[15] = cmd; *(uint64_t *)(b+16) = *(const uint64_t *)box++;
+ b[18] = cmd; *(uint64_t *)(b+19) = *(const uint64_t *)box++;
+ b[21] = cmd; *(uint64_t *)(b+22) = *(const uint64_t *)box++;
+ b += 24;
+ nbox_this_time -= 8;
+ }
+ if (nbox_this_time & 4) {
+ b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
+ b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
+ b[6] = cmd; *(uint64_t *)(b+7) = *(const uint64_t *)box++;
+ b[9] = cmd; *(uint64_t *)(b+10) = *(const uint64_t *)box++;
+ b += 12;
+ }
+ if (nbox_this_time & 2) {
+ b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
+ b[3] = cmd; *(uint64_t *)(b+4) = *(const uint64_t *)box++;
+ b += 6;
+ }
+ if (nbox_this_time & 1) {
+ b[0] = cmd; *(uint64_t *)(b+1) = *(const uint64_t *)box++;
+ }
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ if (!nbox)
+ break;
+
+ sna_vertex_wait__locked(&sna->render);
+ sna_blt_fill_begin(sna, blt);
+ } while (1);
+ sna_vertex_unlock(&sna->render);
+}
+
fastcall static void blt_composite_fill_box(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box)
@@ -957,6 +1027,92 @@ static void blt_composite_fill_boxes(struct sna *sna,
} while (--n);
}
+static inline uint64_t add4(const BoxRec *b, int16_t x, int16_t y)
+{
+ union {
+ uint64_t v;
+ int16_t i[4];
+ } vi;
+ vi.v = *(uint64_t *)b;
+ vi.i[0] += x;
+ vi.i[1] += y;
+ vi.i[2] += x;
+ vi.i[3] += y;
+ return vi.v;
+}
+
+static void blt_composite_fill_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ struct kgem *kgem = &sna->kgem;
+ const struct sna_blt_state *blt = &op->u.blt;
+ uint32_t cmd = blt->cmd;
+ int16_t dx = op->dst.x;
+ int16_t dy = op->dst.y;
+
+ DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));
+
+ sna_vertex_lock(&sna->render);
+ if (!kgem_check_batch(kgem, 3)) {
+ sna_vertex_wait__locked(&sna->render);
+ sna_blt_fill_begin(sna, blt);
+ }
+
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ kgem->nbatch += 3 * nbox_this_time;
+ assert(kgem->nbatch < kgem->surface);
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ while (nbox_this_time >= 8) {
+ b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
+ b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
+ b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
+ b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
+ b[12] = cmd; *(uint64_t *)(b+13) = add4(box++, dx, dy);
+ b[15] = cmd; *(uint64_t *)(b+16) = add4(box++, dx, dy);
+ b[18] = cmd; *(uint64_t *)(b+19) = add4(box++, dx, dy);
+ b[21] = cmd; *(uint64_t *)(b+22) = add4(box++, dx, dy);
+ b += 24;
+ nbox_this_time -= 8;
+ }
+ if (nbox_this_time & 4) {
+ b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
+ b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
+ b[6] = cmd; *(uint64_t *)(b+7) = add4(box++, dx, dy);
+ b[9] = cmd; *(uint64_t *)(b+10) = add4(box++, dx, dy);
+ b += 12;
+ }
+ if (nbox_this_time & 2) {
+ b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
+ b[3] = cmd; *(uint64_t *)(b+4) = add4(box++, dx, dy);
+ b += 6;
+ }
+ if (nbox_this_time & 1) {
+ b[0] = cmd; *(uint64_t *)(b+1) = add4(box++, dx, dy);
+ }
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ if (!nbox)
+ break;
+
+ sna_vertex_wait__locked(&sna->render);
+ sna_blt_fill_begin(sna, blt);
+ } while (1);
+ sna_vertex_unlock(&sna->render);
+}
+
fastcall
static void blt_composite_nop(struct sna *sna,
const struct sna_composite_op *op,
@@ -1014,6 +1170,7 @@ prepare_blt_clear(struct sna *sna,
op->blt = blt_composite_fill__cpu;
op->box = blt_composite_fill_box__cpu;
op->boxes = blt_composite_fill_boxes__cpu;
+ op->thread_boxes = blt_composite_fill_boxes__cpu;
op->done = nop_done;
op->u.blt.pixel = 0;
return true;
@@ -1023,9 +1180,11 @@ prepare_blt_clear(struct sna *sna,
if (op->dst.x|op->dst.y) {
op->box = blt_composite_fill_box;
op->boxes = blt_composite_fill_boxes;
+ op->thread_boxes = blt_composite_fill_boxes__thread;
} else {
op->box = blt_composite_fill_box_no_offset;
op->boxes = blt_composite_fill_boxes_no_offset;
+ op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
}
op->done = nop_done;
@@ -1050,6 +1209,7 @@ prepare_blt_fill(struct sna *sna,
op->blt = blt_composite_fill__cpu;
op->box = blt_composite_fill_box__cpu;
op->boxes = blt_composite_fill_boxes__cpu;
+ op->thread_boxes = blt_composite_fill_boxes__cpu;
op->done = nop_done;
return true;
}
@@ -1058,9 +1218,11 @@ prepare_blt_fill(struct sna *sna,
if (op->dst.x|op->dst.y) {
op->box = blt_composite_fill_box;
op->boxes = blt_composite_fill_boxes;
+ op->thread_boxes = blt_composite_fill_boxes__thread;
} else {
op->box = blt_composite_fill_box_no_offset;
op->boxes = blt_composite_fill_boxes_no_offset;
+ op->thread_boxes = blt_composite_fill_boxes_no_offset__thread;
}
op->done = nop_done;
@@ -1151,6 +1313,141 @@ static void blt_composite_copy_boxes(struct sna *sna,
} while(--nbox);
}
+static inline uint32_t add2(uint32_t v, int16_t x, int16_t y)
+{
+ x += v & 0xffff;
+ y += v >> 16;
+ return (uint16_t)y << 16 | x;
+}
+
+static void blt_composite_copy_boxes__thread(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ struct kgem *kgem = &sna->kgem;
+ int dst_dx = op->dst.x;
+ int dst_dy = op->dst.y;
+ int src_dx = op->src.offset[0];
+ int src_dy = op->src.offset[1];
+ uint32_t cmd = op->u.blt.cmd;
+ uint32_t br13 = op->u.blt.br13;
+ struct kgem_bo *src_bo = op->u.blt.bo[0];
+ struct kgem_bo *dst_bo = op->u.blt.bo[1];
+ int src_pitch = op->u.blt.pitch[0];
+
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+
+ if ((dst_dx | dst_dy) == 0) {
+ uint64_t hdr = (uint64_t)br13 << 32 | cmd;
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG((" %s: box=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1));
+
+ assert(box->x1 + src_dx >= 0);
+ assert(box->y1 + src_dy >= 0);
+ assert(box->x1 + src_dx <= INT16_MAX);
+ assert(box->y1 + src_dy <= INT16_MAX);
+
+ assert(box->x1 >= 0);
+ assert(box->y1 >= 0);
+
+ *(uint64_t *)&b[0] = hdr;
+ *(uint64_t *)&b[2] = *(const uint64_t *)box;
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = add2(b[2], src_dx, src_dy);
+ b[6] = src_pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 8;
+ assert(kgem->nbatch < kgem->surface);
+ box++;
+ } while (--nbox_this_time);
+
+ if (!nbox)
+ break;
+
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ } while (1);
+ } else {
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG((" %s: box=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1));
+
+ assert(box->x1 + src_dx >= 0);
+ assert(box->y1 + src_dy >= 0);
+
+ assert(box->x1 + dst_dx >= 0);
+ assert(box->y1 + dst_dy >= 0);
+
+ b[0] = cmd;
+ b[1] = br13;
+ b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
+ b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
+ b[6] = src_pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 8;
+ assert(kgem->nbatch < kgem->surface);
+ box++;
+ } while (--nbox_this_time);
+
+ if (!nbox)
+ break;
+
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ } while (1);
+ }
+ sna_vertex_unlock(&sna->render);
+}
+
fastcall static void
blt_composite_copy_with_alpha(struct sna *sna,
const struct sna_composite_op *op,
@@ -1277,6 +1574,7 @@ prepare_blt_copy(struct sna *sna,
op->blt = blt_composite_copy;
op->box = blt_composite_copy_box;
op->boxes = blt_composite_copy_boxes;
+ op->thread_boxes = blt_composite_copy_boxes__thread;
if (!sna_blt_copy_init(sna, &op->u.blt, bo, op->dst.bo,
src->drawable.bitsPerPixel,
@@ -2075,6 +2373,7 @@ sna_blt_composite__convert(struct sna *sna,
tmp->blt = blt_composite_copy;
tmp->box = blt_composite_copy_box;
tmp->boxes = blt_composite_copy_boxes;
+ tmp->thread_boxes = blt_composite_copy_boxes__thread;
if (!sna_blt_copy_init(sna, &tmp->u.blt,
tmp->src.bo, tmp->dst.bo,
@@ -2446,13 +2745,6 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
return true;
}
-static inline uint32_t add2(uint32_t v, int16_t x, int16_t y)
-{
- x += v & 0xffff;
- y += v >> 16;
- return (uint16_t)y << 16 | x;
-}
-
bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 6a0b1d8a..c953e50c 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -35,6 +35,8 @@ struct sna_composite_op {
const BoxRec *box);
void (*boxes)(struct sna *sna, const struct sna_composite_op *op,
const BoxRec *box, int nbox);
+ void (*thread_boxes)(struct sna *sna, const struct sna_composite_op *op,
+ const BoxRec *box, int nbox);
void (*done)(struct sna *sna, const struct sna_composite_op *op);
struct sna_damage **damage;
@@ -93,6 +95,9 @@ struct sna_composite_op {
fastcall void (*prim_emit)(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r);
+ fastcall void (*emit_boxes)(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v);
struct sna_composite_redirect {
struct kgem_bo *real_bo;
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index bf4816bb..7f7492ad 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -1876,7 +1876,7 @@ static void
mono_add_line(struct mono *mono,
int dst_x, int dst_y,
xFixed top, xFixed bottom,
- xPointFixed *p1, xPointFixed *p2,
+ const xPointFixed *p1, const xPointFixed *p2,
int dir)
{
struct mono_polygon *polygon = &mono->polygon;
@@ -1893,7 +1893,7 @@ mono_add_line(struct mono *mono,
dir));
if (top > bottom) {
- xPointFixed *t;
+ const xPointFixed *t;
y = top;
top = bottom;
@@ -2150,6 +2150,60 @@ mono_span__fast(struct mono *c, int x1, int x2, BoxPtr box)
c->op.box(c->sna, &c->op, box);
}
+struct mono_span_thread_boxes {
+ const struct sna_composite_op *op;
+#define MONO_SPAN_MAX_BOXES (8192/sizeof(BoxRec))
+ BoxRec boxes[MONO_SPAN_MAX_BOXES];
+ int num_boxes;
+};
+
+inline static void
+thread_mono_span_add_boxes(struct mono *c, const BoxRec *box, int count)
+{
+ struct mono_span_thread_boxes *b = c->op.priv;
+
+ assert(count > 0 && count <= MONO_SPAN_MAX_BOXES);
+ if (b->num_boxes + count > MONO_SPAN_MAX_BOXES) {
+ b->op->thread_boxes(c->sna, b->op, b->boxes, b->num_boxes);
+ b->num_boxes = 0;
+ }
+
+ memcpy(b->boxes + b->num_boxes, box, count*sizeof(BoxRec));
+ b->num_boxes += count;
+ assert(b->num_boxes <= MONO_SPAN_MAX_BOXES);
+}
+
+fastcall static void
+thread_mono_span_clipped(struct mono *c, int x1, int x2, BoxPtr box)
+{
+ pixman_region16_t region;
+
+ __DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2));
+
+ box->x1 = x1;
+ box->x2 = x2;
+
+ assert(c->clip.data);
+
+ pixman_region_init_rects(&region, box, 1);
+ RegionIntersect(&region, &region, &c->clip);
+ if (REGION_NUM_RECTS(&region))
+ thread_mono_span_add_boxes(c,
+ REGION_RECTS(&region),
+ REGION_NUM_RECTS(&region));
+ pixman_region_fini(&region);
+}
+
+fastcall static void
+thread_mono_span(struct mono *c, int x1, int x2, BoxPtr box)
+{
+ __DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2));
+
+ box->x1 = x1;
+ box->x2 = x2;
+ thread_mono_span_add_boxes(c, box, 1);
+}
+
inline static void
mono_row(struct mono *c, int16_t y, int16_t h)
{
@@ -2267,10 +2321,7 @@ mono_render(struct mono *mono)
struct mono_polygon *polygon = &mono->polygon;
int i, j, h = mono->clip.extents.y2 - mono->clip.extents.y1;
- if (mono->clip.data == NULL && mono->op.damage == NULL)
- mono->span = mono_span__fast;
- else
- mono->span = mono_span;
+ assert(mono->span);
for (i = 0; i < h; i = j) {
j = i + 1;
@@ -4053,6 +4104,74 @@ choose_span(struct sna_composite_spans_op *tmp,
return span;
}
+struct mono_span_thread {
+ struct sna *sna;
+ const xTrapezoid *traps;
+ const struct sna_composite_op *op;
+ RegionPtr clip;
+ int ntrap;
+ BoxRec extents;
+ int dx, dy;
+};
+
+static void
+mono_span_thread(void *arg)
+{
+ struct mono_span_thread *thread = arg;
+ struct mono mono;
+ struct mono_span_thread_boxes boxes;
+ const xTrapezoid *t;
+ int n;
+
+ mono.sna = thread->sna;
+
+ mono.clip.extents = thread->extents;
+ mono.clip.data = NULL;
+ if (thread->clip->data) {
+ RegionIntersect(&mono.clip, &mono.clip, thread->clip);
+ if (RegionNil(&mono.clip))
+ return;
+ }
+
+ boxes.op = thread->op;
+ boxes.num_boxes = 0;
+ mono.op.priv = &boxes;
+
+ if (!mono_init(&mono, 2*thread->ntrap)) {
+ RegionUninit(&mono.clip);
+ return;
+ }
+
+ for (n = thread->ntrap, t = thread->traps; n--; t++) {
+ if (!xTrapezoidValid(t))
+ continue;
+
+ if (pixman_fixed_to_int(t->top) + thread->dy >= thread->extents.y2 ||
+ pixman_fixed_to_int(t->bottom) + thread->dy <= thread->extents.y1)
+ continue;
+
+ mono_add_line(&mono, thread->dx, thread->dy,
+ t->top, t->bottom,
+ &t->left.p1, &t->left.p2, 1);
+ mono_add_line(&mono, thread->dx, thread->dy,
+ t->top, t->bottom,
+ &t->right.p1, &t->right.p2, -1);
+ }
+
+ if (mono.clip.data == NULL)
+ mono.span = thread_mono_span;
+ else
+ mono.span = thread_mono_span_clipped;
+
+ mono_render(&mono);
+ mono_fini(&mono);
+
+ if (boxes.num_boxes)
+ thread->op->thread_boxes(thread->sna, thread->op,
+ boxes.boxes, boxes.num_boxes);
+ RegionUninit(&mono.clip);
+}
+
static bool
mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
INT16 src_x, INT16 src_y,
@@ -4062,8 +4181,8 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
BoxRec extents;
int16_t dst_x, dst_y;
int16_t dx, dy;
- bool was_clear;
- int n;
+ bool unbounded;
+ int num_threads, n;
if (NO_SCAN_CONVERTER)
return false;
@@ -4102,11 +4221,69 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
src_x + mono.clip.extents.x1 - dst_x - dx,
src_y + mono.clip.extents.y1 - dst_y - dy));
+ unbounded = (!sna_drawable_is_clear(dst->pDrawable) &&
+ !operator_is_bounded(op));
+
mono.sna = to_sna_from_drawable(dst->pDrawable);
- if (!mono_init(&mono, 2*ntrap))
+ if (!mono.sna->render.composite(mono.sna, op, src, NULL, dst,
+ src_x + mono.clip.extents.x1 - dst_x - dx,
+ src_y + mono.clip.extents.y1 - dst_y - dy,
+ 0, 0,
+ mono.clip.extents.x1, mono.clip.extents.y1,
+ mono.clip.extents.x2 - mono.clip.extents.x1,
+ mono.clip.extents.y2 - mono.clip.extents.y1,
+ memset(&mono.op, 0, sizeof(mono.op))))
return false;
- was_clear = sna_drawable_is_clear(dst->pDrawable);
+ num_threads = 1;
+ if (!NO_GPU_THREADS &&
+ mono.op.thread_boxes &&
+ mono.op.damage == NULL &&
+ !unbounded)
+ num_threads = sna_use_threads(mono.clip.extents.x2 - mono.clip.extents.x1,
+ mono.clip.extents.y2 - mono.clip.extents.y1,
+ 16);
+ if (num_threads > 1) {
+ struct mono_span_thread threads[num_threads];
+ int y, h;
+
+ DBG(("%s: using %d threads for mono span compositing %dx%d\n",
+ __FUNCTION__, num_threads,
+ mono.clip.extents.x2 - mono.clip.extents.x1,
+ mono.clip.extents.y2 - mono.clip.extents.y1));
+
+ threads[0].sna = mono.sna;
+ threads[0].op = &mono.op;
+ threads[0].traps = traps;
+ threads[0].ntrap = ntrap;
+ threads[0].extents = mono.clip.extents;
+ threads[0].clip = &mono.clip;
+ threads[0].dx = dx;
+ threads[0].dy = dy;
+
+ y = extents.y1;
+ h = extents.y2 - extents.y1;
+ h = (h + num_threads - 1) / num_threads;
+
+ for (n = 1; n < num_threads; n++) {
+ threads[n] = threads[0];
+ threads[n].extents.y1 = y;
+ threads[n].extents.y2 = y += h;
+
+ sna_threads_run(mono_span_thread, &threads[n]);
+ }
+
+ threads[0].extents.y1 = y;
+ threads[0].extents.y2 = extents.y2;
+ mono_span_thread(&threads[0]);
+
+ sna_threads_wait();
+ mono.op.done(mono.sna, &mono.op);
+ return true;
+ }
+
+ if (!mono_init(&mono, 2*ntrap))
+ return false;
for (n = 0; n < ntrap; n++) {
if (!xTrapezoidValid(&traps[n]))
@@ -4124,23 +4301,16 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
&traps[n].right.p1, &traps[n].right.p2, -1);
}
- memset(&mono.op, 0, sizeof(mono.op));
- if (!mono.sna->render.composite(mono.sna, op, src, NULL, dst,
- src_x + mono.clip.extents.x1 - dst_x - dx,
- src_y + mono.clip.extents.y1 - dst_y - dy,
- 0, 0,
- mono.clip.extents.x1, mono.clip.extents.y1,
- mono.clip.extents.x2 - mono.clip.extents.x1,
- mono.clip.extents.y2 - mono.clip.extents.y1,
- &mono.op)) {
- mono_fini(&mono);
- return false;
- }
+ if (mono.clip.data == NULL && mono.op.damage == NULL)
+ mono.span = mono_span__fast;
+ else
+ mono.span = mono_span;
+
mono_render(&mono);
mono.op.done(mono.sna, &mono.op);
mono_fini(&mono);
- if (!was_clear && !operator_is_bounded(op)) {
+ if (unbounded) {
xPointFixed p1, p2;
if (!mono_init(&mono, 2+2*ntrap))
@@ -5245,6 +5415,11 @@ unbounded_pass:
mono.op.box = mono_inplace_composite_box;
mono.op.boxes = mono_inplace_composite_boxes;
}
+
+ if (mono.clip.data == NULL && mono.op.damage == NULL)
+ mono.span = mono_span__fast;
+ else
+ mono.span = mono_span;
mono_render(&mono);
mono_fini(&mono);
@@ -6850,6 +7025,10 @@ mono_triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
mono.clip.extents.x2 - mono.clip.extents.x1,
mono.clip.extents.y2 - mono.clip.extents.y1,
&mono.op)) {
+ if (mono.clip.data == NULL && mono.op.damage == NULL)
+ mono.span = mono_span__fast;
+ else
+ mono.span = mono_span;
mono_render(&mono);
mono.op.done(mono.sna, &mono.op);
}
@@ -6893,6 +7072,10 @@ mono_triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
mono.clip.extents.x2 - mono.clip.extents.x1,
mono.clip.extents.y2 - mono.clip.extents.y1,
&mono.op)) {
+ if (mono.clip.data == NULL && mono.op.damage == NULL)
+ mono.span = mono_span__fast;
+ else
+ mono.span = mono_span;
mono_render(&mono);
mono.op.done(mono.sna, &mono.op);
}