author     Chris Wilson <chris@chris-wilson.co.uk>  2013-02-26 00:02:16 +0000
committer  Chris Wilson <chris@chris-wilson.co.uk>  2013-02-26 00:03:28 +0000
commit     94b95cc2fc63457903a7b7b6eaa09bc27f25750c (patch)
tree       a8bdd1dbb1cb5f1381df4658c4c599fee2a0c5d3 /src/sna
parent     f095678125b25aeae80d838729a7f89d09007e10 (diff)
sna/gen4+: Begin specialising vertex programs for ISA
Allow the use of more advanced ISAs when available by detecting support at
runtime. This initial work just uses GCC to emit the varying ISA code paths;
future work could use hand-written code for these hot spots.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
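
The mechanism is established in src/sna/compiler.h (first hunk of the diff
below): the sse2, sse4_2 and avx2 macros expand to GCC per-function target
attributes, so the same emitter source can be compiled several times, once per
instruction set, and the best-supported build selected at runtime. A minimal
sketch of that pattern, with an illustrative emitter body that is not taken
from the driver:

/* sse2/sse4_2/avx2 expand to per-function target attributes on GCC
 * (the real header gates this on GCC >= 4.4); elsewhere they vanish. */
#if defined(__GNUC__) && (__GNUC__ >= 4)
#define sse2   __attribute__((target("sse2")))
#define sse4_2 __attribute__((target("sse4.2,sse2")))
#define avx2   __attribute__((target("avx2,sse4.2,sse2")))
#else
#define sse2
#define sse4_2
#define avx2
#endif

/* Same C body, three builds: GCC may vectorise each one for its target ISA.
 * scale_boxes() is a made-up stand-in for the real vertex emitters. */
static void scale_boxes(const float *src, float *dst, int n)
{
	int i;
	for (i = 0; i < 4 * n; i++)
		dst[i] = 2.0f * src[i];
}

sse4_2 static void scale_boxes__sse4_2(const float *src, float *dst, int n)
{
	int i;
	for (i = 0; i < 4 * n; i++)
		dst[i] = 2.0f * src[i];
}

avx2 static void scale_boxes__avx2(const float *src, float *dst, int n)
{
	int i;
	for (i = 0; i < 4 * n; i++)
		dst[i] = 2.0f * src[i];
}

The follow-up hunks below duplicate each hot emitter under __sse4_2 and
__avx2 suffixes using exactly these macros.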
Diffstat (limited to 'src/sna')
-rw-r--r--  src/sna/compiler.h    |   10
-rw-r--r--  src/sna/gen4_render.c |    4
-rw-r--r--  src/sna/gen4_vertex.c | 1129
-rw-r--r--  src/sna/gen4_vertex.h |    4
-rw-r--r--  src/sna/gen5_render.c |    4
-rw-r--r--  src/sna/gen6_render.c |    4
-rw-r--r--  src/sna/gen7_render.c |    4
7 files changed, 1123 insertions, 36 deletions
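
The sna parameter added to gen4_choose_composite_emitter() and
gen4_choose_spans_emitter() exists so the choosers can consult the detected
CPU features and install the matching emitter variant. The selection logic
repeated throughout gen4_vertex.c reduces to the pattern below; the struct
layouts, flag values and empty emitter bodies are simplifications for
illustration, not the driver's real definitions:

/* Reduced sketch of the runtime dispatch used in the choosers: pick the
 * most capable variant the CPU supports and install it as a function
 * pointer on the composite op. */
#define SSE4_2 (1u << 1)
#define AVX2   (1u << 2)

typedef void (*emit_fn)(void);

struct sna { unsigned cpu_features; };
struct sna_composite_op { emit_fn prim_emit, emit_boxes; int floats_per_vertex; };

/* Plain, SSE4.2 and AVX2 builds of the same emitter (bodies elided here). */
static void emit_primitive_linear(void) {}
static void emit_primitive_linear__sse4_2(void) {}
static void emit_primitive_linear__avx2(void) {}
static void emit_boxes_linear(void) {}
static void emit_boxes_linear__sse4_2(void) {}
static void emit_boxes_linear__avx2(void) {}

static void choose_linear_emitter(struct sna *sna, struct sna_composite_op *tmp)
{
	if (sna->cpu_features & AVX2) {
		tmp->prim_emit  = emit_primitive_linear__avx2;
		tmp->emit_boxes = emit_boxes_linear__avx2;
	} else if (sna->cpu_features & SSE4_2) {
		tmp->prim_emit  = emit_primitive_linear__sse4_2;
		tmp->emit_boxes = emit_boxes_linear__sse4_2;
	} else {
		tmp->prim_emit  = emit_primitive_linear;
		tmp->emit_boxes = emit_boxes_linear;
	}
	tmp->floats_per_vertex = 2;
}

Because the choice is made once per composite operation, the per-vertex
emission path itself stays branch-free.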
diff --git a/src/sna/compiler.h b/src/sna/compiler.h index 23ec31c3..fe2e3214 100644 --- a/src/sna/compiler.h +++ b/src/sna/compiler.h @@ -52,6 +52,16 @@ #define flatten #endif +#if defined(__GNUC__) && (__GNUC__ >= 4) /* 4.4 */ +#define sse2 __attribute__((target("sse2"))) +#define sse4_2 __attribute__((target("sse4.2,sse2"))) +#define avx2 __attribute__((target("avx2,sse4.2,sse2"))) +#else +#define sse2 +#define sse4_2 +#define avx2 +#endif + #ifdef HAVE_VALGRIND #define VG(x) x #else diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c index d08d762b..92802469 100644 --- a/src/sna/gen4_render.c +++ b/src/sna/gen4_render.c @@ -1945,7 +1945,7 @@ gen4_render_composite(struct sna *sna, tmp->mask.bo != NULL, tmp->has_component_alpha, tmp->is_affine); - tmp->u.gen4.ve_id = gen4_choose_composite_emitter(tmp); + tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp); tmp->blt = gen4_render_composite_blt; tmp->box = gen4_render_composite_box; @@ -2186,7 +2186,7 @@ gen4_render_composite_spans(struct sna *sna, tmp->base.has_component_alpha = false; tmp->base.need_magic_ca_pass = false; - tmp->base.u.gen4.ve_id = gen4_choose_spans_emitter(tmp); + tmp->base.u.gen4.ve_id = gen4_choose_spans_emitter(sna, tmp); tmp->base.u.gen4.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine; tmp->box = gen4_render_composite_spans_box; diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c index 5062ebdf..20f85b37 100644 --- a/src/sna/gen4_vertex.c +++ b/src/sna/gen4_vertex.c @@ -272,10 +272,10 @@ emit_texcoord(struct sna *sna, inline static void emit_vertex(struct sna *sna, - const struct sna_composite_op *op, - int16_t srcX, int16_t srcY, - int16_t mskX, int16_t mskY, - int16_t dstX, int16_t dstY) + const struct sna_composite_op *op, + int16_t srcX, int16_t srcY, + int16_t mskX, int16_t mskY, + int16_t dstX, int16_t dstY) { OUT_VERTEX(dstX, dstY); emit_texcoord(sna, &op->src, srcX, srcY); @@ -414,6 +414,66 @@ emit_primitive_linear(struct sna *sna, v[5] = compute_linear(&op->src, r->src.x, r->src.y); } +sse4_2 fastcall static void +emit_primitive_linear__sse4_2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + assert(op->floats_per_rect == 6); + assert((sna->render.vertex_used % 2) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[2] = dst.f; + dst.p.y = r->dst.y; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); + v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); + v[5] = compute_linear(&op->src, r->src.x, r->src.y); +} + +avx2 fastcall static void +emit_primitive_linear__avx2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + assert(op->floats_per_rect == 6); + assert((sna->render.vertex_used % 2) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[2] = dst.f; + dst.p.y = r->dst.y; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); + v[3] 
= compute_linear(&op->src, r->src.x, r->src.y+r->height); + v[5] = compute_linear(&op->src, r->src.x, r->src.y); +} + fastcall static void emit_boxes_linear(const struct sna_composite_op *op, const BoxRec *box, int nbox, @@ -442,6 +502,62 @@ emit_boxes_linear(const struct sna_composite_op *op, } while (--nbox); } +sse4_2 fastcall static void +emit_boxes_linear__sse4_2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + union { + struct sna_coordinate p; + float f; + } dst; + + do { + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[2] = dst.f; + dst.p.y = box->y1; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, box->x2, box->y2); + v[3] = compute_linear(&op->src, box->x1, box->y2); + v[5] = compute_linear(&op->src, box->x1, box->y1); + + v += 6; + box++; + } while (--nbox); +} + +avx2 fastcall static void +emit_boxes_linear__avx2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + union { + struct sna_coordinate p; + float f; + } dst; + + do { + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[2] = dst.f; + dst.p.y = box->y1; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, box->x2, box->y2); + v[3] = compute_linear(&op->src, box->x1, box->y2); + v[5] = compute_linear(&op->src, box->x1, box->y1); + + v += 6; + box++; + } while (--nbox); +} + fastcall static void emit_primitive_identity_source(struct sna *sna, const struct sna_composite_op *op, @@ -473,6 +589,68 @@ emit_primitive_identity_source(struct sna *sna, v[5] = v[2] = v[8] + r->height * op->src.scale[1]; } +sse4_2 fastcall static void +emit_primitive_identity_source__sse4_2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[3] = dst.f; + dst.p.y = r->dst.y; + v[6] = dst.f; + + v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; + v[1] = v[4] + r->width * op->src.scale[0]; + + v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; + v[5] = v[2] = v[8] + r->height * op->src.scale[1]; +} + +avx2 fastcall static void +emit_primitive_identity_source__avx2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[3] = dst.f; + dst.p.y = r->dst.y; + v[6] = dst.f; + + v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; + v[1] = v[4] + r->width * op->src.scale[0]; + + v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; + v[5] = v[2] = v[8] + r->height * op->src.scale[1]; +} + fastcall static void emit_boxes_identity_source(const struct sna_composite_op *op, const BoxRec *box, int nbox, @@ -503,6 +681,66 @@ emit_boxes_identity_source(const struct sna_composite_op *op, } while (--nbox); } +sse4_2 fastcall static void +emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op, + const BoxRec 
*box, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[3] = dst.f; + dst.p.y = box->y1; + v[6] = dst.f; + + v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; + v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; + + v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; + v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; + + v += 9; + box++; + } while (--nbox); +} + +avx2 fastcall static void +emit_boxes_identity_source__avx2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[3] = dst.f; + dst.p.y = box->y1; + v[6] = dst.f; + + v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; + v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; + + v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; + v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; + + v += 9; + box++; + } while (--nbox); +} + fastcall static void emit_primitive_simple_source(struct sna *sna, const struct sna_composite_op *op, @@ -543,6 +781,86 @@ emit_primitive_simple_source(struct sna *sna, v[8] = ((r->src.y + ty) * yy + y0) * sy; } +sse4_2 fastcall static void +emit_primitive_simple_source__sse4_2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*3; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; + v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; + + dst.p.x = r->dst.x; + v[3] = dst.f; + v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; + + dst.p.y = r->dst.y; + v[6] = dst.f; + v[8] = ((r->src.y + ty) * yy + y0) * sy; +} + +avx2 fastcall static void +emit_primitive_simple_source__avx2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*3; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; + v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; + + dst.p.x = r->dst.x; + v[3] = dst.f; + v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; + + dst.p.y = r->dst.y; + v[6] = dst.f; + v[8] = 
((r->src.y + ty) * yy + y0) * sy; +} + fastcall static void emit_boxes_simple_source(const struct sna_composite_op *op, const BoxRec *box, int nbox, @@ -582,6 +900,84 @@ emit_boxes_simple_source(const struct sna_composite_op *op, } while (--nbox); } +sse4_2 fastcall static void +emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = ((box->x2 + tx) * xx + x0) * sx; + v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; + + dst.p.x = box->x1; + v[3] = dst.f; + v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; + + dst.p.y = box->y1; + v[6] = dst.f; + v[8] = ((box->y1 + ty) * yy + y0) * sy; + + v += 9; + box++; + } while (--nbox); +} + +avx2 fastcall static void +emit_boxes_simple_source__avx2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = ((box->x2 + tx) * xx + x0) * sx; + v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; + + dst.p.x = box->x1; + v[3] = dst.f; + v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; + + dst.p.y = box->y1; + v[6] = dst.f; + v[8] = ((box->y1 + ty) * yy + y0) * sy; + + v += 9; + box++; + } while (--nbox); +} + fastcall static void emit_primitive_affine_source(struct sna *sna, const struct sna_composite_op *op, @@ -981,7 +1377,7 @@ emit_composite_texcoord_affine(struct sna *sna, } -unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp) +unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp) { unsigned vb; @@ -1060,14 +1456,30 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp) vb = 1; } else if (tmp->src.is_linear) { DBG(("%s: linear, no mask\n", __FUNCTION__)); - tmp->prim_emit = emit_primitive_linear; - tmp->emit_boxes = emit_boxes_linear; + if (sna->cpu_features & AVX2) { + tmp->prim_emit = emit_primitive_linear__avx2; + tmp->emit_boxes = emit_boxes_linear__avx2; + } else if (sna->cpu_features & SSE4_2) { + tmp->prim_emit = emit_primitive_linear__sse4_2; + tmp->emit_boxes = emit_boxes_linear__sse4_2; + } else { + tmp->prim_emit = emit_primitive_linear; + tmp->emit_boxes = emit_boxes_linear; + } tmp->floats_per_vertex = 2; vb = 1; } else if (tmp->src.transform == NULL) { DBG(("%s: identity src, no mask\n", __FUNCTION__)); - tmp->prim_emit = emit_primitive_identity_source; - tmp->emit_boxes = emit_boxes_identity_source; + if (sna->cpu_features & AVX2) { + tmp->prim_emit = emit_primitive_identity_source__avx2; + tmp->emit_boxes = emit_boxes_identity_source__avx2; + } else if (sna->cpu_features & SSE4_2) { + tmp->prim_emit = emit_primitive_identity_source__sse4_2; + tmp->emit_boxes = emit_boxes_identity_source__sse4_2; + } else { + tmp->prim_emit 
= emit_primitive_identity_source; + tmp->emit_boxes = emit_boxes_identity_source; + } tmp->floats_per_vertex = 3; vb = 2; } else if (tmp->src.is_affine) { @@ -1075,8 +1487,16 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp) tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; if (!sna_affine_transform_is_rotation(tmp->src.transform)) { DBG(("%s: simple src, no mask\n", __FUNCTION__)); - tmp->prim_emit = emit_primitive_simple_source; - tmp->emit_boxes = emit_boxes_simple_source; + if (sna->cpu_features & AVX2) { + tmp->prim_emit = emit_primitive_simple_source__avx2; + tmp->emit_boxes = emit_boxes_simple_source__avx2; + } else if (sna->cpu_features & SSE4_2) { + tmp->prim_emit = emit_primitive_simple_source__sse4_2; + tmp->emit_boxes = emit_boxes_simple_source__sse4_2; + } else { + tmp->prim_emit = emit_primitive_simple_source; + tmp->emit_boxes = emit_boxes_simple_source; + } } else { DBG(("%s: affine src, no mask\n", __FUNCTION__)); tmp->prim_emit = emit_primitive_affine_source; @@ -1222,6 +1642,86 @@ emit_span_identity(struct sna *sna, v[11] = v[7] = v[3] = opacity; } +sse4_2 fastcall static void +emit_span_identity__sse4_2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + assert(op->base.floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*4; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = (box->x2 + tx) * sx; + v[6] = v[2] = (box->y2 + ty) * sy; + + dst.p.x = box->x1; + v[4] = dst.f; + v[9] = v[5] = (box->x1 + tx) * sx; + + dst.p.y = box->y1; + v[8] = dst.f; + v[10] = (box->y1 + ty) * sy; + + v[11] = v[7] = v[3] = opacity; +} + +avx2 fastcall static void +emit_span_identity__avx2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + assert(op->base.floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*4; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = (box->x2 + tx) * sx; + v[6] = v[2] = (box->y2 + ty) * sy; + + dst.p.x = box->x1; + v[4] = dst.f; + v[9] = v[5] = (box->x1 + tx) * sx; + + dst.p.y = box->y1; + v[8] = dst.f; + v[10] = (box->y1 + ty) * sy; + + v[11] = v[7] = v[3] = opacity; +} + fastcall static void emit_span_boxes_identity(const struct sna_composite_spans_op *op, const struct sna_opacity_box *b, int nbox, @@ -1259,11 +1759,173 @@ emit_span_boxes_identity(const struct sna_composite_spans_op *op, } while (--nbox); } +sse4_2 fastcall static void +emit_span_boxes_identity__sse4_2(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + 
int16_t ty = op->base.src.offset[1]; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + v[1] = (b->box.x2 + tx) * sx; + v[6] = v[2] = (b->box.y2 + ty) * sy; + + dst.p.x = b->box.x1; + v[4] = dst.f; + v[9] = v[5] = (b->box.x1 + tx) * sx; + + dst.p.y = b->box.y1; + v[8] = dst.f; + v[10] = (b->box.y1 + ty) * sy; + + v[11] = v[7] = v[3] = b->alpha; + + v += 12; + b++; + } while (--nbox); +} + +avx2 fastcall static void +emit_span_boxes_identity__avx2(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + v[1] = (b->box.x2 + tx) * sx; + v[6] = v[2] = (b->box.y2 + ty) * sy; + + dst.p.x = b->box.x1; + v[4] = dst.f; + v[9] = v[5] = (b->box.x1 + tx) * sx; + + dst.p.y = b->box.y1; + v[8] = dst.f; + v[10] = (b->box.y1 + ty) * sy; + + v[11] = v[7] = v[3] = b->alpha; + + v += 12; + b++; + } while (--nbox); +} + fastcall static void emit_span_simple(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float xx = op->base.src.transform->matrix[0][0]; + float x0 = op->base.src.transform->matrix[0][2]; + float yy = op->base.src.transform->matrix[1][1]; + float y0 = op->base.src.transform->matrix[1][2]; + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + assert(op->base.floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*4; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = ((box->x2 + tx) * xx + x0) * sx; + v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy; + + dst.p.x = box->x1; + v[4] = dst.f; + v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx; + + dst.p.y = box->y1; + v[8] = dst.f; + v[10] = ((box->y1 + ty) * yy + y0) * sy; + + v[11] = v[7] = v[3] = opacity; +} + +sse4_2 fastcall static void +emit_span_simple__sse4_2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float xx = op->base.src.transform->matrix[0][0]; + float x0 = op->base.src.transform->matrix[0][2]; + float yy = op->base.src.transform->matrix[1][1]; + float y0 = op->base.src.transform->matrix[1][2]; + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + assert(op->base.floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*4; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = ((box->x2 + tx) * xx + x0) * sx; + v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy; + + dst.p.x = box->x1; + v[4] = dst.f; + v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx; + + dst.p.y = box->y1; + v[8] = dst.f; + v[10] = ((box->y1 + ty) * yy + y0) * sy; + + v[11] = 
v[7] = v[3] = opacity; +} + +avx2 fastcall static void +emit_span_simple__avx2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) { float *v; union { @@ -1344,6 +2006,88 @@ emit_span_boxes_simple(const struct sna_composite_spans_op *op, } while (--nbox); } +sse4_2 fastcall static void +emit_span_boxes_simple__sse4_2(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + float xx = op->base.src.transform->matrix[0][0]; + float x0 = op->base.src.transform->matrix[0][2]; + float yy = op->base.src.transform->matrix[1][1]; + float y0 = op->base.src.transform->matrix[1][2]; + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + v[1] = ((b->box.x2 + tx) * xx + x0) * sx; + v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy; + + dst.p.x = b->box.x1; + v[4] = dst.f; + v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx; + + dst.p.y = b->box.y1; + v[8] = dst.f; + v[10] = ((b->box.y1 + ty) * yy + y0) * sy; + + v[11] = v[7] = v[3] = b->alpha; + + v += 12; + b++; + } while (--nbox); +} + +avx2 fastcall static void +emit_span_boxes_simple__avx2(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + float xx = op->base.src.transform->matrix[0][0]; + float x0 = op->base.src.transform->matrix[0][2]; + float yy = op->base.src.transform->matrix[1][1]; + float y0 = op->base.src.transform->matrix[1][2]; + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + v[1] = ((b->box.x2 + tx) * xx + x0) * sx; + v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy; + + dst.p.x = b->box.x1; + v[4] = dst.f; + v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx; + + dst.p.y = b->box.y1; + v[8] = dst.f; + v[10] = ((b->box.y1 + ty) * yy + y0) * sy; + + v[11] = v[7] = v[3] = b->alpha; + + v += 12; + b++; + } while (--nbox); +} + fastcall static void emit_span_affine(struct sna *sna, const struct sna_composite_spans_op *op, @@ -1389,6 +2133,96 @@ emit_span_affine(struct sna *sna, v[11] = v[7] = v[3] = opacity; } +sse4_2 fastcall static void +emit_span_affine__sse4_2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->base.floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x2, + op->base.src.offset[1] + box->y2, + op->base.src.transform, + op->base.src.scale, + &v[1], &v[2]); + + dst.p.x = box->x1; + v[4] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, + op->base.src.offset[1] + box->y2, + op->base.src.transform, + op->base.src.scale, + &v[5], &v[6]); + + dst.p.y = box->y1; + v[8] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, + op->base.src.offset[1] + box->y1, + op->base.src.transform, + op->base.src.scale, + &v[9], &v[10]); + + v[11] = v[7] = v[3] = 
opacity; +} + +avx2 fastcall static void +emit_span_affine__avx2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->base.floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x2, + op->base.src.offset[1] + box->y2, + op->base.src.transform, + op->base.src.scale, + &v[1], &v[2]); + + dst.p.x = box->x1; + v[4] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, + op->base.src.offset[1] + box->y2, + op->base.src.transform, + op->base.src.scale, + &v[5], &v[6]); + + dst.p.y = box->y1; + v[8] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, + op->base.src.offset[1] + box->y1, + op->base.src.transform, + op->base.src.scale, + &v[9], &v[10]); + + v[11] = v[7] = v[3] = opacity; +} + fastcall static void emit_span_boxes_affine(const struct sna_composite_spans_op *op, const struct sna_opacity_box *b, int nbox, @@ -1432,11 +2266,161 @@ emit_span_boxes_affine(const struct sna_composite_spans_op *op, } while (--nbox); } +sse4_2 fastcall static void +emit_span_boxes_affine__sse4_2(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2, + op->base.src.offset[1] + b->box.y2, + op->base.src.transform, + op->base.src.scale, + &v[1], &v[2]); + + dst.p.x = b->box.x1; + v[4] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, + op->base.src.offset[1] + b->box.y2, + op->base.src.transform, + op->base.src.scale, + &v[5], &v[6]); + + dst.p.y = b->box.y1; + v[8] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, + op->base.src.offset[1] + b->box.y1, + op->base.src.transform, + op->base.src.scale, + &v[9], &v[10]); + + v[11] = v[7] = v[3] = b->alpha; + + v += 12; + b++; + } while (--nbox); +} + +avx2 fastcall static void +emit_span_boxes_affine__avx2(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2, + op->base.src.offset[1] + b->box.y2, + op->base.src.transform, + op->base.src.scale, + &v[1], &v[2]); + + dst.p.x = b->box.x1; + v[4] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, + op->base.src.offset[1] + b->box.y2, + op->base.src.transform, + op->base.src.scale, + &v[5], &v[6]); + + dst.p.y = b->box.y1; + v[8] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, + op->base.src.offset[1] + b->box.y1, + op->base.src.transform, + op->base.src.scale, + &v[9], &v[10]); + + v[11] = v[7] = v[3] = b->alpha; + + v += 12; + b++; + } while (--nbox); +} + fastcall static void emit_span_linear(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + 
assert(op->base.floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[3] = dst.f; + dst.p.y = box->y1; + v[6] = dst.f; + + v[1] = compute_linear(&op->base.src, box->x2, box->y2); + v[4] = compute_linear(&op->base.src, box->x1, box->y2); + v[7] = compute_linear(&op->base.src, box->x1, box->y1); + + v[8] = v[5] = v[2] = opacity; +} + +sse4_2 fastcall static void +emit_span_linear__sse4_2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->base.floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[3] = dst.f; + dst.p.y = box->y1; + v[6] = dst.f; + + v[1] = compute_linear(&op->base.src, box->x2, box->y2); + v[4] = compute_linear(&op->base.src, box->x1, box->y2); + v[7] = compute_linear(&op->base.src, box->x1, box->y1); + + v[8] = v[5] = v[2] = opacity; +} + +avx2 fastcall static void +emit_span_linear__avx2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) { union { struct sna_coordinate p; @@ -1494,6 +2478,66 @@ emit_span_boxes_linear(const struct sna_composite_spans_op *op, } while (--nbox); } +sse4_2 fastcall static void +emit_span_boxes_linear__sse4_2(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + dst.p.x = b->box.x1; + v[3] = dst.f; + dst.p.y = b->box.y1; + v[6] = dst.f; + + v[1] = compute_linear(&op->base.src, b->box.x2, b->box.y2); + v[4] = compute_linear(&op->base.src, b->box.x1, b->box.y2); + v[7] = compute_linear(&op->base.src, b->box.x1, b->box.y1); + + v[8] = v[5] = v[2] = b->alpha; + + v += 9; + b++; + } while (--nbox); +} + +avx2 fastcall static void +emit_span_boxes_linear__avx2(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + dst.p.x = b->box.x1; + v[3] = dst.f; + dst.p.y = b->box.y1; + v[6] = dst.f; + + v[1] = compute_linear(&op->base.src, b->box.x2, b->box.y2); + v[4] = compute_linear(&op->base.src, b->box.x1, b->box.y2); + v[7] = compute_linear(&op->base.src, b->box.x1, b->box.y1); + + v[8] = v[5] = v[2] = b->alpha; + + v += 9; + b++; + } while (--nbox); +} + inline inline static uint32_t gen4_choose_spans_vertex_buffer(const struct sna_composite_op *op) { @@ -1502,7 +2546,8 @@ gen4_choose_spans_vertex_buffer(const struct sna_composite_op *op) return 1 << 2 | id; } -unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp) +unsigned gen4_choose_spans_emitter(struct sna *sna, + struct sna_composite_spans_op *tmp) { unsigned vb; @@ -1512,24 +2557,56 @@ unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp) tmp->base.floats_per_vertex = 3; vb = 1 << 2 | 1; } else if (tmp->base.src.is_linear) { - tmp->prim_emit = emit_span_linear; - tmp->emit_boxes = emit_span_boxes_linear; + if (sna->cpu_features & AVX2) { + tmp->prim_emit = emit_span_linear__avx2; 
+ tmp->emit_boxes = emit_span_boxes_linear__avx2; + } else if (sna->cpu_features & SSE4_2) { + tmp->prim_emit = emit_span_linear__sse4_2; + tmp->emit_boxes = emit_span_boxes_linear__sse4_2; + } else { + tmp->prim_emit = emit_span_linear; + tmp->emit_boxes = emit_span_boxes_linear; + } tmp->base.floats_per_vertex = 3; vb = 1 << 2 | 1; } else if (tmp->base.src.transform == NULL) { - tmp->prim_emit = emit_span_identity; - tmp->emit_boxes = emit_span_boxes_identity; + if (sna->cpu_features & AVX2) { + tmp->prim_emit = emit_span_identity__avx2; + tmp->emit_boxes = emit_span_boxes_identity__avx2; + } else if (sna->cpu_features & SSE4_2) { + tmp->prim_emit = emit_span_identity__sse4_2; + tmp->emit_boxes = emit_span_boxes_identity__sse4_2; + } else { + tmp->prim_emit = emit_span_identity; + tmp->emit_boxes = emit_span_boxes_identity; + } tmp->base.floats_per_vertex = 4; vb = 1 << 2 | 2; } else if (tmp->base.is_affine) { tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2]; tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2]; if (!sna_affine_transform_is_rotation(tmp->base.src.transform)) { - tmp->prim_emit = emit_span_simple; - tmp->emit_boxes = emit_span_boxes_simple; + if (sna->cpu_features & AVX2) { + tmp->prim_emit = emit_span_simple__avx2; + tmp->emit_boxes = emit_span_boxes_simple__avx2; + } else if (sna->cpu_features & SSE4_2) { + tmp->prim_emit = emit_span_simple__sse4_2; + tmp->emit_boxes = emit_span_boxes_simple__sse4_2; + } else { + tmp->prim_emit = emit_span_simple; + tmp->emit_boxes = emit_span_boxes_simple; + } } else { - tmp->prim_emit = emit_span_affine; - tmp->emit_boxes = emit_span_boxes_affine; + if (sna->cpu_features & AVX2) { + tmp->prim_emit = emit_span_affine__avx2; + tmp->emit_boxes = emit_span_boxes_affine__avx2; + } else if (sna->cpu_features & SSE4_2) { + tmp->prim_emit = emit_span_affine__sse4_2; + tmp->emit_boxes = emit_span_boxes_affine__sse4_2; + } else { + tmp->prim_emit = emit_span_affine; + tmp->emit_boxes = emit_span_boxes_affine; + } } tmp->base.floats_per_vertex = 4; vb = 1 << 2 | 2; diff --git a/src/sna/gen4_vertex.h b/src/sna/gen4_vertex.h index 431b545e..1494ba14 100644 --- a/src/sna/gen4_vertex.h +++ b/src/sna/gen4_vertex.h @@ -10,7 +10,7 @@ void gen4_vertex_flush(struct sna *sna); int gen4_vertex_finish(struct sna *sna); void gen4_vertex_close(struct sna *sna); -unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp); -unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp); +unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp); +unsigned gen4_choose_spans_emitter(struct sna *sna, struct sna_composite_spans_op *tmp); #endif /* GEN4_VERTEX_H */ diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c index f2368773..8b9eaac1 100644 --- a/src/sna/gen5_render.c +++ b/src/sna/gen5_render.c @@ -1924,7 +1924,7 @@ gen5_render_composite(struct sna *sna, tmp->mask.bo != NULL, tmp->has_component_alpha, tmp->is_affine); - tmp->u.gen5.ve_id = gen4_choose_composite_emitter(tmp); + tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp); tmp->blt = gen5_render_composite_blt; tmp->box = gen5_render_composite_box; @@ -2152,7 +2152,7 @@ gen5_render_composite_spans(struct sna *sna, tmp->base.has_component_alpha = false; tmp->base.need_magic_ca_pass = false; - tmp->base.u.gen5.ve_id = gen4_choose_spans_emitter(tmp); + tmp->base.u.gen5.ve_id = gen4_choose_spans_emitter(sna, tmp); tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine; tmp->box = 
gen5_render_composite_spans_box; diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c index fa4c47b1..d4105141 100644 --- a/src/sna/gen6_render.c +++ b/src/sna/gen6_render.c @@ -2272,7 +2272,7 @@ gen6_render_composite(struct sna *sna, tmp->mask.bo != NULL, tmp->has_component_alpha, tmp->is_affine), - gen4_choose_composite_emitter(tmp)); + gen4_choose_composite_emitter(sna, tmp)); tmp->blt = gen6_render_composite_blt; tmp->box = gen6_render_composite_box; @@ -2508,7 +2508,7 @@ gen6_render_composite_spans(struct sna *sna, SAMPLER_EXTEND_PAD), gen6_get_blend(tmp->base.op, false, tmp->base.dst.format), GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine, - gen4_choose_spans_emitter(tmp)); + gen4_choose_spans_emitter(sna, tmp)); tmp->box = gen6_render_composite_spans_box; tmp->boxes = gen6_render_composite_spans_boxes; diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index bd14d90f..7984cf19 100644 --- a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -2412,7 +2412,7 @@ gen7_render_composite(struct sna *sna, tmp->mask.bo != NULL, tmp->has_component_alpha, tmp->is_affine), - gen4_choose_composite_emitter(tmp)); + gen4_choose_composite_emitter(sna, tmp)); tmp->blt = gen7_render_composite_blt; tmp->box = gen7_render_composite_box; @@ -2628,7 +2628,7 @@ gen7_render_composite_spans(struct sna *sna, SAMPLER_EXTEND_PAD), gen7_get_blend(tmp->base.op, false, tmp->base.dst.format), GEN7_WM_KERNEL_OPACITY | !tmp->base.is_affine, - gen4_choose_spans_emitter(tmp)); + gen4_choose_spans_emitter(sna, tmp)); tmp->box = gen7_render_composite_spans_box; tmp->boxes = gen7_render_composite_spans_boxes; |
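
The diff consumes sna->cpu_features but does not itself show how those flags
are populated. A hypothetical way to fill them with GCC's builtin CPU
detection is sketched below; the helper name, flag values and the use of
__builtin_cpu_supports() are assumptions on my part, and the driver may well
use raw cpuid instead:

/* Hypothetical sketch of CPU feature detection feeding cpu_features,
 * using GCC's builtin CPU detection. */
#define SSE2   (1u << 0)
#define SSE4_2 (1u << 1)
#define AVX2   (1u << 2)

static unsigned detect_cpu_features(void)
{
	unsigned features = 0;

	__builtin_cpu_init();
	if (__builtin_cpu_supports("sse2"))
		features |= SSE2;
	if (__builtin_cpu_supports("sse4.2"))
		features |= SSE4_2;
	if (__builtin_cpu_supports("avx2"))
		features |= AVX2;

	return features;
}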