summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2013-01-26 14:41:04 +0000
committerChris Wilson <chris@chris-wilson.co.uk>2013-01-27 13:06:46 +0000
commit8178cff5718e69e14d3953a7f754d7585a06838f (patch)
tree563a6e806e898ed928c6ba4cf683f1cb990c46c5
parent8ffb3f50b3b4601401da76e2848e059ab63231f4 (diff)
sna: Begin sketching out a threaded rasteriser for spans
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r--configure.ac34
-rw-r--r--src/sna/Makefile.am2
-rw-r--r--src/sna/atomic.h89
-rw-r--r--src/sna/compiler.h2
-rw-r--r--src/sna/gen3_render.c323
-rw-r--r--src/sna/gen4_render.c50
-rw-r--r--src/sna/gen4_vertex.c229
-rw-r--r--src/sna/gen5_render.c50
-rw-r--r--src/sna/gen6_render.c56
-rw-r--r--src/sna/gen7_render.c50
-rw-r--r--src/sna/kgem_debug_gen6.c4
-rw-r--r--src/sna/sna.h1
-rw-r--r--src/sna/sna_render.c2
-rw-r--r--src/sna/sna_render.h50
-rw-r--r--src/sna/sna_trapezoids.c246
-rw-r--r--src/sna/sna_vertex.c37
16 files changed, 1174 insertions, 51 deletions
diff --git a/configure.ac b/configure.ac
index cb1496b4..46affdcd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -104,6 +104,40 @@ if test x$ASM != "xno"; then
fi
AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes)
+# Check for atomic intrinsics
+AC_CACHE_CHECK([for native atomic primitives], intel_cv_atomic_primitives,
+[
+ intel_cv_atomic_primitives="none"
+
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+int atomic_add(int i) { return __sync_fetch_and_add (&i, 1); }
+int atomic_cmpxchg(int i, int j, int k) { return __sync_val_compare_and_swap (&i, j, k); }
+ ]],[[]])],
+ [intel_cv_atomic_primitives="Intel"],[])
+
+ if test "x$intel_cv_atomic_primitives" = "xnone"; then
+ AC_CHECK_HEADER([atomic_ops.h], intel_cv_atomic_primitives="libatomic-ops")
+ fi
+
+ # atomic functions defined in <atomic.h> & libc on Solaris
+ if test "x$intel_cv_atomic_primitives" = "xnone"; then
+ AC_CHECK_FUNC([atomic_cas_uint],
+ intel_cv_atomic_primitives="Solaris")
+ fi
+
+])
+if test "x$intel_cv_atomic_primitives" = xIntel; then
+ AC_DEFINE(HAVE_ATOMIC_PRIMITIVES, 1,
+ [Enable if your compiler supports the Intel __sync_* atomic primitives])
+fi
+if test "x$intel_cv_atomic_primitives" = "xlibatomic-ops"; then
+ AC_DEFINE(HAVE_LIB_ATOMIC_OPS, 1, [Enable if you have libatomic-ops-dev installed])
+fi
+
+if test "x$intel_cv_atomic_primitives" = "xnone"; then
+	AC_MSG_ERROR([xf86-video-intel depends upon atomic operations, which were not found for your compiler/cpu. Try compiling with -march=native, or install the libatomic-ops-dev package.])
+fi
+
AC_ARG_ENABLE(udev,
AS_HELP_STRING([--disable-udev],
[Disable udev-based monitor hotplug detection [default=auto]]),
diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am
index bfa836f6..c74c904d 100644
--- a/src/sna/Makefile.am
+++ b/src/sna/Makefile.am
@@ -38,6 +38,7 @@ libsna_la_LDFLAGS = -pthread
libsna_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@ brw/libbrw.la fb/libfb.la
libsna_la_SOURCES = \
+ atomic.h \
blt.c \
compiler.h \
kgem.c \
@@ -64,6 +65,7 @@ libsna_la_SOURCES = \
sna_tiling.c \
sna_transform.c \
sna_threads.c \
+ sna_vertex.c \
sna_video.c \
sna_video.h \
sna_video_overlay.c \
diff --git a/src/sna/atomic.h b/src/sna/atomic.h
new file mode 100644
index 00000000..306dc6db
--- /dev/null
+++ b/src/sna/atomic.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifndef ATOMIC_H
+#define ATOMIC_H
+
+#if HAVE_ATOMIC_PRIMITIVES
+
+#define HAS_ATOMIC_OPS 1
+
+typedef struct {
+ int atomic;
+} atomic_t;
+
+# define atomic_read(x) ((x)->atomic)
+# define atomic_set(x, val) ((x)->atomic = (val))
+# define atomic_inc(x) ((void) __sync_fetch_and_add (&(x)->atomic, 1))
+# define atomic_dec_and_test(x) (__sync_fetch_and_add (&(x)->atomic, -1) == 1)
+# define atomic_add(x, v) ((void) __sync_add_and_fetch(&(x)->atomic, (v)))
+# define atomic_dec(x, v) ((void) __sync_sub_and_fetch(&(x)->atomic, (v)))
+# define atomic_cmpxchg(x, oldv, newv) __sync_val_compare_and_swap (&(x)->atomic, oldv, newv)
+
+#endif
+
+#if HAVE_LIB_ATOMIC_OPS
+#include <atomic_ops.h>
+
+#define HAS_ATOMIC_OPS 1
+
+typedef struct {
+ AO_t atomic;
+} atomic_t;
+
+# define atomic_read(x) AO_load_full(&(x)->atomic)
+# define atomic_set(x, val) AO_store_full(&(x)->atomic, (val))
+# define atomic_inc(x) ((void) AO_fetch_and_add1_full(&(x)->atomic))
+# define atomic_add(x, v) ((void) AO_fetch_and_add_full(&(x)->atomic, (v)))
+# define atomic_dec(x, v) ((void) AO_fetch_and_add_full(&(x)->atomic, -(v)))
+# define atomic_dec_and_test(x) (AO_fetch_and_sub1_full(&(x)->atomic) == 1)
+# define atomic_cmpxchg(x, oldv, newv) AO_compare_and_swap_full(&(x)->atomic, oldv, newv)
+
+#endif
+
+#if defined(__sun) && !defined(HAS_ATOMIC_OPS) /* Solaris & OpenSolaris */
+
+#include <sys/atomic.h>
+#define HAS_ATOMIC_OPS 1
+
+typedef struct { uint_t atomic; } atomic_t;
+
+# define atomic_read(x) (int) ((x)->atomic)
+# define atomic_set(x, val) ((x)->atomic = (uint_t)(val))
+# define atomic_inc(x) (atomic_inc_uint (&(x)->atomic))
+# define atomic_dec_and_test(x) (atomic_dec_uint_nv(&(x)->atomic) == 1)
+# define atomic_add(x, v) (atomic_add_int(&(x)->atomic, (v)))
+# define atomic_dec(x, v) (atomic_add_int(&(x)->atomic, -(v)))
+# define atomic_cmpxchg(x, oldv, newv) atomic_cas_uint (&(x)->atomic, oldv, newv)
+
+#endif
+
+#if ! HAS_ATOMIC_OPS
+#error xf86-video-intel requires atomic operations, please define them for your CPU/compiler.
+#endif
+
+#endif
diff --git a/src/sna/compiler.h b/src/sna/compiler.h
index ff80365e..b985f2bc 100644
--- a/src/sna/compiler.h
+++ b/src/sna/compiler.h
@@ -36,6 +36,7 @@
#define fastcall __attribute__((regparm(3)))
#define must_check __attribute__((warn_unused_result))
#define constant __attribute__((const))
+#define __packed__ __attribute__((__packed__))
#else
#define likely(expr) (expr)
#define unlikely(expr) (expr)
@@ -44,6 +45,7 @@
#define fastcall
#define must_check
#define constant
+#define __packed__
#endif
#ifdef HAVE_VALGRIND
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 01c0aeef..3224d717 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -1618,6 +1618,8 @@ static int gen3_vertex_finish(struct sna *sna)
assert(sna->render.vertex_used);
assert(sna->render.vertex_used <= sna->render.vertex_size);
+ sna_vertex_wait__locked(&sna->render);
+
bo = sna->render.vbo;
if (bo) {
DBG(("%s: reloc = %d\n", __FUNCTION__,
@@ -1796,6 +1798,17 @@ static int gen3_get_rectangles__flush(struct sna *sna,
}
}
+	/* Prevent discarding the new vbo after lock contention */
+ if (sna->render.active) {
+ int rem;
+
+ sna_vertex_wait__locked(&sna->render);
+
+ rem = vertex_space(sna);
+ if (rem > op->floats_per_rect)
+ return rem;
+ }
+
return gen3_vertex_finish(sna);
}
@@ -1838,6 +1851,7 @@ flush:
gen3_vertex_flush(sna);
gen3_magic_ca_pass(sna, op);
}
+ gen3_vertex_finish(sna);
_kgem_submit(&sna->kgem);
gen3_emit_composite_state(sna, op);
assert(sna->render.vertex_offset == 0);
@@ -3081,6 +3095,26 @@ gen3_emit_composite_spans_primitive_zero(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_zero__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox, float *v)
+{
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[1] = op->base.dst.y + b->box.y2;
+
+ v[2] = op->base.dst.x + b->box.x1;
+ v[3] = v[1];
+
+ v[4] = v[2];
+		v[5] = op->base.dst.y + b->box.y1;
+
+ v += 6;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3096,6 +3130,22 @@ gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_zero_no_offset__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox, float *v)
+{
+ do {
+ v[0] = b->box.x2;
+ v[3] = v[1] = b->box.y2;
+ v[4] = v[2] = b->box.x1;
+ v[5] = b->box.y1;
+
+ b++;
+ v += 6;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_constant(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3112,6 +3162,24 @@ gen3_emit_composite_spans_primitive_constant(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_constant__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox,
+ float *v)
+{
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[6] = v[3] = op->base.dst.x + b->box.x1;
+ v[4] = v[1] = op->base.dst.y + b->box.y2;
+ v[7] = op->base.dst.y + b->box.y1;
+ v[8] = v[5] = v[2] = b->alpha;
+
+ v += 9;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3128,6 +3196,23 @@ gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_constant_no_offset__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox, float *v)
+{
+ do {
+ v[0] = b->box.x2;
+ v[6] = v[3] = b->box.x1;
+ v[4] = v[1] = b->box.y2;
+ v[7] = b->box.y1;
+ v[8] = v[5] = v[2] = b->alpha;
+
+ v += 9;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3156,6 +3241,36 @@ gen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_identity_source__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox,
+ float *v)
+{
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[1] = op->base.dst.y + b->box.y2;
+ v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
+ v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
+ v[4] = b->alpha;
+
+ v[5] = op->base.dst.x + b->box.x1;
+ v[6] = v[1];
+ v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
+ v[8] = v[3];
+ v[9] = b->alpha;
+
+ v[10] = v[5];
+ v[11] = op->base.dst.y + b->box.y1;
+ v[12] = v[7];
+ v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
+ v[14] = b->alpha;
+
+ v += 15;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3190,6 +3305,40 @@ gen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_affine_source__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox,
+ float *v)
+{
+ PictTransform *transform = op->base.src.transform;
+
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[6] = v[1] = op->base.dst.y + b->box.y2;
+ v[10] = v[5] = op->base.dst.x + b->box.x1;
+ v[11] = op->base.dst.y + b->box.y1;
+ v[14] = v[9] = v[4] = b->alpha;
+
+ _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
+ (int)op->base.src.offset[1] + b->box.y2,
+ transform, op->base.src.scale,
+ &v[2], &v[3]);
+
+ _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
+ (int)op->base.src.offset[1] + b->box.y2,
+ transform, op->base.src.scale,
+ &v[7], &v[8]);
+
+ _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
+ (int)op->base.src.offset[1] + b->box.y1,
+ transform, op->base.src.scale,
+ &v[12], &v[13]);
+ v += 15;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3218,6 +3367,36 @@ gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_identity_gradient__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox,
+ float *v)
+{
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[1] = op->base.dst.y + b->box.y2;
+ v[2] = op->base.src.offset[0] + b->box.x2;
+ v[3] = op->base.src.offset[1] + b->box.y2;
+ v[4] = b->alpha;
+
+ v[5] = op->base.dst.x + b->box.x1;
+ v[6] = v[1];
+ v[7] = op->base.src.offset[0] + b->box.x1;
+ v[8] = v[3];
+ v[9] = b->alpha;
+
+ v[10] = v[5];
+ v[11] = op->base.dst.y + b->box.y1;
+ v[12] = v[7];
+ v[13] = op->base.src.offset[1] + b->box.y1;
+ v[14] = b->alpha;
+
+ v += 15;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3253,6 +3432,43 @@ gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
}
fastcall static void
+gen3_emit_composite_spans_primitive_affine_gradient__boxes(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox,
+ float *v)
+{
+ PictTransform *transform = op->base.src.transform;
+
+ do {
+ v[0] = op->base.dst.x + b->box.x2;
+ v[1] = op->base.dst.y + b->box.y2;
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + b->box.x2,
+ (int)op->base.src.offset[1] + b->box.y2,
+ transform,
+ &v[2], &v[3]);
+ v[4] = b->alpha;
+
+ v[5] = op->base.dst.x + b->box.x1;
+ v[6] = v[1];
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + b->box.x1,
+ (int)op->base.src.offset[1] + b->box.y2,
+ transform,
+ &v[7], &v[8]);
+ v[9] = b->alpha;
+
+ v[10] = v[5];
+ v[11] = op->base.dst.y + b->box.y1;
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + b->box.x1,
+ (int)op->base.src.offset[1] + b->box.y1,
+ transform,
+ &v[12], &v[13]);
+ v[14] = b->alpha;
+ v += 15;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
gen3_emit_composite_spans_primitive(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
@@ -3297,6 +3513,48 @@ gen3_render_composite_spans_constant_box(struct sna *sna,
}
fastcall static void
+gen3_render_composite_spans_constant_thread_boxes(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * 9;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ do {
+ v[0] = box->box.x2;
+ v[6] = v[3] = box->box.x1;
+ v[4] = v[1] = box->box.y2;
+ v[7] = box->box.y1;
+ v[8] = v[5] = v[2] = box->alpha;
+ v += 9;
+ box++;
+ } while (--nbox_this_time);
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+fastcall static void
gen3_render_composite_spans_box(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, float opacity)
@@ -3344,6 +3602,41 @@ gen3_render_composite_spans_boxes(struct sna *sna,
}
fastcall static void
+gen3_render_composite_spans_boxes__thread(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+fastcall static void
gen3_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
@@ -3447,40 +3740,58 @@ gen3_render_composite_spans(struct sna *sna,
no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0;
tmp->box = gen3_render_composite_spans_box;
tmp->boxes = gen3_render_composite_spans_boxes;
+ tmp->thread_boxes = gen3_render_composite_spans_boxes__thread;
tmp->done = gen3_render_composite_spans_done;
tmp->prim_emit = gen3_emit_composite_spans_primitive;
switch (tmp->base.src.u.gen3.type) {
case SHADER_NONE:
assert(0);
case SHADER_ZERO:
- tmp->prim_emit = no_offset ? gen3_emit_composite_spans_primitive_zero_no_offset : gen3_emit_composite_spans_primitive_zero;
+ if (no_offset) {
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_zero_no_offset;
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero_no_offset__boxes;
+ } else {
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_zero;
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero__boxes;
+ }
break;
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_CONSTANT:
if (no_offset) {
tmp->box = gen3_render_composite_spans_constant_box;
+ tmp->thread_boxes = gen3_render_composite_spans_constant_thread_boxes;
tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset;
- } else
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant_no_offset__boxes;
+ } else {
tmp->prim_emit = gen3_emit_composite_spans_primitive_constant;
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__boxes;
+ }
break;
case SHADER_LINEAR:
case SHADER_RADIAL:
- if (tmp->base.src.transform == NULL)
+ if (tmp->base.src.transform == NULL) {
tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient;
- else if (tmp->base.src.is_affine)
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__boxes;
+ } else if (tmp->base.src.is_affine) {
tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient;
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__boxes;
+ }
break;
case SHADER_TEXTURE:
- if (tmp->base.src.transform == NULL)
+ if (tmp->base.src.transform == NULL) {
tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source;
- else if (tmp->base.src.is_affine) {
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__boxes;
+ } else if (tmp->base.src.is_affine) {
tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source;
+ tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__boxes;
}
break;
}
+ if (tmp->emit_boxes == NULL)
+ tmp->thread_boxes = NULL;
tmp->base.mask.bo = NULL;
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index d2f3fff1..65016cd1 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -621,6 +621,17 @@ static int gen4_get_rectangles__flush(struct sna *sna,
op->u.gen4.wm_kernel);
}
+	/* Prevent discarding the new vbo after lock contention */
+ if (sna->render.active) {
+ int rem;
+
+ sna_vertex_wait__locked(&sna->render);
+
+ rem = vertex_space(sna);
+ if (rem > op->floats_per_rect)
+ return rem;
+ }
+
return gen4_vertex_finish(sna);
}
@@ -656,6 +667,7 @@ flush:
gen4_vertex_flush(sna);
gen4_magic_ca_pass(sna, op);
}
+ gen4_vertex_finish(sna);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
@@ -1966,6 +1978,42 @@ gen4_render_composite_spans_boxes(struct sna *sna,
}
fastcall static void
+gen4_render_composite_spans_boxes__thread(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox,
+ gen4_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+fastcall static void
gen4_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
@@ -2080,6 +2128,8 @@ gen4_render_composite_spans(struct sna *sna,
tmp->box = gen4_render_composite_spans_box;
tmp->boxes = gen4_render_composite_spans_boxes;
+ if (tmp->emit_boxes)
+ tmp->thread_boxes = gen4_render_composite_spans_boxes__thread;
tmp->done = gen4_render_composite_spans_done;
if (!kgem_check_bo(&sna->kgem,
diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
index 4e404670..cc679d38 100644
--- a/src/sna/gen4_vertex.c
+++ b/src/sna/gen4_vertex.c
@@ -36,12 +36,13 @@
void gen4_vertex_flush(struct sna *sna)
{
- assert(sna->render.vertex_offset);
- assert(sna->render.vertex_index > sna->render.vertex_start);
-
DBG(("%s[%x] = %d\n", __FUNCTION__,
4*sna->render.vertex_offset,
sna->render.vertex_index - sna->render.vertex_start));
+
+ assert(sna->render.vertex_offset);
+ assert(sna->render.vertex_index > sna->render.vertex_start);
+
sna->kgem.batch[sna->render.vertex_offset] =
sna->render.vertex_index - sna->render.vertex_start;
sna->render.vertex_offset = 0;
@@ -58,6 +59,8 @@ int gen4_vertex_finish(struct sna *sna)
assert(sna->render.vertex_offset == 0);
assert(sna->render.vertex_used);
+ sna_vertex_wait__locked(&sna->render);
+
/* Note: we only need dword alignment (currently) */
bo = sna->render.vbo;
@@ -73,6 +76,7 @@ int gen4_vertex_finish(struct sna *sna)
0);
}
+ assert(!sna->render.active);
sna->render.nvertex_reloc = 0;
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
@@ -87,6 +91,7 @@ int gen4_vertex_finish(struct sna *sna)
hint |= CREATE_CACHED | CREATE_NO_THROTTLE;
size = 256*1024;
+ assert(!sna->render.active);
sna->render.vertices = NULL;
sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
while (sna->render.vbo == NULL && size > 16*1024) {
@@ -144,6 +149,8 @@ void gen4_vertex_close(struct sna *sna)
__FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0,
sna->render.vb_id, sna->render.nvertex_reloc));
+ assert(!sna->render.active);
+
bo = sna->render.vbo;
if (bo) {
if (sna->render.vertex_size - sna->render.vertex_used < 64) {
@@ -205,6 +212,7 @@ void gen4_vertex_close(struct sna *sna)
sna->render.vb_id = 0;
if (sna->render.vbo == NULL) {
+ assert(!sna->render.active);
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
assert(sna->render.vertices == sna->render.vertex_data);
@@ -853,7 +861,7 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp)
}
inline static void
-emit_spans_vertex(struct sna *sna,
+emit_span_vertex(struct sna *sna,
const struct sna_composite_spans_op *op,
int16_t x, int16_t y)
{
@@ -867,18 +875,18 @@ emit_composite_spans_primitive(struct sna *sna,
const BoxRec *box,
float opacity)
{
- emit_spans_vertex(sna, op, box->x2, box->y2);
+ emit_span_vertex(sna, op, box->x2, box->y2);
OUT_VERTEX_F(opacity);
- emit_spans_vertex(sna, op, box->x1, box->y2);
+ emit_span_vertex(sna, op, box->x1, box->y2);
OUT_VERTEX_F(opacity);
- emit_spans_vertex(sna, op, box->x1, box->y1);
+ emit_span_vertex(sna, op, box->x1, box->y1);
OUT_VERTEX_F(opacity);
}
fastcall static void
-emit_spans_solid(struct sna *sna,
+emit_span_solid(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
@@ -909,7 +917,36 @@ emit_spans_solid(struct sna *sna,
}
fastcall static void
-emit_spans_identity(struct sna *sna,
+emit_span_boxes_solid(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b,
+ int nbox, float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = b->box.x2;
+ dst.p.y = b->box.y2;
+ v[0] = dst.f;
+
+ dst.p.x = b->box.x1;
+ v[3] = dst.f;
+
+ dst.p.y = b->box.y1;
+ v[6] = dst.f;
+
+ v[7] = v[4] = v[1] = .5;
+ v[8] = v[5] = v[2] = b->alpha;
+
+ v += 9;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_span_identity(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
@@ -949,7 +986,43 @@ emit_spans_identity(struct sna *sna,
}
fastcall static void
-emit_spans_simple(struct sna *sna,
+emit_span_boxes_identity(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ float sx = op->base.src.scale[0];
+ float sy = op->base.src.scale[1];
+ int16_t tx = op->base.src.offset[0];
+ int16_t ty = op->base.src.offset[1];
+
+ dst.p.x = b->box.x2;
+ dst.p.y = b->box.y2;
+ v[0] = dst.f;
+ v[1] = (b->box.x2 + tx) * sx;
+ v[6] = v[2] = (b->box.y2 + ty) * sy;
+
+ dst.p.x = b->box.x1;
+ v[4] = dst.f;
+ v[9] = v[5] = (b->box.x1 + tx) * sx;
+
+ dst.p.y = b->box.y1;
+ v[8] = dst.f;
+ v[10] = (b->box.y1 + ty) * sy;
+
+ v[11] = v[7] = v[3] = b->alpha;
+ v += 12;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_span_simple(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
@@ -993,7 +1066,47 @@ emit_spans_simple(struct sna *sna,
}
fastcall static void
-emit_spans_affine(struct sna *sna,
+emit_span_boxes_simple(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ float xx = op->base.src.transform->matrix[0][0];
+ float x0 = op->base.src.transform->matrix[0][2];
+ float yy = op->base.src.transform->matrix[1][1];
+ float y0 = op->base.src.transform->matrix[1][2];
+ float sx = op->base.src.scale[0];
+ float sy = op->base.src.scale[1];
+ int16_t tx = op->base.src.offset[0];
+ int16_t ty = op->base.src.offset[1];
+
+ dst.p.x = b->box.x2;
+ dst.p.y = b->box.y2;
+ v[0] = dst.f;
+ v[1] = ((b->box.x2 + tx) * xx + x0) * sx;
+ v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy;
+
+ dst.p.x = b->box.x1;
+ v[4] = dst.f;
+ v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx;
+
+ dst.p.y = b->box.y1;
+ v[8] = dst.f;
+ v[10] = ((b->box.y1 + ty) * yy + y0) * sy;
+
+ v[11] = v[7] = v[3] = b->alpha;
+		v += 12;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_span_affine(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
@@ -1038,7 +1151,50 @@ emit_spans_affine(struct sna *sna,
}
fastcall static void
-emit_spans_linear(struct sna *sna,
+emit_span_boxes_affine(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = b->box.x2;
+ dst.p.y = b->box.y2;
+ v[0] = dst.f;
+ _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
+ op->base.src.offset[1] + b->box.y2,
+ op->base.src.transform,
+ op->base.src.scale,
+ &v[1], &v[2]);
+
+ dst.p.x = b->box.x1;
+ v[4] = dst.f;
+ _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
+ op->base.src.offset[1] + b->box.y2,
+ op->base.src.transform,
+ op->base.src.scale,
+ &v[5], &v[6]);
+
+ dst.p.y = b->box.y1;
+ v[8] = dst.f;
+ _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
+ op->base.src.offset[1] + b->box.y1,
+ op->base.src.transform,
+ op->base.src.scale,
+ &v[9], &v[10]);
+
+ v[11] = v[7] = v[3] = b->alpha;
+
+ v += 12;
+ b++;
+ } while (--nbox);
+}
+
+fastcall static void
+emit_span_linear(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
@@ -1069,6 +1225,35 @@ emit_spans_linear(struct sna *sna,
v[8] = v[5] = v[2] = opacity;
}
+fastcall static void
+emit_span_boxes_linear(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *b, int nbox,
+ float *v)
+{
+ do {
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ dst.p.x = b->box.x2;
+ dst.p.y = b->box.y2;
+ v[0] = dst.f;
+ dst.p.x = b->box.x1;
+ v[3] = dst.f;
+ dst.p.y = b->box.y1;
+ v[6] = dst.f;
+
+ v[1] = compute_linear(&op->base.src, b->box.x2, b->box.y2);
+ v[4] = compute_linear(&op->base.src, b->box.x1, b->box.y2);
+ v[7] = compute_linear(&op->base.src, b->box.x1, b->box.y1);
+
+ v[8] = v[5] = v[2] = b->alpha;
+ v += 9;
+ b++;
+ } while (--nbox);
+}
+
inline static uint32_t
gen4_choose_spans_vertex_buffer(const struct sna_composite_op *op)
{
@@ -1083,24 +1268,30 @@ unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp)
unsigned vb;
if (tmp->base.src.is_solid) {
- tmp->prim_emit = emit_spans_solid;
+ tmp->prim_emit = emit_span_solid;
+ tmp->emit_boxes = emit_span_boxes_solid;
tmp->base.floats_per_vertex = 3;
vb = 1 << 2 | 1;
} else if (tmp->base.src.is_linear) {
- tmp->prim_emit = emit_spans_linear;
+ tmp->prim_emit = emit_span_linear;
+ tmp->emit_boxes = emit_span_boxes_linear;
tmp->base.floats_per_vertex = 3;
vb = 1 << 2 | 1;
} else if (tmp->base.src.transform == NULL) {
- tmp->prim_emit = emit_spans_identity;
+ tmp->prim_emit = emit_span_identity;
+ tmp->emit_boxes = emit_span_boxes_identity;
tmp->base.floats_per_vertex = 4;
vb = 1 << 2 | 2;
} else if (tmp->base.is_affine) {
tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
- if (!sna_affine_transform_is_rotation(tmp->base.src.transform))
- tmp->prim_emit = emit_spans_simple;
- else
- tmp->prim_emit = emit_spans_affine;
+ if (!sna_affine_transform_is_rotation(tmp->base.src.transform)) {
+ tmp->prim_emit = emit_span_simple;
+ tmp->emit_boxes = emit_span_boxes_simple;
+ } else {
+ tmp->prim_emit = emit_span_affine;
+ tmp->emit_boxes = emit_span_boxes_affine;
+ }
tmp->base.floats_per_vertex = 4;
vb = 1 << 2 | 2;
} else {
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 5995d1d9..81e6635a 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -607,6 +607,17 @@ static int gen5_get_rectangles__flush(struct sna *sna,
op->u.gen5.wm_kernel);
}
+	/* Prevent discarding the new vbo after lock contention */
+ if (sna->render.active) {
+ int rem;
+
+ sna_vertex_wait__locked(&sna->render);
+
+ rem = vertex_space(sna);
+ if (rem > op->floats_per_rect)
+ return rem;
+ }
+
return gen4_vertex_finish(sna);
}
@@ -643,6 +654,7 @@ flush:
gen4_vertex_flush(sna);
gen5_magic_ca_pass(sna, op);
}
+ gen4_vertex_finish(sna);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
@@ -1939,6 +1951,42 @@ gen5_render_composite_spans_boxes(struct sna *sna,
}
fastcall static void
+gen5_render_composite_spans_boxes__thread(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
+ gen5_bind_surfaces);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+fastcall static void
gen5_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
@@ -2049,6 +2097,8 @@ gen5_render_composite_spans(struct sna *sna,
tmp->box = gen5_render_composite_spans_box;
tmp->boxes = gen5_render_composite_spans_boxes;
+ if (tmp->emit_boxes)
+ tmp->thread_boxes = gen5_render_composite_spans_boxes__thread;
tmp->done = gen5_render_composite_spans_done;
if (!kgem_check_bo(&sna->kgem,
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 35ff862b..4ff1606d 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -1157,6 +1157,17 @@ static int gen6_get_rectangles__flush(struct sna *sna,
}
}
+	/* Prevent discarding the new vbo after lock contention */
+ if (sna->render.active) {
+ int rem;
+
+ sna_vertex_wait__locked(&sna->render);
+
+ rem = vertex_space(sna);
+ if (rem > op->floats_per_rect)
+ return rem;
+ }
+
return gen4_vertex_finish(sna);
}
@@ -1193,6 +1204,7 @@ flush:
gen4_vertex_flush(sna);
gen6_magic_ca_pass(sna, op);
}
+ gen4_vertex_finish(sna);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
@@ -1293,6 +1305,7 @@ gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;
}
+ assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
}
fastcall static void
@@ -1720,6 +1733,7 @@ static void gen6_render_composite_done(struct sna *sna,
{
DBG(("%s\n", __FUNCTION__));
+ assert(!sna->render.active);
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen6_magic_ca_pass(sna, op);
@@ -2281,10 +2295,47 @@ gen6_render_composite_spans_boxes(struct sna *sna,
}
fastcall static void
+gen6_render_composite_spans_boxes__thread(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
+ gen6_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+fastcall static void
gen6_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
DBG(("%s()\n", __FUNCTION__));
+ assert(!sna->render.active);
if (sna->render.vertex_offset)
gen4_vertex_flush(sna);
@@ -2397,6 +2448,8 @@ gen6_render_composite_spans(struct sna *sna,
tmp->box = gen6_render_composite_spans_box;
tmp->boxes = gen6_render_composite_spans_boxes;
+ if (tmp->emit_boxes)
+ tmp->thread_boxes = gen6_render_composite_spans_boxes__thread;
tmp->done = gen6_render_composite_spans_done;
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
@@ -2768,6 +2821,7 @@ gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
DBG(("%s()\n", __FUNCTION__));
+ assert(!sna->render.active);
if (sna->render.vertex_offset)
gen4_vertex_flush(sna);
}
@@ -3115,6 +3169,7 @@ gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op)
{
DBG(("%s()\n", __FUNCTION__));
+ assert(!sna->render.active);
if (sna->render.vertex_offset)
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
@@ -3409,6 +3464,7 @@ gen6_render_expire(struct kgem *kgem)
if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
kgem_bo_destroy(kgem, sna->render.vbo);
+ assert(!sna->render.active);
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index fa36ce65..34ba252a 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -1282,6 +1282,17 @@ static int gen7_get_rectangles__flush(struct sna *sna,
}
}
+	/* Prevent discarding the new vbo after lock contention */
+ if (sna->render.active) {
+ int rem;
+
+ sna_vertex_wait__locked(&sna->render);
+
+ rem = vertex_space(sna);
+ if (rem > op->floats_per_rect)
+ return rem;
+ }
+
return gen4_vertex_finish(sna);
}
@@ -1318,6 +1329,7 @@ flush:
gen4_vertex_flush(sna);
gen7_magic_ca_pass(sna, op);
}
+ gen4_vertex_finish(sna);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
@@ -2403,6 +2415,42 @@ gen7_render_composite_spans_boxes(struct sna *sna,
}
fastcall static void
+gen7_render_composite_spans_boxes__thread(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ op->base.dst.x, op->base.dst.y));
+
+ sna_vertex_lock(&sna->render);
+ do {
+ int nbox_this_time;
+ float *v;
+
+ nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox,
+ gen7_emit_composite_state);
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
+
+ sna_vertex_acquire__locked(&sna->render);
+ sna_vertex_unlock(&sna->render);
+
+ op->emit_boxes(op, box, nbox_this_time, v);
+ box += nbox_this_time;
+
+ sna_vertex_lock(&sna->render);
+ sna_vertex_release__locked(&sna->render);
+ } while (nbox);
+ sna_vertex_unlock(&sna->render);
+}
+
+fastcall static void
gen7_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
@@ -2499,6 +2547,8 @@ gen7_render_composite_spans(struct sna *sna,
tmp->box = gen7_render_composite_spans_box;
tmp->boxes = gen7_render_composite_spans_boxes;
+ if (tmp->emit_boxes)
+ tmp->thread_boxes = gen7_render_composite_spans_boxes__thread;
tmp->done = gen7_render_composite_spans_done;
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
diff --git a/src/sna/kgem_debug_gen6.c b/src/sna/kgem_debug_gen6.c
index fd3f789a..7ef55d38 100644
--- a/src/sna/kgem_debug_gen6.c
+++ b/src/sna/kgem_debug_gen6.c
@@ -75,11 +75,11 @@ static void gen6_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
assert(i < kgem->nreloc);
reloc = kgem->reloc[i].target_handle;
- if (reloc == 0) {
+ if (reloc == -1) {
base = kgem->batch;
} else {
list_for_each_entry(bo, &kgem->next_request->buffers, request)
- if (bo->handle == reloc)
+ if (bo->target_handle == reloc)
break;
assert(&bo->request != &kgem->next_request->buffers);
base = kgem_bo_map__debug(kgem, bo);
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 5832c99f..84d9807a 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -42,6 +42,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#endif
#include <stdint.h>
+
#include "compiler.h"
#include <xorg-server.h>
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 84c6b351..4b32b82d 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -304,6 +304,8 @@ void no_render_init(struct sna *sna)
sna->kgem.expire = no_render_expire;
if (sna->kgem.has_blt)
sna->kgem.ring = KGEM_BLT;
+
+ sna_vertex_init(sna);
}
static struct kgem_bo *
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 13a3e7df..6a0b1d8a 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -7,6 +7,8 @@
#include <stdbool.h>
#include <stdint.h>
+#include <pthread.h>
+#include "atomic.h"
#define GRADIENT_CACHE_SIZE 16
@@ -142,6 +144,11 @@ struct sna_composite_op {
void *priv;
};
+struct sna_opacity_box {
+ BoxRec box;
+ float alpha;
+} __packed__;
+
struct sna_composite_spans_op {
struct sna_composite_op base;
@@ -153,6 +160,12 @@ struct sna_composite_spans_op {
const struct sna_composite_spans_op *op,
const BoxRec *box, int nbox,
float opacity);
+
+ fastcall void (*thread_boxes)(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box,
+ int nbox);
+
fastcall void (*done)(struct sna *sna,
const struct sna_composite_spans_op *op);
@@ -160,6 +173,9 @@ struct sna_composite_spans_op {
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity);
+ fastcall void (*emit_boxes)(const struct sna_composite_spans_op *op,
+ const struct sna_opacity_box *box, int nbox,
+ float *v);
};
struct sna_fill_op {
@@ -188,6 +204,10 @@ struct sna_copy_op {
};
struct sna_render {
+ pthread_mutex_t lock;
+ pthread_cond_t wait;
+ int active;
+
int max_3d_size;
int max_3d_pitch;
@@ -714,4 +734,34 @@ sna_render_copy_boxes__overlap(struct sna *sna, uint8_t alu,
bool
sna_composite_mask_is_opaque(PicturePtr mask);
+void sna_vertex_init(struct sna *sna);
+
+static inline void sna_vertex_lock(struct sna_render *r)
+{
+ pthread_mutex_lock(&r->lock);
+}
+
+static inline void sna_vertex_acquire__locked(struct sna_render *r)
+{
+ r->active++;
+}
+
+static inline void sna_vertex_unlock(struct sna_render *r)
+{
+ pthread_mutex_unlock(&r->lock);
+}
+
+static inline void sna_vertex_release__locked(struct sna_render *r)
+{
+ assert(r->active > 0);
+ if (--r->active == 0)
+ pthread_cond_signal(&r->wait);
+}
+
+static inline void sna_vertex_wait__locked(struct sna_render *r)
+{
+ while (r->active)
+ pthread_cond_wait(&r->wait, &r->lock);
+}
+
#endif /* SNA_RENDER_H */
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 79e845a9..bf4816bb 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -49,6 +49,7 @@
#define NO_ALIGNED_BOXES 0
#define NO_UNALIGNED_BOXES 0
#define NO_SCAN_CONVERTER 0
+#define NO_GPU_THREADS 0
/* TODO: Emit unantialiased and MSAA triangles. */
@@ -328,10 +329,10 @@ floored_divrem(int a, int b)
/* Compute the floored division (x*a)/b. Assumes / and % perform symmetric
* division. */
static struct quorem
-floored_muldivrem(int x, int a, int b)
+floored_muldivrem(int32_t x, int32_t a, int32_t b)
{
struct quorem qr;
- long long xa = (long long)x*a;
+ int64_t xa = (int64_t)x*a;
qr.quo = xa/b;
qr.rem = xa%b;
if (qr.rem && (xa>=0) != (b>=0)) {
@@ -674,6 +675,8 @@ polygon_add_edge(struct polygon *polygon,
ybot = bottom <= ymax ? bottom : ymax;
e->ytop = ytop;
e->height_left = ybot - ytop;
+ if (e->height_left <= 0)
+ return;
if (dx == 0) {
e->x.quo = x1;
@@ -736,6 +739,8 @@ polygon_add_line(struct polygon *polygon,
e->ytop = top;
e->height_left = bot - top;
+ if (e->height_left <= 0)
+ return;
if (dx == 0) {
e->x.quo = p1->x;
@@ -4021,14 +4026,13 @@ static span_func_t
choose_span(struct sna_composite_spans_op *tmp,
PicturePtr dst,
PictFormatPtr maskFormat,
- uint8_t op,
RegionPtr clip)
{
span_func_t span;
if (is_mono(dst, maskFormat)) {
/* XXX An imprecise approximation */
- if (maskFormat && !operator_is_bounded(op)) {
+ if (maskFormat && !operator_is_bounded(tmp->base.op)) {
span = tor_blt_span_mono_unbounded;
if (REGION_NUM_RECTS(clip) > 1)
span = tor_blt_span_mono_unbounded_clipped;
@@ -4188,6 +4192,151 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
return true;
}
+struct span_thread {
+ struct sna *sna;
+ const struct sna_composite_spans_op *op;
+ const xTrapezoid *traps;
+ RegionPtr clip;
+ span_func_t span;
+ BoxRec extents;
+ int dx, dy, draw_y;
+ int ntrap;
+ bool unbounded;
+};
+
+#define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box))
+struct span_thread_boxes {
+ const struct sna_composite_spans_op *op;
+ struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
+ int num_boxes;
+};
+
+static void span_thread_add_boxes(struct sna *sna, void *data,
+ const BoxRec *box, int count, float alpha)
+{
+ struct span_thread_boxes *b = data;
+
+ __DBG(("%s: adding %d boxes with alpha=%f\n",
+ __FUNCTION__, count, alpha));
+
+ assert(count > 0 && count <= SPAN_THREAD_MAX_BOXES);
+ if (b->num_boxes + count > SPAN_THREAD_MAX_BOXES) {
+ DBG(("%s: flushing %d boxes, adding %d\n", __FUNCTION__, b->num_boxes, count));
+ assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
+ b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes);
+ b->num_boxes = 0;
+ }
+
+ do {
+ b->boxes[b->num_boxes].box = *box++;
+ b->boxes[b->num_boxes].alpha = alpha;
+ b->num_boxes++;
+ } while (--count);
+ assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
+}
+
+static void
+span_thread_box(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage)
+{
+ __DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
+ span_thread_add_boxes(sna, op, box, 1, AREA_TO_ALPHA(coverage));
+}
+
+static void
+span_thread_clipped_box(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage)
+{
+ pixman_region16_t region;
+
+ __DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2,
+ AREA_TO_ALPHA(coverage)));
+
+ pixman_region_init_rects(&region, box, 1);
+ RegionIntersect(&region, &region, clip);
+ if (REGION_NUM_RECTS(&region)) {
+ span_thread_add_boxes(sna, op,
+ REGION_RECTS(&region),
+ REGION_NUM_RECTS(&region),
+ AREA_TO_ALPHA(coverage));
+ }
+ pixman_region_fini(&region);
+}
+
+static span_func_t
+thread_choose_span(struct sna_composite_spans_op *tmp,
+ PicturePtr dst,
+ PictFormatPtr maskFormat,
+ RegionPtr clip)
+{
+ span_func_t span;
+
+ if (tmp->base.damage)
+ return NULL;
+
+ if (is_mono(dst, maskFormat)) {
+ return NULL;
+ } else {
+ if (REGION_NUM_RECTS(clip) > 1)
+ span = span_thread_clipped_box;
+ else
+ span = span_thread_box;
+ }
+
+ return span;
+}
+
+static void
+span_thread(void *arg)
+{
+ struct span_thread *thread = arg;
+ struct span_thread_boxes boxes;
+ struct tor tor;
+ const xTrapezoid *t;
+ int n, y1, y2;
+
+ if (tor_init(&tor, &thread->extents, 2*thread->ntrap))
+ return;
+
+ boxes.op = thread->op;
+ boxes.num_boxes = 0;
+
+ y1 = thread->extents.y1 - thread->draw_y;
+ y2 = thread->extents.y2 - thread->draw_y;
+ for (n = thread->ntrap, t = thread->traps; n--; t++) {
+ xTrapezoid tt;
+
+ if (pixman_fixed_to_int(t->top) >= y2 ||
+ pixman_fixed_to_int(t->bottom) < y1)
+ continue;
+
+ if (!project_trapezoid_onto_grid(t, thread->dx, thread->dy, &tt))
+ continue;
+
+ tor_add_edge(&tor, &tt, &tt.left, 1);
+ tor_add_edge(&tor, &tt, &tt.right, -1);
+ }
+
+ tor_render(thread->sna, &tor,
+ (struct sna_composite_spans_op *)&boxes, thread->clip,
+ thread->span, thread->unbounded);
+
+ tor_fini(&tor);
+
+ if (boxes.num_boxes) {
+ DBG(("%s: flushing %d boxes\n", __FUNCTION__, boxes.num_boxes));
+ assert(boxes.num_boxes <= SPAN_THREAD_MAX_BOXES);
+ thread->op->thread_boxes(thread->sna, thread->op,
+ boxes.boxes, boxes.num_boxes);
+ }
+}
+
static bool
trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
PictFormatPtr maskFormat, unsigned int flags,
@@ -4196,12 +4345,12 @@ trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
{
struct sna *sna;
struct sna_composite_spans_op tmp;
- struct tor tor;
BoxRec extents;
pixman_region16_t clip;
int16_t dst_x, dst_y;
bool was_clear;
int dx, dy, n;
+ int num_threads;
if (NO_SCAN_CONVERTER)
return false;
@@ -4305,29 +4454,78 @@ trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
dx *= FAST_SAMPLES_X;
dy *= FAST_SAMPLES_Y;
- if (tor_init(&tor, &extents, 2*ntrap))
- goto skip;
- for (n = 0; n < ntrap; n++) {
- xTrapezoid t;
+ num_threads = 1;
+ if (!NO_GPU_THREADS && tmp.thread_boxes &&
+ thread_choose_span(&tmp, dst, maskFormat, &clip))
+ num_threads = sna_use_threads(extents.x2-extents.x1,
+ extents.y2-extents.y1,
+ 16);
+ if (num_threads == 1) {
+ struct tor tor;
- if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
- continue;
+ if (tor_init(&tor, &extents, 2*ntrap))
+ goto skip;
- if (pixman_fixed_to_int(traps[n].top) + dst->pDrawable->y >= extents.y2 ||
- pixman_fixed_to_int(traps[n].bottom) + dst->pDrawable->y < extents.y1)
- continue;
+ for (n = 0; n < ntrap; n++) {
+ xTrapezoid t;
- tor_add_edge(&tor, &t, &t.left, 1);
- tor_add_edge(&tor, &t, &t.right, -1);
- }
+ if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t))
+ continue;
- tor_render(sna, &tor, &tmp, &clip,
- choose_span(&tmp, dst, maskFormat, op, &clip),
- !was_clear && maskFormat && !operator_is_bounded(op));
+ if (pixman_fixed_to_int(traps[n].top) + dst->pDrawable->y >= extents.y2 ||
+ pixman_fixed_to_int(traps[n].bottom) + dst->pDrawable->y < extents.y1)
+ continue;
+
+ tor_add_edge(&tor, &t, &t.left, 1);
+ tor_add_edge(&tor, &t, &t.right, -1);
+ }
+
+ tor_render(sna, &tor, &tmp, &clip,
+ choose_span(&tmp, dst, maskFormat, &clip),
+ !was_clear && maskFormat && !operator_is_bounded(op));
skip:
- tor_fini(&tor);
+ tor_fini(&tor);
+ } else {
+ struct span_thread threads[num_threads];
+ int y, h;
+
+ DBG(("%s: using %d threads for span compositing %dx%d\n",
+ __FUNCTION__, num_threads,
+ extents.x2 - extents.x1,
+ extents.y2 - extents.y1));
+
+ threads[0].sna = sna;
+ threads[0].op = &tmp;
+ threads[0].traps = traps;
+ threads[0].ntrap = ntrap;
+ threads[0].extents = extents;
+ threads[0].clip = &clip;
+ threads[0].dx = dx;
+ threads[0].dy = dy;
+ threads[0].draw_y = dst->pDrawable->y;
+ threads[0].unbounded = !was_clear && maskFormat && !operator_is_bounded(op);
+ threads[0].span = thread_choose_span(&tmp, dst, maskFormat, &clip);
+
+ y = extents.y1;
+ h = extents.y2 - extents.y1;
+ h = (h + num_threads - 1) / num_threads;
+
+ for (n = 1; n < num_threads; n++) {
+ threads[n] = threads[0];
+ threads[n].extents.y1 = y;
+ threads[n].extents.y2 = y += h;
+
+ sna_threads_run(span_thread, &threads[n]);
+ }
+
+ threads[0].extents.y1 = y;
+ threads[0].extents.y2 = extents.y2;
+ span_thread(&threads[0]);
+
+ sna_threads_wait();
+ }
tmp.done(sna, &tmp);
REGION_UNINIT(NULL, &clip);
@@ -6282,7 +6480,7 @@ trap_span_converter(PicturePtr dst,
}
tor_render(sna, &tor, &tmp, clip,
- choose_span(&tmp, dst, NULL, PictOpAdd, clip), false);
+ choose_span(&tmp, dst, NULL, clip), false);
skip:
tor_fini(&tor);
@@ -6827,7 +7025,7 @@ triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
}
tor_render(sna, &tor, &tmp, &clip,
- choose_span(&tmp, dst, maskFormat, op, &clip),
+ choose_span(&tmp, dst, maskFormat, &clip),
!was_clear && maskFormat && !operator_is_bounded(op));
skip:
@@ -7201,7 +7399,7 @@ tristrip_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
assert(tor.polygon->num_edges <= 2*count);
tor_render(sna, &tor, &tmp, &clip,
- choose_span(&tmp, dst, maskFormat, op, &clip),
+ choose_span(&tmp, dst, maskFormat, &clip),
!was_clear && maskFormat && !operator_is_bounded(op));
skip:
diff --git a/src/sna/sna_vertex.c b/src/sna/sna_vertex.c
new file mode 100644
index 00000000..6755d9aa
--- /dev/null
+++ b/src/sna/sna_vertex.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "sna.h"
+
+#include <unistd.h>
+
+void sna_vertex_init(struct sna *sna)
+{
+ pthread_mutex_init(&sna->render.lock, NULL);
+ pthread_cond_init(&sna->render.wait, NULL);
+ sna->render.active = 0;
+}