diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2013-01-26 14:41:04 +0000 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2013-01-27 13:06:46 +0000 |
commit | 8178cff5718e69e14d3953a7f754d7585a06838f (patch) | |
tree | 563a6e806e898ed928c6ba4cf683f1cb990c46c5 | |
parent | 8ffb3f50b3b4601401da76e2848e059ab63231f4 (diff) |
sna: Begin sketching out a threaded rasteriser for spans
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- | configure.ac | 34 |
-rw-r--r-- | src/sna/Makefile.am | 2 |
-rw-r--r-- | src/sna/atomic.h | 89 |
-rw-r--r-- | src/sna/compiler.h | 2 |
-rw-r--r-- | src/sna/gen3_render.c | 323 |
-rw-r--r-- | src/sna/gen4_render.c | 50 |
-rw-r--r-- | src/sna/gen4_vertex.c | 229 |
-rw-r--r-- | src/sna/gen5_render.c | 50 |
-rw-r--r-- | src/sna/gen6_render.c | 56 |
-rw-r--r-- | src/sna/gen7_render.c | 50 |
-rw-r--r-- | src/sna/kgem_debug_gen6.c | 4 |
-rw-r--r-- | src/sna/sna.h | 1 |
-rw-r--r-- | src/sna/sna_render.c | 2 |
-rw-r--r-- | src/sna/sna_render.h | 50 |
-rw-r--r-- | src/sna/sna_trapezoids.c | 246 |
-rw-r--r-- | src/sna/sna_vertex.c | 37 |
16 files changed, 1174 insertions, 51 deletions
diff --git a/configure.ac b/configure.ac index cb1496b4..46affdcd 100644 --- a/configure.ac +++ b/configure.ac @@ -104,6 +104,40 @@ if test x$ASM != "xno"; then fi AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes) +# Check for atomic intrinsics +AC_CACHE_CHECK([for native atomic primitives], intel_cv_atomic_primitives, +[ + intel_cv_atomic_primitives="none" + + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ +int atomic_add(int i) { return __sync_fetch_and_add (&i, 1); } +int atomic_cmpxchg(int i, int j, int k) { return __sync_val_compare_and_swap (&i, j, k); } + ]],[[]])], + [intel_cv_atomic_primitives="Intel"],[]) + + if test "x$intel_cv_atomic_primitives" = "xnone"; then + AC_CHECK_HEADER([atomic_ops.h], intel_cv_atomic_primitives="libatomic-ops") + fi + + # atomic functions defined in <atomic.h> & libc on Solaris + if test "x$intel_cv_atomic_primitives" = "xnone"; then + AC_CHECK_FUNC([atomic_cas_uint], + intel_cv_atomic_primitives="Solaris") + fi + +]) +if test "x$intel_cv_atomic_primitives" = xIntel; then + AC_DEFINE(HAVE_ATOMIC_PRIMITIVES, 1, + [Enable if your compiler supports the Intel __sync_* atomic primitives]) +fi +if test "x$intel_cv_atomic_primitives" = "xlibatomic-ops"; then + AC_DEFINE(HAVE_LIB_ATOMIC_OPS, 1, [Enable if you have libatomic-ops-dev installed]) +fi + +if test "x$intel_cv_atomic_primitives" = "xnone"; then + AC_MSG_ERROR([xf86-video-intel depends upon atomic operations, which were not found for your compiler/cpu. 
Try compiling with -march=native, or install the libatomics-op-dev package.]) +fi + AC_ARG_ENABLE(udev, AS_HELP_STRING([--disable-udev], [Disable udev-based monitor hotplug detection [default=auto]]), diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am index bfa836f6..c74c904d 100644 --- a/src/sna/Makefile.am +++ b/src/sna/Makefile.am @@ -38,6 +38,7 @@ libsna_la_LDFLAGS = -pthread libsna_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@ brw/libbrw.la fb/libfb.la libsna_la_SOURCES = \ + atomic.h \ blt.c \ compiler.h \ kgem.c \ @@ -64,6 +65,7 @@ libsna_la_SOURCES = \ sna_tiling.c \ sna_transform.c \ sna_threads.c \ + sna_vertex.c \ sna_video.c \ sna_video.h \ sna_video_overlay.c \ diff --git a/src/sna/atomic.h b/src/sna/atomic.h new file mode 100644 index 00000000..306dc6db --- /dev/null +++ b/src/sna/atomic.h @@ -0,0 +1,89 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifndef ATOMIC_H +#define ATOMIC_H + +#if HAVE_ATOMIC_PRIMITIVES + +#define HAS_ATOMIC_OPS 1 + +typedef struct { + int atomic; +} atomic_t; + +# define atomic_read(x) ((x)->atomic) +# define atomic_set(x, val) ((x)->atomic = (val)) +# define atomic_inc(x) ((void) __sync_fetch_and_add (&(x)->atomic, 1)) +# define atomic_dec_and_test(x) (__sync_fetch_and_add (&(x)->atomic, -1) == 1) +# define atomic_add(x, v) ((void) __sync_add_and_fetch(&(x)->atomic, (v))) +# define atomic_dec(x, v) ((void) __sync_sub_and_fetch(&(x)->atomic, (v))) +# define atomic_cmpxchg(x, oldv, newv) __sync_val_compare_and_swap (&(x)->atomic, oldv, newv) + +#endif + +#if HAVE_LIB_ATOMIC_OPS +#include <atomic_ops.h> + +#define HAS_ATOMIC_OPS 1 + +typedef struct { + AO_t atomic; +} atomic_t; + +# define atomic_read(x) AO_load_full(&(x)->atomic) +# define atomic_set(x, val) AO_store_full(&(x)->atomic, (val)) +# define atomic_inc(x) ((void) AO_fetch_and_add1_full(&(x)->atomic)) +# define atomic_add(x, v) ((void) AO_fetch_and_add_full(&(x)->atomic, (v))) +# define atomic_dec(x, v) ((void) AO_fetch_and_add_full(&(x)->atomic, -(v))) +# define atomic_dec_and_test(x) (AO_fetch_and_sub1_full(&(x)->atomic) == 1) +# define atomic_cmpxchg(x, oldv, newv) AO_compare_and_swap_full(&(x)->atomic, oldv, newv) + +#endif + +#if defined(__sun) && !defined(HAS_ATOMIC_OPS) /* Solaris & OpenSolaris */ + +#include <sys/atomic.h> +#define HAS_ATOMIC_OPS 1 + +typedef struct { uint_t atomic; } atomic_t; + +# define atomic_read(x) (int) ((x)->atomic) +# define atomic_set(x, val) ((x)->atomic = (uint_t)(val)) +# define atomic_inc(x) (atomic_inc_uint (&(x)->atomic)) +# define atomic_dec_and_test(x) (atomic_dec_uint_nv(&(x)->atomic) == 1) +# define atomic_add(x, v) (atomic_add_int(&(x)->atomic, (v))) +# define atomic_dec(x, v) (atomic_add_int(&(x)->atomic, -(v))) +# define atomic_cmpxchg(x, oldv, newv) atomic_cas_uint (&(x)->atomic, oldv, newv) + +#endif + 
+#if ! HAS_ATOMIC_OPS +#error xf86-video-intel requires atomic operations, please define them for your CPU/compiler. +#endif + +#endif diff --git a/src/sna/compiler.h b/src/sna/compiler.h index ff80365e..b985f2bc 100644 --- a/src/sna/compiler.h +++ b/src/sna/compiler.h @@ -36,6 +36,7 @@ #define fastcall __attribute__((regparm(3))) #define must_check __attribute__((warn_unused_result)) #define constant __attribute__((const)) +#define __packed__ __attribute__((__packed__)) #else #define likely(expr) (expr) #define unlikely(expr) (expr) @@ -44,6 +45,7 @@ #define fastcall #define must_check #define constant +#define __packed__ #endif #ifdef HAVE_VALGRIND diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c index 01c0aeef..3224d717 100644 --- a/src/sna/gen3_render.c +++ b/src/sna/gen3_render.c @@ -1618,6 +1618,8 @@ static int gen3_vertex_finish(struct sna *sna) assert(sna->render.vertex_used); assert(sna->render.vertex_used <= sna->render.vertex_size); + sna_vertex_wait__locked(&sna->render); + bo = sna->render.vbo; if (bo) { DBG(("%s: reloc = %d\n", __FUNCTION__, @@ -1796,6 +1798,17 @@ static int gen3_get_rectangles__flush(struct sna *sna, } } + /* Preventing discarding new vbo after lock contention */ + if (sna->render.active) { + int rem; + + sna_vertex_wait__locked(&sna->render); + + rem = vertex_space(sna); + if (rem > op->floats_per_rect) + return rem; + } + return gen3_vertex_finish(sna); } @@ -1838,6 +1851,7 @@ flush: gen3_vertex_flush(sna); gen3_magic_ca_pass(sna, op); } + gen3_vertex_finish(sna); _kgem_submit(&sna->kgem); gen3_emit_composite_state(sna, op); assert(sna->render.vertex_offset == 0); @@ -3081,6 +3095,26 @@ gen3_emit_composite_spans_primitive_zero(struct sna *sna, } fastcall static void +gen3_emit_composite_spans_primitive_zero__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, float *v) +{ + do { + v[0] = op->base.dst.x + b->box.x2; + v[1] = op->base.dst.y + b->box.y2; + + v[2] = op->base.dst.x + 
b->box.x1; + v[3] = v[1]; + + v[4] = v[2]; + v[5] = op->base.dst.x + b->box.y1; + + v += 6; + b++; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, @@ -3096,6 +3130,22 @@ gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna, } fastcall static void +gen3_emit_composite_spans_primitive_zero_no_offset__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, float *v) +{ + do { + v[0] = b->box.x2; + v[3] = v[1] = b->box.y2; + v[4] = v[2] = b->box.x1; + v[5] = b->box.y1; + + b++; + v += 6; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_spans_primitive_constant(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, @@ -3112,6 +3162,24 @@ gen3_emit_composite_spans_primitive_constant(struct sna *sna, } fastcall static void +gen3_emit_composite_spans_primitive_constant__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + do { + v[0] = op->base.dst.x + b->box.x2; + v[6] = v[3] = op->base.dst.x + b->box.x1; + v[4] = v[1] = op->base.dst.y + b->box.y2; + v[7] = op->base.dst.y + b->box.y1; + v[8] = v[5] = v[2] = b->alpha; + + v += 9; + b++; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, @@ -3128,6 +3196,23 @@ gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna, } fastcall static void +gen3_emit_composite_spans_primitive_constant_no_offset__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, float *v) +{ + do { + v[0] = b->box.x2; + v[6] = v[3] = b->box.x1; + v[4] = v[1] = b->box.y2; + v[7] = b->box.y1; + v[8] = v[5] = v[2] = b->alpha; + + v += 9; + b++; + } while (--nbox); +} + +fastcall static void 
gen3_emit_composite_spans_primitive_identity_source(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, @@ -3156,6 +3241,36 @@ gen3_emit_composite_spans_primitive_identity_source(struct sna *sna, } fastcall static void +gen3_emit_composite_spans_primitive_identity_source__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + do { + v[0] = op->base.dst.x + b->box.x2; + v[1] = op->base.dst.y + b->box.y2; + v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0]; + v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1]; + v[4] = b->alpha; + + v[5] = op->base.dst.x + b->box.x1; + v[6] = v[1]; + v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0]; + v[8] = v[3]; + v[9] = b->alpha; + + v[10] = v[5]; + v[11] = op->base.dst.y + b->box.y1; + v[12] = v[7]; + v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1]; + v[14] = b->alpha; + + v += 15; + b++; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_spans_primitive_affine_source(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, @@ -3190,6 +3305,40 @@ gen3_emit_composite_spans_primitive_affine_source(struct sna *sna, } fastcall static void +gen3_emit_composite_spans_primitive_affine_source__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + PictTransform *transform = op->base.src.transform; + + do { + v[0] = op->base.dst.x + b->box.x2; + v[6] = v[1] = op->base.dst.y + b->box.y2; + v[10] = v[5] = op->base.dst.x + b->box.x1; + v[11] = op->base.dst.y + b->box.y1; + v[14] = v[9] = v[4] = b->alpha; + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2, + (int)op->base.src.offset[1] + b->box.y2, + transform, op->base.src.scale, + &v[2], &v[3]); + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1, + (int)op->base.src.offset[1] + b->box.y2, + transform, 
op->base.src.scale, + &v[7], &v[8]); + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1, + (int)op->base.src.offset[1] + b->box.y1, + transform, op->base.src.scale, + &v[12], &v[13]); + v += 15; + b++; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, @@ -3218,6 +3367,36 @@ gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna, } fastcall static void +gen3_emit_composite_spans_primitive_identity_gradient__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + do { + v[0] = op->base.dst.x + b->box.x2; + v[1] = op->base.dst.y + b->box.y2; + v[2] = op->base.src.offset[0] + b->box.x2; + v[3] = op->base.src.offset[1] + b->box.y2; + v[4] = b->alpha; + + v[5] = op->base.dst.x + b->box.x1; + v[6] = v[1]; + v[7] = op->base.src.offset[0] + b->box.x1; + v[8] = v[3]; + v[9] = b->alpha; + + v[10] = v[5]; + v[11] = op->base.dst.y + b->box.y1; + v[12] = v[7]; + v[13] = op->base.src.offset[1] + b->box.y1; + v[14] = b->alpha; + + v += 15; + b++; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, @@ -3253,6 +3432,43 @@ gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna, } fastcall static void +gen3_emit_composite_spans_primitive_affine_gradient__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + PictTransform *transform = op->base.src.transform; + + do { + v[0] = op->base.dst.x + b->box.x2; + v[1] = op->base.dst.y + b->box.y2; + _sna_get_transformed_coordinates((int)op->base.src.offset[0] + b->box.x2, + (int)op->base.src.offset[1] + b->box.y2, + transform, + &v[2], &v[3]); + v[4] = b->alpha; + + v[5] = op->base.dst.x + b->box.x1; + v[6] = v[1]; + 
_sna_get_transformed_coordinates((int)op->base.src.offset[0] + b->box.x1, + (int)op->base.src.offset[1] + b->box.y2, + transform, + &v[7], &v[8]); + v[9] = b->alpha; + + v[10] = v[5]; + v[11] = op->base.dst.y + b->box.y1; + _sna_get_transformed_coordinates((int)op->base.src.offset[0] + b->box.x1, + (int)op->base.src.offset[1] + b->box.y1, + transform, + &v[12], &v[13]); + v[14] = b->alpha; + v += 15; + b++; + } while (--nbox); +} + +fastcall static void gen3_emit_composite_spans_primitive(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, @@ -3297,6 +3513,48 @@ gen3_render_composite_spans_constant_box(struct sna *sna, } fastcall static void +gen3_render_composite_spans_constant_thread_boxes(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * 9; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + do { + v[0] = box->box.x2; + v[6] = v[3] = box->box.x1; + v[4] = v[1] = box->box.y2; + v[7] = box->box.y1; + v[8] = v[5] = v[2] = box->alpha; + v += 9; + box++; + } while (--nbox_this_time); + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void gen3_render_composite_spans_box(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, float opacity) @@ -3344,6 +3602,41 @@ gen3_render_composite_spans_boxes(struct sna *sna, } fastcall static void 
+gen3_render_composite_spans_boxes__thread(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void gen3_render_composite_spans_done(struct sna *sna, const struct sna_composite_spans_op *op) { @@ -3447,40 +3740,58 @@ gen3_render_composite_spans(struct sna *sna, no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0; tmp->box = gen3_render_composite_spans_box; tmp->boxes = gen3_render_composite_spans_boxes; + tmp->thread_boxes = gen3_render_composite_spans_boxes__thread; tmp->done = gen3_render_composite_spans_done; tmp->prim_emit = gen3_emit_composite_spans_primitive; switch (tmp->base.src.u.gen3.type) { case SHADER_NONE: assert(0); case SHADER_ZERO: - tmp->prim_emit = no_offset ? 
gen3_emit_composite_spans_primitive_zero_no_offset : gen3_emit_composite_spans_primitive_zero; + if (no_offset) { + tmp->prim_emit = gen3_emit_composite_spans_primitive_zero_no_offset; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero_no_offset__boxes; + } else { + tmp->prim_emit = gen3_emit_composite_spans_primitive_zero; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero__boxes; + } break; case SHADER_BLACK: case SHADER_WHITE: case SHADER_CONSTANT: if (no_offset) { tmp->box = gen3_render_composite_spans_constant_box; + tmp->thread_boxes = gen3_render_composite_spans_constant_thread_boxes; tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset; - } else + tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant_no_offset__boxes; + } else { tmp->prim_emit = gen3_emit_composite_spans_primitive_constant; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__boxes; + } break; case SHADER_LINEAR: case SHADER_RADIAL: - if (tmp->base.src.transform == NULL) + if (tmp->base.src.transform == NULL) { tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient; - else if (tmp->base.src.is_affine) + tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__boxes; + } else if (tmp->base.src.is_affine) { tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__boxes; + } break; case SHADER_TEXTURE: - if (tmp->base.src.transform == NULL) + if (tmp->base.src.transform == NULL) { tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source; - else if (tmp->base.src.is_affine) { + tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__boxes; + } else if (tmp->base.src.is_affine) { tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2]; tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2]; tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source; + tmp->emit_boxes = 
gen3_emit_composite_spans_primitive_affine_source__boxes; } break; } + if (tmp->emit_boxes == NULL) + tmp->thread_boxes = NULL; tmp->base.mask.bo = NULL; diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c index d2f3fff1..65016cd1 100644 --- a/src/sna/gen4_render.c +++ b/src/sna/gen4_render.c @@ -621,6 +621,17 @@ static int gen4_get_rectangles__flush(struct sna *sna, op->u.gen4.wm_kernel); } + /* Preventing discarding new vbo after lock contention */ + if (sna->render.active) { + int rem; + + sna_vertex_wait__locked(&sna->render); + + rem = vertex_space(sna); + if (rem > op->floats_per_rect) + return rem; + } + return gen4_vertex_finish(sna); } @@ -656,6 +667,7 @@ flush: gen4_vertex_flush(sna); gen4_magic_ca_pass(sna, op); } + gen4_vertex_finish(sna); _kgem_submit(&sna->kgem); emit_state(sna, op); goto start; @@ -1966,6 +1978,42 @@ gen4_render_composite_spans_boxes(struct sna *sna, } fastcall static void +gen4_render_composite_spans_boxes__thread(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen4_get_rectangles(sna, &op->base, nbox, + gen4_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void gen4_render_composite_spans_done(struct sna *sna, const struct sna_composite_spans_op *op) { @@ 
-2080,6 +2128,8 @@ gen4_render_composite_spans(struct sna *sna, tmp->box = gen4_render_composite_spans_box; tmp->boxes = gen4_render_composite_spans_boxes; + if (tmp->emit_boxes) + tmp->thread_boxes = gen4_render_composite_spans_boxes__thread; tmp->done = gen4_render_composite_spans_done; if (!kgem_check_bo(&sna->kgem, diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c index 4e404670..cc679d38 100644 --- a/src/sna/gen4_vertex.c +++ b/src/sna/gen4_vertex.c @@ -36,12 +36,13 @@ void gen4_vertex_flush(struct sna *sna) { - assert(sna->render.vertex_offset); - assert(sna->render.vertex_index > sna->render.vertex_start); - DBG(("%s[%x] = %d\n", __FUNCTION__, 4*sna->render.vertex_offset, sna->render.vertex_index - sna->render.vertex_start)); + + assert(sna->render.vertex_offset); + assert(sna->render.vertex_index > sna->render.vertex_start); + sna->kgem.batch[sna->render.vertex_offset] = sna->render.vertex_index - sna->render.vertex_start; sna->render.vertex_offset = 0; @@ -58,6 +59,8 @@ int gen4_vertex_finish(struct sna *sna) assert(sna->render.vertex_offset == 0); assert(sna->render.vertex_used); + sna_vertex_wait__locked(&sna->render); + /* Note: we only need dword alignment (currently) */ bo = sna->render.vbo; @@ -73,6 +76,7 @@ int gen4_vertex_finish(struct sna *sna) 0); } + assert(!sna->render.active); sna->render.nvertex_reloc = 0; sna->render.vertex_used = 0; sna->render.vertex_index = 0; @@ -87,6 +91,7 @@ int gen4_vertex_finish(struct sna *sna) hint |= CREATE_CACHED | CREATE_NO_THROTTLE; size = 256*1024; + assert(!sna->render.active); sna->render.vertices = NULL; sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint); while (sna->render.vbo == NULL && size > 16*1024) { @@ -144,6 +149,8 @@ void gen4_vertex_close(struct sna *sna) __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? 
sna->render.vbo->handle : 0, sna->render.vb_id, sna->render.nvertex_reloc)); + assert(!sna->render.active); + bo = sna->render.vbo; if (bo) { if (sna->render.vertex_size - sna->render.vertex_used < 64) { @@ -205,6 +212,7 @@ void gen4_vertex_close(struct sna *sna) sna->render.vb_id = 0; if (sna->render.vbo == NULL) { + assert(!sna->render.active); sna->render.vertex_used = 0; sna->render.vertex_index = 0; assert(sna->render.vertices == sna->render.vertex_data); @@ -853,7 +861,7 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp) } inline static void -emit_spans_vertex(struct sna *sna, +emit_span_vertex(struct sna *sna, const struct sna_composite_spans_op *op, int16_t x, int16_t y) { @@ -867,18 +875,18 @@ emit_composite_spans_primitive(struct sna *sna, const BoxRec *box, float opacity) { - emit_spans_vertex(sna, op, box->x2, box->y2); + emit_span_vertex(sna, op, box->x2, box->y2); OUT_VERTEX_F(opacity); - emit_spans_vertex(sna, op, box->x1, box->y2); + emit_span_vertex(sna, op, box->x1, box->y2); OUT_VERTEX_F(opacity); - emit_spans_vertex(sna, op, box->x1, box->y1); + emit_span_vertex(sna, op, box->x1, box->y1); OUT_VERTEX_F(opacity); } fastcall static void -emit_spans_solid(struct sna *sna, +emit_span_solid(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, float opacity) @@ -909,7 +917,36 @@ emit_spans_solid(struct sna *sna, } fastcall static void -emit_spans_identity(struct sna *sna, +emit_span_boxes_solid(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + + dst.p.x = b->box.x1; + v[3] = dst.f; + + dst.p.y = b->box.y1; + v[6] = dst.f; + + v[7] = v[4] = v[1] = .5; + v[8] = v[5] = v[2] = b->alpha; + + v += 9; + b++; + } while (--nbox); +} + +fastcall static void +emit_span_identity(struct sna *sna, const struct sna_composite_spans_op *op, const 
BoxRec *box, float opacity) @@ -949,7 +986,43 @@ emit_spans_identity(struct sna *sna, } fastcall static void -emit_spans_simple(struct sna *sna, +emit_span_boxes_identity(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + v[1] = (b->box.x2 + tx) * sx; + v[6] = v[2] = (b->box.y2 + ty) * sy; + + dst.p.x = b->box.x1; + v[4] = dst.f; + v[9] = v[5] = (b->box.x1 + tx) * sx; + + dst.p.y = b->box.y1; + v[8] = dst.f; + v[10] = (b->box.y1 + ty) * sy; + + v[11] = v[7] = v[3] = b->alpha; + v += 12; + b++; + } while (--nbox); +} + +fastcall static void +emit_span_simple(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, float opacity) @@ -993,7 +1066,47 @@ emit_spans_simple(struct sna *sna, } fastcall static void -emit_spans_affine(struct sna *sna, +emit_span_boxes_simple(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + float xx = op->base.src.transform->matrix[0][0]; + float x0 = op->base.src.transform->matrix[0][2]; + float yy = op->base.src.transform->matrix[1][1]; + float y0 = op->base.src.transform->matrix[1][2]; + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + v[1] = ((b->box.x2 + tx) * xx + x0) * sx; + v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy; + + dst.p.x = b->box.x1; + v[4] = dst.f; + v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx; + + dst.p.y = b->box.y1; + v[8] = dst.f; + v[10] = ((b->box.y1 + ty) * yy + y0) * sy; + + v[11] = 
v[7] = v[3] = b->alpha; + v += 9; + b++; + } while (--nbox); +} + +fastcall static void +emit_span_affine(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, float opacity) @@ -1038,7 +1151,50 @@ emit_spans_affine(struct sna *sna, } fastcall static void -emit_spans_linear(struct sna *sna, +emit_span_boxes_affine(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2, + op->base.src.offset[1] + b->box.y2, + op->base.src.transform, + op->base.src.scale, + &v[1], &v[2]); + + dst.p.x = b->box.x1; + v[4] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, + op->base.src.offset[1] + b->box.y2, + op->base.src.transform, + op->base.src.scale, + &v[5], &v[6]); + + dst.p.y = b->box.y1; + v[8] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, + op->base.src.offset[1] + b->box.y1, + op->base.src.transform, + op->base.src.scale, + &v[9], &v[10]); + + v[11] = v[7] = v[3] = b->alpha; + + v += 12; + b++; + } while (--nbox); +} + +fastcall static void +emit_span_linear(struct sna *sna, const struct sna_composite_spans_op *op, const BoxRec *box, float opacity) @@ -1069,6 +1225,35 @@ emit_spans_linear(struct sna *sna, v[8] = v[5] = v[2] = opacity; } +fastcall static void +emit_span_boxes_linear(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + dst.p.x = b->box.x1; + v[3] = dst.f; + dst.p.y = b->box.y1; + v[6] = dst.f; + + v[1] = compute_linear(&op->base.src, b->box.x2, b->box.y2); + v[4] = compute_linear(&op->base.src, b->box.x1, b->box.y2); + v[7] = compute_linear(&op->base.src, b->box.x1, 
b->box.y1); + + v[8] = v[5] = v[2] = b->alpha; + v += 9; + b++; + } while (--nbox); +} + inline inline static uint32_t gen4_choose_spans_vertex_buffer(const struct sna_composite_op *op) { @@ -1083,24 +1268,30 @@ unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp) unsigned vb; if (tmp->base.src.is_solid) { - tmp->prim_emit = emit_spans_solid; + tmp->prim_emit = emit_span_solid; + tmp->emit_boxes = emit_span_boxes_solid; tmp->base.floats_per_vertex = 3; vb = 1 << 2 | 1; } else if (tmp->base.src.is_linear) { - tmp->prim_emit = emit_spans_linear; + tmp->prim_emit = emit_span_linear; + tmp->emit_boxes = emit_span_boxes_linear; tmp->base.floats_per_vertex = 3; vb = 1 << 2 | 1; } else if (tmp->base.src.transform == NULL) { - tmp->prim_emit = emit_spans_identity; + tmp->prim_emit = emit_span_identity; + tmp->emit_boxes = emit_span_boxes_identity; tmp->base.floats_per_vertex = 4; vb = 1 << 2 | 2; } else if (tmp->base.is_affine) { tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2]; tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2]; - if (!sna_affine_transform_is_rotation(tmp->base.src.transform)) - tmp->prim_emit = emit_spans_simple; - else - tmp->prim_emit = emit_spans_affine; + if (!sna_affine_transform_is_rotation(tmp->base.src.transform)) { + tmp->prim_emit = emit_span_simple; + tmp->emit_boxes = emit_span_boxes_simple; + } else { + tmp->prim_emit = emit_span_affine; + tmp->emit_boxes = emit_span_boxes_affine; + } tmp->base.floats_per_vertex = 4; vb = 1 << 2 | 2; } else { diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c index 5995d1d9..81e6635a 100644 --- a/src/sna/gen5_render.c +++ b/src/sna/gen5_render.c @@ -607,6 +607,17 @@ static int gen5_get_rectangles__flush(struct sna *sna, op->u.gen5.wm_kernel); } + /* Preventing discarding new vbo after lock contention */ + if (sna->render.active) { + int rem; + + sna_vertex_wait__locked(&sna->render); + + rem = vertex_space(sna); + if (rem > op->floats_per_rect) + return 
rem; + } + return gen4_vertex_finish(sna); } @@ -643,6 +654,7 @@ flush: gen4_vertex_flush(sna); gen5_magic_ca_pass(sna, op); } + gen4_vertex_finish(sna); _kgem_submit(&sna->kgem); emit_state(sna, op); goto start; @@ -1939,6 +1951,42 @@ gen5_render_composite_spans_boxes(struct sna *sna, } fastcall static void +gen5_render_composite_spans_boxes__thread(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox, + gen5_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void gen5_render_composite_spans_done(struct sna *sna, const struct sna_composite_spans_op *op) { @@ -2049,6 +2097,8 @@ gen5_render_composite_spans(struct sna *sna, tmp->box = gen5_render_composite_spans_box; tmp->boxes = gen5_render_composite_spans_boxes; + if (tmp->emit_boxes) + tmp->thread_boxes = gen5_render_composite_spans_boxes__thread; tmp->done = gen5_render_composite_spans_done; if (!kgem_check_bo(&sna->kgem, diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c index 35ff862b..4ff1606d 100644 --- a/src/sna/gen6_render.c +++ b/src/sna/gen6_render.c @@ -1157,6 +1157,17 @@ static int gen6_get_rectangles__flush(struct sna *sna, } } + /* Preventing discarding new vbo after lock contention */ + if 
(sna->render.active) { + int rem; + + sna_vertex_wait__locked(&sna->render); + + rem = vertex_space(sna); + if (rem > op->floats_per_rect) + return rem; + } + return gen4_vertex_finish(sna); } @@ -1193,6 +1204,7 @@ flush: gen4_vertex_flush(sna); gen6_magic_ca_pass(sna, op); } + gen4_vertex_finish(sna); _kgem_submit(&sna->kgem); emit_state(sna, op); goto start; @@ -1293,6 +1305,7 @@ gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op) sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex; sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex; } + assert((sna->render.vertex_used % op->floats_per_vertex) == 0); } fastcall static void @@ -1720,6 +1733,7 @@ static void gen6_render_composite_done(struct sna *sna, { DBG(("%s\n", __FUNCTION__)); + assert(!sna->render.active); if (sna->render.vertex_offset) { gen4_vertex_flush(sna); gen6_magic_ca_pass(sna, op); @@ -2281,10 +2295,47 @@ gen6_render_composite_spans_boxes(struct sna *sna, } fastcall static void +gen6_render_composite_spans_boxes__thread(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, + gen6_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void 
gen6_render_composite_spans_done(struct sna *sna, const struct sna_composite_spans_op *op) { DBG(("%s()\n", __FUNCTION__)); + assert(!sna->render.active); if (sna->render.vertex_offset) gen4_vertex_flush(sna); @@ -2397,6 +2448,8 @@ gen6_render_composite_spans(struct sna *sna, tmp->box = gen6_render_composite_spans_box; tmp->boxes = gen6_render_composite_spans_boxes; + if (tmp->emit_boxes) + tmp->thread_boxes = gen6_render_composite_spans_boxes__thread; tmp->done = gen6_render_composite_spans_done; kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); @@ -2768,6 +2821,7 @@ gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op) { DBG(("%s()\n", __FUNCTION__)); + assert(!sna->render.active); if (sna->render.vertex_offset) gen4_vertex_flush(sna); } @@ -3115,6 +3169,7 @@ gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op) { DBG(("%s()\n", __FUNCTION__)); + assert(!sna->render.active); if (sna->render.vertex_offset) gen4_vertex_flush(sna); kgem_bo_destroy(&sna->kgem, op->base.src.bo); @@ -3409,6 +3464,7 @@ gen6_render_expire(struct kgem *kgem) if (sna->render.vbo && !sna->render.vertex_used) { DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); kgem_bo_destroy(kgem, sna->render.vbo); + assert(!sna->render.active); sna->render.vbo = NULL; sna->render.vertices = sna->render.vertex_data; sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index fa36ce65..34ba252a 100644 --- a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -1282,6 +1282,17 @@ static int gen7_get_rectangles__flush(struct sna *sna, } } + /* Preventing discarding new vbo after lock contention */ + if (sna->render.active) { + int rem; + + sna_vertex_wait__locked(&sna->render); + + rem = vertex_space(sna); + if (rem > op->floats_per_rect) + return rem; + } + return gen4_vertex_finish(sna); } @@ -1318,6 +1329,7 @@ flush: gen4_vertex_flush(sna); gen7_magic_ca_pass(sna, op); } 
+ gen4_vertex_finish(sna); _kgem_submit(&sna->kgem); emit_state(sna, op); goto start; @@ -2403,6 +2415,42 @@ gen7_render_composite_spans_boxes(struct sna *sna, } fastcall static void +gen7_render_composite_spans_boxes__thread(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox, + gen7_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void gen7_render_composite_spans_done(struct sna *sna, const struct sna_composite_spans_op *op) { @@ -2499,6 +2547,8 @@ gen7_render_composite_spans(struct sna *sna, tmp->box = gen7_render_composite_spans_box; tmp->boxes = gen7_render_composite_spans_boxes; + if (tmp->emit_boxes) + tmp->thread_boxes = gen7_render_composite_spans_boxes__thread; tmp->done = gen7_render_composite_spans_done; kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); diff --git a/src/sna/kgem_debug_gen6.c b/src/sna/kgem_debug_gen6.c index fd3f789a..7ef55d38 100644 --- a/src/sna/kgem_debug_gen6.c +++ b/src/sna/kgem_debug_gen6.c @@ -75,11 +75,11 @@ static void gen6_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) assert(i < kgem->nreloc); reloc = kgem->reloc[i].target_handle; - if (reloc == 0) { + if (reloc == -1) { base = kgem->batch; } else 
{ list_for_each_entry(bo, &kgem->next_request->buffers, request) - if (bo->handle == reloc) + if (bo->target_handle == reloc) break; assert(&bo->request != &kgem->next_request->buffers); base = kgem_bo_map__debug(kgem, bo); diff --git a/src/sna/sna.h b/src/sna/sna.h index 5832c99f..84d9807a 100644 --- a/src/sna/sna.h +++ b/src/sna/sna.h @@ -42,6 +42,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #endif #include <stdint.h> + #include "compiler.h" #include <xorg-server.h> diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c index 84c6b351..4b32b82d 100644 --- a/src/sna/sna_render.c +++ b/src/sna/sna_render.c @@ -304,6 +304,8 @@ void no_render_init(struct sna *sna) sna->kgem.expire = no_render_expire; if (sna->kgem.has_blt) sna->kgem.ring = KGEM_BLT; + + sna_vertex_init(sna); } static struct kgem_bo * diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h index 13a3e7df..6a0b1d8a 100644 --- a/src/sna/sna_render.h +++ b/src/sna/sna_render.h @@ -7,6 +7,8 @@ #include <stdbool.h> #include <stdint.h> +#include <pthread.h> +#include "atomic.h" #define GRADIENT_CACHE_SIZE 16 @@ -142,6 +144,11 @@ struct sna_composite_op { void *priv; }; +struct sna_opacity_box { + BoxRec box; + float alpha; +} __packed__; + struct sna_composite_spans_op { struct sna_composite_op base; @@ -153,6 +160,12 @@ struct sna_composite_spans_op { const struct sna_composite_spans_op *op, const BoxRec *box, int nbox, float opacity); + + fastcall void (*thread_boxes)(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox); + fastcall void (*done)(struct sna *sna, const struct sna_composite_spans_op *op); @@ -160,6 +173,9 @@ struct sna_composite_spans_op { const struct sna_composite_spans_op *op, const BoxRec *box, float opacity); + fastcall void (*emit_boxes)(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, int nbox, + float *v); }; struct sna_fill_op { @@ -188,6 +204,10 @@ struct sna_copy_op { }; struct 
sna_render { + pthread_mutex_t lock; + pthread_cond_t wait; + int active; + int max_3d_size; int max_3d_pitch; @@ -714,4 +734,34 @@ sna_render_copy_boxes__overlap(struct sna *sna, uint8_t alu, bool sna_composite_mask_is_opaque(PicturePtr mask); +void sna_vertex_init(struct sna *sna); + +static inline void sna_vertex_lock(struct sna_render *r) +{ + pthread_mutex_lock(&r->lock); +} + +static inline void sna_vertex_acquire__locked(struct sna_render *r) +{ + r->active++; +} + +static inline void sna_vertex_unlock(struct sna_render *r) +{ + pthread_mutex_unlock(&r->lock); +} + +static inline void sna_vertex_release__locked(struct sna_render *r) +{ + assert(r->active > 0); + if (--r->active == 0) + pthread_cond_signal(&r->wait); +} + +static inline void sna_vertex_wait__locked(struct sna_render *r) +{ + while (r->active) + pthread_cond_wait(&r->wait, &r->lock); +} + #endif /* SNA_RENDER_H */ diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c index 79e845a9..bf4816bb 100644 --- a/src/sna/sna_trapezoids.c +++ b/src/sna/sna_trapezoids.c @@ -49,6 +49,7 @@ #define NO_ALIGNED_BOXES 0 #define NO_UNALIGNED_BOXES 0 #define NO_SCAN_CONVERTER 0 +#define NO_GPU_THREADS 0 /* TODO: Emit unantialiased and MSAA triangles. */ @@ -328,10 +329,10 @@ floored_divrem(int a, int b) /* Compute the floored division (x*a)/b. Assumes / and % perform symmetric * division. */ static struct quorem -floored_muldivrem(int x, int a, int b) +floored_muldivrem(int32_t x, int32_t a, int32_t b) { struct quorem qr; - long long xa = (long long)x*a; + int64_t xa = (int64_t)x*a; qr.quo = xa/b; qr.rem = xa%b; if (qr.rem && (xa>=0) != (b>=0)) { @@ -674,6 +675,8 @@ polygon_add_edge(struct polygon *polygon, ybot = bottom <= ymax ? 
bottom : ymax; e->ytop = ytop; e->height_left = ybot - ytop; + if (e->height_left <= 0) + return; if (dx == 0) { e->x.quo = x1; @@ -736,6 +739,8 @@ polygon_add_line(struct polygon *polygon, e->ytop = top; e->height_left = bot - top; + if (e->height_left <= 0) + return; if (dx == 0) { e->x.quo = p1->x; @@ -4021,14 +4026,13 @@ static span_func_t choose_span(struct sna_composite_spans_op *tmp, PicturePtr dst, PictFormatPtr maskFormat, - uint8_t op, RegionPtr clip) { span_func_t span; if (is_mono(dst, maskFormat)) { /* XXX An imprecise approximation */ - if (maskFormat && !operator_is_bounded(op)) { + if (maskFormat && !operator_is_bounded(tmp->base.op)) { span = tor_blt_span_mono_unbounded; if (REGION_NUM_RECTS(clip) > 1) span = tor_blt_span_mono_unbounded_clipped; @@ -4188,6 +4192,151 @@ mono_trapezoids_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, return true; } +struct span_thread { + struct sna *sna; + const struct sna_composite_spans_op *op; + const xTrapezoid *traps; + RegionPtr clip; + span_func_t span; + BoxRec extents; + int dx, dy, draw_y; + int ntrap; + bool unbounded; +}; + +#define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box)) +struct span_thread_boxes { + const struct sna_composite_spans_op *op; + struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES]; + int num_boxes; +}; + +static void span_thread_add_boxes(struct sna *sna, void *data, + const BoxRec *box, int count, float alpha) +{ + struct span_thread_boxes *b = data; + + __DBG(("%s: adding %d boxes with alpha=%f\n", + __FUNCTION__, count, alpha)); + + assert(count > 0 && count <= SPAN_THREAD_MAX_BOXES); + if (b->num_boxes + count > SPAN_THREAD_MAX_BOXES) { + DBG(("%s: flushing %d boxes, adding %d\n", __FUNCTION__, b->num_boxes, count)); + assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES); + b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes); + b->num_boxes = 0; + } + + do { + b->boxes[b->num_boxes].box = *box++; + b->boxes[b->num_boxes].alpha = alpha; + b->num_boxes++; + } 
while (--count); + assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES); +} + +static void +span_thread_box(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + __DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage)); + span_thread_add_boxes(sna, op, box, 1, AREA_TO_ALPHA(coverage)); +} + +static void +span_thread_clipped_box(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + pixman_region16_t region; + + __DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, + AREA_TO_ALPHA(coverage))); + + pixman_region_init_rects(&region, box, 1); + RegionIntersect(&region, &region, clip); + if (REGION_NUM_RECTS(&region)) { + span_thread_add_boxes(sna, op, + REGION_RECTS(&region), + REGION_NUM_RECTS(&region), + AREA_TO_ALPHA(coverage)); + } + pixman_region_fini(&region); +} + +static span_func_t +thread_choose_span(struct sna_composite_spans_op *tmp, + PicturePtr dst, + PictFormatPtr maskFormat, + RegionPtr clip) +{ + span_func_t span; + + if (tmp->base.damage) + return NULL; + + if (is_mono(dst, maskFormat)) { + return NULL; + } else { + if (REGION_NUM_RECTS(clip) > 1) + span = span_thread_clipped_box; + else + span = span_thread_box; + } + + return span; +} + +static void +span_thread(void *arg) +{ + struct span_thread *thread = arg; + struct span_thread_boxes boxes; + struct tor tor; + const xTrapezoid *t; + int n, y1, y2; + + if (tor_init(&tor, &thread->extents, 2*thread->ntrap)) + return; + + boxes.op = thread->op; + boxes.num_boxes = 0; + + y1 = thread->extents.y1 - thread->draw_y; + y2 = thread->extents.y2 - thread->draw_y; + for (n = thread->ntrap, t = thread->traps; n--; t++) { + xTrapezoid tt; + + if (pixman_fixed_to_int(t->top) >= y2 || + pixman_fixed_to_int(t->bottom) < y1) + continue; + + if (!project_trapezoid_onto_grid(t, thread->dx, thread->dy, &tt)) + continue; + + tor_add_edge(&tor, &tt, &tt.left, 1); + tor_add_edge(&tor, 
&tt, &tt.right, -1); + } + + tor_render(thread->sna, &tor, + (struct sna_composite_spans_op *)&boxes, thread->clip, + thread->span, thread->unbounded); + + tor_fini(&tor); + + if (boxes.num_boxes) { + DBG(("%s: flushing %d boxes\n", __FUNCTION__, boxes.num_boxes)); + assert(boxes.num_boxes <= SPAN_THREAD_MAX_BOXES); + thread->op->thread_boxes(thread->sna, thread->op, + boxes.boxes, boxes.num_boxes); + } +} + static bool trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, PictFormatPtr maskFormat, unsigned int flags, @@ -4196,12 +4345,12 @@ trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, { struct sna *sna; struct sna_composite_spans_op tmp; - struct tor tor; BoxRec extents; pixman_region16_t clip; int16_t dst_x, dst_y; bool was_clear; int dx, dy, n; + int num_threads; if (NO_SCAN_CONVERTER) return false; @@ -4305,29 +4454,78 @@ trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, dx *= FAST_SAMPLES_X; dy *= FAST_SAMPLES_Y; - if (tor_init(&tor, &extents, 2*ntrap)) - goto skip; - for (n = 0; n < ntrap; n++) { - xTrapezoid t; + num_threads = 1; + if (!NO_GPU_THREADS && tmp.thread_boxes && + thread_choose_span(&tmp, dst, maskFormat, &clip)) + num_threads = sna_use_threads(extents.x2-extents.x1, + extents.y2-extents.y1, + 16); + if (num_threads == 1) { + struct tor tor; - if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t)) - continue; + if (tor_init(&tor, &extents, 2*ntrap)) + goto skip; - if (pixman_fixed_to_int(traps[n].top) + dst->pDrawable->y >= extents.y2 || - pixman_fixed_to_int(traps[n].bottom) + dst->pDrawable->y < extents.y1) - continue; + for (n = 0; n < ntrap; n++) { + xTrapezoid t; - tor_add_edge(&tor, &t, &t.left, 1); - tor_add_edge(&tor, &t, &t.right, -1); - } + if (!project_trapezoid_onto_grid(&traps[n], dx, dy, &t)) + continue; - tor_render(sna, &tor, &tmp, &clip, - choose_span(&tmp, dst, maskFormat, op, &clip), - !was_clear && maskFormat && !operator_is_bounded(op)); + if 
(pixman_fixed_to_int(traps[n].top) + dst->pDrawable->y >= extents.y2 || + pixman_fixed_to_int(traps[n].bottom) + dst->pDrawable->y < extents.y1) + continue; + + tor_add_edge(&tor, &t, &t.left, 1); + tor_add_edge(&tor, &t, &t.right, -1); + } + + tor_render(sna, &tor, &tmp, &clip, + choose_span(&tmp, dst, maskFormat, &clip), + !was_clear && maskFormat && !operator_is_bounded(op)); skip: - tor_fini(&tor); + tor_fini(&tor); + } else { + struct span_thread threads[num_threads]; + int y, h; + + DBG(("%s: using %d threads for span compositing %dx%d\n", + __FUNCTION__, num_threads, + extents.x2 - extents.x1, + extents.y2 - extents.y1)); + + threads[0].sna = sna; + threads[0].op = &tmp; + threads[0].traps = traps; + threads[0].ntrap = ntrap; + threads[0].extents = extents; + threads[0].clip = &clip; + threads[0].dx = dx; + threads[0].dy = dy; + threads[0].draw_y = dst->pDrawable->y; + threads[0].unbounded = !was_clear && maskFormat && !operator_is_bounded(op); + threads[0].span = thread_choose_span(&tmp, dst, maskFormat, &clip); + + y = extents.y1; + h = extents.y2 - extents.y1; + h = (h + num_threads - 1) / num_threads; + + for (n = 1; n < num_threads; n++) { + threads[n] = threads[0]; + threads[n].extents.y1 = y; + threads[n].extents.y2 = y += h; + + sna_threads_run(span_thread, &threads[n]); + } + + threads[0].extents.y1 = y; + threads[0].extents.y2 = extents.y2; + span_thread(&threads[0]); + + sna_threads_wait(); + } tmp.done(sna, &tmp); REGION_UNINIT(NULL, &clip); @@ -6282,7 +6480,7 @@ trap_span_converter(PicturePtr dst, } tor_render(sna, &tor, &tmp, clip, - choose_span(&tmp, dst, NULL, PictOpAdd, clip), false); + choose_span(&tmp, dst, NULL, clip), false); skip: tor_fini(&tor); @@ -6827,7 +7025,7 @@ triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, } tor_render(sna, &tor, &tmp, &clip, - choose_span(&tmp, dst, maskFormat, op, &clip), + choose_span(&tmp, dst, maskFormat, &clip), !was_clear && maskFormat && !operator_is_bounded(op)); skip: @@ -7201,7 
+7399,7 @@ tristrip_span_converter(CARD8 op, PicturePtr src, PicturePtr dst, assert(tor.polygon->num_edges <= 2*count); tor_render(sna, &tor, &tmp, &clip, - choose_span(&tmp, dst, maskFormat, op, &clip), + choose_span(&tmp, dst, maskFormat, &clip), !was_clear && maskFormat && !operator_is_bounded(op)); skip: diff --git a/src/sna/sna_vertex.c b/src/sna/sna_vertex.c new file mode 100644 index 00000000..6755d9aa --- /dev/null +++ b/src/sna/sna_vertex.c @@ -0,0 +1,37 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#include "sna.h" + +#include <unistd.h> + +void sna_vertex_init(struct sna *sna) +{ + pthread_mutex_init(&sna->render.lock, NULL); + pthread_cond_init(&sna->render.wait, NULL); + sna->render.active = 0; +} |