summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2013-08-01 10:52:44 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2013-08-01 11:18:18 +0100
commit: 246911d742569eed6675698c1e51032ba50917d3 (patch)
tree: 2c9cc09b269fd810544678ca5d0bf27dc939b203 /src
parent: 8174c56c3ad6f1b0e6cd432c888f3eaca91159b4 (diff)
sna: Don't force inline string-ops for the general memcpy_blt routine
As we need optimal copy code for the general case, where, unlike swizzling, the run lengths are not known beforehand, we need to call the arch-specific routines from glibc.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src')
-rw-r--r-- src/sna/blt.c 10
-rw-r--r-- src/sna/compiler.h 4
2 files changed, 13 insertions, 1 deletion
diff --git a/src/sna/blt.c b/src/sna/blt.c
index 4a330938..4c27678d 100644
--- a/src/sna/blt.c
+++ b/src/sna/blt.c
@@ -138,7 +138,7 @@ xmm_save_128(__m128i *dst, __m128i data)
}
#endif
-fast_memcpy void
+fast void
memcpy_blt(const void *src, void *dst, int bpp,
int32_t src_stride, int32_t dst_stride,
int16_t src_x, int16_t src_y,
@@ -202,6 +202,14 @@ memcpy_blt(const void *src, void *dst, int bpp,
dst_bytes += dst_stride;
} while (--height);
break;
+ case 16:
+ do {
+ ((uint64_t *)dst_bytes)[0] = ((const uint64_t *)src_bytes)[0];
+ ((uint64_t *)dst_bytes)[1] = ((const uint64_t *)src_bytes)[1];
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
default:
do {
diff --git a/src/sna/compiler.h b/src/sna/compiler.h
index 9c1b2f1e..2f5dfc77 100644
--- a/src/sna/compiler.h
+++ b/src/sna/compiler.h
@@ -66,6 +66,10 @@
#endif
#if HAS_GCC(4, 6) && defined(__OPTIMIZE__)
+#define fast __attribute__((optimize("Ofast")))
+#endif
+
+#if HAS_GCC(4, 6) && defined(__OPTIMIZE__)
#define fast_memcpy __attribute__((optimize("Ofast"))) __attribute__((target("inline-all-stringops")))
#elif HAS_GCC(4, 5) && defined(__OPTIMIZE__)
#define fast_memcpy __attribute__((target("inline-all-stringops")))