summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/sna/blt.c 42
-rw-r--r-- src/sna/compiler.h 5
2 files changed, 31 insertions, 16 deletions
diff --git a/src/sna/blt.c b/src/sna/blt.c
index 2dae9c2b..39e4149b 100644
--- a/src/sna/blt.c
+++ b/src/sna/blt.c
@@ -258,7 +258,8 @@ memcpy_to_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
if (dst_x & tile_mask) {
const unsigned x = (dst_x & tile_mask) * cpp;
const unsigned len = min(tile_width - x, w);
- memcpy(tile_row + x, src, len);
+ memcpy(assume_misaligned(tile_row + x, tile_width, x),
+ src, len);
tile_row += tile_size;
src = (const uint8_t *)src + len;
@@ -266,13 +267,14 @@ memcpy_to_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
}
}
while (w >= tile_width) {
- memcpy(tile_row, src, tile_width);
+ memcpy(assume_aligned(tile_row, tile_width),
+ src, tile_width);
tile_row += tile_size;
src = (const uint8_t *)src + tile_width;
w -= tile_width;
}
- memcpy(tile_row, src, w);
+ memcpy(assume_aligned(tile_row, tile_width), src, w);
src = (const uint8_t *)src + src_stride + w;
dst_y++;
}
@@ -314,7 +316,9 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
if (src_x & tile_mask) {
const unsigned x = (src_x & tile_mask) * cpp;
const unsigned len = min(tile_width - x, w);
- memcpy(dst, tile_row + x, len);
+ memcpy(dst,
+ assume_misaligned(tile_row + x, tile_width, x),
+ len);
tile_row += tile_size;
dst = (uint8_t *)dst + len;
@@ -322,13 +326,15 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
}
}
while (w >= tile_width) {
- memcpy(dst, tile_row, tile_width);
+ memcpy(dst,
+ assume_aligned(tile_row, tile_width),
+ tile_width);
tile_row += tile_size;
dst = (uint8_t *)dst + tile_width;
w -= tile_width;
}
- memcpy(dst, tile_row, w);
+ memcpy(dst, assume_aligned(tile_row, tile_width), w);
dst = (uint8_t *)dst + dst_stride + w;
src_y++;
}
@@ -379,7 +385,8 @@ memcpy_to_tiled_x__##swizzle (const void *src, void *dst, int bpp, \
tile_row + \
(dx >> tile_pixels) * tile_size + \
(dx & tile_mask) * cpp; \
- memcpy((char *)dst + swizzle(offset), src_row, 64); \
+ memcpy(assume_aligned((char *)dst+swizzle(offset),64), \
+ src_row, 64); \
src_row += 64; \
x -= 64; \
dx += swizzle_pixels; \
@@ -389,7 +396,7 @@ memcpy_to_tiled_x__##swizzle (const void *src, void *dst, int bpp, \
tile_row + \
(dx >> tile_pixels) * tile_size + \
(dx & tile_mask) * cpp; \
- memcpy((char *)dst + swizzle(offset), src_row, x); \
+ memcpy(assume_aligned((char *)dst + swizzle(offset), 64), src_row, x); \
} \
} \
}
@@ -439,7 +446,7 @@ memcpy_from_tiled_x__##swizzle (const void *src, void *dst, int bpp, \
tile_row + \
(sx >> tile_pixels) * tile_size + \
(sx & tile_mask) * cpp; \
- memcpy(dst_row, (const char *)src + swizzle(offset), 64); \
+ memcpy(dst_row, assume_aligned((const char *)src + swizzle(offset), 64), 64); \
dst_row += 64; \
x -= 64; \
sx += swizzle_pixels; \
@@ -449,7 +456,7 @@ memcpy_from_tiled_x__##swizzle (const void *src, void *dst, int bpp, \
tile_row + \
(sx >> tile_pixels) * tile_size + \
(sx & tile_mask) * cpp; \
- memcpy(dst_row, (const char *)src + swizzle(offset), x); \
+ memcpy(dst_row, assume_aligned((const char *)src + swizzle(offset), 64), x); \
} \
} \
}
@@ -510,7 +517,7 @@ memcpy_to_tiled_x__gen2(const void *src, void *dst, int bpp,
if (dst_x & tile_mask) {
const unsigned x = (dst_x & tile_mask) * cpp;
const unsigned len = min(tile_width - x, w);
- memcpy(tile_row + x, src, len);
+ memcpy(assume_misaligned(tile_row + x, tile_width, x), src, len);
tile_row += tile_size;
src = (const uint8_t *)src + len;
@@ -518,13 +525,14 @@ memcpy_to_tiled_x__gen2(const void *src, void *dst, int bpp,
}
}
while (w >= tile_width) {
- memcpy(tile_row, src, tile_width);
+ memcpy(assume_aligned(tile_row, tile_width),
+ src, tile_width);
tile_row += tile_size;
src = (const uint8_t *)src + tile_width;
w -= tile_width;
}
- memcpy(tile_row, src, w);
+ memcpy(assume_aligned(tile_row, tile_width), src, w);
src = (const uint8_t *)src + src_stride + w;
dst_y++;
}
@@ -566,7 +574,7 @@ memcpy_from_tiled_x__gen2(const void *src, void *dst, int bpp,
if (src_x & tile_mask) {
const unsigned x = (src_x & tile_mask) * cpp;
const unsigned len = min(tile_width - x, w);
- memcpy(dst, tile_row + x, len);
+ memcpy(dst, assume_misaligned(tile_row + x, tile_width, x), len);
tile_row += tile_size;
dst = (uint8_t *)dst + len;
@@ -574,13 +582,15 @@ memcpy_from_tiled_x__gen2(const void *src, void *dst, int bpp,
}
}
while (w >= tile_width) {
- memcpy(dst, tile_row, tile_width);
+ memcpy(dst,
+ assume_aligned(tile_row, tile_width),
+ tile_width);
tile_row += tile_size;
dst = (uint8_t *)dst + tile_width;
w -= tile_width;
}
- memcpy(dst, tile_row, w);
+ memcpy(dst, assume_aligned(tile_row, tile_width), w);
dst = (uint8_t *)dst + dst_stride + w;
src_y++;
}
diff --git a/src/sna/compiler.h b/src/sna/compiler.h
index c7231370..a1634b35 100644
--- a/src/sna/compiler.h
+++ b/src/sna/compiler.h
@@ -71,6 +71,11 @@
#if HAS_GCC(4, 7)
#define avx2 fast __attribute__((target("avx2,avx,sse4.2,sse2,fpmath=sse")))
+#define assume_aligned(ptr, align) __builtin_assume_aligned((ptr), (align))
+#define assume_misaligned(ptr, align, offset) __builtin_assume_aligned((ptr), (align), (offset))
+#else
+#define assume_aligned(ptr, align) (ptr)
+#define assume_misaligned(ptr, align, offset) (ptr)
#endif
#if HAS_GCC(4, 5) && defined(__OPTIMIZE__)