summary refs log tree commit diff
path: root/src/sna
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2013-06-21 21:00:23 +0100
committer Chris Wilson <chris@chris-wilson.co.uk> 2013-06-21 21:00:23 +0100
commit 62e42de300275a668a326357d454062221714fe8 (patch)
tree 40a613401fbdcecaf4c4cb0417e9dc214cbe7806 /src/sna
parent 53c113c3cc2f8527debc185f0819139ca8637637 (diff)
sna: Determine swizzling once during initialisation and choose memcpy_to_tiled_x
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src/sna')
-rw-r--r-- src/sna/blt.c 324
-rw-r--r-- src/sna/kgem.c 48
-rw-r--r-- src/sna/kgem.h 24
-rw-r--r-- src/sna/sna.h 7
-rw-r--r-- src/sna/sna_accel.c 16
-rw-r--r-- src/sna/sna_io.c 15
6 files changed, 312 insertions, 122 deletions
diff --git a/src/sna/blt.c b/src/sna/blt.c
index af876672..4dbd9e86 100644
--- a/src/sna/blt.c
+++ b/src/sna/blt.c
@@ -213,12 +213,12 @@ memcpy_blt(const void *src, void *dst, int bpp,
}
}
-fast_memcpy void
-memcpy_to_tiled_x(const void *src, void *dst, int bpp, int swizzling,
- int32_t src_stride, int32_t dst_stride,
- int16_t src_x, int16_t src_y,
- int16_t dst_x, int16_t dst_y,
- uint16_t width, uint16_t height)
+static fast_memcpy void
+memcpy_to_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
{
const unsigned tile_width = 512;
const unsigned tile_height = 8;
@@ -226,14 +226,14 @@ memcpy_to_tiled_x(const void *src, void *dst, int bpp, int swizzling,
const unsigned cpp = bpp / 8;
const unsigned stride_tiles = dst_stride / tile_width;
- const unsigned swizzle_pixels = (swizzling ? 64 : tile_width) / cpp;
+ const unsigned swizzle_pixels = tile_width / cpp;
const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
const unsigned tile_mask = (1 << tile_pixels) - 1;
unsigned x, y;
- DBG(("%s(bpp=%d, swizzling=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
- __FUNCTION__, bpp, swizzling, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
@@ -252,19 +252,71 @@ memcpy_to_tiled_x(const void *src, void *dst, int bpp, int swizzling,
offset = tile_row +
(dx >> tile_pixels) * tile_size +
(dx & tile_mask) * cpp;
- switch (swizzling) {
- case I915_BIT_6_SWIZZLE_NONE:
- break;
- case I915_BIT_6_SWIZZLE_9:
- offset ^= (offset >> 3) & 64;
- break;
- case I915_BIT_6_SWIZZLE_9_10:
- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
- break;
- case I915_BIT_6_SWIZZLE_9_11:
- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
- break;
- }
+ memcpy((char *)dst + offset, src_row, length * cpp);
+
+ src_row += length * cpp;
+ x -= length * cpp;
+ dx += length;
+ }
+ while (x >= 512) {
+ assert((dx & tile_mask) == 0);
+ offset = tile_row + (dx >> tile_pixels) * tile_size;
+
+ memcpy((char *)dst + offset, src_row, 512);
+
+ src_row += 512;
+ x -= 512;
+ dx += swizzle_pixels;
+ }
+ if (x) {
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ memcpy((char *)dst + offset, src_row, x);
+ }
+ }
+}
+
+fast_memcpy static void
+memcpy_to_tiled_x__swizzle_9(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
+{
+ const unsigned tile_width = 512;
+ const unsigned tile_height = 8;
+ const unsigned tile_size = 4096;
+
+ const unsigned cpp = bpp / 8;
+ const unsigned stride_tiles = dst_stride / tile_width;
+ const unsigned swizzle_pixels = 64 / cpp;
+ const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+ const unsigned tile_mask = (1 << tile_pixels) - 1;
+
+ unsigned x, y;
+
+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+
+ src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
+
+ for (y = 0; y < height; ++y) {
+ const uint32_t dy = y + dst_y;
+ const uint32_t tile_row =
+ (dy / tile_height * stride_tiles * tile_size +
+ (dy & (tile_height-1)) * tile_width);
+ const uint8_t *src_row = (const uint8_t *)src + src_stride * y;
+ uint32_t dx = dst_x, offset;
+
+ x = width * cpp;
+ if (dx & (swizzle_pixels - 1)) {
+ const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels);
+ const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx;
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ offset ^= (offset >> 3) & 64;
memcpy((char *)dst + offset, src_row, length * cpp);
@@ -272,64 +324,184 @@ memcpy_to_tiled_x(const void *src, void *dst, int bpp, int swizzling,
x -= length * cpp;
dx += length;
}
- if (swizzling) {
- while (x >= 64) {
- offset = tile_row +
- (dx >> tile_pixels) * tile_size +
- (dx & tile_mask) * cpp;
- switch (swizzling) {
- case I915_BIT_6_SWIZZLE_9:
- offset ^= (offset >> 3) & 64;
- break;
- case I915_BIT_6_SWIZZLE_9_10:
- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
- break;
- case I915_BIT_6_SWIZZLE_9_11:
- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
- break;
- }
-
- memcpy((char *)dst + offset, src_row, 64);
-
- src_row += 64;
- x -= 64;
- dx += swizzle_pixels;
- }
- } else {
- while (x >= 512) {
- assert((dx & tile_mask) == 0);
- offset = tile_row + (dx >> tile_pixels) * tile_size;
+ while (x >= 64) {
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ offset ^= (offset >> 3) & 64;
- memcpy((char *)dst + offset, src_row, 512);
+ memcpy((char *)dst + offset, src_row, 64);
- src_row += 512;
- x -= 512;
- dx += swizzle_pixels;
- }
+ src_row += 64;
+ x -= 64;
+ dx += swizzle_pixels;
}
if (x) {
offset = tile_row +
(dx >> tile_pixels) * tile_size +
(dx & tile_mask) * cpp;
- switch (swizzling) {
- case I915_BIT_6_SWIZZLE_NONE:
- break;
- case I915_BIT_6_SWIZZLE_9:
- offset ^= (offset >> 3) & 64;
- break;
- case I915_BIT_6_SWIZZLE_9_10:
- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
- break;
- case I915_BIT_6_SWIZZLE_9_11:
- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
- break;
- }
+ offset ^= (offset >> 3) & 64;
+ memcpy((char *)dst + offset, src_row, x);
+ }
+ }
+}
+
+fast_memcpy static void
+memcpy_to_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
+{
+ const unsigned tile_width = 512;
+ const unsigned tile_height = 8;
+ const unsigned tile_size = 4096;
+
+ const unsigned cpp = bpp / 8;
+ const unsigned stride_tiles = dst_stride / tile_width;
+ const unsigned swizzle_pixels = 64 / cpp;
+ const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+ const unsigned tile_mask = (1 << tile_pixels) - 1;
+
+ unsigned x, y;
+
+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+
+ src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
+ for (y = 0; y < height; ++y) {
+ const uint32_t dy = y + dst_y;
+ const uint32_t tile_row =
+ (dy / tile_height * stride_tiles * tile_size +
+ (dy & (tile_height-1)) * tile_width);
+ const uint8_t *src_row = (const uint8_t *)src + src_stride * y;
+ uint32_t dx = dst_x, offset;
+
+ x = width * cpp;
+ if (dx & (swizzle_pixels - 1)) {
+ const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels);
+ const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx;
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+
+ memcpy((char *)dst + offset, src_row, length * cpp);
+
+ src_row += length * cpp;
+ x -= length * cpp;
+ dx += length;
+ }
+ while (x >= 64) {
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+
+ memcpy((char *)dst + offset, src_row, 64);
+
+ src_row += 64;
+ x -= 64;
+ dx += swizzle_pixels;
+ }
+ if (x) {
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
memcpy((char *)dst + offset, src_row, x);
}
}
}
+fast_memcpy static void
+memcpy_to_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
+{
+ const unsigned tile_width = 512;
+ const unsigned tile_height = 8;
+ const unsigned tile_size = 4096;
+
+ const unsigned cpp = bpp / 8;
+ const unsigned stride_tiles = dst_stride / tile_width;
+ const unsigned swizzle_pixels = 64 / cpp;
+ const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+ const unsigned tile_mask = (1 << tile_pixels) - 1;
+
+ unsigned x, y;
+
+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+
+ src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
+
+ for (y = 0; y < height; ++y) {
+ const uint32_t dy = y + dst_y;
+ const uint32_t tile_row =
+ (dy / tile_height * stride_tiles * tile_size +
+ (dy & (tile_height-1)) * tile_width);
+ const uint8_t *src_row = (const uint8_t *)src + src_stride * y;
+ uint32_t dx = dst_x, offset;
+
+ x = width * cpp;
+ if (dx & (swizzle_pixels - 1)) {
+ const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels);
+ const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx;
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+ memcpy((char *)dst + offset, src_row, length * cpp);
+
+ src_row += length * cpp;
+ x -= length * cpp;
+ dx += length;
+ }
+ while (x >= 64) {
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+
+ memcpy((char *)dst + offset, src_row, 64);
+
+ src_row += 64;
+ x -= 64;
+ dx += swizzle_pixels;
+ }
+ if (x) {
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+ memcpy((char *)dst + offset, src_row, x);
+ }
+ }
+}
+
+void choose_memcpy_to_tiled_x(struct kgem *kgem, int swizzling)
+{
+ switch (swizzling) {
+ default:
+ case I915_BIT_6_SWIZZLE_NONE:
+ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0;
+ break;
+ case I915_BIT_6_SWIZZLE_9:
+ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9;
+ break;
+ case I915_BIT_6_SWIZZLE_9_10:
+ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_10;
+ break;
+ case I915_BIT_6_SWIZZLE_9_11:
+ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_11;
+ break;
+ }
+}
+
void
memmove_box(const void *src, void *dst,
int bpp, int32_t stride,
@@ -561,10 +733,10 @@ memcpy_xor(const void *src, void *dst, int bpp,
while (i >= 16) {
__m128i xmm1, xmm2, xmm3, xmm4;
- xmm1 = xmm_load_128u((__m128i*)s + 0);
- xmm2 = xmm_load_128u((__m128i*)s + 1);
- xmm3 = xmm_load_128u((__m128i*)s + 2);
- xmm4 = xmm_load_128u((__m128i*)s + 3);
+ xmm1 = xmm_load_128u((const __m128i*)s + 0);
+ xmm2 = xmm_load_128u((const __m128i*)s + 1);
+ xmm3 = xmm_load_128u((const __m128i*)s + 2);
+ xmm4 = xmm_load_128u((const __m128i*)s + 3);
xmm_save_128((__m128i*)d + 0,
_mm_or_si128(xmm1, mask));
@@ -583,8 +755,8 @@ memcpy_xor(const void *src, void *dst, int bpp,
if (i & 8) {
__m128i xmm1, xmm2;
- xmm1 = xmm_load_128u((__m128i*)s + 0);
- xmm2 = xmm_load_128u((__m128i*)s + 1);
+ xmm1 = xmm_load_128u((const __m128i*)s + 0);
+ xmm2 = xmm_load_128u((const __m128i*)s + 1);
xmm_save_128((__m128i*)d + 0,
_mm_or_si128(xmm1, mask));
@@ -597,7 +769,7 @@ memcpy_xor(const void *src, void *dst, int bpp,
if (i & 4) {
xmm_save_128((__m128i*)d,
- _mm_or_si128(xmm_load_128u((__m128i*)s),
+ _mm_or_si128(xmm_load_128u((const __m128i*)s),
mask));
d += 4;
@@ -643,7 +815,7 @@ memcpy_xor(const void *src, void *dst, int bpp,
case 2:
do {
uint16_t *d = (uint16_t *)dst_bytes;
- uint16_t *s = (uint16_t *)src_bytes;
+ const uint16_t *s = (const uint16_t *)src_bytes;
for (i = 0; i < width; i++)
d[i] = (s[i] & and) | or;
@@ -656,7 +828,7 @@ memcpy_xor(const void *src, void *dst, int bpp,
case 4:
do {
uint32_t *d = (uint32_t *)dst_bytes;
- uint32_t *s = (uint32_t *)src_bytes;
+ const uint32_t *s = (const uint32_t *)src_bytes;
for (i = 0; i < width; i++)
d[i] = (s[i] & and) | or;
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 66dce479..b32ceee8 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -964,6 +964,39 @@ err:
return false;
}
+static void kgem_init_swizzling(struct kgem *kgem)
+{
+ struct drm_i915_gem_get_tiling tiling;
+
+#ifndef __x86_64__
+ /* Between a register starved compiler emitting atrocious code
+ * and the extra overhead in the kernel for managing the tight
+ * 32-bit address space, unless we have a 64-bit system,
+ * using memcpy_to_tiled_x() is extremely slow.
+ */
+ return;
+#endif
+
+ if (kgem->gen < 050) /* bit17 swizzling :( */
+ return;
+
+ VG_CLEAR(tiling);
+ tiling.handle = gem_create(kgem->fd, 1);
+ if (!tiling.handle)
+ return;
+
+ if (!gem_set_tiling(kgem->fd, tiling.handle, I915_TILING_X, 512))
+ goto out;
+
+ if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling))
+ goto out;
+
+ choose_memcpy_to_tiled_x(kgem, tiling.swizzle_mode);
+out:
+ gem_close(kgem->fd, tiling.handle);
+}
+
+
void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
{
struct drm_i915_gem_get_aperture aperture;
@@ -1212,6 +1245,8 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
if (kgem->has_pinned_batches)
kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;
+
+ kgem_init_swizzling(kgem);
}
/* XXX hopefully a good approximation */
@@ -5797,19 +5832,6 @@ void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset)
}
}
-int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo)
-{
- struct drm_i915_gem_get_tiling tiling;
-
- VG_CLEAR(tiling);
- tiling.handle = bo->handle;
- if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling))
- return 0;
-
- assert(bo->tiling == tiling.tiling_mode);
- return tiling.swizzle_mode;
-}
-
struct kgem_bo *
kgem_replace_bo(struct kgem *kgem,
struct kgem_bo *src,
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 33a4db08..91a38f75 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -196,6 +196,12 @@ struct kgem {
void (*retire)(struct kgem *kgem);
void (*expire)(struct kgem *kgem);
+ void (*memcpy_to_tiled_x)(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height);
+
uint16_t reloc__self[256];
uint32_t batch[64*1024-8] page_aligned;
struct drm_i915_gem_exec_object2 exec[256] page_aligned;
@@ -286,7 +292,6 @@ struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format);
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset);
-int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo);
bool kgem_retire(struct kgem *kgem);
@@ -693,4 +698,21 @@ static inline void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch)
}
#endif
+static inline void
+memcpy_to_tiled_x(struct kgem *kgem,
+ const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
+{
+ return kgem->memcpy_to_tiled_x(src, dst, bpp,
+ src_stride, dst_stride,
+ src_x, src_y,
+ dst_x, dst_y,
+ width, height);
+}
+
+void choose_memcpy_to_tiled_x(struct kgem *kgem, int swizzling);
+
#endif /* KGEM_H */
diff --git a/src/sna/sna.h b/src/sna/sna.h
index da5d8af2..f720c64f 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -848,12 +848,7 @@ memcpy_blt(const void *src, void *dst, int bpp,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height);
-void
-memcpy_to_tiled_x(const void *src, void *dst, int bpp, int swizzling,
- int32_t src_stride, int32_t dst_stride,
- int16_t src_x, int16_t src_y,
- int16_t dst_x, int16_t dst_y,
- uint16_t width, uint16_t height);
+
void
memmove_box(const void *src, void *dst,
int bpp, int32_t stride,
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 599cfc11..44b87cde 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -3868,15 +3868,7 @@ static inline void box32_add_rect(Box32Rec *box, const xRectangle *r)
static bool can_upload_tiled_x(struct kgem *kgem, struct kgem_bo *bo)
{
-#ifndef __x86_64__
- /* Between a register starved compiler emitting attrocious code
- * and the extra overhead in the kernel for managing the tight
- * 32-bit address space, unless we have a 64-bit system,
- * using memcpy_to_tiled_x() is extremely slow.
- */
- return false;
-#endif
- if (kgem->gen < 050) /* bit17 swizzling :( */
+ if (!kgem->memcpy_to_tiled_x)
return false;
if (bo->tiling != I915_TILING_X)
@@ -3896,7 +3888,6 @@ try_upload_tiled_x(PixmapPtr pixmap, RegionRec *region,
struct sna_pixmap *priv = sna_pixmap(pixmap);
BoxRec *box;
uint8_t *dst;
- int swizzle;
int n;
DBG(("%s: bo? %d, can tile? %d\n", __FUNCTION__,
@@ -3919,10 +3910,9 @@ try_upload_tiled_x(PixmapPtr pixmap, RegionRec *region,
DBG(("%s: upload(%d, %d, %d, %d) x %d\n", __FUNCTION__, x, y, w, h, n));
kgem_bo_sync__cpu(&sna->kgem, priv->gpu_bo);
- swizzle = kgem_bo_get_swizzling(&sna->kgem, priv->gpu_bo);
do {
- memcpy_to_tiled_x(bits, dst,
- pixmap->drawable.bitsPerPixel, swizzle,
+ memcpy_to_tiled_x(&sna->kgem, bits, dst,
+ pixmap->drawable.bitsPerPixel,
stride, priv->gpu_bo->pitch,
box->x1 - x, box->y1 - y,
box->x1, box->y1,
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index 1ec1a60f..e51c0335 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -477,16 +477,7 @@ fallback:
static bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo)
{
-#ifndef __x86_64__
- /* Between a register starved compiler emitting attrocious code
- * and the extra overhead in the kernel for managing the tight
- * 32-bit address space, unless we have a 64-bit system,
- * using memcpy_to_tiled_x() is extremely slow.
- */
- return false;
-#endif
-
- if (kgem->gen < 050) /* bit17 swizzling :( */
+ if (!kgem->memcpy_to_tiled_x)
return false;
if (bo->tiling != I915_TILING_X)
@@ -505,7 +496,6 @@ write_boxes_inplace__tiled(struct kgem *kgem,
const BoxRec *box, int n)
{
uint8_t *dst;
- int swizzle;
assert(bo->tiling == I915_TILING_X);
@@ -514,9 +504,8 @@ write_boxes_inplace__tiled(struct kgem *kgem,
return false;
kgem_bo_sync__cpu(kgem, bo);
- swizzle = kgem_bo_get_swizzling(kgem, bo);
do {
- memcpy_to_tiled_x(src, dst, bpp, swizzle, stride, bo->pitch,
+ memcpy_to_tiled_x(kgem, src, dst, bpp, stride, bo->pitch,
box->x1 + src_dx, box->y1 + src_dy,
box->x1 + dst_dx, box->y1 + dst_dy,
box->x2 - box->x1, box->y2 - box->y1);