diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2014-06-28 14:18:23 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2014-06-28 14:18:23 +0100 |
commit | 2a0176379f0ff290d276adc72d44dfddafd96da5 (patch) | |
tree | ab7f33ae453f8806965bbf4208925a1e14190b28 | |
parent | 24cb50e53c789cb7a05d59ad103dda1c3a009485 (diff) |
sna: Micro-optimise unswizzling tiling/detiling
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- | src/sna/blt.c | 152 |
1 file changed, 68 insertions, 84 deletions
```diff
diff --git a/src/sna/blt.c b/src/sna/blt.c
index b61f88b2..b5bfee69 100644
--- a/src/sna/blt.c
+++ b/src/sna/blt.c
@@ -233,55 +233,47 @@ memcpy_to_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
 	const unsigned tile_size = 4096;
 
 	const unsigned cpp = bpp / 8;
-	const unsigned stride_tiles = dst_stride / tile_width;
-	const unsigned swizzle_pixels = tile_width / cpp;
-	const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
-	const unsigned tile_mask = (1 << tile_pixels) - 1;
-
-	unsigned x, y;
+	const unsigned tile_pixels = tile_width / cpp;
+	const unsigned tile_shift = ffs(tile_pixels) - 1;
+	const unsigned tile_mask = tile_pixels - 1;
 
 	DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
 	     __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+	assert(src != dst);
 
-	src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
-
-	for (y = 0; y < height; ++y) {
-		const uint32_t dy = y + dst_y;
-		const uint32_t tile_row =
-			(dy / tile_height * stride_tiles * tile_size +
-			 (dy & (tile_height-1)) * tile_width);
-		const uint8_t *src_row = (const uint8_t *)src + src_stride * y;
-		uint32_t dx = dst_x, offset;
-
-		x = width * cpp;
-		if (dx & (swizzle_pixels - 1)) {
-			const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels);
-			const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx;
-			offset = tile_row +
-				(dx >> tile_pixels) * tile_size +
-				(dx & tile_mask) * cpp;
-			memcpy((char *)dst + offset, src_row, length * cpp);
-
-			src_row += length * cpp;
-			x -= length * cpp;
-			dx += length;
+	if (src_x | src_y)
+		src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
+	assert(src_stride >= width * cpp);
+	src_stride -= width * cpp;
+
+	while (height--) {
+		unsigned w = width * cpp;
+		uint8_t *tile_row = dst;
+
+		tile_row += dst_y / tile_height * dst_stride * tile_height;
+		tile_row += (dst_y & (tile_height-1)) * tile_width;
+		if (dst_x) {
+			tile_row += (dst_x >> tile_shift) * tile_size;
+			if (dst_x & tile_mask) {
+				const unsigned x = (dst_x & tile_mask) * cpp;
+				const unsigned len = min(tile_width - x, w);
+				memcpy(tile_row + x, src, len);
+
+				tile_row += tile_size;
+				src = (const uint8_t *)src + len;
+				w -= len;
+			}
 		}
-		while (x >= 512) {
-			assert((dx & tile_mask) == 0);
-			offset = tile_row + (dx >> tile_pixels) * tile_size;
-
-			memcpy((char *)dst + offset, src_row, 512);
+		while (w >= tile_width) {
+			memcpy(tile_row, src, tile_width);
 
-			src_row += 512;
-			x -= 512;
-			dx += swizzle_pixels;
-		}
-		if (x) {
-			offset = tile_row +
-				(dx >> tile_pixels) * tile_size +
-				(dx & tile_mask) * cpp;
-			memcpy((char *)dst + offset, src_row, x);
+			tile_row += tile_size;
+			src = (const uint8_t *)src + tile_width;
+			w -= tile_width;
 		}
+		memcpy(tile_row, src, w);
+		src = (const uint8_t *)src + src_stride + w;
+		dst_y++;
 	}
 }
 
@@ -297,55 +289,47 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
 	const unsigned tile_size = 4096;
 
 	const unsigned cpp = bpp / 8;
-	const unsigned stride_tiles = src_stride / tile_width;
-	const unsigned swizzle_pixels = tile_width / cpp;
-	const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
-	const unsigned tile_mask = (1 << tile_pixels) - 1;
-
-	unsigned x, y;
+	const unsigned tile_pixels = tile_width / cpp;
+	const unsigned tile_shift = ffs(tile_pixels) - 1;
+	const unsigned tile_mask = tile_pixels - 1;
 
 	DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
 	     __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+	assert(src != dst);
 
-	dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
-
-	for (y = 0; y < height; ++y) {
-		const uint32_t sy = y + src_y;
-		const uint32_t tile_row =
-			(sy / tile_height * stride_tiles * tile_size +
-			 (sy & (tile_height-1)) * tile_width);
-		uint8_t *dst_row = (uint8_t *)dst + dst_stride * y;
-		uint32_t sx = src_x, offset;
-
-		x = width * cpp;
-		if (sx & (swizzle_pixels - 1)) {
-			const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels);
-			const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx;
-			offset = tile_row +
-				(sx >> tile_pixels) * tile_size +
-				(sx & tile_mask) * cpp;
-			memcpy(dst_row, (const char *)src + offset, length * cpp);
-
-			dst_row += length * cpp;
-			x -= length * cpp;
-			sx += length;
+	if (dst_x | dst_y)
+		dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
+	assert(dst_stride >= width * cpp);
+	dst_stride -= width * cpp;
+
+	while (height--) {
+		unsigned w = width * cpp;
+		const uint8_t *tile_row = src;
+
+		tile_row += src_y / tile_height * src_stride * tile_height;
+		tile_row += (src_y & (tile_height-1)) * tile_width;
+		if (src_x) {
+			tile_row += (src_x >> tile_shift) * tile_size;
+			if (src_x & tile_mask) {
+				const unsigned x = (src_x & tile_mask) * cpp;
+				const unsigned len = min(tile_width - x, w);
+				memcpy(dst, tile_row + x, len);
+
+				tile_row += tile_size;
+				dst = (uint8_t *)dst + len;
+				w -= len;
+			}
 		}
-		while (x >= 512) {
-			assert((sx & tile_mask) == 0);
-			offset = tile_row + (sx >> tile_pixels) * tile_size;
-
-			memcpy(dst_row, (const char *)src + offset, 512);
+		while (w >= tile_width) {
+			memcpy(dst, tile_row, tile_width);
 
-			dst_row += 512;
-			x -= 512;
-			sx += swizzle_pixels;
-		}
-		if (x) {
-			offset = tile_row +
-				(sx >> tile_pixels) * tile_size +
-				(sx & tile_mask) * cpp;
-			memcpy(dst_row, (const char *)src + offset, x);
+			tile_row += tile_size;
+			dst = (uint8_t *)dst + tile_width;
+			w -= tile_width;
 		}
+		memcpy(dst, tile_row, w);
+		dst = (uint8_t *)dst + dst_stride + w;
+		src_y++;
 	}
 }
 
```