diff options
Diffstat (limited to 'lib/libGL/gallium/drivers/llvmpipe/generated/lp_tile_soa.c')
-rw-r--r-- | lib/libGL/gallium/drivers/llvmpipe/generated/lp_tile_soa.c | 4936 |
1 files changed, 4936 insertions, 0 deletions
diff --git a/lib/libGL/gallium/drivers/llvmpipe/generated/lp_tile_soa.c b/lib/libGL/gallium/drivers/llvmpipe/generated/lp_tile_soa.c new file mode 100644 index 000000000..936bad915 --- /dev/null +++ b/lib/libGL/gallium/drivers/llvmpipe/generated/lp_tile_soa.c @@ -0,0 +1,4936 @@ +/* This file is autogenerated by lp_tile_soa.py from u_format.csv. Do not edit directly. */ + +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Pixel format accessor functions. 
+ * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include "pipe/p_compiler.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_half.h" +#include "util/u_cpu_detect.h" +#include "lp_tile_soa.h" + +#ifdef DEBUG +unsigned lp_tile_unswizzle_count = 0; +unsigned lp_tile_swizzle_count = 0; +#endif + +const unsigned char +tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = { + { 0, 1, 4, 5}, + { 2, 3, 6, 7}, + { 8, 9, 12, 13}, + { 10, 11, 14, 15} +}; + +/* Note: these lookup tables could be replaced with some + * bit-twiddling code, but this is a little faster. + */ +static unsigned tile_x_offset[TILE_VECTOR_WIDTH * TILE_VECTOR_HEIGHT] = { + 0, 1, 0, 1, 2, 3, 2, 3, + 0, 1, 0, 1, 2, 3, 2, 3 +}; + +static unsigned tile_y_offset[TILE_VECTOR_WIDTH * TILE_VECTOR_HEIGHT] = { + 0, 0, 1, 1, 0, 0, 1, 1, + 2, 2, 3, 3, 2, 2, 3, 3 +}; + + +#if defined(PIPE_ARCH_SSE) + +#include "util/u_sse.h" + +static ALWAYS_INLINE void +swz4( const __m128i * restrict x, + const __m128i * restrict y, + const __m128i * restrict z, + const __m128i * restrict w, + __m128i * restrict a, + __m128i * restrict b, + __m128i * restrict c, + __m128i * restrict d) +{ + __m128i i, j, k, l; + __m128i m, n, o, p; + __m128i e, f, g, h; + + m = _mm_unpacklo_epi8(*x,*y); + n = _mm_unpackhi_epi8(*x,*y); + o = _mm_unpacklo_epi8(*z,*w); + p = _mm_unpackhi_epi8(*z,*w); + + i = _mm_unpacklo_epi16(m,n); + j = _mm_unpackhi_epi16(m,n); + k = _mm_unpacklo_epi16(o,p); + l = _mm_unpackhi_epi16(o,p); + + e = _mm_unpacklo_epi8(i,j); + f = _mm_unpackhi_epi8(i,j); + g = _mm_unpacklo_epi8(k,l); + h = _mm_unpackhi_epi8(k,l); + + *a = _mm_unpacklo_epi64(e,g); + *b = _mm_unpackhi_epi64(e,g); + *c = _mm_unpacklo_epi64(f,h); + *d = _mm_unpackhi_epi64(f,h); +} + +static ALWAYS_INLINE void +unswz4( const __m128i * restrict a, + const __m128i * restrict b, + const __m128i * restrict c, + const __m128i * restrict d, + __m128i * restrict x, + __m128i * restrict y, + __m128i * restrict z, + __m128i * restrict 
w) +{ + __m128i i, j, k, l; + __m128i m, n, o, p; + + i = _mm_unpacklo_epi8(*a,*b); + j = _mm_unpackhi_epi8(*a,*b); + k = _mm_unpacklo_epi8(*c,*d); + l = _mm_unpackhi_epi8(*c,*d); + + m = _mm_unpacklo_epi16(i,k); + n = _mm_unpackhi_epi16(i,k); + o = _mm_unpacklo_epi16(j,l); + p = _mm_unpackhi_epi16(j,l); + + *x = _mm_unpacklo_epi64(m,n); + *y = _mm_unpackhi_epi64(m,n); + *z = _mm_unpacklo_epi64(o,p); + *w = _mm_unpackhi_epi64(o,p); +} + +static void +lp_tile_b8g8r8a8_unorm_swizzle_4ub_sse2(uint8_t * restrict dst, + const uint8_t * restrict src, unsigned src_stride, + unsigned x0, unsigned y0) +{ + __m128i *dst128 = (__m128i *) dst; + unsigned x, y; + + src += y0 * src_stride; + src += x0 * sizeof(uint32_t); + + for (y = 0; y < TILE_SIZE; y += 4) { + const uint8_t *src_row = src; + + for (x = 0; x < TILE_SIZE; x += 4) { + swz4((const __m128i *) (src_row + 0 * src_stride), + (const __m128i *) (src_row + 1 * src_stride), + (const __m128i *) (src_row + 2 * src_stride), + (const __m128i *) (src_row + 3 * src_stride), + dst128 + 2, /* b */ + dst128 + 1, /* g */ + dst128 + 0, /* r */ + dst128 + 3); /* a */ + + dst128 += 4; + src_row += sizeof(__m128i); + } + + src += 4 * src_stride; + } +} + +static void +lp_tile_b8g8r8a8_unorm_unswizzle_4ub_sse2(const uint8_t * restrict src, + uint8_t * restrict dst, unsigned dst_stride, + unsigned x0, unsigned y0) +{ + unsigned int x, y; + const __m128i *src128 = (const __m128i *) src; + + dst += y0 * dst_stride; + dst += x0 * sizeof(uint32_t); + + for (y = 0; y < TILE_SIZE; y += 4) { + const uint8_t *dst_row = dst; + + for (x = 0; x < TILE_SIZE; x += 4) { + unswz4( &src128[2], /* b */ + &src128[1], /* g */ + &src128[0], /* r */ + &src128[3], /* a */ + (__m128i *) (dst_row + 0 * dst_stride), + (__m128i *) (dst_row + 1 * dst_stride), + (__m128i *) (dst_row + 2 * dst_stride), + (__m128i *) (dst_row + 3 * dst_stride)); + + src128 += 4; + dst_row += sizeof(__m128i);; + } + + dst += 4 * dst_stride; + } +} + +#endif /* PIPE_ARCH_SSE */ + 
+static void +lp_tile_none_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*1); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_b8g8r8a8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t b; + uint8_t g; + uint8_t r; + uint8_t a; + b = (*src_pixel++); + g = (*src_pixel++); + r = (*src_pixel++); + a = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_b8g8r8x8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t b; + uint8_t g; + uint8_t r; + b = (*src_pixel++); + g = (*src_pixel++); + r = (*src_pixel++); + ++src_pixel; + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_a8r8g8b8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t 
*src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t a; + uint8_t r; + uint8_t g; + uint8_t b; + a = (*src_pixel++); + r = (*src_pixel++); + g = (*src_pixel++); + b = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_x8r8g8b8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + ++src_pixel; + r = (*src_pixel++); + g = (*src_pixel++); + b = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_a8b8g8r8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t a; + uint8_t b; + uint8_t g; + uint8_t r; + a = (*src_pixel++); + b = (*src_pixel++); + g = (*src_pixel++); + r = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_x8b8g8r8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t 
*src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t b; + uint8_t g; + uint8_t r; + ++src_pixel; + b = (*src_pixel++); + g = (*src_pixel++); + r = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8g8b8x8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = (*src_pixel++); + g = (*src_pixel++); + b = (*src_pixel++); + ++src_pixel; + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_b5g5r5x1_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint16_t pixel = *src_pixel++; + uint8_t b = (uint8_t)(((uint32_t)(pixel & 0x1f)) * 0xff / 0x1f); + uint8_t g = (uint8_t)(((uint32_t)((pixel >> 5) & 0x1f)) * 0xff / 0x1f); + uint8_t r = (uint8_t)(((uint32_t)((pixel >> 10) & 0x1f)) * 0xff / 0x1f); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += 
src_stride; + } +} + +static void +lp_tile_b5g5r5a1_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint16_t pixel = *src_pixel++; + uint8_t b = (uint8_t)(((uint32_t)(pixel & 0x1f)) * 0xff / 0x1f); + uint8_t g = (uint8_t)(((uint32_t)((pixel >> 5) & 0x1f)) * 0xff / 0x1f); + uint8_t r = (uint8_t)(((uint32_t)((pixel >> 10) & 0x1f)) * 0xff / 0x1f); + uint8_t a = (uint8_t)(((uint32_t)(pixel >> 15)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_b4g4r4a4_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint16_t pixel = *src_pixel++; + uint8_t b = (uint8_t)(((uint32_t)(pixel & 0xf)) * 0xff / 0xf); + uint8_t g = (uint8_t)(((uint32_t)((pixel >> 4) & 0xf)) * 0xff / 0xf); + uint8_t r = (uint8_t)(((uint32_t)((pixel >> 8) & 0xf)) * 0xff / 0xf); + uint8_t a = (uint8_t)(((uint32_t)(pixel >> 12)) * 0xff / 0xf); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_b4g4r4x4_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t 
*)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint16_t pixel = *src_pixel++; + uint8_t b = (uint8_t)(((uint32_t)(pixel & 0xf)) * 0xff / 0xf); + uint8_t g = (uint8_t)(((uint32_t)((pixel >> 4) & 0xf)) * 0xff / 0xf); + uint8_t r = (uint8_t)(((uint32_t)((pixel >> 8) & 0xf)) * 0xff / 0xf); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_b5g6r5_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint16_t pixel = *src_pixel++; + uint8_t b = (uint8_t)(((uint32_t)(pixel & 0x1f)) * 0xff / 0x1f); + uint8_t g = (uint8_t)(((uint32_t)((pixel >> 5) & 0x3f)) * 0xff / 0x3f); + uint8_t r = (uint8_t)(((uint32_t)(pixel >> 11)) * 0xff / 0x1f); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r10g10b10a2_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint32_t pixel = *src_pixel++; + uint8_t r = (uint8_t)((pixel & 0x3ff) >> 2); + uint8_t g = (uint8_t)(((pixel >> 10) & 0x3ff) >> 2); + uint8_t b = (uint8_t)(((pixel >> 20) & 0x3ff) >> 2); + uint8_t a = (uint8_t)(((uint32_t)(pixel >> 30)) * 0xff / 0x3); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + 
TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_b10g10r10a2_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint32_t pixel = *src_pixel++; + uint8_t b = (uint8_t)((pixel & 0x3ff) >> 2); + uint8_t g = (uint8_t)(((pixel >> 10) & 0x3ff) >> 2); + uint8_t r = (uint8_t)(((pixel >> 20) & 0x3ff) >> 2); + uint8_t a = (uint8_t)(((uint32_t)(pixel >> 30)) * 0xff / 0x3); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_l8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*1); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t rgb; + rgb = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = rgb; /* r */ + TILE_PIXEL(dst, x, y, 1) = rgb; /* g */ + TILE_PIXEL(dst, x, y, 2) = rgb; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_a8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*1); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t a; + a = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = 0; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} 
+ +static void +lp_tile_i8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*1); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t rgba; + rgba = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = rgba; /* r */ + TILE_PIXEL(dst, x, y, 1) = rgba; /* g */ + TILE_PIXEL(dst, x, y, 2) = rgba; /* b */ + TILE_PIXEL(dst, x, y, 3) = rgba; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_l4a4_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*1); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t pixel = *src_pixel++; + uint8_t rgb = (uint8_t)(((uint32_t)(pixel & 0xf)) * 0xff / 0xf); + uint8_t a = (uint8_t)(((uint32_t)(pixel >> 4)) * 0xff / 0xf); + TILE_PIXEL(dst, x, y, 0) = rgb; /* r */ + TILE_PIXEL(dst, x, y, 1) = rgb; /* g */ + TILE_PIXEL(dst, x, y, 2) = rgb; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_l8a8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t rgb; + uint8_t a; + rgb = (*src_pixel++); + a = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = rgb; /* r */ + TILE_PIXEL(dst, x, y, 1) = rgb; /* g */ + TILE_PIXEL(dst, x, y, 2) = rgb; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_l16_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, 
unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t rgb; + rgb = (uint8_t)((*src_pixel++) >> 8); + TILE_PIXEL(dst, x, y, 0) = rgb; /* r */ + TILE_PIXEL(dst, x, y, 1) = rgb; /* g */ + TILE_PIXEL(dst, x, y, 2) = rgb; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_l8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*1); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t rgb; + rgb = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = rgb; /* r */ + TILE_PIXEL(dst, x, y, 1) = rgb; /* g */ + TILE_PIXEL(dst, x, y, 2) = rgb; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_l8a8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t rgb; + uint8_t a; + rgb = (*src_pixel++); + a = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = rgb; /* r */ + TILE_PIXEL(dst, x, y, 1) = rgb; /* g */ + TILE_PIXEL(dst, x, y, 2) = rgb; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8g8b8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*3); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t 
r; + uint8_t g; + uint8_t b; + r = (*src_pixel++); + g = (*src_pixel++); + b = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8g8b8a8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + r = (*src_pixel++); + g = (*src_pixel++); + b = (*src_pixel++); + a = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_a8b8g8r8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t a; + uint8_t b; + uint8_t g; + uint8_t r; + a = (*src_pixel++); + b = (*src_pixel++); + g = (*src_pixel++); + r = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_x8b8g8r8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t b; + uint8_t g; + 
uint8_t r; + ++src_pixel; + b = (*src_pixel++); + g = (*src_pixel++); + r = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_b8g8r8a8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t b; + uint8_t g; + uint8_t r; + uint8_t a; + b = (*src_pixel++); + g = (*src_pixel++); + r = (*src_pixel++); + a = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_b8g8r8x8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t b; + uint8_t g; + uint8_t r; + b = (*src_pixel++); + g = (*src_pixel++); + r = (*src_pixel++); + ++src_pixel; + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_a8r8g8b8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t a; + uint8_t r; + uint8_t g; + uint8_t 
b; + a = (*src_pixel++); + r = (*src_pixel++); + g = (*src_pixel++); + b = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_x8r8g8b8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + ++src_pixel; + r = (*src_pixel++); + g = (*src_pixel++); + b = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8sg8sb8ux8u_norm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int8_t *src_pixel = (const int8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x7f); + g = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x7f); + b = (*src_pixel++); + ++src_pixel; + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r10sg10sb10sa2u_norm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*4); + for (x = 0; x < 
TILE_SIZE; ++x) { + uint32_t pixel = *src_pixel++; + uint8_t r = (uint8_t)((pixel & 0x3ff) >> 1); + uint8_t g = (uint8_t)(((pixel >> 10) & 0x3ff) >> 1); + uint8_t b = (uint8_t)(((pixel >> 20) & 0x3ff) >> 1); + uint8_t a = (uint8_t)(((uint32_t)(pixel >> 30)) * 0xff / 0x3); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r5sg5sb6u_norm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint16_t pixel = *src_pixel++; + uint8_t r = (uint8_t)(((uint32_t)(pixel & 0x1f)) * 0xff / 0xf); + uint8_t g = (uint8_t)(((uint32_t)((pixel >> 5) & 0x1f)) * 0xff / 0xf); + uint8_t b = (uint8_t)(((uint32_t)(pixel >> 10)) * 0xff / 0x3f); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r10g10b10a2_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint32_t pixel = *src_pixel++; + uint8_t r = (uint8_t)(((uint32_t)(pixel & 0x3ff)) * 0xff / 0x1); + uint8_t g = (uint8_t)(((uint32_t)((pixel >> 10) & 0x3ff)) * 0xff / 0x1); + uint8_t b = (uint8_t)(((uint32_t)((pixel >> 20) & 0x3ff)) * 0xff / 0x1); + uint8_t a = (uint8_t)(((uint32_t)(pixel >> 30)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, 
x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r64_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const double *src_pixel = (const double *)(src_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + r = (uint8_t)((*src_pixel++) * 0xff); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r64g64_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const double *src_pixel = (const double *)(src_row + x0*16); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + r = (uint8_t)((*src_pixel++) * 0xff); + g = (uint8_t)((*src_pixel++) * 0xff); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r64g64b64_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const double *src_pixel = (const double *)(src_row + x0*24); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = (uint8_t)((*src_pixel++) * 0xff); + g = (uint8_t)((*src_pixel++) * 0xff); + b = (uint8_t)((*src_pixel++) * 0xff); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += 
src_stride; + } +} + +static void +lp_tile_r64g64b64a64_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const double *src_pixel = (const double *)(src_row + x0*32); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + r = (uint8_t)((*src_pixel++) * 0xff); + g = (uint8_t)((*src_pixel++) * 0xff); + b = (uint8_t)((*src_pixel++) * 0xff); + a = (uint8_t)((*src_pixel++) * 0xff); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const float *src_pixel = (const float *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + r = float_to_ubyte((*src_pixel++)); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32g32_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const float *src_pixel = (const float *)(src_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + r = float_to_ubyte((*src_pixel++)); + g = float_to_ubyte((*src_pixel++)); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void 
+lp_tile_r32g32b32_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const float *src_pixel = (const float *)(src_row + x0*12); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = float_to_ubyte((*src_pixel++)); + g = float_to_ubyte((*src_pixel++)); + b = float_to_ubyte((*src_pixel++)); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32g32b32a32_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const float *src_pixel = (const float *)(src_row + x0*16); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + r = float_to_ubyte((*src_pixel++)); + g = float_to_ubyte((*src_pixel++)); + b = float_to_ubyte((*src_pixel++)); + a = float_to_ubyte((*src_pixel++)); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + r = (uint8_t)((*src_pixel++) >> 24); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static 
void +lp_tile_r32g32_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + r = (uint8_t)((*src_pixel++) >> 24); + g = (uint8_t)((*src_pixel++) >> 24); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32g32b32_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*12); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = (uint8_t)((*src_pixel++) >> 24); + g = (uint8_t)((*src_pixel++) >> 24); + b = (uint8_t)((*src_pixel++) >> 24); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32g32b32a32_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*16); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + r = (uint8_t)((*src_pixel++) >> 24); + g = (uint8_t)((*src_pixel++) >> 24); + b = (uint8_t)((*src_pixel++) >> 24); + a = (uint8_t)((*src_pixel++) >> 24); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + 
TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + r = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32g32_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + r = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32g32b32_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*12); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + b = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + 
TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32g32b32a32_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*16); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + r = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + b = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + a = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int32_t *src_pixel = (const int32_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + r = (uint8_t)((*src_pixel++) >> 23); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32g32_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int32_t *src_pixel = (const int32_t *)(src_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + r = (uint8_t)((*src_pixel++) >> 23); + g = (uint8_t)((*src_pixel++) >> 23); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ 
+ TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32g32b32_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int32_t *src_pixel = (const int32_t *)(src_row + x0*12); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = (uint8_t)((*src_pixel++) >> 23); + g = (uint8_t)((*src_pixel++) >> 23); + b = (uint8_t)((*src_pixel++) >> 23); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32g32b32a32_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int32_t *src_pixel = (const int32_t *)(src_row + x0*16); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + r = (uint8_t)((*src_pixel++) >> 23); + g = (uint8_t)((*src_pixel++) >> 23); + b = (uint8_t)((*src_pixel++) >> 23); + a = (uint8_t)((*src_pixel++) >> 23); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int32_t *src_pixel = (const int32_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + r = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); 
+ TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32g32_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int32_t *src_pixel = (const int32_t *)(src_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + r = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32g32b32_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int32_t *src_pixel = (const int32_t *)(src_row + x0*12); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + b = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r32g32b32a32_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int32_t *src_pixel = (const int32_t *)(src_row + x0*16); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + 
uint8_t b; + uint8_t a; + r = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + b = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + a = (uint8_t)(((uint64_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + r = float_to_ubyte(util_half_to_float((*src_pixel++))); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16g16_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + r = float_to_ubyte(util_half_to_float((*src_pixel++))); + g = float_to_ubyte(util_half_to_float((*src_pixel++))); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16g16b16_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = 
(const uint16_t *)(src_row + x0*6); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = float_to_ubyte(util_half_to_float((*src_pixel++))); + g = float_to_ubyte(util_half_to_float((*src_pixel++))); + b = float_to_ubyte(util_half_to_float((*src_pixel++))); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16g16b16a16_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + r = float_to_ubyte(util_half_to_float((*src_pixel++))); + g = float_to_ubyte(util_half_to_float((*src_pixel++))); + b = float_to_ubyte(util_half_to_float((*src_pixel++))); + a = float_to_ubyte(util_half_to_float((*src_pixel++))); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + r = (uint8_t)((*src_pixel++) >> 8); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16g16_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned 
x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + r = (uint8_t)((*src_pixel++) >> 8); + g = (uint8_t)((*src_pixel++) >> 8); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16g16b16_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*6); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = (uint8_t)((*src_pixel++) >> 8); + g = (uint8_t)((*src_pixel++) >> 8); + b = (uint8_t)((*src_pixel++) >> 8); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16g16b16a16_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + r = (uint8_t)((*src_pixel++) >> 8); + g = (uint8_t)((*src_pixel++) >> 8); + b = (uint8_t)((*src_pixel++) >> 8); + a = (uint8_t)((*src_pixel++) >> 8); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void 
+lp_tile_r16_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16g16_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16g16b16_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*6); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + b = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += 
src_stride; + } +} + +static void +lp_tile_r16g16b16a16_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + b = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + a = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int16_t *src_pixel = (const int16_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + r = (uint8_t)((*src_pixel++) >> 7); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16g16_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int16_t *src_pixel = (const int16_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + r = (uint8_t)((*src_pixel++) >> 7); + g = (uint8_t)((*src_pixel++) >> 7); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, 
y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16g16b16_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int16_t *src_pixel = (const int16_t *)(src_row + x0*6); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = (uint8_t)((*src_pixel++) >> 7); + g = (uint8_t)((*src_pixel++) >> 7); + b = (uint8_t)((*src_pixel++) >> 7); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16g16b16a16_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int16_t *src_pixel = (const int16_t *)(src_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + r = (uint8_t)((*src_pixel++) >> 7); + g = (uint8_t)((*src_pixel++) >> 7); + b = (uint8_t)((*src_pixel++) >> 7); + a = (uint8_t)((*src_pixel++) >> 7); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int16_t *src_pixel = (const int16_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 
0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16g16_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int16_t *src_pixel = (const int16_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16g16b16_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int16_t *src_pixel = (const int16_t *)(src_row + x0*6); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + b = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r16g16b16a16_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int16_t *src_pixel = (const int16_t *)(src_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + g = 
(uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + b = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + a = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*1); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + r = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8g8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + r = (*src_pixel++); + g = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8g8b8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*3); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = (*src_pixel++); + g = (*src_pixel++); + b = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = r; 
/* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8g8b8a8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + r = (*src_pixel++); + g = (*src_pixel++); + b = (*src_pixel++); + a = (*src_pixel++); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*1); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8g8_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + 
TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8g8b8_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*3); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + b = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8g8b8a8_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + b = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + a = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int8_t *src_pixel = (const int8_t *)(src_row + x0*1); + for (x = 0; x < 
TILE_SIZE; ++x) { + uint8_t r; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x7f); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8g8_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int8_t *src_pixel = (const int8_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x7f); + g = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x7f); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8g8b8_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int8_t *src_pixel = (const int8_t *)(src_row + x0*3); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x7f); + g = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x7f); + b = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x7f); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8g8b8a8_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int8_t *src_pixel = (const int8_t *)(src_row + 
x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x7f); + g = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x7f); + b = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x7f); + a = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x7f); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int8_t *src_pixel = (const int8_t *)(src_row + x0*1); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = 0; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8g8_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int8_t *src_pixel = (const int8_t *)(src_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = 0; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8g8b8_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + 
for (y = 0; y < TILE_SIZE; ++y) { + const int8_t *src_pixel = (const int8_t *)(src_row + x0*3); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + b = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r8g8b8a8_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const int8_t *src_pixel = (const int8_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + r = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + g = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + b = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + a = (uint8_t)(((uint32_t)(*src_pixel++)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = a; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r10g10b10x2_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint32_t pixel = *src_pixel++; + uint8_t r = (uint8_t)(((uint32_t)(pixel & 0x3ff)) * 0xff / 0x1); + uint8_t g = (uint8_t)(((uint32_t)((pixel >> 10) & 0x3ff)) * 0xff / 0x1); + uint8_t b = (uint8_t)(((uint32_t)((pixel >> 20) & 0x3ff)) * 0xff / 0x1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + 
TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +static void +lp_tile_r10g10b10x2_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + const uint8_t *src_row = src + y0*src_stride; + for (y = 0; y < TILE_SIZE; ++y) { + const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint32_t pixel = *src_pixel++; + uint8_t r = (uint8_t)((pixel & 0x3ff) >> 1); + uint8_t g = (uint8_t)(((pixel >> 10) & 0x3ff) >> 1); + uint8_t b = (uint8_t)(((pixel >> 20) & 0x3ff) >> 1); + TILE_PIXEL(dst, x, y, 0) = r; /* r */ + TILE_PIXEL(dst, x, y, 1) = g; /* g */ + TILE_PIXEL(dst, x, y, 2) = b; /* b */ + TILE_PIXEL(dst, x, y, 3) = 255; /* a */ + } + src_row += src_stride; + } +} + +void +lp_tile_swizzle_4ub(enum pipe_format format, uint8_t *dst, const void *src, unsigned src_stride, unsigned x, unsigned y) +{ + void (*func)(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0); +#ifdef DEBUG + lp_tile_swizzle_count += 1; +#endif + switch(format) { + case PIPE_FORMAT_NONE: + func = lp_tile_none_swizzle_4ub; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: +#ifdef PIPE_ARCH_SSE + func = util_cpu_caps.has_sse2 ? 
lp_tile_b8g8r8a8_unorm_swizzle_4ub_sse2 : lp_tile_b8g8r8a8_unorm_swizzle_4ub; +#else + func = lp_tile_b8g8r8a8_unorm_swizzle_4ub; +#endif + break; + case PIPE_FORMAT_B8G8R8X8_UNORM: + func = lp_tile_b8g8r8x8_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_A8R8G8B8_UNORM: + func = lp_tile_a8r8g8b8_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_X8R8G8B8_UNORM: + func = lp_tile_x8r8g8b8_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_A8B8G8R8_UNORM: + func = lp_tile_a8b8g8r8_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_X8B8G8R8_UNORM: + func = lp_tile_x8b8g8r8_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8X8_UNORM: + func = lp_tile_r8g8b8x8_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_B5G5R5X1_UNORM: + func = lp_tile_b5g5r5x1_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_B5G5R5A1_UNORM: + func = lp_tile_b5g5r5a1_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_B4G4R4A4_UNORM: + func = lp_tile_b4g4r4a4_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_B4G4R4X4_UNORM: + func = lp_tile_b4g4r4x4_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_B5G6R5_UNORM: + func = lp_tile_b5g6r5_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_R10G10B10A2_UNORM: + func = lp_tile_r10g10b10a2_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_B10G10R10A2_UNORM: + func = lp_tile_b10g10r10a2_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_L8_UNORM: + func = lp_tile_l8_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_A8_UNORM: + func = lp_tile_a8_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_I8_UNORM: + func = lp_tile_i8_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_L4A4_UNORM: + func = lp_tile_l4a4_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_L8A8_UNORM: + func = lp_tile_l8a8_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_L16_UNORM: + func = lp_tile_l16_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_L8_SRGB: + func = lp_tile_l8_srgb_swizzle_4ub; + break; + case PIPE_FORMAT_L8A8_SRGB: + func = lp_tile_l8a8_srgb_swizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8_SRGB: + func = 
lp_tile_r8g8b8_srgb_swizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8A8_SRGB: + func = lp_tile_r8g8b8a8_srgb_swizzle_4ub; + break; + case PIPE_FORMAT_A8B8G8R8_SRGB: + func = lp_tile_a8b8g8r8_srgb_swizzle_4ub; + break; + case PIPE_FORMAT_X8B8G8R8_SRGB: + func = lp_tile_x8b8g8r8_srgb_swizzle_4ub; + break; + case PIPE_FORMAT_B8G8R8A8_SRGB: + func = lp_tile_b8g8r8a8_srgb_swizzle_4ub; + break; + case PIPE_FORMAT_B8G8R8X8_SRGB: + func = lp_tile_b8g8r8x8_srgb_swizzle_4ub; + break; + case PIPE_FORMAT_A8R8G8B8_SRGB: + func = lp_tile_a8r8g8b8_srgb_swizzle_4ub; + break; + case PIPE_FORMAT_X8R8G8B8_SRGB: + func = lp_tile_x8r8g8b8_srgb_swizzle_4ub; + break; + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + func = lp_tile_r8sg8sb8ux8u_norm_swizzle_4ub; + break; + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + func = lp_tile_r10sg10sb10sa2u_norm_swizzle_4ub; + break; + case PIPE_FORMAT_R5SG5SB6U_NORM: + func = lp_tile_r5sg5sb6u_norm_swizzle_4ub; + break; + case PIPE_FORMAT_R10G10B10A2_USCALED: + func = lp_tile_r10g10b10a2_uscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R64_FLOAT: + func = lp_tile_r64_float_swizzle_4ub; + break; + case PIPE_FORMAT_R64G64_FLOAT: + func = lp_tile_r64g64_float_swizzle_4ub; + break; + case PIPE_FORMAT_R64G64B64_FLOAT: + func = lp_tile_r64g64b64_float_swizzle_4ub; + break; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + func = lp_tile_r64g64b64a64_float_swizzle_4ub; + break; + case PIPE_FORMAT_R32_FLOAT: + func = lp_tile_r32_float_swizzle_4ub; + break; + case PIPE_FORMAT_R32G32_FLOAT: + func = lp_tile_r32g32_float_swizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + func = lp_tile_r32g32b32_float_swizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + func = lp_tile_r32g32b32a32_float_swizzle_4ub; + break; + case PIPE_FORMAT_R32_UNORM: + func = lp_tile_r32_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_R32G32_UNORM: + func = lp_tile_r32g32_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32_UNORM: + func = lp_tile_r32g32b32_unorm_swizzle_4ub; + break; + 
case PIPE_FORMAT_R32G32B32A32_UNORM: + func = lp_tile_r32g32b32a32_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_R32_USCALED: + func = lp_tile_r32_uscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R32G32_USCALED: + func = lp_tile_r32g32_uscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32_USCALED: + func = lp_tile_r32g32b32_uscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32A32_USCALED: + func = lp_tile_r32g32b32a32_uscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R32_SNORM: + func = lp_tile_r32_snorm_swizzle_4ub; + break; + case PIPE_FORMAT_R32G32_SNORM: + func = lp_tile_r32g32_snorm_swizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32_SNORM: + func = lp_tile_r32g32b32_snorm_swizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32A32_SNORM: + func = lp_tile_r32g32b32a32_snorm_swizzle_4ub; + break; + case PIPE_FORMAT_R32_SSCALED: + func = lp_tile_r32_sscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R32G32_SSCALED: + func = lp_tile_r32g32_sscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32_SSCALED: + func = lp_tile_r32g32b32_sscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + func = lp_tile_r32g32b32a32_sscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R16_FLOAT: + func = lp_tile_r16_float_swizzle_4ub; + break; + case PIPE_FORMAT_R16G16_FLOAT: + func = lp_tile_r16g16_float_swizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16_FLOAT: + func = lp_tile_r16g16b16_float_swizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + func = lp_tile_r16g16b16a16_float_swizzle_4ub; + break; + case PIPE_FORMAT_R16_UNORM: + func = lp_tile_r16_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_R16G16_UNORM: + func = lp_tile_r16g16_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16_UNORM: + func = lp_tile_r16g16b16_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16A16_UNORM: + func = lp_tile_r16g16b16a16_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_R16_USCALED: + func = lp_tile_r16_uscaled_swizzle_4ub; + break; + case 
PIPE_FORMAT_R16G16_USCALED: + func = lp_tile_r16g16_uscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16_USCALED: + func = lp_tile_r16g16b16_uscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16A16_USCALED: + func = lp_tile_r16g16b16a16_uscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R16_SNORM: + func = lp_tile_r16_snorm_swizzle_4ub; + break; + case PIPE_FORMAT_R16G16_SNORM: + func = lp_tile_r16g16_snorm_swizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16_SNORM: + func = lp_tile_r16g16b16_snorm_swizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16A16_SNORM: + func = lp_tile_r16g16b16a16_snorm_swizzle_4ub; + break; + case PIPE_FORMAT_R16_SSCALED: + func = lp_tile_r16_sscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R16G16_SSCALED: + func = lp_tile_r16g16_sscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16_SSCALED: + func = lp_tile_r16g16b16_sscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + func = lp_tile_r16g16b16a16_sscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R8_UNORM: + func = lp_tile_r8_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_R8G8_UNORM: + func = lp_tile_r8g8_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8_UNORM: + func = lp_tile_r8g8b8_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + func = lp_tile_r8g8b8a8_unorm_swizzle_4ub; + break; + case PIPE_FORMAT_R8_USCALED: + func = lp_tile_r8_uscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R8G8_USCALED: + func = lp_tile_r8g8_uscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8_USCALED: + func = lp_tile_r8g8b8_uscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8A8_USCALED: + func = lp_tile_r8g8b8a8_uscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R8_SNORM: + func = lp_tile_r8_snorm_swizzle_4ub; + break; + case PIPE_FORMAT_R8G8_SNORM: + func = lp_tile_r8g8_snorm_swizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8_SNORM: + func = lp_tile_r8g8b8_snorm_swizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8A8_SNORM: + func = 
lp_tile_r8g8b8a8_snorm_swizzle_4ub; + break; + case PIPE_FORMAT_R8_SSCALED: + func = lp_tile_r8_sscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R8G8_SSCALED: + func = lp_tile_r8g8_sscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8_SSCALED: + func = lp_tile_r8g8b8_sscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + func = lp_tile_r8g8b8a8_sscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R10G10B10X2_USCALED: + func = lp_tile_r10g10b10x2_uscaled_swizzle_4ub; + break; + case PIPE_FORMAT_R10G10B10X2_SNORM: + func = lp_tile_r10g10b10x2_snorm_swizzle_4ub; + break; + default: + debug_printf("%s: unsupported format %s\n", __FUNCTION__, util_format_name(format)); + return; + } + func(dst, (const uint8_t *)src, src_stride, x, y); +} + +static void +lp_tile_none_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 1; + uint8_t *dstpix = (uint8_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint8_t)(((uint32_t)r[i+0]) * 0x1 / 0xff)) << 0); + const uint32_t pixel1 = (((uint8_t)(((uint32_t)r[i+1]) * 0x1 / 0xff)) << 0); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_b8g8r8a8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride 
/ 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = ((b[i+0]) << 0) | ((g[i+0]) << 8) | ((r[i+0]) << 16) | ((a[i+0]) << 24); + const uint32_t pixel1 = ((b[i+1]) << 0) | ((g[i+1]) << 8) | ((r[i+1]) << 16) | ((a[i+1]) << 24); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_b8g8r8x8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = ((b[i+0]) << 0) | ((g[i+0]) << 8) | ((r[i+0]) << 16); + const uint32_t pixel1 = ((b[i+1]) << 0) | ((g[i+1]) << 8) | ((r[i+1]) << 16); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += 
TILE_X_STRIDE; + } + } +} + +static void +lp_tile_a8r8g8b8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = ((a[i+0]) << 0) | ((r[i+0]) << 8) | ((g[i+0]) << 16) | ((b[i+0]) << 24); + const uint32_t pixel1 = ((a[i+1]) << 0) | ((r[i+1]) << 8) | ((g[i+1]) << 16) | ((b[i+1]) << 24); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_x8r8g8b8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = ((r[i+0]) << 8) | ((g[i+0]) << 16) | ((b[i+0]) << 24); + const uint32_t pixel1 = ((r[i+1]) << 8) | 
((g[i+1]) << 16) | ((b[i+1]) << 24); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_a8b8g8r8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = ((a[i+0]) << 0) | ((b[i+0]) << 8) | ((g[i+0]) << 16) | ((r[i+0]) << 24); + const uint32_t pixel1 = ((a[i+1]) << 0) | ((b[i+1]) << 8) | ((g[i+1]) << 16) | ((r[i+1]) << 24); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_x8b8g8r8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; 
(void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = ((b[i+0]) << 8) | ((g[i+0]) << 16) | ((r[i+0]) << 24); + const uint32_t pixel1 = ((b[i+1]) << 8) | ((g[i+1]) << 16) | ((r[i+1]) << 24); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_r8g8b8x8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = ((r[i+0]) << 0) | ((g[i+0]) << 8) | ((b[i+0]) << 16); + const uint32_t pixel1 = ((r[i+1]) << 0) | ((g[i+1]) << 8) | ((b[i+1]) << 16); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_b5g5r5x1_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 2; + uint16_t *dstpix = (uint16_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const 
uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint16_t)(b[i+0] >> 3)) << 0) | (((uint16_t)(g[i+0] >> 3)) << 5) | (((uint16_t)(r[i+0] >> 3)) << 10); + const uint32_t pixel1 = (((uint16_t)(b[i+1] >> 3)) << 0) | (((uint16_t)(g[i+1] >> 3)) << 5) | (((uint16_t)(r[i+1] >> 3)) << 10); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_b5g5r5a1_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 2; + uint16_t *dstpix = (uint16_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint16_t)(b[i+0] >> 3)) << 0) | (((uint16_t)(g[i+0] >> 3)) << 5) | (((uint16_t)(r[i+0] >> 3)) << 10) | (((uint16_t)(a[i+0] >> 7)) << 15); + const uint32_t pixel1 = (((uint16_t)(b[i+1] >> 3)) << 0) | (((uint16_t)(g[i+1] >> 3)) << 5) | (((uint16_t)(r[i+1] >> 3)) << 10) | (((uint16_t)(a[i+1] >> 7)) << 15); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void 
+lp_tile_b4g4r4a4_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 2; + uint16_t *dstpix = (uint16_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint16_t)(b[i+0] >> 4)) << 0) | (((uint16_t)(g[i+0] >> 4)) << 4) | (((uint16_t)(r[i+0] >> 4)) << 8) | (((uint16_t)(a[i+0] >> 4)) << 12); + const uint32_t pixel1 = (((uint16_t)(b[i+1] >> 4)) << 0) | (((uint16_t)(g[i+1] >> 4)) << 4) | (((uint16_t)(r[i+1] >> 4)) << 8) | (((uint16_t)(a[i+1] >> 4)) << 12); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_b4g4r4x4_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 2; + uint16_t *dstpix = (uint16_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = 
(((uint16_t)(b[i+0] >> 4)) << 0) | (((uint16_t)(g[i+0] >> 4)) << 4) | (((uint16_t)(r[i+0] >> 4)) << 8); + const uint32_t pixel1 = (((uint16_t)(b[i+1] >> 4)) << 0) | (((uint16_t)(g[i+1] >> 4)) << 4) | (((uint16_t)(r[i+1] >> 4)) << 8); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_b5g6r5_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 2; + uint16_t *dstpix = (uint16_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint16_t)(b[i+0] >> 3)) << 0) | (((uint16_t)(g[i+0] >> 2)) << 5) | (((uint16_t)(r[i+0] >> 3)) << 11); + const uint32_t pixel1 = (((uint16_t)(b[i+1] >> 3)) << 0) | (((uint16_t)(g[i+1] >> 2)) << 5) | (((uint16_t)(r[i+1] >> 3)) << 11); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_r10g10b10a2_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += 
TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint32_t)(((uint32_t)r[i+0]) * 0x3ff / 0xff)) << 0) | (((uint32_t)(((uint32_t)g[i+0]) * 0x3ff / 0xff)) << 10) | (((uint32_t)(((uint32_t)b[i+0]) * 0x3ff / 0xff)) << 20) | (((uint32_t)(a[i+0] >> 6)) << 30); + const uint32_t pixel1 = (((uint32_t)(((uint32_t)r[i+1]) * 0x3ff / 0xff)) << 0) | (((uint32_t)(((uint32_t)g[i+1]) * 0x3ff / 0xff)) << 10) | (((uint32_t)(((uint32_t)b[i+1]) * 0x3ff / 0xff)) << 20) | (((uint32_t)(a[i+1] >> 6)) << 30); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_b10g10r10a2_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint32_t)(((uint32_t)b[i+0]) * 0x3ff / 0xff)) << 0) | (((uint32_t)(((uint32_t)g[i+0]) * 0x3ff / 0xff)) << 10) | (((uint32_t)(((uint32_t)r[i+0]) * 0x3ff / 0xff)) << 20) | (((uint32_t)(a[i+0] >> 6)) << 30); + const uint32_t pixel1 = 
(((uint32_t)(((uint32_t)b[i+1]) * 0x3ff / 0xff)) << 0) | (((uint32_t)(((uint32_t)g[i+1]) * 0x3ff / 0xff)) << 10) | (((uint32_t)(((uint32_t)r[i+1]) * 0x3ff / 0xff)) << 20) | (((uint32_t)(a[i+1] >> 6)) << 30); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_l8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 1; + uint8_t *dstpix = (uint8_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = ((b[i+0]) << 0); + const uint32_t pixel1 = ((b[i+1]) << 0); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_a8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 1; + uint8_t *dstpix = (uint8_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * 
TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = ((a[i+0]) << 0); + const uint32_t pixel1 = ((a[i+1]) << 0); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_i8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 1; + uint8_t *dstpix = (uint8_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = ((a[i+0]) << 0); + const uint32_t pixel1 = ((a[i+1]) << 0); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_l4a4_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 1; + uint8_t *dstpix = (uint8_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * 
TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint8_t)(b[i+0] >> 4)) << 0) | (((uint8_t)(a[i+0] >> 4)) << 4); + const uint32_t pixel1 = (((uint8_t)(b[i+1] >> 4)) << 0) | (((uint8_t)(a[i+1] >> 4)) << 4); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_l8a8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 2; + uint16_t *dstpix = (uint16_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = ((b[i+0]) << 0) | ((a[i+0]) << 8); + const uint32_t pixel1 = ((b[i+1]) << 0) | ((a[i+1]) << 8); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_l16_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 2; + uint16_t *dstpix = (uint16_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * 
TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint16_t)(((uint32_t)b[i+0]) * 0xffff / 0xff)) << 0); + const uint32_t pixel1 = (((uint16_t)(((uint32_t)b[i+1]) * 0xffff / 0xff)) << 0); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_l8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint8_t *dst_pixel = (uint8_t *)(dst_row + x0*1); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = TILE_PIXEL(src, x, y, 2); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_l8a8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint8_t *dst_pixel = (uint8_t *)(dst_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = TILE_PIXEL(src, x, y, 2); + *dst_pixel++ = TILE_PIXEL(src, x, y, 3); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r8g8b8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint8_t *dst_pixel = (uint8_t *)(dst_row + x0*3); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = TILE_PIXEL(src, x, y, 0); + *dst_pixel++ = TILE_PIXEL(src, x, y, 1); + *dst_pixel++ = TILE_PIXEL(src, x, y, 2); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r8g8b8a8_srgb_unswizzle_4ub(const uint8_t 
*src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint8_t *dst_pixel = (uint8_t *)(dst_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = TILE_PIXEL(src, x, y, 0); + *dst_pixel++ = TILE_PIXEL(src, x, y, 1); + *dst_pixel++ = TILE_PIXEL(src, x, y, 2); + *dst_pixel++ = TILE_PIXEL(src, x, y, 3); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_a8b8g8r8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint8_t *dst_pixel = (uint8_t *)(dst_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = TILE_PIXEL(src, x, y, 3); + *dst_pixel++ = TILE_PIXEL(src, x, y, 2); + *dst_pixel++ = TILE_PIXEL(src, x, y, 1); + *dst_pixel++ = TILE_PIXEL(src, x, y, 0); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_x8b8g8r8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint8_t *dst_pixel = (uint8_t *)(dst_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + ++dst_pixel; + *dst_pixel++ = TILE_PIXEL(src, x, y, 2); + *dst_pixel++ = TILE_PIXEL(src, x, y, 1); + *dst_pixel++ = TILE_PIXEL(src, x, y, 0); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_b8g8r8a8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint8_t *dst_pixel = (uint8_t *)(dst_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = TILE_PIXEL(src, x, y, 2); + *dst_pixel++ = TILE_PIXEL(src, x, y, 1); + *dst_pixel++ = TILE_PIXEL(src, x, y, 0); + *dst_pixel++ = TILE_PIXEL(src, x, y, 3); + } + dst_row += 
dst_stride; + } +} + +static void +lp_tile_b8g8r8x8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint8_t *dst_pixel = (uint8_t *)(dst_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = TILE_PIXEL(src, x, y, 2); + *dst_pixel++ = TILE_PIXEL(src, x, y, 1); + *dst_pixel++ = TILE_PIXEL(src, x, y, 0); + ++dst_pixel; + } + dst_row += dst_stride; + } +} + +static void +lp_tile_a8r8g8b8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint8_t *dst_pixel = (uint8_t *)(dst_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = TILE_PIXEL(src, x, y, 3); + *dst_pixel++ = TILE_PIXEL(src, x, y, 0); + *dst_pixel++ = TILE_PIXEL(src, x, y, 1); + *dst_pixel++ = TILE_PIXEL(src, x, y, 2); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_x8r8g8b8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint8_t *dst_pixel = (uint8_t *)(dst_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + ++dst_pixel; + *dst_pixel++ = TILE_PIXEL(src, x, y, 0); + *dst_pixel++ = TILE_PIXEL(src, x, y, 1); + *dst_pixel++ = TILE_PIXEL(src, x, y, 2); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r8sg8sb8ux8u_norm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int8_t *dst_pixel = (int8_t *)(dst_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int8_t)(TILE_PIXEL(src, x, y, 0) >> 1); + *dst_pixel++ = (int8_t)(TILE_PIXEL(src, x, y, 1) >> 1); + *dst_pixel++ = 
TILE_PIXEL(src, x, y, 2); + ++dst_pixel; + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r10sg10sb10sa2u_norm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint32_t *dst_pixel = (uint32_t *)(dst_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint32_t pixel = 0; + pixel |= (uint32_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x1ff / 0xff); + pixel |= ((uint32_t)(((uint32_t)TILE_PIXEL(src, x, y, 1)) * 0x1ff / 0xff) << 10); + pixel |= ((uint32_t)(((uint32_t)TILE_PIXEL(src, x, y, 2)) * 0x1ff / 0xff) << 20); + pixel |= ((uint32_t)(TILE_PIXEL(src, x, y, 3) >> 6) << 30); + *dst_pixel++ = pixel; + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r5sg5sb6u_norm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint16_t *dst_pixel = (uint16_t *)(dst_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + uint16_t pixel = 0; + pixel |= (uint16_t)(TILE_PIXEL(src, x, y, 0) >> 4); + pixel |= ((uint16_t)(TILE_PIXEL(src, x, y, 1) >> 4) << 5); + pixel |= ((uint16_t)(TILE_PIXEL(src, x, y, 2) >> 2) << 10); + *dst_pixel++ = pixel; + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r10g10b10a2_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + 
(void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint32_t)(((uint32_t)r[i+0]) * 0x1 / 0xff)) << 0) | (((uint32_t)(((uint32_t)g[i+0]) * 0x1 / 0xff)) << 10) | (((uint32_t)(((uint32_t)b[i+0]) * 0x1 / 0xff)) << 20) | (((uint32_t)(((uint32_t)a[i+0]) * 0x1 / 0xff)) << 30); + const uint32_t pixel1 = (((uint32_t)(((uint32_t)r[i+1]) * 0x1 / 0xff)) << 0) | (((uint32_t)(((uint32_t)g[i+1]) * 0x1 / 0xff)) << 10) | (((uint32_t)(((uint32_t)b[i+1]) * 0x1 / 0xff)) << 20) | (((uint32_t)(((uint32_t)a[i+1]) * 0x1 / 0xff)) << 30); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_r64_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + double *dst_pixel = (double *)(dst_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (double)(TILE_PIXEL(src, x, y, 0) * (1.0f/0xff)); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r64g64_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + double *dst_pixel = (double *)(dst_row + x0*16); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (double)(TILE_PIXEL(src, x, y, 0) * (1.0f/0xff)); + *dst_pixel++ = (double)(TILE_PIXEL(src, x, y, 1) * (1.0f/0xff)); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r64g64b64_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + double *dst_pixel = (double *)(dst_row + x0*24); + for (x = 0; x 
< TILE_SIZE; ++x) { + *dst_pixel++ = (double)(TILE_PIXEL(src, x, y, 0) * (1.0f/0xff)); + *dst_pixel++ = (double)(TILE_PIXEL(src, x, y, 1) * (1.0f/0xff)); + *dst_pixel++ = (double)(TILE_PIXEL(src, x, y, 2) * (1.0f/0xff)); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r64g64b64a64_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + double *dst_pixel = (double *)(dst_row + x0*32); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (double)(TILE_PIXEL(src, x, y, 0) * (1.0f/0xff)); + *dst_pixel++ = (double)(TILE_PIXEL(src, x, y, 1) * (1.0f/0xff)); + *dst_pixel++ = (double)(TILE_PIXEL(src, x, y, 2) * (1.0f/0xff)); + *dst_pixel++ = (double)(TILE_PIXEL(src, x, y, 3) * (1.0f/0xff)); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r32_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + float *dst_pixel = (float *)(dst_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = ubyte_to_float(TILE_PIXEL(src, x, y, 0)); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r32g32_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + float *dst_pixel = (float *)(dst_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = ubyte_to_float(TILE_PIXEL(src, x, y, 0)); + *dst_pixel++ = ubyte_to_float(TILE_PIXEL(src, x, y, 1)); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r32g32b32_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + float 
*dst_pixel = (float *)(dst_row + x0*12); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = ubyte_to_float(TILE_PIXEL(src, x, y, 0)); + *dst_pixel++ = ubyte_to_float(TILE_PIXEL(src, x, y, 1)); + *dst_pixel++ = ubyte_to_float(TILE_PIXEL(src, x, y, 2)); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r32g32b32a32_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + float *dst_pixel = (float *)(dst_row + x0*16); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = ubyte_to_float(TILE_PIXEL(src, x, y, 0)); + *dst_pixel++ = ubyte_to_float(TILE_PIXEL(src, x, y, 1)); + *dst_pixel++ = ubyte_to_float(TILE_PIXEL(src, x, y, 2)); + *dst_pixel++ = ubyte_to_float(TILE_PIXEL(src, x, y, 3)); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r32_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint32_t)(((uint64_t)r[i+0]) * 0xffffffff / 0xff)) << 0); + const uint32_t pixel1 = (((uint32_t)(((uint64_t)r[i+1]) * 0xffffffff / 0xff)) << 0); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void 
+lp_tile_r32g32_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint32_t *dst_pixel = (uint32_t *)(dst_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 0)) * 0xffffffff / 0xff); + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 1)) * 0xffffffff / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r32g32b32_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint32_t *dst_pixel = (uint32_t *)(dst_row + x0*12); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 0)) * 0xffffffff / 0xff); + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 1)) * 0xffffffff / 0xff); + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 2)) * 0xffffffff / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r32g32b32a32_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint32_t *dst_pixel = (uint32_t *)(dst_row + x0*16); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 0)) * 0xffffffff / 0xff); + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 1)) * 0xffffffff / 0xff); + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 2)) * 0xffffffff / 0xff); + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 3)) * 0xffffffff / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r32_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned 
dstpix_stride = dst_stride / 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint32_t)(((uint64_t)r[i+0]) * 0x1 / 0xff)) << 0); + const uint32_t pixel1 = (((uint32_t)(((uint64_t)r[i+1]) * 0x1 / 0xff)) << 0); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_r32g32_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint32_t *dst_pixel = (uint32_t *)(dst_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 0)) * 0x1 / 0xff); + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 1)) * 0x1 / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r32g32b32_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint32_t *dst_pixel = (uint32_t *)(dst_row + x0*12); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 0)) * 0x1 / 0xff); + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 1)) * 0x1 / 0xff); + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 2)) * 0x1 
/ 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r32g32b32a32_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint32_t *dst_pixel = (uint32_t *)(dst_row + x0*16); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 0)) * 0x1 / 0xff); + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 1)) * 0x1 / 0xff); + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 2)) * 0x1 / 0xff); + *dst_pixel++ = (uint32_t)(((uint64_t)TILE_PIXEL(src, x, y, 3)) * 0x1 / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r32_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int32_t *dst_pixel = (int32_t *)(dst_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int32_t)(((uint64_t)TILE_PIXEL(src, x, y, 0)) * 0x7fffffff / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r32g32_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int32_t *dst_pixel = (int32_t *)(dst_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int32_t)(((uint64_t)TILE_PIXEL(src, x, y, 0)) * 0x7fffffff / 0xff); + *dst_pixel++ = (int32_t)(((uint64_t)TILE_PIXEL(src, x, y, 1)) * 0x7fffffff / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r32g32b32_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int32_t *dst_pixel = (int32_t *)(dst_row + x0*12); + for (x = 0; x < 
TILE_SIZE; ++x) { + *dst_pixel++ = (int32_t)(((uint64_t)TILE_PIXEL(src, x, y, 0)) * 0x7fffffff / 0xff); + *dst_pixel++ = (int32_t)(((uint64_t)TILE_PIXEL(src, x, y, 1)) * 0x7fffffff / 0xff); + *dst_pixel++ = (int32_t)(((uint64_t)TILE_PIXEL(src, x, y, 2)) * 0x7fffffff / 0xff); + } + dst_row += dst_stride; + } +}

/**
 * R32G32B32A32_SNORM: four signed 32-bit words per destination pixel,
 * computed from TILE_PIXEL channels 0..3 as v * 0x7fffffff / 0xff in 64-bit
 * arithmetic.
 */
static void
lp_tile_r32g32b32a32_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
{
   uint8_t *row = dst + y0*dst_stride;
   unsigned col, line;

   for (line = 0; line < TILE_SIZE; line++, row += dst_stride) {
      int32_t *out = (int32_t *)(row + x0*16);

      for (col = 0; col < TILE_SIZE; col++, out += 4) {
         out[0] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0x7fffffff / 0xff);
         out[1] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 1)) * 0x7fffffff / 0xff);
         out[2] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 2)) * 0x7fffffff / 0xff);
         out[3] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 3)) * 0x7fffffff / 0xff);
      }
   }
}

/**
 * R32_SSCALED: one signed 32-bit word per destination pixel, computed from
 * TILE_PIXEL channel 0 with the integer expression v * 0x1 / 0xff (integer
 * division, so the quotient is truncated).
 */
static void
lp_tile_r32_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
{
   uint8_t *row = dst + y0*dst_stride;
   unsigned col, line;

   for (line = 0; line < TILE_SIZE; line++, row += dst_stride) {
      int32_t *out = (int32_t *)(row + x0*4);

      for (col = 0; col < TILE_SIZE; col++, out += 1) {
         out[0] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0x1 / 0xff);
      }
   }
}

/**
 * R32G32_SSCALED: two signed 32-bit words per destination pixel, computed
 * from TILE_PIXEL channels 0 and 1 with the integer expression
 * v * 0x1 / 0xff (integer division, so the quotient is truncated).
 */
static void
lp_tile_r32g32_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
{
   uint8_t *row = dst + y0*dst_stride;
   unsigned col, line;

   for (line = 0; line < TILE_SIZE; line++, row += dst_stride) {
      int32_t *out = (int32_t *)(row + x0*8);

      for (col = 0; col < TILE_SIZE; col++, out += 2) {
         out[0] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0x1 / 0xff);
         out[1] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 1)) * 0x1 / 0xff);
      }
   }
}

+static void 
+lp_tile_r32g32b32_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int32_t *dst_pixel = (int32_t *)(dst_row + x0*12); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int32_t)(((uint64_t)TILE_PIXEL(src, x, y, 0)) * 0x1 / 0xff); + *dst_pixel++ = (int32_t)(((uint64_t)TILE_PIXEL(src, x, y, 1)) * 0x1 / 0xff); + *dst_pixel++ = (int32_t)(((uint64_t)TILE_PIXEL(src, x, y, 2)) * 0x1 / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r32g32b32a32_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int32_t *dst_pixel = (int32_t *)(dst_row + x0*16); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int32_t)(((uint64_t)TILE_PIXEL(src, x, y, 0)) * 0x1 / 0xff); + *dst_pixel++ = (int32_t)(((uint64_t)TILE_PIXEL(src, x, y, 1)) * 0x1 / 0xff); + *dst_pixel++ = (int32_t)(((uint64_t)TILE_PIXEL(src, x, y, 2)) * 0x1 / 0xff); + *dst_pixel++ = (int32_t)(((uint64_t)TILE_PIXEL(src, x, y, 3)) * 0x1 / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r16_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint16_t *dst_pixel = (uint16_t *)(dst_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = util_float_to_half((float)(TILE_PIXEL(src, x, y, 0) * (1.0f/0xff))); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r16g16_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint16_t *dst_pixel = (uint16_t *)(dst_row + x0*4); + for (x = 0; x 
< TILE_SIZE; ++x) { + *dst_pixel++ = util_float_to_half((float)(TILE_PIXEL(src, x, y, 0) * (1.0f/0xff))); + *dst_pixel++ = util_float_to_half((float)(TILE_PIXEL(src, x, y, 1) * (1.0f/0xff))); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r16g16b16_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint16_t *dst_pixel = (uint16_t *)(dst_row + x0*6); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = util_float_to_half((float)(TILE_PIXEL(src, x, y, 0) * (1.0f/0xff))); + *dst_pixel++ = util_float_to_half((float)(TILE_PIXEL(src, x, y, 1) * (1.0f/0xff))); + *dst_pixel++ = util_float_to_half((float)(TILE_PIXEL(src, x, y, 2) * (1.0f/0xff))); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r16g16b16a16_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint16_t *dst_pixel = (uint16_t *)(dst_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = util_float_to_half((float)(TILE_PIXEL(src, x, y, 0) * (1.0f/0xff))); + *dst_pixel++ = util_float_to_half((float)(TILE_PIXEL(src, x, y, 1) * (1.0f/0xff))); + *dst_pixel++ = util_float_to_half((float)(TILE_PIXEL(src, x, y, 2) * (1.0f/0xff))); + *dst_pixel++ = util_float_to_half((float)(TILE_PIXEL(src, x, y, 3) * (1.0f/0xff))); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r16_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 2; + uint16_t *dstpix = (uint16_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = 
src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint16_t)(((uint32_t)r[i+0]) * 0xffff / 0xff)) << 0); + const uint32_t pixel1 = (((uint16_t)(((uint32_t)r[i+1]) * 0xffff / 0xff)) << 0); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_r16g16_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint16_t)(((uint32_t)r[i+0]) * 0xffff / 0xff)) << 0) | (((uint16_t)(((uint32_t)g[i+0]) * 0xffff / 0xff)) << 16); + const uint32_t pixel1 = (((uint16_t)(((uint32_t)r[i+1]) * 0xffff / 0xff)) << 0) | (((uint16_t)(((uint32_t)g[i+1]) * 0xffff / 0xff)) << 16); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_r16g16b16_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t 
*dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint16_t *dst_pixel = (uint16_t *)(dst_row + x0*6); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (uint16_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0xffff / 0xff); + *dst_pixel++ = (uint16_t)(((uint32_t)TILE_PIXEL(src, x, y, 1)) * 0xffff / 0xff); + *dst_pixel++ = (uint16_t)(((uint32_t)TILE_PIXEL(src, x, y, 2)) * 0xffff / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r16g16b16a16_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint16_t *dst_pixel = (uint16_t *)(dst_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (uint16_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0xffff / 0xff); + *dst_pixel++ = (uint16_t)(((uint32_t)TILE_PIXEL(src, x, y, 1)) * 0xffff / 0xff); + *dst_pixel++ = (uint16_t)(((uint32_t)TILE_PIXEL(src, x, y, 2)) * 0xffff / 0xff); + *dst_pixel++ = (uint16_t)(((uint32_t)TILE_PIXEL(src, x, y, 3)) * 0xffff / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r16_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 2; + uint16_t *dstpix = (uint16_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint16_t)(((uint32_t)r[i+0]) * 0x1 / 0xff)) << 0); + const uint32_t pixel1 = (((uint16_t)(((uint32_t)r[i+1]) * 
0x1 / 0xff)) << 0); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_r16g16_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint16_t)(((uint32_t)r[i+0]) * 0x1 / 0xff)) << 0) | (((uint16_t)(((uint32_t)g[i+0]) * 0x1 / 0xff)) << 16); + const uint32_t pixel1 = (((uint16_t)(((uint32_t)r[i+1]) * 0x1 / 0xff)) << 0) | (((uint16_t)(((uint32_t)g[i+1]) * 0x1 / 0xff)) << 16); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_r16g16b16_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint16_t *dst_pixel = (uint16_t *)(dst_row + x0*6); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (uint16_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x1 / 0xff); + *dst_pixel++ = (uint16_t)(((uint32_t)TILE_PIXEL(src, x, y, 1)) * 0x1 / 0xff); + *dst_pixel++ = (uint16_t)(((uint32_t)TILE_PIXEL(src, x, y, 2)) * 0x1 / 0xff); + } + dst_row += dst_stride; + 
} +} + +static void +lp_tile_r16g16b16a16_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint16_t *dst_pixel = (uint16_t *)(dst_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (uint16_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x1 / 0xff); + *dst_pixel++ = (uint16_t)(((uint32_t)TILE_PIXEL(src, x, y, 1)) * 0x1 / 0xff); + *dst_pixel++ = (uint16_t)(((uint32_t)TILE_PIXEL(src, x, y, 2)) * 0x1 / 0xff); + *dst_pixel++ = (uint16_t)(((uint32_t)TILE_PIXEL(src, x, y, 3)) * 0x1 / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r16_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int16_t *dst_pixel = (int16_t *)(dst_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x7fff / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r16g16_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int16_t *dst_pixel = (int16_t *)(dst_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x7fff / 0xff); + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 1)) * 0x7fff / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r16g16b16_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int16_t *dst_pixel = (int16_t *)(dst_row + x0*6); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = 
(int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x7fff / 0xff); + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 1)) * 0x7fff / 0xff); + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 2)) * 0x7fff / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r16g16b16a16_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int16_t *dst_pixel = (int16_t *)(dst_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x7fff / 0xff); + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 1)) * 0x7fff / 0xff); + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 2)) * 0x7fff / 0xff); + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 3)) * 0x7fff / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r16_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int16_t *dst_pixel = (int16_t *)(dst_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x1 / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r16g16_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int16_t *dst_pixel = (int16_t *)(dst_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x1 / 0xff); + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 1)) * 0x1 / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r16g16b16_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned 
dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int16_t *dst_pixel = (int16_t *)(dst_row + x0*6); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x1 / 0xff); + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 1)) * 0x1 / 0xff); + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 2)) * 0x1 / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r16g16b16a16_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int16_t *dst_pixel = (int16_t *)(dst_row + x0*8); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x1 / 0xff); + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 1)) * 0x1 / 0xff); + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 2)) * 0x1 / 0xff); + *dst_pixel++ = (int16_t)(((uint32_t)TILE_PIXEL(src, x, y, 3)) * 0x1 / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 1; + uint8_t *dstpix = (uint8_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = ((r[i+0]) << 0); + const uint32_t pixel1 = ((r[i+1]) << 0); + const unsigned 
offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_r8g8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 2; + uint16_t *dstpix = (uint16_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = ((r[i+0]) << 0) | ((g[i+0]) << 8); + const uint32_t pixel1 = ((r[i+1]) << 0) | ((g[i+1]) << 8); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_r8g8b8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint8_t *dst_pixel = (uint8_t *)(dst_row + x0*3); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = TILE_PIXEL(src, x, y, 0); + *dst_pixel++ = TILE_PIXEL(src, x, y, 1); + *dst_pixel++ = TILE_PIXEL(src, x, y, 2); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r8g8b8a8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += 
TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = ((r[i+0]) << 0) | ((g[i+0]) << 8) | ((b[i+0]) << 16) | ((a[i+0]) << 24); + const uint32_t pixel1 = ((r[i+1]) << 0) | ((g[i+1]) << 8) | ((b[i+1]) << 16) | ((a[i+1]) << 24); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_r8_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 1; + uint8_t *dstpix = (uint8_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint8_t)(((uint32_t)r[i+0]) * 0x1 / 0xff)) << 0); + const uint32_t pixel1 = (((uint8_t)(((uint32_t)r[i+1]) * 0x1 / 0xff)) << 0); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_r8g8_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned 
dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 2; + uint16_t *dstpix = (uint16_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint8_t)(((uint32_t)r[i+0]) * 0x1 / 0xff)) << 0) | (((uint8_t)(((uint32_t)g[i+0]) * 0x1 / 0xff)) << 8); + const uint32_t pixel1 = (((uint8_t)(((uint32_t)r[i+1]) * 0x1 / 0xff)) << 0) | (((uint8_t)(((uint32_t)g[i+1]) * 0x1 / 0xff)) << 8); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_r8g8b8_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint8_t *dst_pixel = (uint8_t *)(dst_row + x0*3); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (uint8_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x1 / 0xff); + *dst_pixel++ = (uint8_t)(((uint32_t)TILE_PIXEL(src, x, y, 1)) * 0x1 / 0xff); + *dst_pixel++ = (uint8_t)(((uint32_t)TILE_PIXEL(src, x, y, 2)) * 0x1 / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r8g8b8a8_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + 
const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint8_t)(((uint32_t)r[i+0]) * 0x1 / 0xff)) << 0) | (((uint8_t)(((uint32_t)g[i+0]) * 0x1 / 0xff)) << 8) | (((uint8_t)(((uint32_t)b[i+0]) * 0x1 / 0xff)) << 16) | (((uint8_t)(((uint32_t)a[i+0]) * 0x1 / 0xff)) << 24); + const uint32_t pixel1 = (((uint8_t)(((uint32_t)r[i+1]) * 0x1 / 0xff)) << 0) | (((uint8_t)(((uint32_t)g[i+1]) * 0x1 / 0xff)) << 8) | (((uint8_t)(((uint32_t)b[i+1]) * 0x1 / 0xff)) << 16) | (((uint8_t)(((uint32_t)a[i+1]) * 0x1 / 0xff)) << 24); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_r8_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int8_t *dst_pixel = (int8_t *)(dst_row + x0*1); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int8_t)(TILE_PIXEL(src, x, y, 0) >> 1); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r8g8_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int8_t *dst_pixel = (int8_t *)(dst_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int8_t)(TILE_PIXEL(src, x, y, 0) >> 1); + *dst_pixel++ = (int8_t)(TILE_PIXEL(src, x, y, 1) >> 1); + } + dst_row += dst_stride; + } +} + +static void 
+lp_tile_r8g8b8_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int8_t *dst_pixel = (int8_t *)(dst_row + x0*3); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int8_t)(TILE_PIXEL(src, x, y, 0) >> 1); + *dst_pixel++ = (int8_t)(TILE_PIXEL(src, x, y, 1) >> 1); + *dst_pixel++ = (int8_t)(TILE_PIXEL(src, x, y, 2) >> 1); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r8g8b8a8_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int8_t *dst_pixel = (int8_t *)(dst_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int8_t)(TILE_PIXEL(src, x, y, 0) >> 1); + *dst_pixel++ = (int8_t)(TILE_PIXEL(src, x, y, 1) >> 1); + *dst_pixel++ = (int8_t)(TILE_PIXEL(src, x, y, 2) >> 1); + *dst_pixel++ = (int8_t)(TILE_PIXEL(src, x, y, 3) >> 1); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r8_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int8_t *dst_pixel = (int8_t *)(dst_row + x0*1); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int8_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x1 / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r8g8_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int8_t *dst_pixel = (int8_t *)(dst_row + x0*2); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int8_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x1 / 0xff); + *dst_pixel++ = (int8_t)(((uint32_t)TILE_PIXEL(src, x, y, 1)) * 0x1 / 0xff); + 
} + dst_row += dst_stride; + } +} + +static void +lp_tile_r8g8b8_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int8_t *dst_pixel = (int8_t *)(dst_row + x0*3); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int8_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x1 / 0xff); + *dst_pixel++ = (int8_t)(((uint32_t)TILE_PIXEL(src, x, y, 1)) * 0x1 / 0xff); + *dst_pixel++ = (int8_t)(((uint32_t)TILE_PIXEL(src, x, y, 2)) * 0x1 / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r8g8b8a8_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + int8_t *dst_pixel = (int8_t *)(dst_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + *dst_pixel++ = (int8_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x1 / 0xff); + *dst_pixel++ = (int8_t)(((uint32_t)TILE_PIXEL(src, x, y, 1)) * 0x1 / 0xff); + *dst_pixel++ = (int8_t)(((uint32_t)TILE_PIXEL(src, x, y, 2)) * 0x1 / 0xff); + *dst_pixel++ = (int8_t)(((uint32_t)TILE_PIXEL(src, x, y, 3)) * 0x1 / 0xff); + } + dst_row += dst_stride; + } +} + +static void +lp_tile_r10g10b10x2_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + const unsigned dstpix_stride = dst_stride / 4; + uint32_t *dstpix = (uint32_t *) dst; + unsigned int qx, qy, i; + + for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) { + const unsigned py = y0 + qy; + for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) { + const unsigned px = x0 + qx; + const uint8_t *r = src + 0 * TILE_C_STRIDE; + const uint8_t *g = src + 1 * TILE_C_STRIDE; + const uint8_t *b = src + 2 * TILE_C_STRIDE; + const uint8_t *a = src + 3 * TILE_C_STRIDE; + (void) r; (void) g; (void) b; (void) a; /* silence warnings */ + for (i = 0; i < 
TILE_C_STRIDE; i += 2) { + const uint32_t pixel0 = (((uint32_t)(((uint32_t)r[i+0]) * 0x1 / 0xff)) << 0) | (((uint32_t)(((uint32_t)g[i+0]) * 0x1 / 0xff)) << 10) | (((uint32_t)(((uint32_t)b[i+0]) * 0x1 / 0xff)) << 20); + const uint32_t pixel1 = (((uint32_t)(((uint32_t)r[i+1]) * 0x1 / 0xff)) << 0) | (((uint32_t)(((uint32_t)g[i+1]) * 0x1 / 0xff)) << 10) | (((uint32_t)(((uint32_t)b[i+1]) * 0x1 / 0xff)) << 20); + const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]); + dstpix[offset + 0] = pixel0; + dstpix[offset + 1] = pixel1; + } + src += TILE_X_STRIDE; + } + } +} + +static void +lp_tile_r10g10b10x2_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0) +{ + unsigned x, y; + uint8_t *dst_row = dst + y0*dst_stride; + for (y = 0; y < TILE_SIZE; ++y) { + uint32_t *dst_pixel = (uint32_t *)(dst_row + x0*4); + for (x = 0; x < TILE_SIZE; ++x) { + uint32_t pixel = 0; + pixel |= (uint32_t)(((uint32_t)TILE_PIXEL(src, x, y, 0)) * 0x1ff / 0xff); + pixel |= ((uint32_t)(((uint32_t)TILE_PIXEL(src, x, y, 1)) * 0x1ff / 0xff) << 10); + pixel |= ((uint32_t)(((uint32_t)TILE_PIXEL(src, x, y, 2)) * 0x1ff / 0xff) << 20); + *dst_pixel++ = pixel; + } + dst_row += dst_stride; + } +} + +void +lp_tile_unswizzle_4ub(enum pipe_format format, const uint8_t *src, void *dst, unsigned dst_stride, unsigned x, unsigned y) +{ + void (*func)(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0); +#ifdef DEBUG + lp_tile_unswizzle_count += 1; +#endif + switch(format) { + case PIPE_FORMAT_NONE: + func = lp_tile_none_unswizzle_4ub; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: +#ifdef PIPE_ARCH_SSE + func = util_cpu_caps.has_sse2 ? 
lp_tile_b8g8r8a8_unorm_unswizzle_4ub_sse2 : lp_tile_b8g8r8a8_unorm_unswizzle_4ub; +#else + func = lp_tile_b8g8r8a8_unorm_unswizzle_4ub; +#endif + break; + case PIPE_FORMAT_B8G8R8X8_UNORM: + func = lp_tile_b8g8r8x8_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_A8R8G8B8_UNORM: + func = lp_tile_a8r8g8b8_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_X8R8G8B8_UNORM: + func = lp_tile_x8r8g8b8_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_A8B8G8R8_UNORM: + func = lp_tile_a8b8g8r8_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_X8B8G8R8_UNORM: + func = lp_tile_x8b8g8r8_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8X8_UNORM: + func = lp_tile_r8g8b8x8_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_B5G5R5X1_UNORM: + func = lp_tile_b5g5r5x1_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_B5G5R5A1_UNORM: + func = lp_tile_b5g5r5a1_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_B4G4R4A4_UNORM: + func = lp_tile_b4g4r4a4_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_B4G4R4X4_UNORM: + func = lp_tile_b4g4r4x4_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_B5G6R5_UNORM: + func = lp_tile_b5g6r5_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R10G10B10A2_UNORM: + func = lp_tile_r10g10b10a2_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_B10G10R10A2_UNORM: + func = lp_tile_b10g10r10a2_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_L8_UNORM: + func = lp_tile_l8_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_A8_UNORM: + func = lp_tile_a8_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_I8_UNORM: + func = lp_tile_i8_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_L4A4_UNORM: + func = lp_tile_l4a4_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_L8A8_UNORM: + func = lp_tile_l8a8_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_L16_UNORM: + func = lp_tile_l16_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_L8_SRGB: + func = lp_tile_l8_srgb_unswizzle_4ub; + break; + case PIPE_FORMAT_L8A8_SRGB: + func = lp_tile_l8a8_srgb_unswizzle_4ub; + break; + case 
PIPE_FORMAT_R8G8B8_SRGB: + func = lp_tile_r8g8b8_srgb_unswizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8A8_SRGB: + func = lp_tile_r8g8b8a8_srgb_unswizzle_4ub; + break; + case PIPE_FORMAT_A8B8G8R8_SRGB: + func = lp_tile_a8b8g8r8_srgb_unswizzle_4ub; + break; + case PIPE_FORMAT_X8B8G8R8_SRGB: + func = lp_tile_x8b8g8r8_srgb_unswizzle_4ub; + break; + case PIPE_FORMAT_B8G8R8A8_SRGB: + func = lp_tile_b8g8r8a8_srgb_unswizzle_4ub; + break; + case PIPE_FORMAT_B8G8R8X8_SRGB: + func = lp_tile_b8g8r8x8_srgb_unswizzle_4ub; + break; + case PIPE_FORMAT_A8R8G8B8_SRGB: + func = lp_tile_a8r8g8b8_srgb_unswizzle_4ub; + break; + case PIPE_FORMAT_X8R8G8B8_SRGB: + func = lp_tile_x8r8g8b8_srgb_unswizzle_4ub; + break; + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + func = lp_tile_r8sg8sb8ux8u_norm_unswizzle_4ub; + break; + case PIPE_FORMAT_R10SG10SB10SA2U_NORM: + func = lp_tile_r10sg10sb10sa2u_norm_unswizzle_4ub; + break; + case PIPE_FORMAT_R5SG5SB6U_NORM: + func = lp_tile_r5sg5sb6u_norm_unswizzle_4ub; + break; + case PIPE_FORMAT_R10G10B10A2_USCALED: + func = lp_tile_r10g10b10a2_uscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R64_FLOAT: + func = lp_tile_r64_float_unswizzle_4ub; + break; + case PIPE_FORMAT_R64G64_FLOAT: + func = lp_tile_r64g64_float_unswizzle_4ub; + break; + case PIPE_FORMAT_R64G64B64_FLOAT: + func = lp_tile_r64g64b64_float_unswizzle_4ub; + break; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + func = lp_tile_r64g64b64a64_float_unswizzle_4ub; + break; + case PIPE_FORMAT_R32_FLOAT: + func = lp_tile_r32_float_unswizzle_4ub; + break; + case PIPE_FORMAT_R32G32_FLOAT: + func = lp_tile_r32g32_float_unswizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + func = lp_tile_r32g32b32_float_unswizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + func = lp_tile_r32g32b32a32_float_unswizzle_4ub; + break; + case PIPE_FORMAT_R32_UNORM: + func = lp_tile_r32_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R32G32_UNORM: + func = lp_tile_r32g32_unorm_unswizzle_4ub; + break; + case 
PIPE_FORMAT_R32G32B32_UNORM: + func = lp_tile_r32g32b32_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32A32_UNORM: + func = lp_tile_r32g32b32a32_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R32_USCALED: + func = lp_tile_r32_uscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R32G32_USCALED: + func = lp_tile_r32g32_uscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32_USCALED: + func = lp_tile_r32g32b32_uscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32A32_USCALED: + func = lp_tile_r32g32b32a32_uscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R32_SNORM: + func = lp_tile_r32_snorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R32G32_SNORM: + func = lp_tile_r32g32_snorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32_SNORM: + func = lp_tile_r32g32b32_snorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32A32_SNORM: + func = lp_tile_r32g32b32a32_snorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R32_SSCALED: + func = lp_tile_r32_sscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R32G32_SSCALED: + func = lp_tile_r32g32_sscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32_SSCALED: + func = lp_tile_r32g32b32_sscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + func = lp_tile_r32g32b32a32_sscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R16_FLOAT: + func = lp_tile_r16_float_unswizzle_4ub; + break; + case PIPE_FORMAT_R16G16_FLOAT: + func = lp_tile_r16g16_float_unswizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16_FLOAT: + func = lp_tile_r16g16b16_float_unswizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + func = lp_tile_r16g16b16a16_float_unswizzle_4ub; + break; + case PIPE_FORMAT_R16_UNORM: + func = lp_tile_r16_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R16G16_UNORM: + func = lp_tile_r16g16_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16_UNORM: + func = lp_tile_r16g16b16_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16A16_UNORM: + func = 
lp_tile_r16g16b16a16_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R16_USCALED: + func = lp_tile_r16_uscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R16G16_USCALED: + func = lp_tile_r16g16_uscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16_USCALED: + func = lp_tile_r16g16b16_uscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16A16_USCALED: + func = lp_tile_r16g16b16a16_uscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R16_SNORM: + func = lp_tile_r16_snorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R16G16_SNORM: + func = lp_tile_r16g16_snorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16_SNORM: + func = lp_tile_r16g16b16_snorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16A16_SNORM: + func = lp_tile_r16g16b16a16_snorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R16_SSCALED: + func = lp_tile_r16_sscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R16G16_SSCALED: + func = lp_tile_r16g16_sscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16_SSCALED: + func = lp_tile_r16g16b16_sscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + func = lp_tile_r16g16b16a16_sscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R8_UNORM: + func = lp_tile_r8_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R8G8_UNORM: + func = lp_tile_r8g8_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8_UNORM: + func = lp_tile_r8g8b8_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + func = lp_tile_r8g8b8a8_unorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R8_USCALED: + func = lp_tile_r8_uscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R8G8_USCALED: + func = lp_tile_r8g8_uscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8_USCALED: + func = lp_tile_r8g8b8_uscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8A8_USCALED: + func = lp_tile_r8g8b8a8_uscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R8_SNORM: + func = lp_tile_r8_snorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R8G8_SNORM: + func = 
lp_tile_r8g8_snorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8_SNORM: + func = lp_tile_r8g8b8_snorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8A8_SNORM: + func = lp_tile_r8g8b8a8_snorm_unswizzle_4ub; + break; + case PIPE_FORMAT_R8_SSCALED: + func = lp_tile_r8_sscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R8G8_SSCALED: + func = lp_tile_r8g8_sscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8_SSCALED: + func = lp_tile_r8g8b8_sscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + func = lp_tile_r8g8b8a8_sscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R10G10B10X2_USCALED: + func = lp_tile_r10g10b10x2_uscaled_unswizzle_4ub; + break; + case PIPE_FORMAT_R10G10B10X2_SNORM: + func = lp_tile_r10g10b10x2_snorm_unswizzle_4ub; + break; + default: + debug_printf("%s: unsupported format %s\n", __FUNCTION__, util_format_name(format)); + return; + } + func(src, (uint8_t *)dst, dst_stride, x, y); +} + |