summaryrefslogtreecommitdiff
path: root/lib/libGL/gallium/drivers/llvmpipe/generated/lp_tile_soa.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libGL/gallium/drivers/llvmpipe/generated/lp_tile_soa.c')
-rw-r--r--lib/libGL/gallium/drivers/llvmpipe/generated/lp_tile_soa.c4936
1 files changed, 4936 insertions, 0 deletions
diff --git a/lib/libGL/gallium/drivers/llvmpipe/generated/lp_tile_soa.c b/lib/libGL/gallium/drivers/llvmpipe/generated/lp_tile_soa.c
new file mode 100644
index 000000000..936bad915
--- /dev/null
+++ b/lib/libGL/gallium/drivers/llvmpipe/generated/lp_tile_soa.c
@@ -0,0 +1,4936 @@
+/* This file is autogenerated by lp_tile_soa.py from u_format.csv. Do not edit directly. */
+
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Pixel format accessor functions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_compiler.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_half.h"
+#include "util/u_cpu_detect.h"
+#include "lp_tile_soa.h"
+
+#ifdef DEBUG
+unsigned lp_tile_unswizzle_count = 0; /* DEBUG builds only; starts at 0 (no update is visible in this part of the file) */
+unsigned lp_tile_swizzle_count = 0; /* DEBUG builds only; starts at 0 (no update is visible in this part of the file) */
+#endif
+
+const unsigned char /* indexed [row][column]; tile_x_offset[]/tile_y_offset[] below hold the inverse mapping */
+tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {
+ { 0, 1, 4, 5}, /* rows 0-1: 2x2 quad of columns 0-1 gets 0..3, columns 2-3 gets 4..7 */
+ { 2, 3, 6, 7},
+ { 8, 9, 12, 13}, /* rows 2-3: columns 0-1 gets 8..11, columns 2-3 gets 12..15 */
+ { 10, 11, 14, 15}
+};
+
+/* Inverse of tile_offset[][]: entry i is the x (column) coordinate of the
+ * element stored at swizzled position i, i.e.
+ * tile_offset[tile_y_offset[i]][tile_x_offset[i]] == i.
+ *
+ * Note: these lookup tables could be replaced with some
+ * bit-twiddling code, but this is a little faster.
+ *
+ * The table is never meant to be written, hence const.
+ */
+static const unsigned tile_x_offset[TILE_VECTOR_WIDTH * TILE_VECTOR_HEIGHT] = {
+   0, 1, 0, 1, 2, 3, 2, 3,
+   0, 1, 0, 1, 2, 3, 2, 3
+};
+
+/* Inverse of tile_offset[][]: entry i is the y (row) coordinate of the
+ * element stored at swizzled position i.  Read-only, hence const.
+ */
+static const unsigned tile_y_offset[TILE_VECTOR_WIDTH * TILE_VECTOR_HEIGHT] = {
+   0, 0, 1, 1, 0, 0, 1, 1,
+   2, 2, 3, 3, 2, 2, 3, 3
+};
+
+
+#if defined(PIPE_ARCH_SSE)
+
+#include "util/u_sse.h"
+
+/**
+ * Split a 4x4 block of 4-byte pixels into four per-byte vectors.
+ *
+ * @param x,y,z,w  four consecutive rows of the block, 4 pixels (16 bytes) each
+ * @param a,b,c,d  receive byte 0, 1, 2 and 3 of all 16 pixels respectively
+ *
+ * Inside every output vector the 16 bytes are ordered in 2x2 quads, which
+ * is the same ordering the tile_offset[][] table describes.
+ */
+static ALWAYS_INLINE void
+swz4( const __m128i * restrict x,
+      const __m128i * restrict y,
+      const __m128i * restrict z,
+      const __m128i * restrict w,
+      __m128i * restrict a,
+      __m128i * restrict b,
+      __m128i * restrict c,
+      __m128i * restrict d)
+{
+   /* Stage 1: pair up bytes of vertically adjacent rows. */
+   const __m128i xy_lo = _mm_unpacklo_epi8(*x, *y);
+   const __m128i xy_hi = _mm_unpackhi_epi8(*x, *y);
+   const __m128i zw_lo = _mm_unpacklo_epi8(*z, *w);
+   const __m128i zw_hi = _mm_unpackhi_epi8(*z, *w);
+
+   /* Stage 2: gather the byte pairs of pixels 0,2 and of pixels 1,3. */
+   const __m128i xy_even = _mm_unpacklo_epi16(xy_lo, xy_hi);
+   const __m128i xy_odd = _mm_unpackhi_epi16(xy_lo, xy_hi);
+   const __m128i zw_even = _mm_unpacklo_epi16(zw_lo, zw_hi);
+   const __m128i zw_odd = _mm_unpackhi_epi16(zw_lo, zw_hi);
+
+   /* Stage 3: 8 bytes of byte-lane 0 followed by 8 bytes of lane 1
+    * (respectively lanes 2 and 3), for each pair of rows.
+    */
+   const __m128i xy_lane01 = _mm_unpacklo_epi8(xy_even, xy_odd);
+   const __m128i xy_lane23 = _mm_unpackhi_epi8(xy_even, xy_odd);
+   const __m128i zw_lane01 = _mm_unpacklo_epi8(zw_even, zw_odd);
+   const __m128i zw_lane23 = _mm_unpackhi_epi8(zw_even, zw_odd);
+
+   /* Stage 4: join the two row pairs into one vector per byte lane. */
+   *a = _mm_unpacklo_epi64(xy_lane01, zw_lane01);
+   *b = _mm_unpackhi_epi64(xy_lane01, zw_lane01);
+   *c = _mm_unpacklo_epi64(xy_lane23, zw_lane23);
+   *d = _mm_unpackhi_epi64(xy_lane23, zw_lane23);
+}
+
+/**
+ * Inverse of swz4(): merge four per-byte vectors back into four pixel rows.
+ *
+ * @param a,b,c,d  byte 0, 1, 2 and 3 of 16 pixels, stored in 2x2 quad order
+ * @param x,y,z,w  receive rows 0..3 of the 4x4 block, 4 pixels (16 bytes) each
+ */
+static ALWAYS_INLINE void
+unswz4( const __m128i * restrict a,
+        const __m128i * restrict b,
+        const __m128i * restrict c,
+        const __m128i * restrict d,
+        __m128i * restrict x,
+        __m128i * restrict y,
+        __m128i * restrict z,
+        __m128i * restrict w)
+{
+   /* Interleave byte lanes 0/1 and 2/3 into 16-bit halves of each pixel. */
+   const __m128i ab_lo = _mm_unpacklo_epi8(*a, *b);
+   const __m128i ab_hi = _mm_unpackhi_epi8(*a, *b);
+   const __m128i cd_lo = _mm_unpacklo_epi8(*c, *d);
+   const __m128i cd_hi = _mm_unpackhi_epi8(*c, *d);
+
+   /* Join the halves: complete pixels, still in swizzled order. */
+   const __m128i px_0_3 = _mm_unpacklo_epi16(ab_lo, cd_lo);
+   const __m128i px_4_7 = _mm_unpackhi_epi16(ab_lo, cd_lo);
+   const __m128i px_8_11 = _mm_unpacklo_epi16(ab_hi, cd_hi);
+   const __m128i px_12_15 = _mm_unpackhi_epi16(ab_hi, cd_hi);
+
+   /* Each 64-bit half holds two horizontally adjacent pixels of one row. */
+   *x = _mm_unpacklo_epi64(px_0_3, px_4_7);
+   *y = _mm_unpackhi_epi64(px_0_3, px_4_7);
+   *z = _mm_unpacklo_epi64(px_8_11, px_12_15);
+   *w = _mm_unpackhi_epi64(px_8_11, px_12_15);
+}
+
+/**
+ * SSE2 path: swizzle one TILE_SIZE x TILE_SIZE tile of B8G8R8A8 pixels.
+ *
+ * The tile is processed in 4x4 pixel blocks.  Each block produces four
+ * consecutive __m128i vectors in dst, ordered r, g, b, a.
+ *
+ * @param dst         output tile, written as an array of __m128i
+ * @param src         linear image; rows are src_stride bytes apart
+ * @param x0,y0       pixel position of the tile's first pixel inside src
+ *
+ * NOTE(review): both buffers are accessed through __m128i pointers, so
+ * they presumably have to be 16-byte aligned -- confirm with the callers.
+ */
+static void
+lp_tile_b8g8r8a8_unorm_swizzle_4ub_sse2(uint8_t * restrict dst,
+                                        const uint8_t * restrict src, unsigned src_stride,
+                                        unsigned x0, unsigned y0)
+{
+   const uint8_t *band = src + y0 * src_stride + x0 * sizeof(uint32_t);
+   __m128i *out = (__m128i *) dst;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; y += 4, band += 4 * src_stride) {
+      for (x = 0; x < TILE_SIZE; x += 4, out += 4) {
+         /* First byte of the current 4x4 block (4 pixels == 16 bytes). */
+         const uint8_t *block = band + x * sizeof(uint32_t);
+
+         swz4((const __m128i *) (block + 0 * src_stride),
+              (const __m128i *) (block + 1 * src_stride),
+              (const __m128i *) (block + 2 * src_stride),
+              (const __m128i *) (block + 3 * src_stride),
+              &out[2],  /* b */
+              &out[1],  /* g */
+              &out[0],  /* r */
+              &out[3]); /* a */
+      }
+   }
+}
+
+/**
+ * SSE2 path: unswizzle one TILE_SIZE x TILE_SIZE tile back to B8G8R8A8.
+ *
+ * The tile is consumed in groups of four __m128i vectors (r, g, b, a), each
+ * group producing one 4x4 pixel block of the linear destination image.
+ *
+ * @param src         swizzled tile, read as an array of __m128i
+ * @param dst         linear image; rows are dst_stride bytes apart
+ * @param x0,y0       pixel position of the tile's first pixel inside dst
+ */
+static void
+lp_tile_b8g8r8a8_unorm_unswizzle_4ub_sse2(const uint8_t * restrict src,
+                                          uint8_t * restrict dst, unsigned dst_stride,
+                                          unsigned x0, unsigned y0)
+{
+   unsigned int x, y;
+   const __m128i *src128 = (const __m128i *) src;
+
+   dst += y0 * dst_stride;
+   dst += x0 * sizeof(uint32_t);
+
+   for (y = 0; y < TILE_SIZE; y += 4) {
+      /* The rows are written through this pointer, so it must not be a
+       * pointer-to-const (the casts below used to discard the qualifier).
+       */
+      uint8_t *dst_row = dst;
+
+      for (x = 0; x < TILE_SIZE; x += 4) {
+         unswz4( &src128[2], /* b */
+                 &src128[1], /* g */
+                 &src128[0], /* r */
+                 &src128[3], /* a */
+                 (__m128i *) (dst_row + 0 * dst_stride),
+                 (__m128i *) (dst_row + 1 * dst_stride),
+                 (__m128i *) (dst_row + 2 * dst_stride),
+                 (__m128i *) (dst_row + 3 * dst_stride));
+
+         src128 += 4;
+         dst_row += sizeof(__m128i);
+      }
+
+      dst += 4 * dst_stride;
+   }
+}
+
+#endif /* PIPE_ARCH_SSE */
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of 1-byte "none" format texels.
+ *
+ * Every source byte is multiplied by 0xff and truncated to 8 bits to form
+ * red; green and blue are stored as 0 and alpha as 255.
+ *
+ * NOTE(review): source values above 1 wrap modulo 256 instead of
+ * saturating -- confirm whether that is intended for this format.
+ */
+static void
+lp_tile_none_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texels = row + x0*1;
+
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint32_t raw = texels[x];
+
+         TILE_PIXEL(dst, x, y, 0) = (uint8_t)(raw * 0xff / 0x1); /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of B8G8R8A8_UNORM pixels.
+ *
+ * Reads 4 bytes per pixel (memory order b, g, r, a) from src, starting at
+ * pixel (x0, y0) with rows src_stride bytes apart, and stores them through
+ * TILE_PIXEL() as channels 0..3 = r, g, b, a.
+ */
+static void
+lp_tile_b8g8r8a8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*4;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         const uint8_t blue = texel[0];
+         const uint8_t green = texel[1];
+         const uint8_t red = texel[2];
+         const uint8_t alpha = texel[3];
+
+         TILE_PIXEL(dst, x, y, 0) = red; /* r */
+         TILE_PIXEL(dst, x, y, 1) = green; /* g */
+         TILE_PIXEL(dst, x, y, 2) = blue; /* b */
+         TILE_PIXEL(dst, x, y, 3) = alpha; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of B8G8R8X8_UNORM pixels.
+ *
+ * Reads 4 bytes per pixel (b, g, r, then one ignored byte) from src at
+ * pixel (x0, y0) and stores r, g, b through TILE_PIXEL(); alpha is 255.
+ */
+static void
+lp_tile_b8g8r8x8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*4;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         const uint8_t blue = texel[0];
+         const uint8_t green = texel[1];
+         const uint8_t red = texel[2];
+         /* texel[3] is padding */
+
+         TILE_PIXEL(dst, x, y, 0) = red; /* r */
+         TILE_PIXEL(dst, x, y, 1) = green; /* g */
+         TILE_PIXEL(dst, x, y, 2) = blue; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of A8R8G8B8_UNORM pixels.
+ *
+ * Reads 4 bytes per pixel (memory order a, r, g, b) from src at pixel
+ * (x0, y0) and stores them through TILE_PIXEL() as r, g, b, a.
+ */
+static void
+lp_tile_a8r8g8b8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*4;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         const uint8_t alpha = texel[0];
+         const uint8_t red = texel[1];
+         const uint8_t green = texel[2];
+         const uint8_t blue = texel[3];
+
+         TILE_PIXEL(dst, x, y, 0) = red; /* r */
+         TILE_PIXEL(dst, x, y, 1) = green; /* g */
+         TILE_PIXEL(dst, x, y, 2) = blue; /* b */
+         TILE_PIXEL(dst, x, y, 3) = alpha; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of X8R8G8B8_UNORM pixels.
+ *
+ * Reads 4 bytes per pixel (one ignored byte, then r, g, b) from src at
+ * pixel (x0, y0) and stores r, g, b through TILE_PIXEL(); alpha is 255.
+ */
+static void
+lp_tile_x8r8g8b8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*4;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         /* texel[0] is padding */
+         const uint8_t red = texel[1];
+         const uint8_t green = texel[2];
+         const uint8_t blue = texel[3];
+
+         TILE_PIXEL(dst, x, y, 0) = red; /* r */
+         TILE_PIXEL(dst, x, y, 1) = green; /* g */
+         TILE_PIXEL(dst, x, y, 2) = blue; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of A8B8G8R8_UNORM pixels.
+ *
+ * Reads 4 bytes per pixel (memory order a, b, g, r) from src at pixel
+ * (x0, y0) and stores them through TILE_PIXEL() as r, g, b, a.
+ */
+static void
+lp_tile_a8b8g8r8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*4;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         const uint8_t alpha = texel[0];
+         const uint8_t blue = texel[1];
+         const uint8_t green = texel[2];
+         const uint8_t red = texel[3];
+
+         TILE_PIXEL(dst, x, y, 0) = red; /* r */
+         TILE_PIXEL(dst, x, y, 1) = green; /* g */
+         TILE_PIXEL(dst, x, y, 2) = blue; /* b */
+         TILE_PIXEL(dst, x, y, 3) = alpha; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of X8B8G8R8_UNORM pixels.
+ *
+ * Reads 4 bytes per pixel (one ignored byte, then b, g, r) from src at
+ * pixel (x0, y0) and stores r, g, b through TILE_PIXEL(); alpha is 255.
+ */
+static void
+lp_tile_x8b8g8r8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*4;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         /* texel[0] is padding */
+         const uint8_t blue = texel[1];
+         const uint8_t green = texel[2];
+         const uint8_t red = texel[3];
+
+         TILE_PIXEL(dst, x, y, 0) = red; /* r */
+         TILE_PIXEL(dst, x, y, 1) = green; /* g */
+         TILE_PIXEL(dst, x, y, 2) = blue; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of R8G8B8X8_UNORM pixels.
+ *
+ * Reads 4 bytes per pixel (r, g, b, then one ignored byte) from src at
+ * pixel (x0, y0) and stores r, g, b through TILE_PIXEL(); alpha is 255.
+ */
+static void
+lp_tile_r8g8b8x8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*4;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         const uint8_t red = texel[0];
+         const uint8_t green = texel[1];
+         const uint8_t blue = texel[2];
+         /* texel[3] is padding */
+
+         TILE_PIXEL(dst, x, y, 0) = red; /* r */
+         TILE_PIXEL(dst, x, y, 1) = green; /* g */
+         TILE_PIXEL(dst, x, y, 2) = blue; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of B5G5R5X1_UNORM pixels.
+ *
+ * Each pixel is one native-endian 16-bit word: blue in bits 0-4, green in
+ * bits 5-9, red in bits 10-14; bit 15 is ignored.  Every 5-bit field is
+ * rescaled to 0..255 and alpha is stored as 255.
+ */
+static void
+lp_tile_b5g5r5x1_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint16_t *texels = (const uint16_t *)(row + x0*2);
+
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint32_t packed = texels[x];
+         const uint32_t b5 = packed & 0x1f;
+         const uint32_t g5 = (packed >> 5) & 0x1f;
+         const uint32_t r5 = (packed >> 10) & 0x1f;
+
+         TILE_PIXEL(dst, x, y, 0) = (uint8_t)(r5 * 0xff / 0x1f); /* r */
+         TILE_PIXEL(dst, x, y, 1) = (uint8_t)(g5 * 0xff / 0x1f); /* g */
+         TILE_PIXEL(dst, x, y, 2) = (uint8_t)(b5 * 0xff / 0x1f); /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of B5G5R5A1_UNORM pixels.
+ *
+ * Each pixel is one native-endian 16-bit word: blue in bits 0-4, green in
+ * bits 5-9, red in bits 10-14 and alpha in bit 15.  The 5-bit fields are
+ * rescaled to 0..255; the alpha bit becomes 0 or 255.
+ */
+static void
+lp_tile_b5g5r5a1_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint16_t *texels = (const uint16_t *)(row + x0*2);
+
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint32_t packed = texels[x];
+         const uint32_t b5 = packed & 0x1f;
+         const uint32_t g5 = (packed >> 5) & 0x1f;
+         const uint32_t r5 = (packed >> 10) & 0x1f;
+         const uint32_t a1 = packed >> 15;
+
+         TILE_PIXEL(dst, x, y, 0) = (uint8_t)(r5 * 0xff / 0x1f); /* r */
+         TILE_PIXEL(dst, x, y, 1) = (uint8_t)(g5 * 0xff / 0x1f); /* g */
+         TILE_PIXEL(dst, x, y, 2) = (uint8_t)(b5 * 0xff / 0x1f); /* b */
+         TILE_PIXEL(dst, x, y, 3) = (uint8_t)(a1 * 0xff / 0x1); /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of B4G4R4A4_UNORM pixels.
+ *
+ * Each pixel is one native-endian 16-bit word holding four nibbles, from
+ * the least significant upwards: blue, green, red, alpha.  Every nibble is
+ * rescaled to 0..255.
+ */
+static void
+lp_tile_b4g4r4a4_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint16_t *texels = (const uint16_t *)(row + x0*2);
+
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint32_t packed = texels[x];
+         const uint32_t b4 = packed & 0xf;
+         const uint32_t g4 = (packed >> 4) & 0xf;
+         const uint32_t r4 = (packed >> 8) & 0xf;
+         const uint32_t a4 = packed >> 12;
+
+         TILE_PIXEL(dst, x, y, 0) = (uint8_t)(r4 * 0xff / 0xf); /* r */
+         TILE_PIXEL(dst, x, y, 1) = (uint8_t)(g4 * 0xff / 0xf); /* g */
+         TILE_PIXEL(dst, x, y, 2) = (uint8_t)(b4 * 0xff / 0xf); /* b */
+         TILE_PIXEL(dst, x, y, 3) = (uint8_t)(a4 * 0xff / 0xf); /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of B4G4R4X4_UNORM pixels.
+ *
+ * Each pixel is one native-endian 16-bit word: blue, green and red nibbles
+ * in bits 0-11; the top nibble is ignored.  Every nibble is rescaled to
+ * 0..255 and alpha is stored as 255.
+ */
+static void
+lp_tile_b4g4r4x4_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint16_t *texels = (const uint16_t *)(row + x0*2);
+
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint32_t packed = texels[x];
+         const uint32_t b4 = packed & 0xf;
+         const uint32_t g4 = (packed >> 4) & 0xf;
+         const uint32_t r4 = (packed >> 8) & 0xf;
+
+         TILE_PIXEL(dst, x, y, 0) = (uint8_t)(r4 * 0xff / 0xf); /* r */
+         TILE_PIXEL(dst, x, y, 1) = (uint8_t)(g4 * 0xff / 0xf); /* g */
+         TILE_PIXEL(dst, x, y, 2) = (uint8_t)(b4 * 0xff / 0xf); /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of B5G6R5_UNORM pixels.
+ *
+ * Each pixel is one native-endian 16-bit word: blue in bits 0-4, green in
+ * bits 5-10, red in bits 11-15.  The fields are rescaled to 0..255 and
+ * alpha is stored as 255.
+ */
+static void
+lp_tile_b5g6r5_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint16_t *texels = (const uint16_t *)(row + x0*2);
+
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint32_t packed = texels[x];
+         const uint32_t b5 = packed & 0x1f;
+         const uint32_t g6 = (packed >> 5) & 0x3f;
+         const uint32_t r5 = packed >> 11;
+
+         TILE_PIXEL(dst, x, y, 0) = (uint8_t)(r5 * 0xff / 0x1f); /* r */
+         TILE_PIXEL(dst, x, y, 1) = (uint8_t)(g6 * 0xff / 0x3f); /* g */
+         TILE_PIXEL(dst, x, y, 2) = (uint8_t)(b5 * 0xff / 0x1f); /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of R10G10B10A2_UNORM pixels.
+ *
+ * Each pixel is one native-endian 32-bit word: red in bits 0-9, green in
+ * bits 10-19, blue in bits 20-29, alpha in bits 30-31.  The colour fields
+ * keep their top 8 bits; the 2-bit alpha is rescaled to 0..255.
+ */
+static void
+lp_tile_r10g10b10a2_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint32_t *texels = (const uint32_t *)(row + x0*4);
+
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint32_t packed = texels[x];
+         const uint32_t r10 = packed & 0x3ff;
+         const uint32_t g10 = (packed >> 10) & 0x3ff;
+         const uint32_t b10 = (packed >> 20) & 0x3ff;
+         const uint32_t a2 = packed >> 30;
+
+         TILE_PIXEL(dst, x, y, 0) = (uint8_t)(r10 >> 2); /* r */
+         TILE_PIXEL(dst, x, y, 1) = (uint8_t)(g10 >> 2); /* g */
+         TILE_PIXEL(dst, x, y, 2) = (uint8_t)(b10 >> 2); /* b */
+         TILE_PIXEL(dst, x, y, 3) = (uint8_t)(a2 * 0xff / 0x3); /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of B10G10R10A2_UNORM pixels.
+ *
+ * Each pixel is one native-endian 32-bit word: blue in bits 0-9, green in
+ * bits 10-19, red in bits 20-29, alpha in bits 30-31.  The colour fields
+ * keep their top 8 bits; the 2-bit alpha is rescaled to 0..255.
+ */
+static void
+lp_tile_b10g10r10a2_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint32_t *texels = (const uint32_t *)(row + x0*4);
+
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint32_t packed = texels[x];
+         const uint32_t b10 = packed & 0x3ff;
+         const uint32_t g10 = (packed >> 10) & 0x3ff;
+         const uint32_t r10 = (packed >> 20) & 0x3ff;
+         const uint32_t a2 = packed >> 30;
+
+         TILE_PIXEL(dst, x, y, 0) = (uint8_t)(r10 >> 2); /* r */
+         TILE_PIXEL(dst, x, y, 1) = (uint8_t)(g10 >> 2); /* g */
+         TILE_PIXEL(dst, x, y, 2) = (uint8_t)(b10 >> 2); /* b */
+         TILE_PIXEL(dst, x, y, 3) = (uint8_t)(a2 * 0xff / 0x3); /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of L8_UNORM pixels.
+ *
+ * The single luminance byte of each pixel is replicated into r, g and b;
+ * alpha is stored as 255.
+ */
+static void
+lp_tile_l8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texels = row + x0*1;
+
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint8_t luminance = texels[x];
+
+         TILE_PIXEL(dst, x, y, 0) = luminance; /* r */
+         TILE_PIXEL(dst, x, y, 1) = luminance; /* g */
+         TILE_PIXEL(dst, x, y, 2) = luminance; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of A8_UNORM pixels.
+ *
+ * The single byte of each pixel becomes alpha; r, g and b are stored as 0.
+ */
+static void
+lp_tile_a8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texels = row + x0*1;
+
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint8_t alpha = texels[x];
+
+         TILE_PIXEL(dst, x, y, 0) = 0; /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = alpha; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of I8_UNORM pixels.
+ *
+ * The single intensity byte of each pixel is replicated into all four
+ * channels r, g, b and a.
+ */
+static void
+lp_tile_i8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texels = row + x0*1;
+
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint8_t intensity = texels[x];
+
+         TILE_PIXEL(dst, x, y, 0) = intensity; /* r */
+         TILE_PIXEL(dst, x, y, 1) = intensity; /* g */
+         TILE_PIXEL(dst, x, y, 2) = intensity; /* b */
+         TILE_PIXEL(dst, x, y, 3) = intensity; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of L4A4_UNORM pixels.
+ *
+ * Each pixel is one byte: luminance in the low nibble, alpha in the high
+ * nibble.  Both are rescaled to 0..255; luminance is replicated to r, g, b.
+ */
+static void
+lp_tile_l4a4_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texels = row + x0*1;
+
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint32_t packed = texels[x];
+         const uint8_t luminance = (uint8_t)((packed & 0xf) * 0xff / 0xf);
+         const uint8_t alpha = (uint8_t)((packed >> 4) * 0xff / 0xf);
+
+         TILE_PIXEL(dst, x, y, 0) = luminance; /* r */
+         TILE_PIXEL(dst, x, y, 1) = luminance; /* g */
+         TILE_PIXEL(dst, x, y, 2) = luminance; /* b */
+         TILE_PIXEL(dst, x, y, 3) = alpha; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of L8A8_UNORM pixels.
+ *
+ * Each pixel is two bytes: luminance then alpha.  Luminance is replicated
+ * into r, g and b.
+ */
+static void
+lp_tile_l8a8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*2;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 2) {
+         const uint8_t luminance = texel[0];
+         const uint8_t alpha = texel[1];
+
+         TILE_PIXEL(dst, x, y, 0) = luminance; /* r */
+         TILE_PIXEL(dst, x, y, 1) = luminance; /* g */
+         TILE_PIXEL(dst, x, y, 2) = luminance; /* b */
+         TILE_PIXEL(dst, x, y, 3) = alpha; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of L16_UNORM pixels.
+ *
+ * Each pixel is one native-endian 16-bit luminance value; its upper 8 bits
+ * are replicated into r, g and b, and alpha is stored as 255.
+ */
+static void
+lp_tile_l16_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint16_t *texels = (const uint16_t *)(row + x0*2);
+
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint8_t luminance = (uint8_t)(texels[x] >> 8);
+
+         TILE_PIXEL(dst, x, y, 0) = luminance; /* r */
+         TILE_PIXEL(dst, x, y, 1) = luminance; /* g */
+         TILE_PIXEL(dst, x, y, 2) = luminance; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of L8_SRGB pixels.
+ *
+ * The luminance byte is copied unchanged into r, g and b (no sRGB decoding
+ * is performed here); alpha is stored as 255.
+ */
+static void
+lp_tile_l8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texels = row + x0*1;
+
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint8_t luminance = texels[x];
+
+         TILE_PIXEL(dst, x, y, 0) = luminance; /* r */
+         TILE_PIXEL(dst, x, y, 1) = luminance; /* g */
+         TILE_PIXEL(dst, x, y, 2) = luminance; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of L8A8_SRGB pixels.
+ *
+ * Each pixel is two bytes: luminance then alpha.  Both are copied
+ * unchanged (no sRGB decoding here); luminance is replicated to r, g, b.
+ */
+static void
+lp_tile_l8a8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*2;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 2) {
+         const uint8_t luminance = texel[0];
+         const uint8_t alpha = texel[1];
+
+         TILE_PIXEL(dst, x, y, 0) = luminance; /* r */
+         TILE_PIXEL(dst, x, y, 1) = luminance; /* g */
+         TILE_PIXEL(dst, x, y, 2) = luminance; /* b */
+         TILE_PIXEL(dst, x, y, 3) = alpha; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of R8G8B8_SRGB pixels.
+ *
+ * Each pixel is three bytes (r, g, b), copied unchanged (no sRGB decoding
+ * here); alpha is stored as 255.
+ */
+static void
+lp_tile_r8g8b8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*3;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 3) {
+         const uint8_t red = texel[0];
+         const uint8_t green = texel[1];
+         const uint8_t blue = texel[2];
+
+         TILE_PIXEL(dst, x, y, 0) = red; /* r */
+         TILE_PIXEL(dst, x, y, 1) = green; /* g */
+         TILE_PIXEL(dst, x, y, 2) = blue; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of R8G8B8A8_SRGB pixels.
+ *
+ * Each pixel is four bytes (r, g, b, a), copied unchanged into the
+ * matching channels (no sRGB decoding here).
+ */
+static void
+lp_tile_r8g8b8a8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*4;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         const uint8_t red = texel[0];
+         const uint8_t green = texel[1];
+         const uint8_t blue = texel[2];
+         const uint8_t alpha = texel[3];
+
+         TILE_PIXEL(dst, x, y, 0) = red; /* r */
+         TILE_PIXEL(dst, x, y, 1) = green; /* g */
+         TILE_PIXEL(dst, x, y, 2) = blue; /* b */
+         TILE_PIXEL(dst, x, y, 3) = alpha; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of A8B8G8R8_SRGB pixels.
+ *
+ * Each pixel is four bytes in memory order a, b, g, r; they are copied
+ * unchanged (no sRGB decoding here) and stored as r, g, b, a.
+ */
+static void
+lp_tile_a8b8g8r8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*4;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         const uint8_t alpha = texel[0];
+         const uint8_t blue = texel[1];
+         const uint8_t green = texel[2];
+         const uint8_t red = texel[3];
+
+         TILE_PIXEL(dst, x, y, 0) = red; /* r */
+         TILE_PIXEL(dst, x, y, 1) = green; /* g */
+         TILE_PIXEL(dst, x, y, 2) = blue; /* b */
+         TILE_PIXEL(dst, x, y, 3) = alpha; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of X8B8G8R8_SRGB pixels.
+ *
+ * Each pixel is four bytes: one ignored byte, then b, g, r, copied
+ * unchanged (no sRGB decoding here); alpha is stored as 255.
+ */
+static void
+lp_tile_x8b8g8r8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*4;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         /* texel[0] is padding */
+         const uint8_t blue = texel[1];
+         const uint8_t green = texel[2];
+         const uint8_t red = texel[3];
+
+         TILE_PIXEL(dst, x, y, 0) = red; /* r */
+         TILE_PIXEL(dst, x, y, 1) = green; /* g */
+         TILE_PIXEL(dst, x, y, 2) = blue; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of B8G8R8A8_SRGB pixels.
+ *
+ * Each pixel is four bytes in memory order b, g, r, a; they are copied
+ * unchanged (no sRGB decoding here) and stored as r, g, b, a.
+ */
+static void
+lp_tile_b8g8r8a8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*4;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         const uint8_t blue = texel[0];
+         const uint8_t green = texel[1];
+         const uint8_t red = texel[2];
+         const uint8_t alpha = texel[3];
+
+         TILE_PIXEL(dst, x, y, 0) = red; /* r */
+         TILE_PIXEL(dst, x, y, 1) = green; /* g */
+         TILE_PIXEL(dst, x, y, 2) = blue; /* b */
+         TILE_PIXEL(dst, x, y, 3) = alpha; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of B8G8R8X8_SRGB pixels.
+ *
+ * Each pixel is four bytes: b, g, r, then one ignored byte, copied
+ * unchanged (no sRGB decoding here); alpha is stored as 255.
+ */
+static void
+lp_tile_b8g8r8x8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*4;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         const uint8_t blue = texel[0];
+         const uint8_t green = texel[1];
+         const uint8_t red = texel[2];
+         /* texel[3] is padding */
+
+         TILE_PIXEL(dst, x, y, 0) = red; /* r */
+         TILE_PIXEL(dst, x, y, 1) = green; /* g */
+         TILE_PIXEL(dst, x, y, 2) = blue; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of A8R8G8B8_SRGB pixels.
+ *
+ * Each pixel is four bytes in memory order a, r, g, b; they are copied
+ * unchanged (no sRGB decoding here) and stored as r, g, b, a.
+ */
+static void
+lp_tile_a8r8g8b8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*4;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         const uint8_t alpha = texel[0];
+         const uint8_t red = texel[1];
+         const uint8_t green = texel[2];
+         const uint8_t blue = texel[3];
+
+         TILE_PIXEL(dst, x, y, 0) = red; /* r */
+         TILE_PIXEL(dst, x, y, 1) = green; /* g */
+         TILE_PIXEL(dst, x, y, 2) = blue; /* b */
+         TILE_PIXEL(dst, x, y, 3) = alpha; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of X8R8G8B8_SRGB pixels.
+ *
+ * Each pixel is four bytes: one ignored byte, then r, g, b, copied
+ * unchanged (no sRGB decoding here); alpha is stored as 255.
+ */
+static void
+lp_tile_x8r8g8b8_srgb_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = row + x0*4;
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         /* texel[0] is padding */
+         const uint8_t red = texel[1];
+         const uint8_t green = texel[2];
+         const uint8_t blue = texel[3];
+
+         TILE_PIXEL(dst, x, y, 0) = red; /* r */
+         TILE_PIXEL(dst, x, y, 1) = green; /* g */
+         TILE_PIXEL(dst, x, y, 2) = blue; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of R8SG8SB8UX8U_NORM pixels.
+ *
+ * Each pixel is four bytes: signed normalized red and green, unsigned
+ * blue, and one ignored byte.  Alpha is stored as 255.
+ *
+ * Negative red/green values cannot be represented in an unsigned byte and
+ * are clamped to 0.  (They used to be converted to uint32_t before
+ * scaling, which wrapped them to huge values and produced arbitrary
+ * output bytes.)  Non-negative values are rescaled from 0..127 to 0..255
+ * exactly as before.
+ */
+static void
+lp_tile_r8sg8sb8ux8u_norm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int8_t *src_pixel = (const int8_t *)(src_row + x0*4);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const int r_snorm = src_pixel[0];
+         const int g_snorm = src_pixel[1];
+         /* Clamp before the unsigned conversion so negatives become 0. */
+         const uint8_t r = (uint8_t)(((uint32_t)(r_snorm > 0 ? r_snorm : 0)) * 0xff / 0x7f);
+         const uint8_t g = (uint8_t)(((uint32_t)(g_snorm > 0 ? g_snorm : 0)) * 0xff / 0x7f);
+         /* Blue is an unsigned byte; reinterpret the raw bits. */
+         const uint8_t b = (uint8_t)src_pixel[2];
+         /* src_pixel[3] is padding */
+         src_pixel += 4;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of R10SG10SB10SA2U_NORM pixels.
+ *
+ * Each pixel is one native-endian 32-bit word: signed normalized red in
+ * bits 0-9, green in bits 10-19, blue in bits 20-29, and unsigned alpha in
+ * bits 30-31.
+ *
+ * Bit 9 of each colour field is its sign bit.  Negative values are clamped
+ * to 0; previously the field was treated as unsigned, so negatives were
+ * shifted and truncated into arbitrary bytes.  Non-negative values
+ * (0..511) are reduced to 0..255 by dropping the lowest bit, as before.
+ */
+static void
+lp_tile_r10sg10sb10sa2u_norm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*4);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint32_t pixel = *src_pixel++;
+         const uint32_t r10 = pixel & 0x3ff;
+         const uint32_t g10 = (pixel >> 10) & 0x3ff;
+         const uint32_t b10 = (pixel >> 20) & 0x3ff;
+         const uint8_t r = (r10 & 0x200) ? 0 : (uint8_t)(r10 >> 1);
+         const uint8_t g = (g10 & 0x200) ? 0 : (uint8_t)(g10 >> 1);
+         const uint8_t b = (b10 & 0x200) ? 0 : (uint8_t)(b10 >> 1);
+         const uint8_t a = (uint8_t)(((uint32_t)(pixel >> 30)) * 0xff / 0x3);
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of R5SG5SB6U_NORM pixels.
+ *
+ * Each pixel is one native-endian 16-bit word: signed normalized red in
+ * bits 0-4, signed normalized green in bits 5-9, unsigned blue in bits
+ * 10-15.  Alpha is stored as 255.
+ *
+ * Bit 4 of the red/green fields is the sign bit.  Negative values are
+ * clamped to 0; previously the field was scaled as if unsigned, so values
+ * with the sign bit set overflowed 8 bits and wrapped.  Non-negative
+ * values (0..15) are rescaled to 0..255 as before.
+ */
+static void
+lp_tile_r5sg5sb6u_norm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*2);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint32_t pixel = *src_pixel++;
+         const uint32_t r5 = pixel & 0x1f;
+         const uint32_t g5 = (pixel >> 5) & 0x1f;
+         const uint32_t b6 = pixel >> 10;
+         const uint8_t r = (r5 & 0x10) ? 0 : (uint8_t)(r5 * 0xff / 0xf);
+         const uint8_t g = (g5 & 0x10) ? 0 : (uint8_t)(g5 * 0xff / 0xf);
+         const uint8_t b = (uint8_t)(b6 * 0xff / 0x3f);
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of R10G10B10A2_USCALED pixels.
+ *
+ * Each pixel is one native-endian 32-bit word: red in bits 0-9, green in
+ * bits 10-19, blue in bits 20-29, alpha in bits 30-31.  The fields are
+ * plain unsigned integers, not normalized values, so 0 maps to 0 and
+ * every value of 1 or more saturates to 255.
+ *
+ * The previous code multiplied by 0xff and truncated to 8 bits, so any
+ * value above 1 wrapped modulo 256 (for example 2 became 254).
+ */
+static void
+lp_tile_r10g10b10a2_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*4);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const uint32_t pixel = *src_pixel++;
+         const uint8_t r = (pixel & 0x3ff) ? 255 : 0;
+         const uint8_t g = ((pixel >> 10) & 0x3ff) ? 255 : 0;
+         const uint8_t b = ((pixel >> 20) & 0x3ff) ? 255 : 0;
+         const uint8_t a = (pixel >> 30) ? 255 : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of R64_FLOAT pixels.
+ *
+ * Each pixel is one double holding red.  It is clamped to [0, 1] and
+ * scaled to 0..255; green and blue are stored as 0 and alpha as 255.
+ *
+ * The clamp is required: converting an out-of-range or NaN double to
+ * uint8_t is undefined behaviour in C.  Values already inside [0, 1)
+ * convert exactly as before (scale by 0xff, truncate).
+ */
+static void
+lp_tile_r64_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const double *src_pixel = (const double *)(src_row + x0*8);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const double value = *src_pixel++;
+         uint8_t r;
+         if (!(value > 0.0))
+            r = 0; /* negative, zero or NaN */
+         else if (value >= 1.0)
+            r = 255;
+         else
+            r = (uint8_t)(value * 0xff);
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of R64G64_FLOAT pixels.
+ *
+ * Each pixel is two doubles (red, green).  Both are clamped to [0, 1] and
+ * scaled to 0..255; blue is stored as 0 and alpha as 255.
+ *
+ * The clamp is required: converting an out-of-range or NaN double to
+ * uint8_t is undefined behaviour in C.  Values already inside [0, 1)
+ * convert exactly as before (scale by 0xff, truncate).
+ */
+static void
+lp_tile_r64g64_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const double *src_pixel = (const double *)(src_row + x0*16);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         uint8_t rg[2];
+         unsigned chan;
+         for (chan = 0; chan < 2; ++chan) {
+            const double value = *src_pixel++;
+            if (!(value > 0.0))
+               rg[chan] = 0; /* negative, zero or NaN */
+            else if (value >= 1.0)
+               rg[chan] = 255;
+            else
+               rg[chan] = (uint8_t)(value * 0xff);
+         }
+         TILE_PIXEL(dst, x, y, 0) = rg[0]; /* r */
+         TILE_PIXEL(dst, x, y, 1) = rg[1]; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Swizzle a TILE_SIZE x TILE_SIZE block of R64G64B64_FLOAT pixels.
+ *
+ * Each pixel is three doubles (red, green, blue).  Each is clamped to
+ * [0, 1] and scaled to 0..255; alpha is stored as 255.
+ *
+ * The clamp is required: converting an out-of-range or NaN double to
+ * uint8_t is undefined behaviour in C.  Values already inside [0, 1)
+ * convert exactly as before (scale by 0xff, truncate).
+ */
+static void
+lp_tile_r64g64b64_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const double *src_pixel = (const double *)(src_row + x0*24);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         uint8_t rgb[3];
+         unsigned chan;
+         for (chan = 0; chan < 3; ++chan) {
+            const double value = *src_pixel++;
+            if (!(value > 0.0))
+               rgb[chan] = 0; /* negative, zero or NaN */
+            else if (value >= 1.0)
+               rgb[chan] = 255;
+            else
+               rgb[chan] = (uint8_t)(value * 0xff);
+         }
+         TILE_PIXEL(dst, x, y, 0) = rgb[0]; /* r */
+         TILE_PIXEL(dst, x, y, 1) = rgb[1]; /* g */
+         TILE_PIXEL(dst, x, y, 2) = rgb[2]; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of four-channel double pixels
+ * (32 bytes each) whose first pixel is (x0, y0) in src, with rows
+ * src_stride bytes apart, and store it to dst through TILE_PIXEL as
+ * 8-bit r,g,b,a.
+ */
+static void
+lp_tile_r64g64b64a64_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const double *src_pixel = (const double *)(src_row + x0*32);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* Clamp to [0, 1] before scaling: converting an out-of-range or
+          * NaN double to uint8_t is undefined.  !(v > 0) also rejects NaN. */
+         const double fr = *src_pixel++;
+         const double fg = *src_pixel++;
+         const double fb = *src_pixel++;
+         const double fa = *src_pixel++;
+         const uint8_t r = !(fr > 0.0) ? 0 : (fr >= 1.0 ? 0xff : (uint8_t)(fr * 0xff));
+         const uint8_t g = !(fg > 0.0) ? 0 : (fg >= 1.0 ? 0xff : (uint8_t)(fg * 0xff));
+         const uint8_t b = !(fb > 0.0) ? 0 : (fb >= 1.0 ? 0xff : (uint8_t)(fb * 0xff));
+         const uint8_t a = !(fa > 0.0) ? 0 : (fa >= 1.0 ? 0xff : (uint8_t)(fa * 0xff));
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of single-channel 32-bit float
+ * pixels (4 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, convert each channel with float_to_ubyte and
+ * store it to dst through TILE_PIXEL.  g and b are set to 0, a to 255.
+ */
+static void
+lp_tile_r32_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const float *texel = (const float *)(row + x0*4);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 1) {
+         const uint8_t r = float_to_ubyte(texel[0]);
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of two-channel 32-bit float pixels
+ * (8 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, convert each channel with float_to_ubyte and
+ * store it to dst through TILE_PIXEL.  b is set to 0, a to 255.
+ */
+static void
+lp_tile_r32g32_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const float *texel = (const float *)(row + x0*8);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 2) {
+         const uint8_t r = float_to_ubyte(texel[0]);
+         const uint8_t g = float_to_ubyte(texel[1]);
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of three-channel 32-bit float
+ * pixels (12 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, convert each channel with float_to_ubyte and
+ * store it to dst through TILE_PIXEL.  a is set to 255.
+ */
+static void
+lp_tile_r32g32b32_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const float *texel = (const float *)(row + x0*12);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 3) {
+         const uint8_t r = float_to_ubyte(texel[0]);
+         const uint8_t g = float_to_ubyte(texel[1]);
+         const uint8_t b = float_to_ubyte(texel[2]);
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of four-channel 32-bit float
+ * pixels (16 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, convert each channel with float_to_ubyte and
+ * store it to dst through TILE_PIXEL.
+ */
+static void
+lp_tile_r32g32b32a32_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const float *texel = (const float *)(row + x0*16);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         const uint8_t r = float_to_ubyte(texel[0]);
+         const uint8_t g = float_to_ubyte(texel[1]);
+         const uint8_t b = float_to_ubyte(texel[2]);
+         const uint8_t a = float_to_ubyte(texel[3]);
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of single-channel 32-bit unsigned
+ * pixels (4 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, keep the top 8 bits of each channel and store
+ * the result to dst through TILE_PIXEL.  g and b are set to 0, a to 255.
+ */
+static void
+lp_tile_r32_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint32_t *texel = (const uint32_t *)(row + x0*4);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 1) {
+         const uint8_t r = (uint8_t)(texel[0] >> 24);
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of two-channel 32-bit unsigned
+ * pixels (8 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, keep the top 8 bits of each channel and store
+ * the result to dst through TILE_PIXEL.  b is set to 0, a to 255.
+ */
+static void
+lp_tile_r32g32_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint32_t *texel = (const uint32_t *)(row + x0*8);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 2) {
+         const uint8_t r = (uint8_t)(texel[0] >> 24);
+         const uint8_t g = (uint8_t)(texel[1] >> 24);
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of three-channel 32-bit unsigned
+ * pixels (12 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, keep the top 8 bits of each channel and store
+ * the result to dst through TILE_PIXEL.  a is set to 255.
+ */
+static void
+lp_tile_r32g32b32_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint32_t *texel = (const uint32_t *)(row + x0*12);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 3) {
+         const uint8_t r = (uint8_t)(texel[0] >> 24);
+         const uint8_t g = (uint8_t)(texel[1] >> 24);
+         const uint8_t b = (uint8_t)(texel[2] >> 24);
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of four-channel 32-bit unsigned
+ * pixels (16 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, keep the top 8 bits of each channel and store
+ * the result to dst through TILE_PIXEL.
+ */
+static void
+lp_tile_r32g32b32a32_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint32_t *texel = (const uint32_t *)(row + x0*16);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         const uint8_t r = (uint8_t)(texel[0] >> 24);
+         const uint8_t g = (uint8_t)(texel[1] >> 24);
+         const uint8_t b = (uint8_t)(texel[2] >> 24);
+         const uint8_t a = (uint8_t)(texel[3] >> 24);
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of single-channel 32-bit unsigned
+ * scaled pixels (4 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  g and b are written as 0 and a as 255.
+ */
+static void
+lp_tile_r32_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*4);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit, so every non-zero value already
+          * reaches 255; saturate instead of letting the product wrap. */
+         const uint8_t r = (*src_pixel++ != 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of two-channel 32-bit unsigned
+ * scaled pixels (8 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  b is written as 0 and a as 255.
+ */
+static void
+lp_tile_r32g32_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*8);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit, so every non-zero value already
+          * reaches 255; saturate instead of letting the product wrap. */
+         const uint8_t r = (*src_pixel++ != 0) ? 0xff : 0;
+         const uint8_t g = (*src_pixel++ != 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of three-channel 32-bit unsigned
+ * scaled pixels (12 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  a is written as 255.
+ */
+static void
+lp_tile_r32g32b32_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*12);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit, so every non-zero value already
+          * reaches 255; saturate instead of letting the product wrap. */
+         const uint8_t r = (*src_pixel++ != 0) ? 0xff : 0;
+         const uint8_t g = (*src_pixel++ != 0) ? 0xff : 0;
+         const uint8_t b = (*src_pixel++ != 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of four-channel 32-bit unsigned
+ * scaled pixels (16 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.
+ */
+static void
+lp_tile_r32g32b32a32_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*16);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit, so every non-zero value already
+          * reaches 255; saturate instead of letting the product wrap. */
+         const uint8_t r = (*src_pixel++ != 0) ? 0xff : 0;
+         const uint8_t g = (*src_pixel++ != 0) ? 0xff : 0;
+         const uint8_t b = (*src_pixel++ != 0) ? 0xff : 0;
+         const uint8_t a = (*src_pixel++ != 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of single-channel 32-bit signed
+ * normalized pixels (4 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  g and b are written as 0 and a as 255.
+ */
+static void
+lp_tile_r32_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int32_t *src_pixel = (const int32_t *)(src_row + x0*4);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* Negative inputs have no unsigned counterpart: clamp them to 0
+          * rather than letting the shifted value wrap into the byte. */
+         const int32_t sr = *src_pixel++;
+         const uint8_t r = sr > 0 ? (uint8_t)(sr >> 23) : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of two-channel 32-bit signed
+ * normalized pixels (8 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  b is written as 0 and a as 255.
+ */
+static void
+lp_tile_r32g32_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int32_t *src_pixel = (const int32_t *)(src_row + x0*8);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* Negative inputs have no unsigned counterpart: clamp them to 0
+          * rather than letting the shifted value wrap into the byte. */
+         const int32_t sr = *src_pixel++;
+         const int32_t sg = *src_pixel++;
+         const uint8_t r = sr > 0 ? (uint8_t)(sr >> 23) : 0;
+         const uint8_t g = sg > 0 ? (uint8_t)(sg >> 23) : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of three-channel 32-bit signed
+ * normalized pixels (12 bytes each) whose first pixel is (x0, y0) in
+ * src, with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  a is written as 255.
+ */
+static void
+lp_tile_r32g32b32_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int32_t *src_pixel = (const int32_t *)(src_row + x0*12);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* Negative inputs have no unsigned counterpart: clamp them to 0
+          * rather than letting the shifted value wrap into the byte. */
+         const int32_t sr = *src_pixel++;
+         const int32_t sg = *src_pixel++;
+         const int32_t sb = *src_pixel++;
+         const uint8_t r = sr > 0 ? (uint8_t)(sr >> 23) : 0;
+         const uint8_t g = sg > 0 ? (uint8_t)(sg >> 23) : 0;
+         const uint8_t b = sb > 0 ? (uint8_t)(sb >> 23) : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of four-channel 32-bit signed
+ * normalized pixels (16 bytes each) whose first pixel is (x0, y0) in
+ * src, with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.
+ */
+static void
+lp_tile_r32g32b32a32_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int32_t *src_pixel = (const int32_t *)(src_row + x0*16);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* Negative inputs have no unsigned counterpart: clamp them to 0
+          * rather than letting the shifted value wrap into the byte. */
+         const int32_t sr = *src_pixel++;
+         const int32_t sg = *src_pixel++;
+         const int32_t sb = *src_pixel++;
+         const int32_t sa = *src_pixel++;
+         const uint8_t r = sr > 0 ? (uint8_t)(sr >> 23) : 0;
+         const uint8_t g = sg > 0 ? (uint8_t)(sg >> 23) : 0;
+         const uint8_t b = sb > 0 ? (uint8_t)(sb >> 23) : 0;
+         const uint8_t a = sa > 0 ? (uint8_t)(sa >> 23) : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of single-channel 32-bit signed
+ * scaled pixels (4 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  g and b are written as 0 and a as 255.
+ */
+static void
+lp_tile_r32_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int32_t *src_pixel = (const int32_t *)(src_row + x0*4);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit: positive values saturate to 255,
+          * zero and negatives clamp to 0 instead of wrapping. */
+         const uint8_t r = (*src_pixel++ > 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of two-channel 32-bit signed
+ * scaled pixels (8 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  b is written as 0 and a as 255.
+ */
+static void
+lp_tile_r32g32_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int32_t *src_pixel = (const int32_t *)(src_row + x0*8);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit: positive values saturate to 255,
+          * zero and negatives clamp to 0 instead of wrapping. */
+         const uint8_t r = (*src_pixel++ > 0) ? 0xff : 0;
+         const uint8_t g = (*src_pixel++ > 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of three-channel 32-bit signed
+ * scaled pixels (12 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  a is written as 255.
+ */
+static void
+lp_tile_r32g32b32_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int32_t *src_pixel = (const int32_t *)(src_row + x0*12);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit: positive values saturate to 255,
+          * zero and negatives clamp to 0 instead of wrapping. */
+         const uint8_t r = (*src_pixel++ > 0) ? 0xff : 0;
+         const uint8_t g = (*src_pixel++ > 0) ? 0xff : 0;
+         const uint8_t b = (*src_pixel++ > 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of four-channel 32-bit signed
+ * scaled pixels (16 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.
+ */
+static void
+lp_tile_r32g32b32a32_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int32_t *src_pixel = (const int32_t *)(src_row + x0*16);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit: positive values saturate to 255,
+          * zero and negatives clamp to 0 instead of wrapping. */
+         const uint8_t r = (*src_pixel++ > 0) ? 0xff : 0;
+         const uint8_t g = (*src_pixel++ > 0) ? 0xff : 0;
+         const uint8_t b = (*src_pixel++ > 0) ? 0xff : 0;
+         const uint8_t a = (*src_pixel++ > 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of single-channel 16-bit pixels
+ * (2 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, pass each channel through util_half_to_float
+ * and float_to_ubyte, and store the result to dst through TILE_PIXEL.
+ * g and b are set to 0, a to 255.
+ */
+static void
+lp_tile_r16_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint16_t *texel = (const uint16_t *)(row + x0*2);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 1) {
+         const uint8_t r = float_to_ubyte(util_half_to_float(texel[0]));
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of two-channel 16-bit pixels
+ * (4 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, pass each channel through util_half_to_float
+ * and float_to_ubyte, and store the result to dst through TILE_PIXEL.
+ * b is set to 0, a to 255.
+ */
+static void
+lp_tile_r16g16_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint16_t *texel = (const uint16_t *)(row + x0*4);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 2) {
+         const uint8_t r = float_to_ubyte(util_half_to_float(texel[0]));
+         const uint8_t g = float_to_ubyte(util_half_to_float(texel[1]));
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of three-channel 16-bit pixels
+ * (6 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, pass each channel through util_half_to_float
+ * and float_to_ubyte, and store the result to dst through TILE_PIXEL.
+ * a is set to 255.
+ */
+static void
+lp_tile_r16g16b16_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint16_t *texel = (const uint16_t *)(row + x0*6);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 3) {
+         const uint8_t r = float_to_ubyte(util_half_to_float(texel[0]));
+         const uint8_t g = float_to_ubyte(util_half_to_float(texel[1]));
+         const uint8_t b = float_to_ubyte(util_half_to_float(texel[2]));
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of four-channel 16-bit pixels
+ * (8 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, pass each channel through util_half_to_float
+ * and float_to_ubyte, and store the result to dst through TILE_PIXEL.
+ */
+static void
+lp_tile_r16g16b16a16_float_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint16_t *texel = (const uint16_t *)(row + x0*8);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         const uint8_t r = float_to_ubyte(util_half_to_float(texel[0]));
+         const uint8_t g = float_to_ubyte(util_half_to_float(texel[1]));
+         const uint8_t b = float_to_ubyte(util_half_to_float(texel[2]));
+         const uint8_t a = float_to_ubyte(util_half_to_float(texel[3]));
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of single-channel 16-bit unsigned
+ * pixels (2 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, keep the top 8 bits of each channel and store
+ * the result to dst through TILE_PIXEL.  g and b are set to 0, a to 255.
+ */
+static void
+lp_tile_r16_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint16_t *texel = (const uint16_t *)(row + x0*2);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 1) {
+         const uint8_t r = (uint8_t)(texel[0] >> 8);
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of two-channel 16-bit unsigned
+ * pixels (4 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, keep the top 8 bits of each channel and store
+ * the result to dst through TILE_PIXEL.  b is set to 0, a to 255.
+ */
+static void
+lp_tile_r16g16_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint16_t *texel = (const uint16_t *)(row + x0*4);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 2) {
+         const uint8_t r = (uint8_t)(texel[0] >> 8);
+         const uint8_t g = (uint8_t)(texel[1] >> 8);
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of three-channel 16-bit unsigned
+ * pixels (6 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, keep the top 8 bits of each channel and store
+ * the result to dst through TILE_PIXEL.  a is set to 255.
+ */
+static void
+lp_tile_r16g16b16_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint16_t *texel = (const uint16_t *)(row + x0*6);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 3) {
+         const uint8_t r = (uint8_t)(texel[0] >> 8);
+         const uint8_t g = (uint8_t)(texel[1] >> 8);
+         const uint8_t b = (uint8_t)(texel[2] >> 8);
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of four-channel 16-bit unsigned
+ * pixels (8 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, keep the top 8 bits of each channel and store
+ * the result to dst through TILE_PIXEL.
+ */
+static void
+lp_tile_r16g16b16a16_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint16_t *texel = (const uint16_t *)(row + x0*8);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         const uint8_t r = (uint8_t)(texel[0] >> 8);
+         const uint8_t g = (uint8_t)(texel[1] >> 8);
+         const uint8_t b = (uint8_t)(texel[2] >> 8);
+         const uint8_t a = (uint8_t)(texel[3] >> 8);
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of single-channel 16-bit unsigned
+ * scaled pixels (2 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  g and b are written as 0 and a as 255.
+ */
+static void
+lp_tile_r16_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*2);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit, so every non-zero value already
+          * reaches 255; saturate instead of letting the product wrap. */
+         const uint8_t r = (*src_pixel++ != 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of two-channel 16-bit unsigned
+ * scaled pixels (4 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  b is written as 0 and a as 255.
+ */
+static void
+lp_tile_r16g16_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*4);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit, so every non-zero value already
+          * reaches 255; saturate instead of letting the product wrap. */
+         const uint8_t r = (*src_pixel++ != 0) ? 0xff : 0;
+         const uint8_t g = (*src_pixel++ != 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of three-channel 16-bit unsigned
+ * scaled pixels (6 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  a is written as 255.
+ */
+static void
+lp_tile_r16g16b16_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*6);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit, so every non-zero value already
+          * reaches 255; saturate instead of letting the product wrap. */
+         const uint8_t r = (*src_pixel++ != 0) ? 0xff : 0;
+         const uint8_t g = (*src_pixel++ != 0) ? 0xff : 0;
+         const uint8_t b = (*src_pixel++ != 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of four-channel 16-bit unsigned
+ * scaled pixels (8 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.
+ */
+static void
+lp_tile_r16g16b16a16_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint16_t *src_pixel = (const uint16_t *)(src_row + x0*8);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit, so every non-zero value already
+          * reaches 255; saturate instead of letting the product wrap. */
+         const uint8_t r = (*src_pixel++ != 0) ? 0xff : 0;
+         const uint8_t g = (*src_pixel++ != 0) ? 0xff : 0;
+         const uint8_t b = (*src_pixel++ != 0) ? 0xff : 0;
+         const uint8_t a = (*src_pixel++ != 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of single-channel 16-bit signed
+ * normalized pixels (2 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  g and b are written as 0 and a as 255.
+ */
+static void
+lp_tile_r16_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int16_t *src_pixel = (const int16_t *)(src_row + x0*2);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* Negative inputs have no unsigned counterpart: clamp them to 0
+          * rather than letting the shifted value wrap into the byte. */
+         const int16_t sr = *src_pixel++;
+         const uint8_t r = sr > 0 ? (uint8_t)(sr >> 7) : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of two-channel 16-bit signed
+ * normalized pixels (4 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  b is written as 0 and a as 255.
+ */
+static void
+lp_tile_r16g16_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int16_t *src_pixel = (const int16_t *)(src_row + x0*4);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* Negative inputs have no unsigned counterpart: clamp them to 0
+          * rather than letting the shifted value wrap into the byte. */
+         const int16_t sr = *src_pixel++;
+         const int16_t sg = *src_pixel++;
+         const uint8_t r = sr > 0 ? (uint8_t)(sr >> 7) : 0;
+         const uint8_t g = sg > 0 ? (uint8_t)(sg >> 7) : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of three-channel 16-bit signed
+ * normalized pixels (6 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  a is written as 255.
+ */
+static void
+lp_tile_r16g16b16_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int16_t *src_pixel = (const int16_t *)(src_row + x0*6);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* Negative inputs have no unsigned counterpart: clamp them to 0
+          * rather than letting the shifted value wrap into the byte. */
+         const int16_t sr = *src_pixel++;
+         const int16_t sg = *src_pixel++;
+         const int16_t sb = *src_pixel++;
+         const uint8_t r = sr > 0 ? (uint8_t)(sr >> 7) : 0;
+         const uint8_t g = sg > 0 ? (uint8_t)(sg >> 7) : 0;
+         const uint8_t b = sb > 0 ? (uint8_t)(sb >> 7) : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of four-channel 16-bit signed
+ * normalized pixels (8 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.
+ */
+static void
+lp_tile_r16g16b16a16_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int16_t *src_pixel = (const int16_t *)(src_row + x0*8);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* Negative inputs have no unsigned counterpart: clamp them to 0
+          * rather than letting the shifted value wrap into the byte. */
+         const int16_t sr = *src_pixel++;
+         const int16_t sg = *src_pixel++;
+         const int16_t sb = *src_pixel++;
+         const int16_t sa = *src_pixel++;
+         const uint8_t r = sr > 0 ? (uint8_t)(sr >> 7) : 0;
+         const uint8_t g = sg > 0 ? (uint8_t)(sg >> 7) : 0;
+         const uint8_t b = sb > 0 ? (uint8_t)(sb >> 7) : 0;
+         const uint8_t a = sa > 0 ? (uint8_t)(sa >> 7) : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of single-channel 16-bit signed
+ * scaled pixels (2 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  g and b are written as 0 and a as 255.
+ */
+static void
+lp_tile_r16_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int16_t *src_pixel = (const int16_t *)(src_row + x0*2);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit: positive values saturate to 255,
+          * zero and negatives clamp to 0 instead of wrapping. */
+         const uint8_t r = (*src_pixel++ > 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of two-channel 16-bit signed
+ * scaled pixels (4 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  b is written as 0 and a as 255.
+ */
+static void
+lp_tile_r16g16_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int16_t *src_pixel = (const int16_t *)(src_row + x0*4);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit: positive values saturate to 255,
+          * zero and negatives clamp to 0 instead of wrapping. */
+         const uint8_t r = (*src_pixel++ > 0) ? 0xff : 0;
+         const uint8_t g = (*src_pixel++ > 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of three-channel 16-bit signed
+ * scaled pixels (6 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  a is written as 255.
+ */
+static void
+lp_tile_r16g16b16_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int16_t *src_pixel = (const int16_t *)(src_row + x0*6);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit: positive values saturate to 255,
+          * zero and negatives clamp to 0 instead of wrapping. */
+         const uint8_t r = (*src_pixel++ > 0) ? 0xff : 0;
+         const uint8_t g = (*src_pixel++ > 0) ? 0xff : 0;
+         const uint8_t b = (*src_pixel++ > 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of four-channel 16-bit signed
+ * scaled pixels (8 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.
+ */
+static void
+lp_tile_r16g16b16a16_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int16_t *src_pixel = (const int16_t *)(src_row + x0*8);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit: positive values saturate to 255,
+          * zero and negatives clamp to 0 instead of wrapping. */
+         const uint8_t r = (*src_pixel++ > 0) ? 0xff : 0;
+         const uint8_t g = (*src_pixel++ > 0) ? 0xff : 0;
+         const uint8_t b = (*src_pixel++ > 0) ? 0xff : 0;
+         const uint8_t a = (*src_pixel++ > 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of single-channel 8-bit pixels
+ * (1 byte each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, and store each byte unchanged to dst through
+ * TILE_PIXEL.  g and b are set to 0, a to 255.
+ */
+static void
+lp_tile_r8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = (const uint8_t *)(row + x0*1);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 1) {
+         const uint8_t r = texel[0];
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of two-channel 8-bit pixels
+ * (2 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, and store each byte unchanged to dst through
+ * TILE_PIXEL.  b is set to 0, a to 255.
+ */
+static void
+lp_tile_r8g8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = (const uint8_t *)(row + x0*2);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 2) {
+         const uint8_t r = texel[0];
+         const uint8_t g = texel[1];
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of three-channel 8-bit pixels
+ * (3 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, and store each byte unchanged to dst through
+ * TILE_PIXEL.  a is set to 255.
+ */
+static void
+lp_tile_r8g8b8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = (const uint8_t *)(row + x0*3);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 3) {
+         const uint8_t r = texel[0];
+         const uint8_t g = texel[1];
+         const uint8_t b = texel[2];
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of four-channel 8-bit pixels
+ * (4 bytes each) starting at pixel (x0, y0) of src, rows being
+ * src_stride bytes apart, and store each byte unchanged to dst through
+ * TILE_PIXEL.
+ */
+static void
+lp_tile_r8g8b8a8_unorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   const uint8_t *row = src + y0*src_stride;
+   unsigned x, y;
+
+   for (y = 0; y < TILE_SIZE; ++y, row += src_stride) {
+      const uint8_t *texel = (const uint8_t *)(row + x0*4);
+
+      for (x = 0; x < TILE_SIZE; ++x, texel += 4) {
+         const uint8_t r = texel[0];
+         const uint8_t g = texel[1];
+         const uint8_t b = texel[2];
+         const uint8_t a = texel[3];
+
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of single-channel 8-bit unsigned
+ * scaled pixels (1 byte each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  g and b are written as 0 and a as 255.
+ */
+static void
+lp_tile_r8_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*1);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit, so every non-zero value already
+          * reaches 255; saturate instead of letting the product wrap. */
+         const uint8_t r = (*src_pixel++ != 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Read a TILE_SIZE x TILE_SIZE block of two-channel 8-bit unsigned
+ * scaled pixels (2 bytes each) whose first pixel is (x0, y0) in src,
+ * with rows src_stride bytes apart, and store it to dst through
+ * TILE_PIXEL as 8-bit r,g,b,a.  b is written as 0 and a as 255.
+ */
+static void
+lp_tile_r8g8_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*2);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* The scale is 0xff per unit, so every non-zero value already
+          * reaches 255; saturate instead of letting the product wrap. */
+         const uint8_t r = (*src_pixel++ != 0) ? 0xff : 0;
+         const uint8_t g = (*src_pixel++ != 0) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Convert one TILE_SIZE x TILE_SIZE block of R8G8B8_USCALED pixels to 8-bit
+ * unorm channels stored through TILE_PIXEL().
+ *
+ * A USCALED channel holding the integer v stands for the real value v, so
+ * any non-zero value is at or above 1.0 and must saturate to 255; only 0
+ * maps to 0.  Multiplying by 0xff and narrowing to 8 bits would wrap
+ * instead (v == 2 would give 254).  The missing alpha channel is filled
+ * with 255.
+ *
+ * \param dst         destination tile, written through TILE_PIXEL()
+ * \param src         start of the source image
+ * \param src_stride  distance in bytes between consecutive source rows
+ * \param x0          x coordinate, in pixels, of the block's first column
+ * \param y0          y coordinate, in rows, of the block's first row
+ */
+static void
+lp_tile_r8g8b8_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*3);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* Clamp each channel to [0, 1] before scaling to 0..255. */
+         const uint8_t r = *src_pixel++ ? 0xff : 0;
+         const uint8_t g = *src_pixel++ ? 0xff : 0;
+         const uint8_t b = *src_pixel++ ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Convert one TILE_SIZE x TILE_SIZE block of R8G8B8A8_USCALED pixels to
+ * 8-bit unorm channels stored through TILE_PIXEL().
+ *
+ * A USCALED channel holding the integer v stands for the real value v, so
+ * any non-zero value is at or above 1.0 and must saturate to 255; only 0
+ * maps to 0.  Multiplying by 0xff and narrowing to 8 bits would wrap
+ * instead (v == 2 would give 254).
+ *
+ * \param dst         destination tile, written through TILE_PIXEL()
+ * \param src         start of the source image
+ * \param src_stride  distance in bytes between consecutive source rows
+ * \param x0          x coordinate, in pixels, of the block's first column
+ * \param y0          y coordinate, in rows, of the block's first row
+ */
+static void
+lp_tile_r8g8b8a8_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint8_t *src_pixel = (const uint8_t *)(src_row + x0*4);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* Clamp each channel to [0, 1] before scaling to 0..255. */
+         const uint8_t r = *src_pixel++ ? 0xff : 0;
+         const uint8_t g = *src_pixel++ ? 0xff : 0;
+         const uint8_t b = *src_pixel++ ? 0xff : 0;
+         const uint8_t a = *src_pixel++ ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Convert one TILE_SIZE x TILE_SIZE block of R8_SNORM pixels to 8-bit unorm
+ * channels stored through TILE_PIXEL().
+ *
+ * Source channels are signed bytes with 127 standing for 1.0.  The
+ * destination is unsigned, so negative inputs clamp to 0; casting them to
+ * an unsigned type before scaling would produce arbitrary wrapped values.
+ * Positive inputs are rescaled as v * 255 / 127.  Channels the format lacks
+ * are filled with g = 0, b = 0, a = 255.
+ *
+ * \param dst         destination tile, written through TILE_PIXEL()
+ * \param src         start of the source image
+ * \param src_stride  distance in bytes between consecutive source rows
+ * \param x0          x coordinate, in pixels, of the block's first column
+ * \param y0          y coordinate, in rows, of the block's first row
+ */
+static void
+lp_tile_r8_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int8_t *src_pixel = (const int8_t *)(src_row + x0*1);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const int8_t rs = *src_pixel++;
+         /* Clamp negatives to 0 before the unsigned rescale. */
+         const uint8_t r = rs > 0 ? (uint8_t)(((uint32_t)rs) * 0xff / 0x7f) : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Convert one TILE_SIZE x TILE_SIZE block of R8G8_SNORM pixels to 8-bit
+ * unorm channels stored through TILE_PIXEL().
+ *
+ * Source channels are signed bytes with 127 standing for 1.0.  The
+ * destination is unsigned, so negative inputs clamp to 0; casting them to
+ * an unsigned type before scaling would produce arbitrary wrapped values.
+ * Positive inputs are rescaled as v * 255 / 127.  Channels the format lacks
+ * are filled with b = 0, a = 255.
+ *
+ * \param dst         destination tile, written through TILE_PIXEL()
+ * \param src         start of the source image
+ * \param src_stride  distance in bytes between consecutive source rows
+ * \param x0          x coordinate, in pixels, of the block's first column
+ * \param y0          y coordinate, in rows, of the block's first row
+ */
+static void
+lp_tile_r8g8_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int8_t *src_pixel = (const int8_t *)(src_row + x0*2);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const int8_t rs = *src_pixel++;
+         const int8_t gs = *src_pixel++;
+         /* Clamp negatives to 0 before the unsigned rescale. */
+         const uint8_t r = rs > 0 ? (uint8_t)(((uint32_t)rs) * 0xff / 0x7f) : 0;
+         const uint8_t g = gs > 0 ? (uint8_t)(((uint32_t)gs) * 0xff / 0x7f) : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Convert one TILE_SIZE x TILE_SIZE block of R8G8B8_SNORM pixels to 8-bit
+ * unorm channels stored through TILE_PIXEL().
+ *
+ * Source channels are signed bytes with 127 standing for 1.0.  The
+ * destination is unsigned, so negative inputs clamp to 0; casting them to
+ * an unsigned type before scaling would produce arbitrary wrapped values.
+ * Positive inputs are rescaled as v * 255 / 127.  The missing alpha channel
+ * is filled with 255.
+ *
+ * \param dst         destination tile, written through TILE_PIXEL()
+ * \param src         start of the source image
+ * \param src_stride  distance in bytes between consecutive source rows
+ * \param x0          x coordinate, in pixels, of the block's first column
+ * \param y0          y coordinate, in rows, of the block's first row
+ */
+static void
+lp_tile_r8g8b8_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int8_t *src_pixel = (const int8_t *)(src_row + x0*3);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const int8_t rs = *src_pixel++;
+         const int8_t gs = *src_pixel++;
+         const int8_t bs = *src_pixel++;
+         /* Clamp negatives to 0 before the unsigned rescale. */
+         const uint8_t r = rs > 0 ? (uint8_t)(((uint32_t)rs) * 0xff / 0x7f) : 0;
+         const uint8_t g = gs > 0 ? (uint8_t)(((uint32_t)gs) * 0xff / 0x7f) : 0;
+         const uint8_t b = bs > 0 ? (uint8_t)(((uint32_t)bs) * 0xff / 0x7f) : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Convert one TILE_SIZE x TILE_SIZE block of R8G8B8A8_SNORM pixels to 8-bit
+ * unorm channels stored through TILE_PIXEL().
+ *
+ * Source channels are signed bytes with 127 standing for 1.0.  The
+ * destination is unsigned, so negative inputs clamp to 0; casting them to
+ * an unsigned type before scaling would produce arbitrary wrapped values.
+ * Positive inputs are rescaled as v * 255 / 127.
+ *
+ * \param dst         destination tile, written through TILE_PIXEL()
+ * \param src         start of the source image
+ * \param src_stride  distance in bytes between consecutive source rows
+ * \param x0          x coordinate, in pixels, of the block's first column
+ * \param y0          y coordinate, in rows, of the block's first row
+ */
+static void
+lp_tile_r8g8b8a8_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int8_t *src_pixel = (const int8_t *)(src_row + x0*4);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         const int8_t rs = *src_pixel++;
+         const int8_t gs = *src_pixel++;
+         const int8_t bs = *src_pixel++;
+         const int8_t as = *src_pixel++;
+         /* Clamp negatives to 0 before the unsigned rescale. */
+         const uint8_t r = rs > 0 ? (uint8_t)(((uint32_t)rs) * 0xff / 0x7f) : 0;
+         const uint8_t g = gs > 0 ? (uint8_t)(((uint32_t)gs) * 0xff / 0x7f) : 0;
+         const uint8_t b = bs > 0 ? (uint8_t)(((uint32_t)bs) * 0xff / 0x7f) : 0;
+         const uint8_t a = as > 0 ? (uint8_t)(((uint32_t)as) * 0xff / 0x7f) : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Convert one TILE_SIZE x TILE_SIZE block of R8_SSCALED pixels to 8-bit
+ * unorm channels stored through TILE_PIXEL().
+ *
+ * An SSCALED channel holding the signed integer v stands for the real value
+ * v.  Clamped to the unorm range [0, 1] this gives 0 for v <= 0 and 255 for
+ * v >= 1.  Casting v to unsigned and multiplying by 0xff would instead wrap
+ * for negative values and for values above 1.  Channels the format lacks
+ * are filled with g = 0, b = 0, a = 255.
+ *
+ * \param dst         destination tile, written through TILE_PIXEL()
+ * \param src         start of the source image
+ * \param src_stride  distance in bytes between consecutive source rows
+ * \param x0          x coordinate, in pixels, of the block's first column
+ * \param y0          y coordinate, in rows, of the block's first row
+ */
+static void
+lp_tile_r8_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int8_t *src_pixel = (const int8_t *)(src_row + x0*1);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* Clamp to [0, 1] before scaling to 0..255. */
+         const uint8_t r = *src_pixel++ > 0 ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = 0; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Convert one TILE_SIZE x TILE_SIZE block of R8G8_SSCALED pixels to 8-bit
+ * unorm channels stored through TILE_PIXEL().
+ *
+ * An SSCALED channel holding the signed integer v stands for the real value
+ * v.  Clamped to the unorm range [0, 1] this gives 0 for v <= 0 and 255 for
+ * v >= 1.  Casting v to unsigned and multiplying by 0xff would instead wrap
+ * for negative values and for values above 1.  Channels the format lacks
+ * are filled with b = 0, a = 255.
+ *
+ * \param dst         destination tile, written through TILE_PIXEL()
+ * \param src         start of the source image
+ * \param src_stride  distance in bytes between consecutive source rows
+ * \param x0          x coordinate, in pixels, of the block's first column
+ * \param y0          y coordinate, in rows, of the block's first row
+ */
+static void
+lp_tile_r8g8_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int8_t *src_pixel = (const int8_t *)(src_row + x0*2);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* Clamp each channel to [0, 1] before scaling to 0..255. */
+         const uint8_t r = *src_pixel++ > 0 ? 0xff : 0;
+         const uint8_t g = *src_pixel++ > 0 ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = 0; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Convert one TILE_SIZE x TILE_SIZE block of R8G8B8_SSCALED pixels to 8-bit
+ * unorm channels stored through TILE_PIXEL().
+ *
+ * An SSCALED channel holding the signed integer v stands for the real value
+ * v.  Clamped to the unorm range [0, 1] this gives 0 for v <= 0 and 255 for
+ * v >= 1.  Casting v to unsigned and multiplying by 0xff would instead wrap
+ * for negative values and for values above 1.  The missing alpha channel is
+ * filled with 255.
+ *
+ * \param dst         destination tile, written through TILE_PIXEL()
+ * \param src         start of the source image
+ * \param src_stride  distance in bytes between consecutive source rows
+ * \param x0          x coordinate, in pixels, of the block's first column
+ * \param y0          y coordinate, in rows, of the block's first row
+ */
+static void
+lp_tile_r8g8b8_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int8_t *src_pixel = (const int8_t *)(src_row + x0*3);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* Clamp each channel to [0, 1] before scaling to 0..255. */
+         const uint8_t r = *src_pixel++ > 0 ? 0xff : 0;
+         const uint8_t g = *src_pixel++ > 0 ? 0xff : 0;
+         const uint8_t b = *src_pixel++ > 0 ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Convert one TILE_SIZE x TILE_SIZE block of R8G8B8A8_SSCALED pixels to
+ * 8-bit unorm channels stored through TILE_PIXEL().
+ *
+ * An SSCALED channel holding the signed integer v stands for the real value
+ * v.  Clamped to the unorm range [0, 1] this gives 0 for v <= 0 and 255 for
+ * v >= 1.  Casting v to unsigned and multiplying by 0xff would instead wrap
+ * for negative values and for values above 1.
+ *
+ * \param dst         destination tile, written through TILE_PIXEL()
+ * \param src         start of the source image
+ * \param src_stride  distance in bytes between consecutive source rows
+ * \param x0          x coordinate, in pixels, of the block's first column
+ * \param y0          y coordinate, in rows, of the block's first row
+ */
+static void
+lp_tile_r8g8b8a8_sscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const int8_t *src_pixel = (const int8_t *)(src_row + x0*4);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* Clamp each channel to [0, 1] before scaling to 0..255. */
+         const uint8_t r = *src_pixel++ > 0 ? 0xff : 0;
+         const uint8_t g = *src_pixel++ > 0 ? 0xff : 0;
+         const uint8_t b = *src_pixel++ > 0 ? 0xff : 0;
+         const uint8_t a = *src_pixel++ > 0 ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = a; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Convert one TILE_SIZE x TILE_SIZE block of R10G10B10X2_USCALED pixels to
+ * 8-bit unorm channels stored through TILE_PIXEL().
+ *
+ * Each pixel is one 32-bit word with r in bits 0-9, g in bits 10-19 and b
+ * in bits 20-29; the top two bits are ignored and alpha is written as 255.
+ * A USCALED field holding v stands for the real value v, so every non-zero
+ * field saturates to 255.  Multiplying by 0xff and narrowing to 8 bits
+ * would wrap instead (v == 256 would give 0).
+ *
+ * \param dst         destination tile, written through TILE_PIXEL()
+ * \param src         start of the source image
+ * \param src_stride  distance in bytes between consecutive source rows
+ * \param x0          x coordinate, in pixels, of the block's first column
+ * \param y0          y coordinate, in rows, of the block's first row
+ */
+static void
+lp_tile_r10g10b10x2_uscaled_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*4);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         uint32_t pixel = *src_pixel++;
+         /* Clamp each field to [0, 1] before scaling to 0..255. */
+         uint8_t r = (pixel & 0x3ff) ? 0xff : 0;
+         uint8_t g = ((pixel >> 10) & 0x3ff) ? 0xff : 0;
+         uint8_t b = ((pixel >> 20) & 0x3ff) ? 0xff : 0;
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Convert one TILE_SIZE x TILE_SIZE block of R10G10B10X2_SNORM pixels to
+ * 8-bit unorm channels stored through TILE_PIXEL().
+ *
+ * Each pixel is one 32-bit word with r in bits 0-9, g in bits 10-19 and b
+ * in bits 20-29; the top two bits are ignored and alpha is written as 255.
+ * The fields are taken to be 10-bit two's-complement values, as the SNORM
+ * name implies, so bit 0x200 is the sign bit.  Negative fields clamp to 0
+ * because the destination is unsigned; without that check -1 (0x3ff) would
+ * come out as 255.  Non-negative fields (0..0x1ff) are reduced to 8 bits by
+ * dropping the lowest bit.
+ *
+ * \param dst         destination tile, written through TILE_PIXEL()
+ * \param src         start of the source image
+ * \param src_stride  distance in bytes between consecutive source rows
+ * \param x0          x coordinate, in pixels, of the block's first column
+ * \param y0          y coordinate, in rows, of the block's first row
+ */
+static void
+lp_tile_r10g10b10x2_snorm_swizzle_4ub(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   const uint8_t *src_row = src + y0*src_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      const uint32_t *src_pixel = (const uint32_t *)(src_row + x0*4);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         uint32_t pixel = *src_pixel++;
+         const uint32_t r10 = pixel & 0x3ff;
+         const uint32_t g10 = (pixel >> 10) & 0x3ff;
+         const uint32_t b10 = (pixel >> 20) & 0x3ff;
+         /* Sign bit set means a negative value: clamp to 0. */
+         uint8_t r = (r10 & 0x200) ? 0 : (uint8_t)(r10 >> 1);
+         uint8_t g = (g10 & 0x200) ? 0 : (uint8_t)(g10 >> 1);
+         uint8_t b = (b10 & 0x200) ? 0 : (uint8_t)(b10 >> 1);
+         TILE_PIXEL(dst, x, y, 0) = r; /* r */
+         TILE_PIXEL(dst, x, y, 1) = g; /* g */
+         TILE_PIXEL(dst, x, y, 2) = b; /* b */
+         TILE_PIXEL(dst, x, y, 3) = 255; /* a */
+      }
+      src_row += src_stride;
+   }
+}
+
+/**
+ * Convert one block of pixels from a linear image into tile storage.
+ *
+ * Selects the per-format converter matching \p format and calls it with the
+ * remaining arguments.  For a format without a converter a message is sent
+ * to debug_printf() and \p dst is left untouched.  In DEBUG builds
+ * lp_tile_swizzle_count is incremented on every call, whether or not the
+ * format is supported.
+ *
+ * \param format      pixel format of the source image
+ * \param dst         destination tile
+ * \param src         start of the source image
+ * \param src_stride  distance in bytes between consecutive source rows
+ * \param x           x coordinate passed to the converter as x0
+ * \param y           y coordinate passed to the converter as y0
+ */
+void
+lp_tile_swizzle_4ub(enum pipe_format format, uint8_t *dst, const void *src, unsigned src_stride, unsigned x, unsigned y)
+{
+   void (*swizzle)(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0);
+
+#ifdef DEBUG
+   lp_tile_swizzle_count += 1;
+#endif
+
+   switch(format) {
+   case PIPE_FORMAT_NONE: swizzle = lp_tile_none_swizzle_4ub; break;
+
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      /* Only format with a hand-written SSE2 path; fall back to C otherwise. */
+      swizzle = lp_tile_b8g8r8a8_unorm_swizzle_4ub;
+#ifdef PIPE_ARCH_SSE
+      if (util_cpu_caps.has_sse2)
+         swizzle = lp_tile_b8g8r8a8_unorm_swizzle_4ub_sse2;
+#endif
+      break;
+
+   case PIPE_FORMAT_B8G8R8X8_UNORM: swizzle = lp_tile_b8g8r8x8_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_A8R8G8B8_UNORM: swizzle = lp_tile_a8r8g8b8_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_X8R8G8B8_UNORM: swizzle = lp_tile_x8r8g8b8_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_A8B8G8R8_UNORM: swizzle = lp_tile_a8b8g8r8_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_X8B8G8R8_UNORM: swizzle = lp_tile_x8b8g8r8_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R8G8B8X8_UNORM: swizzle = lp_tile_r8g8b8x8_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_B5G5R5X1_UNORM: swizzle = lp_tile_b5g5r5x1_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_B5G5R5A1_UNORM: swizzle = lp_tile_b5g5r5a1_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_B4G4R4A4_UNORM: swizzle = lp_tile_b4g4r4a4_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_B4G4R4X4_UNORM: swizzle = lp_tile_b4g4r4x4_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_B5G6R5_UNORM: swizzle = lp_tile_b5g6r5_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R10G10B10A2_UNORM: swizzle = lp_tile_r10g10b10a2_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_B10G10R10A2_UNORM: swizzle = lp_tile_b10g10r10a2_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_L8_UNORM: swizzle = lp_tile_l8_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_A8_UNORM: swizzle = lp_tile_a8_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_I8_UNORM: swizzle = lp_tile_i8_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_L4A4_UNORM: swizzle = lp_tile_l4a4_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_L8A8_UNORM: swizzle = lp_tile_l8a8_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_L16_UNORM: swizzle = lp_tile_l16_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_L8_SRGB: swizzle = lp_tile_l8_srgb_swizzle_4ub; break;
+   case PIPE_FORMAT_L8A8_SRGB: swizzle = lp_tile_l8a8_srgb_swizzle_4ub; break;
+   case PIPE_FORMAT_R8G8B8_SRGB: swizzle = lp_tile_r8g8b8_srgb_swizzle_4ub; break;
+   case PIPE_FORMAT_R8G8B8A8_SRGB: swizzle = lp_tile_r8g8b8a8_srgb_swizzle_4ub; break;
+   case PIPE_FORMAT_A8B8G8R8_SRGB: swizzle = lp_tile_a8b8g8r8_srgb_swizzle_4ub; break;
+   case PIPE_FORMAT_X8B8G8R8_SRGB: swizzle = lp_tile_x8b8g8r8_srgb_swizzle_4ub; break;
+   case PIPE_FORMAT_B8G8R8A8_SRGB: swizzle = lp_tile_b8g8r8a8_srgb_swizzle_4ub; break;
+   case PIPE_FORMAT_B8G8R8X8_SRGB: swizzle = lp_tile_b8g8r8x8_srgb_swizzle_4ub; break;
+   case PIPE_FORMAT_A8R8G8B8_SRGB: swizzle = lp_tile_a8r8g8b8_srgb_swizzle_4ub; break;
+   case PIPE_FORMAT_X8R8G8B8_SRGB: swizzle = lp_tile_x8r8g8b8_srgb_swizzle_4ub; break;
+   case PIPE_FORMAT_R8SG8SB8UX8U_NORM: swizzle = lp_tile_r8sg8sb8ux8u_norm_swizzle_4ub; break;
+   case PIPE_FORMAT_R10SG10SB10SA2U_NORM: swizzle = lp_tile_r10sg10sb10sa2u_norm_swizzle_4ub; break;
+   case PIPE_FORMAT_R5SG5SB6U_NORM: swizzle = lp_tile_r5sg5sb6u_norm_swizzle_4ub; break;
+   case PIPE_FORMAT_R10G10B10A2_USCALED: swizzle = lp_tile_r10g10b10a2_uscaled_swizzle_4ub; break;
+
+   case PIPE_FORMAT_R64_FLOAT: swizzle = lp_tile_r64_float_swizzle_4ub; break;
+   case PIPE_FORMAT_R64G64_FLOAT: swizzle = lp_tile_r64g64_float_swizzle_4ub; break;
+   case PIPE_FORMAT_R64G64B64_FLOAT: swizzle = lp_tile_r64g64b64_float_swizzle_4ub; break;
+   case PIPE_FORMAT_R64G64B64A64_FLOAT: swizzle = lp_tile_r64g64b64a64_float_swizzle_4ub; break;
+
+   case PIPE_FORMAT_R32_FLOAT: swizzle = lp_tile_r32_float_swizzle_4ub; break;
+   case PIPE_FORMAT_R32G32_FLOAT: swizzle = lp_tile_r32g32_float_swizzle_4ub; break;
+   case PIPE_FORMAT_R32G32B32_FLOAT: swizzle = lp_tile_r32g32b32_float_swizzle_4ub; break;
+   case PIPE_FORMAT_R32G32B32A32_FLOAT: swizzle = lp_tile_r32g32b32a32_float_swizzle_4ub; break;
+   case PIPE_FORMAT_R32_UNORM: swizzle = lp_tile_r32_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R32G32_UNORM: swizzle = lp_tile_r32g32_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R32G32B32_UNORM: swizzle = lp_tile_r32g32b32_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R32G32B32A32_UNORM: swizzle = lp_tile_r32g32b32a32_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R32_USCALED: swizzle = lp_tile_r32_uscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R32G32_USCALED: swizzle = lp_tile_r32g32_uscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R32G32B32_USCALED: swizzle = lp_tile_r32g32b32_uscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R32G32B32A32_USCALED: swizzle = lp_tile_r32g32b32a32_uscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R32_SNORM: swizzle = lp_tile_r32_snorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R32G32_SNORM: swizzle = lp_tile_r32g32_snorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R32G32B32_SNORM: swizzle = lp_tile_r32g32b32_snorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R32G32B32A32_SNORM: swizzle = lp_tile_r32g32b32a32_snorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R32_SSCALED: swizzle = lp_tile_r32_sscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R32G32_SSCALED: swizzle = lp_tile_r32g32_sscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R32G32B32_SSCALED: swizzle = lp_tile_r32g32b32_sscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R32G32B32A32_SSCALED: swizzle = lp_tile_r32g32b32a32_sscaled_swizzle_4ub; break;
+
+   case PIPE_FORMAT_R16_FLOAT: swizzle = lp_tile_r16_float_swizzle_4ub; break;
+   case PIPE_FORMAT_R16G16_FLOAT: swizzle = lp_tile_r16g16_float_swizzle_4ub; break;
+   case PIPE_FORMAT_R16G16B16_FLOAT: swizzle = lp_tile_r16g16b16_float_swizzle_4ub; break;
+   case PIPE_FORMAT_R16G16B16A16_FLOAT: swizzle = lp_tile_r16g16b16a16_float_swizzle_4ub; break;
+   case PIPE_FORMAT_R16_UNORM: swizzle = lp_tile_r16_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R16G16_UNORM: swizzle = lp_tile_r16g16_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R16G16B16_UNORM: swizzle = lp_tile_r16g16b16_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R16G16B16A16_UNORM: swizzle = lp_tile_r16g16b16a16_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R16_USCALED: swizzle = lp_tile_r16_uscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R16G16_USCALED: swizzle = lp_tile_r16g16_uscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R16G16B16_USCALED: swizzle = lp_tile_r16g16b16_uscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R16G16B16A16_USCALED: swizzle = lp_tile_r16g16b16a16_uscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R16_SNORM: swizzle = lp_tile_r16_snorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R16G16_SNORM: swizzle = lp_tile_r16g16_snorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R16G16B16_SNORM: swizzle = lp_tile_r16g16b16_snorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R16G16B16A16_SNORM: swizzle = lp_tile_r16g16b16a16_snorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R16_SSCALED: swizzle = lp_tile_r16_sscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R16G16_SSCALED: swizzle = lp_tile_r16g16_sscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R16G16B16_SSCALED: swizzle = lp_tile_r16g16b16_sscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R16G16B16A16_SSCALED: swizzle = lp_tile_r16g16b16a16_sscaled_swizzle_4ub; break;
+
+   case PIPE_FORMAT_R8_UNORM: swizzle = lp_tile_r8_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R8G8_UNORM: swizzle = lp_tile_r8g8_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R8G8B8_UNORM: swizzle = lp_tile_r8g8b8_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R8G8B8A8_UNORM: swizzle = lp_tile_r8g8b8a8_unorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R8_USCALED: swizzle = lp_tile_r8_uscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R8G8_USCALED: swizzle = lp_tile_r8g8_uscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R8G8B8_USCALED: swizzle = lp_tile_r8g8b8_uscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R8G8B8A8_USCALED: swizzle = lp_tile_r8g8b8a8_uscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R8_SNORM: swizzle = lp_tile_r8_snorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R8G8_SNORM: swizzle = lp_tile_r8g8_snorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R8G8B8_SNORM: swizzle = lp_tile_r8g8b8_snorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R8G8B8A8_SNORM: swizzle = lp_tile_r8g8b8a8_snorm_swizzle_4ub; break;
+   case PIPE_FORMAT_R8_SSCALED: swizzle = lp_tile_r8_sscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R8G8_SSCALED: swizzle = lp_tile_r8g8_sscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R8G8B8_SSCALED: swizzle = lp_tile_r8g8b8_sscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R8G8B8A8_SSCALED: swizzle = lp_tile_r8g8b8a8_sscaled_swizzle_4ub; break;
+
+   case PIPE_FORMAT_R10G10B10X2_USCALED: swizzle = lp_tile_r10g10b10x2_uscaled_swizzle_4ub; break;
+   case PIPE_FORMAT_R10G10B10X2_SNORM: swizzle = lp_tile_r10g10b10x2_snorm_swizzle_4ub; break;
+
+   default:
+      /* No converter for this format: report it and leave dst alone. */
+      debug_printf("%s: unsupported format %s\n", __FUNCTION__, util_format_name(format));
+      return;
+   }
+
+   swizzle(dst, (const uint8_t *)src, src_stride, x, y);
+}
+
+/**
+ * Write one tile out for PIPE_FORMAT_NONE, one byte per pixel.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT blocks, each
+ * occupying TILE_X_STRIDE bytes of \p src.  Only the first plane of
+ * TILE_C_STRIDE bytes (red) is read.  Entry i of the plane belongs to the
+ * pixel at (tile_x_offset[i], tile_y_offset[i]) inside the block; entries
+ * are handled in pairs, with entry i+1 stored directly after entry i.
+ * Each output byte is red / 255, i.e. 1 when red is 255 and 0 otherwise.
+ *
+ * \param src         tile data in the layout described above
+ * \param dst         start of the destination image
+ * \param dst_stride  distance in bytes between consecutive destination rows
+ * \param x0          destination x coordinate of the tile origin, in pixels
+ * \param y0          destination y coordinate of the tile origin, in rows
+ */
+static void
+lp_tile_none_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *const out = (uint8_t *) dst;
+   const unsigned out_pitch = dst_stride / 1;
+   unsigned int block_x, block_y, k;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned py = y0 + block_y;
+
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned px = x0 + block_x;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            /* Integer division: only a fully saturated input yields 1. */
+            const uint32_t first = (uint8_t)(red[k+0] / 0xff);
+            const uint32_t second = (uint8_t)(red[k+1] / 0xff);
+            const unsigned offset = (py + tile_y_offset[k]) * out_pitch + (px + tile_x_offset[k]);
+
+            out[offset + 0] = first;
+            out[offset + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Write one tile of 8-bit r/g/b/a channel data out as B8G8R8A8_UNORM pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT blocks.  Each
+ * block occupies TILE_X_STRIDE bytes of \p src and keeps its r, g, b and a
+ * values in four consecutive planes of TILE_C_STRIDE bytes.  Entry i of a
+ * plane belongs to the pixel at (tile_x_offset[i], tile_y_offset[i]) inside
+ * the block; entries are handled in pairs, with entry i+1 stored directly
+ * after entry i.  Each 32-bit pixel is packed as b | g << 8 | r << 16 |
+ * a << 24.
+ *
+ * \param src         tile data in the layout described above
+ * \param dst         start of the destination image
+ * \param dst_stride  distance in bytes between consecutive destination rows
+ * \param x0          destination x coordinate of the tile origin, in pixels
+ * \param y0          destination y coordinate of the tile origin, in rows
+ */
+static void
+lp_tile_b8g8r8a8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   const unsigned dstpix_stride = dst_stride / 4;
+   uint32_t *dstpix = (uint32_t *) dst;
+   unsigned int qx, qy, i;
+
+   for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) {
+      const unsigned py = y0 + qy;
+      for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) {
+         const unsigned px = x0 + qx;
+         const uint8_t *r = src + 0 * TILE_C_STRIDE;
+         const uint8_t *g = src + 1 * TILE_C_STRIDE;
+         const uint8_t *b = src + 2 * TILE_C_STRIDE;
+         const uint8_t *a = src + 3 * TILE_C_STRIDE;
+         for (i = 0; i < TILE_C_STRIDE; i += 2) {
+            /* Widen to uint32_t before shifting: a uint8_t promotes to int,
+             * and shifting a value >= 128 left by 24 overflows it. */
+            const uint32_t pixel0 = ((uint32_t)b[i+0] << 0) | ((uint32_t)g[i+0] << 8) | ((uint32_t)r[i+0] << 16) | ((uint32_t)a[i+0] << 24);
+            const uint32_t pixel1 = ((uint32_t)b[i+1] << 0) | ((uint32_t)g[i+1] << 8) | ((uint32_t)r[i+1] << 16) | ((uint32_t)a[i+1] << 24);
+            const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]);
+            dstpix[offset + 0] = pixel0;
+            dstpix[offset + 1] = pixel1;
+         }
+         src += TILE_X_STRIDE;
+      }
+   }
+}
+
+/**
+ * Write one tile of 8-bit r/g/b channel data out as B8G8R8X8_UNORM pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT blocks.  Each
+ * block occupies TILE_X_STRIDE bytes of \p src and keeps its r, g and b
+ * values in the first three consecutive planes of TILE_C_STRIDE bytes; the
+ * fourth (alpha) plane is not read.  Entry i of a plane belongs to the
+ * pixel at (tile_x_offset[i], tile_y_offset[i]) inside the block; entries
+ * are handled in pairs, with entry i+1 stored directly after entry i.
+ * Each 32-bit pixel is packed as b | g << 8 | r << 16 with the top byte 0.
+ *
+ * \param src         tile data in the layout described above
+ * \param dst         start of the destination image
+ * \param dst_stride  distance in bytes between consecutive destination rows
+ * \param x0          destination x coordinate of the tile origin, in pixels
+ * \param y0          destination y coordinate of the tile origin, in rows
+ */
+static void
+lp_tile_b8g8r8x8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint32_t *const out = (uint32_t *) dst;
+   const unsigned out_pitch = dst_stride / 4;
+   unsigned int block_x, block_y, k;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned py = y0 + block_y;
+
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned px = x0 + block_x;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+         const uint8_t *green = src + 1 * TILE_C_STRIDE;
+         const uint8_t *blue = src + 2 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const uint32_t first = (uint32_t)blue[k+0] | ((uint32_t)green[k+0] << 8) | ((uint32_t)red[k+0] << 16);
+            const uint32_t second = (uint32_t)blue[k+1] | ((uint32_t)green[k+1] << 8) | ((uint32_t)red[k+1] << 16);
+            const unsigned offset = (py + tile_y_offset[k]) * out_pitch + (px + tile_x_offset[k]);
+
+            out[offset + 0] = first;
+            out[offset + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Write one tile of 8-bit r/g/b/a channel data out as A8R8G8B8_UNORM pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT blocks.  Each
+ * block occupies TILE_X_STRIDE bytes of \p src and keeps its r, g, b and a
+ * values in four consecutive planes of TILE_C_STRIDE bytes.  Entry i of a
+ * plane belongs to the pixel at (tile_x_offset[i], tile_y_offset[i]) inside
+ * the block; entries are handled in pairs, with entry i+1 stored directly
+ * after entry i.  Each 32-bit pixel is packed as a | r << 8 | g << 16 |
+ * b << 24.
+ *
+ * \param src         tile data in the layout described above
+ * \param dst         start of the destination image
+ * \param dst_stride  distance in bytes between consecutive destination rows
+ * \param x0          destination x coordinate of the tile origin, in pixels
+ * \param y0          destination y coordinate of the tile origin, in rows
+ */
+static void
+lp_tile_a8r8g8b8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   const unsigned dstpix_stride = dst_stride / 4;
+   uint32_t *dstpix = (uint32_t *) dst;
+   unsigned int qx, qy, i;
+
+   for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) {
+      const unsigned py = y0 + qy;
+      for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) {
+         const unsigned px = x0 + qx;
+         const uint8_t *r = src + 0 * TILE_C_STRIDE;
+         const uint8_t *g = src + 1 * TILE_C_STRIDE;
+         const uint8_t *b = src + 2 * TILE_C_STRIDE;
+         const uint8_t *a = src + 3 * TILE_C_STRIDE;
+         for (i = 0; i < TILE_C_STRIDE; i += 2) {
+            /* Widen to uint32_t before shifting: a uint8_t promotes to int,
+             * and shifting a value >= 128 left by 24 overflows it. */
+            const uint32_t pixel0 = ((uint32_t)a[i+0] << 0) | ((uint32_t)r[i+0] << 8) | ((uint32_t)g[i+0] << 16) | ((uint32_t)b[i+0] << 24);
+            const uint32_t pixel1 = ((uint32_t)a[i+1] << 0) | ((uint32_t)r[i+1] << 8) | ((uint32_t)g[i+1] << 16) | ((uint32_t)b[i+1] << 24);
+            const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]);
+            dstpix[offset + 0] = pixel0;
+            dstpix[offset + 1] = pixel1;
+         }
+         src += TILE_X_STRIDE;
+      }
+   }
+}
+
+/**
+ * Write one tile of 8-bit r/g/b channel data out as X8R8G8B8_UNORM pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT blocks.  Each
+ * block occupies TILE_X_STRIDE bytes of \p src and keeps its r, g and b
+ * values in the first three consecutive planes of TILE_C_STRIDE bytes; the
+ * fourth (alpha) plane is not read.  Entry i of a plane belongs to the
+ * pixel at (tile_x_offset[i], tile_y_offset[i]) inside the block; entries
+ * are handled in pairs, with entry i+1 stored directly after entry i.
+ * Each 32-bit pixel is packed as r << 8 | g << 16 | b << 24 with the low
+ * byte 0.
+ *
+ * \param src         tile data in the layout described above
+ * \param dst         start of the destination image
+ * \param dst_stride  distance in bytes between consecutive destination rows
+ * \param x0          destination x coordinate of the tile origin, in pixels
+ * \param y0          destination y coordinate of the tile origin, in rows
+ */
+static void
+lp_tile_x8r8g8b8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   const unsigned dstpix_stride = dst_stride / 4;
+   uint32_t *dstpix = (uint32_t *) dst;
+   unsigned int qx, qy, i;
+
+   for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) {
+      const unsigned py = y0 + qy;
+      for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) {
+         const unsigned px = x0 + qx;
+         const uint8_t *r = src + 0 * TILE_C_STRIDE;
+         const uint8_t *g = src + 1 * TILE_C_STRIDE;
+         const uint8_t *b = src + 2 * TILE_C_STRIDE;
+         for (i = 0; i < TILE_C_STRIDE; i += 2) {
+            /* Widen to uint32_t before shifting: a uint8_t promotes to int,
+             * and shifting a value >= 128 left by 24 overflows it. */
+            const uint32_t pixel0 = ((uint32_t)r[i+0] << 8) | ((uint32_t)g[i+0] << 16) | ((uint32_t)b[i+0] << 24);
+            const uint32_t pixel1 = ((uint32_t)r[i+1] << 8) | ((uint32_t)g[i+1] << 16) | ((uint32_t)b[i+1] << 24);
+            const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]);
+            dstpix[offset + 0] = pixel0;
+            dstpix[offset + 1] = pixel1;
+         }
+         src += TILE_X_STRIDE;
+      }
+   }
+}
+
+/**
+ * Write one tile of 8-bit r/g/b/a channel data out as A8B8G8R8_UNORM pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT blocks.  Each
+ * block occupies TILE_X_STRIDE bytes of \p src and keeps its r, g, b and a
+ * values in four consecutive planes of TILE_C_STRIDE bytes.  Entry i of a
+ * plane belongs to the pixel at (tile_x_offset[i], tile_y_offset[i]) inside
+ * the block; entries are handled in pairs, with entry i+1 stored directly
+ * after entry i.  Each 32-bit pixel is packed as a | b << 8 | g << 16 |
+ * r << 24.
+ *
+ * \param src         tile data in the layout described above
+ * \param dst         start of the destination image
+ * \param dst_stride  distance in bytes between consecutive destination rows
+ * \param x0          destination x coordinate of the tile origin, in pixels
+ * \param y0          destination y coordinate of the tile origin, in rows
+ */
+static void
+lp_tile_a8b8g8r8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   const unsigned dstpix_stride = dst_stride / 4;
+   uint32_t *dstpix = (uint32_t *) dst;
+   unsigned int qx, qy, i;
+
+   for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) {
+      const unsigned py = y0 + qy;
+      for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) {
+         const unsigned px = x0 + qx;
+         const uint8_t *r = src + 0 * TILE_C_STRIDE;
+         const uint8_t *g = src + 1 * TILE_C_STRIDE;
+         const uint8_t *b = src + 2 * TILE_C_STRIDE;
+         const uint8_t *a = src + 3 * TILE_C_STRIDE;
+         for (i = 0; i < TILE_C_STRIDE; i += 2) {
+            /* Widen to uint32_t before shifting: a uint8_t promotes to int,
+             * and shifting a value >= 128 left by 24 overflows it. */
+            const uint32_t pixel0 = ((uint32_t)a[i+0] << 0) | ((uint32_t)b[i+0] << 8) | ((uint32_t)g[i+0] << 16) | ((uint32_t)r[i+0] << 24);
+            const uint32_t pixel1 = ((uint32_t)a[i+1] << 0) | ((uint32_t)b[i+1] << 8) | ((uint32_t)g[i+1] << 16) | ((uint32_t)r[i+1] << 24);
+            const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]);
+            dstpix[offset + 0] = pixel0;
+            dstpix[offset + 1] = pixel1;
+         }
+         src += TILE_X_STRIDE;
+      }
+   }
+}
+
+/**
+ * Write one tile of 8-bit r/g/b channel data out as X8B8G8R8_UNORM pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT blocks.  Each
+ * block occupies TILE_X_STRIDE bytes of \p src and keeps its r, g and b
+ * values in the first three consecutive planes of TILE_C_STRIDE bytes; the
+ * fourth (alpha) plane is not read.  Entry i of a plane belongs to the
+ * pixel at (tile_x_offset[i], tile_y_offset[i]) inside the block; entries
+ * are handled in pairs, with entry i+1 stored directly after entry i.
+ * Each 32-bit pixel is packed as b << 8 | g << 16 | r << 24 with the low
+ * byte 0.
+ *
+ * \param src         tile data in the layout described above
+ * \param dst         start of the destination image
+ * \param dst_stride  distance in bytes between consecutive destination rows
+ * \param x0          destination x coordinate of the tile origin, in pixels
+ * \param y0          destination y coordinate of the tile origin, in rows
+ */
+static void
+lp_tile_x8b8g8r8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   const unsigned dstpix_stride = dst_stride / 4;
+   uint32_t *dstpix = (uint32_t *) dst;
+   unsigned int qx, qy, i;
+
+   for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) {
+      const unsigned py = y0 + qy;
+      for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) {
+         const unsigned px = x0 + qx;
+         const uint8_t *r = src + 0 * TILE_C_STRIDE;
+         const uint8_t *g = src + 1 * TILE_C_STRIDE;
+         const uint8_t *b = src + 2 * TILE_C_STRIDE;
+         for (i = 0; i < TILE_C_STRIDE; i += 2) {
+            /* Widen to uint32_t before shifting: a uint8_t promotes to int,
+             * and shifting a value >= 128 left by 24 overflows it. */
+            const uint32_t pixel0 = ((uint32_t)b[i+0] << 8) | ((uint32_t)g[i+0] << 16) | ((uint32_t)r[i+0] << 24);
+            const uint32_t pixel1 = ((uint32_t)b[i+1] << 8) | ((uint32_t)g[i+1] << 16) | ((uint32_t)r[i+1] << 24);
+            const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]);
+            dstpix[offset + 0] = pixel0;
+            dstpix[offset + 1] = pixel1;
+         }
+         src += TILE_X_STRIDE;
+      }
+   }
+}
+
+/**
+ * Write one tile of 8-bit r/g/b channel data out as R8G8B8X8_UNORM pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT blocks.  Each
+ * block occupies TILE_X_STRIDE bytes of \p src and keeps its r, g and b
+ * values in the first three consecutive planes of TILE_C_STRIDE bytes; the
+ * fourth (alpha) plane is not read.  Entry i of a plane belongs to the
+ * pixel at (tile_x_offset[i], tile_y_offset[i]) inside the block; entries
+ * are handled in pairs, with entry i+1 stored directly after entry i.
+ * Each 32-bit pixel is packed as r | g << 8 | b << 16 with the top byte 0.
+ *
+ * \param src         tile data in the layout described above
+ * \param dst         start of the destination image
+ * \param dst_stride  distance in bytes between consecutive destination rows
+ * \param x0          destination x coordinate of the tile origin, in pixels
+ * \param y0          destination y coordinate of the tile origin, in rows
+ */
+static void
+lp_tile_r8g8b8x8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint32_t *const out = (uint32_t *) dst;
+   const unsigned out_pitch = dst_stride / 4;
+   unsigned int block_x, block_y, k;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned py = y0 + block_y;
+
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned px = x0 + block_x;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+         const uint8_t *green = src + 1 * TILE_C_STRIDE;
+         const uint8_t *blue = src + 2 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const uint32_t first = (uint32_t)red[k+0] | ((uint32_t)green[k+0] << 8) | ((uint32_t)blue[k+0] << 16);
+            const uint32_t second = (uint32_t)red[k+1] | ((uint32_t)green[k+1] << 8) | ((uint32_t)blue[k+1] << 16);
+            const unsigned offset = (py + tile_y_offset[k]) * out_pitch + (px + tile_x_offset[k]);
+
+            out[offset + 0] = first;
+            out[offset + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Write one tile of 8-bit r/g/b channel data out as B5G5R5X1_UNORM pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT blocks.  Each
+ * block occupies TILE_X_STRIDE bytes of \p src and keeps its r, g and b
+ * values in the first three consecutive planes of TILE_C_STRIDE bytes; the
+ * fourth (alpha) plane is not read.  Entry i of a plane belongs to the
+ * pixel at (tile_x_offset[i], tile_y_offset[i]) inside the block; entries
+ * are handled in pairs, with entry i+1 stored directly after entry i.
+ * Each channel keeps its top 5 bits; the 16-bit pixel is packed as
+ * b | g << 5 | r << 10 with bit 15 left 0.
+ *
+ * \param src         tile data in the layout described above
+ * \param dst         start of the destination image
+ * \param dst_stride  distance in bytes between consecutive destination rows
+ * \param x0          destination x coordinate of the tile origin, in pixels
+ * \param y0          destination y coordinate of the tile origin, in rows
+ */
+static void
+lp_tile_b5g5r5x1_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint16_t *const out = (uint16_t *) dst;
+   const unsigned out_pitch = dst_stride / 2;
+   unsigned int block_x, block_y, k;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned py = y0 + block_y;
+
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned px = x0 + block_x;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+         const uint8_t *green = src + 1 * TILE_C_STRIDE;
+         const uint8_t *blue = src + 2 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const uint32_t first = (uint32_t)(blue[k+0] >> 3) | ((uint32_t)(green[k+0] >> 3) << 5) | ((uint32_t)(red[k+0] >> 3) << 10);
+            const uint32_t second = (uint32_t)(blue[k+1] >> 3) | ((uint32_t)(green[k+1] >> 3) << 5) | ((uint32_t)(red[k+1] >> 3) << 10);
+            const unsigned offset = (py + tile_y_offset[k]) * out_pitch + (px + tile_x_offset[k]);
+
+            out[offset + 0] = first;
+            out[offset + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Write one tile of 8-bit r/g/b/a channel data out as B5G5R5A1_UNORM pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT blocks.  Each
+ * block occupies TILE_X_STRIDE bytes of \p src and keeps its r, g, b and a
+ * values in four consecutive planes of TILE_C_STRIDE bytes.  Entry i of a
+ * plane belongs to the pixel at (tile_x_offset[i], tile_y_offset[i]) inside
+ * the block; entries are handled in pairs, with entry i+1 stored directly
+ * after entry i.  Colour channels keep their top 5 bits and alpha its top
+ * bit; the 16-bit pixel is packed as b | g << 5 | r << 10 | a << 15.
+ *
+ * \param src         tile data in the layout described above
+ * \param dst         start of the destination image
+ * \param dst_stride  distance in bytes between consecutive destination rows
+ * \param x0          destination x coordinate of the tile origin, in pixels
+ * \param y0          destination y coordinate of the tile origin, in rows
+ */
+static void
+lp_tile_b5g5r5a1_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint16_t *const out = (uint16_t *) dst;
+   const unsigned out_pitch = dst_stride / 2;
+   unsigned int block_x, block_y, k;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned py = y0 + block_y;
+
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned px = x0 + block_x;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+         const uint8_t *green = src + 1 * TILE_C_STRIDE;
+         const uint8_t *blue = src + 2 * TILE_C_STRIDE;
+         const uint8_t *alpha = src + 3 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const uint32_t first = (uint32_t)(blue[k+0] >> 3) | ((uint32_t)(green[k+0] >> 3) << 5) | ((uint32_t)(red[k+0] >> 3) << 10) | ((uint32_t)(alpha[k+0] >> 7) << 15);
+            const uint32_t second = (uint32_t)(blue[k+1] >> 3) | ((uint32_t)(green[k+1] >> 3) << 5) | ((uint32_t)(red[k+1] >> 3) << 10) | ((uint32_t)(alpha[k+1] >> 7) << 15);
+            const unsigned offset = (py + tile_y_offset[k]) * out_pitch + (px + tile_x_offset[k]);
+
+            out[offset + 0] = first;
+            out[offset + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Write one tile of 8-bit r/g/b/a channel data out as B4G4R4A4_UNORM pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT blocks.  Each
+ * block occupies TILE_X_STRIDE bytes of \p src and keeps its r, g, b and a
+ * values in four consecutive planes of TILE_C_STRIDE bytes.  Entry i of a
+ * plane belongs to the pixel at (tile_x_offset[i], tile_y_offset[i]) inside
+ * the block; entries are handled in pairs, with entry i+1 stored directly
+ * after entry i.  Each channel keeps its top 4 bits; the 16-bit pixel is
+ * packed as b | g << 4 | r << 8 | a << 12.
+ *
+ * \param src         tile data in the layout described above
+ * \param dst         start of the destination image
+ * \param dst_stride  distance in bytes between consecutive destination rows
+ * \param x0          destination x coordinate of the tile origin, in pixels
+ * \param y0          destination y coordinate of the tile origin, in rows
+ */
+static void
+lp_tile_b4g4r4a4_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint16_t *const out = (uint16_t *) dst;
+   const unsigned out_pitch = dst_stride / 2;
+   unsigned int block_x, block_y, k;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned py = y0 + block_y;
+
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned px = x0 + block_x;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+         const uint8_t *green = src + 1 * TILE_C_STRIDE;
+         const uint8_t *blue = src + 2 * TILE_C_STRIDE;
+         const uint8_t *alpha = src + 3 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const uint32_t first = (uint32_t)(blue[k+0] >> 4) | ((uint32_t)(green[k+0] >> 4) << 4) | ((uint32_t)(red[k+0] >> 4) << 8) | ((uint32_t)(alpha[k+0] >> 4) << 12);
+            const uint32_t second = (uint32_t)(blue[k+1] >> 4) | ((uint32_t)(green[k+1] >> 4) << 4) | ((uint32_t)(red[k+1] >> 4) << 8) | ((uint32_t)(alpha[k+1] >> 4) << 12);
+            const unsigned offset = (py + tile_y_offset[k]) * out_pitch + (px + tile_x_offset[k]);
+
+            out[offset + 0] = first;
+            out[offset + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Write one tile of 8-bit r/g/b channel data out as B4G4R4X4_UNORM pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT blocks.  Each
+ * block occupies TILE_X_STRIDE bytes of \p src and keeps its r, g and b
+ * values in the first three consecutive planes of TILE_C_STRIDE bytes; the
+ * fourth (alpha) plane is not read.  Entry i of a plane belongs to the
+ * pixel at (tile_x_offset[i], tile_y_offset[i]) inside the block; entries
+ * are handled in pairs, with entry i+1 stored directly after entry i.
+ * Each channel keeps its top 4 bits; the 16-bit pixel is packed as
+ * b | g << 4 | r << 8 with the top nibble left 0.
+ *
+ * \param src         tile data in the layout described above
+ * \param dst         start of the destination image
+ * \param dst_stride  distance in bytes between consecutive destination rows
+ * \param x0          destination x coordinate of the tile origin, in pixels
+ * \param y0          destination y coordinate of the tile origin, in rows
+ */
+static void
+lp_tile_b4g4r4x4_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint16_t *const out = (uint16_t *) dst;
+   const unsigned out_pitch = dst_stride / 2;
+   unsigned int block_x, block_y, k;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned py = y0 + block_y;
+
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned px = x0 + block_x;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+         const uint8_t *green = src + 1 * TILE_C_STRIDE;
+         const uint8_t *blue = src + 2 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const uint32_t first = (uint32_t)(blue[k+0] >> 4) | ((uint32_t)(green[k+0] >> 4) << 4) | ((uint32_t)(red[k+0] >> 4) << 8);
+            const uint32_t second = (uint32_t)(blue[k+1] >> 4) | ((uint32_t)(green[k+1] >> 4) << 4) | ((uint32_t)(red[k+1] >> 4) << 8);
+            const unsigned offset = (py + tile_y_offset[k]) * out_pitch + (px + tile_x_offset[k]);
+
+            out[offset + 0] = first;
+            out[offset + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Write one tile of 8-bit r/g/b channel data out as B5G6R5_UNORM pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT blocks.  Each
+ * block occupies TILE_X_STRIDE bytes of \p src and keeps its r, g and b
+ * values in the first three consecutive planes of TILE_C_STRIDE bytes; the
+ * fourth (alpha) plane is not read.  Entry i of a plane belongs to the
+ * pixel at (tile_x_offset[i], tile_y_offset[i]) inside the block; entries
+ * are handled in pairs, with entry i+1 stored directly after entry i.
+ * Blue and red keep their top 5 bits and green its top 6; the 16-bit pixel
+ * is packed as b | g << 5 | r << 11.
+ *
+ * \param src         tile data in the layout described above
+ * \param dst         start of the destination image
+ * \param dst_stride  distance in bytes between consecutive destination rows
+ * \param x0          destination x coordinate of the tile origin, in pixels
+ * \param y0          destination y coordinate of the tile origin, in rows
+ */
+static void
+lp_tile_b5g6r5_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint16_t *const out = (uint16_t *) dst;
+   const unsigned out_pitch = dst_stride / 2;
+   unsigned int block_x, block_y, k;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned py = y0 + block_y;
+
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned px = x0 + block_x;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+         const uint8_t *green = src + 1 * TILE_C_STRIDE;
+         const uint8_t *blue = src + 2 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const uint32_t first = (uint32_t)(blue[k+0] >> 3) | ((uint32_t)(green[k+0] >> 2) << 5) | ((uint32_t)(red[k+0] >> 3) << 11);
+            const uint32_t second = (uint32_t)(blue[k+1] >> 3) | ((uint32_t)(green[k+1] >> 2) << 5) | ((uint32_t)(red[k+1] >> 3) << 11);
+            const unsigned offset = (py + tile_y_offset[k]) * out_pitch + (px + tile_x_offset[k]);
+
+            out[offset + 0] = first;
+            out[offset + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Pack a 4ub tile into 32-bit R10G10B10A2 pixels.
+ *
+ * Red, green and blue are rescaled from 0..0xff to 0..0x3ff and stored at
+ * bits 0, 10 and 20; alpha keeps its top two bits at bit 30.
+ *
+ * Pixels are emitted in pairs located through tile_x_offset/tile_y_offset.
+ */
+static void
+lp_tile_r10g10b10a2_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint32_t *const out = (uint32_t *) dst;
+   const unsigned out_pitch = dst_stride / 4;   /* stride in 32-bit elements */
+   unsigned block_x, block_y, k, n;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned row0 = y0 + block_y;
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH) {
+         const unsigned col0 = x0 + block_x;
+         const uint8_t *const red = src;
+         const uint8_t *const green = src + TILE_C_STRIDE;
+         const uint8_t *const blue = src + 2 * TILE_C_STRIDE;
+         const uint8_t *const alpha = src + 3 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned idx = (row0 + tile_y_offset[k]) * out_pitch + (col0 + tile_x_offset[k]);
+            uint32_t pair[2];
+
+            /* Convert both pixels before storing either of them. */
+            for (n = 0; n < 2; ++n) {
+               const uint32_t r10 = (uint32_t) red[k + n] * 0x3ff / 0xff;
+               const uint32_t g10 = (uint32_t) green[k + n] * 0x3ff / 0xff;
+               const uint32_t b10 = (uint32_t) blue[k + n] * 0x3ff / 0xff;
+               const uint32_t a2 = (uint32_t) (alpha[k + n] >> 6);
+               pair[n] = r10 | (g10 << 10) | (b10 << 20) | (a2 << 30);
+            }
+            out[idx] = pair[0];
+            out[idx + 1] = pair[1];
+         }
+         src += TILE_X_STRIDE;   /* advance to the next vector block */
+      }
+   }
+}
+
+/**
+ * Pack a 4ub tile into 32-bit B10G10R10A2 pixels.
+ *
+ * Blue, green and red are rescaled from 0..0xff to 0..0x3ff and stored at
+ * bits 0, 10 and 20; alpha keeps its top two bits at bit 30.
+ *
+ * Pixels are emitted in pairs located through tile_x_offset/tile_y_offset.
+ */
+static void
+lp_tile_b10g10r10a2_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint32_t *const out = (uint32_t *) dst;
+   const unsigned out_pitch = dst_stride / 4;   /* stride in 32-bit elements */
+   unsigned block_x, block_y, k, n;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned row0 = y0 + block_y;
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH) {
+         const unsigned col0 = x0 + block_x;
+         const uint8_t *const red = src;
+         const uint8_t *const green = src + TILE_C_STRIDE;
+         const uint8_t *const blue = src + 2 * TILE_C_STRIDE;
+         const uint8_t *const alpha = src + 3 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned idx = (row0 + tile_y_offset[k]) * out_pitch + (col0 + tile_x_offset[k]);
+            uint32_t pair[2];
+
+            /* Convert both pixels before storing either of them. */
+            for (n = 0; n < 2; ++n) {
+               const uint32_t b10 = (uint32_t) blue[k + n] * 0x3ff / 0xff;
+               const uint32_t g10 = (uint32_t) green[k + n] * 0x3ff / 0xff;
+               const uint32_t r10 = (uint32_t) red[k + n] * 0x3ff / 0xff;
+               const uint32_t a2 = (uint32_t) (alpha[k + n] >> 6);
+               pair[n] = b10 | (g10 << 10) | (r10 << 20) | (a2 << 30);
+            }
+            out[idx] = pair[0];
+            out[idx + 1] = pair[1];
+         }
+         src += TILE_X_STRIDE;   /* advance to the next vector block */
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as 8-bit L8 pixels.
+ *
+ * The luminance byte is taken unchanged from the third (blue) plane of the
+ * tile; the other planes are not read.
+ *
+ * Pixels are emitted in pairs located through tile_x_offset/tile_y_offset.
+ */
+static void
+lp_tile_l8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *const out = dst;
+   const unsigned out_pitch = dst_stride;   /* one byte per pixel */
+   unsigned block_x, block_y, k;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned row0 = y0 + block_y;
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH) {
+         const unsigned col0 = x0 + block_x;
+         const uint8_t *const blue = src + 2 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned idx = (row0 + tile_y_offset[k]) * out_pitch + (col0 + tile_x_offset[k]);
+            const uint8_t first = blue[k];
+            const uint8_t second = blue[k + 1];
+            out[idx] = first;
+            out[idx + 1] = second;
+         }
+         src += TILE_X_STRIDE;   /* advance to the next vector block */
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as 8-bit A8 pixels.
+ *
+ * Each destination byte is the unchanged value from the fourth (alpha)
+ * plane of the tile; the colour planes are not read.
+ *
+ * Pixels are emitted in pairs located through tile_x_offset/tile_y_offset.
+ */
+static void
+lp_tile_a8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *const out = dst;
+   const unsigned out_pitch = dst_stride;   /* one byte per pixel */
+   unsigned block_x, block_y, k;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned row0 = y0 + block_y;
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH) {
+         const unsigned col0 = x0 + block_x;
+         const uint8_t *const alpha = src + 3 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned idx = (row0 + tile_y_offset[k]) * out_pitch + (col0 + tile_x_offset[k]);
+            const uint8_t first = alpha[k];
+            const uint8_t second = alpha[k + 1];
+            out[idx] = first;
+            out[idx + 1] = second;
+         }
+         src += TILE_X_STRIDE;   /* advance to the next vector block */
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as 8-bit I8 pixels.
+ *
+ * The intensity byte is taken unchanged from the fourth (alpha) plane of
+ * the tile; the colour planes are not read.
+ *
+ * Pixels are emitted in pairs located through tile_x_offset/tile_y_offset.
+ */
+static void
+lp_tile_i8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *const out = dst;
+   const unsigned out_pitch = dst_stride;   /* one byte per pixel */
+   unsigned block_x, block_y, k;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned row0 = y0 + block_y;
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH) {
+         const unsigned col0 = x0 + block_x;
+         const uint8_t *const alpha = src + 3 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned idx = (row0 + tile_y_offset[k]) * out_pitch + (col0 + tile_x_offset[k]);
+            const uint8_t first = alpha[k];
+            const uint8_t second = alpha[k + 1];
+            out[idx] = first;
+            out[idx + 1] = second;
+         }
+         src += TILE_X_STRIDE;   /* advance to the next vector block */
+      }
+   }
+}
+
+/**
+ * Pack a 4ub tile into 8-bit L4A4 pixels.
+ *
+ * The low nibble holds the top four bits of the third (blue) plane and the
+ * high nibble holds the top four bits of the alpha plane.
+ *
+ * Pixels are emitted in pairs located through tile_x_offset/tile_y_offset.
+ */
+static void
+lp_tile_l4a4_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *const out = dst;
+   const unsigned out_pitch = dst_stride;   /* one byte per pixel */
+   unsigned block_x, block_y, k;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned row0 = y0 + block_y;
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH) {
+         const unsigned col0 = x0 + block_x;
+         const uint8_t *const blue = src + 2 * TILE_C_STRIDE;
+         const uint8_t *const alpha = src + 3 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned idx = (row0 + tile_y_offset[k]) * out_pitch + (col0 + tile_x_offset[k]);
+            /* (a >> 4) << 4 is the same as masking the high nibble */
+            const uint32_t first = (blue[k] >> 4) | (alpha[k] & 0xf0);
+            const uint32_t second = (blue[k + 1] >> 4) | (alpha[k + 1] & 0xf0);
+            out[idx] = first;
+            out[idx + 1] = second;
+         }
+         src += TILE_X_STRIDE;   /* advance to the next vector block */
+      }
+   }
+}
+
+/**
+ * Pack a 4ub tile into 16-bit L8A8 pixels.
+ *
+ * The low byte is the value from the third (blue) plane and the high byte
+ * is the value from the alpha plane, both unchanged.
+ *
+ * Pixels are emitted in pairs located through tile_x_offset/tile_y_offset.
+ */
+static void
+lp_tile_l8a8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint16_t *const out = (uint16_t *) dst;
+   const unsigned out_pitch = dst_stride / 2;   /* stride in 16-bit elements */
+   unsigned block_x, block_y, k;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned row0 = y0 + block_y;
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH) {
+         const unsigned col0 = x0 + block_x;
+         const uint8_t *const blue = src + 2 * TILE_C_STRIDE;
+         const uint8_t *const alpha = src + 3 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned idx = (row0 + tile_y_offset[k]) * out_pitch + (col0 + tile_x_offset[k]);
+            const uint32_t first = blue[k] | (alpha[k] << 8);
+            const uint32_t second = blue[k + 1] | (alpha[k + 1] << 8);
+            out[idx] = first;
+            out[idx + 1] = second;
+         }
+         src += TILE_X_STRIDE;   /* advance to the next vector block */
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as 16-bit L16 pixels.
+ *
+ * The value from the third (blue) plane is rescaled from 0..0xff to
+ * 0..0xffff; the other planes are not read.
+ *
+ * Pixels are emitted in pairs located through tile_x_offset/tile_y_offset.
+ */
+static void
+lp_tile_l16_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint16_t *const out = (uint16_t *) dst;
+   const unsigned out_pitch = dst_stride / 2;   /* stride in 16-bit elements */
+   unsigned block_x, block_y, k;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned row0 = y0 + block_y;
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH) {
+         const unsigned col0 = x0 + block_x;
+         const uint8_t *const blue = src + 2 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned idx = (row0 + tile_y_offset[k]) * out_pitch + (col0 + tile_x_offset[k]);
+            const uint16_t first = (uint16_t) ((uint32_t) blue[k] * 0xffff / 0xff);
+            const uint16_t second = (uint16_t) ((uint32_t) blue[k + 1] * 0xffff / 0xff);
+            out[idx] = first;
+            out[idx + 1] = second;
+         }
+         src += TILE_X_STRIDE;   /* advance to the next vector block */
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as one byte per pixel (L8 sRGB layout).
+ *
+ * Channel 2 of every tile pixel is copied unchanged; this function applies
+ * no transfer-curve conversion.  Rows start at byte offset y0 * dst_stride
+ * and pixels at byte offset x0 within the row.
+ */
+static void
+lp_tile_l8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint8_t *out = row + x0*1;
+      for (col = 0; col < TILE_SIZE; ++col) {
+         out[col] = TILE_PIXEL(src, col, line, 2);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as two bytes per pixel (L8A8 sRGB layout).
+ *
+ * Byte 0 is tile channel 2 and byte 1 is tile channel 3, both copied
+ * unchanged; this function applies no transfer-curve conversion.
+ */
+static void
+lp_tile_l8a8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint8_t *out = row + x0*2;
+      for (col = 0; col < TILE_SIZE; ++col, out += 2) {
+         out[0] = TILE_PIXEL(src, col, line, 2);
+         out[1] = TILE_PIXEL(src, col, line, 3);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as three bytes per pixel (R8G8B8 sRGB layout).
+ *
+ * Tile channels 0, 1 and 2 are copied unchanged, in that order; this
+ * function applies no transfer-curve conversion.
+ */
+static void
+lp_tile_r8g8b8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint8_t *out = row + x0*3;
+      for (col = 0; col < TILE_SIZE; ++col, out += 3) {
+         out[0] = TILE_PIXEL(src, col, line, 0);
+         out[1] = TILE_PIXEL(src, col, line, 1);
+         out[2] = TILE_PIXEL(src, col, line, 2);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as four bytes per pixel (R8G8B8A8 sRGB layout).
+ *
+ * Tile channels 0, 1, 2 and 3 are copied unchanged, in that order; this
+ * function applies no transfer-curve conversion.
+ */
+static void
+lp_tile_r8g8b8a8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint8_t *out = row + x0*4;
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = TILE_PIXEL(src, col, line, 0);
+         out[1] = TILE_PIXEL(src, col, line, 1);
+         out[2] = TILE_PIXEL(src, col, line, 2);
+         out[3] = TILE_PIXEL(src, col, line, 3);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as four bytes per pixel (A8B8G8R8 sRGB layout).
+ *
+ * Destination bytes 0..3 receive tile channels 3, 2, 1 and 0 unchanged;
+ * this function applies no transfer-curve conversion.
+ */
+static void
+lp_tile_a8b8g8r8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint8_t *out = row + x0*4;
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = TILE_PIXEL(src, col, line, 3);
+         out[1] = TILE_PIXEL(src, col, line, 2);
+         out[2] = TILE_PIXEL(src, col, line, 1);
+         out[3] = TILE_PIXEL(src, col, line, 0);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as four bytes per pixel (X8B8G8R8 sRGB layout).
+ *
+ * Destination bytes 1..3 receive tile channels 2, 1 and 0 unchanged.
+ * Byte 0 (the X slot) is skipped, so whatever was in the destination
+ * stays there.  No transfer-curve conversion is applied.
+ */
+static void
+lp_tile_x8b8g8r8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint8_t *out = row + x0*4;
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         /* out[0] is padding and is deliberately not written */
+         out[1] = TILE_PIXEL(src, col, line, 2);
+         out[2] = TILE_PIXEL(src, col, line, 1);
+         out[3] = TILE_PIXEL(src, col, line, 0);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as four bytes per pixel (B8G8R8A8 sRGB layout).
+ *
+ * Destination bytes 0..3 receive tile channels 2, 1, 0 and 3 unchanged;
+ * this function applies no transfer-curve conversion.
+ */
+static void
+lp_tile_b8g8r8a8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint8_t *out = row + x0*4;
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = TILE_PIXEL(src, col, line, 2);
+         out[1] = TILE_PIXEL(src, col, line, 1);
+         out[2] = TILE_PIXEL(src, col, line, 0);
+         out[3] = TILE_PIXEL(src, col, line, 3);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as four bytes per pixel (B8G8R8X8 sRGB layout).
+ *
+ * Destination bytes 0..2 receive tile channels 2, 1 and 0 unchanged.
+ * Byte 3 (the X slot) is skipped, so whatever was in the destination
+ * stays there.  No transfer-curve conversion is applied.
+ */
+static void
+lp_tile_b8g8r8x8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint8_t *out = row + x0*4;
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = TILE_PIXEL(src, col, line, 2);
+         out[1] = TILE_PIXEL(src, col, line, 1);
+         out[2] = TILE_PIXEL(src, col, line, 0);
+         /* out[3] is padding and is deliberately not written */
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as four bytes per pixel (A8R8G8B8 sRGB layout).
+ *
+ * Destination bytes 0..3 receive tile channels 3, 0, 1 and 2 unchanged;
+ * this function applies no transfer-curve conversion.
+ */
+static void
+lp_tile_a8r8g8b8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint8_t *out = row + x0*4;
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = TILE_PIXEL(src, col, line, 3);
+         out[1] = TILE_PIXEL(src, col, line, 0);
+         out[2] = TILE_PIXEL(src, col, line, 1);
+         out[3] = TILE_PIXEL(src, col, line, 2);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as four bytes per pixel (X8R8G8B8 sRGB layout).
+ *
+ * Destination bytes 1..3 receive tile channels 0, 1 and 2 unchanged.
+ * Byte 0 (the X slot) is skipped, so whatever was in the destination
+ * stays there.  No transfer-curve conversion is applied.
+ */
+static void
+lp_tile_x8r8g8b8_srgb_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint8_t *out = row + x0*4;
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         /* out[0] is padding and is deliberately not written */
+         out[1] = TILE_PIXEL(src, col, line, 0);
+         out[2] = TILE_PIXEL(src, col, line, 1);
+         out[3] = TILE_PIXEL(src, col, line, 2);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as four bytes per pixel (R8S G8S B8U X8U layout).
+ *
+ * Tile channels 0 and 1 are halved (shifted right by one) and stored as
+ * signed bytes; channel 2 is stored through the same int8_t pointer without
+ * scaling.  Byte 3 (the X slot) is skipped and left as it was.
+ */
+static void
+lp_tile_r8sg8sb8ux8u_norm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      int8_t *out = (int8_t *)(row + x0*4);
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = (int8_t)(TILE_PIXEL(src, col, line, 0) >> 1);
+         out[1] = (int8_t)(TILE_PIXEL(src, col, line, 1) >> 1);
+         out[2] = TILE_PIXEL(src, col, line, 2);
+         /* out[3] is padding and is deliberately not written */
+      }
+   }
+}
+
+/**
+ * Pack a 4ub tile into 32-bit R10S G10S B10S A2U pixels.
+ *
+ * Tile channels 0, 1 and 2 are rescaled by 0x1ff / 0xff and placed at bits
+ * 0, 10 and 20; channel 3 is shifted right by six and placed at bit 30.
+ */
+static void
+lp_tile_r10sg10sb10sa2u_norm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint32_t *out = (uint32_t *)(row + x0*4);
+      for (col = 0; col < TILE_SIZE; ++col) {
+         const uint32_t r = (uint32_t)(((uint32_t)TILE_PIXEL(src, col, line, 0)) * 0x1ff / 0xff);
+         const uint32_t g = (uint32_t)(((uint32_t)TILE_PIXEL(src, col, line, 1)) * 0x1ff / 0xff);
+         const uint32_t b = (uint32_t)(((uint32_t)TILE_PIXEL(src, col, line, 2)) * 0x1ff / 0xff);
+         const uint32_t a = (uint32_t)(TILE_PIXEL(src, col, line, 3) >> 6);
+         out[col] = r | (g << 10) | (b << 20) | (a << 30);
+      }
+   }
+}
+
+/**
+ * Pack a 4ub tile into 16-bit R5S G5S B6U pixels.
+ *
+ * Tile channels 0 and 1 are shifted right by four and placed at bits 0 and
+ * 5; channel 2 is shifted right by two and placed at bit 10.  Channel 3 is
+ * not read.
+ */
+static void
+lp_tile_r5sg5sb6u_norm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint16_t *out = (uint16_t *)(row + x0*2);
+      for (col = 0; col < TILE_SIZE; ++col) {
+         const uint16_t r = (uint16_t)(TILE_PIXEL(src, col, line, 0) >> 4);
+         const uint16_t g = (uint16_t)(TILE_PIXEL(src, col, line, 1) >> 4);
+         const uint16_t b = (uint16_t)(TILE_PIXEL(src, col, line, 2) >> 2);
+         out[col] = (uint16_t)(r | (g << 5) | (b << 10));
+      }
+   }
+}
+
+/**
+ * Pack a 4ub tile into 32-bit R10G10B10A2 USCALED pixels.
+ *
+ * Every channel is converted with the integer expression v * 0x1 / 0xff, so
+ * a source byte of 0xff becomes 1 and any smaller value becomes 0.  Red,
+ * green, blue and alpha go to bits 0, 10, 20 and 30.
+ *
+ * Pixels are emitted in pairs located through tile_x_offset/tile_y_offset.
+ */
+static void
+lp_tile_r10g10b10a2_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint32_t *const out = (uint32_t *) dst;
+   const unsigned out_pitch = dst_stride / 4;   /* stride in 32-bit elements */
+   unsigned block_x, block_y, k, n;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned row0 = y0 + block_y;
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH) {
+         const unsigned col0 = x0 + block_x;
+         const uint8_t *const red = src;
+         const uint8_t *const green = src + TILE_C_STRIDE;
+         const uint8_t *const blue = src + 2 * TILE_C_STRIDE;
+         const uint8_t *const alpha = src + 3 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned idx = (row0 + tile_y_offset[k]) * out_pitch + (col0 + tile_x_offset[k]);
+            uint32_t pair[2];
+
+            /* Convert both pixels before storing either of them. */
+            for (n = 0; n < 2; ++n) {
+               const uint32_t r = (uint32_t) red[k + n] * 0x1 / 0xff;
+               const uint32_t g = (uint32_t) green[k + n] * 0x1 / 0xff;
+               const uint32_t b = (uint32_t) blue[k + n] * 0x1 / 0xff;
+               const uint32_t a = (uint32_t) alpha[k + n] * 0x1 / 0xff;
+               pair[n] = r | (g << 10) | (b << 20) | (a << 30);
+            }
+            out[idx] = pair[0];
+            out[idx + 1] = pair[1];
+         }
+         src += TILE_X_STRIDE;   /* advance to the next vector block */
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as one double per pixel (R64_FLOAT).
+ *
+ * Tile channel 0 is normalised from 0..0xff to 0.0..1.0.  The division is
+ * carried out in double precision: scaling by the float constant
+ * (1.0f/0xff) and widening afterwards would store values that are only
+ * accurate to single precision, and could exceed 1.0 for 0xff when the
+ * compiler evaluates float expressions in a wider format.
+ *
+ * \param src         tile in 4ub layout, read through TILE_PIXEL
+ * \param dst         destination surface
+ * \param dst_stride  byte distance between destination rows
+ * \param x0, y0      destination pixel position of the tile's first pixel
+ */
+static void
+lp_tile_r64_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   uint8_t *dst_row = dst + y0*dst_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      double *dst_pixel = (double *)(dst_row + x0*8);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* correctly rounded in double; 0xff maps to exactly 1.0 */
+         *dst_pixel++ = (double)TILE_PIXEL(src, x, y, 0) / 255.0;
+      }
+      dst_row += dst_stride;
+   }
+}
+
+/**
+ * Write a 4ub tile out as two doubles per pixel (R64G64_FLOAT).
+ *
+ * Tile channels 0 and 1 are normalised from 0..0xff to 0.0..1.0.  The
+ * division is carried out in double precision: scaling by the float
+ * constant (1.0f/0xff) and widening afterwards would store values that are
+ * only accurate to single precision, and could exceed 1.0 for 0xff when the
+ * compiler evaluates float expressions in a wider format.
+ *
+ * \param src         tile in 4ub layout, read through TILE_PIXEL
+ * \param dst         destination surface
+ * \param dst_stride  byte distance between destination rows
+ * \param x0, y0      destination pixel position of the tile's first pixel
+ */
+static void
+lp_tile_r64g64_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   uint8_t *dst_row = dst + y0*dst_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      double *dst_pixel = (double *)(dst_row + x0*16);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* correctly rounded in double; 0xff maps to exactly 1.0 */
+         *dst_pixel++ = (double)TILE_PIXEL(src, x, y, 0) / 255.0;
+         *dst_pixel++ = (double)TILE_PIXEL(src, x, y, 1) / 255.0;
+      }
+      dst_row += dst_stride;
+   }
+}
+
+/**
+ * Write a 4ub tile out as three doubles per pixel (R64G64B64_FLOAT).
+ *
+ * Tile channels 0, 1 and 2 are normalised from 0..0xff to 0.0..1.0.  The
+ * division is carried out in double precision: scaling by the float
+ * constant (1.0f/0xff) and widening afterwards would store values that are
+ * only accurate to single precision, and could exceed 1.0 for 0xff when the
+ * compiler evaluates float expressions in a wider format.
+ *
+ * \param src         tile in 4ub layout, read through TILE_PIXEL
+ * \param dst         destination surface
+ * \param dst_stride  byte distance between destination rows
+ * \param x0, y0      destination pixel position of the tile's first pixel
+ */
+static void
+lp_tile_r64g64b64_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   uint8_t *dst_row = dst + y0*dst_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      double *dst_pixel = (double *)(dst_row + x0*24);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* correctly rounded in double; 0xff maps to exactly 1.0 */
+         *dst_pixel++ = (double)TILE_PIXEL(src, x, y, 0) / 255.0;
+         *dst_pixel++ = (double)TILE_PIXEL(src, x, y, 1) / 255.0;
+         *dst_pixel++ = (double)TILE_PIXEL(src, x, y, 2) / 255.0;
+      }
+      dst_row += dst_stride;
+   }
+}
+
+/**
+ * Write a 4ub tile out as four doubles per pixel (R64G64B64A64_FLOAT).
+ *
+ * Tile channels 0..3 are normalised from 0..0xff to 0.0..1.0.  The
+ * division is carried out in double precision: scaling by the float
+ * constant (1.0f/0xff) and widening afterwards would store values that are
+ * only accurate to single precision, and could exceed 1.0 for 0xff when the
+ * compiler evaluates float expressions in a wider format.
+ *
+ * \param src         tile in 4ub layout, read through TILE_PIXEL
+ * \param dst         destination surface
+ * \param dst_stride  byte distance between destination rows
+ * \param x0, y0      destination pixel position of the tile's first pixel
+ */
+static void
+lp_tile_r64g64b64a64_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   unsigned x, y;
+   uint8_t *dst_row = dst + y0*dst_stride;
+   for (y = 0; y < TILE_SIZE; ++y) {
+      double *dst_pixel = (double *)(dst_row + x0*32);
+      for (x = 0; x < TILE_SIZE; ++x) {
+         /* correctly rounded in double; 0xff maps to exactly 1.0 */
+         *dst_pixel++ = (double)TILE_PIXEL(src, x, y, 0) / 255.0;
+         *dst_pixel++ = (double)TILE_PIXEL(src, x, y, 1) / 255.0;
+         *dst_pixel++ = (double)TILE_PIXEL(src, x, y, 2) / 255.0;
+         *dst_pixel++ = (double)TILE_PIXEL(src, x, y, 3) / 255.0;
+      }
+      dst_row += dst_stride;
+   }
+}
+
+/**
+ * Write a 4ub tile out as one float per pixel (R32_FLOAT).
+ *
+ * Tile channel 0 is converted with ubyte_to_float(); the other channels
+ * are not read.
+ */
+static void
+lp_tile_r32_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      float *out = (float *)(row + x0*4);
+      for (col = 0; col < TILE_SIZE; ++col) {
+         out[col] = ubyte_to_float(TILE_PIXEL(src, col, line, 0));
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as two floats per pixel (R32G32_FLOAT).
+ *
+ * Tile channels 0 and 1 are converted with ubyte_to_float(), in that order.
+ */
+static void
+lp_tile_r32g32_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      float *out = (float *)(row + x0*8);
+      for (col = 0; col < TILE_SIZE; ++col, out += 2) {
+         out[0] = ubyte_to_float(TILE_PIXEL(src, col, line, 0));
+         out[1] = ubyte_to_float(TILE_PIXEL(src, col, line, 1));
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as three floats per pixel (R32G32B32_FLOAT).
+ *
+ * Tile channels 0, 1 and 2 are converted with ubyte_to_float(), in that
+ * order.
+ */
+static void
+lp_tile_r32g32b32_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      float *out = (float *)(row + x0*12);
+      for (col = 0; col < TILE_SIZE; ++col, out += 3) {
+         out[0] = ubyte_to_float(TILE_PIXEL(src, col, line, 0));
+         out[1] = ubyte_to_float(TILE_PIXEL(src, col, line, 1));
+         out[2] = ubyte_to_float(TILE_PIXEL(src, col, line, 2));
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as four floats per pixel (R32G32B32A32_FLOAT).
+ *
+ * Tile channels 0..3 are converted with ubyte_to_float(), in that order.
+ */
+static void
+lp_tile_r32g32b32a32_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      float *out = (float *)(row + x0*16);
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = ubyte_to_float(TILE_PIXEL(src, col, line, 0));
+         out[1] = ubyte_to_float(TILE_PIXEL(src, col, line, 1));
+         out[2] = ubyte_to_float(TILE_PIXEL(src, col, line, 2));
+         out[3] = ubyte_to_float(TILE_PIXEL(src, col, line, 3));
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as one 32-bit unorm value per pixel (R32_UNORM).
+ *
+ * The red plane is rescaled from 0..0xff to 0..0xffffffff using 64-bit
+ * intermediate arithmetic; the other planes are not read.
+ *
+ * Pixels are emitted in pairs located through tile_x_offset/tile_y_offset.
+ */
+static void
+lp_tile_r32_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint32_t *const out = (uint32_t *) dst;
+   const unsigned out_pitch = dst_stride / 4;   /* stride in 32-bit elements */
+   unsigned block_x, block_y, k;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned row0 = y0 + block_y;
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH) {
+         const unsigned col0 = x0 + block_x;
+         const uint8_t *const red = src;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned idx = (row0 + tile_y_offset[k]) * out_pitch + (col0 + tile_x_offset[k]);
+            const uint32_t first = (uint32_t) ((uint64_t) red[k] * 0xffffffff / 0xff);
+            const uint32_t second = (uint32_t) ((uint64_t) red[k + 1] * 0xffffffff / 0xff);
+            out[idx] = first;
+            out[idx + 1] = second;
+         }
+         src += TILE_X_STRIDE;   /* advance to the next vector block */
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as two 32-bit unorm values per pixel (R32G32_UNORM).
+ *
+ * Tile channels 0 and 1 are rescaled by 0xffffffff / 0xff using 64-bit
+ * intermediate arithmetic.
+ */
+static void
+lp_tile_r32g32_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint32_t *out = (uint32_t *)(row + x0*8);
+      for (col = 0; col < TILE_SIZE; ++col, out += 2) {
+         out[0] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0xffffffff / 0xff);
+         out[1] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 1)) * 0xffffffff / 0xff);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as three 32-bit unorm values per pixel
+ * (R32G32B32_UNORM).
+ *
+ * Tile channels 0, 1 and 2 are rescaled by 0xffffffff / 0xff using 64-bit
+ * intermediate arithmetic.
+ */
+static void
+lp_tile_r32g32b32_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint32_t *out = (uint32_t *)(row + x0*12);
+      for (col = 0; col < TILE_SIZE; ++col, out += 3) {
+         out[0] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0xffffffff / 0xff);
+         out[1] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 1)) * 0xffffffff / 0xff);
+         out[2] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 2)) * 0xffffffff / 0xff);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as four 32-bit unorm values per pixel
+ * (R32G32B32A32_UNORM).
+ *
+ * Tile channels 0..3 are rescaled by 0xffffffff / 0xff using 64-bit
+ * intermediate arithmetic.
+ */
+static void
+lp_tile_r32g32b32a32_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint32_t *out = (uint32_t *)(row + x0*16);
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0xffffffff / 0xff);
+         out[1] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 1)) * 0xffffffff / 0xff);
+         out[2] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 2)) * 0xffffffff / 0xff);
+         out[3] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 3)) * 0xffffffff / 0xff);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as one 32-bit unsigned value per pixel
+ * (R32_USCALED).
+ *
+ * The red plane is converted with the integer expression v * 0x1 / 0xff, so
+ * a source byte of 0xff becomes 1 and any smaller value becomes 0.
+ *
+ * Pixels are emitted in pairs located through tile_x_offset/tile_y_offset.
+ */
+static void
+lp_tile_r32_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint32_t *const out = (uint32_t *) dst;
+   const unsigned out_pitch = dst_stride / 4;   /* stride in 32-bit elements */
+   unsigned block_x, block_y, k;
+
+   for (block_y = 0; block_y < TILE_SIZE; block_y += TILE_VECTOR_HEIGHT) {
+      const unsigned row0 = y0 + block_y;
+      for (block_x = 0; block_x < TILE_SIZE; block_x += TILE_VECTOR_WIDTH) {
+         const unsigned col0 = x0 + block_x;
+         const uint8_t *const red = src;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned idx = (row0 + tile_y_offset[k]) * out_pitch + (col0 + tile_x_offset[k]);
+            const uint32_t first = (uint32_t) ((uint64_t) red[k] * 0x1 / 0xff);
+            const uint32_t second = (uint32_t) ((uint64_t) red[k + 1] * 0x1 / 0xff);
+            out[idx] = first;
+            out[idx + 1] = second;
+         }
+         src += TILE_X_STRIDE;   /* advance to the next vector block */
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as two 32-bit unsigned values per pixel
+ * (R32G32_USCALED).
+ *
+ * Tile channels 0 and 1 are converted with the integer expression
+ * v * 0x1 / 0xff (truncating division).
+ */
+static void
+lp_tile_r32g32_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint32_t *out = (uint32_t *)(row + x0*8);
+      for (col = 0; col < TILE_SIZE; ++col, out += 2) {
+         out[0] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0x1 / 0xff);
+         out[1] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 1)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as three 32-bit unsigned values per pixel
+ * (R32G32B32_USCALED).
+ *
+ * Tile channels 0, 1 and 2 are converted with the integer expression
+ * v * 0x1 / 0xff (truncating division).
+ */
+static void
+lp_tile_r32g32b32_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint32_t *out = (uint32_t *)(row + x0*12);
+      for (col = 0; col < TILE_SIZE; ++col, out += 3) {
+         out[0] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0x1 / 0xff);
+         out[1] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 1)) * 0x1 / 0xff);
+         out[2] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 2)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as four 32-bit unsigned values per pixel
+ * (R32G32B32A32_USCALED).
+ *
+ * Tile channels 0..3 are converted with the integer expression
+ * v * 0x1 / 0xff (truncating division).
+ */
+static void
+lp_tile_r32g32b32a32_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      uint32_t *out = (uint32_t *)(row + x0*16);
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0x1 / 0xff);
+         out[1] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 1)) * 0x1 / 0xff);
+         out[2] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 2)) * 0x1 / 0xff);
+         out[3] = (uint32_t)(((uint64_t)TILE_PIXEL(src, col, line, 3)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as one 32-bit signed normalised value per pixel
+ * (R32_SNORM).
+ *
+ * Tile channel 0 is rescaled by 0x7fffffff / 0xff using 64-bit
+ * intermediate arithmetic.
+ */
+static void
+lp_tile_r32_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      int32_t *out = (int32_t *)(row + x0*4);
+      for (col = 0; col < TILE_SIZE; ++col) {
+         out[col] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0x7fffffff / 0xff);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as two 32-bit signed normalised values per pixel
+ * (R32G32_SNORM).
+ *
+ * Tile channels 0 and 1 are rescaled by 0x7fffffff / 0xff using 64-bit
+ * intermediate arithmetic.
+ */
+static void
+lp_tile_r32g32_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      int32_t *out = (int32_t *)(row + x0*8);
+      for (col = 0; col < TILE_SIZE; ++col, out += 2) {
+         out[0] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0x7fffffff / 0xff);
+         out[1] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 1)) * 0x7fffffff / 0xff);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as three 32-bit signed normalised values per pixel
+ * (R32G32B32_SNORM).
+ *
+ * Tile channels 0, 1 and 2 are rescaled by 0x7fffffff / 0xff using 64-bit
+ * intermediate arithmetic.
+ */
+static void
+lp_tile_r32g32b32_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      int32_t *out = (int32_t *)(row + x0*12);
+      for (col = 0; col < TILE_SIZE; ++col, out += 3) {
+         out[0] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0x7fffffff / 0xff);
+         out[1] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 1)) * 0x7fffffff / 0xff);
+         out[2] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 2)) * 0x7fffffff / 0xff);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as four 32-bit signed normalised values per pixel
+ * (R32G32B32A32_SNORM).
+ *
+ * Tile channels 0..3 are rescaled by 0x7fffffff / 0xff using 64-bit
+ * intermediate arithmetic.
+ */
+static void
+lp_tile_r32g32b32a32_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      int32_t *out = (int32_t *)(row + x0*16);
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0x7fffffff / 0xff);
+         out[1] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 1)) * 0x7fffffff / 0xff);
+         out[2] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 2)) * 0x7fffffff / 0xff);
+         out[3] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 3)) * 0x7fffffff / 0xff);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as one 32-bit signed value per pixel (R32_SSCALED).
+ *
+ * Tile channel 0 is converted with the integer expression v * 0x1 / 0xff
+ * (truncating division).
+ */
+static void
+lp_tile_r32_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      int32_t *out = (int32_t *)(row + x0*4);
+      for (col = 0; col < TILE_SIZE; ++col) {
+         out[col] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as two 32-bit signed values per pixel
+ * (R32G32_SSCALED).
+ *
+ * Tile channels 0 and 1 are converted with the integer expression
+ * v * 0x1 / 0xff (truncating division).
+ */
+static void
+lp_tile_r32g32_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      int32_t *out = (int32_t *)(row + x0*8);
+      for (col = 0; col < TILE_SIZE; ++col, out += 2) {
+         out[0] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0x1 / 0xff);
+         out[1] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 1)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as three 32-bit signed values per pixel
+ * (R32G32B32_SSCALED).
+ *
+ * Tile channels 0, 1 and 2 are converted with the integer expression
+ * v * 0x1 / 0xff (truncating division).
+ */
+static void
+lp_tile_r32g32b32_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      int32_t *out = (int32_t *)(row + x0*12);
+      for (col = 0; col < TILE_SIZE; ++col, out += 3) {
+         out[0] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0x1 / 0xff);
+         out[1] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 1)) * 0x1 / 0xff);
+         out[2] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 2)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Write a 4ub tile out as four 32-bit signed values per pixel
+ * (R32G32B32A32_SSCALED).
+ *
+ * Tile channels 0..3 are converted with the integer expression
+ * v * 0x1 / 0xff (truncating division).
+ */
+static void
+lp_tile_r32g32b32a32_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row = dst + y0*dst_stride;
+   unsigned col, line;
+
+   for (line = 0; line < TILE_SIZE; ++line, row += dst_stride) {
+      int32_t *out = (int32_t *)(row + x0*16);
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 0)) * 0x1 / 0xff);
+         out[1] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 1)) * 0x1 / 0xff);
+         out[2] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 2)) * 0x1 / 0xff);
+         out[3] = (int32_t)(((uint64_t)TILE_PIXEL(src, col, line, 3)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16_float pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL(),
+ * scaled by 1.0f/0xff and passed through util_float_to_half(); the result
+ * is stored as one uint16_t (2 bytes per pixel).  Output starts
+ * y0*dst_stride + x0*2 bytes into dst and rows are dst_stride bytes apart.
+ */
+static void
+lp_tile_r16_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      uint16_t *out = (uint16_t *)(row_ptr + x0*2);
+
+      for (col = 0; col < TILE_SIZE; ++col, ++out) {
+         out[0] = util_float_to_half((float)(TILE_PIXEL(src, col, row, 0) * (1.0f/0xff)));
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16g16_float pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL(),
+ * scaled by 1.0f/0xff and passed through util_float_to_half(); channels
+ * 0 and 1 are stored as consecutive uint16_t values (4 bytes per pixel).
+ * Output starts y0*dst_stride + x0*4 bytes into dst and rows are
+ * dst_stride bytes apart.
+ */
+static void
+lp_tile_r16g16_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      uint16_t *out = (uint16_t *)(row_ptr + x0*4);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 2) {
+         out[0] = util_float_to_half((float)(TILE_PIXEL(src, col, row, 0) * (1.0f/0xff)));
+         out[1] = util_float_to_half((float)(TILE_PIXEL(src, col, row, 1) * (1.0f/0xff)));
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16g16b16_float pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL(),
+ * scaled by 1.0f/0xff and passed through util_float_to_half(); channels
+ * 0..2 are stored as consecutive uint16_t values (6 bytes per pixel).
+ * Output starts y0*dst_stride + x0*6 bytes into dst and rows are
+ * dst_stride bytes apart.
+ */
+static void
+lp_tile_r16g16b16_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      uint16_t *out = (uint16_t *)(row_ptr + x0*6);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 3) {
+         out[0] = util_float_to_half((float)(TILE_PIXEL(src, col, row, 0) * (1.0f/0xff)));
+         out[1] = util_float_to_half((float)(TILE_PIXEL(src, col, row, 1) * (1.0f/0xff)));
+         out[2] = util_float_to_half((float)(TILE_PIXEL(src, col, row, 2) * (1.0f/0xff)));
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16g16b16a16_float pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL(),
+ * scaled by 1.0f/0xff and passed through util_float_to_half(); channels
+ * 0..3 are stored as consecutive uint16_t values (8 bytes per pixel).
+ * Output starts y0*dst_stride + x0*8 bytes into dst and rows are
+ * dst_stride bytes apart.
+ */
+static void
+lp_tile_r16g16b16a16_float_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      uint16_t *out = (uint16_t *)(row_ptr + x0*8);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = util_float_to_half((float)(TILE_PIXEL(src, col, row, 0) * (1.0f/0xff)));
+         out[1] = util_float_to_half((float)(TILE_PIXEL(src, col, row, 1) * (1.0f/0xff)));
+         out[2] = util_float_to_half((float)(TILE_PIXEL(src, col, row, 2) * (1.0f/0xff)));
+         out[3] = util_float_to_half((float)(TILE_PIXEL(src, col, row, 3) * (1.0f/0xff)));
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16_unorm pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT groups; src
+ * advances by TILE_X_STRIDE per group and the channel used here starts at
+ * src + 0*TILE_C_STRIDE.  Each inner iteration converts two source values
+ * (value * 0xffff / 0xff) and stores them at adjacent destination pixels
+ * located through tile_x_offset[]/tile_y_offset[] relative to (x0, y0).
+ * dst is addressed in uint16_t units with dst_stride / 2 units per row.
+ */
+static void
+lp_tile_r16_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint16_t *const out = (uint16_t *) dst;
+   const unsigned pitch = dst_stride / 2;
+   unsigned gx, gy, k;
+
+   for (gy = 0; gy < TILE_SIZE; gy += TILE_VECTOR_HEIGHT) {
+      const unsigned base_y = y0 + gy;
+
+      for (gx = 0; gx < TILE_SIZE; gx += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned base_x = x0 + gx;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned at = (base_y + tile_y_offset[k]) * pitch + (base_x + tile_x_offset[k]);
+            const uint32_t first = (((uint16_t)(((uint32_t)red[k+0]) * 0xffff / 0xff)) << 0);
+            const uint32_t second = (((uint16_t)(((uint32_t)red[k+1]) * 0xffff / 0xff)) << 0);
+
+            out[at + 0] = first;
+            out[at + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16g16_unorm pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT groups; src
+ * advances by TILE_X_STRIDE per group and channel c of a group starts at
+ * src + c*TILE_C_STRIDE.  Each inner iteration packs two pixels (channel 0
+ * in bits 0..15, channel 1 in bits 16..31, each value * 0xffff / 0xff) and
+ * stores them at adjacent destination pixels located through
+ * tile_x_offset[]/tile_y_offset[] relative to (x0, y0).
+ * dst is addressed in uint32_t units with dst_stride / 4 units per row.
+ */
+static void
+lp_tile_r16g16_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   const unsigned dstpix_stride = dst_stride / 4;
+   uint32_t *dstpix = (uint32_t *) dst;
+   unsigned int qx, qy, i;
+
+   for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) {
+      const unsigned py = y0 + qy;
+      for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) {
+         const unsigned px = x0 + qx;
+         const uint8_t *r = src + 0 * TILE_C_STRIDE;
+         const uint8_t *g = src + 1 * TILE_C_STRIDE;
+         for (i = 0; i < TILE_C_STRIDE; i += 2) {
+            /*
+             * Shift in uint32_t: a uint16_t operand would be promoted to
+             * int, and shifting a value >= 0x8000 left by 16 overflows a
+             * 32-bit int, which is undefined behaviour.
+             */
+            const uint32_t pixel0 = ((uint32_t)(uint16_t)(((uint32_t)r[i+0]) * 0xffff / 0xff) << 0) | ((uint32_t)(uint16_t)(((uint32_t)g[i+0]) * 0xffff / 0xff) << 16);
+            const uint32_t pixel1 = ((uint32_t)(uint16_t)(((uint32_t)r[i+1]) * 0xffff / 0xff) << 0) | ((uint32_t)(uint16_t)(((uint32_t)g[i+1]) * 0xffff / 0xff) << 16);
+            const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]);
+            dstpix[offset + 0] = pixel0;
+            dstpix[offset + 1] = pixel1;
+         }
+         src += TILE_X_STRIDE;
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16g16b16_unorm pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as 3 consecutive uint16_t channels (6 bytes per pixel).
+ * Output starts y0*dst_stride + x0*6 bytes into dst and rows are
+ * dst_stride bytes apart.  Each channel is converted as
+ * value * 0xffff / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r16g16b16_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      uint16_t *out = (uint16_t *)(row_ptr + x0*6);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 3) {
+         out[0] = (uint16_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0xffff / 0xff);
+         out[1] = (uint16_t)(((uint32_t)TILE_PIXEL(src, col, row, 1)) * 0xffff / 0xff);
+         out[2] = (uint16_t)(((uint32_t)TILE_PIXEL(src, col, row, 2)) * 0xffff / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16g16b16a16_unorm pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as 4 consecutive uint16_t channels (8 bytes per pixel).
+ * Output starts y0*dst_stride + x0*8 bytes into dst and rows are
+ * dst_stride bytes apart.  Each channel is converted as
+ * value * 0xffff / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r16g16b16a16_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      uint16_t *out = (uint16_t *)(row_ptr + x0*8);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = (uint16_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0xffff / 0xff);
+         out[1] = (uint16_t)(((uint32_t)TILE_PIXEL(src, col, row, 1)) * 0xffff / 0xff);
+         out[2] = (uint16_t)(((uint32_t)TILE_PIXEL(src, col, row, 2)) * 0xffff / 0xff);
+         out[3] = (uint16_t)(((uint32_t)TILE_PIXEL(src, col, row, 3)) * 0xffff / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16_uscaled pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT groups; src
+ * advances by TILE_X_STRIDE per group and the channel used here starts at
+ * src + 0*TILE_C_STRIDE.  Each inner iteration converts two source values
+ * (value * 0x1 / 0xff) and stores them at adjacent destination pixels
+ * located through tile_x_offset[]/tile_y_offset[] relative to (x0, y0).
+ * dst is addressed in uint16_t units with dst_stride / 2 units per row.
+ */
+static void
+lp_tile_r16_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint16_t *const out = (uint16_t *) dst;
+   const unsigned pitch = dst_stride / 2;
+   unsigned gx, gy, k;
+
+   for (gy = 0; gy < TILE_SIZE; gy += TILE_VECTOR_HEIGHT) {
+      const unsigned base_y = y0 + gy;
+
+      for (gx = 0; gx < TILE_SIZE; gx += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned base_x = x0 + gx;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned at = (base_y + tile_y_offset[k]) * pitch + (base_x + tile_x_offset[k]);
+            const uint32_t first = (((uint16_t)(((uint32_t)red[k+0]) * 0x1 / 0xff)) << 0);
+            const uint32_t second = (((uint16_t)(((uint32_t)red[k+1]) * 0x1 / 0xff)) << 0);
+
+            out[at + 0] = first;
+            out[at + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16g16_uscaled pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT groups; src
+ * advances by TILE_X_STRIDE per group and channel c of a group starts at
+ * src + c*TILE_C_STRIDE.  Each inner iteration packs two pixels (channel 0
+ * at bit 0, channel 1 at bit 16, each value * 0x1 / 0xff) and stores them
+ * at adjacent destination pixels located through
+ * tile_x_offset[]/tile_y_offset[] relative to (x0, y0).
+ * dst is addressed in uint32_t units with dst_stride / 4 units per row.
+ */
+static void
+lp_tile_r16g16_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint32_t *const out = (uint32_t *) dst;
+   const unsigned pitch = dst_stride / 4;
+   unsigned gx, gy, k;
+
+   for (gy = 0; gy < TILE_SIZE; gy += TILE_VECTOR_HEIGHT) {
+      const unsigned base_y = y0 + gy;
+
+      for (gx = 0; gx < TILE_SIZE; gx += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned base_x = x0 + gx;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+         const uint8_t *green = src + 1 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned at = (base_y + tile_y_offset[k]) * pitch + (base_x + tile_x_offset[k]);
+            const uint32_t first = (((uint16_t)(((uint32_t)red[k+0]) * 0x1 / 0xff)) << 0) | (((uint16_t)(((uint32_t)green[k+0]) * 0x1 / 0xff)) << 16);
+            const uint32_t second = (((uint16_t)(((uint32_t)red[k+1]) * 0x1 / 0xff)) << 0) | (((uint16_t)(((uint32_t)green[k+1]) * 0x1 / 0xff)) << 16);
+
+            out[at + 0] = first;
+            out[at + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16g16b16_uscaled pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as 3 consecutive uint16_t channels (6 bytes per pixel).
+ * Output starts y0*dst_stride + x0*6 bytes into dst and rows are
+ * dst_stride bytes apart.  Each channel is converted as
+ * value * 0x1 / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r16g16b16_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      uint16_t *out = (uint16_t *)(row_ptr + x0*6);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 3) {
+         out[0] = (uint16_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0x1 / 0xff);
+         out[1] = (uint16_t)(((uint32_t)TILE_PIXEL(src, col, row, 1)) * 0x1 / 0xff);
+         out[2] = (uint16_t)(((uint32_t)TILE_PIXEL(src, col, row, 2)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16g16b16a16_uscaled pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as 4 consecutive uint16_t channels (8 bytes per pixel).
+ * Output starts y0*dst_stride + x0*8 bytes into dst and rows are
+ * dst_stride bytes apart.  Each channel is converted as
+ * value * 0x1 / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r16g16b16a16_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      uint16_t *out = (uint16_t *)(row_ptr + x0*8);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = (uint16_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0x1 / 0xff);
+         out[1] = (uint16_t)(((uint32_t)TILE_PIXEL(src, col, row, 1)) * 0x1 / 0xff);
+         out[2] = (uint16_t)(((uint32_t)TILE_PIXEL(src, col, row, 2)) * 0x1 / 0xff);
+         out[3] = (uint16_t)(((uint32_t)TILE_PIXEL(src, col, row, 3)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16_snorm pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as one int16_t (2 bytes per pixel).  Output starts
+ * y0*dst_stride + x0*2 bytes into dst and rows are dst_stride bytes apart.
+ * The channel is converted as value * 0x7fff / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r16_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      int16_t *out = (int16_t *)(row_ptr + x0*2);
+
+      for (col = 0; col < TILE_SIZE; ++col, ++out) {
+         out[0] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0x7fff / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16g16_snorm pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as 2 consecutive int16_t channels (4 bytes per pixel).
+ * Output starts y0*dst_stride + x0*4 bytes into dst and rows are
+ * dst_stride bytes apart.  Each channel is converted as
+ * value * 0x7fff / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r16g16_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      int16_t *out = (int16_t *)(row_ptr + x0*4);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 2) {
+         out[0] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0x7fff / 0xff);
+         out[1] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 1)) * 0x7fff / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16g16b16_snorm pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as 3 consecutive int16_t channels (6 bytes per pixel).
+ * Output starts y0*dst_stride + x0*6 bytes into dst and rows are
+ * dst_stride bytes apart.  Each channel is converted as
+ * value * 0x7fff / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r16g16b16_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      int16_t *out = (int16_t *)(row_ptr + x0*6);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 3) {
+         out[0] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0x7fff / 0xff);
+         out[1] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 1)) * 0x7fff / 0xff);
+         out[2] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 2)) * 0x7fff / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16g16b16a16_snorm pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as 4 consecutive int16_t channels (8 bytes per pixel).
+ * Output starts y0*dst_stride + x0*8 bytes into dst and rows are
+ * dst_stride bytes apart.  Each channel is converted as
+ * value * 0x7fff / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r16g16b16a16_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      int16_t *out = (int16_t *)(row_ptr + x0*8);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0x7fff / 0xff);
+         out[1] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 1)) * 0x7fff / 0xff);
+         out[2] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 2)) * 0x7fff / 0xff);
+         out[3] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 3)) * 0x7fff / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16_sscaled pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as one int16_t (2 bytes per pixel).  Output starts
+ * y0*dst_stride + x0*2 bytes into dst and rows are dst_stride bytes apart.
+ * The channel is converted as value * 0x1 / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r16_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      int16_t *out = (int16_t *)(row_ptr + x0*2);
+
+      for (col = 0; col < TILE_SIZE; ++col, ++out) {
+         out[0] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16g16_sscaled pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as 2 consecutive int16_t channels (4 bytes per pixel).
+ * Output starts y0*dst_stride + x0*4 bytes into dst and rows are
+ * dst_stride bytes apart.  Each channel is converted as
+ * value * 0x1 / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r16g16_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      int16_t *out = (int16_t *)(row_ptr + x0*4);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 2) {
+         out[0] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0x1 / 0xff);
+         out[1] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 1)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16g16b16_sscaled pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as 3 consecutive int16_t channels (6 bytes per pixel).
+ * Output starts y0*dst_stride + x0*6 bytes into dst and rows are
+ * dst_stride bytes apart.  Each channel is converted as
+ * value * 0x1 / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r16g16b16_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      int16_t *out = (int16_t *)(row_ptr + x0*6);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 3) {
+         out[0] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0x1 / 0xff);
+         out[1] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 1)) * 0x1 / 0xff);
+         out[2] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 2)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r16g16b16a16_sscaled pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as 4 consecutive int16_t channels (8 bytes per pixel).
+ * Output starts y0*dst_stride + x0*8 bytes into dst and rows are
+ * dst_stride bytes apart.  Each channel is converted as
+ * value * 0x1 / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r16g16b16a16_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      int16_t *out = (int16_t *)(row_ptr + x0*8);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0x1 / 0xff);
+         out[1] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 1)) * 0x1 / 0xff);
+         out[2] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 2)) * 0x1 / 0xff);
+         out[3] = (int16_t)(((uint32_t)TILE_PIXEL(src, col, row, 3)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r8_unorm pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT groups; src
+ * advances by TILE_X_STRIDE per group and the channel used here starts at
+ * src + 0*TILE_C_STRIDE.  Each inner iteration copies two source bytes
+ * unchanged to adjacent destination pixels located through
+ * tile_x_offset[]/tile_y_offset[] relative to (x0, y0).
+ * dst is addressed in bytes with dst_stride / 1 bytes per row.
+ */
+static void
+lp_tile_r8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *const out = (uint8_t *) dst;
+   const unsigned pitch = dst_stride / 1;
+   unsigned gx, gy, k;
+
+   for (gy = 0; gy < TILE_SIZE; gy += TILE_VECTOR_HEIGHT) {
+      const unsigned base_y = y0 + gy;
+
+      for (gx = 0; gx < TILE_SIZE; gx += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned base_x = x0 + gx;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned at = (base_y + tile_y_offset[k]) * pitch + (base_x + tile_x_offset[k]);
+            const uint32_t first = ((red[k+0]) << 0);
+            const uint32_t second = ((red[k+1]) << 0);
+
+            out[at + 0] = first;
+            out[at + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r8g8_unorm pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT groups; src
+ * advances by TILE_X_STRIDE per group and channel c of a group starts at
+ * src + c*TILE_C_STRIDE.  Each inner iteration packs two pixels (channel 0
+ * in bits 0..7, channel 1 in bits 8..15, bytes copied unchanged) and stores
+ * them at adjacent destination pixels located through
+ * tile_x_offset[]/tile_y_offset[] relative to (x0, y0).
+ * dst is addressed in uint16_t units with dst_stride / 2 units per row.
+ */
+static void
+lp_tile_r8g8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint16_t *const out = (uint16_t *) dst;
+   const unsigned pitch = dst_stride / 2;
+   unsigned gx, gy, k;
+
+   for (gy = 0; gy < TILE_SIZE; gy += TILE_VECTOR_HEIGHT) {
+      const unsigned base_y = y0 + gy;
+
+      for (gx = 0; gx < TILE_SIZE; gx += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned base_x = x0 + gx;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+         const uint8_t *green = src + 1 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned at = (base_y + tile_y_offset[k]) * pitch + (base_x + tile_x_offset[k]);
+            const uint32_t first = ((red[k+0]) << 0) | ((green[k+0]) << 8);
+            const uint32_t second = ((red[k+1]) << 0) | ((green[k+1]) << 8);
+
+            out[at + 0] = first;
+            out[at + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r8g8b8_unorm pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and channels 0..2 are stored unchanged as consecutive bytes (3 bytes per
+ * pixel).  Output starts y0*dst_stride + x0*3 bytes into dst and rows are
+ * dst_stride bytes apart.
+ */
+static void
+lp_tile_r8g8b8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      uint8_t *out = (uint8_t *)(row_ptr + x0*3);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 3) {
+         out[0] = TILE_PIXEL(src, col, row, 0);
+         out[1] = TILE_PIXEL(src, col, row, 1);
+         out[2] = TILE_PIXEL(src, col, row, 2);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r8g8b8a8_unorm pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT groups; src
+ * advances by TILE_X_STRIDE per group and channel c of a group starts at
+ * src + c*TILE_C_STRIDE.  Each inner iteration packs two pixels (channels
+ * 0..3 at bits 0, 8, 16 and 24, bytes copied unchanged) and stores them at
+ * adjacent destination pixels located through
+ * tile_x_offset[]/tile_y_offset[] relative to (x0, y0).
+ * dst is addressed in uint32_t units with dst_stride / 4 units per row.
+ */
+static void
+lp_tile_r8g8b8a8_unorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   const unsigned dstpix_stride = dst_stride / 4;
+   uint32_t *dstpix = (uint32_t *) dst;
+   unsigned int qx, qy, i;
+
+   for (qy = 0; qy < TILE_SIZE; qy += TILE_VECTOR_HEIGHT) {
+      const unsigned py = y0 + qy;
+      for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) {
+         const unsigned px = x0 + qx;
+         const uint8_t *r = src + 0 * TILE_C_STRIDE;
+         const uint8_t *g = src + 1 * TILE_C_STRIDE;
+         const uint8_t *b = src + 2 * TILE_C_STRIDE;
+         const uint8_t *a = src + 3 * TILE_C_STRIDE;
+         for (i = 0; i < TILE_C_STRIDE; i += 2) {
+            /*
+             * Widen to uint32_t before shifting: a uint8_t operand is
+             * promoted to int, and (value >= 0x80) << 24 overflows a
+             * 32-bit int, which is undefined behaviour.
+             */
+            const uint32_t pixel0 = ((uint32_t)r[i+0] << 0) | ((uint32_t)g[i+0] << 8) | ((uint32_t)b[i+0] << 16) | ((uint32_t)a[i+0] << 24);
+            const uint32_t pixel1 = ((uint32_t)r[i+1] << 0) | ((uint32_t)g[i+1] << 8) | ((uint32_t)b[i+1] << 16) | ((uint32_t)a[i+1] << 24);
+            const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]);
+            dstpix[offset + 0] = pixel0;
+            dstpix[offset + 1] = pixel1;
+         }
+         src += TILE_X_STRIDE;
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r8_uscaled pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT groups; src
+ * advances by TILE_X_STRIDE per group and the channel used here starts at
+ * src + 0*TILE_C_STRIDE.  Each inner iteration converts two source values
+ * (value * 0x1 / 0xff) and stores them at adjacent destination pixels
+ * located through tile_x_offset[]/tile_y_offset[] relative to (x0, y0).
+ * dst is addressed in bytes with dst_stride / 1 bytes per row.
+ */
+static void
+lp_tile_r8_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *const out = (uint8_t *) dst;
+   const unsigned pitch = dst_stride / 1;
+   unsigned gx, gy, k;
+
+   for (gy = 0; gy < TILE_SIZE; gy += TILE_VECTOR_HEIGHT) {
+      const unsigned base_y = y0 + gy;
+
+      for (gx = 0; gx < TILE_SIZE; gx += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned base_x = x0 + gx;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned at = (base_y + tile_y_offset[k]) * pitch + (base_x + tile_x_offset[k]);
+            const uint32_t first = (((uint8_t)(((uint32_t)red[k+0]) * 0x1 / 0xff)) << 0);
+            const uint32_t second = (((uint8_t)(((uint32_t)red[k+1]) * 0x1 / 0xff)) << 0);
+
+            out[at + 0] = first;
+            out[at + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r8g8_uscaled pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT groups; src
+ * advances by TILE_X_STRIDE per group and channel c of a group starts at
+ * src + c*TILE_C_STRIDE.  Each inner iteration packs two pixels (channel 0
+ * at bit 0, channel 1 at bit 8, each value * 0x1 / 0xff) and stores them
+ * at adjacent destination pixels located through
+ * tile_x_offset[]/tile_y_offset[] relative to (x0, y0).
+ * dst is addressed in uint16_t units with dst_stride / 2 units per row.
+ */
+static void
+lp_tile_r8g8_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint16_t *const out = (uint16_t *) dst;
+   const unsigned pitch = dst_stride / 2;
+   unsigned gx, gy, k;
+
+   for (gy = 0; gy < TILE_SIZE; gy += TILE_VECTOR_HEIGHT) {
+      const unsigned base_y = y0 + gy;
+
+      for (gx = 0; gx < TILE_SIZE; gx += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned base_x = x0 + gx;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+         const uint8_t *green = src + 1 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned at = (base_y + tile_y_offset[k]) * pitch + (base_x + tile_x_offset[k]);
+            const uint32_t first = (((uint8_t)(((uint32_t)red[k+0]) * 0x1 / 0xff)) << 0) | (((uint8_t)(((uint32_t)green[k+0]) * 0x1 / 0xff)) << 8);
+            const uint32_t second = (((uint8_t)(((uint32_t)red[k+1]) * 0x1 / 0xff)) << 0) | (((uint8_t)(((uint32_t)green[k+1]) * 0x1 / 0xff)) << 8);
+
+            out[at + 0] = first;
+            out[at + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r8g8b8_uscaled pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as 3 consecutive uint8_t channels (3 bytes per pixel).
+ * Output starts y0*dst_stride + x0*3 bytes into dst and rows are
+ * dst_stride bytes apart.  Each channel is converted as
+ * value * 0x1 / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r8g8b8_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      uint8_t *out = (uint8_t *)(row_ptr + x0*3);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 3) {
+         out[0] = (uint8_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0x1 / 0xff);
+         out[1] = (uint8_t)(((uint32_t)TILE_PIXEL(src, col, row, 1)) * 0x1 / 0xff);
+         out[2] = (uint8_t)(((uint32_t)TILE_PIXEL(src, col, row, 2)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r8g8b8a8_uscaled pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT groups; src
+ * advances by TILE_X_STRIDE per group and channel c of a group starts at
+ * src + c*TILE_C_STRIDE.  Each inner iteration packs two pixels (channels
+ * 0..3 at bits 0, 8, 16 and 24, each value * 0x1 / 0xff) and stores them
+ * at adjacent destination pixels located through
+ * tile_x_offset[]/tile_y_offset[] relative to (x0, y0).
+ * dst is addressed in uint32_t units with dst_stride / 4 units per row.
+ */
+static void
+lp_tile_r8g8b8a8_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint32_t *const out = (uint32_t *) dst;
+   const unsigned pitch = dst_stride / 4;
+   unsigned gx, gy, k;
+
+   for (gy = 0; gy < TILE_SIZE; gy += TILE_VECTOR_HEIGHT) {
+      const unsigned base_y = y0 + gy;
+
+      for (gx = 0; gx < TILE_SIZE; gx += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned base_x = x0 + gx;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+         const uint8_t *green = src + 1 * TILE_C_STRIDE;
+         const uint8_t *blue = src + 2 * TILE_C_STRIDE;
+         const uint8_t *alpha = src + 3 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned at = (base_y + tile_y_offset[k]) * pitch + (base_x + tile_x_offset[k]);
+            const uint32_t first = (((uint8_t)(((uint32_t)red[k+0]) * 0x1 / 0xff)) << 0) | (((uint8_t)(((uint32_t)green[k+0]) * 0x1 / 0xff)) << 8) | (((uint8_t)(((uint32_t)blue[k+0]) * 0x1 / 0xff)) << 16) | (((uint8_t)(((uint32_t)alpha[k+0]) * 0x1 / 0xff)) << 24);
+            const uint32_t second = (((uint8_t)(((uint32_t)red[k+1]) * 0x1 / 0xff)) << 0) | (((uint8_t)(((uint32_t)green[k+1]) * 0x1 / 0xff)) << 8) | (((uint8_t)(((uint32_t)blue[k+1]) * 0x1 / 0xff)) << 16) | (((uint8_t)(((uint32_t)alpha[k+1]) * 0x1 / 0xff)) << 24);
+
+            out[at + 0] = first;
+            out[at + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r8_snorm pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL(),
+ * shifted right by one bit and stored as one int8_t (1 byte per pixel).
+ * Output starts y0*dst_stride + x0*1 bytes into dst and rows are
+ * dst_stride bytes apart.
+ */
+static void
+lp_tile_r8_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      int8_t *out = (int8_t *)(row_ptr + x0*1);
+
+      for (col = 0; col < TILE_SIZE; ++col, ++out) {
+         out[0] = (int8_t)(TILE_PIXEL(src, col, row, 0) >> 1);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r8g8_snorm pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL();
+ * channels 0 and 1 are shifted right by one bit and stored as consecutive
+ * int8_t values (2 bytes per pixel).  Output starts y0*dst_stride + x0*2
+ * bytes into dst and rows are dst_stride bytes apart.
+ */
+static void
+lp_tile_r8g8_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      int8_t *out = (int8_t *)(row_ptr + x0*2);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 2) {
+         out[0] = (int8_t)(TILE_PIXEL(src, col, row, 0) >> 1);
+         out[1] = (int8_t)(TILE_PIXEL(src, col, row, 1) >> 1);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r8g8b8_snorm pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL();
+ * channels 0..2 are shifted right by one bit and stored as consecutive
+ * int8_t values (3 bytes per pixel).  Output starts y0*dst_stride + x0*3
+ * bytes into dst and rows are dst_stride bytes apart.
+ */
+static void
+lp_tile_r8g8b8_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      int8_t *out = (int8_t *)(row_ptr + x0*3);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 3) {
+         out[0] = (int8_t)(TILE_PIXEL(src, col, row, 0) >> 1);
+         out[1] = (int8_t)(TILE_PIXEL(src, col, row, 1) >> 1);
+         out[2] = (int8_t)(TILE_PIXEL(src, col, row, 2) >> 1);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r8g8b8a8_snorm pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL();
+ * channels 0..3 are shifted right by one bit and stored as consecutive
+ * int8_t values (4 bytes per pixel).  Output starts y0*dst_stride + x0*4
+ * bytes into dst and rows are dst_stride bytes apart.
+ */
+static void
+lp_tile_r8g8b8a8_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      int8_t *out = (int8_t *)(row_ptr + x0*4);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = (int8_t)(TILE_PIXEL(src, col, row, 0) >> 1);
+         out[1] = (int8_t)(TILE_PIXEL(src, col, row, 1) >> 1);
+         out[2] = (int8_t)(TILE_PIXEL(src, col, row, 2) >> 1);
+         out[3] = (int8_t)(TILE_PIXEL(src, col, row, 3) >> 1);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r8_sscaled pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as one int8_t (1 byte per pixel).  Output starts
+ * y0*dst_stride + x0*1 bytes into dst and rows are dst_stride bytes apart.
+ * The channel is converted as value * 0x1 / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r8_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      int8_t *out = (int8_t *)(row_ptr + x0*1);
+
+      for (col = 0; col < TILE_SIZE; ++col, ++out) {
+         out[0] = (int8_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r8g8_sscaled pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as 2 consecutive int8_t channels (2 bytes per pixel).
+ * Output starts y0*dst_stride + x0*2 bytes into dst and rows are
+ * dst_stride bytes apart.  Each channel is converted as
+ * value * 0x1 / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r8g8_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      int8_t *out = (int8_t *)(row_ptr + x0*2);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 2) {
+         out[0] = (int8_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0x1 / 0xff);
+         out[1] = (int8_t)(((uint32_t)TILE_PIXEL(src, col, row, 1)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r8g8b8_sscaled pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as 3 consecutive int8_t channels (3 bytes per pixel).
+ * Output starts y0*dst_stride + x0*3 bytes into dst and rows are
+ * dst_stride bytes apart.  Each channel is converted as
+ * value * 0x1 / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r8g8b8_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      int8_t *out = (int8_t *)(row_ptr + x0*3);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 3) {
+         out[0] = (int8_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0x1 / 0xff);
+         out[1] = (int8_t)(((uint32_t)TILE_PIXEL(src, col, row, 1)) * 0x1 / 0xff);
+         out[2] = (int8_t)(((uint32_t)TILE_PIXEL(src, col, row, 2)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r8g8b8a8_sscaled pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL()
+ * and written as 4 consecutive int8_t channels (4 bytes per pixel).
+ * Output starts y0*dst_stride + x0*4 bytes into dst and rows are
+ * dst_stride bytes apart.  Each channel is converted as
+ * value * 0x1 / 0xff in integer arithmetic.
+ */
+static void
+lp_tile_r8g8b8a8_sscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      int8_t *out = (int8_t *)(row_ptr + x0*4);
+
+      for (col = 0; col < TILE_SIZE; ++col, out += 4) {
+         out[0] = (int8_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0x1 / 0xff);
+         out[1] = (int8_t)(((uint32_t)TILE_PIXEL(src, col, row, 1)) * 0x1 / 0xff);
+         out[2] = (int8_t)(((uint32_t)TILE_PIXEL(src, col, row, 2)) * 0x1 / 0xff);
+         out[3] = (int8_t)(((uint32_t)TILE_PIXEL(src, col, row, 3)) * 0x1 / 0xff);
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r10g10b10x2_uscaled pixels.
+ *
+ * The tile is walked in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT groups; src
+ * advances by TILE_X_STRIDE per group and channel c of a group starts at
+ * src + c*TILE_C_STRIDE.  Each inner iteration packs two pixels (channels
+ * 0..2 at bits 0, 10 and 20, each value * 0x1 / 0xff; the top two bits
+ * stay zero) and stores them at adjacent destination pixels located
+ * through tile_x_offset[]/tile_y_offset[] relative to (x0, y0).
+ * dst is addressed in uint32_t units with dst_stride / 4 units per row.
+ */
+static void
+lp_tile_r10g10b10x2_uscaled_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint32_t *const out = (uint32_t *) dst;
+   const unsigned pitch = dst_stride / 4;
+   unsigned gx, gy, k;
+
+   for (gy = 0; gy < TILE_SIZE; gy += TILE_VECTOR_HEIGHT) {
+      const unsigned base_y = y0 + gy;
+
+      for (gx = 0; gx < TILE_SIZE; gx += TILE_VECTOR_WIDTH, src += TILE_X_STRIDE) {
+         const unsigned base_x = x0 + gx;
+         const uint8_t *red = src + 0 * TILE_C_STRIDE;
+         const uint8_t *green = src + 1 * TILE_C_STRIDE;
+         const uint8_t *blue = src + 2 * TILE_C_STRIDE;
+
+         for (k = 0; k < TILE_C_STRIDE; k += 2) {
+            const unsigned at = (base_y + tile_y_offset[k]) * pitch + (base_x + tile_x_offset[k]);
+            const uint32_t first = (((uint32_t)(((uint32_t)red[k+0]) * 0x1 / 0xff)) << 0) | (((uint32_t)(((uint32_t)green[k+0]) * 0x1 / 0xff)) << 10) | (((uint32_t)(((uint32_t)blue[k+0]) * 0x1 / 0xff)) << 20);
+            const uint32_t second = (((uint32_t)(((uint32_t)red[k+1]) * 0x1 / 0xff)) << 0) | (((uint32_t)(((uint32_t)green[k+1]) * 0x1 / 0xff)) << 10) | (((uint32_t)(((uint32_t)blue[k+1]) * 0x1 / 0xff)) << 20);
+
+            out[at + 0] = first;
+            out[at + 1] = second;
+         }
+      }
+   }
+}
+
+/**
+ * Unswizzle a 4ub tile into linear r10g10b10x2_snorm pixels.
+ *
+ * Every (x, y) of the TILE_SIZE x TILE_SIZE tile is read with TILE_PIXEL();
+ * channels 0..2 are converted as value * 0x1ff / 0xff and packed into one
+ * uint32_t at bits 0, 10 and 20 (4 bytes per pixel).  Output starts
+ * y0*dst_stride + x0*4 bytes into dst and rows are dst_stride bytes apart.
+ */
+static void
+lp_tile_r10g10b10x2_snorm_unswizzle_4ub(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)
+{
+   uint8_t *row_ptr = dst + y0*dst_stride;
+   unsigned col, row;
+
+   for (row = 0; row < TILE_SIZE; ++row, row_ptr += dst_stride) {
+      uint32_t *out = (uint32_t *)(row_ptr + x0*4);
+
+      for (col = 0; col < TILE_SIZE; ++col, ++out) {
+         const uint32_t red = (uint32_t)(((uint32_t)TILE_PIXEL(src, col, row, 0)) * 0x1ff / 0xff);
+         const uint32_t green = (uint32_t)(((uint32_t)TILE_PIXEL(src, col, row, 1)) * 0x1ff / 0xff);
+         const uint32_t blue = (uint32_t)(((uint32_t)TILE_PIXEL(src, col, row, 2)) * 0x1ff / 0xff);
+
+         *out = red | (green << 10) | (blue << 20);
+      }
+   }
+}
+
+void
+lp_tile_unswizzle_4ub(enum pipe_format format, const uint8_t *src, void *dst, unsigned dst_stride, unsigned x, unsigned y)
+{
+ void (*func)(const uint8_t *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0);
+#ifdef DEBUG
+ lp_tile_unswizzle_count += 1;
+#endif
+ switch(format) {
+ case PIPE_FORMAT_NONE:
+ func = lp_tile_none_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+#ifdef PIPE_ARCH_SSE
+ func = util_cpu_caps.has_sse2 ? lp_tile_b8g8r8a8_unorm_unswizzle_4ub_sse2 : lp_tile_b8g8r8a8_unorm_unswizzle_4ub;
+#else
+ func = lp_tile_b8g8r8a8_unorm_unswizzle_4ub;
+#endif
+ break;
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ func = lp_tile_b8g8r8x8_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ func = lp_tile_a8r8g8b8_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ func = lp_tile_x8r8g8b8_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_A8B8G8R8_UNORM:
+ func = lp_tile_a8b8g8r8_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_X8B8G8R8_UNORM:
+ func = lp_tile_x8b8g8r8_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8G8B8X8_UNORM:
+ func = lp_tile_r8g8b8x8_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_B5G5R5X1_UNORM:
+ func = lp_tile_b5g5r5x1_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_B5G5R5A1_UNORM:
+ func = lp_tile_b5g5r5a1_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_B4G4R4A4_UNORM:
+ func = lp_tile_b4g4r4a4_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_B4G4R4X4_UNORM:
+ func = lp_tile_b4g4r4x4_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ func = lp_tile_b5g6r5_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R10G10B10A2_UNORM:
+ func = lp_tile_r10g10b10a2_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ func = lp_tile_b10g10r10a2_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_L8_UNORM:
+ func = lp_tile_l8_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_A8_UNORM:
+ func = lp_tile_a8_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_I8_UNORM:
+ func = lp_tile_i8_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_L4A4_UNORM:
+ func = lp_tile_l4a4_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_L8A8_UNORM:
+ func = lp_tile_l8a8_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_L16_UNORM:
+ func = lp_tile_l16_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_L8_SRGB:
+ func = lp_tile_l8_srgb_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_L8A8_SRGB:
+ func = lp_tile_l8a8_srgb_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8G8B8_SRGB:
+ func = lp_tile_r8g8b8_srgb_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8G8B8A8_SRGB:
+ func = lp_tile_r8g8b8a8_srgb_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_A8B8G8R8_SRGB:
+ func = lp_tile_a8b8g8r8_srgb_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_X8B8G8R8_SRGB:
+ func = lp_tile_x8b8g8r8_srgb_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_B8G8R8A8_SRGB:
+ func = lp_tile_b8g8r8a8_srgb_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_B8G8R8X8_SRGB:
+ func = lp_tile_b8g8r8x8_srgb_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_A8R8G8B8_SRGB:
+ func = lp_tile_a8r8g8b8_srgb_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_X8R8G8B8_SRGB:
+ func = lp_tile_x8r8g8b8_srgb_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
+ func = lp_tile_r8sg8sb8ux8u_norm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
+ func = lp_tile_r10sg10sb10sa2u_norm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R5SG5SB6U_NORM:
+ func = lp_tile_r5sg5sb6u_norm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R10G10B10A2_USCALED:
+ func = lp_tile_r10g10b10a2_uscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R64_FLOAT:
+ func = lp_tile_r64_float_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R64G64_FLOAT:
+ func = lp_tile_r64g64_float_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R64G64B64_FLOAT:
+ func = lp_tile_r64g64b64_float_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R64G64B64A64_FLOAT:
+ func = lp_tile_r64g64b64a64_float_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32_FLOAT:
+ func = lp_tile_r32_float_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ func = lp_tile_r32g32_float_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ func = lp_tile_r32g32b32_float_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ func = lp_tile_r32g32b32a32_float_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32_UNORM:
+ func = lp_tile_r32_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32G32_UNORM:
+ func = lp_tile_r32g32_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32G32B32_UNORM:
+ func = lp_tile_r32g32b32_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ func = lp_tile_r32g32b32a32_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32_USCALED:
+ func = lp_tile_r32_uscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32G32_USCALED:
+ func = lp_tile_r32g32_uscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32G32B32_USCALED:
+ func = lp_tile_r32g32b32_uscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32G32B32A32_USCALED:
+ func = lp_tile_r32g32b32a32_uscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32_SNORM:
+ func = lp_tile_r32_snorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32G32_SNORM:
+ func = lp_tile_r32g32_snorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32G32B32_SNORM:
+ func = lp_tile_r32g32b32_snorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ func = lp_tile_r32g32b32a32_snorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32_SSCALED:
+ func = lp_tile_r32_sscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32G32_SSCALED:
+ func = lp_tile_r32g32_sscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32G32B32_SSCALED:
+ func = lp_tile_r32g32b32_sscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R32G32B32A32_SSCALED:
+ func = lp_tile_r32g32b32a32_sscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16_FLOAT:
+ func = lp_tile_r16_float_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16G16_FLOAT:
+ func = lp_tile_r16g16_float_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16G16B16_FLOAT:
+ func = lp_tile_r16g16b16_float_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16G16B16A16_FLOAT:
+ func = lp_tile_r16g16b16a16_float_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16_UNORM:
+ func = lp_tile_r16_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16G16_UNORM:
+ func = lp_tile_r16g16_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ func = lp_tile_r16g16b16_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ func = lp_tile_r16g16b16a16_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16_USCALED:
+ func = lp_tile_r16_uscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16G16_USCALED:
+ func = lp_tile_r16g16_uscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ func = lp_tile_r16g16b16_uscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ func = lp_tile_r16g16b16a16_uscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16_SNORM:
+ func = lp_tile_r16_snorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16G16_SNORM:
+ func = lp_tile_r16g16_snorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ func = lp_tile_r16g16b16_snorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ func = lp_tile_r16g16b16a16_snorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16_SSCALED:
+ func = lp_tile_r16_sscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16G16_SSCALED:
+ func = lp_tile_r16g16_sscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ func = lp_tile_r16g16b16_sscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ func = lp_tile_r16g16b16a16_sscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8_UNORM:
+ func = lp_tile_r8_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8G8_UNORM:
+ func = lp_tile_r8g8_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ func = lp_tile_r8g8b8_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ func = lp_tile_r8g8b8a8_unorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8_USCALED:
+ func = lp_tile_r8_uscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8G8_USCALED:
+ func = lp_tile_r8g8_uscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ func = lp_tile_r8g8b8_uscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ func = lp_tile_r8g8b8a8_uscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8_SNORM:
+ func = lp_tile_r8_snorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8G8_SNORM:
+ func = lp_tile_r8g8_snorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ func = lp_tile_r8g8b8_snorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ func = lp_tile_r8g8b8a8_snorm_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8_SSCALED:
+ func = lp_tile_r8_sscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8G8_SSCALED:
+ func = lp_tile_r8g8_sscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ func = lp_tile_r8g8b8_sscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ func = lp_tile_r8g8b8a8_sscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R10G10B10X2_USCALED:
+ func = lp_tile_r10g10b10x2_uscaled_unswizzle_4ub;
+ break;
+ case PIPE_FORMAT_R10G10B10X2_SNORM:
+ func = lp_tile_r10g10b10x2_snorm_unswizzle_4ub;
+ break;
+ default:
+ debug_printf("%s: unsupported format %s\n", __FUNCTION__, util_format_name(format));
+ return;
+ }
+ func(src, (uint8_t *)dst, dst_stride, x, y);
+}
+