summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.am5
-rw-r--r--src/intel_module.c70
-rw-r--r--src/sna/Makefile.am115
-rw-r--r--src/sna/README30
-rw-r--r--src/sna/blt.c73
-rw-r--r--src/sna/gen2_render.c1237
-rw-r--r--src/sna/gen2_render.h785
-rw-r--r--src/sna/gen3_render.c3694
-rw-r--r--src/sna/gen3_render.h1479
-rw-r--r--src/sna/gen4_render.c2762
-rw-r--r--src/sna/gen4_render.h2643
-rw-r--r--src/sna/gen5_render.c2841
-rw-r--r--src/sna/gen5_render.h2730
-rw-r--r--src/sna/gen6_render.c2860
-rw-r--r--src/sna/gen6_render.h1598
-rw-r--r--src/sna/kgem.c1775
-rw-r--r--src/sna/kgem.h332
-rw-r--r--src/sna/kgem_debug.c390
-rw-r--r--src/sna/kgem_debug.h28
-rw-r--r--src/sna/kgem_debug_gen3.c1615
-rw-r--r--src/sna/kgem_debug_gen4.c711
-rw-r--r--src/sna/kgem_debug_gen5.c687
-rw-r--r--src/sna/kgem_debug_gen6.c1099
-rw-r--r--src/sna/sna.h596
-rw-r--r--src/sna/sna_accel.c3306
-rw-r--r--src/sna/sna_blt.c1339
-rw-r--r--src/sna/sna_composite.c722
-rw-r--r--src/sna/sna_damage.c944
-rw-r--r--src/sna/sna_damage.h94
-rw-r--r--src/sna/sna_display.c1656
-rw-r--r--src/sna/sna_dri.c1446
-rw-r--r--src/sna/sna_driver.c925
-rw-r--r--src/sna/sna_glyphs.c1145
-rw-r--r--src/sna/sna_gradient.c335
-rw-r--r--src/sna/sna_io.c452
-rw-r--r--src/sna/sna_module.h3
-rw-r--r--src/sna/sna_reg.h108
-rw-r--r--src/sna/sna_render.c888
-rw-r--r--src/sna/sna_render.h511
-rw-r--r--src/sna/sna_render_inline.h102
-rw-r--r--src/sna/sna_stream.c99
-rw-r--r--src/sna/sna_tiling.c264
-rw-r--r--src/sna/sna_transform.c139
-rw-r--r--src/sna/sna_trapezoids.c2375
-rw-r--r--src/sna/sna_video.c737
-rw-r--r--src/sna/sna_video.h130
-rw-r--r--src/sna/sna_video_hwmc.c252
-rw-r--r--src/sna/sna_video_hwmc.h74
-rw-r--r--src/sna/sna_video_overlay.c731
-rw-r--r--src/sna/sna_video_textured.c428
50 files changed, 49352 insertions, 8 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index abb03c3f..a7f219c1 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -35,6 +35,11 @@ intel_drv_ladir = @moduledir@/drivers
intel_drv_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@ -ldrm_intel ../uxa/libuxa.la legacy/liblegacy.la
intel_drv_la_LIBADD += @PCIACCESS_LIBS@
+if SNA
+SUBDIRS += sna
+intel_drv_la_LIBADD += sna/libsna.la
+endif
+
NULL:=#
intel_drv_la_SOURCES = \
diff --git a/src/intel_module.c b/src/intel_module.c
index 9468e72f..9b1da491 100644
--- a/src/intel_module.c
+++ b/src/intel_module.c
@@ -36,6 +36,7 @@
#include "intel.h"
#include "intel_driver.h"
#include "legacy/legacy.h"
+#include "sna/sna_module.h"
#include <xf86drmMode.h>
@@ -320,22 +321,49 @@ static Bool intel_pci_probe(DriverPtr driver,
scrn->name = INTEL_NAME;
scrn->Probe = NULL;
-#if KMS_ONLY
- intel_init_scrn(scrn);
-#else
switch (DEVICE_ID(device)) {
+#if !KMS_ONLY
case PCI_CHIP_I810:
case PCI_CHIP_I810_DC100:
case PCI_CHIP_I810_E:
case PCI_CHIP_I815:
lg_i810_init(scrn);
break;
+#endif
+#if SNA
+ case 0:
+#if SNA_GEN3
+ case PCI_CHIP_PINEVIEW_M:
+ case PCI_CHIP_PINEVIEW_G:
+ case PCI_CHIP_G33_G:
+ case PCI_CHIP_Q35_G:
+ case PCI_CHIP_Q33_G:
+#endif
+#if SNA_GEN5
+ case PCI_CHIP_IRONLAKE_D_G:
+ case PCI_CHIP_IRONLAKE_M_G:
+#endif
+#if SNA_GEN6
+ case PCI_CHIP_SANDYBRIDGE_GT1:
+ case PCI_CHIP_SANDYBRIDGE_GT2:
+ case PCI_CHIP_SANDYBRIDGE_GT2_PLUS:
+ case PCI_CHIP_SANDYBRIDGE_M_GT1:
+ case PCI_CHIP_SANDYBRIDGE_M_GT2:
+ case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS:
+ case PCI_CHIP_SANDYBRIDGE_S_GT:
+#endif
+ sna_init_scrn(scrn);
+ break;
+#endif
default:
+#if SNA_DEFAULT
+ sna_init_scrn(scrn);
+#else
intel_init_scrn(scrn);
+#endif
break;
}
-#endif
}
return scrn != NULL;
}
@@ -360,20 +388,46 @@ static XF86ModuleVersionInfo intel_version = {
static const OptionInfoRec *
intel_available_options(int chipid, int busid)
{
-#if KMS_ONLY
- return intel_uxa_available_options(chipid, busid);
-#else
switch (chipid) {
+#if !KMS_ONLY
case PCI_CHIP_I810:
case PCI_CHIP_I810_DC100:
case PCI_CHIP_I810_E:
case PCI_CHIP_I815:
return lg_i810_available_options(chipid, busid);
+#endif
+#if SNA
+ case 0:
+#if SNA_GEN3
+ case PCI_CHIP_PINEVIEW_M:
+ case PCI_CHIP_PINEVIEW_G:
+ case PCI_CHIP_G33_G:
+ case PCI_CHIP_Q35_G:
+ case PCI_CHIP_Q33_G:
+#endif
+#if SNA_GEN5
+ case PCI_CHIP_IRONLAKE_D_G:
+ case PCI_CHIP_IRONLAKE_M_G:
+#endif
+#if SNA_GEN6
+ case PCI_CHIP_SANDYBRIDGE_GT1:
+ case PCI_CHIP_SANDYBRIDGE_GT2:
+ case PCI_CHIP_SANDYBRIDGE_GT2_PLUS:
+ case PCI_CHIP_SANDYBRIDGE_M_GT1:
+ case PCI_CHIP_SANDYBRIDGE_M_GT2:
+ case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS:
+ case PCI_CHIP_SANDYBRIDGE_S_GT:
+#endif
+ return sna_available_options(chipid, busid);
+#endif
default:
+#if SNA_DEFAULT
+ return sna_available_options(chipid, busid);
+#else
return intel_uxa_available_options(chipid, busid);
- }
#endif
+ }
}
static DriverRec intel = {
diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am
new file mode 100644
index 00000000..f65b281b
--- /dev/null
+++ b/src/sna/Makefile.am
@@ -0,0 +1,115 @@
+# Copyright 2005 Adam Jackson.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# on the rights to use, copy, modify, merge, publish, distribute, sub
+# license, and/or sell copies of the Software, and to permit persons to whom
+# the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+# ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+AM_CFLAGS = @CWARNFLAGS@ @XORG_CFLAGS@ @UDEV_CFLAGS@ @DRM_CFLAGS@ @DRI_CFLAGS@ \
+ -I$(top_srcdir)/src -I$(top_srcdir)/uxa -I$(top_srcdir)/src/render_program
+
+noinst_LTLIBRARIES = libsna.la
+libsna_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@
+
+NULL:=#
+
+libsna_la_SOURCES = \
+ blt.c \
+ kgem.c \
+ kgem.h \
+ sna.h \
+ sna_accel.c \
+ sna_blt.c \
+ sna_composite.c \
+ sna_damage.c \
+ snd_damage.h \
+ sna_display.c \
+ sna_driver.c \
+ sna_driver.h \
+ sna_glyphs.c \
+ sna_gradient.c \
+ sna_io.c \
+ sna_render.c \
+ sna_render.h \
+ sna_render_inline.h \
+ sna_reg.h \
+ sna_stream.c \
+ sna_trapezoids.c \
+ sna_tiling.c \
+ sna_transform.c \
+ sna_video.c \
+ sna_video.h \
+ sna_video_overlay.c \
+ sna_video_textured.c \
+ $(NULL)
+
+if SNA_GEN2
+libsna_la_SOURCES += \
+ gen2_render.c \
+ gen2_render.h \
+ $(NULL)
+endif
+if SNA_GEN3
+libsna_la_SOURCES += \
+ gen3_render.c \
+ gen3_render.h \
+ $(NULL)
+endif
+if SNA_GEN4
+libsna_la_SOURCES += \
+ gen4_render.c \
+ gen4_render.h \
+ $(NULL)
+endif
+if SNA_GEN5
+libsna_la_SOURCES += \
+ gen5_render.c \
+ gen5_render.h \
+ $(NULL)
+endif
+if SNA_GEN6
+libsna_la_SOURCES += \
+ gen6_render.c \
+ gen6_render.h \
+ $(NULL)
+endif
+
+if DRI
+libsna_la_SOURCES += \
+ sna_dri.c \
+ $(NULL)
+libsna_la_LIBADD += \
+ $(DRI_LIBS) \
+ $(NULL)
+endif
+
+if XVMC
+libsna_la_SOURCES += \
+ sna_video_hwmc.h \
+ sna_video_hwmc.c \
+ $(NULL)
+endif
+
+if DEBUG
+libsna_la_SOURCES += \
+ kgem_debug.c \
+ kgem_debug.h \
+ kgem_debug_gen3.c \
+ kgem_debug_gen4.c \
+ kgem_debug_gen5.c \
+ kgem_debug_gen6.c \
+ $(NULL)
+endif
diff --git a/src/sna/README b/src/sna/README
new file mode 100644
index 00000000..fd847de3
--- /dev/null
+++ b/src/sna/README
@@ -0,0 +1,30 @@
+SandyBridge's New Acceleration
+------------------------------
+
+The guiding principle behind the design is to avoid GPU context switches.
+On SandyBridge (and beyond), these are especially pernicious because the
+RENDER and BLT engine are now on different rings and require
+synchronisation of the various execution units when switching contexts.
+They were not cheap on early generation, but with the increasing
+complexity of the GPU, avoiding such serialisations is important.
+
+Furthermore, we try very hard to avoid migrating between the CPU and GPU.
+Every pixmap (apart from temporary "scratch" surfaces which we intend to
+use on the GPU) is created in system memory. All operations are then done
+upon this shadow copy until we are forced to move it onto the GPU. Such
+migration can only be first triggered by: setting the pixmap as the
+scanout (we obviously need a GPU buffer here), using the pixmap as a DRI
+buffer (the client expects to perform hardware acceleration and we do not
+want to disappoint) and lastly using the pixmap as a RENDER target. This
+last is chosen because when we know we are going to perform hardware
+acceleration and will continue to do so without fallbacks, using the GPU
+is much, much faster than the CPU. The heuristic I chose therefore was
+that if the application uses RENDER, i.e. cairo, then it will only be
+using those paths and not intermixing core drawing operations and so
+unlikely to trigger a fallback.
+
+The complicating case is front-buffer rendering. So in order to accommodate
+using RENDER on an application whilst running xterm without a composite
+manager redirecting all the pixmaps to backing surfaces, we have to
+perform damage tracking to avoid excess migration of portions of the
+buffer.
diff --git a/src/sna/blt.c b/src/sna/blt.c
new file mode 100644
index 00000000..ac20372e
--- /dev/null
+++ b/src/sna/blt.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+
+#if DEBUG_BLT
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+void
+memcpy_blt(const void *src, void *dst, int bpp,
+ uint16_t src_stride, uint16_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
+{
+ uint8_t *src_bytes;
+ uint8_t *dst_bytes;
+
+ assert(width && height);
+ assert(bpp >= 8);
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+
+ bpp /= 8;
+ width *= bpp;
+
+ src_bytes = (uint8_t *)src + src_stride * src_y + src_x * bpp;
+ dst_bytes = (uint8_t *)dst + dst_stride * dst_y + dst_x * bpp;
+
+ if (width == src_stride && width == dst_stride) {
+ memcpy(dst_bytes, src_bytes, width * height);
+ return;
+ }
+
+ do {
+ memcpy(dst_bytes, src_bytes, width);
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+}
diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
new file mode 100644
index 00000000..896f7308
--- /dev/null
+++ b/src/sna/gen2_render.c
@@ -0,0 +1,1237 @@
+/*
+ * Copyright © 2006,2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@intel.com>
+ * Eric Anholt <eric@anholt.net>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_reg.h"
+#include "sna_render.h"
+#include "sna_render_inline.h"
+
+#include "gen2_render.h"
+
+#if DEBUG_RENDER
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+#define OUT_BATCH(v) batch_emit(sna, v)
+#define OUT_BATCH_F(v) batch_emit_float(sna, v)
+#define OUT_VERTEX(v) batch_emit_float(sna, v)
+
+static const struct blendinfo {
+ Bool dst_alpha;
+ Bool src_alpha;
+ uint32_t src_blend;
+ uint32_t dst_blend;
+} gen2_blend_op[] = {
+ /* Clear */
+ {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO},
+ /* Src */
+ {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO},
+ /* Dst */
+ {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE},
+ /* Over */
+ {0, 1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA},
+ /* OverReverse */
+ {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE},
+ /* In */
+ {1, 0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO},
+ /* InReverse */
+ {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA},
+ /* Out */
+ {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO},
+ /* OutReverse */
+ {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA},
+ /* Atop */
+ {1, 1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
+ /* AtopReverse */
+ {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA},
+ /* Xor */
+ {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA},
+ /* Add */
+ {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ONE},
+};
+
+static const struct formatinfo {
+ int fmt;
+ uint32_t card_fmt;
+} i8xx_tex_formats[] = {
+ {PICT_a8, MAPSURF_8BIT | MT_8BIT_A8},
+ {PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888},
+ {PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888},
+ {PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565},
+ {PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555},
+ {PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444},
+}, i85x_tex_formats[] = {
+ {PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888},
+ {PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888},
+};
+
+static inline uint32_t
+gen2_buf_tiling(uint32_t tiling)
+{
+ uint32_t v = 0;
+ switch (tiling) {
+ case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y;
+ case I915_TILING_X: v |= BUF_3D_TILED_SURFACE;
+ case I915_TILING_NONE: break;
+ }
+ return v;
+}
+
+static uint32_t
+gen2_get_dst_format(uint32_t format)
+{
+#define BIAS DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8)
+ switch (format) {
+ default:
+ assert(0);
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ return COLR_BUF_ARGB8888 | BIAS;
+ case PICT_r5g6b5:
+ return COLR_BUF_RGB565 | BIAS;
+ case PICT_a1r5g5b5:
+ case PICT_x1r5g5b5:
+ return COLR_BUF_ARGB1555 | BIAS;
+ case PICT_a8:
+ return COLR_BUF_8BIT | BIAS;
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ return COLR_BUF_ARGB4444 | BIAS;
+ }
+#undef BIAS
+}
+
+static Bool
+gen2_check_dst_format(uint32_t format)
+{
+ switch (format) {
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ case PICT_r5g6b5:
+ case PICT_a1r5g5b5:
+ case PICT_x1r5g5b5:
+ case PICT_a8:
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static uint32_t
+gen2_get_card_format(struct sna *sna, uint32_t format)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) {
+ if (i8xx_tex_formats[i].fmt == format)
+ return i8xx_tex_formats[i].card_fmt;
+ }
+
+ if (!(IS_I830(sna) || IS_845G(sna))) {
+ for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) {
+ if (i85x_tex_formats[i].fmt == format)
+ return i85x_tex_formats[i].card_fmt;
+ }
+ }
+
+ assert(0);
+ return 0;
+}
+
+static Bool
+gen2_check_card_format(struct sna *sna, uint32_t format)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) {
+ if (i8xx_tex_formats[i].fmt == format)
+ return TRUE;
+ }
+
+ if (!(IS_I830(sna) || IS_845G(sna))) {
+ for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) {
+ if (i85x_tex_formats[i].fmt == format)
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static uint32_t
+gen2_sampler_tiling_bits(uint32_t tiling)
+{
+ uint32_t bits = 0;
+ switch (tiling) {
+ default:
+ assert(0);
+ case I915_TILING_Y:
+ bits |= TM0S1_TILE_WALK;
+ case I915_TILING_X:
+ bits |= TM0S1_TILED_SURFACE;
+ case I915_TILING_NONE:
+ break;
+ }
+ return bits;
+}
+
+static Bool
+gen2_check_filter(PicturePtr picture)
+{
+ switch (picture->filter) {
+ case PictFilterNearest:
+ case PictFilterBilinear:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static Bool
+gen2_check_repeat(PicturePtr picture)
+{
+ if (!picture->repeat)
+ return TRUE;
+
+ switch (picture->repeatType) {
+ case RepeatNone:
+ case RepeatNormal:
+ case RepeatPad:
+ case RepeatReflect:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static void
+gen2_emit_texture(struct sna *sna,
+ const struct sna_composite_channel *channel,
+ int unit)
+{
+ uint32_t filter;
+ uint32_t wrap_mode;
+ uint32_t texcoordtype;
+
+ if (channel->is_affine)
+ texcoordtype = TEXCOORDTYPE_CARTESIAN;
+ else
+ texcoordtype = TEXCOORDTYPE_HOMOGENEOUS;
+
+ switch (channel->repeat) {
+ default:
+ assert(0);
+ case RepeatNone:
+ wrap_mode = TEXCOORDMODE_CLAMP_BORDER;
+ break;
+ case RepeatNormal:
+ wrap_mode = TEXCOORDMODE_WRAP;
+ break;
+ case RepeatPad:
+ wrap_mode = TEXCOORDMODE_CLAMP;
+ break;
+ case RepeatReflect:
+ wrap_mode = TEXCOORDMODE_MIRROR;
+ break;
+ }
+
+ switch (channel->filter) {
+ default:
+ assert(0);
+ case PictFilterNearest:
+ filter = (FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT |
+ FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT);
+ break;
+ case PictFilterBilinear:
+ filter = (FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT |
+ FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT);
+ break;
+ }
+ filter |= MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT;
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
+ LOAD_TEXTURE_MAP(unit) | 4);
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ channel->bo,
+ I915_GEM_DOMAIN_SAMPLER << 16,
+ 0));
+ OUT_BATCH(((channel->height - 1) << TM0S1_HEIGHT_SHIFT) |
+ ((channel->width - 1) << TM0S1_WIDTH_SHIFT) |
+ gen2_get_card_format(sna, channel->pict_format) |
+ gen2_sampler_tiling_bits(channel->bo->tiling));
+ OUT_BATCH((channel->bo->pitch / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D);
+ OUT_BATCH(filter);
+ OUT_BATCH(0); /* default color */
+ OUT_BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(unit) |
+ ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL |
+ texcoordtype |
+ ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(wrap_mode) |
+ ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode));
+ /* map texel stream */
+ OUT_BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD);
+ if (unit == 0)
+ OUT_BATCH(TEXBIND_SET0(TEXCOORDSRC_VTXSET_0) |
+ TEXBIND_SET1(TEXCOORDSRC_KEEP) |
+ TEXBIND_SET2(TEXCOORDSRC_KEEP) |
+ TEXBIND_SET3(TEXCOORDSRC_KEEP));
+ else
+ OUT_BATCH(TEXBIND_SET0(TEXCOORDSRC_VTXSET_0) |
+ TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
+ TEXBIND_SET2(TEXCOORDSRC_KEEP) |
+ TEXBIND_SET3(TEXCOORDSRC_KEEP));
+ OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+ (unit << 16) |
+ DISABLE_TEX_STREAM_BUMP |
+ ENABLE_TEX_STREAM_COORD_SET |
+ TEX_STREAM_COORD_SET(unit) |
+ ENABLE_TEX_STREAM_MAP_IDX |
+ TEX_STREAM_MAP_IDX(unit));
+}
+
+static void
+gen2_get_blend_factors(const struct sna_composite_op *op,
+ uint32_t *c_out,
+ uint32_t *a_out)
+{
+ uint32_t cblend, ablend;
+
+ /* If component alpha is active in the mask and the blend operation
+ * uses the source alpha, then we know we don't need the source
+ * value (otherwise we would have hit a fallback earlier), so we
+ * provide the source alpha (src.A * mask.X) as output color.
+ * Conversely, if CA is set and we don't need the source alpha, then
+ * we produce the source value (src.X * mask.X) and the source alpha
+ * is unused.. Otherwise, we provide the non-CA source value
+ * (src.X * mask.A).
+ *
+ * The PICT_FORMAT_RGB(pict) == 0 fixups are not needed on 855+'s a8
+ * pictures, but we need to implement it for 830/845 and there's no
+ * harm done in leaving it in.
+ */
+ cblend =
+ TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_MODULE |
+ TB0C_OUTPUT_WRITE_CURRENT;
+ ablend =
+ TB0A_RESULT_SCALE_1X | TB0A_OP_MODULE |
+ TB0A_OUTPUT_WRITE_CURRENT;
+
+ /* Get the source picture's channels into TBx_ARG1 */
+ if ((op->has_component_alpha && gen2_blend_op[op->op].src_alpha) ||
+ op->dst.format == PICT_a8) {
+ /* Producing source alpha value, so the first set of channels
+ * is src.A instead of src.X. We also do this if the destination
+ * is a8, in which case src.G is what's written, and the other
+ * channels are ignored.
+ */
+ ablend |= TB0A_ARG1_SEL_TEXEL0;
+ cblend |= TB0C_ARG1_SEL_TEXEL0 | TB0C_ARG1_REPLICATE_ALPHA;
+ } else {
+ if (PICT_FORMAT_RGB(op->src.pict_format) != 0)
+ cblend |= TB0C_ARG1_SEL_TEXEL0;
+ else
+ cblend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT; /* 0.0 */
+ ablend |= TB0A_ARG1_SEL_TEXEL0;
+ }
+
+ if (op->mask.bo) {
+ cblend |= TB0C_ARG2_SEL_TEXEL1;
+ if (op->dst.format == PICT_a8 || op->has_component_alpha)
+ cblend |= TB0C_ARG2_REPLICATE_ALPHA;
+ ablend |= TB0A_ARG2_SEL_TEXEL1;
+ } else {
+ cblend |= TB0C_ARG2_SEL_ONE;
+ ablend |= TB0A_ARG2_SEL_ONE;
+ }
+
+ *c_out = cblend;
+ *a_out = ablend;
+}
+
+static uint32_t gen2_get_blend_cntl(int op,
+ Bool has_component_alpha,
+ uint32_t dst_format)
+{
+ uint32_t sblend, dblend;
+
+ sblend = gen2_blend_op[op].src_blend;
+ dblend = gen2_blend_op[op].dst_blend;
+
+ /* If there's no dst alpha channel, adjust the blend op so that
+ * we'll treat it as always 1.
+ */
+ if (PICT_FORMAT_A(dst_format) == 0 && gen2_blend_op[op].dst_alpha) {
+ if (sblend == BLENDFACTOR_DST_ALPHA)
+ sblend = BLENDFACTOR_ONE;
+ else if (sblend == BLENDFACTOR_INV_DST_ALPHA)
+ sblend = BLENDFACTOR_ZERO;
+ }
+
+ /* If the source alpha is being used, then we should only be in a case
+ * where the source blend factor is 0, and the source blend value is
+ * the mask channels multiplied by the source picture's alpha.
+ */
+ if (has_component_alpha && gen2_blend_op[op].src_alpha) {
+ if (dblend == BLENDFACTOR_SRC_ALPHA)
+ dblend = BLENDFACTOR_SRC_COLR;
+ else if (dblend == BLENDFACTOR_INV_SRC_ALPHA)
+ dblend = BLENDFACTOR_INV_SRC_COLR;
+ }
+
+ return (sblend << S8_SRC_BLEND_FACTOR_SHIFT |
+ dblend << S8_DST_BLEND_FACTOR_SHIFT);
+}
+
+static void gen2_emit_invariant(struct sna *sna)
+{
+ OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(0));
+ OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(1));
+ OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(2));
+ OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(3));
+
+ OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
+ OUT_BATCH(0);
+
+ OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
+ OUT_BATCH(0);
+
+ OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
+ OUT_BATCH(0);
+
+ OUT_BATCH(_3DSTATE_FOG_MODE_CMD);
+ OUT_BATCH(FOGFUNC_ENABLE |
+ FOG_LINEAR_CONST | FOGSRC_INDEX_Z | ENABLE_FOG_DENSITY);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+ MAP_UNIT(0) |
+ DISABLE_TEX_STREAM_BUMP |
+ ENABLE_TEX_STREAM_COORD_SET |
+ TEX_STREAM_COORD_SET(0) |
+ ENABLE_TEX_STREAM_MAP_IDX |
+ TEX_STREAM_MAP_IDX(0));
+ OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+ MAP_UNIT(1) |
+ DISABLE_TEX_STREAM_BUMP |
+ ENABLE_TEX_STREAM_COORD_SET |
+ TEX_STREAM_COORD_SET(1) |
+ ENABLE_TEX_STREAM_MAP_IDX |
+ TEX_STREAM_MAP_IDX(1));
+ OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+ MAP_UNIT(2) |
+ DISABLE_TEX_STREAM_BUMP |
+ ENABLE_TEX_STREAM_COORD_SET |
+ TEX_STREAM_COORD_SET(2) |
+ ENABLE_TEX_STREAM_MAP_IDX |
+ TEX_STREAM_MAP_IDX(2));
+ OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+ MAP_UNIT(3) |
+ DISABLE_TEX_STREAM_BUMP |
+ ENABLE_TEX_STREAM_COORD_SET |
+ TEX_STREAM_COORD_SET(3) |
+ ENABLE_TEX_STREAM_MAP_IDX |
+ TEX_STREAM_MAP_IDX(3));
+
+ OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
+ OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(0));
+ OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
+ OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(1));
+ OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
+ OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(2));
+ OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
+ OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(3));
+
+ OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
+ ENABLE_POINT_RASTER_RULE |
+ OGL_POINT_RASTER_RULE |
+ ENABLE_LINE_STRIP_PROVOKE_VRTX |
+ ENABLE_TRI_FAN_PROVOKE_VRTX |
+ ENABLE_TRI_STRIP_PROVOKE_VRTX |
+ LINE_STRIP_PROVOKE_VRTX(1) |
+ TRI_FAN_PROVOKE_VRTX(2) | TRI_STRIP_PROVOKE_VRTX(2));
+
+ OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+ OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ OUT_BATCH(_3DSTATE_VERTEX_TRANSFORM);
+ OUT_BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE);
+
+ OUT_BATCH(_3DSTATE_W_STATE_CMD);
+ OUT_BATCH(MAGIC_W_STATE_DWORD1);
+ OUT_BATCH_F(1.0);
+
+ OUT_BATCH(_3DSTATE_COLOR_FACTOR_CMD);
+ OUT_BATCH(0x80808080); /* .5 required in alpha for GL_DOT3_RGBA_EXT */
+
+ OUT_BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD);
+ OUT_BATCH(TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) |
+ TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) |
+ TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
+ TEXBIND_SET0(TEXCOORDSRC_VTXSET_0));
+
+ /* copy from mesa */
+ OUT_BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD |
+ DISABLE_INDPT_ALPHA_BLEND |
+ ENABLE_ALPHA_BLENDFUNC | ABLENDFUNC_ADD);
+
+ OUT_BATCH(_3DSTATE_FOG_COLOR_CMD |
+ FOG_COLOR_RED(0) | FOG_COLOR_GREEN(0) | FOG_COLOR_BLUE(0));
+
+ OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD);
+ OUT_BATCH(0);
+
+ OUT_BATCH(_3DSTATE_MODES_1_CMD |
+ ENABLE_COLR_BLND_FUNC |
+ BLENDFUNC_ADD |
+ ENABLE_SRC_BLND_FACTOR |
+ SRC_BLND_FACT(BLENDFACTOR_ONE) |
+ ENABLE_DST_BLND_FACTOR | DST_BLND_FACT(BLENDFACTOR_ZERO));
+ OUT_BATCH(_3DSTATE_MODES_2_CMD |
+ ENABLE_GLOBAL_DEPTH_BIAS |
+ GLOBAL_DEPTH_BIAS(0) |
+ ENABLE_ALPHA_TEST_FUNC |
+ ALPHA_TEST_FUNC(0) | /* always */
+ ALPHA_REF_VALUE(0));
+ OUT_BATCH(_3DSTATE_MODES_3_CMD |
+ ENABLE_DEPTH_TEST_FUNC |
+ DEPTH_TEST_FUNC(0x2) | /* COMPAREFUNC_LESS */
+ ENABLE_ALPHA_SHADE_MODE | ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) |
+ ENABLE_FOG_SHADE_MODE | FOG_SHADE_MODE(SHADE_MODE_LINEAR) |
+ ENABLE_SPEC_SHADE_MODE | SPEC_SHADE_MODE(SHADE_MODE_LINEAR) |
+ ENABLE_COLOR_SHADE_MODE | COLOR_SHADE_MODE(SHADE_MODE_LINEAR) |
+ ENABLE_CULL_MODE | CULLMODE_NONE);
+
+ OUT_BATCH(_3DSTATE_MODES_4_CMD |
+ ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) |
+ ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff) |
+ ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff));
+
+ OUT_BATCH(_3DSTATE_STENCIL_TEST_CMD |
+ ENABLE_STENCIL_PARMS |
+ STENCIL_FAIL_OP(0) | /* STENCILOP_KEEP */
+ STENCIL_PASS_DEPTH_FAIL_OP(0) | /* STENCILOP_KEEP */
+ STENCIL_PASS_DEPTH_PASS_OP(0) | /* STENCILOP_KEEP */
+ ENABLE_STENCIL_TEST_FUNC | STENCIL_TEST_FUNC(0) | /* COMPAREFUNC_ALWAYS */
+ ENABLE_STENCIL_REF_VALUE | STENCIL_REF_VALUE(0));
+
+ OUT_BATCH(_3DSTATE_MODES_5_CMD |
+ FLUSH_TEXTURE_CACHE |
+ ENABLE_SPRITE_POINT_TEX | SPRITE_POINT_TEX_OFF |
+ ENABLE_FIXED_LINE_WIDTH | FIXED_LINE_WIDTH(0x2) | /* 1.0 */
+ ENABLE_FIXED_POINT_WIDTH | FIXED_POINT_WIDTH(1));
+
+ OUT_BATCH(_3DSTATE_ENABLES_1_CMD |
+ DISABLE_LOGIC_OP |
+ DISABLE_STENCIL_TEST |
+ DISABLE_DEPTH_BIAS |
+ DISABLE_SPEC_ADD |
+ DISABLE_FOG |
+ DISABLE_ALPHA_TEST |
+ ENABLE_COLOR_BLEND |
+ DISABLE_DEPTH_TEST);
+ OUT_BATCH(_3DSTATE_ENABLES_2_CMD |
+ DISABLE_STENCIL_WRITE |
+ ENABLE_TEX_CACHE |
+ DISABLE_DITHER |
+ ENABLE_COLOR_MASK |
+ ENABLE_COLOR_WRITE |
+ DISABLE_DEPTH_WRITE);
+
+ OUT_BATCH(_3DSTATE_STIPPLE);
+
+ /* Set default blend state */
+ OUT_BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
+ TEXPIPE_COLOR |
+ ENABLE_TEXOUTPUT_WRT_SEL | TEXOP_OUTPUT_CURRENT |
+ DISABLE_TEX_CNTRL_STAGE |
+ TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS |
+ TEXOP_LAST_STAGE | TEXBLENDOP_ARG1);
+ OUT_BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) |
+ TEXPIPE_ALPHA |
+ ENABLE_TEXOUTPUT_WRT_SEL | TEXOP_OUTPUT_CURRENT |
+ TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1);
+ OUT_BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) |
+ TEXPIPE_COLOR |
+ TEXBLEND_ARG1 |
+ TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_DIFFUSE);
+ OUT_BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) |
+ TEXPIPE_ALPHA |
+ TEXBLEND_ARG1 |
+ TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_DIFFUSE);
+
+ OUT_BATCH(_3DSTATE_AA_CMD |
+ AA_LINE_ECAAR_WIDTH_ENABLE |
+ AA_LINE_ECAAR_WIDTH_1_0 |
+ AA_LINE_REGION_WIDTH_ENABLE |
+ AA_LINE_REGION_WIDTH_1_0 | AA_LINE_DISABLE);
+
+ sna->render_state.gen2.need_invariant = FALSE;
+}
+
+static void
+gen2_get_batch(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ if (!kgem_check_batch(&sna->kgem, 50)) {
+ DBG(("%s: flushing batch: size %d > %d\n",
+ __FUNCTION__, 50,
+ sna->kgem.surface-sna->kgem.nbatch));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->kgem.nreloc + 3 > KGEM_RELOC_SIZE(&sna->kgem)) {
+ DBG(("%s: flushing batch: reloc %d >= %d\n",
+ __FUNCTION__,
+ sna->kgem.nreloc + 3,
+ (int)KGEM_RELOC_SIZE(&sna->kgem)));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->kgem.nexec + 3 > KGEM_EXEC_SIZE(&sna->kgem)) {
+ DBG(("%s: flushing batch: exec %d >= %d\n",
+ __FUNCTION__,
+ sna->kgem.nexec + 1,
+ (int)KGEM_EXEC_SIZE(&sna->kgem)));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->render_state.gen2.need_invariant)
+ gen2_emit_invariant(sna);
+}
+
+static void gen2_emit_composite_state(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ uint32_t texcoordfmt;
+ uint32_t cblend, ablend;
+
+ gen2_get_batch(sna, op);
+
+ OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
+ OUT_BATCH(BUF_3D_ID_COLOR_BACK |
+ gen2_buf_tiling(op->dst.bo->tiling) |
+ BUF_3D_PITCH(op->dst.bo->pitch));
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ op->dst.bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER,
+ 0));
+
+ OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
+ OUT_BATCH(gen2_get_dst_format(op->dst.format));
+
+ OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* ymin, xmin */
+ OUT_BATCH(DRAW_YMAX(op->dst.height - 1) |
+ DRAW_XMAX(op->dst.width - 1));
+ OUT_BATCH(0); /* yorig, xorig */
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+ I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
+ OUT_BATCH((1 + (op->mask.bo != NULL)) << 12);
+ OUT_BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
+ OUT_BATCH(S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
+ gen2_get_blend_cntl(op->op,
+ op->has_component_alpha,
+ op->dst.format) |
+ S8_ENABLE_COLOR_BUFFER_WRITE);
+
+ OUT_BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD | DISABLE_INDPT_ALPHA_BLEND);
+
+ gen2_get_blend_factors(op, &cblend, &ablend);
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
+ LOAD_TEXTURE_BLEND_STAGE(0) | 1);
+ OUT_BATCH(cblend);
+ OUT_BATCH(ablend);
+
+ OUT_BATCH(_3DSTATE_ENABLES_1_CMD | DISABLE_LOGIC_OP |
+ DISABLE_STENCIL_TEST | DISABLE_DEPTH_BIAS |
+ DISABLE_SPEC_ADD | DISABLE_FOG | DISABLE_ALPHA_TEST |
+ ENABLE_COLOR_BLEND | DISABLE_DEPTH_TEST);
+ /* We have to explicitly say we don't want write disabled */
+ OUT_BATCH(_3DSTATE_ENABLES_2_CMD | ENABLE_COLOR_MASK |
+ DISABLE_STENCIL_WRITE | ENABLE_TEX_CACHE |
+ DISABLE_DITHER | ENABLE_COLOR_WRITE | DISABLE_DEPTH_WRITE);
+
+ texcoordfmt = 0;
+ if (op->src.is_affine)
+ texcoordfmt |= TEXCOORDFMT_2D << 0;
+ else
+ texcoordfmt |= TEXCOORDFMT_3D << 0;
+ if (op->mask.bo) {
+ if (op->mask.is_affine)
+ texcoordfmt |= TEXCOORDFMT_2D << 2;
+ else
+ texcoordfmt |= TEXCOORDFMT_3D << 2;
+ }
+ OUT_BATCH(_3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt);
+
+ gen2_emit_texture(sna, &op->src, 0);
+ if (op->mask.bo)
+ gen2_emit_texture(sna, &op->mask, 1);
+}
+
+static inline void
+gen2_emit_composite_dstcoord(struct sna *sna, int dstX, int dstY)
+{
+ OUT_VERTEX(dstX);
+ OUT_VERTEX(dstY);
+}
+
+static void
+gen2_emit_composite_texcoord(struct sna *sna,
+ const struct sna_composite_channel *channel,
+ int16_t x, int16_t y)
+{
+ float s = 0, t = 0, w = 1;
+
+ x += channel->offset[0];
+ y += channel->offset[1];
+
+ if (channel->is_affine) {
+ sna_get_transformed_coordinates(x, y,
+ channel->transform,
+ &s, &t);
+ OUT_VERTEX(s * channel->scale[0]);
+ OUT_VERTEX(t * channel->scale[1]);
+ } else {
+ sna_get_transformed_coordinates_3d(x, y,
+ channel->transform,
+ &s, &t, &w);
+ OUT_VERTEX(s * channel->scale[0]);
+ OUT_VERTEX(t * channel->scale[1]);
+ OUT_VERTEX(w);
+ }
+}
+
+static void
+gen2_emit_composite_vertex(struct sna *sna,
+ const struct sna_composite_op *op,
+ int16_t srcX, int16_t srcY,
+ int16_t mskX, int16_t mskY,
+ int16_t dstX, int16_t dstY)
+{
+ gen2_emit_composite_dstcoord(sna, dstX, dstY);
+ gen2_emit_composite_texcoord(sna, &op->src, srcX, srcY);
+ gen2_emit_composite_texcoord(sna, &op->mask, mskX, mskY);
+}
+
+static void
+gen2_emit_composite_primitive(struct sna *sna,
+ const struct sna_composite_op *op,
+ int16_t srcX, int16_t srcY,
+ int16_t mskX, int16_t mskY,
+ int16_t dstX, int16_t dstY,
+ int16_t w, int16_t h)
+{
+ dstX += op->dst.x;
+ dstY += op->dst.y;
+
+ gen2_emit_composite_vertex(sna, op,
+ srcX + w, srcY + h,
+ mskX + w, mskY + h,
+ dstX + w, dstY + h);
+ gen2_emit_composite_vertex(sna, op,
+ srcX, srcY + h,
+ mskX, mskY + h,
+ dstX, dstY + h);
+ gen2_emit_composite_vertex(sna, op,
+ srcX, srcY,
+ mskX, mskY,
+ dstX, dstY);
+}
+
+static void gen2_magic_ca_pass(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ uint32_t ablend, cblend;
+
+ if (!op->need_magic_ca_pass)
+ return;
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(8) | 2);
+ OUT_BATCH(S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
+ gen2_get_blend_cntl(PictOpAdd,
+ op->has_component_alpha,
+ op->dst.format) |
+ S8_ENABLE_COLOR_BUFFER_WRITE);
+
+ gen2_get_blend_factors(op, &cblend, &ablend);
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
+ LOAD_TEXTURE_BLEND_STAGE(0) | 1);
+ OUT_BATCH(cblend);
+ OUT_BATCH(ablend);
+
+ memcpy(sna->kgem.batch + sna->kgem.nbatch,
+ sna->kgem.batch + sna->render_state.gen2.vertex_offset,
+ (1 + 3*sna->render.vertex_index)*sizeof(uint32_t));
+ sna->kgem.nbatch += 1 + 3*sna->render.vertex_index;
+}
+
+static void gen2_vertex_flush(struct sna *sna)
+{
+ if (sna->render.vertex_index == 0)
+ return;
+
+ sna->kgem.batch[sna->render_state.gen2.vertex_offset] |=
+ sna->render.vertex_index - 1;
+
+ if (sna->render.op)
+ gen2_magic_ca_pass(sna, sna->render.op);
+
+ sna->render_state.gen2.vertex_offset = 0;
+ sna->render.vertex_index = 0;
+}
+
+inline static int gen2_get_rectangles(struct sna *sna,
+ const const struct sna_composite_op *op,
+ int want)
+{
+ struct gen2_render_state *state = &sna->render_state.gen2;
+ int rem = batch_space(sna), size, need;
+
+ need = 0;
+ size = 3*op->floats_per_vertex;
+ if (op->need_magic_ca_pass)
+ need += 5, size *= 2;
+
+ need += size;
+ if (state->vertex_offset == 0)
+ need += 2;
+
+ if (rem < need)
+ return 0;
+
+ if (state->vertex_offset == 0) {
+ state->vertex_offset = sna->kgem.nbatch;
+ OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST);
+ rem--;
+ }
+
+ if (want * size > rem)
+ want = rem / size;
+
+ sna->render.vertex_index += 3*want;
+ return want;
+}
+
+fastcall static void
+gen2_render_composite_blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ if (!gen2_get_rectangles(sna, op, 1)) {
+ gen2_emit_composite_state(sna, op);
+ gen2_get_rectangles(sna, op, 1);
+ }
+
+ gen2_emit_composite_primitive(sna, op,
+ r->src.x, r->src.y,
+ r->mask.x, r->mask.y,
+ r->dst.x, r->dst.y,
+ r->width, r->height);
+}
+
+static void
+gen2_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = gen2_get_rectangles(sna, op, nbox);
+ if (nbox_this_time == 0) {
+ gen2_emit_composite_state(sna, op);
+ nbox_this_time = gen2_get_rectangles(sna, op, nbox);
+ }
+ nbox -= nbox_this_time;
+
+ do {
+ gen2_emit_composite_primitive(sna, op,
+ box->x1, box->y1,
+ box->x1, box->y1,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1);
+ box++;
+ } while (--nbox_this_time);
+ } while (nbox);
+}
+
+static void gen2_render_composite_done(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ gen2_vertex_flush(sna);
+ sna->render.op = NULL;
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ sna_render_composite_redirect_done(sna, op);
+
+ if (op->src.bo)
+ kgem_bo_destroy(&sna->kgem, op->src.bo);
+ if (op->mask.bo)
+ kgem_bo_destroy(&sna->kgem, op->mask.bo);
+}
+
+static Bool
+gen2_composite_solid_init(struct sna *sna,
+ struct sna_composite_channel *channel,
+ uint32_t color)
+{
+ channel->filter = PictFilterNearest;
+ channel->repeat = RepeatNormal;
+ channel->is_affine = TRUE;
+ channel->is_solid = TRUE;
+ channel->transform = NULL;
+ channel->width = 1;
+ channel->height = 1;
+ channel->pict_format = PICT_a8r8g8b8;
+
+ channel->bo = sna_render_get_solid(sna, color);
+
+ channel->scale[0] = channel->scale[1] = 1;
+ channel->offset[0] = channel->offset[1] = 0;
+ return channel->bo != NULL;
+}
+
+static int
+gen2_composite_picture(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int x, int y,
+ int w, int h,
+ int dst_x, int dst_y)
+{
+ PixmapPtr pixmap;
+ uint32_t color;
+ int16_t dx, dy;
+
+ DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
+ __FUNCTION__, x, y, w, h, dst_x, dst_y));
+
+ channel->is_solid = FALSE;
+ channel->card_format = -1;
+
+ if (sna_picture_is_solid(picture, &color))
+ return gen2_composite_solid_init(sna, channel, color);
+
+ if (picture->pDrawable == NULL)
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ if (!gen2_check_repeat(picture))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ if (!gen2_check_filter(picture))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
+ channel->filter = picture->filter;
+
+ pixmap = get_drawable_pixmap(picture->pDrawable);
+ get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
+
+ x += dx + picture->pDrawable->x;
+ y += dy + picture->pDrawable->y;
+
+ channel->is_affine = sna_transform_is_affine(picture->transform);
+ if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
+ DBG(("%s: integer translation (%d, %d), removing\n",
+ __FUNCTION__, dx, dy));
+ x += dx;
+ y += dy;
+ channel->transform = NULL;
+ channel->filter = PictFilterNearest;
+ } else
+ channel->transform = picture->transform;
+
+ if (!gen2_check_card_format(sna, picture->format))
+ return sna_render_picture_convert(sna, picture, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+
+ channel->pict_format = picture->format;
+ if (pixmap->drawable.width > 8192 || pixmap->drawable.height > 8192)
+ return sna_render_picture_extract(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ return sna_render_pixmap_bo(sna, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+}
+
+static Bool
+gen2_composite_set_target(struct sna *sna,
+ struct sna_composite_op *op,
+ PicturePtr dst)
+{
+ struct sna_pixmap *priv;
+
+ op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
+ op->dst.format = dst->format;
+ op->dst.width = op->dst.pixmap->drawable.width;
+ op->dst.height = op->dst.pixmap->drawable.height;
+
+ priv = sna_pixmap_force_to_gpu(op->dst.pixmap);
+ if (priv == NULL)
+ return FALSE;
+
+ op->dst.bo = priv->gpu_bo;
+ if (!priv->gpu_only)
+ op->damage = &priv->gpu_damage;
+
+ get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
+ &op->dst.x, &op->dst.y);
+ return TRUE;
+}
+
+static Bool
+try_blt(struct sna *sna,
+ PicturePtr dst,
+ PicturePtr source,
+ int width, int height)
+{
+ uint32_t color;
+
+ if (sna->kgem.mode == KGEM_BLT) {
+ DBG(("%s: already performing BLT\n", __FUNCTION__));
+ return TRUE;
+ }
+
+ if (width > 2048 || height > 2048) {
+ DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
+ __FUNCTION__, width, height));
+ return TRUE;
+ }
+
+ /* If it is a solid, try to use the BLT paths */
+ if (sna_picture_is_solid(source, &color))
+ return TRUE;
+
+ if (!source->pDrawable)
+ return FALSE;
+
+ return is_cpu(source->pDrawable);
+}
+
+static Bool
+gen2_render_composite(struct sna *sna,
+ uint8_t op,
+ PicturePtr src,
+ PicturePtr mask,
+ PicturePtr dst,
+ int16_t src_x, int16_t src_y,
+ int16_t mask_x, int16_t mask_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t width, int16_t height,
+ struct sna_composite_op *tmp)
+{
+ DBG(("%s()\n", __FUNCTION__));
+
+ /* Try to use the BLT engine unless it implies a
+ * 3D -> 2D context switch.
+ */
+ if (mask == NULL &&
+ try_blt(sna, dst, src, width, height) &&
+ sna_blt_composite(sna,
+ op, src, dst,
+ src_x, src_y,
+ dst_x, dst_y,
+ width, height,
+ tmp))
+ return TRUE;
+
+ if (op >= ARRAY_SIZE(gen2_blend_op)) {
+ DBG(("%s: fallback due to unhandled blend op: %d\n",
+ __FUNCTION__, op));
+ return FALSE;
+ }
+
+ if (!gen2_check_dst_format(dst->format)) {
+ DBG(("%s: fallback due to unhandled dst format: %x\n",
+ __FUNCTION__, dst->format));
+ return FALSE;
+ }
+
+ if (need_tiling(sna, width, height))
+ return sna_tiling_composite(sna,
+ op, src, mask, dst,
+ src_x, src_y,
+ mask_x, mask_y,
+ dst_x, dst_y,
+ width, height,
+ tmp);
+
+ memset(&tmp->u.gen2, 0, sizeof(tmp->u.gen2));
+
+ if (!gen2_composite_set_target(sna, tmp, dst)) {
+ DBG(("%s: unable to set render target\n",
+ __FUNCTION__));
+ return FALSE;
+ }
+
+ tmp->op = op;
+ if (tmp->dst.width > 2048 ||
+ tmp->dst.height > 2048 ||
+ tmp->dst.bo->pitch > 8192) {
+ if (!sna_render_composite_redirect(sna, tmp,
+ dst_x, dst_y, width, height))
+ return FALSE;
+ }
+
+ switch (gen2_composite_picture(sna, src, &tmp->src,
+ src_x, src_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ goto cleanup_dst;
+ case 0:
+ gen2_composite_solid_init(sna, &tmp->src, 0);
+ case 1:
+ break;
+ }
+
+ if (mask) {
+ switch (gen2_composite_picture(sna, mask, &tmp->mask,
+ mask_x, mask_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ goto cleanup_src;
+ case 0:
+ gen2_composite_solid_init(sna, &tmp->mask, 0);
+ case 1:
+ break;
+ }
+
+ if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
+ /* Check if it's component alpha that relies on a source alpha
+ * and on the source value. We can only get one of those
+ * into the single source value that we get to blend with.
+ */
+ tmp->has_component_alpha = TRUE;
+ if (gen2_blend_op[op].src_alpha &&
+ (gen2_blend_op[op].src_blend != BLENDFACTOR_ZERO)) {
+ if (op != PictOpOver)
+ return FALSE;
+
+ tmp->need_magic_ca_pass = TRUE;
+ tmp->op = PictOpOutReverse;
+ }
+ }
+ }
+
+ tmp->blt = gen2_render_composite_blt;
+ tmp->boxes = gen2_render_composite_boxes;
+ tmp->done = gen2_render_composite_done;
+
+ if (!kgem_check_bo(&sna->kgem, tmp->dst.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->src.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->mask.bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen2_emit_composite_state(sna, tmp);
+
+ sna->render.op = tmp;
+ return TRUE;
+
+cleanup_src:
+ if (tmp->src.bo)
+ kgem_bo_destroy(&sna->kgem, tmp->src.bo);
+cleanup_dst:
+ if (tmp->redirect.real_bo)
+ kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
+ return FALSE;
+}
+
+static void
+gen2_render_reset(struct sna *sna)
+{
+ sna->render_state.gen2.need_invariant = TRUE;
+ sna->render_state.gen2.vertex_offset = 0;
+}
+
+static void
+gen2_render_flush(struct sna *sna)
+{
+ gen2_vertex_flush(sna);
+}
+
+static void
+gen2_render_context_switch(struct sna *sna,
+ int new_mode)
+{
+}
+
+static void
+gen2_render_fini(struct sna *sna)
+{
+}
+
+Bool gen2_render_init(struct sna *sna)
+{
+ struct sna_render *render = &sna->render;
+
+ gen2_render_reset(sna);
+
+ /* Use the BLT (and overlay) for everything except when forced to
+ * use the texture combiners.
+ */
+ render->composite = gen2_render_composite;
+
+ /* XXX Y-tiling copies */
+
+ render->reset = gen2_render_reset;
+ render->flush = gen2_render_flush;
+ render->context_switch = gen2_render_context_switch;
+ render->fini = gen2_render_fini;
+
+ render->max_3d_size = 2048;
+ return TRUE;
+}
diff --git a/src/sna/gen2_render.h b/src/sna/gen2_render.h
new file mode 100644
index 00000000..945cd846
--- /dev/null
+++ b/src/sna/gen2_render.h
@@ -0,0 +1,785 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef GEN2_RENDER_H
+#define GEN2_RENDER_H
+
+#define CMD_3D (0x3<<29)
+
+#define PRIM3D_INLINE (CMD_3D | (0x1f<<24))
+#define PRIM3D_TRILIST (0x0<<18)
+#define PRIM3D_TRISTRIP (0x1<<18)
+#define PRIM3D_TRISTRIP_RVRSE (0x2<<18)
+#define PRIM3D_TRIFAN (0x3<<18)
+#define PRIM3D_POLY (0x4<<18)
+#define PRIM3D_LINELIST (0x5<<18)
+#define PRIM3D_LINESTRIP (0x6<<18)
+#define PRIM3D_RECTLIST (0x7<<18)
+#define PRIM3D_POINTLIST (0x8<<18)
+#define PRIM3D_DIB (0x9<<18)
+#define PRIM3D_CLEAR_RECT (0xa<<18)
+#define PRIM3D_ZONE_INIT (0xd<<18)
+#define PRIM3D_MASK (0x1f<<18)
+
+#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24))
+#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16)
+#define AA_LINE_ECAAR_WIDTH_0_5 0
+#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14)
+#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14)
+#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14)
+#define AA_LINE_REGION_WIDTH_ENABLE (1<<8)
+#define AA_LINE_REGION_WIDTH_0_5 0
+#define AA_LINE_REGION_WIDTH_1_0 (1<<6)
+#define AA_LINE_REGION_WIDTH_2_0 (2<<6)
+#define AA_LINE_REGION_WIDTH_4_0 (3<<6)
+#define AA_LINE_ENABLE ((1<<1) | 1)
+#define AA_LINE_DISABLE (1<<1)
+
+#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
+/* Dword 1 */
+#define BUF_3D_ID_COLOR_BACK (0x3<<24)
+#define BUF_3D_ID_DEPTH (0x7<<24)
+#define BUF_3D_USE_FENCE (1<<23)
+#define BUF_3D_TILED_SURFACE (1<<22)
+#define BUF_3D_TILE_WALK_X 0
+#define BUF_3D_TILE_WALK_Y (1<<21)
+#define BUF_3D_PITCH(x) (((x)/4)<<2)
+/* Dword 2 */
+#define BUF_3D_ADDR(x) ((x) & ~0x3)
+
+#define _3DSTATE_COLOR_FACTOR_CMD (CMD_3D | (0x1d<<24) | (0x1<<16))
+
+#define _3DSTATE_COLOR_FACTOR_N_CMD(stage) (CMD_3D | (0x1d<<24) | \
+ ((0x90+(stage))<<16))
+
+#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16))
+
+#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16))
+
+#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16))
+
+#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16))
+
+#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16))
+/* Dword 1 */
+#define DSTORG_HORT_BIAS(x) ((x)<<20)
+#define DSTORG_VERT_BIAS(x) ((x)<<16)
+#define COLOR_4_2_2_CHNL_WRT_ALL 0
+#define COLOR_4_2_2_CHNL_WRT_Y (1<<12)
+#define COLOR_4_2_2_CHNL_WRT_CR (2<<12)
+#define COLOR_4_2_2_CHNL_WRT_CB (3<<12)
+#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12)
+#define COLR_BUF_8BIT 0
+#define COLR_BUF_RGB555 (1<<8)
+#define COLR_BUF_RGB565 (2<<8)
+#define COLR_BUF_ARGB8888 (3<<8)
+#define COLR_BUF_ARGB4444 (8<<8)
+#define COLR_BUF_ARGB1555 (9<<8)
+#define DEPTH_IS_Z 0
+#define DEPTH_IS_W (1<<6)
+#define DEPTH_FRMT_16_FIXED 0
+#define DEPTH_FRMT_16_FLOAT (1<<2)
+#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2)
+#define DEPTH_FRMT_24_FLOAT_8_OTHER (3<<2)
+#define VERT_LINE_STRIDE_1 (1<<1)
+#define VERT_LINE_STRIDE_0 0
+#define VERT_LINE_STRIDE_OFS_1 1
+#define VERT_LINE_STRIDE_OFS_0 0
+
+#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3)
+/* Dword 1 */
+#define DRAW_RECT_DIS_DEPTH_OFS (1<<30)
+#define DRAW_DITHER_OFS_X(x) ((x)<<26)
+#define DRAW_DITHER_OFS_Y(x) ((x)<<24)
+/* Dword 2 */
+#define DRAW_YMIN(x) ((x)<<16)
+#define DRAW_XMIN(x) (x)
+/* Dword 3 */
+#define DRAW_YMAX(x) ((x)<<16)
+#define DRAW_XMAX(x) (x)
+/* Dword 4 */
+#define DRAW_YORG(x) ((x)<<16)
+#define DRAW_XORG(x) (x)
+
+#define _3DSTATE_ENABLES_1_CMD (CMD_3D|(0x3<<24))
+#define ENABLE_LOGIC_OP_MASK ((1<<23)|(1<<22))
+#define ENABLE_LOGIC_OP ((1<<23)|(1<<22))
+#define DISABLE_LOGIC_OP (1<<23)
+#define ENABLE_STENCIL_TEST ((1<<21)|(1<<20))
+#define DISABLE_STENCIL_TEST (1<<21)
+#define ENABLE_DEPTH_BIAS ((1<<11)|(1<<10))
+#define DISABLE_DEPTH_BIAS (1<<11)
+#define ENABLE_SPEC_ADD_MASK ((1<<9)|(1<<8))
+#define ENABLE_SPEC_ADD ((1<<9)|(1<<8))
+#define DISABLE_SPEC_ADD (1<<9)
+#define ENABLE_DIS_FOG_MASK ((1<<7)|(1<<6))
+#define ENABLE_FOG ((1<<7)|(1<<6))
+#define DISABLE_FOG (1<<7)
+#define ENABLE_DIS_ALPHA_TEST_MASK ((1<<5)|(1<<4))
+#define ENABLE_ALPHA_TEST ((1<<5)|(1<<4))
+#define DISABLE_ALPHA_TEST (1<<5)
+#define ENABLE_DIS_CBLEND_MASK ((1<<3)|(1<<2))
+#define ENABLE_COLOR_BLEND ((1<<3)|(1<<2))
+#define DISABLE_COLOR_BLEND (1<<3)
+#define ENABLE_DIS_DEPTH_TEST_MASK ((1<<1)|1)
+#define ENABLE_DEPTH_TEST ((1<<1)|1)
+#define DISABLE_DEPTH_TEST (1<<1)
+
+/* _3DSTATE_ENABLES_2, p138 */
+#define _3DSTATE_ENABLES_2_CMD (CMD_3D|(0x4<<24))
+#define ENABLE_STENCIL_WRITE ((1<<21)|(1<<20))
+#define DISABLE_STENCIL_WRITE (1<<21)
+#define ENABLE_TEX_CACHE ((1<<17)|(1<<16))
+#define DISABLE_TEX_CACHE (1<<17)
+#define ENABLE_DITHER ((1<<9)|(1<<8))
+#define DISABLE_DITHER (1<<9)
+#define ENABLE_COLOR_MASK (1<<10)
+#define WRITEMASK_ALPHA (1<<7)
+#define WRITEMASK_ALPHA_SHIFT 7
+#define WRITEMASK_RED (1<<6)
+#define WRITEMASK_RED_SHIFT 6
+#define WRITEMASK_GREEN (1<<5)
+#define WRITEMASK_GREEN_SHIFT 5
+#define WRITEMASK_BLUE (1<<4)
+#define WRITEMASK_BLUE_SHIFT 4
+#define WRITEMASK_MASK ((1<<4)|(1<<5)|(1<<6)|(1<<7))
+#define ENABLE_COLOR_WRITE ((1<<3)|(1<<2))
+#define DISABLE_COLOR_WRITE (1<<3)
+#define ENABLE_DIS_DEPTH_WRITE_MASK 0x3
+#define ENABLE_DEPTH_WRITE ((1<<1)|1)
+#define DISABLE_DEPTH_WRITE (1<<1)
+
+/* _3DSTATE_FOG_COLOR, p139 */
+#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24))
+#define FOG_COLOR_RED(x) ((x)<<16)
+#define FOG_COLOR_GREEN(x) ((x)<<8)
+#define FOG_COLOR_BLUE(x) (x)
+
+/* _3DSTATE_FOG_MODE, p140 */
+#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2)
+/* Dword 1 */
+#define FOGFUNC_ENABLE (1<<31)
+#define FOGFUNC_VERTEX 0
+#define FOGFUNC_PIXEL_EXP (1<<28)
+#define FOGFUNC_PIXEL_EXP2 (2<<28)
+#define FOGFUNC_PIXEL_LINEAR (3<<28)
+#define FOGSRC_INDEX_Z (1<<27)
+#define FOGSRC_INDEX_W ((1<<27)|(1<<25))
+#define FOG_LINEAR_CONST (1<<24)
+#define FOG_CONST_1(x) ((x)<<4)
+#define ENABLE_FOG_DENSITY (1<<23)
+/* Dword 2 */
+#define FOG_CONST_2(x) (x)
+/* Dword 3 */
+#define FOG_DENSITY(x) (x)
+
+/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p142 */
+#define _3DSTATE_INDPT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24))
+#define ENABLE_INDPT_ALPHA_BLEND ((1<<23)|(1<<22))
+#define DISABLE_INDPT_ALPHA_BLEND (1<<23)
+#define ALPHA_BLENDFUNC_MASK 0x3f0000
+#define ENABLE_ALPHA_BLENDFUNC (1<<21)
+#define ABLENDFUNC_ADD 0
+#define ABLENDFUNC_SUB (1<<16)
+#define ABLENDFUNC_RVSE_SUB (2<<16)
+#define ABLENDFUNC_MIN (3<<16)
+#define ABLENDFUNC_MAX (4<<16)
+#define SRC_DST_ABLEND_MASK 0xfff
+#define ENABLE_SRC_ABLEND_FACTOR (1<<11)
+#define SRC_ABLEND_FACT(x) ((x)<<6)
+#define ENABLE_DST_ABLEND_FACTOR (1<<5)
+#define DST_ABLEND_FACT(x) (x)
+
+#define BLENDFACTOR_ZERO 0x01
+#define BLENDFACTOR_ONE 0x02
+#define BLENDFACTOR_SRC_COLR 0x03
+#define BLENDFACTOR_INV_SRC_COLR 0x04
+#define BLENDFACTOR_SRC_ALPHA 0x05
+#define BLENDFACTOR_INV_SRC_ALPHA 0x06
+#define BLENDFACTOR_DST_ALPHA 0x07
+#define BLENDFACTOR_INV_DST_ALPHA 0x08
+#define BLENDFACTOR_DST_COLR 0x09
+#define BLENDFACTOR_INV_DST_COLR 0x0a
+#define BLENDFACTOR_SRC_ALPHA_SATURATE 0x0b
+#define BLENDFACTOR_CONST_COLOR 0x0c
+#define BLENDFACTOR_INV_CONST_COLOR 0x0d
+#define BLENDFACTOR_CONST_ALPHA 0x0e
+#define BLENDFACTOR_INV_CONST_ALPHA 0x0f
+#define BLENDFACTOR_MASK 0x0f
+
+/* _3DSTATE_MAP_BLEND_ARG, p152 */
+#define _3DSTATE_MAP_BLEND_ARG_CMD(stage) (CMD_3D|(0x0e<<24)|((stage)<<20))
+
+#define TEXPIPE_COLOR 0
+#define TEXPIPE_ALPHA (1<<18)
+#define TEXPIPE_KILL (2<<18)
+#define TEXBLEND_ARG0 0
+#define TEXBLEND_ARG1 (1<<15)
+#define TEXBLEND_ARG2 (2<<15)
+#define TEXBLEND_ARG3 (3<<15)
+#define TEXBLENDARG_MODIFY_PARMS (1<<6)
+#define TEXBLENDARG_REPLICATE_ALPHA (1<<5)
+#define TEXBLENDARG_INV_ARG (1<<4)
+#define TEXBLENDARG_ONE 0
+#define TEXBLENDARG_FACTOR 0x01
+#define TEXBLENDARG_ACCUM 0x02
+#define TEXBLENDARG_DIFFUSE 0x03
+#define TEXBLENDARG_SPEC 0x04
+#define TEXBLENDARG_CURRENT 0x05
+#define TEXBLENDARG_TEXEL0 0x06
+#define TEXBLENDARG_TEXEL1 0x07
+#define TEXBLENDARG_TEXEL2 0x08
+#define TEXBLENDARG_TEXEL3 0x09
+#define TEXBLENDARG_FACTOR_N 0x0e
+
+/* _3DSTATE_MAP_BLEND_OP, p155 */
+#define _3DSTATE_MAP_BLEND_OP_CMD(stage) (CMD_3D|(0x0d<<24)|((stage)<<20))
+#if 0
+# define TEXPIPE_COLOR 0
+# define TEXPIPE_ALPHA (1<<18)
+# define TEXPIPE_KILL (2<<18)
+#endif
+#define ENABLE_TEXOUTPUT_WRT_SEL (1<<17)
+#define TEXOP_OUTPUT_CURRENT 0
+#define TEXOP_OUTPUT_ACCUM (1<<15)
+#define ENABLE_TEX_CNTRL_STAGE ((1<<12)|(1<<11))
+#define DISABLE_TEX_CNTRL_STAGE (1<<12)
+#define TEXOP_SCALE_SHIFT 9
+#define TEXOP_SCALE_1X (0 << TEXOP_SCALE_SHIFT)
+#define TEXOP_SCALE_2X (1 << TEXOP_SCALE_SHIFT)
+#define TEXOP_SCALE_4X (2 << TEXOP_SCALE_SHIFT)
+#define TEXOP_MODIFY_PARMS (1<<8)
+#define TEXOP_LAST_STAGE (1<<7)
+#define TEXBLENDOP_KILLPIXEL 0x02
+#define TEXBLENDOP_ARG1 0x01
+#define TEXBLENDOP_ARG2 0x02
+#define TEXBLENDOP_MODULATE 0x03
+#define TEXBLENDOP_ADD 0x06
+#define TEXBLENDOP_ADDSIGNED 0x07
+#define TEXBLENDOP_BLEND 0x08
+#define TEXBLENDOP_BLEND_AND_ADD 0x09
+#define TEXBLENDOP_SUBTRACT 0x0a
+#define TEXBLENDOP_DOT3 0x0b
+#define TEXBLENDOP_DOT4 0x0c
+#define TEXBLENDOP_MODULATE_AND_ADD 0x0d
+#define TEXBLENDOP_MODULATE_2X_AND_ADD 0x0e
+#define TEXBLENDOP_MODULATE_4X_AND_ADD 0x0f
+
+/* _3DSTATE_MAP_BUMP_TABLE, p160 TODO */
+/* _3DSTATE_MAP_COLOR_CHROMA_KEY, p161 TODO */
+
+#define _3DSTATE_MAP_COORD_TRANSFORM ((3<<29)|(0x1d<<24)|(0x8c<<16))
+#define DISABLE_TEX_TRANSFORM (1<<28)
+#define TEXTURE_SET(x) (x<<29)
+
+#define _3DSTATE_VERTEX_TRANSFORM ((3<<29)|(0x1d<<24)|(0x8b<<16))
+#define DISABLE_VIEWPORT_TRANSFORM (1<<31)
+#define DISABLE_PERSPECTIVE_DIVIDE (1<<29)
+
+/* _3DSTATE_MAP_COORD_SET_BINDINGS, p162 */
+#define _3DSTATE_MAP_COORD_SETBIND_CMD (CMD_3D|(0x1d<<24)|(0x02<<16))
+#define TEXBIND_MASK3 ((1<<15)|(1<<14)|(1<<13)|(1<<12))
+#define TEXBIND_MASK2 ((1<<11)|(1<<10)|(1<<9)|(1<<8))
+#define TEXBIND_MASK1 ((1<<7)|(1<<6)|(1<<5)|(1<<4))
+#define TEXBIND_MASK0 ((1<<3)|(1<<2)|(1<<1)|1)
+
+#define TEXBIND_SET3(x) ((x)<<12)
+#define TEXBIND_SET2(x) ((x)<<8)
+#define TEXBIND_SET1(x) ((x)<<4)
+#define TEXBIND_SET0(x) (x)
+
+#define TEXCOORDSRC_KEEP 0
+#define TEXCOORDSRC_DEFAULT 0x01
+#define TEXCOORDSRC_VTXSET_0 0x08
+#define TEXCOORDSRC_VTXSET_1 0x09
+#define TEXCOORDSRC_VTXSET_2 0x0a
+#define TEXCOORDSRC_VTXSET_3 0x0b
+#define TEXCOORDSRC_VTXSET_4 0x0c
+#define TEXCOORDSRC_VTXSET_5 0x0d
+#define TEXCOORDSRC_VTXSET_6 0x0e
+#define TEXCOORDSRC_VTXSET_7 0x0f
+
+#define MAP_UNIT(unit) ((unit)<<16)
+#define MAP_UNIT_MASK (0x7<<16)
+
+/* _3DSTATE_MAP_COORD_SETS, p164 */
+#define _3DSTATE_MAP_COORD_SET_CMD (CMD_3D|(0x1c<<24)|(0x01<<19))
+#define TEXCOORD_SET(n) ((n)<<16)
+#define ENABLE_TEXCOORD_PARAMS (1<<15)
+#define TEXCOORDS_ARE_NORMAL (1<<14)
+#define TEXCOORDS_ARE_IN_TEXELUNITS 0
+#define TEXCOORDTYPE_CARTESIAN 0
+#define TEXCOORDTYPE_HOMOGENEOUS (1<<11)
+#define TEXCOORDTYPE_VECTOR (2<<11)
+#define TEXCOORDTYPE_MASK (0x7<<11)
+#define ENABLE_ADDR_V_CNTL (1<<7)
+#define ENABLE_ADDR_U_CNTL (1<<3)
+#define TEXCOORD_ADDR_V_MODE(x) ((x)<<4)
+#define TEXCOORD_ADDR_U_MODE(x) (x)
+#define TEXCOORDMODE_WRAP 0
+#define TEXCOORDMODE_MIRROR 1
+#define TEXCOORDMODE_CLAMP 2
+#define TEXCOORDMODE_WRAP_SHORTEST 3
+#define TEXCOORDMODE_CLAMP_BORDER 4
+#define TEXCOORD_ADDR_V_MASK 0x70
+#define TEXCOORD_ADDR_U_MASK 0x7
+
+/* _3DSTATE_MAP_CUBE, p168 TODO */
+#define _3DSTATE_MAP_CUBE (CMD_3D|(0x1c<<24)|(0x0a<<19))
+#define CUBE_NEGX_ENABLE (1<<5)
+#define CUBE_POSX_ENABLE (1<<4)
+#define CUBE_NEGY_ENABLE (1<<3)
+#define CUBE_POSY_ENABLE (1<<2)
+#define CUBE_NEGZ_ENABLE (1<<1)
+#define CUBE_POSZ_ENABLE (1<<0)
+
+#define _3DSTATE_MAP_INFO_CMD (CMD_3D|(0x1d<<24)|(0x0<<16)|3)
+#define TEXMAP_INDEX(x) ((x)<<28)
+#define MAP_SURFACE_8BIT (1<<24)
+#define MAP_SURFACE_16BIT (2<<24)
+#define MAP_SURFACE_32BIT (3<<24)
+#define MAP_FORMAT_2D (0)
+#define MAP_FORMAT_3D_CUBE (1<<11)
+
+/* _3DSTATE_MODES_1, p190 */
+#define _3DSTATE_MODES_1_CMD (CMD_3D|(0x08<<24))
+#define BLENDFUNC_MASK 0x3f0000
+#define ENABLE_COLR_BLND_FUNC (1<<21)
+#define BLENDFUNC_ADD 0
+#define BLENDFUNC_SUB (1<<16)
+#define BLENDFUNC_RVRSE_SUB (2<<16)
+#define BLENDFUNC_MIN (3<<16)
+#define BLENDFUNC_MAX (4<<16)
+#define SRC_DST_BLND_MASK 0xfff
+#define ENABLE_SRC_BLND_FACTOR (1<<11)
+#define ENABLE_DST_BLND_FACTOR (1<<5)
+#define SRC_BLND_FACT(x) ((x)<<6)
+#define DST_BLND_FACT(x) (x)
+
+/* _3DSTATE_MODES_2, p192 */
+#define _3DSTATE_MODES_2_CMD (CMD_3D|(0x0f<<24))
+#define ENABLE_GLOBAL_DEPTH_BIAS (1<<22)
+#define GLOBAL_DEPTH_BIAS(x) ((x)<<14)
+#define ENABLE_ALPHA_TEST_FUNC (1<<13)
+#define ENABLE_ALPHA_REF_VALUE (1<<8)
+#define ALPHA_TEST_FUNC(x) ((x)<<9)
+#define ALPHA_REF_VALUE(x) (x)
+
+#define ALPHA_TEST_REF_MASK 0x3fff
+
+/* _3DSTATE_MODES_3, p193 */
+#define _3DSTATE_MODES_3_CMD (CMD_3D|(0x02<<24))
+#define DEPTH_TEST_FUNC_MASK 0x1f0000
+#define ENABLE_DEPTH_TEST_FUNC (1<<20)
+/* Uses COMPAREFUNC */
+#define DEPTH_TEST_FUNC(x) ((x)<<16)
+#define ENABLE_ALPHA_SHADE_MODE (1<<11)
+#define ENABLE_FOG_SHADE_MODE (1<<9)
+#define ENABLE_SPEC_SHADE_MODE (1<<7)
+#define ENABLE_COLOR_SHADE_MODE (1<<5)
+#define ALPHA_SHADE_MODE(x) ((x)<<10)
+#define FOG_SHADE_MODE(x) ((x)<<8)
+#define SPEC_SHADE_MODE(x) ((x)<<6)
+#define COLOR_SHADE_MODE(x) ((x)<<4)
+#define CULLMODE_MASK 0xf
+#define ENABLE_CULL_MODE (1<<3)
+#define CULLMODE_BOTH 0
+#define CULLMODE_NONE 1
+#define CULLMODE_CW 2
+#define CULLMODE_CCW 3
+
+#define SHADE_MODE_LINEAR 0
+#define SHADE_MODE_FLAT 0x1
+
+/* _3DSTATE_MODES_4, p195 */
+#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x16<<24))
+#define ENABLE_LOGIC_OP_FUNC (1<<23)
+#define LOGIC_OP_FUNC(x) ((x)<<18)
+#define LOGICOP_MASK ((1<<18)|(1<<19)|(1<<20)|(1<<21))
+#define LOGICOP_CLEAR 0
+#define LOGICOP_NOR 0x1
+#define LOGICOP_AND_INV 0x2
+#define LOGICOP_COPY_INV 0x3
+#define LOGICOP_AND_RVRSE 0x4
+#define LOGICOP_INV 0x5
+#define LOGICOP_XOR 0x6
+#define LOGICOP_NAND 0x7
+#define LOGICOP_AND 0x8
+#define LOGICOP_EQUIV 0x9
+#define LOGICOP_NOOP 0xa
+#define LOGICOP_OR_INV 0xb
+#define LOGICOP_COPY 0xc
+#define LOGICOP_OR_RVRSE 0xd
+#define LOGICOP_OR 0xe
+#define LOGICOP_SET 0xf
+#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00))
+#define ENABLE_STENCIL_TEST_MASK (1<<17)
+#define STENCIL_TEST_MASK(x) ((x)<<8)
+#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff))
+#define ENABLE_STENCIL_WRITE_MASK (1<<16)
+#define STENCIL_WRITE_MASK(x) ((x)&0xff)
+
+/* _3DSTATE_MODES_5, p196 */
+#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24))
+#define ENABLE_SPRITE_POINT_TEX (1<<23)
+#define SPRITE_POINT_TEX_ON (1<<22)
+#define SPRITE_POINT_TEX_OFF 0
+#define FLUSH_RENDER_CACHE (1<<18)
+#define FLUSH_TEXTURE_CACHE (1<<16)
+#define FIXED_LINE_WIDTH_MASK 0xfc00
+#define ENABLE_FIXED_LINE_WIDTH (1<<15)
+#define FIXED_LINE_WIDTH(x) ((x)<<10)
+#define FIXED_POINT_WIDTH_MASK 0x3ff
+#define ENABLE_FIXED_POINT_WIDTH (1<<9)
+#define FIXED_POINT_WIDTH(x) (x)
+
+/* _3DSTATE_RASTERIZATION_RULES, p198 */
+#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24))
+#define ENABLE_POINT_RASTER_RULE (1<<15)
+#define OGL_POINT_RASTER_RULE (1<<13)
+#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8)
+#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5)
+#define ENABLE_TRI_STRIP_PROVOKE_VRTX (1<<2)
+#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6)
+#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3)
+#define TRI_STRIP_PROVOKE_VRTX(x) (x)
+
+/* _3DSTATE_SCISSOR_ENABLE, p200 */
+#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19))
+#define ENABLE_SCISSOR_RECT ((1<<1) | 1)
+#define DISABLE_SCISSOR_RECT (1<<1)
+
+/* _3DSTATE_SCISSOR_RECTANGLE_0, p201 */
+#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1)
+/* Dword 1 */
+#define SCISSOR_RECT_0_YMIN(x) ((x)<<16)
+#define SCISSOR_RECT_0_XMIN(x) (x)
+/* Dword 2 */
+#define SCISSOR_RECT_0_YMAX(x) ((x)<<16)
+#define SCISSOR_RECT_0_XMAX(x) (x)
+
+/* _3DSTATE_STENCIL_TEST, p202 */
+#define _3DSTATE_STENCIL_TEST_CMD (CMD_3D|(0x09<<24))
+#define ENABLE_STENCIL_PARMS (1<<23)
+#define STENCIL_OPS_MASK (0xffc000)
+#define STENCIL_FAIL_OP(x) ((x)<<20)
+#define STENCIL_PASS_DEPTH_FAIL_OP(x) ((x)<<17)
+#define STENCIL_PASS_DEPTH_PASS_OP(x) ((x)<<14)
+
+#define ENABLE_STENCIL_TEST_FUNC_MASK ((1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9))
+#define ENABLE_STENCIL_TEST_FUNC (1<<13)
+/* Uses COMPAREFUNC */
+#define STENCIL_TEST_FUNC(x) ((x)<<9)
+#define STENCIL_REF_VALUE_MASK ((1<<8)|0xff)
+#define ENABLE_STENCIL_REF_VALUE (1<<8)
+#define STENCIL_REF_VALUE(x) (x)
+
+/* _3DSTATE_VERTEX_FORMAT, p204 */
+#define _3DSTATE_VFT0_CMD (CMD_3D|(0x05<<24))
+#define VFT0_POINT_WIDTH (1<<12)
+#define VFT0_TEX_COUNT_MASK (7<<8)
+#define VFT0_TEX_COUNT_SHIFT 8
+#define VFT0_TEX_COUNT(x) ((x)<<8)
+#define VFT0_SPEC (1<<7)
+#define VFT0_DIFFUSE (1<<6)
+#define VFT0_DEPTH_OFFSET (1<<5)
+#define VFT0_XYZ (1<<1)
+#define VFT0_XYZW (2<<1)
+#define VFT0_XY (3<<1)
+#define VFT0_XYW (4<<1)
+#define VFT0_XYZW_MASK (7<<1)
+
+/* _3DSTATE_VERTEX_FORMAT_2, p206 */
+#define _3DSTATE_VERTEX_FORMAT_2_CMD (CMD_3D|(0x0a<<24))
+#define VFT1_TEX7_FMT(x) ((x)<<14)
+#define VFT1_TEX6_FMT(x) ((x)<<12)
+#define VFT1_TEX5_FMT(x) ((x)<<10)
+#define VFT1_TEX4_FMT(x) ((x)<<8)
+#define VFT1_TEX3_FMT(x) ((x)<<6)
+#define VFT1_TEX2_FMT(x) ((x)<<4)
+#define VFT1_TEX1_FMT(x) ((x)<<2)
+#define VFT1_TEX0_FMT(x) (x)
+#define VFT1_TEX0_MASK 3
+#define VFT1_TEX1_SHIFT 2
+#define TEXCOORDFMT_2D 0
+#define TEXCOORDFMT_3D 1
+#define TEXCOORDFMT_4D 2
+#define TEXCOORDFMT_1D 3
+
+/*New stuff picked up along the way */
+
+#define MLC_LOD_BIAS_MASK ((1<<7)-1)
+
+/* _3DSTATE_VERTEX_TRANSFORM, p207 */
+#define _3DSTATE_VERTEX_TRANS_CMD (CMD_3D|(0x1d<<24)|(0x8b<<16)|0)
+#define _3DSTATE_VERTEX_TRANS_MTX_CMD (CMD_3D|(0x1d<<24)|(0x8b<<16)|6)
+/* Dword 1 */
+#define ENABLE_VIEWPORT_TRANSFORM ((1<<31)|(1<<30))
+#define DISABLE_VIEWPORT_TRANSFORM (1<<31)
+#define ENABLE_PERSP_DIVIDE ((1<<29)|(1<<28))
+#define DISABLE_PERSP_DIVIDE (1<<29)
+#define VRTX_TRANS_LOAD_MATRICES 0x7421
+#define VRTX_TRANS_NO_LOAD_MATRICES 0x0000
+/* Dword 2 -> 7 are matrix elements */
+
+/* _3DSTATE_W_STATE, p209 */
+#define _3DSTATE_W_STATE_CMD (CMD_3D|(0x1d<<24)|(0x8d<<16)|1)
+/* Dword 1 */
+#define MAGIC_W_STATE_DWORD1 0x00000008
+/* Dword 2 */
+#define WFAR_VALUE(x) (x)
+
+/* Stipple command, carried over from the i810, apparently:
+ */
+#define _3DSTATE_STIPPLE (CMD_3D|(0x1d<<24)|(0x83<<16))
+#define ST1_ENABLE (1<<16)
+#define ST1_MASK (0xffff)
+
+#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D|(0x1d<<24)|(0x04<<16))
+#define I1_LOAD_S(n) (1<<((n)+4))
+#define S3_POINT_WIDTH_SHIFT 23
+#define S3_LINE_WIDTH_SHIFT 19
+#define S3_ALPHA_SHADE_MODE_SHIFT 18
+#define S3_FOG_SHADE_MODE_SHIFT 17
+#define S3_SPEC_SHADE_MODE_SHIFT 16
+#define S3_COLOR_SHADE_MODE_SHIFT 15
+#define S3_CULL_MODE_SHIFT 13
+#define S3_CULLMODE_BOTH (0)
+#define S3_CULLMODE_NONE (1<<13)
+#define S3_CULLMODE_CW (2<<13)
+#define S3_CULLMODE_CCW (3<<13)
+#define S3_POINT_WIDTH_PRESENT (1<<12)
+#define S3_SPEC_FOG_PRESENT (1<<11)
+#define S3_DIFFUSE_PRESENT (1<<10)
+#define S3_DEPTH_OFFSET_PRESENT (1<<9)
+#define S3_POSITION_SHIFT 6
+#define S3_VERTEXHAS_XYZ (1<<6)
+#define S3_VERTEXHAS_XYZW (2<<6)
+#define S3_VERTEXHAS_XY (3<<6)
+#define S3_VERTEXHAS_XYW (4<<6)
+#define S3_ENABLE_SPEC_ADD (1<<5)
+#define S3_ENABLE_FOG (1<<4)
+#define S3_ENABLE_LOCAL_DEPTH_BIAS (1<<3)
+#define S3_ENABLE_SPRITE_POINT (1<<1)
+#define S3_ENABLE_ANTIALIASING 1
+#define S8_ENABLE_ALPHA_TEST (1<<31)
+#define S8_ALPHA_TEST_FUNC_SHIFT 28
+#define S8_ALPHA_REFVALUE_SHIFT 20
+#define S8_ENABLE_DEPTH_TEST (1<<19)
+#define S8_DEPTH_TEST_FUNC_SHIFT 16
+#define S8_ENABLE_COLOR_BLEND (1<<15)
+#define S8_COLOR_BLEND_FUNC_SHIFT 12
+#define S8_BLENDFUNC_ADD (0)
+#define S8_BLENDFUNC_SUB (1<<12)
+#define S8_BLENDFUNC_RVRSE_SUB (2<<12)
+#define S8_BLENDFUNC_MIN (3<<12)
+#define S8_BLENDFUNC_MAX (4<<12)
+#define S8_SRC_BLEND_FACTOR_SHIFT 8
+#define S8_DST_BLEND_FACTOR_SHIFT 4
+#define S8_ENABLE_DEPTH_BUFFER_WRITE (1<<3)
+#define S8_ENABLE_COLOR_BUFFER_WRITE (1<<2)
+
+#define _3DSTATE_LOAD_STATE_IMMEDIATE_2 (CMD_3D|(0x1d<<24)|(0x03<<16))
+#define LOAD_TEXTURE_MAP(x) (1<<((x)+11))
+#define LOAD_TEXTURE_BLEND_STAGE(x) (1<<((x)+7))
+#define LOAD_GLOBAL_COLOR_FACTOR (1<<6)
+
+#define TM0S0_ADDRESS_MASK 0xfffffffc
+#define TM0S0_USE_FENCE (1<<1)
+
+#define TM0S1_HEIGHT_SHIFT 21
+#define TM0S1_WIDTH_SHIFT 10
+#define TM0S1_PALETTE_SELECT (1<<9)
+#define TM0S1_MAPSURF_FORMAT_MASK (0x7 << 6)
+#define TM0S1_MAPSURF_FORMAT_SHIFT 6
+#define MAPSURF_8BIT_INDEXED (0<<6)
+#define MAPSURF_8BIT (1<<6)
+#define MAPSURF_16BIT (2<<6)
+#define MAPSURF_32BIT (3<<6)
+#define MAPSURF_411 (4<<6)
+#define MAPSURF_422 (5<<6)
+#define MAPSURF_COMPRESSED (6<<6)
+#define MAPSURF_4BIT_INDEXED (7<<6)
+#define TM0S1_MT_FORMAT_MASK (0x7 << 3)
+#define TM0S1_MT_FORMAT_SHIFT 3
+#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */
+#define MT_8BIT_IDX_RGB565 (0<<3) /* SURFACE_8BIT_INDEXED */
+#define MT_8BIT_IDX_ARGB1555 (1<<3)
+#define MT_8BIT_IDX_ARGB4444 (2<<3)
+#define MT_8BIT_IDX_AY88 (3<<3)
+#define MT_8BIT_IDX_ABGR8888 (4<<3)
+#define MT_8BIT_IDX_BUMP_88DVDU (5<<3)
+#define MT_8BIT_IDX_BUMP_655LDVDU (6<<3)
+#define MT_8BIT_IDX_ARGB8888 (7<<3)
+#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */
+#define MT_8BIT_L8 (1<<3)
+#define MT_8BIT_A8 (4<<3)
+#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */
+#define MT_16BIT_ARGB1555 (1<<3)
+#define MT_16BIT_ARGB4444 (2<<3)
+#define MT_16BIT_AY88 (3<<3)
+#define MT_16BIT_DIB_ARGB1555_8888 (4<<3)
+#define MT_16BIT_BUMP_88DVDU (5<<3)
+#define MT_16BIT_BUMP_655LDVDU (6<<3)
+#define MT_16BIT_DIB_RGB565_8888 (7<<3)
+#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */
+#define MT_32BIT_ABGR8888 (1<<3)
+#define MT_32BIT_XRGB8888 (2<<3)
+#define MT_32BIT_XBGR8888 (3<<3)
+#define MT_32BIT_BUMP_XLDVDU_8888 (6<<3)
+#define MT_32BIT_DIB_8888 (7<<3)
+#define MT_411_YUV411 (0<<3) /* SURFACE_411 */
+#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */
+#define MT_422_YCRCB_NORMAL (1<<3)
+#define MT_422_YCRCB_SWAPUV (2<<3)
+#define MT_422_YCRCB_SWAPUVY (3<<3)
+#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */
+#define MT_COMPRESS_DXT2_3 (1<<3)
+#define MT_COMPRESS_DXT4_5 (2<<3)
+#define MT_COMPRESS_FXT1 (3<<3)
+#define TM0S1_COLORSPACE_CONVERSION (1 << 2)
+#define TM0S1_TILED_SURFACE (1 << 1)
+#define TM0S1_TILE_WALK (1 << 0)
+
+#define TM0S2_PITCH_SHIFT 21
+#define TM0S2_CUBE_FACE_ENA_SHIFT 15
+#define TM0S2_CUBE_FACE_ENA_MASK (1<<15)
+#define TM0S2_MAP_FORMAT (1<<14)
+#define TM0S2_MAP_2D (0<<14)
+#define TM0S2_MAP_3D_CUBE (1<<14)
+#define TM0S2_VERTICAL_LINE_STRIDE (1<<13)
+#define TM0S2_VERITCAL_LINE_STRIDE_OFF (1<<12)
+#define TM0S2_OUTPUT_CHAN_SHIFT 10
+#define TM0S2_OUTPUT_CHAN_MASK (3<<10)
+
+#define TM0S3_MIP_FILTER_MASK (0x3<<30)
+#define TM0S3_MIP_FILTER_SHIFT 30
+#define MIPFILTER_NONE 0
+#define MIPFILTER_NEAREST 1
+#define MIPFILTER_LINEAR 3
+#define TM0S3_MAG_FILTER_MASK (0x3<<28)
+#define TM0S3_MAG_FILTER_SHIFT 28
+#define TM0S3_MIN_FILTER_MASK (0x3<<26)
+#define TM0S3_MIN_FILTER_SHIFT 26
+#define FILTER_NEAREST 0
+#define FILTER_LINEAR 1
+#define FILTER_ANISOTROPIC 2
+
+#define TM0S3_LOD_BIAS_SHIFT 17
+#define TM0S3_LOD_BIAS_MASK (0x1ff<<17)
+#define TM0S3_MAX_MIP_SHIFT 9
+#define TM0S3_MAX_MIP_MASK (0xff<<9)
+#define TM0S3_MIN_MIP_SHIFT 3
+#define TM0S3_MIN_MIP_MASK (0x3f<<3)
+#define TM0S3_KILL_PIXEL (1<<2)
+#define TM0S3_KEYED_FILTER (1<<1)
+#define TM0S3_CHROMA_KEY (1<<0)
+
+/* _3DSTATE_MAP_TEXEL_STREAM, p188 */
+#define _3DSTATE_MAP_TEX_STREAM_CMD (CMD_3D|(0x1c<<24)|(0x05<<19))
+#define DISABLE_TEX_STREAM_BUMP (1<<12)
+#define ENABLE_TEX_STREAM_BUMP ((1<<12)|(1<<11))
+#define TEX_MODIFY_UNIT_0 0
+#define TEX_MODIFY_UNIT_1 (1<<8)
+#define ENABLE_TEX_STREAM_COORD_SET (1<<7)
+#define TEX_STREAM_COORD_SET(x) ((x)<<4)
+#define ENABLE_TEX_STREAM_MAP_IDX (1<<3)
+#define TEX_STREAM_MAP_IDX(x) (x)
+
+#define FLUSH_MAP_CACHE (1<<0)
+
+#define _3DSTATE_MAP_FILTER_CMD (CMD_3D|(0x1c<<24)|(0x02<<19))
+#define FILTER_TEXMAP_INDEX(x) ((x) << 16)
+#define MAG_MODE_FILTER_ENABLE (1 << 5)
+#define MIN_MODE_FILTER_ENABLE (1 << 2)
+#define MAG_MAPFILTER_NEAREST (0 << 3)
+#define MAG_MAPFILTER_LINEAR (1 << 3)
+#define MAG_MAPFILTER_ANISOTROPIC (2 << 3)
+#define MIN_MAPFILTER_NEAREST (0)
+#define MIN_MAPFILTER_LINEAR (1)
+#define MIN_MAPFILTER_ANISOTROPIC (2)
+#define ENABLE_KEYS (1<<15)
+#define DISABLE_COLOR_KEY 0
+#define DISABLE_CHROMA_KEY 0
+#define DISABLE_KILL_PIXEL 0
+#define ENABLE_MIP_MODE_FILTER (1 << 9)
+#define MIPFILTER_NONE 0
+#define MIPFILTER_NEAREST 1
+#define MIPFILTER_LINEAR 3
+
+#define TB0C_LAST_STAGE (1 << 31)
+#define TB0C_RESULT_SCALE_1X (0 << 29)
+#define TB0C_RESULT_SCALE_2X (1 << 29)
+#define TB0C_RESULT_SCALE_4X (2 << 29)
+#define TB0C_OP_MODULE (3 << 25)
+#define TB0C_OUTPUT_WRITE_CURRENT (0 << 24)
+#define TB0C_OUTPUT_WRITE_ACCUM (1 << 24)
+#define TB0C_ARG3_REPLICATE_ALPHA (1<<23)
+#define TB0C_ARG3_INVERT (1<<22)
+#define TB0C_ARG3_SEL_XXX
+#define TB0C_ARG2_REPLICATE_ALPHA (1<<17)
+#define TB0C_ARG2_INVERT (1<<16)
+#define TB0C_ARG2_SEL_ONE (0 << 12)
+#define TB0C_ARG2_SEL_FACTOR (1 << 12)
+#define TB0C_ARG2_SEL_TEXEL0 (6 << 12)
+#define TB0C_ARG2_SEL_TEXEL1 (7 << 12)
+#define TB0C_ARG2_SEL_TEXEL2 (8 << 12)
+#define TB0C_ARG2_SEL_TEXEL3 (9 << 12)
+#define TB0C_ARG1_REPLICATE_ALPHA (1<<11)
+#define TB0C_ARG1_INVERT (1<<10)
+#define TB0C_ARG1_SEL_ONE (0 << 6)
+#define TB0C_ARG1_SEL_TEXEL0 (6 << 6)
+#define TB0C_ARG1_SEL_TEXEL1 (7 << 6)
+#define TB0C_ARG1_SEL_TEXEL2 (8 << 6)
+#define TB0C_ARG1_SEL_TEXEL3 (9 << 6)
+#define TB0C_ARG0_REPLICATE_ALPHA (1<<5)
+#define TB0C_ARG0_SEL_XXX
+
+#define TB0A_CTR_STAGE_ENABLE (1<<31)
+#define TB0A_RESULT_SCALE_1X (0 << 29)
+#define TB0A_RESULT_SCALE_2X (1 << 29)
+#define TB0A_RESULT_SCALE_4X (2 << 29)
+#define TB0A_OP_MODULE (3 << 25)
+#define TB0A_OUTPUT_WRITE_CURRENT (0<<24)
+#define TB0A_OUTPUT_WRITE_ACCUM (1<<24)
+#define TB0A_CTR_STAGE_SEL_BITS_XXX
+#define TB0A_ARG3_SEL_XXX
+#define TB0A_ARG3_INVERT (1<<17)
+#define TB0A_ARG2_INVERT (1<<16)
+#define TB0A_ARG2_SEL_ONE (0 << 12)
+#define TB0A_ARG2_SEL_TEXEL0 (6 << 12)
+#define TB0A_ARG2_SEL_TEXEL1 (7 << 12)
+#define TB0A_ARG2_SEL_TEXEL2 (8 << 12)
+#define TB0A_ARG2_SEL_TEXEL3 (9 << 12)
+#define TB0A_ARG1_INVERT (1<<10)
+#define TB0A_ARG1_SEL_ONE (0 << 6)
+#define TB0A_ARG1_SEL_TEXEL0 (6 << 6)
+#define TB0A_ARG1_SEL_TEXEL1 (7 << 6)
+#define TB0A_ARG1_SEL_TEXEL2 (8 << 6)
+#define TB0A_ARG1_SEL_TEXEL3 (9 << 6)
+
+#endif /* GEN2_RENDER_H */
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
new file mode 100644
index 00000000..203de08f
--- /dev/null
+++ b/src/sna/gen3_render.c
@@ -0,0 +1,3694 @@
+/*
+ * Copyright © 2010-2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_render.h"
+#include "sna_render_inline.h"
+#include "sna_reg.h"
+#include "sna_video.h"
+
+#include "gen3_render.h"
+
+#if DEBUG_RENDER
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+#define NO_COMPOSITE 0
+#define NO_COMPOSITE_SPANS 0
+#define NO_COPY 0
+#define NO_COPY_BOXES 0
+#define NO_FILL 0
+#define NO_FILL_BOXES 0
+
+enum {
+ SHADER_NONE = 0,
+ SHADER_ZERO,
+ SHADER_CONSTANT,
+ SHADER_LINEAR,
+ SHADER_RADIAL,
+ SHADER_TEXTURE,
+ SHADER_OPACITY,
+};
+
+#define OUT_BATCH(v) batch_emit(sna, v)
+#define OUT_BATCH_F(v) batch_emit_float(sna, v)
+#define OUT_VERTEX(v) vertex_emit(sna, v)
+
+enum gen3_radial_mode {
+ RADIAL_ONE,
+ RADIAL_TWO
+};
+
+static const struct blendinfo {
+ Bool dst_alpha;
+ Bool src_alpha;
+ uint32_t src_blend;
+ uint32_t dst_blend;
+} gen3_blend_op[] = {
+ /* Clear */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
+ /* Src */ {0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
+ /* Dst */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
+ /* Over */ {0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
+ /* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
+ /* In */ {1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
+ /* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
+ /* Out */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
+ /* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
+ /* Atop */ {1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
+ /* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
+ /* Xor */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
+ /* Add */ {0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
+};
+
+static const struct formatinfo {
+ int fmt, xfmt;
+ uint32_t card_fmt;
+ Bool rb_reversed;
+} gen3_tex_formats[] = {
+ {PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, FALSE},
+ {PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, FALSE},
+ {PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, FALSE},
+ {PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, FALSE},
+ {PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, FALSE},
+ {PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, FALSE},
+ {PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, FALSE},
+ {PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, FALSE},
+ {PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, TRUE},
+ {PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, FALSE},
+ {PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, TRUE},
+ {PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, FALSE},
+ {PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, TRUE},
+};
+
+#define xFixedToDouble(f) pixman_fixed_to_double(f)
+
+static inline uint32_t gen3_buf_tiling(uint32_t tiling)
+{
+ uint32_t v = 0;
+ switch (tiling) {
+ case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y;
+ case I915_TILING_X: v |= BUF_3D_TILED_SURFACE;
+ case I915_TILING_NONE: break;
+ }
+ return v;
+}
+
+static inline Bool
+gen3_check_pitch_3d(struct kgem_bo *bo)
+{
+ return bo->pitch <= 8192;
+}
+
+static uint32_t gen3_get_blend_cntl(int op,
+ Bool has_component_alpha,
+ uint32_t dst_format)
+{
+ uint32_t sblend = gen3_blend_op[op].src_blend;
+ uint32_t dblend = gen3_blend_op[op].dst_blend;
+
+ /* If there's no dst alpha channel, adjust the blend op so that we'll
+ * treat it as always 1.
+ */
+ if (gen3_blend_op[op].dst_alpha) {
+ if (PICT_FORMAT_A(dst_format) == 0) {
+ if (sblend == BLENDFACT_DST_ALPHA)
+ sblend = BLENDFACT_ONE;
+ else if (sblend == BLENDFACT_INV_DST_ALPHA)
+ sblend = BLENDFACT_ZERO;
+ }
+
+ /* gen3 engine reads 8bit color buffer into green channel
+ * in cases like color buffer blending etc., and also writes
+ * back green channel. So with dst_alpha blend we should use
+ * color factor. See spec on "8-bit rendering".
+ */
+ if (dst_format == PICT_a8) {
+ if (sblend == BLENDFACT_DST_ALPHA)
+ sblend = BLENDFACT_DST_COLR;
+ else if (sblend == BLENDFACT_INV_DST_ALPHA)
+ sblend = BLENDFACT_INV_DST_COLR;
+ }
+ }
+
+ /* If the source alpha is being used, then we should only be in a case
+ * where the source blend factor is 0, and the source blend value is the
+ * mask channels multiplied by the source picture's alpha.
+ */
+ if (has_component_alpha && gen3_blend_op[op].src_alpha) {
+ if (dblend == BLENDFACT_SRC_ALPHA)
+ dblend = BLENDFACT_SRC_COLR;
+ else if (dblend == BLENDFACT_INV_SRC_ALPHA)
+ dblend = BLENDFACT_INV_SRC_COLR;
+ }
+
+ return (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
+ BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
+ sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT |
+ dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
+}
+
+static Bool gen3_check_dst_format(uint32_t format)
+{
+ switch (format) {
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ case PICT_a8b8g8r8:
+ case PICT_x8b8g8r8:
+ case PICT_r5g6b5:
+ case PICT_b5g6r5:
+ case PICT_a1r5g5b5:
+ case PICT_x1r5g5b5:
+ case PICT_a1b5g5r5:
+ case PICT_x1b5g5r5:
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ case PICT_a2b10g10r10:
+ case PICT_x2b10g10r10:
+ case PICT_a8:
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ case PICT_a4b4g4r4:
+ case PICT_x4b4g4r4:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static Bool gen3_dst_rb_reversed(uint32_t format)
+{
+ switch (format) {
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ case PICT_r5g6b5:
+ case PICT_a1r5g5b5:
+ case PICT_x1r5g5b5:
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ case PICT_a8:
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ return FALSE;
+ default:
+ return TRUE;
+ }
+}
+
+#define DSTORG_HORT_BIAS(x) ((x)<<20)
+#define DSTORG_VERT_BIAS(x) ((x)<<16)
+
+static uint32_t gen3_get_dst_format(uint32_t format)
+{
+#define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8))
+ switch (format) {
+ default:
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ case PICT_a8b8g8r8:
+ case PICT_x8b8g8r8:
+ return BIAS | COLR_BUF_ARGB8888;
+ case PICT_r5g6b5:
+ case PICT_b5g6r5:
+ return BIAS | COLR_BUF_RGB565;
+ case PICT_a1r5g5b5:
+ case PICT_x1r5g5b5:
+ case PICT_a1b5g5r5:
+ case PICT_x1b5g5r5:
+ return BIAS | COLR_BUF_ARGB1555;
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ case PICT_a2b10g10r10:
+ case PICT_x2b10g10r10:
+ return BIAS | COLR_BUF_ARGB2AAA;
+ case PICT_a8:
+ return BIAS | COLR_BUF_8BIT;
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ case PICT_a4b4g4r4:
+ case PICT_x4b4g4r4:
+ return BIAS | COLR_BUF_ARGB4444;
+ }
+#undef BIAS
+}
+
+static uint32_t gen3_texture_repeat(uint32_t repeat)
+{
+#define REPEAT(x) \
+ (SS3_NORMALIZED_COORDS | \
+ TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \
+ TEXCOORDMODE_##x << SS3_TCY_ADDR_MODE_SHIFT)
+ switch (repeat) {
+ default:
+ case RepeatNone:
+ return REPEAT(CLAMP_BORDER);
+ case RepeatNormal:
+ return REPEAT(WRAP);
+ case RepeatPad:
+ return REPEAT(CLAMP_EDGE);
+ case RepeatReflect:
+ return REPEAT(MIRROR);
+ }
+#undef REPEAT
+}
+
+static uint32_t gen3_gradient_repeat(uint32_t repeat)
+{
+#define REPEAT(x) \
+ (SS3_NORMALIZED_COORDS | \
+ TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \
+ TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT)
+ switch (repeat) {
+ default:
+ case RepeatNone:
+ return REPEAT(CLAMP_BORDER);
+ case RepeatNormal:
+ return REPEAT(WRAP);
+ case RepeatPad:
+ return REPEAT(CLAMP_EDGE);
+ case RepeatReflect:
+ return REPEAT(MIRROR);
+ }
+#undef REPEAT
+}
+
+static Bool gen3_check_repeat(uint32_t repeat)
+{
+ switch (repeat) {
+ case RepeatNone:
+ case RepeatNormal:
+ case RepeatPad:
+ case RepeatReflect:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static uint32_t gen3_filter(uint32_t filter)
+{
+ switch (filter) {
+ default:
+ assert(0);
+ case PictFilterNearest:
+ return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
+ FILTER_NEAREST << SS2_MIN_FILTER_SHIFT |
+ MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
+ case PictFilterBilinear:
+ return (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT |
+ FILTER_LINEAR << SS2_MIN_FILTER_SHIFT |
+ MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
+ }
+}
+
+static bool gen3_check_filter(uint32_t filter)
+{
+ switch (filter) {
+ case PictFilterNearest:
+ case PictFilterBilinear:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static inline void
+gen3_emit_composite_dstcoord(struct sna *sna, int16_t dstX, int16_t dstY)
+{
+ OUT_VERTEX(dstX);
+ OUT_VERTEX(dstY);
+}
+
+fastcall static void
+gen3_emit_composite_primitive_constant(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ int16_t dst_x = r->dst.x + op->dst.x;
+ int16_t dst_y = r->dst.y + op->dst.y;
+
+ gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
+ gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
+ gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
+}
+
+fastcall static void
+gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ int16_t dst_x, dst_y;
+ int16_t src_x, src_y;
+
+ dst_x = r->dst.x + op->dst.x;
+ dst_y = r->dst.y + op->dst.y;
+ src_x = r->src.x + op->src.offset[0];
+ src_y = r->src.y + op->src.offset[1];
+
+ gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
+ OUT_VERTEX(src_x + r->width);
+ OUT_VERTEX(src_y + r->height);
+
+ gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
+ OUT_VERTEX(src_x);
+ OUT_VERTEX(src_y + r->height);
+
+ gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
+ OUT_VERTEX(src_x);
+ OUT_VERTEX(src_y);
+}
+
+fastcall static void
+gen3_emit_composite_primitive_affine_gradient(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ PictTransform *transform = op->src.transform;
+ int16_t dst_x, dst_y;
+ int16_t src_x, src_y;
+ float sx, sy;
+
+ dst_x = r->dst.x + op->dst.x;
+ dst_y = r->dst.y + op->dst.y;
+ src_x = r->src.x + op->src.offset[0];
+ src_y = r->src.y + op->src.offset[1];
+
+ sna_get_transformed_coordinates(src_x + r->width, src_y + r->height,
+ transform,
+ &sx, &sy);
+ gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
+ OUT_VERTEX(sx);
+ OUT_VERTEX(sy);
+
+ sna_get_transformed_coordinates(src_x, src_y + r->height,
+ transform,
+ &sx, &sy);
+ gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
+ OUT_VERTEX(sx);
+ OUT_VERTEX(sy);
+
+ sna_get_transformed_coordinates(src_x, src_y,
+ transform,
+ &sx, &sy);
+ gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
+ OUT_VERTEX(sx);
+ OUT_VERTEX(sy);
+}
+
+fastcall static void
+gen3_emit_composite_primitive_identity_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float w = r->width;
+ float h = r->height;
+ float *v;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
+
+ v[8] = v[4] = r->dst.x + op->dst.x;
+ v[0] = v[4] + w;
+
+ v[9] = r->dst.y + op->dst.y;
+ v[5] = v[1] = v[9] + h;
+
+ v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
+ v[2] = v[6] + w * op->src.scale[0];
+
+ v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
+ v[7] = v[3] = v[11] + h * op->src.scale[1];
+}
+
+fastcall static void
+gen3_emit_composite_primitive_affine_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ PictTransform *transform = op->src.transform;
+ int16_t dst_x = r->dst.x + op->dst.x;
+ int16_t dst_y = r->dst.y + op->dst.y;
+ int src_x = r->src.x + (int)op->src.offset[0];
+ int src_y = r->src.y + (int)op->src.offset[1];
+ float sx, sy;
+
+ _sna_get_transformed_coordinates(src_x + r->width, src_y + r->height,
+ transform,
+ &sx, &sy);
+
+ gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
+ OUT_VERTEX(sx * op->src.scale[0]);
+ OUT_VERTEX(sy * op->src.scale[1]);
+
+ _sna_get_transformed_coordinates(src_x, src_y + r->height,
+ transform,
+ &sx, &sy);
+ gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
+ OUT_VERTEX(sx * op->src.scale[0]);
+ OUT_VERTEX(sy * op->src.scale[1]);
+
+ _sna_get_transformed_coordinates(src_x, src_y,
+ transform,
+ &sx, &sy);
+ gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
+ OUT_VERTEX(sx * op->src.scale[0]);
+ OUT_VERTEX(sy * op->src.scale[1]);
+}
+
+fastcall static void
+gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float w = r->width;
+ float h = r->height;
+ float *v;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
+
+ v[8] = v[4] = r->dst.x + op->dst.x;
+ v[0] = v[4] + w;
+
+ v[9] = r->dst.y + op->dst.y;
+ v[5] = v[1] = v[9] + h;
+
+ v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
+ v[2] = v[6] + w * op->mask.scale[0];
+
+ v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
+ v[7] = v[3] = v[11] + h * op->mask.scale[1];
+}
+
+fastcall static void
+gen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float dst_x, dst_y;
+ float src_x, src_y;
+ float msk_x, msk_y;
+ float w, h;
+ float *v;
+
+ dst_x = r->dst.x + op->dst.x;
+ dst_y = r->dst.y + op->dst.y;
+ src_x = r->src.x + op->src.offset[0];
+ src_y = r->src.y + op->src.offset[1];
+ msk_x = r->mask.x + op->mask.offset[0];
+ msk_y = r->mask.y + op->mask.offset[1];
+ w = r->width;
+ h = r->height;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 18;
+
+ v[0] = dst_x + w;
+ v[1] = dst_y + h;
+ v[2] = (src_x + w) * op->src.scale[0];
+ v[3] = (src_y + h) * op->src.scale[1];
+ v[4] = (msk_x + w) * op->mask.scale[0];
+ v[5] = (msk_y + h) * op->mask.scale[1];
+
+ v[6] = dst_x;
+ v[7] = v[1];
+ v[8] = src_x * op->src.scale[0];
+ v[9] = v[3];
+ v[10] = msk_x * op->mask.scale[0];
+ v[11] =v[5];
+
+ v[12] = v[6];
+ v[13] = dst_y;
+ v[14] = v[8];
+ v[15] = src_y * op->src.scale[1];
+ v[16] = v[10];
+ v[17] = msk_y * op->mask.scale[1];
+}
+
+fastcall static void
+gen3_emit_composite_primitive_affine_source_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ int16_t src_x, src_y;
+ float dst_x, dst_y;
+ float msk_x, msk_y;
+ float w, h;
+ float *v;
+
+ dst_x = r->dst.x + op->dst.x;
+ dst_y = r->dst.y + op->dst.y;
+ src_x = r->src.x + op->src.offset[0];
+ src_y = r->src.y + op->src.offset[1];
+ msk_x = r->mask.x + op->mask.offset[0];
+ msk_y = r->mask.y + op->mask.offset[1];
+ w = r->width;
+ h = r->height;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 18;
+
+ v[0] = dst_x + w;
+ v[1] = dst_y + h;
+ sna_get_transformed_coordinates(src_x + r->width, src_y + r->height,
+ op->src.transform,
+ &v[2], &v[3]);
+ v[2] *= op->src.scale[0];
+ v[3] *= op->src.scale[1];
+ v[4] = (msk_x + w) * op->mask.scale[0];
+ v[5] = (msk_y + h) * op->mask.scale[1];
+
+ v[6] = dst_x;
+ v[7] = v[1];
+ sna_get_transformed_coordinates(src_x, src_y + r->height,
+ op->src.transform,
+ &v[8], &v[9]);
+ v[8] *= op->src.scale[0];
+ v[9] *= op->src.scale[1];
+ v[10] = msk_x * op->mask.scale[0];
+ v[11] =v[5];
+
+ v[12] = v[6];
+ v[13] = dst_y;
+ sna_get_transformed_coordinates(src_x, src_y,
+ op->src.transform,
+ &v[14], &v[15]);
+ v[14] *= op->src.scale[0];
+ v[15] *= op->src.scale[1];
+ v[16] = v[10];
+ v[17] = msk_y * op->mask.scale[1];
+}
+
+static void
+gen3_emit_composite_texcoord(struct sna *sna,
+ const struct sna_composite_channel *channel,
+ int16_t x, int16_t y)
+{
+ float s = 0, t = 0, w = 1;
+
+ switch (channel->gen3.type) {
+ case SHADER_OPACITY:
+ case SHADER_NONE:
+ case SHADER_ZERO:
+ case SHADER_CONSTANT:
+ break;
+
+ case SHADER_LINEAR:
+ case SHADER_RADIAL:
+ case SHADER_TEXTURE:
+ x += channel->offset[0];
+ y += channel->offset[1];
+ if (channel->is_affine) {
+ sna_get_transformed_coordinates(x, y,
+ channel->transform,
+ &s, &t);
+ OUT_VERTEX(s * channel->scale[0]);
+ OUT_VERTEX(t * channel->scale[1]);
+ } else {
+ sna_get_transformed_coordinates_3d(x, y,
+ channel->transform,
+ &s, &t, &w);
+ OUT_VERTEX(s * channel->scale[0]);
+ OUT_VERTEX(t * channel->scale[1]);
+ OUT_VERTEX(0);
+ OUT_VERTEX(w);
+ }
+ break;
+ }
+}
+
+static void
+gen3_emit_composite_vertex(struct sna *sna,
+ const struct sna_composite_op *op,
+ int16_t srcX, int16_t srcY,
+ int16_t maskX, int16_t maskY,
+ int16_t dstX, int16_t dstY)
+{
+ gen3_emit_composite_dstcoord(sna, dstX, dstY);
+ gen3_emit_composite_texcoord(sna, &op->src, srcX, srcY);
+ gen3_emit_composite_texcoord(sna, &op->mask, maskX, maskY);
+}
+
+fastcall static void
+gen3_emit_composite_primitive(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ gen3_emit_composite_vertex(sna, op,
+ r->src.x + r->width,
+ r->src.y + r->height,
+ r->mask.x + r->width,
+ r->mask.y + r->height,
+ op->dst.x + r->dst.x + r->width,
+ op->dst.y + r->dst.y + r->height);
+ gen3_emit_composite_vertex(sna, op,
+ r->src.x,
+ r->src.y + r->height,
+ r->mask.x,
+ r->mask.y + r->height,
+ op->dst.x + r->dst.x,
+ op->dst.y + r->dst.y + r->height);
+ gen3_emit_composite_vertex(sna, op,
+ r->src.x,
+ r->src.y,
+ r->mask.x,
+ r->mask.y,
+ op->dst.x + r->dst.x,
+ op->dst.y + r->dst.y);
+}
+
+static inline void
+gen3_2d_perspective(struct sna *sna, int in, int out)
+{
+ gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W));
+ gen3_fs_mul(out,
+ gen3_fs_operand(in, X, Y, ZERO, ONE),
+ gen3_fs_operand_reg(out));
+}
+
+static inline void
+gen3_linear_coord(struct sna *sna,
+ const struct sna_composite_channel *channel,
+ int in, int out)
+{
+ int c = channel->gen3.constants;
+
+ if (!channel->is_affine) {
+ gen3_2d_perspective(sna, in, FS_U0);
+ in = FS_U0;
+ }
+
+ gen3_fs_mov(out, gen3_fs_operand_zero());
+ gen3_fs_dp3(out, MASK_X,
+ gen3_fs_operand(in, X, Y, ONE, ZERO),
+ gen3_fs_operand_reg(c));
+}
+
+static void
+gen3_radial_coord(struct sna *sna,
+ const struct sna_composite_channel *channel,
+ int in, int out)
+{
+ int c = channel->gen3.constants;
+
+ if (!channel->is_affine) {
+ gen3_2d_perspective(sna, in, FS_U0);
+ in = FS_U0;
+ }
+
+ switch (channel->gen3.mode) {
+ case RADIAL_ONE:
+ /*
+ pdx = (x - c1x) / dr, pdy = (y - c1y) / dr;
+ r² = pdx*pdx + pdy*pdy
+ t = r²/sqrt(r²) - r1/dr;
+ */
+ gen3_fs_mad(FS_U0, MASK_X | MASK_Y,
+ gen3_fs_operand(in, X, Y, ZERO, ZERO),
+ gen3_fs_operand(c, Z, Z, ZERO, ZERO),
+ gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO));
+ gen3_fs_dp2add(FS_U0, MASK_X,
+ gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
+ gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
+ gen3_fs_operand_zero());
+ gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X));
+ gen3_fs_mad(out, 0,
+ gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO),
+ gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
+ gen3_fs_operand(c, W, ZERO, ZERO, ZERO));
+ break;
+
+ case RADIAL_TWO:
+ /*
+ pdx = x - c1x, pdy = y - c1y;
+ A = dx² + dy² - dr²
+ B = -2*(pdx*dx + pdy*dy + r1*dr);
+ C = pdx² + pdy² - r1²;
+ det = B*B - 4*A*C;
+ t = (-B + sqrt (det)) / (2 * A)
+ */
+
+ /* u0.x = pdx, u0.y = pdy, u[0].z = r1; */
+ gen3_fs_add(FS_U0,
+ gen3_fs_operand(in, X, Y, ZERO, ZERO),
+ gen3_fs_operand(c, X, Y, Z, ZERO));
+ /* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */
+ gen3_fs_dp3(FS_U0, MASK_W,
+ gen3_fs_operand(FS_U0, X, Y, ONE, ZERO),
+ gen3_fs_operand(c+1, X, Y, Z, ZERO));
+ /* u1.x = pdx² + pdy² - r1²; [C] */
+ gen3_fs_dp3(FS_U1, MASK_X,
+ gen3_fs_operand(FS_U0, X, Y, Z, ZERO),
+ gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO));
+ /* u1.x = C, u1.y = B, u1.z=-4*A; */
+ gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W));
+ gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W));
+ /* u1.x = B² - 4*A*C */
+ gen3_fs_dp2add(FS_U1, MASK_X,
+ gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO),
+ gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO),
+ gen3_fs_operand_zero());
+ /* out.x = -B + sqrt (B² - 4*A*C), */
+ gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X));
+ gen3_fs_mad(out, MASK_X,
+ gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
+ gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO),
+ gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO));
+ /* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A), */
+ gen3_fs_mul(out,
+ gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
+ gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO));
+ break;
+ }
+}
+
+static void
+gen3_composite_emit_shader(struct sna *sna,
+ const struct sna_composite_op *op,
+ uint8_t blend)
+{
+ Bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0;
+ const struct sna_composite_channel *src, *mask;
+ struct gen3_render_state *state = &sna->render_state.gen3;
+ uint32_t shader_offset, id;
+ int src_reg, mask_reg;
+ int t, length;
+
+ src = &op->src;
+ mask = &op->mask;
+ if (mask->gen3.type == SHADER_NONE)
+ mask = NULL;
+
+ if (mask && src->is_opaque &&
+ gen3_blend_op[blend].src_alpha &&
+ op->has_component_alpha) {
+ src = mask;
+ mask = NULL;
+ }
+
+ id = (src->gen3.type |
+ src->is_affine << 4 |
+ src->alpha_fixup << 5 |
+ src->rb_reversed << 6);
+ if (mask) {
+ id |= (mask->gen3.type << 8 |
+ mask->is_affine << 12 |
+ gen3_blend_op[blend].src_alpha << 13 |
+ op->has_component_alpha << 14 |
+ mask->alpha_fixup << 15 |
+ mask->rb_reversed << 16);
+ }
+ id |= dst_is_alpha << 24;
+ id |= op->rb_reversed << 25;
+
+ if (id == state->last_shader)
+ return;
+
+ state->last_shader = id;
+
+ shader_offset = sna->kgem.nbatch++;
+ t = 0;
+ switch (src->gen3.type) {
+ case SHADER_NONE:
+ case SHADER_OPACITY:
+ assert(0);
+ case SHADER_ZERO:
+ break;
+ case SHADER_CONSTANT:
+ gen3_fs_dcl(FS_T8);
+ src_reg = FS_T8;
+ break;
+ case SHADER_TEXTURE:
+ case SHADER_RADIAL:
+ case SHADER_LINEAR:
+ gen3_fs_dcl(FS_S0);
+ gen3_fs_dcl(FS_T0);
+ t++;
+ break;
+ }
+
+ if (mask == NULL) {
+ if (src->gen3.type == SHADER_ZERO) {
+ gen3_fs_mov(FS_OC, gen3_fs_operand_zero());
+ goto done;
+ }
+ if (src->alpha_fixup && dst_is_alpha) {
+ gen3_fs_mov(FS_OC, gen3_fs_operand_one());
+ goto done;
+ }
+ /* No mask, so load directly to output color */
+ if (src->gen3.type != SHADER_CONSTANT) {
+ if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed)
+ src_reg = FS_R0;
+ else
+ src_reg = FS_OC;
+ }
+ switch (src->gen3.type) {
+ case SHADER_LINEAR:
+ gen3_linear_coord(sna, src, FS_T0, FS_R0);
+ gen3_fs_texld(src_reg, FS_S0, FS_R0);
+ break;
+
+ case SHADER_RADIAL:
+ gen3_radial_coord(sna, src, FS_T0, FS_R0);
+ gen3_fs_texld(src_reg, FS_S0, FS_R0);
+ break;
+
+ case SHADER_TEXTURE:
+ if (src->is_affine)
+ gen3_fs_texld(src_reg, FS_S0, FS_T0);
+ else
+ gen3_fs_texldp(src_reg, FS_S0, FS_T0);
+ break;
+
+ case SHADER_NONE:
+ case SHADER_CONSTANT:
+ case SHADER_ZERO:
+ break;
+ }
+
+ if (src_reg != FS_OC) {
+ if (src->alpha_fixup)
+ gen3_fs_mov(FS_OC,
+ src->rb_reversed ^ op->rb_reversed ?
+ gen3_fs_operand(src_reg, Z, Y, X, ONE) :
+ gen3_fs_operand(src_reg, X, Y, Z, ONE));
+ else if (dst_is_alpha)
+ gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, W, W, W, W));
+ else if (src->rb_reversed ^ op->rb_reversed)
+ gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, Z, Y, X, W));
+ else
+ gen3_fs_mov(FS_OC, gen3_fs_operand_reg(src_reg));
+ } else if (src->alpha_fixup)
+ gen3_fs_mov_masked(FS_OC, MASK_W, gen3_fs_operand_one());
+ } else {
+ int out_reg = FS_OC;
+ if (op->rb_reversed)
+ out_reg = FS_U0;
+
+ switch (mask->gen3.type) {
+ case SHADER_CONSTANT:
+ gen3_fs_dcl(FS_T9);
+ mask_reg = FS_T9;
+ break;
+ case SHADER_TEXTURE:
+ case SHADER_LINEAR:
+ case SHADER_RADIAL:
+ gen3_fs_dcl(FS_S0 + t);
+ case SHADER_OPACITY:
+ gen3_fs_dcl(FS_T0 + t);
+ break;
+ case SHADER_NONE:
+ case SHADER_ZERO:
+ assert(0);
+ break;
+ }
+
+ t = 0;
+ switch (src->gen3.type) {
+ case SHADER_LINEAR:
+ gen3_linear_coord(sna, src, FS_T0, FS_R0);
+ gen3_fs_texld(FS_R0, FS_S0, FS_R0);
+ src_reg = FS_R0;
+ t++;
+ break;
+
+ case SHADER_RADIAL:
+ gen3_radial_coord(sna, src, FS_T0, FS_R0);
+ gen3_fs_texld(FS_R0, FS_S0, FS_R0);
+ src_reg = FS_R0;
+ t++;
+ break;
+
+ case SHADER_TEXTURE:
+ if (src->is_affine)
+ gen3_fs_texld(FS_R0, FS_S0, FS_T0);
+ else
+ gen3_fs_texldp(FS_R0, FS_S0, FS_T0);
+ src_reg = FS_R0;
+ t++;
+ break;
+
+ case SHADER_CONSTANT:
+ case SHADER_NONE:
+ case SHADER_ZERO:
+ break;
+ }
+ if (src->alpha_fixup)
+ gen3_fs_mov_masked(src_reg, MASK_W, gen3_fs_operand_one());
+ if (src->rb_reversed)
+ gen3_fs_mov(src_reg, gen3_fs_operand(src_reg, Z, Y, X, W));
+
+ switch (mask->gen3.type) {
+ case SHADER_LINEAR:
+ gen3_linear_coord(sna, mask, FS_T0 + t, FS_R1);
+ gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
+ mask_reg = FS_R1;
+ break;
+
+ case SHADER_RADIAL:
+ gen3_radial_coord(sna, mask, FS_T0 + t, FS_R1);
+ gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
+ mask_reg = FS_R1;
+ break;
+
+ case SHADER_TEXTURE:
+ if (mask->is_affine)
+ gen3_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
+ else
+ gen3_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
+ mask_reg = FS_R1;
+ break;
+
+ case SHADER_OPACITY:
+ if (dst_is_alpha) {
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand(src_reg, W, W, W, W),
+ gen3_fs_operand(FS_T0 + t, X, X, X, X));
+ } else {
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand(src_reg, X, Y, Z, W),
+ gen3_fs_operand(FS_T0 + t, X, X, X, X));
+ }
+ goto mask_done;
+
+ case SHADER_CONSTANT:
+ case SHADER_NONE:
+ case SHADER_ZERO:
+ break;
+ }
+ if (mask->alpha_fixup)
+ gen3_fs_mov_masked(mask_reg, MASK_W, gen3_fs_operand_one());
+ if (mask->rb_reversed)
+ gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W));
+
+ if (dst_is_alpha) {
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand(src_reg, W, W, W, W),
+ gen3_fs_operand(mask_reg, W, W, W, W));
+ } else {
+ /* If component alpha is active in the mask and the blend
+ * operation uses the source alpha, then we know we don't
+ * need the source value (otherwise we would have hit a
+ * fallback earlier), so we provide the source alpha (src.A *
+ * mask.X) as output color.
+ * Conversely, if CA is set and we don't need the source alpha,
+ * then we produce the source value (src.X * mask.X) and the
+ * source alpha is unused. Otherwise, we provide the non-CA
+ * source value (src.X * mask.A).
+ */
+ if (op->has_component_alpha) {
+ if (gen3_blend_op[blend].src_alpha)
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand(src_reg, W, W, W, W),
+ gen3_fs_operand_reg(mask_reg));
+ else
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand_reg(src_reg),
+ gen3_fs_operand_reg(mask_reg));
+ } else {
+ gen3_fs_mul(out_reg,
+ gen3_fs_operand_reg(src_reg),
+ gen3_fs_operand(mask_reg, W, W, W, W));
+ }
+ }
+mask_done:
+ if (op->rb_reversed)
+ gen3_fs_mov(FS_OC, gen3_fs_operand(FS_U0, Z, Y, X, W));
+ }
+
+done:
+ length = sna->kgem.nbatch - shader_offset;
+ sna->kgem.batch[shader_offset] =
+ _3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2);
+}
+
+static uint32_t gen3_ms_tiling(uint32_t tiling)
+{
+ uint32_t v = 0;
+ switch (tiling) {
+ case I915_TILING_Y: v |= MS3_TILE_WALK;
+ case I915_TILING_X: v |= MS3_TILED_SURFACE;
+ case I915_TILING_NONE: break;
+ }
+ return v;
+}
+
+static void gen3_emit_invariant(struct sna *sna)
+{
+ /* Disable independent alpha blend */
+ OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE |
+ IAB_MODIFY_FUNC | BLENDFUNC_ADD << IAB_FUNC_SHIFT |
+ IAB_MODIFY_SRC_FACTOR | BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT |
+ IAB_MODIFY_DST_FACTOR | BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT);
+
+ OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
+ CSB_TCB(0, 0) |
+ CSB_TCB(1, 1) |
+ CSB_TCB(2, 2) |
+ CSB_TCB(3, 3) |
+ CSB_TCB(4, 4) |
+ CSB_TCB(5, 5) |
+ CSB_TCB(6, 6) |
+ CSB_TCB(7, 7));
+
+ OUT_BATCH(_3DSTATE_MODES_4_CMD |
+ ENABLE_LOGIC_OP_FUNC |
+ LOGIC_OP_FUNC(LOGICOP_COPY));
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
+ OUT_BATCH(0x00000000); /* Disable texture coordinate wrap-shortest */
+ OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
+ S4_LINE_WIDTH_ONE |
+ S4_CULLMODE_NONE |
+ S4_VFMT_XY);
+ OUT_BATCH(0x00000000); /* Stencil. */
+
+ OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+ OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
+
+ OUT_BATCH(_3DSTATE_LOAD_INDIRECT);
+ OUT_BATCH(0x00000000);
+
+ OUT_BATCH(_3DSTATE_STIPPLE);
+ OUT_BATCH(0x00000000);
+
+ sna->render_state.gen3.need_invariant = FALSE;
+}
+
+static void
+gen3_get_batch(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+#define MAX_OBJECTS 3 /* worst case: dst + src + mask */
+
+ kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ if (!kgem_check_batch(&sna->kgem, 200)) {
+ DBG(("%s: flushing batch: size %d > %d\n",
+ __FUNCTION__, 200,
+ sna->kgem.surface-sna->kgem.nbatch));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->kgem.nreloc > KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS) {
+ DBG(("%s: flushing batch: reloc %d >= %d\n",
+ __FUNCTION__,
+ sna->kgem.nreloc,
+ (int)KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->kgem.nexec > KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1) {
+ DBG(("%s: flushing batch: exec %d >= %d\n",
+ __FUNCTION__,
+ sna->kgem.nexec,
+ (int)KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->render_state.gen3.need_invariant)
+ gen3_emit_invariant(sna);
+#undef MAX_OBJECTS
+}
+
+static void gen3_emit_composite_state(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ struct gen3_render_state *state = &sna->render_state.gen3;
+ uint32_t map[4];
+ uint32_t sampler[4];
+ struct kgem_bo *bo[2];
+ int tex_count, n;
+ uint32_t ss2;
+
+ gen3_get_batch(sna, op);
+
+ /* BUF_INFO is an implicit flush, so skip if the target is unchanged. */
+ if (op->dst.bo->unique_id != state->current_dst) {
+ uint32_t v;
+
+ OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
+ OUT_BATCH(BUF_3D_ID_COLOR_BACK |
+ gen3_buf_tiling(op->dst.bo->tiling) |
+ op->dst.bo->pitch);
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ op->dst.bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER,
+ 0));
+
+ OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
+ OUT_BATCH(gen3_get_dst_format(op->dst.format));
+
+ v = (DRAW_YMAX(op->dst.height - 1) |
+ DRAW_XMAX(op->dst.width - 1));
+ if (v != state->last_drawrect_limit) {
+ OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(v);
+ OUT_BATCH(0);
+ state->last_drawrect_limit = v;
+ }
+
+ state->current_dst = op->dst.bo->unique_id;
+ }
+ kgem_bo_mark_dirty(op->dst.bo);
+
+ ss2 = ~0;
+ tex_count = 0;
+ switch (op->src.gen3.type) {
+ case SHADER_OPACITY:
+ case SHADER_NONE:
+ assert(0);
+ case SHADER_ZERO:
+ break;
+ case SHADER_CONSTANT:
+ if (op->src.gen3.mode != state->last_diffuse) {
+ OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
+ OUT_BATCH(op->src.gen3.mode);
+ state->last_diffuse = op->src.gen3.mode;
+ }
+ break;
+ case SHADER_LINEAR:
+ case SHADER_RADIAL:
+ case SHADER_TEXTURE:
+ ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
+ ss2 |= S2_TEXCOORD_FMT(tex_count,
+ op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
+ map[tex_count * 2 + 0] =
+ op->src.card_format |
+ gen3_ms_tiling(op->src.bo->tiling) |
+ (op->src.height - 1) << MS3_HEIGHT_SHIFT |
+ (op->src.width - 1) << MS3_WIDTH_SHIFT;
+ map[tex_count * 2 + 1] =
+ (op->src.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
+
+ sampler[tex_count * 2 + 0] = op->src.filter;
+ sampler[tex_count * 2 + 1] =
+ op->src.repeat |
+ tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
+ bo[tex_count] = op->src.bo;
+ tex_count++;
+ break;
+ }
+ switch (op->mask.gen3.type) {
+ case SHADER_NONE:
+ case SHADER_ZERO:
+ break;
+ case SHADER_CONSTANT:
+ if (op->mask.gen3.mode != state->last_specular) {
+ OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
+ OUT_BATCH(op->mask.gen3.mode);
+ state->last_specular = op->mask.gen3.mode;
+ }
+ break;
+ case SHADER_LINEAR:
+ case SHADER_RADIAL:
+ case SHADER_TEXTURE:
+ ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
+ ss2 |= S2_TEXCOORD_FMT(tex_count,
+ op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
+ map[tex_count * 2 + 0] =
+ op->mask.card_format |
+ gen3_ms_tiling(op->mask.bo->tiling) |
+ (op->mask.height - 1) << MS3_HEIGHT_SHIFT |
+ (op->mask.width - 1) << MS3_WIDTH_SHIFT;
+ map[tex_count * 2 + 1] =
+ (op->mask.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
+
+ sampler[tex_count * 2 + 0] = op->mask.filter;
+ sampler[tex_count * 2 + 1] =
+ op->mask.repeat |
+ tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
+ bo[tex_count] = op->mask.bo;
+ tex_count++;
+ break;
+ case SHADER_OPACITY:
+ ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
+ ss2 |= S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_1D);
+ break;
+ }
+
+ {
+ uint32_t blend_offset = sna->kgem.nbatch;
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
+ OUT_BATCH(ss2);
+ OUT_BATCH(gen3_get_blend_cntl(op->op,
+ op->has_component_alpha,
+ op->dst.format));
+
+ if (memcmp(sna->kgem.batch + state->last_blend + 1,
+ sna->kgem.batch + blend_offset + 1,
+ 2 * 4) == 0)
+ sna->kgem.nbatch = blend_offset;
+ else
+ state->last_blend = blend_offset;
+ }
+
+ if (op->u.gen3.num_constants) {
+ int count = op->u.gen3.num_constants;
+ if (state->last_constants) {
+ int last = sna->kgem.batch[state->last_constants+1];
+ if (last == (1 << (count >> 2)) - 1 &&
+ memcmp(&sna->kgem.batch[state->last_constants+2],
+ op->u.gen3.constants,
+ count * sizeof(uint32_t)) == 0)
+ count = 0;
+ }
+ if (count) {
+ state->last_constants = sna->kgem.nbatch;
+ OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | count);
+ OUT_BATCH((1 << (count >> 2)) - 1);
+
+ memcpy(sna->kgem.batch + sna->kgem.nbatch,
+ op->u.gen3.constants,
+ count * sizeof(uint32_t));
+ sna->kgem.nbatch += count;
+ }
+ }
+
+ if (tex_count != 0) {
+ uint32_t rewind;
+
+ n = 0;
+ if (tex_count == state->tex_count) {
+ for (; n < tex_count; n++) {
+ if (map[2*n+0] != state->tex_map[2*n+0] ||
+ map[2*n+1] != state->tex_map[2*n+1] ||
+ state->tex_handle[n] != bo[n]->handle ||
+ state->tex_delta[n] != bo[n]->delta)
+ break;
+ }
+ }
+ if (n < tex_count) {
+ OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
+ OUT_BATCH((1 << tex_count) - 1);
+ for (n = 0; n < tex_count; n++) {
+ OUT_BATCH(kgem_add_reloc(&sna->kgem,
+ sna->kgem.nbatch,
+ bo[n],
+ I915_GEM_DOMAIN_SAMPLER<< 16,
+ 0));
+ OUT_BATCH(map[2*n + 0]);
+ OUT_BATCH(map[2*n + 1]);
+
+ state->tex_map[2*n+0] = map[2*n+0];
+ state->tex_map[2*n+1] = map[2*n+1];
+ state->tex_handle[n] = bo[n]->handle;
+ state->tex_delta[n] = bo[n]->delta;
+ }
+ state->tex_count = n;
+ }
+
+ rewind = sna->kgem.nbatch;
+ OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
+ OUT_BATCH((1 << tex_count) - 1);
+ for (n = 0; n < tex_count; n++) {
+ OUT_BATCH(sampler[2*n + 0]);
+ OUT_BATCH(sampler[2*n + 1]);
+ OUT_BATCH(0);
+ }
+ if (state->last_sampler &&
+ memcmp(&sna->kgem.batch[state->last_sampler+1],
+ &sna->kgem.batch[rewind + 1],
+ (3*tex_count + 1)*sizeof(uint32_t)) == 0)
+ sna->kgem.nbatch = rewind;
+ else
+ state->last_sampler = rewind;
+ }
+
+ gen3_composite_emit_shader(sna, op, op->op);
+}
+
+static void gen3_magic_ca_pass(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ if (!op->need_magic_ca_pass)
+ return;
+
+ DBG(("%s(%d)\n", __FUNCTION__,
+ sna->render.vertex_index - sna->render.vertex_start));
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
+ OUT_BATCH(gen3_get_blend_cntl(PictOpAdd, TRUE, op->dst.format));
+ gen3_composite_emit_shader(sna, op, PictOpAdd);
+
+ OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
+ (sna->render.vertex_index - sna->render.vertex_start));
+ OUT_BATCH(sna->render.vertex_start);
+}
+
+static void gen3_vertex_flush(struct sna *sna)
+{
+ if (sna->render_state.gen3.vertex_offset == 0 ||
+ sna->render.vertex_index == sna->render.vertex_start)
+ return;
+
+ DBG(("%s[%x] = %d\n", __FUNCTION__,
+ 4*sna->render_state.gen3.vertex_offset,
+ sna->render.vertex_index - sna->render.vertex_start));
+
+ sna->kgem.batch[sna->render_state.gen3.vertex_offset] =
+ PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
+ (sna->render.vertex_index - sna->render.vertex_start);
+ sna->kgem.batch[sna->render_state.gen3.vertex_offset + 1] =
+ sna->render.vertex_start;
+
+ if (sna->render.op)
+ gen3_magic_ca_pass(sna, sna->render.op);
+
+ sna->render_state.gen3.vertex_offset = 0;
+}
+
+static void gen3_vertex_finish(struct sna *sna, Bool last)
+{
+ struct kgem_bo *bo;
+ int delta;
+
+ DBG(("%s: last? %d\n", __FUNCTION__, last));
+
+ gen3_vertex_flush(sna);
+ if (!sna->render.vertex_used)
+ return;
+
+ if (last && sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
+ DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
+ sna->render.vertex_used, sna->kgem.nbatch));
+ memcpy(sna->kgem.batch + sna->kgem.nbatch,
+ sna->render.vertex_data,
+ sna->render.vertex_used * 4);
+ delta = sna->kgem.nbatch * 4;
+ bo = NULL;
+ sna->kgem.nbatch += sna->render.vertex_used;
+ } else {
+ bo = kgem_create_linear(&sna->kgem, 4*sna->render.vertex_used);
+ if (bo && !kgem_bo_write(&sna->kgem, bo,
+ sna->render.vertex_data,
+ 4*sna->render.vertex_used)) {
+ kgem_bo_destroy(&sna->kgem, bo);
+ return;
+ }
+ delta = 0;
+ DBG(("%s: new vbo: %d\n", __FUNCTION__,
+ sna->render.vertex_used));
+ }
+
+ DBG(("%s: reloc = %d\n", __FUNCTION__,
+ sna->render.vertex_reloc[0]));
+
+ sna->kgem.batch[sna->render.vertex_reloc[0]] =
+ kgem_add_reloc(&sna->kgem,
+ sna->render.vertex_reloc[0],
+ bo,
+ I915_GEM_DOMAIN_VERTEX << 16,
+ delta);
+ sna->render.vertex_reloc[0] = 0;
+ sna->render.vertex_used = 0;
+ sna->render.vertex_index = 0;
+
+ if (bo)
+ kgem_bo_destroy(&sna->kgem, bo);
+}
+
+static bool gen3_rectangle_begin(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ int ndwords, i1_cmd = 0, i1_len = 0;
+ struct gen3_render_state *state = &sna->render_state.gen3;
+
+ ndwords = 0;
+ if (state->vertex_offset == 0) {
+ ndwords += 2;
+ if (op->need_magic_ca_pass)
+ ndwords += 100;
+ }
+ if (sna->render.vertex_reloc[0] == 0)
+ i1_len++, i1_cmd |= I1_LOAD_S(0), ndwords++;
+ if (state->floats_per_vertex != op->floats_per_vertex)
+ i1_len++, i1_cmd |= I1_LOAD_S(1), ndwords++;
+ if (ndwords == 0)
+ return true;
+
+ if (!kgem_check_batch(&sna->kgem, ndwords+1))
+ return false;
+
+ if (i1_cmd) {
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | i1_cmd | (i1_len - 1));
+ if (sna->render.vertex_reloc[0] == 0)
+ sna->render.vertex_reloc[0] = sna->kgem.nbatch++;
+ if (state->floats_per_vertex != op->floats_per_vertex) {
+ state->floats_per_vertex = op->floats_per_vertex;
+ OUT_BATCH(state->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT |
+ state->floats_per_vertex << S1_VERTEX_PITCH_SHIFT);
+ }
+ }
+
+ if (state->vertex_offset == 0) {
+ if (sna->kgem.nbatch == 2 + state->last_vertex_offset) {
+ state->vertex_offset = state->last_vertex_offset;
+ } else {
+ state->vertex_offset = sna->kgem.nbatch;
+ OUT_BATCH(MI_NOOP); /* to be filled later */
+ OUT_BATCH(MI_NOOP);
+ sna->render.vertex_start = sna->render.vertex_index;
+ state->last_vertex_offset = state->vertex_offset;
+ }
+ }
+
+ return true;
+}
+
+static int gen3_get_rectangles__flush(struct sna *sna, bool ca)
+{
+ if (!kgem_check_batch(&sna->kgem, ca ? 105: 5))
+ return 0;
+ if (sna->kgem.nexec > KGEM_EXEC_SIZE(&sna->kgem) - 2)
+ return 0;
+ if (sna->kgem.nreloc > KGEM_RELOC_SIZE(&sna->kgem) - 1)
+ return 0;
+
+ gen3_vertex_finish(sna, FALSE);
+ assert(sna->render.vertex_index == 0);
+ assert(sna->render.vertex_used == 0);
+ return ARRAY_SIZE(sna->render.vertex_data);
+}
+
+inline static int gen3_get_rectangles(struct sna *sna,
+ const struct sna_composite_op *op,
+ int want)
+{
+ int rem = vertex_space(sna);
+
+ DBG(("%s: want=%d, rem=%d\n",
+ __FUNCTION__, 3*want*op->floats_per_vertex, rem));
+
+ assert(sna->render.vertex_index * op->floats_per_vertex == sna->render.vertex_used);
+ if (op->floats_per_vertex*3 > rem) {
+ DBG(("flushing vbo for %s: %d < %d\n",
+ __FUNCTION__, rem, 3*op->floats_per_vertex));
+ rem = gen3_get_rectangles__flush(sna, op->need_magic_ca_pass);
+ if (rem == 0)
+ return 0;
+ }
+
+ if (!gen3_rectangle_begin(sna, op)) {
+ DBG(("%s: flushing batch\n", __FUNCTION__));
+ return 0;
+ }
+
+ if (want > 1 && want * op->floats_per_vertex*3 > rem)
+ want = rem / (3*op->floats_per_vertex);
+ sna->render.vertex_index += 3*want;
+
+ assert(want);
+ assert(sna->render.vertex_index * op->floats_per_vertex <= ARRAY_SIZE(sna->render.vertex_data));
+ return want;
+}
+
+fastcall static void
+gen3_render_composite_blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", __FUNCTION__,
+ r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
+ r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
+ r->dst.x, r->dst.y, op->dst.x, op->dst.y,
+ r->width, r->height));
+
+ if (!gen3_get_rectangles(sna, op, 1)) {
+ gen3_emit_composite_state(sna, op);
+ gen3_get_rectangles(sna, op, 1);
+ }
+
+ op->prim_emit(sna, op, r);
+}
+
+static void
+gen3_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->src.offset[0], op->src.offset[1],
+ op->mask.offset[0], op->mask.offset[1],
+ op->dst.x, op->dst.y));
+
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = gen3_get_rectangles(sna, op, nbox);
+ if (nbox_this_time == 0) {
+ gen3_emit_composite_state(sna, op);
+ nbox_this_time = gen3_get_rectangles(sna, op, nbox);
+ }
+ nbox -= nbox_this_time;
+
+ do {
+ struct sna_composite_rectangles r;
+
+ DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1));
+
+ r.dst.x = box->x1; r.dst.y = box->y1;
+ r.width = box->x2 - box->x1;
+ r.height = box->y2 - box->y1;
+ r.src = r.mask = r.dst;
+
+ op->prim_emit(sna, op, &r);
+ box++;
+ } while (--nbox_this_time);
+ } while (nbox);
+}
+
+static void
+gen3_render_composite_done(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ assert(sna->render.op == op);
+
+ gen3_vertex_flush(sna);
+ sna->render.op = NULL;
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ sna_render_composite_redirect_done(sna, op);
+
+ if (op->src.bo)
+ kgem_bo_destroy(&sna->kgem, op->src.bo);
+ if (op->mask.bo)
+ kgem_bo_destroy(&sna->kgem, op->mask.bo);
+}
+
+static void
+gen3_render_reset(struct sna *sna)
+{
+ struct gen3_render_state *state = &sna->render_state.gen3;
+
+ state->need_invariant = TRUE;
+ state->current_dst = 0;
+ state->tex_count = 0;
+ state->last_drawrect_limit = ~0U;
+ state->last_target = 0;
+ state->last_blend = 0;
+ state->last_constants = 0;
+ state->last_sampler = 0;
+ state->last_shader = 0;
+ state->last_diffuse = 0xcc00ffee;
+ state->last_specular = 0xcc00ffee;
+
+ state->floats_per_vertex = 0;
+ state->last_floats_per_vertex = 0;
+ state->last_vertex_offset = 0;
+ state->vertex_offset = 0;
+
+ assert(sna->render.vertex_used == 0);
+ assert(sna->render.vertex_index == 0);
+ assert(sna->render.vertex_reloc[0] == 0);
+}
+
+static Bool gen3_composite_channel_set_format(struct sna_composite_channel *channel,
+ CARD32 format)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
+ if (gen3_tex_formats[i].fmt == format) {
+ channel->card_format = gen3_tex_formats[i].card_fmt;
+ channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+static Bool source_is_covered(PicturePtr picture,
+ int x, int y,
+ int width, int height)
+{
+ int x1, y1, x2, y2;
+
+ if (picture->repeat && picture->repeatType != RepeatNone)
+ return TRUE;
+
+ if (picture->pDrawable == NULL)
+ return FALSE;
+
+ if (picture->transform) {
+ pixman_box16_t sample;
+
+ sample.x1 = x;
+ sample.y1 = y;
+ sample.x2 = x + width;
+ sample.y2 = y + height;
+
+ pixman_transform_bounds(picture->transform, &sample);
+
+ x1 = sample.x1;
+ x2 = sample.x2;
+ y1 = sample.y1;
+ y2 = sample.y2;
+ } else {
+ x1 = x;
+ y1 = y;
+ x2 = x + width;
+ y2 = y + height;
+ }
+
+ return
+ x1 >= 0 && y1 >= 0 &&
+ x2 <= picture->pDrawable->width &&
+ y2 <= picture->pDrawable->height;
+}
+
+static Bool gen3_composite_channel_set_xformat(PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int x, int y,
+ int width, int height)
+{
+ int i;
+
+ if (PICT_FORMAT_A(picture->format) != 0)
+ return FALSE;
+
+ if (width == 0 || height == 0)
+ return FALSE;
+
+ if (!source_is_covered(picture, x, y, width, height))
+ return FALSE;
+
+ for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
+ if (gen3_tex_formats[i].xfmt == picture->format) {
+ channel->card_format = gen3_tex_formats[i].card_fmt;
+ channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
+ channel->alpha_fixup = true;
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static int
+gen3_init_solid(struct sna *sna,
+ struct sna_composite_channel *channel,
+ uint32_t color)
+{
+ channel->gen3.mode = color;
+ channel->gen3.type = SHADER_CONSTANT;
+ if (color == 0)
+ channel->gen3.type = SHADER_ZERO;
+ if ((color & 0xff000000) == 0xff000000)
+ channel->is_opaque = true;
+
+ /* for consistency */
+ channel->repeat = RepeatNormal;
+ channel->filter = PictFilterNearest;
+ channel->pict_format = PICT_a8r8g8b8;
+ channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
+
+ return 1;
+}
+
+static void gen3_composite_channel_convert(struct sna_composite_channel *channel)
+{
+ if (channel->gen3.type == SHADER_TEXTURE)
+ channel->repeat = gen3_texture_repeat(channel->repeat);
+ else
+ channel->repeat = gen3_gradient_repeat(channel->repeat);
+
+ channel->filter = gen3_filter(channel->filter);
+ if (channel->card_format == 0)
+ gen3_composite_channel_set_format(channel, channel->pict_format);
+}
+
+static Bool gen3_gradient_setup(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int16_t ox, int16_t oy)
+{
+ int16_t dx, dy;
+
+ if (picture->repeat == 0) {
+ channel->repeat = RepeatNone;
+ } else switch (picture->repeatType) {
+ case RepeatNone:
+ case RepeatNormal:
+ case RepeatPad:
+ case RepeatReflect:
+ channel->repeat = picture->repeatType;
+ break;
+ default:
+ return FALSE;
+ }
+
+ channel->bo =
+ sna_render_get_gradient(sna,
+ (PictGradient *)picture->pSourcePict);
+ if (channel->bo == NULL)
+ return FALSE;
+
+ channel->pict_format = PICT_a8r8g8b8;
+ channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
+ channel->filter = PictFilterBilinear;
+ channel->is_affine = sna_transform_is_affine(picture->transform);
+ if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
+ DBG(("%s: integer translation (%d, %d), removing\n",
+ __FUNCTION__, dx, dy));
+ ox += dx;
+ oy += dy;
+ channel->transform = NULL;
+ } else
+ channel->transform = picture->transform;
+ channel->width = channel->bo->pitch / 4;
+ channel->height = 1;
+ channel->offset[0] = ox;
+ channel->offset[1] = oy;
+ channel->scale[0] = channel->scale[1] = 1;
+ return TRUE;
+}
+
+static int
+gen3_init_linear(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_op *op,
+ struct sna_composite_channel *channel,
+ int ox, int oy)
+{
+ PictLinearGradient *linear =
+ (PictLinearGradient *)picture->pSourcePict;
+ float x0, y0, sf;
+ float dx, dy, offset;
+ int n;
+
+ DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
+ __FUNCTION__,
+ xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
+ xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));
+
+ if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
+ return 0;
+
+ dx = xFixedToDouble(linear->p2.x - linear->p1.x);
+ dy = xFixedToDouble(linear->p2.y - linear->p1.y);
+ sf = dx*dx + dy*dy;
+ dx /= sf;
+ dy /= sf;
+
+ x0 = xFixedToDouble(linear->p1.x);
+ y0 = xFixedToDouble(linear->p1.y);
+ offset = dx*x0 + dy*y0;
+
+ n = op->u.gen3.num_constants;
+ channel->gen3.constants = FS_C0 + n / 4;
+ op->u.gen3.constants[n++] = dx;
+ op->u.gen3.constants[n++] = dy;
+ op->u.gen3.constants[n++] = -offset;
+ op->u.gen3.constants[n++] = 0;
+
+ if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
+ return 0;
+
+ channel->gen3.type = SHADER_LINEAR;
+ op->u.gen3.num_constants = n;
+
+ DBG(("%s: dx=%f, dy=%f, offset=%f, constants=%d\n",
+ __FUNCTION__, dx, dy, -offset, channel->gen3.constants - FS_C0));
+ return 1;
+}
+
+static int
+gen3_init_radial(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_op *op,
+ struct sna_composite_channel *channel,
+ int ox, int oy)
+{
+ PictRadialGradient *radial = (PictRadialGradient *)picture->pSourcePict;
+ double dx, dy, dr, r1;
+ int n;
+
+ dx = xFixedToDouble(radial->c2.x - radial->c1.x);
+ dy = xFixedToDouble(radial->c2.y - radial->c1.y);
+ dr = xFixedToDouble(radial->c2.radius - radial->c1.radius);
+
+ r1 = xFixedToDouble(radial->c1.radius);
+
+ n = op->u.gen3.num_constants;
+ channel->gen3.constants = FS_C0 + n / 4;
+ if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) {
+ if (radial->c2.radius == radial->c1.radius)
+ return 0;
+
+ op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.x) / dr;
+ op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.y) / dr;
+ op->u.gen3.constants[n++] = 1. / dr;
+ op->u.gen3.constants[n++] = -r1 / dr;
+
+ channel->gen3.mode = RADIAL_ONE;
+ } else {
+ op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.x);
+ op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.y);
+ op->u.gen3.constants[n++] = r1;
+ op->u.gen3.constants[n++] = -4 * (dx*dx + dy*dy - dr*dr);
+
+ op->u.gen3.constants[n++] = -2 * dx;
+ op->u.gen3.constants[n++] = -2 * dy;
+ op->u.gen3.constants[n++] = -2 * r1 * dr;
+ op->u.gen3.constants[n++] = 1 / (2 * (dx*dx + dy*dy - dr*dr));
+
+ channel->gen3.mode = RADIAL_TWO;
+ }
+
+ if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
+ return 0;
+
+ channel->gen3.type = SHADER_RADIAL;
+ op->u.gen3.num_constants = n;
+ return 1;
+}
+
+static Bool
+gen3_composite_picture(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_op *op,
+ struct sna_composite_channel *channel,
+ int16_t x, int16_t y,
+ int16_t w, int16_t h,
+ int16_t dst_x, int16_t dst_y)
+{
+ PixmapPtr pixmap;
+ uint32_t color;
+ int16_t dx, dy;
+
+ DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
+ __FUNCTION__, x, y, w, h, dst_x, dst_y));
+
+ channel->card_format = 0;
+
+ if (picture->pDrawable == NULL) {
+ SourcePict *source = picture->pSourcePict;
+ int ret = 0;
+
+ switch (source->type) {
+ case SourcePictTypeSolidFill:
+ ret = gen3_init_solid(sna, channel,
+ source->solidFill.color);
+ break;
+
+ case SourcePictTypeLinear:
+ ret = gen3_init_linear(sna, picture, op, channel,
+ x - dst_x, y - dst_y);
+ break;
+
+ case SourcePictTypeRadial:
+ ret = gen3_init_radial(sna, picture, op, channel,
+ x - dst_x, y - dst_y);
+ break;
+ }
+
+ if (ret == 0)
+ ret = sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+ return ret;
+ }
+
+ if (sna_picture_is_solid(picture, &color))
+ return gen3_init_solid(sna, channel, color);
+
+ if (!gen3_check_repeat(picture->repeat))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ if (!gen3_check_filter(picture->filter))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
+ channel->filter = picture->filter;
+ channel->pict_format = picture->format;
+
+ pixmap = get_drawable_pixmap(picture->pDrawable);
+ get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
+
+ x += dx + picture->pDrawable->x;
+ y += dy + picture->pDrawable->y;
+
+ channel->is_affine = sna_transform_is_affine(picture->transform);
+ if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
+ DBG(("%s: integer translation (%d, %d), removing\n",
+ __FUNCTION__, dx, dy));
+ x += dx;
+ y += dy;
+ channel->transform = NULL;
+ channel->filter = PictFilterNearest;
+ } else
+ channel->transform = picture->transform;
+
+ if (!gen3_composite_channel_set_format(channel, picture->format) &&
+ !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
+ return sna_render_picture_convert(sna, picture, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+
+ if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048)
+ return sna_render_picture_extract(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ return sna_render_pixmap_bo(sna, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+}
+
+static inline Bool
+picture_is_cpu(PicturePtr picture)
+{
+ if (!picture->pDrawable)
+ return FALSE;
+
+ /* If it is a solid, try to use the render paths */
+ if (picture->pDrawable->width == 1 &&
+ picture->pDrawable->height == 1 &&
+ picture->repeat)
+ return FALSE;
+
+ return is_cpu(picture->pDrawable);
+}
+
+static Bool
+try_blt(struct sna *sna,
+ PicturePtr dst,
+ PicturePtr source,
+ int width, int height)
+{
+ if (sna->kgem.mode == KGEM_BLT) {
+ DBG(("%s: already performing BLT\n", __FUNCTION__));
+ return TRUE;
+ }
+
+ if (width > 2048 || height > 2048) {
+ DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
+ __FUNCTION__, width, height));
+ return TRUE;
+ }
+
+ /* If we can sample directly from user-space, do so */
+ if (sna->kgem.has_vmap)
+ return FALSE;
+
+ /* is the source picture only in cpu memory e.g. a shm pixmap? */
+ return picture_is_cpu(source);
+}
+
+static void
+gen3_align_vertex(struct sna *sna,
+ struct sna_composite_op *op)
+{
+ if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) {
+ DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
+ sna->render_state.gen3.last_floats_per_vertex,
+ op->floats_per_vertex,
+ sna->render.vertex_index,
+ (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
+ sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
+ sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
+ sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
+ }
+}
+
+static Bool
+gen3_composite_set_target(struct sna *sna,
+ struct sna_composite_op *op,
+ PicturePtr dst)
+{
+ struct sna_pixmap *priv;
+
+ op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
+ op->dst.format = dst->format;
+ op->dst.width = op->dst.pixmap->drawable.width;
+ op->dst.height = op->dst.pixmap->drawable.height;
+ priv = sna_pixmap(op->dst.pixmap);
+
+ op->dst.bo = NULL;
+ if (priv && priv->gpu_bo == NULL) {
+ op->dst.bo = priv->cpu_bo;
+ op->damage = &priv->cpu_damage;
+ }
+ if (op->dst.bo == NULL) {
+ priv = sna_pixmap_force_to_gpu(op->dst.pixmap);
+ if (priv == NULL)
+ return FALSE;
+
+ op->dst.bo = priv->gpu_bo;
+ if (!priv->gpu_only)
+ op->damage = &priv->gpu_damage;
+ }
+
+ get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
+ &op->dst.x, &op->dst.y);
+
+ DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d)\n",
+ __FUNCTION__,
+ op->dst.pixmap, (int)op->dst.format,
+ op->dst.width, op->dst.height,
+ op->dst.bo->pitch,
+ op->dst.x, op->dst.y));
+
+ return TRUE;
+}
+
+static inline uint8_t mult(uint32_t s, uint32_t m, int shift)
+{
+ s = (s >> shift) & 0xff;
+ m = (m >> shift) & 0xff;
+ return (s * m) >> 8;
+}
+
+static Bool
+gen3_render_composite(struct sna *sna,
+ uint8_t op,
+ PicturePtr src,
+ PicturePtr mask,
+ PicturePtr dst,
+ int16_t src_x, int16_t src_y,
+ int16_t mask_x, int16_t mask_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t width, int16_t height,
+ struct sna_composite_op *tmp)
+{
+ DBG(("%s()\n", __FUNCTION__));
+
+#if NO_COMPOSITE
+ return sna_blt_composite(sna, op,
+ src, dst,
+ src_x, src_y,
+ dst_x, dst_y,
+ width, height, tmp);
+#endif
+
+ /* Try to use the BLT engine unless it implies a
+ * 3D -> 2D context switch.
+ */
+ if (mask == NULL &&
+ try_blt(sna, dst, src, width, height) &&
+ sna_blt_composite(sna,
+ op, src, dst,
+ src_x, src_y,
+ dst_x, dst_y,
+ width, height,
+ tmp))
+ return TRUE;
+
+ if (op >= ARRAY_SIZE(gen3_blend_op)) {
+ DBG(("%s: fallback due to unhandled blend op: %d\n",
+ __FUNCTION__, op));
+ return FALSE;
+ }
+
+ if (!gen3_check_dst_format(dst->format)) {
+ DBG(("%s: fallback due to unhandled dst format: %x\n",
+ __FUNCTION__, dst->format));
+ return FALSE;
+ }
+
+ if (need_tiling(sna, width, height))
+ return sna_tiling_composite(sna,
+ op, src, mask, dst,
+ src_x, src_y,
+ mask_x, mask_y,
+ dst_x, dst_y,
+ width, height,
+ tmp);
+
+ memset(&tmp->u.gen3, 0, sizeof(tmp->u.gen3));
+
+ if (!gen3_composite_set_target(sna, tmp, dst)) {
+ DBG(("%s: unable to set render target\n",
+ __FUNCTION__));
+ return FALSE;
+ }
+
+ tmp->op = op;
+ tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
+ if (tmp->dst.width > 2048 || tmp->dst.height > 2048 ||
+ !gen3_check_pitch_3d(tmp->dst.bo)) {
+ if (!sna_render_composite_redirect(sna, tmp,
+ dst_x, dst_y, width, height))
+ return FALSE;
+ }
+
+ tmp->src.gen3.type = SHADER_TEXTURE;
+ tmp->src.is_affine = TRUE;
+ DBG(("%s: preparing source\n", __FUNCTION__));
+ switch (gen3_composite_picture(sna, src, tmp, &tmp->src,
+ src_x, src_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ goto cleanup_dst;
+ case 0:
+ tmp->src.gen3.type = SHADER_ZERO;
+ break;
+ case 1:
+ gen3_composite_channel_convert(&tmp->src);
+ break;
+ }
+ DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.gen3.type));
+
+ tmp->mask.gen3.type = SHADER_NONE;
+ tmp->mask.is_affine = TRUE;
+ tmp->need_magic_ca_pass = FALSE;
+ tmp->has_component_alpha = FALSE;
+ if (mask && tmp->src.gen3.type != SHADER_ZERO) {
+ tmp->mask.gen3.type = SHADER_TEXTURE;
+ DBG(("%s: preparing mask\n", __FUNCTION__));
+ switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask,
+ mask_x, mask_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ goto cleanup_src;
+ case 0:
+ tmp->mask.gen3.type = SHADER_ZERO;
+ break;
+ case 1:
+ gen3_composite_channel_convert(&tmp->mask);
+ break;
+ }
+ DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.gen3.type));
+
+ if (tmp->mask.gen3.type == SHADER_ZERO) {
+ if (tmp->src.bo) {
+ kgem_bo_destroy(&sna->kgem,
+ tmp->src.bo);
+ tmp->src.bo = NULL;
+ }
+ tmp->src.gen3.type = SHADER_ZERO;
+ tmp->mask.gen3.type = SHADER_NONE;
+ }
+
+ if (tmp->mask.gen3.type != SHADER_NONE &&
+ mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
+ /* Check if it's component alpha that relies on a source alpha
+ * and on the source value. We can only get one of those
+ * into the single source value that we get to blend with.
+ */
+ tmp->has_component_alpha = TRUE;
+ if (tmp->mask.gen3.type == SHADER_CONSTANT &&
+ tmp->mask.gen3.mode == 0xffffffff) {
+ tmp->mask.gen3.type = SHADER_NONE;
+ tmp->has_component_alpha = FALSE;
+ } else if (tmp->src.gen3.type == SHADER_CONSTANT &&
+ tmp->src.gen3.mode == 0xffffffff) {
+ tmp->src = tmp->mask;
+ tmp->mask.gen3.type = SHADER_NONE;
+ tmp->mask.bo = NULL;
+ tmp->has_component_alpha = FALSE;
+ } else if (tmp->src.gen3.type == SHADER_CONSTANT &&
+ tmp->mask.gen3.type == SHADER_CONSTANT) {
+ uint32_t a,r,g,b;
+
+ a = mult(tmp->src.gen3.mode,
+ tmp->mask.gen3.mode,
+ 24);
+ r = mult(tmp->src.gen3.mode,
+ tmp->mask.gen3.mode,
+ 16);
+ g = mult(tmp->src.gen3.mode,
+ tmp->mask.gen3.mode,
+ 8);
+ b = mult(tmp->src.gen3.mode,
+ tmp->mask.gen3.mode,
+ 0);
+
+ DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
+ __FUNCTION__,
+ tmp->src.gen3.mode,
+ tmp->mask.gen3.mode,
+ a << 24 | r << 16 | g << 8 | b));
+
+ tmp->src.gen3.mode =
+ a << 24 | r << 16 | g << 8 | b;
+
+ tmp->mask.gen3.type = SHADER_NONE;
+ tmp->has_component_alpha = FALSE;
+ } else if (gen3_blend_op[op].src_alpha &&
+ (gen3_blend_op[op].src_blend != BLENDFACT_ZERO)) {
+ if (op != PictOpOver)
+ goto cleanup_mask;
+
+ tmp->need_magic_ca_pass = TRUE;
+ tmp->op = PictOpOutReverse;
+ sna->render.vertex_start = sna->render.vertex_index;
+ }
+ }
+ }
+ DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__,
+ tmp->src.gen3.type, tmp->mask.gen3.type,
+ tmp->src.is_affine, tmp->mask.is_affine));
+
+ tmp->prim_emit = gen3_emit_composite_primitive;
+ if (tmp->mask.gen3.type == SHADER_NONE ||
+ tmp->mask.gen3.type == SHADER_CONSTANT) {
+ switch (tmp->src.gen3.type) {
+ case SHADER_NONE:
+ case SHADER_CONSTANT:
+ tmp->prim_emit = gen3_emit_composite_primitive_constant;
+ break;
+ case SHADER_LINEAR:
+ case SHADER_RADIAL:
+ if (tmp->src.transform == NULL)
+ tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
+ else if (tmp->src.is_affine)
+ tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
+ break;
+ case SHADER_TEXTURE:
+ if (tmp->src.transform == NULL)
+ tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
+ else if (tmp->src.is_affine)
+ tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
+ break;
+ }
+ } else if (tmp->mask.gen3.type == SHADER_TEXTURE) {
+ if (tmp->mask.transform == NULL) {
+ if (tmp->src.gen3.type == SHADER_CONSTANT)
+ tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
+ else if (tmp->src.transform == NULL)
+ tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
+ else if (tmp->src.is_affine)
+ tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask;
+ }
+ }
+
+ tmp->floats_per_vertex = 2;
+ if (tmp->src.gen3.type != SHADER_CONSTANT &&
+ tmp->src.gen3.type != SHADER_ZERO)
+ tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 3;
+ if (tmp->mask.gen3.type != SHADER_NONE &&
+ tmp->mask.gen3.type != SHADER_CONSTANT)
+ tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 3;
+ DBG(("%s: floats_per_vertex = 2 + %d + %d = %d\n", __FUNCTION__,
+ (tmp->src.gen3.type != SHADER_CONSTANT &&
+ tmp->src.gen3.type != SHADER_ZERO) ?
+ tmp->src.is_affine ? 2 : 3 : 0,
+ (tmp->mask.gen3.type != SHADER_NONE &&
+ tmp->mask.gen3.type != SHADER_CONSTANT) ?
+ tmp->mask.is_affine ? 2 : 3 : 0,
+ tmp->floats_per_vertex));
+
+ tmp->blt = gen3_render_composite_blt;
+ tmp->boxes = gen3_render_composite_boxes;
+ tmp->done = gen3_render_composite_done;
+
+ if (!kgem_check_bo(&sna->kgem, tmp->dst.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->src.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->mask.bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo)) {
+ if (tmp->src.bo == tmp->dst.bo || tmp->mask.bo == tmp->dst.bo) {
+ kgem_emit_flush(&sna->kgem);
+ } else {
+ OUT_BATCH(_3DSTATE_MODES_5_CMD |
+ PIPELINE_FLUSH_RENDER_CACHE |
+ PIPELINE_FLUSH_TEXTURE_CACHE);
+ kgem_clear_dirty(&sna->kgem);
+ }
+ }
+
+ gen3_emit_composite_state(sna, tmp);
+ gen3_align_vertex(sna, tmp);
+
+ sna->render.op = tmp;
+ return TRUE;
+
+cleanup_mask:
+ if (tmp->mask.bo)
+ kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
+cleanup_src:
+ if (tmp->src.bo)
+ kgem_bo_destroy(&sna->kgem, tmp->src.bo);
+cleanup_dst:
+ if (tmp->redirect.real_bo)
+ kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
+ return FALSE;
+}
+
+static void
+gen3_emit_composite_spans_vertex(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ int16_t x, int16_t y,
+ float opacity)
+{
+ gen3_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
+ gen3_emit_composite_texcoord(sna, &op->base.src, x, y);
+ OUT_VERTEX(opacity);
+}
+
+static void
+gen3_emit_composite_spans_primitive_zero(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ float *v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 6;
+
+ v[0] = op->base.dst.x + box->x2;
+ v[1] = op->base.dst.y + box->y2;
+
+ v[2] = op->base.dst.x + box->x1;
+ v[3] = v[1];
+
+ v[4] = v[2];
+ v[5] = op->base.dst.x + box->y1;
+}
+
+static void
+gen3_emit_composite_spans_primitive_constant(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ float *v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ v[0] = op->base.dst.x + box->x2;
+ v[1] = op->base.dst.y + box->y2;
+ v[2] = opacity;
+
+ v[3] = op->base.dst.x + box->x1;
+ v[4] = v[1];
+ v[5] = opacity;
+
+ v[6] = v[3];
+ v[7] = op->base.dst.y + box->y1;
+ v[8] = opacity;
+}
+
+static void
+gen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ float *v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 15;
+
+ v[0] = op->base.dst.x + box->x2;
+ v[1] = op->base.dst.y + box->y2;
+ v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
+ v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
+ v[4] = opacity;
+
+ v[5] = op->base.dst.x + box->x1;
+ v[6] = v[1];
+ v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
+ v[8] = v[3];
+ v[9] = opacity;
+
+ v[10] = v[5];
+ v[11] = op->base.dst.y + box->y1;
+ v[12] = v[7];
+ v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
+ v[14] = opacity;
+}
+
+static void
+gen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ PictTransform *transform = op->base.src.transform;
+ float x, y, *v;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 15;
+
+ v[0] = op->base.dst.x + box->x2;
+ v[6] = v[1] = op->base.dst.y + box->y2;
+ v[10] = v[5] = op->base.dst.x + box->x1;
+ v[11] = op->base.dst.y + box->y1;
+ v[4] = opacity;
+ v[9] = opacity;
+ v[14] = opacity;
+
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x2,
+ (int)op->base.src.offset[1] + box->y2,
+ transform,
+ &x, &y);
+ v[2] = x * op->base.src.scale[0];
+ v[3] = y * op->base.src.scale[1];
+
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
+ (int)op->base.src.offset[1] + box->y2,
+ transform,
+ &x, &y);
+ v[7] = x * op->base.src.scale[0];
+ v[8] = y * op->base.src.scale[1];
+
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
+ (int)op->base.src.offset[1] + box->y1,
+ transform,
+ &x, &y);
+ v[12] = x * op->base.src.scale[0];
+ v[13] = y * op->base.src.scale[1];
+}
+
+static void
+gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ float *v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 15;
+
+ v[0] = op->base.dst.x + box->x2;
+ v[1] = op->base.dst.y + box->y2;
+ v[2] = op->base.src.offset[0] + box->x2;
+ v[3] = op->base.src.offset[1] + box->y2;
+ v[4] = opacity;
+
+ v[5] = op->base.dst.x + box->x1;
+ v[6] = v[1];
+ v[7] = op->base.src.offset[0] + box->x1;
+ v[8] = v[3];
+ v[9] = opacity;
+
+ v[10] = v[5];
+ v[11] = op->base.dst.y + box->y1;
+ v[12] = v[7];
+ v[13] = op->base.src.offset[1] + box->y1;
+ v[14] = opacity;
+}
+
+static void
+gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ PictTransform *transform = op->base.src.transform;
+ float *v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 15;
+
+ v[0] = op->base.dst.x + box->x2;
+ v[1] = op->base.dst.y + box->y2;
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x2,
+ (int)op->base.src.offset[1] + box->y2,
+ transform,
+ &v[2], &v[3]);
+ v[4] = opacity;
+
+ v[5] = op->base.dst.x + box->x1;
+ v[6] = v[1];
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
+ (int)op->base.src.offset[1] + box->y2,
+ transform,
+ &v[7], &v[8]);
+ v[9] = opacity;
+
+ v[10] = v[5];
+ v[11] = op->base.dst.y + box->y1;
+ _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
+ (int)op->base.src.offset[1] + box->y1,
+ transform,
+ &v[12], &v[13]);
+ v[14] = opacity;
+}
+
+static void
+gen3_emit_composite_spans_primitive(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity)
+{
+ gen3_emit_composite_spans_vertex(sna, op,
+ box->x2, box->y2,
+ opacity);
+ gen3_emit_composite_spans_vertex(sna, op,
+ box->x1, box->y2,
+ opacity);
+ gen3_emit_composite_spans_vertex(sna, op,
+ box->x1, box->y1,
+ opacity);
+}
+
+static void
+gen3_render_composite_spans_boxes(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box, int nbox,
+ float opacity)
+{
+ DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
+ __FUNCTION__, nbox,
+ op->base.src.offset[0], op->base.src.offset[1],
+ opacity,
+ op->base.dst.x, op->base.dst.y));
+
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
+ if (nbox_this_time == 0) {
+ gen3_emit_composite_state(sna, &op->base);
+ nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
+ }
+ nbox -= nbox_this_time;
+
+ do {
+ DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1));
+
+ op->prim_emit(sna, op, box++, opacity);
+ } while (--nbox_this_time);
+ } while (nbox);
+}
+
+static void
+gen3_render_composite_spans_done(struct sna *sna,
+ const struct sna_composite_spans_op *op)
+{
+ gen3_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ sna_render_composite_redirect_done(sna, &op->base);
+ if (op->base.src.bo)
+ kgem_bo_destroy(&sna->kgem, op->base.src.bo);
+}
+
+static Bool
+gen3_render_composite_spans(struct sna *sna,
+ uint8_t op,
+ PicturePtr src,
+ PicturePtr dst,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t width, int16_t height,
+ struct sna_composite_spans_op *tmp)
+{
+ DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
+ src_x, src_y, dst_x, dst_y, width, height));
+
+#if NO_COMPOSITE_SPANS
+ return FALSE;
+#endif
+
+ if (op >= ARRAY_SIZE(gen3_blend_op)) {
+ DBG(("%s: fallback due to unhandled blend op: %d\n",
+ __FUNCTION__, op));
+ return FALSE;
+ }
+
+ if (!gen3_check_dst_format(dst->format)) {
+ DBG(("%s: fallback due to unhandled dst format: %x\n",
+ __FUNCTION__, dst->format));
+ return FALSE;
+ }
+
+ if (need_tiling(sna, width, height))
+ return FALSE;
+
+ if (!gen3_composite_set_target(sna, &tmp->base, dst)) {
+ DBG(("%s: unable to set render target\n",
+ __FUNCTION__));
+ return FALSE;
+ }
+
+ tmp->base.op = op;
+ tmp->base.rb_reversed = gen3_dst_rb_reversed(tmp->base.dst.format);
+ if (tmp->base.dst.width > 2048 || tmp->base.dst.height > 2048 ||
+ !gen3_check_pitch_3d(tmp->base.dst.bo)) {
+ if (!sna_render_composite_redirect(sna, &tmp->base,
+ dst_x, dst_y, width, height))
+ return FALSE;
+ }
+
+ tmp->base.src.gen3.type = SHADER_TEXTURE;
+ tmp->base.src.is_affine = TRUE;
+ DBG(("%s: preparing source\n", __FUNCTION__));
+ switch (gen3_composite_picture(sna, src, &tmp->base, &tmp->base.src,
+ src_x, src_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ goto cleanup_dst;
+ case 0:
+ tmp->base.src.gen3.type = SHADER_ZERO;
+ break;
+ case 1:
+ gen3_composite_channel_convert(&tmp->base.src);
+ break;
+ }
+ DBG(("%s: source type=%d\n", __FUNCTION__, tmp->base.src.gen3.type));
+
+ if (tmp->base.src.gen3.type != SHADER_ZERO)
+ tmp->base.mask.gen3.type = SHADER_OPACITY;
+
+ tmp->prim_emit = gen3_emit_composite_spans_primitive;
+ switch (tmp->base.src.gen3.type) {
+ case SHADER_NONE:
+ assert(0);
+ case SHADER_ZERO:
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_zero;
+ break;
+ case SHADER_CONSTANT:
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_constant;
+ break;
+ case SHADER_LINEAR:
+ case SHADER_RADIAL:
+ if (tmp->base.src.transform == NULL)
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient;
+ else if (tmp->base.src.is_affine)
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient;
+ break;
+ case SHADER_TEXTURE:
+ if (tmp->base.src.transform == NULL)
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source;
+ else if (tmp->base.src.is_affine)
+ tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source;
+ break;
+ }
+
+ tmp->base.floats_per_vertex = 2;
+ if (tmp->base.src.gen3.type != SHADER_CONSTANT &&
+ tmp->base.src.gen3.type != SHADER_ZERO)
+ tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
+ tmp->base.floats_per_vertex +=
+ tmp->base.mask.gen3.type == SHADER_OPACITY;
+
+ tmp->boxes = gen3_render_composite_spans_boxes;
+ tmp->done = gen3_render_composite_spans_done;
+
+ if (!kgem_check_bo(&sna->kgem, tmp->base.dst.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->base.src.bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(tmp->base.src.bo)) {
+ if (tmp->base.src.bo == tmp->base.dst.bo) {
+ kgem_emit_flush(&sna->kgem);
+ } else {
+ OUT_BATCH(_3DSTATE_MODES_5_CMD |
+ PIPELINE_FLUSH_RENDER_CACHE |
+ PIPELINE_FLUSH_TEXTURE_CACHE);
+ kgem_clear_dirty(&sna->kgem);
+ }
+ }
+
+ gen3_emit_composite_state(sna, &tmp->base);
+ gen3_align_vertex(sna, &tmp->base);
+ return TRUE;
+
+cleanup_dst:
+ if (tmp->base.redirect.real_bo)
+ kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
+ return FALSE;
+}
+
+static void
+gen3_emit_video_state(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ PixmapPtr pixmap,
+ struct kgem_bo *dst_bo,
+ int width, int height)
+{
+ uint32_t shader_offset;
+ uint32_t ms3, s5;
+
+ /* draw rect -- just clipping */
+ OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
+ OUT_BATCH(DRAW_DITHER_OFS_X(pixmap->drawable.x & 3) |
+ DRAW_DITHER_OFS_Y(pixmap->drawable.y & 3));
+ OUT_BATCH(0x00000000); /* ymin, xmin */
+ /* ymax, xmax */
+ OUT_BATCH((width - 1) | (height - 1) << 16);
+ OUT_BATCH(0x00000000); /* yorigin, xorigin */
+
+ OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+ I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(5) | I1_LOAD_S(6) |
+ 3);
+ OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
+ OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
+ S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
+ S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
+ s5 = 0x0;
+ if (pixmap->drawable.depth < 24)
+ s5 |= S5_COLOR_DITHER_ENABLE;
+ OUT_BATCH(s5);
+ OUT_BATCH((2 << S6_DEPTH_TEST_FUNC_SHIFT) |
+ (2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
+ (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
+ S6_COLOR_WRITE_ENABLE | (2 << S6_TRISTRIP_PV_SHIFT));
+
+ OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD);
+ OUT_BATCH(0x00000000);
+
+ OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
+ OUT_BATCH(gen3_get_dst_format(sna_format_for_depth(pixmap->drawable.depth)));
+
+ /* front buffer, pitch, offset */
+ OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
+ OUT_BATCH(BUF_3D_ID_COLOR_BACK |
+ gen3_buf_tiling(dst_bo->tiling) |
+ dst_bo->pitch);
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER,
+ 0));
+
+ if (!is_planar_fourcc(frame->id)) {
+ OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
+ OUT_BATCH(0x0000001); /* constant 0 */
+ /* constant 0: brightness/contrast */
+ OUT_BATCH_F(video->brightness / 128.0);
+ OUT_BATCH_F(video->contrast / 255.0);
+ OUT_BATCH_F(0.0);
+ OUT_BATCH_F(0.0);
+
+ OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
+ OUT_BATCH(0x00000001);
+ OUT_BATCH(SS2_COLORSPACE_CONVERSION |
+ (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
+ (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
+ OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
+ SS3_TCX_ADDR_MODE_SHIFT) |
+ (TEXCOORDMODE_CLAMP_EDGE <<
+ SS3_TCY_ADDR_MODE_SHIFT) |
+ (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
+ SS3_NORMALIZED_COORDS);
+ OUT_BATCH(0x00000000);
+
+ OUT_BATCH(_3DSTATE_MAP_STATE | 3);
+ OUT_BATCH(0x00000001); /* texture map #1 */
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ frame->bo,
+ I915_GEM_DOMAIN_SAMPLER << 16,
+ frame->YBufOffset));
+
+ ms3 = MAPSURF_422;
+ switch (frame->id) {
+ case FOURCC_YUY2:
+ ms3 |= MT_422_YCRCB_NORMAL;
+ break;
+ case FOURCC_UYVY:
+ ms3 |= MT_422_YCRCB_SWAPY;
+ break;
+ }
+ ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
+ ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
+ OUT_BATCH(ms3);
+ OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
+
+ shader_offset = sna->kgem.nbatch++;
+
+ gen3_fs_dcl(FS_S0);
+ gen3_fs_dcl(FS_T0);
+ gen3_fs_texld(FS_OC, FS_S0, FS_T0);
+ if (video->brightness != 0) {
+ gen3_fs_add(FS_OC,
+ gen3_fs_operand_reg(FS_OC),
+ gen3_fs_operand(FS_C0, X, X, X, ZERO));
+ }
+ } else {
+ /* For the planar formats, we set up three samplers --
+ * one for each plane, in a Y8 format. Because I
+ * couldn't get the special PLANAR_TO_PACKED
+ * shader setup to work, I did the manual pixel shader:
+ *
+ * y' = y - .0625
+ * u' = u - .5
+ * v' = v - .5;
+ *
+ * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
+ * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
+ * b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
+ *
+ * register assignment:
+ * r0 = (y',u',v',0)
+ * r1 = (y,y,y,y)
+ * r2 = (u,u,u,u)
+ * r3 = (v,v,v,v)
+ * OC = (r,g,b,1)
+ */
+ OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
+ OUT_BATCH(0x000001f); /* constants 0-4 */
+ /* constant 0: normalization offsets */
+ OUT_BATCH_F(-0.0625);
+ OUT_BATCH_F(-0.5);
+ OUT_BATCH_F(-0.5);
+ OUT_BATCH_F(0.0);
+ /* constant 1: r coefficients */
+ OUT_BATCH_F(1.1643);
+ OUT_BATCH_F(0.0);
+ OUT_BATCH_F(1.5958);
+ OUT_BATCH_F(0.0);
+ /* constant 2: g coefficients */
+ OUT_BATCH_F(1.1643);
+ OUT_BATCH_F(-0.39173);
+ OUT_BATCH_F(-0.81290);
+ OUT_BATCH_F(0.0);
+ /* constant 3: b coefficients */
+ OUT_BATCH_F(1.1643);
+ OUT_BATCH_F(2.017);
+ OUT_BATCH_F(0.0);
+ OUT_BATCH_F(0.0);
+ /* constant 4: brightness/contrast */
+ OUT_BATCH_F(video->brightness / 128.0);
+ OUT_BATCH_F(video->contrast / 255.0);
+ OUT_BATCH_F(0.0);
+ OUT_BATCH_F(0.0);
+
+ OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
+ OUT_BATCH(0x00000007);
+ /* sampler 0 */
+ OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
+ (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
+ OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
+ SS3_TCX_ADDR_MODE_SHIFT) |
+ (TEXCOORDMODE_CLAMP_EDGE <<
+ SS3_TCY_ADDR_MODE_SHIFT) |
+ (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
+ SS3_NORMALIZED_COORDS);
+ OUT_BATCH(0x00000000);
+ /* sampler 1 */
+ OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
+ (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
+ OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
+ SS3_TCX_ADDR_MODE_SHIFT) |
+ (TEXCOORDMODE_CLAMP_EDGE <<
+ SS3_TCY_ADDR_MODE_SHIFT) |
+ (1 << SS3_TEXTUREMAP_INDEX_SHIFT) |
+ SS3_NORMALIZED_COORDS);
+ OUT_BATCH(0x00000000);
+ /* sampler 2 */
+ OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
+ (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
+ OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
+ SS3_TCX_ADDR_MODE_SHIFT) |
+ (TEXCOORDMODE_CLAMP_EDGE <<
+ SS3_TCY_ADDR_MODE_SHIFT) |
+ (2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
+ SS3_NORMALIZED_COORDS);
+ OUT_BATCH(0x00000000);
+
+ OUT_BATCH(_3DSTATE_MAP_STATE | 9);
+ OUT_BATCH(0x00000007);
+
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ frame->bo,
+ I915_GEM_DOMAIN_SAMPLER << 16,
+ frame->YBufOffset));
+
+ ms3 = MAPSURF_8BIT | MT_8BIT_I8;
+ ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
+ ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
+ OUT_BATCH(ms3);
+ /* check to see if Y has special pitch than normal
+ * double u/v pitch, e.g i915 XvMC hw requires at
+ * least 1K alignment, so Y pitch might
+ * be same as U/V's.*/
+ if (frame->pitch[1])
+ OUT_BATCH(((frame->pitch[1] / 4) - 1) << MS4_PITCH_SHIFT);
+ else
+ OUT_BATCH(((frame->pitch[0] * 2 / 4) - 1) << MS4_PITCH_SHIFT);
+
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ frame->bo,
+ I915_GEM_DOMAIN_SAMPLER << 16,
+ frame->UBufOffset));
+
+ ms3 = MAPSURF_8BIT | MT_8BIT_I8;
+ ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
+ ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
+ OUT_BATCH(ms3);
+ OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
+
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+ frame->bo,
+ I915_GEM_DOMAIN_SAMPLER << 16,
+ frame->VBufOffset));
+
+ ms3 = MAPSURF_8BIT | MT_8BIT_I8;
+ ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
+ ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
+ OUT_BATCH(ms3);
+ OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
+
+ shader_offset = sna->kgem.nbatch++;
+
+ /* Declare samplers */
+ gen3_fs_dcl(FS_S0); /* Y */
+ gen3_fs_dcl(FS_S1); /* U */
+ gen3_fs_dcl(FS_S2); /* V */
+ gen3_fs_dcl(FS_T0); /* normalized coords */
+
+ /* Load samplers to temporaries. */
+ gen3_fs_texld(FS_R1, FS_S0, FS_T0);
+ gen3_fs_texld(FS_R2, FS_S1, FS_T0);
+ gen3_fs_texld(FS_R3, FS_S2, FS_T0);
+
+ /* Move the sampled YUV data in R[123] to the first
+ * 3 channels of R0.
+ */
+ gen3_fs_mov_masked(FS_R0, MASK_X,
+ gen3_fs_operand_reg(FS_R1));
+ gen3_fs_mov_masked(FS_R0, MASK_Y,
+ gen3_fs_operand_reg(FS_R2));
+ gen3_fs_mov_masked(FS_R0, MASK_Z,
+ gen3_fs_operand_reg(FS_R3));
+
+ /* Normalize the YUV data */
+ gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0),
+ gen3_fs_operand_reg(FS_C0));
+ /* dot-product the YUV data in R0 by the vectors of
+ * coefficients for calculating R, G, and B, storing
+ * the results in the R, G, or B channels of the output
+ * color. The OC results are implicitly clamped
+ * at the end of the program.
+ */
+ gen3_fs_dp3(FS_OC, MASK_X,
+ gen3_fs_operand_reg(FS_R0),
+ gen3_fs_operand_reg(FS_C1));
+ gen3_fs_dp3(FS_OC, MASK_Y,
+ gen3_fs_operand_reg(FS_R0),
+ gen3_fs_operand_reg(FS_C2));
+ gen3_fs_dp3(FS_OC, MASK_Z,
+ gen3_fs_operand_reg(FS_R0),
+ gen3_fs_operand_reg(FS_C3));
+ /* Set alpha of the output to 1.0, by wiring W to 1
+ * and not actually using the source.
+ */
+ gen3_fs_mov_masked(FS_OC, MASK_W,
+ gen3_fs_operand_one());
+
+ if (video->brightness != 0) {
+ gen3_fs_add(FS_OC,
+ gen3_fs_operand_reg(FS_OC),
+ gen3_fs_operand(FS_C4, X, X, X, ZERO));
+ }
+ }
+
+ sna->kgem.batch[shader_offset] =
+ _3DSTATE_PIXEL_SHADER_PROGRAM |
+ (sna->kgem.nbatch - shader_offset - 2);
+
+ /* video is the last operation in the batch, so state gets reset
+ * afterwards automatically
+ * gen3_reset();
+ */
+}
+
+static void
+gen3_video_get_batch(struct sna *sna)
+{
+ if (!kgem_check_batch(&sna->kgem, 120)) {
+ DBG(("%s: flushing batch: nbatch %d < %d\n",
+ __FUNCTION__,
+ batch_space(sna), 120));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->kgem.nreloc + 4 > KGEM_RELOC_SIZE(&sna->kgem)) {
+ DBG(("%s: flushing batch: reloc %d >= %d\n",
+ __FUNCTION__,
+ sna->kgem.nreloc + 4,
+ (int)KGEM_RELOC_SIZE(&sna->kgem)));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->kgem.nexec + 2 > KGEM_EXEC_SIZE(&sna->kgem)) {
+ DBG(("%s: flushing batch: exec %d >= %d\n",
+ __FUNCTION__,
+ sna->kgem.nexec + 2,
+ (int)KGEM_EXEC_SIZE(&sna->kgem)));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->render_state.gen3.need_invariant)
+ gen3_emit_invariant(sna);
+}
+
+static int
+gen3_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex)
+{
+ int size = floats_per_vertex * 3;
+ int rem = batch_space(sna) - 1;
+
+ if (size * want > rem)
+ want = rem / size;
+
+ return want;
+}
+
+static Bool
+gen3_render_video(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ RegionPtr dstRegion,
+ short src_w, short src_h,
+ short drw_w, short drw_h,
+ PixmapPtr pixmap)
+{
+ BoxPtr pbox = REGION_RECTS(dstRegion);
+ int nbox = REGION_NUM_RECTS(dstRegion);
+ int dxo = dstRegion->extents.x1;
+ int dyo = dstRegion->extents.y1;
+ int width = dstRegion->extents.x2 - dxo;
+ int height = dstRegion->extents.y2 - dyo;
+ float src_scale_x, src_scale_y;
+ int pix_xoff, pix_yoff;
+ struct kgem_bo *dst_bo;
+ int copy = 0;
+
+ DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, src_w, src_h, drw_w, drw_h));
+
+ if (pixmap->drawable.width > 2048 ||
+ pixmap->drawable.height > 2048 ||
+ !gen3_check_pitch_3d(sna_pixmap_get_bo(pixmap))) {
+ int bpp = pixmap->drawable.bitsPerPixel;
+
+ dst_bo = kgem_create_2d(&sna->kgem,
+ width, height, bpp,
+ kgem_choose_tiling(&sna->kgem,
+ I915_TILING_X,
+ width, height, bpp),
+ 0);
+ if (!dst_bo)
+ return FALSE;
+
+ pix_xoff = -dxo;
+ pix_yoff = -dyo;
+ copy = 1;
+ } else {
+ dst_bo = sna_pixmap_get_bo(pixmap);
+
+ width = pixmap->drawable.width;
+ height = pixmap->drawable.height;
+
+ /* Set up the offset for translating from the given region
+ * (in screen coordinates) to the backing pixmap.
+ */
+#ifdef COMPOSITE
+ pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
+ pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
+#else
+ pix_xoff = 0;
+ pix_yoff = 0;
+#endif
+ }
+
+ src_scale_x = ((float)src_w / frame->width) / drw_w;
+ src_scale_y = ((float)src_h / frame->height) / drw_h;
+
+ DBG(("%s: src offset=(%d, %d), scale=(%f, %f), dst offset=(%d, %d)\n",
+ __FUNCTION__,
+ dxo, dyo, src_scale_x, src_scale_y, pix_xoff, pix_yoff));
+
+ gen3_video_get_batch(sna);
+ gen3_emit_video_state(sna, video, frame, pixmap,
+ dst_bo, width, height);
+ do {
+ int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
+ if (nbox_this_time == 0) {
+ gen3_video_get_batch(sna);
+ gen3_emit_video_state(sna, video, frame, pixmap,
+ dst_bo, width, height);
+ nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
+ }
+ nbox -= nbox_this_time;
+
+ OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1));
+ while (nbox_this_time--) {
+ int box_x1 = pbox->x1;
+ int box_y1 = pbox->y1;
+ int box_x2 = pbox->x2;
+ int box_y2 = pbox->y2;
+
+ pbox++;
+
+ DBG(("%s: box (%d, %d), (%d, %d)\n",
+ __FUNCTION__, box_x1, box_y1, box_x2, box_y2));
+
+ /* bottom right */
+ OUT_BATCH_F(box_x2 + pix_xoff);
+ OUT_BATCH_F(box_y2 + pix_yoff);
+ OUT_BATCH_F((box_x2 - dxo) * src_scale_x);
+ OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
+
+ /* bottom left */
+ OUT_BATCH_F(box_x1 + pix_xoff);
+ OUT_BATCH_F(box_y2 + pix_yoff);
+ OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
+ OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
+
+ /* top left */
+ OUT_BATCH_F(box_x1 + pix_xoff);
+ OUT_BATCH_F(box_y1 + pix_yoff);
+ OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
+ OUT_BATCH_F((box_y1 - dyo) * src_scale_y);
+ }
+ } while (nbox);
+
+ if (copy) {
+#ifdef COMPOSITE
+ pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
+ pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
+#else
+ pix_xoff = 0;
+ pix_yoff = 0;
+#endif
+ sna_blt_copy_boxes(sna, GXcopy,
+ dst_bo, -dxo, -dyo,
+ sna_pixmap_get_bo(pixmap), pix_xoff, pix_yoff,
+ pixmap->drawable.bitsPerPixel,
+ REGION_RECTS(dstRegion),
+ REGION_NUM_RECTS(dstRegion));
+
+ kgem_bo_destroy(&sna->kgem, dst_bo);
+ }
+
+ return TRUE;
+}
+
+static void
+gen3_render_copy_setup_source(struct sna *sna,
+ struct sna_composite_channel *channel,
+ PixmapPtr pixmap,
+ struct kgem_bo *bo)
+{
+ channel->gen3.type = SHADER_TEXTURE;
+ channel->filter = gen3_filter(PictFilterNearest);
+ channel->repeat = gen3_texture_repeat(RepeatNone);
+ channel->width = pixmap->drawable.width;
+ channel->height = pixmap->drawable.height;
+ channel->scale[0] = 1./pixmap->drawable.width;
+ channel->scale[1] = 1./pixmap->drawable.height;
+ channel->offset[0] = 0;
+ channel->offset[1] = 0;
+ gen3_composite_channel_set_format(channel,
+ sna_format_for_depth(pixmap->drawable.depth));
+ channel->bo = bo;
+ channel->is_affine = 1;
+}
+
+static Bool
+gen3_render_copy_boxes(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+ PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const BoxRec *box, int n)
+{
+ struct sna_composite_op tmp;
+
+#if NO_COPY_BOXES
+ return sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n);
+#endif
+
+ DBG(("%s (%d, %d)->(%d, %d) x %d\n",
+ __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
+
+ if (sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) ||
+ src_bo == dst_bo || /* XXX handle overlap using 3D ? */
+ src_bo->pitch > 8192 ||
+ src->drawable.width > 2048 ||
+ src->drawable.height > 2048 ||
+ dst_bo->pitch > 8192 ||
+ dst->drawable.width > 2048 ||
+ dst->drawable.height > 2048)
+ return sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n);
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, src_bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(src_bo))
+ kgem_emit_flush(&sna->kgem);
+
+ memset(&tmp, 0, sizeof(tmp));
+ tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
+
+ tmp.dst.pixmap = dst;
+ tmp.dst.width = dst->drawable.width;
+ tmp.dst.height = dst->drawable.height;
+ tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
+ tmp.dst.bo = dst_bo;
+
+ gen3_render_copy_setup_source(sna, &tmp.src, src, src_bo);
+
+ tmp.floats_per_vertex = 4;
+ tmp.mask.gen3.type = SHADER_NONE;
+
+ gen3_emit_composite_state(sna, &tmp);
+ gen3_align_vertex(sna, &tmp);
+
+ do {
+ int n_this_time;
+
+ n_this_time = gen3_get_rectangles(sna, &tmp, n);
+ if (n_this_time == 0) {
+ gen3_emit_composite_state(sna, &tmp);
+ n_this_time = gen3_get_rectangles(sna, &tmp, n);
+ }
+ n -= n_this_time;
+
+ do {
+ DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ box->x2 - box->x1, box->y2 - box->y1));
+ OUT_VERTEX(box->x2 + dst_dx);
+ OUT_VERTEX(box->y2 + dst_dy);
+ OUT_VERTEX((box->x2 + src_dx) * tmp.src.scale[0]);
+ OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
+
+ OUT_VERTEX(box->x1 + dst_dx);
+ OUT_VERTEX(box->y2 + dst_dy);
+ OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
+ OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
+
+ OUT_VERTEX(box->x1 + dst_dx);
+ OUT_VERTEX(box->y1 + dst_dy);
+ OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
+ OUT_VERTEX((box->y1 + src_dy) * tmp.src.scale[1]);
+
+ box++;
+ } while (--n_this_time);
+ } while (n);
+
+ gen3_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ return TRUE;
+}
+
+static void
+gen3_render_copy_blt(struct sna *sna,
+ const struct sna_copy_op *op,
+ int16_t sx, int16_t sy,
+ int16_t w, int16_t h,
+ int16_t dx, int16_t dy)
+{
+ if (!gen3_get_rectangles(sna, &op->base, 1)) {
+ gen3_emit_composite_state(sna, &op->base);
+ gen3_get_rectangles(sna, &op->base, 1);
+ }
+
+ OUT_VERTEX(dx+w);
+ OUT_VERTEX(dy+h);
+ OUT_VERTEX((sx+w)*op->base.src.scale[0]);
+ OUT_VERTEX((sy+h)*op->base.src.scale[1]);
+
+ OUT_VERTEX(dx);
+ OUT_VERTEX(dy+h);
+ OUT_VERTEX(sx*op->base.src.scale[0]);
+ OUT_VERTEX((sy+h)*op->base.src.scale[1]);
+
+ OUT_VERTEX(dx);
+ OUT_VERTEX(dy);
+ OUT_VERTEX(sx*op->base.src.scale[0]);
+ OUT_VERTEX(sy*op->base.src.scale[1]);
+}
+
+static void
+gen3_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
+{
+ gen3_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+}
+
+static Bool
+gen3_render_copy(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ struct sna_copy_op *tmp)
+{
+#if NO_COPY
+ return sna_blt_copy(sna, alu,
+ src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ op);
+#endif
+
+ /* Prefer to use the BLT */
+ if (sna->kgem.mode == KGEM_BLT &&
+ src->drawable.bitsPerPixel == dst->drawable.bitsPerPixel &&
+ sna_blt_copy(sna, alu,
+ src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ tmp))
+ return TRUE;
+
+ /* Must use the BLT if we can't RENDER... */
+ if (!(alu == GXcopy || alu == GXclear) ||
+ src->drawable.width > 2048 || src->drawable.height > 2048 ||
+ dst->drawable.width > 2048 || dst->drawable.height > 2048 ||
+ src_bo->pitch > 8192 || dst_bo->pitch > 8192) {
+ if (src->drawable.bitsPerPixel != dst->drawable.bitsPerPixel)
+ return FALSE;
+
+ return sna_blt_copy(sna, alu, src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ tmp);
+ }
+
+ tmp->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
+
+ tmp->base.dst.pixmap = dst;
+ tmp->base.dst.width = dst->drawable.width;
+ tmp->base.dst.height = dst->drawable.height;
+ tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
+ tmp->base.dst.bo = dst_bo;
+
+ gen3_render_copy_setup_source(sna, &tmp->base.src, src, src_bo);
+
+ tmp->base.floats_per_vertex = 4;
+ tmp->base.mask.gen3.type = SHADER_NONE;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, src_bo))
+ kgem_submit(&sna->kgem);
+
+ tmp->blt = gen3_render_copy_blt;
+ tmp->done = gen3_render_copy_done;
+
+ gen3_emit_composite_state(sna, &tmp->base);
+ gen3_align_vertex(sna, &tmp->base);
+ return TRUE;
+}
+
+static Bool
+gen3_render_fill_boxes_try_blt(struct sna *sna,
+ CARD8 op, PictFormat format,
+ const xRenderColor *color,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ const BoxRec *box, int n)
+{
+ uint8_t alu = GXcopy;
+ uint32_t pixel;
+
+ if (!sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ format))
+ return FALSE;
+
+ if (op == PictOpClear) {
+ alu = GXclear;
+ pixel = 0;
+ op = PictOpSrc;
+ }
+
+ if (op == PictOpOver) {
+ if ((pixel & 0xff000000) == 0xff000000)
+ op = PictOpSrc;
+ }
+
+ if (op != PictOpSrc)
+ return FALSE;
+
+ return sna_blt_fill_boxes(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ pixel, box, n);
+}
+
+static Bool
+gen3_render_fill_boxes(struct sna *sna,
+ CARD8 op,
+ PictFormat format,
+ const xRenderColor *color,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ const BoxRec *box, int n)
+{
+ struct sna_composite_op tmp;
+ uint32_t pixel;
+
+#if NO_FILL_BOXES
+ return gen3_render_fill_boxes_try_blt(sna, op, format, color,
+ dst, dst_bo,
+ box, n);
+#endif
+
+ DBG(("%s (op=%d, color=(%04x,%04x,%04x, %04x))\n",
+ __FUNCTION__, op,
+ color->red, color->green, color->blue, color->alpha));
+
+ if (op >= ARRAY_SIZE(gen3_blend_op)) {
+ DBG(("%s: fallback due to unhandled blend op: %d\n",
+ __FUNCTION__, op));
+ return FALSE;
+ }
+
+ if (dst->drawable.width > 2048 ||
+ dst->drawable.height > 2048 ||
+ dst_bo->pitch > 8192)
+ return gen3_render_fill_boxes_try_blt(sna, op, format, color,
+ dst, dst_bo,
+ box, n);
+
+ if (gen3_render_fill_boxes_try_blt(sna, op, format, color,
+ dst, dst_bo,
+ box, n))
+ return TRUE;
+
+ if (!sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ PICT_a8r8g8b8))
+ return FALSE;
+
+ memset(&tmp, 0, sizeof(tmp));
+ tmp.op = op;
+ tmp.dst.pixmap = dst;
+ tmp.dst.width = dst->drawable.width;
+ tmp.dst.height = dst->drawable.height;
+ tmp.dst.format = format;
+ tmp.dst.bo = dst_bo;
+ tmp.floats_per_vertex = 2;
+
+ tmp.src.gen3.type = SHADER_CONSTANT;
+ tmp.src.gen3.mode = pixel;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+
+ gen3_emit_composite_state(sna, &tmp);
+ gen3_align_vertex(sna, &tmp);
+
+ do {
+ int n_this_time = gen3_get_rectangles(sna, &tmp, n);
+ if (n_this_time == 0) {
+ gen3_emit_composite_state(sna, &tmp);
+ n_this_time = gen3_get_rectangles(sna, &tmp, n);
+ }
+ n -= n_this_time;
+
+ do {
+ DBG((" (%d, %d), (%d, %d)\n",
+ box->x1, box->y1, box->x2, box->y2));
+ OUT_VERTEX(box->x2);
+ OUT_VERTEX(box->y2);
+ OUT_VERTEX(box->x1);
+ OUT_VERTEX(box->y2);
+ OUT_VERTEX(box->x1);
+ OUT_VERTEX(box->y1);
+ box++;
+ } while (--n_this_time);
+ } while (n);
+
+ gen3_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ return TRUE;
+}
+
+static void
+gen3_render_fill_blt(struct sna *sna,
+ const struct sna_fill_op *op,
+ int16_t x, int16_t y, int16_t w, int16_t h)
+{
+ if (!gen3_get_rectangles(sna, &op->base, 1)) {
+ gen3_emit_composite_state(sna, &op->base);
+ gen3_get_rectangles(sna, &op->base, 1);
+ }
+
+ OUT_VERTEX(x+w);
+ OUT_VERTEX(y+h);
+ OUT_VERTEX(x);
+ OUT_VERTEX(y+h);
+ OUT_VERTEX(x);
+ OUT_VERTEX(y);
+}
+
+static void
+gen3_render_fill_done(struct sna *sna, const struct sna_fill_op *op)
+{
+ gen3_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+}
+
+static Bool
+gen3_render_fill(struct sna *sna, uint8_t alu,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ uint32_t color,
+ struct sna_fill_op *tmp)
+{
+#if NO_FILL
+ return sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ op);
+#endif
+
+ /* Prefer to use the BLT if already engaged */
+ if (sna->kgem.mode == KGEM_BLT &&
+ sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ tmp))
+ return TRUE;
+
+ /* Must use the BLT if we can't RENDER... */
+ if (!(alu == GXcopy || alu == GXclear) ||
+ dst->drawable.width > 2048 || dst->drawable.height > 2048 ||
+ dst_bo->pitch > 8192)
+ return sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ tmp);
+
+ if (alu == GXclear)
+ color = 0;
+
+ tmp->base.op = color == 0 ? PictOpClear : PictOpSrc;
+ tmp->base.dst.pixmap = dst;
+ tmp->base.dst.width = dst->drawable.width;
+ tmp->base.dst.height = dst->drawable.height;
+ tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
+ tmp->base.dst.bo = dst_bo;
+ tmp->base.floats_per_vertex = 2;
+
+ tmp->base.src.gen3.type = SHADER_CONSTANT;
+ tmp->base.src.gen3.mode =
+ sna_rgba_for_color(color, dst->drawable.depth);
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+
+ tmp->blt = gen3_render_fill_blt;
+ tmp->done = gen3_render_fill_done;
+
+ gen3_emit_composite_state(sna, &tmp->base);
+ gen3_align_vertex(sna, &tmp->base);
+ return TRUE;
+}
+
+static void gen3_render_flush(struct sna *sna)
+{
+ gen3_vertex_finish(sna, TRUE);
+}
+
+static void
+gen3_render_context_switch(struct sna *sna,
+ int new_mode)
+{
+}
+
+static void
+gen3_render_fini(struct sna *sna)
+{
+}
+
+Bool gen3_render_init(struct sna *sna)
+{
+ struct sna_render *render = &sna->render;
+
+ gen3_render_reset(sna);
+
+ render->composite = gen3_render_composite;
+ render->composite_spans = gen3_render_composite_spans;
+
+ render->video = gen3_render_video;
+
+ render->copy_boxes = gen3_render_copy_boxes;
+ render->copy = gen3_render_copy;
+
+ render->fill_boxes = gen3_render_fill_boxes;
+ render->fill = gen3_render_fill;
+
+ render->reset = gen3_render_reset;
+ render->flush = gen3_render_flush;
+ render->context_switch = gen3_render_context_switch;
+ render->fini = gen3_render_fini;
+
+ render->max_3d_size = 2048;
+ return TRUE;
+}
diff --git a/src/sna/gen3_render.h b/src/sna/gen3_render.h
new file mode 100644
index 00000000..3272d5cb
--- /dev/null
+++ b/src/sna/gen3_render.h
@@ -0,0 +1,1479 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _I915_REG_H_
+#define _I915_REG_H_
+
+#define CMD_3D (3 << 29)
+
+#define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value)
+
+#define PRIM3D (CMD_3D | (0x1f<<24))
+#define PRIM3D_INDIRECT_SEQUENTIAL ((1<<23) | (0<<17))
+#define PRIM3D_TRILIST (PRIM3D | (0x0<<18))
+#define PRIM3D_TRISTRIP (PRIM3D | (0x1<<18))
+#define PRIM3D_TRISTRIP_RVRSE (PRIM3D | (0x2<<18))
+#define PRIM3D_TRIFAN (PRIM3D | (0x3<<18))
+#define PRIM3D_POLY (PRIM3D | (0x4<<18))
+#define PRIM3D_LINELIST (PRIM3D | (0x5<<18))
+#define PRIM3D_LINESTRIP (PRIM3D | (0x6<<18))
+#define PRIM3D_RECTLIST (PRIM3D | (0x7<<18))
+#define PRIM3D_POINTLIST (PRIM3D | (0x8<<18))
+#define PRIM3D_DIB (PRIM3D | (0x9<<18))
+#define PRIM3D_CLEAR_RECT (PRIM3D | (0xa<<18))
+#define PRIM3D_ZONE_INIT (PRIM3D | (0xd<<18))
+#define PRIM3D_MASK (0x1f<<18)
+
+
+/* p137 */
+#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24))
+#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16)
+#define AA_LINE_ECAAR_WIDTH_0_5 0
+#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14)
+#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14)
+#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14)
+#define AA_LINE_REGION_WIDTH_ENABLE (1<<8)
+#define AA_LINE_REGION_WIDTH_0_5 0
+#define AA_LINE_REGION_WIDTH_1_0 (1<<6)
+#define AA_LINE_REGION_WIDTH_2_0 (2<<6)
+#define AA_LINE_REGION_WIDTH_4_0 (3<<6)
+
+/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/
+#define _3DSTATE_BACKFACE_STENCIL_OPS (CMD_3D | (0x8<<24))
+#define BFO_ENABLE_STENCIL_REF (1<<23)
+#define BFO_STENCIL_REF_SHIFT 15
+#define BFO_STENCIL_REF_MASK (0xff<<15)
+#define BFO_ENABLE_STENCIL_FUNCS (1<<14)
+#define BFO_STENCIL_TEST_SHIFT 11
+#define BFO_STENCIL_TEST_MASK (0x7<<11)
+#define BFO_STENCIL_FAIL_SHIFT 8
+#define BFO_STENCIL_FAIL_MASK (0x7<<8)
+#define BFO_STENCIL_PASS_Z_FAIL_SHIFT 5
+#define BFO_STENCIL_PASS_Z_FAIL_MASK (0x7<<5)
+#define BFO_STENCIL_PASS_Z_PASS_SHIFT 2
+#define BFO_STENCIL_PASS_Z_PASS_MASK (0x7<<2)
+#define BFO_ENABLE_STENCIL_TWO_SIDE (1<<1)
+#define BFO_STENCIL_TWO_SIDE (1<<0)
+
+/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */
+#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24))
+#define BFM_ENABLE_STENCIL_TEST_MASK (1<<17)
+#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16)
+#define BFM_STENCIL_TEST_MASK_SHIFT 8
+#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8)
+#define BFM_STENCIL_WRITE_MASK_SHIFT 0
+#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0)
+
+/* 3DSTATE_BIN_CONTROL p141 */
+
+/* p143 */
+#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
+/* Dword 1 */
+#define BUF_3D_ID_COLOR_BACK (0x3<<24)
+#define BUF_3D_ID_DEPTH (0x7<<24)
+#define BUF_3D_USE_FENCE (1<<23)
+#define BUF_3D_TILED_SURFACE (1<<22)
+#define BUF_3D_TILE_WALK_X 0
+#define BUF_3D_TILE_WALK_Y (1<<21)
+/* Dword 2 */
+#define BUF_3D_ADDR(x) ((x) & ~0x3)
+
+/* 3DSTATE_CHROMA_KEY */
+
+/* 3DSTATE_CLEAR_PARAMETERS, p150 */
+#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5)
+/* Dword 1 */
+#define CLEARPARAM_CLEAR_RECT (1 << 16)
+#define CLEARPARAM_ZONE_INIT (0 << 16)
+#define CLEARPARAM_WRITE_COLOR (1 << 2)
+#define CLEARPARAM_WRITE_DEPTH (1 << 1)
+#define CLEARPARAM_WRITE_STENCIL (1 << 0)
+
+/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */
+#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16))
+
+/* 3DSTATE_COORD_SET_BINDINGS, p154 */
+#define _3DSTATE_COORD_SET_BINDINGS (CMD_3D | (0x16<<24))
+#define CSB_TCB(iunit, eunit) ((eunit)<<(iunit*3))
+
+/* p156 */
+#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16))
+
+/* p157 */
+#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16))
+
+/* p158 */
+#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16))
+
+/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */
+#define _3DSTATE_DEPTH_OFFSET_SCALE (CMD_3D | (0x1d<<24) | (0x97<<16))
+/* scale in dword 1 */
+
+/* The depth subrectangle is not supported, but must be disabled. */
+/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */
+#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | (1 << 1) | (0 << 0))
+
+/* p161 */
+#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16))
+/* Dword 1 */
+#define TEX_DEFAULT_COLOR_OGL (0<<30)
+#define TEX_DEFAULT_COLOR_D3D (1<<30)
+#define ZR_EARLY_DEPTH (1<<29)
+#define LOD_PRECLAMP_OGL (1<<28)
+#define LOD_PRECLAMP_D3D (0<<28)
+#define DITHER_FULL_ALWAYS (0<<26)
+#define DITHER_FULL_ON_FB_BLEND (1<<26)
+#define DITHER_CLAMPED_ALWAYS (2<<26)
+#define LINEAR_GAMMA_BLEND_32BPP (1<<25)
+#define DEBUG_DISABLE_ENH_DITHER (1<<24)
+#define DSTORG_HORT_BIAS(x) ((x)<<20)
+#define DSTORG_VERT_BIAS(x) ((x)<<16)
+#define COLOR_4_2_2_CHNL_WRT_ALL 0
+#define COLOR_4_2_2_CHNL_WRT_Y (1<<12)
+#define COLOR_4_2_2_CHNL_WRT_CR (2<<12)
+#define COLOR_4_2_2_CHNL_WRT_CB (3<<12)
+#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12)
+#define COLR_BUF_8BIT 0
+#define COLR_BUF_RGB555 (1<<8)
+#define COLR_BUF_RGB565 (2<<8)
+#define COLR_BUF_ARGB8888 (3<<8)
+#define COLR_BUF_ARGB4444 (8<<8)
+#define COLR_BUF_ARGB1555 (9<<8)
+#define COLR_BUF_ARGB2AAA (0xa<<8)
+#define DEPTH_IS_Z 0
+#define DEPTH_IS_W (1<<6)
+#define DEPTH_FRMT_16_FIXED 0
+#define DEPTH_FRMT_16_FLOAT (1<<2)
+#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2)
+#define DEPTH_FRMT_24_FLOAT_8_OTHER (3<<2)
+#define VERT_LINE_STRIDE_1 (1<<1)
+#define VERT_LINE_STRIDE_0 0
+#define VERT_LINE_STRIDE_OFS_1 1
+#define VERT_LINE_STRIDE_OFS_0 0
+
+/* p166 */
+#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3)
+/* Dword 1 */
+#define DRAW_RECT_DIS_DEPTH_OFS (1<<30)
+#define DRAW_DITHER_OFS_X(x) ((x)<<26)
+#define DRAW_DITHER_OFS_Y(x) ((x)<<24)
+/* Dword 2 */
+#define DRAW_YMIN(x) ((uint16_t)(x)<<16)
+#define DRAW_XMIN(x) ((uint16_t)(x))
+/* Dword 3 */
+#define DRAW_YMAX(x) ((uint16_t)(x)<<16)
+#define DRAW_XMAX(x) ((uint16_t)(x))
+/* Dword 4 */
+#define DRAW_YORG(x) ((uint16_t)(x)<<16)
+#define DRAW_XORG(x) ((uint16_t)(x))
+
+/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */
+
+/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */
+
+/* _3DSTATE_FOG_COLOR, p173 */
+#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24))
+#define FOG_COLOR_RED(x) ((x)<<16)
+#define FOG_COLOR_GREEN(x) ((x)<<8)
+#define FOG_COLOR_BLUE(x) (x)
+
+/* _3DSTATE_FOG_MODE, p174 */
+#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2)
+/* Dword 1 */
+#define FMC1_FOGFUNC_MODIFY_ENABLE (1<<31)
+#define FMC1_FOGFUNC_VERTEX (0<<28)
+#define FMC1_FOGFUNC_PIXEL_EXP (1<<28)
+#define FMC1_FOGFUNC_PIXEL_EXP2 (2<<28)
+#define FMC1_FOGFUNC_PIXEL_LINEAR (3<<28)
+#define FMC1_FOGFUNC_MASK (3<<28)
+#define FMC1_FOGINDEX_MODIFY_ENABLE (1<<27)
+#define FMC1_FOGINDEX_Z (0<<25)
+#define FMC1_FOGINDEX_W (1<<25)
+#define FMC1_C1_C2_MODIFY_ENABLE (1<<24)
+#define FMC1_DENSITY_MODIFY_ENABLE (1<<23)
+#define FMC1_C1_ONE (1<<13)
+#define FMC1_C1_MASK (0xffff<<4)
+/* Dword 2 */
+#define FMC2_C2_ONE (1<<16)
+/* Dword 3 */
+#define FMC3_D_ONE (1<<16)
+
+/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */
+#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24))
+#define IAB_MODIFY_ENABLE (1<<23)
+#define IAB_ENABLE (1<<22)
+#define IAB_MODIFY_FUNC (1<<21)
+#define IAB_FUNC_SHIFT 16
+#define IAB_MODIFY_SRC_FACTOR (1<<11)
+#define IAB_SRC_FACTOR_SHIFT 6
+#define IAB_SRC_FACTOR_MASK (BLENDFACT_MASK<<6)
+#define IAB_MODIFY_DST_FACTOR (1<<5)
+#define IAB_DST_FACTOR_SHIFT 0
+#define IAB_DST_FACTOR_MASK (BLENDFACT_MASK<<0)
+
+#define BLENDFACT_ZERO 0x01
+#define BLENDFACT_ONE 0x02
+#define BLENDFACT_SRC_COLR 0x03
+#define BLENDFACT_INV_SRC_COLR 0x04
+#define BLENDFACT_SRC_ALPHA 0x05
+#define BLENDFACT_INV_SRC_ALPHA 0x06
+#define BLENDFACT_DST_ALPHA 0x07
+#define BLENDFACT_INV_DST_ALPHA 0x08
+#define BLENDFACT_DST_COLR 0x09
+#define BLENDFACT_INV_DST_COLR 0x0a
+#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b
+#define BLENDFACT_CONST_COLOR 0x0c
+#define BLENDFACT_INV_CONST_COLOR 0x0d
+#define BLENDFACT_CONST_ALPHA 0x0e
+#define BLENDFACT_INV_CONST_ALPHA 0x0f
+#define BLENDFACT_MASK 0x0f
+
+#define BLENDFUNC_ADD 0x0
+#define BLENDFUNC_SUBTRACT 0x1
+#define BLENDFUNC_REVERSE_SUBTRACT 0x2
+#define BLENDFUNC_MIN 0x3
+#define BLENDFUNC_MAX 0x4
+#define BLENDFUNC_MASK 0x7
+
+/* 3DSTATE_LOAD_INDIRECT, p180 */
+
+#define _3DSTATE_LOAD_INDIRECT (CMD_3D|(0x1d<<24)|(0x7<<16))
+#define LI0_STATE_STATIC_INDIRECT (0x01<<8)
+#define LI0_STATE_DYNAMIC_INDIRECT (0x02<<8)
+#define LI0_STATE_SAMPLER (0x04<<8)
+#define LI0_STATE_MAP (0x08<<8)
+#define LI0_STATE_PROGRAM (0x10<<8)
+#define LI0_STATE_CONSTANTS (0x20<<8)
+
+#define SIS0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define SIS0_FORCE_LOAD (1<<1)
+#define SIS0_BUFFER_VALID (1<<0)
+#define SIS1_BUFFER_LENGTH(x) ((x)&0xff)
+
+#define DIS0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define DIS0_BUFFER_RESET (1<<1)
+#define DIS0_BUFFER_VALID (1<<0)
+
+#define SSB0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define SSB0_FORCE_LOAD (1<<1)
+#define SSB0_BUFFER_VALID (1<<0)
+#define SSB1_BUFFER_LENGTH(x) ((x)&0xff)
+
+#define MSB0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define MSB0_FORCE_LOAD (1<<1)
+#define MSB0_BUFFER_VALID (1<<0)
+#define MSB1_BUFFER_LENGTH(x) ((x)&0xff)
+
+#define PSP0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define PSP0_FORCE_LOAD (1<<1)
+#define PSP0_BUFFER_VALID (1<<0)
+#define PSP1_BUFFER_LENGTH(x) ((x)&0xff)
+
+#define PSC0_BUFFER_ADDRESS(x) ((x)&~0x3)
+#define PSC0_FORCE_LOAD (1<<1)
+#define PSC0_BUFFER_VALID (1<<0)
+#define PSC1_BUFFER_LENGTH(x) ((x)&0xff)
+
+/* _3DSTATE_RASTERIZATION_RULES */
+#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24))
+#define ENABLE_POINT_RASTER_RULE (1<<15)
+#define OGL_POINT_RASTER_RULE (1<<13)
+#define ENABLE_TEXKILL_3D_4D (1<<10)
+#define TEXKILL_3D (0<<9)
+#define TEXKILL_4D (1<<9)
+#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8)
+#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5)
+#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6)
+#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3)
+
+/* _3DSTATE_SCISSOR_ENABLE, p256 */
+#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19))
+#define ENABLE_SCISSOR_RECT ((1<<1) | 1)
+#define DISABLE_SCISSOR_RECT (1<<1)
+
+/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */
+#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1)
+/* Dword 1 */
+#define SCISSOR_RECT_0_YMIN(x) ((x)<<16)
+#define SCISSOR_RECT_0_XMIN(x) (x)
+/* Dword 2 */
+#define SCISSOR_RECT_0_YMAX(x) ((x)<<16)
+#define SCISSOR_RECT_0_XMAX(x) (x)
+
+/* p189 */
+#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16))
+#define I1_LOAD_S(n) (1<<(4+n))
+
+#define S0_VB_OFFSET_MASK 0xffffffc
+#define S0_AUTO_CACHE_INV_DISABLE (1<<0)
+
+#define S1_VERTEX_WIDTH_SHIFT 24
+#define S1_VERTEX_WIDTH_MASK (0x3f<<24)
+#define S1_VERTEX_PITCH_SHIFT 16
+#define S1_VERTEX_PITCH_MASK (0x3f<<16)
+
+#define TEXCOORDFMT_2D 0x0
+#define TEXCOORDFMT_3D 0x1
+#define TEXCOORDFMT_4D 0x2
+#define TEXCOORDFMT_1D 0x3
+#define TEXCOORDFMT_2D_16 0x4
+#define TEXCOORDFMT_4D_16 0x5
+#define TEXCOORDFMT_NOT_PRESENT 0xf
+#define S2_TEXCOORD_FMT0_MASK 0xf
+#define S2_TEXCOORD_FMT1_SHIFT 4
+#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4))
+#define S2_TEXCOORD_NONE (~0)
+
+#define TEXCOORD_WRAP_SHORTEST_TCX 8
+#define TEXCOORD_WRAP_SHORTEST_TCY 4
+#define TEXCOORD_WRAP_SHORTEST_TCZ 2
+#define TEXCOORD_PERSPECTIVE_DISABLE 1
+
+#define S3_WRAP_SHORTEST_TCX(unit) (TEXCOORD_WRAP_SHORTEST_TCX << ((unit) * 4))
+#define S3_WRAP_SHORTEST_TCY(unit) (TEXCOORD_WRAP_SHORTEST_TCY << ((unit) * 4))
+#define S3_WRAP_SHORTEST_TCZ(unit) (TEXCOORD_WRAP_SHORTEST_TCZ << ((unit) * 4))
+#define S3_PERSPECTIVE_DISABLE(unit) (TEXCOORD_PERSPECTIVE_DISABLE << ((unit) * 4))
+
+/* S3 not interesting */
+
+#define S4_POINT_WIDTH_SHIFT 23
+#define S4_POINT_WIDTH_MASK (0x1ff<<23)
+#define S4_LINE_WIDTH_SHIFT 19
+#define S4_LINE_WIDTH_ONE (0x2<<19)
+#define S4_LINE_WIDTH_MASK (0xf<<19)
+#define S4_FLATSHADE_ALPHA (1<<18)
+#define S4_FLATSHADE_FOG (1<<17)
+#define S4_FLATSHADE_SPECULAR (1<<16)
+#define S4_FLATSHADE_COLOR (1<<15)
+#define S4_CULLMODE_BOTH (0<<13)
+#define S4_CULLMODE_NONE (1<<13)
+#define S4_CULLMODE_CW (2<<13)
+#define S4_CULLMODE_CCW (3<<13)
+#define S4_CULLMODE_MASK (3<<13)
+#define S4_VFMT_POINT_WIDTH (1<<12)
+#define S4_VFMT_SPEC_FOG (1<<11)
+#define S4_VFMT_COLOR (1<<10)
+#define S4_VFMT_DEPTH_OFFSET (1<<9)
+#define S4_VFMT_XYZ (1<<6)
+#define S4_VFMT_XYZW (2<<6)
+#define S4_VFMT_XY (3<<6)
+#define S4_VFMT_XYW (4<<6)
+#define S4_VFMT_XYZW_MASK (7<<6)
+#define S4_FORCE_DEFAULT_DIFFUSE (1<<5)
+#define S4_FORCE_DEFAULT_SPECULAR (1<<4)
+#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3)
+#define S4_VFMT_FOG_PARAM (1<<2)
+#define S4_SPRITE_POINT_ENABLE (1<<1)
+#define S4_LINE_ANTIALIAS_ENABLE (1<<0)
+
+#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \
+ S4_VFMT_SPEC_FOG | \
+ S4_VFMT_COLOR | \
+ S4_VFMT_DEPTH_OFFSET | \
+ S4_VFMT_XYZW_MASK | \
+ S4_VFMT_FOG_PARAM)
+
+#define S5_WRITEDISABLE_ALPHA (1<<31)
+#define S5_WRITEDISABLE_RED (1<<30)
+#define S5_WRITEDISABLE_GREEN (1<<29)
+#define S5_WRITEDISABLE_BLUE (1<<28)
+#define S5_WRITEDISABLE_MASK (0xf<<28)
+#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27)
+#define S5_LAST_PIXEL_ENABLE (1<<26)
+#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25)
+#define S5_FOG_ENABLE (1<<24)
+#define S5_STENCIL_REF_SHIFT 16
+#define S5_STENCIL_REF_MASK (0xff<<16)
+#define S5_STENCIL_TEST_FUNC_SHIFT 13
+#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13)
+#define S5_STENCIL_FAIL_SHIFT 10
+#define S5_STENCIL_FAIL_MASK (0x7<<10)
+#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7
+#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7)
+#define S5_STENCIL_PASS_Z_PASS_SHIFT 4
+#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4)
+#define S5_STENCIL_WRITE_ENABLE (1<<3)
+#define S5_STENCIL_TEST_ENABLE (1<<2)
+#define S5_COLOR_DITHER_ENABLE (1<<1)
+#define S5_LOGICOP_ENABLE (1<<0)
+
+#define S6_ALPHA_TEST_ENABLE (1<<31)
+#define S6_ALPHA_TEST_FUNC_SHIFT 28
+#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28)
+#define S6_ALPHA_REF_SHIFT 20
+#define S6_ALPHA_REF_MASK (0xff<<20)
+#define S6_DEPTH_TEST_ENABLE (1<<19)
+#define S6_DEPTH_TEST_FUNC_SHIFT 16
+#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16)
+#define S6_CBUF_BLEND_ENABLE (1<<15)
+#define S6_CBUF_BLEND_FUNC_SHIFT 12
+#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12)
+#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8
+#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8)
+#define S6_CBUF_DST_BLEND_FACT_SHIFT 4
+#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4)
+#define S6_DEPTH_WRITE_ENABLE (1<<3)
+#define S6_COLOR_WRITE_ENABLE (1<<2)
+#define S6_TRISTRIP_PV_SHIFT 0
+#define S6_TRISTRIP_PV_MASK (0x3<<0)
+
+#define S7_DEPTH_OFFSET_CONST_MASK ~0
+
+/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */
+/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */
+
+/* _3DSTATE_MODES_4, p218 */
+#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x0d<<24))
+#define ENABLE_LOGIC_OP_FUNC (1<<23)
+#define LOGIC_OP_FUNC(x) ((x)<<18)
+#define LOGICOP_MASK (0xf<<18)
+#define LOGICOP_COPY 0xc
+#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00))
+#define ENABLE_STENCIL_TEST_MASK (1<<17)
+#define STENCIL_TEST_MASK(x) ((x)<<8)
+#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff))
+#define ENABLE_STENCIL_WRITE_MASK (1<<16)
+#define STENCIL_WRITE_MASK(x) ((x)&0xff)
+
+/* _3DSTATE_MODES_5, p220 */
+#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24))
+#define PIPELINE_FLUSH_RENDER_CACHE (1<<18)
+#define PIPELINE_FLUSH_TEXTURE_CACHE (1<<16)
+
+/* p221 */
+#define _3DSTATE_PIXEL_SHADER_CONSTANTS (CMD_3D|(0x1d<<24)|(0x6<<16))
+#define PS1_REG(n) (1<<(n))
+#define PS2_CONST_X(n) (n)
+#define PS3_CONST_Y(n) (n)
+#define PS4_CONST_Z(n) (n)
+#define PS5_CONST_W(n) (n)
+
+/* p222 */
+
+#define I915_MAX_TEX_INDIRECT 4
+#define I915_MAX_TEX_INSN 32
+#define I915_MAX_ALU_INSN 64
+#define I915_MAX_DECL_INSN 27
+#define I915_MAX_TEMPORARY 16
+
+/* Each instruction is 3 dwords long, though most don't require all
+ * this space. Maximum of 123 instructions. Smaller maxes per insn
+ * type.
+ */
+#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16))
+
+#define REG_TYPE_R 0 /* temporary regs, no need to
+ * dcl, must be written before
+ * read -- Preserved between
+ * phases.
+ */
+#define REG_TYPE_T 1 /* Interpolated values, must be
+ * dcl'ed before use.
+ *
+ * 0..7: texture coord,
+ * 8: diffuse spec,
+ * 9: specular color,
+ * 10: fog parameter in w.
+ */
+#define REG_TYPE_CONST 2 /* Restriction: only one const
+ * can be referenced per
+ * instruction, though it may be
+ * selected for multiple inputs.
+ * Constants not initialized
+ * default to zero.
+ */
+#define REG_TYPE_S 3 /* sampler */
+#define REG_TYPE_OC 4 /* output color (rgba) */
+#define REG_TYPE_OD 5 /* output depth (w), xyz are
+ * temporaries. If not written,
+ * interpolated depth is used?
+ */
+#define REG_TYPE_U 6 /* unpreserved temporaries */
+#define REG_TYPE_MASK 0x7
+#define REG_NR_MASK 0xf
+
+/* REG_TYPE_T:
+ */
+#define T_TEX0 0
+#define T_TEX1 1
+#define T_TEX2 2
+#define T_TEX3 3
+#define T_TEX4 4
+#define T_TEX5 5
+#define T_TEX6 6
+#define T_TEX7 7
+#define T_DIFFUSE 8
+#define T_SPECULAR 9
+#define T_FOG_W 10 /* interpolated fog is in W coord */
+
+/* Arithmetic instructions */
+
+/* .replicate_swizzle == selection and replication of a particular
+ * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww
+ */
+#define A0_NOP (0x0<<24) /* no operation */
+#define A0_ADD (0x1<<24) /* dst = src0 + src1 */
+#define A0_MOV (0x2<<24) /* dst = src0 */
+#define A0_MUL (0x3<<24) /* dst = src0 * src1 */
+#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */
+#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
+#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */
+#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */
+#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */
+#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */
+#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
+#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */
+#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
+#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */
+#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */
+#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */
+#define A0_FLR (0x10<<24) /* dst = floor(src0) */
+#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */
+#define A0_TRC (0x12<<24) /* dst = int(src0) */
+#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */
+#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */
+#define A0_DEST_SATURATE (1<<22)
+#define A0_DEST_TYPE_SHIFT 19
+/* Allow: R, OC, OD, U */
+#define A0_DEST_NR_SHIFT 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define A0_DEST_CHANNEL_X (1<<10)
+#define A0_DEST_CHANNEL_Y (2<<10)
+#define A0_DEST_CHANNEL_Z (4<<10)
+#define A0_DEST_CHANNEL_W (8<<10)
+#define A0_DEST_CHANNEL_ALL (0xf<<10)
+#define A0_DEST_CHANNEL_SHIFT 10
+#define A0_SRC0_TYPE_SHIFT 7
+#define A0_SRC0_NR_SHIFT 2
+
+#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y)
+#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z)
+
+#define SRC_X 0
+#define SRC_Y 1
+#define SRC_Z 2
+#define SRC_W 3
+#define SRC_ZERO 4
+#define SRC_ONE 5
+
+#define A1_SRC0_CHANNEL_X_NEGATE (1<<31)
+#define A1_SRC0_CHANNEL_X_SHIFT 28
+#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27)
+#define A1_SRC0_CHANNEL_Y_SHIFT 24
+#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23)
+#define A1_SRC0_CHANNEL_Z_SHIFT 20
+#define A1_SRC0_CHANNEL_W_NEGATE (1<<19)
+#define A1_SRC0_CHANNEL_W_SHIFT 16
+#define A1_SRC1_TYPE_SHIFT 13
+#define A1_SRC1_NR_SHIFT 8
+#define A1_SRC1_CHANNEL_X_NEGATE (1<<7)
+#define A1_SRC1_CHANNEL_X_SHIFT 4
+#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3)
+#define A1_SRC1_CHANNEL_Y_SHIFT 0
+
+#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31)
+#define A2_SRC1_CHANNEL_Z_SHIFT 28
+#define A2_SRC1_CHANNEL_W_NEGATE (1<<27)
+#define A2_SRC1_CHANNEL_W_SHIFT 24
+#define A2_SRC2_TYPE_SHIFT 21
+#define A2_SRC2_NR_SHIFT 16
+#define A2_SRC2_CHANNEL_X_NEGATE (1<<15)
+#define A2_SRC2_CHANNEL_X_SHIFT 12
+#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11)
+#define A2_SRC2_CHANNEL_Y_SHIFT 8
+#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7)
+#define A2_SRC2_CHANNEL_Z_SHIFT 4
+#define A2_SRC2_CHANNEL_W_NEGATE (1<<3)
+#define A2_SRC2_CHANNEL_W_SHIFT 0
+
+/* Texture instructions */
+#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared
+ * sampler and address, and output
+ * filtered texel data to destination
+ * register */
+#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a
+ * perspective divide of the texture
+ * coordinate .xyz values by .w before
+ * sampling. */
+#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the
+ * computed LOD by w. Only S4.6 two's
+ * comp is used. This implies that a
+ * float to fixed conversion is
+ * done. */
+#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling
+ * operation. Simply kills the pixel
+ * if any channel of the address
+ * register is < 0.0. */
+#define T0_DEST_TYPE_SHIFT 19
+/* Allow: R, OC, OD, U */
+/* Note: U (unpreserved) regs do not retain their values between
+ * phases (cannot be used for feedback)
+ *
+ * Note: oC and OD registers can only be used as the destination of a
+ * texture instruction once per phase (this is an implementation
+ * restriction).
+ */
+#define T0_DEST_NR_SHIFT 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */
+#define T0_SAMPLER_NR_MASK (0xf<<0)
+
+#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */
+/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */
+#define T1_ADDRESS_REG_NR_SHIFT 17
+#define T2_MBZ 0
+
+/* Declaration instructions */
+#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib)
+ * register or an s (sampler)
+ * register. */
+#define D0_SAMPLE_TYPE_SHIFT 22
+#define D0_SAMPLE_TYPE_2D (0x0<<22)
+#define D0_SAMPLE_TYPE_CUBE (0x1<<22)
+#define D0_SAMPLE_TYPE_VOLUME (0x2<<22)
+#define D0_SAMPLE_TYPE_MASK (0x3<<22)
+
+#define D0_TYPE_SHIFT 19
+/* Allow: T, S */
+#define D0_NR_SHIFT 14
+/* Allow T: 0..10, S: 0..15 */
+#define D0_CHANNEL_X (1<<10)
+#define D0_CHANNEL_Y (2<<10)
+#define D0_CHANNEL_Z (4<<10)
+#define D0_CHANNEL_W (8<<10)
+#define D0_CHANNEL_ALL (0xf<<10)
+#define D0_CHANNEL_NONE (0<<10)
+
+#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y)
+#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z)
+
+/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse
+ * or specular declarations.
+ *
+ * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw)
+ *
+ * Must be zero for S (sampler) dcls
+ */
+#define D1_MBZ 0
+#define D2_MBZ 0
+
+/* p207.
+ * The DWORD count is 3 times the number of bits set in MS1_MAPMASK_MASK
+ */
+#define _3DSTATE_MAP_STATE (CMD_3D|(0x1d<<24)|(0x0<<16))
+
+#define MS1_MAPMASK_SHIFT 0
+#define MS1_MAPMASK_MASK (0x8fff<<0)
+
+#define MS2_UNTRUSTED_SURFACE (1<<31)
+#define MS2_ADDRESS_MASK 0xfffffffc
+#define MS2_VERTICAL_LINE_STRIDE (1<<1)
+#define MS2_VERTICAL_OFFSET (1<<1)
+
+#define MS3_HEIGHT_SHIFT 21
+#define MS3_WIDTH_SHIFT 10
+#define MS3_PALETTE_SELECT (1<<9)
+#define MS3_MAPSURF_FORMAT_SHIFT 7
+#define MS3_MAPSURF_FORMAT_MASK (0x7<<7)
+#define MAPSURF_8BIT (1<<7)
+#define MAPSURF_16BIT (2<<7)
+#define MAPSURF_32BIT (3<<7)
+#define MAPSURF_422 (5<<7)
+#define MAPSURF_COMPRESSED (6<<7)
+#define MAPSURF_4BIT_INDEXED (7<<7)
+#define MS3_MT_FORMAT_MASK (0x7 << 3)
+#define MS3_MT_FORMAT_SHIFT 3
+#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */
+#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */
+#define MT_8BIT_L8 (1<<3)
+#define MT_8BIT_A8 (4<<3)
+#define MT_8BIT_MONO8 (5<<3)
+#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */
+#define MT_16BIT_ARGB1555 (1<<3)
+#define MT_16BIT_ARGB4444 (2<<3)
+#define MT_16BIT_AY88 (3<<3)
+#define MT_16BIT_88DVDU (5<<3)
+#define MT_16BIT_BUMP_655LDVDU (6<<3)
+#define MT_16BIT_I16 (7<<3)
+#define MT_16BIT_L16 (8<<3)
+#define MT_16BIT_A16 (9<<3)
+#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */
+#define MT_32BIT_ABGR8888 (1<<3)
+#define MT_32BIT_XRGB8888 (2<<3)
+#define MT_32BIT_XBGR8888 (3<<3)
+#define MT_32BIT_QWVU8888 (4<<3)
+#define MT_32BIT_AXVU8888 (5<<3)
+#define MT_32BIT_LXVU8888 (6<<3)
+#define MT_32BIT_XLVU8888 (7<<3)
+#define MT_32BIT_ARGB2101010 (8<<3)
+#define MT_32BIT_ABGR2101010 (9<<3)
+#define MT_32BIT_AWVU2101010 (0xA<<3)
+#define MT_32BIT_GR1616 (0xB<<3)
+#define MT_32BIT_VU1616 (0xC<<3)
+#define MT_32BIT_xI824 (0xD<<3)
+#define MT_32BIT_xA824 (0xE<<3)
+#define MT_32BIT_xL824 (0xF<<3)
+#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */
+#define MT_422_YCRCB_NORMAL (1<<3)
+#define MT_422_YCRCB_SWAPUV (2<<3)
+#define MT_422_YCRCB_SWAPUVY (3<<3)
+#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */
+#define MT_COMPRESS_DXT2_3 (1<<3)
+#define MT_COMPRESS_DXT4_5 (2<<3)
+#define MT_COMPRESS_FXT1 (3<<3)
+#define MT_COMPRESS_DXT1_RGB (4<<3)
+#define MS3_USE_FENCE_REGS (1<<2)
+#define MS3_TILED_SURFACE (1<<1)
+#define MS3_TILE_WALK (1<<0)
+
+/* The pitch is the pitch measured in DWORDS, minus 1 */
+#define MS4_PITCH_SHIFT 21
+#define MS4_CUBE_FACE_ENA_NEGX (1<<20)
+#define MS4_CUBE_FACE_ENA_POSX (1<<19)
+#define MS4_CUBE_FACE_ENA_NEGY (1<<18)
+#define MS4_CUBE_FACE_ENA_POSY (1<<17)
+#define MS4_CUBE_FACE_ENA_NEGZ (1<<16)
+#define MS4_CUBE_FACE_ENA_POSZ (1<<15)
+#define MS4_CUBE_FACE_ENA_MASK (0x3f<<15)
+#define MS4_MAX_LOD_SHIFT 9
+#define MS4_MAX_LOD_MASK (0x3f<<9)
+#define MS4_MIP_LAYOUT_LEGACY (0<<8)
+#define MS4_MIP_LAYOUT_BELOW_LPT (0<<8)
+#define MS4_MIP_LAYOUT_RIGHT_LPT (1<<8)
+#define MS4_VOLUME_DEPTH_SHIFT 0
+#define MS4_VOLUME_DEPTH_MASK (0xff<<0)
+
+/* p244.
+ * The DWORD count is 3 times the number of bits set in SS1_MAPMASK_MASK.
+ */
+#define _3DSTATE_SAMPLER_STATE (CMD_3D|(0x1d<<24)|(0x1<<16))
+
+#define SS1_MAPMASK_SHIFT 0
+#define SS1_MAPMASK_MASK (0x8fff<<0)
+
+#define SS2_REVERSE_GAMMA_ENABLE (1<<31)
+#define SS2_PACKED_TO_PLANAR_ENABLE (1<<30)
+#define SS2_COLORSPACE_CONVERSION (1<<29)
+#define SS2_CHROMAKEY_SHIFT 27
+#define SS2_BASE_MIP_LEVEL_SHIFT 22
+#define SS2_BASE_MIP_LEVEL_MASK (0x1f<<22)
+#define SS2_MIP_FILTER_SHIFT 20
+#define SS2_MIP_FILTER_MASK (0x3<<20)
+#define MIPFILTER_NONE 0
+#define MIPFILTER_NEAREST 1
+#define MIPFILTER_LINEAR 3
+#define SS2_MAG_FILTER_SHIFT 17
+#define SS2_MAG_FILTER_MASK (0x7<<17)
+#define FILTER_NEAREST 0
+#define FILTER_LINEAR 1
+#define FILTER_ANISOTROPIC 2
+#define FILTER_4X4_1 3
+#define FILTER_4X4_2 4
+#define FILTER_4X4_FLAT 5
+#define FILTER_6X5_MONO 6 /* XXX - check */
+#define SS2_MIN_FILTER_SHIFT 14
+#define SS2_MIN_FILTER_MASK (0x7<<14)
+#define SS2_LOD_BIAS_SHIFT 5
+#define SS2_LOD_BIAS_ONE (0x10<<5)
+#define SS2_LOD_BIAS_MASK (0x1ff<<5)
+/* Shadow requires:
+ * MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format
+ * FILTER_4X4_x MIN and MAG filters
+ */
+#define SS2_SHADOW_ENABLE (1<<4)
+#define SS2_MAX_ANISO_MASK (1<<3)
+#define SS2_MAX_ANISO_2 (0<<3)
+#define SS2_MAX_ANISO_4 (1<<3)
+#define SS2_SHADOW_FUNC_SHIFT 0
+#define SS2_SHADOW_FUNC_MASK (0x7<<0)
+/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */
+
+#define SS3_MIN_LOD_SHIFT 24
+#define SS3_MIN_LOD_ONE (0x10<<24)
+#define SS3_MIN_LOD_MASK (0xff<<24)
+#define SS3_KILL_PIXEL_ENABLE (1<<17)
+#define SS3_TCX_ADDR_MODE_SHIFT 12
+#define SS3_TCX_ADDR_MODE_MASK (0x7<<12)
+#define TEXCOORDMODE_WRAP 0
+#define TEXCOORDMODE_MIRROR 1
+#define TEXCOORDMODE_CLAMP_EDGE 2
+#define TEXCOORDMODE_CUBE 3
+#define TEXCOORDMODE_CLAMP_BORDER 4
+#define TEXCOORDMODE_MIRROR_ONCE 5
+#define SS3_TCY_ADDR_MODE_SHIFT 9
+#define SS3_TCY_ADDR_MODE_MASK (0x7<<9)
+#define SS3_TCZ_ADDR_MODE_SHIFT 6
+#define SS3_TCZ_ADDR_MODE_MASK (0x7<<6)
+#define SS3_NORMALIZED_COORDS (1<<5)
+#define SS3_TEXTUREMAP_INDEX_SHIFT 1
+#define SS3_TEXTUREMAP_INDEX_MASK (0xf<<1)
+#define SS3_DEINTERLACER_ENABLE (1<<0)
+
+#define SS4_BORDER_COLOR_MASK (~0)
+
+/* 3DSTATE_SPAN_STIPPLE, p258
+ */
+#define _3DSTATE_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16))
+#define ST1_ENABLE (1<<16)
+#define ST1_MASK (0xffff)
+
+#define FLUSH_MAP_CACHE (1<<0)
+#define FLUSH_RENDER_CACHE (1<<1)
+
+#endif
+/* -*- c-basic-offset: 4 -*- */
+/*
+ * Copyright © 2006,2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/* Each instruction is 3 dwords long, though most don't require all
+ * this space. Maximum of 123 instructions. Smaller maxes per insn
+ * type.
+ */
+#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16))
+
+#define REG_TYPE_R 0 /* temporary regs, no need to
+ * dcl, must be written before
+ * read -- Preserved between
+ * phases.
+ */
+#define REG_TYPE_T 1 /* Interpolated values, must be
+ * dcl'ed before use.
+ *
+ * 0..7: texture coord,
+ * 8: diffuse spec,
+ * 9: specular color,
+ * 10: fog parameter in w.
+ */
+#define REG_TYPE_CONST 2 /* Restriction: only one const
+ * can be referenced per
+ * instruction, though it may be
+ * selected for multiple inputs.
+ * Constants not initialized
+ * default to zero.
+ */
+#define REG_TYPE_S 3 /* sampler */
+#define REG_TYPE_OC 4 /* output color (rgba) */
+#define REG_TYPE_OD 5 /* output depth (w), xyz are
+ * temporaries. If not written,
+ * interpolated depth is used?
+ */
+#define REG_TYPE_U 6 /* unpreserved temporaries */
+#define REG_TYPE_MASK 0x7
+#define REG_TYPE_SHIFT 4
+#define REG_NR_MASK 0xf
+
+/* REG_TYPE_T:
+*/
+#define T_TEX0 0
+#define T_TEX1 1
+#define T_TEX2 2
+#define T_TEX3 3
+#define T_TEX4 4
+#define T_TEX5 5
+#define T_TEX6 6
+#define T_TEX7 7
+#define T_DIFFUSE 8
+#define T_SPECULAR 9
+#define T_FOG_W 10 /* interpolated fog is in W coord */
+
+/* Arithmetic instructions */
+
+/* .replicate_swizzle == selection and replication of a particular
+ * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww
+ */
+#define A0_NOP (0x0<<24) /* no operation */
+#define A0_ADD (0x1<<24) /* dst = src0 + src1 */
+#define A0_MOV (0x2<<24) /* dst = src0 */
+#define A0_MUL (0x3<<24) /* dst = src0 * src1 */
+#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */
+#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
+#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */
+#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */
+#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */
+#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */
+#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
+#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */
+#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
+#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */
+#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */
+#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */
+#define A0_FLR (0x10<<24) /* dst = floor(src0) */
+#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */
+#define A0_TRC (0x12<<24) /* dst = int(src0) */
+#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */
+#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */
+#define A0_DEST_SATURATE (1<<22)
+#define A0_DEST_TYPE_SHIFT 19
+/* Allow: R, OC, OD, U */
+#define A0_DEST_NR_SHIFT 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define A0_DEST_CHANNEL_X (1<<10)
+#define A0_DEST_CHANNEL_Y (2<<10)
+#define A0_DEST_CHANNEL_Z (4<<10)
+#define A0_DEST_CHANNEL_W (8<<10)
+#define A0_DEST_CHANNEL_ALL (0xf<<10)
+#define A0_DEST_CHANNEL_SHIFT 10
+#define A0_SRC0_TYPE_SHIFT 7
+#define A0_SRC0_NR_SHIFT 2
+
+#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y)
+#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z)
+
+#define SRC_X 0
+#define SRC_Y 1
+#define SRC_Z 2
+#define SRC_W 3
+#define SRC_ZERO 4
+#define SRC_ONE 5
+
+#define A1_SRC0_CHANNEL_X_NEGATE (1<<31)
+#define A1_SRC0_CHANNEL_X_SHIFT 28
+#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27)
+#define A1_SRC0_CHANNEL_Y_SHIFT 24
+#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23)
+#define A1_SRC0_CHANNEL_Z_SHIFT 20
+#define A1_SRC0_CHANNEL_W_NEGATE (1<<19)
+#define A1_SRC0_CHANNEL_W_SHIFT 16
+#define A1_SRC1_TYPE_SHIFT 13
+#define A1_SRC1_NR_SHIFT 8
+#define A1_SRC1_CHANNEL_X_NEGATE (1<<7)
+#define A1_SRC1_CHANNEL_X_SHIFT 4
+#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3)
+#define A1_SRC1_CHANNEL_Y_SHIFT 0
+
+#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31)
+#define A2_SRC1_CHANNEL_Z_SHIFT 28
+#define A2_SRC1_CHANNEL_W_NEGATE (1<<27)
+#define A2_SRC1_CHANNEL_W_SHIFT 24
+#define A2_SRC2_TYPE_SHIFT 21
+#define A2_SRC2_NR_SHIFT 16
+#define A2_SRC2_CHANNEL_X_NEGATE (1<<15)
+#define A2_SRC2_CHANNEL_X_SHIFT 12
+#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11)
+#define A2_SRC2_CHANNEL_Y_SHIFT 8
+#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7)
+#define A2_SRC2_CHANNEL_Z_SHIFT 4
+#define A2_SRC2_CHANNEL_W_NEGATE (1<<3)
+#define A2_SRC2_CHANNEL_W_SHIFT 0
+
+/* Texture instructions */
+#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared
+ * sampler and address, and output
+ * filtered texel data to destination
+ * register */
+#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a
+ * perspective divide of the texture
+ * coordinate .xyz values by .w before
+ * sampling. */
+#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the
+ * computed LOD by w. Only S4.6 two's
+ * comp is used. This implies that a
+ * float to fixed conversion is
+ * done. */
+#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling
+ * operation. Simply kills the pixel
+ * if any channel of the address
+ * register is < 0.0. */
+#define T0_DEST_TYPE_SHIFT 19
+/* Allow: R, OC, OD, U */
+/* Note: U (unpreserved) regs do not retain their values between
+ * phases (cannot be used for feedback)
+ *
+ * Note: oC and OD registers can only be used as the destination of a
+ * texture instruction once per phase (this is an implementation
+ * restriction).
+ */
+#define T0_DEST_NR_SHIFT 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */
+#define T0_SAMPLER_NR_MASK (0xf<<0)
+
+#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */
+/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */
+#define T1_ADDRESS_REG_NR_SHIFT 17
+#define T2_MBZ 0
+
+/* Declaration instructions */
+#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib)
+ * register or an s (sampler)
+ * register. */
+#define D0_SAMPLE_TYPE_SHIFT 22
+#define D0_SAMPLE_TYPE_2D (0x0<<22)
+#define D0_SAMPLE_TYPE_CUBE (0x1<<22)
+#define D0_SAMPLE_TYPE_VOLUME (0x2<<22)
+#define D0_SAMPLE_TYPE_MASK (0x3<<22)
+
+#define D0_TYPE_SHIFT 19
+/* Allow: T, S */
+#define D0_NR_SHIFT 14
+/* Allow T: 0..10, S: 0..15 */
+#define D0_CHANNEL_X (1<<10)
+#define D0_CHANNEL_Y (2<<10)
+#define D0_CHANNEL_Z (4<<10)
+#define D0_CHANNEL_W (8<<10)
+#define D0_CHANNEL_ALL (0xf<<10)
+#define D0_CHANNEL_NONE (0<<10)
+
+#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y)
+#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z)
+
+/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse
+ * or specular declarations.
+ *
+ * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw)
+ *
+ * Must be zero for S (sampler) dcls
+ */
+#define D1_MBZ 0
+#define D2_MBZ 0
+
+
+/* MASK_* are the unshifted bitmasks of the destination mask in arithmetic
+ * operations
+ */
+#define MASK_X 0x1
+#define MASK_Y 0x2
+#define MASK_Z 0x4
+#define MASK_W 0x8
+#define MASK_XYZ (MASK_X | MASK_Y | MASK_Z)
+#define MASK_XYZW (MASK_XYZ | MASK_W)
+#define MASK_SATURATE 0x10
+
+/* Temporary, undeclared regs. Preserved between phases */
+#define FS_R0 ((REG_TYPE_R << REG_TYPE_SHIFT) | 0)
+#define FS_R1 ((REG_TYPE_R << REG_TYPE_SHIFT) | 1)
+#define FS_R2 ((REG_TYPE_R << REG_TYPE_SHIFT) | 2)
+#define FS_R3 ((REG_TYPE_R << REG_TYPE_SHIFT) | 3)
+
+/* Texture coordinate regs. Must be declared. */
+#define FS_T0 ((REG_TYPE_T << REG_TYPE_SHIFT) | 0)
+#define FS_T1 ((REG_TYPE_T << REG_TYPE_SHIFT) | 1)
+#define FS_T2 ((REG_TYPE_T << REG_TYPE_SHIFT) | 2)
+#define FS_T3 ((REG_TYPE_T << REG_TYPE_SHIFT) | 3)
+#define FS_T4 ((REG_TYPE_T << REG_TYPE_SHIFT) | 4)
+#define FS_T5 ((REG_TYPE_T << REG_TYPE_SHIFT) | 5)
+#define FS_T6 ((REG_TYPE_T << REG_TYPE_SHIFT) | 6)
+#define FS_T7 ((REG_TYPE_T << REG_TYPE_SHIFT) | 7)
+#define FS_T8 ((REG_TYPE_T << REG_TYPE_SHIFT) | 8)
+#define FS_T9 ((REG_TYPE_T << REG_TYPE_SHIFT) | 9)
+#define FS_T10 ((REG_TYPE_T << REG_TYPE_SHIFT) | 10)
+
+/* Constant values */
+#define FS_C0 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 0)
+#define FS_C1 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 1)
+#define FS_C2 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 2)
+#define FS_C3 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 3)
+#define FS_C4 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 4)
+#define FS_C5 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 5)
+#define FS_C6 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 6)
+#define FS_C7 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 7)
+
+/* Sampler regs */
+#define FS_S0 ((REG_TYPE_S << REG_TYPE_SHIFT) | 0)
+#define FS_S1 ((REG_TYPE_S << REG_TYPE_SHIFT) | 1)
+#define FS_S2 ((REG_TYPE_S << REG_TYPE_SHIFT) | 2)
+#define FS_S3 ((REG_TYPE_S << REG_TYPE_SHIFT) | 3)
+
+/* Output color */
+#define FS_OC ((REG_TYPE_OC << REG_TYPE_SHIFT) | 0)
+
+/* Output depth */
+#define FS_OD ((REG_TYPE_OD << REG_TYPE_SHIFT) | 0)
+
+/* Unpreserved temporary regs */
+#define FS_U0 ((REG_TYPE_U << REG_TYPE_SHIFT) | 0)
+#define FS_U1 ((REG_TYPE_U << REG_TYPE_SHIFT) | 1)
+#define FS_U2 ((REG_TYPE_U << REG_TYPE_SHIFT) | 2)
+#define FS_U3 ((REG_TYPE_U << REG_TYPE_SHIFT) | 3)
+
+#define X_CHANNEL_SHIFT (REG_TYPE_SHIFT + 3)
+#define Y_CHANNEL_SHIFT (X_CHANNEL_SHIFT + 4)
+#define Z_CHANNEL_SHIFT (Y_CHANNEL_SHIFT + 4)
+#define W_CHANNEL_SHIFT (Z_CHANNEL_SHIFT + 4)
+
+#define REG_CHANNEL_MASK 0xf
+
+#define REG_NR(reg) ((reg) & REG_NR_MASK)
+#define REG_TYPE(reg) (((reg) >> REG_TYPE_SHIFT) & REG_TYPE_MASK)
+#define REG_X(reg) (((reg) >> X_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
+#define REG_Y(reg) (((reg) >> Y_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
+#define REG_Z(reg) (((reg) >> Z_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
+#define REG_W(reg) (((reg) >> W_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
+
+enum gen3_fs_channel {
+ X_CHANNEL_VAL = 0,
+ Y_CHANNEL_VAL,
+ Z_CHANNEL_VAL,
+ W_CHANNEL_VAL,
+ ZERO_CHANNEL_VAL,
+ ONE_CHANNEL_VAL,
+
+ NEG_X_CHANNEL_VAL = X_CHANNEL_VAL | 0x8,
+ NEG_Y_CHANNEL_VAL = Y_CHANNEL_VAL | 0x8,
+ NEG_Z_CHANNEL_VAL = Z_CHANNEL_VAL | 0x8,
+ NEG_W_CHANNEL_VAL = W_CHANNEL_VAL | 0x8,
+ NEG_ONE_CHANNEL_VAL = ONE_CHANNEL_VAL | 0x8
+};
+
+#define gen3_fs_operand(reg, x, y, z, w) \
+ (reg) | \
+(x##_CHANNEL_VAL << X_CHANNEL_SHIFT) | \
+(y##_CHANNEL_VAL << Y_CHANNEL_SHIFT) | \
+(z##_CHANNEL_VAL << Z_CHANNEL_SHIFT) | \
+(w##_CHANNEL_VAL << W_CHANNEL_SHIFT)
+
+/**
+ * Construct an operand description for using a register with no swizzling
+ */
+#define gen3_fs_operand_reg(reg) \
+ gen3_fs_operand(reg, X, Y, Z, W)
+
+#define gen3_fs_operand_reg_negate(reg) \
+ gen3_fs_operand(reg, NEG_X, NEG_Y, NEG_Z, NEG_W)
+
+/**
+ * Returns an operand containing (0.0, 0.0, 0.0, 0.0).
+ */
+#define gen3_fs_operand_zero() gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ZERO)
+
+/**
+ * Returns an unused operand
+ */
+#define gen3_fs_operand_none() gen3_fs_operand_zero()
+
+/**
+ * Returns an operand containing (1.0, 1.0, 1.0, 1.0).
+ */
+#define gen3_fs_operand_one() gen3_fs_operand(FS_R0, ONE, ONE, ONE, ONE)
+
+#define gen3_get_hardware_channel_val(val, shift, negate) \
+ (((val & 0x7) << shift) | ((val & 0x8) ? negate : 0))
+
+/**
+ * Outputs a fragment shader command to declare a sampler or texture register.
+ */
+#define gen3_fs_dcl(reg) \
+ do { \
+ OUT_BATCH(D0_DCL | \
+ (REG_TYPE(reg) << D0_TYPE_SHIFT) | \
+ (REG_NR(reg) << D0_NR_SHIFT) | \
+ ((REG_TYPE(reg) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); \
+ OUT_BATCH(0); \
+ OUT_BATCH(0); \
+ } while (0)
+
+#define gen3_fs_texld(dest_reg, sampler_reg, address_reg) \
+ do { \
+ OUT_BATCH(T0_TEXLD | \
+ (REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \
+ (REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \
+ (REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \
+ OUT_BATCH((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \
+ (REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \
+ OUT_BATCH(0); \
+ } while (0)
+
+#define gen3_fs_texldp(dest_reg, sampler_reg, address_reg) \
+ do { \
+ OUT_BATCH(T0_TEXLDP | \
+ (REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \
+ (REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \
+ (REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \
+ OUT_BATCH((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \
+ (REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \
+ OUT_BATCH(0); \
+ } while (0)
+
+#define gen3_fs_arith_masked(op, dest_reg, dest_mask, operand0, operand1, operand2) \
+ _gen3_fs_arith_masked(A0_##op, dest_reg, dest_mask, operand0, operand1, operand2)
+
+#define gen3_fs_arith(op, dest_reg, operand0, operand1, operand2) \
+ _gen3_fs_arith(A0_##op, dest_reg, operand0, operand1, operand2)
+
+#define _gen3_fs_arith_masked(cmd, dest_reg, dest_mask, operand0, operand1, operand2) \
+ do { \
+ /* Set up destination register and write mask */ \
+ OUT_BATCH(cmd | \
+ (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \
+ (REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \
+ (((dest_mask) & ~MASK_SATURATE) << A0_DEST_CHANNEL_SHIFT) | \
+ (((dest_mask) & MASK_SATURATE) ? A0_DEST_SATURATE : 0) | \
+ /* Set up operand 0 */ \
+ (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \
+ (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \
+ OUT_BATCH(gen3_get_hardware_channel_val(REG_X(operand0), \
+ A1_SRC0_CHANNEL_X_SHIFT, \
+ A1_SRC0_CHANNEL_X_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Y(operand0), \
+ A1_SRC0_CHANNEL_Y_SHIFT, \
+ A1_SRC0_CHANNEL_Y_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Z(operand0), \
+ A1_SRC0_CHANNEL_Z_SHIFT, \
+ A1_SRC0_CHANNEL_Z_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_W(operand0), \
+ A1_SRC0_CHANNEL_W_SHIFT, \
+ A1_SRC0_CHANNEL_W_NEGATE) | \
+ /* Set up operand 1 */ \
+ (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \
+ (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \
+ gen3_get_hardware_channel_val(REG_X(operand1), \
+ A1_SRC1_CHANNEL_X_SHIFT, \
+ A1_SRC1_CHANNEL_X_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Y(operand1), \
+ A1_SRC1_CHANNEL_Y_SHIFT, \
+ A1_SRC1_CHANNEL_Y_NEGATE)); \
+ OUT_BATCH(gen3_get_hardware_channel_val(REG_Z(operand1), \
+ A2_SRC1_CHANNEL_Z_SHIFT, \
+ A2_SRC1_CHANNEL_Z_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_W(operand1), \
+ A2_SRC1_CHANNEL_W_SHIFT, \
+ A2_SRC1_CHANNEL_W_NEGATE) | \
+ /* Set up operand 2 */ \
+ (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \
+ (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \
+ gen3_get_hardware_channel_val(REG_X(operand2), \
+ A2_SRC2_CHANNEL_X_SHIFT, \
+ A2_SRC2_CHANNEL_X_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Y(operand2), \
+ A2_SRC2_CHANNEL_Y_SHIFT, \
+ A2_SRC2_CHANNEL_Y_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Z(operand2), \
+ A2_SRC2_CHANNEL_Z_SHIFT, \
+ A2_SRC2_CHANNEL_Z_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_W(operand2), \
+ A2_SRC2_CHANNEL_W_SHIFT, \
+ A2_SRC2_CHANNEL_W_NEGATE)); \
+ } while (0)
+
+#define _gen3_fs_arith(cmd, dest_reg, operand0, operand1, operand2) do {\
+ /* Set up destination register and write mask */ \
+ OUT_BATCH(cmd | \
+ (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \
+ (REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \
+ (A0_DEST_CHANNEL_ALL) | \
+ /* Set up operand 0 */ \
+ (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \
+ (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \
+ OUT_BATCH(gen3_get_hardware_channel_val(REG_X(operand0), \
+ A1_SRC0_CHANNEL_X_SHIFT, \
+ A1_SRC0_CHANNEL_X_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Y(operand0), \
+ A1_SRC0_CHANNEL_Y_SHIFT, \
+ A1_SRC0_CHANNEL_Y_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Z(operand0), \
+ A1_SRC0_CHANNEL_Z_SHIFT, \
+ A1_SRC0_CHANNEL_Z_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_W(operand0), \
+ A1_SRC0_CHANNEL_W_SHIFT, \
+ A1_SRC0_CHANNEL_W_NEGATE) | \
+ /* Set up operand 1 */ \
+ (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \
+ (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \
+ gen3_get_hardware_channel_val(REG_X(operand1), \
+ A1_SRC1_CHANNEL_X_SHIFT, \
+ A1_SRC1_CHANNEL_X_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Y(operand1), \
+ A1_SRC1_CHANNEL_Y_SHIFT, \
+ A1_SRC1_CHANNEL_Y_NEGATE)); \
+ OUT_BATCH(gen3_get_hardware_channel_val(REG_Z(operand1), \
+ A2_SRC1_CHANNEL_Z_SHIFT, \
+ A2_SRC1_CHANNEL_Z_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_W(operand1), \
+ A2_SRC1_CHANNEL_W_SHIFT, \
+ A2_SRC1_CHANNEL_W_NEGATE) | \
+ /* Set up operand 2 */ \
+ (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \
+ (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \
+ gen3_get_hardware_channel_val(REG_X(operand2), \
+ A2_SRC2_CHANNEL_X_SHIFT, \
+ A2_SRC2_CHANNEL_X_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Y(operand2), \
+ A2_SRC2_CHANNEL_Y_SHIFT, \
+ A2_SRC2_CHANNEL_Y_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_Z(operand2), \
+ A2_SRC2_CHANNEL_Z_SHIFT, \
+ A2_SRC2_CHANNEL_Z_NEGATE) | \
+ gen3_get_hardware_channel_val(REG_W(operand2), \
+ A2_SRC2_CHANNEL_W_SHIFT, \
+ A2_SRC2_CHANNEL_W_NEGATE)); \
+} while (0)
+
+#define gen3_fs_mov(dest_reg, operand0) \
+ gen3_fs_arith(MOV, dest_reg, \
+ operand0, \
+ gen3_fs_operand_none(), \
+ gen3_fs_operand_none())
+
+#define gen3_fs_mov_masked(dest_reg, dest_mask, operand0) \
+ gen3_fs_arith_masked (MOV, dest_reg, dest_mask, \
+ operand0, \
+ gen3_fs_operand_none(), \
+ gen3_fs_operand_none())
+
+
+#define gen3_fs_frc(dest_reg, operand0) \
+ gen3_fs_arith (FRC, dest_reg, \
+ operand0, \
+ gen3_fs_operand_none(), \
+ gen3_fs_operand_none())
+
+/** Add operand0 and operand1 and put the result in dest_reg */
+#define gen3_fs_add(dest_reg, operand0, operand1) \
+ gen3_fs_arith (ADD, dest_reg, \
+ operand0, operand1, \
+ gen3_fs_operand_none())
+
+/** Multiply operand0 and operand1 and put the result in dest_reg */
+#define gen3_fs_mul(dest_reg, operand0, operand1) \
+ gen3_fs_arith (MUL, dest_reg, \
+ operand0, operand1, \
+ gen3_fs_operand_none())
+
+/** Computes 1/(operand0.replicate_swizzle) puts the result in dest_reg */
+#define gen3_fs_rcp(dest_reg, dest_mask, operand0) \
+ do { \
+ if (dest_mask) { \
+ gen3_fs_arith_masked (RCP, dest_reg, dest_mask, \
+ operand0, \
+ gen3_fs_operand_none (), \
+ gen3_fs_operand_none ()); \
+ } else { \
+ gen3_fs_arith (RCP, dest_reg, \
+ operand0, \
+ gen3_fs_operand_none (), \
+ gen3_fs_operand_none ()); \
+ } \
+ } while (0)
+
+/** Computes 1/sqrt(operand0.replicate_swizzle) puts the result in dest_reg */
+#define gen3_fs_rsq(dest_reg, dest_mask, operand0) \
+ do { \
+ if (dest_mask) { \
+ gen3_fs_arith_masked (RSQ, dest_reg, dest_mask, \
+ operand0, \
+ gen3_fs_operand_none (), \
+ gen3_fs_operand_none ()); \
+ } else { \
+ gen3_fs_arith (RSQ, dest_reg, \
+ operand0, \
+ gen3_fs_operand_none (), \
+ gen3_fs_operand_none ()); \
+ } \
+ } while (0)
+
+/** Puts the minimum of operand0 and operand1 in dest_reg */
+#define gen3_fs_min(dest_reg, operand0, operand1) \
+ gen3_fs_arith (MIN, dest_reg, \
+ operand0, operand1, \
+ gen3_fs_operand_none())
+
+/** Puts the maximum of operand0 and operand1 in dest_reg */
+#define gen3_fs_max(dest_reg, operand0, operand1) \
+ gen3_fs_arith (MAX, dest_reg, \
+ operand0, operand1, \
+ gen3_fs_operand_none())
+
+#define gen3_fs_cmp(dest_reg, operand0, operand1, operand2) \
+ gen3_fs_arith (CMP, dest_reg, operand0, operand1, operand2)
+
+/** Perform operand0 * operand1 + operand2 and put the result in dest_reg */
+#define gen3_fs_mad(dest_reg, dest_mask, op0, op1, op2) \
+ do { \
+ if (dest_mask) { \
+ gen3_fs_arith_masked (MAD, dest_reg, dest_mask, op0, op1, op2); \
+ } else { \
+ gen3_fs_arith (MAD, dest_reg, op0, op1, op2); \
+ } \
+ } while (0)
+
+#define gen3_fs_dp2add(dest_reg, dest_mask, op0, op1, op2) \
+ do { \
+ if (dest_mask) { \
+ gen3_fs_arith_masked (DP2ADD, dest_reg, dest_mask, op0, op1, op2); \
+ } else { \
+ gen3_fs_arith (DP2ADD, dest_reg, op0, op1, op2); \
+ } \
+ } while (0)
+
+/**
+ * Perform a 3-component dot-product of operand0 and operand1 and put the
+ * resulting scalar in the channels of dest_reg specified by the dest_mask.
+ */
+#define gen3_fs_dp3(dest_reg, dest_mask, op0, op1) \
+ do { \
+ if (dest_mask) { \
+ gen3_fs_arith_masked (DP3, dest_reg, dest_mask, \
+ op0, op1,\
+ gen3_fs_operand_none()); \
+ } else { \
+ gen3_fs_arith (DP3, dest_reg, op0, op1,\
+ gen3_fs_operand_none()); \
+ } \
+ } while (0)
+
+/**
+ * Perform a 4-component dot-product of operand0 and operand1 and put the
+ * resulting scalar in the channels of dest_reg specified by the dest_mask.
+ */
+#define gen3_fs_dp4(dest_reg, dest_mask, op0, op1) \
+ do { \
+ if (dest_mask) { \
+ gen3_fs_arith_masked (DP4, dest_reg, dest_mask, \
+ op0, op1,\
+ gen3_fs_operand_none()); \
+ } else { \
+ gen3_fs_arith (DP4, dest_reg, op0, op1,\
+ gen3_fs_operand_none()); \
+ } \
+ } while (0)
+
+#define SHADER_TRAPEZOIDS (1 << 24)
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
new file mode 100644
index 00000000..82fef2d6
--- /dev/null
+++ b/src/sna/gen4_render.c
@@ -0,0 +1,2762 @@
+/*
+ * Copyright © 2006,2008,2011 Intel Corporation
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@sna.com>
+ * Eric Anholt <eric@anholt.net>
+ * Carl Worth <cworth@redhat.com>
+ * Keith Packard <keithp@keithp.com>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xf86.h>
+
+#include "sna.h"
+#include "sna_reg.h"
+#include "sna_render.h"
+#include "sna_render_inline.h"
+#include "sna_video.h"
+
+#include "gen4_render.h"
+
+#if DEBUG_RENDER
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+/* gen4 has a serious issue with its shaders that we need to flush
+ * after every rectangle... So until that is resolved, prefer
+ * the BLT engine.
+ */
+#define PREFER_BLT 1
+
+#define FLUSH() do { \
+ gen4_vertex_flush(sna); \
+ OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); \
+} while (0)
+
+#define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
+
+/* Set up a default static partitioning of the URB, which is supposed to
+ * allow anything we would want to do, at potentially lower performance.
+ */
+#define URB_CS_ENTRY_SIZE 1
+#define URB_CS_ENTRIES 0
+
+#define URB_VS_ENTRY_SIZE 1 // each 512-bit row
+#define URB_VS_ENTRIES 8 // we needs at least 8 entries
+
+#define URB_GS_ENTRY_SIZE 0
+#define URB_GS_ENTRIES 0
+
+#define URB_CLIP_ENTRY_SIZE 0
+#define URB_CLIP_ENTRIES 0
+
+#define URB_SF_ENTRY_SIZE 2
+#define URB_SF_ENTRIES 1
+
+/*
+ * this program computes dA/dx and dA/dy for the texture coordinates along
+ * with the base texture coordinate. It was extracted from the Mesa driver
+ */
+
+#define SF_KERNEL_NUM_GRF 16
+#define SF_MAX_THREADS 2
+
+#define PS_KERNEL_NUM_GRF 32
+#define PS_MAX_THREADS 48
+
+static const uint32_t sf_kernel[][4] = {
+#include "exa_sf.g4b"
+};
+
+static const uint32_t sf_kernel_mask[][4] = {
+#include "exa_sf_mask.g4b"
+};
+
+static const uint32_t ps_kernel_nomask_affine[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_nomask_projective[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_maskca_affine[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_affine.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_maskca_projective[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_projective.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_a.g4b"
+#include "exa_wm_mask_affine.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca_srcalpha.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_a.g4b"
+#include "exa_wm_mask_projective.g4b"
+#include "exa_wm_mask_sample_argb.g4b"
+#include "exa_wm_ca_srcalpha.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_masknoca_affine[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_affine.g4b"
+#include "exa_wm_mask_sample_a.g4b"
+#include "exa_wm_noca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_masknoca_projective[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_projective.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_mask_projective.g4b"
+#include "exa_wm_mask_sample_a.g4b"
+#include "exa_wm_noca.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_packed_static[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_argb.g4b"
+#include "exa_wm_yuv_rgb.g4b"
+#include "exa_wm_write.g4b"
+};
+
+static const uint32_t ps_kernel_planar_static[][4] = {
+#include "exa_wm_xy.g4b"
+#include "exa_wm_src_affine.g4b"
+#include "exa_wm_src_sample_planar.g4b"
+#include "exa_wm_yuv_rgb.g4b"
+#include "exa_wm_write.g4b"
+};
+
+#define KERNEL(kernel_enum, kernel, masked) \
+ [kernel_enum] = {&kernel, sizeof(kernel), masked}
+static const struct wm_kernel_info {
+ const void *data;
+ unsigned int size;
+ Bool has_mask;
+} wm_kernels[] = {
+ KERNEL(WM_KERNEL, ps_kernel_nomask_affine, FALSE),
+ KERNEL(WM_KERNEL_PROJECTIVE, ps_kernel_nomask_projective, FALSE),
+
+ KERNEL(WM_KERNEL_MASK, ps_kernel_masknoca_affine, TRUE),
+ KERNEL(WM_KERNEL_MASK_PROJECTIVE, ps_kernel_masknoca_projective, TRUE),
+
+ KERNEL(WM_KERNEL_MASKCA, ps_kernel_maskca_affine, TRUE),
+ KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, ps_kernel_maskca_projective, TRUE),
+
+ KERNEL(WM_KERNEL_MASKCA_SRCALPHA,
+ ps_kernel_maskca_srcalpha_affine, TRUE),
+ KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
+ ps_kernel_maskca_srcalpha_projective, TRUE),
+
+ KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, FALSE),
+ KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, FALSE),
+};
+#undef KERNEL
+
+static const struct blendinfo {
+ Bool src_alpha;
+ uint32_t src_blend;
+ uint32_t dst_blend;
+} gen4_blend_op[] = {
+ /* Clear */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO},
+ /* Src */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO},
+ /* Dst */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE},
+ /* Over */ {1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
+ /* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE},
+ /* In */ {0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
+ /* InReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA},
+ /* Out */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
+ /* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
+ /* Atop */ {1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
+ /* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA},
+ /* Xor */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
+ /* Add */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE},
+};
+
+/**
+ * Highest-valued BLENDFACTOR used in gen4_blend_op.
+ *
+ * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR,
+ * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
+ * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
+ */
+#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1)
+
+static const struct formatinfo {
+ CARD32 pict_fmt;
+ uint32_t card_fmt;
+} gen4_tex_formats[] = {
+ {PICT_a8, GEN4_SURFACEFORMAT_A8_UNORM},
+ {PICT_a8r8g8b8, GEN4_SURFACEFORMAT_B8G8R8A8_UNORM},
+ {PICT_x8r8g8b8, GEN4_SURFACEFORMAT_B8G8R8X8_UNORM},
+ {PICT_a8b8g8r8, GEN4_SURFACEFORMAT_R8G8B8A8_UNORM},
+ {PICT_x8b8g8r8, GEN4_SURFACEFORMAT_R8G8B8X8_UNORM},
+ {PICT_r8g8b8, GEN4_SURFACEFORMAT_R8G8B8_UNORM},
+ {PICT_r5g6b5, GEN4_SURFACEFORMAT_B5G6R5_UNORM},
+ {PICT_a1r5g5b5, GEN4_SURFACEFORMAT_B5G5R5A1_UNORM},
+ {PICT_a2r10g10b10, GEN4_SURFACEFORMAT_B10G10R10A2_UNORM},
+ {PICT_x2r10g10b10, GEN4_SURFACEFORMAT_B10G10R10X2_UNORM},
+ {PICT_a2b10g10r10, GEN4_SURFACEFORMAT_R10G10B10A2_UNORM},
+ {PICT_x2r10g10b10, GEN4_SURFACEFORMAT_B10G10R10X2_UNORM},
+ {PICT_a4r4g4b4, GEN4_SURFACEFORMAT_B4G4R4A4_UNORM},
+};
+
+#define BLEND_OFFSET(s, d) \
+ (((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64)
+
+#define SAMPLER_OFFSET(sf, se, mf, me, k) \
+ ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
+
+static bool
+gen4_emit_pipelined_pointers(struct sna *sna,
+ const struct sna_composite_op *op,
+ int blend, int kernel);
+
+#define OUT_BATCH(v) batch_emit(sna, v)
+#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
+#define OUT_VERTEX_F(v) vertex_emit(sna, v)
+
+static int
+gen4_choose_composite_kernel(int op, Bool has_mask, Bool is_ca, Bool is_affine)
+{
+ int base;
+
+ if (has_mask) {
+ if (is_ca) {
+ if (gen4_blend_op[op].src_alpha)
+ base = WM_KERNEL_MASKCA_SRCALPHA;
+ else
+ base = WM_KERNEL_MASKCA;
+ } else
+ base = WM_KERNEL_MASK;
+ } else
+ base = WM_KERNEL;
+
+ return base + !is_affine;
+}
+
+static void gen4_magic_ca_pass(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ struct gen4_render_state *state = &sna->render_state.gen4;
+
+ if (!op->need_magic_ca_pass)
+ return;
+
+ DBG(("%s: CA fixup\n", __FUNCTION__));
+
+ gen4_emit_pipelined_pointers
+ (sna, op, PictOpAdd,
+ gen4_choose_composite_kernel(PictOpAdd,
+ TRUE, TRUE, op->is_affine));
+
+ OUT_BATCH(GEN4_3DPRIMITIVE |
+ GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+ (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+ (0 << 9) |
+ 4);
+ OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
+ OUT_BATCH(sna->render.vertex_start);
+ OUT_BATCH(1); /* single instance */
+ OUT_BATCH(0); /* start instance location */
+ OUT_BATCH(0); /* index buffer offset, ignored */
+
+ state->last_primitive = sna->kgem.nbatch;
+}
+
+static void gen4_vertex_flush(struct sna *sna)
+{
+ if (sna->render_state.gen4.vertex_offset == 0)
+ return;
+
+ DBG(("%s[%x] = %d\n", __FUNCTION__,
+ 4*sna->render_state.gen4.vertex_offset,
+ sna->render.vertex_index - sna->render.vertex_start));
+ sna->kgem.batch[sna->render_state.gen4.vertex_offset] =
+ sna->render.vertex_index - sna->render.vertex_start;
+ sna->render_state.gen4.vertex_offset = 0;
+
+ if (sna->render.op)
+ gen4_magic_ca_pass(sna, sna->render.op);
+}
+
+static void gen4_vertex_finish(struct sna *sna, Bool last)
+{
+ struct kgem_bo *bo;
+ int i, delta;
+
+ gen4_vertex_flush(sna);
+ if (!sna->render.vertex_used)
+ return;
+
+ /* Note: we only need dword alignment (currently) */
+
+ if (last && sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
+ DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
+ sna->render.vertex_used, sna->kgem.nbatch));
+ memcpy(sna->kgem.batch + sna->kgem.nbatch,
+ sna->render.vertex_data,
+ sna->render.vertex_used * 4);
+ delta = sna->kgem.nbatch * 4;
+ bo = NULL;
+ sna->kgem.nbatch += sna->render.vertex_used;
+ } else {
+ bo = kgem_create_linear(&sna->kgem, 4*sna->render.vertex_used);
+ if (bo && !kgem_bo_write(&sna->kgem, bo,
+ sna->render.vertex_data,
+ 4*sna->render.vertex_used)) {
+ kgem_bo_destroy(&sna->kgem, bo);
+ return;
+ }
+ delta = 0;
+ DBG(("%s: new vbo: %d\n", __FUNCTION__,
+ sna->render.vertex_used));
+ }
+
+ for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
+ if (sna->render.vertex_reloc[i]) {
+ DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+ i, sna->render.vertex_reloc[i]));
+
+ sna->kgem.batch[sna->render.vertex_reloc[i]] =
+ kgem_add_reloc(&sna->kgem,
+ sna->render.vertex_reloc[i],
+ bo,
+ I915_GEM_DOMAIN_VERTEX << 16,
+ delta);
+ sna->render.vertex_reloc[i] = 0;
+ }
+ }
+
+ if (bo)
+ kgem_bo_destroy(&sna->kgem, bo);
+
+ sna->render.vertex_used = 0;
+ sna->render.vertex_index = 0;
+ sna->render_state.gen4.vb_id = 0;
+}
+
+static uint32_t gen4_get_blend(int op,
+ Bool has_component_alpha,
+ uint32_t dst_format)
+{
+ uint32_t src, dst;
+
+ src = gen4_blend_op[op].src_blend;
+ dst = gen4_blend_op[op].dst_blend;
+
+ /* If there's no dst alpha channel, adjust the blend op so that we'll treat
+ * it as always 1.
+ */
+ if (PICT_FORMAT_A(dst_format) == 0) {
+ if (src == GEN4_BLENDFACTOR_DST_ALPHA)
+ src = GEN4_BLENDFACTOR_ONE;
+ else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA)
+ src = GEN4_BLENDFACTOR_ZERO;
+ }
+
+ /* If the source alpha is being used, then we should only be in a
+ * case where the source blend factor is 0, and the source blend
+ * value is the mask channels multiplied by the source picture's alpha.
+ */
+ if (has_component_alpha && gen4_blend_op[op].src_alpha) {
+ if (dst == GEN4_BLENDFACTOR_SRC_ALPHA)
+ dst = GEN4_BLENDFACTOR_SRC_COLOR;
+ else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA)
+ dst = GEN4_BLENDFACTOR_INV_SRC_COLOR;
+ }
+
+ DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
+ op, dst_format, PICT_FORMAT_A(dst_format),
+ src, dst, BLEND_OFFSET(src, dst)));
+ return BLEND_OFFSET(src, dst);
+}
+
+static uint32_t gen4_get_dest_format(PictFormat format)
+{
+ switch (format) {
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ default:
+ return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
+ case PICT_a8b8g8r8:
+ case PICT_x8b8g8r8:
+ return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
+ case PICT_r5g6b5:
+ return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
+ case PICT_x1r5g5b5:
+ case PICT_a1r5g5b5:
+ return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
+ case PICT_a8:
+ return GEN4_SURFACEFORMAT_A8_UNORM;
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
+ }
+}
+
+static Bool gen4_check_dst_format(PictFormat format)
+{
+ switch (format) {
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ case PICT_a8b8g8r8:
+ case PICT_x8b8g8r8:
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ case PICT_r5g6b5:
+ case PICT_x1r5g5b5:
+ case PICT_a1r5g5b5:
+ case PICT_a8:
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ return TRUE;
+ }
+ return FALSE;
+}
+
+static bool gen4_format_is_dst(uint32_t format)
+{
+ switch (format) {
+ case GEN4_SURFACEFORMAT_B8G8R8A8_UNORM:
+ case GEN4_SURFACEFORMAT_R8G8B8A8_UNORM:
+ case GEN4_SURFACEFORMAT_B10G10R10A2_UNORM:
+ case GEN4_SURFACEFORMAT_B5G6R5_UNORM:
+ case GEN4_SURFACEFORMAT_B5G5R5A1_UNORM:
+ case GEN4_SURFACEFORMAT_A8_UNORM:
+ case GEN4_SURFACEFORMAT_B4G4R4A4_UNORM:
+ return true;
+ default:
+ return false;
+ }
+}
+
+typedef struct gen4_surface_state_padded {
+ struct gen4_surface_state state;
+ char pad[32 - sizeof(struct gen4_surface_state)];
+} gen4_surface_state_padded;
+
+static void null_create(struct sna_static_stream *stream)
+{
+ /* A bunch of zeros useful for legacy border color and depth-stencil */
+ sna_static_stream_map(stream, 64, 64);
+}
+
+static void
+sampler_state_init(struct gen4_sampler_state *sampler_state,
+ sampler_filter_t filter,
+ sampler_extend_t extend)
+{
+ sampler_state->ss0.lod_preclamp = 1; /* GL mode */
+
+ /* We use the legacy mode to get the semantics specified by
+ * the Render extension. */
+ sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;
+
+ switch (filter) {
+ default:
+ case SAMPLER_FILTER_NEAREST:
+ sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
+ sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
+ break;
+ case SAMPLER_FILTER_BILINEAR:
+ sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR;
+ sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR;
+ break;
+ }
+
+ switch (extend) {
+ default:
+ case SAMPLER_EXTEND_NONE:
+ sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
+ sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
+ sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
+ break;
+ case SAMPLER_EXTEND_REPEAT:
+ sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
+ sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
+ sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
+ break;
+ case SAMPLER_EXTEND_PAD:
+ sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
+ sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
+ sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
+ break;
+ case SAMPLER_EXTEND_REFLECT:
+ sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
+ sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
+ sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
+ break;
+ }
+}
+
+static uint32_t gen4_get_card_format(PictFormat format)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(gen4_tex_formats); i++) {
+ if (gen4_tex_formats[i].pict_fmt == format)
+ return gen4_tex_formats[i].card_fmt;
+ }
+ return -1;
+}
+
+static uint32_t gen4_filter(uint32_t filter)
+{
+ switch (filter) {
+ default:
+ assert(0);
+ case PictFilterNearest:
+ return SAMPLER_FILTER_NEAREST;
+ case PictFilterBilinear:
+ return SAMPLER_FILTER_BILINEAR;
+ }
+}
+
+static uint32_t gen4_check_filter(PicturePtr picture)
+{
+ switch (picture->filter) {
+ case PictFilterNearest:
+ case PictFilterBilinear:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static uint32_t gen4_repeat(uint32_t repeat)
+{
+ switch (repeat) {
+ default:
+ assert(0);
+ case RepeatNone:
+ return SAMPLER_EXTEND_NONE;
+ case RepeatNormal:
+ return SAMPLER_EXTEND_REPEAT;
+ case RepeatPad:
+ return SAMPLER_EXTEND_PAD;
+ case RepeatReflect:
+ return SAMPLER_EXTEND_REFLECT;
+ }
+}
+
+static bool gen4_check_repeat(PicturePtr picture)
+{
+ if (!picture->repeat)
+ return TRUE;
+
+ switch (picture->repeatType) {
+ case RepeatNone:
+ case RepeatNormal:
+ case RepeatPad:
+ case RepeatReflect:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+/**
+ * Sets up the common fields for a surface state buffer for the given
+ * picture in the given surface state buffer.
+ */
+static int
+gen4_bind_bo(struct sna *sna,
+ struct kgem_bo *bo,
+ uint32_t width,
+ uint32_t height,
+ uint32_t format,
+ Bool is_dst)
+{
+ struct gen4_surface_state *ss;
+ uint32_t domains;
+ uint16_t offset;
+
+ /* After the first bind, we manage the cache domains within the batch */
+ if (is_dst) {
+ domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
+ kgem_bo_mark_dirty(bo);
+ } else {
+ domains = I915_GEM_DOMAIN_SAMPLER << 16;
+ is_dst = gen4_format_is_dst(format);
+ }
+
+ offset = sna->kgem.surface - sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
+ offset *= sizeof(uint32_t);
+
+ if (is_dst) {
+ if (bo->dst_bound)
+ return bo->dst_bound;
+
+ bo->dst_bound = offset;
+ } else {
+ if (bo->src_bound)
+ return bo->src_bound;
+
+ bo->src_bound = offset;
+ }
+
+ sna->kgem.surface -=
+ sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
+ ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
+
+ ss->ss0.surface_type = GEN4_SURFACE_2D;
+ ss->ss0.surface_format = format;
+
+ ss->ss0.data_return_format = GEN4_SURFACERETURNFORMAT_FLOAT32;
+ ss->ss0.color_blend = 1;
+ ss->ss1.base_addr =
+ kgem_add_reloc(&sna->kgem,
+ sna->kgem.surface + 1,
+ bo, domains, 0);
+
+ ss->ss2.height = height - 1;
+ ss->ss2.width = width - 1;
+ ss->ss3.pitch = bo->pitch - 1;
+ ss->ss3.tiled_surface = bo->tiling != I915_TILING_NONE;
+ ss->ss3.tile_walk = bo->tiling == I915_TILING_Y;
+
+ DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
+ offset, bo->handle, ss->ss1.base_addr,
+ ss->ss0.surface_format, width, height, bo->pitch, bo->tiling,
+ domains & 0xffff ? "render" : "sampler"));
+
+ return offset;
+}
+
+fastcall static void
+gen4_emit_composite_primitive_solid(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = 1.;
+ v[2] = 1.;
+
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ v[4] = 0.;
+ v[5] = 1.;
+
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+ v[7] = 0.;
+ v[8] = 0.;
+}
+
+fastcall static void
+gen4_emit_composite_primitive_identity_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ const float *sf = op->src.scale;
+ float sx, sy, *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ sx = r->src.x + op->src.offset[0];
+ sy = r->src.y + op->src.offset[1];
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = (sx + r->width) * sf[0];
+ v[2] = (sy + r->height) * sf[1];
+
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ v[4] = sx * sf[0];
+ v[5] = v[2];
+
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+ v[7] = v[4];
+ v[8] = sy * sf[1];
+}
+
+fastcall static void
+gen4_emit_composite_primitive_affine_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float *v;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x + r->width,
+ op->src.offset[1] + r->src.y + r->height,
+ op->src.transform,
+ &v[1], &v[2]);
+ v[1] *= op->src.scale[0];
+ v[2] *= op->src.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
+ op->src.offset[1] + r->src.y + r->height,
+ op->src.transform,
+ &v[4], &v[5]);
+ v[4] *= op->src.scale[0];
+ v[5] *= op->src.scale[1];
+
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+ _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
+ op->src.offset[1] + r->src.y,
+ op->src.transform,
+ &v[7], &v[8]);
+ v[7] *= op->src.scale[0];
+ v[8] *= op->src.scale[1];
+}
+
+fastcall static void
+gen4_emit_composite_primitive_identity_source_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float src_x, src_y;
+ float msk_x, msk_y;
+ float w, h;
+ float *v;
+
+ src_x = r->src.x + op->src.offset[0];
+ src_y = r->src.y + op->src.offset[1];
+ msk_x = r->mask.x + op->mask.offset[0];
+ msk_y = r->mask.y + op->mask.offset[1];
+ w = r->width;
+ h = r->height;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 15;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = (src_x + w) * op->src.scale[0];
+ v[2] = (src_y + h) * op->src.scale[1];
+ v[3] = (msk_x + w) * op->mask.scale[0];
+ v[4] = (msk_y + h) * op->mask.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[5] = dst.f;
+ v[6] = src_x * op->src.scale[0];
+ v[7] = v[2];
+ v[8] = msk_x * op->mask.scale[0];
+ v[9] = v[4];
+
+ dst.p.y = r->dst.y;
+ v[10] = dst.f;
+ v[11] = v[6];
+ v[12] = src_y * op->src.scale[1];
+ v[13] = v[8];
+ v[14] = msk_y * op->mask.scale[1];
+}
+
+fastcall static void
+gen4_emit_composite_primitive(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
+ Bool is_affine = op->is_affine;
+ const float *src_sf = op->src.scale;
+ const float *mask_sf = op->mask.scale;
+
+ if (is_affine) {
+ sna_get_transformed_coordinates(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1],
+ op->src.transform,
+ &src_x[0],
+ &src_y[0]);
+
+ sna_get_transformed_coordinates(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[1],
+ &src_y[1]);
+
+ sna_get_transformed_coordinates(r->src.x + op->src.offset[0] + r->width,
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[2],
+ &src_y[2]);
+ } else {
+ if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1],
+ op->src.transform,
+ &src_x[0],
+ &src_y[0],
+ &src_w[0]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[1],
+ &src_y[1],
+ &src_w[1]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0] + r->width,
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[2],
+ &src_y[2],
+ &src_w[2]))
+ return;
+ }
+
+ if (op->mask.bo) {
+ if (is_affine) {
+ sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1],
+ op->mask.transform,
+ &mask_x[0],
+ &mask_y[0]);
+
+ sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[1],
+ &mask_y[1]);
+
+ sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0] + r->width,
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[2],
+ &mask_y[2]);
+ } else {
+ if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1],
+ op->mask.transform,
+ &mask_x[0],
+ &mask_y[0],
+ &mask_w[0]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[1],
+ &mask_y[1],
+ &mask_w[1]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0] + r->width,
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[2],
+ &mask_y[2],
+ &mask_w[2]))
+ return;
+ }
+ }
+
+ OUT_VERTEX(r->dst.x + r->width, r->dst.y + r->height);
+ OUT_VERTEX_F(src_x[2] * src_sf[0]);
+ OUT_VERTEX_F(src_y[2] * src_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(src_w[2]);
+ if (op->mask.bo) {
+ OUT_VERTEX_F(mask_x[2] * mask_sf[0]);
+ OUT_VERTEX_F(mask_y[2] * mask_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(mask_w[2]);
+ }
+
+ OUT_VERTEX(r->dst.x, r->dst.y + r->height);
+ OUT_VERTEX_F(src_x[1] * src_sf[0]);
+ OUT_VERTEX_F(src_y[1] * src_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(src_w[1]);
+ if (op->mask.bo) {
+ OUT_VERTEX_F(mask_x[1] * mask_sf[0]);
+ OUT_VERTEX_F(mask_y[1] * mask_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(mask_w[1]);
+ }
+
+ OUT_VERTEX(r->dst.x, r->dst.y);
+ OUT_VERTEX_F(src_x[0] * src_sf[0]);
+ OUT_VERTEX_F(src_y[0] * src_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(src_w[0]);
+ if (op->mask.bo) {
+ OUT_VERTEX_F(mask_x[0] * mask_sf[0]);
+ OUT_VERTEX_F(mask_y[0] * mask_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(mask_w[0]);
+ }
+}
+
+static void gen4_emit_vertex_buffer(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ int id = op->u.gen4.ve_id;
+
+ OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
+ OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
+ (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
+ sna->render.vertex_reloc[id] = sna->kgem.nbatch;
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ sna->render_state.gen4.vb_id |= 1 << id;
+}
+
+static void gen4_emit_primitive(struct sna *sna)
+{
+ if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
+ sna->render_state.gen4.vertex_offset = sna->kgem.nbatch - 5;
+ return;
+ }
+
+ OUT_BATCH(GEN4_3DPRIMITIVE |
+ GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+ (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+ (0 << 9) |
+ 4);
+ sna->render_state.gen4.vertex_offset = sna->kgem.nbatch;
+ OUT_BATCH(0); /* vertex count, to be filled in later */
+ OUT_BATCH(sna->render.vertex_index);
+ OUT_BATCH(1); /* single instance */
+ OUT_BATCH(0); /* start instance location */
+ OUT_BATCH(0); /* index buffer offset, ignored */
+ sna->render.vertex_start = sna->render.vertex_index;
+
+ sna->render_state.gen4.last_primitive = sna->kgem.nbatch;
+}
+
+static bool gen4_rectangle_begin(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ int id = op->u.gen4.ve_id;
+ int ndwords;
+
+ ndwords = 0;
+ if ((sna->render_state.gen4.vb_id & (1 << id)) == 0)
+ ndwords += 5;
+ if (sna->render_state.gen4.vertex_offset == 0)
+ ndwords += op->need_magic_ca_pass ? 20 : 6;
+ if (ndwords == 0)
+ return true;
+
+ if (!kgem_check_batch(&sna->kgem, ndwords))
+ return false;
+
+ if ((sna->render_state.gen4.vb_id & (1 << id)) == 0)
+ gen4_emit_vertex_buffer(sna, op);
+ if (sna->render_state.gen4.vertex_offset == 0)
+ gen4_emit_primitive(sna);
+
+ return true;
+}
+
+static int gen4_get_rectangles__flush(struct sna *sna)
+{
+ if (!kgem_check_batch(&sna->kgem, 25))
+ return 0;
+ if (sna->kgem.nexec > KGEM_EXEC_SIZE(&sna->kgem) - 1)
+ return 0;
+ if (sna->kgem.nreloc > KGEM_RELOC_SIZE(&sna->kgem) - 1)
+ return 0;
+
+ gen4_vertex_finish(sna, FALSE);
+ sna->render.vertex_index = 0;
+
+ return ARRAY_SIZE(sna->render.vertex_data);
+}
+
+inline static int gen4_get_rectangles(struct sna *sna,
+ const struct sna_composite_op *op,
+ int want)
+{
+ int rem = vertex_space(sna);
+
+ if (rem < 3*op->floats_per_vertex) {
+ DBG(("flushing vbo for %s: %d < %d\n",
+ __FUNCTION__, rem, 3*op->floats_per_vertex));
+ rem = gen4_get_rectangles__flush(sna);
+ if (rem == 0)
+ return 0;
+ }
+
+ if (!gen4_rectangle_begin(sna, op))
+ return 0;
+
+ if (want * op->floats_per_vertex*3 > rem)
+ want = rem / (3*op->floats_per_vertex);
+
+ sna->render.vertex_index += 3*want;
+ return want;
+}
+
+static uint32_t *gen4_composite_get_binding_table(struct sna *sna,
+ const struct sna_composite_op *op,
+ uint16_t *offset)
+{
+ uint32_t *table;
+
+ sna->kgem.surface -=
+ sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
+ /* Clear all surplus entries to zero in case of prefetch */
+ table = memset(sna->kgem.batch + sna->kgem.surface,
+ 0, sizeof(struct gen4_surface_state_padded));
+
+ *offset = sna->kgem.surface;
+
+ DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
+
+ return table;
+}
+
+static void
+gen4_emit_sip(struct sna *sna)
+{
+ /* Set system instruction pointer */
+ OUT_BATCH(GEN4_STATE_SIP | 0);
+ OUT_BATCH(0);
+}
+
+static void
+gen4_emit_urb(struct sna *sna)
+{
+ int urb_vs_start, urb_vs_size;
+ int urb_gs_start, urb_gs_size;
+ int urb_clip_start, urb_clip_size;
+ int urb_sf_start, urb_sf_size;
+ int urb_cs_start, urb_cs_size;
+
+ urb_vs_start = 0;
+ urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
+ urb_gs_start = urb_vs_start + urb_vs_size;
+ urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
+ urb_clip_start = urb_gs_start + urb_gs_size;
+ urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
+ urb_sf_start = urb_clip_start + urb_clip_size;
+ urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
+ urb_cs_start = urb_sf_start + urb_sf_size;
+ urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
+
+ OUT_BATCH(GEN4_URB_FENCE |
+ UF0_CS_REALLOC |
+ UF0_SF_REALLOC |
+ UF0_CLIP_REALLOC |
+ UF0_GS_REALLOC |
+ UF0_VS_REALLOC |
+ 1);
+ OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
+ ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
+ ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
+ OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
+ ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
+
+ /* Constant buffer state */
+ OUT_BATCH(GEN4_CS_URB_STATE | 0);
+ OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);
+}
+
+static void
+gen4_emit_state_base_address(struct sna *sna)
+{
+ assert(sna->render_state.gen4.general_bo->proxy == NULL);
+ OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4);
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
+ sna->kgem.nbatch,
+ sna->render_state.gen4.general_bo,
+ I915_GEM_DOMAIN_INSTRUCTION << 16,
+ BASE_ADDRESS_MODIFY));
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
+ sna->kgem.nbatch,
+ NULL,
+ I915_GEM_DOMAIN_INSTRUCTION << 16,
+ BASE_ADDRESS_MODIFY));
+ OUT_BATCH(0); /* media */
+
+ /* upper bounds, all disabled */
+ OUT_BATCH(BASE_ADDRESS_MODIFY);
+ OUT_BATCH(0);
+}
+
+static void
+gen4_emit_invariant(struct sna *sna)
+{
+ OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
+ if (sna->kgem.gen >= 45)
+ OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+ else
+ OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+ gen4_emit_sip(sna);
+ gen4_emit_state_base_address(sna);
+
+ sna->render_state.gen4.needs_invariant = FALSE;
+}
+
+static void
+gen4_get_batch(struct sna *sna)
+{
+ kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
+ DBG(("%s: flushing batch: %d < %d+%d\n",
+ __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
+ 150, 4*8));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->render_state.gen4.needs_invariant)
+ gen4_emit_invariant(sna);
+}
+
+static void
+gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op)
+{
+ if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
+ DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
+ sna->render_state.gen4.floats_per_vertex,
+ op->floats_per_vertex,
+ sna->render.vertex_index,
+ (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
+ sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
+ sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
+ sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
+ }
+}
+
+static void
+gen4_emit_binding_table(struct sna *sna, uint16_t offset)
+{
+ if (sna->render_state.gen4.surface_table == offset)
+ return;
+
+ sna->render_state.gen4.surface_table = offset;
+
+ /* Binding table pointers */
+ OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4);
+ OUT_BATCH(0); /* vs */
+ OUT_BATCH(0); /* gs */
+ OUT_BATCH(0); /* clip */
+ OUT_BATCH(0); /* sf */
+ /* Only the PS uses the binding table */
+ OUT_BATCH(offset*4);
+}
+
+static bool
+gen4_emit_pipelined_pointers(struct sna *sna,
+ const struct sna_composite_op *op,
+ int blend, int kernel)
+{
+ uint16_t offset = sna->kgem.nbatch, last;
+
+ OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5);
+ OUT_BATCH(sna->render_state.gen4.vs);
+ OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */
+ OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */
+ OUT_BATCH(sna->render_state.gen4.sf[op->mask.bo != NULL]);
+ OUT_BATCH(sna->render_state.gen4.wm +
+ SAMPLER_OFFSET(op->src.filter, op->src.repeat,
+ op->mask.filter, op->mask.repeat,
+ kernel));
+ OUT_BATCH(sna->render_state.gen4.cc +
+ gen4_get_blend(blend, op->has_component_alpha, op->dst.format));
+
+ last = sna->render_state.gen4.last_pipelined_pointers;
+ if (last &&
+ sna->kgem.batch[offset + 1] == sna->kgem.batch[last + 1] &&
+ sna->kgem.batch[offset + 3] == sna->kgem.batch[last + 3] &&
+ sna->kgem.batch[offset + 4] == sna->kgem.batch[last + 4] &&
+ sna->kgem.batch[offset + 5] == sna->kgem.batch[last + 5] &&
+ sna->kgem.batch[offset + 6] == sna->kgem.batch[last + 6]) {
+ sna->kgem.nbatch = offset;
+ return false;
+ } else {
+ sna->render_state.gen4.last_pipelined_pointers = offset;
+ return true;
+ }
+}
+
+static void
+gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
+{
+ uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
+ uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
+
+ if (sna->render_state.gen4.drawrect_limit == limit &&
+ sna->render_state.gen4.drawrect_offset == offset)
+ return;
+ sna->render_state.gen4.drawrect_offset = offset;
+ sna->render_state.gen4.drawrect_limit = limit;
+
+ OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
+ OUT_BATCH(0x00000000);
+ OUT_BATCH(limit);
+ OUT_BATCH(offset);
+}
+
+static void
+gen4_emit_vertex_elements(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ /*
+ * vertex data in vertex buffer
+ * position: (x, y)
+ * texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0)
+ * texture coordinate 1 if (has_mask is TRUE): same as above
+ */
+ struct gen4_render_state *render = &sna->render_state.gen4;
+ Bool has_mask = op->mask.bo != NULL;
+ int nelem = has_mask ? 2 : 1;
+ int selem;
+ uint32_t w_component;
+ uint32_t src_format;
+ int id = op->u.gen4.ve_id;
+
+ if (render->ve_id == id)
+ return;
+
+ render->ve_id = id;
+
+ if (op->is_affine) {
+ src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
+ w_component = GEN4_VFCOMPONENT_STORE_1_FLT;
+ selem = 2;
+ } else {
+ src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
+ w_component = GEN4_VFCOMPONENT_STORE_SRC;
+ selem = 3;
+ }
+
+ /* The VUE layout
+ * dword 0-3: position (x, y, 1.0, 1.0),
+ * dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
+ * [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
+ */
+ OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + nelem) - 1));
+
+ /* x,y */
+ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+ GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
+ 0 << VE0_OFFSET_SHIFT); /* offsets vb in bytes */
+ OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
+ GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
+ GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
+ GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
+ (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */
+
+ /* u0, v0, w0 */
+ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+ src_format << VE0_FORMAT_SHIFT |
+ 4 << VE0_OFFSET_SHIFT); /* offset vb in bytes */
+ OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
+ GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
+ w_component << VE1_VFCOMPONENT_2_SHIFT |
+ GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
+ (2*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */
+
+ /* u1, v1, w1 */
+ if (has_mask) {
+ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+ src_format << VE0_FORMAT_SHIFT |
+ ((1 + selem) * 4) << VE0_OFFSET_SHIFT); /* vb offset in bytes */
+ OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
+ GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
+ w_component << VE1_VFCOMPONENT_2_SHIFT |
+ GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
+ (3*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */
+ }
+}
+
+static void
+gen4_emit_state(struct sna *sna,
+ const struct sna_composite_op *op,
+ uint16_t wm_binding_table)
+{
+ gen4_emit_binding_table(sna, wm_binding_table);
+ if (gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel))
+ gen4_emit_urb(sna);
+ gen4_emit_vertex_elements(sna, op);
+ gen4_emit_drawing_rectangle(sna, op);
+}
+
+static void
+gen4_bind_surfaces(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ uint32_t *binding_table;
+ uint16_t offset;
+
+ gen4_get_batch(sna);
+
+ binding_table = gen4_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen4_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen4_get_dest_format(op->dst.format),
+ TRUE);
+ binding_table[1] =
+ gen4_bind_bo(sna,
+ op->src.bo, op->src.width, op->src.height,
+ op->src.card_format,
+ FALSE);
+ if (op->mask.bo)
+ binding_table[2] =
+ gen4_bind_bo(sna,
+ op->mask.bo,
+ op->mask.width,
+ op->mask.height,
+ op->mask.card_format,
+ FALSE);
+
+ gen4_emit_state(sna, op, offset);
+}
+
+fastcall static void
+gen4_render_composite_blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
+ __FUNCTION__,
+ r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
+ r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
+ r->dst.x, r->dst.y, op->dst.x, op->dst.y,
+ r->width, r->height));
+
+ if (!gen4_get_rectangles(sna, op, 1)) {
+ gen4_bind_surfaces(sna, op);
+ gen4_get_rectangles(sna, op, 1);
+ }
+
+ op->prim_emit(sna, op, r);
+
+ /* XXX are the shaders fubar? */
+ FLUSH();
+}
+
+static void
+gen4_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
+ __FUNCTION__, nbox, op->dst.x, op->dst.y,
+ op->src.offset[0], op->src.offset[1],
+ op->src.width, op->src.height,
+ op->mask.offset[0], op->mask.offset[1],
+ op->mask.width, op->mask.height));
+
+ do {
+ struct sna_composite_rectangles r;
+
+ r.dst.x = box->x1;
+ r.dst.y = box->y1;
+ r.width = box->x2 - box->x1;
+ r.height = box->y2 - box->y1;
+ r.mask = r.src = r.dst;
+ gen4_render_composite_blt(sna, op, &r);
+ box++;
+ } while (--nbox);
+}
+
+#ifndef MAX
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#endif
+
+static uint32_t gen4_bind_video_source(struct sna *sna,
+ struct kgem_bo *src_bo,
+ uint32_t src_offset,
+ int src_width,
+ int src_height,
+ int src_pitch,
+ uint32_t src_surf_format)
+{
+ struct gen4_surface_state *ss;
+
+ sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
+
+ ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
+ ss->ss0.surface_type = GEN4_SURFACE_2D;
+ ss->ss0.surface_format = src_surf_format;
+ ss->ss0.color_blend = 1;
+
+ ss->ss1.base_addr =
+ kgem_add_reloc(&sna->kgem,
+ sna->kgem.surface + 1,
+ src_bo,
+ I915_GEM_DOMAIN_SAMPLER << 16,
+ src_offset);
+
+ ss->ss2.width = src_width - 1;
+ ss->ss2.height = src_height - 1;
+ ss->ss3.pitch = src_pitch - 1;
+
+ return sna->kgem.surface * sizeof(uint32_t);
+}
+
+static void gen4_video_bind_surfaces(struct sna *sna,
+ const struct sna_composite_op *op,
+ struct sna_video_frame *frame)
+{
+ uint32_t src_surf_format;
+ uint32_t src_surf_base[6];
+ int src_width[6];
+ int src_height[6];
+ int src_pitch[6];
+ uint32_t *binding_table;
+ uint16_t offset;
+ int n_src, n;
+
+ src_surf_base[0] = frame->YBufOffset;
+ src_surf_base[1] = frame->YBufOffset;
+ src_surf_base[2] = frame->VBufOffset;
+ src_surf_base[3] = frame->VBufOffset;
+ src_surf_base[4] = frame->UBufOffset;
+ src_surf_base[5] = frame->UBufOffset;
+
+ if (is_planar_fourcc(frame->id)) {
+ src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM;
+ src_width[1] = src_width[0] = frame->width;
+ src_height[1] = src_height[0] = frame->height;
+ src_pitch[1] = src_pitch[0] = frame->pitch[1];
+ src_width[4] = src_width[5] = src_width[2] = src_width[3] =
+ frame->width / 2;
+ src_height[4] = src_height[5] = src_height[2] = src_height[3] =
+ frame->height / 2;
+ src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
+ frame->pitch[0];
+ n_src = 6;
+ } else {
+ if (frame->id == FOURCC_UYVY)
+ src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY;
+ else
+ src_surf_format = GEN4_SURFACEFORMAT_YCRCB_NORMAL;
+
+ src_width[0] = frame->width;
+ src_height[0] = frame->height;
+ src_pitch[0] = frame->pitch[0];
+ n_src = 1;
+ }
+
+ gen4_get_batch(sna);
+
+ binding_table = gen4_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen4_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen4_get_dest_format(op->dst.format),
+ TRUE);
+ for (n = 0; n < n_src; n++) {
+ binding_table[1+n] =
+ gen4_bind_video_source(sna,
+ frame->bo,
+ src_surf_base[n],
+ src_width[n],
+ src_height[n],
+ src_pitch[n],
+ src_surf_format);
+ }
+
+ gen4_emit_state(sna, op, offset);
+}
+
+static Bool
+gen4_render_video(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ RegionPtr dstRegion,
+ short src_w, short src_h,
+ short drw_w, short drw_h,
+ PixmapPtr pixmap)
+{
+ struct sna_composite_op tmp;
+ int nbox, dxo, dyo, pix_xoff, pix_yoff;
+ float src_scale_x, src_scale_y;
+ struct sna_pixmap *priv;
+ BoxPtr box;
+
+ DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, src_w, src_h, drw_w, drw_h));
+
+ priv = sna_pixmap_force_to_gpu(pixmap);
+ if (priv == NULL)
+ return FALSE;
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.op = PictOpSrc;
+ tmp.dst.pixmap = pixmap;
+ tmp.dst.width = pixmap->drawable.width;
+ tmp.dst.height = pixmap->drawable.height;
+ tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
+ tmp.dst.bo = priv->gpu_bo;
+
+ tmp.src.filter = SAMPLER_FILTER_BILINEAR;
+ tmp.src.repeat = SAMPLER_EXTEND_NONE;
+ tmp.u.gen4.wm_kernel =
+ is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+ tmp.u.gen4.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, tmp.dst.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, frame->bo))
+ kgem_submit(&sna->kgem);
+
+ if (!kgem_bo_is_dirty(frame->bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen4_video_bind_surfaces(sna, &tmp, frame);
+ gen4_align_vertex(sna, &tmp);
+
+ /* Set up the offset for translating from the given region (in screen
+ * coordinates) to the backing pixmap.
+ */
+#ifdef COMPOSITE
+ pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
+ pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
+#else
+ pix_xoff = 0;
+ pix_yoff = 0;
+#endif
+
+ dxo = dstRegion->extents.x1;
+ dyo = dstRegion->extents.y1;
+
+ /* Use normalized texture coordinates */
+ src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
+ src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
+
+ box = REGION_RECTS(dstRegion);
+ nbox = REGION_NUM_RECTS(dstRegion);
+ while (nbox--) {
+ BoxRec r;
+
+ r.x1 = box->x1 + pix_xoff;
+ r.x2 = box->x2 + pix_xoff;
+ r.y1 = box->y1 + pix_yoff;
+ r.y2 = box->y2 + pix_yoff;
+
+ if (!gen4_get_rectangles(sna, &tmp, 1)) {
+ gen4_video_bind_surfaces(sna, &tmp, frame);
+ gen4_get_rectangles(sna, &tmp, 1);
+ }
+
+ OUT_VERTEX(r.x2, r.y2);
+ OUT_VERTEX_F((box->x2 - dxo) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+
+ OUT_VERTEX(r.x1, r.y2);
+ OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+
+ OUT_VERTEX(r.x1, r.y1);
+ OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
+ OUT_VERTEX_F((box->y1 - dyo) * src_scale_y);
+
+ FLUSH();
+
+ sna_damage_add_box(&priv->gpu_damage, &r);
+ sna_damage_subtract_box(&priv->cpu_damage, &r);
+ box++;
+ }
+
+ return TRUE;
+}
+
+static Bool
+gen4_composite_solid_init(struct sna *sna,
+ struct sna_composite_channel *channel,
+ uint32_t color)
+{
+ channel->filter = PictFilterNearest;
+ channel->repeat = RepeatNormal;
+ channel->is_affine = TRUE;
+ channel->is_solid = TRUE;
+ channel->transform = NULL;
+ channel->width = 1;
+ channel->height = 1;
+ channel->card_format = GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+ channel->bo = sna_render_get_solid(sna, color);
+
+ channel->scale[0] = channel->scale[1] = 1;
+ channel->offset[0] = channel->offset[1] = 0;
+ return channel->bo != NULL;
+}
+
+static int
+gen4_composite_picture(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int x, int y,
+ int w, int h,
+ int dst_x, int dst_y)
+{
+ PixmapPtr pixmap;
+ uint32_t color;
+ int16_t dx, dy;
+
+ DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
+ __FUNCTION__, x, y, w, h, dst_x, dst_y));
+
+ channel->is_solid = FALSE;
+ channel->card_format = -1;
+
+ if (sna_picture_is_solid(picture, &color))
+ return gen4_composite_solid_init(sna, channel, color);
+
+ if (picture->pDrawable == NULL)
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ if (!gen4_check_repeat(picture))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ if (!gen4_check_filter(picture))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
+ channel->filter = picture->filter;
+
+ pixmap = get_drawable_pixmap(picture->pDrawable);
+ get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
+
+ x += dx + picture->pDrawable->x;
+ y += dy + picture->pDrawable->y;
+
+ channel->is_affine = sna_transform_is_affine(picture->transform);
+ if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
+ DBG(("%s: integer translation (%d, %d), removing\n",
+ __FUNCTION__, dx, dy));
+ x += dx;
+ y += dy;
+ channel->transform = NULL;
+ channel->filter = PictFilterNearest;
+ } else
+ channel->transform = picture->transform;
+
+ channel->card_format = gen4_get_card_format(picture->format);
+ if (channel->card_format == -1)
+ return sna_render_picture_convert(sna, picture, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+
+ if (pixmap->drawable.width > 8192 || pixmap->drawable.height > 8192)
+ return sna_render_picture_extract(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ return sna_render_pixmap_bo(sna, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+}
+
+static void gen4_composite_channel_convert(struct sna_composite_channel *channel)
+{
+ channel->repeat = gen4_repeat(channel->repeat);
+ channel->filter = gen4_filter(channel->filter);
+ if (channel->card_format == -1)
+ channel->card_format = gen4_get_card_format(channel->pict_format);
+}
+
+static void
+gen4_render_composite_done(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ gen4_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ sna->render.op = NULL;
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ sna_render_composite_redirect_done(sna, op);
+
+ if (op->src.bo)
+ kgem_bo_destroy(&sna->kgem, op->src.bo);
+ if (op->mask.bo)
+ kgem_bo_destroy(&sna->kgem, op->mask.bo);
+}
+
+static Bool
+gen4_composite_set_target(struct sna *sna,
+ PicturePtr dst,
+ struct sna_composite_op *op)
+{
+ struct sna_pixmap *priv;
+
+ if (!gen4_check_dst_format(dst->format)) {
+ DBG(("%s: incompatible render target format %08x\n",
+ __FUNCTION__, dst->format));
+ return FALSE;
+ }
+
+ op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
+ op->dst.width = op->dst.pixmap->drawable.width;
+ op->dst.height = op->dst.pixmap->drawable.height;
+ op->dst.format = dst->format;
+ priv = sna_pixmap_force_to_gpu(op->dst.pixmap);
+ if (priv == NULL)
+ return FALSE;
+
+ op->dst.bo = priv->gpu_bo;
+ if (!priv->gpu_only)
+ op->damage = &priv->gpu_damage;
+
+ get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
+ &op->dst.x, &op->dst.y);
+ return TRUE;
+}
+
+static inline Bool
+picture_is_cpu(PicturePtr picture)
+{
+ if (!picture->pDrawable)
+ return FALSE;
+
+ /* If it is a solid, try to use the render paths */
+ if (picture->pDrawable->width == 1 &&
+ picture->pDrawable->height == 1 &&
+ picture->repeat)
+ return FALSE;
+
+ return is_cpu(picture->pDrawable);
+}
+
+#if PREFER_BLT
+static inline bool prefer_blt(struct sna *sna)
+{
+ return true;
+}
+#else
+static inline bool prefer_blt(struct sna *sna)
+{
+ return sna->kgem.mode != KGEM_RENDER;
+}
+#endif
+
+static Bool
+try_blt(struct sna *sna,
+ PicturePtr dst,
+ PicturePtr source,
+ int width, int height)
+{
+ if (prefer_blt(sna)) {
+ DBG(("%s: already performing BLT\n", __FUNCTION__));
+ return TRUE;
+ }
+
+ if (width > 8192 || height > 8192) {
+ DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
+ __FUNCTION__, width, height));
+ return TRUE;
+ }
+
+ /* is the source picture only in cpu memory e.g. a shm pixmap? */
+ return picture_is_cpu(source);
+}
+
+static Bool
+gen4_render_composite(struct sna *sna,
+ uint8_t op,
+ PicturePtr src,
+ PicturePtr mask,
+ PicturePtr dst,
+ int16_t src_x, int16_t src_y,
+ int16_t msk_x, int16_t msk_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t width, int16_t height,
+ struct sna_composite_op *tmp)
+{
+ DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
+ width, height, sna->kgem.mode));
+
+ if (mask == NULL &&
+ try_blt(sna, dst, src, width, height) &&
+ sna_blt_composite(sna, op,
+ src, dst,
+ src_x, src_y,
+ dst_x, dst_y,
+ width, height, tmp))
+ return TRUE;
+
+ if (op >= ARRAY_SIZE(gen4_blend_op))
+ return FALSE;
+
+ if (need_tiling(sna, width, height))
+ return sna_tiling_composite(sna,
+ op, src, mask, dst,
+ src_x, src_y,
+ msk_x, msk_y,
+ dst_x, dst_y,
+ width, height,
+ tmp);
+
+ if (!gen4_composite_set_target(sna, dst, tmp))
+ return FALSE;
+
+ if (tmp->dst.width > 8192 || tmp->dst.height > 8192) {
+ if (!sna_render_composite_redirect(sna, tmp,
+ dst_x, dst_y, width, height))
+ return FALSE;
+ }
+
+ switch (gen4_composite_picture(sna, src, &tmp->src,
+ src_x, src_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ DBG(("%s: failed to prepare source\n", __FUNCTION__));
+ goto cleanup_dst;
+ case 0:
+ gen4_composite_solid_init(sna, &tmp->src, 0);
+ case 1:
+ gen4_composite_channel_convert(&tmp->src);
+ break;
+ }
+
+ tmp->op = op;
+ tmp->is_affine = tmp->src.is_affine;
+ tmp->has_component_alpha = FALSE;
+ tmp->need_magic_ca_pass = FALSE;
+
+ tmp->prim_emit = gen4_emit_composite_primitive;
+ if (mask) {
+ if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
+ tmp->has_component_alpha = TRUE;
+
+ /* Check if it's component alpha that relies on a source alpha and on
+ * the source value. We can only get one of those into the single
+ * source value that we get to blend with.
+ */
+ if (gen4_blend_op[op].src_alpha &&
+ (gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) {
+ if (op != PictOpOver) {
+ DBG(("%s -- fallback: unhandled component alpha blend\n",
+ __FUNCTION__));
+
+ goto cleanup_src;
+ }
+
+ tmp->need_magic_ca_pass = TRUE;
+ tmp->op = PictOpOutReverse;
+ }
+ }
+
+ switch (gen4_composite_picture(sna, mask, &tmp->mask,
+ msk_x, msk_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ DBG(("%s: failed to prepare mask\n", __FUNCTION__));
+ goto cleanup_src;
+ case 0:
+ gen4_composite_solid_init(sna, &tmp->mask, 0);
+ case 1:
+ gen4_composite_channel_convert(&tmp->mask);
+ break;
+ }
+
+ tmp->is_affine &= tmp->mask.is_affine;
+
+ if (tmp->src.transform == NULL && tmp->mask.transform == NULL)
+ tmp->prim_emit = gen4_emit_composite_primitive_identity_source_mask;
+
+ tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
+ } else {
+ if (tmp->src.is_solid)
+ tmp->prim_emit = gen4_emit_composite_primitive_solid;
+ else if (tmp->src.transform == NULL)
+ tmp->prim_emit = gen4_emit_composite_primitive_identity_source;
+ else if (tmp->src.is_affine)
+ tmp->prim_emit = gen4_emit_composite_primitive_affine_source;
+
+ tmp->mask.filter = SAMPLER_FILTER_NEAREST;
+ tmp->mask.repeat = SAMPLER_EXTEND_NONE;
+
+ tmp->floats_per_vertex = 3 + !tmp->is_affine;
+ }
+
+ tmp->u.gen4.wm_kernel =
+ gen4_choose_composite_kernel(tmp->op,
+ tmp->mask.bo != NULL,
+ tmp->has_component_alpha,
+ tmp->is_affine);
+ tmp->u.gen4.ve_id = (tmp->mask.bo != NULL) << 1 | tmp->is_affine;
+
+ tmp->blt = gen4_render_composite_blt;
+ tmp->boxes = gen4_render_composite_boxes;
+ tmp->done = gen4_render_composite_done;
+
+ if (!kgem_check_bo(&sna->kgem, tmp->dst.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->src.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->mask.bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen4_bind_surfaces(sna, tmp);
+ gen4_align_vertex(sna, tmp);
+
+ sna->render.op = tmp;
+ return TRUE;
+
+cleanup_src:
+ if (tmp->src.bo)
+ kgem_bo_destroy(&sna->kgem, tmp->src.bo);
+cleanup_dst:
+ if (tmp->redirect.real_bo)
+ kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
+ return FALSE;
+}
+
+static uint32_t gen4_get_dest_format_for_depth(int depth)
+{
+ switch (depth) {
+ case 32:
+ case 24:
+ default: return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
+ case 30: return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
+ case 16: return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
+ case 8: return GEN4_SURFACEFORMAT_A8_UNORM;
+ }
+}
+
+static uint32_t gen4_get_card_format_for_depth(int depth)
+{
+ switch (depth) {
+ case 32:
+ default: return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
+ case 30: return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
+ case 24: return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
+ case 16: return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
+ case 8: return GEN4_SURFACEFORMAT_A8_UNORM;
+ }
+}
+
+static void
+gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
+{
+ uint32_t *binding_table;
+ uint16_t offset;
+
+ gen4_get_batch(sna);
+
+ binding_table = gen4_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen4_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen4_get_dest_format_for_depth(op->dst.pixmap->drawable.depth),
+ TRUE);
+ binding_table[1] =
+ gen4_bind_bo(sna,
+ op->src.bo, op->src.width, op->src.height,
+ op->src.card_format,
+ FALSE);
+
+ if (sna->kgem.surface == offset &&
+ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table) {
+ sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
+ offset = sna->render_state.gen4.surface_table;
+ }
+
+ gen4_emit_state(sna, op, offset);
+}
+
+static void
+gen4_render_copy_one(struct sna *sna,
+ const struct sna_composite_op *op,
+ int sx, int sy,
+ int w, int h,
+ int dx, int dy)
+{
+ if (!gen4_get_rectangles(sna, op, 1)) {
+ gen4_copy_bind_surfaces(sna, op);
+ gen4_get_rectangles(sna, op, 1);
+ }
+
+ OUT_VERTEX(dx+w, dy+h);
+ OUT_VERTEX_F((sx+w)*op->src.scale[0]);
+ OUT_VERTEX_F((sy+h)*op->src.scale[1]);
+
+ OUT_VERTEX(dx, dy+h);
+ OUT_VERTEX_F(sx*op->src.scale[0]);
+ OUT_VERTEX_F((sy+h)*op->src.scale[1]);
+
+ OUT_VERTEX(dx, dy);
+ OUT_VERTEX_F(sx*op->src.scale[0]);
+ OUT_VERTEX_F(sy*op->src.scale[1]);
+
+ FLUSH();
+}
+
+static Bool
+gen4_render_copy_boxes(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+ PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const BoxRec *box, int n)
+{
+ struct sna_composite_op tmp;
+
+ if (prefer_blt(sna) &&
+ sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
+ src->drawable.width > 8192 || src->drawable.height > 8192 ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n);
+
+ DBG(("%s (%d, %d)->(%d, %d) x %d\n",
+ __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
+
+ tmp.dst.pixmap = dst;
+ tmp.dst.width = dst->drawable.width;
+ tmp.dst.height = dst->drawable.height;
+ tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
+ tmp.dst.bo = dst_bo;
+
+ tmp.src.bo = src_bo;
+ tmp.src.filter = SAMPLER_FILTER_NEAREST;
+ tmp.src.repeat = SAMPLER_EXTEND_NONE;
+ tmp.src.card_format =
+ gen4_get_card_format_for_depth(src->drawable.depth),
+ tmp.src.width = src->drawable.width;
+ tmp.src.height = src->drawable.height;
+
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+ tmp.u.gen4.wm_kernel = WM_KERNEL;
+ tmp.u.gen4.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, src_bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(src_bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen4_copy_bind_surfaces(sna, &tmp);
+ gen4_align_vertex(sna, &tmp);
+
+ tmp.src.scale[0] = 1. / src->drawable.width;
+ tmp.src.scale[1] = 1. / src->drawable.height;
+ do {
+ gen4_render_copy_one(sna, &tmp,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x2 - box->x1, box->y2 - box->y1,
+ box->x1 + dst_dx, box->y1 + dst_dy);
+ box++;
+ } while (--n);
+
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ return TRUE;
+}
+
+static void
+gen4_render_copy_blt(struct sna *sna,
+ const struct sna_copy_op *op,
+ int16_t sx, int16_t sy,
+ int16_t w, int16_t h,
+ int16_t dx, int16_t dy)
+{
+ gen4_render_copy_one(sna, &op->base, sx, sy, w, h, dx, dy);
+}
+
+static void
+gen4_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
+{
+ gen4_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+}
+
+static Bool
+gen4_render_copy(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ struct sna_copy_op *op)
+{
+ if (prefer_blt(sna) &&
+ sna_blt_copy(sna, alu,
+ src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ op))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
+ src->drawable.width > 8192 || src->drawable.height > 8192 ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return sna_blt_copy(sna, alu, src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ op);
+
+ op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
+
+ op->base.dst.pixmap = dst;
+ op->base.dst.width = dst->drawable.width;
+ op->base.dst.height = dst->drawable.height;
+ op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
+ op->base.dst.bo = dst_bo;
+
+ op->base.src.bo = src_bo;
+ op->base.src.card_format =
+ gen4_get_card_format_for_depth(src->drawable.depth),
+ op->base.src.width = src->drawable.width;
+ op->base.src.height = src->drawable.height;
+ op->base.src.scale[0] = 1./src->drawable.width;
+ op->base.src.scale[1] = 1./src->drawable.height;
+ op->base.src.filter = SAMPLER_FILTER_NEAREST;
+ op->base.src.repeat = SAMPLER_EXTEND_NONE;
+
+ op->base.is_affine = true;
+ op->base.floats_per_vertex = 3;
+ op->base.u.gen4.wm_kernel = WM_KERNEL;
+ op->base.u.gen4.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, src_bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(src_bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen4_copy_bind_surfaces(sna, &op->base);
+ gen4_align_vertex(sna, &op->base);
+
+ op->blt = gen4_render_copy_blt;
+ op->done = gen4_render_copy_done;
+ return TRUE;
+}
+
+static void
+gen4_fill_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
+{
+ uint32_t *binding_table;
+ uint16_t offset;
+
+ gen4_get_batch(sna);
+
+ binding_table = gen4_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen4_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen4_get_dest_format_for_depth(op->dst.pixmap->drawable.depth),
+ TRUE);
+ binding_table[1] =
+ gen4_bind_bo(sna,
+ op->src.bo, 1, 1,
+ GEN4_SURFACEFORMAT_B8G8R8A8_UNORM,
+ FALSE);
+
+ if (sna->kgem.surface == offset &&
+ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table) {
+ sna->kgem.surface +=
+ sizeof(struct gen4_surface_state_padded)/sizeof(uint32_t);
+ offset = sna->render_state.gen4.surface_table;
+ }
+
+ gen4_emit_state(sna, op, offset);
+}
+
+static void
+gen4_render_fill_one(struct sna *sna,
+ const struct sna_composite_op *op,
+ int x, int y, int w, int h)
+{
+ if (!gen4_get_rectangles(sna, op, 1)) {
+ gen4_fill_bind_surfaces(sna, op);
+ gen4_get_rectangles(sna, op, 1);
+ }
+
+ OUT_VERTEX(x+w, y+h);
+ OUT_VERTEX_F(1);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(x, y+h);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(x, y);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(0);
+
+ FLUSH();
+}
+
+static Bool
+gen4_render_fill_boxes(struct sna *sna,
+ CARD8 op,
+ PictFormat format,
+ const xRenderColor *color,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ const BoxRec *box, int n)
+{
+ struct sna_composite_op tmp;
+ uint32_t pixel;
+
+ if (op >= ARRAY_SIZE(gen4_blend_op)) {
+ DBG(("%s: fallback due to unhandled blend op: %d\n",
+ __FUNCTION__, op));
+ return FALSE;
+ }
+
+ if (prefer_blt(sna) ||
+ dst->drawable.width > 8192 ||
+ dst->drawable.height > 8192 ||
+ !gen4_check_dst_format(format)) {
+ uint8_t alu = GXcopy;
+
+ if (op == PictOpClear) {
+ alu = GXclear;
+ pixel = 0;
+ op = PictOpSrc;
+ }
+
+ if (op == PictOpOver && color->alpha >= 0xff00)
+ op = PictOpSrc;
+
+ if (op == PictOpSrc &&
+ sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ format) &&
+ sna_blt_fill_boxes(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ pixel, box, n))
+ return TRUE;
+
+ if (dst->drawable.width > 8192 ||
+ dst->drawable.height > 8192 ||
+ !gen4_check_dst_format(format))
+ return FALSE;
+ }
+
+ if (!sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ PICT_a8r8g8b8))
+ return FALSE;
+
+ DBG(("%s(%08x x %d)\n", __FUNCTION__, pixel, n));
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.op = op;
+
+ tmp.dst.pixmap = dst;
+ tmp.dst.width = dst->drawable.width;
+ tmp.dst.height = dst->drawable.height;
+ tmp.dst.format = format;
+ tmp.dst.bo = dst_bo;
+
+ tmp.src.bo = sna_render_get_solid(sna, pixel);
+ tmp.src.filter = SAMPLER_FILTER_NEAREST;
+ tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
+
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+ tmp.u.gen4.wm_kernel = WM_KERNEL;
+ tmp.u.gen4.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+
+ gen4_fill_bind_surfaces(sna, &tmp);
+ gen4_align_vertex(sna, &tmp);
+
+ do {
+ gen4_render_fill_one(sna, &tmp,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1);
+ box++;
+ } while (--n);
+
+ kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ return TRUE;
+}
+
+static void
+gen4_render_fill_blt(struct sna *sna, const struct sna_fill_op *op,
+ int16_t x, int16_t y, int16_t w, int16_t h)
+{
+ gen4_render_fill_one(sna, &op->base, x, y, w, h);
+}
+
+static void
+gen4_render_fill_done(struct sna *sna, const struct sna_fill_op *op)
+{
+ gen4_vertex_flush(sna);
+ kgem_bo_destroy(&sna->kgem, op->base.src.bo);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+}
+
+static Bool
+gen4_render_fill(struct sna *sna, uint8_t alu,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ uint32_t color,
+ struct sna_fill_op *op)
+{
+ if (prefer_blt(sna) &&
+ sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ op))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ op);
+
+ if (alu == GXclear)
+ color = 0;
+
+ op->base.op = color == 0 ? PictOpClear : PictOpSrc;
+
+ op->base.dst.pixmap = dst;
+ op->base.dst.width = dst->drawable.width;
+ op->base.dst.height = dst->drawable.height;
+ op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
+ op->base.dst.bo = dst_bo;
+
+ op->base.src.bo =
+ sna_render_get_solid(sna,
+ sna_rgba_for_color(color,
+ dst->drawable.depth));
+ op->base.src.filter = SAMPLER_FILTER_NEAREST;
+ op->base.src.repeat = SAMPLER_EXTEND_REPEAT;
+
+ op->base.is_affine = TRUE;
+ op->base.floats_per_vertex = 3;
+ op->base.u.gen4.wm_kernel = WM_KERNEL;
+ op->base.u.gen4.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+
+ gen4_fill_bind_surfaces(sna, &op->base);
+ gen4_align_vertex(sna, &op->base);
+
+ op->blt = gen4_render_fill_blt;
+ op->done = gen4_render_fill_done;
+ return TRUE;
+}
+
+static void
+gen4_render_flush(struct sna *sna)
+{
+ gen4_vertex_finish(sna, TRUE);
+}
+
+static void
+gen4_render_context_switch(struct sna *sna,
+ int new_mode)
+{
+ if (sna->kgem.mode == 0)
+ return;
+
+ if (new_mode == KGEM_BLT) {
+#if 0
+ OUT_BATCH(MI_FLUSH |
+ MI_STATE_INSTRUCTION_CACHE_FLUSH |
+ MI_INHIBIT_RENDER_CACHE_FLUSH);
+#endif
+ }
+}
+
+static void gen4_render_reset(struct sna *sna)
+{
+ sna->render_state.gen4.needs_invariant = TRUE;
+ sna->render_state.gen4.vb_id = 0;
+ sna->render_state.gen4.ve_id = -1;
+ sna->render_state.gen4.last_primitive = -1;
+ sna->render_state.gen4.last_pipelined_pointers = 0;
+
+ sna->render_state.gen4.drawrect_offset = -1;
+ sna->render_state.gen4.drawrect_limit = -1;
+ sna->render_state.gen4.surface_table = -1;
+}
+
+static void gen4_render_fini(struct sna *sna)
+{
+ kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo);
+}
+
+static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream)
+{
+ struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);
+
+ /* Set up the vertex shader to be disabled (passthrough) */
+ vs->thread4.nr_urb_entries = URB_VS_ENTRIES;
+ vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
+ vs->vs6.vs_enable = 0;
+ vs->vs6.vert_cache_disable = 1;
+
+ return sna_static_stream_offsetof(stream, vs);
+}
+
+static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
+ uint32_t kernel)
+{
+ struct gen4_sf_unit_state *sf_state;
+
+ sf_state = sna_static_stream_map(stream, sizeof(*sf_state), 32);
+
+ sf_state->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
+ sf_state->thread0.kernel_start_pointer = kernel >> 6;
+ sf_state->sf1.single_program_flow = 1;
+ /* scratch space is not used in our kernel */
+ sf_state->thread2.scratch_space_base_pointer = 0;
+ sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
+ sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
+ sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
+ /* don't smash vertex header, read start from dw8 */
+ sf_state->thread3.urb_entry_read_offset = 1;
+ sf_state->thread3.dispatch_grf_start_reg = 3;
+ sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
+ sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
+ sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
+ sf_state->thread4.stats_enable = 1;
+ sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
+ sf_state->sf6.cull_mode = GEN4_CULLMODE_NONE;
+ sf_state->sf6.scissor = 0;
+ sf_state->sf7.trifan_pv = 2;
+ sf_state->sf6.dest_org_vbias = 0x8;
+ sf_state->sf6.dest_org_hbias = 0x8;
+
+ return sna_static_stream_offsetof(stream, sf_state);
+}
+
+static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream,
+ sampler_filter_t src_filter,
+ sampler_extend_t src_extend,
+ sampler_filter_t mask_filter,
+ sampler_extend_t mask_extend)
+{
+ struct gen4_sampler_state *sampler_state;
+
+ sampler_state = sna_static_stream_map(stream,
+ sizeof(struct gen4_sampler_state) * 2,
+ 32);
+ sampler_state_init(&sampler_state[0], src_filter, src_extend);
+ sampler_state_init(&sampler_state[1], mask_filter, mask_extend);
+
+ return sna_static_stream_offsetof(stream, sampler_state);
+}
+
+static void gen4_init_wm_state(struct gen4_wm_unit_state *state,
+ Bool has_mask,
+ uint32_t kernel,
+ uint32_t sampler)
+{
+ state->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
+ state->thread0.kernel_start_pointer = kernel >> 6;
+
+ state->thread1.single_program_flow = 0;
+
+ /* scratch space is not used in our kernel */
+ state->thread2.scratch_space_base_pointer = 0;
+ state->thread2.per_thread_scratch_space = 0;
+
+ state->thread3.const_urb_entry_read_length = 0;
+ state->thread3.const_urb_entry_read_offset = 0;
+
+ state->thread3.urb_entry_read_offset = 0;
+ /* wm kernel use urb from 3, see wm_program in compiler module */
+ state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
+
+ state->wm4.sampler_count = 1; /* 1-4 samplers */
+
+ state->wm4.sampler_state_pointer = sampler >> 5;
+ state->wm5.max_threads = PS_MAX_THREADS - 1;
+ state->wm5.transposed_urb_read = 0;
+ state->wm5.thread_dispatch_enable = 1;
+ /* just use 16-pixel dispatch (4 subspans), don't need to change kernel
+ * start point
+ */
+ state->wm5.enable_16_pix = 1;
+ state->wm5.enable_8_pix = 0;
+ state->wm5.early_depth_test = 1;
+
+ /* Each pair of attributes (src/mask coords) is two URB entries */
+ if (has_mask) {
+ state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */
+ state->thread3.urb_entry_read_length = 4;
+ } else {
+ state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
+ state->thread3.urb_entry_read_length = 2;
+ }
+}
+
+static uint32_t gen4_create_cc_viewport(struct sna_static_stream *stream)
+{
+ struct gen4_cc_viewport vp;
+
+ vp.min_depth = -1.e35;
+ vp.max_depth = 1.e35;
+
+ return sna_static_stream_add(stream, &vp, sizeof(vp), 32);
+}
+
+static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
+{
+ uint8_t *ptr, *base;
+ uint32_t vp;
+ int i, j;
+
+ vp = gen4_create_cc_viewport(stream);
+ base = ptr =
+ sna_static_stream_map(stream,
+ GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64,
+ 64);
+
+ for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) {
+ for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) {
+ struct gen4_cc_unit_state *state =
+ (struct gen4_cc_unit_state *)ptr;
+
+ state->cc3.blend_enable = 1; /* enable color blend */
+ state->cc4.cc_viewport_state_offset = vp >> 5;
+
+ state->cc5.logicop_func = 0xc; /* COPY */
+ state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD;
+
+ /* Fill in alpha blend factors same as color, for the future. */
+ state->cc5.ia_src_blend_factor = i;
+ state->cc5.ia_dest_blend_factor = j;
+
+ state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD;
+ state->cc6.clamp_post_alpha_blend = 1;
+ state->cc6.clamp_pre_alpha_blend = 1;
+ state->cc6.src_blend_factor = i;
+ state->cc6.dest_blend_factor = j;
+
+ ptr += 64;
+ }
+ }
+
+ return sna_static_stream_offsetof(stream, base);
+}
+
+static Bool gen4_render_setup(struct sna *sna)
+{
+ struct gen4_render_state *state = &sna->render_state.gen4;
+ struct sna_static_stream general;
+ struct gen4_wm_unit_state_padded *wm_state;
+ uint32_t sf[2], wm[KERNEL_COUNT];
+ int i, j, k, l, m;
+
+ sna_static_stream_init(&general);
+
+ /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
+ * dumps, you know it points to zero.
+ */
+ null_create(&general);
+
+ /* Set up the two SF states (one for blending with a mask, one without) */
+ sf[0] = sna_static_stream_add(&general,
+ sf_kernel,
+ sizeof(sf_kernel),
+ 64);
+ sf[1] = sna_static_stream_add(&general,
+ sf_kernel_mask,
+ sizeof(sf_kernel_mask),
+ 64);
+ for (m = 0; m < KERNEL_COUNT; m++) {
+ wm[m] = sna_static_stream_add(&general,
+ wm_kernels[m].data,
+ wm_kernels[m].size,
+ 64);
+ }
+
+ state->vs = gen4_create_vs_unit_state(&general);
+
+ state->sf[0] = gen4_create_sf_state(&general, sf[0]);
+ state->sf[1] = gen4_create_sf_state(&general, sf[1]);
+
+
+ /* Set up the WM states: each filter/extend type for source and mask, per
+ * kernel.
+ */
+ wm_state = sna_static_stream_map(&general,
+ sizeof(*wm_state) * KERNEL_COUNT *
+ FILTER_COUNT * EXTEND_COUNT *
+ FILTER_COUNT * EXTEND_COUNT,
+ 64);
+ state->wm = sna_static_stream_offsetof(&general, wm_state);
+ for (i = 0; i < FILTER_COUNT; i++) {
+ for (j = 0; j < EXTEND_COUNT; j++) {
+ for (k = 0; k < FILTER_COUNT; k++) {
+ for (l = 0; l < EXTEND_COUNT; l++) {
+ uint32_t sampler_state;
+
+ sampler_state =
+ gen4_create_sampler_state(&general,
+ i, j,
+ k, l);
+
+ for (m = 0; m < KERNEL_COUNT; m++) {
+ gen4_init_wm_state(&wm_state->state,
+ wm_kernels[m].has_mask,
+ wm[m],
+ sampler_state);
+ wm_state++;
+ }
+ }
+ }
+ }
+ }
+
+ state->cc = gen4_create_cc_unit_state(&general);
+
+ state->general_bo = sna_static_stream_fini(sna, &general);
+ return state->general_bo != NULL;
+}
+
+Bool gen4_render_init(struct sna *sna)
+{
+ if (!gen4_render_setup(sna))
+ return FALSE;
+
+ gen4_render_reset(sna);
+
+ sna->render.composite = gen4_render_composite;
+ sna->render.video = gen4_render_video;
+
+ sna->render.copy_boxes = gen4_render_copy_boxes;
+ sna->render.copy = gen4_render_copy;
+
+ sna->render.fill_boxes = gen4_render_fill_boxes;
+ sna->render.fill = gen4_render_fill;
+
+ sna->render.flush = gen4_render_flush;
+ sna->render.context_switch = gen4_render_context_switch;
+ sna->render.reset = gen4_render_reset;
+ sna->render.fini = gen4_render_fini;
+
+ sna->render.max_3d_size = 8192;
+ return TRUE;
+}
diff --git a/src/sna/gen4_render.h b/src/sna/gen4_render.h
new file mode 100644
index 00000000..a014e52f
--- /dev/null
+++ b/src/sna/gen4_render.h
@@ -0,0 +1,2643 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef GEN5_RENDER_H
+#define GEN5_RENDER_H
+
+#define GEN4_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
+ ((Pipeline) << 27) | \
+ ((Opcode) << 24) | \
+ ((Subopcode) << 16))
+
+#define GEN4_URB_FENCE GEN4_3D(0, 0, 0)
+#define GEN4_CS_URB_STATE GEN4_3D(0, 0, 1)
+#define GEN4_CONSTANT_BUFFER GEN4_3D(0, 0, 2)
+#define GEN4_STATE_PREFETCH GEN4_3D(0, 0, 3)
+
+#define GEN4_STATE_BASE_ADDRESS GEN4_3D(0, 1, 1)
+#define GEN4_STATE_SIP GEN4_3D(0, 1, 2)
+#define GEN4_PIPELINE_SELECT GEN4_3D(0, 1, 4)
+
+#define NEW_PIPELINE_SELECT GEN4_3D(1, 1, 4)
+
+#define GEN4_MEDIA_STATE_POINTERS GEN4_3D(2, 0, 0)
+#define GEN4_MEDIA_OBJECT GEN4_3D(2, 1, 0)
+
+#define GEN4_3DSTATE_PIPELINED_POINTERS GEN4_3D(3, 0, 0)
+#define GEN4_3DSTATE_BINDING_TABLE_POINTERS GEN4_3D(3, 0, 1)
+
+#define GEN4_3DSTATE_VERTEX_BUFFERS GEN4_3D(3, 0, 8)
+#define GEN4_3DSTATE_VERTEX_ELEMENTS GEN4_3D(3, 0, 9)
+#define GEN4_3DSTATE_INDEX_BUFFER GEN4_3D(3, 0, 0xa)
+#define GEN4_3DSTATE_VF_STATISTICS GEN4_3D(3, 0, 0xb)
+
+#define GEN4_3DSTATE_DRAWING_RECTANGLE GEN4_3D(3, 1, 0)
+#define GEN4_3DSTATE_CONSTANT_COLOR GEN4_3D(3, 1, 1)
+#define GEN4_3DSTATE_SAMPLER_PALETTE_LOAD GEN4_3D(3, 1, 2)
+#define GEN4_3DSTATE_CHROMA_KEY GEN4_3D(3, 1, 4)
+#define GEN4_3DSTATE_DEPTH_BUFFER GEN4_3D(3, 1, 5)
+# define GEN4_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
+# define GEN4_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
+
+#define GEN4_3DSTATE_POLY_STIPPLE_OFFSET GEN4_3D(3, 1, 6)
+#define GEN4_3DSTATE_POLY_STIPPLE_PATTERN GEN4_3D(3, 1, 7)
+#define GEN4_3DSTATE_LINE_STIPPLE GEN4_3D(3, 1, 8)
+#define GEN4_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN4_3D(3, 1, 9)
+/* These two are BLC and CTG only, not BW or CL */
+#define GEN4_3DSTATE_AA_LINE_PARAMS GEN4_3D(3, 1, 0xa)
+#define GEN4_3DSTATE_GS_SVB_INDEX GEN4_3D(3, 1, 0xb)
+
+#define GEN4_PIPE_CONTROL GEN4_3D(3, 2, 0)
+
+#define GEN4_3DPRIMITIVE GEN4_3D(3, 3, 0)
+
+#define GEN4_3DSTATE_CLEAR_PARAMS GEN4_3D(3, 1, 0x10)
+/* DW1 */
+# define GEN4_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
+
+#define PIPELINE_SELECT_3D 0
+#define PIPELINE_SELECT_MEDIA 1
+
+#define UF0_CS_REALLOC (1 << 13)
+#define UF0_VFE_REALLOC (1 << 12)
+#define UF0_SF_REALLOC (1 << 11)
+#define UF0_CLIP_REALLOC (1 << 10)
+#define UF0_GS_REALLOC (1 << 9)
+#define UF0_VS_REALLOC (1 << 8)
+#define UF1_CLIP_FENCE_SHIFT 20
+#define UF1_GS_FENCE_SHIFT 10
+#define UF1_VS_FENCE_SHIFT 0
+#define UF2_CS_FENCE_SHIFT 20
+#define UF2_VFE_FENCE_SHIFT 10
+#define UF2_SF_FENCE_SHIFT 0
+
+/* for GEN4_STATE_BASE_ADDRESS */
+#define BASE_ADDRESS_MODIFY (1 << 0)
+
+/* for GEN4_3DSTATE_PIPELINED_POINTERS */
+#define GEN4_GS_DISABLE 0
+#define GEN4_GS_ENABLE 1
+#define GEN4_CLIP_DISABLE 0
+#define GEN4_CLIP_ENABLE 1
+
+/* for GEN4_PIPE_CONTROL */
+#define GEN4_PIPE_CONTROL_NOWRITE (0 << 14)
+#define GEN4_PIPE_CONTROL_WRITE_QWORD (1 << 14)
+#define GEN4_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
+#define GEN4_PIPE_CONTROL_WRITE_TIME (3 << 14)
+#define GEN4_PIPE_CONTROL_DEPTH_STALL (1 << 13)
+#define GEN4_PIPE_CONTROL_WC_FLUSH (1 << 12)
+#define GEN4_PIPE_CONTROL_IS_FLUSH (1 << 11)
+#define GEN4_PIPE_CONTROL_TC_FLUSH (1 << 10)
+#define GEN4_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
+#define GEN4_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
+#define GEN4_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
+#define GEN4_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
+
+/* VERTEX_BUFFER_STATE Structure */
+#define VB0_BUFFER_INDEX_SHIFT 27
+#define VB0_VERTEXDATA (0 << 26)
+#define VB0_INSTANCEDATA (1 << 26)
+#define VB0_BUFFER_PITCH_SHIFT 0
+
+/* VERTEX_ELEMENT_STATE Structure */
+#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27
+#define VE0_VALID (1 << 26)
+#define VE0_FORMAT_SHIFT 16
+#define VE0_OFFSET_SHIFT 0
+#define VE1_VFCOMPONENT_0_SHIFT 28
+#define VE1_VFCOMPONENT_1_SHIFT 24
+#define VE1_VFCOMPONENT_2_SHIFT 20
+#define VE1_VFCOMPONENT_3_SHIFT 16
+#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
+
+/* 3DPRIMITIVE bits */
+#define GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
+#define GEN4_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
+/* Primitive types are in gen4_defines.h */
+#define GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT 10
+
+#define GEN4_SVG_CTL 0x7400
+
+#define GEN4_SVG_CTL_GS_BA (0 << 8)
+#define GEN4_SVG_CTL_SS_BA (1 << 8)
+#define GEN4_SVG_CTL_IO_BA (2 << 8)
+#define GEN4_SVG_CTL_GS_AUB (3 << 8)
+#define GEN4_SVG_CTL_IO_AUB (4 << 8)
+#define GEN4_SVG_CTL_SIP (5 << 8)
+
+#define GEN4_SVG_RDATA 0x7404
+#define GEN4_SVG_WORK_CTL 0x7408
+
+#define GEN4_VF_CTL 0x7500
+
+#define GEN4_VF_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN4_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
+#define GEN4_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
+#define GEN4_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
+#define GEN4_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
+#define GEN4_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
+#define GEN4_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
+#define GEN4_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
+#define GEN4_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN4_VF_STRG_VAL 0x7504
+#define GEN4_VF_STR_VL_OVR 0x7508
+#define GEN4_VF_VC_OVR 0x750c
+#define GEN4_VF_STR_PSKIP 0x7510
+#define GEN4_VF_MAX_PRIM 0x7514
+#define GEN4_VF_RDATA 0x7518
+
+#define GEN4_VS_CTL 0x7600
+#define GEN4_VS_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN4_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
+#define GEN4_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
+#define GEN4_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
+#define GEN4_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
+#define GEN4_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN4_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN4_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN4_VS_STRG_VAL 0x7604
+#define GEN4_VS_RDATA 0x7608
+
+#define GEN4_SF_CTL 0x7b00
+#define GEN4_SF_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
+#define GEN4_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
+#define GEN4_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
+#define GEN4_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN4_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN4_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN4_SF_STRG_VAL 0x7b04
+#define GEN4_SF_RDATA 0x7b18
+
+#define GEN4_WIZ_CTL 0x7c00
+#define GEN4_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN4_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
+#define GEN4_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
+#define GEN4_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
+#define GEN4_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
+#define GEN4_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
+#define GEN4_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
+#define GEN4_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
+#define GEN4_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
+#define GEN4_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN4_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN4_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN4_WIZ_STRG_VAL 0x7c04
+#define GEN4_WIZ_RDATA 0x7c18
+
+#define GEN4_TS_CTL 0x7e00
+#define GEN4_TS_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN4_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
+#define GEN4_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
+#define GEN4_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
+#define GEN4_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
+#define GEN4_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN4_TS_STRG_VAL 0x7e04
+#define GEN4_TS_RDATA 0x7e08
+
+#define GEN4_TD_CTL 0x8000
+#define GEN4_TD_CTL_MUX_SHIFT 8
+#define GEN4_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
+#define GEN4_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
+#define GEN4_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
+#define GEN4_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
+#define GEN4_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
+#define GEN4_TD_CTL2 0x8004
+#define GEN4_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
+#define GEN4_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
+#define GEN4_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
+#define GEN4_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
+#define GEN4_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
+#define GEN4_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
+#define GEN4_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
+#define GEN4_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
+#define GEN4_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
+#define GEN4_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
+#define GEN4_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
+#define GEN4_TD_VF_VS_EMSK 0x8008
+#define GEN4_TD_GS_EMSK 0x800c
+#define GEN4_TD_CLIP_EMSK 0x8010
+#define GEN4_TD_SF_EMSK 0x8014
+#define GEN4_TD_WIZ_EMSK 0x8018
+#define GEN4_TD_0_6_EHTRG_VAL 0x801c
+#define GEN4_TD_0_7_EHTRG_VAL 0x8020
+#define GEN4_TD_0_6_EHTRG_MSK 0x8024
+#define GEN4_TD_0_7_EHTRG_MSK 0x8028
+#define GEN4_TD_RDATA 0x802c
+#define GEN4_TD_TS_EMSK 0x8030
+
+#define GEN4_EU_CTL 0x8800
+#define GEN4_EU_CTL_SELECT_SHIFT 16
+#define GEN4_EU_CTL_DATA_MUX_SHIFT 8
+#define GEN4_EU_ATT_0 0x8810
+#define GEN4_EU_ATT_1 0x8814
+#define GEN4_EU_ATT_DATA_0 0x8820
+#define GEN4_EU_ATT_DATA_1 0x8824
+#define GEN4_EU_ATT_CLR_0 0x8830
+#define GEN4_EU_ATT_CLR_1 0x8834
+#define GEN4_EU_RDATA 0x8840
+
+/* 3D state:
+ */
+#define _3DOP_3DSTATE_PIPELINED 0x0
+#define _3DOP_3DSTATE_NONPIPELINED 0x1
+#define _3DOP_3DCONTROL 0x2
+#define _3DOP_3DPRIMITIVE 0x3
+
+#define _3DSTATE_PIPELINED_POINTERS 0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
+#define _3DSTATE_VERTEX_BUFFERS 0x08
+#define _3DSTATE_VERTEX_ELEMENTS 0x09
+#define _3DSTATE_INDEX_BUFFER 0x0A
+#define _3DSTATE_VF_STATISTICS 0x0B
+#define _3DSTATE_DRAWING_RECTANGLE 0x00
+#define _3DSTATE_CONSTANT_COLOR 0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
+#define _3DSTATE_CHROMA_KEY 0x04
+#define _3DSTATE_DEPTH_BUFFER 0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
+#define _3DSTATE_LINE_STIPPLE 0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
+#define _3DCONTROL 0x00
+#define _3DPRIMITIVE 0x00
+
+#define _3DPRIM_POINTLIST 0x01
+#define _3DPRIM_LINELIST 0x02
+#define _3DPRIM_LINESTRIP 0x03
+#define _3DPRIM_TRILIST 0x04
+#define _3DPRIM_TRISTRIP 0x05
+#define _3DPRIM_TRIFAN 0x06
+#define _3DPRIM_QUADLIST 0x07
+#define _3DPRIM_QUADSTRIP 0x08
+#define _3DPRIM_LINELIST_ADJ 0x09
+#define _3DPRIM_LINESTRIP_ADJ 0x0A
+#define _3DPRIM_TRILIST_ADJ 0x0B
+#define _3DPRIM_TRISTRIP_ADJ 0x0C
+#define _3DPRIM_TRISTRIP_REVERSE 0x0D
+#define _3DPRIM_POLYGON 0x0E
+#define _3DPRIM_RECTLIST 0x0F
+#define _3DPRIM_LINELOOP 0x10
+#define _3DPRIM_POINTLIST_BF 0x11
+#define _3DPRIM_LINESTRIP_CONT 0x12
+#define _3DPRIM_LINESTRIP_BF 0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
+
+#define GEN4_ANISORATIO_2 0
+#define GEN4_ANISORATIO_4 1
+#define GEN4_ANISORATIO_6 2
+#define GEN4_ANISORATIO_8 3
+#define GEN4_ANISORATIO_10 4
+#define GEN4_ANISORATIO_12 5
+#define GEN4_ANISORATIO_14 6
+#define GEN4_ANISORATIO_16 7
+
+#define GEN4_BLENDFACTOR_ONE 0x1
+#define GEN4_BLENDFACTOR_SRC_COLOR 0x2
+#define GEN4_BLENDFACTOR_SRC_ALPHA 0x3
+#define GEN4_BLENDFACTOR_DST_ALPHA 0x4
+#define GEN4_BLENDFACTOR_DST_COLOR 0x5
+#define GEN4_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
+#define GEN4_BLENDFACTOR_CONST_COLOR 0x7
+#define GEN4_BLENDFACTOR_CONST_ALPHA 0x8
+#define GEN4_BLENDFACTOR_SRC1_COLOR 0x9
+#define GEN4_BLENDFACTOR_SRC1_ALPHA 0x0A
+#define GEN4_BLENDFACTOR_ZERO 0x11
+#define GEN4_BLENDFACTOR_INV_SRC_COLOR 0x12
+#define GEN4_BLENDFACTOR_INV_SRC_ALPHA 0x13
+#define GEN4_BLENDFACTOR_INV_DST_ALPHA 0x14
+#define GEN4_BLENDFACTOR_INV_DST_COLOR 0x15
+#define GEN4_BLENDFACTOR_INV_CONST_COLOR 0x17
+#define GEN4_BLENDFACTOR_INV_CONST_ALPHA 0x18
+#define GEN4_BLENDFACTOR_INV_SRC1_COLOR 0x19
+#define GEN4_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
+
+#define GEN4_BLENDFUNCTION_ADD 0
+#define GEN4_BLENDFUNCTION_SUBTRACT 1
+#define GEN4_BLENDFUNCTION_REVERSE_SUBTRACT 2
+#define GEN4_BLENDFUNCTION_MIN 3
+#define GEN4_BLENDFUNCTION_MAX 4
+
+#define GEN4_ALPHATEST_FORMAT_UNORM8 0
+#define GEN4_ALPHATEST_FORMAT_FLOAT32 1
+
+#define GEN4_CHROMAKEY_KILL_ON_ANY_MATCH 0
+#define GEN4_CHROMAKEY_REPLACE_BLACK 1
+
+#define GEN4_CLIP_API_OGL 0
+#define GEN4_CLIP_API_DX 1
+
+#define GEN4_CLIPMODE_NORMAL 0
+#define GEN4_CLIPMODE_CLIP_ALL 1
+#define GEN4_CLIPMODE_CLIP_NON_REJECTED 2
+#define GEN4_CLIPMODE_REJECT_ALL 3
+#define GEN4_CLIPMODE_ACCEPT_ALL 4
+
+#define GEN4_CLIP_NDCSPACE 0
+#define GEN4_CLIP_SCREENSPACE 1
+
+#define GEN4_COMPAREFUNCTION_ALWAYS 0
+#define GEN4_COMPAREFUNCTION_NEVER 1
+#define GEN4_COMPAREFUNCTION_LESS 2
+#define GEN4_COMPAREFUNCTION_EQUAL 3
+#define GEN4_COMPAREFUNCTION_LEQUAL 4
+#define GEN4_COMPAREFUNCTION_GREATER 5
+#define GEN4_COMPAREFUNCTION_NOTEQUAL 6
+#define GEN4_COMPAREFUNCTION_GEQUAL 7
+
+#define GEN4_COVERAGE_PIXELS_HALF 0
+#define GEN4_COVERAGE_PIXELS_1 1
+#define GEN4_COVERAGE_PIXELS_2 2
+#define GEN4_COVERAGE_PIXELS_4 3
+
+#define GEN4_CULLMODE_BOTH 0
+#define GEN4_CULLMODE_NONE 1
+#define GEN4_CULLMODE_FRONT 2
+#define GEN4_CULLMODE_BACK 3
+
+#define GEN4_DEFAULTCOLOR_R8G8B8A8_UNORM 0
+#define GEN4_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
+
+#define GEN4_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
+#define GEN4_DEPTHFORMAT_D32_FLOAT 1
+#define GEN4_DEPTHFORMAT_D24_UNORM_S8_UINT 2
+#define GEN4_DEPTHFORMAT_D16_UNORM 5
+
+#define GEN4_FLOATING_POINT_IEEE_754 0
+#define GEN4_FLOATING_POINT_NON_IEEE_754 1
+
+#define GEN4_FRONTWINDING_CW 0
+#define GEN4_FRONTWINDING_CCW 1
+
+#define GEN4_INDEX_BYTE 0
+#define GEN4_INDEX_WORD 1
+#define GEN4_INDEX_DWORD 2
+
+#define GEN4_LOGICOPFUNCTION_CLEAR 0
+#define GEN4_LOGICOPFUNCTION_NOR 1
+#define GEN4_LOGICOPFUNCTION_AND_INVERTED 2
+#define GEN4_LOGICOPFUNCTION_COPY_INVERTED 3
+#define GEN4_LOGICOPFUNCTION_AND_REVERSE 4
+#define GEN4_LOGICOPFUNCTION_INVERT 5
+#define GEN4_LOGICOPFUNCTION_XOR 6
+#define GEN4_LOGICOPFUNCTION_NAND 7
+#define GEN4_LOGICOPFUNCTION_AND 8
+#define GEN4_LOGICOPFUNCTION_EQUIV 9
+#define GEN4_LOGICOPFUNCTION_NOOP 10
+#define GEN4_LOGICOPFUNCTION_OR_INVERTED 11
+#define GEN4_LOGICOPFUNCTION_COPY 12
+#define GEN4_LOGICOPFUNCTION_OR_REVERSE 13
+#define GEN4_LOGICOPFUNCTION_OR 14
+#define GEN4_LOGICOPFUNCTION_SET 15
+
+#define GEN4_MAPFILTER_NEAREST 0x0
+#define GEN4_MAPFILTER_LINEAR 0x1
+#define GEN4_MAPFILTER_ANISOTROPIC 0x2
+
+#define GEN4_MIPFILTER_NONE 0
+#define GEN4_MIPFILTER_NEAREST 1
+#define GEN4_MIPFILTER_LINEAR 3
+
+#define GEN4_POLYGON_FRONT_FACING 0
+#define GEN4_POLYGON_BACK_FACING 1
+
+#define GEN4_PREFILTER_ALWAYS 0x0
+#define GEN4_PREFILTER_NEVER 0x1
+#define GEN4_PREFILTER_LESS 0x2
+#define GEN4_PREFILTER_EQUAL 0x3
+#define GEN4_PREFILTER_LEQUAL 0x4
+#define GEN4_PREFILTER_GREATER 0x5
+#define GEN4_PREFILTER_NOTEQUAL 0x6
+#define GEN4_PREFILTER_GEQUAL 0x7
+
+#define GEN4_PROVOKING_VERTEX_0 0
+#define GEN4_PROVOKING_VERTEX_1 1
+#define GEN4_PROVOKING_VERTEX_2 2
+
+#define GEN4_RASTRULE_UPPER_LEFT 0
+#define GEN4_RASTRULE_UPPER_RIGHT 1
+
+#define GEN4_RENDERTARGET_CLAMPRANGE_UNORM 0
+#define GEN4_RENDERTARGET_CLAMPRANGE_SNORM 1
+#define GEN4_RENDERTARGET_CLAMPRANGE_FORMAT 2
+
+#define GEN4_STENCILOP_KEEP 0
+#define GEN4_STENCILOP_ZERO 1
+#define GEN4_STENCILOP_REPLACE 2
+#define GEN4_STENCILOP_INCRSAT 3
+#define GEN4_STENCILOP_DECRSAT 4
+#define GEN4_STENCILOP_INCR 5
+#define GEN4_STENCILOP_DECR 6
+#define GEN4_STENCILOP_INVERT 7
+
+#define GEN4_SURFACE_MIPMAPLAYOUT_BELOW 0
+#define GEN4_SURFACE_MIPMAPLAYOUT_RIGHT 1
+
+#define GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
+#define GEN4_SURFACEFORMAT_R32G32B32A32_SINT 0x001
+#define GEN4_SURFACEFORMAT_R32G32B32A32_UINT 0x002
+#define GEN4_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
+#define GEN4_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
+#define GEN4_SURFACEFORMAT_R64G64_FLOAT 0x005
+#define GEN4_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
+#define GEN4_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
+#define GEN4_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
+#define GEN4_SURFACEFORMAT_R32G32B32_FLOAT 0x040
+#define GEN4_SURFACEFORMAT_R32G32B32_SINT 0x041
+#define GEN4_SURFACEFORMAT_R32G32B32_UINT 0x042
+#define GEN4_SURFACEFORMAT_R32G32B32_UNORM 0x043
+#define GEN4_SURFACEFORMAT_R32G32B32_SNORM 0x044
+#define GEN4_SURFACEFORMAT_R32G32B32_SSCALED 0x045
+#define GEN4_SURFACEFORMAT_R32G32B32_USCALED 0x046
+#define GEN4_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
+#define GEN4_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
+#define GEN4_SURFACEFORMAT_R16G16B16A16_SINT 0x082
+#define GEN4_SURFACEFORMAT_R16G16B16A16_UINT 0x083
+#define GEN4_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
+#define GEN4_SURFACEFORMAT_R32G32_FLOAT 0x085
+#define GEN4_SURFACEFORMAT_R32G32_SINT 0x086
+#define GEN4_SURFACEFORMAT_R32G32_UINT 0x087
+#define GEN4_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
+#define GEN4_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
+#define GEN4_SURFACEFORMAT_L32A32_FLOAT 0x08A
+#define GEN4_SURFACEFORMAT_R32G32_UNORM 0x08B
+#define GEN4_SURFACEFORMAT_R32G32_SNORM 0x08C
+#define GEN4_SURFACEFORMAT_R64_FLOAT 0x08D
+#define GEN4_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
+#define GEN4_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
+#define GEN4_SURFACEFORMAT_A32X32_FLOAT 0x090
+#define GEN4_SURFACEFORMAT_L32X32_FLOAT 0x091
+#define GEN4_SURFACEFORMAT_I32X32_FLOAT 0x092
+#define GEN4_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
+#define GEN4_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
+#define GEN4_SURFACEFORMAT_R32G32_SSCALED 0x095
+#define GEN4_SURFACEFORMAT_R32G32_USCALED 0x096
+#define GEN4_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define GEN4_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
+#define GEN4_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
+#define GEN4_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
+#define GEN4_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
+#define GEN4_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
+#define GEN4_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
+#define GEN4_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
+#define GEN4_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
+#define GEN4_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
+#define GEN4_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
+#define GEN4_SURFACEFORMAT_R16G16_UNORM 0x0CC
+#define GEN4_SURFACEFORMAT_R16G16_SNORM 0x0CD
+#define GEN4_SURFACEFORMAT_R16G16_SINT 0x0CE
+#define GEN4_SURFACEFORMAT_R16G16_UINT 0x0CF
+#define GEN4_SURFACEFORMAT_R16G16_FLOAT 0x0D0
+#define GEN4_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
+#define GEN4_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
+#define GEN4_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
+#define GEN4_SURFACEFORMAT_R32_SINT 0x0D6
+#define GEN4_SURFACEFORMAT_R32_UINT 0x0D7
+#define GEN4_SURFACEFORMAT_R32_FLOAT 0x0D8
+#define GEN4_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
+#define GEN4_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
+#define GEN4_SURFACEFORMAT_L16A16_UNORM 0x0DF
+#define GEN4_SURFACEFORMAT_I24X8_UNORM 0x0E0
+#define GEN4_SURFACEFORMAT_L24X8_UNORM 0x0E1
+#define GEN4_SURFACEFORMAT_A24X8_UNORM 0x0E2
+#define GEN4_SURFACEFORMAT_I32_FLOAT 0x0E3
+#define GEN4_SURFACEFORMAT_L32_FLOAT 0x0E4
+#define GEN4_SURFACEFORMAT_A32_FLOAT 0x0E5
+#define GEN4_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
+#define GEN4_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
+#define GEN4_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
+#define GEN4_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
+#define GEN4_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
+#define GEN4_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
+#define GEN4_SURFACEFORMAT_L16A16_FLOAT 0x0F0
+#define GEN4_SURFACEFORMAT_R32_UNORM 0x0F1
+#define GEN4_SURFACEFORMAT_R32_SNORM 0x0F2
+#define GEN4_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
+#define GEN4_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
+#define GEN4_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
+#define GEN4_SURFACEFORMAT_R16G16_SSCALED 0x0F6
+#define GEN4_SURFACEFORMAT_R16G16_USCALED 0x0F7
+#define GEN4_SURFACEFORMAT_R32_SSCALED 0x0F8
+#define GEN4_SURFACEFORMAT_R32_USCALED 0x0F9
+#define GEN4_SURFACEFORMAT_B5G6R5_UNORM 0x100
+#define GEN4_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
+#define GEN4_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
+#define GEN4_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
+#define GEN4_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
+#define GEN4_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
+#define GEN4_SURFACEFORMAT_R8G8_UNORM 0x106
+#define GEN4_SURFACEFORMAT_R8G8_SNORM 0x107
+#define GEN4_SURFACEFORMAT_R8G8_SINT 0x108
+#define GEN4_SURFACEFORMAT_R8G8_UINT 0x109
+#define GEN4_SURFACEFORMAT_R16_UNORM 0x10A
+#define GEN4_SURFACEFORMAT_R16_SNORM 0x10B
+#define GEN4_SURFACEFORMAT_R16_SINT 0x10C
+#define GEN4_SURFACEFORMAT_R16_UINT 0x10D
+#define GEN4_SURFACEFORMAT_R16_FLOAT 0x10E
+#define GEN4_SURFACEFORMAT_I16_UNORM 0x111
+#define GEN4_SURFACEFORMAT_L16_UNORM 0x112
+#define GEN4_SURFACEFORMAT_A16_UNORM 0x113
+#define GEN4_SURFACEFORMAT_L8A8_UNORM 0x114
+#define GEN4_SURFACEFORMAT_I16_FLOAT 0x115
+#define GEN4_SURFACEFORMAT_L16_FLOAT 0x116
+#define GEN4_SURFACEFORMAT_A16_FLOAT 0x117
+#define GEN4_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define GEN4_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define GEN4_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
+#define GEN4_SURFACEFORMAT_R8G8_SSCALED 0x11C
+#define GEN4_SURFACEFORMAT_R8G8_USCALED 0x11D
+#define GEN4_SURFACEFORMAT_R16_SSCALED 0x11E
+#define GEN4_SURFACEFORMAT_R16_USCALED 0x11F
+#define GEN4_SURFACEFORMAT_R8_UNORM 0x140
+#define GEN4_SURFACEFORMAT_R8_SNORM 0x141
+#define GEN4_SURFACEFORMAT_R8_SINT 0x142
+#define GEN4_SURFACEFORMAT_R8_UINT 0x143
+#define GEN4_SURFACEFORMAT_A8_UNORM 0x144
+#define GEN4_SURFACEFORMAT_I8_UNORM 0x145
+#define GEN4_SURFACEFORMAT_L8_UNORM 0x146
+#define GEN4_SURFACEFORMAT_P4A4_UNORM 0x147
+#define GEN4_SURFACEFORMAT_A4P4_UNORM 0x148
+#define GEN4_SURFACEFORMAT_R8_SSCALED 0x149
+#define GEN4_SURFACEFORMAT_R8_USCALED 0x14A
+#define GEN4_SURFACEFORMAT_R1_UINT 0x181
+#define GEN4_SURFACEFORMAT_YCRCB_NORMAL 0x182
+#define GEN4_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
+#define GEN4_SURFACEFORMAT_BC1_UNORM 0x186
+#define GEN4_SURFACEFORMAT_BC2_UNORM 0x187
+#define GEN4_SURFACEFORMAT_BC3_UNORM 0x188
+#define GEN4_SURFACEFORMAT_BC4_UNORM 0x189
+#define GEN4_SURFACEFORMAT_BC5_UNORM 0x18A
+#define GEN4_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
+#define GEN4_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
+#define GEN4_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
+#define GEN4_SURFACEFORMAT_MONO8 0x18E
+#define GEN4_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
+#define GEN4_SURFACEFORMAT_YCRCB_SWAPY 0x190
+#define GEN4_SURFACEFORMAT_DXT1_RGB 0x191
+#define GEN4_SURFACEFORMAT_FXT1 0x192
+#define GEN4_SURFACEFORMAT_R8G8B8_UNORM 0x193
+#define GEN4_SURFACEFORMAT_R8G8B8_SNORM 0x194
+#define GEN4_SURFACEFORMAT_R8G8B8_SSCALED 0x195
+#define GEN4_SURFACEFORMAT_R8G8B8_USCALED 0x196
+#define GEN4_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
+#define GEN4_SURFACEFORMAT_R64G64B64_FLOAT 0x198
+#define GEN4_SURFACEFORMAT_BC4_SNORM 0x199
+#define GEN4_SURFACEFORMAT_BC5_SNORM 0x19A
+#define GEN4_SURFACEFORMAT_R16G16B16_UNORM 0x19C
+#define GEN4_SURFACEFORMAT_R16G16B16_SNORM 0x19D
+#define GEN4_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
+#define GEN4_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+
+#define GEN4_SURFACERETURNFORMAT_FLOAT32 0
+#define GEN4_SURFACERETURNFORMAT_S1 1
+
+#define GEN4_SURFACE_1D 0
+#define GEN4_SURFACE_2D 1
+#define GEN4_SURFACE_3D 2
+#define GEN4_SURFACE_CUBE 3
+#define GEN4_SURFACE_BUFFER 4
+#define GEN4_SURFACE_NULL 7
+
+#define GEN4_BORDER_COLOR_MODE_DEFAULT 0
+#define GEN4_BORDER_COLOR_MODE_LEGACY 1
+
+#define GEN4_TEXCOORDMODE_WRAP 0
+#define GEN4_TEXCOORDMODE_MIRROR 1
+#define GEN4_TEXCOORDMODE_CLAMP 2
+#define GEN4_TEXCOORDMODE_CUBE 3
+#define GEN4_TEXCOORDMODE_CLAMP_BORDER 4
+#define GEN4_TEXCOORDMODE_MIRROR_ONCE 5
+
+#define GEN4_THREAD_PRIORITY_NORMAL 0
+#define GEN4_THREAD_PRIORITY_HIGH 1
+
+#define GEN4_TILEWALK_XMAJOR 0
+#define GEN4_TILEWALK_YMAJOR 1
+
+#define GEN4_VERTEX_SUBPIXEL_PRECISION_8BITS 0
+#define GEN4_VERTEX_SUBPIXEL_PRECISION_4BITS 1
+
+#define GEN4_VERTEXBUFFER_ACCESS_VERTEXDATA 0
+#define GEN4_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
+
+#define GEN4_VFCOMPONENT_NOSTORE 0
+#define GEN4_VFCOMPONENT_STORE_SRC 1
+#define GEN4_VFCOMPONENT_STORE_0 2
+#define GEN4_VFCOMPONENT_STORE_1_FLT 3
+#define GEN4_VFCOMPONENT_STORE_1_INT 4
+#define GEN4_VFCOMPONENT_STORE_VID 5
+#define GEN4_VFCOMPONENT_STORE_IID 6
+#define GEN4_VFCOMPONENT_STORE_PID 7
+
+
+
+/* Execution Unit (EU) defines
+ */
+
+#define GEN4_ALIGN_1 0
+#define GEN4_ALIGN_16 1
+
+#define GEN4_ADDRESS_DIRECT 0
+#define GEN4_ADDRESS_REGISTER_INDIRECT_REGISTER 1
+
+#define GEN4_CHANNEL_X 0
+#define GEN4_CHANNEL_Y 1
+#define GEN4_CHANNEL_Z 2
+#define GEN4_CHANNEL_W 3
+
+#define GEN4_COMPRESSION_NONE 0
+#define GEN4_COMPRESSION_2NDHALF 1
+#define GEN4_COMPRESSION_COMPRESSED 2
+
+#define GEN4_CONDITIONAL_NONE 0
+#define GEN4_CONDITIONAL_Z 1
+#define GEN4_CONDITIONAL_NZ 2
+#define GEN4_CONDITIONAL_EQ 1 /* Z */
+#define GEN4_CONDITIONAL_NEQ 2 /* NZ */
+#define GEN4_CONDITIONAL_G 3
+#define GEN4_CONDITIONAL_GE 4
+#define GEN4_CONDITIONAL_L 5
+#define GEN4_CONDITIONAL_LE 6
+#define GEN4_CONDITIONAL_C 7
+#define GEN4_CONDITIONAL_O 8
+
+#define GEN4_DEBUG_NONE 0
+#define GEN4_DEBUG_BREAKPOINT 1
+
+#define GEN4_DEPENDENCY_NORMAL 0
+#define GEN4_DEPENDENCY_NOTCLEARED 1
+#define GEN4_DEPENDENCY_NOTCHECKED 2
+#define GEN4_DEPENDENCY_DISABLE 3
+
+#define GEN4_EXECUTE_1 0
+#define GEN4_EXECUTE_2 1
+#define GEN4_EXECUTE_4 2
+#define GEN4_EXECUTE_8 3
+#define GEN4_EXECUTE_16 4
+#define GEN4_EXECUTE_32 5
+
+#define GEN4_HORIZONTAL_STRIDE_0 0
+#define GEN4_HORIZONTAL_STRIDE_1 1
+#define GEN4_HORIZONTAL_STRIDE_2 2
+#define GEN4_HORIZONTAL_STRIDE_4 3
+
+#define GEN4_INSTRUCTION_NORMAL 0
+#define GEN4_INSTRUCTION_SATURATE 1
+
+#define GEN4_MASK_ENABLE 0
+#define GEN4_MASK_DISABLE 1
+
+#define GEN4_OPCODE_MOV 1
+#define GEN4_OPCODE_SEL 2
+#define GEN4_OPCODE_NOT 4
+#define GEN4_OPCODE_AND 5
+#define GEN4_OPCODE_OR 6
+#define GEN4_OPCODE_XOR 7
+#define GEN4_OPCODE_SHR 8
+#define GEN4_OPCODE_SHL 9
+#define GEN4_OPCODE_RSR 10
+#define GEN4_OPCODE_RSL 11
+#define GEN4_OPCODE_ASR 12
+#define GEN4_OPCODE_CMP 16
+#define GEN4_OPCODE_JMPI 32
+#define GEN4_OPCODE_IF 34
+#define GEN4_OPCODE_IFF 35
+#define GEN4_OPCODE_ELSE 36
+#define GEN4_OPCODE_ENDIF 37
+#define GEN4_OPCODE_DO 38
+#define GEN4_OPCODE_WHILE 39
+#define GEN4_OPCODE_BREAK 40
+#define GEN4_OPCODE_CONTINUE 41
+#define GEN4_OPCODE_HALT 42
+#define GEN4_OPCODE_MSAVE 44
+#define GEN4_OPCODE_MRESTORE 45
+#define GEN4_OPCODE_PUSH 46
+#define GEN4_OPCODE_POP 47
+#define GEN4_OPCODE_WAIT 48
+#define GEN4_OPCODE_SEND 49
+#define GEN4_OPCODE_ADD 64
+#define GEN4_OPCODE_MUL 65
+#define GEN4_OPCODE_AVG 66
+#define GEN4_OPCODE_FRC 67
+#define GEN4_OPCODE_RNDU 68
+#define GEN4_OPCODE_RNDD 69
+#define GEN4_OPCODE_RNDE 70
+#define GEN4_OPCODE_RNDZ 71
+#define GEN4_OPCODE_MAC 72
+#define GEN4_OPCODE_MACH 73
+#define GEN4_OPCODE_LZD 74
+#define GEN4_OPCODE_SAD2 80
+#define GEN4_OPCODE_SADA2 81
+#define GEN4_OPCODE_DP4 84
+#define GEN4_OPCODE_DPH 85
+#define GEN4_OPCODE_DP3 86
+#define GEN4_OPCODE_DP2 87
+#define GEN4_OPCODE_DPA2 88
+#define GEN4_OPCODE_LINE 89
+#define GEN4_OPCODE_NOP 126
+
+#define GEN4_PREDICATE_NONE 0
+#define GEN4_PREDICATE_NORMAL 1
+#define GEN4_PREDICATE_ALIGN1_ANYV 2
+#define GEN4_PREDICATE_ALIGN1_ALLV 3
+#define GEN4_PREDICATE_ALIGN1_ANY2H 4
+#define GEN4_PREDICATE_ALIGN1_ALL2H 5
+#define GEN4_PREDICATE_ALIGN1_ANY4H 6
+#define GEN4_PREDICATE_ALIGN1_ALL4H 7
+#define GEN4_PREDICATE_ALIGN1_ANY8H 8
+#define GEN4_PREDICATE_ALIGN1_ALL8H 9
+#define GEN4_PREDICATE_ALIGN1_ANY16H 10
+#define GEN4_PREDICATE_ALIGN1_ALL16H 11
+#define GEN4_PREDICATE_ALIGN16_REPLICATE_X 2
+#define GEN4_PREDICATE_ALIGN16_REPLICATE_Y 3
+#define GEN4_PREDICATE_ALIGN16_REPLICATE_Z 4
+#define GEN4_PREDICATE_ALIGN16_REPLICATE_W 5
+#define GEN4_PREDICATE_ALIGN16_ANY4H 6
+#define GEN4_PREDICATE_ALIGN16_ALL4H 7
+
+#define GEN4_ARCHITECTURE_REGISTER_FILE 0
+#define GEN4_GENERAL_REGISTER_FILE 1
+#define GEN4_MESSAGE_REGISTER_FILE 2
+#define GEN4_IMMEDIATE_VALUE 3
+
+#define GEN4_REGISTER_TYPE_UD 0
+#define GEN4_REGISTER_TYPE_D 1
+#define GEN4_REGISTER_TYPE_UW 2
+#define GEN4_REGISTER_TYPE_W 3
+#define GEN4_REGISTER_TYPE_UB 4
+#define GEN4_REGISTER_TYPE_B 5
+#define GEN4_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
+#define GEN4_REGISTER_TYPE_HF 6
+#define GEN4_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
+#define GEN4_REGISTER_TYPE_F 7
+
+#define GEN4_ARF_NULL 0x00
+#define GEN4_ARF_ADDRESS 0x10
+#define GEN4_ARF_ACCUMULATOR 0x20
+#define GEN4_ARF_FLAG 0x30
+#define GEN4_ARF_MASK 0x40
+#define GEN4_ARF_MASK_STACK 0x50
+#define GEN4_ARF_MASK_STACK_DEPTH 0x60
+#define GEN4_ARF_STATE 0x70
+#define GEN4_ARF_CONTROL 0x80
+#define GEN4_ARF_NOTIFICATION_COUNT 0x90
+#define GEN4_ARF_IP 0xA0
+
+#define GEN4_AMASK 0
+#define GEN4_IMASK 1
+#define GEN4_LMASK 2
+#define GEN4_CMASK 3
+
+
+
+#define GEN4_THREAD_NORMAL 0
+#define GEN4_THREAD_ATOMIC 1
+#define GEN4_THREAD_SWITCH 2
+
+#define GEN4_VERTICAL_STRIDE_0 0
+#define GEN4_VERTICAL_STRIDE_1 1
+#define GEN4_VERTICAL_STRIDE_2 2
+#define GEN4_VERTICAL_STRIDE_4 3
+#define GEN4_VERTICAL_STRIDE_8 4
+#define GEN4_VERTICAL_STRIDE_16 5
+#define GEN4_VERTICAL_STRIDE_32 6
+#define GEN4_VERTICAL_STRIDE_64 7
+#define GEN4_VERTICAL_STRIDE_128 8
+#define GEN4_VERTICAL_STRIDE_256 9
+#define GEN4_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
+
+#define GEN4_WIDTH_1 0
+#define GEN4_WIDTH_2 1
+#define GEN4_WIDTH_4 2
+#define GEN4_WIDTH_8 3
+#define GEN4_WIDTH_16 4
+
+#define GEN4_STATELESS_BUFFER_BOUNDARY_1K 0
+#define GEN4_STATELESS_BUFFER_BOUNDARY_2K 1
+#define GEN4_STATELESS_BUFFER_BOUNDARY_4K 2
+#define GEN4_STATELESS_BUFFER_BOUNDARY_8K 3
+#define GEN4_STATELESS_BUFFER_BOUNDARY_16K 4
+#define GEN4_STATELESS_BUFFER_BOUNDARY_32K 5
+#define GEN4_STATELESS_BUFFER_BOUNDARY_64K 6
+#define GEN4_STATELESS_BUFFER_BOUNDARY_128K 7
+#define GEN4_STATELESS_BUFFER_BOUNDARY_256K 8
+#define GEN4_STATELESS_BUFFER_BOUNDARY_512K 9
+#define GEN4_STATELESS_BUFFER_BOUNDARY_1M 10
+#define GEN4_STATELESS_BUFFER_BOUNDARY_2M 11
+
+#define GEN4_POLYGON_FACING_FRONT 0
+#define GEN4_POLYGON_FACING_BACK 1
+
+#define GEN4_MESSAGE_TARGET_NULL 0
+#define GEN4_MESSAGE_TARGET_MATH 1
+#define GEN4_MESSAGE_TARGET_SAMPLER 2
+#define GEN4_MESSAGE_TARGET_GATEWAY 3
+#define GEN4_MESSAGE_TARGET_DATAPORT_READ 4
+#define GEN4_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define GEN4_MESSAGE_TARGET_URB 6
+#define GEN4_MESSAGE_TARGET_THREAD_SPAWNER 7
+
+#define GEN4_SAMPLER_RETURN_FORMAT_FLOAT32 0
+#define GEN4_SAMPLER_RETURN_FORMAT_UINT32 2
+#define GEN4_SAMPLER_RETURN_FORMAT_SINT32 3
+
+#define GEN4_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
+#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
+#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
+#define GEN4_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
+#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
+#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
+#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
+#define GEN4_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
+#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
+#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define GEN4_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
+#define GEN4_SAMPLER_MESSAGE_SIMD8_RESINFO 2
+#define GEN4_SAMPLER_MESSAGE_SIMD16_RESINFO 2
+#define GEN4_SAMPLER_MESSAGE_SIMD4X2_LD 3
+#define GEN4_SAMPLER_MESSAGE_SIMD8_LD 3
+#define GEN4_SAMPLER_MESSAGE_SIMD16_LD 3
+
+#define GEN4_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
+#define GEN4_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
+#define GEN4_DATAPORT_OWORD_BLOCK_2_OWORDS 2
+#define GEN4_DATAPORT_OWORD_BLOCK_4_OWORDS 3
+#define GEN4_DATAPORT_OWORD_BLOCK_8_OWORDS 4
+
+#define GEN4_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
+#define GEN4_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
+
+#define GEN4_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
+#define GEN4_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+
+#define GEN4_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+#define GEN4_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
+#define GEN4_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define GEN4_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+
+#define GEN4_DATAPORT_READ_TARGET_DATA_CACHE 0
+#define GEN4_DATAPORT_READ_TARGET_RENDER_CACHE 1
+#define GEN4_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
+
+#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
+#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
+#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
+#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
+#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
+
+#define GEN4_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
+#define GEN4_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
+#define GEN4_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
+#define GEN4_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
+#define GEN4_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
+#define GEN4_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
+#define GEN4_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+
+#define GEN4_MATH_FUNCTION_INV 1
+#define GEN4_MATH_FUNCTION_LOG 2
+#define GEN4_MATH_FUNCTION_EXP 3
+#define GEN4_MATH_FUNCTION_SQRT 4
+#define GEN4_MATH_FUNCTION_RSQ 5
+#define GEN4_MATH_FUNCTION_SIN 6 /* was 7 */
+#define GEN4_MATH_FUNCTION_COS 7 /* was 8 */
+#define GEN4_MATH_FUNCTION_SINCOS 8 /* was 6 */
+#define GEN4_MATH_FUNCTION_TAN 9
+#define GEN4_MATH_FUNCTION_POW 10
+#define GEN4_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
+#define GEN4_MATH_FUNCTION_INT_DIV_QUOTIENT 12
+#define GEN4_MATH_FUNCTION_INT_DIV_REMAINDER 13
+
+#define GEN4_MATH_INTEGER_UNSIGNED 0
+#define GEN4_MATH_INTEGER_SIGNED 1
+
+#define GEN4_MATH_PRECISION_FULL 0
+#define GEN4_MATH_PRECISION_PARTIAL 1
+
+#define GEN4_MATH_SATURATE_NONE 0
+#define GEN4_MATH_SATURATE_SATURATE 1
+
+#define GEN4_MATH_DATA_VECTOR 0
+#define GEN4_MATH_DATA_SCALAR 1
+
+#define GEN4_URB_OPCODE_WRITE 0
+
+#define GEN4_URB_SWIZZLE_NONE 0
+#define GEN4_URB_SWIZZLE_INTERLEAVE 1
+#define GEN4_URB_SWIZZLE_TRANSPOSE 2
+
+#define GEN4_SCRATCH_SPACE_SIZE_1K 0
+#define GEN4_SCRATCH_SPACE_SIZE_2K 1
+#define GEN4_SCRATCH_SPACE_SIZE_4K 2
+#define GEN4_SCRATCH_SPACE_SIZE_8K 3
+#define GEN4_SCRATCH_SPACE_SIZE_16K 4
+#define GEN4_SCRATCH_SPACE_SIZE_32K 5
+#define GEN4_SCRATCH_SPACE_SIZE_64K 6
+#define GEN4_SCRATCH_SPACE_SIZE_128K 7
+#define GEN4_SCRATCH_SPACE_SIZE_256K 8
+#define GEN4_SCRATCH_SPACE_SIZE_512K 9
+#define GEN4_SCRATCH_SPACE_SIZE_1M 10
+#define GEN4_SCRATCH_SPACE_SIZE_2M 11
+
+
+
+
+#define CMD_URB_FENCE 0x6000
+#define CMD_CONST_BUFFER_STATE 0x6001
+#define CMD_CONST_BUFFER 0x6002
+
+#define CMD_STATE_BASE_ADDRESS 0x6101
+#define CMD_STATE_INSN_POINTER 0x6102
+#define CMD_PIPELINE_SELECT 0x6104
+
+#define CMD_PIPELINED_STATE_POINTERS 0x7800
+#define CMD_BINDING_TABLE_PTRS 0x7801
+#define CMD_VERTEX_BUFFER 0x7808
+#define CMD_VERTEX_ELEMENT 0x7809
+#define CMD_INDEX_BUFFER 0x780a
+#define CMD_VF_STATISTICS 0x780b
+
+#define CMD_DRAW_RECT 0x7900
+#define CMD_BLEND_CONSTANT_COLOR 0x7901
+#define CMD_CHROMA_KEY 0x7904
+#define CMD_DEPTH_BUFFER 0x7905
+#define CMD_POLY_STIPPLE_OFFSET 0x7906
+#define CMD_POLY_STIPPLE_PATTERN 0x7907
+#define CMD_LINE_STIPPLE_PATTERN 0x7908
+#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7908
+
+#define CMD_PIPE_CONTROL 0x7a00
+
+#define CMD_3D_PRIM 0x7b00
+
+#define CMD_MI_FLUSH 0x0200
+
+
+/* Various values from the R0 vertex header:
+ */
+#define R02_PRIM_END 0x1
+#define R02_PRIM_START 0x2
+
+/* media pipeline */
+
+#define GEN4_VFE_MODE_GENERIC 0x0
+#define GEN4_VFE_MODE_VLD_MPEG2 0x1
+#define GEN4_VFE_MODE_IS 0x2
+#define GEN4_VFE_MODE_AVC_MC 0x4
+#define GEN4_VFE_MODE_AVC_IT 0x7
+#define GEN4_VFE_MODE_VC1_IT 0xB
+
+#define GEN4_VFE_DEBUG_COUNTER_FREE 0
+#define GEN4_VFE_DEBUG_COUNTER_FROZEN 1
+#define GEN4_VFE_DEBUG_COUNTER_ONCE 2
+#define GEN4_VFE_DEBUG_COUNTER_ALWAYS 3
+
+/* VLD_STATE */
+#define GEN4_MPEG_TOP_FIELD 1
+#define GEN4_MPEG_BOTTOM_FIELD 2
+#define GEN4_MPEG_FRAME 3
+#define GEN4_MPEG_QSCALE_LINEAR 0
+#define GEN4_MPEG_QSCALE_NONLINEAR 1
+#define GEN4_MPEG_ZIGZAG_SCAN 0
+#define GEN4_MPEG_ALTER_VERTICAL_SCAN 1
+#define GEN4_MPEG_I_PICTURE 1
+#define GEN4_MPEG_P_PICTURE 2
+#define GEN4_MPEG_B_PICTURE 3
+
+/* Command packets:
+ */
+struct header
+{
+ unsigned int length:16;
+ unsigned int opcode:16;
+};
+
+
+union header_union
+{
+ struct header bits;
+ unsigned int dword;
+};
+
+struct gen4_3d_control
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int notify_enable:1;
+ unsigned int pad:3;
+ unsigned int wc_flush_enable:1;
+ unsigned int depth_stall_enable:1;
+ unsigned int operation:2;
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int pad:2;
+ unsigned int dest_addr_type:1;
+ unsigned int dest_addr:29;
+ } dest;
+
+ unsigned int dword2;
+ unsigned int dword3;
+};
+
+
+struct gen4_3d_primitive
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int pad:2;
+ unsigned int topology:5;
+ unsigned int indexed:1;
+ unsigned int opcode:16;
+ } header;
+
+ unsigned int verts_per_instance;
+ unsigned int start_vert_location;
+ unsigned int instance_count;
+ unsigned int start_instance_location;
+ unsigned int base_vert_location;
+};
+
+/* These seem to be passed around as function args, so it works out
+ * better to keep them as #defines:
+ */
+#define GEN4_FLUSH_READ_CACHE 0x1
+#define GEN4_FLUSH_STATE_CACHE 0x2
+#define GEN4_INHIBIT_FLUSH_RENDER_CACHE 0x4
+#define GEN4_FLUSH_SNAPSHOT_COUNTERS 0x8
+
+struct gen4_mi_flush
+{
+ unsigned int flags:4;
+ unsigned int pad:12;
+ unsigned int opcode:16;
+};
+
+struct gen4_vf_statistics
+{
+ unsigned int statistics_enable:1;
+ unsigned int pad:15;
+ unsigned int opcode:16;
+};
+
+
+
+struct gen4_binding_table_pointers
+{
+ struct header header;
+ unsigned int vs;
+ unsigned int gs;
+ unsigned int clp;
+ unsigned int sf;
+ unsigned int wm;
+};
+
+
+struct gen4_blend_constant_color
+{
+ struct header header;
+ float blend_constant_color[4];
+};
+
+
+struct gen4_depthbuffer
+{
+ union header_union header;
+
+ union {
+ struct {
+ unsigned int pitch:18;
+ unsigned int format:3;
+ unsigned int pad:4;
+ unsigned int depth_offset_disable:1;
+ unsigned int tile_walk:1;
+ unsigned int tiled_surface:1;
+ unsigned int pad2:1;
+ unsigned int surface_type:3;
+ } bits;
+ unsigned int dword;
+ } dword1;
+
+ unsigned int dword2_base_addr;
+
+ union {
+ struct {
+ unsigned int pad:1;
+ unsigned int mipmap_layout:1;
+ unsigned int lod:4;
+ unsigned int width:13;
+ unsigned int height:13;
+ } bits;
+ unsigned int dword;
+ } dword3;
+
+ union {
+ struct {
+ unsigned int pad:12;
+ unsigned int min_array_element:9;
+ unsigned int depth:11;
+ } bits;
+ unsigned int dword;
+ } dword4;
+};
+
+struct gen4_drawrect
+{
+ struct header header;
+ unsigned int xmin:16;
+ unsigned int ymin:16;
+ unsigned int xmax:16;
+ unsigned int ymax:16;
+ unsigned int xorg:16;
+ unsigned int yorg:16;
+};
+
+
+
+
+struct gen4_global_depth_offset_clamp
+{
+ struct header header;
+ float depth_offset_clamp;
+};
+
+struct gen4_indexbuffer
+{
+ union {
+ struct
+ {
+ unsigned int length:8;
+ unsigned int index_format:2;
+ unsigned int cut_index_enable:1;
+ unsigned int pad:5;
+ unsigned int opcode:16;
+ } bits;
+ unsigned int dword;
+
+ } header;
+
+ unsigned int buffer_start;
+ unsigned int buffer_end;
+};
+
+
+struct gen4_line_stipple
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int pattern:16;
+ unsigned int pad:16;
+ } bits0;
+
+ struct
+ {
+ unsigned int repeat_count:9;
+ unsigned int pad:7;
+ unsigned int inverse_repeat_count:16;
+ } bits1;
+};
+
+
+struct gen4_pipelined_state_pointers
+{
+ struct header header;
+
+ struct {
+ unsigned int pad:5;
+ unsigned int offset:27;
+ } vs;
+
+ struct
+ {
+ unsigned int enable:1;
+ unsigned int pad:4;
+ unsigned int offset:27;
+ } gs;
+
+ struct
+ {
+ unsigned int enable:1;
+ unsigned int pad:4;
+ unsigned int offset:27;
+ } clp;
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int offset:27;
+ } sf;
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int offset:27;
+ } wm;
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int offset:27; /* KW: check me! */
+ } cc;
+};
+
+
+struct gen4_polygon_stipple_offset
+{
+ struct header header;
+
+ struct {
+ unsigned int y_offset:5;
+ unsigned int pad:3;
+ unsigned int x_offset:5;
+ unsigned int pad0:19;
+ } bits0;
+};
+
+
+
+struct gen4_polygon_stipple
+{
+ struct header header;
+ unsigned int stipple[32];
+};
+
+
+
+struct gen4_pipeline_select
+{
+ struct
+ {
+ unsigned int pipeline_select:1;
+ unsigned int pad:15;
+ unsigned int opcode:16;
+ } header;
+};
+
+
+struct gen4_pipe_control
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int notify_enable:1;
+ unsigned int pad:2;
+ unsigned int instruction_state_cache_flush_enable:1;
+ unsigned int write_cache_flush_enable:1;
+ unsigned int depth_stall_enable:1;
+ unsigned int post_sync_operation:2;
+
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int pad:2;
+ unsigned int dest_addr_type:1;
+ unsigned int dest_addr:29;
+ } bits1;
+
+ unsigned int data0;
+ unsigned int data1;
+};
+
+
+struct gen4_urb_fence
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int vs_realloc:1;
+ unsigned int gs_realloc:1;
+ unsigned int clp_realloc:1;
+ unsigned int sf_realloc:1;
+ unsigned int vfe_realloc:1;
+ unsigned int cs_realloc:1;
+ unsigned int pad:2;
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int vs_fence:10;
+ unsigned int gs_fence:10;
+ unsigned int clp_fence:10;
+ unsigned int pad:2;
+ } bits0;
+
+ struct
+ {
+ unsigned int sf_fence:10;
+ unsigned int vf_fence:10;
+ unsigned int cs_fence:10;
+ unsigned int pad:2;
+ } bits1;
+};
+
+struct gen4_constant_buffer_state /* previously gen4_command_streamer */
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int nr_urb_entries:3;
+ unsigned int pad:1;
+ unsigned int urb_entry_size:5;
+ unsigned int pad0:23;
+ } bits0;
+};
+
+struct gen4_constant_buffer
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int valid:1;
+ unsigned int pad:7;
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int buffer_length:6;
+ unsigned int buffer_address:26;
+ } bits0;
+};
+
+struct gen4_state_base_address
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:4;
+ unsigned int general_state_address:27;
+ } bits0;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:4;
+ unsigned int surface_state_address:27;
+ } bits1;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:4;
+ unsigned int indirect_object_state_address:27;
+ } bits2;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:11;
+ unsigned int general_state_upper_bound:20;
+ } bits3;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:11;
+ unsigned int indirect_object_state_upper_bound:20;
+ } bits4;
+};
+
+struct gen4_state_prefetch
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int prefetch_count:3;
+ unsigned int pad:3;
+ unsigned int prefetch_pointer:26;
+ } bits0;
+};
+
+struct gen4_system_instruction_pointer
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int pad:4;
+ unsigned int system_instruction_pointer:28;
+ } bits0;
+};
+
+
+
+
+/* State structs for the various fixed function units:
+ */
+
+
+struct thread0
+{
+ unsigned int pad0:1;
+ unsigned int grf_reg_count:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer:26;
+};
+
+struct thread1
+{
+ unsigned int ext_halt_exception_enable:1;
+ unsigned int sw_exception_enable:1;
+ unsigned int mask_stack_exception_enable:1;
+ unsigned int timeout_exception_enable:1;
+ unsigned int illegal_op_exception_enable:1;
+ unsigned int pad0:3;
+ unsigned int depth_coef_urb_read_offset:6; /* WM only */
+ unsigned int pad1:2;
+ unsigned int floating_point_mode:1;
+ unsigned int thread_priority:1;
+ unsigned int binding_table_entry_count:8;
+ unsigned int pad3:5;
+ unsigned int single_program_flow:1;
+};
+
+struct thread2
+{
+ unsigned int per_thread_scratch_space:4;
+ unsigned int pad0:6;
+ unsigned int scratch_space_base_pointer:22;
+};
+
+
+struct thread3
+{
+ unsigned int dispatch_grf_start_reg:4;
+ unsigned int urb_entry_read_offset:6;
+ unsigned int pad0:1;
+ unsigned int urb_entry_read_length:6;
+ unsigned int pad1:1;
+ unsigned int const_urb_entry_read_offset:6;
+ unsigned int pad2:1;
+ unsigned int const_urb_entry_read_length:6;
+ unsigned int pad3:1;
+};
+
+
+
+struct gen4_clip_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:9;
+ unsigned int gs_output_stats:1; /* not always */
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:6; /* may be less */
+ unsigned int pad3:1;
+ } thread4;
+
+ struct
+ {
+ unsigned int pad0:13;
+ unsigned int clip_mode:3;
+ unsigned int userclip_enable_flags:8;
+ unsigned int userclip_must_clip:1;
+ unsigned int pad1:1;
+ unsigned int guard_band_enable:1;
+ unsigned int viewport_z_clip_enable:1;
+ unsigned int viewport_xy_clip_enable:1;
+ unsigned int vertex_position_space:1;
+ unsigned int api_mode:1;
+ unsigned int pad2:1;
+ } clip5;
+
+ struct
+ {
+ unsigned int pad0:5;
+ unsigned int clipper_viewport_state_ptr:27;
+ } clip6;
+
+
+ float viewport_xmin;
+ float viewport_xmax;
+ float viewport_ymin;
+ float viewport_ymax;
+};
+
+
+
+struct gen4_cc_unit_state
+{
+ struct
+ {
+ unsigned int pad0:3;
+ unsigned int bf_stencil_pass_depth_pass_op:3;
+ unsigned int bf_stencil_pass_depth_fail_op:3;
+ unsigned int bf_stencil_fail_op:3;
+ unsigned int bf_stencil_func:3;
+ unsigned int bf_stencil_enable:1;
+ unsigned int pad1:2;
+ unsigned int stencil_write_enable:1;
+ unsigned int stencil_pass_depth_pass_op:3;
+ unsigned int stencil_pass_depth_fail_op:3;
+ unsigned int stencil_fail_op:3;
+ unsigned int stencil_func:3;
+ unsigned int stencil_enable:1;
+ } cc0;
+
+
+ struct
+ {
+ unsigned int bf_stencil_ref:8;
+ unsigned int stencil_write_mask:8;
+ unsigned int stencil_test_mask:8;
+ unsigned int stencil_ref:8;
+ } cc1;
+
+
+ struct
+ {
+ unsigned int logicop_enable:1;
+ unsigned int pad0:10;
+ unsigned int depth_write_enable:1;
+ unsigned int depth_test_function:3;
+ unsigned int depth_test:1;
+ unsigned int bf_stencil_write_mask:8;
+ unsigned int bf_stencil_test_mask:8;
+ } cc2;
+
+
+ struct
+ {
+ unsigned int pad0:8;
+ unsigned int alpha_test_func:3;
+ unsigned int alpha_test:1;
+ unsigned int blend_enable:1;
+ unsigned int ia_blend_enable:1;
+ unsigned int pad1:1;
+ unsigned int alpha_test_format:1;
+ unsigned int pad2:16;
+ } cc3;
+
+ struct
+ {
+ unsigned int pad0:5;
+ unsigned int cc_viewport_state_offset:27;
+ } cc4;
+
+ struct
+ {
+ unsigned int pad0:2;
+ unsigned int ia_dest_blend_factor:5;
+ unsigned int ia_src_blend_factor:5;
+ unsigned int ia_blend_function:3;
+ unsigned int statistics_enable:1;
+ unsigned int logicop_func:4;
+ unsigned int pad1:11;
+ unsigned int dither_enable:1;
+ } cc5;
+
+ struct
+ {
+ unsigned int clamp_post_alpha_blend:1;
+ unsigned int clamp_pre_alpha_blend:1;
+ unsigned int clamp_range:2;
+ unsigned int pad0:11;
+ unsigned int y_dither_offset:2;
+ unsigned int x_dither_offset:2;
+ unsigned int dest_blend_factor:5;
+ unsigned int src_blend_factor:5;
+ unsigned int blend_function:3;
+ } cc6;
+
+ struct {
+ union {
+ float f;
+ unsigned char ub[4];
+ } alpha_ref;
+ } cc7;
+};
+
+
+
+struct gen4_sf_unit_state
+{
+ struct thread0 thread0;
+ struct {
+ unsigned int pad0:7;
+ unsigned int sw_exception_enable:1;
+ unsigned int pad1:3;
+ unsigned int mask_stack_exception_enable:1;
+ unsigned int pad2:1;
+ unsigned int illegal_op_exception_enable:1;
+ unsigned int pad3:2;
+ unsigned int floating_point_mode:1;
+ unsigned int thread_priority:1;
+ unsigned int binding_table_entry_count:8;
+ unsigned int pad4:5;
+ unsigned int single_program_flow:1;
+ } sf1;
+
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:10;
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:6;
+ unsigned int pad3:1;
+ } thread4;
+
+ struct
+ {
+ unsigned int front_winding:1;
+ unsigned int viewport_transform:1;
+ unsigned int pad0:3;
+ unsigned int sf_viewport_state_offset:27;
+ } sf5;
+
+ struct
+ {
+ unsigned int pad0:9;
+ unsigned int dest_org_vbias:4;
+ unsigned int dest_org_hbias:4;
+ unsigned int scissor:1;
+ unsigned int disable_2x2_trifilter:1;
+ unsigned int disable_zero_pix_trifilter:1;
+ unsigned int point_rast_rule:2;
+ unsigned int line_endcap_aa_region_width:2;
+ unsigned int line_width:4;
+ unsigned int fast_scissor_disable:1;
+ unsigned int cull_mode:2;
+ unsigned int aa_enable:1;
+ } sf6;
+
+ struct
+ {
+ unsigned int point_size:11;
+ unsigned int use_point_size_state:1;
+ unsigned int subpixel_precision:1;
+ unsigned int sprite_point:1;
+ unsigned int pad0:11;
+ unsigned int trifan_pv:2;
+ unsigned int linestrip_pv:2;
+ unsigned int tristrip_pv:2;
+ unsigned int line_last_pixel_enable:1;
+ } sf7;
+
+};
+
+
+struct gen4_gs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:10;
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:1;
+ unsigned int pad3:6;
+ } thread4;
+
+ struct
+ {
+ unsigned int sampler_count:3;
+ unsigned int pad0:2;
+ unsigned int sampler_state_pointer:27;
+ } gs5;
+
+
+ struct
+ {
+ unsigned int max_vp_index:4;
+ unsigned int pad0:26;
+ unsigned int reorder_enable:1;
+ unsigned int pad1:1;
+ } gs6;
+};
+
+
+struct gen4_vs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:10;
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:4;
+ unsigned int pad3:3;
+ } thread4;
+
+ struct
+ {
+ unsigned int sampler_count:3;
+ unsigned int pad0:2;
+ unsigned int sampler_state_pointer:27;
+ } vs5;
+
+ struct
+ {
+ unsigned int vs_enable:1;
+ unsigned int vert_cache_disable:1;
+ unsigned int pad0:30;
+ } vs6;
+};
+
+
+struct gen4_wm_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct {
+ unsigned int stats_enable:1;
+ unsigned int pad0:1;
+ unsigned int sampler_count:3;
+ unsigned int sampler_state_pointer:27;
+ } wm4;
+
+ struct
+ {
+ unsigned int enable_8_pix:1;
+ unsigned int enable_16_pix:1;
+ unsigned int enable_32_pix:1;
+ unsigned int pad0:7;
+ unsigned int legacy_global_depth_bias:1;
+ unsigned int line_stipple:1;
+ unsigned int depth_offset:1;
+ unsigned int polygon_stipple:1;
+ unsigned int line_aa_region_width:2;
+ unsigned int line_endcap_aa_region_width:2;
+ unsigned int early_depth_test:1;
+ unsigned int thread_dispatch_enable:1;
+ unsigned int program_uses_depth:1;
+ unsigned int program_computes_depth:1;
+ unsigned int program_uses_killpixel:1;
+ unsigned int legacy_line_rast: 1;
+ unsigned int transposed_urb_read:1;
+ unsigned int max_threads:7;
+ } wm5;
+
+ float global_depth_offset_constant;
+ float global_depth_offset_scale;
+
+ struct {
+ unsigned int pad0:1;
+ unsigned int grf_reg_count_1:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer_1:26;
+ } wm8;
+
+ struct {
+ unsigned int pad0:1;
+ unsigned int grf_reg_count_2:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer_2:26;
+ } wm9;
+
+ struct {
+ unsigned int pad0:1;
+ unsigned int grf_reg_count_3:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer_3:26;
+ } wm10;
+};
+
+struct gen4_wm_unit_state_padded {
+ struct gen4_wm_unit_state state;
+ char pad[64 - sizeof(struct gen4_wm_unit_state)];
+};
+
+/* The hardware supports two different modes for border color. The
+ * default (OpenGL) mode uses floating-point color channels, while the
+ * legacy mode uses 4 bytes.
+ *
+ * More significantly, the legacy mode respects the components of the
+ * border color for channels not present in the source, (whereas the
+ * default mode will ignore the border color's alpha channel and use
+ * alpha==1 for an RGB source, for example).
+ *
+ * The legacy mode matches the semantics specified by the Render
+ * extension.
+ */
+struct gen4_sampler_default_border_color {
+ float color[4];
+};
+
+struct gen4_sampler_legacy_border_color {
+ uint8_t color[4];
+};
+
+struct gen4_sampler_state
+{
+
+ struct
+ {
+ unsigned int shadow_function:3;
+ unsigned int lod_bias:11;
+ unsigned int min_filter:3;
+ unsigned int mag_filter:3;
+ unsigned int mip_filter:2;
+ unsigned int base_level:5;
+ unsigned int pad:1;
+ unsigned int lod_preclamp:1;
+ unsigned int border_color_mode:1;
+ unsigned int pad0:1;
+ unsigned int disable:1;
+ } ss0;
+
+ struct
+ {
+ unsigned int r_wrap_mode:3;
+ unsigned int t_wrap_mode:3;
+ unsigned int s_wrap_mode:3;
+ unsigned int pad:3;
+ unsigned int max_lod:10;
+ unsigned int min_lod:10;
+ } ss1;
+
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int border_color_pointer:27;
+ } ss2;
+
+ struct
+ {
+ unsigned int pad:19;
+ unsigned int max_aniso:3;
+ unsigned int chroma_key_mode:1;
+ unsigned int chroma_key_index:2;
+ unsigned int chroma_key_enable:1;
+ unsigned int monochrome_filter_width:3;
+ unsigned int monochrome_filter_height:3;
+ } ss3;
+};
+
+
+struct gen4_clipper_viewport
+{
+ float xmin;
+ float xmax;
+ float ymin;
+ float ymax;
+};
+
+struct gen4_cc_viewport
+{
+ float min_depth;
+ float max_depth;
+};
+
+struct gen4_sf_viewport
+{
+ struct {
+ float m00;
+ float m11;
+ float m22;
+ float m30;
+ float m31;
+ float m32;
+ } viewport;
+
+ struct {
+ short xmin;
+ short ymin;
+ short xmax;
+ short ymax;
+ } scissor;
+};
+
+/* Documented in the subsystem/shared-functions/sampler chapter...
+ */
+struct gen4_surface_state
+{
+ struct {
+ unsigned int cube_pos_z:1;
+ unsigned int cube_neg_z:1;
+ unsigned int cube_pos_y:1;
+ unsigned int cube_neg_y:1;
+ unsigned int cube_pos_x:1;
+ unsigned int cube_neg_x:1;
+ unsigned int pad:3;
+ unsigned int render_cache_read_mode:1;
+ unsigned int mipmap_layout_mode:1;
+ unsigned int vert_line_stride_ofs:1;
+ unsigned int vert_line_stride:1;
+ unsigned int color_blend:1;
+ unsigned int writedisable_blue:1;
+ unsigned int writedisable_green:1;
+ unsigned int writedisable_red:1;
+ unsigned int writedisable_alpha:1;
+ unsigned int surface_format:9;
+ unsigned int data_return_format:1;
+ unsigned int pad0:1;
+ unsigned int surface_type:3;
+ } ss0;
+
+ struct {
+ unsigned int base_addr;
+ } ss1;
+
+ struct {
+ unsigned int render_target_rotation:2;
+ unsigned int mip_count:4;
+ unsigned int width:13;
+ unsigned int height:13;
+ } ss2;
+
+ struct {
+ unsigned int tile_walk:1;
+ unsigned int tiled_surface:1;
+ unsigned int pad:1;
+ unsigned int pitch:18;
+ unsigned int depth:11;
+ } ss3;
+
+ struct {
+ unsigned int pad:19;
+ unsigned int min_array_elt:9;
+ unsigned int min_lod:4;
+ } ss4;
+
+ struct {
+ unsigned int pad:20;
+ unsigned int y_offset:4;
+ unsigned int pad2:1;
+ unsigned int x_offset:7;
+ } ss5;
+};
+
+
+
+struct gen4_vertex_buffer_state
+{
+ struct {
+ unsigned int pitch:11;
+ unsigned int pad:15;
+ unsigned int access_type:1;
+ unsigned int vb_index:5;
+ } vb0;
+
+ unsigned int start_addr;
+ unsigned int max_index;
+#if 1
+ unsigned int instance_data_step_rate; /* not included for sequential/random vertices? */
+#endif
+};
+
+#define GEN4_VBP_MAX 17
+
+struct gen4_vb_array_state {
+ struct header header;
+ struct gen4_vertex_buffer_state vb[GEN4_VBP_MAX];
+};
+
+
+struct gen4_vertex_element_state
+{
+ struct
+ {
+ unsigned int src_offset:11;
+ unsigned int pad:5;
+ unsigned int src_format:9;
+ unsigned int pad0:1;
+ unsigned int valid:1;
+ unsigned int vertex_buffer_index:5;
+ } ve0;
+
+ struct
+ {
+ unsigned int dst_offset:8;
+ unsigned int pad:8;
+ unsigned int vfcomponent3:4;
+ unsigned int vfcomponent2:4;
+ unsigned int vfcomponent1:4;
+ unsigned int vfcomponent0:4;
+ } ve1;
+};
+
+#define GEN4_VEP_MAX 18
+
+struct gen4_vertex_element_packet {
+ struct header header;
+ struct gen4_vertex_element_state ve[GEN4_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
+};
+
+
+struct gen4_urb_immediate {
+ unsigned int opcode:4;
+ unsigned int offset:6;
+ unsigned int swizzle_control:2;
+ unsigned int pad:1;
+ unsigned int allocate:1;
+ unsigned int used:1;
+ unsigned int complete:1;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+};
+
+/* Instruction format for the execution units:
+ */
+
+struct gen4_instruction
+{
+ struct
+ {
+ unsigned int opcode:7;
+ unsigned int pad:1;
+ unsigned int access_mode:1;
+ unsigned int mask_control:1;
+ unsigned int dependency_control:2;
+ unsigned int compression_control:2;
+ unsigned int thread_control:2;
+ unsigned int predicate_control:4;
+ unsigned int predicate_inverse:1;
+ unsigned int execution_size:3;
+ unsigned int destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */
+ unsigned int pad0:2;
+ unsigned int debug_control:1;
+ unsigned int saturate:1;
+ } header;
+
+ union {
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int src1_reg_file:2;
+ unsigned int src1_reg_type:3;
+ unsigned int pad:1;
+ unsigned int dest_subreg_nr:5;
+ unsigned int dest_reg_nr:8;
+ unsigned int dest_horiz_stride:2;
+ unsigned int dest_address_mode:1;
+ } da1;
+
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int pad:6;
+ int dest_indirect_offset:10; /* offset against the deref'd address reg */
+ unsigned int dest_subreg_nr:3; /* subnr for the address reg a0.x */
+ unsigned int dest_horiz_stride:2;
+ unsigned int dest_address_mode:1;
+ } ia1;
+
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int src1_reg_file:2;
+ unsigned int src1_reg_type:3;
+ unsigned int pad0:1;
+ unsigned int dest_writemask:4;
+ unsigned int dest_subreg_nr:1;
+ unsigned int dest_reg_nr:8;
+ unsigned int pad1:2;
+ unsigned int dest_address_mode:1;
+ } da16;
+
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int pad0:6;
+ unsigned int dest_writemask:4;
+ int dest_indirect_offset:6;
+ unsigned int dest_subreg_nr:3;
+ unsigned int pad1:2;
+ unsigned int dest_address_mode:1;
+ } ia16;
+ } bits1;
+
+
+ union {
+ struct
+ {
+ unsigned int src0_subreg_nr:5;
+ unsigned int src0_reg_nr:8;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_horiz_stride:2;
+ unsigned int src0_width:3;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad:6;
+ } da1;
+
+ struct
+ {
+ int src0_indirect_offset:10;
+ unsigned int src0_subreg_nr:3;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_horiz_stride:2;
+ unsigned int src0_width:3;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad:6;
+ } ia1;
+
+ struct
+ {
+ unsigned int src0_swz_x:2;
+ unsigned int src0_swz_y:2;
+ unsigned int src0_subreg_nr:1;
+ unsigned int src0_reg_nr:8;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_swz_z:2;
+ unsigned int src0_swz_w:2;
+ unsigned int pad0:1;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad1:6;
+ } da16;
+
+ struct
+ {
+ unsigned int src0_swz_x:2;
+ unsigned int src0_swz_y:2;
+ int src0_indirect_offset:6;
+ unsigned int src0_subreg_nr:3;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_swz_z:2;
+ unsigned int src0_swz_w:2;
+ unsigned int pad0:1;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad1:6;
+ } ia16;
+
+ } bits2;
+
+ union
+ {
+ struct
+ {
+ unsigned int src1_subreg_nr:5;
+ unsigned int src1_reg_nr:8;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad:1;
+ unsigned int src1_horiz_stride:2;
+ unsigned int src1_width:3;
+ unsigned int src1_vert_stride:4;
+ unsigned int pad0:7;
+ } da1;
+
+ struct
+ {
+ unsigned int src1_swz_x:2;
+ unsigned int src1_swz_y:2;
+ unsigned int src1_subreg_nr:1;
+ unsigned int src1_reg_nr:8;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad0:1;
+ unsigned int src1_swz_z:2;
+ unsigned int src1_swz_w:2;
+ unsigned int pad1:1;
+ unsigned int src1_vert_stride:4;
+ unsigned int pad2:7;
+ } da16;
+
+ struct
+ {
+ int src1_indirect_offset:10;
+ unsigned int src1_subreg_nr:3;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad0:1;
+ unsigned int src1_horiz_stride:2;
+ unsigned int src1_width:3;
+ unsigned int src1_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad1:6;
+ } ia1;
+
+ struct
+ {
+ unsigned int src1_swz_x:2;
+ unsigned int src1_swz_y:2;
+ int src1_indirect_offset:6;
+ unsigned int src1_subreg_nr:3;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad0:1;
+ unsigned int src1_swz_z:2;
+ unsigned int src1_swz_w:2;
+ unsigned int pad1:1;
+ unsigned int src1_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad2:6;
+ } ia16;
+
+
+ struct
+ {
+ int jump_count:16; /* note: signed */
+ unsigned int pop_count:4;
+ unsigned int pad0:12;
+ } if_else;
+
+ struct {
+ unsigned int function:4;
+ unsigned int int_type:1;
+ unsigned int precision:1;
+ unsigned int saturate:1;
+ unsigned int data_type:1;
+ unsigned int pad0:8;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } math;
+
+ struct {
+ unsigned int binding_table_index:8;
+ unsigned int sampler:4;
+ unsigned int return_format:2;
+ unsigned int msg_type:2;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } sampler;
+
+ struct gen4_urb_immediate urb;
+
+ struct {
+ unsigned int binding_table_index:8;
+ unsigned int msg_control:4;
+ unsigned int msg_type:2;
+ unsigned int target_cache:2;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } dp_read;
+
+ struct {
+ unsigned int binding_table_index:8;
+ unsigned int msg_control:3;
+ unsigned int pixel_scoreboard_clear:1;
+ unsigned int msg_type:3;
+ unsigned int send_commit_msg:1;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } dp_write;
+
+ struct {
+ unsigned int pad:16;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } generic;
+
+ unsigned int ud;
+ } bits3;
+};
+
+/* media pipeline */
+
+struct gen4_vfe_state {
+ struct {
+ unsigned int per_thread_scratch_space:4;
+ unsigned int pad3:3;
+ unsigned int extend_vfe_state_present:1;
+ unsigned int pad2:2;
+ unsigned int scratch_base:22;
+ } vfe0;
+
+ struct {
+ unsigned int debug_counter_control:2;
+ unsigned int children_present:1;
+ unsigned int vfe_mode:4;
+ unsigned int pad2:2;
+ unsigned int num_urb_entries:7;
+ unsigned int urb_entry_alloc_size:9;
+ unsigned int max_threads:7;
+ } vfe1;
+
+ struct {
+ unsigned int pad4:4;
+ unsigned int interface_descriptor_base:28;
+ } vfe2;
+};
+
+struct gen4_vld_state {
+ struct {
+ unsigned int pad6:6;
+ unsigned int scan_order:1;
+ unsigned int intra_vlc_format:1;
+ unsigned int quantizer_scale_type:1;
+ unsigned int concealment_motion_vector:1;
+ unsigned int frame_predict_frame_dct:1;
+ unsigned int top_field_first:1;
+ unsigned int picture_structure:2;
+ unsigned int intra_dc_precision:2;
+ unsigned int f_code_0_0:4;
+ unsigned int f_code_0_1:4;
+ unsigned int f_code_1_0:4;
+ unsigned int f_code_1_1:4;
+ } vld0;
+
+ struct {
+ unsigned int pad2:9;
+ unsigned int picture_coding_type:2;
+ unsigned int pad:21;
+ } vld1;
+
+ struct {
+ unsigned int index_0:4;
+ unsigned int index_1:4;
+ unsigned int index_2:4;
+ unsigned int index_3:4;
+ unsigned int index_4:4;
+ unsigned int index_5:4;
+ unsigned int index_6:4;
+ unsigned int index_7:4;
+ } desc_remap_table0;
+
+ struct {
+ unsigned int index_8:4;
+ unsigned int index_9:4;
+ unsigned int index_10:4;
+ unsigned int index_11:4;
+ unsigned int index_12:4;
+ unsigned int index_13:4;
+ unsigned int index_14:4;
+ unsigned int index_15:4;
+ } desc_remap_table1;
+};
+
+struct gen4_interface_descriptor {
+ struct {
+ unsigned int grf_reg_blocks:4;
+ unsigned int pad:2;
+ unsigned int kernel_start_pointer:26;
+ } desc0;
+
+ struct {
+ unsigned int pad:7;
+ unsigned int software_exception:1;
+ unsigned int pad2:3;
+ unsigned int maskstack_exception:1;
+ unsigned int pad3:1;
+ unsigned int illegal_opcode_exception:1;
+ unsigned int pad4:2;
+ unsigned int floating_point_mode:1;
+ unsigned int thread_priority:1;
+ unsigned int single_program_flow:1;
+ unsigned int pad5:1;
+ unsigned int const_urb_entry_read_offset:6;
+ unsigned int const_urb_entry_read_len:6;
+ } desc1;
+
+ struct {
+ unsigned int pad:2;
+ unsigned int sampler_count:3;
+ unsigned int sampler_state_pointer:27;
+ } desc2;
+
+ struct {
+ unsigned int binding_table_entry_count:5;
+ unsigned int binding_table_pointer:27;
+ } desc3;
+};
+
+struct gen6_blend_state
+{
+ struct {
+ unsigned int dest_blend_factor:5;
+ unsigned int source_blend_factor:5;
+ unsigned int pad3:1;
+ unsigned int blend_func:3;
+ unsigned int pad2:1;
+ unsigned int ia_dest_blend_factor:5;
+ unsigned int ia_source_blend_factor:5;
+ unsigned int pad1:1;
+ unsigned int ia_blend_func:3;
+ unsigned int pad0:1;
+ unsigned int ia_blend_enable:1;
+ unsigned int blend_enable:1;
+ } blend0;
+
+ struct {
+ unsigned int post_blend_clamp_enable:1;
+ unsigned int pre_blend_clamp_enable:1;
+ unsigned int clamp_range:2;
+ unsigned int pad0:4;
+ unsigned int x_dither_offset:2;
+ unsigned int y_dither_offset:2;
+ unsigned int dither_enable:1;
+ unsigned int alpha_test_func:3;
+ unsigned int alpha_test_enable:1;
+ unsigned int pad1:1;
+ unsigned int logic_op_func:4;
+ unsigned int logic_op_enable:1;
+ unsigned int pad2:1;
+ unsigned int write_disable_b:1;
+ unsigned int write_disable_g:1;
+ unsigned int write_disable_r:1;
+ unsigned int write_disable_a:1;
+ unsigned int pad3:1;
+ unsigned int alpha_to_coverage_dither:1;
+ unsigned int alpha_to_one:1;
+ unsigned int alpha_to_coverage:1;
+ } blend1;
+};
+
+struct gen6_color_calc_state
+{
+ struct {
+ unsigned int alpha_test_format:1;
+ unsigned int pad0:14;
+ unsigned int round_disable:1;
+ unsigned int bf_stencil_ref:8;
+ unsigned int stencil_ref:8;
+ } cc0;
+
+ union {
+ float alpha_ref_f;
+ struct {
+ unsigned int ui:8;
+ unsigned int pad0:24;
+ } alpha_ref_fi;
+ } cc1;
+
+ float constant_r;
+ float constant_g;
+ float constant_b;
+ float constant_a;
+};
+
+struct gen6_depth_stencil_state
+{
+ struct {
+ unsigned int pad0:3;
+ unsigned int bf_stencil_pass_depth_pass_op:3;
+ unsigned int bf_stencil_pass_depth_fail_op:3;
+ unsigned int bf_stencil_fail_op:3;
+ unsigned int bf_stencil_func:3;
+ unsigned int bf_stencil_enable:1;
+ unsigned int pad1:2;
+ unsigned int stencil_write_enable:1;
+ unsigned int stencil_pass_depth_pass_op:3;
+ unsigned int stencil_pass_depth_fail_op:3;
+ unsigned int stencil_fail_op:3;
+ unsigned int stencil_func:3;
+ unsigned int stencil_enable:1;
+ } ds0;
+
+ struct {
+ unsigned int bf_stencil_write_mask:8;
+ unsigned int bf_stencil_test_mask:8;
+ unsigned int stencil_write_mask:8;
+ unsigned int stencil_test_mask:8;
+ } ds1;
+
+ struct {
+ unsigned int pad0:26;
+ unsigned int depth_write_enable:1;
+ unsigned int depth_test_func:3;
+ unsigned int pad1:1;
+ unsigned int depth_test_enable:1;
+ } ds2;
+};
+
+typedef enum {
+ SAMPLER_FILTER_NEAREST = 0,
+ SAMPLER_FILTER_BILINEAR,
+ FILTER_COUNT
+} sampler_filter_t;
+
+typedef enum {
+ SAMPLER_EXTEND_NONE = 0,
+ SAMPLER_EXTEND_REPEAT,
+ SAMPLER_EXTEND_PAD,
+ SAMPLER_EXTEND_REFLECT,
+ EXTEND_COUNT
+} sampler_extend_t;
+
+typedef enum {
+ WM_KERNEL = 0,
+ WM_KERNEL_PROJECTIVE,
+
+ WM_KERNEL_MASK,
+ WM_KERNEL_MASK_PROJECTIVE,
+
+ WM_KERNEL_MASKCA,
+ WM_KERNEL_MASKCA_PROJECTIVE,
+
+ WM_KERNEL_MASKCA_SRCALPHA,
+ WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
+
+ WM_KERNEL_VIDEO_PLANAR,
+ WM_KERNEL_VIDEO_PACKED,
+ KERNEL_COUNT
+} wm_kernel_t;
+
+#endif
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
new file mode 100644
index 00000000..72afe98d
--- /dev/null
+++ b/src/sna/gen5_render.c
@@ -0,0 +1,2841 @@
+/*
+ * Copyright © 2006,2008,2011 Intel Corporation
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@sna.com>
+ * Eric Anholt <eric@anholt.net>
+ * Carl Worth <cworth@redhat.com>
+ * Keith Packard <keithp@keithp.com>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xf86.h>
+
+#include "sna.h"
+#include "sna_reg.h"
+#include "sna_render.h"
+#include "sna_render_inline.h"
+#include "sna_video.h"
+
+#include "gen5_render.h"
+
+#if DEBUG_RENDER
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+#define GEN5_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
+
+/* Set up a default static partitioning of the URB, which is supposed to
+ * allow anything we would want to do, at potentially lower performance.
+ */
+#define URB_CS_ENTRY_SIZE 1
+#define URB_CS_ENTRIES 0
+
+#define URB_VS_ENTRY_SIZE 1 // each 512-bit row
+#define URB_VS_ENTRIES 8 // we needs at least 8 entries
+
+#define URB_GS_ENTRY_SIZE 0
+#define URB_GS_ENTRIES 0
+
+#define URB_CLIP_ENTRY_SIZE 0
+#define URB_CLIP_ENTRIES 0
+
+#define URB_SF_ENTRY_SIZE 2
+#define URB_SF_ENTRIES 1
+
+/*
+ * this program computes dA/dx and dA/dy for the texture coordinates along
+ * with the base texture coordinate. It was extracted from the Mesa driver
+ */
+
+#define SF_KERNEL_NUM_GRF 16
+#define SF_MAX_THREADS 2
+
+#define PS_KERNEL_NUM_GRF 32
+#define PS_MAX_THREADS 48
+
+static const uint32_t sf_kernel[][4] = {
+#include "exa_sf.g4b.gen5"
+};
+
+static const uint32_t sf_kernel_mask[][4] = {
+#include "exa_sf_mask.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_nomask_affine[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_affine.g4b.gen5"
+#include "exa_wm_src_sample_argb.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_nomask_projective[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_projective.g4b.gen5"
+#include "exa_wm_src_sample_argb.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_maskca_affine[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_affine.g4b.gen5"
+#include "exa_wm_src_sample_argb.g4b.gen5"
+#include "exa_wm_mask_affine.g4b.gen5"
+#include "exa_wm_mask_sample_argb.g4b.gen5"
+#include "exa_wm_ca.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_maskca_projective[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_projective.g4b.gen5"
+#include "exa_wm_src_sample_argb.g4b.gen5"
+#include "exa_wm_mask_projective.g4b.gen5"
+#include "exa_wm_mask_sample_argb.g4b.gen5"
+#include "exa_wm_ca.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_affine.g4b.gen5"
+#include "exa_wm_src_sample_a.g4b.gen5"
+#include "exa_wm_mask_affine.g4b.gen5"
+#include "exa_wm_mask_sample_argb.g4b.gen5"
+#include "exa_wm_ca_srcalpha.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_projective.g4b.gen5"
+#include "exa_wm_src_sample_a.g4b.gen5"
+#include "exa_wm_mask_projective.g4b.gen5"
+#include "exa_wm_mask_sample_argb.g4b.gen5"
+#include "exa_wm_ca_srcalpha.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_masknoca_affine[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_affine.g4b.gen5"
+#include "exa_wm_src_sample_argb.g4b.gen5"
+#include "exa_wm_mask_affine.g4b.gen5"
+#include "exa_wm_mask_sample_a.g4b.gen5"
+#include "exa_wm_noca.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_masknoca_projective[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_projective.g4b.gen5"
+#include "exa_wm_src_sample_argb.g4b.gen5"
+#include "exa_wm_mask_projective.g4b.gen5"
+#include "exa_wm_mask_sample_a.g4b.gen5"
+#include "exa_wm_noca.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_packed_static[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_affine.g4b.gen5"
+#include "exa_wm_src_sample_argb.g4b.gen5"
+#include "exa_wm_yuv_rgb.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+static const uint32_t ps_kernel_planar_static[][4] = {
+#include "exa_wm_xy.g4b.gen5"
+#include "exa_wm_src_affine.g4b.gen5"
+#include "exa_wm_src_sample_planar.g4b.gen5"
+#include "exa_wm_yuv_rgb.g4b.gen5"
+#include "exa_wm_write.g4b.gen5"
+};
+
+#define KERNEL(kernel_enum, kernel, masked) \
+ [kernel_enum] = {&kernel, sizeof(kernel), masked}
+static const struct wm_kernel_info {
+ const void *data;
+ unsigned int size;
+ Bool has_mask;
+} wm_kernels[] = {
+ KERNEL(WM_KERNEL, ps_kernel_nomask_affine, FALSE),
+ KERNEL(WM_KERNEL_PROJECTIVE, ps_kernel_nomask_projective, FALSE),
+
+ KERNEL(WM_KERNEL_MASK, ps_kernel_masknoca_affine, TRUE),
+ KERNEL(WM_KERNEL_MASK_PROJECTIVE, ps_kernel_masknoca_projective, TRUE),
+
+ KERNEL(WM_KERNEL_MASKCA, ps_kernel_maskca_affine, TRUE),
+ KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, ps_kernel_maskca_projective, TRUE),
+
+ KERNEL(WM_KERNEL_MASKCA_SRCALPHA,
+ ps_kernel_maskca_srcalpha_affine, TRUE),
+ KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
+ ps_kernel_maskca_srcalpha_projective, TRUE),
+
+ KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, FALSE),
+ KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, FALSE),
+};
+#undef KERNEL
+
+static const struct blendinfo {
+ Bool src_alpha;
+ uint32_t src_blend;
+ uint32_t dst_blend;
+} gen5_blend_op[] = {
+ /* Clear */ {0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ZERO},
+ /* Src */ {0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ZERO},
+ /* Dst */ {0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ONE},
+ /* Over */ {1, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
+ /* OverReverse */ {0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ONE},
+ /* In */ {0, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_ZERO},
+ /* InReverse */ {1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_SRC_ALPHA},
+ /* Out */ {0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ZERO},
+ /* OutReverse */ {1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
+ /* Atop */ {1, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
+ /* AtopReverse */ {1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_SRC_ALPHA},
+ /* Xor */ {1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
+ /* Add */ {0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ONE},
+};
+
+/**
+ * Highest-valued BLENDFACTOR used in gen5_blend_op.
+ *
+ * This leaves out GEN5_BLENDFACTOR_INV_DST_COLOR,
+ * GEN5_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
+ * GEN5_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
+ */
+#define GEN5_BLENDFACTOR_COUNT (GEN5_BLENDFACTOR_INV_DST_ALPHA + 1)
+
+/* FIXME: surface format defined in gen5_defines.h, shared Sampling engine
+ * 1.7.2
+ */
+static const struct formatinfo {
+ CARD32 pict_fmt;
+ uint32_t card_fmt;
+} gen5_tex_formats[] = {
+ {PICT_a8, GEN5_SURFACEFORMAT_A8_UNORM},
+ {PICT_a8r8g8b8, GEN5_SURFACEFORMAT_B8G8R8A8_UNORM},
+ {PICT_x8r8g8b8, GEN5_SURFACEFORMAT_B8G8R8X8_UNORM},
+ {PICT_a8b8g8r8, GEN5_SURFACEFORMAT_R8G8B8A8_UNORM},
+ {PICT_x8b8g8r8, GEN5_SURFACEFORMAT_R8G8B8X8_UNORM},
+ {PICT_r8g8b8, GEN5_SURFACEFORMAT_R8G8B8_UNORM},
+ {PICT_r5g6b5, GEN5_SURFACEFORMAT_B5G6R5_UNORM},
+ {PICT_a1r5g5b5, GEN5_SURFACEFORMAT_B5G5R5A1_UNORM},
+ {PICT_a2r10g10b10, GEN5_SURFACEFORMAT_B10G10R10A2_UNORM},
+ {PICT_x2r10g10b10, GEN5_SURFACEFORMAT_B10G10R10X2_UNORM},
+ {PICT_a2b10g10r10, GEN5_SURFACEFORMAT_R10G10B10A2_UNORM},
+ {PICT_x2r10g10b10, GEN5_SURFACEFORMAT_B10G10R10X2_UNORM},
+ {PICT_a4r4g4b4, GEN5_SURFACEFORMAT_B4G4R4A4_UNORM},
+};
+
+#define BLEND_OFFSET(s, d) \
+ (((s) * GEN5_BLENDFACTOR_COUNT + (d)) * 64)
+
+#define SAMPLER_OFFSET(sf, se, mf, me, k) \
+ ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
+
+static bool
+gen5_emit_pipelined_pointers(struct sna *sna,
+ const struct sna_composite_op *op,
+ int blend, int kernel);
+
+#define OUT_BATCH(v) batch_emit(sna, v)
+#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
+#define OUT_VERTEX_F(v) vertex_emit(sna, v)
+
+static int
+gen5_choose_composite_kernel(int op, Bool has_mask, Bool is_ca, Bool is_affine)
+{
+ int base;
+
+ if (has_mask) {
+ if (is_ca) {
+ if (gen5_blend_op[op].src_alpha)
+ base = WM_KERNEL_MASKCA_SRCALPHA;
+ else
+ base = WM_KERNEL_MASKCA;
+ } else
+ base = WM_KERNEL_MASK;
+ } else
+ base = WM_KERNEL;
+
+ return base + !is_affine;
+}
+
+static void gen5_magic_ca_pass(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ struct gen5_render_state *state = &sna->render_state.gen5;
+
+ if (!op->need_magic_ca_pass)
+ return;
+
+ DBG(("%s: CA fixup\n", __FUNCTION__));
+
+ gen5_emit_pipelined_pointers
+ (sna, op, PictOpAdd,
+ gen5_choose_composite_kernel(PictOpAdd,
+ TRUE, TRUE, op->is_affine));
+
+ OUT_BATCH(GEN5_3DPRIMITIVE |
+ GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+ (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+ (0 << 9) |
+ 4);
+ OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
+ OUT_BATCH(sna->render.vertex_start);
+ OUT_BATCH(1); /* single instance */
+ OUT_BATCH(0); /* start instance location */
+ OUT_BATCH(0); /* index buffer offset, ignored */
+
+ state->last_primitive = sna->kgem.nbatch;
+}
+
+static void gen5_vertex_flush(struct sna *sna)
+{
+ if (sna->render_state.gen5.vertex_offset == 0)
+ return;
+
+ DBG(("%s[%x] = %d\n", __FUNCTION__,
+ 4*sna->render_state.gen5.vertex_offset,
+ sna->render.vertex_index - sna->render.vertex_start));
+ sna->kgem.batch[sna->render_state.gen5.vertex_offset] =
+ sna->render.vertex_index - sna->render.vertex_start;
+ sna->render_state.gen5.vertex_offset = 0;
+
+ if (sna->render.op)
+ gen5_magic_ca_pass(sna, sna->render.op);
+}
+
+static void gen5_vertex_finish(struct sna *sna, Bool last)
+{
+ struct kgem_bo *bo;
+ int i, delta;
+
+ gen5_vertex_flush(sna);
+ if (!sna->render.vertex_used)
+ return;
+
+ /* Note: we only need dword alignment (currently) */
+
+ if (last && sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
+ DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
+ sna->render.vertex_used, sna->kgem.nbatch));
+ memcpy(sna->kgem.batch + sna->kgem.nbatch,
+ sna->render.vertex_data,
+ sna->render.vertex_used * 4);
+ delta = sna->kgem.nbatch * 4;
+ bo = NULL;
+ sna->kgem.nbatch += sna->render.vertex_used;
+ } else {
+ bo = kgem_create_linear(&sna->kgem, 4*sna->render.vertex_used);
+ if (bo && !kgem_bo_write(&sna->kgem, bo,
+ sna->render.vertex_data,
+ 4*sna->render.vertex_used)) {
+ kgem_bo_destroy(&sna->kgem, bo);
+ return;
+ }
+ delta = 0;
+ DBG(("%s: new vbo: %d\n", __FUNCTION__,
+ sna->render.vertex_used));
+ }
+
+ for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
+ if (sna->render.vertex_reloc[i]) {
+ DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+ i, sna->render.vertex_reloc[i]));
+
+ sna->kgem.batch[sna->render.vertex_reloc[i]] =
+ kgem_add_reloc(&sna->kgem,
+ sna->render.vertex_reloc[i],
+ bo,
+ I915_GEM_DOMAIN_VERTEX << 16,
+ delta);
+ sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
+ kgem_add_reloc(&sna->kgem,
+ sna->render.vertex_reloc[i]+1,
+ bo,
+ I915_GEM_DOMAIN_VERTEX << 16,
+ delta + sna->render.vertex_used * 4 - 1);
+ sna->render.vertex_reloc[i] = 0;
+ }
+ }
+
+ if (bo)
+ kgem_bo_destroy(&sna->kgem, bo);
+
+ sna->render.vertex_used = 0;
+ sna->render.vertex_index = 0;
+ sna->render_state.gen5.vb_id = 0;
+}
+
+static uint32_t gen5_get_blend(int op,
+ Bool has_component_alpha,
+ uint32_t dst_format)
+{
+ uint32_t src, dst;
+
+ src = gen5_blend_op[op].src_blend;
+ dst = gen5_blend_op[op].dst_blend;
+
+ /* If there's no dst alpha channel, adjust the blend op so that we'll treat
+ * it as always 1.
+ */
+ if (PICT_FORMAT_A(dst_format) == 0) {
+ if (src == GEN5_BLENDFACTOR_DST_ALPHA)
+ src = GEN5_BLENDFACTOR_ONE;
+ else if (src == GEN5_BLENDFACTOR_INV_DST_ALPHA)
+ src = GEN5_BLENDFACTOR_ZERO;
+ }
+
+ /* If the source alpha is being used, then we should only be in a
+ * case where the source blend factor is 0, and the source blend
+ * value is the mask channels multiplied by the source picture's alpha.
+ */
+ if (has_component_alpha && gen5_blend_op[op].src_alpha) {
+ if (dst == GEN5_BLENDFACTOR_SRC_ALPHA)
+ dst = GEN5_BLENDFACTOR_SRC_COLOR;
+ else if (dst == GEN5_BLENDFACTOR_INV_SRC_ALPHA)
+ dst = GEN5_BLENDFACTOR_INV_SRC_COLOR;
+ }
+
+ DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
+ op, dst_format, PICT_FORMAT_A(dst_format),
+ src, dst, BLEND_OFFSET(src, dst)));
+ return BLEND_OFFSET(src, dst);
+}
+
+static uint32_t gen5_get_dest_format(PictFormat format)
+{
+ switch (format) {
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ default:
+ return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
+ case PICT_a8b8g8r8:
+ case PICT_x8b8g8r8:
+ return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
+ case PICT_r5g6b5:
+ return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
+ case PICT_x1r5g5b5:
+ case PICT_a1r5g5b5:
+ return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
+ case PICT_a8:
+ return GEN5_SURFACEFORMAT_A8_UNORM;
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
+ }
+}
+
+static Bool gen5_check_dst_format(PictFormat format)
+{
+ switch (format) {
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ case PICT_a8b8g8r8:
+ case PICT_x8b8g8r8:
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ case PICT_r5g6b5:
+ case PICT_x1r5g5b5:
+ case PICT_a1r5g5b5:
+ case PICT_a8:
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ return TRUE;
+ }
+ return FALSE;
+}
+
+static uint32_t gen5_get_dest_format_for_depth(int depth)
+{
+ switch (depth) {
+ case 32:
+ case 24:
+ default: return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
+ case 30: return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
+ case 16: return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
+ case 8: return GEN5_SURFACEFORMAT_A8_UNORM;
+ }
+}
+
+static uint32_t gen5_get_card_format_for_depth(int depth)
+{
+ switch (depth) {
+ case 32:
+ default: return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
+ case 30: return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
+ case 24: return GEN5_SURFACEFORMAT_B8G8R8X8_UNORM;
+ case 16: return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
+ case 8: return GEN5_SURFACEFORMAT_A8_UNORM;
+ }
+}
+
+static bool gen5_format_is_dst(uint32_t format)
+{
+ switch (format) {
+ case GEN5_SURFACEFORMAT_B8G8R8A8_UNORM:
+ case GEN5_SURFACEFORMAT_R8G8B8A8_UNORM:
+ case GEN5_SURFACEFORMAT_B10G10R10A2_UNORM:
+ case GEN5_SURFACEFORMAT_B5G6R5_UNORM:
+ case GEN5_SURFACEFORMAT_B5G5R5A1_UNORM:
+ case GEN5_SURFACEFORMAT_A8_UNORM:
+ case GEN5_SURFACEFORMAT_B4G4R4A4_UNORM:
+ return true;
+ default:
+ return false;
+ }
+}
+
+typedef struct gen5_surface_state_padded {
+ struct gen5_surface_state state;
+ char pad[32 - sizeof(struct gen5_surface_state)];
+} gen5_surface_state_padded;
+
+static void null_create(struct sna_static_stream *stream)
+{
+ /* A bunch of zeros useful for legacy border color and depth-stencil */
+ sna_static_stream_map(stream, 64, 64);
+}
+
+static void
+sampler_state_init(struct gen5_sampler_state *sampler_state,
+ sampler_filter_t filter,
+ sampler_extend_t extend)
+{
+ sampler_state->ss0.lod_preclamp = 1; /* GL mode */
+
+ /* We use the legacy mode to get the semantics specified by
+ * the Render extension. */
+ sampler_state->ss0.border_color_mode = GEN5_BORDER_COLOR_MODE_LEGACY;
+
+ switch (filter) {
+ default:
+ case SAMPLER_FILTER_NEAREST:
+ sampler_state->ss0.min_filter = GEN5_MAPFILTER_NEAREST;
+ sampler_state->ss0.mag_filter = GEN5_MAPFILTER_NEAREST;
+ break;
+ case SAMPLER_FILTER_BILINEAR:
+ sampler_state->ss0.min_filter = GEN5_MAPFILTER_LINEAR;
+ sampler_state->ss0.mag_filter = GEN5_MAPFILTER_LINEAR;
+ break;
+ }
+
+ switch (extend) {
+ default:
+ case SAMPLER_EXTEND_NONE:
+ sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
+ sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
+ sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
+ break;
+ case SAMPLER_EXTEND_REPEAT:
+ sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
+ sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
+ sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
+ break;
+ case SAMPLER_EXTEND_PAD:
+ sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
+ sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
+ sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
+ break;
+ case SAMPLER_EXTEND_REFLECT:
+ sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
+ sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
+ sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
+ break;
+ }
+}
+
+static uint32_t gen5_get_card_format(PictFormat format)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(gen5_tex_formats); i++) {
+ if (gen5_tex_formats[i].pict_fmt == format)
+ return gen5_tex_formats[i].card_fmt;
+ }
+ return -1;
+}
+
+static uint32_t gen5_filter(uint32_t filter)
+{
+ switch (filter) {
+ default:
+ assert(0);
+ case PictFilterNearest:
+ return SAMPLER_FILTER_NEAREST;
+ case PictFilterBilinear:
+ return SAMPLER_FILTER_BILINEAR;
+ }
+}
+
+static uint32_t gen5_check_filter(PicturePtr picture)
+{
+ switch (picture->filter) {
+ case PictFilterNearest:
+ case PictFilterBilinear:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static uint32_t gen5_repeat(uint32_t repeat)
+{
+ switch (repeat) {
+ default:
+ assert(0);
+ case RepeatNone:
+ return SAMPLER_EXTEND_NONE;
+ case RepeatNormal:
+ return SAMPLER_EXTEND_REPEAT;
+ case RepeatPad:
+ return SAMPLER_EXTEND_PAD;
+ case RepeatReflect:
+ return SAMPLER_EXTEND_REFLECT;
+ }
+}
+
+static bool gen5_check_repeat(PicturePtr picture)
+{
+ if (!picture->repeat)
+ return TRUE;
+
+ switch (picture->repeatType) {
+ case RepeatNone:
+ case RepeatNormal:
+ case RepeatPad:
+ case RepeatReflect:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+/**
+ * Sets up the common fields for a surface state buffer for the given
+ * picture in the given surface state buffer.
+ */
+static int
+gen5_bind_bo(struct sna *sna,
+ struct kgem_bo *bo,
+ uint32_t width,
+ uint32_t height,
+ uint32_t format,
+ Bool is_dst)
+{
+ struct gen5_surface_state *ss;
+ uint32_t domains;
+ uint16_t offset;
+
+ /* After the first bind, we manage the cache domains within the batch */
+ if (is_dst) {
+ domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
+ kgem_bo_mark_dirty(bo);
+ } else {
+ domains = I915_GEM_DOMAIN_SAMPLER << 16;
+ is_dst = gen5_format_is_dst(format);
+ }
+
+ offset = sna->kgem.surface - sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
+ offset *= sizeof(uint32_t);
+
+ if (is_dst) {
+ if (bo->dst_bound)
+ return bo->dst_bound;
+
+ bo->dst_bound = offset;
+ } else {
+ if (bo->src_bound)
+ return bo->src_bound;
+
+ bo->src_bound = offset;
+ }
+
+ sna->kgem.surface -=
+ sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
+ ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
+
+ ss->ss0.surface_type = GEN5_SURFACE_2D;
+ ss->ss0.surface_format = format;
+
+ ss->ss0.data_return_format = GEN5_SURFACERETURNFORMAT_FLOAT32;
+ ss->ss0.color_blend = 1;
+ ss->ss1.base_addr =
+ kgem_add_reloc(&sna->kgem,
+ sna->kgem.surface + 1,
+ bo, domains, 0);
+
+ ss->ss2.height = height - 1;
+ ss->ss2.width = width - 1;
+ ss->ss3.pitch = bo->pitch - 1;
+ ss->ss3.tile_walk = bo->tiling == I915_TILING_Y;
+ ss->ss3.tiled_surface = bo->tiling != I915_TILING_NONE;
+
+ DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
+ offset, bo->handle, ss->ss1.base_addr,
+ ss->ss0.surface_format, width, height, bo->pitch, bo->tiling,
+ domains & 0xffff ? "render" : "sampler"));
+
+ return offset;
+}
+
+fastcall static void
+gen5_emit_composite_primitive_solid(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = 1.;
+ v[2] = 1.;
+
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ v[4] = 0.;
+ v[5] = 1.;
+
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+ v[7] = 0.;
+ v[8] = 0.;
+}
+
+fastcall static void
+gen5_emit_composite_primitive_identity_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ const float *sf = op->src.scale;
+ float sx, sy, *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ sx = r->src.x + op->src.offset[0];
+ sy = r->src.y + op->src.offset[1];
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = (sx + r->width) * sf[0];
+ v[2] = (sy + r->height) * sf[1];
+
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ v[4] = sx * sf[0];
+ v[5] = v[2];
+
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+ v[7] = v[4];
+ v[8] = sy * sf[1];
+}
+
+fastcall static void
+gen5_emit_composite_primitive_affine_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float *v;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x + r->width,
+ op->src.offset[1] + r->src.y + r->height,
+ op->src.transform,
+ &v[1], &v[2]);
+ v[1] *= op->src.scale[0];
+ v[2] *= op->src.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
+ op->src.offset[1] + r->src.y + r->height,
+ op->src.transform,
+ &v[4], &v[5]);
+ v[4] *= op->src.scale[0];
+ v[5] *= op->src.scale[1];
+
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+ _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
+ op->src.offset[1] + r->src.y,
+ op->src.transform,
+ &v[7], &v[8]);
+ v[7] *= op->src.scale[0];
+ v[8] *= op->src.scale[1];
+}
+
+fastcall static void
+gen5_emit_composite_primitive_identity_source_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float src_x, src_y;
+ float msk_x, msk_y;
+ float w, h;
+ float *v;
+
+ src_x = r->src.x + op->src.offset[0];
+ src_y = r->src.y + op->src.offset[1];
+ msk_x = r->mask.x + op->mask.offset[0];
+ msk_y = r->mask.y + op->mask.offset[1];
+ w = r->width;
+ h = r->height;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 15;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = (src_x + w) * op->src.scale[0];
+ v[2] = (src_y + h) * op->src.scale[1];
+ v[3] = (msk_x + w) * op->mask.scale[0];
+ v[4] = (msk_y + h) * op->mask.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[5] = dst.f;
+ v[6] = src_x * op->src.scale[0];
+ v[7] = v[2];
+ v[8] = msk_x * op->mask.scale[0];
+ v[9] = v[4];
+
+ dst.p.y = r->dst.y;
+ v[10] = dst.f;
+ v[11] = v[6];
+ v[12] = src_y * op->src.scale[1];
+ v[13] = v[8];
+ v[14] = msk_y * op->mask.scale[1];
+}
+
+fastcall static void
+gen5_emit_composite_primitive(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
+ Bool is_affine = op->is_affine;
+ const float *src_sf = op->src.scale;
+ const float *mask_sf = op->mask.scale;
+
+ if (is_affine) {
+ sna_get_transformed_coordinates(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1],
+ op->src.transform,
+ &src_x[0],
+ &src_y[0]);
+
+ sna_get_transformed_coordinates(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[1],
+ &src_y[1]);
+
+ sna_get_transformed_coordinates(r->src.x + op->src.offset[0] + r->width,
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[2],
+ &src_y[2]);
+ } else {
+ if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1],
+ op->src.transform,
+ &src_x[0],
+ &src_y[0],
+ &src_w[0]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[1],
+ &src_y[1],
+ &src_w[1]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0] + r->width,
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[2],
+ &src_y[2],
+ &src_w[2]))
+ return;
+ }
+
+ if (op->mask.bo) {
+ if (is_affine) {
+ sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1],
+ op->mask.transform,
+ &mask_x[0],
+ &mask_y[0]);
+
+ sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[1],
+ &mask_y[1]);
+
+ sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0] + r->width,
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[2],
+ &mask_y[2]);
+ } else {
+ if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1],
+ op->mask.transform,
+ &mask_x[0],
+ &mask_y[0],
+ &mask_w[0]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[1],
+ &mask_y[1],
+ &mask_w[1]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0] + r->width,
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[2],
+ &mask_y[2],
+ &mask_w[2]))
+ return;
+ }
+ }
+
+ OUT_VERTEX(r->dst.x + r->width, r->dst.y + r->height);
+ OUT_VERTEX_F(src_x[2] * src_sf[0]);
+ OUT_VERTEX_F(src_y[2] * src_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(src_w[2]);
+ if (op->mask.bo) {
+ OUT_VERTEX_F(mask_x[2] * mask_sf[0]);
+ OUT_VERTEX_F(mask_y[2] * mask_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(mask_w[2]);
+ }
+
+ OUT_VERTEX(r->dst.x, r->dst.y + r->height);
+ OUT_VERTEX_F(src_x[1] * src_sf[0]);
+ OUT_VERTEX_F(src_y[1] * src_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(src_w[1]);
+ if (op->mask.bo) {
+ OUT_VERTEX_F(mask_x[1] * mask_sf[0]);
+ OUT_VERTEX_F(mask_y[1] * mask_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(mask_w[1]);
+ }
+
+ OUT_VERTEX(r->dst.x, r->dst.y);
+ OUT_VERTEX_F(src_x[0] * src_sf[0]);
+ OUT_VERTEX_F(src_y[0] * src_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(src_w[0]);
+ if (op->mask.bo) {
+ OUT_VERTEX_F(mask_x[0] * mask_sf[0]);
+ OUT_VERTEX_F(mask_y[0] * mask_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(mask_w[0]);
+ }
+}
+
+static void gen5_emit_vertex_buffer(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ int id = op->u.gen5.ve_id;
+
+ OUT_BATCH(GEN5_3DSTATE_VERTEX_BUFFERS | 3);
+ OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
+ (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
+ sna->render.vertex_reloc[id] = sna->kgem.nbatch;
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ sna->render_state.gen5.vb_id |= 1 << id;
+}
+
+static void gen5_emit_primitive(struct sna *sna)
+{
+ if (sna->kgem.nbatch == sna->render_state.gen5.last_primitive) {
+ sna->render_state.gen5.vertex_offset = sna->kgem.nbatch - 5;
+ return;
+ }
+
+ OUT_BATCH(GEN5_3DPRIMITIVE |
+ GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+ (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+ (0 << 9) |
+ 4);
+ sna->render_state.gen5.vertex_offset = sna->kgem.nbatch;
+ OUT_BATCH(0); /* vertex count, to be filled in later */
+ OUT_BATCH(sna->render.vertex_index);
+ OUT_BATCH(1); /* single instance */
+ OUT_BATCH(0); /* start instance location */
+ OUT_BATCH(0); /* index buffer offset, ignored */
+ sna->render.vertex_start = sna->render.vertex_index;
+
+ sna->render_state.gen5.last_primitive = sna->kgem.nbatch;
+}
+
+static bool gen5_rectangle_begin(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ int id = op->u.gen5.ve_id;
+ int ndwords;
+
+ ndwords = 0;
+ if ((sna->render_state.gen5.vb_id & (1 << id)) == 0)
+ ndwords += 5;
+ if (sna->render_state.gen5.vertex_offset == 0)
+ ndwords += op->need_magic_ca_pass ? 20 : 6;
+ if (ndwords == 0)
+ return true;
+
+ if (!kgem_check_batch(&sna->kgem, ndwords))
+ return false;
+
+ if ((sna->render_state.gen5.vb_id & (1 << id)) == 0)
+ gen5_emit_vertex_buffer(sna, op);
+ if (sna->render_state.gen5.vertex_offset == 0)
+ gen5_emit_primitive(sna);
+
+ return true;
+}
+
+static int gen5_get_rectangles__flush(struct sna *sna)
+{
+ if (!kgem_check_batch(&sna->kgem, 25))
+ return 0;
+ if (sna->kgem.nexec > KGEM_EXEC_SIZE(&sna->kgem) - 1)
+ return 0;
+ if (sna->kgem.nreloc > KGEM_RELOC_SIZE(&sna->kgem) - 1)
+ return 0;
+
+ gen5_vertex_finish(sna, FALSE);
+ sna->render.vertex_index = 0;
+
+ return ARRAY_SIZE(sna->render.vertex_data);
+}
+
+inline static int gen5_get_rectangles(struct sna *sna,
+ const struct sna_composite_op *op,
+ int want)
+{
+ int rem = vertex_space(sna);
+
+ if (rem < 3*op->floats_per_vertex) {
+ DBG(("flushing vbo for %s: %d < %d\n",
+ __FUNCTION__, rem, 3*op->floats_per_vertex));
+ rem = gen5_get_rectangles__flush(sna);
+ if (rem == 0)
+ return 0;
+ }
+
+ if (!gen5_rectangle_begin(sna, op))
+ return 0;
+
+ if (want * op->floats_per_vertex*3 > rem)
+ want = rem / (3*op->floats_per_vertex);
+
+ sna->render.vertex_index += 3*want;
+ return want;
+}
+
+static uint32_t *
+gen5_composite_get_binding_table(struct sna *sna,
+ const struct sna_composite_op *op,
+ uint16_t *offset)
+{
+ uint32_t *table;
+
+ sna->kgem.surface -=
+ sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
+ /* Clear all surplus entries to zero in case of prefetch */
+ table = memset(sna->kgem.batch + sna->kgem.surface,
+ 0, sizeof(struct gen5_surface_state_padded));
+ *offset = sna->kgem.surface;
+
+ DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
+
+ return table;
+}
+
+static void
+gen5_emit_sip(struct sna *sna)
+{
+ /* Set system instruction pointer */
+ OUT_BATCH(GEN5_STATE_SIP | 0);
+ OUT_BATCH(0);
+}
+
+static void
+gen5_emit_urb(struct sna *sna)
+{
+ int urb_vs_start, urb_vs_size;
+ int urb_gs_start, urb_gs_size;
+ int urb_clip_start, urb_clip_size;
+ int urb_sf_start, urb_sf_size;
+ int urb_cs_start, urb_cs_size;
+
+ urb_vs_start = 0;
+ urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
+ urb_gs_start = urb_vs_start + urb_vs_size;
+ urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
+ urb_clip_start = urb_gs_start + urb_gs_size;
+ urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
+ urb_sf_start = urb_clip_start + urb_clip_size;
+ urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
+ urb_cs_start = urb_sf_start + urb_sf_size;
+ urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
+
+ OUT_BATCH(GEN5_URB_FENCE |
+ UF0_CS_REALLOC |
+ UF0_SF_REALLOC |
+ UF0_CLIP_REALLOC |
+ UF0_GS_REALLOC |
+ UF0_VS_REALLOC |
+ 1);
+ OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
+ ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
+ ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
+ OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
+ ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
+
+ /* Constant buffer state */
+ OUT_BATCH(GEN5_CS_URB_STATE | 0);
+ OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);
+}
+
+static void
+gen5_emit_state_base_address(struct sna *sna)
+{
+ assert(sna->render_state.gen5.general_bo->proxy == NULL);
+ OUT_BATCH(GEN5_STATE_BASE_ADDRESS | 6);
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
+ sna->kgem.nbatch,
+ sna->render_state.gen5.general_bo,
+ I915_GEM_DOMAIN_INSTRUCTION << 16,
+ BASE_ADDRESS_MODIFY));
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
+ sna->kgem.nbatch,
+ NULL,
+ I915_GEM_DOMAIN_INSTRUCTION << 16,
+ BASE_ADDRESS_MODIFY));
+ OUT_BATCH(0); /* media */
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
+ sna->kgem.nbatch,
+ sna->render_state.gen5.general_bo,
+ I915_GEM_DOMAIN_INSTRUCTION << 16,
+ BASE_ADDRESS_MODIFY));
+
+ /* upper bounds, all disabled */
+ OUT_BATCH(BASE_ADDRESS_MODIFY);
+ OUT_BATCH(0);
+ OUT_BATCH(BASE_ADDRESS_MODIFY);
+}
+
+static void
+gen5_emit_invariant(struct sna *sna)
+{
+ /* Ironlake errata workaround: Before disabling the clipper,
+ * you have to MI_FLUSH to get the pipeline idle.
+ */
+ OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
+ OUT_BATCH(GEN5_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+ gen5_emit_sip(sna);
+ gen5_emit_state_base_address(sna);
+
+ sna->render_state.gen5.needs_invariant = FALSE;
+}
+
+static void
+gen5_get_batch(struct sna *sna)
+{
+ kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
+ DBG(("%s: flushing batch: %d < %d+%d\n",
+ __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
+ 150, 4*8));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->render_state.gen5.needs_invariant)
+ gen5_emit_invariant(sna);
+}
+
+static void
+gen5_align_vertex(struct sna *sna, const struct sna_composite_op *op)
+{
+ if (op->floats_per_vertex != sna->render_state.gen5.floats_per_vertex) {
+ DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
+ sna->render_state.gen5.floats_per_vertex,
+ op->floats_per_vertex,
+ sna->render.vertex_index,
+ (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
+ sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
+ sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
+ sna->render_state.gen5.floats_per_vertex = op->floats_per_vertex;
+ }
+}
+
+static void
+gen5_emit_binding_table(struct sna *sna, uint16_t offset)
+{
+ if (sna->render_state.gen5.surface_table == offset)
+ return;
+
+ sna->render_state.gen5.surface_table = offset;
+
+ /* Binding table pointers */
+ OUT_BATCH(GEN5_3DSTATE_BINDING_TABLE_POINTERS | 4);
+ OUT_BATCH(0); /* vs */
+ OUT_BATCH(0); /* gs */
+ OUT_BATCH(0); /* clip */
+ OUT_BATCH(0); /* sf */
+ /* Only the PS uses the binding table */
+ OUT_BATCH(offset*4);
+}
+
+static bool
+gen5_emit_pipelined_pointers(struct sna *sna,
+ const struct sna_composite_op *op,
+ int blend, int kernel)
+{
+ uint16_t offset = sna->kgem.nbatch, last;
+
+ OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS | 5);
+ OUT_BATCH(sna->render_state.gen5.vs);
+ OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */
+ OUT_BATCH(GEN5_CLIP_DISABLE); /* passthrough */
+ OUT_BATCH(sna->render_state.gen5.sf[op->mask.bo != NULL]);
+ OUT_BATCH(sna->render_state.gen5.wm +
+ SAMPLER_OFFSET(op->src.filter, op->src.repeat,
+ op->mask.filter, op->mask.repeat,
+ kernel));
+ OUT_BATCH(sna->render_state.gen5.cc +
+ gen5_get_blend(blend, op->has_component_alpha, op->dst.format));
+
+ last = sna->render_state.gen5.last_pipelined_pointers;
+ if (last &&
+ sna->kgem.batch[offset + 1] == sna->kgem.batch[last + 1] &&
+ sna->kgem.batch[offset + 3] == sna->kgem.batch[last + 3] &&
+ sna->kgem.batch[offset + 4] == sna->kgem.batch[last + 4] &&
+ sna->kgem.batch[offset + 5] == sna->kgem.batch[last + 5] &&
+ sna->kgem.batch[offset + 6] == sna->kgem.batch[last + 6]) {
+ sna->kgem.nbatch = offset;
+ return false;
+ } else {
+ sna->render_state.gen5.last_pipelined_pointers = offset;
+ return true;
+ }
+}
+
+static void
+gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
+{
+ uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
+ uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
+
+ if (sna->render_state.gen5.drawrect_limit == limit &&
+ sna->render_state.gen5.drawrect_offset == offset)
+ return;
+ sna->render_state.gen5.drawrect_offset = offset;
+ sna->render_state.gen5.drawrect_limit = limit;
+
+ OUT_BATCH(GEN5_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
+ OUT_BATCH(0x00000000);
+ OUT_BATCH(limit);
+ OUT_BATCH(offset);
+}
+
+static void
+gen5_emit_vertex_elements(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ /*
+ * vertex data in vertex buffer
+ * position: (x, y)
+ * texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0)
+ * texture coordinate 1 if (has_mask is TRUE): same as above
+ */
+ struct gen5_render_state *render = &sna->render_state.gen5;
+ Bool has_mask = op->mask.bo != NULL;
+ Bool is_affine = op->is_affine;
+ int nelem = has_mask ? 2 : 1;
+ int selem = is_affine ? 2 : 3;
+ uint32_t w_component;
+ uint32_t src_format;
+ int id = op->u.gen5.ve_id;;
+
+ if (render->ve_id == id)
+ return;
+
+ render->ve_id = id;
+
+ if (is_affine) {
+ src_format = GEN5_SURFACEFORMAT_R32G32_FLOAT;
+ w_component = GEN5_VFCOMPONENT_STORE_1_FLT;
+ } else {
+ src_format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT;
+ w_component = GEN5_VFCOMPONENT_STORE_SRC;
+ }
+
+ /* The VUE layout
+ * dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
+ * dword 4-7: position (x, y, 1.0, 1.0),
+ * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
+ * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
+ *
+ * dword 4-15 are fetched from vertex buffer
+ */
+ OUT_BATCH(GEN5_3DSTATE_VERTEX_ELEMENTS |
+ ((2 * (2 + nelem)) + 1 - 2));
+
+ OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
+ (GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT) |
+ (0 << VE0_OFFSET_SHIFT));
+ OUT_BATCH((GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
+
+ /* x,y */
+ OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
+ (GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT) |
+ (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */
+ OUT_BATCH((GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+
+ /* u0, v0, w0 */
+ OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
+ (src_format << VE0_FORMAT_SHIFT) |
+ (4 << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
+ OUT_BATCH((GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+
+ /* u1, v1, w1 */
+ if (has_mask) {
+ OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
+ (src_format << VE0_FORMAT_SHIFT) |
+ (((1 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */
+ OUT_BATCH((GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (w_component << VE1_VFCOMPONENT_2_SHIFT) |
+ (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+ }
+}
+
+static void
+gen5_emit_state(struct sna *sna,
+ const struct sna_composite_op *op,
+ uint16_t offset)
+{
+ gen5_emit_binding_table(sna, offset);
+ if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel))
+ gen5_emit_urb(sna);
+ gen5_emit_vertex_elements(sna, op);
+ gen5_emit_drawing_rectangle(sna, op);
+}
+
+static void gen5_bind_surfaces(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ uint32_t *binding_table;
+ uint16_t offset;
+
+ gen5_get_batch(sna);
+
+ binding_table = gen5_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen5_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen5_get_dest_format(op->dst.format),
+ TRUE);
+ binding_table[1] =
+ gen5_bind_bo(sna,
+ op->src.bo, op->src.width, op->src.height,
+ op->src.card_format,
+ FALSE);
+ if (op->mask.bo)
+ binding_table[2] =
+ gen5_bind_bo(sna,
+ op->mask.bo,
+ op->mask.width,
+ op->mask.height,
+ op->mask.card_format,
+ FALSE);
+
+ if (sna->kgem.surface == offset &&
+ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table &&
+ (op->mask.bo == NULL ||
+ sna->kgem.batch[sna->render_state.gen5.surface_table+2] == binding_table[2])) {
+ sna->kgem.surface += sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
+ offset = sna->render_state.gen5.surface_table;
+ }
+
+ gen5_emit_state(sna, op, offset);
+}
+
+
+fastcall static void
+gen5_render_composite_blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
+ __FUNCTION__,
+ r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
+ r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
+ r->dst.x, r->dst.y, op->dst.x, op->dst.y,
+ r->width, r->height));
+
+ if (!gen5_get_rectangles(sna, op, 1)) {
+ gen5_bind_surfaces(sna, op);
+ gen5_get_rectangles(sna, op, 1);
+ }
+
+ op->prim_emit(sna, op, r);
+}
+
+static void
+gen5_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
+ __FUNCTION__, nbox, op->dst.x, op->dst.y,
+ op->src.offset[0], op->src.offset[1],
+ op->src.width, op->src.height,
+ op->mask.offset[0], op->mask.offset[1],
+ op->mask.width, op->mask.height));
+
+ do {
+ int nbox_this_time = gen5_get_rectangles(sna, op, nbox);
+ if (nbox_this_time == 0) {
+ gen5_bind_surfaces(sna, op);
+ nbox_this_time = gen5_get_rectangles(sna, op, nbox);
+ }
+ nbox -= nbox_this_time;
+
+ do {
+ struct sna_composite_rectangles r;
+
+ DBG((" %s: (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1, box->x2, box->y2));
+
+ r.dst.x = box->x1;
+ r.dst.y = box->y1;
+ r.width = box->x2 - box->x1;
+ r.height = box->y2 - box->y1;
+ r.mask = r.src = r.dst;
+ op->prim_emit(sna, op, &r);
+ box++;
+ } while (--nbox_this_time);
+ } while (nbox);
+}
+
+#ifndef MAX
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#endif
+
+static uint32_t gen5_bind_video_source(struct sna *sna,
+ struct kgem_bo *src_bo,
+ uint32_t src_offset,
+ int src_width,
+ int src_height,
+ int src_pitch,
+ uint32_t src_surf_format)
+{
+ struct gen5_surface_state *ss;
+
+ sna->kgem.surface -= sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
+
+ ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
+ ss->ss0.surface_type = GEN5_SURFACE_2D;
+ ss->ss0.surface_format = src_surf_format;
+ ss->ss0.color_blend = 1;
+
+ ss->ss1.base_addr =
+ kgem_add_reloc(&sna->kgem,
+ sna->kgem.surface + 1,
+ src_bo,
+ I915_GEM_DOMAIN_SAMPLER << 16,
+ src_offset);
+
+ ss->ss2.width = src_width - 1;
+ ss->ss2.height = src_height - 1;
+ ss->ss3.pitch = src_pitch - 1;
+
+ return sna->kgem.surface * sizeof(uint32_t);
+}
+
+static void gen5_video_bind_surfaces(struct sna *sna,
+ struct sna_composite_op *op,
+ struct sna_video_frame *frame)
+{
+ uint32_t src_surf_format;
+ uint32_t src_surf_base[6];
+ int src_width[6];
+ int src_height[6];
+ int src_pitch[6];
+ uint32_t *binding_table;
+ int n_src, n;
+ uint16_t offset;
+
+
+ src_surf_base[0] = frame->YBufOffset;
+ src_surf_base[1] = frame->YBufOffset;
+ src_surf_base[2] = frame->VBufOffset;
+ src_surf_base[3] = frame->VBufOffset;
+ src_surf_base[4] = frame->UBufOffset;
+ src_surf_base[5] = frame->UBufOffset;
+
+ if (is_planar_fourcc(frame->id)) {
+ src_surf_format = GEN5_SURFACEFORMAT_R8_UNORM;
+ src_width[1] = src_width[0] = frame->width;
+ src_height[1] = src_height[0] = frame->height;
+ src_pitch[1] = src_pitch[0] = frame->pitch[1];
+ src_width[4] = src_width[5] = src_width[2] = src_width[3] =
+ frame->width / 2;
+ src_height[4] = src_height[5] = src_height[2] = src_height[3] =
+ frame->height / 2;
+ src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
+ frame->pitch[0];
+ n_src = 6;
+ } else {
+ if (frame->id == FOURCC_UYVY)
+ src_surf_format = GEN5_SURFACEFORMAT_YCRCB_SWAPY;
+ else
+ src_surf_format = GEN5_SURFACEFORMAT_YCRCB_NORMAL;
+
+ src_width[0] = frame->width;
+ src_height[0] = frame->height;
+ src_pitch[0] = frame->pitch[0];
+ n_src = 1;
+ }
+
+ gen5_get_batch(sna);
+ binding_table = gen5_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen5_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen5_get_dest_format_for_depth(op->dst.format),
+ TRUE);
+ for (n = 0; n < n_src; n++) {
+ binding_table[1+n] =
+ gen5_bind_video_source(sna,
+ frame->bo,
+ src_surf_base[n],
+ src_width[n],
+ src_height[n],
+ src_pitch[n],
+ src_surf_format);
+ }
+
+ gen5_emit_state(sna, op, offset);
+}
+
+static Bool
+gen5_render_video(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ RegionPtr dstRegion,
+ short src_w, short src_h,
+ short drw_w, short drw_h,
+ PixmapPtr pixmap)
+{
+ struct sna_composite_op tmp;
+ int nbox, dxo, dyo, pix_xoff, pix_yoff;
+ float src_scale_x, src_scale_y;
+ struct sna_pixmap *priv;
+ BoxPtr box;
+
+ DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, src_w, src_h, drw_w, drw_h));
+
+ priv = sna_pixmap_force_to_gpu(pixmap);
+ if (priv == NULL)
+ return FALSE;
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.op = PictOpSrc;
+ tmp.dst.pixmap = pixmap;
+ tmp.dst.width = pixmap->drawable.width;
+ tmp.dst.height = pixmap->drawable.height;
+ tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
+ tmp.dst.bo = priv->gpu_bo;
+
+ tmp.src.filter = SAMPLER_FILTER_BILINEAR;
+ tmp.src.repeat = SAMPLER_EXTEND_NONE;
+ tmp.u.gen5.wm_kernel =
+ is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
+ tmp.u.gen5.ve_id = 1;
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+
+ if (!kgem_check_bo(&sna->kgem, tmp.dst.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, frame->bo))
+ kgem_submit(&sna->kgem);
+
+ if (!kgem_bo_is_dirty(frame->bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen5_video_bind_surfaces(sna, &tmp, frame);
+ gen5_align_vertex(sna, &tmp);
+
+ /* Set up the offset for translating from the given region (in screen
+ * coordinates) to the backing pixmap.
+ */
+#ifdef COMPOSITE
+ pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
+ pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
+#else
+ pix_xoff = 0;
+ pix_yoff = 0;
+#endif
+
+ dxo = dstRegion->extents.x1;
+ dyo = dstRegion->extents.y1;
+
+ /* Use normalized texture coordinates */
+ src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
+ src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
+
+ box = REGION_RECTS(dstRegion);
+ nbox = REGION_NUM_RECTS(dstRegion);
+ while (nbox--) {
+ BoxRec r;
+
+ r.x1 = box->x1 + pix_xoff;
+ r.x2 = box->x2 + pix_xoff;
+ r.y1 = box->y1 + pix_yoff;
+ r.y2 = box->y2 + pix_yoff;
+
+ if (!gen5_get_rectangles(sna, &tmp, 1)) {
+ gen5_video_bind_surfaces(sna, &tmp, frame);
+ gen5_get_rectangles(sna, &tmp, 1);
+ }
+
+ OUT_VERTEX(r.x2, r.y2);
+ OUT_VERTEX_F((box->x2 - dxo) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+
+ OUT_VERTEX(r.x1, r.y2);
+ OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+
+ OUT_VERTEX(r.x1, r.y1);
+ OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
+ OUT_VERTEX_F((box->y1 - dyo) * src_scale_y);
+
+ sna_damage_add_box(&priv->gpu_damage, &r);
+ sna_damage_subtract_box(&priv->cpu_damage, &r);
+ box++;
+ }
+
+ return TRUE;
+}
+
+static int
+gen5_composite_solid_init(struct sna *sna,
+ struct sna_composite_channel *channel,
+ uint32_t color)
+{
+ channel->filter = PictFilterNearest;
+ channel->repeat = RepeatNormal;
+ channel->is_affine = TRUE;
+ channel->is_solid = TRUE;
+ channel->transform = NULL;
+ channel->width = 1;
+ channel->height = 1;
+ channel->card_format = GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+ channel->bo = sna_render_get_solid(sna, color);
+
+ channel->scale[0] = channel->scale[1] = 1;
+ channel->offset[0] = channel->offset[1] = 0;
+ return channel->bo != NULL;
+}
+
+static int
+gen5_composite_picture(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int x, int y,
+ int w, int h,
+ int dst_x, int dst_y)
+{
+ PixmapPtr pixmap;
+ uint32_t color;
+ int16_t dx, dy;
+
+ DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
+ __FUNCTION__, x, y, w, h, dst_x, dst_y));
+
+ channel->is_solid = FALSE;
+ channel->card_format = -1;
+
+ if (sna_picture_is_solid(picture, &color))
+ return gen5_composite_solid_init(sna, channel, color);
+
+ if (picture->pDrawable == NULL)
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ if (!gen5_check_repeat(picture))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ if (!gen5_check_filter(picture))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
+ channel->filter = picture->filter;
+
+ pixmap = get_drawable_pixmap(picture->pDrawable);
+ get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
+
+ x += dx + picture->pDrawable->x;
+ y += dy + picture->pDrawable->y;
+
+ channel->is_affine = sna_transform_is_affine(picture->transform);
+ if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
+ DBG(("%s: integer translation (%d, %d), removing\n",
+ __FUNCTION__, dx, dy));
+ x += dx;
+ y += dy;
+ channel->transform = NULL;
+ channel->filter = PictFilterNearest;
+ } else
+ channel->transform = picture->transform;
+
+ channel->card_format = gen5_get_card_format(picture->format);
+ if (channel->card_format == -1)
+ return sna_render_picture_convert(sna, picture, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+
+ if (pixmap->drawable.width > 8192 || pixmap->drawable.height > 8192)
+ return sna_render_picture_extract(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ return sna_render_pixmap_bo(sna, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+}
+
+static void gen5_composite_channel_convert(struct sna_composite_channel *channel)
+{
+ channel->repeat = gen5_repeat(channel->repeat);
+ channel->filter = gen5_filter(channel->filter);
+ if (channel->card_format == -1)
+ channel->card_format = gen5_get_card_format(channel->pict_format);
+}
+
+static void
+gen5_render_composite_done(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ gen5_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ sna->render.op = NULL;
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ sna_render_composite_redirect_done(sna, op);
+
+ if (op->src.bo)
+ kgem_bo_destroy(&sna->kgem, op->src.bo);
+ if (op->mask.bo)
+ kgem_bo_destroy(&sna->kgem, op->mask.bo);
+}
+
+static Bool
+gen5_composite_set_target(struct sna *sna,
+ PicturePtr dst,
+ struct sna_composite_op *op)
+{
+ struct sna_pixmap *priv;
+
+ DBG(("%s: dst=%p\n", __FUNCTION__, dst));
+
+ if (!gen5_check_dst_format(dst->format)) {
+ DBG(("%s: incompatible dst format %08x\n",
+ __FUNCTION__, dst->format));
+ return FALSE;
+ }
+
+ op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
+ op->dst.width = op->dst.pixmap->drawable.width;
+ op->dst.height = op->dst.pixmap->drawable.height;
+ op->dst.format = dst->format;
+ priv = sna_pixmap_force_to_gpu(op->dst.pixmap);
+ if (priv == NULL)
+ return FALSE;
+
+ DBG(("%s: pixmap=%p, format=%08x\n", __FUNCTION__,
+ op->dst.pixmap, (unsigned int)op->dst.format));
+
+
+ op->dst.bo = priv->gpu_bo;
+ if (!priv->gpu_only)
+ op->damage = &priv->gpu_damage;
+
+ DBG(("%s: bo=%p, damage=%p\n", __FUNCTION__, op->dst.bo, op->damage));
+
+ get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
+ &op->dst.x, &op->dst.y);
+ return TRUE;
+}
+
+static inline Bool
+picture_is_cpu(PicturePtr picture)
+{
+ if (!picture->pDrawable)
+ return FALSE;
+
+ /* If it is a solid, try to use the render paths */
+ if (picture->pDrawable->width == 1 &&
+ picture->pDrawable->height == 1 &&
+ picture->repeat)
+ return FALSE;
+
+ return is_cpu(picture->pDrawable);
+}
+
+static Bool
+try_blt(struct sna *sna,
+ PicturePtr dst,
+ PicturePtr source,
+ int width, int height)
+{
+ if (sna->kgem.mode == KGEM_BLT) {
+ DBG(("%s: already performing BLT\n", __FUNCTION__));
+ return TRUE;
+ }
+
+ if (width > 8192 || height > 8192) {
+ DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
+ __FUNCTION__, width, height));
+ return TRUE;
+ }
+
+ /* is the source picture only in cpu memory e.g. a shm pixmap? */
+ return picture_is_cpu(source);
+}
+
+static Bool
+gen5_render_composite(struct sna *sna,
+ uint8_t op,
+ PicturePtr src,
+ PicturePtr mask,
+ PicturePtr dst,
+ int16_t src_x, int16_t src_y,
+ int16_t msk_x, int16_t msk_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t width, int16_t height,
+ struct sna_composite_op *tmp)
+{
+ DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
+ width, height, sna->kgem.mode));
+
+ if (mask == NULL &&
+ try_blt(sna, dst, src, width, height) &&
+ sna_blt_composite(sna, op,
+ src, dst,
+ src_x, src_y,
+ dst_x, dst_y,
+ width, height, tmp))
+ return TRUE;
+
+ if (op >= ARRAY_SIZE(gen5_blend_op)) {
+ DBG(("%s: unhandled blend op %d\n", __FUNCTION__, op));
+ return FALSE;
+ }
+
+ if (need_tiling(sna, width, height))
+ return sna_tiling_composite(sna,
+ op, src, mask, dst,
+ src_x, src_y,
+ msk_x, msk_y,
+ dst_x, dst_y,
+ width, height,
+ tmp);
+
+ if (!gen5_composite_set_target(sna, dst, tmp)) {
+ DBG(("%s: failed to set composite target\n", __FUNCTION__));
+ return FALSE;
+ }
+
+ if (tmp->dst.width > 8192 || tmp->dst.height > 8192) {
+ if (!sna_render_composite_redirect(sna, tmp,
+ dst_x, dst_y, width, height))
+ return FALSE;
+ }
+
+ DBG(("%s: preparing source\n", __FUNCTION__));
+ switch (gen5_composite_picture(sna, src, &tmp->src,
+ src_x, src_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ DBG(("%s: failed to prepare source picture\n", __FUNCTION__));
+ goto cleanup_dst;
+ case 0:
+ gen5_composite_solid_init(sna, &tmp->src, 0);
+ case 1:
+ gen5_composite_channel_convert(&tmp->src);
+ break;
+ }
+
+ tmp->op = op;
+ tmp->is_affine = tmp->src.is_affine;
+ tmp->has_component_alpha = FALSE;
+ tmp->need_magic_ca_pass = FALSE;
+
+ tmp->prim_emit = gen5_emit_composite_primitive;
+ if (mask) {
+ if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
+ tmp->has_component_alpha = TRUE;
+
+ /* Check if it's component alpha that relies on a source alpha and on
+ * the source value. We can only get one of those into the single
+ * source value that we get to blend with.
+ */
+ if (gen5_blend_op[op].src_alpha &&
+ (gen5_blend_op[op].src_blend != GEN5_BLENDFACTOR_ZERO)) {
+ if (op != PictOpOver) {
+ DBG(("%s: unhandled CA blend op %d\n", __FUNCTION__, op));
+ goto cleanup_src;
+ }
+
+ tmp->need_magic_ca_pass = TRUE;
+ tmp->op = PictOpOutReverse;
+ }
+ }
+
+ DBG(("%s: preparing mask\n", __FUNCTION__));
+ switch (gen5_composite_picture(sna, mask, &tmp->mask,
+ msk_x, msk_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ DBG(("%s: failed to prepare mask picture\n", __FUNCTION__));
+ goto cleanup_src;
+ case 0:
+ gen5_composite_solid_init(sna, &tmp->mask, 0);
+ case 1:
+ gen5_composite_channel_convert(&tmp->mask);
+ break;
+ }
+
+ tmp->is_affine &= tmp->mask.is_affine;
+
+ if (tmp->src.transform == NULL && tmp->mask.transform == NULL)
+ tmp->prim_emit = gen5_emit_composite_primitive_identity_source_mask;
+
+ tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
+ } else {
+ if (tmp->src.is_solid)
+ tmp->prim_emit = gen5_emit_composite_primitive_solid;
+ else if (tmp->src.transform == NULL)
+ tmp->prim_emit = gen5_emit_composite_primitive_identity_source;
+ else if (tmp->src.is_affine)
+ tmp->prim_emit = gen5_emit_composite_primitive_affine_source;
+
+ tmp->floats_per_vertex = 3 + !tmp->is_affine;
+ }
+
+ tmp->u.gen5.wm_kernel =
+ gen5_choose_composite_kernel(tmp->op,
+ tmp->mask.bo != NULL,
+ tmp->has_component_alpha,
+ tmp->is_affine);
+ tmp->u.gen5.ve_id = (tmp->mask.bo != NULL) << 1 | tmp->is_affine;
+
+ tmp->blt = gen5_render_composite_blt;
+ tmp->boxes = gen5_render_composite_boxes;
+ tmp->done = gen5_render_composite_done;
+
+ if (!kgem_check_bo(&sna->kgem, tmp->dst.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->src.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->mask.bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen5_bind_surfaces(sna, tmp);
+ gen5_align_vertex(sna, tmp);
+
+ sna->render.op = tmp;
+ return TRUE;
+
+cleanup_src:
+ if (tmp->src.bo)
+ kgem_bo_destroy(&sna->kgem, tmp->src.bo);
+cleanup_dst:
+ if (tmp->redirect.real_bo)
+ kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
+ return FALSE;
+}
+
+static void
+gen5_copy_bind_surfaces(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ uint32_t *binding_table;
+ uint16_t offset;
+
+ gen5_get_batch(sna);
+
+ binding_table = gen5_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen5_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen5_get_dest_format_for_depth(op->dst.pixmap->drawable.depth),
+ TRUE);
+ binding_table[1] =
+ gen5_bind_bo(sna,
+ op->src.bo, op->src.width, op->src.height,
+ op->src.card_format,
+ FALSE);
+
+ if (sna->kgem.surface == offset &&
+ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table) {
+ sna->kgem.surface += sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
+ offset = sna->render_state.gen5.surface_table;
+ }
+
+ gen5_emit_state(sna, op,offset);
+}
+
+static Bool
+gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+ PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const BoxRec *box, int n)
+{
+ struct sna_composite_op tmp;
+
+ if (sna->kgem.mode == KGEM_BLT &&
+ sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
+ src->drawable.width > 8192 || src->drawable.height > 8192 ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n);
+
+ DBG(("%s (%d, %d)->(%d, %d) x %d\n",
+ __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
+
+ tmp.dst.pixmap = dst;
+ tmp.dst.width = dst->drawable.width;
+ tmp.dst.height = dst->drawable.height;
+ tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
+ tmp.dst.bo = dst_bo;
+
+ tmp.src.bo = src_bo;
+ tmp.src.filter = SAMPLER_FILTER_NEAREST;
+ tmp.src.repeat = SAMPLER_EXTEND_NONE;
+ tmp.src.card_format =
+ gen5_get_card_format_for_depth(src->drawable.depth),
+ tmp.src.width = src->drawable.width;
+ tmp.src.height = src->drawable.height;
+
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+ tmp.u.gen5.wm_kernel = WM_KERNEL;
+ tmp.u.gen5.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, src_bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(src_bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen5_get_batch(sna);
+ gen5_copy_bind_surfaces(sna, &tmp);
+ gen5_align_vertex(sna, &tmp);
+
+ tmp.src.scale[0] = 1. / src->drawable.width;
+ tmp.src.scale[1] = 1. / src->drawable.height;
+ do {
+ int n_this_time = gen5_get_rectangles(sna, &tmp, n);
+ if (n_this_time == 0) {
+ gen5_copy_bind_surfaces(sna, &tmp);
+ n_this_time = gen5_get_rectangles(sna, &tmp, n);
+ }
+ n -= n_this_time;
+
+ do {
+ DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ box->x2 - box->x1, box->y2 - box->y1));
+ OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy);
+ OUT_VERTEX_F((box->x2 + src_dx) * tmp.src.scale[0]);
+ OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
+
+ OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy);
+ OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
+ OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
+
+ OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy);
+ OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
+ OUT_VERTEX_F((box->y1 + src_dy) * tmp.src.scale[1]);
+
+ box++;
+ } while (--n_this_time);
+ } while (n);
+
+ gen5_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ return TRUE;
+}
+
+static void
+gen5_render_copy_blt(struct sna *sna,
+ const struct sna_copy_op *op,
+ int16_t sx, int16_t sy,
+ int16_t w, int16_t h,
+ int16_t dx, int16_t dy)
+{
+ DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n", __FUNCTION__,
+ sx, sy, dx, dy, w, h));
+
+ if (!gen5_get_rectangles(sna, &op->base, 1)) {
+ gen5_copy_bind_surfaces(sna, &op->base);
+ gen5_get_rectangles(sna, &op->base, 1);
+ }
+
+ OUT_VERTEX(dx+w, dy+h);
+ OUT_VERTEX_F((sx+w)*op->base.src.scale[0]);
+ OUT_VERTEX_F((sy+h)*op->base.src.scale[1]);
+
+ OUT_VERTEX(dx, dy+h);
+ OUT_VERTEX_F(sx*op->base.src.scale[0]);
+ OUT_VERTEX_F((sy+h)*op->base.src.scale[1]);
+
+ OUT_VERTEX(dx, dy);
+ OUT_VERTEX_F(sx*op->base.src.scale[0]);
+ OUT_VERTEX_F(sy*op->base.src.scale[1]);
+}
+
+static void
+gen5_render_copy_done(struct sna *sna,
+ const struct sna_copy_op *op)
+{
+ gen5_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ DBG(("%s()\n", __FUNCTION__));
+}
+
+static Bool
+gen5_render_copy(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ struct sna_copy_op *op)
+{
+ DBG(("%s (alu=%d)\n", __FUNCTION__, alu));
+
+ if (sna->kgem.mode == KGEM_BLT &&
+ sna_blt_copy(sna, alu,
+ src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ op))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
+ src->drawable.width > 8192 || src->drawable.height > 8192 ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return sna_blt_copy(sna, alu, src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ op);
+
+ op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
+
+ op->base.dst.pixmap = dst;
+ op->base.dst.width = dst->drawable.width;
+ op->base.dst.height = dst->drawable.height;
+ op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
+ op->base.dst.bo = dst_bo;
+
+ op->base.src.bo = src_bo;
+ op->base.src.card_format =
+ gen5_get_card_format_for_depth(src->drawable.depth),
+ op->base.src.width = src->drawable.width;
+ op->base.src.height = src->drawable.height;
+ op->base.src.scale[0] = 1./src->drawable.width;
+ op->base.src.scale[1] = 1./src->drawable.height;
+ op->base.src.filter = SAMPLER_FILTER_NEAREST;
+ op->base.src.repeat = SAMPLER_EXTEND_NONE;
+
+ op->base.is_affine = true;
+ op->base.floats_per_vertex = 3;
+ op->base.u.gen5.wm_kernel = WM_KERNEL;
+ op->base.u.gen5.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, src_bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(src_bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen5_copy_bind_surfaces(sna, &op->base);
+ gen5_align_vertex(sna, &op->base);
+
+ op->blt = gen5_render_copy_blt;
+ op->done = gen5_render_copy_done;
+ return TRUE;
+}
+
+static void
+gen5_fill_bind_surfaces(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ uint32_t *binding_table;
+ uint16_t offset;
+
+ gen5_get_batch(sna);
+
+ binding_table = gen5_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen5_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen5_get_dest_format(op->dst.format),
+ TRUE);
+ binding_table[1] =
+ gen5_bind_bo(sna,
+ op->src.bo, 1, 1,
+ GEN5_SURFACEFORMAT_B8G8R8A8_UNORM,
+ FALSE);
+
+ if (sna->kgem.surface == offset &&
+ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table) {
+ sna->kgem.surface +=
+ sizeof(struct gen5_surface_state_padded)/sizeof(uint32_t);
+ offset = sna->render_state.gen5.surface_table;
+ }
+
+ gen5_emit_state(sna, op, offset);
+}
+
+static Bool
+gen5_render_fill_boxes(struct sna *sna,
+ CARD8 op,
+ PictFormat format,
+ const xRenderColor *color,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ const BoxRec *box, int n)
+{
+ struct sna_composite_op tmp;
+ uint32_t pixel;
+
+ DBG(("%s op=%x, color=%08x, boxes=%d x [((%d, %d), (%d, %d))...]\n",
+ __FUNCTION__, op, pixel, n, box->x1, box->y1, box->x2, box->y2));
+
+ if (op >= ARRAY_SIZE(gen5_blend_op)) {
+ DBG(("%s: fallback due to unhandled blend op: %d\n",
+ __FUNCTION__, op));
+ return FALSE;
+ }
+
+ if (sna->kgem.mode == KGEM_BLT ||
+ dst->drawable.width > 8192 ||
+ dst->drawable.height > 8192 ||
+ !gen5_check_dst_format(format)) {
+ uint8_t alu = GXcopy;
+
+ if (op == PictOpClear) {
+ alu = GXclear;
+ pixel = 0;
+ op = PictOpSrc;
+ }
+
+ if (op == PictOpOver && color->alpha >= 0xff00)
+ op = PictOpSrc;
+
+ if (op == PictOpSrc &&
+ sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ format) &&
+ sna_blt_fill_boxes(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ pixel, box, n))
+ return TRUE;
+
+ if (dst->drawable.width > 8192 ||
+ dst->drawable.height > 8192 ||
+ !gen5_check_dst_format(format))
+ return FALSE;
+ }
+
+ if (!sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ PICT_a8r8g8b8))
+ return FALSE;
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.op = op;
+
+ tmp.dst.pixmap = dst;
+ tmp.dst.width = dst->drawable.width;
+ tmp.dst.height = dst->drawable.height;
+ tmp.dst.format = format;
+ tmp.dst.bo = dst_bo;
+
+ tmp.src.bo = sna_render_get_solid(sna, pixel);
+ tmp.src.filter = SAMPLER_FILTER_NEAREST;
+ tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
+
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+ tmp.u.gen5.wm_kernel = WM_KERNEL;
+ tmp.u.gen5.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+
+ gen5_fill_bind_surfaces(sna, &tmp);
+ gen5_align_vertex(sna, &tmp);
+
+ do {
+ int n_this_time = gen5_get_rectangles(sna, &tmp, n);
+ if (n_this_time == 0) {
+ gen5_fill_bind_surfaces(sna, &tmp);
+ n_this_time = gen5_get_rectangles(sna, &tmp, n);
+ }
+ n -= n_this_time;
+ do {
+ DBG((" (%d, %d), (%d, %d)\n",
+ box->x1, box->y1, box->x2, box->y2));
+ OUT_VERTEX(box->x2, box->y2);
+ OUT_VERTEX_F(1);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(box->x1, box->y2);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(box->x1, box->y1);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(0);
+
+ box++;
+ } while (--n_this_time);
+ } while (n);
+
+ gen5_vertex_flush(sna);
+ kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ return TRUE;
+}
+
+static void
+gen5_render_fill_blt(struct sna *sna,
+ const struct sna_fill_op *op,
+ int16_t x, int16_t y, int16_t w, int16_t h)
+{
+ DBG(("%s (%d, %d)x(%d, %d)\n", __FUNCTION__, x,y,w,h));
+
+ if (!gen5_get_rectangles(sna, &op->base, 1)) {
+ gen5_fill_bind_surfaces(sna, &op->base);
+ gen5_get_rectangles(sna, &op->base, 1);
+ }
+
+ OUT_VERTEX(x+w, y+h);
+ OUT_VERTEX_F(1);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(x, y+h);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(x, y);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(0);
+}
+
+static void
+gen5_render_fill_done(struct sna *sna,
+ const struct sna_fill_op *op)
+{
+ gen5_vertex_flush(sna);
+ kgem_bo_destroy(&sna->kgem, op->base.src.bo);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ DBG(("%s()\n", __FUNCTION__));
+}
+
+static Bool
+gen5_render_fill(struct sna *sna, uint8_t alu,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ uint32_t color,
+ struct sna_fill_op *op)
+{
+ DBG(("%s(alu=%d, color=%08x)\n", __FUNCTION__, alu, color));
+
+ if (sna->kgem.mode == KGEM_BLT &&
+ sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ op))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ op);
+
+ if (alu == GXclear)
+ color = 0;
+
+ op->base.op = color == 0 ? PictOpClear : PictOpSrc;
+
+ op->base.dst.pixmap = dst;
+ op->base.dst.width = dst->drawable.width;
+ op->base.dst.height = dst->drawable.height;
+ op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
+ op->base.dst.bo = dst_bo;
+
+ op->base.src.bo =
+ sna_render_get_solid(sna,
+ sna_rgba_for_color(color,
+ dst->drawable.depth));
+ op->base.src.filter = SAMPLER_FILTER_NEAREST;
+ op->base.src.repeat = SAMPLER_EXTEND_REPEAT;
+
+ op->base.is_affine = TRUE;
+ op->base.floats_per_vertex = 3;
+ op->base.u.gen5.wm_kernel = WM_KERNEL;
+ op->base.u.gen5.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+
+ gen5_fill_bind_surfaces(sna, &op->base);
+ gen5_align_vertex(sna, &op->base);
+
+ op->blt = gen5_render_fill_blt;
+ op->done = gen5_render_fill_done;
+ return TRUE;
+}
+
+static void
+gen5_render_flush(struct sna *sna)
+{
+ gen5_vertex_finish(sna, TRUE);
+}
+
+static void
+gen5_render_context_switch(struct sna *sna,
+ int new_mode)
+{
+ if (sna->kgem.mode == 0)
+ return;
+
+ /* Ironlake has a limitation that a 3D or Media command can't
+ * be the first command after a BLT, unless it's
+ * non-pipelined. Instead of trying to track it and emit a
+ * command at the right time, we just emit a dummy
+ * non-pipelined 3D instruction after each blit.
+ */
+ if (new_mode == KGEM_BLT) {
+#if 0
+ OUT_BATCH(MI_FLUSH |
+ MI_STATE_INSTRUCTION_CACHE_FLUSH |
+ MI_INHIBIT_RENDER_CACHE_FLUSH);
+#endif
+ } else {
+ OUT_BATCH(CMD_POLY_STIPPLE_OFFSET << 16);
+ OUT_BATCH(0);
+ }
+}
+
+static void gen5_render_reset(struct sna *sna)
+{
+ sna->render_state.gen5.needs_invariant = TRUE;
+ sna->render_state.gen5.vb_id = 0;
+ sna->render_state.gen5.ve_id = -1;
+ sna->render_state.gen5.last_primitive = -1;
+ sna->render_state.gen5.last_pipelined_pointers = 0;
+
+ sna->render_state.gen5.drawrect_offset = -1;
+ sna->render_state.gen5.drawrect_limit = -1;
+ sna->render_state.gen5.surface_table = -1;
+}
+
+static void gen5_render_fini(struct sna *sna)
+{
+ kgem_bo_destroy(&sna->kgem, sna->render_state.gen5.general_bo);
+}
+
+static uint32_t gen5_create_vs_unit_state(struct sna_static_stream *stream)
+{
+ struct gen5_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);
+
+ /* Set up the vertex shader to be disabled (passthrough) */
+ vs->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
+ vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
+ vs->vs6.vs_enable = 0;
+ vs->vs6.vert_cache_disable = 1;
+
+ return sna_static_stream_offsetof(stream, vs);
+}
+
+static uint32_t gen5_create_sf_state(struct sna_static_stream *stream,
+ uint32_t kernel)
+{
+ struct gen5_sf_unit_state *sf_state;
+
+ sf_state = sna_static_stream_map(stream, sizeof(*sf_state), 32);
+
+ sf_state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
+ sf_state->thread0.kernel_start_pointer = kernel >> 6;
+ sf_state->sf1.single_program_flow = 1;
+ /* scratch space is not used in our kernel */
+ sf_state->thread2.scratch_space_base_pointer = 0;
+ sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
+ sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
+ sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
+ /* don't smash vertex header, read start from dw8 */
+ sf_state->thread3.urb_entry_read_offset = 1;
+ sf_state->thread3.dispatch_grf_start_reg = 3;
+ sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
+ sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
+ sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
+ sf_state->thread4.stats_enable = 1;
+ sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
+ sf_state->sf6.cull_mode = GEN5_CULLMODE_NONE;
+ sf_state->sf6.scissor = 0;
+ sf_state->sf7.trifan_pv = 2;
+ sf_state->sf6.dest_org_vbias = 0x8;
+ sf_state->sf6.dest_org_hbias = 0x8;
+
+ return sna_static_stream_offsetof(stream, sf_state);
+}
+
+static uint32_t gen5_create_sampler_state(struct sna_static_stream *stream,
+ sampler_filter_t src_filter,
+ sampler_extend_t src_extend,
+ sampler_filter_t mask_filter,
+ sampler_extend_t mask_extend)
+{
+ struct gen5_sampler_state *sampler_state;
+
+ sampler_state = sna_static_stream_map(stream,
+ sizeof(struct gen5_sampler_state) * 2,
+ 32);
+ sampler_state_init(&sampler_state[0], src_filter, src_extend);
+ sampler_state_init(&sampler_state[1], mask_filter, mask_extend);
+
+ return sna_static_stream_offsetof(stream, sampler_state);
+}
+
+static void gen5_init_wm_state(struct gen5_wm_unit_state *state,
+ Bool has_mask,
+ uint32_t kernel,
+ uint32_t sampler)
+{
+ state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
+ state->thread0.kernel_start_pointer = kernel >> 6;
+
+ state->thread1.single_program_flow = 0;
+
+ /* scratch space is not used in our kernel */
+ state->thread2.scratch_space_base_pointer = 0;
+ state->thread2.per_thread_scratch_space = 0;
+
+ state->thread3.const_urb_entry_read_length = 0;
+ state->thread3.const_urb_entry_read_offset = 0;
+
+ state->thread3.urb_entry_read_offset = 0;
+ /* wm kernel use urb from 3, see wm_program in compiler module */
+ state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
+
+ state->wm4.sampler_count = 0; /* hardware requirement */
+
+ state->wm4.sampler_state_pointer = sampler >> 5;
+ state->wm5.max_threads = PS_MAX_THREADS - 1;
+ state->wm5.transposed_urb_read = 0;
+ state->wm5.thread_dispatch_enable = 1;
+ /* just use 16-pixel dispatch (4 subspans), don't need to change kernel
+ * start point
+ */
+ state->wm5.enable_16_pix = 1;
+ state->wm5.enable_8_pix = 0;
+ state->wm5.early_depth_test = 1;
+
+ /* Each pair of attributes (src/mask coords) is two URB entries */
+ if (has_mask) {
+ state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */
+ state->thread3.urb_entry_read_length = 4;
+ } else {
+ state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
+ state->thread3.urb_entry_read_length = 2;
+ }
+
+ /* binding table entry count is only used for prefetching,
+ * and it has to be set 0 for Ironlake
+ */
+ state->thread1.binding_table_entry_count = 0;
+}
+
+static uint32_t gen5_create_cc_viewport(struct sna_static_stream *stream)
+{
+ struct gen5_cc_viewport vp;
+
+ vp.min_depth = -1.e35;
+ vp.max_depth = 1.e35;
+
+ return sna_static_stream_add(stream, &vp, sizeof(vp), 32);
+}
+
+static uint32_t gen5_create_cc_unit_state(struct sna_static_stream *stream)
+{
+ uint8_t *ptr, *base;
+ uint32_t vp;
+ int i, j;
+
+ vp = gen5_create_cc_viewport(stream);
+ base = ptr =
+ sna_static_stream_map(stream,
+ GEN5_BLENDFACTOR_COUNT*GEN5_BLENDFACTOR_COUNT*64,
+ 64);
+
+ for (i = 0; i < GEN5_BLENDFACTOR_COUNT; i++) {
+ for (j = 0; j < GEN5_BLENDFACTOR_COUNT; j++) {
+ struct gen5_cc_unit_state *state =
+ (struct gen5_cc_unit_state *)ptr;
+
+ state->cc3.blend_enable = 1; /* enable color blend */
+ state->cc4.cc_viewport_state_offset = vp >> 5;
+
+ state->cc5.logicop_func = 0xc; /* COPY */
+ state->cc5.ia_blend_function = GEN5_BLENDFUNCTION_ADD;
+
+ /* Fill in alpha blend factors same as color, for the future. */
+ state->cc5.ia_src_blend_factor = i;
+ state->cc5.ia_dest_blend_factor = j;
+
+ state->cc6.blend_function = GEN5_BLENDFUNCTION_ADD;
+ state->cc6.clamp_post_alpha_blend = 1;
+ state->cc6.clamp_pre_alpha_blend = 1;
+ state->cc6.src_blend_factor = i;
+ state->cc6.dest_blend_factor = j;
+
+ ptr += 64;
+ }
+ }
+
+ return sna_static_stream_offsetof(stream, base);
+}
+
+static Bool gen5_render_setup(struct sna *sna)
+{
+ struct gen5_render_state *state = &sna->render_state.gen5;
+ struct sna_static_stream general;
+ struct gen5_wm_unit_state_padded *wm_state;
+ uint32_t sf[2], wm[KERNEL_COUNT];
+ int i, j, k, l, m;
+
+ sna_static_stream_init(&general);
+
+ /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
+ * dumps, you know it points to zero.
+ */
+ null_create(&general);
+
+ /* Set up the two SF states (one for blending with a mask, one without) */
+ sf[0] = sna_static_stream_add(&general,
+ sf_kernel,
+ sizeof(sf_kernel),
+ 64);
+ sf[1] = sna_static_stream_add(&general,
+ sf_kernel_mask,
+ sizeof(sf_kernel_mask),
+ 64);
+ for (m = 0; m < KERNEL_COUNT; m++) {
+ wm[m] = sna_static_stream_add(&general,
+ wm_kernels[m].data,
+ wm_kernels[m].size,
+ 64);
+ }
+
+ state->vs = gen5_create_vs_unit_state(&general);
+
+ state->sf[0] = gen5_create_sf_state(&general, sf[0]);
+ state->sf[1] = gen5_create_sf_state(&general, sf[1]);
+
+
+ /* Set up the WM states: each filter/extend type for source and mask, per
+ * kernel.
+ */
+ wm_state = sna_static_stream_map(&general,
+ sizeof(*wm_state) * KERNEL_COUNT *
+ FILTER_COUNT * EXTEND_COUNT *
+ FILTER_COUNT * EXTEND_COUNT,
+ 64);
+ state->wm = sna_static_stream_offsetof(&general, wm_state);
+ for (i = 0; i < FILTER_COUNT; i++) {
+ for (j = 0; j < EXTEND_COUNT; j++) {
+ for (k = 0; k < FILTER_COUNT; k++) {
+ for (l = 0; l < EXTEND_COUNT; l++) {
+ uint32_t sampler_state;
+
+ sampler_state =
+ gen5_create_sampler_state(&general,
+ i, j,
+ k, l);
+
+ for (m = 0; m < KERNEL_COUNT; m++) {
+ gen5_init_wm_state(&wm_state->state,
+ wm_kernels[m].has_mask,
+ wm[m],
+ sampler_state);
+ wm_state++;
+ }
+ }
+ }
+ }
+ }
+
+ state->cc = gen5_create_cc_unit_state(&general);
+
+ state->general_bo = sna_static_stream_fini(sna, &general);
+ return state->general_bo != NULL;
+}
+
+Bool gen5_render_init(struct sna *sna)
+{
+ if (!gen5_render_setup(sna))
+ return FALSE;
+
+ gen5_render_reset(sna);
+
+ sna->render.composite = gen5_render_composite;
+ sna->render.video = gen5_render_video;
+
+ sna->render.copy_boxes = gen5_render_copy_boxes;
+ sna->render.copy = gen5_render_copy;
+
+ sna->render.fill_boxes = gen5_render_fill_boxes;
+ sna->render.fill = gen5_render_fill;
+
+ sna->render.flush = gen5_render_flush;
+ sna->render.context_switch = gen5_render_context_switch;
+ sna->render.reset = gen5_render_reset;
+ sna->render.fini = gen5_render_fini;
+
+ sna->render.max_3d_size = 8192;
+ return TRUE;
+}
diff --git a/src/sna/gen5_render.h b/src/sna/gen5_render.h
new file mode 100644
index 00000000..190580ea
--- /dev/null
+++ b/src/sna/gen5_render.h
@@ -0,0 +1,2730 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef GEN5_RENDER_H
+#define GEN5_RENDER_H
+
+#define GEN5_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
+ ((Pipeline) << 27) | \
+ ((Opcode) << 24) | \
+ ((Subopcode) << 16))
+
+#define GEN5_URB_FENCE GEN5_3D(0, 0, 0)
+#define GEN5_CS_URB_STATE GEN5_3D(0, 0, 1)
+#define GEN5_CONSTANT_BUFFER GEN5_3D(0, 0, 2)
+#define GEN5_STATE_PREFETCH GEN5_3D(0, 0, 3)
+
+#define GEN5_STATE_BASE_ADDRESS GEN5_3D(0, 1, 1)
+#define GEN5_STATE_SIP GEN5_3D(0, 1, 2)
+
+#define GEN5_PIPELINE_SELECT GEN5_3D(1, 1, 4)
+
+#define GEN5_MEDIA_STATE_POINTERS GEN5_3D(2, 0, 0)
+#define GEN5_MEDIA_OBJECT GEN5_3D(2, 1, 0)
+
+#define GEN5_3DSTATE_PIPELINED_POINTERS GEN5_3D(3, 0, 0)
+#define GEN5_3DSTATE_BINDING_TABLE_POINTERS GEN5_3D(3, 0, 1)
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
+
+#define GEN5_3DSTATE_VERTEX_BUFFERS GEN5_3D(3, 0, 8)
+#define GEN5_3DSTATE_VERTEX_ELEMENTS GEN5_3D(3, 0, 9)
+#define GEN5_3DSTATE_INDEX_BUFFER GEN5_3D(3, 0, 0xa)
+#define GEN5_3DSTATE_VF_STATISTICS GEN5_3D(3, 0, 0xb)
+
+#define GEN5_3DSTATE_DRAWING_RECTANGLE GEN5_3D(3, 1, 0)
+#define GEN5_3DSTATE_CONSTANT_COLOR GEN5_3D(3, 1, 1)
+#define GEN5_3DSTATE_SAMPLER_PALETTE_LOAD GEN5_3D(3, 1, 2)
+#define GEN5_3DSTATE_CHROMA_KEY GEN5_3D(3, 1, 4)
+#define GEN5_3DSTATE_DEPTH_BUFFER GEN5_3D(3, 1, 5)
+# define GEN5_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
+# define GEN5_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
+
+#define GEN5_3DSTATE_POLY_STIPPLE_OFFSET GEN5_3D(3, 1, 6)
+#define GEN5_3DSTATE_POLY_STIPPLE_PATTERN GEN5_3D(3, 1, 7)
+#define GEN5_3DSTATE_LINE_STIPPLE GEN5_3D(3, 1, 8)
+#define GEN5_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN5_3D(3, 1, 9)
+/* These two are BLC and CTG only, not BW or CL */
+#define GEN5_3DSTATE_AA_LINE_PARAMS GEN5_3D(3, 1, 0xa)
+#define GEN5_3DSTATE_GS_SVB_INDEX GEN5_3D(3, 1, 0xb)
+
+#define GEN5_PIPE_CONTROL GEN5_3D(3, 2, 0)
+
+#define GEN5_3DPRIMITIVE GEN5_3D(3, 3, 0)
+
+#define GEN5_3DSTATE_CLEAR_PARAMS GEN5_3D(3, 1, 0x10)
+/* DW1 */
+# define GEN5_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
+
+/* for GEN6+ */
+#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN5_3D(3, 0, 0x02)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
+
+#define GEN6_3DSTATE_URB GEN5_3D(3, 0, 0x05)
+/* DW1 */
+# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16
+# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0
+/* DW2 */
+# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8
+# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0
+
+#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN5_3D(3, 0, 0x0d)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
+
+#define GEN6_3DSTATE_CC_STATE_POINTERS GEN5_3D(3, 0, 0x0e)
+
+#define GEN6_3DSTATE_VS GEN5_3D(3, 0, 0x10)
+
+#define GEN6_3DSTATE_GS GEN5_3D(3, 0, 0x11)
+/* DW4 */
+# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
+
+#define GEN6_3DSTATE_CLIP GEN5_3D(3, 0, 0x12)
+
+#define GEN6_3DSTATE_SF GEN5_3D(3, 0, 0x13)
+/* DW1 */
+# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW2 */
+/* DW3 */
+# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29)
+# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29)
+# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29)
+# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29)
+/* DW4 */
+# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
+# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
+# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
+
+
+#define GEN6_3DSTATE_WM GEN5_3D(3, 0, 0x14)
+/* DW2 */
+# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27
+# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+/* DW4 */
+# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16
+/* DW5 */
+# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
+# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
+# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
+# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
+/* DW6 */
+# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
+
+
+#define GEN6_3DSTATE_CONSTANT_VS GEN5_3D(3, 0, 0x15)
+#define GEN6_3DSTATE_CONSTANT_GS GEN5_3D(3, 0, 0x16)
+#define GEN6_3DSTATE_CONSTANT_PS GEN5_3D(3, 0, 0x17)
+
+#define GEN6_3DSTATE_SAMPLE_MASK GEN5_3D(3, 0, 0x18)
+
+#define GEN6_3DSTATE_MULTISAMPLE GEN5_3D(3, 1, 0x0d)
+/* DW1 */
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
+
+#define PIPELINE_SELECT_3D 0
+#define PIPELINE_SELECT_MEDIA 1
+
+#define UF0_CS_REALLOC (1 << 13)
+#define UF0_VFE_REALLOC (1 << 12)
+#define UF0_SF_REALLOC (1 << 11)
+#define UF0_CLIP_REALLOC (1 << 10)
+#define UF0_GS_REALLOC (1 << 9)
+#define UF0_VS_REALLOC (1 << 8)
+#define UF1_CLIP_FENCE_SHIFT 20
+#define UF1_GS_FENCE_SHIFT 10
+#define UF1_VS_FENCE_SHIFT 0
+#define UF2_CS_FENCE_SHIFT 20
+#define UF2_VFE_FENCE_SHIFT 10
+#define UF2_SF_FENCE_SHIFT 0
+
+/* for GEN5_STATE_BASE_ADDRESS */
+#define BASE_ADDRESS_MODIFY (1 << 0)
+
+/* for GEN5_3DSTATE_PIPELINED_POINTERS */
+#define GEN5_GS_DISABLE 0
+#define GEN5_GS_ENABLE 1
+#define GEN5_CLIP_DISABLE 0
+#define GEN5_CLIP_ENABLE 1
+
+/* for GEN5_PIPE_CONTROL */
+#define GEN5_PIPE_CONTROL_NOWRITE (0 << 14)
+#define GEN5_PIPE_CONTROL_WRITE_QWORD (1 << 14)
+#define GEN5_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
+#define GEN5_PIPE_CONTROL_WRITE_TIME (3 << 14)
+#define GEN5_PIPE_CONTROL_DEPTH_STALL (1 << 13)
+#define GEN5_PIPE_CONTROL_WC_FLUSH (1 << 12)
+#define GEN5_PIPE_CONTROL_IS_FLUSH (1 << 11)
+#define GEN5_PIPE_CONTROL_TC_FLUSH (1 << 10)
+#define GEN5_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
+#define GEN5_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
+#define GEN5_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
+#define GEN5_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
+
+/* VERTEX_BUFFER_STATE Structure */
+#define VB0_BUFFER_INDEX_SHIFT 27
+#define GEN6_VB0_BUFFER_INDEX_SHIFT 26
+#define VB0_VERTEXDATA (0 << 26)
+#define VB0_INSTANCEDATA (1 << 26)
+#define GEN6_VB0_VERTEXDATA (0 << 20)
+#define GEN6_VB0_INSTANCEDATA (1 << 20)
+#define VB0_BUFFER_PITCH_SHIFT 0
+
+/* VERTEX_ELEMENT_STATE Structure */
+#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27
+#define GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */
+#define VE0_VALID (1 << 26)
+#define GEN6_VE0_VALID (1 << 25) /* for GEN6 */
+#define VE0_FORMAT_SHIFT 16
+#define VE0_OFFSET_SHIFT 0
+#define VE1_VFCOMPONENT_0_SHIFT 28
+#define VE1_VFCOMPONENT_1_SHIFT 24
+#define VE1_VFCOMPONENT_2_SHIFT 20
+#define VE1_VFCOMPONENT_3_SHIFT 16
+#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
+
+/* 3DPRIMITIVE bits */
+#define GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
+#define GEN5_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
+/* Primitive types are in gen5_defines.h */
+#define GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT 10
+
+#define GEN5_SVG_CTL 0x7400
+
+#define GEN5_SVG_CTL_GS_BA (0 << 8)
+#define GEN5_SVG_CTL_SS_BA (1 << 8)
+#define GEN5_SVG_CTL_IO_BA (2 << 8)
+#define GEN5_SVG_CTL_GS_AUB (3 << 8)
+#define GEN5_SVG_CTL_IO_AUB (4 << 8)
+#define GEN5_SVG_CTL_SIP (5 << 8)
+
+#define GEN5_SVG_RDATA 0x7404
+#define GEN5_SVG_WORK_CTL 0x7408
+
+#define GEN5_VF_CTL 0x7500
+
+#define GEN5_VF_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN5_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
+#define GEN5_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
+#define GEN5_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
+#define GEN5_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
+#define GEN5_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
+#define GEN5_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
+#define GEN5_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
+#define GEN5_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN5_VF_STRG_VAL 0x7504
+#define GEN5_VF_STR_VL_OVR 0x7508
+#define GEN5_VF_VC_OVR 0x750c
+#define GEN5_VF_STR_PSKIP 0x7510
+#define GEN5_VF_MAX_PRIM 0x7514
+#define GEN5_VF_RDATA 0x7518
+
+#define GEN5_VS_CTL 0x7600
+#define GEN5_VS_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN5_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
+#define GEN5_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
+#define GEN5_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
+#define GEN5_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
+#define GEN5_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN5_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN5_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN5_VS_STRG_VAL 0x7604
+#define GEN5_VS_RDATA 0x7608
+
+#define GEN5_SF_CTL 0x7b00
+#define GEN5_SF_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
+#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
+#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
+#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
+#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
+#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
+#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
+#define GEN5_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
+#define GEN5_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
+#define GEN5_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
+#define GEN5_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN5_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN5_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN5_SF_STRG_VAL 0x7b04
+#define GEN5_SF_RDATA 0x7b18
+
+#define GEN5_WIZ_CTL 0x7c00
+#define GEN5_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN5_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
+#define GEN5_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
+#define GEN5_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
+#define GEN5_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
+#define GEN5_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
+#define GEN5_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
+#define GEN5_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
+#define GEN5_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
+#define GEN5_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN5_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN5_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN5_WIZ_STRG_VAL 0x7c04
+#define GEN5_WIZ_RDATA 0x7c18
+
+#define GEN5_TS_CTL 0x7e00
+#define GEN5_TS_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN5_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
+#define GEN5_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
+#define GEN5_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
+#define GEN5_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
+#define GEN5_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN5_TS_STRG_VAL 0x7e04
+#define GEN5_TS_RDATA 0x7e08
+
+#define GEN5_TD_CTL 0x8000
+#define GEN5_TD_CTL_MUX_SHIFT 8
+#define GEN5_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
+#define GEN5_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
+#define GEN5_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
+#define GEN5_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
+#define GEN5_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
+#define GEN5_TD_CTL2 0x8004
+#define GEN5_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
+#define GEN5_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
+#define GEN5_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
+#define GEN5_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
+#define GEN5_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
+#define GEN5_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
+#define GEN5_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
+#define GEN5_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
+#define GEN5_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
+#define GEN5_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
+#define GEN5_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
+#define GEN5_TD_VF_VS_EMSK 0x8008
+#define GEN5_TD_GS_EMSK 0x800c
+#define GEN5_TD_CLIP_EMSK 0x8010
+#define GEN5_TD_SF_EMSK 0x8014
+#define GEN5_TD_WIZ_EMSK 0x8018
+#define GEN5_TD_0_6_EHTRG_VAL 0x801c
+#define GEN5_TD_0_7_EHTRG_VAL 0x8020
+#define GEN5_TD_0_6_EHTRG_MSK 0x8024
+#define GEN5_TD_0_7_EHTRG_MSK 0x8028
+#define GEN5_TD_RDATA 0x802c
+#define GEN5_TD_TS_EMSK 0x8030
+
+#define GEN5_EU_CTL 0x8800
+#define GEN5_EU_CTL_SELECT_SHIFT 16
+#define GEN5_EU_CTL_DATA_MUX_SHIFT 8
+#define GEN5_EU_ATT_0 0x8810
+#define GEN5_EU_ATT_1 0x8814
+#define GEN5_EU_ATT_DATA_0 0x8820
+#define GEN5_EU_ATT_DATA_1 0x8824
+#define GEN5_EU_ATT_CLR_0 0x8830
+#define GEN5_EU_ATT_CLR_1 0x8834
+#define GEN5_EU_RDATA 0x8840
+
+/* 3D state:
+ */
+#define _3DOP_3DSTATE_PIPELINED 0x0
+#define _3DOP_3DSTATE_NONPIPELINED 0x1
+#define _3DOP_3DCONTROL 0x2
+#define _3DOP_3DPRIMITIVE 0x3
+
+#define _3DSTATE_PIPELINED_POINTERS 0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
+#define _3DSTATE_VERTEX_BUFFERS 0x08
+#define _3DSTATE_VERTEX_ELEMENTS 0x09
+#define _3DSTATE_INDEX_BUFFER 0x0A
+#define _3DSTATE_VF_STATISTICS 0x0B
+#define _3DSTATE_DRAWING_RECTANGLE 0x00
+#define _3DSTATE_CONSTANT_COLOR 0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
+#define _3DSTATE_CHROMA_KEY 0x04
+#define _3DSTATE_DEPTH_BUFFER 0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
+#define _3DSTATE_LINE_STIPPLE 0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
+#define _3DCONTROL 0x00
+#define _3DPRIMITIVE 0x00
+
+#define _3DPRIM_POINTLIST 0x01
+#define _3DPRIM_LINELIST 0x02
+#define _3DPRIM_LINESTRIP 0x03
+#define _3DPRIM_TRILIST 0x04
+#define _3DPRIM_TRISTRIP 0x05
+#define _3DPRIM_TRIFAN 0x06
+#define _3DPRIM_QUADLIST 0x07
+#define _3DPRIM_QUADSTRIP 0x08
+#define _3DPRIM_LINELIST_ADJ 0x09
+#define _3DPRIM_LINESTRIP_ADJ 0x0A
+#define _3DPRIM_TRILIST_ADJ 0x0B
+#define _3DPRIM_TRISTRIP_ADJ 0x0C
+#define _3DPRIM_TRISTRIP_REVERSE 0x0D
+#define _3DPRIM_POLYGON 0x0E
+#define _3DPRIM_RECTLIST 0x0F
+#define _3DPRIM_LINELOOP 0x10
+#define _3DPRIM_POINTLIST_BF 0x11
+#define _3DPRIM_LINESTRIP_CONT 0x12
+#define _3DPRIM_LINESTRIP_BF 0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
+
+#define GEN5_ANISORATIO_2 0
+#define GEN5_ANISORATIO_4 1
+#define GEN5_ANISORATIO_6 2
+#define GEN5_ANISORATIO_8 3
+#define GEN5_ANISORATIO_10 4
+#define GEN5_ANISORATIO_12 5
+#define GEN5_ANISORATIO_14 6
+#define GEN5_ANISORATIO_16 7
+
+#define GEN5_BLENDFACTOR_ONE 0x1
+#define GEN5_BLENDFACTOR_SRC_COLOR 0x2
+#define GEN5_BLENDFACTOR_SRC_ALPHA 0x3
+#define GEN5_BLENDFACTOR_DST_ALPHA 0x4
+#define GEN5_BLENDFACTOR_DST_COLOR 0x5
+#define GEN5_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
+#define GEN5_BLENDFACTOR_CONST_COLOR 0x7
+#define GEN5_BLENDFACTOR_CONST_ALPHA 0x8
+#define GEN5_BLENDFACTOR_SRC1_COLOR 0x9
+#define GEN5_BLENDFACTOR_SRC1_ALPHA 0x0A
+#define GEN5_BLENDFACTOR_ZERO 0x11
+#define GEN5_BLENDFACTOR_INV_SRC_COLOR 0x12
+#define GEN5_BLENDFACTOR_INV_SRC_ALPHA 0x13
+#define GEN5_BLENDFACTOR_INV_DST_ALPHA 0x14
+#define GEN5_BLENDFACTOR_INV_DST_COLOR 0x15
+#define GEN5_BLENDFACTOR_INV_CONST_COLOR 0x17
+#define GEN5_BLENDFACTOR_INV_CONST_ALPHA 0x18
+#define GEN5_BLENDFACTOR_INV_SRC1_COLOR 0x19
+#define GEN5_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
+
+#define GEN5_BLENDFUNCTION_ADD 0
+#define GEN5_BLENDFUNCTION_SUBTRACT 1
+#define GEN5_BLENDFUNCTION_REVERSE_SUBTRACT 2
+#define GEN5_BLENDFUNCTION_MIN 3
+#define GEN5_BLENDFUNCTION_MAX 4
+
+#define GEN5_ALPHATEST_FORMAT_UNORM8 0
+#define GEN5_ALPHATEST_FORMAT_FLOAT32 1
+
+#define GEN5_CHROMAKEY_KILL_ON_ANY_MATCH 0
+#define GEN5_CHROMAKEY_REPLACE_BLACK 1
+
+#define GEN5_CLIP_API_OGL 0
+#define GEN5_CLIP_API_DX 1
+
+#define GEN5_CLIPMODE_NORMAL 0
+#define GEN5_CLIPMODE_CLIP_ALL 1
+#define GEN5_CLIPMODE_CLIP_NON_REJECTED 2
+#define GEN5_CLIPMODE_REJECT_ALL 3
+#define GEN5_CLIPMODE_ACCEPT_ALL 4
+
+#define GEN5_CLIP_NDCSPACE 0
+#define GEN5_CLIP_SCREENSPACE 1
+
+#define GEN5_COMPAREFUNCTION_ALWAYS 0
+#define GEN5_COMPAREFUNCTION_NEVER 1
+#define GEN5_COMPAREFUNCTION_LESS 2
+#define GEN5_COMPAREFUNCTION_EQUAL 3
+#define GEN5_COMPAREFUNCTION_LEQUAL 4
+#define GEN5_COMPAREFUNCTION_GREATER 5
+#define GEN5_COMPAREFUNCTION_NOTEQUAL 6
+#define GEN5_COMPAREFUNCTION_GEQUAL 7
+
+#define GEN5_COVERAGE_PIXELS_HALF 0
+#define GEN5_COVERAGE_PIXELS_1 1
+#define GEN5_COVERAGE_PIXELS_2 2
+#define GEN5_COVERAGE_PIXELS_4 3
+
+#define GEN5_CULLMODE_BOTH 0
+#define GEN5_CULLMODE_NONE 1
+#define GEN5_CULLMODE_FRONT 2
+#define GEN5_CULLMODE_BACK 3
+
+#define GEN5_DEFAULTCOLOR_R8G8B8A8_UNORM 0
+#define GEN5_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
+
+#define GEN5_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
+#define GEN5_DEPTHFORMAT_D32_FLOAT 1
+#define GEN5_DEPTHFORMAT_D24_UNORM_S8_UINT 2
+#define GEN5_DEPTHFORMAT_D16_UNORM 5
+
+#define GEN5_FLOATING_POINT_IEEE_754 0
+#define GEN5_FLOATING_POINT_NON_IEEE_754 1
+
+#define GEN5_FRONTWINDING_CW 0
+#define GEN5_FRONTWINDING_CCW 1
+
+#define GEN5_INDEX_BYTE 0
+#define GEN5_INDEX_WORD 1
+#define GEN5_INDEX_DWORD 2
+
+#define GEN5_LOGICOPFUNCTION_CLEAR 0
+#define GEN5_LOGICOPFUNCTION_NOR 1
+#define GEN5_LOGICOPFUNCTION_AND_INVERTED 2
+#define GEN5_LOGICOPFUNCTION_COPY_INVERTED 3
+#define GEN5_LOGICOPFUNCTION_AND_REVERSE 4
+#define GEN5_LOGICOPFUNCTION_INVERT 5
+#define GEN5_LOGICOPFUNCTION_XOR 6
+#define GEN5_LOGICOPFUNCTION_NAND 7
+#define GEN5_LOGICOPFUNCTION_AND 8
+#define GEN5_LOGICOPFUNCTION_EQUIV 9
+#define GEN5_LOGICOPFUNCTION_NOOP 10
+#define GEN5_LOGICOPFUNCTION_OR_INVERTED 11
+#define GEN5_LOGICOPFUNCTION_COPY 12
+#define GEN5_LOGICOPFUNCTION_OR_REVERSE 13
+#define GEN5_LOGICOPFUNCTION_OR 14
+#define GEN5_LOGICOPFUNCTION_SET 15
+
+#define GEN5_MAPFILTER_NEAREST 0x0
+#define GEN5_MAPFILTER_LINEAR 0x1
+#define GEN5_MAPFILTER_ANISOTROPIC 0x2
+
+#define GEN5_MIPFILTER_NONE 0
+#define GEN5_MIPFILTER_NEAREST 1
+#define GEN5_MIPFILTER_LINEAR 3
+
+#define GEN5_POLYGON_FRONT_FACING 0
+#define GEN5_POLYGON_BACK_FACING 1
+
+#define GEN5_PREFILTER_ALWAYS 0x0
+#define GEN5_PREFILTER_NEVER 0x1
+#define GEN5_PREFILTER_LESS 0x2
+#define GEN5_PREFILTER_EQUAL 0x3
+#define GEN5_PREFILTER_LEQUAL 0x4
+#define GEN5_PREFILTER_GREATER 0x5
+#define GEN5_PREFILTER_NOTEQUAL 0x6
+#define GEN5_PREFILTER_GEQUAL 0x7
+
+#define GEN5_PROVOKING_VERTEX_0 0
+#define GEN5_PROVOKING_VERTEX_1 1
+#define GEN5_PROVOKING_VERTEX_2 2
+
+#define GEN5_RASTRULE_UPPER_LEFT 0
+#define GEN5_RASTRULE_UPPER_RIGHT 1
+
+#define GEN5_RENDERTARGET_CLAMPRANGE_UNORM 0
+#define GEN5_RENDERTARGET_CLAMPRANGE_SNORM 1
+#define GEN5_RENDERTARGET_CLAMPRANGE_FORMAT 2
+
+#define GEN5_STENCILOP_KEEP 0
+#define GEN5_STENCILOP_ZERO 1
+#define GEN5_STENCILOP_REPLACE 2
+#define GEN5_STENCILOP_INCRSAT 3
+#define GEN5_STENCILOP_DECRSAT 4
+#define GEN5_STENCILOP_INCR 5
+#define GEN5_STENCILOP_DECR 6
+#define GEN5_STENCILOP_INVERT 7
+
+#define GEN5_SURFACE_MIPMAPLAYOUT_BELOW 0
+#define GEN5_SURFACE_MIPMAPLAYOUT_RIGHT 1
+
+#define GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
+#define GEN5_SURFACEFORMAT_R32G32B32A32_SINT 0x001
+#define GEN5_SURFACEFORMAT_R32G32B32A32_UINT 0x002
+#define GEN5_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
+#define GEN5_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
+#define GEN5_SURFACEFORMAT_R64G64_FLOAT 0x005
+#define GEN5_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
+#define GEN5_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
+#define GEN5_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
+#define GEN5_SURFACEFORMAT_R32G32B32_FLOAT 0x040
+#define GEN5_SURFACEFORMAT_R32G32B32_SINT 0x041
+#define GEN5_SURFACEFORMAT_R32G32B32_UINT 0x042
+#define GEN5_SURFACEFORMAT_R32G32B32_UNORM 0x043
+#define GEN5_SURFACEFORMAT_R32G32B32_SNORM 0x044
+#define GEN5_SURFACEFORMAT_R32G32B32_SSCALED 0x045
+#define GEN5_SURFACEFORMAT_R32G32B32_USCALED 0x046
+#define GEN5_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
+#define GEN5_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
+#define GEN5_SURFACEFORMAT_R16G16B16A16_SINT 0x082
+#define GEN5_SURFACEFORMAT_R16G16B16A16_UINT 0x083
+#define GEN5_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
+#define GEN5_SURFACEFORMAT_R32G32_FLOAT 0x085
+#define GEN5_SURFACEFORMAT_R32G32_SINT 0x086
+#define GEN5_SURFACEFORMAT_R32G32_UINT 0x087
+#define GEN5_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
+#define GEN5_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
+#define GEN5_SURFACEFORMAT_L32A32_FLOAT 0x08A
+#define GEN5_SURFACEFORMAT_R32G32_UNORM 0x08B
+#define GEN5_SURFACEFORMAT_R32G32_SNORM 0x08C
+#define GEN5_SURFACEFORMAT_R64_FLOAT 0x08D
+#define GEN5_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
+#define GEN5_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
+#define GEN5_SURFACEFORMAT_A32X32_FLOAT 0x090
+#define GEN5_SURFACEFORMAT_L32X32_FLOAT 0x091
+#define GEN5_SURFACEFORMAT_I32X32_FLOAT 0x092
+#define GEN5_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
+#define GEN5_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
+#define GEN5_SURFACEFORMAT_R32G32_SSCALED 0x095
+#define GEN5_SURFACEFORMAT_R32G32_USCALED 0x096
+#define GEN5_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define GEN5_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
+#define GEN5_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
+#define GEN5_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
+#define GEN5_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
+#define GEN5_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
+#define GEN5_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
+#define GEN5_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
+#define GEN5_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
+#define GEN5_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
+#define GEN5_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
+#define GEN5_SURFACEFORMAT_R16G16_UNORM 0x0CC
+#define GEN5_SURFACEFORMAT_R16G16_SNORM 0x0CD
+#define GEN5_SURFACEFORMAT_R16G16_SINT 0x0CE
+#define GEN5_SURFACEFORMAT_R16G16_UINT 0x0CF
+#define GEN5_SURFACEFORMAT_R16G16_FLOAT 0x0D0
+#define GEN5_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
+#define GEN5_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
+#define GEN5_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
+#define GEN5_SURFACEFORMAT_R32_SINT 0x0D6
+#define GEN5_SURFACEFORMAT_R32_UINT 0x0D7
+#define GEN5_SURFACEFORMAT_R32_FLOAT 0x0D8
+#define GEN5_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
+#define GEN5_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
+#define GEN5_SURFACEFORMAT_L16A16_UNORM 0x0DF
+#define GEN5_SURFACEFORMAT_I24X8_UNORM 0x0E0
+#define GEN5_SURFACEFORMAT_L24X8_UNORM 0x0E1
+#define GEN5_SURFACEFORMAT_A24X8_UNORM 0x0E2
+#define GEN5_SURFACEFORMAT_I32_FLOAT 0x0E3
+#define GEN5_SURFACEFORMAT_L32_FLOAT 0x0E4
+#define GEN5_SURFACEFORMAT_A32_FLOAT 0x0E5
+#define GEN5_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
+#define GEN5_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
+#define GEN5_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
+#define GEN5_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
+#define GEN5_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
+#define GEN5_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
+#define GEN5_SURFACEFORMAT_L16A16_FLOAT 0x0F0
+#define GEN5_SURFACEFORMAT_R32_UNORM 0x0F1
+#define GEN5_SURFACEFORMAT_R32_SNORM 0x0F2
+#define GEN5_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
+#define GEN5_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
+#define GEN5_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
+#define GEN5_SURFACEFORMAT_R16G16_SSCALED 0x0F6
+#define GEN5_SURFACEFORMAT_R16G16_USCALED 0x0F7
+#define GEN5_SURFACEFORMAT_R32_SSCALED 0x0F8
+#define GEN5_SURFACEFORMAT_R32_USCALED 0x0F9
+#define GEN5_SURFACEFORMAT_B5G6R5_UNORM 0x100
+#define GEN5_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
+#define GEN5_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
+#define GEN5_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
+#define GEN5_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
+#define GEN5_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
+#define GEN5_SURFACEFORMAT_R8G8_UNORM 0x106
+#define GEN5_SURFACEFORMAT_R8G8_SNORM 0x107
+#define GEN5_SURFACEFORMAT_R8G8_SINT 0x108
+#define GEN5_SURFACEFORMAT_R8G8_UINT 0x109
+#define GEN5_SURFACEFORMAT_R16_UNORM 0x10A
+#define GEN5_SURFACEFORMAT_R16_SNORM 0x10B
+#define GEN5_SURFACEFORMAT_R16_SINT 0x10C
+#define GEN5_SURFACEFORMAT_R16_UINT 0x10D
+#define GEN5_SURFACEFORMAT_R16_FLOAT 0x10E
+#define GEN5_SURFACEFORMAT_I16_UNORM 0x111
+#define GEN5_SURFACEFORMAT_L16_UNORM 0x112
+#define GEN5_SURFACEFORMAT_A16_UNORM 0x113
+#define GEN5_SURFACEFORMAT_L8A8_UNORM 0x114
+#define GEN5_SURFACEFORMAT_I16_FLOAT 0x115
+#define GEN5_SURFACEFORMAT_L16_FLOAT 0x116
+#define GEN5_SURFACEFORMAT_A16_FLOAT 0x117
+#define GEN5_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define GEN5_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define GEN5_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
+#define GEN5_SURFACEFORMAT_R8G8_SSCALED 0x11C
+#define GEN5_SURFACEFORMAT_R8G8_USCALED 0x11D
+#define GEN5_SURFACEFORMAT_R16_SSCALED 0x11E
+#define GEN5_SURFACEFORMAT_R16_USCALED 0x11F
+#define GEN5_SURFACEFORMAT_R8_UNORM 0x140
+#define GEN5_SURFACEFORMAT_R8_SNORM 0x141
+#define GEN5_SURFACEFORMAT_R8_SINT 0x142
+#define GEN5_SURFACEFORMAT_R8_UINT 0x143
+#define GEN5_SURFACEFORMAT_A8_UNORM 0x144
+#define GEN5_SURFACEFORMAT_I8_UNORM 0x145
+#define GEN5_SURFACEFORMAT_L8_UNORM 0x146
+#define GEN5_SURFACEFORMAT_P4A4_UNORM 0x147
+#define GEN5_SURFACEFORMAT_A4P4_UNORM 0x148
+#define GEN5_SURFACEFORMAT_R8_SSCALED 0x149
+#define GEN5_SURFACEFORMAT_R8_USCALED 0x14A
+#define GEN5_SURFACEFORMAT_R1_UINT 0x181
+#define GEN5_SURFACEFORMAT_YCRCB_NORMAL 0x182
+#define GEN5_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
+#define GEN5_SURFACEFORMAT_BC1_UNORM 0x186
+#define GEN5_SURFACEFORMAT_BC2_UNORM 0x187
+#define GEN5_SURFACEFORMAT_BC3_UNORM 0x188
+#define GEN5_SURFACEFORMAT_BC4_UNORM 0x189
+#define GEN5_SURFACEFORMAT_BC5_UNORM 0x18A
+#define GEN5_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
+#define GEN5_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
+#define GEN5_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
+#define GEN5_SURFACEFORMAT_MONO8 0x18E
+#define GEN5_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
+#define GEN5_SURFACEFORMAT_YCRCB_SWAPY 0x190
+#define GEN5_SURFACEFORMAT_DXT1_RGB 0x191
+#define GEN5_SURFACEFORMAT_FXT1 0x192
+#define GEN5_SURFACEFORMAT_R8G8B8_UNORM 0x193
+#define GEN5_SURFACEFORMAT_R8G8B8_SNORM 0x194
+#define GEN5_SURFACEFORMAT_R8G8B8_SSCALED 0x195
+#define GEN5_SURFACEFORMAT_R8G8B8_USCALED 0x196
+#define GEN5_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
+#define GEN5_SURFACEFORMAT_R64G64B64_FLOAT 0x198
+#define GEN5_SURFACEFORMAT_BC4_SNORM 0x199
+#define GEN5_SURFACEFORMAT_BC5_SNORM 0x19A
+#define GEN5_SURFACEFORMAT_R16G16B16_UNORM 0x19C
+#define GEN5_SURFACEFORMAT_R16G16B16_SNORM 0x19D
+#define GEN5_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
+#define GEN5_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+
+#define GEN5_SURFACERETURNFORMAT_FLOAT32 0
+#define GEN5_SURFACERETURNFORMAT_S1 1
+
+#define GEN5_SURFACE_1D 0
+#define GEN5_SURFACE_2D 1
+#define GEN5_SURFACE_3D 2
+#define GEN5_SURFACE_CUBE 3
+#define GEN5_SURFACE_BUFFER 4
+#define GEN5_SURFACE_NULL 7
+
+#define GEN5_BORDER_COLOR_MODE_DEFAULT 0
+#define GEN5_BORDER_COLOR_MODE_LEGACY 1
+
+#define GEN5_TEXCOORDMODE_WRAP 0
+#define GEN5_TEXCOORDMODE_MIRROR 1
+#define GEN5_TEXCOORDMODE_CLAMP 2
+#define GEN5_TEXCOORDMODE_CUBE 3
+#define GEN5_TEXCOORDMODE_CLAMP_BORDER 4
+#define GEN5_TEXCOORDMODE_MIRROR_ONCE 5
+
+#define GEN5_THREAD_PRIORITY_NORMAL 0
+#define GEN5_THREAD_PRIORITY_HIGH 1
+
+#define GEN5_TILEWALK_XMAJOR 0
+#define GEN5_TILEWALK_YMAJOR 1
+
+#define GEN5_VERTEX_SUBPIXEL_PRECISION_8BITS 0
+#define GEN5_VERTEX_SUBPIXEL_PRECISION_4BITS 1
+
+#define GEN5_VERTEXBUFFER_ACCESS_VERTEXDATA 0
+#define GEN5_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
+
+#define GEN5_VFCOMPONENT_NOSTORE 0
+#define GEN5_VFCOMPONENT_STORE_SRC 1
+#define GEN5_VFCOMPONENT_STORE_0 2
+#define GEN5_VFCOMPONENT_STORE_1_FLT 3
+#define GEN5_VFCOMPONENT_STORE_1_INT 4
+#define GEN5_VFCOMPONENT_STORE_VID 5
+#define GEN5_VFCOMPONENT_STORE_IID 6
+#define GEN5_VFCOMPONENT_STORE_PID 7
+
+
+
+/* Execution Unit (EU) defines
+ */
+
+#define GEN5_ALIGN_1 0
+#define GEN5_ALIGN_16 1
+
+#define GEN5_ADDRESS_DIRECT 0
+#define GEN5_ADDRESS_REGISTER_INDIRECT_REGISTER 1
+
+#define GEN5_CHANNEL_X 0
+#define GEN5_CHANNEL_Y 1
+#define GEN5_CHANNEL_Z 2
+#define GEN5_CHANNEL_W 3
+
+#define GEN5_COMPRESSION_NONE 0
+#define GEN5_COMPRESSION_2NDHALF 1
+#define GEN5_COMPRESSION_COMPRESSED 2
+
+#define GEN5_CONDITIONAL_NONE 0
+#define GEN5_CONDITIONAL_Z 1
+#define GEN5_CONDITIONAL_NZ 2
+#define GEN5_CONDITIONAL_EQ 1 /* Z */
+#define GEN5_CONDITIONAL_NEQ 2 /* NZ */
+#define GEN5_CONDITIONAL_G 3
+#define GEN5_CONDITIONAL_GE 4
+#define GEN5_CONDITIONAL_L 5
+#define GEN5_CONDITIONAL_LE 6
+#define GEN5_CONDITIONAL_C 7
+#define GEN5_CONDITIONAL_O 8
+
+#define GEN5_DEBUG_NONE 0
+#define GEN5_DEBUG_BREAKPOINT 1
+
+#define GEN5_DEPENDENCY_NORMAL 0
+#define GEN5_DEPENDENCY_NOTCLEARED 1
+#define GEN5_DEPENDENCY_NOTCHECKED 2
+#define GEN5_DEPENDENCY_DISABLE 3
+
+#define GEN5_EXECUTE_1 0
+#define GEN5_EXECUTE_2 1
+#define GEN5_EXECUTE_4 2
+#define GEN5_EXECUTE_8 3
+#define GEN5_EXECUTE_16 4
+#define GEN5_EXECUTE_32 5
+
+#define GEN5_HORIZONTAL_STRIDE_0 0
+#define GEN5_HORIZONTAL_STRIDE_1 1
+#define GEN5_HORIZONTAL_STRIDE_2 2
+#define GEN5_HORIZONTAL_STRIDE_4 3
+
+#define GEN5_INSTRUCTION_NORMAL 0
+#define GEN5_INSTRUCTION_SATURATE 1
+
+#define GEN5_MASK_ENABLE 0
+#define GEN5_MASK_DISABLE 1
+
+#define GEN5_OPCODE_MOV 1
+#define GEN5_OPCODE_SEL 2
+#define GEN5_OPCODE_NOT 4
+#define GEN5_OPCODE_AND 5
+#define GEN5_OPCODE_OR 6
+#define GEN5_OPCODE_XOR 7
+#define GEN5_OPCODE_SHR 8
+#define GEN5_OPCODE_SHL 9
+#define GEN5_OPCODE_RSR 10
+#define GEN5_OPCODE_RSL 11
+#define GEN5_OPCODE_ASR 12
+#define GEN5_OPCODE_CMP 16
+#define GEN5_OPCODE_JMPI 32
+#define GEN5_OPCODE_IF 34
+#define GEN5_OPCODE_IFF 35
+#define GEN5_OPCODE_ELSE 36
+#define GEN5_OPCODE_ENDIF 37
+#define GEN5_OPCODE_DO 38
+#define GEN5_OPCODE_WHILE 39
+#define GEN5_OPCODE_BREAK 40
+#define GEN5_OPCODE_CONTINUE 41
+#define GEN5_OPCODE_HALT 42
+#define GEN5_OPCODE_MSAVE 44
+#define GEN5_OPCODE_MRESTORE 45
+#define GEN5_OPCODE_PUSH 46
+#define GEN5_OPCODE_POP 47
+#define GEN5_OPCODE_WAIT 48
+#define GEN5_OPCODE_SEND 49
+#define GEN5_OPCODE_ADD 64
+#define GEN5_OPCODE_MUL 65
+#define GEN5_OPCODE_AVG 66
+#define GEN5_OPCODE_FRC 67
+#define GEN5_OPCODE_RNDU 68
+#define GEN5_OPCODE_RNDD 69
+#define GEN5_OPCODE_RNDE 70
+#define GEN5_OPCODE_RNDZ 71
+#define GEN5_OPCODE_MAC 72
+#define GEN5_OPCODE_MACH 73
+#define GEN5_OPCODE_LZD 74
+#define GEN5_OPCODE_SAD2 80
+#define GEN5_OPCODE_SADA2 81
+#define GEN5_OPCODE_DP4 84
+#define GEN5_OPCODE_DPH 85
+#define GEN5_OPCODE_DP3 86
+#define GEN5_OPCODE_DP2 87
+#define GEN5_OPCODE_DPA2 88
+#define GEN5_OPCODE_LINE 89
+#define GEN5_OPCODE_NOP 126
+
+#define GEN5_PREDICATE_NONE 0
+#define GEN5_PREDICATE_NORMAL 1
+#define GEN5_PREDICATE_ALIGN1_ANYV 2
+#define GEN5_PREDICATE_ALIGN1_ALLV 3
+#define GEN5_PREDICATE_ALIGN1_ANY2H 4
+#define GEN5_PREDICATE_ALIGN1_ALL2H 5
+#define GEN5_PREDICATE_ALIGN1_ANY4H 6
+#define GEN5_PREDICATE_ALIGN1_ALL4H 7
+#define GEN5_PREDICATE_ALIGN1_ANY8H 8
+#define GEN5_PREDICATE_ALIGN1_ALL8H 9
+#define GEN5_PREDICATE_ALIGN1_ANY16H 10
+#define GEN5_PREDICATE_ALIGN1_ALL16H 11
+#define GEN5_PREDICATE_ALIGN16_REPLICATE_X 2
+#define GEN5_PREDICATE_ALIGN16_REPLICATE_Y 3
+#define GEN5_PREDICATE_ALIGN16_REPLICATE_Z 4
+#define GEN5_PREDICATE_ALIGN16_REPLICATE_W 5
+#define GEN5_PREDICATE_ALIGN16_ANY4H 6
+#define GEN5_PREDICATE_ALIGN16_ALL4H 7
+
+#define GEN5_ARCHITECTURE_REGISTER_FILE 0
+#define GEN5_GENERAL_REGISTER_FILE 1
+#define GEN5_MESSAGE_REGISTER_FILE 2
+#define GEN5_IMMEDIATE_VALUE 3
+
+#define GEN5_REGISTER_TYPE_UD 0
+#define GEN5_REGISTER_TYPE_D 1
+#define GEN5_REGISTER_TYPE_UW 2
+#define GEN5_REGISTER_TYPE_W 3
+#define GEN5_REGISTER_TYPE_UB 4
+#define GEN5_REGISTER_TYPE_B 5
+#define GEN5_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
+#define GEN5_REGISTER_TYPE_HF 6
+#define GEN5_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
+#define GEN5_REGISTER_TYPE_F 7
+
+#define GEN5_ARF_NULL 0x00
+#define GEN5_ARF_ADDRESS 0x10
+#define GEN5_ARF_ACCUMULATOR 0x20
+#define GEN5_ARF_FLAG 0x30
+#define GEN5_ARF_MASK 0x40
+#define GEN5_ARF_MASK_STACK 0x50
+#define GEN5_ARF_MASK_STACK_DEPTH 0x60
+#define GEN5_ARF_STATE 0x70
+#define GEN5_ARF_CONTROL 0x80
+#define GEN5_ARF_NOTIFICATION_COUNT 0x90
+#define GEN5_ARF_IP 0xA0
+
+#define GEN5_AMASK 0
+#define GEN5_IMASK 1
+#define GEN5_LMASK 2
+#define GEN5_CMASK 3
+
+
+
+#define GEN5_THREAD_NORMAL 0
+#define GEN5_THREAD_ATOMIC 1
+#define GEN5_THREAD_SWITCH 2
+
+#define GEN5_VERTICAL_STRIDE_0 0
+#define GEN5_VERTICAL_STRIDE_1 1
+#define GEN5_VERTICAL_STRIDE_2 2
+#define GEN5_VERTICAL_STRIDE_4 3
+#define GEN5_VERTICAL_STRIDE_8 4
+#define GEN5_VERTICAL_STRIDE_16 5
+#define GEN5_VERTICAL_STRIDE_32 6
+#define GEN5_VERTICAL_STRIDE_64 7
+#define GEN5_VERTICAL_STRIDE_128 8
+#define GEN5_VERTICAL_STRIDE_256 9
+#define GEN5_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
+
+#define GEN5_WIDTH_1 0
+#define GEN5_WIDTH_2 1
+#define GEN5_WIDTH_4 2
+#define GEN5_WIDTH_8 3
+#define GEN5_WIDTH_16 4
+
+#define GEN5_STATELESS_BUFFER_BOUNDARY_1K 0
+#define GEN5_STATELESS_BUFFER_BOUNDARY_2K 1
+#define GEN5_STATELESS_BUFFER_BOUNDARY_4K 2
+#define GEN5_STATELESS_BUFFER_BOUNDARY_8K 3
+#define GEN5_STATELESS_BUFFER_BOUNDARY_16K 4
+#define GEN5_STATELESS_BUFFER_BOUNDARY_32K 5
+#define GEN5_STATELESS_BUFFER_BOUNDARY_64K 6
+#define GEN5_STATELESS_BUFFER_BOUNDARY_128K 7
+#define GEN5_STATELESS_BUFFER_BOUNDARY_256K 8
+#define GEN5_STATELESS_BUFFER_BOUNDARY_512K 9
+#define GEN5_STATELESS_BUFFER_BOUNDARY_1M 10
+#define GEN5_STATELESS_BUFFER_BOUNDARY_2M 11
+
+#define GEN5_POLYGON_FACING_FRONT 0
+#define GEN5_POLYGON_FACING_BACK 1
+
+#define GEN5_MESSAGE_TARGET_NULL 0
+#define GEN5_MESSAGE_TARGET_MATH 1
+#define GEN5_MESSAGE_TARGET_SAMPLER 2
+#define GEN5_MESSAGE_TARGET_GATEWAY 3
+#define GEN5_MESSAGE_TARGET_DATAPORT_READ 4
+#define GEN5_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define GEN5_MESSAGE_TARGET_URB 6
+#define GEN5_MESSAGE_TARGET_THREAD_SPAWNER 7
+
+#define GEN5_SAMPLER_RETURN_FORMAT_FLOAT32 0
+#define GEN5_SAMPLER_RETURN_FORMAT_UINT32 2
+#define GEN5_SAMPLER_RETURN_FORMAT_SINT32 3
+
+#define GEN5_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
+#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
+#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
+#define GEN5_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
+#define GEN5_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
+#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
+#define GEN5_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
+#define GEN5_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
+#define GEN5_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
+#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define GEN5_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
+#define GEN5_SAMPLER_MESSAGE_SIMD8_RESINFO 2
+#define GEN5_SAMPLER_MESSAGE_SIMD16_RESINFO 2
+#define GEN5_SAMPLER_MESSAGE_SIMD4X2_LD 3
+#define GEN5_SAMPLER_MESSAGE_SIMD8_LD 3
+#define GEN5_SAMPLER_MESSAGE_SIMD16_LD 3
+
+#define GEN5_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
+#define GEN5_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
+#define GEN5_DATAPORT_OWORD_BLOCK_2_OWORDS 2
+#define GEN5_DATAPORT_OWORD_BLOCK_4_OWORDS 3
+#define GEN5_DATAPORT_OWORD_BLOCK_8_OWORDS 4
+
+#define GEN5_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
+#define GEN5_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
+
+#define GEN5_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
+#define GEN5_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+
+#define GEN5_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+#define GEN5_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
+#define GEN5_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define GEN5_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+
+#define GEN5_DATAPORT_READ_TARGET_DATA_CACHE 0
+#define GEN5_DATAPORT_READ_TARGET_RENDER_CACHE 1
+#define GEN5_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
+
+#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
+#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
+#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
+#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
+#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
+
+#define GEN5_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
+#define GEN5_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
+#define GEN5_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
+#define GEN5_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
+#define GEN5_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
+#define GEN5_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
+#define GEN5_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+
+#define GEN5_MATH_FUNCTION_INV 1
+#define GEN5_MATH_FUNCTION_LOG 2
+#define GEN5_MATH_FUNCTION_EXP 3
+#define GEN5_MATH_FUNCTION_SQRT 4
+#define GEN5_MATH_FUNCTION_RSQ 5
+#define GEN5_MATH_FUNCTION_SIN 6 /* was 7 */
+#define GEN5_MATH_FUNCTION_COS 7 /* was 8 */
+#define GEN5_MATH_FUNCTION_SINCOS 8 /* was 6 */
+#define GEN5_MATH_FUNCTION_TAN 9
+#define GEN5_MATH_FUNCTION_POW 10
+#define GEN5_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
+#define GEN5_MATH_FUNCTION_INT_DIV_QUOTIENT 12
+#define GEN5_MATH_FUNCTION_INT_DIV_REMAINDER 13
+
+#define GEN5_MATH_INTEGER_UNSIGNED 0
+#define GEN5_MATH_INTEGER_SIGNED 1
+
+#define GEN5_MATH_PRECISION_FULL 0
+#define GEN5_MATH_PRECISION_PARTIAL 1
+
+#define GEN5_MATH_SATURATE_NONE 0
+#define GEN5_MATH_SATURATE_SATURATE 1
+
+#define GEN5_MATH_DATA_VECTOR 0
+#define GEN5_MATH_DATA_SCALAR 1
+
+#define GEN5_URB_OPCODE_WRITE 0
+
+#define GEN5_URB_SWIZZLE_NONE 0
+#define GEN5_URB_SWIZZLE_INTERLEAVE 1
+#define GEN5_URB_SWIZZLE_TRANSPOSE 2
+
+#define GEN5_SCRATCH_SPACE_SIZE_1K 0
+#define GEN5_SCRATCH_SPACE_SIZE_2K 1
+#define GEN5_SCRATCH_SPACE_SIZE_4K 2
+#define GEN5_SCRATCH_SPACE_SIZE_8K 3
+#define GEN5_SCRATCH_SPACE_SIZE_16K 4
+#define GEN5_SCRATCH_SPACE_SIZE_32K 5
+#define GEN5_SCRATCH_SPACE_SIZE_64K 6
+#define GEN5_SCRATCH_SPACE_SIZE_128K 7
+#define GEN5_SCRATCH_SPACE_SIZE_256K 8
+#define GEN5_SCRATCH_SPACE_SIZE_512K 9
+#define GEN5_SCRATCH_SPACE_SIZE_1M 10
+#define GEN5_SCRATCH_SPACE_SIZE_2M 11
+
+
+
+
+#define CMD_URB_FENCE 0x6000
+#define CMD_CONST_BUFFER_STATE 0x6001
+#define CMD_CONST_BUFFER 0x6002
+
+#define CMD_STATE_BASE_ADDRESS 0x6101
+#define CMD_STATE_INSN_POINTER 0x6102
+#define CMD_PIPELINE_SELECT 0x6104
+
+#define CMD_PIPELINED_STATE_POINTERS 0x7800
+#define CMD_BINDING_TABLE_PTRS 0x7801
+#define CMD_VERTEX_BUFFER 0x7808
+#define CMD_VERTEX_ELEMENT 0x7809
+#define CMD_INDEX_BUFFER 0x780a
+#define CMD_VF_STATISTICS 0x780b
+
+#define CMD_DRAW_RECT 0x7900
+#define CMD_BLEND_CONSTANT_COLOR 0x7901
+#define CMD_CHROMA_KEY 0x7904
+#define CMD_DEPTH_BUFFER 0x7905
+#define CMD_POLY_STIPPLE_OFFSET 0x7906
+#define CMD_POLY_STIPPLE_PATTERN 0x7907
+#define CMD_LINE_STIPPLE_PATTERN 0x7908
+#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7908
+
+#define CMD_PIPE_CONTROL 0x7a00
+
+#define CMD_3D_PRIM 0x7b00
+
+#define CMD_MI_FLUSH 0x0200
+
+
+/* Various values from the R0 vertex header:
+ */
+#define R02_PRIM_END 0x1
+#define R02_PRIM_START 0x2
+
+/* media pipeline */
+
+#define GEN5_VFE_MODE_GENERIC 0x0
+#define GEN5_VFE_MODE_VLD_MPEG2 0x1
+#define GEN5_VFE_MODE_IS 0x2
+#define GEN5_VFE_MODE_AVC_MC 0x4
+#define GEN5_VFE_MODE_AVC_IT 0x7
+#define GEN5_VFE_MODE_VC1_IT 0xB
+
+#define GEN5_VFE_DEBUG_COUNTER_FREE 0
+#define GEN5_VFE_DEBUG_COUNTER_FROZEN 1
+#define GEN5_VFE_DEBUG_COUNTER_ONCE 2
+#define GEN5_VFE_DEBUG_COUNTER_ALWAYS 3
+
+/* VLD_STATE */
+#define GEN5_MPEG_TOP_FIELD 1
+#define GEN5_MPEG_BOTTOM_FIELD 2
+#define GEN5_MPEG_FRAME 3
+#define GEN5_MPEG_QSCALE_LINEAR 0
+#define GEN5_MPEG_QSCALE_NONLINEAR 1
+#define GEN5_MPEG_ZIGZAG_SCAN 0
+#define GEN5_MPEG_ALTER_VERTICAL_SCAN 1
+#define GEN5_MPEG_I_PICTURE 1
+#define GEN5_MPEG_P_PICTURE 2
+#define GEN5_MPEG_B_PICTURE 3
+
+/* Command packets:
+ */
+struct header
+{
+ unsigned int length:16;
+ unsigned int opcode:16;
+};
+
+
+union header_union
+{
+ struct header bits;
+ unsigned int dword;
+};
+
+struct gen5_3d_control
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int notify_enable:1;
+ unsigned int pad:3;
+ unsigned int wc_flush_enable:1;
+ unsigned int depth_stall_enable:1;
+ unsigned int operation:2;
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int pad:2;
+ unsigned int dest_addr_type:1;
+ unsigned int dest_addr:29;
+ } dest;
+
+ unsigned int dword2;
+ unsigned int dword3;
+};
+
+
+struct gen5_3d_primitive
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int pad:2;
+ unsigned int topology:5;
+ unsigned int indexed:1;
+ unsigned int opcode:16;
+ } header;
+
+ unsigned int verts_per_instance;
+ unsigned int start_vert_location;
+ unsigned int instance_count;
+ unsigned int start_instance_location;
+ unsigned int base_vert_location;
+};
+
+/* These seem to be passed around as function args, so it works out
+ * better to keep them as #defines:
+ */
+#define GEN5_FLUSH_READ_CACHE 0x1
+#define GEN5_FLUSH_STATE_CACHE 0x2
+#define GEN5_INHIBIT_FLUSH_RENDER_CACHE 0x4
+#define GEN5_FLUSH_SNAPSHOT_COUNTERS 0x8
+
+struct gen5_mi_flush
+{
+ unsigned int flags:4;
+ unsigned int pad:12;
+ unsigned int opcode:16;
+};
+
+struct gen5_vf_statistics
+{
+ unsigned int statistics_enable:1;
+ unsigned int pad:15;
+ unsigned int opcode:16;
+};
+
+
+
+struct gen5_binding_table_pointers
+{
+ struct header header;
+ unsigned int vs;
+ unsigned int gs;
+ unsigned int clp;
+ unsigned int sf;
+ unsigned int wm;
+};
+
+
+struct gen5_blend_constant_color
+{
+ struct header header;
+ float blend_constant_color[4];
+};
+
+
+struct gen5_depthbuffer
+{
+ union header_union header;
+
+ union {
+ struct {
+ unsigned int pitch:18;
+ unsigned int format:3;
+ unsigned int pad:4;
+ unsigned int depth_offset_disable:1;
+ unsigned int tile_walk:1;
+ unsigned int tiled_surface:1;
+ unsigned int pad2:1;
+ unsigned int surface_type:3;
+ } bits;
+ unsigned int dword;
+ } dword1;
+
+ unsigned int dword2_base_addr;
+
+ union {
+ struct {
+ unsigned int pad:1;
+ unsigned int mipmap_layout:1;
+ unsigned int lod:4;
+ unsigned int width:13;
+ unsigned int height:13;
+ } bits;
+ unsigned int dword;
+ } dword3;
+
+ union {
+ struct {
+ unsigned int pad:12;
+ unsigned int min_array_element:9;
+ unsigned int depth:11;
+ } bits;
+ unsigned int dword;
+ } dword4;
+};
+
+struct gen5_drawrect
+{
+ struct header header;
+ unsigned int xmin:16;
+ unsigned int ymin:16;
+ unsigned int xmax:16;
+ unsigned int ymax:16;
+ unsigned int xorg:16;
+ unsigned int yorg:16;
+};
+
+
+
+
+struct gen5_global_depth_offset_clamp
+{
+ struct header header;
+ float depth_offset_clamp;
+};
+
+struct gen5_indexbuffer
+{
+ union {
+ struct
+ {
+ unsigned int length:8;
+ unsigned int index_format:2;
+ unsigned int cut_index_enable:1;
+ unsigned int pad:5;
+ unsigned int opcode:16;
+ } bits;
+ unsigned int dword;
+
+ } header;
+
+ unsigned int buffer_start;
+ unsigned int buffer_end;
+};
+
+
+struct gen5_line_stipple
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int pattern:16;
+ unsigned int pad:16;
+ } bits0;
+
+ struct
+ {
+ unsigned int repeat_count:9;
+ unsigned int pad:7;
+ unsigned int inverse_repeat_count:16;
+ } bits1;
+};
+
+
+struct gen5_pipelined_state_pointers
+{
+ struct header header;
+
+ struct {
+ unsigned int pad:5;
+ unsigned int offset:27;
+ } vs;
+
+ struct
+ {
+ unsigned int enable:1;
+ unsigned int pad:4;
+ unsigned int offset:27;
+ } gs;
+
+ struct
+ {
+ unsigned int enable:1;
+ unsigned int pad:4;
+ unsigned int offset:27;
+ } clp;
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int offset:27;
+ } sf;
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int offset:27;
+ } wm;
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int offset:27; /* KW: check me! */
+ } cc;
+};
+
+
+struct gen5_polygon_stipple_offset
+{
+ struct header header;
+
+ struct {
+ unsigned int y_offset:5;
+ unsigned int pad:3;
+ unsigned int x_offset:5;
+ unsigned int pad0:19;
+ } bits0;
+};
+
+
+
+struct gen5_polygon_stipple
+{
+ struct header header;
+ unsigned int stipple[32];
+};
+
+
+
+struct gen5_pipeline_select
+{
+ struct
+ {
+ unsigned int pipeline_select:1;
+ unsigned int pad:15;
+ unsigned int opcode:16;
+ } header;
+};
+
+
+struct gen5_pipe_control
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int notify_enable:1;
+ unsigned int pad:2;
+ unsigned int instruction_state_cache_flush_enable:1;
+ unsigned int write_cache_flush_enable:1;
+ unsigned int depth_stall_enable:1;
+ unsigned int post_sync_operation:2;
+
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int pad:2;
+ unsigned int dest_addr_type:1;
+ unsigned int dest_addr:29;
+ } bits1;
+
+ unsigned int data0;
+ unsigned int data1;
+};
+
+
+struct gen5_urb_fence
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int vs_realloc:1;
+ unsigned int gs_realloc:1;
+ unsigned int clp_realloc:1;
+ unsigned int sf_realloc:1;
+ unsigned int vfe_realloc:1;
+ unsigned int cs_realloc:1;
+ unsigned int pad:2;
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int vs_fence:10;
+ unsigned int gs_fence:10;
+ unsigned int clp_fence:10;
+ unsigned int pad:2;
+ } bits0;
+
+ struct
+ {
+ unsigned int sf_fence:10;
+ unsigned int vf_fence:10;
+ unsigned int cs_fence:10;
+ unsigned int pad:2;
+ } bits1;
+};
+
+struct gen5_constant_buffer_state /* previously gen5_command_streamer */
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int nr_urb_entries:3;
+ unsigned int pad:1;
+ unsigned int urb_entry_size:5;
+ unsigned int pad0:23;
+ } bits0;
+};
+
+struct gen5_constant_buffer
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int valid:1;
+ unsigned int pad:7;
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int buffer_length:6;
+ unsigned int buffer_address:26;
+ } bits0;
+};
+
+struct gen5_state_base_address
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:4;
+ unsigned int general_state_address:27;
+ } bits0;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:4;
+ unsigned int surface_state_address:27;
+ } bits1;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:4;
+ unsigned int indirect_object_state_address:27;
+ } bits2;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:11;
+ unsigned int general_state_upper_bound:20;
+ } bits3;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:11;
+ unsigned int indirect_object_state_upper_bound:20;
+ } bits4;
+};
+
+struct gen5_state_prefetch
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int prefetch_count:3;
+ unsigned int pad:3;
+ unsigned int prefetch_pointer:26;
+ } bits0;
+};
+
+struct gen5_system_instruction_pointer
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int pad:4;
+ unsigned int system_instruction_pointer:28;
+ } bits0;
+};
+
+
+
+
+/* State structs for the various fixed function units:
+ */
+
+
+struct thread0
+{
+ unsigned int pad0:1;
+ unsigned int grf_reg_count:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer:26;
+};
+
+struct thread1
+{
+ unsigned int ext_halt_exception_enable:1;
+ unsigned int sw_exception_enable:1;
+ unsigned int mask_stack_exception_enable:1;
+ unsigned int timeout_exception_enable:1;
+ unsigned int illegal_op_exception_enable:1;
+ unsigned int pad0:3;
+ unsigned int depth_coef_urb_read_offset:6; /* WM only */
+ unsigned int pad1:2;
+ unsigned int floating_point_mode:1;
+ unsigned int thread_priority:1;
+ unsigned int binding_table_entry_count:8;
+ unsigned int pad3:5;
+ unsigned int single_program_flow:1;
+};
+
+struct thread2
+{
+ unsigned int per_thread_scratch_space:4;
+ unsigned int pad0:6;
+ unsigned int scratch_space_base_pointer:22;
+};
+
+
+struct thread3
+{
+ unsigned int dispatch_grf_start_reg:4;
+ unsigned int urb_entry_read_offset:6;
+ unsigned int pad0:1;
+ unsigned int urb_entry_read_length:6;
+ unsigned int pad1:1;
+ unsigned int const_urb_entry_read_offset:6;
+ unsigned int pad2:1;
+ unsigned int const_urb_entry_read_length:6;
+ unsigned int pad3:1;
+};
+
+
+
+struct gen5_clip_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:9;
+ unsigned int gs_output_stats:1; /* not always */
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:6; /* may be less */
+ unsigned int pad3:1;
+ } thread4;
+
+ struct
+ {
+ unsigned int pad0:13;
+ unsigned int clip_mode:3;
+ unsigned int userclip_enable_flags:8;
+ unsigned int userclip_must_clip:1;
+ unsigned int pad1:1;
+ unsigned int guard_band_enable:1;
+ unsigned int viewport_z_clip_enable:1;
+ unsigned int viewport_xy_clip_enable:1;
+ unsigned int vertex_position_space:1;
+ unsigned int api_mode:1;
+ unsigned int pad2:1;
+ } clip5;
+
+ struct
+ {
+ unsigned int pad0:5;
+ unsigned int clipper_viewport_state_ptr:27;
+ } clip6;
+
+
+ float viewport_xmin;
+ float viewport_xmax;
+ float viewport_ymin;
+ float viewport_ymax;
+};
+
+
+
+struct gen5_cc_unit_state
+{
+ struct
+ {
+ unsigned int pad0:3;
+ unsigned int bf_stencil_pass_depth_pass_op:3;
+ unsigned int bf_stencil_pass_depth_fail_op:3;
+ unsigned int bf_stencil_fail_op:3;
+ unsigned int bf_stencil_func:3;
+ unsigned int bf_stencil_enable:1;
+ unsigned int pad1:2;
+ unsigned int stencil_write_enable:1;
+ unsigned int stencil_pass_depth_pass_op:3;
+ unsigned int stencil_pass_depth_fail_op:3;
+ unsigned int stencil_fail_op:3;
+ unsigned int stencil_func:3;
+ unsigned int stencil_enable:1;
+ } cc0;
+
+
+ struct
+ {
+ unsigned int bf_stencil_ref:8;
+ unsigned int stencil_write_mask:8;
+ unsigned int stencil_test_mask:8;
+ unsigned int stencil_ref:8;
+ } cc1;
+
+
+ struct
+ {
+ unsigned int logicop_enable:1;
+ unsigned int pad0:10;
+ unsigned int depth_write_enable:1;
+ unsigned int depth_test_function:3;
+ unsigned int depth_test:1;
+ unsigned int bf_stencil_write_mask:8;
+ unsigned int bf_stencil_test_mask:8;
+ } cc2;
+
+
+ struct
+ {
+ unsigned int pad0:8;
+ unsigned int alpha_test_func:3;
+ unsigned int alpha_test:1;
+ unsigned int blend_enable:1;
+ unsigned int ia_blend_enable:1;
+ unsigned int pad1:1;
+ unsigned int alpha_test_format:1;
+ unsigned int pad2:16;
+ } cc3;
+
+ struct
+ {
+ unsigned int pad0:5;
+ unsigned int cc_viewport_state_offset:27;
+ } cc4;
+
+ struct
+ {
+ unsigned int pad0:2;
+ unsigned int ia_dest_blend_factor:5;
+ unsigned int ia_src_blend_factor:5;
+ unsigned int ia_blend_function:3;
+ unsigned int statistics_enable:1;
+ unsigned int logicop_func:4;
+ unsigned int pad1:11;
+ unsigned int dither_enable:1;
+ } cc5;
+
+ struct
+ {
+ unsigned int clamp_post_alpha_blend:1;
+ unsigned int clamp_pre_alpha_blend:1;
+ unsigned int clamp_range:2;
+ unsigned int pad0:11;
+ unsigned int y_dither_offset:2;
+ unsigned int x_dither_offset:2;
+ unsigned int dest_blend_factor:5;
+ unsigned int src_blend_factor:5;
+ unsigned int blend_function:3;
+ } cc6;
+
+ struct {
+ union {
+ float f;
+ unsigned char ub[4];
+ } alpha_ref;
+ } cc7;
+};
+
+
+
+struct gen5_sf_unit_state
+{
+ struct thread0 thread0;
+ struct {
+ unsigned int pad0:7;
+ unsigned int sw_exception_enable:1;
+ unsigned int pad1:3;
+ unsigned int mask_stack_exception_enable:1;
+ unsigned int pad2:1;
+ unsigned int illegal_op_exception_enable:1;
+ unsigned int pad3:2;
+ unsigned int floating_point_mode:1;
+ unsigned int thread_priority:1;
+ unsigned int binding_table_entry_count:8;
+ unsigned int pad4:5;
+ unsigned int single_program_flow:1;
+ } sf1;
+
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:10;
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:6;
+ unsigned int pad3:1;
+ } thread4;
+
+ struct
+ {
+ unsigned int front_winding:1;
+ unsigned int viewport_transform:1;
+ unsigned int pad0:3;
+ unsigned int sf_viewport_state_offset:27;
+ } sf5;
+
+ struct
+ {
+ unsigned int pad0:9;
+ unsigned int dest_org_vbias:4;
+ unsigned int dest_org_hbias:4;
+ unsigned int scissor:1;
+ unsigned int disable_2x2_trifilter:1;
+ unsigned int disable_zero_pix_trifilter:1;
+ unsigned int point_rast_rule:2;
+ unsigned int line_endcap_aa_region_width:2;
+ unsigned int line_width:4;
+ unsigned int fast_scissor_disable:1;
+ unsigned int cull_mode:2;
+ unsigned int aa_enable:1;
+ } sf6;
+
+ struct
+ {
+ unsigned int point_size:11;
+ unsigned int use_point_size_state:1;
+ unsigned int subpixel_precision:1;
+ unsigned int sprite_point:1;
+ unsigned int pad0:11;
+ unsigned int trifan_pv:2;
+ unsigned int linestrip_pv:2;
+ unsigned int tristrip_pv:2;
+ unsigned int line_last_pixel_enable:1;
+ } sf7;
+
+};
+
+
+struct gen5_gs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:10;
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:1;
+ unsigned int pad3:6;
+ } thread4;
+
+ struct
+ {
+ unsigned int sampler_count:3;
+ unsigned int pad0:2;
+ unsigned int sampler_state_pointer:27;
+ } gs5;
+
+
+ struct
+ {
+ unsigned int max_vp_index:4;
+ unsigned int pad0:26;
+ unsigned int reorder_enable:1;
+ unsigned int pad1:1;
+ } gs6;
+};
+
+
+struct gen5_vs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:10;
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:4;
+ unsigned int pad3:3;
+ } thread4;
+
+ struct
+ {
+ unsigned int sampler_count:3;
+ unsigned int pad0:2;
+ unsigned int sampler_state_pointer:27;
+ } vs5;
+
+ struct
+ {
+ unsigned int vs_enable:1;
+ unsigned int vert_cache_disable:1;
+ unsigned int pad0:30;
+ } vs6;
+};
+
+
+struct gen5_wm_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct {
+ unsigned int stats_enable:1;
+ unsigned int pad0:1;
+ unsigned int sampler_count:3;
+ unsigned int sampler_state_pointer:27;
+ } wm4;
+
+ struct
+ {
+ unsigned int enable_8_pix:1;
+ unsigned int enable_16_pix:1;
+ unsigned int enable_32_pix:1;
+ unsigned int pad0:7;
+ unsigned int legacy_global_depth_bias:1;
+ unsigned int line_stipple:1;
+ unsigned int depth_offset:1;
+ unsigned int polygon_stipple:1;
+ unsigned int line_aa_region_width:2;
+ unsigned int line_endcap_aa_region_width:2;
+ unsigned int early_depth_test:1;
+ unsigned int thread_dispatch_enable:1;
+ unsigned int program_uses_depth:1;
+ unsigned int program_computes_depth:1;
+ unsigned int program_uses_killpixel:1;
+ unsigned int legacy_line_rast: 1;
+ unsigned int transposed_urb_read:1;
+ unsigned int max_threads:7;
+ } wm5;
+
+ float global_depth_offset_constant;
+ float global_depth_offset_scale;
+
+ struct {
+ unsigned int pad0:1;
+ unsigned int grf_reg_count_1:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer_1:26;
+ } wm8;
+
+ struct {
+ unsigned int pad0:1;
+ unsigned int grf_reg_count_2:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer_2:26;
+ } wm9;
+
+ struct {
+ unsigned int pad0:1;
+ unsigned int grf_reg_count_3:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer_3:26;
+ } wm10;
+};
+
+struct gen5_wm_unit_state_padded {
+ struct gen5_wm_unit_state state;
+ char pad[64 - sizeof(struct gen5_wm_unit_state)];
+};
+
+/* The hardware supports two different modes for border color. The
+ * default (OpenGL) mode uses floating-point color channels, while the
+ * legacy mode uses 4 bytes.
+ *
+ * More significantly, the legacy mode respects the components of the
+ * border color for channels not present in the source, (whereas the
+ * default mode will ignore the border color's alpha channel and use
+ * alpha==1 for an RGB source, for example).
+ *
+ * The legacy mode matches the semantics specified by the Render
+ * extension.
+ */
+struct gen5_sampler_default_border_color {
+ float color[4];
+};
+
+struct gen5_sampler_legacy_border_color {
+ uint8_t color[4];
+};
+
+struct gen5_sampler_state
+{
+
+ struct
+ {
+ unsigned int shadow_function:3;
+ unsigned int lod_bias:11;
+ unsigned int min_filter:3;
+ unsigned int mag_filter:3;
+ unsigned int mip_filter:2;
+ unsigned int base_level:5;
+ unsigned int pad:1;
+ unsigned int lod_preclamp:1;
+ unsigned int border_color_mode:1;
+ unsigned int pad0:1;
+ unsigned int disable:1;
+ } ss0;
+
+ struct
+ {
+ unsigned int r_wrap_mode:3;
+ unsigned int t_wrap_mode:3;
+ unsigned int s_wrap_mode:3;
+ unsigned int pad:3;
+ unsigned int max_lod:10;
+ unsigned int min_lod:10;
+ } ss1;
+
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int border_color_pointer:27;
+ } ss2;
+
+ struct
+ {
+ unsigned int pad:19;
+ unsigned int max_aniso:3;
+ unsigned int chroma_key_mode:1;
+ unsigned int chroma_key_index:2;
+ unsigned int chroma_key_enable:1;
+ unsigned int monochrome_filter_width:3;
+ unsigned int monochrome_filter_height:3;
+ } ss3;
+};
+
+
+struct gen5_clipper_viewport
+{
+ float xmin;
+ float xmax;
+ float ymin;
+ float ymax;
+};
+
+struct gen5_cc_viewport
+{
+ float min_depth;
+ float max_depth;
+};
+
+struct gen5_sf_viewport
+{
+ struct {
+ float m00;
+ float m11;
+ float m22;
+ float m30;
+ float m31;
+ float m32;
+ } viewport;
+
+ struct {
+ short xmin;
+ short ymin;
+ short xmax;
+ short ymax;
+ } scissor;
+};
+
+/* Documented in the subsystem/shared-functions/sampler chapter...
+ */
+struct gen5_surface_state
+{
+ struct {
+ unsigned int cube_pos_z:1;
+ unsigned int cube_neg_z:1;
+ unsigned int cube_pos_y:1;
+ unsigned int cube_neg_y:1;
+ unsigned int cube_pos_x:1;
+ unsigned int cube_neg_x:1;
+ unsigned int pad:3;
+ unsigned int render_cache_read_mode:1;
+ unsigned int mipmap_layout_mode:1;
+ unsigned int vert_line_stride_ofs:1;
+ unsigned int vert_line_stride:1;
+ unsigned int color_blend:1;
+ unsigned int writedisable_blue:1;
+ unsigned int writedisable_green:1;
+ unsigned int writedisable_red:1;
+ unsigned int writedisable_alpha:1;
+ unsigned int surface_format:9;
+ unsigned int data_return_format:1;
+ unsigned int pad0:1;
+ unsigned int surface_type:3;
+ } ss0;
+
+ struct {
+ unsigned int base_addr;
+ } ss1;
+
+ struct {
+ unsigned int render_target_rotation:2;
+ unsigned int mip_count:4;
+ unsigned int width:13;
+ unsigned int height:13;
+ } ss2;
+
+ struct {
+ unsigned int tile_walk:1;
+ unsigned int tiled_surface:1;
+ unsigned int pad:1;
+ unsigned int pitch:18;
+ unsigned int depth:11;
+ } ss3;
+
+ struct {
+ unsigned int pad:19;
+ unsigned int min_array_elt:9;
+ unsigned int min_lod:4;
+ } ss4;
+
+ struct {
+ unsigned int pad:20;
+ unsigned int y_offset:4;
+ unsigned int pad2:1;
+ unsigned int x_offset:7;
+ } ss5;
+};
+
+
+
+struct gen5_vertex_buffer_state
+{
+ struct {
+ unsigned int pitch:11;
+ unsigned int pad:15;
+ unsigned int access_type:1;
+ unsigned int vb_index:5;
+ } vb0;
+
+ unsigned int start_addr;
+ unsigned int max_index;
+#if 1
+ unsigned int instance_data_step_rate; /* not included for sequential/random vertices? */
+#endif
+};
+
+#define GEN5_VBP_MAX 17
+
+struct gen5_vb_array_state {
+ struct header header;
+ struct gen5_vertex_buffer_state vb[GEN5_VBP_MAX];
+};
+
+
+struct gen5_vertex_element_state
+{
+ struct
+ {
+ unsigned int src_offset:11;
+ unsigned int pad:5;
+ unsigned int src_format:9;
+ unsigned int pad0:1;
+ unsigned int valid:1;
+ unsigned int vertex_buffer_index:5;
+ } ve0;
+
+ struct
+ {
+ unsigned int dst_offset:8;
+ unsigned int pad:8;
+ unsigned int vfcomponent3:4;
+ unsigned int vfcomponent2:4;
+ unsigned int vfcomponent1:4;
+ unsigned int vfcomponent0:4;
+ } ve1;
+};
+
+#define GEN5_VEP_MAX 18
+
+struct gen5_vertex_element_packet {
+ struct header header;
+ struct gen5_vertex_element_state ve[GEN5_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
+};
+
+
+struct gen5_urb_immediate {
+ unsigned int opcode:4;
+ unsigned int offset:6;
+ unsigned int swizzle_control:2;
+ unsigned int pad:1;
+ unsigned int allocate:1;
+ unsigned int used:1;
+ unsigned int complete:1;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+};
+
+/* Instruction format for the execution units:
+ */
+
+struct gen5_instruction
+{
+ struct
+ {
+ unsigned int opcode:7;
+ unsigned int pad:1;
+ unsigned int access_mode:1;
+ unsigned int mask_control:1;
+ unsigned int dependency_control:2;
+ unsigned int compression_control:2;
+ unsigned int thread_control:2;
+ unsigned int predicate_control:4;
+ unsigned int predicate_inverse:1;
+ unsigned int execution_size:3;
+ unsigned int destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */
+ unsigned int pad0:2;
+ unsigned int debug_control:1;
+ unsigned int saturate:1;
+ } header;
+
+ union {
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int src1_reg_file:2;
+ unsigned int src1_reg_type:3;
+ unsigned int pad:1;
+ unsigned int dest_subreg_nr:5;
+ unsigned int dest_reg_nr:8;
+ unsigned int dest_horiz_stride:2;
+ unsigned int dest_address_mode:1;
+ } da1;
+
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int pad:6;
+ int dest_indirect_offset:10; /* offset against the deref'd address reg */
+ unsigned int dest_subreg_nr:3; /* subnr for the address reg a0.x */
+ unsigned int dest_horiz_stride:2;
+ unsigned int dest_address_mode:1;
+ } ia1;
+
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int src1_reg_file:2;
+ unsigned int src1_reg_type:3;
+ unsigned int pad0:1;
+ unsigned int dest_writemask:4;
+ unsigned int dest_subreg_nr:1;
+ unsigned int dest_reg_nr:8;
+ unsigned int pad1:2;
+ unsigned int dest_address_mode:1;
+ } da16;
+
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int pad0:6;
+ unsigned int dest_writemask:4;
+ int dest_indirect_offset:6;
+ unsigned int dest_subreg_nr:3;
+ unsigned int pad1:2;
+ unsigned int dest_address_mode:1;
+ } ia16;
+ } bits1;
+
+
+ union {
+ struct
+ {
+ unsigned int src0_subreg_nr:5;
+ unsigned int src0_reg_nr:8;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_horiz_stride:2;
+ unsigned int src0_width:3;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad:6;
+ } da1;
+
+ struct
+ {
+ int src0_indirect_offset:10;
+ unsigned int src0_subreg_nr:3;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_horiz_stride:2;
+ unsigned int src0_width:3;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad:6;
+ } ia1;
+
+ struct
+ {
+ unsigned int src0_swz_x:2;
+ unsigned int src0_swz_y:2;
+ unsigned int src0_subreg_nr:1;
+ unsigned int src0_reg_nr:8;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_swz_z:2;
+ unsigned int src0_swz_w:2;
+ unsigned int pad0:1;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad1:6;
+ } da16;
+
+ struct
+ {
+ unsigned int src0_swz_x:2;
+ unsigned int src0_swz_y:2;
+ int src0_indirect_offset:6;
+ unsigned int src0_subreg_nr:3;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_swz_z:2;
+ unsigned int src0_swz_w:2;
+ unsigned int pad0:1;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad1:6;
+ } ia16;
+
+ } bits2;
+
+ union
+ {
+ struct
+ {
+ unsigned int src1_subreg_nr:5;
+ unsigned int src1_reg_nr:8;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad:1;
+ unsigned int src1_horiz_stride:2;
+ unsigned int src1_width:3;
+ unsigned int src1_vert_stride:4;
+ unsigned int pad0:7;
+ } da1;
+
+ struct
+ {
+ unsigned int src1_swz_x:2;
+ unsigned int src1_swz_y:2;
+ unsigned int src1_subreg_nr:1;
+ unsigned int src1_reg_nr:8;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad0:1;
+ unsigned int src1_swz_z:2;
+ unsigned int src1_swz_w:2;
+ unsigned int pad1:1;
+ unsigned int src1_vert_stride:4;
+ unsigned int pad2:7;
+ } da16;
+
+ struct
+ {
+ int src1_indirect_offset:10;
+ unsigned int src1_subreg_nr:3;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad0:1;
+ unsigned int src1_horiz_stride:2;
+ unsigned int src1_width:3;
+ unsigned int src1_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad1:6;
+ } ia1;
+
+ struct
+ {
+ unsigned int src1_swz_x:2;
+ unsigned int src1_swz_y:2;
+ int src1_indirect_offset:6;
+ unsigned int src1_subreg_nr:3;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad0:1;
+ unsigned int src1_swz_z:2;
+ unsigned int src1_swz_w:2;
+ unsigned int pad1:1;
+ unsigned int src1_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad2:6;
+ } ia16;
+
+
+ struct
+ {
+ int jump_count:16; /* note: signed */
+ unsigned int pop_count:4;
+ unsigned int pad0:12;
+ } if_else;
+
+ struct {
+ unsigned int function:4;
+ unsigned int int_type:1;
+ unsigned int precision:1;
+ unsigned int saturate:1;
+ unsigned int data_type:1;
+ unsigned int pad0:8;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } math;
+
+ struct {
+ unsigned int binding_table_index:8;
+ unsigned int sampler:4;
+ unsigned int return_format:2;
+ unsigned int msg_type:2;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } sampler;
+
+ struct gen5_urb_immediate urb;
+
+ struct {
+ unsigned int binding_table_index:8;
+ unsigned int msg_control:4;
+ unsigned int msg_type:2;
+ unsigned int target_cache:2;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } dp_read;
+
+ struct {
+ unsigned int binding_table_index:8;
+ unsigned int msg_control:3;
+ unsigned int pixel_scoreboard_clear:1;
+ unsigned int msg_type:3;
+ unsigned int send_commit_msg:1;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } dp_write;
+
+ struct {
+ unsigned int pad:16;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } generic;
+
+ unsigned int ud;
+ } bits3;
+};
+
+/* media pipeline */
+
+struct gen5_vfe_state {
+ struct {
+ unsigned int per_thread_scratch_space:4;
+ unsigned int pad3:3;
+ unsigned int extend_vfe_state_present:1;
+ unsigned int pad2:2;
+ unsigned int scratch_base:22;
+ } vfe0;
+
+ struct {
+ unsigned int debug_counter_control:2;
+ unsigned int children_present:1;
+ unsigned int vfe_mode:4;
+ unsigned int pad2:2;
+ unsigned int num_urb_entries:7;
+ unsigned int urb_entry_alloc_size:9;
+ unsigned int max_threads:7;
+ } vfe1;
+
+ struct {
+ unsigned int pad4:4;
+ unsigned int interface_descriptor_base:28;
+ } vfe2;
+};
+
+struct gen5_vld_state {
+ struct {
+ unsigned int pad6:6;
+ unsigned int scan_order:1;
+ unsigned int intra_vlc_format:1;
+ unsigned int quantizer_scale_type:1;
+ unsigned int concealment_motion_vector:1;
+ unsigned int frame_predict_frame_dct:1;
+ unsigned int top_field_first:1;
+ unsigned int picture_structure:2;
+ unsigned int intra_dc_precision:2;
+ unsigned int f_code_0_0:4;
+ unsigned int f_code_0_1:4;
+ unsigned int f_code_1_0:4;
+ unsigned int f_code_1_1:4;
+ } vld0;
+
+ struct {
+ unsigned int pad2:9;
+ unsigned int picture_coding_type:2;
+ unsigned int pad:21;
+ } vld1;
+
+ struct {
+ unsigned int index_0:4;
+ unsigned int index_1:4;
+ unsigned int index_2:4;
+ unsigned int index_3:4;
+ unsigned int index_4:4;
+ unsigned int index_5:4;
+ unsigned int index_6:4;
+ unsigned int index_7:4;
+ } desc_remap_table0;
+
+ struct {
+ unsigned int index_8:4;
+ unsigned int index_9:4;
+ unsigned int index_10:4;
+ unsigned int index_11:4;
+ unsigned int index_12:4;
+ unsigned int index_13:4;
+ unsigned int index_14:4;
+ unsigned int index_15:4;
+ } desc_remap_table1;
+};
+
+struct gen5_interface_descriptor {
+ struct {
+ unsigned int grf_reg_blocks:4;
+ unsigned int pad:2;
+ unsigned int kernel_start_pointer:26;
+ } desc0;
+
+ struct {
+ unsigned int pad:7;
+ unsigned int software_exception:1;
+ unsigned int pad2:3;
+ unsigned int maskstack_exception:1;
+ unsigned int pad3:1;
+ unsigned int illegal_opcode_exception:1;
+ unsigned int pad4:2;
+ unsigned int floating_point_mode:1;
+ unsigned int thread_priority:1;
+ unsigned int single_program_flow:1;
+ unsigned int pad5:1;
+ unsigned int const_urb_entry_read_offset:6;
+ unsigned int const_urb_entry_read_len:6;
+ } desc1;
+
+ struct {
+ unsigned int pad:2;
+ unsigned int sampler_count:3;
+ unsigned int sampler_state_pointer:27;
+ } desc2;
+
+ struct {
+ unsigned int binding_table_entry_count:5;
+ unsigned int binding_table_pointer:27;
+ } desc3;
+};
+
+struct gen6_blend_state
+{
+ struct {
+ unsigned int dest_blend_factor:5;
+ unsigned int source_blend_factor:5;
+ unsigned int pad3:1;
+ unsigned int blend_func:3;
+ unsigned int pad2:1;
+ unsigned int ia_dest_blend_factor:5;
+ unsigned int ia_source_blend_factor:5;
+ unsigned int pad1:1;
+ unsigned int ia_blend_func:3;
+ unsigned int pad0:1;
+ unsigned int ia_blend_enable:1;
+ unsigned int blend_enable:1;
+ } blend0;
+
+ struct {
+ unsigned int post_blend_clamp_enable:1;
+ unsigned int pre_blend_clamp_enable:1;
+ unsigned int clamp_range:2;
+ unsigned int pad0:4;
+ unsigned int x_dither_offset:2;
+ unsigned int y_dither_offset:2;
+ unsigned int dither_enable:1;
+ unsigned int alpha_test_func:3;
+ unsigned int alpha_test_enable:1;
+ unsigned int pad1:1;
+ unsigned int logic_op_func:4;
+ unsigned int logic_op_enable:1;
+ unsigned int pad2:1;
+ unsigned int write_disable_b:1;
+ unsigned int write_disable_g:1;
+ unsigned int write_disable_r:1;
+ unsigned int write_disable_a:1;
+ unsigned int pad3:1;
+ unsigned int alpha_to_coverage_dither:1;
+ unsigned int alpha_to_one:1;
+ unsigned int alpha_to_coverage:1;
+ } blend1;
+};
+
+struct gen6_color_calc_state
+{
+ struct {
+ unsigned int alpha_test_format:1;
+ unsigned int pad0:14;
+ unsigned int round_disable:1;
+ unsigned int bf_stencil_ref:8;
+ unsigned int stencil_ref:8;
+ } cc0;
+
+ union {
+ float alpha_ref_f;
+ struct {
+ unsigned int ui:8;
+ unsigned int pad0:24;
+ } alpha_ref_fi;
+ } cc1;
+
+ float constant_r;
+ float constant_g;
+ float constant_b;
+ float constant_a;
+};
+
+struct gen6_depth_stencil_state
+{
+ struct {
+ unsigned int pad0:3;
+ unsigned int bf_stencil_pass_depth_pass_op:3;
+ unsigned int bf_stencil_pass_depth_fail_op:3;
+ unsigned int bf_stencil_fail_op:3;
+ unsigned int bf_stencil_func:3;
+ unsigned int bf_stencil_enable:1;
+ unsigned int pad1:2;
+ unsigned int stencil_write_enable:1;
+ unsigned int stencil_pass_depth_pass_op:3;
+ unsigned int stencil_pass_depth_fail_op:3;
+ unsigned int stencil_fail_op:3;
+ unsigned int stencil_func:3;
+ unsigned int stencil_enable:1;
+ } ds0;
+
+ struct {
+ unsigned int bf_stencil_write_mask:8;
+ unsigned int bf_stencil_test_mask:8;
+ unsigned int stencil_write_mask:8;
+ unsigned int stencil_test_mask:8;
+ } ds1;
+
+ struct {
+ unsigned int pad0:26;
+ unsigned int depth_write_enable:1;
+ unsigned int depth_test_func:3;
+ unsigned int pad1:1;
+ unsigned int depth_test_enable:1;
+ } ds2;
+};
+
+typedef enum {
+ SAMPLER_FILTER_NEAREST = 0,
+ SAMPLER_FILTER_BILINEAR,
+ FILTER_COUNT
+} sampler_filter_t;
+
+typedef enum {
+ SAMPLER_EXTEND_NONE = 0,
+ SAMPLER_EXTEND_REPEAT,
+ SAMPLER_EXTEND_PAD,
+ SAMPLER_EXTEND_REFLECT,
+ EXTEND_COUNT
+} sampler_extend_t;
+
+typedef enum {
+ WM_KERNEL = 0,
+ WM_KERNEL_PROJECTIVE,
+
+ WM_KERNEL_MASK,
+ WM_KERNEL_MASK_PROJECTIVE,
+
+ WM_KERNEL_MASKCA,
+ WM_KERNEL_MASKCA_PROJECTIVE,
+
+ WM_KERNEL_MASKCA_SRCALPHA,
+ WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
+
+ WM_KERNEL_VIDEO_PLANAR,
+ WM_KERNEL_VIDEO_PACKED,
+ KERNEL_COUNT
+} wm_kernel_t;
+#endif
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
new file mode 100644
index 00000000..5d8eb77b
--- /dev/null
+++ b/src/sna/gen6_render.c
@@ -0,0 +1,2860 @@
+/*
+ * Copyright © 2006,2008,2011 Intel Corporation
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang@sna.com>
+ * Eric Anholt <eric@anholt.net>
+ * Carl Worth <cworth@redhat.com>
+ * Keith Packard <keithp@keithp.com>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <xf86.h>
+
+#include "sna.h"
+#include "sna_reg.h"
+#include "sna_render.h"
+#include "sna_render_inline.h"
+#include "sna_video.h"
+
+#include "gen6_render.h"
+
+#if DEBUG_RENDER
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+#define ALWAYS_EMIT_DRAWRECT 1
+
+#define NO_COMPOSITE 0
+#define NO_COPY 0
+#define NO_COPY_BOXES 0
+#define NO_FILL 0
+#define NO_FILL_BOXES 0
+
+static const uint32_t ps_kernel_nomask_affine[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_nomask_projective[][4] = {
+#include "exa_wm_src_projective.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_maskca_affine[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_mask_affine.g6b"
+#include "exa_wm_mask_sample_argb.g6b"
+#include "exa_wm_ca.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_maskca_projective[][4] = {
+#include "exa_wm_src_projective.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_mask_projective.g6b"
+#include "exa_wm_mask_sample_argb.g6b"
+#include "exa_wm_ca.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_a.g6b"
+#include "exa_wm_mask_affine.g6b"
+#include "exa_wm_mask_sample_argb.g6b"
+#include "exa_wm_ca_srcalpha.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = {
+#include "exa_wm_src_projective.g6b"
+#include "exa_wm_src_sample_a.g6b"
+#include "exa_wm_mask_projective.g6b"
+#include "exa_wm_mask_sample_argb.g6b"
+#include "exa_wm_ca_srcalpha.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_masknoca_affine[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_mask_affine.g6b"
+#include "exa_wm_mask_sample_a.g6b"
+#include "exa_wm_noca.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_masknoca_projective[][4] = {
+#include "exa_wm_src_projective.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_mask_projective.g6b"
+#include "exa_wm_mask_sample_a.g6b"
+#include "exa_wm_noca.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_packed[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_yuv_rgb.g6b"
+#include "exa_wm_write.g6b"
+};
+
+static const uint32_t ps_kernel_planar[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_planar.g6b"
+#include "exa_wm_yuv_rgb.g6b"
+#include "exa_wm_write.g6b"
+};
+
+#define KERNEL(kernel_enum, kernel, masked) \
+ [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), masked}
+static const struct wm_kernel_info {
+ const char *name;
+ const void *data;
+ unsigned int size;
+ Bool has_mask;
+} wm_kernels[] = {
+ KERNEL(NOMASK, ps_kernel_nomask_affine, FALSE),
+ KERNEL(NOMASK_PROJECTIVE, ps_kernel_nomask_projective, FALSE),
+
+ KERNEL(MASK, ps_kernel_masknoca_affine, TRUE),
+ KERNEL(MASK_PROJECTIVE, ps_kernel_masknoca_projective, TRUE),
+
+ KERNEL(MASKCA, ps_kernel_maskca_affine, TRUE),
+ KERNEL(MASKCA_PROJECTIVE, ps_kernel_maskca_projective, TRUE),
+
+ KERNEL(MASKCA_SRCALPHA, ps_kernel_maskca_srcalpha_affine, TRUE),
+ KERNEL(MASKCA_SRCALPHA_PROJECTIVE, ps_kernel_maskca_srcalpha_projective, TRUE),
+
+ KERNEL(VIDEO_PLANAR, ps_kernel_planar, FALSE),
+ KERNEL(VIDEO_PACKED, ps_kernel_packed, FALSE),
+};
+#undef KERNEL
+
+static const struct blendinfo {
+ Bool src_alpha;
+ uint32_t src_blend;
+ uint32_t dst_blend;
+} gen6_blend_op[] = {
+ /* Clear */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO},
+ /* Src */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO},
+ /* Dst */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE},
+ /* Over */ {1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
+ /* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE},
+ /* In */ {0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
+ /* InReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA},
+ /* Out */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
+ /* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
+ /* Atop */ {1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
+ /* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA},
+ /* Xor */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
+ /* Add */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE},
+};
+
+/**
+ * Highest-valued BLENDFACTOR used in gen6_blend_op.
+ *
+ * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR,
+ * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
+ * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
+ */
+#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1)
+
+/* FIXME: surface format defined in gen6_defines.h, shared Sampling engine
+ * 1.7.2
+ */
+static const struct formatinfo {
+ CARD32 pict_fmt;
+ uint32_t card_fmt;
+} gen6_tex_formats[] = {
+ {PICT_a8, GEN6_SURFACEFORMAT_A8_UNORM},
+ {PICT_a8r8g8b8, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM},
+ {PICT_x8r8g8b8, GEN6_SURFACEFORMAT_B8G8R8X8_UNORM},
+ {PICT_a8b8g8r8, GEN6_SURFACEFORMAT_R8G8B8A8_UNORM},
+ {PICT_x8b8g8r8, GEN6_SURFACEFORMAT_R8G8B8X8_UNORM},
+ {PICT_r8g8b8, GEN6_SURFACEFORMAT_R8G8B8_UNORM},
+ {PICT_r5g6b5, GEN6_SURFACEFORMAT_B5G6R5_UNORM},
+ {PICT_a1r5g5b5, GEN6_SURFACEFORMAT_B5G5R5A1_UNORM},
+ {PICT_a2r10g10b10, GEN6_SURFACEFORMAT_B10G10R10A2_UNORM},
+ {PICT_x2r10g10b10, GEN6_SURFACEFORMAT_B10G10R10X2_UNORM},
+ {PICT_a2b10g10r10, GEN6_SURFACEFORMAT_R10G10B10A2_UNORM},
+ {PICT_x2r10g10b10, GEN6_SURFACEFORMAT_B10G10R10X2_UNORM},
+ {PICT_a4r4g4b4, GEN6_SURFACEFORMAT_B4G4R4A4_UNORM},
+};
+
+#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64)
+
+#define BLEND_OFFSET(s, d) \
+ (((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE)
+
+#define SAMPLER_OFFSET(sf, se, mf, me) \
+ (((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * 2 * sizeof(struct gen6_sampler_state))
+
+#define OUT_BATCH(v) batch_emit(sna, v)
+#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
+#define OUT_VERTEX_F(v) vertex_emit(sna, v)
+
+static uint32_t gen6_get_blend(int op,
+ Bool has_component_alpha,
+ uint32_t dst_format)
+{
+ uint32_t src, dst;
+
+ src = gen6_blend_op[op].src_blend;
+ dst = gen6_blend_op[op].dst_blend;
+
+ /* If there's no dst alpha channel, adjust the blend op so that
+ * we'll treat it always as 1.
+ */
+ if (PICT_FORMAT_A(dst_format) == 0) {
+ if (src == GEN6_BLENDFACTOR_DST_ALPHA)
+ src = GEN6_BLENDFACTOR_ONE;
+ else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA)
+ src = GEN6_BLENDFACTOR_ZERO;
+ }
+
+ /* If the source alpha is being used, then we should only be in a
+ * case where the source blend factor is 0, and the source blend
+ * value is the mask channels multiplied by the source picture's alpha.
+ */
+ if (has_component_alpha && gen6_blend_op[op].src_alpha) {
+ if (dst == GEN6_BLENDFACTOR_SRC_ALPHA)
+ dst = GEN6_BLENDFACTOR_SRC_COLOR;
+ else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA)
+ dst = GEN6_BLENDFACTOR_INV_SRC_COLOR;
+ }
+
+ DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
+ op, dst_format, PICT_FORMAT_A(dst_format),
+ src, dst, (int)BLEND_OFFSET(src, dst)));
+ return BLEND_OFFSET(src, dst);
+}
+
+static uint32_t gen6_get_dest_format(PictFormat format)
+{
+ switch (format) {
+ default:
+ assert(0);
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
+ case PICT_a8b8g8r8:
+ case PICT_x8b8g8r8:
+ return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
+ case PICT_r5g6b5:
+ return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
+ case PICT_x1r5g5b5:
+ case PICT_a1r5g5b5:
+ return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
+ case PICT_a8:
+ return GEN6_SURFACEFORMAT_A8_UNORM;
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
+ }
+}
+
+static Bool gen6_check_dst_format(PictFormat format)
+{
+ switch (format) {
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ case PICT_a8b8g8r8:
+ case PICT_x8b8g8r8:
+ case PICT_a2r10g10b10:
+ case PICT_x2r10g10b10:
+ case PICT_r5g6b5:
+ case PICT_x1r5g5b5:
+ case PICT_a1r5g5b5:
+ case PICT_a8:
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ return TRUE;
+ }
+ return FALSE;
+}
+
+static uint32_t gen6_get_dest_format_for_depth(int depth)
+{
+ switch (depth) {
+ default: assert(0);
+ case 32:
+ case 24: return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
+ case 30: return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
+ case 16: return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
+ case 8: return GEN6_SURFACEFORMAT_A8_UNORM;
+ }
+}
+
+static uint32_t gen6_get_card_format_for_depth(int depth)
+{
+ switch (depth) {
+ default: assert(0);
+ case 32: return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
+ case 30: return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
+ case 24: return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
+ case 16: return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
+ case 8: return GEN6_SURFACEFORMAT_A8_UNORM;
+ }
+}
+
+static bool gen6_format_is_dst(uint32_t format)
+{
+ switch (format) {
+ case GEN6_SURFACEFORMAT_B8G8R8A8_UNORM:
+ case GEN6_SURFACEFORMAT_R8G8B8A8_UNORM:
+ case GEN6_SURFACEFORMAT_B10G10R10A2_UNORM:
+ case GEN6_SURFACEFORMAT_B5G6R5_UNORM:
+ case GEN6_SURFACEFORMAT_B5G5R5A1_UNORM:
+ case GEN6_SURFACEFORMAT_A8_UNORM:
+ case GEN6_SURFACEFORMAT_B4G4R4A4_UNORM:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint32_t gen6_filter(uint32_t filter)
+{
+ switch (filter) {
+ default:
+ assert(0);
+ case PictFilterNearest:
+ return SAMPLER_FILTER_NEAREST;
+ case PictFilterBilinear:
+ return SAMPLER_FILTER_BILINEAR;
+ }
+}
+
+static uint32_t gen6_check_filter(PicturePtr picture)
+{
+ switch (picture->filter) {
+ case PictFilterNearest:
+ case PictFilterBilinear:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static uint32_t gen6_repeat(uint32_t repeat)
+{
+ switch (repeat) {
+ default:
+ assert(0);
+ case RepeatNone:
+ return SAMPLER_EXTEND_NONE;
+ case RepeatNormal:
+ return SAMPLER_EXTEND_REPEAT;
+ case RepeatPad:
+ return SAMPLER_EXTEND_PAD;
+ case RepeatReflect:
+ return SAMPLER_EXTEND_REFLECT;
+ }
+}
+
+static bool gen6_check_repeat(PicturePtr picture)
+{
+ if (!picture->repeat)
+ return TRUE;
+
+ switch (picture->repeatType) {
+ case RepeatNone:
+ case RepeatNormal:
+ case RepeatPad:
+ case RepeatReflect:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static int
+gen6_choose_composite_kernel(int op, Bool has_mask, Bool is_ca, Bool is_affine)
+{
+ int base;
+
+ if (has_mask) {
+ if (is_ca) {
+ if (gen6_blend_op[op].src_alpha)
+ base = GEN6_WM_KERNEL_MASKCA_SRCALPHA;
+ else
+ base = GEN6_WM_KERNEL_MASKCA;
+ } else
+ base = GEN6_WM_KERNEL_MASK;
+ } else
+ base = GEN6_WM_KERNEL_NOMASK;
+
+ return base + !is_affine;
+}
+
+static void
+gen6_emit_sip(struct sna *sna)
+{
+ /* Set system instruction pointer */
+ OUT_BATCH(GEN6_STATE_SIP | 0);
+ OUT_BATCH(0);
+}
+
+static void
+gen6_emit_urb(struct sna *sna)
+{
+ OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
+ OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
+ (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
+ OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
+ (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
+}
+
+static void
+gen6_emit_state_base_address(struct sna *sna)
+{
+ OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
+ OUT_BATCH(0); /* general */
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
+ sna->kgem.nbatch,
+ NULL,
+ I915_GEM_DOMAIN_INSTRUCTION << 16,
+ BASE_ADDRESS_MODIFY));
+ OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
+ sna->kgem.nbatch,
+ sna->render_state.gen6.general_bo,
+ I915_GEM_DOMAIN_INSTRUCTION << 16,
+ BASE_ADDRESS_MODIFY));
+ OUT_BATCH(0); /* indirect */
+ OUT_BATCH(kgem_add_reloc(&sna->kgem,
+ sna->kgem.nbatch,
+ sna->render_state.gen6.general_bo,
+ I915_GEM_DOMAIN_INSTRUCTION << 16,
+ BASE_ADDRESS_MODIFY));
+
+ /* upper bounds, disable */
+ OUT_BATCH(0);
+ OUT_BATCH(BASE_ADDRESS_MODIFY);
+ OUT_BATCH(0);
+ OUT_BATCH(BASE_ADDRESS_MODIFY);
+}
+
+static void
+gen6_emit_viewports(struct sna *sna)
+{
+ OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
+ GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
+ (4 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(sna->render_state.gen6.cc_vp);
+}
+
+static void
+gen6_emit_vs(struct sna *sna)
+{
+ /* disable VS constant buffer */
+ OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
+ OUT_BATCH(0); /* no VS kernel */
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* pass-through */
+}
+
+static void
+gen6_emit_gs(struct sna *sna)
+{
+ /* disable GS constant buffer */
+ OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
+ OUT_BATCH(0); /* no GS kernel */
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* pass-through */
+}
+
+static void
+gen6_emit_clip(struct sna *sna)
+{
+ OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* pass-through */
+ OUT_BATCH(0);
+}
+
+static void
+gen6_emit_wm_constants(struct sna *sna)
+{
+ /* disable WM constant buffer */
+ OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+}
+
+static void
+gen6_emit_null_depth_buffer(struct sna *sna)
+{
+ OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2));
+ OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
+ GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2));
+ OUT_BATCH(0);
+}
+
+static void
+gen6_emit_invariant(struct sna *sna)
+{
+ OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+ OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
+ OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
+ GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
+ OUT_BATCH(0);
+
+ OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
+ OUT_BATCH(1);
+
+ gen6_emit_sip(sna);
+ gen6_emit_urb(sna);
+
+ gen6_emit_state_base_address(sna);
+
+ gen6_emit_viewports(sna);
+ gen6_emit_vs(sna);
+ gen6_emit_gs(sna);
+ gen6_emit_clip(sna);
+ gen6_emit_wm_constants(sna);
+ gen6_emit_null_depth_buffer(sna);
+
+ sna->render_state.gen6.needs_invariant = FALSE;
+}
+
+static void
+gen6_emit_cc(struct sna *sna, uint32_t blend_offset)
+{
+ struct gen6_render_state *render = &sna->render_state.gen6;
+
+ if (render->blend == blend_offset)
+ return;
+
+ OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
+ OUT_BATCH((render->cc_blend + blend_offset) | 1);
+ if (render->blend == -1) {
+ OUT_BATCH(1);
+ OUT_BATCH(1);
+ } else {
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ }
+
+ render->blend = blend_offset;
+}
+
+static void
+gen6_emit_sampler(struct sna *sna, uint32_t state)
+{
+ assert(state <
+ 2 * sizeof(struct gen6_sampler_state) *
+ FILTER_COUNT * EXTEND_COUNT *
+ FILTER_COUNT * EXTEND_COUNT);
+
+ if (sna->render_state.gen6.samplers == state)
+ return;
+
+ sna->render_state.gen6.samplers = state;
+
+ OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
+ GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
+ (4 - 2));
+ OUT_BATCH(0); /* VS */
+ OUT_BATCH(0); /* GS */
+ OUT_BATCH(sna->render_state.gen6.wm_state + state);
+}
+
+static void
+gen6_emit_sf(struct sna *sna, Bool has_mask)
+{
+ int num_sf_outputs = has_mask ? 2 : 1;
+
+ if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs)
+ return;
+
+ DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
+ __FUNCTION__, num_sf_outputs, 1, 0));
+
+ sna->render_state.gen6.num_sf_outputs = num_sf_outputs;
+
+ OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
+ OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT |
+ 1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT |
+ 1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT);
+ OUT_BATCH(0);
+ OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
+ OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* DW9 */
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* DW14 */
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0); /* DW19 */
+}
+
+static void
+gen6_emit_wm(struct sna *sna, int kernel, int nr_surfaces, int nr_inputs)
+{
+ if (sna->render_state.gen6.kernel == kernel)
+ return;
+
+ sna->render_state.gen6.kernel = kernel;
+
+ DBG(("%s: switching to %s\n", __FUNCTION__, wm_kernels[kernel].name));
+
+ OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
+ OUT_BATCH(sna->render_state.gen6.wm_kernel[kernel]);
+ OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
+ nr_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
+ OUT_BATCH(0);
+ OUT_BATCH(6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT); /* DW4 */
+ OUT_BATCH((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
+ GEN6_3DSTATE_WM_DISPATCH_ENABLE |
+ GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
+ OUT_BATCH(nr_inputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
+ GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+}
+
+static bool
+gen6_emit_binding_table(struct sna *sna, uint16_t offset)
+{
+ if (sna->render_state.gen6.surface_table == offset)
+ return false;
+
+ /* Binding table pointers */
+ OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS |
+ GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
+ (4 - 2));
+ OUT_BATCH(0); /* vs */
+ OUT_BATCH(0); /* gs */
+ /* Only the PS uses the binding table */
+ OUT_BATCH(offset*4);
+
+ sna->render_state.gen6.surface_table = offset;
+ return true;
+}
+
+static void
+gen6_emit_drawing_rectangle(struct sna *sna,
+ const struct sna_composite_op *op,
+ bool force)
+{
+ uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
+ uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
+
+ if (!force &&
+ sna->render_state.gen6.drawrect_limit == limit &&
+ sna->render_state.gen6.drawrect_offset == offset)
+ return;
+
+ sna->render_state.gen6.drawrect_offset = offset;
+ sna->render_state.gen6.drawrect_limit = limit;
+
+ OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(limit);
+ OUT_BATCH(offset);
+}
+
+static void
+gen6_emit_vertex_elements(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ /*
+ * vertex data in vertex buffer
+ * position: (x, y)
+ * texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0)
+ * texture coordinate 1 if (has_mask is TRUE): same as above
+ */
+ struct gen6_render_state *render = &sna->render_state.gen6;
+ int nelem = op->mask.bo ? 2 : 1;
+ int selem = op->is_affine ? 2 : 3;
+ uint32_t w_component;
+ uint32_t src_format;
+ int id = op->u.gen6.ve_id;
+
+ if (render->ve_id == id)
+ return;
+ render->ve_id = id;
+
+ if (op->is_affine) {
+ src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
+ w_component = GEN6_VFCOMPONENT_STORE_1_FLT;
+ } else {
+ src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
+ w_component = GEN6_VFCOMPONENT_STORE_SRC;
+ }
+
+ /* The VUE layout
+ * dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
+ * dword 4-7: position (x, y, 1.0, 1.0),
+ * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
+ * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
+ *
+ * dword 4-15 are fetched from vertex buffer
+ */
+ OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
+ ((2 * (2 + nelem)) + 1 - 2));
+
+ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+ GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
+ 0 << VE0_OFFSET_SHIFT);
+ OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
+ GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
+ GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
+ GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);
+
+ /* x,y */
+ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+ GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
+ 0 << VE0_OFFSET_SHIFT); /* offsets vb in bytes */
+ OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
+ GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
+ GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
+ GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
+
+ /* u0, v0, w0 */
+ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+ src_format << VE0_FORMAT_SHIFT |
+ 4 << VE0_OFFSET_SHIFT); /* offset vb in bytes */
+ OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
+ GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
+ w_component << VE1_VFCOMPONENT_2_SHIFT |
+ GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
+
+ /* u1, v1, w1 */
+ if (op->mask.bo) {
+ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
+ src_format << VE0_FORMAT_SHIFT |
+ ((1 + selem) * 4) << VE0_OFFSET_SHIFT); /* vb offset in bytes */
+ OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
+ GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
+ w_component << VE1_VFCOMPONENT_2_SHIFT |
+ GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
+ }
+}
+
+static void
+gen6_emit_state(struct sna *sna,
+ const struct sna_composite_op *op,
+ uint16_t wm_binding_table)
+
+{
+ bool need_flush =
+ (sna->kgem.batch[sna->kgem.nbatch-1] & (0xff<<23)) != MI_FLUSH;
+
+ gen6_emit_cc(sna,
+ gen6_get_blend(op->op,
+ op->has_component_alpha,
+ op->dst.format));
+
+ DBG(("%s: sampler src=(%d, %d), mask=(%d, %d), offset=%d\n",
+ __FUNCTION__,
+ op->src.filter, op->src.repeat,
+ op->mask.filter, op->mask.repeat,
+ (int)SAMPLER_OFFSET(op->src.filter,
+ op->src.repeat,
+ op->mask.filter,
+ op->mask.repeat)));
+ gen6_emit_sampler(sna,
+ SAMPLER_OFFSET(op->src.filter,
+ op->src.repeat,
+ op->mask.filter,
+ op->mask.repeat));
+ gen6_emit_sf(sna, op->mask.bo != NULL);
+ gen6_emit_wm(sna,
+ op->u.gen6.wm_kernel,
+ op->u.gen6.nr_surfaces,
+ op->u.gen6.nr_inputs);
+ gen6_emit_vertex_elements(sna, op);
+
+ /* XXX updating the binding table requires a non-pipelined cmd? */
+ need_flush &= gen6_emit_binding_table(sna, wm_binding_table);
+ gen6_emit_drawing_rectangle(sna, op, need_flush);
+}
+
+static void gen6_magic_ca_pass(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ struct gen6_render_state *state = &sna->render_state.gen6;
+
+ if (!op->need_magic_ca_pass)
+ return;
+
+ DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
+ sna->render.vertex_start, sna->render.vertex_index));
+
+ gen6_emit_cc(sna,
+ gen6_get_blend(PictOpAdd, TRUE, op->dst.format));
+ gen6_emit_wm(sna,
+ gen6_choose_composite_kernel(PictOpAdd,
+ TRUE, TRUE,
+ op->is_affine),
+ 3, 2);
+
+ OUT_BATCH(GEN6_3DPRIMITIVE |
+ GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+ _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
+ 0 << 9 |
+ 4);
+ OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
+ OUT_BATCH(sna->render.vertex_start);
+ OUT_BATCH(1); /* single instance */
+ OUT_BATCH(0); /* start instance location */
+ OUT_BATCH(0); /* index buffer offset, ignored */
+
+ state->last_primitive = sna->kgem.nbatch;
+}
+
+static void gen6_vertex_flush(struct sna *sna)
+{
+ if (sna->render_state.gen6.vertex_offset == 0)
+ return;
+
+ DBG(("%s[%x] = %d\n", __FUNCTION__,
+ 4*sna->render_state.gen6.vertex_offset,
+ sna->render.vertex_index - sna->render.vertex_start));
+ sna->kgem.batch[sna->render_state.gen6.vertex_offset] =
+ sna->render.vertex_index - sna->render.vertex_start;
+ sna->render_state.gen6.vertex_offset = 0;
+
+ if (sna->render.op)
+ gen6_magic_ca_pass(sna, sna->render.op);
+}
+
+static void gen6_vertex_finish(struct sna *sna, Bool last)
+{
+ struct kgem_bo *bo;
+ int i, delta;
+
+ gen6_vertex_flush(sna);
+ if (!sna->render.vertex_used)
+ return;
+
+ /* Note: we only need dword alignment (currently) */
+
+ if (last && sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
+ DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
+ sna->render.vertex_used, sna->kgem.nbatch));
+ memcpy(sna->kgem.batch + sna->kgem.nbatch,
+ sna->render.vertex_data,
+ sna->render.vertex_used * 4);
+ delta = sna->kgem.nbatch * 4;
+ bo = NULL;
+ sna->kgem.nbatch += sna->render.vertex_used;
+ } else {
+ bo = kgem_create_linear(&sna->kgem, 4*sna->render.vertex_used);
+ if (bo && !kgem_bo_write(&sna->kgem, bo,
+ sna->render.vertex_data,
+ 4*sna->render.vertex_used)) {
+ kgem_bo_destroy(&sna->kgem, bo);
+ return;
+ }
+ delta = 0;
+ DBG(("%s: new vbo: %d\n", __FUNCTION__,
+ sna->render.vertex_used));
+ }
+
+ for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
+ if (sna->render.vertex_reloc[i]) {
+ DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+ i, sna->render.vertex_reloc[i]));
+
+ sna->kgem.batch[sna->render.vertex_reloc[i]] =
+ kgem_add_reloc(&sna->kgem,
+ sna->render.vertex_reloc[i],
+ bo,
+ I915_GEM_DOMAIN_VERTEX << 16,
+ delta);
+ sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
+ kgem_add_reloc(&sna->kgem,
+ sna->render.vertex_reloc[i]+1,
+ bo,
+ I915_GEM_DOMAIN_VERTEX << 16,
+ delta + sna->render.vertex_used * 4 - 1);
+ sna->render.vertex_reloc[i] = 0;
+ }
+ }
+
+ if (bo)
+ kgem_bo_destroy(&sna->kgem, bo);
+
+ sna->render.vertex_used = 0;
+ sna->render.vertex_index = 0;
+ sna->render_state.gen6.vb_id = 0;
+}
+
+typedef struct gen6_surface_state_padded {
+ struct gen6_surface_state state;
+ char pad[32 - sizeof(struct gen6_surface_state)];
+} gen6_surface_state_padded;
+
+static void null_create(struct sna_static_stream *stream)
+{
+ /* A bunch of zeros useful for legacy border color and depth-stencil */
+ sna_static_stream_map(stream, 64, 64);
+}
+
+static void
+sampler_state_init(struct gen6_sampler_state *sampler_state,
+ sampler_filter_t filter,
+ sampler_extend_t extend)
+{
+ sampler_state->ss0.lod_preclamp = 1; /* GL mode */
+
+ /* We use the legacy mode to get the semantics specified by
+ * the Render extension. */
+ sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;
+
+ switch (filter) {
+ default:
+ case SAMPLER_FILTER_NEAREST:
+ sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST;
+ sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
+ break;
+ case SAMPLER_FILTER_BILINEAR:
+ sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR;
+ sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR;
+ break;
+ }
+
+ switch (extend) {
+ default:
+ case SAMPLER_EXTEND_NONE:
+ sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+ sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+ sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+ break;
+ case SAMPLER_EXTEND_REPEAT:
+ sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
+ sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
+ sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
+ break;
+ case SAMPLER_EXTEND_PAD:
+ sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
+ sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
+ sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
+ break;
+ case SAMPLER_EXTEND_REFLECT:
+ sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
+ sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
+ sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
+ break;
+ }
+}
+
+static uint32_t gen6_create_cc_viewport(struct sna_static_stream *stream)
+{
+ struct gen6_cc_viewport vp;
+
+ vp.min_depth = -1.e35;
+ vp.max_depth = 1.e35;
+
+ return sna_static_stream_add(stream, &vp, sizeof(vp), 32);
+}
+
+static uint32_t gen6_get_card_format(PictFormat format)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(gen6_tex_formats); i++) {
+ if (gen6_tex_formats[i].pict_fmt == format)
+ return gen6_tex_formats[i].card_fmt;
+ }
+ return -1;
+}
+
+static uint32_t
+gen6_tiling_bits(uint32_t tiling)
+{
+ switch (tiling) {
+ default: assert(0);
+ case I915_TILING_NONE: return 0;
+ case I915_TILING_X: return GEN6_SURFACE_TILED;
+ case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y;
+ }
+}
+
+/**
+ * Sets up the common fields for a surface state buffer for the given
+ * picture in the given surface state buffer.
+ */
+static int
+gen6_bind_bo(struct sna *sna,
+ struct kgem_bo *bo,
+ uint32_t width,
+ uint32_t height,
+ uint32_t format,
+ Bool is_dst)
+{
+ uint32_t *ss;
+ uint32_t domains;
+ uint16_t offset;
+
+ /* After the first bind, we manage the cache domains within the batch */
+ if (is_dst) {
+ domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
+ kgem_bo_mark_dirty(bo);
+ } else {
+ domains = I915_GEM_DOMAIN_SAMPLER << 16;
+ is_dst = gen6_format_is_dst(format);
+ }
+
+ offset = sna->kgem.surface - sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
+ offset *= sizeof(uint32_t);
+
+ if (is_dst) {
+ if (bo->dst_bound)
+ return bo->dst_bound;
+
+ bo->dst_bound = offset;
+ } else {
+ if (bo->src_bound)
+ return bo->src_bound;
+
+ bo->src_bound = offset;
+ }
+
+ sna->kgem.surface -=
+ sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
+ ss = sna->kgem.batch + sna->kgem.surface;
+ ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
+ GEN6_SURFACE_BLEND_ENABLED |
+ format << GEN6_SURFACE_FORMAT_SHIFT);
+ ss[1] = kgem_add_reloc(&sna->kgem,
+ sna->kgem.surface + 1,
+ bo, domains, 0);
+ ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT |
+ (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT);
+ ss[3] = (gen6_tiling_bits(bo->tiling) |
+ (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
+ ss[4] = 0;
+ ss[5] = 0;
+
+ DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
+ offset, bo->handle, ss[1],
+ format, width, height, bo->pitch, bo->tiling,
+ domains & 0xffff ? "render" : "sampler"));
+
+ return offset;
+}
+
+fastcall static void
+gen6_emit_composite_primitive_solid(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = 1.;
+ v[2] = 1.;
+
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ v[4] = 0.;
+ v[5] = 1.;
+
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+ v[7] = 0.;
+ v[8] = 0.;
+}
+
+fastcall static void
+gen6_emit_composite_primitive_identity_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float *v;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+
+ v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
+ v[1] = v[4] + r->width * op->src.scale[0];
+
+ v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
+ v[5] = v[2] = v[8] + r->height * op->src.scale[1];
+}
+
+fastcall static void
+gen6_emit_composite_primitive_affine_source(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float *v;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x + r->width,
+ op->src.offset[1] + r->src.y + r->height,
+ op->src.transform,
+ &v[1], &v[2]);
+ v[1] *= op->src.scale[0];
+ v[2] *= op->src.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
+ op->src.offset[1] + r->src.y + r->height,
+ op->src.transform,
+ &v[4], &v[5]);
+ v[4] *= op->src.scale[0];
+ v[5] *= op->src.scale[1];
+
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+ _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x,
+ op->src.offset[1] + r->src.y,
+ op->src.transform,
+ &v[7], &v[8]);
+ v[7] *= op->src.scale[0];
+ v[8] *= op->src.scale[1];
+}
+
+fastcall static void
+gen6_emit_composite_primitive_identity_source_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float src_x, src_y;
+ float msk_x, msk_y;
+ float w, h;
+ float *v;
+
+ src_x = r->src.x + op->src.offset[0];
+ src_y = r->src.y + op->src.offset[1];
+ msk_x = r->mask.x + op->mask.offset[0];
+ msk_y = r->mask.y + op->mask.offset[1];
+ w = r->width;
+ h = r->height;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 15;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = (src_x + w) * op->src.scale[0];
+ v[2] = (src_y + h) * op->src.scale[1];
+ v[3] = (msk_x + w) * op->mask.scale[0];
+ v[4] = (msk_y + h) * op->mask.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[5] = dst.f;
+ v[6] = src_x * op->src.scale[0];
+ v[7] = v[2];
+ v[8] = msk_x * op->mask.scale[0];
+ v[9] = v[4];
+
+ dst.p.y = r->dst.y;
+ v[10] = dst.f;
+ v[11] = v[6];
+ v[12] = src_y * op->src.scale[1];
+ v[13] = v[8];
+ v[14] = msk_y * op->mask.scale[1];
+}
+
+fastcall static void
+gen6_emit_composite_primitive(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
+ Bool is_affine = op->is_affine;
+ const float *src_sf = op->src.scale;
+ const float *mask_sf = op->mask.scale;
+
+ if (is_affine) {
+ sna_get_transformed_coordinates(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1],
+ op->src.transform,
+ &src_x[0],
+ &src_y[0]);
+
+ sna_get_transformed_coordinates(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[1],
+ &src_y[1]);
+
+ sna_get_transformed_coordinates(r->src.x + op->src.offset[0] + r->width,
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[2],
+ &src_y[2]);
+ } else {
+ if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1],
+ op->src.transform,
+ &src_x[0],
+ &src_y[0],
+ &src_w[0]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0],
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[1],
+ &src_y[1],
+ &src_w[1]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0] + r->width,
+ r->src.y + op->src.offset[1] + r->height,
+ op->src.transform,
+ &src_x[2],
+ &src_y[2],
+ &src_w[2]))
+ return;
+ }
+
+ if (op->mask.bo) {
+ if (is_affine) {
+ sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1],
+ op->mask.transform,
+ &mask_x[0],
+ &mask_y[0]);
+
+ sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[1],
+ &mask_y[1]);
+
+ sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0] + r->width,
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[2],
+ &mask_y[2]);
+ } else {
+ if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1],
+ op->mask.transform,
+ &mask_x[0],
+ &mask_y[0],
+ &mask_w[0]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0],
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[1],
+ &mask_y[1],
+ &mask_w[1]))
+ return;
+
+ if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0] + r->width,
+ r->mask.y + op->mask.offset[1] + r->height,
+ op->mask.transform,
+ &mask_x[2],
+ &mask_y[2],
+ &mask_w[2]))
+ return;
+ }
+ }
+
+ OUT_VERTEX(r->dst.x + r->width, r->dst.y + r->height);
+ OUT_VERTEX_F(src_x[2] * src_sf[0]);
+ OUT_VERTEX_F(src_y[2] * src_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(src_w[2]);
+ if (op->mask.bo) {
+ OUT_VERTEX_F(mask_x[2] * mask_sf[0]);
+ OUT_VERTEX_F(mask_y[2] * mask_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(mask_w[2]);
+ }
+
+ OUT_VERTEX(r->dst.x, r->dst.y + r->height);
+ OUT_VERTEX_F(src_x[1] * src_sf[0]);
+ OUT_VERTEX_F(src_y[1] * src_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(src_w[1]);
+ if (op->mask.bo) {
+ OUT_VERTEX_F(mask_x[1] * mask_sf[0]);
+ OUT_VERTEX_F(mask_y[1] * mask_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(mask_w[1]);
+ }
+
+ OUT_VERTEX(r->dst.x, r->dst.y);
+ OUT_VERTEX_F(src_x[0] * src_sf[0]);
+ OUT_VERTEX_F(src_y[0] * src_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(src_w[0]);
+ if (op->mask.bo) {
+ OUT_VERTEX_F(mask_x[0] * mask_sf[0]);
+ OUT_VERTEX_F(mask_y[0] * mask_sf[1]);
+ if (!is_affine)
+ OUT_VERTEX_F(mask_w[0]);
+ }
+}
+
+static void gen6_emit_vertex_buffer(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ int id = op->u.gen6.ve_id;
+
+ OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3);
+ OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
+ 4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT);
+ sna->render.vertex_reloc[id] = sna->kgem.nbatch;
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+
+ sna->render_state.gen6.vb_id |= 1 << id;
+}
+
+static void gen6_emit_primitive(struct sna *sna)
+{
+ if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) {
+ sna->render_state.gen6.vertex_offset = sna->kgem.nbatch - 5;
+ return;
+ }
+
+ OUT_BATCH(GEN6_3DPRIMITIVE |
+ GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+ _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
+ 0 << 9 |
+ 4);
+ sna->render_state.gen6.vertex_offset = sna->kgem.nbatch;
+ OUT_BATCH(0); /* vertex count, to be filled in later */
+ OUT_BATCH(sna->render.vertex_index);
+ OUT_BATCH(1); /* single instance */
+ OUT_BATCH(0); /* start instance location */
+ OUT_BATCH(0); /* index buffer offset, ignored */
+ sna->render.vertex_start = sna->render.vertex_index;
+
+ sna->render_state.gen6.last_primitive = sna->kgem.nbatch;
+}
+
+static bool gen6_rectangle_begin(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ int id = op->u.gen6.ve_id;
+ int ndwords;
+
+ ndwords = 0;
+ if ((sna->render_state.gen6.vb_id & (1 << id)) == 0)
+ ndwords += 5;
+ if (sna->render_state.gen6.vertex_offset == 0)
+ ndwords += op->need_magic_ca_pass ? 60 : 6;
+ if (ndwords == 0)
+ return true;
+
+ if (!kgem_check_batch(&sna->kgem, ndwords))
+ return false;
+
+ if ((sna->render_state.gen6.vb_id & (1 << id)) == 0)
+ gen6_emit_vertex_buffer(sna, op);
+ if (sna->render_state.gen6.vertex_offset == 0)
+ gen6_emit_primitive(sna);
+
+ return true;
+}
+
+static int gen6_get_rectangles__flush(struct sna *sna, bool ca)
+{
+ if (!kgem_check_batch(&sna->kgem, ca ? 65 : 5))
+ return 0;
+ if (sna->kgem.nexec > KGEM_EXEC_SIZE(&sna->kgem) - 1)
+ return 0;
+ if (sna->kgem.nreloc > KGEM_RELOC_SIZE(&sna->kgem) - 1)
+ return 0;
+
+ gen6_vertex_finish(sna, FALSE);
+ sna->render.vertex_index = 0;
+
+ return ARRAY_SIZE(sna->render.vertex_data);
+}
+
+inline static int gen6_get_rectangles(struct sna *sna,
+ const struct sna_composite_op *op,
+ int want)
+{
+ int rem = vertex_space(sna);
+
+ if (rem < 3*op->floats_per_vertex) {
+ DBG(("flushing vbo for %s: %d < %d\n",
+ __FUNCTION__, rem, 3*op->floats_per_vertex));
+ rem = gen6_get_rectangles__flush(sna, op->need_magic_ca_pass);
+ if (rem == 0)
+ return 0;
+ }
+
+ if (!gen6_rectangle_begin(sna, op))
+ return 0;
+
+ if (want > 1 && want * op->floats_per_vertex*3 > rem)
+ want = rem / (3*op->floats_per_vertex);
+
+ sna->render.vertex_index += 3*want;
+ return want;
+}
+
+inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
+ const struct sna_composite_op *op,
+ uint16_t *offset)
+{
+ uint32_t *table;
+
+ sna->kgem.surface -=
+ sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
+ /* Clear all surplus entries to zero in case of prefetch */
+ table = memset(sna->kgem.batch + sna->kgem.surface,
+ 0, sizeof(struct gen6_surface_state_padded));
+
+ DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
+
+ *offset = sna->kgem.surface;
+ return table;
+}
+
+static uint32_t
+gen6_choose_composite_vertex_buffer(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ int has_mask = op->mask.bo != NULL;
+ int is_affine = op->is_affine;
+ return has_mask << 1 | is_affine;
+}
+
+static void
+gen6_get_batch(struct sna *sna)
+{
+ kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+ if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
+ DBG(("%s: flushing batch: %d < %d+%d\n",
+ __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
+ 150, 4*8));
+ kgem_submit(&sna->kgem);
+ }
+
+ if (sna->render_state.gen6.needs_invariant)
+ gen6_emit_invariant(sna);
+}
+
+static void gen6_emit_composite_state(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ uint32_t *binding_table;
+ uint16_t offset;
+
+ gen6_get_batch(sna);
+
+ binding_table = gen6_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen6_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen6_get_dest_format(op->dst.format),
+ TRUE);
+ binding_table[1] =
+ gen6_bind_bo(sna,
+ op->src.bo, op->src.width, op->src.height,
+ op->src.card_format,
+ FALSE);
+ if (op->mask.bo) {
+ binding_table[2] =
+ gen6_bind_bo(sna,
+ op->mask.bo,
+ op->mask.width,
+ op->mask.height,
+ op->mask.card_format,
+ FALSE);
+ }
+
+ if (sna->kgem.surface == offset&&
+ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table &&
+ (op->mask.bo == NULL ||
+ sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) {
+ sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
+ offset = sna->render_state.gen6.surface_table;
+ }
+
+ gen6_emit_state(sna, op, offset);
+}
+
+static void
+gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
+{
+ if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) {
+ DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
+ sna->render_state.gen6.floats_per_vertex,
+ op->floats_per_vertex,
+ sna->render.vertex_index,
+ (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
+ sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
+ sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
+ sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;
+ }
+}
+
+fastcall static void
+gen6_render_composite_blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ if (!gen6_get_rectangles(sna, op, 1)) {
+ gen6_emit_composite_state(sna, op);
+ gen6_get_rectangles(sna, op, 1);
+ }
+
+ op->prim_emit(sna, op, r);
+}
+
+static void
+gen6_render_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("composite_boxes(%d)\n", nbox));
+
+ do {
+ int nbox_this_time = gen6_get_rectangles(sna, op, nbox);
+ if (nbox_this_time == 0) {
+ gen6_emit_composite_state(sna, op);
+ nbox_this_time = gen6_get_rectangles(sna, op, nbox);
+ }
+ nbox -= nbox_this_time;
+ do {
+ struct sna_composite_rectangles r;
+
+ DBG((" %s: (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1, box->x2, box->y2));
+
+ r.dst.x = box->x1;
+ r.dst.y = box->y1;
+ r.width = box->x2 - box->x1;
+ r.height = box->y2 - box->y1;
+ r.src = r.mask = r.dst;
+
+ op->prim_emit(sna, op, &r);
+ box++;
+ } while (--nbox_this_time);
+ } while (nbox);
+}
+
+#ifndef MAX
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#endif
+
+static uint32_t
+gen6_composite_create_blend_state(struct sna_static_stream *stream)
+{
+ char *base, *ptr;
+ int src, dst;
+
+ base = sna_static_stream_map(stream,
+ GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
+ 64);
+
+ ptr = base;
+ for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) {
+ for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) {
+ struct gen6_blend_state *blend =
+ (struct gen6_blend_state *)ptr;
+
+ blend->blend0.dest_blend_factor = dst;
+ blend->blend0.source_blend_factor = src;
+ blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
+ blend->blend0.blend_enable = 1;
+
+ blend->blend1.post_blend_clamp_enable = 1;
+ blend->blend1.pre_blend_clamp_enable = 1;
+
+ ptr += GEN6_BLEND_STATE_PADDED_SIZE;
+ }
+ }
+
+ return sna_static_stream_offsetof(stream, base);
+}
+
+static uint32_t gen6_bind_video_source(struct sna *sna,
+ struct kgem_bo *src_bo,
+ uint32_t src_offset,
+ int src_width,
+ int src_height,
+ int src_pitch,
+ uint32_t src_surf_format)
+{
+ struct gen6_surface_state *ss;
+
+ sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
+
+ ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
+ ss->ss0.surface_type = GEN6_SURFACE_2D;
+ ss->ss0.surface_format = src_surf_format;
+ ss->ss0.color_blend = 1;
+
+ ss->ss1.base_addr =
+ kgem_add_reloc(&sna->kgem,
+ sna->kgem.surface + 1,
+ src_bo,
+ I915_GEM_DOMAIN_SAMPLER << 16,
+ src_offset);
+
+ ss->ss2.width = src_width - 1;
+ ss->ss2.height = src_height - 1;
+ ss->ss3.pitch = src_pitch - 1;
+
+ return sna->kgem.surface * sizeof(uint32_t);
+}
+
+static void gen6_emit_video_state(struct sna *sna,
+ struct sna_composite_op *op,
+ struct sna_video_frame *frame)
+{
+ uint32_t src_surf_format;
+ uint32_t src_surf_base[6];
+ int src_width[6];
+ int src_height[6];
+ int src_pitch[6];
+ uint32_t *binding_table;
+ uint16_t offset;
+ int n_src, n;
+
+ gen6_get_batch(sna);
+
+ src_surf_base[0] = frame->YBufOffset;
+ src_surf_base[1] = frame->YBufOffset;
+ src_surf_base[2] = frame->VBufOffset;
+ src_surf_base[3] = frame->VBufOffset;
+ src_surf_base[4] = frame->UBufOffset;
+ src_surf_base[5] = frame->UBufOffset;
+
+ if (is_planar_fourcc(frame->id)) {
+ src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM;
+ src_width[1] = src_width[0] = frame->width;
+ src_height[1] = src_height[0] = frame->height;
+ src_pitch[1] = src_pitch[0] = frame->pitch[1];
+ src_width[4] = src_width[5] = src_width[2] = src_width[3] =
+ frame->width / 2;
+ src_height[4] = src_height[5] = src_height[2] = src_height[3] =
+ frame->height / 2;
+ src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
+ frame->pitch[0];
+ n_src = 6;
+ } else {
+ if (frame->id == FOURCC_UYVY)
+ src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY;
+ else
+ src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL;
+
+ src_width[0] = frame->width;
+ src_height[0] = frame->height;
+ src_pitch[0] = frame->pitch[0];
+ n_src = 1;
+ }
+
+ binding_table = gen6_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen6_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen6_get_dest_format(op->dst.format),
+ TRUE);
+ for (n = 0; n < n_src; n++) {
+ binding_table[1+n] =
+ gen6_bind_video_source(sna,
+ frame->bo,
+ src_surf_base[n],
+ src_width[n],
+ src_height[n],
+ src_pitch[n],
+ src_surf_format);
+ }
+
+ gen6_emit_state(sna, op, offset);
+}
+
+static Bool
+gen6_render_video(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ RegionPtr dstRegion,
+ short src_w, short src_h,
+ short drw_w, short drw_h,
+ PixmapPtr pixmap)
+{
+ struct sna_composite_op tmp;
+ int nbox, dxo, dyo, pix_xoff, pix_yoff;
+ float src_scale_x, src_scale_y;
+ struct sna_pixmap *priv;
+ BoxPtr box;
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
+ __FUNCTION__, src_w, src_h, drw_w, drw_h,
+ REGION_NUM_RECTS(dstRegion),
+ REGION_EXTENTS(NULL, dstRegion)->x1,
+ REGION_EXTENTS(NULL, dstRegion)->y1,
+ REGION_EXTENTS(NULL, dstRegion)->x2,
+ REGION_EXTENTS(NULL, dstRegion)->y2));
+
+ priv = sna_pixmap_force_to_gpu(pixmap);
+ if (priv == NULL)
+ return FALSE;
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.op = PictOpSrc;
+ tmp.dst.pixmap = pixmap;
+ tmp.dst.width = pixmap->drawable.width;
+ tmp.dst.height = pixmap->drawable.height;
+ tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
+ tmp.dst.bo = priv->gpu_bo;
+
+ tmp.src.filter = SAMPLER_FILTER_BILINEAR;
+ tmp.src.repeat = SAMPLER_EXTEND_NONE;
+
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+
+ if (is_planar_fourcc(frame->id)) {
+ tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_VIDEO_PLANAR;
+ tmp.u.gen6.nr_surfaces = 7;
+ } else {
+ tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_VIDEO_PACKED;
+ tmp.u.gen6.nr_surfaces = 2;
+ }
+ tmp.u.gen6.nr_inputs = 1;
+ tmp.u.gen6.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, tmp.dst.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, frame->bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(frame->bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen6_emit_video_state(sna, &tmp, frame);
+ gen6_align_vertex(sna, &tmp);
+
+ /* Set up the offset for translating from the given region (in screen
+ * coordinates) to the backing pixmap.
+ */
+#ifdef COMPOSITE
+ pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
+ pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
+#else
+ pix_xoff = 0;
+ pix_yoff = 0;
+#endif
+
+ dxo = dstRegion->extents.x1;
+ dyo = dstRegion->extents.y1;
+
+ /* Use normalized texture coordinates */
+ src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
+ src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
+
+ box = REGION_RECTS(dstRegion);
+ nbox = REGION_NUM_RECTS(dstRegion);
+ while (nbox--) {
+ BoxRec r;
+
+ r.x1 = box->x1 + pix_xoff;
+ r.x2 = box->x2 + pix_xoff;
+ r.y1 = box->y1 + pix_yoff;
+ r.y2 = box->y2 + pix_yoff;
+
+ if (!gen6_get_rectangles(sna, &tmp, 1)) {
+ gen6_emit_video_state(sna, &tmp, frame);
+ gen6_get_rectangles(sna, &tmp, 1);
+ }
+
+ OUT_VERTEX(r.x2, r.y2);
+ OUT_VERTEX_F((box->x2 - dxo) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+
+ OUT_VERTEX(r.x1, r.y2);
+ OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
+ OUT_VERTEX_F((box->y2 - dyo) * src_scale_y);
+
+ OUT_VERTEX(r.x1, r.y1);
+ OUT_VERTEX_F((box->x1 - dxo) * src_scale_x);
+ OUT_VERTEX_F((box->y1 - dyo) * src_scale_y);
+
+ sna_damage_add_box(&priv->gpu_damage, &r);
+ sna_damage_subtract_box(&priv->cpu_damage, &r);
+ box++;
+ }
+
+ return TRUE;
+}
+
+static Bool
+gen6_composite_solid_init(struct sna *sna,
+ struct sna_composite_channel *channel,
+ uint32_t color)
+{
+ DBG(("%s: color=%x\n", __FUNCTION__, color));
+
+ channel->filter = PictFilterNearest;
+ channel->repeat = RepeatNormal;
+ channel->is_affine = TRUE;
+ channel->is_solid = TRUE;
+ channel->transform = NULL;
+ channel->width = 1;
+ channel->height = 1;
+ channel->card_format = GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+ channel->bo = sna_render_get_solid(sna, color);
+
+ channel->scale[0] = channel->scale[1] = 1;
+ channel->offset[0] = channel->offset[1] = 0;
+ return channel->bo != NULL;
+}
+
+static int
+gen6_composite_picture(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int x, int y,
+ int w, int h,
+ int dst_x, int dst_y)
+{
+ PixmapPtr pixmap;
+ uint32_t color;
+ int16_t dx, dy;
+
+ DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
+ __FUNCTION__, x, y, w, h, dst_x, dst_y));
+
+ channel->is_solid = FALSE;
+ channel->card_format = -1;
+
+ if (sna_picture_is_solid(picture, &color))
+ return gen6_composite_solid_init(sna, channel, color);
+
+ if (picture->pDrawable == NULL)
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ if (!gen6_check_repeat(picture))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ if (!gen6_check_filter(picture))
+ return sna_render_picture_fixup(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+
+ channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
+ channel->filter = picture->filter;
+
+ pixmap = get_drawable_pixmap(picture->pDrawable);
+ get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
+
+ x += dx + picture->pDrawable->x;
+ y += dy + picture->pDrawable->y;
+
+ channel->is_affine = sna_transform_is_affine(picture->transform);
+ if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
+ DBG(("%s: integer translation (%d, %d), removing\n",
+ __FUNCTION__, dx, dy));
+ x += dx;
+ y += dy;
+ channel->transform = NULL;
+ channel->filter = PictFilterNearest;
+ } else
+ channel->transform = picture->transform;
+
+ channel->card_format = gen6_get_card_format(picture->format);
+ if (channel->card_format == -1)
+ return sna_render_picture_convert(sna, picture, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+
+ if (pixmap->drawable.width > 8192 || pixmap->drawable.height > 8192) {
+ DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
+ pixmap->drawable.width, pixmap->drawable.height));
+ return sna_render_picture_extract(sna, picture, channel,
+ x, y, w, h, dst_x, dst_y);
+ }
+
+ return sna_render_pixmap_bo(sna, channel, pixmap,
+ x, y, w, h, dst_x, dst_y);
+}
+
+static void gen6_composite_channel_convert(struct sna_composite_channel *channel)
+{
+ channel->repeat = gen6_repeat(channel->repeat);
+ channel->filter = gen6_filter(channel->filter);
+ if (channel->card_format == -1)
+ channel->card_format = gen6_get_card_format(channel->pict_format);
+ assert(channel->card_format != -1);
+}
+
+static void gen6_render_composite_done(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ gen6_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ sna->render.op = NULL;
+
+ sna_render_composite_redirect_done(sna, op);
+
+ if (op->src.bo)
+ kgem_bo_destroy(&sna->kgem, op->src.bo);
+ if (op->mask.bo)
+ kgem_bo_destroy(&sna->kgem, op->mask.bo);
+}
+
+static Bool
+gen6_composite_set_target(struct sna *sna,
+ struct sna_composite_op *op,
+ PicturePtr dst)
+{
+ struct sna_pixmap *priv;
+
+ if (!gen6_check_dst_format(dst->format)) {
+ DBG(("%s: unsupported target format %08x\n",
+ __FUNCTION__, dst->format));
+ return FALSE;
+ }
+
+ op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
+ op->dst.width = op->dst.pixmap->drawable.width;
+ op->dst.height = op->dst.pixmap->drawable.height;
+ op->dst.format = dst->format;
+ priv = sna_pixmap(op->dst.pixmap);
+
+ op->dst.bo = NULL;
+ if (priv && priv->gpu_bo == NULL) {
+ op->dst.bo = priv->cpu_bo;
+ op->damage = &priv->cpu_damage;
+ }
+ if (op->dst.bo == NULL) {
+ priv = sna_pixmap_force_to_gpu(op->dst.pixmap);
+ if (priv == NULL)
+ return FALSE;
+
+ op->dst.bo = priv->gpu_bo;
+ if (!priv->gpu_only)
+ op->damage = &priv->gpu_damage;
+ }
+
+ get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
+ &op->dst.x, &op->dst.y);
+
+ DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d)\n",
+ __FUNCTION__,
+ op->dst.pixmap, (int)op->dst.format,
+ op->dst.width, op->dst.height,
+ op->dst.bo->pitch,
+ op->dst.x, op->dst.y));
+ return TRUE;
+}
+
+static Bool
+try_blt(struct sna *sna, PicturePtr dst, int width, int height)
+{
+ if (sna->kgem.mode == KGEM_BLT) {
+ DBG(("%s: already performing BLT\n", __FUNCTION__));
+ return TRUE;
+ }
+
+ if (width > 8192 || height > 8192) {
+ DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
+ __FUNCTION__, width, height));
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static Bool
+gen6_render_composite(struct sna *sna,
+ uint8_t op,
+ PicturePtr src,
+ PicturePtr mask,
+ PicturePtr dst,
+ int16_t src_x, int16_t src_y,
+ int16_t msk_x, int16_t msk_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t width, int16_t height,
+ struct sna_composite_op *tmp)
+{
+
+#if NO_COMPOSITE
+ return sna_blt_composite(sna, op,
+ src, dst,
+ src_x, src_y,
+ dst_x, dst_y,
+ width, height, tmp);
+#endif
+
+ DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
+ width, height, sna->kgem.mode));
+
+ if (mask == NULL &&
+ try_blt(sna, dst, width, height) &&
+ sna_blt_composite(sna, op,
+ src, dst,
+ src_x, src_y,
+ dst_x, dst_y,
+ width, height, tmp))
+ return TRUE;
+
+ if (op >= ARRAY_SIZE(gen6_blend_op))
+ return FALSE;
+
+ if (need_tiling(sna, width, height))
+ return sna_tiling_composite(sna,
+ op, src, mask, dst,
+ src_x, src_y,
+ msk_x, msk_y,
+ dst_x, dst_y,
+ width, height,
+ tmp);
+
+ tmp->op = op;
+ if (!gen6_composite_set_target(sna, tmp, dst))
+ return FALSE;
+
+ if (tmp->dst.width > 8192 || tmp->dst.height > 8192) {
+ if (!sna_render_composite_redirect(sna, tmp,
+ dst_x, dst_y, width, height))
+ return FALSE;
+ }
+
+ switch (gen6_composite_picture(sna, src, &tmp->src,
+ src_x, src_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ goto cleanup_dst;
+ case 0:
+ gen6_composite_solid_init(sna, &tmp->src, 0);
+ case 1:
+ gen6_composite_channel_convert(&tmp->src);
+ break;
+ }
+
+ tmp->is_affine = tmp->src.is_affine;
+ tmp->has_component_alpha = FALSE;
+ tmp->need_magic_ca_pass = FALSE;
+
+ tmp->mask.bo = NULL;
+ tmp->mask.filter = SAMPLER_FILTER_NEAREST;
+ tmp->mask.repeat = SAMPLER_EXTEND_NONE;
+
+ tmp->prim_emit = gen6_emit_composite_primitive;
+ if (mask) {
+ if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
+ tmp->has_component_alpha = TRUE;
+
+ /* Check if it's component alpha that relies on a source alpha and on
+ * the source value. We can only get one of those into the single
+ * source value that we get to blend with.
+ */
+ if (gen6_blend_op[op].src_alpha &&
+ (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) {
+ if (op != PictOpOver)
+ goto cleanup_src;
+
+ tmp->need_magic_ca_pass = TRUE;
+ tmp->op = PictOpOutReverse;
+ }
+ }
+
+ switch (gen6_composite_picture(sna, mask, &tmp->mask,
+ msk_x, msk_y,
+ width, height,
+ dst_x, dst_y)) {
+ case -1:
+ goto cleanup_src;
+ case 0:
+ gen6_composite_solid_init(sna, &tmp->mask, 0);
+ case 1:
+ gen6_composite_channel_convert(&tmp->mask);
+ break;
+ }
+
+ tmp->is_affine &= tmp->mask.is_affine;
+
+ if (tmp->src.transform == NULL && tmp->mask.transform == NULL)
+ tmp->prim_emit = gen6_emit_composite_primitive_identity_source_mask;
+
+ tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
+ } else {
+ if (tmp->src.is_solid)
+ tmp->prim_emit = gen6_emit_composite_primitive_solid;
+ else if (tmp->src.transform == NULL)
+ tmp->prim_emit = gen6_emit_composite_primitive_identity_source;
+ else if (tmp->src.is_affine)
+ tmp->prim_emit = gen6_emit_composite_primitive_affine_source;
+
+ tmp->floats_per_vertex = 3 + !tmp->is_affine;
+ }
+
+ tmp->u.gen6.wm_kernel =
+ gen6_choose_composite_kernel(tmp->op,
+ tmp->mask.bo != NULL,
+ tmp->has_component_alpha,
+ tmp->is_affine);
+ tmp->u.gen6.nr_surfaces = 2 + (tmp->mask.bo != NULL);
+ tmp->u.gen6.nr_inputs = 1 + (tmp->mask.bo != NULL);
+ tmp->u.gen6.ve_id =
+ gen6_choose_composite_vertex_buffer(sna, tmp);
+
+ tmp->blt = gen6_render_composite_blt;
+ tmp->boxes = gen6_render_composite_boxes;
+ tmp->done = gen6_render_composite_done;
+
+ if (!kgem_check_bo(&sna->kgem, tmp->dst.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->src.bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, tmp->mask.bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen6_emit_composite_state(sna, tmp);
+ gen6_align_vertex(sna, tmp);
+
+ sna->render.op = tmp;
+ return TRUE;
+
+cleanup_src:
+ if (tmp->src.bo)
+ kgem_bo_destroy(&sna->kgem, tmp->src.bo);
+cleanup_dst:
+ if (tmp->redirect.real_bo)
+ kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
+ return FALSE;
+}
+
+static void
+gen6_emit_copy_state(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ uint32_t *binding_table;
+ uint16_t offset;
+
+ gen6_get_batch(sna);
+
+ binding_table = gen6_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen6_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen6_get_dest_format_for_depth(op->dst.pixmap->drawable.depth),
+ TRUE);
+ binding_table[1] =
+ gen6_bind_bo(sna,
+ op->src.bo, op->src.width, op->src.height,
+ op->src.card_format,
+ FALSE);
+
+ if (sna->kgem.surface == offset &&
+ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
+ sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
+ offset = sna->render_state.gen6.surface_table;
+ }
+
+ gen6_emit_state(sna, op, offset);
+}
+
+static Bool
+gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+ PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const BoxRec *box, int n)
+{
+ struct sna_composite_op tmp;
+
+#if NO_COPY_BOXES
+ return sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n);
+#endif
+
+ DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d\n",
+ __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu,
+ src_bo == dst_bo));
+
+ if (sna->kgem.mode == KGEM_BLT &&
+ sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
+ src->drawable.width > 8192 || src->drawable.height > 8192 ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n);
+ tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
+
+ tmp.dst.pixmap = dst;
+ tmp.dst.x = tmp.dst.y = 0;
+ tmp.dst.width = dst->drawable.width;
+ tmp.dst.height = dst->drawable.height;
+ tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
+ tmp.dst.bo = dst_bo;
+
+ tmp.src.bo = src_bo;
+ tmp.src.filter = SAMPLER_FILTER_NEAREST;
+ tmp.src.repeat = SAMPLER_EXTEND_NONE;
+ tmp.src.card_format =
+ gen6_get_card_format_for_depth(src->drawable.depth),
+ tmp.src.width = src->drawable.width;
+ tmp.src.height = src->drawable.height;
+
+ tmp.mask.bo = NULL;
+ tmp.mask.filter = SAMPLER_FILTER_NEAREST;
+ tmp.mask.repeat = SAMPLER_EXTEND_NONE;
+
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+
+ tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK;
+ tmp.u.gen6.nr_surfaces = 2;
+ tmp.u.gen6.nr_inputs = 1;
+ tmp.u.gen6.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, src_bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(src_bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen6_emit_copy_state(sna, &tmp);
+ gen6_align_vertex(sna, &tmp);
+
+ tmp.src.scale[0] = 1.f / src->drawable.width;
+ tmp.src.scale[1] = 1.f / src->drawable.height;
+ do {
+ float *v;
+ int n_this_time = gen6_get_rectangles(sna, &tmp, n);
+ if (n_this_time == 0) {
+ gen6_emit_copy_state(sna, &tmp);
+ n_this_time = gen6_get_rectangles(sna, &tmp, n);
+ }
+ n -= n_this_time;
+
+ v = sna->render.vertex_data + sna->render.vertex_used;
+ sna->render.vertex_used += 9 * n_this_time;
+ do {
+
+ DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ box->x2 - box->x1, box->y2 - box->y1));
+ v[0] = pack_2s(box->x2 + dst_dx, box->y2 + dst_dy);
+ v[3] = pack_2s(box->x1 + dst_dx, box->y2 + dst_dy);
+ v[6] = pack_2s(box->x1 + dst_dx, box->y1 + dst_dy);
+
+ v[1] = (box->x2 + src_dx) * tmp.src.scale[0];
+ v[7] = v[4] = (box->x1 + src_dx) * tmp.src.scale[0];
+
+ v[5] = v[2] = (box->y2 + src_dy) * tmp.src.scale[1];
+ v[8] = (box->y1 + src_dy) * tmp.src.scale[1];
+
+ v += 9;
+ box++;
+ } while (--n_this_time);
+ } while (n);
+
+ gen6_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ return TRUE;
+}
+
+static void
+gen6_render_copy_blt(struct sna *sna,
+ const struct sna_copy_op *op,
+ int16_t sx, int16_t sy,
+ int16_t w, int16_t h,
+ int16_t dx, int16_t dy)
+{
+ if (!gen6_get_rectangles(sna, &op->base, 1)) {
+ gen6_emit_copy_state(sna, &op->base);
+ gen6_get_rectangles(sna, &op->base, 1);
+ }
+
+ OUT_VERTEX(dx+w, dy+h);
+ OUT_VERTEX_F((sx+w)*op->base.src.scale[0]);
+ OUT_VERTEX_F((sy+h)*op->base.src.scale[1]);
+
+ OUT_VERTEX(dx, dy+h);
+ OUT_VERTEX_F(sx*op->base.src.scale[0]);
+ OUT_VERTEX_F((sy+h)*op->base.src.scale[1]);
+
+ OUT_VERTEX(dx, dy);
+ OUT_VERTEX_F(sx*op->base.src.scale[0]);
+ OUT_VERTEX_F(sy*op->base.src.scale[1]);
+}
+
+static void
+gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
+{
+ gen6_vertex_flush(sna);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+}
+
+static Bool
+gen6_render_copy(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ struct sna_copy_op *op)
+{
+#if NO_COPY
+ return sna_blt_copy(sna, alu,
+ src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ op);
+#endif
+
+ DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
+ __FUNCTION__, alu,
+ src->drawable.width, src->drawable.height,
+ dst->drawable.width, dst->drawable.height));
+
+ if (sna->kgem.mode == KGEM_BLT &&
+ sna_blt_copy(sna, alu,
+ src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ op))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
+ src->drawable.width > 8192 || src->drawable.height > 8192 ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return sna_blt_copy(sna, alu, src_bo, dst_bo,
+ dst->drawable.bitsPerPixel,
+ op);
+
+ op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
+
+ op->base.dst.pixmap = dst;
+ op->base.dst.width = dst->drawable.width;
+ op->base.dst.height = dst->drawable.height;
+ op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
+ op->base.dst.bo = dst_bo;
+
+ op->base.src.bo = src_bo;
+ op->base.src.card_format =
+ gen6_get_card_format_for_depth(src->drawable.depth),
+ op->base.src.width = src->drawable.width;
+ op->base.src.height = src->drawable.height;
+ op->base.src.scale[0] = 1./src->drawable.width;
+ op->base.src.scale[1] = 1./src->drawable.height;
+ op->base.src.filter = SAMPLER_FILTER_NEAREST;
+ op->base.src.repeat = SAMPLER_EXTEND_NONE;
+
+ op->base.is_affine = true;
+ op->base.floats_per_vertex = 3;
+
+ op->base.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK;
+ op->base.u.gen6.nr_surfaces = 2;
+ op->base.u.gen6.nr_inputs = 1;
+ op->base.u.gen6.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+ if (!kgem_check_bo(&sna->kgem, src_bo))
+ kgem_submit(&sna->kgem);
+
+ if (kgem_bo_is_dirty(src_bo))
+ kgem_emit_flush(&sna->kgem);
+
+ gen6_emit_copy_state(sna, &op->base);
+ gen6_align_vertex(sna, &op->base);
+
+ op->blt = gen6_render_copy_blt;
+ op->done = gen6_render_copy_done;
+ return TRUE;
+}
+
+static void
+gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
+{
+ uint32_t *binding_table;
+ uint16_t offset;
+
+ gen6_get_batch(sna);
+
+ binding_table = gen6_composite_get_binding_table(sna, op, &offset);
+
+ binding_table[0] =
+ gen6_bind_bo(sna,
+ op->dst.bo, op->dst.width, op->dst.height,
+ gen6_get_dest_format(op->dst.format),
+ TRUE);
+ binding_table[1] =
+ gen6_bind_bo(sna,
+ op->src.bo, 1, 1,
+ GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
+ FALSE);
+
+ if (sna->kgem.surface == offset &&
+ *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
+ sna->kgem.surface +=
+ sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t);
+ offset = sna->render_state.gen6.surface_table;
+ }
+
+ gen6_emit_state(sna, op, offset);
+}
+
+static Bool
+gen6_render_fill_boxes(struct sna *sna,
+ CARD8 op,
+ PictFormat format,
+ const xRenderColor *color,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ const BoxRec *box, int n)
+{
+ struct sna_composite_op tmp;
+ uint32_t pixel;
+
+ DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
+ __FUNCTION__, op,
+ color->red, color->green, color->blue, color->alpha, (int)format));
+
+ if (op >= ARRAY_SIZE(gen6_blend_op)) {
+ DBG(("%s: fallback due to unhandled blend op: %d\n",
+ __FUNCTION__, op));
+ return FALSE;
+ }
+
+ if (sna->kgem.mode == KGEM_BLT ||
+ dst->drawable.width > 8192 ||
+ dst->drawable.height > 8192 ||
+ !gen6_check_dst_format(format)) {
+ uint8_t alu = GXcopy;
+
+ if (op == PictOpClear) {
+ alu = GXclear;
+ pixel = 0;
+ op = PictOpSrc;
+ }
+
+ if (op == PictOpOver && color->alpha >= 0xff00)
+ op = PictOpSrc;
+
+ if (op == PictOpSrc &&
+ sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ format) &&
+ sna_blt_fill_boxes(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ pixel, box, n))
+ return TRUE;
+
+ if (dst->drawable.width > 8192 ||
+ dst->drawable.height > 8192 ||
+ !gen6_check_dst_format(format))
+ return FALSE;
+ }
+
+#if NO_FILL_BOXES
+ return FALSE;
+#endif
+
+ if (!sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ PICT_a8r8g8b8))
+ return FALSE;
+
+ DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
+ __FUNCTION__, pixel, n,
+ box[0].x1, box[0].y1, box[0].x2, box[0].y2));
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.op = op;
+
+ tmp.dst.pixmap = dst;
+ tmp.dst.width = dst->drawable.width;
+ tmp.dst.height = dst->drawable.height;
+ tmp.dst.format = format;
+ tmp.dst.bo = dst_bo;
+
+ tmp.src.bo = sna_render_get_solid(sna, pixel);
+ tmp.src.filter = SAMPLER_FILTER_NEAREST;
+ tmp.src.repeat = SAMPLER_EXTEND_REPEAT;
+
+ tmp.is_affine = TRUE;
+ tmp.floats_per_vertex = 3;
+
+ tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK;
+ tmp.u.gen6.nr_surfaces = 2;
+ tmp.u.gen6.nr_inputs = 1;
+ tmp.u.gen6.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+
+ gen6_emit_fill_state(sna, &tmp);
+ gen6_align_vertex(sna, &tmp);
+
+ do {
+ int n_this_time = gen6_get_rectangles(sna, &tmp, n);
+ if (n_this_time == 0) {
+ gen6_emit_fill_state(sna, &tmp);
+ n_this_time = gen6_get_rectangles(sna, &tmp, n);
+ }
+ n -= n_this_time;
+ do {
+ DBG((" (%d, %d), (%d, %d)\n",
+ box->x1, box->y1, box->x2, box->y2));
+ OUT_VERTEX(box->x2, box->y2);
+ OUT_VERTEX_F(1);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(box->x1, box->y2);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(box->x1, box->y1);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(0);
+
+ box++;
+ } while (--n_this_time);
+ } while (n);
+
+ gen6_vertex_flush(sna);
+ kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+ return TRUE;
+}
+
+static void
+gen6_render_fill_blt(struct sna *sna,
+ const struct sna_fill_op *op,
+ int16_t x, int16_t y, int16_t w, int16_t h)
+{
+ DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
+
+ if (!gen6_get_rectangles(sna, &op->base, 1)) {
+ gen6_emit_fill_state(sna, &op->base);
+ gen6_get_rectangles(sna, &op->base, 1);
+ }
+
+ OUT_VERTEX(x+w, y+h);
+ OUT_VERTEX_F(1);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(x, y+h);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(1);
+
+ OUT_VERTEX(x, y);
+ OUT_VERTEX_F(0);
+ OUT_VERTEX_F(0);
+}
+
+static void
+gen6_render_fill_done(struct sna *sna, const struct sna_fill_op *op)
+{
+ gen6_vertex_flush(sna);
+ kgem_bo_destroy(&sna->kgem, op->base.src.bo);
+ _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+}
+
+static Bool
+gen6_render_fill(struct sna *sna, uint8_t alu,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ uint32_t color,
+ struct sna_fill_op *op)
+{
+ DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));
+
+#if NO_FILL
+ return sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ op);
+#endif
+
+ if (sna->kgem.mode == KGEM_BLT &&
+ sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ op))
+ return TRUE;
+
+ if (!(alu == GXcopy || alu == GXclear) ||
+ dst->drawable.width > 8192 || dst->drawable.height > 8192)
+ return sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ op);
+
+ if (alu == GXclear)
+ color = 0;
+
+ op->base.op = color == 0 ? PictOpClear : PictOpSrc;
+
+ op->base.dst.pixmap = dst;
+ op->base.dst.width = dst->drawable.width;
+ op->base.dst.height = dst->drawable.height;
+ op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
+ op->base.dst.bo = dst_bo;
+
+ op->base.src.bo =
+ sna_render_get_solid(sna,
+ sna_rgba_for_color(color,
+ dst->drawable.depth));
+ op->base.src.filter = SAMPLER_FILTER_NEAREST;
+ op->base.src.repeat = SAMPLER_EXTEND_REPEAT;
+
+ op->base.is_affine = TRUE;
+ op->base.floats_per_vertex = 3;
+
+ op->base.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK;
+ op->base.u.gen6.nr_surfaces = 2;
+ op->base.u.gen6.nr_inputs = 1;
+ op->base.u.gen6.ve_id = 1;
+
+ if (!kgem_check_bo(&sna->kgem, dst_bo))
+ kgem_submit(&sna->kgem);
+
+ gen6_emit_fill_state(sna, &op->base);
+ gen6_align_vertex(sna, &op->base);
+
+ op->blt = gen6_render_fill_blt;
+ op->done = gen6_render_fill_done;
+ return TRUE;
+}
+
+static void gen6_render_flush(struct sna *sna)
+{
+ gen6_vertex_finish(sna, TRUE);
+}
+
+static void
+gen6_render_context_switch(struct sna *sna,
+ int new_mode)
+{
+ if (!new_mode)
+ return;
+
+ if (sna->kgem.mode)
+ _kgem_submit(&sna->kgem);
+
+ sna->kgem.ring = new_mode;
+}
+
+static void gen6_render_reset(struct sna *sna)
+{
+ sna->render_state.gen6.needs_invariant = TRUE;
+ sna->render_state.gen6.vb_id = 0;
+ sna->render_state.gen6.ve_id = -1;
+ sna->render_state.gen6.last_primitive = -1;
+
+ sna->render_state.gen6.num_sf_outputs = 0;
+ sna->render_state.gen6.samplers = -1;
+ sna->render_state.gen6.blend = -1;
+ sna->render_state.gen6.kernel = -1;
+ sna->render_state.gen6.drawrect_offset = -1;
+ sna->render_state.gen6.drawrect_limit = -1;
+ sna->render_state.gen6.surface_table = -1;
+}
+
+static void gen6_render_fini(struct sna *sna)
+{
+ kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo);
+}
+
+static Bool gen6_render_setup(struct sna *sna)
+{
+ struct gen6_render_state *state = &sna->render_state.gen6;
+ struct sna_static_stream general;
+ struct gen6_sampler_state *ss;
+ int i, j, k, l, m;
+
+ sna_static_stream_init(&general);
+
+ /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
+ * dumps, you know it points to zero.
+ */
+ null_create(&general);
+
+ for (m = 0; m < GEN6_KERNEL_COUNT; m++)
+ state->wm_kernel[m] =
+ sna_static_stream_add(&general,
+ wm_kernels[m].data,
+ wm_kernels[m].size,
+ 64);
+
+ ss = sna_static_stream_map(&general,
+ 2 * sizeof(*ss) *
+ FILTER_COUNT * EXTEND_COUNT *
+ FILTER_COUNT * EXTEND_COUNT,
+ 32);
+ state->wm_state = sna_static_stream_offsetof(&general, ss);
+ for (i = 0; i < FILTER_COUNT; i++) {
+ for (j = 0; j < EXTEND_COUNT; j++) {
+ for (k = 0; k < FILTER_COUNT; k++) {
+ for (l = 0; l < EXTEND_COUNT; l++) {
+ sampler_state_init(ss++, i, j);
+ sampler_state_init(ss++, k, l);
+ }
+ }
+ }
+ }
+
+ state->cc_vp = gen6_create_cc_viewport(&general);
+ state->cc_blend = gen6_composite_create_blend_state(&general);
+
+ state->general_bo = sna_static_stream_fini(sna, &general);
+ return state->general_bo != NULL;
+}
+
+Bool gen6_render_init(struct sna *sna)
+{
+ if (!gen6_render_setup(sna))
+ return FALSE;
+
+ gen6_render_reset(sna);
+
+ sna->render.composite = gen6_render_composite;
+ sna->render.video = gen6_render_video;
+
+ sna->render.copy_boxes = gen6_render_copy_boxes;
+ sna->render.copy = gen6_render_copy;
+
+ sna->render.fill_boxes = gen6_render_fill_boxes;
+ sna->render.fill = gen6_render_fill;
+
+ sna->render.flush = gen6_render_flush;
+ sna->render.context_switch = gen6_render_context_switch;
+ sna->render.reset = gen6_render_reset;
+ sna->render.fini = gen6_render_fini;
+
+ sna->render.max_3d_size = 8192;
+ return TRUE;
+}
diff --git a/src/sna/gen6_render.h b/src/sna/gen6_render.h
new file mode 100644
index 00000000..42c5a6b9
--- /dev/null
+++ b/src/sna/gen6_render.h
@@ -0,0 +1,1598 @@
+#ifndef GEN6_RENDER_H
+#define GEN6_RENDER_H
+
+#define GEN6_MASK(high, low) (((1 << ((high) - (low) + 1)) - 1) << (low))
+
+#define GEN6_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
+ ((Pipeline) << 27) | \
+ ((Opcode) << 24) | \
+ ((Subopcode) << 16))
+
+#define GEN6_STATE_BASE_ADDRESS GEN6_3D(0, 1, 1)
+#define GEN6_STATE_SIP GEN6_3D(0, 1, 2)
+
+#define GEN6_PIPELINE_SELECT GEN6_3D(1, 1, 4)
+
+#define GEN6_MEDIA_STATE_POINTERS GEN6_3D(2, 0, 0)
+#define GEN6_MEDIA_OBJECT GEN6_3D(2, 1, 0)
+
+#define GEN6_3DSTATE_BINDING_TABLE_POINTERS GEN6_3D(3, 0, 1)
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
+
+#define GEN6_3DSTATE_VERTEX_BUFFERS GEN6_3D(3, 0, 8)
+#define GEN6_3DSTATE_VERTEX_ELEMENTS GEN6_3D(3, 0, 9)
+#define GEN6_3DSTATE_INDEX_BUFFER GEN6_3D(3, 0, 0xa)
+#define GEN6_3DSTATE_VF_STATISTICS GEN6_3D(3, 0, 0xb)
+
+#define GEN6_3DSTATE_DRAWING_RECTANGLE GEN6_3D(3, 1, 0)
+#define GEN6_3DSTATE_CONSTANT_COLOR GEN6_3D(3, 1, 1)
+#define GEN6_3DSTATE_SAMPLER_PALETTE_LOAD GEN6_3D(3, 1, 2)
+#define GEN6_3DSTATE_CHROMA_KEY GEN6_3D(3, 1, 4)
+#define GEN6_3DSTATE_DEPTH_BUFFER GEN6_3D(3, 1, 5)
+# define GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
+# define GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
+
+#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET GEN6_3D(3, 1, 6)
+#define GEN6_3DSTATE_POLY_STIPPLE_PATTERN GEN6_3D(3, 1, 7)
+#define GEN6_3DSTATE_LINE_STIPPLE GEN6_3D(3, 1, 8)
+#define GEN6_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN6_3D(3, 1, 9)
+/* These two are BLC and CTG only, not BW or CL */
+#define GEN6_3DSTATE_AA_LINE_PARAMS GEN6_3D(3, 1, 0xa)
+#define GEN6_3DSTATE_GS_SVB_INDEX GEN6_3D(3, 1, 0xb)
+
+#define GEN6_3DPRIMITIVE GEN6_3D(3, 3, 0)
+
+#define GEN6_3DSTATE_CLEAR_PARAMS GEN6_3D(3, 1, 0x10)
+/* DW1 */
+# define GEN6_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
+
+#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN6_3D(3, 0, 0x02)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
+
+#define GEN6_3DSTATE_URB GEN6_3D(3, 0, 0x05)
+/* DW1 */
+# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16
+# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0
+/* DW2 */
+# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8
+# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0
+
+#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN6_3D(3, 0, 0x0d)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
+
+#define GEN6_3DSTATE_CC_STATE_POINTERS GEN6_3D(3, 0, 0x0e)
+
+#define GEN6_3DSTATE_VS GEN6_3D(3, 0, 0x10)
+
+#define GEN6_3DSTATE_GS GEN6_3D(3, 0, 0x11)
+/* DW4 */
+# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
+
+#define GEN6_3DSTATE_CLIP GEN6_3D(3, 0, 0x12)
+
+#define GEN6_3DSTATE_SF GEN6_3D(3, 0, 0x13)
+/* DW1 */
+# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW2 */
+/* DW3 */
+# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29)
+# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29)
+# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29)
+# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29)
+/* DW4 */
+# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
+# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
+# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
+
+#define GEN6_3DSTATE_WM GEN6_3D(3, 0, 0x14)
+/* DW2 */
+# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT 27
+# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+/* DW4 */
+# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16
+/* DW5 */
+# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
+# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
+# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
+# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
+/* DW6 */
+# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
+
+
+#define GEN6_3DSTATE_CONSTANT_VS GEN6_3D(3, 0, 0x15)
+#define GEN6_3DSTATE_CONSTANT_GS GEN6_3D(3, 0, 0x16)
+#define GEN6_3DSTATE_CONSTANT_PS GEN6_3D(3, 0, 0x17)
+
+#define GEN6_3DSTATE_SAMPLE_MASK GEN6_3D(3, 0, 0x18)
+
+#define GEN6_3DSTATE_MULTISAMPLE GEN6_3D(3, 1, 0x0d)
+/* DW1 */
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
+
+#define PIPELINE_SELECT_3D 0
+#define PIPELINE_SELECT_MEDIA 1
+
+/* for GEN6_STATE_BASE_ADDRESS */
+#define BASE_ADDRESS_MODIFY (1 << 0)
+
+/* for GEN6_PIPE_CONTROL */
+#define GEN6_PIPE_CONTROL_NOWRITE (0 << 14)
+#define GEN6_PIPE_CONTROL_WRITE_QWORD (1 << 14)
+#define GEN6_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
+#define GEN6_PIPE_CONTROL_WRITE_TIME (3 << 14)
+#define GEN6_PIPE_CONTROL_DEPTH_STALL (1 << 13)
+#define GEN6_PIPE_CONTROL_WC_FLUSH (1 << 12)
+#define GEN6_PIPE_CONTROL_IS_FLUSH (1 << 11)
+#define GEN6_PIPE_CONTROL_TC_FLUSH (1 << 10)
+#define GEN6_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
+#define GEN6_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
+#define GEN6_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
+#define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
+
+/* VERTEX_BUFFER_STATE Structure */
+#define VB0_BUFFER_INDEX_SHIFT 26
+#define VB0_VERTEXDATA (0 << 20)
+#define VB0_INSTANCEDATA (1 << 20)
+#define VB0_BUFFER_PITCH_SHIFT 0
+
+/* VERTEX_ELEMENT_STATE Structure */
+#define VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */
+#define VE0_VALID (1 << 25) /* for GEN6 */
+#define VE0_FORMAT_SHIFT 16
+#define VE0_OFFSET_SHIFT 0
+#define VE1_VFCOMPONENT_0_SHIFT 28
+#define VE1_VFCOMPONENT_1_SHIFT 24
+#define VE1_VFCOMPONENT_2_SHIFT 20
+#define VE1_VFCOMPONENT_3_SHIFT 16
+#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
+
+/* 3DPRIMITIVE bits */
+#define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
+#define GEN6_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
+/* Primitive types are in gen6_defines.h */
+#define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT 10
+
+#define GEN6_SVG_CTL 0x7400
+
+#define GEN6_SVG_CTL_GS_BA (0 << 8)
+#define GEN6_SVG_CTL_SS_BA (1 << 8)
+#define GEN6_SVG_CTL_IO_BA (2 << 8)
+#define GEN6_SVG_CTL_GS_AUB (3 << 8)
+#define GEN6_SVG_CTL_IO_AUB (4 << 8)
+#define GEN6_SVG_CTL_SIP (5 << 8)
+
+#define GEN6_SVG_RDATA 0x7404
+#define GEN6_SVG_WORK_CTL 0x7408
+
+#define GEN6_VF_CTL 0x7500
+
+#define GEN6_VF_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
+#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
+#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
+#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
+#define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
+#define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
+#define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
+#define GEN6_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN6_VF_STRG_VAL 0x7504
+#define GEN6_VF_STR_VL_OVR 0x7508
+#define GEN6_VF_VC_OVR 0x750c
+#define GEN6_VF_STR_PSKIP 0x7510
+#define GEN6_VF_MAX_PRIM 0x7514
+#define GEN6_VF_RDATA 0x7518
+
+#define GEN6_VS_CTL 0x7600
+#define GEN6_VS_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
+#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
+#define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
+#define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
+#define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN6_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN6_VS_STRG_VAL 0x7604
+#define GEN6_VS_RDATA 0x7608
+
+#define GEN6_SF_CTL 0x7b00
+#define GEN6_SF_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
+#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
+#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
+#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
+#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
+#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
+#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
+#define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
+#define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
+#define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
+#define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN6_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN6_SF_STRG_VAL 0x7b04
+#define GEN6_SF_RDATA 0x7b18
+
+#define GEN6_WIZ_CTL 0x7c00
+#define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
+#define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
+#define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
+#define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
+#define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
+#define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
+#define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
+#define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
+#define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN6_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN6_WIZ_STRG_VAL 0x7c04
+#define GEN6_WIZ_RDATA 0x7c18
+
+#define GEN6_TS_CTL 0x7e00
+#define GEN6_TS_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
+#define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
+#define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
+#define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
+#define GEN6_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN6_TS_STRG_VAL 0x7e04
+#define GEN6_TS_RDATA 0x7e08
+
+#define GEN6_TD_CTL 0x8000
+#define GEN6_TD_CTL_MUX_SHIFT 8
+#define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
+#define GEN6_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
+#define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
+#define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
+#define GEN6_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
+#define GEN6_TD_CTL2 0x8004
+#define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
+#define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
+#define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
+#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
+#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
+#define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
+#define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
+#define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
+#define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
+#define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
+#define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
+#define GEN6_TD_VF_VS_EMSK 0x8008
+#define GEN6_TD_GS_EMSK 0x800c
+#define GEN6_TD_CLIP_EMSK 0x8010
+#define GEN6_TD_SF_EMSK 0x8014
+#define GEN6_TD_WIZ_EMSK 0x8018
+#define GEN6_TD_0_6_EHTRG_VAL 0x801c
+#define GEN6_TD_0_7_EHTRG_VAL 0x8020
+#define GEN6_TD_0_6_EHTRG_MSK 0x8024
+#define GEN6_TD_0_7_EHTRG_MSK 0x8028
+#define GEN6_TD_RDATA 0x802c
+#define GEN6_TD_TS_EMSK 0x8030
+
+#define GEN6_EU_CTL 0x8800
+#define GEN6_EU_CTL_SELECT_SHIFT 16
+#define GEN6_EU_CTL_DATA_MUX_SHIFT 8
+#define GEN6_EU_ATT_0 0x8810
+#define GEN6_EU_ATT_1 0x8814
+#define GEN6_EU_ATT_DATA_0 0x8820
+#define GEN6_EU_ATT_DATA_1 0x8824
+#define GEN6_EU_ATT_CLR_0 0x8830
+#define GEN6_EU_ATT_CLR_1 0x8834
+#define GEN6_EU_RDATA 0x8840
+
+#define GEN6_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
+ ((Pipeline) << 27) | \
+ ((Opcode) << 24) | \
+ ((Subopcode) << 16))
+
+#define GEN6_STATE_BASE_ADDRESS GEN6_3D(0, 1, 1)
+#define GEN6_STATE_SIP GEN6_3D(0, 1, 2)
+
+#define GEN6_PIPELINE_SELECT GEN6_3D(1, 1, 4)
+
+#define GEN6_MEDIA_STATE_POINTERS GEN6_3D(2, 0, 0)
+#define GEN6_MEDIA_OBJECT GEN6_3D(2, 1, 0)
+
+#define GEN6_3DSTATE_BINDING_TABLE_POINTERS GEN6_3D(3, 0, 1)
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
+
+#define GEN6_3DSTATE_VERTEX_BUFFERS GEN6_3D(3, 0, 8)
+#define GEN6_3DSTATE_VERTEX_ELEMENTS GEN6_3D(3, 0, 9)
+#define GEN6_3DSTATE_INDEX_BUFFER GEN6_3D(3, 0, 0xa)
+#define GEN6_3DSTATE_VF_STATISTICS GEN6_3D(3, 0, 0xb)
+
+#define GEN6_3DSTATE_DRAWING_RECTANGLE GEN6_3D(3, 1, 0)
+#define GEN6_3DSTATE_CONSTANT_COLOR GEN6_3D(3, 1, 1)
+#define GEN6_3DSTATE_SAMPLER_PALETTE_LOAD GEN6_3D(3, 1, 2)
+#define GEN6_3DSTATE_CHROMA_KEY GEN6_3D(3, 1, 4)
+#define GEN6_3DSTATE_DEPTH_BUFFER GEN6_3D(3, 1, 5)
+# define GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
+# define GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
+
+#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET GEN6_3D(3, 1, 6)
+#define GEN6_3DSTATE_POLY_STIPPLE_PATTERN GEN6_3D(3, 1, 7)
+#define GEN6_3DSTATE_LINE_STIPPLE GEN6_3D(3, 1, 8)
+#define GEN6_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN6_3D(3, 1, 9)
+/* These two are BLC and CTG only, not BW or CL */
+#define GEN6_3DSTATE_AA_LINE_PARAMS GEN6_3D(3, 1, 0xa)
+#define GEN6_3DSTATE_GS_SVB_INDEX GEN6_3D(3, 1, 0xb)
+
+#define GEN6_PIPE_CONTROL GEN6_3D(3, 2, 0)
+
+#define GEN6_3DPRIMITIVE GEN6_3D(3, 3, 0)
+
+#define GEN6_3DSTATE_CLEAR_PARAMS GEN6_3D(3, 1, 0x10)
+/* DW1 */
+# define GEN6_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
+
+/* for GEN6+ */
+#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN6_3D(3, 0, 0x02)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
+
+#define GEN6_3DSTATE_URB GEN6_3D(3, 0, 0x05)
+/* DW1 */
+# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16
+# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0
+/* DW2 */
+# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8
+# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0
+
+#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN6_3D(3, 0, 0x0d)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
+
+#define GEN6_3DSTATE_CC_STATE_POINTERS GEN6_3D(3, 0, 0x0e)
+
+#define GEN6_3DSTATE_VS GEN6_3D(3, 0, 0x10)
+
+#define GEN6_3DSTATE_GS GEN6_3D(3, 0, 0x11)
+/* DW4 */
+# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
+
+#define GEN6_3DSTATE_CLIP GEN6_3D(3, 0, 0x12)
+
+#define GEN6_3DSTATE_SF GEN6_3D(3, 0, 0x13)
+/* DW1 */
+# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW2 */
+/* DW3 */
+# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29)
+# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29)
+# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29)
+# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29)
+/* DW4 */
+# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
+# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
+# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
+
+
+#define GEN6_3DSTATE_WM GEN6_3D(3, 0, 0x14)
+/* DW2 */
+# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27
+# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+/* DW4 */
+# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16
+/* DW5 */
+# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
+# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
+# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
+# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
+/* DW6 */
+# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
+
+
+#define GEN6_3DSTATE_CONSTANT_VS GEN6_3D(3, 0, 0x15)
+#define GEN6_3DSTATE_CONSTANT_GS GEN6_3D(3, 0, 0x16)
+#define GEN6_3DSTATE_CONSTANT_PS GEN6_3D(3, 0, 0x17)
+
+#define GEN6_3DSTATE_SAMPLE_MASK GEN6_3D(3, 0, 0x18)
+
+#define GEN6_3DSTATE_MULTISAMPLE GEN6_3D(3, 1, 0x0d)
+/* DW1 */
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
+
+#define PIPELINE_SELECT_3D 0
+#define PIPELINE_SELECT_MEDIA 1
+
+#define UF0_CS_REALLOC (1 << 13)
+#define UF0_VFE_REALLOC (1 << 12)
+#define UF0_SF_REALLOC (1 << 11)
+#define UF0_CLIP_REALLOC (1 << 10)
+#define UF0_GS_REALLOC (1 << 9)
+#define UF0_VS_REALLOC (1 << 8)
+#define UF1_CLIP_FENCE_SHIFT 20
+#define UF1_GS_FENCE_SHIFT 10
+#define UF1_VS_FENCE_SHIFT 0
+#define UF2_CS_FENCE_SHIFT 20
+#define UF2_VFE_FENCE_SHIFT 10
+#define UF2_SF_FENCE_SHIFT 0
+
+/* for GEN6_STATE_BASE_ADDRESS */
+#define BASE_ADDRESS_MODIFY (1 << 0)
+
+/* for GEN6_3DSTATE_PIPELINED_POINTERS */
+#define GEN6_GS_DISABLE 0
+#define GEN6_GS_ENABLE 1
+#define GEN6_CLIP_DISABLE 0
+#define GEN6_CLIP_ENABLE 1
+
+/* for GEN6_PIPE_CONTROL */
+#define GEN6_PIPE_CONTROL_NOWRITE (0 << 14)
+#define GEN6_PIPE_CONTROL_WRITE_QWORD (1 << 14)
+#define GEN6_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
+#define GEN6_PIPE_CONTROL_WRITE_TIME (3 << 14)
+#define GEN6_PIPE_CONTROL_DEPTH_STALL (1 << 13)
+#define GEN6_PIPE_CONTROL_WC_FLUSH (1 << 12)
+#define GEN6_PIPE_CONTROL_IS_FLUSH (1 << 11)
+#define GEN6_PIPE_CONTROL_TC_FLUSH (1 << 10)
+#define GEN6_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
+#define GEN6_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
+#define GEN6_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
+#define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
+
+/* 3DPRIMITIVE bits */
+#define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
+#define GEN6_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
+/* Primitive types are in gen6_defines.h */
+#define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT 10
+
+#define GEN6_SVG_CTL 0x7400
+
+#define GEN6_SVG_CTL_GS_BA (0 << 8)
+#define GEN6_SVG_CTL_SS_BA (1 << 8)
+#define GEN6_SVG_CTL_IO_BA (2 << 8)
+#define GEN6_SVG_CTL_GS_AUB (3 << 8)
+#define GEN6_SVG_CTL_IO_AUB (4 << 8)
+#define GEN6_SVG_CTL_SIP (5 << 8)
+
+#define GEN6_SVG_RDATA 0x7404
+#define GEN6_SVG_WORK_CTL 0x7408
+
+#define GEN6_VF_CTL 0x7500
+
+#define GEN6_VF_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
+#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
+#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
+#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
+#define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
+#define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
+#define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
+#define GEN6_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN6_VF_STRG_VAL 0x7504
+#define GEN6_VF_STR_VL_OVR 0x7508
+#define GEN6_VF_VC_OVR 0x750c
+#define GEN6_VF_STR_PSKIP 0x7510
+#define GEN6_VF_MAX_PRIM 0x7514
+#define GEN6_VF_RDATA 0x7518
+
+#define GEN6_VS_CTL 0x7600
+#define GEN6_VS_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
+#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
+#define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
+#define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
+#define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN6_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN6_VS_STRG_VAL 0x7604
+#define GEN6_VS_RDATA 0x7608
+
+#define GEN6_SF_CTL 0x7b00
+#define GEN6_SF_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
+#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
+#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
+#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
+#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
+#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
+#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
+#define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
+#define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
+#define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
+#define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN6_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN6_SF_STRG_VAL 0x7b04
+#define GEN6_SF_RDATA 0x7b18
+
+#define GEN6_WIZ_CTL 0x7c00
+#define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
+#define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
+#define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
+#define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
+#define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
+#define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
+#define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
+#define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
+#define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN6_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN6_WIZ_STRG_VAL 0x7c04
+#define GEN6_WIZ_RDATA 0x7c18
+
+#define GEN6_TS_CTL 0x7e00
+#define GEN6_TS_CTL_SNAPSHOT_COMPLETE (1 << 31)
+#define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
+#define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
+#define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
+#define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
+#define GEN6_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN6_TS_STRG_VAL 0x7e04
+#define GEN6_TS_RDATA 0x7e08
+
+#define GEN6_TD_CTL 0x8000
+#define GEN6_TD_CTL_MUX_SHIFT 8
+#define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
+#define GEN6_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
+#define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
+#define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
+#define GEN6_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
+#define GEN6_TD_CTL2 0x8004
+#define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
+#define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
+#define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
+#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
+#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
+#define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
+#define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
+#define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
+#define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
+#define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
+#define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
+#define GEN6_TD_VF_VS_EMSK 0x8008
+#define GEN6_TD_GS_EMSK 0x800c
+#define GEN6_TD_CLIP_EMSK 0x8010
+#define GEN6_TD_SF_EMSK 0x8014
+#define GEN6_TD_WIZ_EMSK 0x8018
+#define GEN6_TD_0_6_EHTRG_VAL 0x801c
+#define GEN6_TD_0_7_EHTRG_VAL 0x8020
+#define GEN6_TD_0_6_EHTRG_MSK 0x8024
+#define GEN6_TD_0_7_EHTRG_MSK 0x8028
+#define GEN6_TD_RDATA 0x802c
+#define GEN6_TD_TS_EMSK 0x8030
+
+#define GEN6_EU_CTL 0x8800
+#define GEN6_EU_CTL_SELECT_SHIFT 16
+#define GEN6_EU_CTL_DATA_MUX_SHIFT 8
+#define GEN6_EU_ATT_0 0x8810
+#define GEN6_EU_ATT_1 0x8814
+#define GEN6_EU_ATT_DATA_0 0x8820
+#define GEN6_EU_ATT_DATA_1 0x8824
+#define GEN6_EU_ATT_CLR_0 0x8830
+#define GEN6_EU_ATT_CLR_1 0x8834
+#define GEN6_EU_RDATA 0x8840
+
+/* 3D state:
+ */
+#define _3DOP_3DSTATE_PIPELINED 0x0
+#define _3DOP_3DSTATE_NONPIPELINED 0x1
+#define _3DOP_3DCONTROL 0x2
+#define _3DOP_3DPRIMITIVE 0x3
+
+#define _3DSTATE_PIPELINED_POINTERS 0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
+#define _3DSTATE_VERTEX_BUFFERS 0x08
+#define _3DSTATE_VERTEX_ELEMENTS 0x09
+#define _3DSTATE_INDEX_BUFFER 0x0A
+#define _3DSTATE_VF_STATISTICS 0x0B
+#define _3DSTATE_DRAWING_RECTANGLE 0x00
+#define _3DSTATE_CONSTANT_COLOR 0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
+#define _3DSTATE_CHROMA_KEY 0x04
+#define _3DSTATE_DEPTH_BUFFER 0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
+#define _3DSTATE_LINE_STIPPLE 0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
+#define _3DCONTROL 0x00
+#define _3DPRIMITIVE 0x00
+
+#define _3DPRIM_POINTLIST 0x01
+#define _3DPRIM_LINELIST 0x02
+#define _3DPRIM_LINESTRIP 0x03
+#define _3DPRIM_TRILIST 0x04
+#define _3DPRIM_TRISTRIP 0x05
+#define _3DPRIM_TRIFAN 0x06
+#define _3DPRIM_QUADLIST 0x07
+#define _3DPRIM_QUADSTRIP 0x08
+#define _3DPRIM_LINELIST_ADJ 0x09
+#define _3DPRIM_LINESTRIP_ADJ 0x0A
+#define _3DPRIM_TRILIST_ADJ 0x0B
+#define _3DPRIM_TRISTRIP_ADJ 0x0C
+#define _3DPRIM_TRISTRIP_REVERSE 0x0D
+#define _3DPRIM_POLYGON 0x0E
+#define _3DPRIM_RECTLIST 0x0F
+#define _3DPRIM_LINELOOP 0x10
+#define _3DPRIM_POINTLIST_BF 0x11
+#define _3DPRIM_LINESTRIP_CONT 0x12
+#define _3DPRIM_LINESTRIP_BF 0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
+
+#define GEN6_ANISORATIO_2 0
+#define GEN6_ANISORATIO_4 1
+#define GEN6_ANISORATIO_6 2
+#define GEN6_ANISORATIO_8 3
+#define GEN6_ANISORATIO_10 4
+#define GEN6_ANISORATIO_12 5
+#define GEN6_ANISORATIO_14 6
+#define GEN6_ANISORATIO_16 7
+
+#define GEN6_BLENDFACTOR_ONE 0x1
+#define GEN6_BLENDFACTOR_SRC_COLOR 0x2
+#define GEN6_BLENDFACTOR_SRC_ALPHA 0x3
+#define GEN6_BLENDFACTOR_DST_ALPHA 0x4
+#define GEN6_BLENDFACTOR_DST_COLOR 0x5
+#define GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
+#define GEN6_BLENDFACTOR_CONST_COLOR 0x7
+#define GEN6_BLENDFACTOR_CONST_ALPHA 0x8
+#define GEN6_BLENDFACTOR_SRC1_COLOR 0x9
+#define GEN6_BLENDFACTOR_SRC1_ALPHA 0x0A
+#define GEN6_BLENDFACTOR_ZERO 0x11
+#define GEN6_BLENDFACTOR_INV_SRC_COLOR 0x12
+#define GEN6_BLENDFACTOR_INV_SRC_ALPHA 0x13
+#define GEN6_BLENDFACTOR_INV_DST_ALPHA 0x14
+#define GEN6_BLENDFACTOR_INV_DST_COLOR 0x15
+#define GEN6_BLENDFACTOR_INV_CONST_COLOR 0x17
+#define GEN6_BLENDFACTOR_INV_CONST_ALPHA 0x18
+#define GEN6_BLENDFACTOR_INV_SRC1_COLOR 0x19
+#define GEN6_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
+
+#define GEN6_BLENDFUNCTION_ADD 0
+#define GEN6_BLENDFUNCTION_SUBTRACT 1
+#define GEN6_BLENDFUNCTION_REVERSE_SUBTRACT 2
+#define GEN6_BLENDFUNCTION_MIN 3
+#define GEN6_BLENDFUNCTION_MAX 4
+
+#define GEN6_ALPHATEST_FORMAT_UNORM8 0
+#define GEN6_ALPHATEST_FORMAT_FLOAT32 1
+
+#define GEN6_CHROMAKEY_KILL_ON_ANY_MATCH 0
+#define GEN6_CHROMAKEY_REPLACE_BLACK 1
+
+#define GEN6_CLIP_API_OGL 0
+#define GEN6_CLIP_API_DX 1
+
+#define GEN6_CLIPMODE_NORMAL 0
+#define GEN6_CLIPMODE_CLIP_ALL 1
+#define GEN6_CLIPMODE_CLIP_NON_REJECTED 2
+#define GEN6_CLIPMODE_REJECT_ALL 3
+#define GEN6_CLIPMODE_ACCEPT_ALL 4
+
+#define GEN6_CLIP_NDCSPACE 0
+#define GEN6_CLIP_SCREENSPACE 1
+
+#define GEN6_COMPAREFUNCTION_ALWAYS 0
+#define GEN6_COMPAREFUNCTION_NEVER 1
+#define GEN6_COMPAREFUNCTION_LESS 2
+#define GEN6_COMPAREFUNCTION_EQUAL 3
+#define GEN6_COMPAREFUNCTION_LEQUAL 4
+#define GEN6_COMPAREFUNCTION_GREATER 5
+#define GEN6_COMPAREFUNCTION_NOTEQUAL 6
+#define GEN6_COMPAREFUNCTION_GEQUAL 7
+
+#define GEN6_COVERAGE_PIXELS_HALF 0
+#define GEN6_COVERAGE_PIXELS_1 1
+#define GEN6_COVERAGE_PIXELS_2 2
+#define GEN6_COVERAGE_PIXELS_4 3
+
+#define GEN6_CULLMODE_BOTH 0
+#define GEN6_CULLMODE_NONE 1
+#define GEN6_CULLMODE_FRONT 2
+#define GEN6_CULLMODE_BACK 3
+
+#define GEN6_DEFAULTCOLOR_R8G8B8A8_UNORM 0
+#define GEN6_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
+
+#define GEN6_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
+#define GEN6_DEPTHFORMAT_D32_FLOAT 1
+#define GEN6_DEPTHFORMAT_D24_UNORM_S8_UINT 2
+#define GEN6_DEPTHFORMAT_D16_UNORM 5
+
+#define GEN6_FLOATING_POINT_IEEE_754 0
+#define GEN6_FLOATING_POINT_NON_IEEE_754 1
+
+#define GEN6_FRONTWINDING_CW 0
+#define GEN6_FRONTWINDING_CCW 1
+
+#define GEN6_INDEX_BYTE 0
+#define GEN6_INDEX_WORD 1
+#define GEN6_INDEX_DWORD 2
+
+#define GEN6_LOGICOPFUNCTION_CLEAR 0
+#define GEN6_LOGICOPFUNCTION_NOR 1
+#define GEN6_LOGICOPFUNCTION_AND_INVERTED 2
+#define GEN6_LOGICOPFUNCTION_COPY_INVERTED 3
+#define GEN6_LOGICOPFUNCTION_AND_REVERSE 4
+#define GEN6_LOGICOPFUNCTION_INVERT 5
+#define GEN6_LOGICOPFUNCTION_XOR 6
+#define GEN6_LOGICOPFUNCTION_NAND 7
+#define GEN6_LOGICOPFUNCTION_AND 8
+#define GEN6_LOGICOPFUNCTION_EQUIV 9
+#define GEN6_LOGICOPFUNCTION_NOOP 10
+#define GEN6_LOGICOPFUNCTION_OR_INVERTED 11
+#define GEN6_LOGICOPFUNCTION_COPY 12
+#define GEN6_LOGICOPFUNCTION_OR_REVERSE 13
+#define GEN6_LOGICOPFUNCTION_OR 14
+#define GEN6_LOGICOPFUNCTION_SET 15
+
+#define GEN6_MAPFILTER_NEAREST 0x0
+#define GEN6_MAPFILTER_LINEAR 0x1
+#define GEN6_MAPFILTER_ANISOTROPIC 0x2
+
+#define GEN6_MIPFILTER_NONE 0
+#define GEN6_MIPFILTER_NEAREST 1
+#define GEN6_MIPFILTER_LINEAR 3
+
+#define GEN6_POLYGON_FRONT_FACING 0
+#define GEN6_POLYGON_BACK_FACING 1
+
+#define GEN6_PREFILTER_ALWAYS 0x0
+#define GEN6_PREFILTER_NEVER 0x1
+#define GEN6_PREFILTER_LESS 0x2
+#define GEN6_PREFILTER_EQUAL 0x3
+#define GEN6_PREFILTER_LEQUAL 0x4
+#define GEN6_PREFILTER_GREATER 0x5
+#define GEN6_PREFILTER_NOTEQUAL 0x6
+#define GEN6_PREFILTER_GEQUAL 0x7
+
+#define GEN6_PROVOKING_VERTEX_0 0
+#define GEN6_PROVOKING_VERTEX_1 1
+#define GEN6_PROVOKING_VERTEX_2 2
+
+#define GEN6_RASTRULE_UPPER_LEFT 0
+#define GEN6_RASTRULE_UPPER_RIGHT 1
+
+#define GEN6_RENDERTARGET_CLAMPRANGE_UNORM 0
+#define GEN6_RENDERTARGET_CLAMPRANGE_SNORM 1
+#define GEN6_RENDERTARGET_CLAMPRANGE_FORMAT 2
+
+#define GEN6_STENCILOP_KEEP 0
+#define GEN6_STENCILOP_ZERO 1
+#define GEN6_STENCILOP_REPLACE 2
+#define GEN6_STENCILOP_INCRSAT 3
+#define GEN6_STENCILOP_DECRSAT 4
+#define GEN6_STENCILOP_INCR 5
+#define GEN6_STENCILOP_DECR 6
+#define GEN6_STENCILOP_INVERT 7
+
+#define GEN6_SURFACE_MIPMAPLAYOUT_BELOW 0
+#define GEN6_SURFACE_MIPMAPLAYOUT_RIGHT 1
+
+#define GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
+#define GEN6_SURFACEFORMAT_R32G32B32A32_SINT 0x001
+#define GEN6_SURFACEFORMAT_R32G32B32A32_UINT 0x002
+#define GEN6_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
+#define GEN6_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
+#define GEN6_SURFACEFORMAT_R64G64_FLOAT 0x005
+#define GEN6_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
+#define GEN6_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
+#define GEN6_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
+#define GEN6_SURFACEFORMAT_R32G32B32_FLOAT 0x040
+#define GEN6_SURFACEFORMAT_R32G32B32_SINT 0x041
+#define GEN6_SURFACEFORMAT_R32G32B32_UINT 0x042
+#define GEN6_SURFACEFORMAT_R32G32B32_UNORM 0x043
+#define GEN6_SURFACEFORMAT_R32G32B32_SNORM 0x044
+#define GEN6_SURFACEFORMAT_R32G32B32_SSCALED 0x045
+#define GEN6_SURFACEFORMAT_R32G32B32_USCALED 0x046
+#define GEN6_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
+#define GEN6_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
+#define GEN6_SURFACEFORMAT_R16G16B16A16_SINT 0x082
+#define GEN6_SURFACEFORMAT_R16G16B16A16_UINT 0x083
+#define GEN6_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
+#define GEN6_SURFACEFORMAT_R32G32_FLOAT 0x085
+#define GEN6_SURFACEFORMAT_R32G32_SINT 0x086
+#define GEN6_SURFACEFORMAT_R32G32_UINT 0x087
+#define GEN6_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
+#define GEN6_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
+#define GEN6_SURFACEFORMAT_L32A32_FLOAT 0x08A
+#define GEN6_SURFACEFORMAT_R32G32_UNORM 0x08B
+#define GEN6_SURFACEFORMAT_R32G32_SNORM 0x08C
+#define GEN6_SURFACEFORMAT_R64_FLOAT 0x08D
+#define GEN6_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
+#define GEN6_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
+#define GEN6_SURFACEFORMAT_A32X32_FLOAT 0x090
+#define GEN6_SURFACEFORMAT_L32X32_FLOAT 0x091
+#define GEN6_SURFACEFORMAT_I32X32_FLOAT 0x092
+#define GEN6_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
+#define GEN6_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
+#define GEN6_SURFACEFORMAT_R32G32_SSCALED 0x095
+#define GEN6_SURFACEFORMAT_R32G32_USCALED 0x096
+#define GEN6_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define GEN6_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
+#define GEN6_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
+#define GEN6_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
+#define GEN6_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
+#define GEN6_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
+#define GEN6_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
+#define GEN6_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
+#define GEN6_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
+#define GEN6_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
+#define GEN6_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
+#define GEN6_SURFACEFORMAT_R16G16_UNORM 0x0CC
+#define GEN6_SURFACEFORMAT_R16G16_SNORM 0x0CD
+#define GEN6_SURFACEFORMAT_R16G16_SINT 0x0CE
+#define GEN6_SURFACEFORMAT_R16G16_UINT 0x0CF
+#define GEN6_SURFACEFORMAT_R16G16_FLOAT 0x0D0
+#define GEN6_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
+#define GEN6_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
+#define GEN6_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
+#define GEN6_SURFACEFORMAT_R32_SINT 0x0D6
+#define GEN6_SURFACEFORMAT_R32_UINT 0x0D7
+#define GEN6_SURFACEFORMAT_R32_FLOAT 0x0D8
+#define GEN6_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
+#define GEN6_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
+#define GEN6_SURFACEFORMAT_L16A16_UNORM 0x0DF
+#define GEN6_SURFACEFORMAT_I24X8_UNORM 0x0E0
+#define GEN6_SURFACEFORMAT_L24X8_UNORM 0x0E1
+#define GEN6_SURFACEFORMAT_A24X8_UNORM 0x0E2
+#define GEN6_SURFACEFORMAT_I32_FLOAT 0x0E3
+#define GEN6_SURFACEFORMAT_L32_FLOAT 0x0E4
+#define GEN6_SURFACEFORMAT_A32_FLOAT 0x0E5
+#define GEN6_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
+#define GEN6_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
+#define GEN6_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
+#define GEN6_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
+#define GEN6_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
+#define GEN6_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
+#define GEN6_SURFACEFORMAT_L16A16_FLOAT 0x0F0
+#define GEN6_SURFACEFORMAT_R32_UNORM 0x0F1
+#define GEN6_SURFACEFORMAT_R32_SNORM 0x0F2
+#define GEN6_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
+#define GEN6_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
+#define GEN6_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
+#define GEN6_SURFACEFORMAT_R16G16_SSCALED 0x0F6
+#define GEN6_SURFACEFORMAT_R16G16_USCALED 0x0F7
+#define GEN6_SURFACEFORMAT_R32_SSCALED 0x0F8
+#define GEN6_SURFACEFORMAT_R32_USCALED 0x0F9
+#define GEN6_SURFACEFORMAT_B5G6R5_UNORM 0x100
+#define GEN6_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
+#define GEN6_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
+#define GEN6_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
+#define GEN6_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
+#define GEN6_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
+#define GEN6_SURFACEFORMAT_R8G8_UNORM 0x106
+#define GEN6_SURFACEFORMAT_R8G8_SNORM 0x107
+#define GEN6_SURFACEFORMAT_R8G8_SINT 0x108
+#define GEN6_SURFACEFORMAT_R8G8_UINT 0x109
+#define GEN6_SURFACEFORMAT_R16_UNORM 0x10A
+#define GEN6_SURFACEFORMAT_R16_SNORM 0x10B
+#define GEN6_SURFACEFORMAT_R16_SINT 0x10C
+#define GEN6_SURFACEFORMAT_R16_UINT 0x10D
+#define GEN6_SURFACEFORMAT_R16_FLOAT 0x10E
+#define GEN6_SURFACEFORMAT_I16_UNORM 0x111
+#define GEN6_SURFACEFORMAT_L16_UNORM 0x112
+#define GEN6_SURFACEFORMAT_A16_UNORM 0x113
+#define GEN6_SURFACEFORMAT_L8A8_UNORM 0x114
+#define GEN6_SURFACEFORMAT_I16_FLOAT 0x115
+#define GEN6_SURFACEFORMAT_L16_FLOAT 0x116
+#define GEN6_SURFACEFORMAT_A16_FLOAT 0x117
+#define GEN6_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define GEN6_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define GEN6_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
+#define GEN6_SURFACEFORMAT_R8G8_SSCALED 0x11C
+#define GEN6_SURFACEFORMAT_R8G8_USCALED 0x11D
+#define GEN6_SURFACEFORMAT_R16_SSCALED 0x11E
+#define GEN6_SURFACEFORMAT_R16_USCALED 0x11F
+#define GEN6_SURFACEFORMAT_R8_UNORM 0x140
+#define GEN6_SURFACEFORMAT_R8_SNORM 0x141
+#define GEN6_SURFACEFORMAT_R8_SINT 0x142
+#define GEN6_SURFACEFORMAT_R8_UINT 0x143
+#define GEN6_SURFACEFORMAT_A8_UNORM 0x144
+#define GEN6_SURFACEFORMAT_I8_UNORM 0x145
+#define GEN6_SURFACEFORMAT_L8_UNORM 0x146
+#define GEN6_SURFACEFORMAT_P4A4_UNORM 0x147
+#define GEN6_SURFACEFORMAT_A4P4_UNORM 0x148
+#define GEN6_SURFACEFORMAT_R8_SSCALED 0x149
+#define GEN6_SURFACEFORMAT_R8_USCALED 0x14A
+#define GEN6_SURFACEFORMAT_R1_UINT 0x181
+#define GEN6_SURFACEFORMAT_YCRCB_NORMAL 0x182
+#define GEN6_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
+#define GEN6_SURFACEFORMAT_BC1_UNORM 0x186
+#define GEN6_SURFACEFORMAT_BC2_UNORM 0x187
+#define GEN6_SURFACEFORMAT_BC3_UNORM 0x188
+#define GEN6_SURFACEFORMAT_BC4_UNORM 0x189
+#define GEN6_SURFACEFORMAT_BC5_UNORM 0x18A
+#define GEN6_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
+#define GEN6_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
+#define GEN6_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
+#define GEN6_SURFACEFORMAT_MONO8 0x18E
+#define GEN6_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
+#define GEN6_SURFACEFORMAT_YCRCB_SWAPY 0x190
+#define GEN6_SURFACEFORMAT_DXT1_RGB 0x191
+#define GEN6_SURFACEFORMAT_FXT1 0x192
+#define GEN6_SURFACEFORMAT_R8G8B8_UNORM 0x193
+#define GEN6_SURFACEFORMAT_R8G8B8_SNORM 0x194
+#define GEN6_SURFACEFORMAT_R8G8B8_SSCALED 0x195
+#define GEN6_SURFACEFORMAT_R8G8B8_USCALED 0x196
+#define GEN6_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
+#define GEN6_SURFACEFORMAT_R64G64B64_FLOAT 0x198
+#define GEN6_SURFACEFORMAT_BC4_SNORM 0x199
+#define GEN6_SURFACEFORMAT_BC5_SNORM 0x19A
+#define GEN6_SURFACEFORMAT_R16G16B16_UNORM 0x19C
+#define GEN6_SURFACEFORMAT_R16G16B16_SNORM 0x19D
+#define GEN6_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
+#define GEN6_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+
+#define GEN6_SURFACERETURNFORMAT_FLOAT32 0
+#define GEN6_SURFACERETURNFORMAT_S1 1
+
+#define GEN6_SURFACE_1D 0
+#define GEN6_SURFACE_2D 1
+#define GEN6_SURFACE_3D 2
+#define GEN6_SURFACE_CUBE 3
+#define GEN6_SURFACE_BUFFER 4
+#define GEN6_SURFACE_NULL 7
+
+#define GEN6_BORDER_COLOR_MODE_DEFAULT 0
+#define GEN6_BORDER_COLOR_MODE_LEGACY 1
+
+#define GEN6_TEXCOORDMODE_WRAP 0
+#define GEN6_TEXCOORDMODE_MIRROR 1
+#define GEN6_TEXCOORDMODE_CLAMP 2
+#define GEN6_TEXCOORDMODE_CUBE 3
+#define GEN6_TEXCOORDMODE_CLAMP_BORDER 4
+#define GEN6_TEXCOORDMODE_MIRROR_ONCE 5
+
+#define GEN6_THREAD_PRIORITY_NORMAL 0
+#define GEN6_THREAD_PRIORITY_HIGH 1
+
+#define GEN6_TILEWALK_XMAJOR 0
+#define GEN6_TILEWALK_YMAJOR 1
+
+#define GEN6_VERTEX_SUBPIXEL_PRECISION_8BITS 0
+#define GEN6_VERTEX_SUBPIXEL_PRECISION_4BITS 1
+
+#define GEN6_VERTEXBUFFER_ACCESS_VERTEXDATA 0
+#define GEN6_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
+
+#define GEN6_VFCOMPONENT_NOSTORE 0
+#define GEN6_VFCOMPONENT_STORE_SRC 1
+#define GEN6_VFCOMPONENT_STORE_0 2
+#define GEN6_VFCOMPONENT_STORE_1_FLT 3
+#define GEN6_VFCOMPONENT_STORE_1_INT 4
+#define GEN6_VFCOMPONENT_STORE_VID 5
+#define GEN6_VFCOMPONENT_STORE_IID 6
+#define GEN6_VFCOMPONENT_STORE_PID 7
+
+
+
+/* Execution Unit (EU) defines
+ */
+
+#define GEN6_ALIGN_1 0
+#define GEN6_ALIGN_16 1
+
+#define GEN6_ADDRESS_DIRECT 0
+#define GEN6_ADDRESS_REGISTER_INDIRECT_REGISTER 1
+
+#define GEN6_CHANNEL_X 0
+#define GEN6_CHANNEL_Y 1
+#define GEN6_CHANNEL_Z 2
+#define GEN6_CHANNEL_W 3
+
+#define GEN6_COMPRESSION_NONE 0
+#define GEN6_COMPRESSION_2NDHALF 1
+#define GEN6_COMPRESSION_COMPRESSED 2
+
+#define GEN6_CONDITIONAL_NONE 0
+#define GEN6_CONDITIONAL_Z 1
+#define GEN6_CONDITIONAL_NZ 2
+#define GEN6_CONDITIONAL_EQ 1 /* Z */
+#define GEN6_CONDITIONAL_NEQ 2 /* NZ */
+#define GEN6_CONDITIONAL_G 3
+#define GEN6_CONDITIONAL_GE 4
+#define GEN6_CONDITIONAL_L 5
+#define GEN6_CONDITIONAL_LE 6
+#define GEN6_CONDITIONAL_C 7
+#define GEN6_CONDITIONAL_O 8
+
+#define GEN6_DEBUG_NONE 0
+#define GEN6_DEBUG_BREAKPOINT 1
+
+#define GEN6_DEPENDENCY_NORMAL 0
+#define GEN6_DEPENDENCY_NOTCLEARED 1
+#define GEN6_DEPENDENCY_NOTCHECKED 2
+#define GEN6_DEPENDENCY_DISABLE 3
+
+#define GEN6_EXECUTE_1 0
+#define GEN6_EXECUTE_2 1
+#define GEN6_EXECUTE_4 2
+#define GEN6_EXECUTE_8 3
+#define GEN6_EXECUTE_16 4
+#define GEN6_EXECUTE_32 5
+
+#define GEN6_HORIZONTAL_STRIDE_0 0
+#define GEN6_HORIZONTAL_STRIDE_1 1
+#define GEN6_HORIZONTAL_STRIDE_2 2
+#define GEN6_HORIZONTAL_STRIDE_4 3
+
+#define GEN6_INSTRUCTION_NORMAL 0
+#define GEN6_INSTRUCTION_SATURATE 1
+
+#define GEN6_MASK_ENABLE 0
+#define GEN6_MASK_DISABLE 1
+
+#define GEN6_OPCODE_MOV 1
+#define GEN6_OPCODE_SEL 2
+#define GEN6_OPCODE_NOT 4
+#define GEN6_OPCODE_AND 5
+#define GEN6_OPCODE_OR 6
+#define GEN6_OPCODE_XOR 7
+#define GEN6_OPCODE_SHR 8
+#define GEN6_OPCODE_SHL 9
+#define GEN6_OPCODE_RSR 10
+#define GEN6_OPCODE_RSL 11
+#define GEN6_OPCODE_ASR 12
+#define GEN6_OPCODE_CMP 16
+#define GEN6_OPCODE_JMPI 32
+#define GEN6_OPCODE_IF 34
+#define GEN6_OPCODE_IFF 35
+#define GEN6_OPCODE_ELSE 36
+#define GEN6_OPCODE_ENDIF 37
+#define GEN6_OPCODE_DO 38
+#define GEN6_OPCODE_WHILE 39
+#define GEN6_OPCODE_BREAK 40
+#define GEN6_OPCODE_CONTINUE 41
+#define GEN6_OPCODE_HALT 42
+#define GEN6_OPCODE_MSAVE 44
+#define GEN6_OPCODE_MRESTORE 45
+#define GEN6_OPCODE_PUSH 46
+#define GEN6_OPCODE_POP 47
+#define GEN6_OPCODE_WAIT 48
+#define GEN6_OPCODE_SEND 49
+#define GEN6_OPCODE_ADD 64
+#define GEN6_OPCODE_MUL 65
+#define GEN6_OPCODE_AVG 66
+#define GEN6_OPCODE_FRC 67
+#define GEN6_OPCODE_RNDU 68
+#define GEN6_OPCODE_RNDD 69
+#define GEN6_OPCODE_RNDE 70
+#define GEN6_OPCODE_RNDZ 71
+#define GEN6_OPCODE_MAC 72
+#define GEN6_OPCODE_MACH 73
+#define GEN6_OPCODE_LZD 74
+#define GEN6_OPCODE_SAD2 80
+#define GEN6_OPCODE_SADA2 81
+#define GEN6_OPCODE_DP4 84
+#define GEN6_OPCODE_DPH 85
+#define GEN6_OPCODE_DP3 86
+#define GEN6_OPCODE_DP2 87
+#define GEN6_OPCODE_DPA2 88
+#define GEN6_OPCODE_LINE 89
+#define GEN6_OPCODE_NOP 126
+
+#define GEN6_PREDICATE_NONE 0
+#define GEN6_PREDICATE_NORMAL 1
+#define GEN6_PREDICATE_ALIGN1_ANYV 2
+#define GEN6_PREDICATE_ALIGN1_ALLV 3
+#define GEN6_PREDICATE_ALIGN1_ANY2H 4
+#define GEN6_PREDICATE_ALIGN1_ALL2H 5
+#define GEN6_PREDICATE_ALIGN1_ANY4H 6
+#define GEN6_PREDICATE_ALIGN1_ALL4H 7
+#define GEN6_PREDICATE_ALIGN1_ANY8H 8
+#define GEN6_PREDICATE_ALIGN1_ALL8H 9
+#define GEN6_PREDICATE_ALIGN1_ANY16H 10
+#define GEN6_PREDICATE_ALIGN1_ALL16H 11
+#define GEN6_PREDICATE_ALIGN16_REPLICATE_X 2
+#define GEN6_PREDICATE_ALIGN16_REPLICATE_Y 3
+#define GEN6_PREDICATE_ALIGN16_REPLICATE_Z 4
+#define GEN6_PREDICATE_ALIGN16_REPLICATE_W 5
+#define GEN6_PREDICATE_ALIGN16_ANY4H 6
+#define GEN6_PREDICATE_ALIGN16_ALL4H 7
+
+#define GEN6_ARCHITECTURE_REGISTER_FILE 0
+#define GEN6_GENERAL_REGISTER_FILE 1
+#define GEN6_MESSAGE_REGISTER_FILE 2
+#define GEN6_IMMEDIATE_VALUE 3
+
+#define GEN6_REGISTER_TYPE_UD 0
+#define GEN6_REGISTER_TYPE_D 1
+#define GEN6_REGISTER_TYPE_UW 2
+#define GEN6_REGISTER_TYPE_W 3
+#define GEN6_REGISTER_TYPE_UB 4
+#define GEN6_REGISTER_TYPE_B 5
+#define GEN6_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
+#define GEN6_REGISTER_TYPE_HF 6
+#define GEN6_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
+#define GEN6_REGISTER_TYPE_F 7
+
+#define GEN6_ARF_NULL 0x00
+#define GEN6_ARF_ADDRESS 0x10
+#define GEN6_ARF_ACCUMULATOR 0x20
+#define GEN6_ARF_FLAG 0x30
+#define GEN6_ARF_MASK 0x40
+#define GEN6_ARF_MASK_STACK 0x50
+#define GEN6_ARF_MASK_STACK_DEPTH 0x60
+#define GEN6_ARF_STATE 0x70
+#define GEN6_ARF_CONTROL 0x80
+#define GEN6_ARF_NOTIFICATION_COUNT 0x90
+#define GEN6_ARF_IP 0xA0
+
+#define GEN6_AMASK 0
+#define GEN6_IMASK 1
+#define GEN6_LMASK 2
+#define GEN6_CMASK 3
+
+
+
+#define GEN6_THREAD_NORMAL 0
+#define GEN6_THREAD_ATOMIC 1
+#define GEN6_THREAD_SWITCH 2
+
+#define GEN6_VERTICAL_STRIDE_0 0
+#define GEN6_VERTICAL_STRIDE_1 1
+#define GEN6_VERTICAL_STRIDE_2 2
+#define GEN6_VERTICAL_STRIDE_4 3
+#define GEN6_VERTICAL_STRIDE_8 4
+#define GEN6_VERTICAL_STRIDE_16 5
+#define GEN6_VERTICAL_STRIDE_32 6
+#define GEN6_VERTICAL_STRIDE_64 7
+#define GEN6_VERTICAL_STRIDE_128 8
+#define GEN6_VERTICAL_STRIDE_256 9
+#define GEN6_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
+
+#define GEN6_WIDTH_1 0
+#define GEN6_WIDTH_2 1
+#define GEN6_WIDTH_4 2
+#define GEN6_WIDTH_8 3
+#define GEN6_WIDTH_16 4
+
+#define GEN6_STATELESS_BUFFER_BOUNDARY_1K 0
+#define GEN6_STATELESS_BUFFER_BOUNDARY_2K 1
+#define GEN6_STATELESS_BUFFER_BOUNDARY_4K 2
+#define GEN6_STATELESS_BUFFER_BOUNDARY_8K 3
+#define GEN6_STATELESS_BUFFER_BOUNDARY_16K 4
+#define GEN6_STATELESS_BUFFER_BOUNDARY_32K 5
+#define GEN6_STATELESS_BUFFER_BOUNDARY_64K 6
+#define GEN6_STATELESS_BUFFER_BOUNDARY_128K 7
+#define GEN6_STATELESS_BUFFER_BOUNDARY_256K 8
+#define GEN6_STATELESS_BUFFER_BOUNDARY_512K 9
+#define GEN6_STATELESS_BUFFER_BOUNDARY_1M 10
+#define GEN6_STATELESS_BUFFER_BOUNDARY_2M 11
+
+#define GEN6_POLYGON_FACING_FRONT 0
+#define GEN6_POLYGON_FACING_BACK 1
+
+#define GEN6_MESSAGE_TARGET_NULL 0
+#define GEN6_MESSAGE_TARGET_MATH 1
+#define GEN6_MESSAGE_TARGET_SAMPLER 2
+#define GEN6_MESSAGE_TARGET_GATEWAY 3
+#define GEN6_MESSAGE_TARGET_DATAPORT_READ 4
+#define GEN6_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define GEN6_MESSAGE_TARGET_URB 6
+#define GEN6_MESSAGE_TARGET_THREAD_SPAWNER 7
+
+#define GEN6_SAMPLER_RETURN_FORMAT_FLOAT32 0
+#define GEN6_SAMPLER_RETURN_FORMAT_UINT32 2
+#define GEN6_SAMPLER_RETURN_FORMAT_SINT32 3
+
+#define GEN6_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
+#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
+#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
+#define GEN6_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
+#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
+#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
+#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
+#define GEN6_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
+#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
+#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define GEN6_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
+#define GEN6_SAMPLER_MESSAGE_SIMD8_RESINFO 2
+#define GEN6_SAMPLER_MESSAGE_SIMD16_RESINFO 2
+#define GEN6_SAMPLER_MESSAGE_SIMD4X2_LD 3
+#define GEN6_SAMPLER_MESSAGE_SIMD8_LD 3
+#define GEN6_SAMPLER_MESSAGE_SIMD16_LD 3
+
+#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
+#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
+#define GEN6_DATAPORT_OWORD_BLOCK_2_OWORDS 2
+#define GEN6_DATAPORT_OWORD_BLOCK_4_OWORDS 3
+#define GEN6_DATAPORT_OWORD_BLOCK_8_OWORDS 4
+
+#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
+#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
+
+#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
+#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
+#define GEN6_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+
+#define GEN6_DATAPORT_READ_TARGET_DATA_CACHE 0
+#define GEN6_DATAPORT_READ_TARGET_RENDER_CACHE 1
+#define GEN6_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
+
+#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
+#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
+#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
+#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
+#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
+
+#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
+#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
+#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
+#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
+#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
+#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
+#define GEN6_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+
+#define GEN6_MATH_FUNCTION_INV 1
+#define GEN6_MATH_FUNCTION_LOG 2
+#define GEN6_MATH_FUNCTION_EXP 3
+#define GEN6_MATH_FUNCTION_SQRT 4
+#define GEN6_MATH_FUNCTION_RSQ 5
+#define GEN6_MATH_FUNCTION_SIN 6 /* was 7 */
+#define GEN6_MATH_FUNCTION_COS 7 /* was 8 */
+#define GEN6_MATH_FUNCTION_SINCOS 8 /* was 6 */
+#define GEN6_MATH_FUNCTION_TAN 9
+#define GEN6_MATH_FUNCTION_POW 10
+#define GEN6_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
+#define GEN6_MATH_FUNCTION_INT_DIV_QUOTIENT 12
+#define GEN6_MATH_FUNCTION_INT_DIV_REMAINDER 13
+
+#define GEN6_MATH_INTEGER_UNSIGNED 0
+#define GEN6_MATH_INTEGER_SIGNED 1
+
+#define GEN6_MATH_PRECISION_FULL 0
+#define GEN6_MATH_PRECISION_PARTIAL 1
+
+#define GEN6_MATH_SATURATE_NONE 0
+#define GEN6_MATH_SATURATE_SATURATE 1
+
+#define GEN6_MATH_DATA_VECTOR 0
+#define GEN6_MATH_DATA_SCALAR 1
+
+#define GEN6_URB_OPCODE_WRITE 0
+
+#define GEN6_URB_SWIZZLE_NONE 0
+#define GEN6_URB_SWIZZLE_INTERLEAVE 1
+#define GEN6_URB_SWIZZLE_TRANSPOSE 2
+
+#define GEN6_SCRATCH_SPACE_SIZE_1K 0
+#define GEN6_SCRATCH_SPACE_SIZE_2K 1
+#define GEN6_SCRATCH_SPACE_SIZE_4K 2
+#define GEN6_SCRATCH_SPACE_SIZE_8K 3
+#define GEN6_SCRATCH_SPACE_SIZE_16K 4
+#define GEN6_SCRATCH_SPACE_SIZE_32K 5
+#define GEN6_SCRATCH_SPACE_SIZE_64K 6
+#define GEN6_SCRATCH_SPACE_SIZE_128K 7
+#define GEN6_SCRATCH_SPACE_SIZE_256K 8
+#define GEN6_SCRATCH_SPACE_SIZE_512K 9
+#define GEN6_SCRATCH_SPACE_SIZE_1M 10
+#define GEN6_SCRATCH_SPACE_SIZE_2M 11
+
+/* The hardware supports two different modes for border color. The
+ * default (OpenGL) mode uses floating-point color channels, while the
+ * legacy mode uses 4 bytes.
+ *
+ * More significantly, the legacy mode respects the components of the
+ * border color for channels not present in the source, (whereas the
+ * default mode will ignore the border color's alpha channel and use
+ * alpha==1 for an RGB source, for example).
+ *
+ * The legacy mode matches the semantics specified by the Render
+ * extension.
+ */
+struct gen6_sampler_default_border_color {
+ float color[4];
+};
+
+struct gen6_sampler_legacy_border_color {
+ uint8_t color[4];
+};
+
+struct gen6_sampler_state {
+ struct {
+ uint32_t shadow_function:3;
+ uint32_t lod_bias:11;
+ uint32_t min_filter:3;
+ uint32_t mag_filter:3;
+ uint32_t mip_filter:2;
+ uint32_t base_level:5;
+ uint32_t pad:1;
+ uint32_t lod_preclamp:1;
+ uint32_t border_color_mode:1;
+ uint32_t pad0:1;
+ uint32_t disable:1;
+ } ss0;
+
+ struct {
+ uint32_t r_wrap_mode:3;
+ uint32_t t_wrap_mode:3;
+ uint32_t s_wrap_mode:3;
+ uint32_t pad:3;
+ uint32_t max_lod:10;
+ uint32_t min_lod:10;
+ } ss1;
+
+ struct {
+ uint32_t border_color;
+ } ss2;
+
+ struct {
+ uint32_t pad:19;
+ uint32_t max_aniso:3;
+ uint32_t chroma_key_mode:1;
+ uint32_t chroma_key_index:2;
+ uint32_t chroma_key_enable:1;
+ uint32_t monochrome_filter_width:3;
+ uint32_t monochrome_filter_height:3;
+ } ss3;
+};
+
+struct gen6_blend_state {
+ struct {
+ uint32_t dest_blend_factor:5;
+ uint32_t source_blend_factor:5;
+ uint32_t pad3:1;
+ uint32_t blend_func:3;
+ uint32_t pad2:1;
+ uint32_t ia_dest_blend_factor:5;
+ uint32_t ia_source_blend_factor:5;
+ uint32_t pad1:1;
+ uint32_t ia_blend_func:3;
+ uint32_t pad0:1;
+ uint32_t ia_blend_enable:1;
+ uint32_t blend_enable:1;
+ } blend0;
+
+ struct {
+ uint32_t post_blend_clamp_enable:1;
+ uint32_t pre_blend_clamp_enable:1;
+ uint32_t clamp_range:2;
+ uint32_t pad0:4;
+ uint32_t x_dither_offset:2;
+ uint32_t y_dither_offset:2;
+ uint32_t dither_enable:1;
+ uint32_t alpha_test_func:3;
+ uint32_t alpha_test_enable:1;
+ uint32_t pad1:1;
+ uint32_t logic_op_func:4;
+ uint32_t logic_op_enable:1;
+ uint32_t pad2:1;
+ uint32_t write_disable_b:1;
+ uint32_t write_disable_g:1;
+ uint32_t write_disable_r:1;
+ uint32_t write_disable_a:1;
+ uint32_t pad3:1;
+ uint32_t alpha_to_coverage_dither:1;
+ uint32_t alpha_to_one:1;
+ uint32_t alpha_to_coverage:1;
+ } blend1;
+};
+
+struct gen6_color_calc_state {
+ struct {
+ uint32_t alpha_test_format:1;
+ uint32_t pad0:14;
+ uint32_t round_disable:1;
+ uint32_t bf_stencil_ref:8;
+ uint32_t stencil_ref:8;
+ } cc0;
+
+ union {
+ float alpha_ref_f;
+ struct {
+ uint32_t ui:8;
+ uint32_t pad0:24;
+ } alpha_ref_fi;
+ } cc1;
+
+ float constant_r;
+ float constant_g;
+ float constant_b;
+ float constant_a;
+};
+
+struct gen6_depth_stencil_state {
+ struct {
+ uint32_t pad0:3;
+ uint32_t bf_stencil_pass_depth_pass_op:3;
+ uint32_t bf_stencil_pass_depth_fail_op:3;
+ uint32_t bf_stencil_fail_op:3;
+ uint32_t bf_stencil_func:3;
+ uint32_t bf_stencil_enable:1;
+ uint32_t pad1:2;
+ uint32_t stencil_write_enable:1;
+ uint32_t stencil_pass_depth_pass_op:3;
+ uint32_t stencil_pass_depth_fail_op:3;
+ uint32_t stencil_fail_op:3;
+ uint32_t stencil_func:3;
+ uint32_t stencil_enable:1;
+ } ds0;
+
+ struct {
+ uint32_t bf_stencil_write_mask:8;
+ uint32_t bf_stencil_test_mask:8;
+ uint32_t stencil_write_mask:8;
+ uint32_t stencil_test_mask:8;
+ } ds1;
+
+ struct {
+ uint32_t pad0:26;
+ uint32_t depth_write_enable:1;
+ uint32_t depth_test_func:3;
+ uint32_t pad1:1;
+ uint32_t depth_test_enable:1;
+ } ds2;
+};
+
+struct gen6_surface_state {
+ struct {
+ uint32_t cube_pos_z:1;
+ uint32_t cube_neg_z:1;
+ uint32_t cube_pos_y:1;
+ uint32_t cube_neg_y:1;
+ uint32_t cube_pos_x:1;
+ uint32_t cube_neg_x:1;
+ uint32_t pad:3;
+ uint32_t render_cache_read_mode:1;
+ uint32_t mipmap_layout_mode:1;
+ uint32_t vert_line_stride_ofs:1;
+ uint32_t vert_line_stride:1;
+ uint32_t color_blend:1;
+ uint32_t writedisable_blue:1;
+ uint32_t writedisable_green:1;
+ uint32_t writedisable_red:1;
+ uint32_t writedisable_alpha:1;
+ uint32_t surface_format:9;
+ uint32_t data_return_format:1;
+ uint32_t pad0:1;
+ uint32_t surface_type:3;
+ } ss0;
+
+ struct {
+ uint32_t base_addr;
+ } ss1;
+
+ struct {
+ uint32_t render_target_rotation:2;
+ uint32_t mip_count:4;
+ uint32_t width:13;
+ uint32_t height:13;
+ } ss2;
+
+ struct {
+ uint32_t tile_walk:1;
+ uint32_t tiled_surface:1;
+ uint32_t pad:1;
+ uint32_t pitch:18;
+ uint32_t depth:11;
+ } ss3;
+
+ struct {
+ uint32_t pad:19;
+ uint32_t min_array_elt:9;
+ uint32_t min_lod:4;
+ } ss4;
+
+ struct {
+ uint32_t pad:20;
+ uint32_t y_offset:4;
+ uint32_t pad2:1;
+ uint32_t x_offset:7;
+ } ss5;
+};
+
+/* Surface state DW0 */
+#define GEN6_SURFACE_RC_READ_WRITE (1 << 8)
+#define GEN6_SURFACE_MIPLAYOUT_SHIFT 10
+#define GEN6_SURFACE_MIPMAPLAYOUT_BELOW 0
+#define GEN6_SURFACE_MIPMAPLAYOUT_RIGHT 1
+#define GEN6_SURFACE_CUBEFACE_ENABLES 0x3f
+#define GEN6_SURFACE_BLEND_ENABLED (1 << 13)
+#define GEN6_SURFACE_WRITEDISABLE_B_SHIFT 14
+#define GEN6_SURFACE_WRITEDISABLE_G_SHIFT 15
+#define GEN6_SURFACE_WRITEDISABLE_R_SHIFT 16
+#define GEN6_SURFACE_WRITEDISABLE_A_SHIFT 17
+#define GEN6_SURFACE_FORMAT_SHIFT 18
+#define GEN6_SURFACE_FORMAT_MASK INTEL_MASK(26, 18)
+
+#define GEN6_SURFACE_TYPE_SHIFT 29
+#define GEN6_SURFACE_TYPE_MASK GEN6_MASK(31, 29)
+#define GEN6_SURFACE_1D 0
+#define GEN6_SURFACE_2D 1
+#define GEN6_SURFACE_3D 2
+#define GEN6_SURFACE_CUBE 3
+#define GEN6_SURFACE_BUFFER 4
+#define GEN6_SURFACE_NULL 7
+
+/* Surface state DW2 */
+#define GEN6_SURFACE_HEIGHT_SHIFT 19
+#define GEN6_SURFACE_HEIGHT_MASK GEN6_MASK(31, 19)
+#define GEN6_SURFACE_WIDTH_SHIFT 6
+#define GEN6_SURFACE_WIDTH_MASK GEN6_MASK(18, 6)
+#define GEN6_SURFACE_LOD_SHIFT 2
+#define GEN6_SURFACE_LOD_MASK GEN6_MASK(5, 2)
+
+/* Surface state DW3 */
+#define GEN6_SURFACE_DEPTH_SHIFT 21
+#define GEN6_SURFACE_DEPTH_MASK GEN6_MASK(31, 21)
+#define GEN6_SURFACE_PITCH_SHIFT 3
+#define GEN6_SURFACE_PITCH_MASK GEN6_MASK(19, 3)
+#define GEN6_SURFACE_TILED (1 << 1)
+#define GEN6_SURFACE_TILED_Y (1 << 0)
+
+/* Surface state DW4 */
+#define GEN6_SURFACE_MIN_LOD_SHIFT 28
+#define GEN6_SURFACE_MIN_LOD_MASK GEN6_MASK(31, 28)
+
+/* Surface state DW5 */
+#define GEN6_SURFACE_X_OFFSET_SHIFT 25
+#define GEN6_SURFACE_X_OFFSET_MASK GEN6_MASK(31, 25)
+#define GEN6_SURFACE_Y_OFFSET_SHIFT 20
+#define GEN6_SURFACE_Y_OFFSET_MASK GEN6_MASK(23, 20)
+
+struct gen6_cc_viewport {
+ float min_depth;
+ float max_depth;
+};
+
+typedef enum {
+ SAMPLER_FILTER_NEAREST = 0,
+ SAMPLER_FILTER_BILINEAR,
+ FILTER_COUNT
+} sampler_filter_t;
+
+typedef enum {
+ SAMPLER_EXTEND_NONE = 0,
+ SAMPLER_EXTEND_REPEAT,
+ SAMPLER_EXTEND_PAD,
+ SAMPLER_EXTEND_REFLECT,
+ EXTEND_COUNT
+} sampler_extend_t;
+
+#endif
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
new file mode 100644
index 00000000..0dee6e55
--- /dev/null
+++ b/src/sna/kgem.c
@@ -0,0 +1,1775 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_reg.h"
+
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <errno.h>
+#include <fcntl.h>
+
+static inline void list_move(struct list *list, struct list *head)
+{
+ __list_del(list->prev, list->next);
+ list_add(list, head);
+}
+
+static inline void list_replace(struct list *old,
+ struct list *new)
+{
+ new->next = old->next;
+ new->next->prev = new;
+ new->prev = old->prev;
+ new->prev->next = new;
+}
+
+#define list_last_entry(ptr, type, member) \
+ list_entry((ptr)->prev, type, member)
+
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+
+#define DBG_NO_HW 0
+#define DBG_NO_VMAP 0
+#define DBG_NO_RELAXED_FENCING 0
+#define DBG_DUMP 0
+
+#if DEBUG_KGEM
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+#define PAGE_SIZE 4096
+
+struct kgem_partial_bo {
+ struct kgem_bo base;
+ uint32_t used, alloc;
+ uint32_t need_io : 1;
+ uint32_t write : 1;
+};
+
+static struct drm_i915_gem_exec_object2 _kgem_dummy_exec;
+
+static int gem_set_tiling(int fd, uint32_t handle, int tiling, int stride)
+{
+ struct drm_i915_gem_set_tiling set_tiling;
+ int ret;
+
+ do {
+ set_tiling.handle = handle;
+ set_tiling.tiling_mode = tiling;
+ set_tiling.stride = stride;
+
+ ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
+ } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
+ return set_tiling.tiling_mode;
+}
+
+static void *gem_mmap(int fd, uint32_t handle, int size, int prot)
+{
+ struct drm_i915_gem_mmap_gtt mmap_arg;
+ void *ptr;
+
+ DBG(("%s(handle=%d, size=%d, prot=%s)\n", __FUNCTION__,
+ handle, size, prot & PROT_WRITE ? "read/write" : "read-only"));
+
+ mmap_arg.handle = handle;
+ if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg)) {
+ assert(0);
+ return NULL;
+ }
+
+ ptr = mmap(0, size, prot, MAP_SHARED, fd, mmap_arg.offset);
+ if (ptr == MAP_FAILED) {
+ assert(0);
+ ptr = NULL;
+ }
+
+ return ptr;
+}
+
+static int gem_write(int fd, uint32_t handle,
+ int offset, int length,
+ const void *src)
+{
+ struct drm_i915_gem_pwrite pwrite;
+
+ DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
+ handle, offset, length));
+
+ pwrite.handle = handle;
+ pwrite.offset = offset;
+ pwrite.size = length;
+ pwrite.data_ptr = (uintptr_t)src;
+ return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
+}
+
+static int gem_read(int fd, uint32_t handle, const void *dst, int length)
+{
+ struct drm_i915_gem_pread pread;
+
+ DBG(("%s(handle=%d, len=%d)\n", __FUNCTION__,
+ handle, length));
+
+ pread.handle = handle;
+ pread.offset = 0;
+ pread.size = length;
+ pread.data_ptr = (uintptr_t)dst;
+ return drmIoctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
+}
+
+Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
+ const void *data, int length)
+{
+ if (gem_write(kgem->fd, bo->handle, 0, length, data))
+ return FALSE;
+
+ _kgem_retire(kgem);
+ return TRUE;
+}
+
+static uint32_t gem_create(int fd, int size)
+{
+ struct drm_i915_gem_create create;
+
+#if DEBUG_KGEM
+ assert((size & (PAGE_SIZE-1)) == 0);
+#endif
+
+ create.handle = 0;
+ create.size = size;
+ (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
+
+ return create.handle;
+}
+
+static bool
+kgem_busy(struct kgem *kgem, int handle)
+{
+ struct drm_i915_gem_busy busy;
+
+ busy.handle = handle;
+ busy.busy = !kgem->wedged;
+ (void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
+
+ return busy.busy;
+}
+
+static bool
+gem_madvise(int fd, uint32_t handle, uint32_t state)
+{
+ struct drm_i915_gem_madvise madv;
+ int ret;
+
+ madv.handle = handle;
+ madv.madv = state;
+ madv.retained = 1;
+ ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
+ assert(ret == 0);
+
+ return madv.retained;
+ (void)ret;
+}
+
+static void gem_close(int fd, uint32_t handle)
+{
+ struct drm_gem_close close;
+
+ close.handle = handle;
+ (void)drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
+}
+
+static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
+ int handle, int size)
+{
+ memset(bo, 0, sizeof(*bo));
+
+ bo->refcnt = 1;
+ bo->handle = handle;
+ bo->aperture_size = bo->size = size;
+ bo->reusable = true;
+ bo->cpu_read = true;
+ bo->cpu_write = true;
+ list_init(&bo->request);
+ list_init(&bo->list);
+
+ return bo;
+}
+
+static struct kgem_bo *__kgem_bo_alloc(int handle, int size)
+{
+ struct kgem_bo *bo;
+
+ bo = malloc(sizeof(*bo));
+ if (bo == NULL)
+ return NULL;
+
+ return __kgem_bo_init(bo, handle, size);
+}
+
+static struct kgem_request *__kgem_request_alloc(void)
+{
+ struct kgem_request *rq;
+
+ rq = malloc(sizeof(*rq));
+ assert(rq);
+ if (rq == NULL)
+ return rq;
+
+ list_init(&rq->buffers);
+
+ return rq;
+}
+
+static inline unsigned long __fls(unsigned long word)
+{
+ asm("bsr %1,%0"
+ : "=r" (word)
+ : "rm" (word));
+ return word;
+}
+
+static struct list *inactive(struct kgem *kgem,
+ int size)
+{
+ uint32_t order = __fls(size / PAGE_SIZE);
+ if (order >= ARRAY_SIZE(kgem->inactive))
+ order = ARRAY_SIZE(kgem->inactive)-1;
+ return &kgem->inactive[order];
+}
+
+void kgem_init(struct kgem *kgem, int fd, int gen)
+{
+ drm_i915_getparam_t gp;
+ struct drm_i915_gem_get_aperture aperture;
+ int i;
+
+ kgem->fd = fd;
+ kgem->gen = gen;
+ kgem->wedged = drmCommandNone(kgem->fd, DRM_I915_GEM_THROTTLE) == -EIO;
+ kgem->wedged |= DBG_NO_HW;
+
+ kgem->ring = kgem->mode = KGEM_NONE;
+ kgem->flush = 0;
+
+ kgem->nbatch = 0;
+ kgem->nreloc = 0;
+ kgem->nexec = 0;
+ kgem->surface = ARRAY_SIZE(kgem->batch);
+ list_init(&kgem->partial);
+ list_init(&kgem->requests);
+ list_init(&kgem->active);
+ for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
+ list_init(&kgem->inactive[i]);
+
+ kgem->next_request = __kgem_request_alloc();
+
+ kgem->has_vmap = 0;
+#if defined(USE_VMAP) && defined(I915_PARAM_HAS_VMAP)
+ if (!DBG_NO_VMAP) {
+ drm_i915_getparam_t gp;
+
+ gp.param = I915_PARAM_HAS_VMAP;
+ gp.value = &i;
+ kgem->has_vmap =
+ drmIoctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0 &&
+ i > 0;
+ }
+#endif
+ DBG(("%s: using vmap=%d\n", __FUNCTION__, kgem->has_vmap));
+
+ if (gen < 40) {
+ kgem->has_relaxed_fencing = 0;
+ if (!DBG_NO_RELAXED_FENCING) {
+ drm_i915_getparam_t gp;
+
+ gp.param = I915_PARAM_HAS_RELAXED_FENCING;
+ gp.value = &i;
+ if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0) {
+ if (gen < 33)
+ kgem->has_relaxed_fencing = i >= 2;
+ else
+ kgem->has_relaxed_fencing = i > 0;
+ }
+ }
+ } else
+ kgem->has_relaxed_fencing = 1;
+ DBG(("%s: has relaxed fencing=%d\n", __FUNCTION__,
+ kgem->has_relaxed_fencing));
+
+ aperture.aper_available_size = 64*1024*1024;
+ (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
+
+ kgem->aperture_high = aperture.aper_available_size * 3/4;
+ kgem->aperture_low = aperture.aper_available_size * 1/4;
+ kgem->aperture = 0;
+ DBG(("%s: aperture low=%d, high=%d\n", __FUNCTION__,
+ kgem->aperture_low, kgem->aperture_high));
+
+ i = 8;
+ gp.param = I915_PARAM_NUM_FENCES_AVAIL;
+ gp.value = &i;
+ (void)drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+ kgem->fence_max = i - 2;
+
+ DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));
+}
+
+/* XXX hopefully a good approximation */
+static uint32_t kgem_get_unique_id(struct kgem *kgem)
+{
+ uint32_t id;
+ id = ++kgem->unique_id;
+ if (id == 0)
+ id = ++kgem->unique_id;
+ return id;
+}
+
+static uint32_t kgem_surface_size(struct kgem *kgem,
+ uint32_t width,
+ uint32_t height,
+ uint32_t bpp,
+ uint32_t tiling,
+ uint32_t *pitch)
+{
+ uint32_t tile_width, tile_height;
+ uint32_t size;
+
+ if (kgem->gen == 2) {
+ if (tiling) {
+ tile_width = 512;
+ tile_height = 16;
+ } else {
+ tile_width = 64;
+ tile_height = 2;
+ }
+ } else switch (tiling) {
+ default:
+ case I915_TILING_NONE:
+ tile_width = 64;
+ tile_height = 2;
+ break;
+ case I915_TILING_X:
+ tile_width = 512;
+ tile_height = 8;
+ break;
+ case I915_TILING_Y:
+ tile_width = 128;
+ tile_height = 32;
+ break;
+ }
+
+ *pitch = ALIGN(width * bpp / 8, tile_width);
+ if (kgem->gen < 40 && tiling != I915_TILING_NONE) {
+ if (*pitch > 8192)
+ return 0;
+ for (size = tile_width; size < *pitch; size <<= 1)
+ ;
+ *pitch = size;
+ }
+
+ size = *pitch * ALIGN(height, tile_height);
+ if (kgem->has_relaxed_fencing || tiling == I915_TILING_NONE)
+ return ALIGN(size, PAGE_SIZE);
+
+ /* We need to allocate a pot fence region for a tiled buffer. */
+ if (kgem->gen < 30)
+ tile_width = 512 * 1024;
+ else
+ tile_width = 1024 * 1024;
+ while (tile_width < size)
+ tile_width *= 2;
+ return tile_width;
+}
+
+static uint32_t kgem_aligned_height(uint32_t height, uint32_t tiling)
+{
+ uint32_t tile_height;
+
+ switch (tiling) {
+ default:
+ case I915_TILING_NONE:
+ tile_height = 2;
+ break;
+ case I915_TILING_X:
+ tile_height = 8;
+ break;
+ case I915_TILING_Y:
+ tile_height = 32;
+ break;
+ }
+
+ return ALIGN(height, tile_height);
+}
+
+static struct drm_i915_gem_exec_object2 *
+kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
+{
+ struct drm_i915_gem_exec_object2 *exec;
+
+ assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
+ exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec));
+ exec->handle = bo->handle;
+ exec->offset = bo->presumed_offset;
+
+ kgem->aperture += bo->aperture_size;
+
+ return exec;
+}
+
+void _kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
+{
+ bo->exec = kgem_add_handle(kgem, bo);
+ bo->rq = kgem->next_request;
+ list_move(&bo->request, &kgem->next_request->buffers);
+ kgem->flush |= bo->flush;
+}
+
+static uint32_t kgem_end_batch(struct kgem *kgem)
+{
+ kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END;
+ if (kgem->nbatch & 1)
+ kgem->batch[kgem->nbatch++] = MI_NOOP;
+
+ return kgem->nbatch;
+}
+
+static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo)
+{
+ int n;
+
+ for (n = 0; n < kgem->nreloc; n++) {
+ if (kgem->reloc[n].target_handle == 0) {
+ kgem->reloc[n].target_handle = bo->handle;
+ kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
+ kgem->reloc[n].delta + bo->presumed_offset;
+ }
+ }
+}
+
+static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
+{
+ assert(list_is_empty(&bo->list));
+ assert(bo->refcnt == 0);
+
+ bo->src_bound = bo->dst_bound = 0;
+
+ if(!bo->reusable)
+ goto destroy;
+
+ if (!bo->deleted && !bo->exec) {
+ if (!gem_madvise(kgem->fd, bo->handle, I915_MADV_DONTNEED)) {
+ kgem->need_purge = 1;
+ goto destroy;
+ }
+
+ bo->deleted = 1;
+ }
+
+ list_move(&bo->list, (bo->rq || bo->needs_flush) ? &kgem->active : inactive(kgem, bo->size));
+ return;
+
+destroy:
+ if (!bo->exec) {
+ list_del(&bo->request);
+ gem_close(kgem->fd, bo->handle);
+ free(bo);
+ }
+}
+
+static void kgem_bo_unref(struct kgem *kgem, struct kgem_bo *bo)
+{
+ if (--bo->refcnt == 0)
+ __kgem_bo_destroy(kgem, bo);
+}
+
+void _kgem_retire(struct kgem *kgem)
+{
+ struct kgem_bo *bo, *next;
+
+ list_for_each_entry_safe(bo, next, &kgem->active, list) {
+ if (bo->rq == NULL && !kgem_busy(kgem, bo->handle)) {
+ assert(bo->needs_flush);
+ assert(bo->deleted);
+ bo->needs_flush = 0;
+ list_move(&bo->list, inactive(kgem, bo->size));
+ }
+ }
+
+ while (!list_is_empty(&kgem->requests)) {
+ struct kgem_request *rq;
+
+ rq = list_first_entry(&kgem->requests,
+ struct kgem_request,
+ list);
+ if (kgem_busy(kgem, rq->bo->handle))
+ break;
+
+ while (!list_is_empty(&rq->buffers)) {
+ bo = list_first_entry(&rq->buffers,
+ struct kgem_bo,
+ request);
+ list_del(&bo->request);
+ bo->rq = NULL;
+ bo->gpu = false;
+
+ if (bo->refcnt == 0 && !bo->needs_flush) {
+ assert(bo->deleted);
+ if (bo->reusable) {
+ list_move(&bo->list,
+ inactive(kgem, bo->size));
+ } else {
+ gem_close(kgem->fd, bo->handle);
+ free(bo);
+ }
+ }
+ }
+
+ rq->bo->refcnt--;
+ assert(rq->bo->refcnt == 0);
+ if (gem_madvise(kgem->fd, rq->bo->handle, I915_MADV_DONTNEED)) {
+ rq->bo->deleted = 1;
+ list_move(&rq->bo->list,
+ inactive(kgem, rq->bo->size));
+ } else {
+ kgem->need_purge = 1;
+ gem_close(kgem->fd, rq->bo->handle);
+ free(rq->bo);
+ }
+
+ list_del(&rq->list);
+ free(rq);
+ }
+
+ kgem->retire = 0;
+}
+
+static void kgem_commit(struct kgem *kgem)
+{
+ struct kgem_request *rq = kgem->next_request;
+ struct kgem_bo *bo, *next;
+
+ list_for_each_entry_safe(bo, next, &rq->buffers, request) {
+ bo->src_bound = bo->dst_bound = 0;
+ bo->presumed_offset = bo->exec->offset;
+ bo->exec = NULL;
+ bo->dirty = false;
+ bo->gpu = true;
+ bo->cpu_read = false;
+ bo->cpu_write = false;
+
+ if (!bo->refcnt) {
+ if (!bo->reusable) {
+destroy:
+ list_del(&bo->list);
+ list_del(&bo->request);
+ gem_close(kgem->fd, bo->handle);
+ free(bo);
+ continue;
+ }
+ if (!bo->deleted) {
+ if (!gem_madvise(kgem->fd, bo->handle,
+ I915_MADV_DONTNEED)) {
+ kgem->need_purge = 1;
+ goto destroy;
+ }
+ bo->deleted = 1;
+ }
+ }
+ }
+
+ list_add_tail(&rq->list, &kgem->requests);
+ kgem->next_request = __kgem_request_alloc();
+}
+
+static void kgem_close_list(struct kgem *kgem, struct list *head)
+{
+ while (!list_is_empty(head)) {
+ struct kgem_bo *bo;
+
+ bo = list_first_entry(head, struct kgem_bo, list);
+ gem_close(kgem->fd, bo->handle);
+ list_del(&bo->list);
+ list_del(&bo->request);
+ free(bo);
+ }
+}
+
+static void kgem_close_inactive(struct kgem *kgem)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
+ kgem_close_list(kgem, &kgem->inactive[i]);
+}
+
+static void kgem_finish_partials(struct kgem *kgem)
+{
+ struct kgem_partial_bo *bo, *next;
+
+ list_for_each_entry_safe(bo, next, &kgem->partial, base.list) {
+ if (!bo->base.exec)
+ continue;
+
+ if (bo->write && bo->need_io) {
+ DBG(("%s: handle=%d, uploading %d/%d\n",
+ __FUNCTION__, bo->base.handle, bo->used, bo->alloc));
+ gem_write(kgem->fd, bo->base.handle,
+ 0, bo->used, bo+1);
+ bo->need_io = 0;
+ }
+
+ list_del(&bo->base.list);
+ kgem_bo_unref(kgem, &bo->base);
+ }
+}
+
+static void kgem_cleanup(struct kgem *kgem)
+{
+ while (!list_is_empty(&kgem->partial)) {
+ struct kgem_bo *bo;
+
+ bo = list_first_entry(&kgem->partial,
+ struct kgem_bo,
+ list);
+ list_del(&bo->list);
+ kgem_bo_unref(kgem, bo);
+ }
+
+ while (!list_is_empty(&kgem->requests)) {
+ struct kgem_request *rq;
+
+ rq = list_first_entry(&kgem->requests,
+ struct kgem_request,
+ list);
+ while (!list_is_empty(&rq->buffers)) {
+ struct kgem_bo *bo;
+
+ bo = list_first_entry(&rq->buffers,
+ struct kgem_bo,
+ request);
+ list_del(&bo->request);
+ bo->rq = NULL;
+ bo->gpu = false;
+ if (bo->refcnt == 0) {
+ list_del(&bo->list);
+ gem_close(kgem->fd, bo->handle);
+ free(bo);
+ }
+ }
+
+ list_del(&rq->list);
+ free(rq);
+ }
+
+ kgem_close_inactive(kgem);
+}
+
+static int kgem_batch_write(struct kgem *kgem, uint32_t handle)
+{
+ int ret;
+
+ /* If there is no surface data, just upload the batch */
+ if (kgem->surface == ARRAY_SIZE(kgem->batch))
+ return gem_write(kgem->fd, handle,
+ 0, sizeof(uint32_t)*kgem->nbatch,
+ kgem->batch);
+
+ /* Are the batch pages conjoint with the surface pages? */
+ if (kgem->surface < kgem->nbatch + PAGE_SIZE/4)
+ return gem_write(kgem->fd, handle,
+ 0, sizeof(kgem->batch),
+ kgem->batch);
+
+ /* Disjoint surface/batch, upload separately */
+ ret = gem_write(kgem->fd, handle,
+ 0, sizeof(uint32_t)*kgem->nbatch,
+ kgem->batch);
+ if (ret)
+ return ret;
+
+ return gem_write(kgem->fd, handle,
+ sizeof(uint32_t)*kgem->surface,
+ sizeof(kgem->batch) - sizeof(uint32_t)*kgem->surface,
+ kgem->batch + kgem->surface);
+}
+
+void _kgem_submit(struct kgem *kgem)
+{
+ struct kgem_request *rq;
+ uint32_t batch_end;
+ int size;
+
+ assert(kgem->nbatch);
+ assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
+
+ sna_kgem_context_switch(kgem, KGEM_NONE);
+
+ batch_end = kgem_end_batch(kgem);
+ sna_kgem_flush(kgem);
+
+ DBG(("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d\n",
+ kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
+ kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture));
+
+ assert(kgem->nbatch <= ARRAY_SIZE(kgem->batch));
+ assert(kgem->nreloc <= ARRAY_SIZE(kgem->reloc));
+ assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
+ assert(kgem->nfence <= kgem->fence_max);
+#if DEBUG_BATCH
+ __kgem_batch_debug(kgem, batch_end);
+#endif
+
+ rq = kgem->next_request;
+ if (kgem->surface != ARRAY_SIZE(kgem->batch))
+ size = sizeof(kgem->batch);
+ else
+ size = kgem->nbatch * sizeof(kgem->batch[0]);
+ rq->bo = kgem_create_linear(kgem, size);
+ if (rq->bo) {
+ uint32_t handle = rq->bo->handle;
+ int i;
+
+ i = kgem->nexec++;
+ kgem->exec[i].handle = handle;
+ kgem->exec[i].relocation_count = kgem->nreloc;
+ kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc;
+ kgem->exec[i].alignment = 0;
+ kgem->exec[i].offset = 0;
+ kgem->exec[i].flags = 0;
+ kgem->exec[i].rsvd1 = 0;
+ kgem->exec[i].rsvd2 = 0;
+
+ rq->bo->exec = &kgem->exec[i];
+ list_add(&rq->bo->request, &rq->buffers);
+
+ kgem_fixup_self_relocs(kgem, rq->bo);
+ kgem_finish_partials(kgem);
+
+ if (kgem_batch_write(kgem, handle) == 0) {
+ struct drm_i915_gem_execbuffer2 execbuf;
+ int ret;
+
+ execbuf.buffers_ptr = (uintptr_t)kgem->exec;
+ execbuf.buffer_count = kgem->nexec;
+ execbuf.batch_start_offset = 0;
+ execbuf.batch_len = batch_end*4;
+ execbuf.cliprects_ptr = 0;
+ execbuf.num_cliprects = 0;
+ execbuf.DR1 = 0;
+ execbuf.DR4 = 0;
+ execbuf.flags = kgem->ring;
+ execbuf.rsvd1 = 0;
+ execbuf.rsvd2 = 0;
+
+ if (DBG_DUMP) {
+ int fd = open("/tmp/i915-batchbuffers.dump",
+ O_WRONLY | O_CREAT | O_APPEND,
+ 0666);
+ if (fd != -1) {
+ ret = write(fd, kgem->batch, batch_end*4);
+ fd = close(fd);
+ }
+ }
+
+ ret = drmIoctl(kgem->fd,
+ DRM_IOCTL_I915_GEM_EXECBUFFER2,
+ &execbuf);
+ while (ret == -1 && errno == EBUSY) {
+ drmCommandNone(kgem->fd, DRM_I915_GEM_THROTTLE);
+ ret = drmIoctl(kgem->fd,
+ DRM_IOCTL_I915_GEM_EXECBUFFER2,
+ &execbuf);
+ }
+ if (ret == -1 && errno == EIO) {
+ DBG(("%s: GPU hang detected\n", __FUNCTION__));
+ kgem->wedged = 1;
+ ret = 0;
+ }
+#if DEBUG_KGEM
+ if (ret < 0) {
+ int i;
+ ErrorF("batch (end=%d, size=%d) submit failed: %d\n",
+ batch_end, size, errno);
+
+ i = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
+ if (i != -1) {
+ ret = write(i, kgem->batch, batch_end*4);
+ close(i);
+ }
+
+ for (i = 0; i < kgem->nexec; i++) {
+ struct kgem_request *rq = kgem->next_request;
+ struct kgem_bo *bo, *found = NULL;
+
+ list_for_each_entry(bo, &rq->buffers, request) {
+ if (bo->handle == kgem->exec[i].handle) {
+ found = bo;
+ break;
+ }
+ }
+ ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, deleted %d\n",
+ i,
+ kgem->exec[i].handle,
+ (int)kgem->exec[i].offset,
+ found ? found->size : 0,
+ found ? found->tiling : 0,
+ (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
+ found ? found->deleted : 1);
+ }
+ for (i = 0; i < kgem->nreloc; i++) {
+ ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
+ i,
+ (int)kgem->reloc[i].offset,
+ kgem->reloc[i].target_handle,
+ kgem->reloc[i].delta,
+ kgem->reloc[i].read_domains,
+ kgem->reloc[i].write_domain,
+ (int)kgem->reloc[i].presumed_offset);
+ }
+ abort();
+ }
+#endif
+ assert(ret == 0);
+
+ if (DEBUG_FLUSH_SYNC) {
+ struct drm_i915_gem_set_domain set_domain;
+ int ret;
+
+ set_domain.handle = handle;
+ set_domain.read_domains = I915_GEM_DOMAIN_GTT;
+ set_domain.write_domain = I915_GEM_DOMAIN_GTT;
+
+ ret = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
+ if (ret == -1) {
+ DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
+ kgem->wedged = 1;
+ }
+ }
+ }
+ }
+
+ kgem_commit(kgem);
+ if (kgem->wedged)
+ kgem_cleanup(kgem);
+
+ kgem->nfence = 0;
+ kgem->nexec = 0;
+ kgem->nreloc = 0;
+ kgem->aperture = 0;
+ kgem->nbatch = 0;
+ kgem->surface = ARRAY_SIZE(kgem->batch);
+ kgem->mode = KGEM_NONE;
+ kgem->flush = 0;
+
+ kgem->retire = 1;
+
+ sna_kgem_reset(kgem);
+}
+
+void kgem_throttle(struct kgem *kgem)
+{
+ kgem->wedged |= drmCommandNone(kgem->fd, DRM_I915_GEM_THROTTLE) == -EIO;
+}
+
+bool kgem_needs_expire(struct kgem *kgem)
+{
+ int i;
+
+ if (!list_is_empty(&kgem->active))
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
+ if (!list_is_empty(&kgem->inactive[i]))
+ return true;
+ }
+
+ return false;
+}
+
+bool kgem_expire_cache(struct kgem *kgem)
+{
+ time_t now, expire;
+ struct kgem_bo *bo;
+ unsigned int size = 0, count = 0;
+ bool idle;
+ int i;
+
+ _kgem_retire(kgem);
+ if (kgem->wedged)
+ kgem_cleanup(kgem);
+
+ time(&now);
+ expire = 0;
+
+ idle = true;
+ for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
+ idle &= list_is_empty(&kgem->inactive[i]);
+ list_for_each_entry(bo, &kgem->inactive[i], list) {
+ assert(bo->deleted);
+ if (bo->delta) {
+ expire = now - 5;
+ break;
+ }
+
+ bo->delta = now;
+ }
+ }
+ if (!kgem->need_purge) {
+ if (idle)
+ return false;
+ if (expire == 0)
+ return true;
+ }
+
+ idle = true;
+ for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
+ while (!list_is_empty(&kgem->inactive[i])) {
+ bo = list_last_entry(&kgem->inactive[i],
+ struct kgem_bo, list);
+
+ if (!gem_madvise(kgem->fd, bo->handle,
+ I915_MADV_DONTNEED)) {
+ if (bo->delta > expire) {
+ idle = false;
+ break;
+ }
+ }
+
+ count++;
+ size += bo->size;
+
+ gem_close(kgem->fd, bo->handle);
+ list_del(&bo->list);
+ free(bo);
+ }
+ }
+
+ DBG(("%s: purge? %d -- expired %d objects, %d bytes\n", __FUNCTION__, kgem->need_purge, count, size));
+
+ kgem->need_purge = false;
+ return idle;
+ (void)count;
+ (void)size;
+}
+
+static struct kgem_bo *
+search_linear_cache(struct kgem *kgem, int size, bool active)
+{
+ struct kgem_bo *bo, *next;
+ struct list *cache;
+
+ if (!active) {
+ cache = inactive(kgem, size);
+ kgem_retire(kgem);
+ } else
+ cache = &kgem->active;
+
+ list_for_each_entry_safe(bo, next, cache, list) {
+ if (size > bo->size)
+ continue;
+
+ if (active && bo->tiling != I915_TILING_NONE)
+ continue;
+
+ list_del(&bo->list);
+
+ if (bo->deleted) {
+ if (!gem_madvise(kgem->fd, bo->handle,
+ I915_MADV_WILLNEED)) {
+ kgem->need_purge = 1;
+ goto next_bo;
+ }
+
+ bo->deleted = 0;
+ }
+
+ if (I915_TILING_NONE != bo->tiling &&
+ gem_set_tiling(kgem->fd, bo->handle,
+ I915_TILING_NONE, 0) != I915_TILING_NONE)
+ goto next_bo;
+
+ bo->tiling = I915_TILING_NONE;
+ bo->pitch = 0;
+ bo->delta = 0;
+ bo->aperture_size = bo->size;
+ DBG((" %s: found handle=%d (size=%d) in linear %s cache\n",
+ __FUNCTION__, bo->handle, bo->size,
+ active ? "active" : "inactive"));
+ assert(bo->refcnt == 0);
+ assert(bo->reusable);
+ return bo;
+next_bo:
+ list_del(&bo->request);
+ gem_close(kgem->fd, bo->handle);
+ free(bo);
+ }
+
+ return NULL;
+}
+
+struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name)
+{
+ struct drm_gem_open open_arg;
+
+ DBG(("%s(name=%d)\n", __FUNCTION__, name));
+
+ memset(&open_arg, 0, sizeof(open_arg));
+ open_arg.name = name;
+ if (drmIoctl(kgem->fd, DRM_IOCTL_GEM_OPEN, &open_arg))
+ return NULL;
+
+ DBG(("%s: new handle=%d\n", __FUNCTION__, open_arg.handle));
+ return __kgem_bo_alloc(open_arg.handle, 0);
+}
+
+struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size)
+{
+ struct kgem_bo *bo;
+ uint32_t handle;
+
+ DBG(("%s(%d)\n", __FUNCTION__, size));
+
+ size = ALIGN(size, PAGE_SIZE);
+ bo = search_linear_cache(kgem, size, false);
+ if (bo)
+ return kgem_bo_reference(bo);
+
+ handle = gem_create(kgem->fd, size);
+ if (handle == 0)
+ return NULL;
+
+ DBG(("%s: new handle=%d\n", __FUNCTION__, handle));
+ return __kgem_bo_alloc(handle, size);
+}
+
+int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int bpp)
+{
+ if (kgem->gen < 40) {
+ if (tiling) {
+ if (width * bpp > 8192 * 8) {
+ DBG(("%s: pitch too large for tliing [%d]\n",
+ __FUNCTION__, width*bpp/8));
+ return I915_TILING_NONE;
+ }
+
+ if (width > 2048 || height > 2048) {
+ DBG(("%s: large buffer (%dx%d), forcing TILING_X\n",
+ __FUNCTION__, width, height));
+ return -I915_TILING_X;
+ }
+ }
+ } else {
+ if (width*bpp > (MAXSHORT-512) * 8) {
+ DBG(("%s: large pitch [%d], forcing TILING_X\n",
+ __FUNCTION__, width*bpp/8));
+ return -I915_TILING_X;
+ }
+
+ if (tiling && (width > 8192 || height > 8192)) {
+ DBG(("%s: large tiled buffer [%dx%d], forcing TILING_X\n",
+ __FUNCTION__, width, height));
+ return -I915_TILING_X;
+ }
+ }
+
+ if (tiling == I915_TILING_Y && height < 16) {
+ DBG(("%s: too short [%d] for TILING_Y\n",
+ __FUNCTION__,height));
+ tiling = I915_TILING_X;
+ }
+ if (tiling == I915_TILING_X && height < 4) {
+ DBG(("%s: too short [%d] for TILING_X\n",
+ __FUNCTION__, height));
+ tiling = I915_TILING_NONE;
+ }
+
+ if (tiling == I915_TILING_X && width * bpp < 512/2) {
+ DBG(("%s: too thin [%d] for TILING_X\n",
+ __FUNCTION__, width));
+ tiling = I915_TILING_NONE;
+ }
+ if (tiling == I915_TILING_Y && width * bpp < 32/2) {
+ DBG(("%s: too thin [%d] for TILING_Y\n",
+ __FUNCTION__, width));
+ tiling = I915_TILING_NONE;
+ }
+
+ DBG(("%s: %dx%d -> %d\n", __FUNCTION__, width, height, tiling));
+ return tiling;
+}
+
+static bool _kgem_can_create_2d(struct kgem *kgem,
+ int width, int height, int bpp, int tiling)
+{
+ uint32_t pitch, size;
+
+ if (bpp < 8)
+ return false;
+
+ size = kgem_surface_size(kgem, width, height, bpp, tiling, &pitch);
+ if (size == 0 || size > kgem->aperture_low)
+ size = kgem_surface_size(kgem, width, height, bpp, I915_TILING_NONE, &pitch);
+ return size > 0 && size <= kgem->aperture_low;
+}
+
+#if DEBUG_KGEM
+bool kgem_can_create_2d(struct kgem *kgem,
+ int width, int height, int bpp, int tiling)
+{
+ bool ret = _kgem_can_create_2d(kgem, width, height, bpp, tiling);
+ DBG(("%s(%dx%d, bpp=%d, tiling=%d) = %d\n", __FUNCTION__,
+ width, height, bpp, tiling, ret));
+ return ret;
+}
+#else
+bool kgem_can_create_2d(struct kgem *kgem,
+ int width, int height, int bpp, int tiling)
+{
+ return _kgem_can_create_2d(kgem, width, height, bpp, tiling);
+}
+#endif
+
+static int kgem_bo_aperture_size(struct kgem *kgem, struct kgem_bo *bo)
+{
+ int size;
+
+ if (kgem->gen >= 40 || bo->tiling == I915_TILING_NONE) {
+ size = bo->size;
+ } else {
+ if (kgem->gen < 30)
+ size = 512 * 1024;
+ else
+ size = 1024 * 1024;
+ while (size < bo->size)
+ size *= 2;
+ }
+ return size;
+}
+
+struct kgem_bo *kgem_create_2d(struct kgem *kgem,
+ int width,
+ int height,
+ int bpp,
+ int tiling,
+ uint32_t flags)
+{
+ struct list *cache;
+ struct kgem_bo *bo, *next;
+ uint32_t pitch, tiled_height[3], size;
+ uint32_t handle;
+ int exact = flags & CREATE_EXACT;
+ int search;
+ int i;
+
+ if (tiling < 0)
+ tiling = -tiling, exact = 1;
+
+ DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d)\n", __FUNCTION__,
+ width, height, bpp, tiling, !!exact, !!(flags & CREATE_INACTIVE)));
+
+ assert(_kgem_can_create_2d(kgem, width, height, bpp, tiling));
+ size = kgem_surface_size(kgem, width, height, bpp, tiling, &pitch);
+ assert(size && size <= kgem->aperture_low);
+ if (flags & CREATE_INACTIVE)
+ goto skip_active_search;
+
+ for (i = 0; i <= I915_TILING_Y; i++)
+ tiled_height[i] = kgem_aligned_height(height, i);
+
+ search = 0;
+ /* Best active match first */
+ list_for_each_entry_safe(bo, next, &kgem->active, list) {
+ uint32_t s;
+
+ search++;
+
+ if (exact) {
+ if (bo->tiling != tiling)
+ continue;
+ } else {
+ if (bo->tiling > tiling)
+ continue;
+ }
+
+ if (bo->tiling) {
+ if (bo->pitch < pitch) {
+ DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
+ bo->tiling, tiling,
+ bo->pitch, pitch));
+ continue;
+ }
+ } else
+ bo->pitch = pitch;
+
+ s = bo->pitch * tiled_height[bo->tiling];
+ if (s > bo->size) {
+ DBG(("size too small: %d < %d\n",
+ bo->size, s));
+ continue;
+ }
+
+ list_del(&bo->list);
+
+ if (bo->deleted) {
+ if (!gem_madvise(kgem->fd, bo->handle,
+ I915_MADV_WILLNEED)) {
+ kgem->need_purge = 1;
+ gem_close(kgem->fd, bo->handle);
+ list_del(&bo->request);
+ free(bo);
+ continue;
+ }
+
+ bo->deleted = 0;
+ }
+
+ bo->unique_id = kgem_get_unique_id(kgem);
+ bo->delta = 0;
+ bo->aperture_size = kgem_bo_aperture_size(kgem, bo);
+ DBG((" from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
+ bo->pitch, bo->tiling, bo->handle, bo->unique_id));
+ assert(bo->refcnt == 0);
+ assert(bo->reusable);
+ return kgem_bo_reference(bo);
+ }
+
+ DBG(("searched %d active, no match\n", search));
+
+skip_active_search:
+ /* Now just look for a close match and prefer any currently active */
+ cache = inactive(kgem, size);
+ list_for_each_entry_safe(bo, next, cache, list) {
+ if (size > bo->size) {
+ DBG(("inactive too small: %d < %d\n",
+ bo->size, size));
+ continue;
+ }
+
+ if (bo->tiling != tiling ||
+ (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
+ if (tiling != gem_set_tiling(kgem->fd,
+ bo->handle,
+ tiling, pitch))
+ goto next_bo;
+ }
+
+ bo->pitch = pitch;
+ bo->tiling = tiling;
+
+ list_del(&bo->list);
+
+ if (bo->deleted) {
+ if (!gem_madvise(kgem->fd, bo->handle,
+ I915_MADV_WILLNEED)) {
+ kgem->need_purge = 1;
+ goto next_bo;
+ }
+
+ bo->deleted = 0;
+ }
+
+ bo->delta = 0;
+ bo->unique_id = kgem_get_unique_id(kgem);
+ bo->aperture_size = kgem_bo_aperture_size(kgem, bo);
+ assert(bo->pitch);
+ DBG((" from inactive: pitch=%d, tiling=%d: handle=%d, id=%d\n",
+ bo->pitch, bo->tiling, bo->handle, bo->unique_id));
+ assert(bo->refcnt == 0);
+ assert(bo->reusable);
+ return kgem_bo_reference(bo);
+
+next_bo:
+ gem_close(kgem->fd, bo->handle);
+ list_del(&bo->request);
+ free(bo);
+ continue;
+ }
+
+ handle = gem_create(kgem->fd, size);
+ if (handle == 0)
+ return NULL;
+
+ bo = __kgem_bo_alloc(handle, size);
+ if (!bo) {
+ gem_close(kgem->fd, handle);
+ return NULL;
+ }
+
+ bo->unique_id = kgem_get_unique_id(kgem);
+ bo->pitch = pitch;
+ if (tiling != I915_TILING_NONE)
+ bo->tiling = gem_set_tiling(kgem->fd, handle, tiling, pitch);
+ bo->aperture_size = kgem_bo_aperture_size(kgem, bo);
+
+ DBG((" new pitch=%d, tiling=%d, handle=%d, id=%d\n",
+ bo->pitch, bo->tiling, bo->handle, bo->unique_id));
+ return bo;
+}
+
+void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
+{
+ if (bo->proxy) {
+ kgem_bo_unref(kgem, bo->proxy);
+ list_del(&bo->request);
+ free(bo);
+ return;
+ }
+
+ __kgem_bo_destroy(kgem, bo);
+}
+
+void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo)
+{
+ /* The kernel will emit a flush *and* update its own flushing lists. */
+ kgem_busy(kgem, bo->handle);
+}
+
+bool kgem_check_bo(struct kgem *kgem, struct kgem_bo *bo)
+{
+ if (bo == NULL)
+ return true;
+
+ if (bo->exec)
+ return true;
+
+ if (kgem->aperture > kgem->aperture_low)
+ return false;
+
+ if (bo->size + kgem->aperture > kgem->aperture_high)
+ return false;
+
+ if (kgem->nexec == KGEM_EXEC_SIZE(kgem))
+ return false;
+
+ return true;
+}
+
+bool kgem_check_bo_fenced(struct kgem *kgem, ...)
+{
+ va_list ap;
+ struct kgem_bo *bo;
+ int num_fence = 0;
+ int num_exec = 0;
+ int size = 0;
+
+ if (kgem->aperture > kgem->aperture_low)
+ return false;
+
+ va_start(ap, kgem);
+ while ((bo = va_arg(ap, struct kgem_bo *))) {
+ if (bo->exec) {
+ if (kgem->gen >= 40 || bo->tiling == I915_TILING_NONE)
+ continue;
+
+ if ((bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0)
+ num_fence++;
+
+ continue;
+ }
+
+ size += bo->size;
+ num_exec++;
+ if (kgem->gen < 40 && bo->tiling)
+ num_fence++;
+ }
+ va_end(ap);
+
+ if (size + kgem->aperture > kgem->aperture_high)
+ return false;
+
+ if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem))
+ return false;
+
+ if (kgem->nfence + num_fence >= kgem->fence_max)
+ return false;
+
+ return true;
+}
+
+uint32_t kgem_add_reloc(struct kgem *kgem,
+ uint32_t pos,
+ struct kgem_bo *bo,
+ uint32_t read_write_domain,
+ uint32_t delta)
+{
+ int index;
+
+ index = kgem->nreloc++;
+ assert(index < ARRAY_SIZE(kgem->reloc));
+ kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
+ if (bo) {
+ assert(!bo->deleted);
+
+ delta += bo->delta;
+ if (bo->proxy) {
+ /* need to release the cache upon batch submit */
+ list_move(&bo->request, &kgem->next_request->buffers);
+ bo->exec = &_kgem_dummy_exec;
+ bo = bo->proxy;
+ }
+
+ assert(!bo->deleted);
+
+ if (bo->exec == NULL) {
+ _kgem_add_bo(kgem, bo);
+ if (bo->needs_flush &&
+ (read_write_domain >> 16) != I915_GEM_DOMAIN_RENDER)
+ bo->needs_flush = false;
+ }
+
+ if (read_write_domain & KGEM_RELOC_FENCED && kgem->gen < 40) {
+ if (bo->tiling &&
+ (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
+ assert(kgem->nfence < kgem->fence_max);
+ kgem->nfence++;
+ }
+ bo->exec->flags |= EXEC_OBJECT_NEEDS_FENCE;
+ }
+
+ kgem->reloc[index].delta = delta;
+ kgem->reloc[index].target_handle = bo->handle;
+ kgem->reloc[index].presumed_offset = bo->presumed_offset;
+
+ if (read_write_domain & 0x7fff)
+ bo->needs_flush = bo->dirty = true;
+
+ delta += bo->presumed_offset;
+ } else {
+ kgem->reloc[index].delta = delta;
+ kgem->reloc[index].target_handle = 0;
+ kgem->reloc[index].presumed_offset = 0;
+ }
+ kgem->reloc[index].read_domains = read_write_domain >> 16;
+ kgem->reloc[index].write_domain = read_write_domain & 0x7fff;
+
+ return delta;
+}
+
+void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
+{
+ return gem_mmap(kgem->fd, bo->handle, bo->size, prot);
+}
+
+uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo)
+{
+ struct drm_gem_flink flink;
+ int ret;
+
+ memset(&flink, 0, sizeof(flink));
+ flink.handle = bo->handle;
+ ret = drmIoctl(kgem->fd, DRM_IOCTL_GEM_FLINK, &flink);
+ if (ret)
+ return 0;
+
+ bo->reusable = false;
+ return flink.name;
+}
+
+#if defined(USE_VMAP) && defined(I915_PARAM_HAS_VMAP)
+static uint32_t gem_vmap(int fd, void *ptr, int size, int read_only)
+{
+ struct drm_i915_gem_vmap vmap;
+
+ vmap.user_ptr = (uintptr_t)ptr;
+ vmap.user_size = size;
+ vmap.flags = 0;
+ if (read_only)
+ vmap.flags |= I915_VMAP_READ_ONLY;
+
+ if (drmIoctl(fd, DRM_IOCTL_I915_GEM_VMAP, &vmap))
+ return 0;
+
+ return vmap.handle;
+}
+
+struct kgem_bo *kgem_create_map(struct kgem *kgem,
+ void *ptr, uint32_t size,
+ bool read_only)
+{
+ struct kgem_bo *bo;
+ uint32_t handle;
+
+ if (!kgem->has_vmap)
+ return NULL;
+
+ handle = gem_vmap(kgem->fd, ptr, size, read_only);
+ if (handle == 0)
+ return NULL;
+
+ bo = __kgem_bo_alloc(handle, size);
+ if (bo == NULL) {
+ gem_close(kgem->fd, handle);
+ return NULL;
+ }
+
+ bo->reusable = false;
+ bo->sync = true;
+ DBG(("%s(ptr=%p, size=%d, read_only=%d) => handle=%d\n",
+ __FUNCTION__, ptr, size, read_only, handle));
+ return bo;
+}
+#else
+static uint32_t gem_vmap(int fd, void *ptr, int size, int read_only)
+{
+ return 0;
+}
+
+struct kgem_bo *kgem_create_map(struct kgem *kgem,
+ void *ptr, uint32_t size,
+ bool read_only)
+{
+ return NULL;
+}
+#endif
+
+void kgem_bo_sync(struct kgem *kgem, struct kgem_bo *bo, bool for_write)
+{
+ struct drm_i915_gem_set_domain set_domain;
+
+ kgem_bo_submit(kgem, bo);
+ if (for_write ? bo->cpu_write : bo->cpu_read)
+ return;
+
+ set_domain.handle = bo->handle;
+ set_domain.read_domains = I915_GEM_DOMAIN_CPU;
+ set_domain.write_domain = for_write ? I915_GEM_DOMAIN_CPU : 0;
+
+ drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
+ _kgem_retire(kgem);
+ bo->cpu_read = true;
+ if (for_write)
+ bo->cpu_write = true;
+}
+
+void kgem_clear_dirty(struct kgem *kgem)
+{
+ struct kgem_request *rq = kgem->next_request;
+ struct kgem_bo *bo;
+
+ list_for_each_entry(bo, &rq->buffers, request)
+ bo->dirty = false;
+}
+
+/* Flush the contents of the RenderCache and invalidate the TextureCache */
+void kgem_emit_flush(struct kgem *kgem)
+{
+ if (kgem->nbatch == 0)
+ return;
+
+ if (!kgem_check_batch(kgem, 4)) {
+ _kgem_submit(kgem);
+ return;
+ }
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ if (kgem->ring == KGEM_BLT) {
+ kgem->batch[kgem->nbatch++] = MI_FLUSH_DW | 2;
+ kgem->batch[kgem->nbatch++] = 0;
+ kgem->batch[kgem->nbatch++] = 0;
+ kgem->batch[kgem->nbatch++] = 0;
+ } else if (kgem->gen >= 50 && 0) {
+ kgem->batch[kgem->nbatch++] = PIPE_CONTROL | 2;
+ kgem->batch[kgem->nbatch++] =
+ PIPE_CONTROL_WC_FLUSH |
+ PIPE_CONTROL_TC_FLUSH |
+ PIPE_CONTROL_NOWRITE;
+ kgem->batch[kgem->nbatch++] = 0;
+ kgem->batch[kgem->nbatch++] = 0;
+ } else {
+ if ((kgem->batch[kgem->nbatch-1] & (0xff<<23)) == MI_FLUSH)
+ kgem->nbatch--;
+ kgem->batch[kgem->nbatch++] = MI_FLUSH | MI_INVALIDATE_MAP_CACHE;
+ }
+
+ kgem_clear_dirty(kgem);
+}
+
+struct kgem_bo *kgem_create_proxy(struct kgem_bo *target,
+ int offset, int length)
+{
+ struct kgem_bo *bo;
+
+ assert(target->proxy == NULL);
+
+ bo = __kgem_bo_alloc(target->handle, length);
+ if (bo == NULL)
+ return NULL;
+
+ bo->reusable = false;
+ bo->proxy = kgem_bo_reference(target);
+ bo->delta = offset;
+ return bo;
+}
+
+struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
+ uint32_t size, uint32_t flags,
+ void **ret)
+{
+ struct kgem_partial_bo *bo;
+ bool write = !!(flags & KGEM_BUFFER_WRITE);
+ int offset = 0;
+
+ DBG(("%s: size=%d, flags=%x\n", __FUNCTION__, size, flags));
+
+ list_for_each_entry(bo, &kgem->partial, base.list) {
+ if (bo->write != write)
+ continue;
+ if (bo->used + size < bo->alloc) {
+ DBG(("%s: reusing partial buffer? used=%d, total=%d\n",
+ __FUNCTION__, bo->used, bo->alloc));
+ offset = bo->used;
+ bo->used += size;
+ break;
+ }
+ }
+
+ if (offset == 0) {
+ uint32_t handle;
+ int alloc;
+
+ alloc = (flags & KGEM_BUFFER_LAST) ? 4096 : 32 * 1024;
+ alloc = ALIGN(size, alloc);
+
+ bo = malloc(sizeof(*bo) + alloc);
+ if (bo == NULL)
+ return NULL;
+
+ handle = 0;
+ if (kgem->has_vmap)
+ handle = gem_vmap(kgem->fd, bo+1, alloc, write);
+ if (handle == 0) {
+ struct kgem_bo *old;
+
+ old = NULL;
+ if (!write)
+ old = search_linear_cache(kgem, alloc, true);
+ if (old == NULL)
+ old = search_linear_cache(kgem, alloc, false);
+ if (old) {
+ memcpy(&bo->base, old, sizeof(*old));
+ if (old->rq)
+ list_replace(&old->request,
+ &bo->base.request);
+ else
+ list_init(&bo->base.request);
+ free(old);
+ bo->base.refcnt = 1;
+ } else {
+ if (!__kgem_bo_init(&bo->base,
+ gem_create(kgem->fd, alloc),
+ alloc)) {
+ free(bo);
+ return NULL;
+ }
+ }
+ bo->need_io = true;
+ } else {
+ __kgem_bo_init(&bo->base, handle, alloc);
+ bo->base.reusable = false;
+ bo->base.sync = true;
+ bo->need_io = 0;
+ }
+
+ bo->alloc = alloc;
+ bo->used = size;
+ bo->write = write;
+
+ list_add(&bo->base.list, &kgem->partial);
+ DBG(("%s(size=%d) new handle=%d\n",
+ __FUNCTION__, alloc, bo->base.handle));
+ }
+
+ *ret = (char *)(bo+1) + offset;
+ return kgem_create_proxy(&bo->base, offset, size);
+}
+
+struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
+ const void *data,
+ int x, int y,
+ int width, int height,
+ int stride, int bpp)
+{
+ int dst_stride = ALIGN(width * bpp, 32) >> 3;
+ int size = dst_stride * height;
+ struct kgem_bo *bo;
+ void *dst;
+
+ DBG(("%s : (%d, %d), (%d, %d), stride=%d, bpp=%d\n",
+ __FUNCTION__, x, y, width, height, stride, bpp));
+
+ bo = kgem_create_buffer(kgem, size, KGEM_BUFFER_WRITE, &dst);
+ if (bo == NULL)
+ return NULL;
+
+ memcpy_blt(data, dst, bpp,
+ stride, dst_stride,
+ x, y,
+ 0, 0,
+ width, height);
+
+ bo->pitch = dst_stride;
+ return bo;
+}
+
+void kgem_buffer_sync(struct kgem *kgem, struct kgem_bo *_bo)
+{
+ struct kgem_partial_bo *bo;
+
+ if (_bo->proxy)
+ _bo = _bo->proxy;
+
+ bo = (struct kgem_partial_bo *)_bo;
+
+ DBG(("%s(need_io=%s, sync=%d)\n", __FUNCTION__,
+ bo->need_io ? bo->write ? "write" : "read" : "none",
+ bo->base.sync));
+
+ if (bo->need_io) {
+ if (bo->write)
+ gem_write(kgem->fd, bo->base.handle,
+ 0, bo->used, bo+1);
+ else
+ gem_read(kgem->fd, bo->base.handle, bo+1, bo->used);
+ _kgem_retire(kgem);
+ bo->need_io = 0;
+ }
+
+ if (bo->base.sync)
+ kgem_bo_sync(kgem, &bo->base, bo->write);
+}
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
new file mode 100644
index 00000000..37803bdf
--- /dev/null
+++ b/src/sna/kgem.h
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdarg.h>
+
+#include <i915_drm.h>
+
+#ifndef KGEM_H
+#define KGEM_H
+
+struct kgem_bo {
+ struct kgem_bo *proxy;
+
+ struct list list;
+ struct list request;
+
+ struct kgem_request *rq;
+ struct drm_i915_gem_exec_object2 *exec;
+
+ uint16_t src_bound, dst_bound;
+ uint32_t unique_id;
+ uint32_t refcnt;
+ uint32_t handle;
+ uint32_t presumed_offset;
+ uint32_t size;
+ uint32_t aperture_size;
+ uint32_t delta;
+
+ uint32_t pitch : 16;
+ uint32_t tiling : 2;
+ uint32_t reusable : 1;
+ uint32_t dirty : 1;
+ uint32_t gpu : 1;
+ uint32_t needs_flush : 1;
+ uint32_t cpu_read : 1;
+ uint32_t cpu_write : 1;
+ uint32_t flush : 1;
+ uint32_t sync : 1;
+ uint32_t deleted : 1;
+};
+
+struct kgem_request {
+ struct list list;
+ struct kgem_bo *bo;
+ struct list buffers;
+};
+
+struct kgem {
+ int fd;
+ int wedged;
+ int gen;
+
+ uint32_t unique_id;
+
+ enum kgem_mode {
+ /* order matches I915_EXEC_RING ordering */
+ KGEM_NONE = 0,
+ KGEM_RENDER,
+ KGEM_BSD,
+ KGEM_BLT,
+ } mode, ring;
+
+ struct list active;
+ struct list inactive[16];
+ struct list partial;
+ struct list requests;
+ struct kgem_request *next_request;
+
+ uint16_t nbatch;
+ uint16_t surface;
+ uint16_t nexec;
+ uint16_t nreloc;
+ uint16_t nfence;
+
+ uint32_t retire;
+ uint32_t flush;
+ uint32_t need_purge;
+
+ uint32_t has_vmap :1;
+ uint32_t has_relaxed_fencing :1;
+
+ uint16_t fence_max;
+ uint32_t aperture_high, aperture_low, aperture;
+
+ uint32_t batch[4*1024];
+ struct drm_i915_gem_exec_object2 exec[256];
+ struct drm_i915_gem_relocation_entry reloc[384];
+};
+
+#define KGEM_BATCH_RESERVED 2
+#define KGEM_RELOC_RESERVED 4
+#define KGEM_EXEC_RESERVED 1
+
+#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
+#define KGEM_BATCH_SIZE(K) (ARRAY_SIZE((K)->batch)-KGEM_BATCH_RESERVED)
+#define KGEM_EXEC_SIZE(K) (ARRAY_SIZE((K)->exec)-KGEM_EXEC_RESERVED)
+#define KGEM_RELOC_SIZE(K) (ARRAY_SIZE((K)->reloc)-KGEM_RELOC_RESERVED)
+
+void kgem_init(struct kgem *kgem, int fd, int gen);
+
+struct kgem_bo *kgem_create_map(struct kgem *kgem,
+ void *ptr, uint32_t size,
+ bool read_only);
+
+struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name);
+
+struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size);
+struct kgem_bo *kgem_create_proxy(struct kgem_bo *target,
+ int offset, int length);
+
+struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
+ const void *data,
+ int x, int y,
+ int width, int height,
+ int stride, int bpp);
+
+int kgem_choose_tiling(struct kgem *kgem,
+ int tiling, int width, int height, int bpp);
+bool kgem_can_create_2d(struct kgem *kgem,
+ int width, int height, int bpp, int tiling);
+enum {
+ CREATE_EXACT = 0x1,
+ CREATE_INACTIVE = 0x2,
+};
+struct kgem_bo *kgem_create_2d(struct kgem *kgem,
+ int width,
+ int height,
+ int bpp,
+ int tiling,
+ uint32_t flags);
+
+void _kgem_retire(struct kgem *kgem);
+static inline void kgem_retire(struct kgem *kgem)
+{
+ if (kgem->retire)
+ _kgem_retire(kgem);
+}
+
+void _kgem_submit(struct kgem *kgem);
+static inline void kgem_submit(struct kgem *kgem)
+{
+ if (kgem->nbatch)
+ _kgem_submit(kgem);
+}
+
+static inline void kgem_bo_submit(struct kgem *kgem, struct kgem_bo *bo)
+{
+ if (bo->exec)
+ _kgem_submit(kgem);
+}
+
+void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo);
+static inline void kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo)
+{
+ kgem_bo_submit(kgem, bo);
+ __kgem_flush(kgem, bo);
+}
+
+static inline struct kgem_bo *kgem_bo_reference(struct kgem_bo *bo)
+{
+ bo->refcnt++;
+ return bo;
+}
+
+void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo);
+static inline void kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
+{
+ assert(bo->refcnt);
+ if (--bo->refcnt == 0)
+ _kgem_bo_destroy(kgem, bo);
+}
+
+void kgem_emit_flush(struct kgem *kgem);
+void kgem_clear_dirty(struct kgem *kgem);
+
+extern void sna_kgem_context_switch(struct kgem *kgem, int new_mode);
+static inline void kgem_set_mode(struct kgem *kgem, enum kgem_mode mode)
+{
+#if DEBUG_FLUSH_CACHE
+ kgem_emit_flush(kgem);
+#endif
+
+#if DEBUG_FLUSH_BATCH
+ kgem_submit(kgem);
+#endif
+
+ if (kgem->mode == mode)
+ return;
+
+ sna_kgem_context_switch(kgem, mode);
+
+ kgem->mode = mode;
+}
+
+static inline void _kgem_set_mode(struct kgem *kgem, enum kgem_mode mode)
+{
+ if (kgem->nbatch)
+ kgem->mode = mode;
+}
+
+static inline bool kgem_check_batch(struct kgem *kgem, int num_dwords)
+{
+ return kgem->nbatch + num_dwords + KGEM_BATCH_RESERVED <= kgem->surface;
+}
+
+static inline bool kgem_check_reloc(struct kgem *kgem, int num_reloc)
+{
+ return kgem->nreloc + num_reloc <= KGEM_RELOC_SIZE(kgem);
+}
+
+static inline bool kgem_check_batch_with_surfaces(struct kgem *kgem,
+ int num_dwords,
+ int num_surfaces)
+{
+ return (int)(kgem->nbatch + num_dwords + KGEM_BATCH_RESERVED) <= (int)(kgem->surface - num_surfaces*8) &&
+ kgem_check_reloc(kgem, num_surfaces);
+}
+
+static inline uint32_t *kgem_get_batch(struct kgem *kgem, int num_dwords)
+{
+ if (!kgem_check_batch(kgem, num_dwords))
+ _kgem_submit(kgem);
+
+ return kgem->batch + kgem->nbatch;
+}
+
+static inline void kgem_advance_batch(struct kgem *kgem, int num_dwords)
+{
+ kgem->nbatch += num_dwords;
+}
+
+bool kgem_check_bo(struct kgem *kgem, struct kgem_bo *bo);
+bool kgem_check_bo_fenced(struct kgem *kgem, ...) __attribute__((sentinel(NULL)));
+
+void _kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo);
+static inline void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
+{
+ if (bo->proxy)
+ bo = bo->proxy;
+
+ if (bo->exec == NULL)
+ _kgem_add_bo(kgem, bo);
+}
+
+#define KGEM_RELOC_FENCED 0x8000
+uint32_t kgem_add_reloc(struct kgem *kgem,
+ uint32_t pos,
+ struct kgem_bo *bo,
+ uint32_t read_write_domains,
+ uint32_t delta);
+
+void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot);
+uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo);
+
+Bool kgem_bo_write(struct kgem *kgem,
+ struct kgem_bo *bo,
+ const void *data,
+ int length);
+
+static inline bool kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo)
+{
+ if (bo->exec)
+ return true;
+ if (!bo->gpu)
+ return false;
+
+ kgem_retire(kgem);
+ return bo->rq != NULL;
+}
+
+static inline bool kgem_bo_is_dirty(struct kgem_bo *bo)
+{
+ if (bo == NULL)
+ return FALSE;
+
+ if (bo->proxy)
+ bo = bo->proxy;
+ return bo->dirty;
+}
+static inline void kgem_bo_mark_dirty(struct kgem_bo *bo)
+{
+ if (bo->proxy)
+ bo = bo->proxy;
+ bo->dirty = true;
+}
+
+void kgem_bo_sync(struct kgem *kgem, struct kgem_bo *bo, bool for_write);
+
+#define KGEM_BUFFER_WRITE 0x1
+#define KGEM_BUFFER_LAST 0x2
+struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
+ uint32_t size, uint32_t flags,
+ void **ret);
+void kgem_buffer_sync(struct kgem *kgem, struct kgem_bo *bo);
+
+void kgem_throttle(struct kgem *kgem);
+bool kgem_needs_expire(struct kgem *kgem);
+bool kgem_expire_cache(struct kgem *kgem);
+
+#if HAS_EXTRA_DEBUG
+void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch);
+#else
+static inline void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch) {}
+#endif
+
+#endif /* KGEM_H */
diff --git a/src/sna/kgem_debug.c b/src/sna/kgem_debug.c
new file mode 100644
index 00000000..0dcd7065
--- /dev/null
+++ b/src/sna/kgem_debug.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright © 2007-2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/mman.h>
+#include <assert.h>
+
+#include "sna.h"
+#include "sna_reg.h"
+
+#include "kgem_debug.h"
+
+struct drm_i915_gem_relocation_entry *
+kgem_debug_get_reloc_entry(struct kgem *kgem, uint32_t offset)
+{
+ int i;
+
+ offset *= sizeof(uint32_t);
+
+ for (i = 0; i < kgem->nreloc; i++)
+ if (kgem->reloc[i].offset == offset)
+ return kgem->reloc+i;
+
+ return NULL;
+}
+
+struct kgem_bo *
+kgem_debug_get_bo_for_reloc_entry(struct kgem *kgem,
+ struct drm_i915_gem_relocation_entry *reloc)
+{
+ struct kgem_bo *bo;
+
+ if (reloc == NULL)
+ return NULL;
+
+ list_for_each_entry(bo, &kgem->next_request->buffers, request)
+ if (bo->handle == reloc->target_handle && bo->proxy == NULL)
+ break;
+
+ assert(&bo->request != &kgem->next_request->buffers);
+
+ return bo;
+}
+
+static int kgem_debug_handle_is_fenced(struct kgem *kgem, uint32_t handle)
+{
+ int i;
+
+ for (i = 0; i < kgem->nexec; i++)
+ if (kgem->exec[i].handle == handle)
+ return kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE;
+
+ return 0;
+}
+
+void
+kgem_debug_print(const uint32_t *data,
+ uint32_t offset, unsigned int index,
+ char *fmt, ...)
+{
+ va_list va;
+ char buf[240];
+ int len;
+
+ len = snprintf(buf, sizeof(buf),
+ "0x%08x: 0x%08x: %s",
+ (offset + index) * 4,
+ data[index],
+ index == 0 ? "" : " ");
+
+ va_start(va, fmt);
+ vsnprintf(buf + len, sizeof(buf) - len, fmt, va);
+ va_end(va);
+
+ ErrorF("%s", buf);
+}
+
+static int
+decode_nop(struct kgem *kgem, uint32_t offset)
+{
+ uint32_t *data = kgem->batch + offset;
+ kgem_debug_print(data, offset, 0, "UNKNOWN\n");
+ assert(0);
+ return 1;
+}
+
+static int
+decode_mi(struct kgem *kgem, uint32_t offset)
+{
+ static const struct {
+ uint32_t opcode;
+ int len_mask;
+ int min_len;
+ int max_len;
+ const char *name;
+ } opcodes[] = {
+ { 0x08, 0, 1, 1, "MI_ARB_ON_OFF" },
+ { 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" },
+ { 0x30, 0x3f, 3, 3, "MI_BATCH_BUFFER" },
+ { 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" },
+ { 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" },
+ { 0x04, 0, 1, 1, "MI_FLUSH" },
+ { 0x22, 0x1f, 3, 3, "MI_LOAD_REGISTER_IMM" },
+ { 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" },
+ { 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" },
+ { 0x00, 0, 1, 1, "MI_NOOP" },
+ { 0x11, 0x3f, 2, 2, "MI_OVERLAY_FLIP" },
+ { 0x07, 0, 1, 1, "MI_REPORT_HEAD" },
+ { 0x18, 0x3f, 2, 2, "MI_SET_CONTEXT" },
+ { 0x20, 0x3f, 3, 4, "MI_STORE_DATA_IMM" },
+ { 0x21, 0x3f, 3, 4, "MI_STORE_DATA_INDEX" },
+ { 0x24, 0x3f, 3, 3, "MI_STORE_REGISTER_MEM" },
+ { 0x02, 0, 1, 1, "MI_USER_INTERRUPT" },
+ { 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" },
+ { 0x16, 0x7f, 3, 3, "MI_SEMAPHORE_MBOX" },
+ { 0x26, 0x1f, 3, 4, "MI_FLUSH_DW" },
+ { 0x0b, 0, 1, 1, "MI_SUSPEND_FLUSH" },
+ };
+ uint32_t *data = kgem->batch + offset;
+ int op;
+
+ for (op = 0; op < ARRAY_SIZE(opcodes); op++) {
+ if ((data[0] & 0x1f800000) >> 23 == opcodes[op].opcode) {
+ unsigned int len = 1, i;
+
+ kgem_debug_print(data, offset, 0, "%s\n", opcodes[op].name);
+ if (opcodes[op].max_len > 1) {
+ len = (data[0] & opcodes[op].len_mask) + 2;
+ if (len < opcodes[op].min_len ||
+ len > opcodes[op].max_len)
+ {
+ ErrorF("Bad length (%d) in %s, [%d, %d]\n",
+ len, opcodes[op].name,
+ opcodes[op].min_len,
+ opcodes[op].max_len);
+ assert(0);
+ }
+ }
+
+ for (i = 1; i < len; i++)
+ kgem_debug_print(data, offset, i, "dword %d\n", i);
+
+ return len;
+ }
+ }
+
+ kgem_debug_print(data, offset, 0, "MI UNKNOWN\n");
+ assert(0);
+ return 1;
+}
+
+static int
+decode_2d(struct kgem *kgem, uint32_t offset)
+{
+ static const struct {
+ uint32_t opcode;
+ int min_len;
+ int max_len;
+ const char *name;
+ } opcodes[] = {
+ { 0x40, 5, 5, "COLOR_BLT" },
+ { 0x43, 6, 6, "SRC_COPY_BLT" },
+ { 0x01, 8, 8, "XY_SETUP_BLT" },
+ { 0x11, 9, 9, "XY_SETUP_MONO_PATTERN_SL_BLT" },
+ { 0x03, 3, 3, "XY_SETUP_CLIP_BLT" },
+ { 0x24, 2, 2, "XY_PIXEL_BLT" },
+ { 0x25, 3, 3, "XY_SCANLINES_BLT" },
+ { 0x26, 4, 4, "Y_TEXT_BLT" },
+ { 0x31, 5, 134, "XY_TEXT_IMMEDIATE_BLT" },
+ { 0x50, 6, 6, "XY_COLOR_BLT" },
+ { 0x51, 6, 6, "XY_PAT_BLT" },
+ { 0x76, 8, 8, "XY_PAT_CHROMA_BLT" },
+ { 0x72, 7, 135, "XY_PAT_BLT_IMMEDIATE" },
+ { 0x77, 9, 137, "XY_PAT_CHROMA_BLT_IMMEDIATE" },
+ { 0x52, 9, 9, "XY_MONO_PAT_BLT" },
+ { 0x59, 7, 7, "XY_MONO_PAT_FIXED_BLT" },
+ { 0x53, 8, 8, "XY_SRC_COPY_BLT" },
+ { 0x54, 8, 8, "XY_MONO_SRC_COPY_BLT" },
+ { 0x71, 9, 137, "XY_MONO_SRC_COPY_IMMEDIATE_BLT" },
+ { 0x55, 9, 9, "XY_FULL_BLT" },
+ { 0x55, 9, 137, "XY_FULL_IMMEDIATE_PATTERN_BLT" },
+ { 0x56, 9, 9, "XY_FULL_MONO_SRC_BLT" },
+ { 0x75, 10, 138, "XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT" },
+ { 0x57, 12, 12, "XY_FULL_MONO_PATTERN_BLT" },
+ { 0x58, 12, 12, "XY_FULL_MONO_PATTERN_MONO_SRC_BLT" },
+ };
+
+ unsigned int op, len;
+ char *format = NULL;
+ uint32_t *data = kgem->batch + offset;
+ struct drm_i915_gem_relocation_entry *reloc;
+
+ /* Special case the two most common ops that we detail in full */
+ switch ((data[0] & 0x1fc00000) >> 22) {
+ case 0x50:
+ kgem_debug_print(data, offset, 0,
+ "XY_COLOR_BLT (rgb %sabled, alpha %sabled, dst tile %d)\n",
+ (data[0] & (1 << 20)) ? "en" : "dis",
+ (data[0] & (1 << 21)) ? "en" : "dis",
+ (data[0] >> 11) & 1);
+
+ len = (data[0] & 0x000000ff) + 2;
+ assert(len == 6);
+
+ switch ((data[1] >> 24) & 0x3) {
+ case 0:
+ format="8";
+ break;
+ case 1:
+ format="565";
+ break;
+ case 2:
+ format="1555";
+ break;
+ case 3:
+ format="8888";
+ break;
+ }
+
+ kgem_debug_print(data, offset, 1, "format %s, pitch %d, "
+ "clipping %sabled\n", format,
+ (short)(data[1] & 0xffff),
+ data[1] & (1 << 30) ? "en" : "dis");
+ kgem_debug_print(data, offset, 2, "(%d,%d)\n",
+ data[2] & 0xffff, data[2] >> 16);
+ kgem_debug_print(data, offset, 3, "(%d,%d)\n",
+ data[3] & 0xffff, data[3] >> 16);
+ reloc = kgem_debug_get_reloc_entry(kgem, offset+4);
+ assert(reloc);
+ kgem_debug_print(data, offset, 4, "dst offset 0x%08x [handle=%d, delta=%d, read=%x, write=%x (fenced? %d)]\n",
+ data[4],
+ reloc->target_handle, reloc->delta,
+ reloc->read_domains, reloc->write_domain,
+ kgem_debug_handle_is_fenced(kgem, reloc->target_handle));
+ kgem_debug_print(data, offset, 5, "color\n");
+ return len;
+
+ case 0x53:
+ kgem_debug_print(data, offset, 0,
+ "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled, "
+ "src tile %d, dst tile %d)\n",
+ (data[0] & (1 << 20)) ? "en" : "dis",
+ (data[0] & (1 << 21)) ? "en" : "dis",
+ (data[0] >> 15) & 1,
+ (data[0] >> 11) & 1);
+
+ len = (data[0] & 0x000000ff) + 2;
+ assert(len == 8);
+
+ switch ((data[1] >> 24) & 0x3) {
+ case 0:
+ format="8";
+ break;
+ case 1:
+ format="565";
+ break;
+ case 2:
+ format="1555";
+ break;
+ case 3:
+ format="8888";
+ break;
+ }
+
+ kgem_debug_print(data, offset, 1, "format %s, dst pitch %d, "
+ "clipping %sabled\n", format,
+ (short)(data[1] & 0xffff),
+ data[1] & (1 << 30) ? "en" : "dis");
+ kgem_debug_print(data, offset, 2, "dst (%d,%d)\n",
+ data[2] & 0xffff, data[2] >> 16);
+ kgem_debug_print(data, offset, 3, "dst (%d,%d)\n",
+ data[3] & 0xffff, data[3] >> 16);
+ reloc = kgem_debug_get_reloc_entry(kgem, offset+4);
+ assert(reloc);
+ kgem_debug_print(data, offset, 4, "dst offset 0x%08x [handle=%d, delta=%d, read=%x, write=%x, (fenced? %d)]\n",
+ data[4],
+ reloc->target_handle, reloc->delta,
+ reloc->read_domains, reloc->write_domain,
+ kgem_debug_handle_is_fenced(kgem, reloc->target_handle));
+ kgem_debug_print(data, offset, 5, "src (%d,%d)\n",
+ data[5] & 0xffff, data[5] >> 16);
+ kgem_debug_print(data, offset, 6, "src pitch %d\n",
+ (short)(data[6] & 0xffff));
+ reloc = kgem_debug_get_reloc_entry(kgem, offset+7);
+ assert(reloc);
+ kgem_debug_print(data, offset, 7, "src offset 0x%08x [handle=%d, delta=%d, read=%x, write=%x (fenced? %d)]\n",
+ data[7],
+ reloc->target_handle, reloc->delta,
+ reloc->read_domains, reloc->write_domain,
+ kgem_debug_handle_is_fenced(kgem, reloc->target_handle));
+ return len;
+ }
+
+ for (op = 0; op < ARRAY_SIZE(opcodes); op++) {
+ if ((data[0] & 0x1fc00000) >> 22 == opcodes[op].opcode) {
+ unsigned int i;
+
+ len = 1;
+ kgem_debug_print(data, offset, 0, "%s\n", opcodes[op].name);
+ if (opcodes[op].max_len > 1) {
+ len = (data[0] & 0x000000ff) + 2;
+ assert(len >= opcodes[op].min_len &&
+ len <= opcodes[op].max_len);
+ }
+
+ for (i = 1; i < len; i++)
+ kgem_debug_print(data, offset, i, "dword %d\n", i);
+
+ return len;
+ }
+ }
+
+ kgem_debug_print(data, offset, 0, "2D UNKNOWN\n");
+ assert(0);
+ return 1;
+}
+
+static int (*decode_3d(int gen))(struct kgem*, uint32_t)
+{
+ if (gen >= 60) {
+ return kgem_gen6_decode_3d;
+ } else if (gen >= 50) {
+ return kgem_gen5_decode_3d;
+ } else if (gen >= 40) {
+ return kgem_gen4_decode_3d;
+ } else if (gen >= 30) {
+ return kgem_gen3_decode_3d;
+ }
+ assert(0);
+}
+
+static void (*finish_state(int gen))(struct kgem*)
+{
+ if (gen >= 60) {
+ return kgem_gen6_finish_state;
+ } else if (gen >= 50) {
+ return kgem_gen5_finish_state;
+ } else if (gen >= 40) {
+ return kgem_gen4_finish_state;
+ } else if (gen >= 30) {
+ return kgem_gen3_finish_state;
+ }
+ assert(0);
+}
+
+void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch)
+{
+ int (*const decode[])(struct kgem *, uint32_t) = {
+ decode_mi,
+ decode_nop,
+ decode_2d,
+ decode_3d(kgem->gen),
+ };
+ uint32_t offset = 0;
+
+ while (offset < nbatch) {
+ int class = (kgem->batch[offset] & 0xe0000000) >> 29;
+ assert(class < ARRAY_SIZE(decode));
+ offset += decode[class](kgem, offset);
+ }
+
+ finish_state(kgem->gen)(kgem);
+}
diff --git a/src/sna/kgem_debug.h b/src/sna/kgem_debug.h
new file mode 100644
index 00000000..f9a931df
--- /dev/null
+++ b/src/sna/kgem_debug.h
@@ -0,0 +1,28 @@
+#ifndef KGEM_DEBUG_H
+#define KGEM_DEBUG_H
+
+void
+kgem_debug_print(const uint32_t *data,
+ uint32_t offset, unsigned int index,
+ char *fmt, ...);
+
+struct drm_i915_gem_relocation_entry *
+kgem_debug_get_reloc_entry(struct kgem *kgem, uint32_t offset);
+
+struct kgem_bo *
+kgem_debug_get_bo_for_reloc_entry(struct kgem *kgem,
+ struct drm_i915_gem_relocation_entry *reloc);
+
+int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset);
+void kgem_gen6_finish_state(struct kgem *kgem);
+
+int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset);
+void kgem_gen5_finish_state(struct kgem *kgem);
+
+int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset);
+void kgem_gen4_finish_state(struct kgem *kgem);
+
+int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset);
+void kgem_gen3_finish_state(struct kgem *kgem);
+
+#endif
diff --git a/src/sna/kgem_debug_gen3.c b/src/sna/kgem_debug_gen3.c
new file mode 100644
index 00000000..da1d9fc9
--- /dev/null
+++ b/src/sna/kgem_debug_gen3.c
@@ -0,0 +1,1615 @@
+/*
+ * Copyright © 2007-2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/mman.h>
+#include <assert.h>
+
+#include "sna.h"
+#include "sna_reg.h"
+
+#include "gen3_render.h"
+
+#include "kgem_debug.h"
+
+enum type {
+ T_FLOAT32,
+ T_FLOAT16,
+};
+
+static struct state {
+ struct vertex_buffer {
+ int handle;
+ void *base;
+ const char *ptr;
+ int pitch;
+
+ struct kgem_bo *current;
+ } vb;
+ struct vertex_elements {
+ int offset;
+ bool valid;
+ enum type type;
+ int size;
+ uint8_t swizzle[4];
+ } ve[33];
+ int num_ve;
+} state;
+
+static float int_as_float(int i)
+{
+ union {
+ float f;
+ int i;
+ } x;
+ x.i = i;
+ return x.f;
+}
+
+static void gen3_update_vertex_buffer_addr(struct kgem *kgem,
+ uint32_t offset)
+{
+ uint32_t handle;
+ struct kgem_bo *bo = NULL;
+ void *base, *ptr;
+ int i;
+
+ offset *= sizeof(uint32_t);
+
+ for (i = 0; i < kgem->nreloc; i++)
+ if (kgem->reloc[i].offset == offset)
+ break;
+ assert(i < kgem->nreloc);
+ handle = kgem->reloc[i].target_handle;
+
+ if (handle == 0) {
+ base = kgem->batch;
+ } else {
+ list_for_each_entry(bo, &kgem->next_request->buffers, request)
+ if (bo->handle == handle)
+ break;
+ assert(&bo->request != &kgem->next_request->buffers);
+ base = kgem_bo_map(kgem, bo, PROT_READ);
+ }
+ ptr = (char *)base + kgem->reloc[i].delta;
+
+ if (state.vb.current)
+ munmap(state.vb.base, state.vb.current->size);
+
+ state.vb.current = bo;
+ state.vb.base = base;
+ state.vb.ptr = ptr;
+}
+
+static void gen3_update_vertex_buffer_pitch(struct kgem *kgem,
+ uint32_t offset)
+{
+ state.vb.pitch = kgem->batch[offset] >> 16 & 0x3f;
+ state.vb.pitch *= sizeof(uint32_t);
+}
+
+static void gen3_update_vertex_elements(struct kgem *kgem, uint32_t data)
+{
+ state.ve[1].valid = 1;
+
+ switch ((data >> 6) & 7) {
+ case 1:
+ state.ve[1].type = T_FLOAT32;
+ state.ve[1].size = 3;
+ state.ve[1].swizzle[0] = 1;
+ state.ve[1].swizzle[1] = 1;
+ state.ve[1].swizzle[2] = 1;
+ state.ve[1].swizzle[3] = 3;
+ break;
+ case 2:
+ state.ve[1].type = T_FLOAT32;
+ state.ve[1].size = 4;
+ state.ve[1].swizzle[0] = 1;
+ state.ve[1].swizzle[1] = 1;
+ state.ve[1].swizzle[2] = 1;
+ state.ve[1].swizzle[3] = 1;
+ break;
+ case 3:
+ state.ve[1].type = T_FLOAT32;
+ state.ve[1].size = 2;
+ state.ve[1].swizzle[0] = 1;
+ state.ve[1].swizzle[1] = 1;
+ state.ve[1].swizzle[2] = 2;
+ state.ve[1].swizzle[3] = 3;
+ break;
+ case 4:
+ state.ve[1].type = T_FLOAT32;
+ state.ve[1].size = 3;
+ state.ve[1].swizzle[0] = 1;
+ state.ve[1].swizzle[1] = 1;
+ state.ve[1].swizzle[2] = 3;
+ state.ve[1].swizzle[3] = 1;
+ break;
+ }
+
+ state.ve[2].valid = 0;
+ state.ve[3].valid = 0;
+}
+
+static void gen3_update_vertex_texcoords(struct kgem *kgem, uint32_t data)
+{
+ int id;
+ for (id = 0; id < 8; id++) {
+ uint32_t fmt = (data >> (id*4)) & 0xf;
+ int width;
+
+ state.ve[id+4].valid = fmt != 0xf;
+
+ width = 0;
+ switch (fmt) {
+ case 0:
+ state.ve[id+4].type = T_FLOAT32;
+ width = state.ve[id+4].size = 2;
+ break;
+ case 1:
+ state.ve[id+4].type = T_FLOAT32;
+ width = state.ve[id+4].size = 3;
+ break;
+ case 2:
+ state.ve[id+4].type = T_FLOAT32;
+ width = state.ve[id+4].size = 4;
+ break;
+ case 3:
+ state.ve[id+4].type = T_FLOAT32;
+ width = state.ve[id+4].size = 1;
+ break;
+ case 4:
+ state.ve[id+4].type = T_FLOAT16;
+ width = state.ve[id+4].size = 2;
+ break;
+ case 5:
+ state.ve[id+4].type = T_FLOAT16;
+ width = state.ve[id+4].size = 4;
+ break;
+ }
+
+ state.ve[id+4].swizzle[0] = width > 0 ? 1 : 2;
+ state.ve[id+4].swizzle[1] = width > 1 ? 1 : 2;
+ state.ve[id+4].swizzle[2] = width > 2 ? 1 : 2;
+ state.ve[id+4].swizzle[3] = width > 3 ? 1 : 2;
+ }
+}
+
+static void gen3_update_vertex_elements_offsets(struct kgem *kgem)
+{
+ int i, offset;
+
+ for (i = offset = 0; i < ARRAY_SIZE(state.ve); i++) {
+ int size;
+
+ if (!state.ve[i].valid)
+ continue;
+
+ size = 0;
+ switch (state.ve[i].type) {
+ case T_FLOAT16:
+ size = 4;
+ break;
+ case T_FLOAT32:
+ size = 4;
+ break;
+ }
+ state.ve[i].offset = offset;
+ offset += size * state.ve[i].size;
+ state.num_ve = i;
+ }
+}
+
+static void vertices_float32_out(const struct vertex_elements *ve, const float *f, int max)
+{
+ int c;
+
+ ErrorF("(");
+ for (c = 0; c < max; c++) {
+ switch (ve->swizzle[c]) {
+ case 0: ErrorF("#"); break;
+ case 1: ErrorF("%f", f[c]); break;
+ case 2: ErrorF("0.0"); break;
+ case 3: ErrorF("1.0"); break;
+ case 4: ErrorF("0x1"); break;
+ case 5: break;
+ default: ErrorF("?");
+ }
+ if (c < max-1)
+ ErrorF(", ");
+ }
+ ErrorF(")");
+}
+
+static void ve_out(const struct vertex_elements *ve, const void *ptr)
+{
+ switch (ve->type) {
+ case T_FLOAT32:
+ vertices_float32_out(ve, ptr, ve->size);
+ break;
+ case T_FLOAT16:
+ //vertices_float16_out(ve, ptr, ve->size);
+ break;
+ }
+}
+
+static void indirect_vertex_out(struct kgem *kgem, uint32_t v)
+{
+ const struct vertex_buffer *vb = &state.vb;
+ int i = 1;
+
+ do {
+ const struct vertex_elements *ve = &state.ve[i];
+ const void *ptr = vb->ptr + v * vb->pitch + ve->offset;
+
+ if (!ve->valid)
+ continue;
+
+ ve_out(ve, ptr);
+
+ while (++i <= state.num_ve && !state.ve[i].valid)
+ ;
+
+ if (i <= state.num_ve)
+ ErrorF(", ");
+ } while (i <= state.num_ve);
+}
+
+static int inline_vertex_out(struct kgem *kgem, void *base)
+{
+ const struct vertex_buffer *vb = &state.vb;
+ int i = 1;
+
+ do {
+ const struct vertex_elements *ve = &state.ve[i];
+ const void *ptr = (char *)base + ve->offset;
+
+ if (!ve->valid)
+ continue;
+
+ ve_out(ve, ptr);
+
+ while (++i <= state.num_ve && !state.ve[i].valid)
+ ;
+
+ if (i <= state.num_ve)
+ ErrorF(", ");
+ } while (i <= state.num_ve);
+
+ return vb->pitch;
+}
+
+static int
+gen3_decode_3d_1c(struct kgem *kgem, uint32_t offset)
+{
+ uint32_t *data = kgem->batch + offset;
+ uint32_t opcode;
+
+ opcode = (data[0] & 0x00f80000) >> 19;
+
+ switch (opcode) {
+ case 0x11:
+ kgem_debug_print(data, offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n");
+ return 1;
+ case 0x10:
+ kgem_debug_print(data, offset, 0, "3DSTATE_SCISSOR_ENABLE %s\n",
+ data[0]&1?"enabled":"disabled");
+ return 1;
+ case 0x01:
+ kgem_debug_print(data, offset, 0, "3DSTATE_MAP_COORD_SET_I830\n");
+ return 1;
+ case 0x0a:
+ kgem_debug_print(data, offset, 0, "3DSTATE_MAP_CUBE_I830\n");
+ return 1;
+ case 0x05:
+ kgem_debug_print(data, offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n");
+ return 1;
+ }
+
+ kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n",
+ opcode);
+ assert(0);
+ return 1;
+}
+
+/** Sets the string dstname to describe the destination of the PS instruction */
+static void
+gen3_get_instruction_dst(uint32_t *data, int i, char *dstname, int do_mask)
+{
+ uint32_t a0 = data[i];
+ int dst_nr = (a0 >> 14) & 0xf;
+ char dstmask[8];
+ char *sat;
+
+ if (do_mask) {
+ if (((a0 >> 10) & 0xf) == 0xf) {
+ dstmask[0] = 0;
+ } else {
+ int dstmask_index = 0;
+
+ dstmask[dstmask_index++] = '.';
+ if (a0 & (1 << 10))
+ dstmask[dstmask_index++] = 'x';
+ if (a0 & (1 << 11))
+ dstmask[dstmask_index++] = 'y';
+ if (a0 & (1 << 12))
+ dstmask[dstmask_index++] = 'z';
+ if (a0 & (1 << 13))
+ dstmask[dstmask_index++] = 'w';
+ dstmask[dstmask_index++] = 0;
+ }
+
+ if (a0 & (1 << 22))
+ sat = ".sat";
+ else
+ sat = "";
+ } else {
+ dstmask[0] = 0;
+ sat = "";
+ }
+
+ switch ((a0 >> 19) & 0x7) {
+ case 0:
+ assert(dst_nr <= 15);
+ sprintf(dstname, "R%d%s%s", dst_nr, dstmask, sat);
+ break;
+ case 4:
+ assert(dst_nr == 0);
+ sprintf(dstname, "oC%s%s", dstmask, sat);
+ break;
+ case 5:
+ assert(dst_nr == 0);
+ sprintf(dstname, "oD%s%s", dstmask, sat);
+ break;
+ case 6:
+ assert(dst_nr <= 3);
+ sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat);
+ break;
+ default:
+ sprintf(dstname, "RESERVED");
+ break;
+ }
+}
+
+static char *
+gen3_get_channel_swizzle(uint32_t select)
+{
+ switch (select & 0x7) {
+ case 0:
+ return (select & 8) ? "-x" : "x";
+ case 1:
+ return (select & 8) ? "-y" : "y";
+ case 2:
+ return (select & 8) ? "-z" : "z";
+ case 3:
+ return (select & 8) ? "-w" : "w";
+ case 4:
+ return (select & 8) ? "-0" : "0";
+ case 5:
+ return (select & 8) ? "-1" : "1";
+ default:
+ return (select & 8) ? "-bad" : "bad";
+ }
+}
+
+static void
+gen3_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name)
+{
+ switch (src_type) {
+ case 0:
+ sprintf(name, "R%d", src_nr);
+ assert(src_nr <= 15);
+ break;
+ case 1:
+ if (src_nr < 8)
+ sprintf(name, "T%d", src_nr);
+ else if (src_nr == 8)
+ sprintf(name, "DIFFUSE");
+ else if (src_nr == 9)
+ sprintf(name, "SPECULAR");
+ else if (src_nr == 10)
+ sprintf(name, "FOG");
+ else {
+ assert(0);
+ sprintf(name, "RESERVED");
+ }
+ break;
+ case 2:
+ sprintf(name, "C%d", src_nr);
+ assert(src_nr <= 31);
+ break;
+ case 4:
+ sprintf(name, "oC");
+ assert(src_nr == 0);
+ break;
+ case 5:
+ sprintf(name, "oD");
+ assert(src_nr == 0);
+ break;
+ case 6:
+ sprintf(name, "U%d", src_nr);
+ assert(src_nr <= 3);
+ break;
+ default:
+ sprintf(name, "RESERVED");
+ assert(0);
+ break;
+ }
+}
+
+static void
+gen3_get_instruction_src0(uint32_t *data, int i, char *srcname)
+{
+ uint32_t a0 = data[i];
+ uint32_t a1 = data[i + 1];
+ int src_nr = (a0 >> 2) & 0x1f;
+ char *swizzle_x = gen3_get_channel_swizzle((a1 >> 28) & 0xf);
+ char *swizzle_y = gen3_get_channel_swizzle((a1 >> 24) & 0xf);
+ char *swizzle_z = gen3_get_channel_swizzle((a1 >> 20) & 0xf);
+ char *swizzle_w = gen3_get_channel_swizzle((a1 >> 16) & 0xf);
+ char swizzle[100];
+
+ gen3_get_instruction_src_name((a0 >> 7) & 0x7, src_nr, srcname);
+ sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
+ if (strcmp(swizzle, ".xyzw") != 0)
+ strcat(srcname, swizzle);
+}
+
+static void
+gen3_get_instruction_src1(uint32_t *data, int i, char *srcname)
+{
+ uint32_t a1 = data[i + 1];
+ uint32_t a2 = data[i + 2];
+ int src_nr = (a1 >> 8) & 0x1f;
+ char *swizzle_x = gen3_get_channel_swizzle((a1 >> 4) & 0xf);
+ char *swizzle_y = gen3_get_channel_swizzle((a1 >> 0) & 0xf);
+ char *swizzle_z = gen3_get_channel_swizzle((a2 >> 28) & 0xf);
+ char *swizzle_w = gen3_get_channel_swizzle((a2 >> 24) & 0xf);
+ char swizzle[100];
+
+ gen3_get_instruction_src_name((a1 >> 13) & 0x7, src_nr, srcname);
+ sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
+ if (strcmp(swizzle, ".xyzw") != 0)
+ strcat(srcname, swizzle);
+}
+
+static void
+gen3_get_instruction_src2(uint32_t *data, int i, char *srcname)
+{
+ uint32_t a2 = data[i + 2];
+ int src_nr = (a2 >> 16) & 0x1f;
+ char *swizzle_x = gen3_get_channel_swizzle((a2 >> 12) & 0xf);
+ char *swizzle_y = gen3_get_channel_swizzle((a2 >> 8) & 0xf);
+ char *swizzle_z = gen3_get_channel_swizzle((a2 >> 4) & 0xf);
+ char *swizzle_w = gen3_get_channel_swizzle((a2 >> 0) & 0xf);
+ char swizzle[100];
+
+ gen3_get_instruction_src_name((a2 >> 21) & 0x7, src_nr, srcname);
+ sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
+ if (strcmp(swizzle, ".xyzw") != 0)
+ strcat(srcname, swizzle);
+}
+
+static void
+gen3_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name)
+{
+ switch (src_type) {
+ case 0:
+ sprintf(name, "R%d", src_nr);
+ assert(src_nr <= 15);
+ break;
+ case 1:
+ if (src_nr < 8)
+ sprintf(name, "T%d", src_nr);
+ else if (src_nr == 8)
+ sprintf(name, "DIFFUSE");
+ else if (src_nr == 9)
+ sprintf(name, "SPECULAR");
+ else if (src_nr == 10)
+ sprintf(name, "FOG");
+ else {
+ assert(0);
+ sprintf(name, "RESERVED");
+ }
+ break;
+ case 4:
+ sprintf(name, "oC");
+ assert(src_nr == 0);
+ break;
+ case 5:
+ sprintf(name, "oD");
+ assert(src_nr == 0);
+ break;
+ default:
+ assert(0);
+ sprintf(name, "RESERVED");
+ break;
+ }
+}
+
+static void
+gen3_decode_alu1(uint32_t *data, uint32_t offset,
+ int i, char *instr_prefix, char *op_name)
+{
+ char dst[100], src0[100];
+
+ gen3_get_instruction_dst(data, i, dst, 1);
+ gen3_get_instruction_src0(data, i, src0);
+
+ kgem_debug_print(data, offset, i++, "%s: %s %s, %s\n", instr_prefix,
+ op_name, dst, src0);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+gen3_decode_alu2(uint32_t *data, uint32_t offset,
+ int i, char *instr_prefix, char *op_name)
+{
+ char dst[100], src0[100], src1[100];
+
+ gen3_get_instruction_dst(data, i, dst, 1);
+ gen3_get_instruction_src0(data, i, src0);
+ gen3_get_instruction_src1(data, i, src1);
+
+ kgem_debug_print(data, offset, i++, "%s: %s %s, %s, %s\n", instr_prefix,
+ op_name, dst, src0, src1);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+gen3_decode_alu3(uint32_t *data, uint32_t offset,
+ int i, char *instr_prefix, char *op_name)
+{
+ char dst[100], src0[100], src1[100], src2[100];
+
+ gen3_get_instruction_dst(data, i, dst, 1);
+ gen3_get_instruction_src0(data, i, src0);
+ gen3_get_instruction_src1(data, i, src1);
+ gen3_get_instruction_src2(data, i, src2);
+
+ kgem_debug_print(data, offset, i++, "%s: %s %s, %s, %s, %s\n", instr_prefix,
+ op_name, dst, src0, src1, src2);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+gen3_decode_tex(uint32_t *data, uint32_t offset, int i, char *instr_prefix,
+ char *tex_name)
+{
+ uint32_t t0 = data[i];
+ uint32_t t1 = data[i + 1];
+ char dst_name[100];
+ char addr_name[100];
+ int sampler_nr;
+
+ gen3_get_instruction_dst(data, i, dst_name, 0);
+ gen3_get_instruction_addr((t1 >> 24) & 0x7,
+ (t1 >> 17) & 0xf,
+ addr_name);
+ sampler_nr = t0 & 0xf;
+
+ kgem_debug_print(data, offset, i++, "%s: %s %s, S%d, %s\n", instr_prefix,
+ tex_name, dst_name, sampler_nr, addr_name);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+gen3_decode_dcl(uint32_t *data, uint32_t offset, int i, char *instr_prefix)
+{
+ uint32_t d0 = data[i];
+ char *sampletype;
+ int dcl_nr = (d0 >> 14) & 0xf;
+ char *dcl_x = d0 & (1 << 10) ? "x" : "";
+ char *dcl_y = d0 & (1 << 11) ? "y" : "";
+ char *dcl_z = d0 & (1 << 12) ? "z" : "";
+ char *dcl_w = d0 & (1 << 13) ? "w" : "";
+ char dcl_mask[10];
+
+ switch ((d0 >> 19) & 0x3) {
+ case 1:
+ sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w);
+ assert (strcmp(dcl_mask, "."));
+
+ assert(dcl_nr <= 10);
+ if (dcl_nr < 8) {
+ if (strcmp(dcl_mask, ".x") != 0 &&
+ strcmp(dcl_mask, ".xy") != 0 &&
+ strcmp(dcl_mask, ".xz") != 0 &&
+ strcmp(dcl_mask, ".w") != 0 &&
+ strcmp(dcl_mask, ".xyzw") != 0) {
+ assert(0);
+ }
+ kgem_debug_print(data, offset, i++, "%s: DCL T%d%s\n", instr_prefix,
+ dcl_nr, dcl_mask);
+ } else {
+ if (strcmp(dcl_mask, ".xz") == 0)
+ assert(0);
+ else if (strcmp(dcl_mask, ".xw") == 0)
+ assert(0);
+ else if (strcmp(dcl_mask, ".xzw") == 0)
+ assert(0);
+
+ if (dcl_nr == 8) {
+ kgem_debug_print(data, offset, i++, "%s: DCL DIFFUSE%s\n", instr_prefix,
+ dcl_mask);
+ } else if (dcl_nr == 9) {
+ kgem_debug_print(data, offset, i++, "%s: DCL SPECULAR%s\n", instr_prefix,
+ dcl_mask);
+ } else if (dcl_nr == 10) {
+ kgem_debug_print(data, offset, i++, "%s: DCL FOG%s\n", instr_prefix,
+ dcl_mask);
+ }
+ }
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ break;
+ case 3:
+ switch ((d0 >> 22) & 0x3) {
+ case 0:
+ sampletype = "2D";
+ break;
+ case 1:
+ sampletype = "CUBE";
+ break;
+ case 2:
+ sampletype = "3D";
+ break;
+ default:
+ sampletype = "RESERVED";
+ break;
+ }
+ assert(dcl_nr <= 15);
+ kgem_debug_print(data, offset, i++, "%s: DCL S%d %s\n", instr_prefix,
+ dcl_nr, sampletype);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ break;
+ default:
+ kgem_debug_print(data, offset, i++, "%s: DCL RESERVED%d\n", instr_prefix, dcl_nr);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ }
+}
+
+static void
+gen3_decode_instruction(uint32_t *data, uint32_t offset,
+ int i, char *instr_prefix)
+{
+ switch ((data[i] >> 24) & 0x1f) {
+ case 0x0:
+ kgem_debug_print(data, offset, i++, "%s: NOP\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ break;
+ case 0x01:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "ADD");
+ break;
+ case 0x02:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "MOV");
+ break;
+ case 0x03:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "MUL");
+ break;
+ case 0x04:
+ gen3_decode_alu3(data, offset, i, instr_prefix, "MAD");
+ break;
+ case 0x05:
+ gen3_decode_alu3(data, offset, i, instr_prefix, "DP2ADD");
+ break;
+ case 0x06:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "DP3");
+ break;
+ case 0x07:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "DP4");
+ break;
+ case 0x08:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "FRC");
+ break;
+ case 0x09:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "RCP");
+ break;
+ case 0x0a:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "RSQ");
+ break;
+ case 0x0b:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "EXP");
+ break;
+ case 0x0c:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "LOG");
+ break;
+ case 0x0d:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "CMP");
+ break;
+ case 0x0e:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "MIN");
+ break;
+ case 0x0f:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "MAX");
+ break;
+ case 0x10:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "FLR");
+ break;
+ case 0x11:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "MOD");
+ break;
+ case 0x12:
+ gen3_decode_alu1(data, offset, i, instr_prefix, "TRC");
+ break;
+ case 0x13:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "SGE");
+ break;
+ case 0x14:
+ gen3_decode_alu2(data, offset, i, instr_prefix, "SLT");
+ break;
+ case 0x15:
+ gen3_decode_tex(data, offset, i, instr_prefix, "TEXLD");
+ break;
+ case 0x16:
+ gen3_decode_tex(data, offset, i, instr_prefix, "TEXLDP");
+ break;
+ case 0x17:
+ gen3_decode_tex(data, offset, i, instr_prefix, "TEXLDB");
+ break;
+ case 0x19:
+ gen3_decode_dcl(data, offset, i, instr_prefix);
+ break;
+ default:
+ kgem_debug_print(data, offset, i++, "%s: unknown\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
+ break;
+ }
+}
+
+static char *
+gen3_decode_compare_func(uint32_t op)
+{
+ switch (op&0x7) {
+ case 0: return "always";
+ case 1: return "never";
+ case 2: return "less";
+ case 3: return "equal";
+ case 4: return "lequal";
+ case 5: return "greater";
+ case 6: return "notequal";
+ case 7: return "gequal";
+ }
+ return "";
+}
+
+static char *
+gen3_decode_stencil_op(uint32_t op)
+{
+ switch (op&0x7) {
+ case 0: return "keep";
+ case 1: return "zero";
+ case 2: return "replace";
+ case 3: return "incr_sat";
+ case 4: return "decr_sat";
+ case 5: return "greater";
+ case 6: return "incr";
+ case 7: return "decr";
+ }
+ return "";
+}
+
+#if 0
+/* part of MODES_4 */
+static char *
+gen3_decode_logic_op(uint32_t op)
+{
+ switch (op&0xf) {
+ case 0: return "clear";
+ case 1: return "nor";
+ case 2: return "and_inv";
+ case 3: return "copy_inv";
+ case 4: return "and_rvrse";
+ case 5: return "inv";
+ case 6: return "xor";
+ case 7: return "nand";
+ case 8: return "and";
+ case 9: return "equiv";
+ case 10: return "noop";
+ case 11: return "or_inv";
+ case 12: return "copy";
+ case 13: return "or_rvrse";
+ case 14: return "or";
+ case 15: return "set";
+ }
+ return "";
+}
+#endif
+
+static char *
+gen3_decode_blend_fact(uint32_t op)
+{
+ switch (op&0xf) {
+ case 1: return "zero";
+ case 2: return "one";
+ case 3: return "src_colr";
+ case 4: return "inv_src_colr";
+ case 5: return "src_alpha";
+ case 6: return "inv_src_alpha";
+ case 7: return "dst_alpha";
+ case 8: return "inv_dst_alpha";
+ case 9: return "dst_colr";
+ case 10: return "inv_dst_colr";
+ case 11: return "src_alpha_sat";
+ case 12: return "cnst_colr";
+ case 13: return "inv_cnst_colr";
+ case 14: return "cnst_alpha";
+ case 15: return "inv_const_alpha";
+ }
+ return "";
+}
+
+static char *
+decode_tex_coord_mode(uint32_t mode)
+{
+ switch (mode&0x7) {
+ case 0: return "wrap";
+ case 1: return "mirror";
+ case 2: return "clamp_edge";
+ case 3: return "cube";
+ case 4: return "clamp_border";
+ case 5: return "mirror_once";
+ }
+ return "";
+}
+
+static char *
+gen3_decode_sample_filter(uint32_t mode)
+{
+ switch (mode&0x7) {
+ case 0: return "nearest";
+ case 1: return "linear";
+ case 2: return "anisotropic";
+ case 3: return "4x4_1";
+ case 4: return "4x4_2";
+ case 5: return "4x4_flat";
+ case 6: return "6x5_mono";
+ }
+ return "";
+}
+
+static int
+gen3_decode_load_state_immediate_1(struct kgem *kgem, uint32_t offset)
+{
+ const uint32_t *data = kgem->batch + offset;
+ int len, i, word;
+
+ kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
+ len = (data[0] & 0x0000000f) + 2;
+ i = 1;
+ for (word = 0; word <= 8; word++) {
+ if (data[0] & (1 << (4 + word))) {
+ switch (word) {
+ case 0:
+ kgem_debug_print(data, offset, i, "S0: vbo offset: 0x%08x%s\n",
+ data[i]&(~1),data[i]&1?", auto cache invalidate disabled":"");
+ gen3_update_vertex_buffer_addr(kgem, offset + i);
+ break;
+ case 1:
+ kgem_debug_print(data, offset, i, "S1: vertex width: %i, vertex pitch: %i\n",
+ (data[i]>>24)&0x3f,(data[i]>>16)&0x3f);
+ gen3_update_vertex_buffer_pitch(kgem, offset + i);
+ break;
+ case 2:
+ {
+ char buf[200];
+ int len = 0;
+ int tex_num;
+ for (tex_num = 0; tex_num < 8; tex_num++) {
+ switch((data[i]>>tex_num*4)&0xf) {
+ case 0: len += sprintf(buf + len, "%i=2D ", tex_num); break;
+ case 1: len += sprintf(buf + len, "%i=3D ", tex_num); break;
+ case 2: len += sprintf(buf + len, "%i=4D ", tex_num); break;
+ case 3: len += sprintf(buf + len, "%i=1D ", tex_num); break;
+ case 4: len += sprintf(buf + len, "%i=2D_16 ", tex_num); break;
+ case 5: len += sprintf(buf + len, "%i=4D_16 ", tex_num); break;
+ case 0xf: len += sprintf(buf + len, "%i=NP ", tex_num); break;
+ }
+ }
+ kgem_debug_print(data, offset, i, "S2: texcoord formats: %s\n", buf);
+ gen3_update_vertex_texcoords(kgem, data[i]);
+ }
+
+ break;
+ case 3:
+ kgem_debug_print(data, offset, i, "S3: not documented\n", word);
+ break;
+ case 4:
+ {
+ char *cullmode = "";
+ char *vfmt_xyzw = "";
+ switch((data[i]>>13)&0x3) {
+ case 0: cullmode = "both"; break;
+ case 1: cullmode = "none"; break;
+ case 2: cullmode = "cw"; break;
+ case 3: cullmode = "ccw"; break;
+ }
+ switch(data[i] & (7<<6 | 1<<2)) {
+ case 1<<6: vfmt_xyzw = "XYZ,"; break;
+ case 2<<6: vfmt_xyzw = "XYZW,"; break;
+ case 3<<6: vfmt_xyzw = "XY,"; break;
+ case 4<<6: vfmt_xyzw = "XYW,"; break;
+ case 1<<6 | 1<<2: vfmt_xyzw = "XYZF,"; break;
+ case 2<<6 | 1<<2: vfmt_xyzw = "XYZWF,"; break;
+ case 3<<6 | 1<<2: vfmt_xyzw = "XYF,"; break;
+ case 4<<6 | 1<<2: vfmt_xyzw = "XYWF,"; break;
+ }
+ kgem_debug_print(data, offset, i, "S4: point_width=%i, line_width=%.1f,"
+ "%s%s%s%s%s cullmode=%s, vfmt=%s%s%s%s%s%s "
+ "%s%s\n",
+ (data[i]>>23)&0x1ff,
+ ((data[i]>>19)&0xf) / 2.0,
+ data[i]&(0xf<<15)?" flatshade=":"",
+ data[i]&(1<<18)?"Alpha,":"",
+ data[i]&(1<<17)?"Fog,":"",
+ data[i]&(1<<16)?"Specular,":"",
+ data[i]&(1<<15)?"Color,":"",
+ cullmode,
+ data[i]&(1<<12)?"PointWidth,":"",
+ data[i]&(1<<11)?"SpecFog,":"",
+ data[i]&(1<<10)?"Color,":"",
+ data[i]&(1<<9)?"DepthOfs,":"",
+ vfmt_xyzw,
+ data[i]&(1<<9)?"FogParam,":"",
+ data[i]&(1<<5)?"force default diffuse, ":"",
+ data[i]&(1<<4)?"force default specular, ":"",
+ data[i]&(1<<3)?"local depth ofs enable, ":"",
+ data[i]&(1<<1)?"point sprite enable, ":"",
+ data[i]&(1<<0)?"line AA enable, ":"");
+ gen3_update_vertex_elements(kgem, data[i]);
+ break;
+ }
+ case 5:
+ {
+ kgem_debug_print(data, offset, i, "S5:%s%s%s%s%s"
+ "%s%s%s%s stencil_ref=0x%x, stencil_test=%s, "
+ "stencil_fail=%s, stencil_pass_z_fail=%s, "
+ "stencil_pass_z_pass=%s, %s%s%s%s\n",
+ data[i]&(0xf<<28)?" write_disable=":"",
+ data[i]&(1<<31)?"Alpha,":"",
+ data[i]&(1<<30)?"Red,":"",
+ data[i]&(1<<29)?"Green,":"",
+ data[i]&(1<<28)?"Blue,":"",
+ data[i]&(1<<27)?" force default point size,":"",
+ data[i]&(1<<26)?" last pixel enable,":"",
+ data[i]&(1<<25)?" global depth ofs enable,":"",
+ data[i]&(1<<24)?" fog enable,":"",
+ (data[i]>>16)&0xff,
+ gen3_decode_compare_func(data[i]>>13),
+ gen3_decode_stencil_op(data[i]>>10),
+ gen3_decode_stencil_op(data[i]>>7),
+ gen3_decode_stencil_op(data[i]>>4),
+ data[i]&(1<<3)?"stencil write enable, ":"",
+ data[i]&(1<<2)?"stencil test enable, ":"",
+ data[i]&(1<<1)?"color dither enable, ":"",
+ data[i]&(1<<0)?"logicop enable, ":"");
+ }
+ break;
+ case 6:
+ kgem_debug_print(data, offset, i, "S6: %salpha_test=%s, alpha_ref=0x%x, "
+ "depth_test=%s, %ssrc_blnd_fct=%s, dst_blnd_fct=%s, "
+ "%s%stristrip_provoking_vertex=%i\n",
+ data[i]&(1<<31)?"alpha test enable, ":"",
+ gen3_decode_compare_func(data[i]>>28),
+ data[i]&(0xff<<20),
+ gen3_decode_compare_func(data[i]>>16),
+ data[i]&(1<<15)?"cbuf blend enable, ":"",
+ gen3_decode_blend_fact(data[i]>>8),
+ gen3_decode_blend_fact(data[i]>>4),
+ data[i]&(1<<3)?"depth write enable, ":"",
+ data[i]&(1<<2)?"cbuf write enable, ":"",
+ data[i]&(0x3));
+ break;
+ case 7:
+ kgem_debug_print(data, offset, i, "S7: depth offset constant: 0x%08x\n", data[i]);
+ break;
+ }
+ i++;
+ }
+ }
+
+ assert(len == i);
+ return len;
+}
+
+static int
+gen3_decode_3d_1d(struct kgem *kgem, uint32_t offset)
+{
+ uint32_t *data = kgem->batch + offset;
+ unsigned int len, i, c, idx, word, map, sampler, instr;
+ char *format, *zformat, *type;
+ uint32_t opcode;
+ const struct {
+ uint32_t opcode;
+ int min_len;
+ int max_len;
+ char *name;
+ } opcodes_3d_1d[] = {
+ { 0x86, 4, 4, "3DSTATE_CHROMA_KEY" },
+ { 0x88, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
+ { 0x99, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
+ { 0x9a, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
+ { 0x98, 2, 2, "3DSTATE_DEFAULT_Z" },
+ { 0x97, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
+ { 0x9d, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
+ { 0x9e, 4, 4, "3DSTATE_MONO_FILTER" },
+ { 0x89, 4, 4, "3DSTATE_FOG_MODE" },
+ { 0x8f, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" },
+ { 0x83, 2, 2, "3DSTATE_SPAN_STIPPLE" },
+ }, *opcode_3d_1d;
+
+ opcode = (data[0] & 0x00ff0000) >> 16;
+
+ switch (opcode) {
+ case 0x07:
+ /* This instruction is unusual. A 0 length means just 1 DWORD instead of
+ * 2. The 0 length is specified in one place to be unsupported, but
+ * stated to be required in another, and 0 length LOAD_INDIRECTs appear
+ * to cause no harm at least.
+ */
+ kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_INDIRECT\n");
+ len = (data[0] & 0x000000ff) + 1;
+ i = 1;
+ if (data[0] & (0x01 << 8)) {
+ kgem_debug_print(data, offset, i++, "SIS.0\n");
+ kgem_debug_print(data, offset, i++, "SIS.1\n");
+ }
+ if (data[0] & (0x02 << 8)) {
+ kgem_debug_print(data, offset, i++, "DIS.0\n");
+ }
+ if (data[0] & (0x04 << 8)) {
+ kgem_debug_print(data, offset, i++, "SSB.0\n");
+ kgem_debug_print(data, offset, i++, "SSB.1\n");
+ }
+ if (data[0] & (0x08 << 8)) {
+ kgem_debug_print(data, offset, i++, "MSB.0\n");
+ kgem_debug_print(data, offset, i++, "MSB.1\n");
+ }
+ if (data[0] & (0x10 << 8)) {
+ kgem_debug_print(data, offset, i++, "PSP.0\n");
+ kgem_debug_print(data, offset, i++, "PSP.1\n");
+ }
+ if (data[0] & (0x20 << 8)) {
+ kgem_debug_print(data, offset, i++, "PSC.0\n");
+ kgem_debug_print(data, offset, i++, "PSC.1\n");
+ }
+ assert(len == i);
+ return len;
+ case 0x04:
+ return gen3_decode_load_state_immediate_1(kgem, offset);
+ case 0x03:
+ kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n");
+ len = (data[0] & 0x0000000f) + 2;
+ i = 1;
+ for (word = 6; word <= 14; word++) {
+ if (data[0] & (1 << word)) {
+ if (word == 6)
+ kgem_debug_print(data, offset, i++, "TBCF\n");
+ else if (word >= 7 && word <= 10) {
+ kgem_debug_print(data, offset, i++, "TB%dC\n", word - 7);
+ kgem_debug_print(data, offset, i++, "TB%dA\n", word - 7);
+ } else if (word >= 11 && word <= 14) {
+ kgem_debug_print(data, offset, i, "TM%dS0: offset=0x%08x, %s\n",
+ word - 11,
+ data[i]&0xfffffffe,
+ data[i]&1?"use fence":"");
+ i++;
+ kgem_debug_print(data, offset, i, "TM%dS1: height=%i, width=%i, %s\n",
+ word - 11,
+ data[i]>>21, (data[i]>>10)&0x3ff,
+ data[i]&2?(data[i]&1?"y-tiled":"x-tiled"):"");
+ i++;
+ kgem_debug_print(data, offset, i, "TM%dS2: pitch=%i, \n",
+ word - 11,
+ ((data[i]>>21) + 1)*4);
+ i++;
+ kgem_debug_print(data, offset, i++, "TM%dS3\n", word - 11);
+ kgem_debug_print(data, offset, i++, "TM%dS4: dflt color\n", word - 11);
+ }
+ }
+ }
+ assert(len == i);
+ return len;
+ case 0x00:
+ kgem_debug_print(data, offset, 0, "3DSTATE_MAP_STATE\n");
+ len = (data[0] & 0x0000003f) + 2;
+ kgem_debug_print(data, offset, 1, "mask\n");
+
+ i = 2;
+ for (map = 0; map <= 15; map++) {
+ if (data[1] & (1 << map)) {
+ int width, height, pitch, dword;
+ const char *tiling;
+
+ dword = data[i];
+ kgem_debug_print(data, offset, i++, "map %d MS2 %s%s%s\n", map,
+ dword&(1<<31)?"untrusted surface, ":"",
+ dword&(1<<1)?"vertical line stride enable, ":"",
+ dword&(1<<0)?"vertical ofs enable, ":"");
+
+ dword = data[i];
+ width = ((dword >> 10) & ((1 << 11) - 1))+1;
+ height = ((dword >> 21) & ((1 << 11) - 1))+1;
+
+ tiling = "none";
+ if (dword & (1 << 2))
+ tiling = "fenced";
+ else if (dword & (1 << 1))
+ tiling = dword & (1 << 0) ? "Y" : "X";
+ type = " BAD";
+ format = " (invalid)";
+ switch ((dword>>7) & 0x7) {
+ case 1:
+ type = "8";
+ switch ((dword>>3) & 0xf) {
+ case 0: format = "I"; break;
+ case 1: format = "L"; break;
+ case 4: format = "A"; break;
+ case 5: format = " mono"; break;
+ }
+ break;
+ case 2:
+ type = "16";
+ switch ((dword>>3) & 0xf) {
+ case 0: format = " rgb565"; break;
+ case 1: format = " argb1555"; break;
+ case 2: format = " argb4444"; break;
+ case 3: format = " ay88"; break;
+ case 5: format = " 88dvdu"; break;
+ case 6: format = " bump655"; break;
+ case 7: format = "I"; break;
+ case 8: format = "L"; break;
+ case 9: format = "A"; break;
+ }
+ break;
+ case 3:
+ type = "32";
+ switch ((dword>>3) & 0xf) {
+ case 0: format = " argb8888"; break;
+ case 1: format = " abgr8888"; break;
+ case 2: format = " xrgb8888"; break;
+ case 3: format = " xbgr8888"; break;
+ case 4: format = " qwvu8888"; break;
+ case 5: format = " axvu8888"; break;
+ case 6: format = " lxvu8888"; break;
+ case 7: format = " xlvu8888"; break;
+ case 8: format = " argb2101010"; break;
+ case 9: format = " abgr2101010"; break;
+ case 10: format = " awvu2101010"; break;
+ case 11: format = " gr1616"; break;
+ case 12: format = " vu1616"; break;
+ case 13: format = " xI824"; break;
+ case 14: format = " xA824"; break;
+ case 15: format = " xL824"; break;
+ }
+ break;
+ case 5:
+ type = "422";
+ switch ((dword>>3) & 0xf) {
+ case 0: format = " yuv_swapy"; break;
+ case 1: format = " yuv"; break;
+ case 2: format = " yuv_swapuv"; break;
+ case 3: format = " yuv_swapuvy"; break;
+ }
+ break;
+ case 6:
+ type = "compressed";
+ switch ((dword>>3) & 0x7) {
+ case 0: format = " dxt1"; break;
+ case 1: format = " dxt2_3"; break;
+ case 2: format = " dxt4_5"; break;
+ case 3: format = " fxt1"; break;
+ case 4: format = " dxt1_rb"; break;
+ }
+ break;
+ case 7:
+ type = "4b indexed";
+ switch ((dword>>3) & 0xf) {
+ case 7: format = " argb8888"; break;
+ }
+ break;
+ default:
+ format = "BAD";
+ break;
+ }
+ dword = data[i];
+ kgem_debug_print(data, offset, i++, "map %d MS3 [width=%d, height=%d, format=%s%s, tiling=%s%s]\n",
+ map, width, height, type, format, tiling,
+ dword&(1<<9)?" palette select":"");
+
+ dword = data[i];
+ pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1);
+ kgem_debug_print(data, offset, i++, "map %d MS4 [pitch=%d, max_lod=%i, vol_depth=%i, cube_face_ena=%x, %s]\n",
+ map, pitch,
+ (dword>>9)&0x3f, dword&0xff, (dword>>15)&0x3f,
+ dword&(1<<8)?"miplayout legacy":"miplayout right");
+ }
+ }
+ assert(len == i);
+ return len;
+ case 0x06:
+ kgem_debug_print(data, offset, 0, "3DSTATE_PIXEL_SHADER_CONSTANTS\n");
+ len = (data[0] & 0x000000ff) + 2;
+
+ i = 2;
+ for (c = 0; c <= 31; c++) {
+ if (data[1] & (1 << c)) {
+ kgem_debug_print(data, offset, i, "C%d.X = %f\n",
+ c, int_as_float(data[i]));
+ i++;
+ kgem_debug_print(data, offset, i, "C%d.Y = %f\n",
+ c, int_as_float(data[i]));
+ i++;
+ kgem_debug_print(data, offset, i, "C%d.Z = %f\n",
+ c, int_as_float(data[i]));
+ i++;
+ kgem_debug_print(data, offset, i, "C%d.W = %f\n",
+ c, int_as_float(data[i]));
+ i++;
+ }
+ }
+ assert(len == i);
+ return len;
+ case 0x05:
+ kgem_debug_print(data, offset, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n");
+ len = (data[0] & 0x000000ff) + 2;
+ assert(((len-1) % 3) == 0);
+ assert(len <= 370);
+ i = 1;
+ for (instr = 0; instr < (len - 1) / 3; instr++) {
+ char instr_prefix[10];
+
+ sprintf(instr_prefix, "PS%03d", instr);
+ gen3_decode_instruction(data, offset, i, instr_prefix);
+ i += 3;
+ }
+ return len;
+ case 0x01:
+ kgem_debug_print(data, offset, 0, "3DSTATE_SAMPLER_STATE\n");
+ kgem_debug_print(data, offset, 1, "mask\n");
+ len = (data[0] & 0x0000003f) + 2;
+ i = 2;
+ for (sampler = 0; sampler <= 15; sampler++) {
+ if (data[1] & (1 << sampler)) {
+ uint32_t dword;
+ char *mip_filter = "";
+ dword = data[i];
+ switch ((dword>>20)&0x3) {
+ case 0: mip_filter = "none"; break;
+ case 1: mip_filter = "nearest"; break;
+ case 3: mip_filter = "linear"; break;
+ }
+ kgem_debug_print(data, offset, i++, "sampler %d SS2:%s%s%s "
+ "base_mip_level=%i, mip_filter=%s, mag_filter=%s, min_filter=%s "
+ "lod_bias=%.2f,%s max_aniso=%i, shadow_func=%s\n", sampler,
+ dword&(1<<31)?" reverse gamma,":"",
+ dword&(1<<30)?" packed2planar,":"",
+ dword&(1<<29)?" colorspace conversion,":"",
+ (dword>>22)&0x1f,
+ mip_filter,
+ gen3_decode_sample_filter(dword>>17),
+ gen3_decode_sample_filter(dword>>14),
+ ((dword>>5)&0x1ff)/(0x10*1.0),
+ dword&(1<<4)?" shadow,":"",
+ dword&(1<<3)?4:2,
+ gen3_decode_compare_func(dword));
+ dword = data[i];
+ kgem_debug_print(data, offset, i++, "sampler %d SS3: min_lod=%.2f,%s "
+ "tcmode_x=%s, tcmode_y=%s, tcmode_z=%s,%s texmap_idx=%i,%s\n",
+ sampler, ((dword>>24)&0xff)/(0x10*1.0),
+ dword&(1<<17)?" kill pixel enable,":"",
+ decode_tex_coord_mode(dword>>12),
+ decode_tex_coord_mode(dword>>9),
+ decode_tex_coord_mode(dword>>6),
+ dword&(1<<5)?" normalized coords,":"",
+ (dword>>1)&0xf,
+ dword&(1<<0)?" deinterlacer,":"");
+ dword = data[i];
+ kgem_debug_print(data, offset, i++, "sampler %d SS4: border color\n",
+ sampler, ((dword>>24)&0xff)/(0x10*1.0),
+ dword);
+ }
+ }
+ assert(len == i);
+ return len;
+ case 0x85:
+ len = (data[0] & 0x0000000f) + 2;
+ assert(len == 2);
+
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_DEST_BUFFER_VARIABLES\n");
+
+ switch ((data[1] >> 8) & 0xf) {
+ case 0x0: format = "g8"; break;
+ case 0x1: format = "x1r5g5b5"; break;
+ case 0x2: format = "r5g6b5"; break;
+ case 0x3: format = "a8r8g8b8"; break;
+ case 0x4: format = "ycrcb_swapy"; break;
+ case 0x5: format = "ycrcb_normal"; break;
+ case 0x6: format = "ycrcb_swapuv"; break;
+ case 0x7: format = "ycrcb_swapuvy"; break;
+ case 0x8: format = "a4r4g4b4"; break;
+ case 0x9: format = "a1r5g5b5"; break;
+ case 0xa: format = "a2r10g10b10"; break;
+ default: format = "BAD"; break;
+ }
+ switch ((data[1] >> 2) & 0x3) {
+ case 0x0: zformat = "u16"; break;
+ case 0x1: zformat = "f16"; break;
+ case 0x2: zformat = "u24x8"; break;
+ default: zformat = "BAD"; break;
+ }
+ kgem_debug_print(data, offset, 1, "%s format, %s depth format, early Z %sabled\n",
+ format, zformat,
+ (data[1] & (1 << 31)) ? "en" : "dis");
+ return len;
+
+ case 0x8e:
+ {
+ const char *name, *tiling;
+
+ len = (data[0] & 0x0000000f) + 2;
+ assert(len == 3);
+
+ switch((data[1] >> 24) & 0x7) {
+ case 0x3: name = "color"; break;
+ case 0x7: name = "depth"; break;
+ default: name = "unknown"; break;
+ }
+
+ tiling = "none";
+ if (data[1] & (1 << 23))
+ tiling = "fenced";
+ else if (data[1] & (1 << 22))
+ tiling = data[1] & (1 << 21) ? "Y" : "X";
+
+ kgem_debug_print(data, offset, 0, "3DSTATE_BUFFER_INFO\n");
+ kgem_debug_print(data, offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff);
+
+ kgem_debug_print(data, offset, 2, "address\n");
+ return len;
+ }
+ case 0x81:
+ len = (data[0] & 0x0000000f) + 2;
+ assert(len == 3);
+
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_SCISSOR_RECTANGLE\n");
+ kgem_debug_print(data, offset, 1, "(%d,%d)\n",
+ data[1] & 0xffff, data[1] >> 16);
+ kgem_debug_print(data, offset, 2, "(%d,%d)\n",
+ data[2] & 0xffff, data[2] >> 16);
+
+ return len;
+ case 0x80:
+ len = (data[0] & 0x0000000f) + 2;
+ assert(len == 5);
+
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_DRAWING_RECTANGLE\n");
+ kgem_debug_print(data, offset, 1, "%s\n",
+ data[1]&(1<<30)?"depth ofs disabled ":"");
+ kgem_debug_print(data, offset, 2, "(%d,%d)\n",
+ data[2] & 0xffff, data[2] >> 16);
+ kgem_debug_print(data, offset, 3, "(%d,%d)\n",
+ data[3] & 0xffff, data[3] >> 16);
+ kgem_debug_print(data, offset, 4, "(%d,%d)\n",
+ (int16_t)(data[4] & 0xffff),
+ (int16_t)(data[4] >> 16));
+
+ return len;
+ case 0x9c:
+ len = (data[0] & 0x0000000f) + 2;
+ assert(len == 7);
+
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_CLEAR_PARAMETERS\n");
+ kgem_debug_print(data, offset, 1, "prim_type=%s, clear=%s%s%s\n",
+ data[1]&(1<<16)?"CLEAR_RECT":"ZONE_INIT",
+ data[1]&(1<<2)?"color,":"",
+ data[1]&(1<<1)?"depth,":"",
+ data[1]&(1<<0)?"stencil,":"");
+ kgem_debug_print(data, offset, 2, "clear color\n");
+ kgem_debug_print(data, offset, 3, "clear depth/stencil\n");
+ kgem_debug_print(data, offset, 4, "color value (rgba8888)\n");
+ kgem_debug_print(data, offset, 5, "depth value %f\n",
+ int_as_float(data[5]));
+ kgem_debug_print(data, offset, 6, "clear stencil\n");
+ return len;
+ }
+
+ for (idx = 0; idx < ARRAY_SIZE(opcodes_3d_1d); idx++) {
+ opcode_3d_1d = &opcodes_3d_1d[idx];
+ if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) {
+ len = (data[0] & 0xf) + 2;
+ kgem_debug_print(data, offset, 0, "%s\n", opcode_3d_1d->name);
+ for (i = 1; i < len; i++)
+ kgem_debug_print(data, offset, i, "dword %d\n", i);
+
+ return len;
+ }
+ }
+
+ kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode);
+ assert(0);
+ return 1;
+}
+
+#define VERTEX_OUT(fmt, ...) do { \
+ kgem_debug_print(data, offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \
+ i++; \
+} while (0)
+
+static int
+gen3_decode_3d_primitive(struct kgem *kgem, uint32_t offset)
+{
+ uint32_t *data = kgem->batch + offset;
+ char immediate = (data[0] & (1 << 23)) == 0;
+ unsigned int len, i, ret;
+ char *primtype;
+ unsigned int vertex = 0;
+
+ switch ((data[0] >> 18) & 0xf) {
+ case 0x0: primtype = "TRILIST"; break;
+ case 0x1: primtype = "TRISTRIP"; break;
+ case 0x2: primtype = "TRISTRIP_REVERSE"; break;
+ case 0x3: primtype = "TRIFAN"; break;
+ case 0x4: primtype = "POLYGON"; break;
+ case 0x5: primtype = "LINELIST"; break;
+ case 0x6: primtype = "LINESTRIP"; break;
+ case 0x7: primtype = "RECTLIST"; break;
+ case 0x8: primtype = "POINTLIST"; break;
+ case 0x9: primtype = "DIB"; break;
+ case 0xa: primtype = "CLEAR_RECT"; assert(0); break;
+ default: primtype = "unknown"; break;
+ }
+
+ gen3_update_vertex_elements_offsets(kgem);
+
+ /* XXX: 3DPRIM_DIB not supported */
+ if (immediate) {
+ len = (data[0] & 0x0003ffff) + 2;
+ kgem_debug_print(data, offset, 0, "3DPRIMITIVE inline %s\n", primtype);
+ for (i = 1; i < len; ) {
+ ErrorF(" [%d]: ", vertex);
+ i += inline_vertex_out(kgem, data + i) / sizeof(uint32_t);
+ ErrorF("\n");
+ vertex++;
+ }
+
+ ret = len;
+ } else {
+ /* indirect vertices */
+ len = data[0] & 0x0000ffff; /* index count */
+ if (data[0] & (1 << 17)) {
+ /* random vertex access */
+ kgem_debug_print(data, offset, 0,
+ "3DPRIMITIVE random indirect %s (%d)\n", primtype, len);
+ assert(0);
+ if (len == 0) {
+ /* vertex indices continue until 0xffff is found */
+ } else {
+ /* fixed size vertex index buffer */
+ }
+ ret = (len + 1) / 2 + 1;
+ goto out;
+ } else {
+ /* sequential vertex access */
+ vertex = data[1] & 0xffff;
+ kgem_debug_print(data, offset, 0,
+ "3DPRIMITIVE sequential indirect %s, %d starting from "
+ "%d\n", primtype, len, vertex);
+ kgem_debug_print(data, offset, 1, " start\n");
+ for (i = 0; i < len; i++) {
+ ErrorF(" [%d]: ", vertex);
+ indirect_vertex_out(kgem, vertex++);
+ ErrorF("\n");
+ }
+ ret = 2;
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
+int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset)
+{
+ uint32_t opcode;
+ unsigned int idx;
+
+ struct {
+ uint32_t opcode;
+ int min_len;
+ int max_len;
+ char *name;
+ } opcodes[] = {
+ { 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" },
+ { 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" },
+ { 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" },
+ { 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" },
+ { 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
+ { 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" },
+ { 0x0d, 1, 1, "3DSTATE_MODES_4" },
+ { 0x0c, 1, 1, "3DSTATE_MODES_5" },
+ { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
+ };
+ uint32_t *data = kgem->batch + offset;
+
+ opcode = (data[0] & 0x1f000000) >> 24;
+
+ switch (opcode) {
+ case 0x1f:
+ return gen3_decode_3d_primitive(kgem, offset);
+ case 0x1d:
+ return gen3_decode_3d_1d(kgem, offset);
+ case 0x1c:
+ return gen3_decode_3d_1c(kgem, offset);
+ }
+
+ for (idx = 0; idx < ARRAY_SIZE(opcodes); idx++) {
+ if (opcode == opcodes[idx].opcode) {
+ unsigned int len = 1, i;
+
+ kgem_debug_print(data, offset, 0, "%s\n", opcodes[idx].name);
+ if (opcodes[idx].max_len > 1) {
+ len = (data[0] & 0xff) + 2;
+ assert(len >= opcodes[idx].min_len ||
+ len <= opcodes[idx].max_len);
+ }
+
+ for (i = 1; i < len; i++)
+ kgem_debug_print(data, offset, i, "dword %d\n", i);
+ return len;
+ }
+ }
+
+ kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode);
+ return 1;
+}
+
+
+void kgem_gen3_finish_state(struct kgem *kgem)
+{
+ if (state.vb.current)
+ munmap(state.vb.base, state.vb.current->size);
+
+ memset(&state, 0, sizeof(state));
+}
diff --git a/src/sna/kgem_debug_gen4.c b/src/sna/kgem_debug_gen4.c
new file mode 100644
index 00000000..d736cbd9
--- /dev/null
+++ b/src/sna/kgem_debug_gen4.c
@@ -0,0 +1,711 @@
+/*
+ * Copyright © 2007-2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/mman.h>
+#include <assert.h>
+
+#include "sna.h"
+#include "sna_reg.h"
+
+#include "gen4_render.h"
+
+#include "kgem_debug.h"
+
+static struct state {
+ struct vertex_buffer {
+ int handle;
+ void *base;
+ const char *ptr;
+ int pitch;
+
+ struct kgem_bo *current;
+ } vb[33];
+ struct vertex_elements {
+ int buffer;
+ int offset;
+ bool valid;
+ uint32_t type;
+ uint8_t swizzle[4];
+ } ve[33];
+ int num_ve;
+
+ struct dynamic_state {
+ struct kgem_bo *current;
+ void *base, *ptr;
+ } dynamic_state;
+} state;
+
+static void gen4_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
+{
+ uint32_t reloc = sizeof(uint32_t) * (&data[1] - kgem->batch);
+ struct kgem_bo *bo = NULL;
+ void *base, *ptr;
+ int i;
+
+ for (i = 0; i < kgem->nreloc; i++)
+ if (kgem->reloc[i].offset == reloc)
+ break;
+ assert(i < kgem->nreloc);
+ reloc = kgem->reloc[i].target_handle;
+
+ if (reloc == 0) {
+ base = kgem->batch;
+ } else {
+ list_for_each_entry(bo, &kgem->next_request->buffers, request)
+ if (bo->handle == reloc)
+ break;
+ assert(&bo->request != &kgem->next_request->buffers);
+ base = kgem_bo_map(kgem, bo, PROT_READ);
+ }
+ ptr = (char *)base + kgem->reloc[i].delta;
+
+ i = data[0] >> 27;
+ if (state.vb[i].current)
+ munmap(state.vb[i].base, state.vb[i].current->size);
+
+ state.vb[i].current = bo;
+ state.vb[i].base = base;
+ state.vb[i].ptr = ptr;
+ state.vb[i].pitch = data[0] & 0x7ff;
+}
+
+static uint32_t
+get_ve_component(uint32_t data, int component)
+{
+ return (data >> (16 + (3 - component) * 4)) & 0x7;
+}
+
+static void gen4_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data)
+{
+ state.ve[id].buffer = data[0] >> 27;
+ state.ve[id].valid = !!(data[0] & (1 << 26));
+ state.ve[id].type = (data[0] >> 16) & 0x1ff;
+ state.ve[id].offset = data[0] & 0x7ff;
+ state.ve[id].swizzle[0] = get_ve_component(data[1], 0);
+ state.ve[id].swizzle[1] = get_ve_component(data[1], 1);
+ state.ve[id].swizzle[2] = get_ve_component(data[1], 2);
+ state.ve[id].swizzle[3] = get_ve_component(data[1], 3);
+}
+
+static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max)
+{
+ int c;
+
+ ErrorF("(");
+ for (c = 0; c < max; c++) {
+ switch (ve->swizzle[c]) {
+ case 0: ErrorF("#"); break;
+ case 1: ErrorF("%d", v[c]); break;
+ case 2: ErrorF("0.0"); break;
+ case 3: ErrorF("1.0"); break;
+ case 4: ErrorF("0x1"); break;
+ case 5: break;
+ default: ErrorF("?");
+ }
+ if (c < 3)
+ ErrorF(", ");
+ }
+ for (; c < 4; c++) {
+ switch (ve->swizzle[c]) {
+ case 0: ErrorF("#"); break;
+ case 1: ErrorF("1.0"); break;
+ case 2: ErrorF("0.0"); break;
+ case 3: ErrorF("1.0"); break;
+ case 4: ErrorF("0x1"); break;
+ case 5: break;
+ default: ErrorF("?");
+ }
+ if (c < 3)
+ ErrorF(", ");
+ }
+ ErrorF(")");
+}
+
+static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max)
+{
+ int c, o;
+
+ ErrorF("(");
+ for (c = o = 0; c < 4 && o < max; c++) {
+ switch (ve->swizzle[c]) {
+ case 0: ErrorF("#"); break;
+ case 1: ErrorF("%f", f[o++]); break;
+ case 2: ErrorF("0.0"); break;
+ case 3: ErrorF("1.0"); break;
+ case 4: ErrorF("0x1"); break;
+ case 5: break;
+ default: ErrorF("?");
+ }
+ if (c < 3)
+ ErrorF(", ");
+ }
+ for (; c < 4; c++) {
+ switch (ve->swizzle[c]) {
+ case 0: ErrorF("#"); break;
+ case 1: ErrorF("1.0"); break;
+ case 2: ErrorF("0.0"); break;
+ case 3: ErrorF("1.0"); break;
+ case 4: ErrorF("0x1"); break;
+ case 5: break;
+ default: ErrorF("?");
+ }
+ if (c < 3)
+ ErrorF(", ");
+ }
+ ErrorF(")");
+}
+
+static void ve_out(const struct vertex_elements *ve, const void *ptr)
+{
+ switch (ve->type) {
+ case GEN4_SURFACEFORMAT_R32_FLOAT:
+ vertices_float_out(ve, ptr, 1);
+ break;
+ case GEN4_SURFACEFORMAT_R32G32_FLOAT:
+ vertices_float_out(ve, ptr, 2);
+ break;
+ case GEN4_SURFACEFORMAT_R32G32B32_FLOAT:
+ vertices_float_out(ve, ptr, 3);
+ break;
+ case GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT:
+ vertices_float_out(ve, ptr, 4);
+ break;
+ case GEN4_SURFACEFORMAT_R16_SINT:
+ vertices_sint16_out(ve, ptr, 1);
+ break;
+ case GEN4_SURFACEFORMAT_R16G16_SINT:
+ vertices_sint16_out(ve, ptr, 2);
+ break;
+ case GEN4_SURFACEFORMAT_R16G16B16A16_SINT:
+ vertices_sint16_out(ve, ptr, 4);
+ break;
+ case GEN4_SURFACEFORMAT_R16_SSCALED:
+ vertices_sint16_out(ve, ptr, 1);
+ break;
+ case GEN4_SURFACEFORMAT_R16G16_SSCALED:
+ vertices_sint16_out(ve, ptr, 2);
+ break;
+ case GEN4_SURFACEFORMAT_R16G16B16A16_SSCALED:
+ vertices_sint16_out(ve, ptr, 4);
+ break;
+ }
+}
+
+static void indirect_vertex_out(struct kgem *kgem, uint32_t v)
+{
+ int i = 0;
+
+ do {
+ const struct vertex_elements *ve = &state.ve[i];
+ const struct vertex_buffer *vb = &state.vb[ve->buffer];
+ const void *ptr = vb->ptr + v * vb->pitch + ve->offset;
+
+ if (!ve->valid)
+ continue;
+
+ ve_out(ve, ptr);
+
+ while (++i <= state.num_ve && !state.ve[i].valid)
+ ;
+
+ if (i <= state.num_ve)
+ ErrorF(", ");
+ } while (i <= state.num_ve);
+}
+
+static void primitive_out(struct kgem *kgem, uint32_t *data)
+{
+ int n;
+
+ assert((data[0] & (1<<15)) == 0); /* XXX index buffers */
+
+ for (n = 0; n < data[1]; n++) {
+ int v = data[2] + n;
+ ErrorF(" [%d:%d] = ", n, v);
+ indirect_vertex_out(kgem, v);
+ ErrorF("\n");
+ }
+}
+
+static void
+state_base_out(uint32_t *data, uint32_t offset, unsigned int index,
+ char *name)
+{
+ if (data[index] & 1)
+ kgem_debug_print(data, offset, index,
+ "%s state base address 0x%08x\n",
+ name, data[index] & ~1);
+ else
+ kgem_debug_print(data, offset, index,
+ "%s state base not updated\n",
+ name);
+}
+
+static void
+state_max_out(uint32_t *data, uint32_t offset, unsigned int index,
+ char *name)
+{
+ if (data[index] == 1)
+ kgem_debug_print(data, offset, index,
+ "%s state upper bound disabled\n", name);
+ else if (data[index] & 1)
+ kgem_debug_print(data, offset, index,
+ "%s state upper bound 0x%08x\n",
+ name, data[index] & ~1);
+ else
+ kgem_debug_print(data, offset, index,
+ "%s state upper bound not updated\n",
+ name);
+}
+
+static const char *
+get_965_surfacetype(unsigned int surfacetype)
+{
+ switch (surfacetype) {
+ case 0: return "1D";
+ case 1: return "2D";
+ case 2: return "3D";
+ case 3: return "CUBE";
+ case 4: return "BUFFER";
+ case 7: return "NULL";
+ default: return "unknown";
+ }
+}
+
+static const char *
+get_965_depthformat(unsigned int depthformat)
+{
+ switch (depthformat) {
+ case 0: return "s8_z24float";
+ case 1: return "z32float";
+ case 2: return "z24s8";
+ case 5: return "z16";
+ default: return "unknown";
+ }
+}
+
+static const char *
+get_965_element_component(uint32_t data, int component)
+{
+ uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7;
+
+ switch (component_control) {
+ case 0:
+ return "nostore";
+ case 1:
+ switch (component) {
+ case 0: return "X";
+ case 1: return "Y";
+ case 2: return "Z";
+ case 3: return "W";
+ default: return "fail";
+ }
+ case 2:
+ return "0.0";
+ case 3:
+ return "1.0";
+ case 4:
+ return "0x1";
+ case 5:
+ return "VID";
+ default:
+ return "fail";
+ }
+}
+
+static const char *
+get_965_prim_type(uint32_t data)
+{
+ uint32_t primtype = (data >> 10) & 0x1f;
+
+ switch (primtype) {
+ case 0x01: return "point list";
+ case 0x02: return "line list";
+ case 0x03: return "line strip";
+ case 0x04: return "tri list";
+ case 0x05: return "tri strip";
+ case 0x06: return "tri fan";
+ case 0x07: return "quad list";
+ case 0x08: return "quad strip";
+ case 0x09: return "line list adj";
+ case 0x0a: return "line strip adj";
+ case 0x0b: return "tri list adj";
+ case 0x0c: return "tri strip adj";
+ case 0x0d: return "tri strip reverse";
+ case 0x0e: return "polygon";
+ case 0x0f: return "rect list";
+ case 0x10: return "line loop";
+ case 0x11: return "point list bf";
+ case 0x12: return "line strip cont";
+ case 0x13: return "line strip bf";
+ case 0x14: return "line strip cont bf";
+ case 0x15: return "tri fan no stipple";
+ default: return "fail";
+ }
+}
+
+#if 0
+struct reloc {
+ struct kgem_bo *bo;
+ void *base;
+};
+
+static void *
+get_reloc(struct kgem *kgem,
+ void *base, const uint32_t *reloc,
+ struct reloc *r)
+{
+ uint32_t delta = *reloc;
+
+ memset(r, 0, sizeof(*r));
+
+ if (base == 0) {
+ uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch);
+ struct kgem_bo *bo = NULL;
+ int i;
+
+ for (i = 0; i < kgem->nreloc; i++)
+ if (kgem->reloc[i].offset == handle)
+ break;
+ assert(i < kgem->nreloc);
+ handle = kgem->reloc[i].target_handle;
+ delta = kgem->reloc[i].delta;
+
+ if (handle == 0) {
+ base = kgem->batch;
+ } else {
+ list_for_each_entry(bo, &kgem->next_request->buffers, request)
+ if (bo->handle == handle)
+ break;
+ assert(&bo->request != &kgem->next_request->buffers);
+ base = kgem_bo_map(kgem, bo, PROT_READ);
+ r->bo = bo;
+ r->base = base;
+ }
+ }
+
+ return (char *)base + delta;
+}
+
+static void
+put_reloc(struct kgem *kgem, struct reloc *r)
+{
+ if (r->bo != NULL)
+ munmap(r->base, r->bo->size);
+}
+#endif
+
+int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset)
+{
+ static const struct {
+ uint32_t opcode;
+ int min_len;
+ int max_len;
+ const char *name;
+ } opcodes[] = {
+ { 0x6000, 3, 3, "URB_FENCE" },
+ { 0x6001, 2, 2, "CS_URB_FENCE" },
+ { 0x6002, 2, 2, "CONSTANT_BUFFER" },
+ { 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
+ { 0x6102, 2, 2 , "STATE_SIP" },
+ { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+ { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" },
+ { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+ { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" },
+ { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" },
+ { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
+ { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
+ { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
+ { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
+ { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
+ { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
+ { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
+ { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" },
+ { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
+ { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
+ { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
+ { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" },
+ { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
+ { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" },
+ { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" },
+ { 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" },
+ { 0x7b00, 6, 6, "3DPRIMITIVE" },
+ { 0x7805, 3, 3, "3DSTATE_URB" },
+ { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
+ { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
+ { 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" },
+ { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" },
+ };
+ uint32_t *data = kgem->batch + offset;
+ uint32_t op;
+ unsigned int len;
+ int i;
+ char *desc1 = NULL;
+
+ len = (data[0] & 0xff) + 2;
+ op = (data[0] & 0xffff0000) >> 16;
+ switch (op) {
+ case 0x6000:
+ assert(len == 3);
+
+ kgem_debug_print(data, offset, 0, "URB_FENCE: %s%s%s%s%s%s\n",
+ (data[0] >> 13) & 1 ? "cs " : "",
+ (data[0] >> 12) & 1 ? "vfe " : "",
+ (data[0] >> 11) & 1 ? "sf " : "",
+ (data[0] >> 10) & 1 ? "clip " : "",
+ (data[0] >> 9) & 1 ? "gs " : "",
+ (data[0] >> 8) & 1 ? "vs " : "");
+ kgem_debug_print(data, offset, 1,
+ "vs fence: %d, gs_fence: %d, clip_fence: %d\n",
+ data[1] & 0x3ff,
+ (data[1] >> 10) & 0x3ff,
+ (data[1] >> 20) & 0x3ff);
+ kgem_debug_print(data, offset, 2,
+ "sf fence: %d, vfe_fence: %d, cs_fence: %d\n",
+ data[2] & 0x3ff,
+ (data[2] >> 10) & 0x3ff,
+ (data[2] >> 20) & 0x7ff);
+ return len;
+
+ case 0x6001:
+ kgem_debug_print(data, offset, 0, "CS_URB_STATE\n");
+ kgem_debug_print(data, offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n",
+ (data[1] >> 4) & 0x1f,
+ (((data[1] >> 4) & 0x1f) + 1) * 64,
+ data[1] & 0x7);
+ return len;
+ case 0x6002:
+ kgem_debug_print(data, offset, 0, "CONSTANT_BUFFER: %s\n",
+ (data[0] >> 8) & 1 ? "valid" : "invalid");
+ kgem_debug_print(data, offset, 1, "offset: 0x%08x, length: %d bytes\n",
+ data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64);
+ return len;
+ case 0x6101:
+ i = 0;
+ kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n");
+ assert(len == 6);
+
+ state_base_out(data, offset, i++, "general");
+ state_base_out(data, offset, i++, "surface");
+ state_base_out(data, offset, i++, "media");
+
+ state_max_out(data, offset, i++, "general");
+ state_max_out(data, offset, i++, "media");
+
+ return len;
+
+ case 0x7801:
+ assert(len == 6);
+
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_BINDING_TABLE_POINTERS\n");
+ kgem_debug_print(data, offset, 1, "VS binding table\n");
+ kgem_debug_print(data, offset, 2, "GS binding table\n");
+ kgem_debug_print(data, offset, 3, "CLIP binding table\n");
+ kgem_debug_print(data, offset, 4, "SF binding table\n");
+ kgem_debug_print(data, offset, 5, "WM binding table\n");
+
+ return len;
+
+ case 0x7808:
+ assert((len - 1) % 4 == 0);
+ kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n");
+
+ for (i = 1; i < len;) {
+ gen4_update_vertex_buffer(kgem, data + i);
+
+ kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n",
+ data[i] >> 27,
+ data[i] & (1 << 20) ? "random" : "sequential",
+ data[i] & 0x07ff);
+ i++;
+ kgem_debug_print(data, offset, i++, "buffer address\n");
+ kgem_debug_print(data, offset, i++, "max index\n");
+ kgem_debug_print(data, offset, i++, "mbz\n");
+ }
+ return len;
+
+ case 0x7809:
+ assert((len + 1) % 2 == 0);
+ kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");
+
+ memset(state.ve, 0, sizeof(state.ve)); /* XXX? */
+ for (i = 1; i < len;) {
+ gen4_update_vertex_elements(kgem, (i - 1)/2, data + i);
+
+ kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, "
+ "src offset 0x%04x bytes\n",
+ data[i] >> 27,
+ data[i] & (1 << 26) ? "" : "in",
+ (data[i] >> 16) & 0x1ff,
+ data[i] & 0x07ff);
+ i++;
+ kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), "
+ "dst offset 0x%02x bytes\n",
+ get_965_element_component(data[i], 0),
+ get_965_element_component(data[i], 1),
+ get_965_element_component(data[i], 2),
+ get_965_element_component(data[i], 3),
+ (data[i] & 0xff) * 4);
+ i++;
+ }
+ state.num_ve = (len - 1) / 2; /* XXX? */
+ return len;
+
+ case 0x780a:
+ assert(len == 3);
+ kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n");
+ kgem_debug_print(data, offset, 1, "beginning buffer address\n");
+ kgem_debug_print(data, offset, 2, "ending buffer address\n");
+ return len;
+
+ case 0x7900:
+ assert(len == 4);
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_DRAWING_RECTANGLE\n");
+ kgem_debug_print(data, offset, 1, "top left: %d,%d\n",
+ data[1] & 0xffff,
+ (data[1] >> 16) & 0xffff);
+ kgem_debug_print(data, offset, 2, "bottom right: %d,%d\n",
+ data[2] & 0xffff,
+ (data[2] >> 16) & 0xffff);
+ kgem_debug_print(data, offset, 3, "origin: %d,%d\n",
+ (int)data[3] & 0xffff,
+ ((int)data[3] >> 16) & 0xffff);
+ return len;
+
+ case 0x7905:
+ assert(len == 7);
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_DEPTH_BUFFER\n");
+ kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n",
+ get_965_surfacetype(data[1] >> 29),
+ get_965_depthformat((data[1] >> 18) & 0x7),
+ (data[1] & 0x0001ffff) + 1,
+ data[1] & (1 << 27) ? "" : "not ",
+ (data[1] & (1 << 22)) != 0,
+ (data[1] & (1 << 21)) != 0);
+ kgem_debug_print(data, offset, 2, "depth offset\n");
+ kgem_debug_print(data, offset, 3, "%dx%d\n",
+ ((data[3] & 0x0007ffc0) >> 6) + 1,
+ ((data[3] & 0xfff80000) >> 19) + 1);
+ kgem_debug_print(data, offset, 4, "volume depth\n");
+ kgem_debug_print(data, offset, 5, "\n");
+ kgem_debug_print(data, offset, 6, "\n");
+ return len;
+
+ case 0x7a00:
+ assert(len == 4 || len == 5);
+ switch ((data[1] >> 14) & 0x3) {
+ case 0: desc1 = "no write"; break;
+ case 1: desc1 = "qword write"; break;
+ case 2: desc1 = "PS_DEPTH_COUNT write"; break;
+ case 3: desc1 = "TIMESTAMP write"; break;
+ }
+ kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n");
+ kgem_debug_print(data, offset, 1,
+ "%s, %scs stall, %stlb invalidate, "
+ "%ssync gfdt, %sdepth stall, %sRC write flush, "
+ "%sinst flush, %sTC flush\n",
+ desc1,
+ data[1] & (1 << 20) ? "" : "no ",
+ data[1] & (1 << 18) ? "" : "no ",
+ data[1] & (1 << 17) ? "" : "no ",
+ data[1] & (1 << 13) ? "" : "no ",
+ data[1] & (1 << 12) ? "" : "no ",
+ data[1] & (1 << 11) ? "" : "no ",
+ data[1] & (1 << 10) ? "" : "no ");
+ if (len == 5) {
+ kgem_debug_print(data, offset, 2, "destination address\n");
+ kgem_debug_print(data, offset, 3, "immediate dword low\n");
+ kgem_debug_print(data, offset, 4, "immediate dword high\n");
+ } else {
+ for (i = 2; i < len; i++) {
+ kgem_debug_print(data, offset, i, "\n");
+ }
+ }
+ return len;
+
+ case 0x7b00:
+ assert(len == 6);
+ kgem_debug_print(data, offset, 0,
+ "3DPRIMITIVE: %s %s\n",
+ get_965_prim_type(data[0]),
+ (data[0] & (1 << 15)) ? "random" : "sequential");
+ kgem_debug_print(data, offset, 1, "vertex count\n");
+ kgem_debug_print(data, offset, 2, "start vertex\n");
+ kgem_debug_print(data, offset, 3, "instance count\n");
+ kgem_debug_print(data, offset, 4, "start instance\n");
+ kgem_debug_print(data, offset, 5, "index bias\n");
+ primitive_out(kgem, data);
+ return len;
+ }
+
+ /* For the rest, just dump the bytes */
+ for (i = 0; i < ARRAY_SIZE(opcodes); i++)
+ if (op == opcodes[i].opcode)
+ break;
+
+ assert(i < ARRAY_SIZE(opcodes));
+
+ len = 1;
+ kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name);
+ if (opcodes[i].max_len > 1) {
+ len = (data[0] & 0xff) + 2;
+ assert(len >= opcodes[i].min_len &&
+ len <= opcodes[i].max_len);
+ }
+
+ for (i = 1; i < len; i++)
+ kgem_debug_print(data, offset, i, "dword %d\n", i);
+
+ return len;
+}
+
+static void finish_vertex_buffers(struct kgem *kgem)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(state.vb); i++)
+ if (state.vb[i].current)
+ munmap(state.vb[i].base, state.vb[i].current->size);
+}
+
+void kgem_gen4_finish_state(struct kgem *kgem)
+{
+ finish_vertex_buffers(kgem);
+
+ if (state.dynamic_state.current)
+ munmap(state.dynamic_state.base, state.dynamic_state.current->size);
+
+ memset(&state, 0, sizeof(state));
+}
diff --git a/src/sna/kgem_debug_gen5.c b/src/sna/kgem_debug_gen5.c
new file mode 100644
index 00000000..78ba4432
--- /dev/null
+++ b/src/sna/kgem_debug_gen5.c
@@ -0,0 +1,687 @@
+/*
+ * Copyright © 2007-2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/mman.h>
+#include <assert.h>
+
+#include "sna.h"
+#include "sna_reg.h"
+
+#include "gen5_render.h"
+
+#include "kgem_debug.h"
+
+static struct state {
+ struct vertex_buffer {
+ int handle;
+ void *base;
+ int size;
+ const char *ptr;
+ int pitch;
+
+ struct kgem_bo *current;
+ } vb[17];
+ struct vertex_elements {
+ int buffer;
+ int offset;
+ bool valid;
+ uint32_t type;
+ uint8_t swizzle[4];
+ } ve[17];
+ int num_ve;
+
+ struct dynamic_state {
+ struct kgem_bo *current;
+ void *base, *ptr;
+ } dynamic_state;
+} state;
+
+static void gen5_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
+{
+ struct drm_i915_gem_relocation_entry *reloc;
+ struct kgem_bo *bo = NULL;
+ void *base, *ptr;
+ int i, size;
+
+ reloc = kgem_debug_get_reloc_entry(kgem, &data[1] - kgem->batch);
+ if (reloc->target_handle == 0) {
+ base = kgem->batch;
+ size = kgem->nbatch * sizeof(uint32_t);
+ } else {
+ bo = kgem_debug_get_bo_for_reloc_entry(kgem, reloc);
+ base = kgem_bo_map(kgem, bo, PROT_READ);
+ size = bo->size;
+ }
+ ptr = (char *)base + reloc->delta;
+
+ i = data[0] >> 27;
+ if (state.vb[i].current)
+ munmap(state.vb[i].base, state.vb[i].current->size);
+
+ state.vb[i].handle = reloc->target_handle;
+ state.vb[i].current = bo;
+ state.vb[i].base = base;
+ state.vb[i].ptr = ptr;
+ state.vb[i].pitch = data[0] & 0x7ff;
+ state.vb[i].size = size;
+}
+
+static uint32_t
+get_ve_component(uint32_t data, int component)
+{
+ return (data >> (16 + (3 - component) * 4)) & 0x7;
+}
+
+static void gen5_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data)
+{
+ state.ve[id].buffer = data[0] >> 27;
+ state.ve[id].valid = !!(data[0] & (1 << 26));
+ state.ve[id].type = (data[0] >> 16) & 0x1ff;
+ state.ve[id].offset = data[0] & 0x7ff;
+ state.ve[id].swizzle[0] = get_ve_component(data[1], 0);
+ state.ve[id].swizzle[1] = get_ve_component(data[1], 1);
+ state.ve[id].swizzle[2] = get_ve_component(data[1], 2);
+ state.ve[id].swizzle[3] = get_ve_component(data[1], 3);
+}
+
+static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max)
+{
+ int c, o;
+
+ ErrorF("(");
+ for (c = o = 0; c < 4 && o < max; c++) {
+ switch (ve->swizzle[c]) {
+ case 0: ErrorF("#"); break;
+ case 1: ErrorF("%d", v[o++]); break;
+ case 2: ErrorF("0.0"); break;
+ case 3: ErrorF("1.0"); break;
+ case 4: ErrorF("0x1"); break;
+ case 5: break;
+ default: ErrorF("?");
+ }
+ if (o < max)
+ ErrorF(", ");
+ }
+ ErrorF(")");
+}
+
+static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max)
+{
+ int c, o;
+
+ ErrorF("(");
+ for (c = o = 0; c < 4 && o < max; c++) {
+ switch (ve->swizzle[c]) {
+ case 0: ErrorF("#"); break;
+ case 1: ErrorF("%f", f[o++]); break;
+ case 2: ErrorF("0.0"); break;
+ case 3: ErrorF("1.0"); break;
+ case 4: ErrorF("0x1"); break;
+ case 5: break;
+ default: ErrorF("?");
+ }
+ if (o < max)
+ ErrorF(", ");
+ }
+ ErrorF(")");
+}
+
+static void ve_out(const struct vertex_elements *ve, const void *ptr)
+{
+ switch (ve->type) {
+ case GEN5_SURFACEFORMAT_R32_FLOAT:
+ vertices_float_out(ve, ptr, 1);
+ break;
+ case GEN5_SURFACEFORMAT_R32G32_FLOAT:
+ vertices_float_out(ve, ptr, 2);
+ break;
+ case GEN5_SURFACEFORMAT_R32G32B32_FLOAT:
+ vertices_float_out(ve, ptr, 3);
+ break;
+ case GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT:
+ vertices_float_out(ve, ptr, 4);
+ break;
+ case GEN5_SURFACEFORMAT_R16_SINT:
+ vertices_sint16_out(ve, ptr, 1);
+ break;
+ case GEN5_SURFACEFORMAT_R16G16_SINT:
+ vertices_sint16_out(ve, ptr, 2);
+ break;
+ case GEN5_SURFACEFORMAT_R16G16B16A16_SINT:
+ vertices_sint16_out(ve, ptr, 4);
+ break;
+ case GEN5_SURFACEFORMAT_R16_SSCALED:
+ vertices_sint16_out(ve, ptr, 1);
+ break;
+ case GEN5_SURFACEFORMAT_R16G16_SSCALED:
+ vertices_sint16_out(ve, ptr, 2);
+ break;
+ case GEN5_SURFACEFORMAT_R16G16B16A16_SSCALED:
+ vertices_sint16_out(ve, ptr, 4);
+ break;
+ }
+}
+
+static void indirect_vertex_out(struct kgem *kgem, uint32_t v)
+{
+ int i = 1;
+
+ do {
+ const struct vertex_elements *ve = &state.ve[i];
+ const struct vertex_buffer *vb = &state.vb[ve->buffer];
+ const void *ptr = vb->ptr + v * vb->pitch + ve->offset;
+
+ if (!ve->valid)
+ continue;
+
+ assert(vb->pitch);
+ assert(ve->offset + v*vb->pitch < vb->size);
+
+ ve_out(ve, ptr);
+
+ while (++i <= state.num_ve && !state.ve[i].valid)
+ ;
+
+ if (i <= state.num_ve)
+ ErrorF(", ");
+ } while (i <= state.num_ve);
+}
+
+static void primitive_out(struct kgem *kgem, uint32_t *data)
+{
+ int n;
+
+ assert((data[0] & (1<<15)) == 0); /* XXX index buffers */
+
+ for (n = 0; n < data[1]; n++) {
+ int v = data[2] + n;
+ ErrorF(" [%d:%d] = ", n, v);
+ indirect_vertex_out(kgem, v);
+ ErrorF("\n");
+ }
+}
+
+static void
+state_base_out(uint32_t *data, uint32_t offset, unsigned int index,
+ char *name)
+{
+ if (data[index] & 1)
+ kgem_debug_print(data, offset, index,
+ "%s state base address 0x%08x\n",
+ name, data[index] & ~1);
+ else
+ kgem_debug_print(data, offset, index,
+ "%s state base not updated\n",
+ name);
+}
+
+static void
+state_max_out(uint32_t *data, uint32_t offset, unsigned int index,
+ char *name)
+{
+ if (data[index] == 1)
+ kgem_debug_print(data, offset, index,
+ "%s state upper bound disabled\n", name);
+ else if (data[index] & 1)
+ kgem_debug_print(data, offset, index,
+ "%s state upper bound 0x%08x\n",
+ name, data[index] & ~1);
+ else
+ kgem_debug_print(data, offset, index,
+ "%s state upper bound not updated\n",
+ name);
+}
+
+static const char *
+get_965_surfacetype(unsigned int surfacetype)
+{
+ switch (surfacetype) {
+ case 0: return "1D";
+ case 1: return "2D";
+ case 2: return "3D";
+ case 3: return "CUBE";
+ case 4: return "BUFFER";
+ case 7: return "NULL";
+ default: return "unknown";
+ }
+}
+
+static const char *
+get_965_depthformat(unsigned int depthformat)
+{
+ switch (depthformat) {
+ case 0: return "s8_z24float";
+ case 1: return "z32float";
+ case 2: return "z24s8";
+ case 5: return "z16";
+ default: return "unknown";
+ }
+}
+
+static const char *
+get_965_element_component(uint32_t data, int component)
+{
+ uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7;
+
+ switch (component_control) {
+ case 0:
+ return "nostore";
+ case 1:
+ switch (component) {
+ case 0: return "X";
+ case 1: return "Y";
+ case 2: return "Z";
+ case 3: return "W";
+ default: return "fail";
+ }
+ case 2:
+ return "0.0";
+ case 3:
+ return "1.0";
+ case 4:
+ return "0x1";
+ case 5:
+ return "VID";
+ default:
+ return "fail";
+ }
+}
+
+static const char *
+get_965_prim_type(uint32_t data)
+{
+ uint32_t primtype = (data >> 10) & 0x1f;
+
+ switch (primtype) {
+ case 0x01: return "point list";
+ case 0x02: return "line list";
+ case 0x03: return "line strip";
+ case 0x04: return "tri list";
+ case 0x05: return "tri strip";
+ case 0x06: return "tri fan";
+ case 0x07: return "quad list";
+ case 0x08: return "quad strip";
+ case 0x09: return "line list adj";
+ case 0x0a: return "line strip adj";
+ case 0x0b: return "tri list adj";
+ case 0x0c: return "tri strip adj";
+ case 0x0d: return "tri strip reverse";
+ case 0x0e: return "polygon";
+ case 0x0f: return "rect list";
+ case 0x10: return "line loop";
+ case 0x11: return "point list bf";
+ case 0x12: return "line strip cont";
+ case 0x13: return "line strip bf";
+ case 0x14: return "line strip cont bf";
+ case 0x15: return "tri fan no stipple";
+ default: return "fail";
+ }
+}
+
+#if 0
+struct reloc {
+ struct kgem_bo *bo;
+ void *base;
+};
+
+static void *
+get_reloc(struct kgem *kgem,
+ void *base, const uint32_t *reloc,
+ struct reloc *r)
+{
+ uint32_t delta = *reloc;
+
+ memset(r, 0, sizeof(*r));
+
+ if (base == 0) {
+ uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch);
+ struct kgem_bo *bo = NULL;
+ int i;
+
+ for (i = 0; i < kgem->nreloc; i++)
+ if (kgem->reloc[i].offset == handle)
+ break;
+ assert(i < kgem->nreloc);
+ handle = kgem->reloc[i].target_handle;
+ delta = kgem->reloc[i].delta;
+
+ if (handle == 0) {
+ base = kgem->batch;
+ } else {
+ list_for_each_entry(bo, &kgem->next_request->buffers, request)
+ if (bo->handle == handle)
+ break;
+ assert(&bo->request != &kgem->next_request->buffers);
+ base = kgem_bo_map(kgem, bo, PROT_READ);
+ r->bo = bo;
+ r->base = base;
+ }
+ }
+
+ return (char *)base + delta;
+}
+
+static void
+put_reloc(struct kgem *kgem, struct reloc *r)
+{
+ if (r->bo != NULL)
+ munmap(r->base, r->bo->size);
+}
+#endif
+
+int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset)
+{
+ static const struct {
+ uint32_t opcode;
+ int min_len;
+ int max_len;
+ const char *name;
+ } opcodes[] = {
+ { 0x6000, 3, 3, "URB_FENCE" },
+ { 0x6001, 2, 2, "CS_URB_FENCE" },
+ { 0x6002, 2, 2, "CONSTANT_BUFFER" },
+ { 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
+ { 0x6102, 2, 2 , "STATE_SIP" },
+ { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+ { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" },
+ { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+ { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" },
+ { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" },
+ { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
+ { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
+ { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
+ { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
+ { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
+ { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
+ { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
+ { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" },
+ { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
+ { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
+ { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
+ { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" },
+ { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
+ { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" },
+ { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" },
+ { 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" },
+ { 0x7b00, 6, 6, "3DPRIMITIVE" },
+ { 0x7805, 3, 3, "3DSTATE_URB" },
+ { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
+ { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
+ { 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" },
+ { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" },
+ };
+ uint32_t *data = kgem->batch + offset;
+ uint32_t op;
+ unsigned int len;
+ int i;
+ char *desc1 = NULL;
+
+ len = (data[0] & 0xff) + 2;
+ op = (data[0] & 0xffff0000) >> 16;
+ switch (op) {
+ case 0x6000:
+ assert(len == 3);
+
+ kgem_debug_print(data, offset, 0, "URB_FENCE: %s%s%s%s%s%s\n",
+ (data[0] >> 13) & 1 ? "cs " : "",
+ (data[0] >> 12) & 1 ? "vfe " : "",
+ (data[0] >> 11) & 1 ? "sf " : "",
+ (data[0] >> 10) & 1 ? "clip " : "",
+ (data[0] >> 9) & 1 ? "gs " : "",
+ (data[0] >> 8) & 1 ? "vs " : "");
+ kgem_debug_print(data, offset, 1,
+ "vs fence: %d, gs_fence: %d, clip_fence: %d\n",
+ data[1] & 0x3ff,
+ (data[1] >> 10) & 0x3ff,
+ (data[1] >> 20) & 0x3ff);
+ kgem_debug_print(data, offset, 2,
+ "sf fence: %d, vfe_fence: %d, cs_fence: %d\n",
+ data[2] & 0x3ff,
+ (data[2] >> 10) & 0x3ff,
+ (data[2] >> 20) & 0x7ff);
+ return len;
+
+ case 0x6001:
+ kgem_debug_print(data, offset, 0, "CS_URB_STATE\n");
+ kgem_debug_print(data, offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n",
+ (data[1] >> 4) & 0x1f,
+ (((data[1] >> 4) & 0x1f) + 1) * 64,
+ data[1] & 0x7);
+ return len;
+ case 0x6002:
+ kgem_debug_print(data, offset, 0, "CONSTANT_BUFFER: %s\n",
+ (data[0] >> 8) & 1 ? "valid" : "invalid");
+ kgem_debug_print(data, offset, 1, "offset: 0x%08x, length: %d bytes\n",
+ data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64);
+ return len;
+ case 0x6101:
+ i = 0;
+ kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n");
+ assert(len == 8);
+
+ state_base_out(data, offset, i++, "general");
+ state_base_out(data, offset, i++, "surface");
+ state_base_out(data, offset, i++, "media");
+ state_base_out(data, offset, i++, "instruction");
+
+ state_max_out(data, offset, i++, "general");
+ state_max_out(data, offset, i++, "media");
+ state_max_out(data, offset, i++, "instruction");
+
+ return len;
+
+ case 0x7801:
+ assert(len == 6);
+
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_BINDING_TABLE_POINTERS\n");
+ kgem_debug_print(data, offset, 1, "VS binding table\n");
+ kgem_debug_print(data, offset, 2, "GS binding table\n");
+ kgem_debug_print(data, offset, 3, "CLIP binding table\n");
+ kgem_debug_print(data, offset, 4, "SF binding table\n");
+ kgem_debug_print(data, offset, 5, "WM binding table\n");
+
+ return len;
+
+ case 0x7808:
+ assert((len - 1) % 4 == 0);
+ kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n");
+
+ for (i = 1; i < len;) {
+ gen5_update_vertex_buffer(kgem, data + i);
+
+ kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n",
+ data[i] >> 27,
+ data[i] & (1 << 20) ? "random" : "sequential",
+ data[i] & 0x07ff);
+ i++;
+ kgem_debug_print(data, offset, i++, "buffer address\n");
+ kgem_debug_print(data, offset, i++, "max index\n");
+ kgem_debug_print(data, offset, i++, "mbz\n");
+ }
+ return len;
+
+ case 0x7809:
+ assert((len + 1) % 2 == 0);
+ kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");
+
+ memset(state.ve, 0, sizeof(state.ve)); /* XXX? */
+ for (i = 1; i < len;) {
+ gen5_update_vertex_elements(kgem, (i - 1)/2, data + i);
+
+ kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, "
+ "src offset 0x%04x bytes\n",
+ data[i] >> 27,
+ data[i] & (1 << 26) ? "" : "in",
+ (data[i] >> 16) & 0x1ff,
+ data[i] & 0x07ff);
+ i++;
+ kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), "
+ "dst offset 0x%02x bytes\n",
+ get_965_element_component(data[i], 0),
+ get_965_element_component(data[i], 1),
+ get_965_element_component(data[i], 2),
+ get_965_element_component(data[i], 3),
+ (data[i] & 0xff) * 4);
+ i++;
+ }
+ state.num_ve = (len - 1) / 2; /* XXX? */
+ return len;
+
+ case 0x780a:
+ assert(len == 3);
+ kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n");
+ kgem_debug_print(data, offset, 1, "beginning buffer address\n");
+ kgem_debug_print(data, offset, 2, "ending buffer address\n");
+ return len;
+
+ case 0x7900:
+ assert(len == 4);
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_DRAWING_RECTANGLE\n");
+ kgem_debug_print(data, offset, 1, "top left: %d,%d\n",
+ data[1] & 0xffff,
+ (data[1] >> 16) & 0xffff);
+ kgem_debug_print(data, offset, 2, "bottom right: %d,%d\n",
+ data[2] & 0xffff,
+ (data[2] >> 16) & 0xffff);
+ kgem_debug_print(data, offset, 3, "origin: %d,%d\n",
+ (int)data[3] & 0xffff,
+ ((int)data[3] >> 16) & 0xffff);
+ return len;
+
+ case 0x7905:
+ assert(len == 7);
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_DEPTH_BUFFER\n");
+ kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n",
+ get_965_surfacetype(data[1] >> 29),
+ get_965_depthformat((data[1] >> 18) & 0x7),
+ (data[1] & 0x0001ffff) + 1,
+ data[1] & (1 << 27) ? "" : "not ",
+ (data[1] & (1 << 22)) != 0,
+ (data[1] & (1 << 21)) != 0);
+ kgem_debug_print(data, offset, 2, "depth offset\n");
+ kgem_debug_print(data, offset, 3, "%dx%d\n",
+ ((data[3] & 0x0007ffc0) >> 6) + 1,
+ ((data[3] & 0xfff80000) >> 19) + 1);
+ kgem_debug_print(data, offset, 4, "volume depth\n");
+ kgem_debug_print(data, offset, 5, "\n");
+ kgem_debug_print(data, offset, 6, "\n");
+ return len;
+
+ case 0x7a00:
+ assert(len == 4 || len == 5);
+ switch ((data[1] >> 14) & 0x3) {
+ case 0: desc1 = "no write"; break;
+ case 1: desc1 = "qword write"; break;
+ case 2: desc1 = "PS_DEPTH_COUNT write"; break;
+ case 3: desc1 = "TIMESTAMP write"; break;
+ }
+ kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n");
+ kgem_debug_print(data, offset, 1,
+ "%s, %scs stall, %stlb invalidate, "
+ "%ssync gfdt, %sdepth stall, %sRC write flush, "
+ "%sinst flush, %sTC flush\n",
+ desc1,
+ data[1] & (1 << 20) ? "" : "no ",
+ data[1] & (1 << 18) ? "" : "no ",
+ data[1] & (1 << 17) ? "" : "no ",
+ data[1] & (1 << 13) ? "" : "no ",
+ data[1] & (1 << 12) ? "" : "no ",
+ data[1] & (1 << 11) ? "" : "no ",
+ data[1] & (1 << 10) ? "" : "no ");
+ if (len == 5) {
+ kgem_debug_print(data, offset, 2, "destination address\n");
+ kgem_debug_print(data, offset, 3, "immediate dword low\n");
+ kgem_debug_print(data, offset, 4, "immediate dword high\n");
+ } else {
+ for (i = 2; i < len; i++) {
+ kgem_debug_print(data, offset, i, "\n");
+ }
+ }
+ return len;
+
+ case 0x7b00:
+ assert(len == 6);
+ kgem_debug_print(data, offset, 0,
+ "3DPRIMITIVE: %s %s\n",
+ get_965_prim_type(data[0]),
+ (data[0] & (1 << 15)) ? "random" : "sequential");
+ kgem_debug_print(data, offset, 1, "vertex count\n");
+ kgem_debug_print(data, offset, 2, "start vertex\n");
+ kgem_debug_print(data, offset, 3, "instance count\n");
+ kgem_debug_print(data, offset, 4, "start instance\n");
+ kgem_debug_print(data, offset, 5, "index bias\n");
+ primitive_out(kgem, data);
+ return len;
+ }
+
+ /* For the rest, just dump the bytes */
+ for (i = 0; i < ARRAY_SIZE(opcodes); i++)
+ if (op == opcodes[i].opcode)
+ break;
+
+ assert(i < ARRAY_SIZE(opcodes));
+
+ len = 1;
+ kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name);
+ if (opcodes[i].max_len > 1) {
+ len = (data[0] & 0xff) + 2;
+ assert(len >= opcodes[i].min_len &&
+ len <= opcodes[i].max_len);
+ }
+
+ for (i = 1; i < len; i++)
+ kgem_debug_print(data, offset, i, "dword %d\n", i);
+
+ return len;
+}
+
+static void finish_vertex_buffers(struct kgem *kgem)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(state.vb); i++)
+ if (state.vb[i].current)
+ munmap(state.vb[i].base, state.vb[i].current->size);
+}
+
+void kgem_gen5_finish_state(struct kgem *kgem)
+{
+ finish_vertex_buffers(kgem);
+
+ if (state.dynamic_state.current)
+ munmap(state.dynamic_state.base, state.dynamic_state.current->size);
+
+ memset(&state, 0, sizeof(state));
+}
diff --git a/src/sna/kgem_debug_gen6.c b/src/sna/kgem_debug_gen6.c
new file mode 100644
index 00000000..d441b536
--- /dev/null
+++ b/src/sna/kgem_debug_gen6.c
@@ -0,0 +1,1099 @@
+/*
+ * Copyright © 2007-2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ * Chris Wilson <chris"chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/mman.h>
+#include <assert.h>
+
+#include "sna.h"
+#include "sna_reg.h"
+#include "gen6_render.h"
+
+#include "kgem_debug.h"
+
+static struct state {
+ struct vertex_buffer {
+ int handle;
+ void *base;
+ const char *ptr;
+ int pitch;
+
+ struct kgem_bo *current;
+ } vb[33];
+ struct vertex_elements {
+ int buffer;
+ int offset;
+ bool valid;
+ uint32_t type;
+ uint8_t swizzle[4];
+ } ve[33];
+ int num_ve;
+
+ struct dynamic_state {
+ struct kgem_bo *current;
+ void *base, *ptr;
+ } dynamic_state;
+} state;
+
+static void gen6_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
+{
+ uint32_t reloc = sizeof(uint32_t) * (&data[1] - kgem->batch);
+ struct kgem_bo *bo = NULL;
+ void *base, *ptr;
+ int i;
+
+ for (i = 0; i < kgem->nreloc; i++)
+ if (kgem->reloc[i].offset == reloc)
+ break;
+ assert(i < kgem->nreloc);
+ reloc = kgem->reloc[i].target_handle;
+
+ if (reloc == 0) {
+ base = kgem->batch;
+ } else {
+ list_for_each_entry(bo, &kgem->next_request->buffers, request)
+ if (bo->handle == reloc)
+ break;
+ assert(&bo->request != &kgem->next_request->buffers);
+ base = kgem_bo_map(kgem, bo, PROT_READ);
+ }
+ ptr = (char *)base + kgem->reloc[i].delta;
+
+ i = data[0] >> 26;
+ if (state.vb[i].current)
+ munmap(state.vb[i].base, state.vb[i].current->size);
+
+ state.vb[i].current = bo;
+ state.vb[i].base = base;
+ state.vb[i].ptr = ptr;
+ state.vb[i].pitch = data[0] & 0x7ff;
+}
+
+static void gen6_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset)
+{
+ uint32_t reloc = sizeof(uint32_t) * offset;
+ struct kgem_bo *bo = NULL;
+ void *base, *ptr;
+ int i;
+
+ if ((kgem->batch[offset] & 1) == 0)
+ return;
+
+ for (i = 0; i < kgem->nreloc; i++)
+ if (kgem->reloc[i].offset == reloc)
+ break;
+ if(i < kgem->nreloc) {
+ reloc = kgem->reloc[i].target_handle;
+
+ if (reloc == 0) {
+ base = kgem->batch;
+ } else {
+ list_for_each_entry(bo, &kgem->next_request->buffers, request)
+ if (bo->handle == reloc)
+ break;
+ assert(&bo->request != &kgem->next_request->buffers);
+ base = kgem_bo_map(kgem, bo, PROT_READ);
+ }
+ ptr = (char *)base + (kgem->reloc[i].delta & ~1);
+ } else {
+ bo = NULL;
+ base = NULL;
+ ptr = NULL;
+ }
+
+ if (state.dynamic_state.current)
+ munmap(state.dynamic_state.base, state.dynamic_state.current->size);
+
+ state.dynamic_state.current = bo;
+ state.dynamic_state.base = base;
+ state.dynamic_state.ptr = ptr;
+}
+
+static uint32_t
+get_ve_component(uint32_t data, int component)
+{
+ return (data >> (16 + (3 - component) * 4)) & 0x7;
+}
+
+static void gen6_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data)
+{
+ state.ve[id].buffer = data[0] >> 26;
+ state.ve[id].valid = !!(data[0] & (1 << 25));
+ state.ve[id].type = (data[0] >> 16) & 0x1ff;
+ state.ve[id].offset = data[0] & 0x7ff;
+ state.ve[id].swizzle[0] = get_ve_component(data[1], 0);
+ state.ve[id].swizzle[1] = get_ve_component(data[1], 1);
+ state.ve[id].swizzle[2] = get_ve_component(data[1], 2);
+ state.ve[id].swizzle[3] = get_ve_component(data[1], 3);
+}
+
+static void gen6_update_sf_state(struct kgem *kgem, uint32_t *data)
+{
+ state.num_ve = 1 + ((data[1] >> 22) & 0x3f);
+}
+
+static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max)
+{
+ int c;
+
+ ErrorF("(");
+ for (c = 0; c < max; c++) {
+ switch (ve->swizzle[c]) {
+ case 0: ErrorF("#"); break;
+ case 1: ErrorF("%d", v[c]); break;
+ case 2: ErrorF("0.0"); break;
+ case 3: ErrorF("1.0"); break;
+ case 4: ErrorF("0x1"); break;
+ case 5: break;
+ default: ErrorF("?");
+ }
+ if (c < 3)
+ ErrorF(", ");
+ }
+ for (; c < 4; c++) {
+ switch (ve->swizzle[c]) {
+ case 0: ErrorF("#"); break;
+ case 1: ErrorF("1.0"); break;
+ case 2: ErrorF("0.0"); break;
+ case 3: ErrorF("1.0"); break;
+ case 4: ErrorF("0x1"); break;
+ case 5: break;
+ default: ErrorF("?");
+ }
+ if (c < 3)
+ ErrorF(", ");
+ }
+ ErrorF(")");
+}
+
+static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max)
+{
+ int c, o;
+
+ ErrorF("(");
+ for (c = o = 0; c < 4 && o < max; c++) {
+ switch (ve->swizzle[c]) {
+ case 0: ErrorF("#"); break;
+ case 1: ErrorF("%f", f[o++]); break;
+ case 2: ErrorF("0.0"); break;
+ case 3: ErrorF("1.0"); break;
+ case 4: ErrorF("0x1"); break;
+ case 5: break;
+ default: ErrorF("?");
+ }
+ if (c < 3)
+ ErrorF(", ");
+ }
+ for (; c < 4; c++) {
+ switch (ve->swizzle[c]) {
+ case 0: ErrorF("#"); break;
+ case 1: ErrorF("1.0"); break;
+ case 2: ErrorF("0.0"); break;
+ case 3: ErrorF("1.0"); break;
+ case 4: ErrorF("0x1"); break;
+ case 5: break;
+ default: ErrorF("?");
+ }
+ if (c < 3)
+ ErrorF(", ");
+ }
+ ErrorF(")");
+}
+
+static void ve_out(const struct vertex_elements *ve, const void *ptr)
+{
+ switch (ve->type) {
+ case GEN6_SURFACEFORMAT_R32_FLOAT:
+ vertices_float_out(ve, ptr, 1);
+ break;
+ case GEN6_SURFACEFORMAT_R32G32_FLOAT:
+ vertices_float_out(ve, ptr, 2);
+ break;
+ case GEN6_SURFACEFORMAT_R32G32B32_FLOAT:
+ vertices_float_out(ve, ptr, 3);
+ break;
+ case GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT:
+ vertices_float_out(ve, ptr, 4);
+ break;
+ case GEN6_SURFACEFORMAT_R16_SINT:
+ vertices_sint16_out(ve, ptr, 1);
+ break;
+ case GEN6_SURFACEFORMAT_R16G16_SINT:
+ vertices_sint16_out(ve, ptr, 2);
+ break;
+ case GEN6_SURFACEFORMAT_R16G16B16A16_SINT:
+ vertices_sint16_out(ve, ptr, 4);
+ break;
+ case GEN6_SURFACEFORMAT_R16_SSCALED:
+ vertices_sint16_out(ve, ptr, 1);
+ break;
+ case GEN6_SURFACEFORMAT_R16G16_SSCALED:
+ vertices_sint16_out(ve, ptr, 2);
+ break;
+ case GEN6_SURFACEFORMAT_R16G16B16A16_SSCALED:
+ vertices_sint16_out(ve, ptr, 4);
+ break;
+ }
+}
+
+static void indirect_vertex_out(struct kgem *kgem, uint32_t v)
+{
+ int i = 1;
+
+ do {
+ const struct vertex_elements *ve = &state.ve[i];
+ const struct vertex_buffer *vb = &state.vb[ve->buffer];
+ const void *ptr = vb->ptr + v * vb->pitch + ve->offset;
+
+ if (!ve->valid)
+ continue;
+
+ ve_out(ve, ptr);
+
+ while (++i <= state.num_ve && !state.ve[i].valid)
+ ;
+
+ if (i <= state.num_ve)
+ ErrorF(", ");
+ } while (i <= state.num_ve);
+}
+
+static void primitive_out(struct kgem *kgem, uint32_t *data)
+{
+ int n;
+
+ assert((data[0] & (1<<15)) == 0); /* XXX index buffers */
+
+ for (n = 0; n < data[1]; n++) {
+ int v = data[2] + n;
+ ErrorF(" [%d:%d] = ", n, v);
+ indirect_vertex_out(kgem, v);
+ ErrorF("\n");
+ }
+}
+
+static void finish_vertex_buffers(struct kgem *kgem)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(state.vb); i++)
+ if (state.vb[i].current)
+ munmap(state.vb[i].base, state.vb[i].current->size);
+}
+
+static void finish_state(struct kgem *kgem)
+{
+ finish_vertex_buffers(kgem);
+
+ if (state.dynamic_state.current)
+ munmap(state.dynamic_state.base, state.dynamic_state.current->size);
+
+ memset(&state, 0, sizeof(state));
+}
+
+static void
+state_base_out(uint32_t *data, uint32_t offset, unsigned int index,
+ char *name)
+{
+ if (data[index] & 1)
+ kgem_debug_print(data, offset, index,
+ "%s state base address 0x%08x\n",
+ name, data[index] & ~1);
+ else
+ kgem_debug_print(data, offset, index,
+ "%s state base not updated\n",
+ name);
+}
+
+static void
+state_max_out(uint32_t *data, uint32_t offset, unsigned int index,
+ char *name)
+{
+ if (data[index] == 1)
+ kgem_debug_print(data, offset, index,
+ "%s state upper bound disabled\n", name);
+ else if (data[index] & 1)
+ kgem_debug_print(data, offset, index,
+ "%s state upper bound 0x%08x\n",
+ name, data[index] & ~1);
+ else
+ kgem_debug_print(data, offset, index,
+ "%s state upper bound not updated\n",
+ name);
+}
+
+static const char *
+get_965_surfacetype(unsigned int surfacetype)
+{
+ switch (surfacetype) {
+ case 0: return "1D";
+ case 1: return "2D";
+ case 2: return "3D";
+ case 3: return "CUBE";
+ case 4: return "BUFFER";
+ case 7: return "NULL";
+ default: return "unknown";
+ }
+}
+
+static const char *
+get_965_depthformat(unsigned int depthformat)
+{
+ switch (depthformat) {
+ case 0: return "s8_z24float";
+ case 1: return "z32float";
+ case 2: return "z24s8";
+ case 5: return "z16";
+ default: return "unknown";
+ }
+}
+
+static const char *
+get_965_element_component(uint32_t data, int component)
+{
+ uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7;
+
+ switch (component_control) {
+ case 0:
+ return "nostore";
+ case 1:
+ switch (component) {
+ case 0: return "X";
+ case 1: return "Y";
+ case 2: return "Z";
+ case 3: return "W";
+ default: return "fail";
+ }
+ case 2:
+ return "0.0";
+ case 3:
+ return "1.0";
+ case 4:
+ return "0x1";
+ case 5:
+ return "VID";
+ default:
+ return "fail";
+ }
+}
+
+static const char *
+get_965_prim_type(uint32_t data)
+{
+ uint32_t primtype = (data >> 10) & 0x1f;
+
+ switch (primtype) {
+ case 0x01: return "point list";
+ case 0x02: return "line list";
+ case 0x03: return "line strip";
+ case 0x04: return "tri list";
+ case 0x05: return "tri strip";
+ case 0x06: return "tri fan";
+ case 0x07: return "quad list";
+ case 0x08: return "quad strip";
+ case 0x09: return "line list adj";
+ case 0x0a: return "line strip adj";
+ case 0x0b: return "tri list adj";
+ case 0x0c: return "tri strip adj";
+ case 0x0d: return "tri strip reverse";
+ case 0x0e: return "polygon";
+ case 0x0f: return "rect list";
+ case 0x10: return "line loop";
+ case 0x11: return "point list bf";
+ case 0x12: return "line strip cont";
+ case 0x13: return "line strip bf";
+ case 0x14: return "line strip cont bf";
+ case 0x15: return "tri fan no stipple";
+ default: return "fail";
+ }
+}
+
+struct reloc {
+ struct kgem_bo *bo;
+ void *base;
+};
+
+static void *
+get_reloc(struct kgem *kgem,
+ void *base, const uint32_t *reloc,
+ struct reloc *r)
+{
+ uint32_t delta = *reloc;
+
+ memset(r, 0, sizeof(*r));
+
+ if (base == 0) {
+ uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch);
+ struct kgem_bo *bo = NULL;
+ int i;
+
+ for (i = 0; i < kgem->nreloc; i++)
+ if (kgem->reloc[i].offset == handle)
+ break;
+ assert(i < kgem->nreloc);
+ handle = kgem->reloc[i].target_handle;
+ delta = kgem->reloc[i].delta;
+
+ if (handle == 0) {
+ base = kgem->batch;
+ } else {
+ list_for_each_entry(bo, &kgem->next_request->buffers, request)
+ if (bo->handle == handle)
+ break;
+ assert(&bo->request != &kgem->next_request->buffers);
+ base = kgem_bo_map(kgem, bo, PROT_READ);
+ r->bo = bo;
+ r->base = base;
+ }
+ }
+
+ return (char *)base + (delta & ~3);
+}
+
+static void
+put_reloc(struct kgem *kgem, struct reloc *r)
+{
+ if (r->bo != NULL)
+ munmap(r->base, r->bo->size);
+}
+
+static const char *
+gen6_filter_to_string(uint32_t filter)
+{
+ switch (filter) {
+ default:
+ case GEN6_MAPFILTER_NEAREST: return "nearest";
+ case GEN6_MAPFILTER_LINEAR: return "linear";
+ }
+}
+
+static const char *
+gen6_repeat_to_string(uint32_t repeat)
+{
+ switch (repeat) {
+ default:
+ case GEN6_TEXCOORDMODE_CLAMP_BORDER: return "border";
+ case GEN6_TEXCOORDMODE_WRAP: return "wrap";
+ case GEN6_TEXCOORDMODE_CLAMP: return "clamp";
+ case GEN6_TEXCOORDMODE_MIRROR: return "mirror";
+ }
+}
+
+static void
+gen6_decode_sampler_state(struct kgem *kgem, const uint32_t *reloc)
+{
+ const struct gen6_sampler_state *ss;
+ struct reloc r;
+ const char *min, *mag;
+ const char *s_wrap, *t_wrap, *r_wrap;
+
+ ss = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r);
+
+ min = gen6_filter_to_string(ss->ss0.min_filter);
+ mag = gen6_filter_to_string(ss->ss0.mag_filter);
+
+ s_wrap = gen6_repeat_to_string(ss->ss1.s_wrap_mode);
+ t_wrap = gen6_repeat_to_string(ss->ss1.t_wrap_mode);
+ r_wrap = gen6_repeat_to_string(ss->ss1.r_wrap_mode);
+
+ ErrorF(" Sampler 0:\n");
+ ErrorF(" filter: min=%s, mag=%s\n", min, mag);
+ ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap);
+
+ ss++;
+ min = gen6_filter_to_string(ss->ss0.min_filter);
+ mag = gen6_filter_to_string(ss->ss0.mag_filter);
+
+ s_wrap = gen6_repeat_to_string(ss->ss1.s_wrap_mode);
+ t_wrap = gen6_repeat_to_string(ss->ss1.t_wrap_mode);
+ r_wrap = gen6_repeat_to_string(ss->ss1.r_wrap_mode);
+
+ ErrorF(" Sampler 1:\n");
+ ErrorF(" filter: min=%s, mag=%s\n", min, mag);
+ ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap);
+
+ put_reloc(kgem, &r);
+}
+
+static const char *
+gen6_blend_factor_to_string(uint32_t v)
+{
+ switch (v) {
+#define C(x) case GEN6_BLENDFACTOR_##x: return #x;
+ C(ONE);
+ C(SRC_COLOR);
+ C(SRC_ALPHA);
+ C(DST_ALPHA);
+ C(DST_COLOR);
+ C(SRC_ALPHA_SATURATE);
+ C(CONST_COLOR);
+ C(CONST_ALPHA);
+ C(SRC1_COLOR);
+ C(SRC1_ALPHA);
+ C(ZERO);
+ C(INV_SRC_COLOR);
+ C(INV_SRC_ALPHA);
+ C(INV_DST_ALPHA);
+ C(INV_DST_COLOR);
+ C(INV_CONST_COLOR);
+ C(INV_CONST_ALPHA);
+ C(INV_SRC1_COLOR);
+ C(INV_SRC1_ALPHA);
+#undef C
+ default: return "???";
+ }
+}
+
+static const char *
+gen6_blend_function_to_string(uint32_t v)
+{
+ switch (v) {
+#define C(x) case GEN6_BLENDFUNCTION_##x: return #x;
+ C(ADD);
+ C(SUBTRACT);
+ C(REVERSE_SUBTRACT);
+ C(MIN);
+ C(MAX);
+#undef C
+ default: return "???";
+ }
+}
+
+static void
+gen6_decode_blend(struct kgem *kgem, const uint32_t *reloc)
+{
+ const struct gen6_blend_state *blend;
+ struct reloc r;
+ const char *dst, *src;
+ const char *func;
+
+ blend = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r);
+
+ dst = gen6_blend_factor_to_string(blend->blend0.dest_blend_factor);
+ src = gen6_blend_factor_to_string(blend->blend0.source_blend_factor);
+ func = gen6_blend_function_to_string(blend->blend0.blend_func);
+
+ ErrorF(" Blend (%s): function %s, src=%s, dst=%s\n",
+ blend->blend0.blend_enable ? "enabled" : "disabled",
+ func, src, dst);
+
+ put_reloc(kgem, &r);
+}
+
+int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset)
+{
+ static const struct {
+ uint32_t opcode;
+ int min_len;
+ int max_len;
+ const char *name;
+ } opcodes[] = {
+ { 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
+ { 0x6102, 2, 2 , "STATE_SIP" },
+ { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+ { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" },
+ { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+ { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" },
+ { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" },
+ { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
+ { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
+ { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
+ { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
+ { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
+ { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
+ { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
+ { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" },
+ { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
+ { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
+ { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
+ { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" },
+ { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
+ { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" },
+ { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" },
+ { 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" },
+ { 0x7b00, 6, 6, "3DPRIMITIVE" },
+ { 0x7802, 4, 4, "3DSTATE_SAMPLER_STATE_POINTERS" },
+ { 0x7805, 3, 3, "3DSTATE_URB" },
+ { 0x780d, 4, 4, "3DSTATE_VIEWPORT_STATE_POINTERS" },
+ { 0x780e, 4, 4, "3DSTATE_CC_STATE_POINTERS" },
+ { 0x780f, 2, 2, "3DSTATE_SCISSOR_STATE_POINTERS" },
+ { 0x7810, 6, 6, "3DSTATE_VS_STATE" },
+ { 0x7811, 7, 7, "3DSTATE_GS_STATE" },
+ { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" },
+ { 0x7813, 20, 20, "3DSTATE_SF_STATE" },
+ { 0x7814, 9, 9, "3DSTATE_WM_STATE" },
+ { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
+ { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
+ { 0x7817, 5, 5, "3DSTATE_CONSTANT_WM_STATE" },
+ { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" },
+ };
+ uint32_t *data = kgem->batch + offset;
+ uint32_t op;
+ unsigned int len;
+ int i, j;
+ char *desc1 = NULL;
+
+ len = (data[0] & 0xff) + 2;
+ op = (data[0] & 0xffff0000) >> 16;
+ switch (op) {
+ case 0x6101:
+ i = 0;
+ kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n");
+ if (kgem->gen >= 60) {
+ assert(len == 10);
+
+ state_base_out(data, offset, i++, "general");
+ state_base_out(data, offset, i++, "surface");
+ state_base_out(data, offset, i++, "dynamic");
+ state_base_out(data, offset, i++, "indirect");
+ state_base_out(data, offset, i++, "instruction");
+
+ state_max_out(data, offset, i++, "general");
+ state_max_out(data, offset, i++, "dynamic");
+ state_max_out(data, offset, i++, "indirect");
+ state_max_out(data, offset, i++, "instruction");
+
+ gen6_update_dynamic_buffer(kgem, offset + 3);
+ } else if (kgem->gen >= 50) {
+ assert(len == 8);
+
+ state_base_out(data, offset, i++, "general");
+ state_base_out(data, offset, i++, "surface");
+ state_base_out(data, offset, i++, "media");
+ state_base_out(data, offset, i++, "instruction");
+
+ state_max_out(data, offset, i++, "general");
+ state_max_out(data, offset, i++, "media");
+ state_max_out(data, offset, i++, "instruction");
+ }
+
+ return len;
+
+ case 0x7801:
+ if (kgem->gen >= 60) {
+ assert(len == 4);
+
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_BINDING_TABLE_POINTERS: VS mod %d, "
+ "GS mod %d, WM mod %d\n",
+ (data[0] & (1 << 8)) != 0,
+ (data[0] & (1 << 9)) != 0,
+ (data[0] & (1 << 12)) != 0);
+ kgem_debug_print(data, offset, 1, "VS binding table\n");
+ kgem_debug_print(data, offset, 2, "GS binding table\n");
+ kgem_debug_print(data, offset, 3, "WM binding table\n");
+ } else if (kgem->gen >= 40) {
+ assert(len == 6);
+
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_BINDING_TABLE_POINTERS\n");
+ kgem_debug_print(data, offset, 1, "VS binding table\n");
+ kgem_debug_print(data, offset, 2, "GS binding table\n");
+ kgem_debug_print(data, offset, 3, "CLIP binding table\n");
+ kgem_debug_print(data, offset, 4, "SF binding table\n");
+ kgem_debug_print(data, offset, 5, "WM binding table\n");
+ }
+
+ return len;
+
+ case 0x7802:
+ assert(len == 4);
+ kgem_debug_print(data, offset, 0, "3DSTATE_SAMPLER_STATE_POINTERS: VS mod %d, "
+ "GS mod %d, WM mod %d\n",
+ (data[0] & (1 << 8)) != 0,
+ (data[0] & (1 << 9)) != 0,
+ (data[0] & (1 << 12)) != 0);
+ kgem_debug_print(data, offset, 1, "VS sampler state\n");
+ kgem_debug_print(data, offset, 2, "GS sampler state\n");
+ kgem_debug_print(data, offset, 3, "WM sampler state\n");
+ gen6_decode_sampler_state(kgem, &data[3]);
+ return len;
+
+ case 0x7808:
+ assert((len - 1) % 4 == 0);
+ kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n");
+
+ for (i = 1; i < len;) {
+ gen6_update_vertex_buffer(kgem, data + i);
+
+ kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n",
+ data[i] >> 26,
+ data[i] & (1 << 20) ? "random" : "sequential",
+ data[i] & 0x07ff);
+ i++;
+ kgem_debug_print(data, offset, i++, "buffer address\n");
+ kgem_debug_print(data, offset, i++, "max index\n");
+ kgem_debug_print(data, offset, i++, "mbz\n");
+ }
+ return len;
+
+ case 0x7809:
+ assert((len + 1) % 2 == 0);
+ kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");
+
+ for (i = 1; i < len;) {
+ gen6_update_vertex_elements(kgem, (i - 1)/2, data + i);
+
+ kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, "
+ "src offset 0x%04x bytes\n",
+ data[i] >> 26,
+ data[i] & (1 << 25) ? "" : "in",
+ (data[i] >> 16) & 0x1ff,
+ data[i] & 0x07ff);
+ i++;
+ kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), "
+ "dst offset 0x%02x bytes\n",
+ get_965_element_component(data[i], 0),
+ get_965_element_component(data[i], 1),
+ get_965_element_component(data[i], 2),
+ get_965_element_component(data[i], 3),
+ (data[i] & 0xff) * 4);
+ i++;
+ }
+ return len;
+
+ case 0x780d:
+ assert(len == 4);
+ kgem_debug_print(data, offset, 0, "3DSTATE_VIEWPORT_STATE_POINTERS\n");
+ kgem_debug_print(data, offset, 1, "clip\n");
+ kgem_debug_print(data, offset, 2, "sf\n");
+ kgem_debug_print(data, offset, 3, "cc\n");
+ return len;
+
+ case 0x780a:
+ assert(len == 3);
+ kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n");
+ kgem_debug_print(data, offset, 1, "beginning buffer address\n");
+ kgem_debug_print(data, offset, 2, "ending buffer address\n");
+ return len;
+
+ case 0x780e:
+ assert(len == 4);
+ kgem_debug_print(data, offset, 0, "3DSTATE_CC_STATE_POINTERS\n");
+ kgem_debug_print(data, offset, 1, "blend%s\n",
+ data[1] & 1 ? " update" : "");
+ if (data[1] & 1)
+ gen6_decode_blend(kgem, data+1);
+ kgem_debug_print(data, offset, 2, "depth+stencil%s\n",
+ data[2] & 1 ? " update" : "");
+ kgem_debug_print(data, offset, 3, "cc%s\n",
+ data[3] & 1 ? " update" : "");
+ return len;
+
+ case 0x780f:
+ assert(len == 2);
+ kgem_debug_print(data, offset, 0, "3DSTATE_SCISSOR_POINTERS\n");
+ kgem_debug_print(data, offset, 1, "scissor rect offset\n");
+ return len;
+
+ case 0x7810:
+ assert(len == 6);
+ kgem_debug_print(data, offset, 0, "3DSTATE_VS\n");
+ kgem_debug_print(data, offset, 1, "kernel pointer\n");
+ kgem_debug_print(data, offset, 2, "SPF=%d, VME=%d, Sampler Count %d, "
+ "Binding table count %d\n",
+ (data[2] >> 31) & 1,
+ (data[2] >> 30) & 1,
+ (data[2] >> 27) & 7,
+ (data[2] >> 18) & 0xff);
+ kgem_debug_print(data, offset, 3, "scratch offset\n");
+ kgem_debug_print(data, offset, 4, "Dispatch GRF start %d, VUE read length %d, "
+ "VUE read offset %d\n",
+ (data[4] >> 20) & 0x1f,
+ (data[4] >> 11) & 0x3f,
+ (data[4] >> 4) & 0x3f);
+ kgem_debug_print(data, offset, 5, "Max Threads %d, Vertex Cache %sable, "
+ "VS func %sable\n",
+ ((data[5] >> 25) & 0x7f) + 1,
+ (data[5] & (1 << 1)) != 0 ? "dis" : "en",
+ (data[5] & 1) != 0 ? "en" : "dis");
+ return len;
+
+ case 0x7811:
+ assert(len == 7);
+ kgem_debug_print(data, offset, 0, "3DSTATE_GS\n");
+ kgem_debug_print(data, offset, 1, "kernel pointer\n");
+ kgem_debug_print(data, offset, 2, "SPF=%d, VME=%d, Sampler Count %d, "
+ "Binding table count %d\n",
+ (data[2] >> 31) & 1,
+ (data[2] >> 30) & 1,
+ (data[2] >> 27) & 7,
+ (data[2] >> 18) & 0xff);
+ kgem_debug_print(data, offset, 3, "scratch offset\n");
+ kgem_debug_print(data, offset, 4, "Dispatch GRF start %d, VUE read length %d, "
+ "VUE read offset %d\n",
+ (data[4] & 0xf),
+ (data[4] >> 11) & 0x3f,
+ (data[4] >> 4) & 0x3f);
+ kgem_debug_print(data, offset, 5, "Max Threads %d, Rendering %sable\n",
+ ((data[5] >> 25) & 0x7f) + 1,
+ (data[5] & (1 << 8)) != 0 ? "en" : "dis");
+ kgem_debug_print(data, offset, 6, "Reorder %sable, Discard Adjaceny %sable, "
+ "GS %sable\n",
+ (data[6] & (1 << 30)) != 0 ? "en" : "dis",
+ (data[6] & (1 << 29)) != 0 ? "en" : "dis",
+ (data[6] & (1 << 15)) != 0 ? "en" : "dis");
+ return len;
+
+ case 0x7812:
+ assert(len == 4);
+ kgem_debug_print(data, offset, 0, "3DSTATE_CLIP\n");
+ kgem_debug_print(data, offset, 1, "UserClip distance cull test mask 0x%x\n",
+ data[1] & 0xff);
+ kgem_debug_print(data, offset, 2, "Clip %sable, API mode %s, Viewport XY test %sable, "
+ "Viewport Z test %sable, Guardband test %sable, Clip mode %d, "
+ "Perspective Divide %sable, Non-Perspective Barycentric %sable, "
+ "Tri Provoking %d, Line Provoking %d, Trifan Provoking %d\n",
+ (data[2] & (1 << 31)) != 0 ? "en" : "dis",
+ (data[2] & (1 << 30)) != 0 ? "D3D" : "OGL",
+ (data[2] & (1 << 28)) != 0 ? "en" : "dis",
+ (data[2] & (1 << 27)) != 0 ? "en" : "dis",
+ (data[2] & (1 << 26)) != 0 ? "en" : "dis",
+ (data[2] >> 13) & 7,
+ (data[2] & (1 << 9)) != 0 ? "dis" : "en",
+ (data[2] & (1 << 8)) != 0 ? "en" : "dis",
+ (data[2] >> 4) & 3,
+ (data[2] >> 2) & 3,
+ (data[2] & 3));
+ kgem_debug_print(data, offset, 3, "Min PointWidth %d, Max PointWidth %d, "
+ "Force Zero RTAIndex %sable, Max VPIndex %d\n",
+ (data[3] >> 17) & 0x7ff,
+ (data[3] >> 6) & 0x7ff,
+ (data[3] & (1 << 5)) != 0 ? "en" : "dis",
+ (data[3] & 0xf));
+ return len;
+
+ case 0x7813:
+ gen6_update_sf_state(kgem, data);
+ assert(len == 20);
+ kgem_debug_print(data, offset, 0, "3DSTATE_SF\n");
+ kgem_debug_print(data, offset, 1, "Attrib Out %d, Attrib Swizzle %sable, VUE read length %d, "
+ "VUE read offset %d\n",
+ (data[1] >> 22) & 0x3f,
+ (data[1] & (1 << 21)) != 0 ? "en" : "dis",
+ (data[1] >> 11) & 0x1f,
+ (data[1] >> 4) & 0x3f);
+ kgem_debug_print(data, offset, 2, "Legacy Global DepthBias %sable, FrontFace fill %d, BF fill %d, "
+ "VP transform %sable, FrontWinding_%s\n",
+ (data[2] & (1 << 11)) != 0 ? "en" : "dis",
+ (data[2] >> 5) & 3,
+ (data[2] >> 3) & 3,
+ (data[2] & (1 << 1)) != 0 ? "en" : "dis",
+ (data[2] & 1) != 0 ? "CCW" : "CW");
+ kgem_debug_print(data, offset, 3, "AA %sable, CullMode %d, Scissor %sable, Multisample m ode %d\n",
+ (data[3] & (1 << 31)) != 0 ? "en" : "dis",
+ (data[3] >> 29) & 3,
+ (data[3] & (1 << 11)) != 0 ? "en" : "dis",
+ (data[3] >> 8) & 3);
+ kgem_debug_print(data, offset, 4, "Last Pixel %sable, SubPixel Precision %d, Use PixelWidth %d\n",
+ (data[4] & (1 << 31)) != 0 ? "en" : "dis",
+ (data[4] & (1 << 12)) != 0 ? 4 : 8,
+ (data[4] & (1 << 11)) != 0);
+ kgem_debug_print(data, offset, 5, "Global Depth Offset Constant %f\n", data[5]);
+ kgem_debug_print(data, offset, 6, "Global Depth Offset Scale %f\n", data[6]);
+ kgem_debug_print(data, offset, 7, "Global Depth Offset Clamp %f\n", data[7]);
+ for (i = 0, j = 0; i < 8; i++, j+=2)
+ kgem_debug_print(data, offset, i+8, "Attrib %d (Override %s%s%s%s, Const Source %d, Swizzle Select %d, "
+ "Source %d); Attrib %d (Override %s%s%s%s, Const Source %d, Swizzle Select %d, Source %d)\n",
+ j+1,
+ (data[8+i] & (1 << 31)) != 0 ? "W":"",
+ (data[8+i] & (1 << 30)) != 0 ? "Z":"",
+ (data[8+i] & (1 << 29)) != 0 ? "Y":"",
+ (data[8+i] & (1 << 28)) != 0 ? "X":"",
+ (data[8+i] >> 25) & 3, (data[8+i] >> 22) & 3,
+ (data[8+i] >> 16) & 0x1f,
+ j,
+ (data[8+i] & (1 << 15)) != 0 ? "W":"",
+ (data[8+i] & (1 << 14)) != 0 ? "Z":"",
+ (data[8+i] & (1 << 13)) != 0 ? "Y":"",
+ (data[8+i] & (1 << 12)) != 0 ? "X":"",
+ (data[8+i] >> 9) & 3, (data[8+i] >> 6) & 3,
+ (data[8+i] & 0x1f));
+ kgem_debug_print(data, offset, 16, "Point Sprite TexCoord Enable\n");
+ kgem_debug_print(data, offset, 17, "Const Interp Enable\n");
+ kgem_debug_print(data, offset, 18, "Attrib 7-0 WrapShortest Enable\n");
+ kgem_debug_print(data, offset, 19, "Attrib 15-8 WrapShortest Enable\n");
+
+ return len;
+
+ case 0x7814:
+ assert(len == 9);
+ kgem_debug_print(data, offset, 0, "3DSTATE_WM\n");
+ kgem_debug_print(data, offset, 1, "kernel start pointer 0\n");
+ kgem_debug_print(data, offset, 2, "SPF=%d, VME=%d, Sampler Count %d, "
+ "Binding table count %d\n",
+ (data[2] >> 31) & 1,
+ (data[2] >> 30) & 1,
+ (data[2] >> 27) & 7,
+ (data[2] >> 18) & 0xff);
+ kgem_debug_print(data, offset, 3, "scratch offset\n");
+ kgem_debug_print(data, offset, 4, "Depth Clear %d, Depth Resolve %d, HiZ Resolve %d, "
+ "Dispatch GRF start[0] %d, start[1] %d, start[2] %d\n",
+ (data[4] & (1 << 30)) != 0,
+ (data[4] & (1 << 28)) != 0,
+ (data[4] & (1 << 27)) != 0,
+ (data[4] >> 16) & 0x7f,
+ (data[4] >> 8) & 0x7f,
+ (data[4] & 0x7f));
+ kgem_debug_print(data, offset, 5, "MaxThreads %d, PS KillPixel %d, PS computed Z %d, "
+ "PS use sourceZ %d, Thread Dispatch %d, PS use sourceW %d, Dispatch32 %d, "
+ "Dispatch16 %d, Dispatch8 %d\n",
+ ((data[5] >> 25) & 0x7f) + 1,
+ (data[5] & (1 << 22)) != 0,
+ (data[5] & (1 << 21)) != 0,
+ (data[5] & (1 << 20)) != 0,
+ (data[5] & (1 << 19)) != 0,
+ (data[5] & (1 << 8)) != 0,
+ (data[5] & (1 << 2)) != 0,
+ (data[5] & (1 << 1)) != 0,
+ (data[5] & (1 << 0)) != 0);
+ kgem_debug_print(data, offset, 6, "Num SF output %d, Pos XY offset %d, ZW interp mode %d , "
+ "Barycentric interp mode 0x%x, Point raster rule %d, Multisample mode %d, "
+ "Multisample Dispatch mode %d\n",
+ (data[6] >> 20) & 0x3f,
+ (data[6] >> 18) & 3,
+ (data[6] >> 16) & 3,
+ (data[6] >> 10) & 0x3f,
+ (data[6] & (1 << 9)) != 0,
+ (data[6] >> 1) & 3,
+ (data[6] & 1));
+ kgem_debug_print(data, offset, 7, "kernel start pointer 1\n");
+ kgem_debug_print(data, offset, 8, "kernel start pointer 2\n");
+
+ return len;
+
+ case 0x7900:
+ assert(len == 4);
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_DRAWING_RECTANGLE\n");
+ kgem_debug_print(data, offset, 1, "top left: %d, %d\n",
+ (uint16_t)(data[1] & 0xffff),
+ (uint16_t)(data[1] >> 16));
+ kgem_debug_print(data, offset, 2, "bottom right: %d, %d\n",
+ (uint16_t)(data[2] & 0xffff),
+ (uint16_t)(data[2] >> 16));
+ kgem_debug_print(data, offset, 3, "origin: %d, %d\n",
+ (int16_t)(data[3] & 0xffff),
+ (int16_t)(data[3] >> 16));
+ return len;
+
+ case 0x7905:
+ assert(len == 7);
+ kgem_debug_print(data, offset, 0,
+ "3DSTATE_DEPTH_BUFFER\n");
+ kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n",
+ get_965_surfacetype(data[1] >> 29),
+ get_965_depthformat((data[1] >> 18) & 0x7),
+ (data[1] & 0x0001ffff) + 1,
+ data[1] & (1 << 27) ? "" : "not ",
+ (data[1] & (1 << 22)) != 0,
+ (data[1] & (1 << 21)) != 0);
+ kgem_debug_print(data, offset, 2, "depth offset\n");
+ kgem_debug_print(data, offset, 3, "%dx%d\n",
+ ((data[3] & 0x0007ffc0) >> 6) + 1,
+ ((data[3] & 0xfff80000) >> 19) + 1);
+ kgem_debug_print(data, offset, 4, "volume depth\n");
+ kgem_debug_print(data, offset, 5, "\n");
+ kgem_debug_print(data, offset, 6, "\n");
+ return len;
+
+ case 0x7a00:
+ assert(len == 4 || len == 5);
+ switch ((data[1] >> 14) & 0x3) {
+ case 0: desc1 = "no write"; break;
+ case 1: desc1 = "qword write"; break;
+ case 2: desc1 = "PS_DEPTH_COUNT write"; break;
+ case 3: desc1 = "TIMESTAMP write"; break;
+ }
+ kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n");
+ kgem_debug_print(data, offset, 1,
+ "%s, %scs stall, %stlb invalidate, "
+ "%ssync gfdt, %sdepth stall, %sRC write flush, "
+ "%sinst flush, %sTC flush\n",
+ desc1,
+ data[1] & (1 << 20) ? "" : "no ",
+ data[1] & (1 << 18) ? "" : "no ",
+ data[1] & (1 << 17) ? "" : "no ",
+ data[1] & (1 << 13) ? "" : "no ",
+ data[1] & (1 << 12) ? "" : "no ",
+ data[1] & (1 << 11) ? "" : "no ",
+ data[1] & (1 << 10) ? "" : "no ");
+ if (len == 5) {
+ kgem_debug_print(data, offset, 2, "destination address\n");
+ kgem_debug_print(data, offset, 3, "immediate dword low\n");
+ kgem_debug_print(data, offset, 4, "immediate dword high\n");
+ } else {
+ for (i = 2; i < len; i++) {
+ kgem_debug_print(data, offset, i, "\n");
+ }
+ }
+ return len;
+
+ case 0x7b00:
+ assert(len == 6);
+ kgem_debug_print(data, offset, 0,
+ "3DPRIMITIVE: %s %s\n",
+ get_965_prim_type(data[0]),
+ (data[0] & (1 << 15)) ? "random" : "sequential");
+ kgem_debug_print(data, offset, 1, "vertex count\n");
+ kgem_debug_print(data, offset, 2, "start vertex\n");
+ kgem_debug_print(data, offset, 3, "instance count\n");
+ kgem_debug_print(data, offset, 4, "start instance\n");
+ kgem_debug_print(data, offset, 5, "index bias\n");
+ primitive_out(kgem, data);
+ return len;
+ }
+
+ /* For the rest, just dump the bytes */
+ for (i = 0; i < ARRAY_SIZE(opcodes); i++)
+ if (op == opcodes[i].opcode)
+ break;
+
+ assert(i < ARRAY_SIZE(opcodes));
+
+ len = 1;
+ kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name);
+ if (opcodes[i].max_len > 1) {
+ len = (data[0] & 0xff) + 2;
+ assert(len >= opcodes[i].min_len &&
+ len <= opcodes[i].max_len);
+ }
+
+ for (i = 1; i < len; i++)
+ kgem_debug_print(data, offset, i, "dword %d\n", i);
+
+ return len;
+}
+
+void kgem_gen6_finish_state(struct kgem *kgem)
+{
+ finish_state(kgem);
+}
diff --git a/src/sna/sna.h b/src/sna/sna.h
new file mode 100644
index 00000000..cb4b61ae
--- /dev/null
+++ b/src/sna/sna.h
@@ -0,0 +1,596 @@
+/**************************************************************************
+
+Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+Copyright © 2002 David Dawes
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * David Dawes <dawes@xfree86.org>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdint.h>
+
+#ifndef _SNA_H_
+#define _SNA_H_
+
+#include "xf86_OSproc.h"
+#include "compiler.h"
+#include "xf86PciInfo.h"
+#include "xf86Pci.h"
+#include "xf86Cursor.h"
+#include "xf86xv.h"
+#include "vgaHW.h"
+#include "xf86Crtc.h"
+#include "xf86RandR12.h"
+
+#include "xorg-server.h"
+#include <pciaccess.h>
+
+#include "xf86drm.h"
+#include "xf86drmMode.h"
+
+#define _XF86DRI_SERVER_
+#include "dri.h"
+#include "dri2.h"
+#include "i915_drm.h"
+
+#if HAVE_UDEV
+#include <libudev.h>
+#endif
+
+#define DBG(x)
+
+#define DEBUG_ALL 0
+#define DEBUG_ACCEL (DEBUG_ALL || 0)
+#define DEBUG_BATCH (DEBUG_ALL || 0)
+#define DEBUG_BLT (DEBUG_ALL || 0)
+#define DEBUG_COMPOSITE (DEBUG_ALL || 0)
+#define DEBUG_DAMAGE (DEBUG_ALL || 0)
+#define DEBUG_DISPLAY (DEBUG_ALL || 0)
+#define DEBUG_DRI (DEBUG_ALL || 0)
+#define DEBUG_GRADIENT (DEBUG_ALL || 0)
+#define DEBUG_GLYPHS (DEBUG_ALL || 0)
+#define DEBUG_IO (DEBUG_ALL || 0)
+#define DEBUG_KGEM (DEBUG_ALL || 0)
+#define DEBUG_RENDER (DEBUG_ALL || 0)
+#define DEBUG_STREAM (DEBUG_ALL || 0)
+#define DEBUG_TRAPEZOIDS (DEBUG_ALL || 0)
+#define DEBUG_VIDEO (DEBUG_ALL || 0)
+#define DEBUG_VIDEO_TEXTURED (DEBUG_ALL || 0)
+#define DEBUG_VIDEO_OVERLAY (DEBUG_ALL || 0)
+
+#define DEBUG_NO_RENDER 0
+#define DEBUG_NO_BLT 0
+#define DEBUG_NO_IO 0
+
+#define DEBUG_FLUSH_CACHE 0
+#define DEBUG_FLUSH_BATCH 0
+#define DEBUG_FLUSH_SYNC 0
+
+#define TEST_ALL 0
+#define TEST_ACCEL (TEST_ALL || 0)
+#define TEST_BATCH (TEST_ALL || 0)
+#define TEST_BLT (TEST_ALL || 0)
+#define TEST_COMPOSITE (TEST_ALL || 0)
+#define TEST_DAMAGE (TEST_ALL || 0)
+#define TEST_GRADIENT (TEST_ALL || 0)
+#define TEST_GLYPHS (TEST_ALL || 0)
+#define TEST_IO (TEST_ALL || 0)
+#define TEST_KGEM (TEST_ALL || 0)
+#define TEST_RENDER (TEST_ALL || 0)
+
+#include "intel_driver.h"
+#include "kgem.h"
+#include "sna_damage.h"
+#include "sna_render.h"
+
+static inline void list_add_tail(struct list *new, struct list *head)
+{
+ __list_add(new, head->prev, head);
+}
+
+enum DRI2FrameEventType {
+ DRI2_SWAP,
+ DRI2_ASYNC_SWAP,
+ DRI2_FLIP,
+ DRI2_WAITMSC,
+};
+
+#ifndef CREATE_PIXMAP_USAGE_SCRATCH_HEADER
+#define CREATE_PIXMAP_USAGE_SCRATCH_HEADER -1
+#endif
+
+typedef struct _DRI2FrameEvent {
+ XID drawable_id;
+ XID client_id; /* fake client ID to track client destruction */
+ ClientPtr client;
+ enum DRI2FrameEventType type;
+ int frame;
+ int pipe;
+
+ /* for swaps & flips only */
+ DRI2SwapEventPtr event_complete;
+ void *event_data;
+ DRI2BufferPtr front;
+ DRI2BufferPtr back;
+} DRI2FrameEventRec, *DRI2FrameEventPtr;
+
+#define SNA_CURSOR_X 64
+#define SNA_CURSOR_Y SNA_CURSOR_X
+
+struct sna_pixmap {
+ PixmapPtr pixmap;
+ struct kgem_bo *gpu_bo, *cpu_bo;
+ struct sna_damage *gpu_damage, *cpu_damage;
+
+ struct list list;
+
+#define SOURCE_BIAS 4
+ uint16_t source_count;
+ uint8_t mapped :1;
+ uint8_t pinned :1;
+ uint8_t gpu_only :1;
+ uint8_t flush :1;
+};
+
+static inline PixmapPtr get_drawable_pixmap(DrawablePtr drawable)
+{
+ ScreenPtr screen = drawable->pScreen;
+
+ if (drawable->type == DRAWABLE_PIXMAP)
+ return (PixmapPtr)drawable;
+ else
+ return screen->GetWindowPixmap((WindowPtr)drawable);
+}
+
+extern DevPrivateKeyRec sna_pixmap_index;
+
+static inline struct sna_pixmap *sna_pixmap(PixmapPtr pixmap)
+{
+ return dixGetPrivate(&pixmap->devPrivates, &sna_pixmap_index);
+}
+
+static inline struct sna_pixmap *sna_pixmap_from_drawable(DrawablePtr drawable)
+{
+ return sna_pixmap(get_drawable_pixmap(drawable));
+}
+
+static inline void sna_set_pixmap(PixmapPtr pixmap, struct sna_pixmap *sna)
+{
+ dixSetPrivate(&pixmap->devPrivates, &sna_pixmap_index, sna);
+}
+
+enum {
+ OPTION_TILING_FB,
+ OPTION_TILING_2D,
+ OPTION_PREFER_OVERLAY,
+ OPTION_COLOR_KEY,
+ OPTION_VIDEO_KEY,
+ OPTION_SWAPBUFFERS_WAIT,
+ OPTION_HOTPLUG,
+ OPTION_THROTTLE,
+ OPTION_RELAXED_FENCING,
+ OPTION_VMAP,
+};
+
+enum {
+ FLUSH_TIMER = 0,
+ EXPIRE_TIMER,
+ NUM_TIMERS
+};
+
+struct sna {
+ ScrnInfoPtr scrn;
+
+ unsigned flags;
+#define SNA_NO_THROTTLE 0x1
+#define SNA_SWAP_WAIT 0x2
+
+ int timer[NUM_TIMERS];
+ int timer_active;
+
+ struct list deferred_free;
+ struct list dirty_pixmaps;
+
+ PixmapPtr front, shadow;
+
+ struct sna_mode {
+ uint32_t fb_id;
+ drmModeResPtr mode_res;
+ int cpp;
+
+ drmEventContext event_context;
+ DRI2FrameEventPtr flip_info;
+ int flip_count;
+ int flip_pending[2];
+ unsigned int fe_frame;
+ unsigned int fe_tv_sec;
+ unsigned int fe_tv_usec;
+
+ struct list outputs;
+ struct list crtcs;
+ } mode;
+
+ unsigned int tiling;
+#define SNA_TILING_FB 0x1
+#define SNA_TILING_2D 0x2
+#define SNA_TILING_3D 0x4
+#define SNA_TILING_ALL (~0)
+
+ int Chipset;
+ EntityInfoPtr pEnt;
+ struct pci_device *PciInfo;
+ struct intel_chipset chipset;
+
+ ScreenBlockHandlerProcPtr BlockHandler;
+ ScreenWakeupHandlerProcPtr WakeupHandler;
+ CloseScreenProcPtr CloseScreen;
+
+ union {
+ struct gen2_render_state gen2;
+ struct gen3_render_state gen3;
+ struct gen4_render_state gen4;
+ struct gen5_render_state gen5;
+ struct gen6_render_state gen6;
+ } render_state;
+ uint32_t have_render;
+
+ Bool directRenderingOpen;
+ char *deviceName;
+
+ /* Broken-out options. */
+ OptionInfoPtr Options;
+
+ /* Driver phase/state information */
+ Bool suspended;
+
+#if HAVE_UDEV
+ struct udev_monitor *uevent_monitor;
+ InputHandlerProc uevent_handler;
+#endif
+
+ struct kgem kgem;
+ struct sna_render render;
+};
+
+Bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna);
+extern void sna_mode_init(struct sna *sna);
+extern void sna_mode_remove_fb(struct sna *sna);
+extern void sna_mode_fini(struct sna *sna);
+
+extern int sna_crtc_id(xf86CrtcPtr crtc);
+extern int sna_output_dpms_status(xf86OutputPtr output);
+
+extern Bool sna_do_pageflip(struct sna *sna,
+ PixmapPtr pixmap,
+ DRI2FrameEventPtr flip_info, int ref_crtc_hw_id);
+
+static inline struct sna *
+to_sna(ScrnInfoPtr scrn)
+{
+ return (struct sna *)(scrn->driverPrivate);
+}
+
+static inline struct sna *
+to_sna_from_screen(ScreenPtr screen)
+{
+ return to_sna(xf86Screens[screen->myNum]);
+}
+
+static inline struct sna *
+to_sna_from_drawable(DrawablePtr drawable)
+{
+ return to_sna_from_screen(drawable->pScreen);
+}
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+#endif
+#define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1))
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+
+extern xf86CrtcPtr sna_covering_crtc(ScrnInfoPtr scrn, BoxPtr box,
+ xf86CrtcPtr desired, BoxPtr crtc_box_ret);
+
+extern bool sna_wait_for_scanline(struct sna *sna, PixmapPtr pixmap,
+ xf86CrtcPtr crtc, RegionPtr clip);
+
+Bool sna_dri2_open(struct sna *sna, ScreenPtr pScreen);
+void sna_dri2_close(struct sna *sna, ScreenPtr pScreen);
+void sna_dri2_frame_event(unsigned int frame, unsigned int tv_sec,
+ unsigned int tv_usec, DRI2FrameEventPtr flip_info);
+void sna_dri2_flip_event(unsigned int frame, unsigned int tv_sec,
+ unsigned int tv_usec, DRI2FrameEventPtr flip_info);
+
+extern Bool sna_crtc_on(xf86CrtcPtr crtc);
+int sna_crtc_to_pipe(xf86CrtcPtr crtc);
+
+/* sna_render.c */
+void sna_kgem_reset(struct kgem *kgem);
+void sna_kgem_flush(struct kgem *kgem);
+void sna_kgem_context_switch(struct kgem *kgem, int new_mode);
+
+CARD32 sna_format_for_depth(int depth);
+
+void sna_debug_flush(struct sna *sna);
+
+static inline void
+get_drawable_deltas(DrawablePtr drawable, PixmapPtr pixmap, int16_t *x, int16_t *y)
+{
+#ifdef COMPOSITE
+ if (drawable->type == DRAWABLE_WINDOW) {
+ *x = -pixmap->screen_x;
+ *y = -pixmap->screen_y;
+ return;
+ }
+#endif
+ *x = *y = 0;
+}
+
+static inline int
+get_drawable_dx(DrawablePtr drawable)
+{
+#ifdef COMPOSITE
+ if (drawable->type == DRAWABLE_WINDOW)
+ return -get_drawable_pixmap(drawable)->screen_x;
+#endif
+ return 0;
+}
+
+static inline int
+get_drawable_dy(DrawablePtr drawable)
+{
+#ifdef COMPOSITE
+ if (drawable->type == DRAWABLE_WINDOW)
+ return -get_drawable_pixmap(drawable)->screen_y;
+#endif
+ return 0;
+}
+
+static inline Bool pixmap_is_scanout(PixmapPtr pixmap)
+{
+ ScreenPtr screen = pixmap->drawable.pScreen;
+ return pixmap == screen->GetScreenPixmap(screen);
+}
+
+struct sna_pixmap *sna_pixmap_attach(PixmapPtr pixmap);
+
+PixmapPtr sna_pixmap_create_upload(ScreenPtr screen,
+ int width, int height, int depth);
+
+void sna_pixmap_move_to_cpu(PixmapPtr pixmap, bool write);
+struct sna_pixmap *sna_pixmap_move_to_gpu(PixmapPtr pixmap);
+struct sna_pixmap *sna_pixmap_force_to_gpu(PixmapPtr pixmap);
+
+void
+sna_drawable_move_region_to_cpu(DrawablePtr drawable,
+ RegionPtr region,
+ Bool write);
+
+static inline void
+sna_drawable_move_to_cpu(DrawablePtr drawable, bool write)
+{
+ RegionRec region;
+
+ pixman_region_init_rect(&region,
+ 0, 0, drawable->width, drawable->height);
+ sna_drawable_move_region_to_cpu(drawable, &region, write);
+}
+
+static inline Bool
+sna_drawable_move_to_gpu(DrawablePtr drawable)
+{
+ return sna_pixmap_move_to_gpu(get_drawable_pixmap(drawable)) != NULL;
+}
+
+static inline Bool
+sna_pixmap_is_gpu(PixmapPtr pixmap)
+{
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ return priv && priv->gpu_bo;
+}
+
+static inline struct kgem_bo *sna_pixmap_get_bo(PixmapPtr pixmap)
+{
+ return sna_pixmap(pixmap)->gpu_bo;
+}
+
+static inline struct kgem_bo *sna_pixmap_pin(PixmapPtr pixmap)
+{
+ struct sna_pixmap *priv;
+
+ priv = sna_pixmap_force_to_gpu(pixmap);
+ if (!priv)
+ return NULL;
+
+ priv->pinned = 1;
+ return priv->gpu_bo;
+}
+
+
+static inline Bool
+_sna_transform_point(const PictTransform *transform,
+ int64_t x, int64_t y, int64_t result[3])
+{
+ int j;
+
+ for (j = 0; j < 3; j++)
+ result[j] = (transform->matrix[j][0] * x +
+ transform->matrix[j][1] * y +
+ transform->matrix[j][2]);
+
+ return result[2] != 0;
+}
+
+static inline void
+_sna_get_transformed_coordinates(int x, int y,
+ const PictTransform *transform,
+ float *x_out, float *y_out)
+{
+
+ int64_t result[3];
+
+ _sna_transform_point(transform, x, y, result);
+ *x_out = result[0] / (double)result[2];
+ *y_out = result[1] / (double)result[2];
+}
+
+void
+sna_get_transformed_coordinates(int x, int y,
+ const PictTransform *transform,
+ float *x_out, float *y_out);
+
+Bool
+sna_get_transformed_coordinates_3d(int x, int y,
+ const PictTransform *transform,
+ float *x_out, float *y_out, float *z_out);
+
+Bool sna_transform_is_affine(const PictTransform *t);
+Bool sna_transform_is_integer_translation(const PictTransform *t,
+ int16_t *tx, int16_t *ty);
+Bool sna_transform_is_translation(const PictTransform *t,
+ pixman_fixed_t *tx, pixman_fixed_t *ty);
+
+
+static inline uint32_t pixmap_size(PixmapPtr pixmap)
+{
+ return (pixmap->drawable.height - 1) * pixmap->devKind +
+ pixmap->drawable.width * pixmap->drawable.bitsPerPixel/8;
+}
+
+static inline struct kgem_bo *pixmap_vmap(struct kgem *kgem, PixmapPtr pixmap)
+{
+ struct sna_pixmap *priv;
+
+ if (kgem->wedged)
+ return NULL;
+
+ priv = sna_pixmap_attach(pixmap);
+ if (priv == NULL)
+ return NULL;
+
+ if (priv->cpu_bo == NULL) {
+ priv->cpu_bo = kgem_create_map(kgem,
+ pixmap->devPrivate.ptr,
+ pixmap_size(pixmap),
+ 0);
+ if (priv->cpu_bo)
+ priv->cpu_bo->pitch = pixmap->devKind;
+ }
+
+ return priv->cpu_bo;
+}
+
+Bool sna_accel_pre_init(struct sna *sna);
+Bool sna_accel_init(ScreenPtr sreen, struct sna *sna);
+void sna_accel_block_handler(struct sna *sna);
+void sna_accel_wakeup_handler(struct sna *sna);
+void sna_accel_close(struct sna *sna);
+void sna_accel_free(struct sna *sna);
+
+Bool sna_accel_create(struct sna *sna);
+void sna_composite(CARD8 op,
+ PicturePtr src,
+ PicturePtr mask,
+ PicturePtr dst,
+ INT16 src_x, INT16 src_y,
+ INT16 mask_x, INT16 mask_y,
+ INT16 dst_x, INT16 dst_y,
+ CARD16 width, CARD16 height);
+void sna_composite_rectangles(CARD8 op,
+ PicturePtr dst,
+ xRenderColor *color,
+ int num_rects,
+ xRectangle *rects);
+void sna_composite_trapezoids(CARD8 op,
+ PicturePtr src,
+ PicturePtr dst,
+ PictFormatPtr maskFormat,
+ INT16 xSrc, INT16 ySrc,
+ int ntrap, xTrapezoid *traps);
+
+Bool sna_gradients_create(struct sna *sna);
+void sna_gradients_close(struct sna *sna);
+
+Bool sna_glyphs_init(ScreenPtr screen);
+Bool sna_glyphs_create(struct sna *sna);
+void sna_glyphs(CARD8 op,
+ PicturePtr src,
+ PicturePtr dst,
+ PictFormatPtr mask,
+ INT16 xSrc, INT16 ySrc,
+ int nlist,
+ GlyphListPtr list,
+ GlyphPtr *glyphs);
+void sna_glyph_unrealize(ScreenPtr screen, GlyphPtr glyph);
+void sna_glyphs_close(struct sna *sna);
+
+void sna_read_boxes(struct sna *sna,
+ struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+ PixmapPtr dst, int16_t dst_dx, int16_t dst_dy,
+ const BoxRec *box, int n);
+void sna_write_boxes(struct sna *sna,
+ struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
+ const BoxRec *box, int n);
+
+struct kgem_bo *sna_replace(struct sna *sna,
+ struct kgem_bo *bo,
+ int width, int height, int bpp,
+ const void *src, int stride);
+
+Bool
+sna_compute_composite_extents(BoxPtr extents,
+ PicturePtr src, PicturePtr mask, PicturePtr dst,
+ INT16 src_x, INT16 src_y,
+ INT16 mask_x, INT16 mask_y,
+ INT16 dst_x, INT16 dst_y,
+ CARD16 width, CARD16 height);
+Bool
+sna_compute_composite_region(RegionPtr region,
+ PicturePtr src, PicturePtr mask, PicturePtr dst,
+ INT16 src_x, INT16 src_y,
+ INT16 mask_x, INT16 mask_y,
+ INT16 dst_x, INT16 dst_y,
+ CARD16 width, CARD16 height);
+
+void
+memcpy_blt(const void *src, void *dst, int bpp,
+ uint16_t src_stride, uint16_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height);
+
+#define SNA_CREATE_FB 0x10
+
+#endif /* _SNA_H */
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
new file mode 100644
index 00000000..bab2adb5
--- /dev/null
+++ b/src/sna/sna_accel.c
@@ -0,0 +1,3306 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+
+#include <X11/fonts/font.h>
+#include <X11/fonts/fontstruct.h>
+
+#include <xaarop.h>
+#include <fb.h>
+#include <dixfontstr.h>
+
+#ifdef RENDER
+#include <mipict.h>
+#include <fbpict.h>
+#endif
+
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#if DEBUG_ACCEL
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+DevPrivateKeyRec sna_pixmap_index;
+
+#define PM_IS_SOLID(_draw, _pm) \
+ (((_pm) & FbFullMask((_draw)->depth)) == FbFullMask((_draw)->depth))
+
+#if DEBUG_ACCEL
+static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function)
+{
+ if (box->x1 < 0 || box->y1 < 0 ||
+ box->x2 > pixmap->drawable.width ||
+ box->y2 > pixmap->drawable.height)
+ {
+ ErrorF("%s: damage box is beyond the pixmap: box=(%d, %d), (%d, %d), pixmap=(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1, box->x2, box->y2,
+ pixmap->drawable.width,
+ pixmap->drawable.height);
+ assert(0);
+ }
+}
+#define assert_pixmap_contains_box(p, b) _assert_pixmap_contains_box(p, b, __FUNCTION__)
+#else
+#define assert_pixmap_contains_box(p, b)
+#endif
+
+static void sna_pixmap_destroy_gpu_bo(struct sna *sna, struct sna_pixmap *priv)
+{
+ kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
+ priv->gpu_bo = NULL;
+}
+
+static Bool sna_destroy_private(PixmapPtr pixmap, struct sna_pixmap *priv)
+{
+ struct sna *sna = to_sna_from_drawable(&pixmap->drawable);
+
+ list_del(&priv->list);
+
+ sna_damage_destroy(&priv->gpu_damage);
+ sna_damage_destroy(&priv->cpu_damage);
+
+ if (priv->mapped)
+ munmap(pixmap->devPrivate.ptr, priv->gpu_bo->size);
+
+ /* Always release the gpu bo back to the lower levels of caching */
+ if (priv->gpu_bo)
+ kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
+
+ if (priv->cpu_bo) {
+ if (pixmap->usage_hint != CREATE_PIXMAP_USAGE_SCRATCH_HEADER &&
+ kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) {
+ list_add_tail(&priv->list, &sna->deferred_free);
+ return false;
+ }
+ kgem_bo_sync(&sna->kgem, priv->cpu_bo, true);
+ kgem_bo_destroy(&sna->kgem, priv->cpu_bo);
+ }
+
+ free(priv);
+ return TRUE;
+}
+
+static uint32_t sna_pixmap_choose_tiling(PixmapPtr pixmap)
+{
+ struct sna *sna = to_sna_from_drawable(&pixmap->drawable);
+ uint32_t tiling, bit;
+
+ /* Use tiling by default, but disable per user request */
+ tiling = I915_TILING_X;
+ bit = pixmap->usage_hint == SNA_CREATE_FB ?
+ SNA_TILING_FB : SNA_TILING_2D;
+ if ((sna->tiling && (1 << bit)) == 0)
+ tiling = I915_TILING_NONE;
+
+ /* Also adjust tiling if it is not supported or likely to
+ * slow us down,
+ */
+ return kgem_choose_tiling(&sna->kgem, tiling,
+ pixmap->drawable.width,
+ pixmap->drawable.height,
+ pixmap->drawable.bitsPerPixel);
+}
+
+struct sna_pixmap *sna_pixmap_attach(PixmapPtr pixmap)
+{
+ struct sna *sna;
+ struct sna_pixmap *priv;
+
+ priv = sna_pixmap(pixmap);
+ if (priv)
+ return priv;
+
+ switch (pixmap->usage_hint) {
+ case CREATE_PIXMAP_USAGE_GLYPH_PICTURE:
+ return NULL;
+ }
+
+ sna = to_sna_from_drawable(&pixmap->drawable);
+ if (!kgem_can_create_2d(&sna->kgem,
+ pixmap->drawable.width,
+ pixmap->drawable.height,
+ pixmap->drawable.bitsPerPixel,
+ sna_pixmap_choose_tiling(pixmap)))
+ return NULL;
+
+ priv = calloc(1, sizeof(*priv));
+ if (!priv)
+ return NULL;
+
+ list_init(&priv->list);
+ priv->pixmap = pixmap;
+
+ sna_set_pixmap(pixmap, priv);
+ return priv;
+}
+
+static PixmapPtr
+sna_pixmap_create_scratch(ScreenPtr screen,
+ int width, int height, int depth,
+ uint32_t tiling)
+{
+ struct sna *sna = to_sna_from_screen(screen);
+ PixmapPtr pixmap;
+ struct sna_pixmap *priv;
+ int bpp = BitsPerPixel(depth);
+
+ DBG(("%s(%d, %d, %d, tiling=%d)\n", __FUNCTION__,
+ width, height, depth, tiling));
+
+ if (tiling == I915_TILING_Y && !sna->have_render)
+ tiling = I915_TILING_X;
+
+ tiling = kgem_choose_tiling(&sna->kgem, tiling, width, height, bpp);
+ if (!kgem_can_create_2d(&sna->kgem, width, height, bpp, tiling))
+ return fbCreatePixmap(screen, width, height, depth,
+ CREATE_PIXMAP_USAGE_SCRATCH);
+
+ /* you promise never to access this via the cpu... */
+ pixmap = fbCreatePixmap(screen, 0, 0, depth,
+ CREATE_PIXMAP_USAGE_SCRATCH);
+ if (!pixmap)
+ return NullPixmap;
+
+ priv = malloc(sizeof(*priv));
+ if (!priv) {
+ fbDestroyPixmap(pixmap);
+ return NullPixmap;
+ }
+
+ priv->gpu_bo = kgem_create_2d(&sna->kgem,
+ width, height, bpp, tiling,
+ 0);
+ if (priv->gpu_bo == NULL) {
+ free(priv);
+ fbDestroyPixmap(pixmap);
+ return NullPixmap;
+ }
+
+ priv->source_count = 0;
+ priv->cpu_bo = NULL;
+ priv->cpu_damage = priv->gpu_damage = NULL;
+ priv->gpu_only = 1;
+ priv->pinned = 0;
+ priv->mapped = 0;
+ list_init(&priv->list);
+
+ priv->pixmap = pixmap;
+ sna_set_pixmap(pixmap, priv);
+
+ miModifyPixmapHeader(pixmap,
+ width, height, depth, bpp,
+ priv->gpu_bo->pitch, NULL);
+ pixmap->devPrivate.ptr = NULL;
+
+ return pixmap;
+}
+
+
+static PixmapPtr sna_create_pixmap(ScreenPtr screen,
+ int width, int height, int depth,
+ unsigned int usage)
+{
+ PixmapPtr pixmap;
+
+ DBG(("%s(%d, %d, %d, usage=%x)\n", __FUNCTION__,
+ width, height, depth, usage));
+
+ if (usage == CREATE_PIXMAP_USAGE_SCRATCH &&
+ to_sna_from_screen(screen)->have_render)
+ return sna_pixmap_create_scratch(screen,
+ width, height, depth,
+ I915_TILING_Y);
+
+ /* XXX could use last deferred free? */
+
+ pixmap = fbCreatePixmap(screen, width, height, depth, usage);
+ if (pixmap == NullPixmap)
+ return NullPixmap;
+
+/* XXX if (pixmap->drawable.devKind * height > 128) */
+ sna_pixmap_attach(pixmap);
+ return pixmap;
+}
+
+static Bool sna_destroy_pixmap(PixmapPtr pixmap)
+{
+ if (pixmap->refcnt == 1) {
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ if (priv) {
+ if (!sna_destroy_private(pixmap, priv))
+ return TRUE;
+ }
+ }
+
+ return fbDestroyPixmap(pixmap);
+}
+
+static void sna_pixmap_map_to_cpu(struct sna *sna,
+ PixmapPtr pixmap,
+ struct sna_pixmap *priv)
+{
+ DBG(("%s: AWOOGA, AWOOGA!\n", __FUNCTION__));
+
+ if (priv->mapped == 0) {
+ ScreenPtr screen = pixmap->drawable.pScreen;
+ void *ptr;
+
+ ptr = kgem_bo_map(&sna->kgem,
+ priv->gpu_bo,
+ PROT_READ | PROT_WRITE);
+ assert(ptr != NULL);
+
+ screen->ModifyPixmapHeader(pixmap,
+ pixmap->drawable.width,
+ pixmap->drawable.height,
+ pixmap->drawable.depth,
+ pixmap->drawable.bitsPerPixel,
+ priv->gpu_bo->pitch,
+ ptr);
+ priv->mapped = 1;
+ }
+ kgem_bo_submit(&sna->kgem, priv->gpu_bo);
+}
+
+static inline void list_move(struct list *list, struct list *head)
+{
+ __list_del(list->prev, list->next);
+ list_add(list, head);
+}
+
+void
+sna_pixmap_move_to_cpu(PixmapPtr pixmap, bool write)
+{
+ struct sna *sna = to_sna_from_drawable(&pixmap->drawable);
+ struct sna_pixmap *priv;
+
+ DBG(("%s(pixmap=%p, write=%d)\n", __FUNCTION__, pixmap, write));
+
+ priv = sna_pixmap(pixmap);
+ if (priv == NULL) {
+ DBG(("%s: not attached to %p\n", __FUNCTION__, pixmap));
+ return;
+ }
+
+ DBG(("%s: gpu_bo=%p, gpu_damage=%p, gpu_only=%d\n",
+ __FUNCTION__, priv->gpu_bo, priv->gpu_damage, priv->gpu_only));
+
+ if (priv->gpu_bo == NULL) {
+ DBG(("%s: no GPU bo\n", __FUNCTION__));
+ goto done;
+ }
+
+ if (priv->gpu_only) {
+ sna_pixmap_map_to_cpu(sna, pixmap, priv);
+ goto done;
+ }
+
+ if (priv->gpu_damage) {
+ BoxPtr box;
+ int n;
+
+ DBG(("%s: flushing GPU damage\n", __FUNCTION__));
+
+ n = sna_damage_get_boxes(priv->gpu_damage, &box);
+ if (n) {
+ struct kgem_bo *dst_bo;
+ Bool ok = FALSE;
+
+ dst_bo = NULL;
+ if (sna->kgem.gen >= 30)
+ dst_bo = pixmap_vmap(&sna->kgem, pixmap);
+ if (dst_bo)
+ ok = sna->render.copy_boxes(sna, GXcopy,
+ pixmap, priv->gpu_bo, 0, 0,
+ pixmap, dst_bo, 0, 0,
+ box, n);
+ if (!ok)
+ sna_read_boxes(sna,
+ priv->gpu_bo, 0, 0,
+ pixmap, 0, 0,
+ box, n);
+ }
+
+ __sna_damage_destroy(priv->gpu_damage);
+ priv->gpu_damage = NULL;
+ }
+
+done:
+ if (priv->cpu_bo) {
+ DBG(("%s: syncing CPU bo\n", __FUNCTION__));
+ kgem_bo_sync(&sna->kgem, priv->cpu_bo, write);
+ }
+
+ if (write) {
+ DBG(("%s: marking as damaged\n", __FUNCTION__));
+ sna_damage_all(&priv->cpu_damage,
+ pixmap->drawable.width,
+ pixmap->drawable.height);
+
+ if (priv->gpu_bo && !priv->pinned)
+ sna_pixmap_destroy_gpu_bo(sna, priv);
+
+ if (priv->flush)
+ list_move(&priv->list, &sna->dirty_pixmaps);
+ }
+}
+
+static Bool
+region_subsumes_drawable(RegionPtr region, DrawablePtr drawable)
+{
+ const BoxRec *extents;
+
+ if (REGION_NUM_RECTS(region) != 1)
+ return false;
+
+ extents = RegionExtents(region);
+ return extents->x1 <= 0 && extents->y1 <= 0 &&
+ extents->x2 >= drawable->width &&
+ extents->y2 >= drawable->height;
+}
+
+void
+sna_drawable_move_region_to_cpu(DrawablePtr drawable,
+ RegionPtr region,
+ Bool write)
+{
+ struct sna *sna = to_sna_from_drawable(drawable);
+ PixmapPtr pixmap = get_drawable_pixmap(drawable);
+ struct sna_pixmap *priv;
+ int16_t dx, dy;
+
+ DBG(("%s(pixmap=%p, [(%d, %d), (%d, %d)], write=%d)\n",
+ __FUNCTION__, pixmap,
+ RegionExtents(region)->x1, RegionExtents(region)->y1,
+ RegionExtents(region)->x2, RegionExtents(region)->y2,
+ write));
+
+ priv = sna_pixmap(pixmap);
+ if (priv == NULL) {
+ DBG(("%s: not attached to %p\n", __FUNCTION__, pixmap));
+ return;
+ }
+
+ if (priv->gpu_only) {
+ DBG(("%s: gpu only\n", __FUNCTION__));
+ return sna_pixmap_map_to_cpu(sna, pixmap, priv);
+ }
+
+ get_drawable_deltas(drawable, pixmap, &dx, &dy);
+ DBG(("%s: delta=(%d, %d)\n", __FUNCTION__, dx, dy));
+ if (dx | dy)
+ RegionTranslate(region, dx, dy);
+
+ if (region_subsumes_drawable(region, &pixmap->drawable)) {
+ DBG(("%s: region subsumes drawable\n", __FUNCTION__));
+ if (dx | dy)
+ RegionTranslate(region, -dx, -dy);
+ return sna_pixmap_move_to_cpu(pixmap, write);
+ }
+
+#if 0
+ pixman_region_intersect_rect(region, region,
+ 0, 0,
+ pixmap->drawable.width,
+ pixmap->drawable.height);
+#endif
+
+ if (priv->gpu_bo == NULL)
+ goto done;
+
+ if (sna_damage_contains_box(priv->gpu_damage,
+ REGION_EXTENTS(NULL, region))) {
+ RegionRec want, need, *r;
+
+ DBG(("%s: region intersects gpu damage\n", __FUNCTION__));
+
+ r = region;
+ /* expand the region to move 32x32 pixel blocks at a time */
+ if (priv->cpu_damage == NULL) {
+ int n = REGION_NUM_RECTS(region), i;
+ BoxPtr boxes = REGION_RECTS(region);
+ BoxPtr blocks = malloc(sizeof(BoxRec) * REGION_NUM_RECTS(region));
+ if (blocks) {
+ for (i = 0; i < n; i++) {
+ blocks[i].x1 = boxes[i].x1 & ~31;
+ if (blocks[i].x1 < 0)
+ blocks[i].x1 = 0;
+
+ blocks[i].x2 = (boxes[i].x2 + 31) & ~31;
+ if (blocks[i].x2 > pixmap->drawable.width)
+ blocks[i].x2 = pixmap->drawable.width;
+
+ blocks[i].y1 = boxes[i].y1 & ~31;
+ if (blocks[i].y1 < 0)
+ blocks[i].y1 = 0;
+
+ blocks[i].y2 = (boxes[i].y2 + 31) & ~31;
+ if (blocks[i].y2 > pixmap->drawable.height)
+ blocks[i].y2 = pixmap->drawable.height;
+ }
+ if (pixman_region_init_rects(&want, blocks, i))
+ r = &want;
+ free(blocks);
+ }
+ }
+
+ pixman_region_init(&need);
+ if (sna_damage_intersect(priv->gpu_damage, r, &need)) {
+ BoxPtr box = REGION_RECTS(&need);
+ int n = REGION_NUM_RECTS(&need);
+ struct kgem_bo *dst_bo;
+ Bool ok = FALSE;
+
+ dst_bo = NULL;
+ if (sna->kgem.gen >= 30)
+ dst_bo = pixmap_vmap(&sna->kgem, pixmap);
+ if (dst_bo)
+ ok = sna->render.copy_boxes(sna, GXcopy,
+ pixmap, priv->gpu_bo, 0, 0,
+ pixmap, dst_bo, 0, 0,
+ box, n);
+ if (!ok)
+ sna_read_boxes(sna,
+ priv->gpu_bo, 0, 0,
+ pixmap, 0, 0,
+ box, n);
+
+ sna_damage_subtract(&priv->gpu_damage,
+ n <= REGION_NUM_RECTS(r) ? &need : r);
+ RegionUninit(&need);
+ }
+ if (r == &want)
+ pixman_region_fini(&want);
+ }
+
+done:
+ if (priv->cpu_bo) {
+ DBG(("%s: syncing cpu bo\n", __FUNCTION__));
+ kgem_bo_sync(&sna->kgem, priv->cpu_bo, write);
+ }
+
+ if (write) {
+ DBG(("%s: applying cpu damage\n", __FUNCTION__));
+ assert_pixmap_contains_box(pixmap, RegionExtents(region));
+ sna_damage_add(&priv->cpu_damage, region);
+ if (priv->flush)
+ list_move(&priv->list, &sna->dirty_pixmaps);
+ }
+
+ if (dx | dy)
+ RegionTranslate(region, -dx, -dy);
+}
+
+static inline Bool
+_sna_drawable_use_gpu_bo(DrawablePtr drawable, const BoxPtr box)
+{
+ PixmapPtr pixmap = get_drawable_pixmap(drawable);
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ BoxRec extents;
+ int16_t dx, dy;
+
+ if (priv == NULL)
+ return FALSE;
+ if (priv->gpu_bo == NULL)
+ return FALSE;
+
+ if (priv->cpu_damage == NULL)
+ return TRUE;
+
+ assert(!priv->gpu_only);
+ get_drawable_deltas(drawable, pixmap, &dx, &dy);
+
+ extents = *box;
+ extents.x1 += dx;
+ extents.x2 += dx;
+ extents.y1 += dy;
+ extents.y2 += dy;
+
+ return sna_damage_contains_box(priv->cpu_damage,
+ &extents) == PIXMAN_REGION_OUT;
+}
+
+static inline Bool
+sna_drawable_use_gpu_bo(DrawablePtr drawable, const BoxPtr box)
+{
+ Bool ret = _sna_drawable_use_gpu_bo(drawable, box);
+ DBG(("%s((%d, %d), (%d, %d)) = %d\n", __FUNCTION__,
+ box->x1, box->y1, box->x2, box->y2, ret));
+ return ret;
+}
+
+static inline Bool
+_sna_drawable_use_cpu_bo(DrawablePtr drawable, const BoxPtr box)
+{
+ PixmapPtr pixmap = get_drawable_pixmap(drawable);
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ BoxRec extents;
+ int16_t dx, dy;
+
+ if (priv == NULL)
+ return FALSE;
+ if (priv->cpu_bo == NULL)
+ return FALSE;
+
+ if (priv->gpu_damage == NULL)
+ return TRUE;
+
+ get_drawable_deltas(drawable, pixmap, &dx, &dy);
+
+ extents = *box;
+ extents.x1 += dx;
+ extents.x2 += dx;
+ extents.y1 += dy;
+ extents.y2 += dy;
+
+ return sna_damage_contains_box(priv->gpu_damage,
+ &extents) == PIXMAN_REGION_OUT;
+}
+
+static inline Bool
+sna_drawable_use_cpu_bo(DrawablePtr drawable, const BoxPtr box)
+{
+ Bool ret = _sna_drawable_use_cpu_bo(drawable, box);
+ DBG(("%s((%d, %d), (%d, %d)) = %d\n", __FUNCTION__,
+ box->x1, box->y1, box->x2, box->y2, ret));
+ return ret;
+}
+
+PixmapPtr
+sna_pixmap_create_upload(ScreenPtr screen,
+ int width, int height, int depth)
+{
+ struct sna *sna = to_sna_from_screen(screen);
+ PixmapPtr pixmap;
+ struct sna_pixmap *priv;
+ int bpp = BitsPerPixel(depth);
+ int pad = ALIGN(width * bpp / 8, 4);
+ void *ptr;
+
+ DBG(("%s(%d, %d, %d)\n", __FUNCTION__, width, height, depth));
+ assert(width);
+ assert(height);
+ if (!sna->have_render ||
+ !kgem_can_create_2d(&sna->kgem,
+ width, height, bpp,
+ I915_TILING_NONE))
+ return fbCreatePixmap(screen, width, height, depth,
+ CREATE_PIXMAP_USAGE_SCRATCH);
+
+ pixmap = fbCreatePixmap(screen, 0, 0, depth,
+ CREATE_PIXMAP_USAGE_SCRATCH);
+ if (!pixmap)
+ return NullPixmap;
+
+ priv = malloc(sizeof(*priv));
+ if (!priv) {
+ fbDestroyPixmap(pixmap);
+ return NullPixmap;
+ }
+
+ priv->gpu_bo = kgem_create_buffer(&sna->kgem, pad*height, true, &ptr);
+ if (!priv->gpu_bo) {
+ free(priv);
+ fbDestroyPixmap(pixmap);
+ return NullPixmap;
+ }
+
+ priv->gpu_bo->pitch = pad;
+
+ priv->source_count = SOURCE_BIAS;
+ priv->cpu_bo = NULL;
+ priv->cpu_damage = priv->gpu_damage = NULL;
+ priv->gpu_only = 0;
+ priv->pinned = 0;
+ priv->mapped = 0;
+ list_init(&priv->list);
+
+ priv->pixmap = pixmap;
+ sna_set_pixmap(pixmap, priv);
+
+ miModifyPixmapHeader(pixmap, width, height, depth, bpp, pad, ptr);
+ return pixmap;
+}
+
+struct sna_pixmap *
+sna_pixmap_force_to_gpu(PixmapPtr pixmap)
+{
+ struct sna_pixmap *priv;
+
+ DBG(("%s(pixmap=%p)\n", __FUNCTION__, pixmap));
+
+ priv = sna_pixmap(pixmap);
+ if (priv == NULL) {
+ priv = sna_pixmap_attach(pixmap);
+ if (priv == NULL)
+ return NULL;
+
+ DBG(("%s: created priv and marking all cpu damaged\n",
+ __FUNCTION__));
+ sna_damage_all(&priv->cpu_damage,
+ pixmap->drawable.width,
+ pixmap->drawable.height);
+ }
+
+ if (priv->gpu_bo == NULL) {
+ struct sna *sna = to_sna_from_drawable(&pixmap->drawable);
+
+ priv->gpu_bo = kgem_create_2d(&sna->kgem,
+ pixmap->drawable.width,
+ pixmap->drawable.height,
+ pixmap->drawable.bitsPerPixel,
+ sna_pixmap_choose_tiling(pixmap),
+ 0);
+ if (priv->gpu_bo == NULL)
+ return NULL;
+
+ DBG(("%s: created gpu bo\n", __FUNCTION__));
+ }
+
+ if (!sna_pixmap_move_to_gpu(pixmap))
+ return NULL;
+
+ return priv;
+}
+
+struct sna_pixmap *
+sna_pixmap_move_to_gpu(PixmapPtr pixmap)
+{
+ struct sna *sna = to_sna_from_drawable(&pixmap->drawable);
+ struct sna_pixmap *priv;
+ BoxPtr box;
+ int n;
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ priv = sna_pixmap(pixmap);
+ if (priv == NULL)
+ return NULL;
+
+ sna_damage_reduce(&priv->cpu_damage);
+ DBG(("%s: CPU damage? %d\n", __FUNCTION__, priv->cpu_damage != NULL));
+
+ if (priv->gpu_bo == NULL) {
+ if (!sna->kgem.wedged)
+ priv->gpu_bo =
+ kgem_create_2d(&sna->kgem,
+ pixmap->drawable.width,
+ pixmap->drawable.height,
+ pixmap->drawable.bitsPerPixel,
+ sna_pixmap_choose_tiling(pixmap),
+ priv->cpu_damage ? CREATE_INACTIVE : 0);
+ if (priv->gpu_bo == NULL) {
+ assert(list_is_empty(&priv->list));
+ return NULL;
+ }
+ }
+
+ if (priv->cpu_damage == NULL)
+ goto done;
+
+ n = sna_damage_get_boxes(priv->cpu_damage, &box);
+ if (n) {
+ struct kgem_bo *src_bo;
+ Bool ok = FALSE;
+
+ src_bo = pixmap_vmap(&sna->kgem, pixmap);
+ if (src_bo)
+ ok = sna->render.copy_boxes(sna, GXcopy,
+ pixmap, src_bo, 0, 0,
+ pixmap, priv->gpu_bo, 0, 0,
+ box, n);
+ if (!ok) {
+ if (n == 1 && !priv->pinned &&
+ box->x1 <= 0 && box->y1 <= 0 &&
+ box->x2 >= pixmap->drawable.width &&
+ box->y2 >= pixmap->drawable.height) {
+ priv->gpu_bo =
+ sna_replace(sna,
+ priv->gpu_bo,
+ pixmap->drawable.width,
+ pixmap->drawable.height,
+ pixmap->drawable.bitsPerPixel,
+ pixmap->devPrivate.ptr,
+ pixmap->devKind);
+ } else {
+ sna_write_boxes(sna,
+ priv->gpu_bo, 0, 0,
+ pixmap->devPrivate.ptr,
+ pixmap->devKind,
+ pixmap->drawable.bitsPerPixel,
+ 0, 0,
+ box, n);
+ }
+ }
+ }
+
+ __sna_damage_destroy(priv->cpu_damage);
+ priv->cpu_damage = NULL;
+
+done:
+ list_del(&priv->list);
+ return priv;
+}
+
+static void sna_gc_move_to_cpu(GCPtr gc)
+{
+ DBG(("%s\n", __FUNCTION__));
+
+ if (gc->stipple)
+ sna_drawable_move_to_cpu(&gc->stipple->drawable, false);
+
+ if (gc->fillStyle == FillTiled)
+ sna_drawable_move_to_cpu(&gc->tile.pixmap->drawable, false);
+}
+
+static Bool
+sna_put_image_upload_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
+ int x, int y, int w, int h, char *bits, int stride)
+{
+ struct sna *sna = to_sna_from_drawable(drawable);
+ PixmapPtr pixmap = get_drawable_pixmap(drawable);
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ struct kgem_bo *src_bo;
+ Bool ok = FALSE;
+ BoxPtr box;
+ int nbox;
+ int16_t dx, dy;
+
+ box = REGION_RECTS(region);
+ nbox = REGION_NUM_RECTS(region);
+
+ DBG(("%s: %d x [(%d, %d), (%d, %d)...]\n",
+ __FUNCTION__, nbox,
+ box->x1, box->y1, box->x2, box->y2));
+
+ if (!priv->pinned && nbox == 1 &&
+ box->x1 <= 0 && box->y1 <= 0 &&
+ box->x2 >= pixmap->drawable.width &&
+ box->y2 >= pixmap->drawable.height) {
+ priv->gpu_bo =
+ sna_replace(sna, priv->gpu_bo,
+ pixmap->drawable.width,
+ pixmap->drawable.height,
+ pixmap->drawable.bitsPerPixel,
+ bits, stride);
+ return TRUE;
+ }
+
+ get_drawable_deltas(drawable, pixmap, &dx, &dy);
+ x += dx + drawable->x;
+ y += dy + drawable->y;
+
+ src_bo = kgem_create_map(&sna->kgem, bits, stride*h, 1);
+ if (src_bo) {
+ src_bo->pitch = stride;
+ ok = sna->render.copy_boxes(sna, gc->alu,
+ pixmap, src_bo, -x, -y,
+ pixmap, priv->gpu_bo, 0, 0,
+ box, nbox);
+ kgem_bo_sync(&sna->kgem, src_bo, true);
+ kgem_bo_destroy(&sna->kgem, src_bo);
+ }
+
+ if (!ok && gc->alu == GXcopy) {
+ sna_write_boxes(sna,
+ priv->gpu_bo, 0, 0,
+ bits,
+ stride,
+ pixmap->drawable.bitsPerPixel,
+ -x, -y,
+ box, nbox);
+ ok = TRUE;
+ }
+
+ return ok;
+}
+
+static Bool
+sna_put_image_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
+ int x, int y, int w, int h, char *bits, int stride)
+{
+ struct sna *sna = to_sna_from_drawable(drawable);
+ PixmapPtr pixmap = get_drawable_pixmap(drawable);
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ int16_t dx, dy;
+
+ if (!priv->gpu_bo)
+ return false;
+
+ if (priv->gpu_only)
+ return sna_put_image_upload_blt(drawable, gc, region,
+ x, y, w, h, bits, stride);
+
+ if (gc->alu != GXcopy)
+ return false;
+
+ if (priv->cpu_bo)
+ kgem_bo_sync(&sna->kgem, priv->cpu_bo, true);
+
+ if (region_subsumes_drawable(region, &pixmap->drawable)) {
+ sna_damage_destroy(&priv->gpu_damage);
+ sna_damage_all(&priv->cpu_damage,
+ pixmap->drawable.width,
+ pixmap->drawable.height);
+ if (priv->gpu_bo && !priv->pinned)
+ sna_pixmap_destroy_gpu_bo(sna, priv);
+ } else {
+ assert_pixmap_contains_box(pixmap, RegionExtents(region));
+ sna_damage_subtract(&priv->gpu_damage, region);
+ sna_damage_add(&priv->cpu_damage, region);
+ }
+
+ get_drawable_deltas(drawable, pixmap, &dx, &dy);
+ dx += drawable->x;
+ dy += drawable->y;
+
+ DBG(("%s: fbPutZImage(%d[+%d], %d[+%d], %d, %d)\n",
+ __FUNCTION__,
+ x+dx, pixmap->drawable.x,
+ y+dy, pixmap->drawable.y,
+ w, h));
+ fbPutZImage(&pixmap->drawable, region,
+ GXcopy, ~0U,
+ x + dx, y + dy, w, h,
+ (FbStip*)bits, stride/sizeof(FbStip));
+ return true;
+}
+
+static void
+sna_put_image(DrawablePtr drawable, GCPtr gc, int depth,
+ int x, int y, int w, int h, int left, int format,
+ char *bits)
+{
+ PixmapPtr pixmap = get_drawable_pixmap(drawable);
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ RegionRec region, *clip;
+ BoxRec box;
+ int16_t dx, dy;
+
+ DBG(("%s((%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
+
+ if (w == 0 || h == 0)
+ return;
+
+ if (priv == NULL) {
+ fbPutImage(drawable, gc, depth, x, y, w, h, left, format, bits);
+ return;
+ }
+
+ get_drawable_deltas(drawable, pixmap, &dx, &dy);
+
+ box.x1 = x + drawable->x + dx;
+ box.y1 = y + drawable->y + dy;
+ box.x2 = box.x1 + w;
+ box.y2 = box.y1 + h;
+
+ if (box.x1 < 0)
+ box.x1 = 0;
+ if (box.y1 < 0)
+ box.y1 = 0;
+ if (box.x2 > pixmap->drawable.width)
+ box.x2 = pixmap->drawable.width;
+ if (box.y2 > pixmap->drawable.height)
+ box.y2 = pixmap->drawable.height;
+
+ RegionInit(&region, &box, 1);
+
+ clip = fbGetCompositeClip(gc);
+ if (clip) {
+ RegionTranslate(clip, dx, dy);
+ RegionIntersect(&region, &region, clip);
+ RegionTranslate(clip, -dx, -dy);
+ }
+
+ if (format != ZPixmap ||
+ !PM_IS_SOLID(drawable, gc->planemask) ||
+ !sna_put_image_blt(drawable, gc, &region,
+ x, y, w, h,
+ bits, PixmapBytePad(w, depth))) {
+ RegionTranslate(&region, -dx, -dy);
+
+ sna_drawable_move_region_to_cpu(drawable, &region, true);
+ DBG(("%s: fbPutImage(%d, %d, %d, %d)\n",
+ __FUNCTION__, x, y, w, h));
+ fbPutImage(drawable, gc, depth, x, y, w, h, left, format, bits);
+ }
+
+ RegionUninit(&region);
+}
+
+static void
+sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
+ BoxPtr box, int n,
+ int dx, int dy,
+ Bool reverse, Bool upsidedown, Pixel bitplane,
+ void *closure)
+{
+ struct sna *sna = to_sna_from_drawable(dst);
+ PixmapPtr src_pixmap = get_drawable_pixmap(src);
+ PixmapPtr dst_pixmap = get_drawable_pixmap(dst);
+ struct sna_pixmap *src_priv = sna_pixmap(src_pixmap);
+ struct sna_pixmap *dst_priv = sna_pixmap(dst_pixmap);
+ int alu = gc ? gc->alu : GXcopy;
+ int16_t src_dx, src_dy;
+ int16_t dst_dx, dst_dy;
+ int stride, bpp;
+ char *bits;
+ RegionRec region;
+ Bool replaces;
+
+ if (n == 0)
+ return;
+
+ DBG(("%s (boxes=%dx[(%d, %d), (%d, %d)...], src=+(%d, %d), alu=%d, src.size=%dx%d, dst.size=%dx%d)\n",
+ __FUNCTION__, n,
+ box[0].x1, box[0].y1, box[0].x2, box[0].y2,
+ dx, dy, alu,
+ src_pixmap->drawable.width, src_pixmap->drawable.height,
+ dst_pixmap->drawable.width, dst_pixmap->drawable.height));
+
+ pixman_region_init_rects(&region, box, n);
+
+ bpp = dst_pixmap->drawable.bitsPerPixel;
+
+ get_drawable_deltas(dst, dst_pixmap, &dst_dx, &dst_dy);
+ get_drawable_deltas(src, src_pixmap, &src_dx, &src_dy);
+ src_dx += dx;
+ src_dy += dy;
+
+ replaces = alu == GXcopy && n == 1 &&
+ box->x1 + dst_dx <= 0 &&
+ box->y1 + dst_dy <= 0 &&
+ box->x2 + dst_dx >= dst_pixmap->drawable.width &&
+ box->y2 + dst_dy >= dst_pixmap->drawable.height;
+
+ DBG(("%s: dst=(priv=%p, gpu_bo=%p, cpu_bo=%p), src=(priv=%p, gpu_bo=%p, cpu_bo=%p), replaces=%d\n",
+ __FUNCTION__,
+ dst_priv,
+ dst_priv ? dst_priv->gpu_bo : NULL,
+ dst_priv ? dst_priv->cpu_bo : NULL,
+ src_priv,
+ src_priv ? src_priv->gpu_bo : NULL,
+ src_priv ? src_priv->cpu_bo : NULL,
+ replaces));
+
+ /* Try to maintain the data on the GPU */
+ if (dst_priv && dst_priv->gpu_bo == NULL &&
+ src_priv && src_priv->gpu_bo != NULL &&
+ alu == GXcopy) {
+ uint32_t tiling =
+ sna_pixmap_choose_tiling(dst_pixmap);
+
+ DBG(("%s: create dst GPU bo for copy\n", __FUNCTION__));
+
+ if (!sna->kgem.wedged &&
+ kgem_can_create_2d(&sna->kgem,
+ dst_pixmap->drawable.width,
+ dst_pixmap->drawable.height,
+ dst_pixmap->drawable.bitsPerPixel,
+ tiling))
+ dst_priv->gpu_bo =
+ kgem_create_2d(&sna->kgem,
+ dst_pixmap->drawable.width,
+ dst_pixmap->drawable.height,
+ dst_pixmap->drawable.bitsPerPixel,
+ tiling, 0);
+ }
+
+ if (dst_priv && dst_priv->gpu_bo) {
+ if (!src_priv && !dst_priv->gpu_only) {
+ DBG(("%s: fallback - src_priv=%p but dst gpu_only=%d\n",
+ __FUNCTION__,
+ src_priv, dst_priv->gpu_only));
+ goto fallback;
+ }
+
+ if (alu != GXcopy && !sna_pixmap_move_to_gpu(dst_pixmap)) {
+ DBG(("%s: fallback - not a pure copy and failed to move dst to GPU\n",
+ __FUNCTION__));
+ goto fallback;
+ }
+
+ if (src_priv && src_priv->gpu_bo &&
+ sna_pixmap_move_to_gpu(src_pixmap)) {
+ if (!sna->render.copy_boxes(sna, alu,
+ src_pixmap, src_priv->gpu_bo, src_dx, src_dy,
+ dst_pixmap, dst_priv->gpu_bo, dst_dx, dst_dy,
+ box, n)) {
+ DBG(("%s: fallback - accelerated copy boxes failed\n",
+ __FUNCTION__));
+ goto fallback;
+ }
+
+ if (replaces) {
+ sna_damage_destroy(&dst_priv->cpu_damage);
+ sna_damage_all(&dst_priv->gpu_damage,
+ dst_pixmap->drawable.width,
+ dst_pixmap->drawable.height);
+ } else {
+ RegionTranslate(&region, dst_dx, dst_dy);
+ assert_pixmap_contains_box(dst_pixmap,
+ RegionExtents(&region));
+ sna_damage_add(&dst_priv->gpu_damage, &region);
+ if (alu == GXcopy)
+ sna_damage_subtract(&dst_priv->cpu_damage,
+ &region);
+ RegionTranslate(&region, -dst_dx, -dst_dy);
+ }
+ } else {
+ if (alu != GXcopy) {
+ DBG(("%s: fallback - not a copy and source is on the CPU\n",
+ __FUNCTION__));
+ goto fallback;
+ }
+
+ if (src_priv) {
+ RegionTranslate(&region, src_dx, src_dy);
+ sna_drawable_move_region_to_cpu(&src_pixmap->drawable,
+ &region, false);
+ RegionTranslate(&region, -src_dx, -src_dy);
+ }
+
+ if (!dst_priv->pinned && replaces) {
+ stride = src_pixmap->devKind;
+ bits = src_pixmap->devPrivate.ptr;
+ bits += src_dy * stride + src_dx * bpp / 8;
+
+ dst_priv->gpu_bo =
+ sna_replace(sna,
+ dst_priv->gpu_bo,
+ dst_pixmap->drawable.width,
+ dst_pixmap->drawable.height,
+ bpp, bits, stride);
+
+ sna_damage_destroy(&dst_priv->cpu_damage);
+ sna_damage_all(&dst_priv->gpu_damage,
+ dst_pixmap->drawable.width,
+ dst_pixmap->drawable.height);
+ } else {
+ DBG(("%s: dst is on the GPU, src is on the CPU, uploading\n",
+ __FUNCTION__));
+ sna_write_boxes(sna,
+ dst_priv->gpu_bo, dst_dx, dst_dy,
+ src_pixmap->devPrivate.ptr,
+ src_pixmap->devKind,
+ src_pixmap->drawable.bitsPerPixel,
+ src_dx, src_dy,
+ box, n);
+
+ RegionTranslate(&region, dst_dx, dst_dy);
+ assert_pixmap_contains_box(dst_pixmap,
+ RegionExtents(&region));
+ sna_damage_add(&dst_priv->gpu_damage,
+ &region);
+ sna_damage_subtract(&dst_priv->cpu_damage,
+ &region);
+ RegionTranslate(&region, -dst_dx, -dst_dy);
+ }
+ }
+ } else {
+ FbBits *dst_bits, *src_bits;
+ int dst_stride, src_stride;
+
+fallback:
+ DBG(("%s: fallback -- src=(%d, %d), dst=(%d, %d)\n",
+ __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy));
+ if (src_priv) {
+ RegionTranslate(&region, src_dx, src_dy);
+ sna_drawable_move_region_to_cpu(&src_pixmap->drawable,
+ &region, false);
+ RegionTranslate(&region, -src_dx, -src_dy);
+ }
+
+ RegionTranslate(&region, dst_dx, dst_dy);
+ if (dst_priv) {
+ if (alu == GXcopy) {
+ if (replaces) {
+ sna_damage_destroy(&dst_priv->gpu_damage);
+ sna_damage_all(&dst_priv->cpu_damage,
+ dst_pixmap->drawable.width,
+ dst_pixmap->drawable.height);
+ if (dst_priv->gpu_bo && !dst_priv->pinned)
+ sna_pixmap_destroy_gpu_bo(sna, dst_priv);
+ } else {
+ assert_pixmap_contains_box(dst_pixmap,
+ RegionExtents(&region));
+ sna_damage_subtract(&dst_priv->gpu_damage,
+ &region);
+ sna_damage_add(&dst_priv->cpu_damage,
+ &region);
+ }
+ } else
+ sna_drawable_move_region_to_cpu(&dst_pixmap->drawable,
+ &region, true);
+ }
+
+ dst_stride = dst_pixmap->devKind;
+ src_stride = src_pixmap->devKind;
+
+ dst_bits = (FbBits *)
+ ((char *)dst_pixmap->devPrivate.ptr +
+ dst_dy * dst_stride + dst_dx * bpp / 8);
+ src_bits = (FbBits *)
+ ((char *)src_pixmap->devPrivate.ptr +
+ src_dy * src_stride + src_dx * bpp / 8);
+
+ if (alu == GXcopy && !reverse && !upsidedown && bpp >= 8) {
+ do {
+ DBG(("%s: memcpy_blt(box=(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d), pitches=(%d, %d))\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1,
+ src_dx, src_dy,
+ dst_dx, dst_dy,
+ src_stride, dst_stride));
+ memcpy_blt(src_bits, dst_bits, bpp,
+ src_stride, dst_stride,
+ box->x1, box->y1,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1);
+ box++;
+ } while (--n);
+ } else {
+ dst_stride /= sizeof(FbBits);
+ src_stride /= sizeof(FbBits);
+ do {
+ DBG(("%s: fbBlt (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2, box->y2));
+ fbBlt(src_bits + box->y1 * src_stride,
+ src_stride,
+ box->x1 * bpp,
+
+ dst_bits + box->y1 * dst_stride,
+ dst_stride,
+ box->x1 * bpp,
+
+ (box->x2 - box->x1) * bpp,
+ (box->y2 - box->y1),
+
+ alu, -1, bpp,
+ reverse, upsidedown);
+ box++;
+ }while (--n);
+ }
+ }
+ RegionUninit(&region);
+}
+
+static RegionPtr
+sna_copy_area(DrawablePtr src, DrawablePtr dst, GCPtr gc,
+ int src_x, int src_y,
+ int width, int height,
+ int dst_x, int dst_y)
+{
+ struct sna *sna = to_sna_from_drawable(dst);
+
+ DBG(("%s: src=(%d, %d)x(%d, %d) -> dst=(%d, %d)\n",
+ __FUNCTION__, src_x, src_y, width, height, dst_x, dst_y));
+
+ if (sna->kgem.wedged ||
+ src->bitsPerPixel != dst->bitsPerPixel ||
+ !PM_IS_SOLID(dst, gc->planemask)) {
+ BoxRec box;
+ RegionRec region;
+
+ box.x1 = dst_x + dst->x;
+ box.y1 = dst_y + dst->y;
+ box.x2 = box.x1 + width;
+ box.y2 = box.y1 + height;
+ RegionInit(&region, &box, 1);
+
+ if (gc->pCompositeClip)
+ RegionIntersect(&region, &region, gc->pCompositeClip);
+
+ sna_drawable_move_region_to_cpu(dst, &region, true);
+ RegionTranslate(&region,
+ src_x - dst_x - dst->x + src->x,
+ src_y - dst_y - dst->y + src->y);
+ sna_drawable_move_region_to_cpu(src, &region, false);
+
+ return fbCopyArea(src, dst, gc,
+ src_x, src_y,
+ width, height,
+ dst_x, dst_y);
+ }
+
+ return miDoCopy(src, dst, gc,
+ src_x, src_y,
+ width, height,
+ dst_x, dst_y,
+ sna_copy_boxes, 0, NULL);
+}
+
+#define TRIM_BOX(box, d) do { \
+ if (box.x1 < 0) box.x1 = 0; \
+ if (box.x2 > d->width) box.x2 = d->width; \
+ if (box.y1 < 0) box.y1 = 0; \
+ if (box.y2 > d->height) box.y2 = d->height; \
+} while (0)
+
+#define CLIP_BOX(box, gc) \
+ if (gc->pCompositeClip) { \
+ BoxPtr extents = &gc->pCompositeClip->extents;\
+ if (box.x1 < extents->x1) box.x1 = extents->x1; \
+ if (box.x2 > extents->x2) box.x2 = extents->x2; \
+ if (box.y1 < extents->y1) box.y1 = extents->y1; \
+ if (box.y2 > extents->y2) box.y2 = extents->y2; \
+ }
+
+#define TRANSLATE_BOX(box, d) do { \
+ box.x1 += d->x; \
+ box.x2 += d->x; \
+ box.y1 += d->y; \
+ box.y2 += d->y; \
+} while (0)
+
+#define TRIM_AND_TRANSLATE_BOX(box, d, gc) do { \
+ TRIM_BOX(box, d); \
+ TRANSLATE_BOX(box, d); \
+ CLIP_BOX(box, gc); \
+} while (0)
+
+#define BOX_ADD_PT(box, x, y) do { \
+ if (box.x1 > x) box.x1 = x; \
+ else if (box.x2 < x) box.x2 = x; \
+ if (box.y1 > y) box.y1 = y; \
+ else if (box.y2 < y) box.y2 = y; \
+} while (0)
+
+#define BOX_ADD_RECT(box, x, y, w, h) do { \
+ if (box.x1 > x) box.x1 = x; \
+ else if (box.x2 < x + w) box.x2 = x + w; \
+ if (box.y1 > y) box.y1 = y; \
+ else if (box.y2 < y + h) box.y2 = y + h; \
+} while (0)
+
+#define BOX_EMPTY(box) (box.x2 <= box.x1 || box.y2 <= box.y1)
+
+static Bool
+box_intersect(BoxPtr a, const BoxPtr b)
+{
+ if (a->x1 < b->x1)
+ a->x1 = b->x1;
+ if (a->x2 > b->x2)
+ a->x2 = b->x2;
+ if (a->y1 < b->y1)
+ a->y1 = b->y1;
+ if (a->y2 > b->y2)
+ a->y2 = b->y2;
+
+ return a->x1 < a->x2 && a->y1 < a->y2;
+}
+
+static Bool
+sna_fill_init_blt(struct sna_fill_op *fill,
+ struct sna *sna,
+ PixmapPtr pixmap,
+ struct kgem_bo *bo,
+ uint8_t alu,
+ uint32_t pixel)
+{
+ memset(fill, 0, sizeof(*fill));
+ return sna->render.fill(sna, alu, pixmap, bo, pixel, fill);
+}
+
+static Bool
+sna_copy_init_blt(struct sna_copy_op *copy,
+ struct sna *sna,
+ PixmapPtr src, struct kgem_bo *src_bo,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ uint8_t alu)
+{
+ memset(copy, 0, sizeof(*copy));
+ return sna->render.copy(sna, alu, src, src_bo, dst, dst_bo, copy);
+}
+
+static Bool
+sna_fill_spans_blt(DrawablePtr drawable,
+ struct kgem_bo *bo, struct sna_damage **damage,
+ GCPtr gc, int n,
+ DDXPointPtr pt, int *width, int sorted)
+{
+ struct sna *sna = to_sna_from_drawable(drawable);
+ PixmapPtr pixmap = get_drawable_pixmap(drawable);
+ struct sna_fill_op fill;
+ BoxPtr extents, clip;
+ int nclip;
+ int16_t dx, dy;
+
+ if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel))
+ return false;
+
+ extents = REGION_EXTENTS(gc->screen, gc->pCompositeClip);
+ DBG(("%s: clip %d x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n",
+ __FUNCTION__,
+ REGION_NUM_RECTS(gc->pCompositeClip),
+ extents->x1, extents->y1, extents->x2, extents->y2,
+ n, pt->x, pt->y));
+
+ get_drawable_deltas(drawable, pixmap, &dx, &dy);
+ while (n--) {
+ int X1 = pt->x;
+ int y = pt->y;
+ int X2 = X1 + (int)*width;
+
+ if (!gc->miTranslate) {
+ X1 += drawable->x;
+ X2 += drawable->x;
+ y += drawable->y;
+ }
+
+ pt++;
+ width++;
+
+ if (y < extents->y1 || extents->y2 <= y)
+ continue;
+
+ if (X1 < extents->x1)
+ X1 = extents->x1;
+
+ if (X2 > extents->x2)
+ X2 = extents->x2;
+
+ if (X1 >= X2)
+ continue;
+
+ nclip = REGION_NUM_RECTS(gc->pCompositeClip);
+ if (nclip == 1) {
+ X1 += dx;
+ if (X1 < 0)
+ X1 = 0;
+ X2 += dx;
+ if (X2 > pixmap->drawable.width)
+ X2 = pixmap->drawable.width;
+ if (X2 > X1) {
+ fill.blt(sna, &fill, X1, y+dy, X2-X1, 1);
+ if (damage) {
+ BoxRec box;
+
+ box.x1 = X1;
+ box.x2 = X2;
+ box.y1 = y + dy;
+ box.y2 = box.y1 + 1;
+
+ assert_pixmap_contains_box(pixmap, &box);
+ sna_damage_add_box(damage, &box);
+ }
+ }
+ } else {
+ clip = REGION_RECTS(gc->pCompositeClip);
+ while (nclip--) {
+ if (clip->y1 <= y && y < clip->y2) {
+ int x1 = clip->x1;
+ int x2 = clip->x2;
+
+ if (x1 < X1)
+ x1 = X1;
+ x1 += dx;
+ if (x1 < 0)
+ x1 = 0;
+ if (x2 > X2)
+ x2 = X2;
+ x2 += dx;
+ if (x2 > pixmap->drawable.width)
+ x2 = pixmap->drawable.width;
+
+ if (x2 > x1) {
+ fill.blt(sna, &fill,
+ x1, y + dy,
+ x2-x1, 1);
+ if (damage) {
+ BoxRec box;
+
+ box.x1 = x1;
+ box.y1 = y + dy;
+ box.x2 = x2;
+ box.y2 = box.y1 + 1;
+
+ assert_pixmap_contains_box(pixmap, &box);
+ sna_damage_add_box(damage, &box);
+ }
+ }
+ }
+ clip++;
+ }
+ }
+ }
+ fill.done(sna, &fill);
+ return TRUE;
+}
+
+static Bool
+sna_spans_extents(DrawablePtr drawable, GCPtr gc,
+ int n, DDXPointPtr pt, int *width,
+ BoxPtr out)
+{
+ BoxRec box;
+
+ if (n == 0)
+ return true;
+
+ box.x1 = pt->x;
+ box.x2 = box.x1 + *width;
+ box.y2 = box.y1 = pt->y;
+
+ while (--n) {
+ pt++;
+ width++;
+ if (box.x1 > pt->x)
+ box.x1 = pt->x;
+ if (box.x2 < pt->x + *width)
+ box.x2 = pt->x + *width;
+
+ if (box.y1 > pt->y)
+ box.y1 = pt->y;
+ else if (box.y2 < pt->y)
+ box.y2 = pt->y;
+ }
+ box.y2++;
+
+ if (gc) {
+ if (!gc->miTranslate)
+ TRANSLATE_BOX(box, drawable);
+ CLIP_BOX(box, gc);
+ }
+ *out = box;
+ return BOX_EMPTY(box);
+}
+
+static void
+sna_fill_spans(DrawablePtr drawable, GCPtr gc, int n,
+ DDXPointPtr pt, int *width, int sorted)
+{
+ struct sna *sna = to_sna_from_drawable(drawable);
+ BoxRec extents;
+ RegionRec region;
+
+ DBG(("%s(n=%d, pt[0]=(%d, %d)\n",
+ __FUNCTION__, n, pt[0].x, pt[0].y));
+
+ if (sna_spans_extents(drawable, gc, n, pt, width, &extents))
+ return;
+
+ DBG(("%s: extents (%d, %d), (%d, %d)\n", __FUNCTION__,
+ extents.x1, extents.y1, extents.x2, extents.y2));
+
+ if (sna->kgem.wedged)
+ goto fallback;
+
+ if (gc->fillStyle == FillSolid &&
+ PM_IS_SOLID(drawable, gc->planemask)) {
+ struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable);
+
+ DBG(("%s: trying solid fill [alu=%d, pixel=%08lx] blt paths\n",
+ __FUNCTION__, gc->alu, gc->fgPixel));
+
+ if (sna_drawable_use_gpu_bo(drawable, &extents) &&
+ sna_fill_spans_blt(drawable,
+ priv->gpu_bo,
+ priv->gpu_only ? NULL : &priv->gpu_damage,
+ gc, n, pt, width, sorted))
+ return;
+
+ if (sna_drawable_use_cpu_bo(drawable, &extents) &&
+ sna_fill_spans_blt(drawable,
+ priv->cpu_bo, &priv->cpu_damage,
+ gc, n, pt, width, sorted))
+ return;
+ }
+
+fallback:
+ DBG(("%s: fallback\n", __FUNCTION__));
+ RegionInit(&region, &extents, 1);
+ if (gc->pCompositeClip)
+ RegionIntersect(&region, &region, gc->pCompositeClip);
+ if (!RegionNotEmpty(&region))
+ return;
+
+ sna_gc_move_to_cpu(gc);
+ sna_drawable_move_region_to_cpu(drawable, &region, true);
+ RegionUninit(&region);
+
+ fbFillSpans(drawable, gc, n, pt, width, sorted);
+}
+
+static void
+sna_set_spans(DrawablePtr drawable, GCPtr gc, char *src,
+ DDXPointPtr pt, int *width, int n, int sorted)
+{
+ BoxRec extents;
+ RegionRec region;
+
+ if (sna_spans_extents(drawable, gc, n, pt, width, &extents))
+ return;
+
+ DBG(("%s: extents=(%d, %d), (%d, %d)\n", __FUNCTION__,
+ extents.x1, extents.y1, extents.x2, extents.y2));
+
+ RegionInit(&region, &extents, 1);
+ if (gc->pCompositeClip)
+ RegionIntersect(&region, &region, gc->pCompositeClip);
+ if (!RegionNotEmpty(&region))
+ return;
+
+ sna_drawable_move_region_to_cpu(drawable, &region, true);
+ RegionUninit(&region);
+
+ fbSetSpans(drawable, gc, src, pt, width, n, sorted);
+}
+
+static RegionPtr
+sna_copy_plane(DrawablePtr src, DrawablePtr dst, GCPtr gc,
+ int src_x, int src_y,
+ int w, int h,
+ int dst_x, int dst_y,
+ unsigned long bit)
+{
+ BoxRec box;
+ RegionRec region;
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d), size=%dx%d\n", __FUNCTION__,
+ src_x, src_y, dst_x, dst_y, w, h));
+
+ box.x1 = dst_x + dst->x;
+ box.y1 = dst_y + dst->y;
+ box.x2 = box.x1 + w;
+ box.y2 = box.y1 + h;
+
+ RegionInit(&region, &box, 1);
+ if (gc->pCompositeClip)
+ RegionIntersect(&region, &region, gc->pCompositeClip);
+
+ sna_drawable_move_region_to_cpu(dst, &region, true);
+ RegionTranslate(&region,
+ src_x - dst_x - dst->x + src->x,
+ src_y - dst_y - dst->y + src->y);
+ sna_drawable_move_region_to_cpu(src, &region, false);
+
+ return fbCopyPlane(src, dst, gc, src_x, src_y, w, h, dst_x, dst_y, bit);
+}
+
+static Bool
+sna_poly_point_blt(DrawablePtr drawable,
+ struct kgem_bo *bo,
+ struct sna_damage **damage,
+ GCPtr gc, int mode, int n, DDXPointPtr pt)
+{
+ struct sna *sna = to_sna_from_drawable(drawable);
+ PixmapPtr pixmap = get_drawable_pixmap(drawable);
+ RegionPtr clip = fbGetCompositeClip(gc);
+ struct sna_fill_op fill;
+ DDXPointRec last;
+ int16_t dx, dy;
+
+ DBG(("%s: alu=%d, pixel=%08lx\n", __FUNCTION__, gc->alu, gc->fgPixel));
+
+ if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel))
+ return FALSE;
+
+ get_drawable_deltas(drawable, pixmap, &dx, &dy);
+
+ last.x = drawable->x;
+ last.y = drawable->y;
+
+ while (n--) {
+ int x, y;
+
+ x = pt->x;
+ y = pt->y;
+ pt++;
+ if (mode == CoordModePrevious) {
+ x += last.x;
+ y += last.x;
+ last.x = x;
+ last.y = y;
+ } else {
+ x += drawable->x;
+ y += drawable->y;
+ }
+
+ if (RegionContainsPoint(clip, x, y, NULL)) {
+ fill.blt(sna, &fill, x + dx, y + dy, 1, 1);
+ if (damage) {
+ BoxRec box;
+
+ box.x1 = x + dx;
+ box.y1 = x + dx;
+ box.x2 = box.x1 + 1;
+ box.y2 = box.y1 + 1;
+
+ assert_pixmap_contains_box(pixmap, &box);
+ sna_damage_add_box(damage, &box);
+ }
+ }
+ }
+ fill.done(sna, &fill);
+ return TRUE;
+}
+
+static Bool
+sna_poly_point_extents(DrawablePtr drawable, GCPtr gc,
+ int mode, int n, DDXPointPtr pt, BoxPtr out)
+{
+ BoxRec box;
+
+ if (n == 0)
+ return true;
+
+ box.x2 = box.x1 = pt->x;
+ box.y2 = box.y1 = pt->y;
+ while (--n) {
+ pt++;
+ BOX_ADD_PT(box, pt->x, pt->y);
+ }
+ box.x2++;
+ box.y2++;
+
+ TRIM_AND_TRANSLATE_BOX(box, drawable, gc);
+ *out = box;
+ return BOX_EMPTY(box);
+}
+
+static void
+sna_poly_point(DrawablePtr drawable, GCPtr gc,
+ int mode, int n, DDXPointPtr pt)
+{
+ struct sna *sna = to_sna_from_drawable(drawable);
+ BoxRec extents;
+ RegionRec region;
+
+ DBG(("%s(mode=%d, n=%d, pt[0]=(%d, %d)\n",
+ __FUNCTION__, mode, n, pt[0].x, pt[0].y));
+
+ if (sna_poly_point_extents(drawable, gc, mode, n, pt, &extents))
+ return;
+
+ DBG(("%s: extents (%d, %d), (%d, %d)\n", __FUNCTION__,
+ extents.x1, extents.y1, extents.x2, extents.y2));
+
+ if (sna->kgem.wedged)
+ goto fallback;
+
+ if (gc->fillStyle == FillSolid &&
+ PM_IS_SOLID(drawable, gc->planemask)) {
+ struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable);
+
+ DBG(("%s: trying solid fill [%08lx] blt paths\n",
+ __FUNCTION__, gc->fgPixel));
+
+ if (sna_drawable_use_gpu_bo(drawable, &extents) &&
+ sna_poly_point_blt(drawable,
+ priv->gpu_bo,
+ priv->gpu_only ? NULL : &priv->gpu_damage,
+ gc, mode, n, pt))
+ return;
+
+ if (sna_drawable_use_cpu_bo(drawable, &extents) &&
+ sna_poly_point_blt(drawable,
+ priv->cpu_bo,
+ &priv->cpu_damage,
+ gc, mode, n, pt))
+ return;
+ }
+
+fallback:
+ DBG(("%s: fallback\n", __FUNCTION__));
+ RegionInit(&region, &extents, 1);
+ if (gc->pCompositeClip)
+ RegionIntersect(&region, &region, gc->pCompositeClip);
+ if (!RegionNotEmpty(&region))
+ return;
+
+ sna_drawable_move_region_to_cpu(drawable, &region, true);
+ RegionUninit(&region);
+
+ fbPolyPoint(drawable, gc, mode, n, pt);
+}
+
+static Bool
+sna_poly_line_can_blt(int mode, int n, DDXPointPtr pt)
+{
+ int i;
+
+ if (mode == CoordModePrevious) {
+ for (i = 1; i < n; i++) {
+ if (pt[i].x != 0 && pt[i].y != 0)
+ return FALSE;
+ }
+ } else {
+ for (i = 1; i < n; i++) {
+ if (pt[i].x != pt[i-1].x && pt[i].y != pt[i-1].y)
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+static Bool
+sna_poly_line_blt(DrawablePtr drawable,
+ struct kgem_bo *bo,
+ struct sna_damage **damage,
+ GCPtr gc, int mode, int n, DDXPointPtr pt)
+{
+ struct sna *sna = to_sna_from_drawable(drawable);
+ PixmapPtr pixmap = get_drawable_pixmap(drawable);
+ RegionPtr clip = fbGetCompositeClip(gc);
+ struct sna_fill_op fill;
+ DDXPointRec last;
+ int16_t dx, dy;
+ int first;
+
+ DBG(("%s: alu=%d, fg=%08lx\n", __FUNCTION__, gc->alu, gc->fgPixel));
+
+ if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel))
+ return FALSE;
+
+ get_drawable_deltas(drawable, pixmap, &dx, &dy);
+
+ last.x = drawable->x;
+ last.y = drawable->y;
+ first = 1;
+
+ while (n--) {
+ int nclip;
+ BoxPtr box;
+ int x, y;
+
+ x = pt->x;
+ y = pt->y;
+ pt++;
+ if (mode == CoordModePrevious) {
+ x += last.x;
+ y += last.x;
+ } else {
+ x += drawable->x;
+ y += drawable->y;
+ }
+
+ if (!first) {
+ for (nclip = REGION_NUM_RECTS(clip), box = REGION_RECTS(clip); nclip--; box++) {
+ BoxRec r;
+
+ if (last.x == x) {
+ r.x1 = last.x;
+ r.x2 = last.x + 1;
+ } else {
+ r.x1 = last.x < x ? last.x : x;
+ r.x2 = last.x > x ? last.x : x;
+ }
+ if (last.y == y) {
+ r.y1 = last.y;
+ r.y2 = last.y + 1;
+ } else {
+ r.y1 = last.y < y ? last.y : y;
+ r.y2 = last.y > y ? last.y : y;
+ }
+ DBG(("%s: (%d, %d) -> (%d, %d) clipping line (%d, %d), (%d, %d) against box (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ last.x, last.y, x, y,
+ r.x1, r.y1, r.x2, r.y2,
+ box->x1, box->y1, box->x2, box->y2));
+ if (box_intersect(&r, box)) {
+ r.x1 += dx;
+ r.x2 += dx;
+ r.y1 += dy;
+ r.y2 += dy;
+ DBG(("%s: blt (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ r.x1, r.y1, r.x2, r.y2));
+ fill.blt(sna, &fill,
+ r.x1, r.y1,
+ r.x2-r.x1, r.y2-r.y1);
+ if (damage) {
+ assert_pixmap_contains_box(pixmap, &r);
+ sna_damage_add_box(damage, &r);
+ }
+ }
+ }
+ }
+
+ last.x = x;
+ last.y = y;
+ first = 0;
+ }
+ fill.done(sna, &fill);
+ return TRUE;
+}
+
+static Bool
+sna_poly_line_extents(DrawablePtr drawable, GCPtr gc,
+ int mode, int n, DDXPointPtr pt,
+ BoxPtr out)
+{
+ BoxRec box;
+ int extra = gc->lineWidth >> 1;
+
+ if (n == 0)
+ return true;
+
+ if (n > 1) {
+ if (gc->joinStyle == JoinMiter)
+ extra = 6 * gc->lineWidth;
+ else if (gc->capStyle == CapProjecting)
+ extra = gc->lineWidth;
+ }
+
+ box.x2 = box.x1 = pt->x;
+ box.y2 = box.y1 = pt->y;
+ if (mode == CoordModePrevious) {
+ int x = box.x1;
+ int y = box.y1;
+ while (--n) {
+ pt++;
+ x += pt->x;
+ y += pt->y;
+ BOX_ADD_PT(box, x, y);
+ }
+ } else {
+ while (--n) {
+ pt++;
+ BOX_ADD_PT(box, pt->x, pt->y);
+ }
+ }
+ box.x2++;
+ box.y2++;
+
+ if (extra) {
+ box.x1 -= extra;
+ box.x2 += extra;
+ box.y1 -= extra;
+ box.y2 += extra;
+ }
+
+ TRIM_AND_TRANSLATE_BOX(box, drawable, gc);
+ *out = box;
+ return BOX_EMPTY(box);
+}
+
+static void
+sna_poly_line(DrawablePtr drawable, GCPtr gc,
+ int mode, int n, DDXPointPtr pt)
+{
+ struct sna *sna = to_sna_from_drawable(drawable);
+ BoxRec extents;
+ RegionRec region;
+
+ DBG(("%s(mode=%d, n=%d, pt[0]=(%d, %d)\n",
+ __FUNCTION__, mode, n, pt[0].x, pt[0].y));
+
+ if (sna_poly_line_extents(drawable, gc, mode, n, pt, &extents))
+ return;
+
+ DBG(("%s: extents (%d, %d), (%d, %d)\n", __FUNCTION__,
+ extents.x1, extents.y1, extents.x2, extents.y2));
+
+ if (sna->kgem.wedged)
+ goto fallback;
+
+ if (gc->fillStyle == FillSolid &&
+ gc->lineStyle == LineSolid &&
+ (gc->lineWidth == 0 || gc->lineWidth == 1) &&
+ PM_IS_SOLID(drawable, gc->planemask) &&
+ sna_poly_line_can_blt(mode, n, pt)) {
+ struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable);
+
+ DBG(("%s: trying solid fill [%08lx]\n",
+ __FUNCTION__, gc->fgPixel));
+
+ if (sna_drawable_use_gpu_bo(drawable, &extents) &&
+ sna_poly_line_blt(drawable,
+ priv->gpu_bo,
+ priv->gpu_only ? NULL : &priv->gpu_damage,
+ gc, mode, n, pt))
+ return;
+
+ if (sna_drawable_use_cpu_bo(drawable, &extents) &&
+ sna_poly_line_blt(drawable,
+ priv->cpu_bo,
+ &priv->cpu_damage,
+ gc, mode, n, pt))
+ return;
+ }
+
+fallback:
+ DBG(("%s: fallback\n", __FUNCTION__));
+ RegionInit(&region, &extents, 1);
+ if (gc->pCompositeClip)
+ RegionIntersect(&region, &region, gc->pCompositeClip);
+ if (!RegionNotEmpty(&region))
+ return;
+
+ sna_gc_move_to_cpu(gc);
+ sna_drawable_move_region_to_cpu(drawable, &region, true);
+ RegionUninit(&region);
+
+ fbPolyLine(drawable, gc, mode, n, pt);
+}
+
+static Bool
+sna_poly_segment_can_blt(int n, xSegment *seg)
+{
+ while (n--) {
+ if (seg->x1 != seg->x2 && seg->y1 != seg->y2)
+ return FALSE;
+
+ seg++;
+ }
+
+ return TRUE;
+}
+
+static Bool
+sna_poly_segment_blt(DrawablePtr drawable,
+ struct kgem_bo *bo,
+ struct sna_damage **damage,
+ GCPtr gc, int n, xSegment *seg)
+{
+ struct sna *sna = to_sna_from_drawable(drawable);
+ PixmapPtr pixmap = get_drawable_pixmap(drawable);
+ RegionPtr clip = fbGetCompositeClip(gc);
+ struct sna_fill_op fill;
+ int16_t dx, dy;
+
+ DBG(("%s: alu=%d, fg=%08lx\n", __FUNCTION__, gc->alu, gc->fgPixel));
+
+ if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel))
+ return FALSE;
+
+ get_drawable_deltas(drawable, pixmap, &dx, &dy);
+ while (n--) {
+ int x, y, width, height, nclip;
+ BoxPtr box;
+
+ if (seg->x1 < seg->x2) {
+ x = seg->x1;
+ width = seg->x2;
+ } else {
+ x = seg->x2;
+ width = seg->x1;
+ }
+ width -= x - 1;
+ x += drawable->x;
+
+ if (seg->y1 < seg->y2) {
+ y = seg->y1;
+ height = seg->y2;
+ } else {
+ y = seg->y2;
+ height = seg->y1;
+ }
+ height -= y - 1;
+ y += drawable->y;
+
+ /* don't paint last pixel */
+ if (gc->capStyle == CapNotLast) {
+ if (width == 1)
+ height--;
+ else
+ width--;
+ }
+
+ DBG(("%s: [%d] (%d, %d)x(%d, %d) + (%d, %d)\n", __FUNCTION__, n,
+ x, y, width, height, dx, dy));
+ for (nclip = REGION_NUM_RECTS(clip), box = REGION_RECTS(clip); nclip--; box++) {
+ BoxRec r = { x, y, x + width, y + height };
+ if (box_intersect(&r, box)) {
+ r.x1 += dx;
+ r.x2 += dx;
+ r.y1 += dy;
+ r.y2 += dy;
+ fill.blt(sna, &fill,
+ r.x1, r.y1,
+ r.x2-r.x1, r.y2-r.y1);
+ if (damage) {
+ assert_pixmap_contains_box(pixmap, &r);
+ sna_damage_add_box(damage, &r);
+ }
+ }
+ }
+
+ seg++;
+ }
+ fill.done(sna, &fill);
+ return TRUE;
+}
+
+static Bool
+sna_poly_segment_extents(DrawablePtr drawable, GCPtr gc,
+ int n, xSegment *seg,
+ BoxPtr out)
+{
+ BoxRec box;
+ int extra = gc->lineWidth;
+
+ if (n == 0)
+ return true;
+
+ if (gc->capStyle != CapProjecting)
+ extra >>= 1;
+
+ if (seg->x2 > seg->x1) {
+ box.x1 = seg->x1;
+ box.x2 = seg->x2;
+ } else {
+ box.x2 = seg->x1;
+ box.x1 = seg->x2;
+ }
+
+ if (seg->y2 > seg->y1) {
+ box.y1 = seg->y1;
+ box.y2 = seg->y2;
+ } else {
+ box.y2 = seg->y1;
+ box.y1 = seg->y2;
+ }
+
+ while (--n) {
+ seg++;
+ if (seg->x2 > seg->x1) {
+ if (seg->x1 < box.x1) box.x1 = seg->x1;
+ if (seg->x2 > box.x2) box.x2 = seg->x2;
+ } else {
+ if (seg->x2 < box.x1) box.x1 = seg->x2;
+ if (seg->x1 > box.x2) box.x2 = seg->x1;
+ }
+
+ if (seg->y2 > seg->y1) {
+ if (seg->y1 < box.y1) box.y1 = seg->y1;
+ if (seg->y2 > box.y2) box.y2 = seg->y2;
+ } else {
+ if (seg->y2 < box.y1) box.y1 = seg->y2;
+ if (seg->y1 > box.y2) box.y2 = seg->y1;
+ }
+ }
+
+ box.x2++;
+ box.y2++;
+
+ if (extra) {
+ box.x1 -= extra;
+ box.x2 += extra;
+ box.y1 -= extra;
+ box.y2 += extra;
+ }
+
+ TRIM_AND_TRANSLATE_BOX(box, drawable, gc);
+ *out = box;
+ return BOX_EMPTY(box);
+}
+
+static void
+sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
+{
+ struct sna *sna = to_sna_from_drawable(drawable);
+ BoxRec extents;
+ RegionRec region;
+
+ DBG(("%s(n=%d, first=((%d, %d), (%d, %d))\n", __FUNCTION__,
+ n, seg->x1, seg->y1, seg->x2, seg->y2));
+
+ if (sna_poly_segment_extents(drawable, gc, n, seg, &extents))
+ return;
+
+ DBG(("%s: extents=(%d, %d), (%d, %d)\n", __FUNCTION__,
+ extents.x1, extents.y1, extents.x2, extents.y2));
+
+ if (sna->kgem.wedged)
+ goto fallback;
+
+ if (gc->fillStyle == FillSolid &&
+ gc->lineStyle == LineSolid &&
+ gc->lineWidth == 0 &&
+ PM_IS_SOLID(drawable, gc->planemask) &&
+ sna_poly_segment_can_blt(n, seg)) {
+ struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable);
+
+ DBG(("%s: trying blt solid fill [%08lx] paths\n",
+ __FUNCTION__, gc->fgPixel));
+
+ if (sna_drawable_use_gpu_bo(drawable, &extents) &&
+ sna_poly_segment_blt(drawable,
+ priv->gpu_bo,
+ priv->gpu_only ? NULL : &priv->gpu_damage,
+ gc, n, seg))
+ return;
+
+ if (sna_drawable_use_cpu_bo(drawable, &extents) &&
+ sna_poly_segment_blt(drawable,
+ priv->cpu_bo,
+ &priv->cpu_damage,
+ gc, n, seg))
+ return;
+ }
+
+fallback:
+ DBG(("%s: fallback\n", __FUNCTION__));
+ RegionInit(&region, &extents, 1);
+ if (gc->pCompositeClip)
+ RegionIntersect(&region, &region, gc->pCompositeClip);
+ if (!RegionNotEmpty(&region))
+ return;
+
+ sna_gc_move_to_cpu(gc);
+ sna_drawable_move_region_to_cpu(drawable, &region, true);
+ RegionUninit(&region);
+
+ fbPolySegment(drawable, gc, n, seg);
+}
+
+static Bool
+sna_poly_arc_extents(DrawablePtr drawable, GCPtr gc,
+ int n, xArc *arc,
+ BoxPtr out)
+{
+ int extra = gc->lineWidth >> 1;
+ BoxRec box;
+
+ if (n == 0)
+ return true;
+
+ box.x1 = arc->x;
+ box.x2 = box.x1 + arc->width;
+ box.y1 = arc->y;
+ box.y2 = box.y1 + arc->height;
+
+ while (--n) {
+ arc++;
+ if (box.x1 > arc->x)
+ box.x1 = arc->x;
+ if (box.x2 < arc->x + arc->width)
+ box.x2 = arc->x + arc->width;
+ if (box.y1 > arc->y)
+ box.y1 = arc->y;
+ if (box.y2 < arc->y + arc->height)
+ box.y2 = arc->y + arc->height;
+ }
+
+ if (extra) {
+ box.x1 -= extra;
+ box.x2 += extra;
+ box.y1 -= extra;
+ box.y2 += extra;
+ }
+
+ box.x2++;
+ box.y2++;
+
+ TRIM_AND_TRANSLATE_BOX(box, drawable, gc);
+ *out = box;
+ return BOX_EMPTY(box);
+}
+
+static void
+sna_poly_arc(DrawablePtr drawable, GCPtr gc, int n, xArc *arc)
+{
+ BoxRec extents;
+ RegionRec region;
+
+ if (sna_poly_arc_extents(drawable, gc, n, arc, &extents))
+ return;
+
+ DBG(("%s: extents=(%d, %d), (%d, %d)\n", __FUNCTION__,
+ extents.x1, extents.y1, extents.x2, extents.y2));
+
+ RegionInit(&region, &extents, 1);
+ if (gc->pCompositeClip)
+ RegionIntersect(&region, &region, gc->pCompositeClip);
+ if (!RegionNotEmpty(&region))
+ return;
+
+ sna_gc_move_to_cpu(gc);
+ sna_drawable_move_region_to_cpu(drawable, &region, true);
+ RegionUninit(&region);
+
+ fbPolyArc(drawable, gc, n, arc);
+}
+
+static Bool
+sna_poly_fill_rect_blt(DrawablePtr drawable,
+ struct kgem_bo *bo,
+ struct sna_damage **damage,
+ GCPtr gc, int n,
+ xRectangle *rect)
+{
+ struct sna *sna = to_sna_from_drawable(drawable);
+ PixmapPtr pixmap = get_drawable_pixmap(drawable);
+ RegionPtr clip = fbGetCompositeClip(gc);
+ struct sna_fill_op fill;
+ uint32_t pixel = gc->fillStyle == FillSolid ? gc->fgPixel : gc->tile.pixel;
+ int16_t dx, dy;
+
+ DBG(("%s x %d [(%d, %d)+(%d, %d)...]\n",
+ __FUNCTION__, n, rect->x, rect->y, rect->width, rect->height));
+
+ if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, pixel)) {
+ DBG(("%s: unsupported blt\n", __FUNCTION__));
+ return FALSE;
+ }
+
+ get_drawable_deltas(drawable, pixmap, &dx, &dy);
+ if (REGION_NUM_RECTS(clip) == 1) {
+ BoxPtr box = REGION_RECTS(clip);
+ while (n--) {
+ BoxRec r;
+
+ r.x1 = rect->x + drawable->x;
+ r.y1 = rect->y + drawable->y;
+ r.x2 = r.x1 + rect->width;
+ r.y2 = r.y1 + rect->height;
+ rect++;
+
+ if (box_intersect(&r, box)) {
+ r.x1 += dx;
+ r.x2 += dx;
+ r.y1 += dy;
+ r.y2 += dy;
+ fill.blt(sna, &fill,
+ r.x1, r.y1,
+ r.x2-r.x1, r.y2-r.y1);
+ if (damage) {
+ assert_pixmap_contains_box(pixmap, &r);
+ sna_damage_add_box(damage, &r);
+ }
+ }
+ }
+ } else {
+ while (n--) {
+ RegionRec region;
+ BoxRec r,*box;
+ int nbox;
+
+ r.x1 = rect->x + drawable->x;
+ r.y1 = rect->y + drawable->y;
+ r.x2 = r.x1 + rect->width;
+ r.y2 = r.y1 + rect->height;
+ rect++;
+
+ RegionInit(&region, &r, 1);
+ RegionIntersect(&region, &region, clip);
+
+ nbox = REGION_NUM_RECTS(&region);
+ box = REGION_RECTS(&region);
+ while (nbox--) {
+ box->x1 += dx;
+ box->x2 += dx;
+ box->y1 += dy;
+ box->y2 += dy;
+ fill.blt(sna, &fill,
+ box->x1, box->y1,
+ box->x2-box->x1, box->y2-box->y1);
+ if (damage) {
+ assert_pixmap_contains_box(pixmap, box);
+ sna_damage_add_box(damage, box);
+ }
+ box++;
+ }
+
+ RegionUninit(&region);
+ }
+ }
+ fill.done(sna, &fill);
+ return TRUE;
+}
+
+static uint32_t
+get_pixel(PixmapPtr pixmap)
+{
+ DBG(("%s\n", __FUNCTION__));
+ sna_pixmap_move_to_cpu(pixmap, false);
+ switch (pixmap->drawable.bitsPerPixel) {
+ case 32: return *(uint32_t *)pixmap->devPrivate.ptr;
+ case 16: return *(uint16_t *)pixmap->devPrivate.ptr;
+ default: return *(uint8_t *)pixmap->devPrivate.ptr;
+ }
+}
+
+static Bool
+sna_poly_fill_rect_tiled(DrawablePtr drawable,
+ struct kgem_bo *bo,
+ struct sna_damage **damage,
+ GCPtr gc, int n,
+ xRectangle *rect)
+{
+ struct sna *sna = to_sna_from_drawable(drawable);
+ PixmapPtr pixmap = get_drawable_pixmap(drawable);
+ PixmapPtr tile = gc->tile.pixmap;
+ RegionPtr clip = fbGetCompositeClip(gc);
+ DDXPointPtr origin = &gc->patOrg;
+ CARD32 alu = gc->alu;
+ int tile_width, tile_height;
+ int16_t dx, dy;
+
+ DBG(("%s x %d [(%d, %d)+(%d, %d)...]\n",
+ __FUNCTION__, n, rect->x, rect->y, rect->width, rect->height));
+
+ tile_width = tile->drawable.width;
+ tile_height = tile->drawable.height;
+
+ get_drawable_deltas(drawable, pixmap, &dx, &dy);
+
+ if (tile_width == 1 && tile_height == 1) {
+ struct sna_fill_op fill;
+
+ if (!sna_fill_init_blt(&fill, sna, pixmap, bo, alu, get_pixel(tile))) {
+ DBG(("%s: unsupported blt\n", __FUNCTION__));
+ return FALSE;
+ }
+
+ if (REGION_NUM_RECTS(clip) == 1) {
+ BoxPtr box = REGION_RECTS(clip);
+ while (n--) {
+ BoxRec r;
+
+ r.x1 = rect->x + drawable->x;
+ r.y1 = rect->y + drawable->y;
+ r.x2 = r.x1 + rect->width;
+ r.y2 = r.y1 + rect->height;
+ rect++;
+
+ if (box_intersect(&r, box)) {
+ r.x1 += dx;
+ r.x2 += dx;
+ r.y1 += dy;
+ r.y2 += dy;
+ fill.blt(sna, &fill,
+ r.x1, r.y1,
+ r.x2-r.x1, r.y2-r.y1);
+ if (damage) {
+ assert_pixmap_contains_box(pixmap, &r);
+ sna_damage_add_box(damage, &r);
+ }
+ }
+ }
+ } else {
+ while (n--) {
+ RegionRec region;
+ BoxRec r,*box;
+ int nbox;
+
+ r.x1 = rect->x + drawable->x;
+ r.y1 = rect->y + drawable->y;
+ r.x2 = r.x1 + rect->width;
+ r.y2 = r.y1 + rect->height;
+ rect++;
+
+ RegionInit(&region, &r, 1);
+ RegionIntersect(&region, &region, clip);
+
+ nbox = REGION_NUM_RECTS(&region);
+ box = REGION_RECTS(&region);
+ while (nbox--) {
+ box->x1 += dx;
+ box->x2 += dx;
+ box->y1 += dy;
+ box->y2 += dy;
+ fill.blt(sna, &fill,
+ box->x1, box->y1,
+ box->x2-box->x1,
+ box->y2-box->y1);
+ if (damage) {
+ assert_pixmap_contains_box(pixmap, box);
+ sna_damage_add_box(damage, box);
+ }
+ box++;
+ }
+
+ RegionUninit(&region);
+ }
+ }
+ fill.done(sna, &fill);
+ } else {
+ struct sna_copy_op copy;
+
+ if (!sna_pixmap_move_to_gpu(tile))
+ return FALSE;
+
+ if (!sna_copy_init_blt(&copy, sna,
+ tile, sna_pixmap_get_bo(tile),
+ pixmap, bo,
+ alu)) {
+ DBG(("%s: unsupported blt\n", __FUNCTION__));
+ return FALSE;
+ }
+
+ if (REGION_NUM_RECTS(clip) == 1) {
+ const BoxPtr box = REGION_RECTS(clip);
+ while (n--) {
+ BoxRec r;
+
+ r.x1 = rect->x + drawable->x;
+ r.y1 = rect->y + drawable->y;
+ r.x2 = r.x1 + rect->width;
+ r.y2 = r.y1 + rect->height;
+ rect++;
+
+ if (box_intersect(&r, box)) {
+ int height = r.y2 - r.y1;
+ int dst_y = r.y1;
+ int tile_y = (r.y1 - drawable->y - origin->y) % tile_height;
+ while (height) {
+ int width = r.x2 - r.x1;
+ int dst_x = r.x1;
+ int tile_x = (r.x1 - drawable->x - origin->x) % tile_width;
+ int h = tile_height - tile_y;
+ if (h > height)
+ h = height;
+ height -= h;
+
+ while (width > 0) {
+ int w = tile_width - tile_x;
+ if (w > width)
+ w = width;
+ width -= w;
+
+ copy.blt(sna, &copy,
+ tile_x, tile_y,
+ w, h,
+ dst_x + dx, dst_y + dy);
+ if (damage) {
+ BoxRec box;
+ box.x1 = dst_x + dx;
+ box.y1 = dst_y + dy;
+ box.x2 = box.x1 + w;
+ box.y2 = box.y1 + h;
+ assert_pixmap_contains_box(pixmap, &box);
+ sna_damage_add_box(damage, &box);
+ }
+
+ dst_x += w;
+ tile_x = 0;
+ }
+ dst_y += h;
+ tile_y = 0;
+ }
+ }
+ }
+ } else {
+ while (n--) {
+ RegionRec region;
+ BoxRec r,*box;
+ int nbox;
+
+ r.x1 = rect->x + drawable->x;
+ r.y1 = rect->y + drawable->y;
+ r.x2 = r.x1 + rect->width;
+ r.y2 = r.y1 + rect->height;
+ rect++;
+
+ RegionInit(&region, &r, 1);
+ RegionIntersect(&region, &region, clip);
+
+ nbox = REGION_NUM_RECTS(&region);
+ box = REGION_RECTS(&region);
+ while (nbox--) {
+ int height = box->y2 - box->y1;
+ int dst_y = box->y1;
+ int tile_y = (box->y1 - drawable->y - origin->y) % tile_height;
+ while (height) {
+ int width = box->x2 - box->x1;
+ int dst_x = box->x1;
+ int tile_x = (box->x1 - drawable->x - origin->x) % tile_width;
+ int h = tile_height - tile_y;
+ if (h > height)
+ h = height;
+ height -= h;
+
+ while (width > 0) {
+ int w = tile_width - tile_x;
+ if (w > width)
+ w = width;
+ width -= w;
+
+ copy.blt(sna, &copy,
+ tile_x, tile_y,
+ w, h,
+ dst_x + dx, dst_y + dy);
+ if (damage) {
+ BoxRec box;
+
+ box.x1 = dst_x + dx;
+ box.y1 = dst_y + dy;
+ box.x2 = box.x1 + w;
+ box.y2 = box.y1 + h;
+
+ assert_pixmap_contains_box(pixmap, &box);
+ sna_damage_add_box(damage, &box);
+ }
+
+ dst_x += w;
+ tile_x = 0;
+ }
+ dst_y += h;
+ tile_y = 0;
+ }
+ box++;
+ }
+
+ RegionUninit(&region);
+ }
+ }
+ copy.done(sna, &copy);
+ }
+ return TRUE;
+}
+
+static Bool
+sna_poly_fill_rect_extents(DrawablePtr drawable, GCPtr gc,
+ int n, xRectangle *rect,
+ BoxPtr out)
+{
+ BoxRec box;
+
+ if (n == 0)
+ return true;
+
+ box.x1 = rect->x;
+ box.x2 = box.x1 + rect->width;
+ box.y1 = rect->y;
+ box.y2 = box.y1 + rect->height;
+
+ while (--n) {
+ rect++;
+ BOX_ADD_RECT(box, rect->x, rect->y, rect->width, rect->height);
+ }
+
+ TRIM_AND_TRANSLATE_BOX(box, drawable, gc);
+ *out = box;
+ return BOX_EMPTY(box);
+}
+
+static void
+sna_poly_fill_rect(DrawablePtr draw, GCPtr gc, int n, xRectangle *rect)
+{
+ struct sna *sna = to_sna_from_drawable(draw);
+ BoxRec extents;
+ RegionRec region;
+
+ DBG(("%s(n=%d, PlaneMask: %lx (solid %d), solid fill: %d [style=%d, tileIsPixel=%d], alu=%d)\n", __FUNCTION__,
+ n, gc->planemask, !!PM_IS_SOLID(draw, gc->planemask),
+ (gc->fillStyle == FillSolid ||
+ (gc->fillStyle == FillTiled && gc->tileIsPixel)),
+ gc->fillStyle, gc->tileIsPixel,
+ gc->alu));
+
+ if (sna_poly_fill_rect_extents(draw, gc, n, rect, &extents))
+ return;
+
+ if (sna->kgem.wedged)
+ goto fallback;
+
+ if (!PM_IS_SOLID(draw, gc->planemask))
+ goto fallback;
+
+ if (gc->fillStyle == FillSolid ||
+ (gc->fillStyle == FillTiled && gc->tileIsPixel)) {
+ struct sna_pixmap *priv = sna_pixmap_from_drawable(draw);
+
+ DBG(("%s: solid fill [%08lx], testing for blt\n",
+ __FUNCTION__,
+ gc->fillStyle == FillSolid ? gc->fgPixel : gc->tile.pixel));
+
+ if (sna_drawable_use_gpu_bo(draw, &extents) &&
+ sna_poly_fill_rect_blt(draw,
+ priv->gpu_bo,
+ priv->gpu_only ? NULL : &priv->gpu_damage,
+ gc, n, rect))
+ return;
+
+ if (sna_drawable_use_cpu_bo(draw, &extents) &&
+ sna_poly_fill_rect_blt(draw,
+ priv->cpu_bo,
+ &priv->cpu_damage,
+ gc, n, rect))
+ return;
+ } else if (gc->fillStyle == FillTiled) {
+ struct sna_pixmap *priv = sna_pixmap_from_drawable(draw);
+
+ DBG(("%s: tiled fill, testing for blt\n", __FUNCTION__));
+
+ if (sna_drawable_use_gpu_bo(draw, &extents) &&
+ sna_poly_fill_rect_tiled(draw,
+ priv->gpu_bo,
+ priv->gpu_only ? NULL : &priv->gpu_damage,
+ gc, n, rect))
+ return;
+
+ if (sna_drawable_use_cpu_bo(draw, &extents) &&
+ sna_poly_fill_rect_tiled(draw,
+ priv->cpu_bo,
+ &priv->cpu_damage,
+ gc, n, rect))
+ return;
+ }
+
+fallback:
+ DBG(("%s: fallback (%d, %d), (%d, %d)\n", __FUNCTION__,
+ extents.x1, extents.y1, extents.x2, extents.y2));
+ RegionInit(&region, &extents, 1);
+ if (gc->pCompositeClip)
+ RegionIntersect(&region, &region, gc->pCompositeClip);
+ if (!RegionNotEmpty(&region))
+ return;
+
+ sna_gc_move_to_cpu(gc);
+ sna_drawable_move_region_to_cpu(draw, &region, true);
+ RegionUninit(&region);
+
+ DBG(("%s: fallback - fbPolyFillRect\n", __FUNCTION__));
+ fbPolyFillRect(draw, gc, n, rect);
+}
+
+static void
+sna_image_glyph(DrawablePtr drawable, GCPtr gc,
+ int x, int y, unsigned int n,
+ CharInfoPtr *info, pointer base)
+{
+ ExtentInfoRec extents;
+ BoxRec box;
+ RegionRec region;
+
+ if (n == 0)
+ return;
+
+ QueryGlyphExtents(gc->font, info, n, &extents);
+ if (extents.overallWidth >= 0) {
+ box.x1 = x;
+ box.x2 = x + extents.overallWidth;
+ } else {
+ box.x2 = x;
+ box.x1 = x + extents.overallWidth;
+ }
+ box.y1 = y - FONTASCENT(gc->font);
+ box.y2 = y + FONTDESCENT(gc->font);
+ TRIM_BOX(box, drawable);
+ TRANSLATE_BOX(box, drawable);
+
+ DBG(("%s: extents(%d, %d), (%d, %d)\n",
+ __FUNCTION__, box.x1, box.y1, box.x2, box.y2));
+
+ RegionInit(&region, &box, 1);
+ if (gc->pCompositeClip)
+ RegionIntersect(&region, &region, gc->pCompositeClip);
+ if (!RegionNotEmpty(&region))
+ return;
+
+ sna_gc_move_to_cpu(gc);
+ sna_drawable_move_region_to_cpu(drawable, &region, true);
+ RegionUninit(&region);
+
+ fbImageGlyphBlt(drawable, gc, x, y, n, info, base);
+}
+
+static void
+sna_poly_glyph(DrawablePtr drawable, GCPtr gc,
+ int x, int y, unsigned int n,
+ CharInfoPtr *info, pointer base)
+{
+ ExtentInfoRec extents;
+ BoxRec box;
+ RegionRec region;
+
+ if (n == 0)
+ return;
+
+ QueryGlyphExtents(gc->font, info, n, &extents);
+ box.x1 = x + extents.overallLeft;
+ box.y1 = y - extents.overallAscent;
+ box.x2 = x + extents.overallRight;
+ box.y2 = y + extents.overallDescent;
+
+ TRIM_BOX(box, drawable);
+ TRANSLATE_BOX(box, drawable);
+
+ DBG(("%s: extents(%d, %d), (%d, %d)\n",
+ __FUNCTION__, box.x1, box.y1, box.x2, box.y2));
+
+ RegionInit(&region, &box, 1);
+ if (gc->pCompositeClip)
+ RegionIntersect(&region, &region, gc->pCompositeClip);
+ if (!RegionNotEmpty(&region))
+ return;
+
+ sna_gc_move_to_cpu(gc);
+ sna_drawable_move_region_to_cpu(drawable, &region, true);
+ RegionUninit(&region);
+
+ fbPolyGlyphBlt(drawable, gc, x, y, n, info, base);
+}
+
+static void
+sna_push_pixels(GCPtr gc, PixmapPtr bitmap, DrawablePtr drawable,
+ int w, int h,
+ int x, int y)
+{
+ BoxRec box;
+ RegionRec region;
+
+ if (w == 0 || h == 0)
+ return;
+
+ DBG(("%s (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
+
+ box.x1 = x;
+ box.y1 = y;
+ if (!gc->miTranslate) {
+ box.x1 += drawable->x;
+ box.y1 += drawable->y;
+ }
+ box.x2 = box.x1 + w;
+ box.y2 = box.y1 + h;
+
+ CLIP_BOX(box, gc);
+ if (BOX_EMPTY(box))
+ return;
+
+ DBG(("%s: extents(%d, %d), (%d, %d)\n",
+ __FUNCTION__, box.x1, box.y1, box.x2, box.y2));
+
+ RegionInit(&region, &box, 1);
+ if (gc->pCompositeClip)
+ RegionIntersect(&region, &region, gc->pCompositeClip);
+ if (!RegionNotEmpty(&region))
+ return;
+
+ sna_gc_move_to_cpu(gc);
+ sna_pixmap_move_to_cpu(bitmap, false);
+ sna_drawable_move_region_to_cpu(drawable, &region, true);
+ RegionUninit(&region);
+
+ fbPushPixels(gc, bitmap, drawable, w, h, x, y);
+}
+
+static const GCOps sna_gc_ops = {
+ sna_fill_spans,
+ sna_set_spans,
+ sna_put_image,
+ sna_copy_area,
+ sna_copy_plane,
+ sna_poly_point,
+ sna_poly_line,
+ sna_poly_segment,
+ miPolyRectangle,
+ sna_poly_arc,
+ miFillPolygon,
+ sna_poly_fill_rect,
+ miPolyFillArc,
+ miPolyText8,
+ miPolyText16,
+ miImageText8,
+ miImageText16,
+ sna_image_glyph,
+ sna_poly_glyph,
+ sna_push_pixels,
+};
+
+static void sna_validate_pixmap(DrawablePtr draw, PixmapPtr pixmap)
+{
+ if (draw->bitsPerPixel == pixmap->drawable.bitsPerPixel &&
+ FbEvenTile(pixmap->drawable.width *
+ pixmap->drawable.bitsPerPixel)) {
+ DBG(("%s: flushing pixmap\n", __FUNCTION__));
+ sna_pixmap_move_to_cpu(pixmap, true);
+ }
+}
+
+static void
+sna_validate_gc(GCPtr gc, unsigned long changes, DrawablePtr drawable)
+{
+ DBG(("%s\n", __FUNCTION__));
+
+ if (changes & GCTile && !gc->tileIsPixel) {
+ DBG(("%s: flushing tile pixmap\n", __FUNCTION__));
+ sna_validate_pixmap(drawable, gc->tile.pixmap);
+ }
+
+ if (changes & GCStipple && gc->stipple) {
+ DBG(("%s: flushing stipple pixmap\n", __FUNCTION__));
+ sna_pixmap_move_to_cpu(gc->stipple, true);
+ }
+
+ fbValidateGC(gc, changes, drawable);
+}
+
+static const GCFuncs sna_gc_funcs = {
+ sna_validate_gc,
+ miChangeGC,
+ miCopyGC,
+ miDestroyGC,
+ miChangeClip,
+ miDestroyClip,
+ miCopyClip
+};
+
+static int sna_create_gc(GCPtr gc)
+{
+ if (!fbCreateGC(gc))
+ return FALSE;
+
+ gc->funcs = (GCFuncs *)&sna_gc_funcs;
+ gc->ops = (GCOps *)&sna_gc_ops;
+ return TRUE;
+}
+
+static void
+sna_get_image(DrawablePtr drawable,
+ int x, int y, int w, int h,
+ unsigned int format, unsigned long mask,
+ char *dst)
+{
+ BoxRec extents;
+ RegionRec region;
+
+ DBG(("%s (%d, %d, %d, %d)\n", __FUNCTION__, x, y, w, h));
+
+ extents.x1 = x + drawable->x;
+ extents.y1 = y + drawable->y;
+ extents.x2 = extents.x1 + w;
+ extents.y2 = extents.y1 + h;
+ RegionInit(&region, &extents, 1);
+
+ sna_drawable_move_region_to_cpu(drawable, &region, false);
+ fbGetImage(drawable, x, y, w, h, format, mask, dst);
+
+ RegionUninit(&region);
+}
+
+static void
+sna_get_spans(DrawablePtr drawable, int wMax,
+ DDXPointPtr pt, int *width, int n, char *start)
+{
+ BoxRec extents;
+ RegionRec region;
+
+ if (sna_spans_extents(drawable, NULL, n, pt, width, &extents))
+ return;
+
+ RegionInit(&region, &extents, 1);
+ sna_drawable_move_region_to_cpu(drawable, &region, false);
+ RegionUninit(&region);
+
+ fbGetSpans(drawable, wMax, pt, width, n, start);
+}
+
+static void
+sna_copy_window(WindowPtr win, DDXPointRec origin, RegionPtr src)
+{
+ struct sna *sna = to_sna_from_drawable(&win->drawable);
+ PixmapPtr pixmap = fbGetWindowPixmap(win);
+ RegionRec dst;
+ int dx, dy;
+
+ DBG(("%s origin=(%d, %d)\n", __FUNCTION__, origin.x, origin.y));
+
+ if (sna->kgem.wedged) {
+ sna_pixmap_move_to_cpu(pixmap, true);
+ fbCopyWindow(win, origin, src);
+ return;
+ }
+
+ dx = origin.x - win->drawable.x;
+ dy = origin.y - win->drawable.y;
+ RegionTranslate(src, -dx, -dy);
+
+ RegionNull(&dst);
+ RegionIntersect(&dst, &win->borderClip, src);
+#ifdef COMPOSITE
+ if (pixmap->screen_x || pixmap->screen_y)
+ RegionTranslate(&dst, -pixmap->screen_x, -pixmap->screen_y);
+#endif
+
+ miCopyRegion(&pixmap->drawable, &pixmap->drawable,
+ NULL, &dst, dx, dy, sna_copy_boxes, 0, NULL);
+
+ RegionUninit(&dst);
+}
+
+static Bool sna_change_window_attributes(WindowPtr win, unsigned long mask)
+{
+ DBG(("%s\n", __FUNCTION__));
+
+ /* Check if the fb layer wishes to modify the attached pixmaps,
+ * to fix up mismatches between the window and pixmap depths.
+ */
+ if (mask & CWBackPixmap && win->backgroundState == BackgroundPixmap) {
+ DBG(("%s: flushing background pixmap\n", __FUNCTION__));
+ sna_validate_pixmap(&win->drawable, win->background.pixmap);
+ }
+
+ if (mask & CWBorderPixmap && win->borderIsPixel == FALSE) {
+ DBG(("%s: flushing border pixmap\n", __FUNCTION__));
+ sna_validate_pixmap(&win->drawable, win->border.pixmap);
+ }
+
+ return fbChangeWindowAttributes(win, mask);
+}
+
+static void
+sna_add_traps(PicturePtr picture, INT16 x, INT16 y, int n, xTrap *t)
+{
+ DBG(("%s (%d, %d) x %d\n", __FUNCTION__, x, y, n));
+
+ sna_drawable_move_to_cpu(picture->pDrawable, true);
+
+ fbAddTraps(picture, x, y, n, t);
+}
+
+static void
+sna_accel_flush_callback(CallbackListPtr *list,
+ pointer user_data, pointer call_data)
+{
+ struct sna *sna = user_data;
+
+ if (sna->kgem.flush == 0 && list_is_empty(&sna->dirty_pixmaps))
+ return;
+
+ DBG(("%s\n", __FUNCTION__));
+
+ /* flush any pending damage from shadow copies to tfp clients */
+ while (!list_is_empty(&sna->dirty_pixmaps)) {
+ struct sna_pixmap *priv = list_first_entry(&sna->dirty_pixmaps,
+ struct sna_pixmap,
+ list);
+ sna_pixmap_move_to_gpu(priv->pixmap);
+ }
+
+ kgem_submit(&sna->kgem);
+}
+
+static void sna_deferred_free(struct sna *sna)
+{
+ struct sna_pixmap *priv, *next;
+
+ list_for_each_entry_safe(priv, next, &sna->deferred_free, list) {
+ if (priv->cpu_bo->gpu)
+ continue;
+
+ list_del(&priv->list);
+ kgem_bo_destroy(&sna->kgem, priv->cpu_bo);
+ fbDestroyPixmap(priv->pixmap);
+ free(priv);
+ }
+}
+
+static uint64_t read_timer(int fd)
+{
+ uint64_t count = 0;
+ int ret = read(fd, &count, sizeof(count));
+ return count;
+ (void)ret;
+}
+
+static struct sna_pixmap *sna_accel_scanout(struct sna *sna)
+{
+ PixmapPtr front = sna->shadow ? sna->shadow : sna->front;
+ struct sna_pixmap *priv = sna_pixmap(front);
+ return priv && priv->gpu_bo ? priv : NULL;
+}
+
+#if HAVE_SYS_TIMERFD_H
+#include <sys/timerfd.h>
+#include <errno.h>
+
+static void _sna_accel_disarm_timer(struct sna *sna, int id)
+{
+ struct itimerspec to;
+
+ DBG(("%s[%d] (time=%ld)\n", __FUNCTION__, id, (long)GetTimeInMillis()));
+
+ memset(&to, 0, sizeof(to));
+ timerfd_settime(sna->timer[id], 0, &to, NULL);
+ sna->timer_active &= ~(1<<id);
+}
+
+#define return_if_timer_active(id) do { \
+ if (sna->timer_active & (1<<(id))) \
+ return read_timer(sna->timer[id]) > 0; \
+} while (0)
+
+static Bool sna_accel_do_flush(struct sna *sna)
+{
+ struct itimerspec to;
+ struct sna_pixmap *priv;
+
+ return_if_timer_active(FLUSH_TIMER);
+
+ priv = sna_accel_scanout(sna);
+ if (priv == NULL)
+ return FALSE;
+
+ if (priv->cpu_damage == NULL && priv->gpu_bo->rq == NULL)
+ return FALSE;
+
+ if (sna->timer[FLUSH_TIMER] == -1)
+ return TRUE;
+
+ DBG(("%s, time=%ld\n", __FUNCTION__, (long)GetTimeInMillis()));
+
+ /* Initial redraw after 10ms. */
+ to.it_value.tv_sec = 0;
+ to.it_value.tv_nsec = 10 * 1000 * 1000;
+
+ /* Then periodic updates at 50Hz.*/
+ to.it_interval.tv_sec = 0;
+ to.it_interval.tv_nsec = 20 * 1000 * 1000;
+ timerfd_settime(sna->timer[FLUSH_TIMER], 0, &to, NULL);
+
+ sna->timer_active |= 1 << FLUSH_TIMER;
+ return FALSE;
+}
+
+static Bool sna_accel_do_expire(struct sna *sna)
+{
+ struct itimerspec to;
+
+ return_if_timer_active(EXPIRE_TIMER);
+
+ if (!kgem_needs_expire(&sna->kgem))
+ return FALSE;
+
+ if (sna->timer[EXPIRE_TIMER] == -1)
+ return TRUE;
+
+ /* Initial expiration after 5s. */
+ to.it_value.tv_sec = 5;
+ to.it_value.tv_nsec = 0;
+
+ /* Then periodic update every 1s.*/
+ to.it_interval.tv_sec = 1;
+ to.it_interval.tv_nsec = 0;
+ timerfd_settime(sna->timer[EXPIRE_TIMER], 0, &to, NULL);
+
+ sna->timer_active |= 1 << EXPIRE_TIMER;
+ return FALSE;
+}
+
+static void sna_accel_create_timers(struct sna *sna)
+{
+ int id;
+
+ for (id = 0; id < NUM_TIMERS; id++)
+ sna->timer[id] = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
+}
+#else
+static void sna_accel_create_timers(struct sna *sna)
+{
+ int id;
+
+ for (id = 0; id < NUM_TIMERS; id++)
+ sna->timer[id] = -1;
+}
+static Bool sna_accel_do_flush(struct sna *sna) { return sna_accel_scanout(sna) != NULL; }
+static Bool sna_accel_arm_expire(struct sna *sna) { return TRUE; }
+static void _sna_accel_disarm_timer(struct sna *sna, int id) { }
+#endif
+
+static void sna_accel_flush(struct sna *sna)
+{
+ struct sna_pixmap *priv = sna_accel_scanout(sna);
+
+ DBG(("%s (time=%ld)\n", __FUNCTION__, (long)GetTimeInMillis()));
+
+ sna_pixmap_move_to_gpu(priv->pixmap);
+ kgem_bo_flush(&sna->kgem, priv->gpu_bo);
+
+ if (priv->gpu_bo->rq == NULL)
+ _sna_accel_disarm_timer(sna, FLUSH_TIMER);
+}
+
+static void sna_accel_expire(struct sna *sna)
+{
+ DBG(("%s (time=%ld)\n", __FUNCTION__, (long)GetTimeInMillis()));
+
+ if (!kgem_expire_cache(&sna->kgem))
+ _sna_accel_disarm_timer(sna, EXPIRE_TIMER);
+}
+
+static void sna_accel_install_timers(struct sna *sna)
+{
+ if (sna->timer[FLUSH_TIMER] != -1)
+ AddGeneralSocket(sna->timer[FLUSH_TIMER]);
+
+ if (sna->timer[EXPIRE_TIMER] != -1)
+ AddGeneralSocket(sna->timer[EXPIRE_TIMER]);
+}
+
+Bool sna_accel_pre_init(struct sna *sna)
+{
+ sna_accel_create_timers(sna);
+ return TRUE;
+}
+
+Bool sna_accel_init(ScreenPtr screen, struct sna *sna)
+{
+ const char *backend;
+
+ if (!dixRegisterPrivateKey(&sna_pixmap_index, PRIVATE_PIXMAP, 0))
+ return FALSE;
+
+ if (!AddCallback(&FlushCallback, sna_accel_flush_callback, sna))
+ return FALSE;
+
+ if (!sna_glyphs_init(screen))
+ return FALSE;
+
+ list_init(&sna->dirty_pixmaps);
+ list_init(&sna->deferred_free);
+
+ sna_accel_install_timers(sna);
+
+ screen->CreateGC = sna_create_gc;
+ screen->GetImage = sna_get_image;
+ screen->GetSpans = sna_get_spans;
+ screen->CopyWindow = sna_copy_window;
+ screen->ChangeWindowAttributes = sna_change_window_attributes;
+ screen->CreatePixmap = sna_create_pixmap;
+ screen->DestroyPixmap = sna_destroy_pixmap;
+
+#ifdef RENDER
+ {
+ PictureScreenPtr ps = GetPictureScreenIfSet(screen);
+ if (ps) {
+ ps->Composite = sna_composite;
+ ps->CompositeRects = sna_composite_rectangles;
+ ps->Glyphs = sna_glyphs;
+ ps->UnrealizeGlyph = sna_glyph_unrealize;
+ ps->AddTraps = sna_add_traps;
+ ps->Trapezoids = sna_composite_trapezoids;
+ }
+ }
+#endif
+
+ backend = "no";
+ sna->have_render = false;
+ no_render_init(sna);
+
+#if !DEBUG_NO_RENDER
+ if (sna->chipset.info->gen >= 70) {
+ } else if (sna->chipset.info->gen >= 60) {
+ if ((sna->have_render = gen6_render_init(sna)))
+ backend = "Sandybridge";
+ } else if (sna->chipset.info->gen >= 50) {
+ if ((sna->have_render = gen5_render_init(sna)))
+ backend = "Ironlake";
+ } else if (sna->chipset.info->gen >= 40) {
+ if ((sna->have_render = gen4_render_init(sna)))
+ backend = "Broadwater";
+ } else if (sna->chipset.info->gen >= 30) {
+ if ((sna->have_render = gen3_render_init(sna)))
+ backend = "gen3";
+ } else if (sna->chipset.info->gen >= 20) {
+ if ((sna->have_render = gen2_render_init(sna)))
+ backend = "gen2";
+ }
+#endif
+ DBG(("%s(backend=%s, have_render=%d)\n",
+ __FUNCTION__, backend, sna->have_render));
+
+ xf86DrvMsg(sna->scrn->scrnIndex, X_INFO,
+ "SNA initialized with %s backend\n",
+ backend);
+
+ return TRUE;
+}
+
+Bool sna_accel_create(struct sna *sna)
+{
+ if (!sna_glyphs_create(sna))
+ return FALSE;
+
+ if (!sna_gradients_create(sna))
+ return FALSE;
+
+ return TRUE;
+}
+
+void sna_accel_close(struct sna *sna)
+{
+ sna_glyphs_close(sna);
+ sna_gradients_close(sna);
+
+ DeleteCallback(&FlushCallback, sna_accel_flush_callback, sna);
+}
+
+static void sna_accel_throttle(struct sna *sna)
+{
+ if (sna->flags & SNA_NO_THROTTLE)
+ return;
+
+ if (list_is_empty(&sna->kgem.requests))
+ return;
+
+ DBG(("%s (time=%ld)\n", __FUNCTION__, (long)GetTimeInMillis()));
+
+ kgem_throttle(&sna->kgem);
+}
+
+void sna_accel_block_handler(struct sna *sna)
+{
+ if (sna_accel_do_flush(sna))
+ sna_accel_flush(sna);
+
+ if (sna_accel_do_expire(sna))
+ sna_accel_expire(sna);
+
+ sna_accel_throttle(sna);
+}
+
+void sna_accel_wakeup_handler(struct sna *sna)
+{
+ _kgem_retire(&sna->kgem);
+ sna_deferred_free(sna);
+
+ if (sna->kgem.need_purge)
+ kgem_expire_cache(&sna->kgem);
+}
+
+void sna_accel_free(struct sna *sna)
+{
+ int id;
+
+ for (id = 0; id < NUM_TIMERS; id++)
+ if (sna->timer[id] != -1) {
+ close(sna->timer[id]);
+ sna->timer[id] = -1;
+ }
+}
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
new file mode 100644
index 00000000..cdcfc3b0
--- /dev/null
+++ b/src/sna/sna_blt.c
@@ -0,0 +1,1339 @@
+/*
+ * Based on code from intel_uxa.c and i830_xaa.c
+ * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+ * Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org>
+ * Copyright (c) 2009-2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_render.h"
+#include "sna_reg.h"
+
+#include <mipict.h>
+#include <fbpict.h>
+#include <xaarop.h>
+
+#if DEBUG_BLT
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+#define NO_BLT_COMPOSITE 0
+#define NO_BLT_COPY 0
+#define NO_BLT_COPY_BOXES 0
+#define NO_BLT_FILL 0
+#define NO_BLT_FILL_BOXES 0
+
+static const uint8_t copy_ROP[] = {
+ ROP_0, /* GXclear */
+ ROP_DSa, /* GXand */
+ ROP_SDna, /* GXandReverse */
+ ROP_S, /* GXcopy */
+ ROP_DSna, /* GXandInverted */
+ ROP_D, /* GXnoop */
+ ROP_DSx, /* GXxor */
+ ROP_DSo, /* GXor */
+ ROP_DSon, /* GXnor */
+ ROP_DSxn, /* GXequiv */
+ ROP_Dn, /* GXinvert */
+ ROP_SDno, /* GXorReverse */
+ ROP_Sn, /* GXcopyInverted */
+ ROP_DSno, /* GXorInverted */
+ ROP_DSan, /* GXnand */
+ ROP_1 /* GXset */
+};
+
+static const uint8_t fill_ROP[] = {
+ ROP_0,
+ ROP_DPa,
+ ROP_PDna,
+ ROP_P,
+ ROP_DPna,
+ ROP_D,
+ ROP_DPx,
+ ROP_DPo,
+ ROP_DPon,
+ ROP_PDxn,
+ ROP_Dn,
+ ROP_PDno,
+ ROP_Pn,
+ ROP_DPno,
+ ROP_DPan,
+ ROP_1
+};
+
+static void nop_done(struct sna *sna, const struct sna_composite_op *op)
+{
+}
+
+static void blt_done(struct sna *sna, const struct sna_composite_op *op)
+{
+ struct kgem *kgem = &sna->kgem;
+
+ DBG(("%s: nbatch=%d\n", __FUNCTION__, kgem->nbatch));
+ _kgem_set_mode(kgem, KGEM_BLT);
+}
+
+static bool sna_blt_fill_init(struct sna *sna,
+ struct sna_blt_state *blt,
+ struct kgem_bo *bo,
+ int bpp,
+ uint8_t alu,
+ uint32_t pixel)
+{
+ struct kgem *kgem = &sna->kgem;
+ int pitch;
+
+
+ blt->bo[0] = bo;
+
+ blt->cmd = XY_COLOR_BLT_CMD;
+ if (bpp == 32)
+ blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+
+ pitch = bo->pitch;
+ if (kgem->gen >= 40 && bo->tiling) {
+ blt->cmd |= BLT_DST_TILED;
+ pitch >>= 2;
+ }
+ if (pitch > MAXSHORT)
+ return FALSE;
+
+ blt->overwrites = alu == GXcopy || alu == GXclear;
+ blt->br13 = (fill_ROP[alu] << 16) | pitch;
+ switch (bpp) {
+ default: assert(0);
+ case 32: blt->br13 |= 1 << 25; /* RGB8888 */
+ case 16: blt->br13 |= 1 << 24; /* RGB565 */
+ case 8: break;
+ }
+
+ blt->pixel = pixel;
+
+ kgem_set_mode(kgem, KGEM_BLT);
+ if (!kgem_check_bo_fenced(kgem, bo, NULL))
+ _kgem_submit(kgem);
+
+ return TRUE;
+}
+
+static void sna_blt_fill_one(struct sna *sna,
+ const struct sna_blt_state *blt,
+ int x, int y,
+ int width, int height)
+{
+ struct kgem *kgem = &sna->kgem;
+ uint32_t *b;
+
+ DBG(("%s: (%d, %d) x (%d, %d): %08x\n",
+ __FUNCTION__, x, y, width, height, blt->pixel));
+
+ assert(x >= 0);
+ assert(y >= 0);
+ assert((y+height) * blt->bo[0]->pitch <= blt->bo[0]->size);
+
+ /* All too frequently one blt completely overwrites the previous */
+ if (kgem->nbatch >= 6 &&
+ blt->overwrites &&
+ kgem->batch[kgem->nbatch-6] == blt->cmd &&
+ kgem->batch[kgem->nbatch-4] == (y << 16 | x) &&
+ kgem->batch[kgem->nbatch-3] == ((y+height) << 16 | (x+width)) &&
+ kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[0]->handle) {
+ DBG(("%s: replacing last fill\n", __FUNCTION__));
+ kgem->batch[kgem->nbatch-5] = blt->br13;
+ kgem->batch[kgem->nbatch-1] = blt->pixel;
+ return;
+ }
+
+ if (!kgem_check_batch(kgem, 6) ||
+ kgem->nreloc + 1 > KGEM_RELOC_SIZE(kgem))
+ _kgem_submit(kgem);
+
+ b = kgem->batch + kgem->nbatch;
+ b[0] = blt->cmd;
+ b[1] = blt->br13;
+ b[2] = (y << 16) | x;
+ b[3] = ((y + height) << 16) | (x + width);
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4,
+ blt->bo[0],
+ I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER | KGEM_RELOC_FENCED,
+ 0);
+ b[5] = blt->pixel;
+ kgem->nbatch += 6;
+}
+
+static Bool sna_blt_copy_init(struct sna *sna,
+ struct sna_blt_state *blt,
+ struct kgem_bo *src,
+ struct kgem_bo *dst,
+ int bpp,
+ uint8_t alu)
+{
+ struct kgem *kgem = &sna->kgem;
+
+ blt->bo[0] = src;
+ blt->bo[1] = dst;
+
+ blt->cmd = XY_SRC_COPY_BLT_CMD;
+ if (bpp == 32)
+ blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+
+ blt->pitch[0] = src->pitch;
+ if (kgem->gen >= 40 && src->tiling) {
+ blt->cmd |= BLT_SRC_TILED;
+ blt->pitch[0] >>= 2;
+ }
+ if (blt->pitch[0] > MAXSHORT)
+ return FALSE;
+
+ blt->pitch[1] = dst->pitch;
+ if (kgem->gen >= 40 && dst->tiling) {
+ blt->cmd |= BLT_DST_TILED;
+ blt->pitch[1] >>= 2;
+ }
+ if (blt->pitch[1] > MAXSHORT)
+ return FALSE;
+
+ blt->overwrites = alu == GXcopy || alu == GXclear;
+ blt->br13 = (copy_ROP[alu] << 16) | blt->pitch[1];
+ switch (bpp) {
+ default: assert(0);
+ case 32: blt->br13 |= 1 << 25; /* RGB8888 */
+ case 16: blt->br13 |= 1 << 24; /* RGB565 */
+ case 8: break;
+ }
+
+ kgem_set_mode(kgem, KGEM_BLT);
+ if (!kgem_check_bo_fenced(kgem, src, dst, NULL))
+ _kgem_submit(kgem);
+
+ return TRUE;
+}
+
+static void sna_blt_copy_one(struct sna *sna,
+ const struct sna_blt_state *blt,
+ int src_x, int src_y,
+ int width, int height,
+ int dst_x, int dst_y)
+{
+ struct kgem *kgem = &sna->kgem;
+ uint32_t *b;
+
+ DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
+ __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));
+
+ assert(src_x >= 0);
+ assert(src_y >= 0);
+ assert((src_y + height) * blt->bo[0]->pitch <= blt->bo[0]->size);
+ assert(dst_x >= 0);
+ assert(dst_y >= 0);
+ assert((dst_y + height) * blt->bo[1]->pitch <= blt->bo[1]->size);
+ assert(width > 0);
+ assert(height > 0);
+
+ /* Compare against a previous fill */
+ if (kgem->nbatch >= 6 &&
+ blt->overwrites &&
+ kgem->batch[kgem->nbatch-6] == ((blt->cmd & ~XY_SRC_COPY_BLT_CMD) | XY_COLOR_BLT_CMD) &&
+ kgem->batch[kgem->nbatch-4] == (dst_y << 16 | dst_x) &&
+ kgem->batch[kgem->nbatch-3] == ((dst_y+height) << 16 | (dst_x+width)) &&
+ kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->handle) {
+ DBG(("%s: replacing last fill\n", __FUNCTION__));
+ b = kgem->batch + kgem->nbatch - 6;
+ b[0] = blt->cmd;
+ b[1] = blt->br13;
+ b[5] = (src_y << 16) | src_x;
+ b[6] = blt->pitch[0];
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7 - 6,
+ blt->bo[0],
+ I915_GEM_DOMAIN_RENDER << 16 | KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 8 - 6;
+ return;
+ }
+
+ if (kgem->nbatch + 8 > KGEM_BATCH_SIZE(kgem) ||
+ kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem))
+ _kgem_submit(kgem);
+
+ b = kgem->batch + kgem->nbatch;
+ b[0] = blt->cmd;
+ b[1] = blt->br13;
+ b[2] = (dst_y << 16) | dst_x;
+ b[3] = ((dst_y + height) << 16) | (dst_x + width);
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4,
+ blt->bo[1],
+ I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER | KGEM_RELOC_FENCED,
+ 0);
+ b[5] = (src_y << 16) | src_x;
+ b[6] = blt->pitch[0];
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7,
+ blt->bo[0],
+ I915_GEM_DOMAIN_RENDER << 16 | KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 8;
+}
+
+static Bool
+get_rgba_from_pixel(uint32_t pixel,
+ uint16_t *red,
+ uint16_t *green,
+ uint16_t *blue,
+ uint16_t *alpha,
+ uint32_t format)
+{
+ int rbits, bbits, gbits, abits;
+ int rshift, bshift, gshift, ashift;
+
+ rbits = PICT_FORMAT_R(format);
+ gbits = PICT_FORMAT_G(format);
+ bbits = PICT_FORMAT_B(format);
+ abits = PICT_FORMAT_A(format);
+
+ if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) {
+ rshift = gshift = bshift = ashift = 0;
+ } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
+ bshift = 0;
+ gshift = bbits;
+ rshift = gshift + gbits;
+ ashift = rshift + rbits;
+ } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
+ rshift = 0;
+ gshift = rbits;
+ bshift = gshift + gbits;
+ ashift = bshift + bbits;
+ } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
+ ashift = 0;
+ rshift = abits;
+ if (abits == 0)
+ rshift = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits);
+ gshift = rshift + rbits;
+ bshift = gshift + gbits;
+ } else {
+ return FALSE;
+ }
+
+ if (rbits) {
+ *red = ((pixel >> rshift) & ((1 << rbits) - 1)) << (16 - rbits);
+ while (rbits < 16) {
+ *red |= *red >> rbits;
+ rbits <<= 1;
+ }
+ } else
+ *red = 0;
+
+ if (gbits) {
+ *green = ((pixel >> gshift) & ((1 << gbits) - 1)) << (16 - gbits);
+ while (gbits < 16) {
+ *green |= *green >> gbits;
+ gbits <<= 1;
+ }
+ } else
+ *green = 0;
+
+ if (bbits) {
+ *blue = ((pixel >> bshift) & ((1 << bbits) - 1)) << (16 - bbits);
+ while (bbits < 16) {
+ *blue |= *blue >> bbits;
+ bbits <<= 1;
+ }
+ } else
+ *blue = 0;
+
+ if (abits) {
+ *alpha = ((pixel >> ashift) & ((1 << abits) - 1)) << (16 - abits);
+ while (abits < 16) {
+ *alpha |= *alpha >> abits;
+ abits <<= 1;
+ }
+ } else
+ *alpha = 0xffff;
+
+ return TRUE;
+}
+
+Bool
+sna_get_pixel_from_rgba(uint32_t * pixel,
+ uint16_t red,
+ uint16_t green,
+ uint16_t blue,
+ uint16_t alpha,
+ uint32_t format)
+{
+ int rbits, bbits, gbits, abits;
+ int rshift, bshift, gshift, ashift;
+
+ rbits = PICT_FORMAT_R(format);
+ gbits = PICT_FORMAT_G(format);
+ bbits = PICT_FORMAT_B(format);
+ abits = PICT_FORMAT_A(format);
+ if (abits == 0)
+ abits = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits);
+
+ if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) {
+ *pixel = alpha >> (16 - abits);
+ return TRUE;
+ }
+
+ if (!PICT_FORMAT_COLOR(format))
+ return FALSE;
+
+ if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) {
+ bshift = 0;
+ gshift = bbits;
+ rshift = gshift + gbits;
+ ashift = rshift + rbits;
+ } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
+ rshift = 0;
+ gshift = rbits;
+ bshift = gshift + gbits;
+ ashift = bshift + bbits;
+ } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
+ ashift = 0;
+ rshift = abits;
+ gshift = rshift + rbits;
+ bshift = gshift + gbits;
+ } else
+ return FALSE;
+
+ *pixel = 0;
+ *pixel |= (blue >> (16 - bbits)) << bshift;
+ *pixel |= (green >> (16 - gbits)) << gshift;
+ *pixel |= (red >> (16 - rbits)) << rshift;
+ *pixel |= (alpha >> (16 - abits)) << ashift;
+
+ return TRUE;
+}
+
+static uint32_t
+color_convert(uint32_t pixel,
+ uint32_t src_format,
+ uint32_t dst_format)
+{
+ DBG(("%s: src=%08x [%08x]\n", __FUNCTION__, pixel, src_format));
+
+ if (src_format != dst_format) {
+ uint16_t red, green, blue, alpha;
+
+ if (!get_rgba_from_pixel(pixel,
+ &red, &green, &blue, &alpha,
+ src_format))
+ return 0;
+
+ if (!sna_get_pixel_from_rgba(&pixel,
+ red, green, blue, alpha,
+ dst_format))
+ return 0;
+ }
+
+ DBG(("%s: dst=%08x [%08x]\n", __FUNCTION__, pixel, dst_format));
+ return pixel;
+}
+
+uint32_t
+sna_rgba_for_color(uint32_t color, int depth)
+{
+ return color_convert(color, sna_format_for_depth(depth), PICT_a8r8g8b8);
+}
+
+static uint32_t
+get_pixel(PicturePtr picture)
+{
+ PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable);
+
+ DBG(("%s: %p\n", __FUNCTION__, pixmap));
+
+ sna_pixmap_move_to_cpu(pixmap, false);
+ switch (pixmap->drawable.bitsPerPixel) {
+ case 32: return *(uint32_t *)pixmap->devPrivate.ptr;
+ case 16: return *(uint16_t *)pixmap->devPrivate.ptr;
+ default: return *(uint8_t *)pixmap->devPrivate.ptr;
+ }
+}
+
+static uint32_t
+get_solid_color(PicturePtr picture, uint32_t format)
+{
+ if (picture->pSourcePict) {
+ PictSolidFill *fill = (PictSolidFill *)picture->pSourcePict;
+ return color_convert(fill->color, PICT_a8r8g8b8, format);
+ } else
+ return color_convert(get_pixel(picture), picture->format, format);
+}
+
+static Bool
+is_solid(PicturePtr picture)
+{
+ if (picture->pSourcePict) {
+ if (picture->pSourcePict->type == SourcePictTypeSolidFill)
+ return TRUE;
+ }
+
+ if (picture->pDrawable) {
+ if (picture->pDrawable->width == 1 &&
+ picture->pDrawable->height == 1 &&
+ picture->repeat)
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+Bool
+sna_picture_is_solid(PicturePtr picture, uint32_t *color)
+{
+ if (!is_solid(picture))
+ return FALSE;
+
+ *color = get_solid_color(picture, PICT_a8r8g8b8);
+ return TRUE;
+}
+
+static Bool
+pixel_is_opaque(uint32_t pixel, uint32_t format)
+{
+ int abits;
+
+ abits = PICT_FORMAT_A(format);
+ if (!abits)
+ return TRUE;
+
+ if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A ||
+ PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) {
+ return (pixel & ((1 << abits) - 1)) == ((1 << abits) - 1);
+ } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB ||
+ PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) {
+ int ashift = PICT_FORMAT_BPP(format) - abits;
+ return (pixel >> ashift) == ((1 << abits) - 1);
+ } else
+ return FALSE;
+}
+
+static Bool
+is_opaque_solid(PicturePtr picture)
+{
+ if (picture->pSourcePict) {
+ PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict;
+ return (fill->color >> 24) == 0xff;
+ } else
+ return pixel_is_opaque(get_pixel(picture), picture->format);
+}
+
+fastcall
+static void blt_fill_composite(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ int x1, x2, y1, y2;
+
+ x1 = r->dst.x + op->dst.x;
+ y1 = r->dst.y + op->dst.y;
+ x2 = x1 + r->width;
+ y2 = y1 + r->height;
+
+ if (x1 < 0)
+ x1 = 0;
+ if (y1 < 0)
+ y1 = 0;
+
+ if (x2 > op->dst.width)
+ x2 = op->dst.width;
+ if (y2 > op->dst.height)
+ y2 = op->dst.height;
+
+ if (x2 <= x1 || y2 <= y1)
+ return;
+
+ sna_blt_fill_one(sna, &op->u.blt, x1, y1, x2-x1, y2-y1);
+}
+
+static void blt_fill_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int n)
+{
+ do {
+ sna_blt_fill_one(sna, &op->u.blt,
+ box->x1 + op->dst.x, box->y1 + op->dst.y,
+ box->x2 - box->x1, box->y2 - box->y1);
+ box++;
+ } while (--n);
+}
+
+static Bool
+prepare_blt_clear(struct sna *sna,
+ struct sna_composite_op *op)
+{
+ DBG(("%s\n", __FUNCTION__));
+
+ op->blt = blt_fill_composite;
+ op->boxes = blt_fill_composite_boxes;
+ op->done = blt_done;
+
+ return sna_blt_fill_init(sna, &op->u.blt,
+ op->dst.bo,
+ op->dst.pixmap->drawable.bitsPerPixel,
+ GXclear, 0);
+}
+
+static bool
+prepare_blt_fill(struct sna *sna,
+ struct sna_composite_op *op,
+ PicturePtr source)
+{
+ DBG(("%s\n", __FUNCTION__));
+
+ op->blt = blt_fill_composite;
+ op->boxes = blt_fill_composite_boxes;
+ op->done = blt_done;
+
+ return sna_blt_fill_init(sna, &op->u.blt, op->dst.bo,
+ op->dst.pixmap->drawable.bitsPerPixel,
+ GXcopy,
+ get_solid_color(source, op->dst.format));
+}
+
+fastcall static void
+blt_copy_composite(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ int x1, x2, y1, y2;
+ int src_x, src_y;
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
+ __FUNCTION__,
+ r->src.x, r->src.y,
+ r->dst.x, r->dst.y,
+ r->width, r->height));
+
+ /* XXX higher layer should have clipped? */
+
+ x1 = r->dst.x + op->dst.x;
+ y1 = r->dst.y + op->dst.y;
+ x2 = x1 + r->width;
+ y2 = y1 + r->height;
+
+ src_x = r->src.x - x1;
+ src_y = r->src.y - y1;
+
+ /* clip against dst */
+ if (x1 < 0)
+ x1 = 0;
+ if (y1 < 0)
+ y1 = 0;
+
+ if (x2 > op->dst.width)
+ x2 = op->dst.width;
+
+ if (y2 > op->dst.height)
+ y2 = op->dst.height;
+
+ DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
+
+ if (x2 <= x1 || y2 <= y1)
+ return;
+
+ sna_blt_copy_one(sna, &op->u.blt,
+ x1 + src_x, y1 + src_y,
+ x2 - x1, y2 - y1,
+ x1, y1);
+}
+
+static void blt_copy_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+ do {
+ DBG(("%s: box (%d, %d), (%d, %d)\n",
+ __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
+ sna_blt_copy_one(sna, &op->u.blt,
+ box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
+ box->x2 - box->x1, box->y2 - box->y1,
+ box->x1 + op->dst.x, box->y1 + op->dst.y);
+ box++;
+ } while(--nbox);
+}
+
+static Bool
+prepare_blt_copy(struct sna *sna,
+ struct sna_composite_op *op)
+{
+ PixmapPtr src = op->u.blt.src_pixmap;
+ struct sna_pixmap *priv = sna_pixmap(src);
+
+ if (priv->gpu_bo->tiling == I915_TILING_Y)
+ return FALSE;
+
+ if (!kgem_check_bo_fenced(&sna->kgem, priv->gpu_bo, NULL))
+ _kgem_submit(&sna->kgem);
+
+ DBG(("%s\n", __FUNCTION__));
+
+ op->blt = blt_copy_composite;
+ op->boxes = blt_copy_composite_boxes;
+ op->done = blt_done;
+
+ return sna_blt_copy_init(sna, &op->u.blt,
+ priv->gpu_bo,
+ op->dst.bo,
+ src->drawable.bitsPerPixel,
+ GXcopy);
+}
+
+static void blt_vmap_done(struct sna *sna, const struct sna_composite_op *op)
+{
+ struct kgem_bo *bo = (struct kgem_bo *)op->u.blt.src_pixmap;
+
+ blt_done(sna, op);
+ if (bo) {
+ struct kgem *kgem = &sna->kgem;
+ kgem_bo_sync(kgem, bo, true);
+ kgem_bo_destroy(kgem, bo);
+ }
+}
+
+fastcall static void
+blt_put_composite(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ PixmapPtr dst = op->dst.pixmap;
+ PixmapPtr src = op->u.blt.src_pixmap;
+ struct sna_pixmap *dst_priv = sna_pixmap(dst);
+ int pitch = src->devKind;
+ char *data = src->devPrivate.ptr;
+ int bpp = src->drawable.bitsPerPixel;
+
+ int16_t dst_x = r->dst.x + op->dst.x;
+ int16_t dst_y = r->dst.y + op->dst.y;
+ int16_t src_x = r->src.x + op->u.blt.sx;
+ int16_t src_y = r->src.y + op->u.blt.sy;
+
+ if (!dst_priv->pinned &&
+ dst_x <= 0 && dst_y <= 0 &&
+ dst_x + r->width >= op->dst.width &&
+ dst_y + r->height >= op->dst.height) {
+ data += (src_x - dst_x) * bpp / 8;
+ data += (src_y - dst_y) * pitch;
+
+ dst_priv->gpu_bo =
+ sna_replace(sna, dst_priv->gpu_bo,
+ r->width, r->height, bpp,
+ data, pitch);
+ } else {
+ BoxRec box;
+
+ box.x1 = dst_x;
+ box.y1 = dst_y;
+ box.x2 = dst_x + r->width;
+ box.y2 = dst_y + r->height;
+
+ sna_write_boxes(sna,
+ dst_priv->gpu_bo, 0, 0,
+ data, pitch, bpp, src_x, src_y,
+ &box, 1);
+ }
+}
+
+static void blt_put_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int n)
+{
+ PixmapPtr src = op->u.blt.src_pixmap;
+ struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d) x %d\n", __FUNCTION__,
+ op->u.blt.sx, op->u.blt.sy,
+ op->dst.x, op->dst.y, n));
+
+ if (n == 1 && !dst_priv->pinned &&
+ box->x2 - box->x1 == op->dst.width &&
+ box->y2 - box->y1 == op->dst.height) {
+ int pitch = src->devKind;
+ int bpp = src->drawable.bitsPerPixel / 8;
+ char *data = src->devPrivate.ptr;
+
+ data += (box->y1 + op->u.blt.sy) * pitch;
+ data += (box->x1 + op->u.blt.sx) * bpp;
+
+ dst_priv->gpu_bo =
+ sna_replace(sna,
+ op->dst.bo,
+ op->dst.width,
+ op->dst.height,
+ src->drawable.bitsPerPixel,
+ data, pitch);
+ } else {
+ sna_write_boxes(sna,
+ op->dst.bo, op->dst.x, op->dst.y,
+ src->devPrivate.ptr,
+ src->devKind,
+ src->drawable.bitsPerPixel,
+ op->u.blt.sx, op->u.blt.sy,
+ box, n);
+ }
+}
+
+static Bool
+prepare_blt_put(struct sna *sna,
+ struct sna_composite_op *op)
+{
+ PixmapPtr src = op->u.blt.src_pixmap;
+ struct sna_pixmap *priv = sna_pixmap(src);
+ struct kgem_bo *src_bo = NULL;
+ struct kgem_bo *free_bo = NULL;
+
+ DBG(("%s\n", __FUNCTION__));
+
+ if (priv) {
+ if (!priv->gpu_only) {
+ src_bo = priv->cpu_bo;
+ if (!src_bo) {
+ src_bo = kgem_create_map(&sna->kgem,
+ src->devPrivate.ptr,
+ pixmap_size(src),
+ 1);
+ priv->cpu_bo = src_bo;
+ }
+ }
+ } else {
+ src_bo = kgem_create_map(&sna->kgem,
+ src->devPrivate.ptr,
+ pixmap_size(src),
+ 0);
+ free_bo = src_bo;
+ }
+ if (src_bo) {
+ op->blt = blt_copy_composite;
+ op->boxes = blt_copy_composite_boxes;
+
+ op->u.blt.src_pixmap = (void *)free_bo;
+ op->done = blt_vmap_done;
+
+ src_bo->pitch = src->devKind;
+ if (!sna_blt_copy_init(sna, &op->u.blt,
+ src_bo, op->dst.bo,
+ op->dst.pixmap->drawable.bitsPerPixel,
+ GXcopy))
+ return FALSE;
+ } else {
+ op->blt = blt_put_composite;
+ op->boxes = blt_put_composite_boxes;
+ op->done = nop_done;
+ }
+
+ return TRUE;
+}
+
+static Bool
+has_gpu_area(PixmapPtr pixmap, int x, int y, int w, int h)
+{
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ BoxRec area;
+
+ if (!priv)
+ return FALSE;
+ if (!priv->gpu_bo)
+ return FALSE;
+
+ if (priv->cpu_damage == NULL)
+ return TRUE;
+
+ area.x1 = x;
+ area.y1 = y;
+ area.x2 = x + w;
+ area.y2 = y + h;
+ return sna_damage_contains_box(priv->cpu_damage,
+ &area) == PIXMAN_REGION_OUT;
+}
+
+static Bool
+has_cpu_area(PixmapPtr pixmap, int x, int y, int w, int h)
+{
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ BoxRec area;
+
+ if (!priv)
+ return TRUE;
+ if (!priv->gpu_bo)
+ return TRUE;
+ if (priv->gpu_only)
+ return FALSE;
+
+ if (priv->gpu_damage == NULL)
+ return TRUE;
+
+ area.x1 = x;
+ area.y1 = y;
+ area.x2 = x + w;
+ area.y2 = y + h;
+ return sna_damage_contains_box(priv->gpu_damage,
+ &area) == PIXMAN_REGION_OUT;
+}
+
+Bool
+sna_blt_composite(struct sna *sna,
+ uint32_t op,
+ PicturePtr src,
+ PicturePtr dst,
+ int16_t x, int16_t y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t width, int16_t height,
+ struct sna_composite_op *tmp)
+{
+ struct sna_blt_state *blt = &tmp->u.blt;
+ PictFormat src_format = src->format;
+ struct sna_pixmap *priv;
+ int16_t tx, ty;
+ Bool ret;
+
+#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
+ return FALSE;
+#endif
+
+ DBG(("%s (%d, %d), (%d, %d), %dx%d\n",
+ __FUNCTION__, x, y, dst_x, dst_y, width, height));
+
+ switch (dst->pDrawable->bitsPerPixel) {
+ case 8:
+ case 16:
+ case 32:
+ break;
+ default:
+ DBG(("%s: unhandled bpp: %d\n", __FUNCTION__,
+ dst->pDrawable->bitsPerPixel));
+ return FALSE;
+ }
+
+ tmp->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
+ priv = sna_pixmap_move_to_gpu(tmp->dst.pixmap);
+ if (priv == NULL || priv->gpu_bo->tiling == I915_TILING_Y) {
+ DBG(("%s: dst not on the gpu or using Y-tiling\n",
+ __FUNCTION__));
+ return FALSE;
+ }
+
+ tmp->dst.format = dst->format;
+ tmp->dst.width = tmp->dst.pixmap->drawable.width;
+ tmp->dst.height = tmp->dst.pixmap->drawable.height;
+ get_drawable_deltas(dst->pDrawable, tmp->dst.pixmap,
+ &tmp->dst.x, &tmp->dst.y);
+ tmp->dst.bo = priv->gpu_bo;
+ if (!priv->gpu_only)
+ tmp->damage = &priv->gpu_damage;
+
+ if (!kgem_check_bo_fenced(&sna->kgem, priv->gpu_bo, NULL))
+ _kgem_submit(&sna->kgem);
+
+ if (op == PictOpClear)
+ return prepare_blt_clear(sna, tmp);
+
+ if (is_solid(src)) {
+ if (op == PictOpOver && is_opaque_solid(src))
+ op = PictOpSrc;
+
+ if (op != PictOpSrc) {
+ DBG(("%s: unsuported op [%d] for blitting\n",
+ __FUNCTION__, op));
+ return FALSE;
+ }
+
+ return prepare_blt_fill(sna, tmp, src);
+ }
+
+ if (!src->pDrawable) {
+ DBG(("%s: unsuported procedural source\n",
+ __FUNCTION__));
+ return FALSE;
+ }
+
+ if (src->pDrawable->bitsPerPixel != dst->pDrawable->bitsPerPixel) {
+ DBG(("%s: mismatching bpp src=%d, dst=%d\n",
+ __FUNCTION__,
+ src->pDrawable->bitsPerPixel,
+ dst->pDrawable->bitsPerPixel));
+ return FALSE;
+ }
+
+ if (op == PictOpOver && PICT_FORMAT_A(src_format) == 0)
+ op = PictOpSrc;
+
+ if (op != PictOpSrc) {
+ DBG(("%s: unsuported op [%d] for blitting\n",
+ __FUNCTION__, op));
+ return FALSE;
+ }
+
+ if (src->filter == PictFilterConvolution) {
+ DBG(("%s: convolutions filters not handled\n",
+ __FUNCTION__));
+ return FALSE;
+ }
+
+ if (!(dst->format == src_format ||
+ dst->format == PICT_FORMAT(PICT_FORMAT_BPP(src_format),
+ PICT_FORMAT_TYPE(src_format),
+ 0,
+ PICT_FORMAT_R(src_format),
+ PICT_FORMAT_G(src_format),
+ PICT_FORMAT_B(src_format)))) {
+ DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
+ __FUNCTION__, (unsigned)src_format, dst->format));
+ return FALSE;
+ }
+
+ if (!sna_transform_is_integer_translation(src->transform, &tx, &ty)) {
+ DBG(("%s: source transform is not an integer translation\n",
+ __FUNCTION__));
+ return FALSE;
+ }
+ x += tx;
+ y += ty;
+
+ /* XXX tiling? */
+ if (x < 0 || y < 0 ||
+ x + width > src->pDrawable->width ||
+ y + height > src->pDrawable->height) {
+ DBG(("%s: source extends outside of valid area\n",
+ __FUNCTION__));
+ return FALSE;
+ }
+
+ blt->src_pixmap = get_drawable_pixmap(src->pDrawable);
+ get_drawable_deltas(src->pDrawable, blt->src_pixmap, &tx, &ty);
+ x += tx + src->pDrawable->x;
+ y += ty + src->pDrawable->y;
+ assert(x >= 0);
+ assert(y >= 0);
+ assert(x + width <= blt->src_pixmap->drawable.width);
+ assert(y + height <= blt->src_pixmap->drawable.height);
+
+ tmp->u.blt.sx = x - dst_x;
+ tmp->u.blt.sy = y - dst_y;
+ DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d)\n",
+ __FUNCTION__,
+ tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy));
+
+ if (has_gpu_area(blt->src_pixmap, x, y, width, height))
+ ret = prepare_blt_copy(sna, tmp);
+ else if (has_cpu_area(blt->src_pixmap, x, y, width, height))
+ ret = prepare_blt_put(sna, tmp);
+ else if (sna_pixmap_move_to_gpu(blt->src_pixmap))
+ ret = prepare_blt_copy(sna, tmp);
+ else
+ ret = prepare_blt_put(sna, tmp);
+
+ return ret;
+}
+
+static void sna_blt_fill_op_blt(struct sna *sna,
+ const struct sna_fill_op *op,
+ int16_t x, int16_t y,
+ int16_t width, int16_t height)
+{
+ sna_blt_fill_one(sna, &op->base.u.blt, x, y, width, height);
+}
+
+static void sna_blt_fill_op_done(struct sna *sna,
+ const struct sna_fill_op *fill)
+{
+ blt_done(sna, &fill->base);
+}
+
+bool sna_blt_fill(struct sna *sna, uint8_t alu,
+ struct kgem_bo *bo, int bpp,
+ uint32_t pixel,
+ struct sna_fill_op *fill)
+{
+#if DEBUG_NO_BLT || NO_BLT_FILL
+ return FALSE;
+#endif
+
+ DBG(("%s(alu=%d, pixel=%x, bpp=%d)\n", __FUNCTION__, alu, pixel, bpp));
+
+ if (bo->tiling == I915_TILING_Y) {
+ DBG(("%s: rejected due to incompatible Y-tiling\n",
+ __FUNCTION__));
+ return FALSE;
+ }
+
+ if (!sna_blt_fill_init(sna, &fill->base.u.blt,
+ bo, bpp, alu, pixel))
+ return FALSE;
+
+ fill->blt = sna_blt_fill_op_blt;
+ fill->done = sna_blt_fill_op_done;
+ return TRUE;
+}
+
+static void sna_blt_copy_op_blt(struct sna *sna,
+ const struct sna_copy_op *op,
+ int16_t src_x, int16_t src_y,
+ int16_t width, int16_t height,
+ int16_t dst_x, int16_t dst_y)
+{
+ sna_blt_copy_one(sna, &op->base.u.blt,
+ src_x, src_y,
+ width, height,
+ dst_x, dst_y);
+}
+
+static void sna_blt_copy_op_done(struct sna *sna,
+ const struct sna_copy_op *op)
+{
+ blt_done(sna, &op->base);
+}
+
+bool sna_blt_copy(struct sna *sna, uint8_t alu,
+ struct kgem_bo *src,
+ struct kgem_bo *dst,
+ int bpp,
+ struct sna_copy_op *op)
+{
+#if DEBUG_NO_BLT || NO_BLT_COPY
+ return FALSE;
+#endif
+
+ if (src->tiling == I915_TILING_Y)
+ return FALSE;
+
+ if (dst->tiling == I915_TILING_Y)
+ return FALSE;
+
+ if (!sna_blt_copy_init(sna, &op->base.u.blt,
+ src, dst,
+ bpp, alu))
+ return FALSE;
+
+ op->blt = sna_blt_copy_op_blt;
+ op->done = sna_blt_copy_op_done;
+ return TRUE;
+}
+
+Bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
+ struct kgem_bo *bo, int bpp,
+ uint32_t color,
+ const BoxRec *box, int nbox)
+{
+ struct kgem *kgem = &sna->kgem;
+ int br13, cmd;
+
+#if DEBUG_NO_BLT || NO_BLT_FILL_BOXES
+ return FALSE;
+#endif
+
+ DBG(("%s (%d, %08x, %d) x %d\n",
+ __FUNCTION__, bpp, color, alu, nbox));
+
+ if (bo->tiling == I915_TILING_Y)
+ return FALSE;
+
+ cmd = XY_COLOR_BLT_CMD;
+ if (bpp == 32)
+ cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+
+ br13 = bo->pitch;
+ if (kgem->gen >= 40 && bo->tiling) {
+ cmd |= BLT_DST_TILED;
+ br13 >>= 2;
+ }
+ if (br13 > MAXSHORT)
+ return FALSE;
+
+ br13 |= fill_ROP[alu] << 16;
+ switch (bpp) {
+ default: assert(0);
+ case 32: br13 |= 1 << 25; /* RGB8888 */
+ case 16: br13 |= 1 << 24; /* RGB565 */
+ case 8: break;
+ }
+
+ kgem_set_mode(kgem, KGEM_BLT);
+ if (!kgem_check_batch(kgem, 6) ||
+ !kgem_check_bo_fenced(kgem, bo, NULL) ||
+ kgem->nreloc + 1 > KGEM_RELOC_SIZE(kgem))
+ _kgem_submit(kgem);
+
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (6*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 6;
+ if (nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = KGEM_RELOC_SIZE(kgem) - kgem->nreloc;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG(("%s: box=((%d, %d), (%d, %d))\n", __FUNCTION__,
+ box->x1, box->y1, box->x2, box->y2));
+
+ assert(box->x1 >= 0);
+ assert(box->y1 >= 0);
+
+ b[0] = cmd;
+ b[1] = br13;
+ b[2] = box->y1 << 16 | box->x1;
+ b[3] = box->y2 << 16 | box->x2;
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4,
+ bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = color;
+ kgem->nbatch += 6;
+ box++;
+ } while (--nbox_this_time);
+
+ if (nbox)
+ _kgem_submit(kgem);
+ } while (nbox);
+
+ _kgem_set_mode(kgem, KGEM_BLT);
+ return TRUE;
+}
+
+Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
+ struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+ struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ int bpp, const BoxRec *box, int nbox)
+{
+ struct kgem *kgem = &sna->kgem;
+ int src_pitch, br13, cmd;
+
+#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
+ return FALSE;
+#endif
+
+ DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
+ __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
+ src_bo->tiling, dst_bo->tiling,
+ src_bo->pitch, dst_bo->pitch));
+
+ if (src_bo->tiling == I915_TILING_Y)
+ return FALSE;
+
+ if (dst_bo->tiling == I915_TILING_Y)
+ return FALSE;
+
+ cmd = XY_SRC_COPY_BLT_CMD;
+ if (bpp == 32)
+ cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+
+ src_pitch = src_bo->pitch;
+ if (kgem->gen >= 40 && src_bo->tiling) {
+ cmd |= BLT_SRC_TILED;
+ src_pitch >>= 2;
+ }
+ if (src_pitch > MAXSHORT)
+ return FALSE;
+
+ br13 = dst_bo->pitch;
+ if (kgem->gen >= 40 && dst_bo->tiling) {
+ cmd |= BLT_DST_TILED;
+ br13 >>= 2;
+ }
+ if (br13 > MAXSHORT)
+ return FALSE;
+
+ br13 |= copy_ROP[alu] << 16;
+ switch (bpp) {
+ default: assert(0);
+ case 32: br13 |= 1 << 25; /* RGB8888 */
+ case 16: br13 |= 1 << 24; /* RGB565 */
+ case 8: break;
+ }
+
+ kgem_set_mode(kgem, KGEM_BLT);
+ if (!kgem_check_batch(kgem, 8) ||
+ !kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL) ||
+ kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem))
+ _kgem_submit(kgem);
+
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG((" %s: box=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1));
+
+ assert(box->x1 + src_dx >= 0);
+ assert(box->y1 + src_dy >= 0);
+
+ assert(box->x1 + dst_dx >= 0);
+ assert(box->y1 + dst_dy >= 0);
+
+ b[0] = cmd;
+ b[1] = br13;
+ b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
+ b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4,
+ dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
+ b[6] = src_pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7,
+ src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 8;
+ box++;
+ } while (--nbox_this_time);
+
+ if (nbox)
+ _kgem_submit(kgem);
+ } while (nbox);
+
+ _kgem_set_mode(kgem, KGEM_BLT);
+ return TRUE;
+}
diff --git a/src/sna/sna_composite.c b/src/sna/sna_composite.c
new file mode 100644
index 00000000..27b1ff3c
--- /dev/null
+++ b/src/sna/sna_composite.c
@@ -0,0 +1,722 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_render.h"
+#include "sna_render_inline.h"
+
+#include <mipict.h>
+#include <fbpict.h>
+
+#if DEBUG_COMPOSITE
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+static void dst_move_area_to_cpu(PicturePtr picture,
+ uint8_t op,
+ int x, int y,
+ int width, int height)
+{
+ RegionRec area;
+ BoxRec box;
+
+ DBG(("%s: (%d, %d), (%d %d)\n", __FUNCTION__, x, y, width, height));
+
+ box.x1 = x;
+ box.y1 = y;
+ box.x2 = x + width;
+ box.y2 = y + height;
+ RegionInit(&area, &box, 1);
+ if (picture->pCompositeClip)
+ RegionIntersect(&area, &area, picture->pCompositeClip);
+ sna_drawable_move_region_to_cpu(picture->pDrawable, &area, true);
+ RegionUninit(&area);
+}
+
+#define BOUND(v) (INT16) ((v) < MINSHORT ? MINSHORT : (v) > MAXSHORT ? MAXSHORT : (v))
+
+static inline pixman_bool_t
+clip_to_dst(pixman_region16_t *region,
+ pixman_region16_t *clip,
+ int dx,
+ int dy)
+{
+ DBG(("%s: region: %dx[(%d, %d), (%d, %d)], clip: %dx[(%d, %d), (%d, %d)]\n",
+ __FUNCTION__,
+ pixman_region_n_rects(region),
+ region->extents.x1, region->extents.y1,
+ region->extents.x2, region->extents.y2,
+ pixman_region_n_rects(clip),
+ clip->extents.x1, clip->extents.y1,
+ clip->extents.x2, clip->extents.y2));
+
+ if (pixman_region_n_rects(region) == 1 &&
+ pixman_region_n_rects(clip) == 1) {
+ pixman_box16_t *r = pixman_region_rectangles(region, NULL);
+ pixman_box16_t *c = pixman_region_rectangles(clip, NULL);
+ int v;
+
+ if (r->x1 < (v = c->x1 + dx))
+ r->x1 = BOUND(v);
+ if (r->x2 > (v = c->x2 + dx))
+ r->x2 = BOUND(v);
+ if (r->y1 < (v = c->y1 + dy))
+ r->y1 = BOUND(v);
+ if (r->y2 > (v = c->y2 + dy))
+ r->y2 = BOUND(v);
+
+ if (r->x1 >= r->x2 || r->y1 >= r->y2)
+ pixman_region_init(region);
+ } else if (!pixman_region_not_empty(clip)) {
+ return FALSE;
+ } else {
+ if (dx || dy)
+ pixman_region_translate(region, -dx, -dy);
+ if (!pixman_region_intersect(region, region, clip))
+ return FALSE;
+ if (dx || dy)
+ pixman_region_translate(region, dx, dy);
+ }
+ return pixman_region_not_empty(region);
+}
+
+static inline Bool
+clip_to_src(RegionPtr region, PicturePtr p, int dx, int dy)
+{
+ Bool result;
+
+ if (p->clientClipType == CT_NONE)
+ return TRUE;
+
+ pixman_region_translate(p->clientClip,
+ p->clipOrigin.x + dx,
+ p->clipOrigin.y + dy);
+
+ result = RegionIntersect(region, region, p->clientClip);
+
+ pixman_region_translate(p->clientClip,
+ -(p->clipOrigin.x + dx),
+ -(p->clipOrigin.y + dy));
+
+ return result && pixman_region_not_empty(region);
+}
+
+Bool
+sna_compute_composite_region(RegionPtr region,
+ PicturePtr src, PicturePtr mask, PicturePtr dst,
+ INT16 src_x, INT16 src_y,
+ INT16 mask_x, INT16 mask_y,
+ INT16 dst_x, INT16 dst_y,
+ CARD16 width, CARD16 height)
+{
+ int v;
+
+ DBG(("%s: dst=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ dst_x, dst_y,
+ width, height));
+
+ region->extents.x1 = dst_x < 0 ? 0 : dst_x;
+ v = dst_x + width;
+ if (v > dst->pDrawable->width)
+ v = dst->pDrawable->width;
+ region->extents.x2 = v;
+
+ region->extents.y1 = dst_y < 0 ? 0 : dst_y;
+ v = dst_y + height;
+ if (v > dst->pDrawable->height)
+ v = dst->pDrawable->height;
+ region->extents.y2 = v;
+
+ region->data = 0;
+
+ DBG(("%s: initial clip against dst->pDrawable: (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ region->extents.x1, region->extents.y1,
+ region->extents.x2, region->extents.y2));
+
+ if (region->extents.x1 >= region->extents.x2 ||
+ region->extents.y1 >= region->extents.y2)
+ return FALSE;
+
+ region->extents.x1 += dst->pDrawable->x;
+ region->extents.x2 += dst->pDrawable->x;
+ region->extents.y1 += dst->pDrawable->y;
+ region->extents.y2 += dst->pDrawable->y;
+
+ dst_x += dst->pDrawable->x;
+ dst_y += dst->pDrawable->y;
+
+ /* clip against dst */
+ if (!clip_to_dst(region, dst->pCompositeClip, 0, 0))
+ return FALSE;
+
+ DBG(("%s: clip against dst->pCompositeClip: (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ region->extents.x1, region->extents.y1,
+ region->extents.x2, region->extents.y2));
+
+ if (dst->alphaMap) {
+ if (!clip_to_dst(region, dst->alphaMap->pCompositeClip,
+ -dst->alphaOrigin.x,
+ -dst->alphaOrigin.y)) {
+ pixman_region_fini (region);
+ return FALSE;
+ }
+ }
+
+ /* clip against src */
+ if (src->pDrawable) {
+ src_x += src->pDrawable->x;
+ src_y += src->pDrawable->y;
+ }
+ if (!clip_to_src(region, src, dst_x - src_x, dst_y - src_y)) {
+ pixman_region_fini (region);
+ return FALSE;
+ }
+ DBG(("%s: clip against src: (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ region->extents.x1, region->extents.y1,
+ region->extents.x2, region->extents.y2));
+
+ if (src->alphaMap) {
+ if (!clip_to_src(region, src->alphaMap,
+ dst_x - (src_x - src->alphaOrigin.x),
+ dst_y - (src_y - src->alphaOrigin.y))) {
+ pixman_region_fini(region);
+ return FALSE;
+ }
+ }
+
+ /* clip against mask */
+ if (mask) {
+ if (mask->pDrawable) {
+ mask_x += mask->pDrawable->x;
+ mask_y += mask->pDrawable->y;
+ }
+ if (!clip_to_src(region, mask, dst_x - mask_x, dst_y - mask_y)) {
+ pixman_region_fini(region);
+ return FALSE;
+ }
+ if (mask->alphaMap) {
+ if (!clip_to_src(region, mask->alphaMap,
+ dst_x - (mask_x - mask->alphaOrigin.x),
+ dst_y - (mask_y - mask->alphaOrigin.y))) {
+ pixman_region_fini(region);
+ return FALSE;
+ }
+ }
+
+ DBG(("%s: clip against mask: (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ region->extents.x1, region->extents.y1,
+ region->extents.x2, region->extents.y2));
+ }
+
+ return pixman_region_not_empty(region);
+}
+
+static void
+trim_extents(BoxPtr extents, const PicturePtr p, int dx, int dy)
+{
+ const BoxPtr box = REGION_EXTENTS(NULL, p->pCompositeClip);
+
+ DBG(("%s: trim((%d, %d), (%d, %d)) against ((%d, %d), (%d, %d)) + (%d, %d)\n",
+ __FUNCTION__,
+ extents->x1, extents->y1, extents->x2, extents->y2,
+ box->x1, box->y1, box->x2, box->y2,
+ dx, dy));
+
+ if (extents->x1 < box->x1 + dx)
+ extents->x1 = box->x1 + dx;
+ if (extents->x2 > box->x2 + dx)
+ extents->x2 = box->x2 + dx;
+
+ if (extents->y1 < box->y1 + dy)
+ extents->y1 = box->y1 + dy;
+ if (extents->y2 > box->y2 + dy)
+ extents->y2 = box->y2 + dy;
+}
+
+static void
+_trim_source_extents(BoxPtr extents, const PicturePtr p, int dx, int dy)
+{
+ if (p->clientClipType != CT_NONE)
+ trim_extents(extents, p, dx, dy);
+}
+
+static void
+trim_source_extents(BoxPtr extents, const PicturePtr p, int dx, int dy)
+{
+ if (p->pDrawable) {
+ dx += p->pDrawable->x;
+ dy += p->pDrawable->y;
+ }
+ _trim_source_extents(extents, p, dx, dy);
+ if (p->alphaMap)
+ _trim_source_extents(extents, p->alphaMap,
+ dx - p->alphaOrigin.x,
+ dy - p->alphaOrigin.y);
+
+ DBG(("%s: -> (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ extents->x1, extents->y1,
+ extents->x2, extents->y2));
+}
+
+Bool
+sna_compute_composite_extents(BoxPtr extents,
+ PicturePtr src, PicturePtr mask, PicturePtr dst,
+ INT16 src_x, INT16 src_y,
+ INT16 mask_x, INT16 mask_y,
+ INT16 dst_x, INT16 dst_y,
+ CARD16 width, CARD16 height)
+{
+ int v;
+
+ DBG(("%s: dst=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ dst_x, dst_y,
+ width, height));
+
+ extents->x1 = dst_x < 0 ? 0 : dst_x;
+ v = dst_x + width;
+ if (v > dst->pDrawable->width)
+ v = dst->pDrawable->width;
+ extents->x2 = v;
+
+ extents->y1 = dst_y < 0 ? 0 : dst_y;
+ v = dst_y + height;
+ if (v > dst->pDrawable->height)
+ v = dst->pDrawable->height;
+
+ DBG(("%s: initial clip against dst->pDrawable: (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ extents->x1, extents->y1,
+ extents->x2, extents->y2));
+
+ if (extents->x1 >= extents->x2 ||
+ extents->y1 >= extents->y2)
+ return FALSE;
+
+ extents->x1 += dst->pDrawable->x;
+ extents->x2 += dst->pDrawable->x;
+ extents->y1 += dst->pDrawable->y;
+ extents->y2 += dst->pDrawable->y;
+
+ dst_x += dst->pDrawable->x;
+ dst_y += dst->pDrawable->y;
+
+ /* clip against dst */
+ trim_extents(extents, dst, 0, 0);
+ if (dst->alphaMap)
+ trim_extents(extents, dst->alphaMap,
+ -dst->alphaOrigin.x,
+ -dst->alphaOrigin.y);
+
+ DBG(("%s: clip against dst: (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ extents->x1, extents->y1,
+ extents->x2, extents->y2));
+
+ trim_source_extents(extents, src, dst_x - src_x, dst_y - src_y);
+ if (mask)
+ trim_source_extents(extents, mask,
+ dst_x - mask_x, dst_y - mask_y);
+
+ return extents->x1 < extents->x2 && extents->y1 < extents->y2;
+}
+
+#if DEBUG_COMPOSITE
+static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function)
+{
+ if (box->x1 < 0 || box->y1 < 0 ||
+ box->x2 > pixmap->drawable.width ||
+ box->y2 > pixmap->drawable.height)
+ {
+ ErrorF("%s: damage box is beyond the pixmap: box=(%d, %d), (%d, %d), pixmap=(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1, box->x2, box->y2,
+ pixmap->drawable.width,
+ pixmap->drawable.height);
+ assert(0);
+ }
+}
+#define assert_pixmap_contains_box(p, b) _assert_pixmap_contains_box(p, b, __FUNCTION__)
+#else
+#define assert_pixmap_contains_box(p, b)
+#endif
+
+static void apply_damage(struct sna_composite_op *op, RegionPtr region)
+{
+ DBG(("%s: damage=%p, region=%d\n",
+ __FUNCTION__, op->damage, REGION_NUM_RECTS(region)));
+
+ if (op->damage == NULL)
+ return;
+
+ RegionTranslate(region, op->dst.x, op->dst.y);
+
+ assert_pixmap_contains_box(op->dst.pixmap, RegionExtents(region));
+ sna_damage_add(op->damage, region);
+}
+
+void
+sna_composite(CARD8 op,
+ PicturePtr src,
+ PicturePtr mask,
+ PicturePtr dst,
+ INT16 src_x, INT16 src_y,
+ INT16 mask_x, INT16 mask_y,
+ INT16 dst_x, INT16 dst_y,
+ CARD16 width, CARD16 height)
+{
+ struct sna *sna = to_sna_from_drawable(dst->pDrawable);
+ struct sna_composite_op tmp;
+ RegionRec region;
+
+ DBG(("%s(%d src=(%d, %d), mask=(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
+ __FUNCTION__, op,
+ src_x, src_y,
+ mask_x, mask_y,
+ dst_x, dst_y, dst->pDrawable->x, dst->pDrawable->y,
+ width, height));
+
+ if (sna->kgem.wedged) {
+ DBG(("%s: fallback -- wedged\n", __FUNCTION__));
+ goto fallback;
+ }
+
+ if (dst->alphaMap || src->alphaMap || (mask && mask->alphaMap)) {
+ DBG(("%s: fallback due to unhandled alpha-map\n", __FUNCTION__));
+ goto fallback;
+ }
+
+ if (too_small(sna, dst->pDrawable) &&
+ !picture_is_gpu(src) && !picture_is_gpu(mask)) {
+ DBG(("%s: fallback due to too small\n", __FUNCTION__));
+ goto fallback;
+ }
+
+ if (!sna_compute_composite_region(&region,
+ src, mask, dst,
+ src_x, src_y,
+ mask_x, mask_y,
+ dst_x, dst_y,
+ width, height))
+ return;
+
+ DBG(("%s: composite region extents: (%d, %d), (%d, %d) + (%d, %d)\n",
+ __FUNCTION__,
+ region.extents.x1, region.extents.y1,
+ region.extents.x2, region.extents.y2,
+ get_drawable_dx(dst->pDrawable),
+ get_drawable_dy(dst->pDrawable)));
+
+ memset(&tmp, 0, sizeof(tmp));
+ if (!sna->render.composite(sna,
+ op, src, mask, dst,
+ src_x, src_y,
+ mask_x, mask_y,
+ dst_x + dst->pDrawable->x,
+ dst_y + dst->pDrawable->y,
+ width, height,
+ &tmp)) {
+ DBG(("%s: fallback due unhandled composite op\n", __FUNCTION__));
+ goto fallback;
+ }
+
+ tmp.boxes(sna, &tmp,
+ REGION_RECTS(&region),
+ REGION_NUM_RECTS(&region));
+ apply_damage(&tmp, &region);
+ tmp.done(sna, &tmp);
+
+ REGION_UNINIT(NULL, &region);
+ return;
+
+fallback:
+ DBG(("%s -- fallback dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
+ __FUNCTION__,
+ dst_x, dst_y,
+ dst->pDrawable->x, dst->pDrawable->y,
+ width, height));
+
+ dst_move_area_to_cpu(dst, op,
+ dst_x + dst->pDrawable->x,
+ dst_y + dst->pDrawable->y,
+ width, height);
+ if (src->pDrawable)
+ sna_drawable_move_to_cpu(src->pDrawable, false);
+ if (mask && mask->pDrawable)
+ sna_drawable_move_to_cpu(mask->pDrawable, false);
+
+ DBG(("%s: fallback -- fbCompposite\n", __FUNCTION__));
+ fbComposite(op, src, mask, dst,
+ src_x, src_y,
+ mask_x, mask_y,
+ dst_x, dst_y,
+ width, height);
+}
+
+static Bool
+_pixman_region_init_clipped_rectangles(pixman_region16_t *region,
+ int num_rects, xRectangle *rects,
+ int tx, int ty,
+ int maxx, int maxy)
+{
+ pixman_box16_t stack_boxes[64], *boxes = stack_boxes;
+ pixman_bool_t ret;
+ int i, j;
+
+ if (num_rects > ARRAY_SIZE(stack_boxes)) {
+ boxes = malloc(sizeof(pixman_box16_t) * num_rects);
+ if (boxes == NULL)
+ return FALSE;
+ }
+
+ for (i = j = 0; i < num_rects; i++) {
+ boxes[j].x1 = rects[i].x + tx;
+ if (boxes[j].x1 < 0)
+ boxes[j].x1 = 0;
+
+ boxes[j].y1 = rects[i].y + ty;
+ if (boxes[j].y1 < 0)
+ boxes[j].y1 = 0;
+
+ boxes[j].x2 = rects[i].x + rects[i].width;
+ if (boxes[j].x2 > maxx)
+ boxes[j].x2 = maxx;
+ boxes[j].x2 += tx;
+
+ boxes[j].y2 = rects[i].y + rects[i].height;
+ if (boxes[j].y2 > maxy)
+ boxes[j].y2 = maxy;
+ boxes[j].y2 += ty;
+
+ if (boxes[j].x2 > boxes[j].x1 && boxes[j].y2 > boxes[j].y1)
+ j++;
+ }
+
+ ret = TRUE;
+ if (j)
+ ret = pixman_region_init_rects(region, boxes, j);
+ else
+ pixman_region_init(region);
+
+ if (boxes != stack_boxes)
+ free(boxes);
+
+ DBG(("%s: nrects=%d, region=(%d, %d), (%d, %d) x %d\n",
+ __FUNCTION__, num_rects,
+ region->extents.x1, region->extents.y1,
+ region->extents.x2, region->extents.y2,
+ pixman_region_n_rects(region)));
+ return ret;
+}
+
+void
+sna_composite_rectangles(CARD8 op,
+ PicturePtr dst,
+ xRenderColor *color,
+ int num_rects,
+ xRectangle *rects)
+{
+ struct sna *sna = to_sna_from_drawable(dst->pDrawable);
+ PixmapPtr pixmap;
+ struct sna_pixmap *priv;
+ pixman_region16_t region;
+ pixman_box16_t *boxes;
+ int16_t dst_x, dst_y;
+ int num_boxes;
+ int error;
+
+ DBG(("%s(op=%d, %08x x %d [(%d, %d)x(%d, %d) ...])\n",
+ __FUNCTION__, op,
+ (color->alpha >> 8 << 24) |
+ (color->red >> 8 << 16) |
+ (color->green >> 8 << 8) |
+ (color->blue >> 8 << 0),
+ num_rects,
+ rects[0].x, rects[0].y, rects[0].width, rects[0].height));
+
+ if (!num_rects)
+ return;
+
+ if (!pixman_region_not_empty(dst->pCompositeClip)) {
+ DBG(("%s: empty clip, skipping\n", __FUNCTION__));
+ return;
+ }
+
+ if (!_pixman_region_init_clipped_rectangles(&region,
+ num_rects, rects,
+ dst->pDrawable->x, dst->pDrawable->y,
+ dst->pDrawable->width, dst->pDrawable->height))
+ {
+ DBG(("%s: allocation failed for region\n", __FUNCTION__));
+ return;
+ }
+
+ DBG(("%s: drawable extents (%d, %d),(%d, %d) x %d\n",
+ __FUNCTION__,
+ RegionExtents(&region)->x1, RegionExtents(&region)->y1,
+ RegionExtents(&region)->x2, RegionExtents(&region)->y2,
+ RegionNumRects(&region)));
+
+ if (!pixman_region_intersect(&region, &region, dst->pCompositeClip) ||
+ !pixman_region_not_empty(&region)) {
+ DBG(("%s: zero-intersection between rectangles and clip\n",
+ __FUNCTION__));
+ pixman_region_fini(&region);
+ return;
+ }
+
+ DBG(("%s: clipped extents (%d, %d),(%d, %d) x %d\n",
+ __FUNCTION__,
+ RegionExtents(&region)->x1, RegionExtents(&region)->y1,
+ RegionExtents(&region)->x2, RegionExtents(&region)->y2,
+ RegionNumRects(&region)));
+
+ pixmap = get_drawable_pixmap(dst->pDrawable);
+ get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y);
+ pixman_region_translate(&region, dst_x, dst_y);
+
+ DBG(("%s: pixmap +(%d, %d) extents (%d, %d),(%d, %d)\n",
+ __FUNCTION__, dst_x, dst_y,
+ RegionExtents(&region)->x1, RegionExtents(&region)->y1,
+ RegionExtents(&region)->x2, RegionExtents(&region)->y2));
+
+ if (sna->kgem.wedged)
+ goto fallback;
+
+ if (dst->alphaMap) {
+ DBG(("%s: fallback, dst has an alpha-map\n", __FUNCTION__));
+ goto fallback;
+ }
+
+ boxes = pixman_region_rectangles(&region, &num_boxes);
+
+ if (op == PictOpClear) {
+ color->red = color->green = color->blue = color->alpha = 0;
+ } else if (color->alpha >= 0xff00 && op == PictOpOver) {
+ color->alpha = 0xffff;
+ op = PictOpSrc;
+ }
+
+ if (too_small(sna, dst->pDrawable)) {
+ DBG(("%s: fallback, dst is too small\n", __FUNCTION__));
+ goto fallback;
+ }
+
+ priv = sna_pixmap_move_to_gpu(pixmap);
+ if (priv == NULL) {
+ DBG(("%s: fallback due to no GPU bo\n", __FUNCTION__));
+ goto fallback;
+ }
+
+ if (!sna->render.fill_boxes(sna, op, dst->format, color,
+ pixmap, priv->gpu_bo,
+ boxes, num_boxes)) {
+ DBG(("%s: fallback - acceleration failed\n", __FUNCTION__));
+ goto fallback;
+ }
+
+ if (!priv->gpu_only) {
+ assert_pixmap_contains_box(pixmap, RegionExtents(&region));
+ sna_damage_add(&priv->gpu_damage, &region);
+ }
+
+ goto done;
+
+fallback:
+ DBG(("%s: fallback\n", __FUNCTION__));
+ sna_drawable_move_region_to_cpu(&pixmap->drawable, &region, true);
+
+ if (op == PictOpSrc || op == PictOpClear) {
+ PixmapPtr pixmap = get_drawable_pixmap(dst->pDrawable);
+ int nbox = REGION_NUM_RECTS(&region);
+ BoxPtr box = REGION_RECTS(&region);
+ uint32_t pixel;
+
+ if (sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ dst->format)) {
+ do {
+ DBG(("%s: fallback fill: (%d, %d)x(%d, %d) %08x\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1,
+ pixel));
+
+ pixman_fill(pixmap->devPrivate.ptr,
+ pixmap->devKind/sizeof(uint32_t),
+ pixmap->drawable.bitsPerPixel,
+ box->x1, box->y1,
+ box->x2 - box->x1,
+ box->y2 - box->y1,
+ pixel);
+ box++;
+ } while (--nbox);
+ }
+ } else {
+ PicturePtr src;
+
+ src = CreateSolidPicture(0, color, &error);
+ if (src) {
+ do {
+ fbComposite(op, src, NULL, dst,
+ 0, 0,
+ 0, 0,
+ rects->x, rects->y,
+ rects->width, rects->height);
+ rects++;
+ } while (--num_rects);
+ FreePicture(src, 0);
+ }
+ }
+
+done:
+ /* XXX xserver-1.8: CompositeRects is not tracked by Damage, so we must
+ * manually append the damaged regions ourselves.
+ */
+ DamageRegionAppend(&pixmap->drawable, &region);
+ DamageRegionProcessPending(&pixmap->drawable);
+
+ pixman_region_fini(&region);
+ return;
+}
diff --git a/src/sna/sna_damage.c b/src/sna/sna_damage.c
new file mode 100644
index 00000000..21af2d0a
--- /dev/null
+++ b/src/sna/sna_damage.c
@@ -0,0 +1,944 @@
+/**************************************************************************
+
+Copyright (c) 2011 Intel Corporation
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_damage.h"
+
+#if DEBUG_DAMAGE
+#undef DBG
+#define DBG(x) ErrorF x
+
+static const char *_debug_describe_region(char *buf, int max,
+ RegionPtr region)
+{
+ BoxPtr extents;
+ BoxPtr box;
+ int n;
+ int len;
+
+ if (region == NULL)
+ return "nil";
+
+ n = REGION_NUM_RECTS(region);
+ if (n == 0)
+ return "[0]";
+
+ extents = REGION_EXTENTS(NULL, region);
+ if (n == 1) {
+ sprintf(buf,
+ "[(%d, %d), (%d, %d)]",
+ extents->x1, extents->y1,
+ extents->x2, extents->y2);
+ return buf;
+ }
+
+ len = sprintf(buf,
+ "[(%d, %d), (%d, %d) x %d: ",
+ extents->x1, extents->y1,
+ extents->x2, extents->y2,
+ n) + 3;
+ max -= 2;
+ box = REGION_RECTS(region);
+ while (n--) {
+ char tmp[80];
+ int this;
+
+ this = snprintf(tmp, sizeof(tmp),
+ "((%d, %d), (%d, %d))%s",
+ box->x1, box->y1,
+ box->x2, box->y2,
+ n ? ", ..." : "");
+ box++;
+
+ if (this > max - len)
+ break;
+
+ len -= 3;
+ memcpy(buf + len, tmp, this);
+ len += this;
+ }
+ buf[len++] = ']';
+ buf[len] = '\0';
+ return buf;
+}
+
+static const char *_debug_describe_damage(char *buf, int max,
+ struct sna_damage *damage)
+{
+ char damage_str[500], region_str[500];
+ int str_max;
+
+ if (damage == NULL)
+ return "None";
+
+ str_max = max/2 - 6;
+ if (str_max > sizeof(damage_str))
+ str_max = sizeof(damage_str);
+
+ sprintf(damage_str, "[%d : ...]", damage->n);
+ snprintf(buf, max, "[[(%d, %d), (%d, %d)]: %s + %s]",
+ damage->extents.x1, damage->extents.y1,
+ damage->extents.x2, damage->extents.y2,
+ _debug_describe_region(region_str, str_max,
+ &damage->region),
+ damage_str);
+
+ return buf;
+}
+
+#endif
+
+struct sna_damage_box {
+ struct list list;
+ uint16_t size, remain;
+};
+
+struct sna_damage_elt {
+ enum mode {
+ ADD,
+ SUBTRACT,
+ } mode;
+ BoxPtr box;
+ uint16_t n;
+};
+
+static struct sna_damage *_sna_damage_create(void)
+{
+ struct sna_damage *damage;
+
+ damage = malloc(sizeof(*damage));
+ damage->n = 0;
+ damage->size = 16;
+ damage->elts = malloc(sizeof(*damage->elts) * damage->size);
+ list_init(&damage->boxes);
+ damage->last_box = NULL;
+ damage->mode = ADD;
+ pixman_region_init(&damage->region);
+ damage->extents.x1 = damage->extents.y1 = MAXSHORT;
+ damage->extents.x2 = damage->extents.y2 = MINSHORT;
+
+ return damage;
+}
+
+static BoxPtr _sna_damage_create_boxes(struct sna_damage *damage,
+ int count)
+{
+ struct sna_damage_box *box;
+ int n;
+
+ if (damage->last_box && damage->last_box->remain >= count) {
+ box = damage->last_box;
+ n = box->size - box->remain;
+ DBG((" %s(%d): reuse last box, used=%d, remain=%d\n",
+ __FUNCTION__, count, n, box->remain));
+ box->remain -= count;
+ if (box->remain == 0)
+ damage->last_box = NULL;
+ return (BoxPtr)(box+1) + n;
+ }
+
+ n = ALIGN(count, 64);
+
+ DBG((" %s(%d->%d): new\n", __FUNCTION__, count, n));
+
+ box = malloc(sizeof(*box) + sizeof(BoxRec)*n);
+ box->size = n;
+ box->remain = n - count;
+ list_add(&box->list, &damage->boxes);
+
+ damage->last_box = box;
+ return (BoxPtr)(box+1);
+}
+
+static void
+_sna_damage_create_elt(struct sna_damage *damage,
+ enum mode mode,
+ const BoxRec *boxes, int count)
+{
+ struct sna_damage_elt *elt;
+
+ DBG((" %s(%s): n=%d, prev=(%s, remain %d)\n", __FUNCTION__,
+ mode == ADD ? "add" : "subtract",
+ damage->n,
+ damage->n ? damage->elts[damage->n-1].mode == ADD ? "add" : "subtract" : "none",
+ damage->last_box ? damage->last_box->remain : 0));
+
+ if (damage->last_box && damage->elts[damage->n-1].mode == mode) {
+ int n;
+
+ n = count;
+ if (n > damage->last_box->remain)
+ n = damage->last_box->remain;
+
+ elt = damage->elts + damage->n-1;
+ memcpy(elt->box + elt->n, boxes, n * sizeof(BoxRec));
+ elt->n += n;
+ damage->last_box->remain -= n;
+ if (damage->last_box->remain == 0)
+ damage->last_box = NULL;
+
+ count -=n;
+ boxes += n;
+ if (count == 0)
+ return;
+ }
+
+ if (damage->n == damage->size) {
+ int newsize = damage->size * 2;
+ struct sna_damage_elt *newelts = realloc(damage->elts,
+ newsize*sizeof(*elt));
+ if (newelts == NULL)
+ return;
+
+ damage->elts = newelts;
+ damage->size = newsize;
+ }
+
+ DBG((" %s(): new elt\n", __FUNCTION__));
+
+ elt = damage->elts + damage->n++;
+ elt->mode = mode;
+ elt->n = count;
+ elt->box = memcpy(_sna_damage_create_boxes(damage, count),
+ boxes, count * sizeof(BoxRec));
+}
+
+static void free_list(struct list *head)
+{
+ while (!list_is_empty(head)) {
+ struct list *l = head->next;
+ list_del(l);
+ free(l);
+ }
+}
+
+static void __sna_damage_reduce(struct sna_damage *damage)
+{
+ int n, m, j;
+ int nboxes;
+ BoxPtr boxes;
+ pixman_region16_t tmp, *region = &damage->region;
+
+ DBG((" reduce: before damage.n=%d region.n=%d\n",
+ damage->n, REGION_NUM_RECTS(region)));
+
+ m = 0;
+ nboxes = damage->elts[0].n;
+ boxes = damage->elts[0].box;
+ for (n = 1; n < damage->n; n++) {
+ if (damage->elts[n].mode != damage->elts[m].mode) {
+ if (!boxes) {
+ boxes = malloc(sizeof(BoxRec)*nboxes);
+ nboxes = 0;
+ for (j = m; j < n; j++) {
+ memcpy(boxes+nboxes,
+ damage->elts[j].box,
+ damage->elts[j].n*sizeof(BoxRec));
+ nboxes += damage->elts[j].n;
+ }
+ }
+
+ pixman_region_init_rects(&tmp, boxes, nboxes);
+ if (damage->elts[m].mode == ADD)
+ pixman_region_union(region, region, &tmp);
+ else
+ pixman_region_subtract(region, region, &tmp);
+ pixman_region_fini(&tmp);
+
+ if (boxes != damage->elts[m].box)
+ free(boxes);
+
+ m = n;
+ boxes = damage->elts[n].box;
+ nboxes = damage->elts[n].n;
+ } else {
+ boxes = NULL;
+ nboxes += damage->elts[n].n;
+ }
+ }
+
+ if (!boxes) {
+ boxes = malloc(sizeof(BoxRec)*nboxes);
+ nboxes = 0;
+ for (j = m; j < n; j++) {
+ memcpy(boxes+nboxes,
+ damage->elts[j].box,
+ damage->elts[j].n*sizeof(BoxRec));
+ nboxes += damage->elts[j].n;
+ }
+ }
+
+ pixman_region_init_rects(&tmp, boxes, nboxes);
+ if (damage->elts[m].mode == ADD)
+ pixman_region_union(region, region, &tmp);
+ else
+ pixman_region_subtract(region, region, &tmp);
+ pixman_region_fini(&tmp);
+
+ damage->extents = region->extents;
+
+ if (boxes != damage->elts[m].box)
+ free(boxes);
+
+ damage->n = 0;
+ free_list(&damage->boxes);
+ damage->last_box = NULL;
+ damage->mode = ADD;
+
+ DBG((" reduce: after region.n=%d\n", REGION_NUM_RECTS(region)));
+}
+
+inline static struct sna_damage *__sna_damage_add(struct sna_damage *damage,
+ RegionPtr region)
+{
+ if (!RegionNotEmpty(region))
+ return damage;
+
+ if (!damage)
+ damage = _sna_damage_create();
+
+ if (damage->mode == SUBTRACT)
+ __sna_damage_reduce(damage);
+ damage->mode = ADD;
+
+ if (REGION_NUM_RECTS(&damage->region) <= 1) {
+ pixman_region_union(&damage->region, &damage->region, region);
+ damage->extents = damage->region.extents;
+ return damage;
+ }
+
+ if (pixman_region_contains_rectangle(&damage->region,
+ &region->extents) == PIXMAN_REGION_IN)
+ return damage;
+
+ _sna_damage_create_elt(damage, ADD,
+ REGION_RECTS(region),
+ REGION_NUM_RECTS(region));
+
+ if (damage->extents.x1 > region->extents.x1)
+ damage->extents.x1 = region->extents.x1;
+ if (damage->extents.x2 < region->extents.x2)
+ damage->extents.x2 = region->extents.x2;
+
+ if (damage->extents.y1 > region->extents.y1)
+ damage->extents.y1 = region->extents.y1;
+ if (damage->extents.y2 < region->extents.y2)
+ damage->extents.y2 = region->extents.y2;
+
+ return damage;
+}
+
+#if DEBUG_DAMAGE
+fastcall struct sna_damage *_sna_damage_add(struct sna_damage *damage,
+ RegionPtr region)
+{
+ char region_buf[120];
+ char damage_buf[1000];
+
+ DBG(("%s(%s + %s)\n", __FUNCTION__,
+ _debug_describe_damage(damage_buf, sizeof(damage_buf), damage),
+ _debug_describe_region(region_buf, sizeof(region_buf), region)));
+
+ damage = __sna_damage_add(damage, region);
+
+ ErrorF(" = %s\n",
+ _debug_describe_damage(damage_buf, sizeof(damage_buf), damage));
+
+ return damage;
+}
+#else
+fastcall struct sna_damage *_sna_damage_add(struct sna_damage *damage,
+ RegionPtr region)
+{
+ return __sna_damage_add(damage, region);
+}
+#endif
+
+inline static struct sna_damage *__sna_damage_add_box(struct sna_damage *damage,
+ const BoxRec *box)
+{
+ if (box->y2 <= box->y1 || box->x2 <= box->x1)
+ return damage;
+
+ if (!damage)
+ damage = _sna_damage_create();
+
+ if (damage->mode == SUBTRACT)
+ __sna_damage_reduce(damage);
+ damage->mode = ADD;
+
+ if (REGION_NUM_RECTS(&damage->region) == 0) {
+ pixman_region_init_rects(&damage->region, box, 1);
+ damage->extents = *box;
+ return damage;
+ }
+
+ if (pixman_region_contains_rectangle(&damage->region,
+ (BoxPtr)box) == PIXMAN_REGION_IN)
+ return damage;
+
+ _sna_damage_create_elt(damage, ADD, box, 1);
+
+ if (damage->extents.x1 > box->x1)
+ damage->extents.x1 = box->x1;
+ if (damage->extents.x2 < box->x2)
+ damage->extents.x2 = box->x2;
+
+ if (damage->extents.y1 > box->y1)
+ damage->extents.y1 = box->y1;
+ if (damage->extents.y2 < box->y2)
+ damage->extents.y2 = box->y2;
+
+ return damage;
+}
+
+#if DEBUG_DAMAGE
+fastcall struct sna_damage *_sna_damage_add_box(struct sna_damage *damage,
+ const BoxRec *box)
+{
+ char damage_buf[1000];
+
+ DBG(("%s(%s + [(%d, %d), (%d, %d)])\n", __FUNCTION__,
+ _debug_describe_damage(damage_buf, sizeof(damage_buf), damage),
+ box->x1, box->y1, box->x2, box->y2));
+
+ damage = __sna_damage_add_box(damage, box);
+
+ ErrorF(" = %s\n",
+ _debug_describe_damage(damage_buf, sizeof(damage_buf), damage));
+
+ return damage;
+}
+#else
+fastcall struct sna_damage *_sna_damage_add_box(struct sna_damage *damage,
+ const BoxRec *box)
+{
+ return __sna_damage_add_box(damage, box);
+}
+#endif
+
+struct sna_damage *_sna_damage_all(struct sna_damage *damage,
+ int width, int height)
+{
+ DBG(("%s(%d, %d)\n", __FUNCTION__, width, height));
+
+ if (damage) {
+ free_list(&damage->boxes);
+ pixman_region_fini(&damage->region);
+ damage->n = 0;
+ damage->last_box = NULL;
+ } else
+ damage = _sna_damage_create();
+
+ pixman_region_init_rect(&damage->region, 0, 0, width, height);
+ damage->extents = damage->region.extents;
+ damage->mode = ADD;
+
+ return damage;
+}
+
+static inline Bool sna_damage_maybe_contains_box(struct sna_damage *damage,
+ const BoxRec *box)
+{
+ if (box->x2 <= damage->extents.x1 ||
+ box->x1 >= damage->extents.x2)
+ return FALSE;
+
+ if (box->y2 <= damage->extents.y1 ||
+ box->y1 >= damage->extents.y2)
+ return FALSE;
+
+ return TRUE;
+}
+
+static struct sna_damage *__sna_damage_subtract(struct sna_damage *damage,
+ RegionPtr region)
+{
+ if (damage == NULL)
+ return NULL;
+
+ if (!RegionNotEmpty(&damage->region)) {
+ __sna_damage_destroy(damage);
+ return NULL;
+ }
+
+ if (!RegionNotEmpty(region))
+ return damage;
+
+ if (!sna_damage_maybe_contains_box(damage, &region->extents))
+ return damage;
+
+ if (damage->n == 0) {
+ if (pixman_region_equal(region, &damage->region)) {
+ __sna_damage_destroy(damage);
+ return NULL;
+ }
+
+ if (!pixman_region_not_empty(&damage->region)) {
+ __sna_damage_destroy(damage);
+ return NULL;
+ }
+
+ if (REGION_NUM_RECTS(&damage->region) == 1 &&
+ REGION_NUM_RECTS(region) == 1) {
+ pixman_region_subtract(&damage->region,
+ &damage->region,
+ region);
+ damage->extents = damage->region.extents;
+ return damage;
+ }
+ }
+
+ damage->mode = SUBTRACT;
+ _sna_damage_create_elt(damage, SUBTRACT,
+ REGION_RECTS(region),
+ REGION_NUM_RECTS(region));
+
+ return damage;
+}
+
+#if DEBUG_DAMAGE
+fastcall struct sna_damage *_sna_damage_subtract(struct sna_damage *damage,
+ RegionPtr region)
+{
+ char damage_buf[1000];
+ char region_buf[120];
+
+ ErrorF("%s(%s - %s)...\n", __FUNCTION__,
+ _debug_describe_damage(damage_buf, sizeof(damage_buf), damage),
+ _debug_describe_region(region_buf, sizeof(region_buf), region));
+
+ damage = __sna_damage_subtract(damage, region);
+
+ ErrorF(" = %s\n",
+ _debug_describe_damage(damage_buf, sizeof(damage_buf), damage));
+
+ return damage;
+}
+#else
+fastcall struct sna_damage *_sna_damage_subtract(struct sna_damage *damage,
+ RegionPtr region)
+{
+ return __sna_damage_subtract(damage, region);
+}
+#endif
+
+inline static struct sna_damage *__sna_damage_subtract_box(struct sna_damage *damage,
+ const BoxRec *box)
+{
+ if (damage == NULL)
+ return NULL;
+
+ if (!RegionNotEmpty(&damage->region)) {
+ __sna_damage_destroy(damage);
+ return NULL;
+ }
+
+ if (!sna_damage_maybe_contains_box(damage, box))
+ return damage;
+
+ if (damage->n == 0) {
+ if (!pixman_region_not_empty(&damage->region)) {
+ __sna_damage_destroy(damage);
+ return NULL;
+ }
+
+ if (REGION_NUM_RECTS(&damage->region) == 1) {
+ pixman_region16_t region;
+
+ pixman_region_init_rects(&region, box, 1);
+ pixman_region_subtract(&damage->region,
+ &damage->region,
+ &region);
+ damage->extents = damage->region.extents;
+ return damage;
+ }
+ }
+
+ damage->mode = SUBTRACT;
+ _sna_damage_create_elt(damage, SUBTRACT, box, 1);
+
+ return damage;
+}
+
+#if DEBUG_DAMAGE
+fastcall struct sna_damage *_sna_damage_subtract_box(struct sna_damage *damage,
+ const BoxRec *box)
+{
+ char damage_buf[1000];
+
+ ErrorF("%s(%s - (%d, %d), (%d, %d))...\n", __FUNCTION__,
+ _debug_describe_damage(damage_buf, sizeof(damage_buf), damage),
+ box->x1, box->y1, box->x2, box->y2);
+
+ damage = __sna_damage_subtract_box(damage, box);
+
+ ErrorF(" = %s\n",
+ _debug_describe_damage(damage_buf, sizeof(damage_buf), damage));
+
+ return damage;
+}
+#else
+fastcall struct sna_damage *_sna_damage_subtract_box(struct sna_damage *damage,
+ const BoxRec *box)
+{
+ return __sna_damage_subtract_box(damage, box);
+}
+#endif
+
+static int _sna_damage_contains_box(struct sna_damage *damage,
+ const BoxPtr box)
+{
+ if (!damage)
+ return PIXMAN_REGION_OUT;;
+
+ if (!sna_damage_maybe_contains_box(damage, box))
+ return PIXMAN_REGION_OUT;
+
+ if (damage->n)
+ __sna_damage_reduce(damage);
+
+ return pixman_region_contains_rectangle(&damage->region, box);
+}
+
+#if DEBUG_DAMAGE
+int sna_damage_contains_box(struct sna_damage *damage,
+ const BoxPtr box)
+{
+ char damage_buf[1000];
+ int ret;
+
+ DBG(("%s(%s, [(%d, %d), (%d, %d)])\n", __FUNCTION__,
+ _debug_describe_damage(damage_buf, sizeof(damage_buf), damage),
+ box->x1, box->y1, box->x2, box->y2));
+
+ ret = _sna_damage_contains_box(damage, box);
+ ErrorF(" = %d\n", ret);
+
+ return ret;
+}
+#else
+int sna_damage_contains_box(struct sna_damage *damage,
+ const BoxPtr box)
+{
+ return _sna_damage_contains_box(damage, box);
+}
+#endif
+
+static Bool _sna_damage_intersect(struct sna_damage *damage,
+ RegionPtr region, RegionPtr result)
+{
+ if (!damage)
+ return FALSE;
+
+ if (region->extents.x2 <= damage->extents.x1 ||
+ region->extents.x1 >= damage->extents.x2)
+ return FALSE;
+
+ if (region->extents.y2 <= damage->extents.y1 ||
+ region->extents.y1 >= damage->extents.y2)
+ return FALSE;
+
+ if (damage->n)
+ __sna_damage_reduce(damage);
+
+ if (!pixman_region_not_empty(&damage->region))
+ return FALSE;
+
+ RegionNull(result);
+ RegionIntersect(result, &damage->region, region);
+
+ return RegionNotEmpty(result);
+}
+
+#if DEBUG_DAMAGE
+Bool sna_damage_intersect(struct sna_damage *damage,
+ RegionPtr region, RegionPtr result)
+{
+ char damage_buf[1000];
+ char region_buf[120];
+ Bool ret;
+
+ ErrorF("%s(%s, %s)...\n", __FUNCTION__,
+ _debug_describe_damage(damage_buf, sizeof(damage_buf), damage),
+ _debug_describe_region(region_buf, sizeof(region_buf), region));
+
+ ret = _sna_damage_intersect(damage, region, result);
+ ErrorF(" = %d %s\n",
+ ret,
+ _debug_describe_region(region_buf, sizeof(region_buf), result));
+
+ return ret;
+}
+#else
+Bool sna_damage_intersect(struct sna_damage *damage,
+ RegionPtr region, RegionPtr result)
+{
+ return _sna_damage_intersect(damage, region, result);
+}
+#endif
+
+static int _sna_damage_get_boxes(struct sna_damage *damage, BoxPtr *boxes)
+{
+ if (!damage)
+ return 0;
+
+ if (damage->n)
+ __sna_damage_reduce(damage);
+
+ *boxes = REGION_RECTS(&damage->region);
+ return REGION_NUM_RECTS(&damage->region);
+}
+
+struct sna_damage *_sna_damage_reduce(struct sna_damage *damage)
+{
+ DBG(("%s()\n", __FUNCTION__));
+
+ if (damage->n)
+ __sna_damage_reduce(damage);
+
+ if (!pixman_region_not_empty(&damage->region)) {
+ __sna_damage_destroy(damage);
+ damage = NULL;
+ }
+
+ return damage;
+}
+
+#if DEBUG_DAMAGE
+int sna_damage_get_boxes(struct sna_damage *damage, BoxPtr *boxes)
+{
+ char damage_buf[1000];
+ int count;
+
+ ErrorF("%s(%s)...\n", __FUNCTION__,
+ _debug_describe_damage(damage_buf, sizeof(damage_buf), damage));
+
+ count = _sna_damage_get_boxes(damage, boxes);
+ ErrorF(" = %d\n", count);
+
+ return count;
+}
+#else
+int sna_damage_get_boxes(struct sna_damage *damage, BoxPtr *boxes)
+{
+ return _sna_damage_get_boxes(damage, boxes);
+}
+#endif
+
+void __sna_damage_destroy(struct sna_damage *damage)
+{
+ free(damage->elts);
+
+ free_list(&damage->boxes);
+
+ pixman_region_fini(&damage->region);
+ free(damage);
+}
+
+#if DEBUG_DAMAGE && TEST_DAMAGE
+struct sna_damage_selftest{
+ int width, height;
+};
+
+static void st_damage_init_random_box(struct sna_damage_selftest *test,
+ BoxPtr box)
+{
+ int x, y, w, h;
+
+ if (test->width == 1) {
+ x = 0, w = 1;
+ } else {
+ x = rand() % (test->width - 1);
+ w = 1 + rand() % (test->width - x - 1);
+ }
+
+ if (test->height == 1) {
+ y = 0, h = 1;
+ } else {
+ y = rand() % (test->height - 1);
+ h = 1 + rand() % (test->height - y - 1);
+ }
+
+ box->x1 = x;
+ box->x2 = x+w;
+
+ box->y1 = y;
+ box->y2 = y+h;
+}
+
+static void st_damage_init_random_region1(struct sna_damage_selftest *test,
+ pixman_region16_t *region)
+{
+ int x, y, w, h;
+
+ if (test->width == 1) {
+ x = 0, w = 1;
+ } else {
+ x = rand() % (test->width - 1);
+ w = 1 + rand() % (test->width - x - 1);
+ }
+
+ if (test->height == 1) {
+ y = 0, h = 1;
+ } else {
+ y = rand() % (test->height - 1);
+ h = 1 + rand() % (test->height - y - 1);
+ }
+
+ pixman_region_init_rect(region, x, y, w, h);
+}
+
+static void st_damage_add(struct sna_damage_selftest *test,
+ struct sna_damage **damage,
+ pixman_region16_t *region)
+{
+ pixman_region16_t tmp;
+
+ st_damage_init_random_region1(test, &tmp);
+
+ sna_damage_add(damage, &tmp);
+ pixman_region_union(region, region, &tmp);
+}
+
+static void st_damage_add_box(struct sna_damage_selftest *test,
+ struct sna_damage **damage,
+ pixman_region16_t *region)
+{
+ BoxRec box;
+
+ st_damage_init_random_box(test, &box);
+
+ sna_damage_add_box(damage, &box);
+ pixman_region_union_rectangle(region, region,
+ box.x1, box.y2,
+ box.x2 - box.x1,
+ box.y2 - box.y1);
+}
+
+static void st_damage_subtract(struct sna_damage_selftest *test,
+ struct sna_damage **damage,
+ pixman_region16_t *region)
+{
+ pixman_region16_t tmp;
+
+ st_damage_init_random_region1(test, &tmp);
+
+ sna_damage_subtract(damage, &tmp);
+ pixman_region_subtract(region, region, &tmp);
+}
+
+static void st_damage_all(struct sna_damage_selftest *test,
+ struct sna_damage **damage,
+ pixman_region16_t *region)
+{
+ pixman_region16_t tmp;
+
+ pixman_region_init_rect(&tmp, 0, 0, test->width, test->height);
+
+ sna_damage_all(damage, test->width, test->height);
+ pixman_region_union(region, region, &tmp);
+}
+
+static bool st_check_equal(struct sna_damage_selftest *test,
+ struct sna_damage **damage,
+ pixman_region16_t *region)
+{
+ int d_num, r_num;
+ BoxPtr d_boxes, r_boxes;
+
+ d_num = sna_damage_get_boxes(*damage, &d_boxes);
+ r_boxes = pixman_region_rectangles(region, &r_num);
+
+ if (d_num != r_num) {
+ ErrorF("%s: damage and ref contain different number of rectangles\n",
+ __FUNCTION__);
+ return FALSE;
+ }
+
+ if (memcmp(d_boxes, r_boxes, d_num*sizeof(BoxRec))) {
+ ErrorF("%s: damage and ref contain different rectangles\n",
+ __FUNCTION__);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+void sna_damage_selftest(void)
+{
+ void (*const op[])(struct sna_damage_selftest *test,
+ struct sna_damage **damage,
+ pixman_region16_t *region) = {
+ st_damage_add,
+ st_damage_add_box,
+ st_damage_subtract,
+ st_damage_all
+ };
+ bool (*const check[])(struct sna_damage_selftest *test,
+ struct sna_damage **damage,
+ pixman_region16_t *region) = {
+ st_check_equal,
+ //st_check_contains,
+ };
+ char region_buf[120];
+ char damage_buf[1000];
+ int pass;
+
+ for (pass = 0; pass < 1024; pass++) {
+ struct sna_damage_selftest test;
+ struct sna_damage *damage;
+ pixman_region16_t ref;
+ int iter, i;
+
+ iter = rand() % 1024;
+
+ test.width = 1 + rand() % 2048;
+ test.height = 1 + rand() % 2048;
+
+ damage = _sna_damage_create();
+ pixman_region_init(&ref);
+
+ for (i = 0; i < iter; i++) {
+ op[rand() % ARRAY_SIZE(op)](&test, &damage, &ref);
+ }
+
+ if (!check[rand() % ARRAY_SIZE(check)](&test, &damage, &ref)) {
+ ErrorF("%s: failed - region = %s, damage = %s\n", __FUNCTION__,
+ _debug_describe_region(region_buf, sizeof(region_buf), &ref),
+ _debug_describe_damage(damage_buf, sizeof(damage_buf), damage));
+ assert(0);
+ }
+
+ pixman_region_fini(&ref);
+ sna_damage_destroy(&damage);
+ }
+}
+#endif
diff --git a/src/sna/sna_damage.h b/src/sna/sna_damage.h
new file mode 100644
index 00000000..0b335711
--- /dev/null
+++ b/src/sna/sna_damage.h
@@ -0,0 +1,94 @@
+#ifndef SNA_DAMAGE_H
+#define SNA_DAMAGE_H
+
+#include <regionstr.h>
+#include <list.h>
+
+#define fastcall __attribute__((regparm(3)))
+
+struct sna_damage_elt;
+struct sna_damage_box;
+
+struct sna_damage {
+ BoxRec extents;
+ int n, size, mode;
+ pixman_region16_t region;
+ struct sna_damage_elt *elts;
+ struct sna_damage_box *last_box;
+ struct list boxes;
+};
+
+fastcall struct sna_damage *_sna_damage_add(struct sna_damage *damage,
+ RegionPtr region);
+static inline void sna_damage_add(struct sna_damage **damage,
+ RegionPtr region)
+{
+ *damage = _sna_damage_add(*damage, region);
+}
+
+fastcall struct sna_damage *_sna_damage_add_box(struct sna_damage *damage,
+ const BoxRec *box);
+static inline void sna_damage_add_box(struct sna_damage **damage,
+ const BoxRec *box)
+{
+ *damage = _sna_damage_add_box(*damage, box);
+}
+
+struct sna_damage *_sna_damage_all(struct sna_damage *damage,
+ int width, int height);
+static inline void sna_damage_all(struct sna_damage **damage,
+ int width, int height)
+{
+ *damage = _sna_damage_all(*damage, width, height);
+}
+
+fastcall struct sna_damage *_sna_damage_subtract(struct sna_damage *damage,
+ RegionPtr region);
+static inline void sna_damage_subtract(struct sna_damage **damage,
+ RegionPtr region)
+{
+ *damage = _sna_damage_subtract(*damage, region);
+}
+
+fastcall struct sna_damage *_sna_damage_subtract_box(struct sna_damage *damage,
+ const BoxRec *box);
+static inline void sna_damage_subtract_box(struct sna_damage **damage,
+ BoxPtr box)
+{
+ *damage = _sna_damage_subtract_box(*damage, box);
+}
+
+Bool sna_damage_intersect(struct sna_damage *damage,
+ RegionPtr region, RegionPtr result);
+
+int sna_damage_contains_box(struct sna_damage *damage,
+ const BoxPtr box);
+
+int sna_damage_get_boxes(struct sna_damage *damage, BoxPtr *boxes);
+
+struct sna_damage *_sna_damage_reduce(struct sna_damage *damage);
+static inline void sna_damage_reduce(struct sna_damage **damage)
+{
+ if (*damage == NULL)
+ return;
+
+ *damage = _sna_damage_reduce(*damage);
+}
+
+void __sna_damage_destroy(struct sna_damage *damage);
+static inline void sna_damage_destroy(struct sna_damage **damage)
+{
+ if (*damage == NULL)
+ return;
+
+ __sna_damage_destroy(*damage);
+ *damage = NULL;
+}
+
+#if DEBUG_DAMAGE && TEST_DAMAGE
+void sna_damage_selftest(void);
+#else
+static inline void sna_damage_selftest(void) {}
+#endif
+
+#endif /* SNA_DAMAGE_H */
diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
new file mode 100644
index 00000000..d27eafde
--- /dev/null
+++ b/src/sna/sna_display.c
@@ -0,0 +1,1656 @@
+/*
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Dave Airlie <airlied@redhat.com>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <poll.h>
+
+#include <xorgVersion.h>
+#include <X11/Xatom.h>
+
+#include "sna.h"
+
+#if DEBUG_DISPLAY
+#undef DBG
+#define DBG(x) ErrorF x
+#endif
+
+struct sna_crtc {
+ drmModeModeInfo kmode;
+ drmModeCrtcPtr mode_crtc;
+ PixmapPtr shadow;
+ uint32_t shadow_fb_id;
+ uint32_t cursor;
+ xf86CrtcPtr crtc;
+ int pipe;
+ int active;
+ struct list link;
+};
+
+struct sna_property {
+ drmModePropertyPtr mode_prop;
+ uint64_t value;
+ int num_atoms; /* if range prop, num_atoms == 1; if enum prop, num_atoms == num_enums + 1 */
+ Atom *atoms;
+};
+
+struct sna_output {
+ struct sna_mode *mode;
+ int output_id;
+ drmModeConnectorPtr mode_output;
+ drmModeEncoderPtr mode_encoder;
+ int num_props;
+ struct sna_property *props;
+ void *private_data;
+
+ Bool has_panel_limits;
+ int panel_hdisplay;
+ int panel_vdisplay;
+
+ int dpms_mode;
+ const char *backlight_iface;
+ int backlight_active_level;
+ int backlight_max;
+ xf86OutputPtr output;
+ struct list link;
+};
+
+static void
+sna_output_dpms(xf86OutputPtr output, int mode);
+
+#define BACKLIGHT_CLASS "/sys/class/backlight"
+
+/*
+ * List of available kernel interfaces in priority order
+ */
+static const char *backlight_interfaces[] = {
+ "sna", /* prefer our own native backlight driver */
+ "asus-laptop",
+ "eeepc",
+ "thinkpad_screen",
+ "mbp_backlight",
+ "fujitsu-laptop",
+ "sony",
+ "samsung",
+ "acpi_video1", /* finally fallback to the generic acpi drivers */
+ "acpi_video0",
+ NULL,
+};
+/*
+ * Must be long enough for BACKLIGHT_CLASS + '/' + longest in above table +
+ * '/' + "max_backlight"
+ */
+#define BACKLIGHT_PATH_LEN 80
+/* Enough for 10 digits of backlight + '\n' + '\0' */
+#define BACKLIGHT_VALUE_LEN 12
+
+static inline int
+crtc_id(struct sna_crtc *crtc)
+{
+ return crtc->mode_crtc->crtc_id;
+}
+
+int sna_crtc_id(xf86CrtcPtr crtc)
+{
+ return crtc_id(crtc->driver_private);
+}
+
+int sna_crtc_on(xf86CrtcPtr crtc)
+{
+ struct sna_crtc *sna_crtc = crtc->driver_private;
+ return sna_crtc->active;
+}
+
+int sna_crtc_to_pipe(xf86CrtcPtr crtc)
+{
+ struct sna_crtc *sna_crtc = crtc->driver_private;
+ return sna_crtc->pipe;
+}
+
+static uint32_t gem_create(int fd, int size)
+{
+ struct drm_i915_gem_create create;
+
+ create.handle = 0;
+ create.size = ALIGN(size, 4096);
+ (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
+
+ return create.handle;
+}
+
+static void gem_close(int fd, uint32_t handle)
+{
+ struct drm_gem_close close;
+
+ close.handle = handle;
+ (void)drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
+}
+
+static void
+sna_output_backlight_set(xf86OutputPtr output, int level)
+{
+ struct sna_output *sna_output = output->driver_private;
+ char path[BACKLIGHT_PATH_LEN], val[BACKLIGHT_VALUE_LEN];
+ int fd, len, ret;
+
+ if (level > sna_output->backlight_max)
+ level = sna_output->backlight_max;
+ if (! sna_output->backlight_iface || level < 0)
+ return;
+
+ len = snprintf(val, BACKLIGHT_VALUE_LEN, "%d\n", level);
+ sprintf(path, "%s/%s/brightness",
+ BACKLIGHT_CLASS, sna_output->backlight_iface);
+ fd = open(path, O_RDWR);
+ if (fd == -1) {
+ xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, "failed to open %s for backlight "
+ "control: %s\n", path, strerror(errno));
+ return;
+ }
+
+ ret = write(fd, val, len);
+ if (ret == -1) {
+ xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, "write to %s for backlight "
+ "control failed: %s\n", path, strerror(errno));
+ }
+
+ close(fd);
+}
+
+static int
+sna_output_backlight_get(xf86OutputPtr output)
+{
+ struct sna_output *sna_output = output->driver_private;
+ char path[BACKLIGHT_PATH_LEN], val[BACKLIGHT_VALUE_LEN];
+ int fd, level;
+
+ sprintf(path, "%s/%s/actual_brightness",
+ BACKLIGHT_CLASS, sna_output->backlight_iface);
+ fd = open(path, O_RDONLY);
+ if (fd == -1) {
+ xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, "failed to open %s "
+ "for backlight control: %s\n", path, strerror(errno));
+ return -1;
+ }
+
+ memset(val, 0, sizeof(val));
+ if (read(fd, val, BACKLIGHT_VALUE_LEN) == -1) {
+ close(fd);
+ return -1;
+ }
+
+ close(fd);
+
+ level = atoi(val);
+ if (level > sna_output->backlight_max)
+ level = sna_output->backlight_max;
+ if (level < 0)
+ level = -1;
+ return level;
+}
+
+static int
+sna_output_backlight_get_max(xf86OutputPtr output)
+{
+ struct sna_output *sna_output = output->driver_private;
+ char path[BACKLIGHT_PATH_LEN], val[BACKLIGHT_VALUE_LEN];
+ int fd, max = 0;
+
+ sprintf(path, "%s/%s/max_brightness",
+ BACKLIGHT_CLASS, sna_output->backlight_iface);
+ fd = open(path, O_RDONLY);
+ if (fd == -1) {
+ xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, "failed to open %s "
+ "for backlight control: %s\n", path, strerror(errno));
+ return -1;
+ }
+
+ memset(val, 0, sizeof(val));
+ if (read(fd, val, BACKLIGHT_VALUE_LEN) == -1) {
+ close(fd);
+ return -1;
+ }
+
+ close(fd);
+
+ max = atoi(val);
+ if (max <= 0)
+ max = -1;
+ return max;
+}
+
+static void
+sna_output_backlight_init(xf86OutputPtr output)
+{
+ struct sna_output *sna_output = output->driver_private;
+ int i;
+
+ for (i = 0; backlight_interfaces[i] != NULL; i++) {
+ char path[BACKLIGHT_PATH_LEN];
+ struct stat buf;
+
+ sprintf(path, "%s/%s", BACKLIGHT_CLASS, backlight_interfaces[i]);
+ if (!stat(path, &buf)) {
+ sna_output->backlight_iface = backlight_interfaces[i];
+ sna_output->backlight_max = sna_output_backlight_get_max(output);
+ if (sna_output->backlight_max > 0) {
+ sna_output->backlight_active_level = sna_output_backlight_get(output);
+ xf86DrvMsg(output->scrn->scrnIndex, X_INFO,
+ "found backlight control interface %s\n", path);
+ return;
+ }
+ }
+ }
+ sna_output->backlight_iface = NULL;
+}
+
+
+static void
+mode_from_kmode(ScrnInfoPtr scrn,
+ drmModeModeInfoPtr kmode,
+ DisplayModePtr mode)
+{
+ memset(mode, 0, sizeof(DisplayModeRec));
+ mode->status = MODE_OK;
+
+ mode->Clock = kmode->clock;
+
+ mode->HDisplay = kmode->hdisplay;
+ mode->HSyncStart = kmode->hsync_start;
+ mode->HSyncEnd = kmode->hsync_end;
+ mode->HTotal = kmode->htotal;
+ mode->HSkew = kmode->hskew;
+
+ mode->VDisplay = kmode->vdisplay;
+ mode->VSyncStart = kmode->vsync_start;
+ mode->VSyncEnd = kmode->vsync_end;
+ mode->VTotal = kmode->vtotal;
+ mode->VScan = kmode->vscan;
+
+ mode->Flags = kmode->flags; //& FLAG_BITS;
+ mode->name = strdup(kmode->name);
+
+ if (kmode->type & DRM_MODE_TYPE_DRIVER)
+ mode->type = M_T_DRIVER;
+ if (kmode->type & DRM_MODE_TYPE_PREFERRED)
+ mode->type |= M_T_PREFERRED;
+
+ xf86SetModeCrtc (mode, scrn->adjustFlags);
+}
+
+static void
+mode_to_kmode(ScrnInfoPtr scrn,
+ drmModeModeInfoPtr kmode,
+ DisplayModePtr mode)
+{
+ memset(kmode, 0, sizeof(*kmode));
+
+ kmode->clock = mode->Clock;
+ kmode->hdisplay = mode->HDisplay;
+ kmode->hsync_start = mode->HSyncStart;
+ kmode->hsync_end = mode->HSyncEnd;
+ kmode->htotal = mode->HTotal;
+ kmode->hskew = mode->HSkew;
+
+ kmode->vdisplay = mode->VDisplay;
+ kmode->vsync_start = mode->VSyncStart;
+ kmode->vsync_end = mode->VSyncEnd;
+ kmode->vtotal = mode->VTotal;
+ kmode->vscan = mode->VScan;
+
+ kmode->flags = mode->Flags; //& FLAG_BITS;
+ if (mode->name)
+ strncpy(kmode->name, mode->name, DRM_DISPLAY_MODE_LEN);
+ kmode->name[DRM_DISPLAY_MODE_LEN-1] = 0;
+}
+
+static void
+sna_crtc_dpms(xf86CrtcPtr crtc, int mode)
+{
+ struct sna_crtc *sna_crtc = crtc->driver_private;
+ DBG(("%s(pipe %d, dpms mode -> %d):= active=%d\n",
+ __FUNCTION__, sna_crtc->pipe, mode, mode == DPMSModeOn));
+ sna_crtc->active = mode == DPMSModeOn;
+}
+
+static Bool
+sna_crtc_apply(xf86CrtcPtr crtc)
+{
+ ScrnInfoPtr scrn = crtc->scrn;
+ struct sna *sna = to_sna(scrn);
+ struct sna_crtc *sna_crtc = crtc->driver_private;
+ struct sna_mode *mode = &sna->mode;
+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(crtc->scrn);
+ uint32_t *output_ids;
+ int output_count = 0;
+ int fb_id, x, y;
+ int i, ret = FALSE;
+
+ output_ids = calloc(sizeof(uint32_t), xf86_config->num_output);
+ if (!output_ids)
+ return FALSE;
+
+ for (i = 0; i < xf86_config->num_output; i++) {
+ xf86OutputPtr output = xf86_config->output[i];
+ struct sna_output *sna_output;
+
+ if (output->crtc != crtc)
+ continue;
+
+ sna_output = output->driver_private;
+ output_ids[output_count] =
+ sna_output->mode_output->connector_id;
+ output_count++;
+ }
+
+ if (!xf86CrtcRotate(crtc))
+ goto done;
+
+ crtc->funcs->gamma_set(crtc, crtc->gamma_red, crtc->gamma_green,
+ crtc->gamma_blue, crtc->gamma_size);
+
+ x = crtc->x;
+ y = crtc->y;
+ fb_id = mode->fb_id;
+ if (sna_crtc->shadow_fb_id) {
+ fb_id = sna_crtc->shadow_fb_id;
+ x = 0;
+ y = 0;
+ }
+ ret = drmModeSetCrtc(sna->kgem.fd, crtc_id(sna_crtc),
+ fb_id, x, y, output_ids, output_count,
+ &sna_crtc->kmode);
+ if (ret) {
+ xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR,
+ "failed to set mode: %s\n", strerror(-ret));
+ ret = FALSE;
+ } else
+ ret = TRUE;
+
+ if (scrn->pScreen)
+ xf86_reload_cursors(scrn->pScreen);
+
+done:
+ free(output_ids);
+ return ret;
+}
+
+static Bool
+sna_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode,
+ Rotation rotation, int x, int y)
+{
+ ScrnInfoPtr scrn = crtc->scrn;
+ struct sna *sna = to_sna(scrn);
+ struct sna_crtc *sna_crtc = crtc->driver_private;
+ struct sna_mode *sna_mode = &sna->mode;
+ int saved_x, saved_y;
+ Rotation saved_rotation;
+ DisplayModeRec saved_mode;
+ int ret = TRUE;
+
+ DBG(("%s(rotation=%d, x=%d, y=%d)\n",
+ __FUNCTION__, rotation, x, y));
+
+ if (sna_mode->fb_id == 0) {
+ struct kgem_bo *bo = sna_pixmap_pin(sna->front);
+ if (!bo)
+ return FALSE;
+
+ ret = drmModeAddFB(sna->kgem.fd,
+ scrn->virtualX, scrn->virtualY,
+ scrn->depth, scrn->bitsPerPixel,
+ bo->pitch, bo->handle,
+ &sna_mode->fb_id);
+ if (ret < 0) {
+ ErrorF("%s: failed to add fb: %dx%d depth=%d, bpp=%d, pitch=%d\n",
+ __FUNCTION__,
+ scrn->virtualX, scrn->virtualY,
+ scrn->depth, scrn->bitsPerPixel, bo->pitch);
+ return FALSE;
+ }
+
+ DBG(("%s: handle %d attached to fb %d\n",
+ __FUNCTION__, bo->handle, sna_mode->fb_id));
+ }
+
+ saved_mode = crtc->mode;
+ saved_x = crtc->x;
+ saved_y = crtc->y;
+ saved_rotation = crtc->rotation;
+
+ crtc->mode = *mode;
+ crtc->x = x;
+ crtc->y = y;
+ crtc->rotation = rotation;
+
+ kgem_submit(&sna->kgem);
+
+ mode_to_kmode(scrn, &sna_crtc->kmode, mode);
+ ret = sna_crtc_apply(crtc);
+ if (!ret) {
+ crtc->x = saved_x;
+ crtc->y = saved_y;
+ crtc->rotation = saved_rotation;
+ crtc->mode = saved_mode;
+ }
+
+ return ret;
+}
+
+static void
+sna_crtc_set_cursor_colors(xf86CrtcPtr crtc, int bg, int fg)
+{
+
+}
+
+static void
+sna_crtc_set_cursor_position (xf86CrtcPtr crtc, int x, int y)
+{
+ struct sna *sna = to_sna(crtc->scrn);
+ struct sna_crtc *sna_crtc = crtc->driver_private;
+
+ drmModeMoveCursor(sna->kgem.fd, crtc_id(sna_crtc), x, y);
+}
+
+static void
+sna_crtc_load_cursor_argb(xf86CrtcPtr crtc, CARD32 *image)
+{
+ struct sna *sna = to_sna(crtc->scrn);
+ struct sna_crtc *sna_crtc = crtc->driver_private;
+ struct drm_i915_gem_pwrite pwrite;
+
+ pwrite.handle = sna_crtc->cursor;
+ pwrite.offset = 0;
+ pwrite.size = 64*64*4;
+ pwrite.data_ptr = (uintptr_t)image;
+ (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
+}
+
+static void
+sna_crtc_hide_cursor(xf86CrtcPtr crtc)
+{
+ struct sna *sna = to_sna(crtc->scrn);
+ struct sna_crtc *sna_crtc = crtc->driver_private;
+
+ drmModeSetCursor(sna->kgem.fd, crtc_id(sna_crtc), 0, 64, 64);
+}
+
+static void
+sna_crtc_show_cursor(xf86CrtcPtr crtc)
+{
+ struct sna *sna = to_sna(crtc->scrn);
+ struct sna_crtc *sna_crtc = crtc->driver_private;
+
+ drmModeSetCursor(sna->kgem.fd, crtc_id(sna_crtc),
+ sna_crtc->cursor, 64, 64);
+}
+
+static void *
+sna_crtc_shadow_allocate(xf86CrtcPtr crtc, int width, int height)
+{
+ ScrnInfoPtr scrn = crtc->scrn;
+ struct sna *sna = to_sna(scrn);
+ struct sna_crtc *sna_crtc = crtc->driver_private;
+ PixmapPtr shadow;
+ struct kgem_bo *bo;
+
+ DBG(("%s(%d, %d)\n", __FUNCTION__, width, height));
+
+ shadow = scrn->pScreen->CreatePixmap(scrn->pScreen, width, height, scrn->depth, 0);
+ if (!shadow)
+ return NULL;
+
+ bo = sna_pixmap_pin(shadow);
+ if (!bo) {
+ scrn->pScreen->DestroyPixmap(shadow);
+ return NULL;
+ }
+
+ if (drmModeAddFB(sna->kgem.fd,
+ width, height, scrn->depth, scrn->bitsPerPixel,
+ bo->pitch, bo->handle,
+ &sna_crtc->shadow_fb_id)) {
+ ErrorF("%s: failed to add rotate fb: %dx%d depth=%d, bpp=%d, pitch=%d\n",
+ __FUNCTION__,
+ width, height,
+ scrn->depth, scrn->bitsPerPixel, bo->pitch);
+ scrn->pScreen->DestroyPixmap(shadow);
+ return NULL;
+ }
+
+ DBG(("%s: attached handle %d to fb %d\n",
+ __FUNCTION__, bo->handle, sna_crtc->shadow_fb_id));
+ return sna_crtc->shadow = shadow;
+}
+
+static PixmapPtr
+sna_crtc_shadow_create(xf86CrtcPtr crtc, void *data, int width, int height)
+{
+ return data;
+}
+
+static void
+sna_crtc_shadow_destroy(xf86CrtcPtr crtc, PixmapPtr pixmap, void *data)
+{
+ struct sna *sna = to_sna(crtc->scrn);
+ struct sna_crtc *sna_crtc = crtc->driver_private;
+
+ DBG(("%s(fb=%d, handle=%d)\n", __FUNCTION__,
+ sna_crtc->shadow_fb_id, sna_pixmap_get_bo(pixmap)->handle));
+
+ drmModeRmFB(sna->kgem.fd, sna_crtc->shadow_fb_id);
+ sna_crtc->shadow_fb_id = 0;
+
+ pixmap->drawable.pScreen->DestroyPixmap(pixmap);
+ sna_crtc->shadow = NULL;
+}
+
+static void
+sna_crtc_gamma_set(xf86CrtcPtr crtc,
+ CARD16 *red, CARD16 *green, CARD16 *blue, int size)
+{
+ struct sna *sna = to_sna(crtc->scrn);
+ struct sna_crtc *sna_crtc = crtc->driver_private;
+
+ drmModeCrtcSetGamma(sna->kgem.fd, crtc_id(sna_crtc),
+ size, red, green, blue);
+}
+
+static void
+sna_crtc_destroy(xf86CrtcPtr crtc)
+{
+ struct sna *sna = to_sna(crtc->scrn);
+ struct sna_crtc *sna_crtc = crtc->driver_private;
+
+ drmModeSetCursor(sna->kgem.fd, crtc_id(sna_crtc), 0, 64, 64);
+ gem_close(sna->kgem.fd, sna_crtc->cursor);
+
+ list_del(&sna_crtc->link);
+ free(sna_crtc);
+
+ crtc->driver_private = NULL;
+}
+
+static const xf86CrtcFuncsRec sna_crtc_funcs = {
+ .dpms = sna_crtc_dpms,
+ .set_mode_major = sna_crtc_set_mode_major,
+ .set_cursor_colors = sna_crtc_set_cursor_colors,
+ .set_cursor_position = sna_crtc_set_cursor_position,
+ .show_cursor = sna_crtc_show_cursor,
+ .hide_cursor = sna_crtc_hide_cursor,
+ .load_cursor_argb = sna_crtc_load_cursor_argb,
+ .shadow_create = sna_crtc_shadow_create,
+ .shadow_allocate = sna_crtc_shadow_allocate,
+ .shadow_destroy = sna_crtc_shadow_destroy,
+ .gamma_set = sna_crtc_gamma_set,
+ .destroy = sna_crtc_destroy,
+};
+
+static void
+sna_crtc_init(ScrnInfoPtr scrn, struct sna_mode *mode, int num)
+{
+ struct sna *sna = to_sna(scrn);
+ xf86CrtcPtr crtc;
+ struct sna_crtc *sna_crtc;
+ struct drm_i915_get_pipe_from_crtc_id get_pipe;
+
+ sna_crtc = calloc(sizeof(struct sna_crtc), 1);
+ if (sna_crtc == NULL)
+ return;
+
+ crtc = xf86CrtcCreate(scrn, &sna_crtc_funcs);
+ if (crtc == NULL) {
+ free(sna_crtc);
+ return;
+ }
+
+ sna_crtc->mode_crtc = drmModeGetCrtc(sna->kgem.fd,
+ mode->mode_res->crtcs[num]);
+ get_pipe.pipe = 0;
+ get_pipe.crtc_id = sna_crtc->mode_crtc->crtc_id;
+ drmIoctl(sna->kgem.fd,
+ DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
+ &get_pipe);
+ sna_crtc->pipe = get_pipe.pipe;
+
+ crtc->driver_private = sna_crtc;
+
+ sna_crtc->cursor = gem_create(sna->kgem.fd, 64*64*4);
+
+ sna_crtc->crtc = crtc;
+ list_add(&sna_crtc->link, &mode->crtcs);
+}
+
+static Bool
+is_panel(int type)
+{
+ return (type == DRM_MODE_CONNECTOR_LVDS ||
+ type == DRM_MODE_CONNECTOR_eDP);
+}
+
+static xf86OutputStatus
+sna_output_detect(xf86OutputPtr output)
+{
+ /* go to the hw and retrieve a new output struct */
+ struct sna *sna = to_sna(output->scrn);
+ struct sna_output *sna_output = output->driver_private;
+ xf86OutputStatus status;
+
+ drmModeFreeConnector(sna_output->mode_output);
+ sna_output->mode_output =
+ drmModeGetConnector(sna->kgem.fd, sna_output->output_id);
+
+ switch (sna_output->mode_output->connection) {
+ case DRM_MODE_CONNECTED:
+ status = XF86OutputStatusConnected;
+ break;
+ case DRM_MODE_DISCONNECTED:
+ status = XF86OutputStatusDisconnected;
+ break;
+ default:
+ case DRM_MODE_UNKNOWNCONNECTION:
+ status = XF86OutputStatusUnknown;
+ break;
+ }
+ return status;
+}
+
+static Bool
+sna_output_mode_valid(xf86OutputPtr output, DisplayModePtr pModes)
+{
+ struct sna_output *sna_output = output->driver_private;
+
+ /*
+ * If the connector type is a panel, we will use the panel limit to
+ * verfiy whether the mode is valid.
+ */
+ if (sna_output->has_panel_limits) {
+ if (pModes->HDisplay > sna_output->panel_hdisplay ||
+ pModes->VDisplay > sna_output->panel_vdisplay)
+ return MODE_PANEL;
+ }
+
+ return MODE_OK;
+}
+
+static void
+sna_output_attach_edid(xf86OutputPtr output)
+{
+ struct sna *sna = to_sna(output->scrn);
+ struct sna_output *sna_output = output->driver_private;
+ drmModeConnectorPtr koutput = sna_output->mode_output;
+ drmModePropertyBlobPtr edid_blob = NULL;
+ xf86MonPtr mon = NULL;
+ int i;
+
+ /* look for an EDID property */
+ for (i = 0; i < koutput->count_props; i++) {
+ drmModePropertyPtr props;
+
+ props = drmModeGetProperty(sna->kgem.fd, koutput->props[i]);
+ if (!props)
+ continue;
+
+ if (!(props->flags & DRM_MODE_PROP_BLOB)) {
+ drmModeFreeProperty(props);
+ continue;
+ }
+
+ if (!strcmp(props->name, "EDID")) {
+ drmModeFreePropertyBlob(edid_blob);
+ edid_blob =
+ drmModeGetPropertyBlob(sna->kgem.fd,
+ koutput->prop_values[i]);
+ }
+ drmModeFreeProperty(props);
+ }
+
+ if (edid_blob) {
+ mon = xf86InterpretEDID(output->scrn->scrnIndex,
+ edid_blob->data);
+
+ if (mon && edid_blob->length > 128)
+ mon->flags |= MONITOR_EDID_COMPLETE_RAWDATA;
+ }
+
+ xf86OutputSetEDID(output, mon);
+
+ if (edid_blob)
+ drmModeFreePropertyBlob(edid_blob);
+}
+
+static DisplayModePtr
+sna_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes)
+{
+ xf86MonPtr mon = output->MonInfo;
+
+ if (!mon || !GTF_SUPPORTED(mon->features.msc)) {
+ DisplayModePtr i, m, p = NULL;
+ int max_x = 0, max_y = 0;
+ float max_vrefresh = 0.0;
+
+ for (m = modes; m; m = m->next) {
+ if (m->type & M_T_PREFERRED)
+ p = m;
+ max_x = max(max_x, m->HDisplay);
+ max_y = max(max_y, m->VDisplay);
+ max_vrefresh = max(max_vrefresh, xf86ModeVRefresh(m));
+ }
+
+ max_vrefresh = max(max_vrefresh, 60.0);
+ max_vrefresh *= (1 + SYNC_TOLERANCE);
+
+ m = xf86GetDefaultModes();
+ xf86ValidateModesSize(output->scrn, m, max_x, max_y, 0);
+
+ for (i = m; i; i = i->next) {
+ if (xf86ModeVRefresh(i) > max_vrefresh)
+ i->status = MODE_VSYNC;
+ if (p && i->HDisplay >= p->HDisplay &&
+ i->VDisplay >= p->VDisplay &&
+ xf86ModeVRefresh(i) >= xf86ModeVRefresh(p))
+ i->status = MODE_VSYNC;
+ }
+
+ xf86PruneInvalidModes(output->scrn, &m, FALSE);
+
+ modes = xf86ModesAdd(modes, m);
+ }
+
+ return modes;
+}
+
+static DisplayModePtr
+sna_output_get_modes(xf86OutputPtr output)
+{
+ struct sna_output *sna_output = output->driver_private;
+ drmModeConnectorPtr koutput = sna_output->mode_output;
+ DisplayModePtr Modes = NULL;
+ int i;
+
+ sna_output_attach_edid(output);
+
+ /* modes should already be available */
+ for (i = 0; i < koutput->count_modes; i++) {
+ DisplayModePtr Mode;
+
+ Mode = calloc(1, sizeof(DisplayModeRec));
+ if (Mode) {
+ mode_from_kmode(output->scrn, &koutput->modes[i], Mode);
+ Modes = xf86ModesAdd(Modes, Mode);
+ }
+ }
+
+ /*
+ * If the connector type is a panel, we will traverse the kernel mode to
+ * get the panel limit. And then add all the standard modes to fake
+ * the fullscreen experience.
+ * If it is incorrect, please fix me.
+ */
+ sna_output->has_panel_limits = FALSE;
+ if (is_panel(koutput->connector_type)) {
+ for (i = 0; i < koutput->count_modes; i++) {
+ drmModeModeInfo *mode_ptr;
+
+ mode_ptr = &koutput->modes[i];
+ if (mode_ptr->hdisplay > sna_output->panel_hdisplay)
+ sna_output->panel_hdisplay = mode_ptr->hdisplay;
+ if (mode_ptr->vdisplay > sna_output->panel_vdisplay)
+ sna_output->panel_vdisplay = mode_ptr->vdisplay;
+ }
+
+ sna_output->has_panel_limits =
+ sna_output->panel_hdisplay &&
+ sna_output->panel_vdisplay;
+
+ Modes = sna_output_panel_edid(output, Modes);
+ }
+
+ return Modes;
+}
+
+static void
+sna_output_destroy(xf86OutputPtr output)
+{
+ struct sna_output *sna_output = output->driver_private;
+ int i;
+
+ for (i = 0; i < sna_output->num_props; i++) {
+ drmModeFreeProperty(sna_output->props[i].mode_prop);
+ free(sna_output->props[i].atoms);
+ }
+ free(sna_output->props);
+
+ drmModeFreeConnector(sna_output->mode_output);
+ sna_output->mode_output = NULL;
+
+ list_del(&sna_output->link);
+ free(sna_output);
+
+ output->driver_private = NULL;
+}
+
+static void
+sna_output_dpms_backlight(xf86OutputPtr output, int oldmode, int mode)
+{
+ struct sna_output *sna_output = output->driver_private;
+
+ if (!sna_output->backlight_iface)
+ return;
+
+ if (mode == DPMSModeOn) {
+ /* If we're going from off->on we may need to turn on the backlight. */
+ if (oldmode != DPMSModeOn)
+ sna_output_backlight_set(output,
+ sna_output->backlight_active_level);
+ } else {
+ /* Only save the current backlight value if we're going from on to off. */
+ if (oldmode == DPMSModeOn)
+ sna_output->backlight_active_level = sna_output_backlight_get(output);
+ sna_output_backlight_set(output, 0);
+ }
+}
+
+static void
+sna_output_dpms(xf86OutputPtr output, int dpms)
+{
+ struct sna *sna = to_sna(output->scrn);
+ struct sna_output *sna_output = output->driver_private;
+ drmModeConnectorPtr koutput = sna_output->mode_output;
+ int i;
+
+ for (i = 0; i < koutput->count_props; i++) {
+ drmModePropertyPtr props;
+
+ props = drmModeGetProperty(sna->kgem.fd, koutput->props[i]);
+ if (!props)
+ continue;
+
+ if (!strcmp(props->name, "DPMS")) {
+ drmModeConnectorSetProperty(sna->kgem.fd,
+ sna_output->output_id,
+ props->prop_id,
+ dpms);
+ sna_output_dpms_backlight(output,
+ sna_output->dpms_mode,
+ dpms);
+ sna_output->dpms_mode = dpms;
+ drmModeFreeProperty(props);
+ return;
+ }
+
+ drmModeFreeProperty(props);
+ }
+}
+
+int
+sna_output_dpms_status(xf86OutputPtr output)
+{
+ struct sna_output *sna_output = output->driver_private;
+ return sna_output->dpms_mode;
+}
+
+static Bool
+sna_property_ignore(drmModePropertyPtr prop)
+{
+ if (!prop)
+ return TRUE;
+
+ /* ignore blob prop */
+ if (prop->flags & DRM_MODE_PROP_BLOB)
+ return TRUE;
+
+ /* ignore standard property */
+ if (!strcmp(prop->name, "EDID") ||
+ !strcmp(prop->name, "DPMS"))
+ return TRUE;
+
+ return FALSE;
+}
+
+#define BACKLIGHT_NAME "Backlight"
+#define BACKLIGHT_DEPRECATED_NAME "BACKLIGHT"
+static Atom backlight_atom, backlight_deprecated_atom;
+
+static void
+sna_output_create_resources(xf86OutputPtr output)
+{
+ struct sna *sna = to_sna(output->scrn);
+ struct sna_output *sna_output = output->driver_private;
+ drmModeConnectorPtr mode_output = sna_output->mode_output;
+ int i, j, err;
+
+ sna_output->props = calloc(mode_output->count_props,
+ sizeof(struct sna_property));
+ if (!sna_output->props)
+ return;
+
+ sna_output->num_props = 0;
+ for (i = j = 0; i < mode_output->count_props; i++) {
+ drmModePropertyPtr drmmode_prop;
+
+ drmmode_prop = drmModeGetProperty(sna->kgem.fd,
+ mode_output->props[i]);
+ if (sna_property_ignore(drmmode_prop)) {
+ drmModeFreeProperty(drmmode_prop);
+ continue;
+ }
+
+ sna_output->props[j].mode_prop = drmmode_prop;
+ sna_output->props[j].value = mode_output->prop_values[i];
+ j++;
+ }
+ sna_output->num_props = j;
+
+ for (i = 0; i < sna_output->num_props; i++) {
+ struct sna_property *p = &sna_output->props[i];
+ drmModePropertyPtr drmmode_prop = p->mode_prop;
+
+ if (drmmode_prop->flags & DRM_MODE_PROP_RANGE) {
+ INT32 range[2];
+
+ p->num_atoms = 1;
+ p->atoms = calloc(p->num_atoms, sizeof(Atom));
+ if (!p->atoms)
+ continue;
+
+ p->atoms[0] = MakeAtom(drmmode_prop->name, strlen(drmmode_prop->name), TRUE);
+ range[0] = drmmode_prop->values[0];
+ range[1] = drmmode_prop->values[1];
+ err = RRConfigureOutputProperty(output->randr_output, p->atoms[0],
+ FALSE, TRUE,
+ drmmode_prop->flags & DRM_MODE_PROP_IMMUTABLE ? TRUE : FALSE,
+ 2, range);
+ if (err != 0) {
+ xf86DrvMsg(output->scrn->scrnIndex, X_ERROR,
+ "RRConfigureOutputProperty error, %d\n", err);
+ }
+ err = RRChangeOutputProperty(output->randr_output, p->atoms[0],
+ XA_INTEGER, 32, PropModeReplace, 1, &p->value, FALSE, TRUE);
+ if (err != 0) {
+ xf86DrvMsg(output->scrn->scrnIndex, X_ERROR,
+ "RRChangeOutputProperty error, %d\n", err);
+ }
+ } else if (drmmode_prop->flags & DRM_MODE_PROP_ENUM) {
+ p->num_atoms = drmmode_prop->count_enums + 1;
+ p->atoms = calloc(p->num_atoms, sizeof(Atom));
+ if (!p->atoms)
+ continue;
+
+ p->atoms[0] = MakeAtom(drmmode_prop->name, strlen(drmmode_prop->name), TRUE);
+ for (j = 1; j <= drmmode_prop->count_enums; j++) {
+ struct drm_mode_property_enum *e = &drmmode_prop->enums[j-1];
+ p->atoms[j] = MakeAtom(e->name, strlen(e->name), TRUE);
+ }
+
+ err = RRConfigureOutputProperty(output->randr_output, p->atoms[0],
+ FALSE, FALSE,
+ drmmode_prop->flags & DRM_MODE_PROP_IMMUTABLE ? TRUE : FALSE,
+ p->num_atoms - 1, (INT32 *)&p->atoms[1]);
+ if (err != 0) {
+ xf86DrvMsg(output->scrn->scrnIndex, X_ERROR,
+ "RRConfigureOutputProperty error, %d\n", err);
+ }
+
+ for (j = 0; j < drmmode_prop->count_enums; j++)
+ if (drmmode_prop->enums[j].value == p->value)
+ break;
+ /* there's always a matching value */
+ err = RRChangeOutputProperty(output->randr_output, p->atoms[0],
+ XA_ATOM, 32, PropModeReplace, 1, &p->atoms[j+1], FALSE, TRUE);
+ if (err != 0) {
+ xf86DrvMsg(output->scrn->scrnIndex, X_ERROR,
+ "RRChangeOutputProperty error, %d\n", err);
+ }
+ }
+ }
+
+ if (sna_output->backlight_iface) {
+ INT32 data, backlight_range[2];
+
+ /* Set up the backlight property, which takes effect
+ * immediately and accepts values only within the
+ * backlight_range.
+ */
+ backlight_atom = MakeAtom(BACKLIGHT_NAME, sizeof(BACKLIGHT_NAME) - 1, TRUE);
+ backlight_deprecated_atom = MakeAtom(BACKLIGHT_DEPRECATED_NAME,
+ sizeof(BACKLIGHT_DEPRECATED_NAME) - 1, TRUE);
+
+ backlight_range[0] = 0;
+ backlight_range[1] = sna_output->backlight_max;
+ err = RRConfigureOutputProperty(output->randr_output,
+ backlight_atom,
+ FALSE, TRUE, FALSE,
+ 2, backlight_range);
+ if (err != 0) {
+ xf86DrvMsg(output->scrn->scrnIndex, X_ERROR,
+ "RRConfigureOutputProperty error, %d\n", err);
+ }
+ err = RRConfigureOutputProperty(output->randr_output,
+ backlight_deprecated_atom,
+ FALSE, TRUE, FALSE,
+ 2, backlight_range);
+ if (err != 0) {
+ xf86DrvMsg(output->scrn->scrnIndex, X_ERROR,
+ "RRConfigureOutputProperty error, %d\n", err);
+ }
+ /* Set the current value of the backlight property */
+ data = sna_output->backlight_active_level;
+ err = RRChangeOutputProperty(output->randr_output, backlight_atom,
+ XA_INTEGER, 32, PropModeReplace, 1, &data,
+ FALSE, TRUE);
+ if (err != 0) {
+ xf86DrvMsg(output->scrn->scrnIndex, X_ERROR,
+ "RRChangeOutputProperty error, %d\n", err);
+ }
+ err = RRChangeOutputProperty(output->randr_output, backlight_deprecated_atom,
+ XA_INTEGER, 32, PropModeReplace, 1, &data,
+ FALSE, TRUE);
+ if (err != 0) {
+ xf86DrvMsg(output->scrn->scrnIndex, X_ERROR,
+ "RRChangeOutputProperty error, %d\n", err);
+ }
+ }
+}
+
+static Bool
+sna_output_set_property(xf86OutputPtr output, Atom property,
+ RRPropertyValuePtr value)
+{
+ struct sna *sna = to_sna(output->scrn);
+ struct sna_output *sna_output = output->driver_private;
+ int i;
+
+ if (property == backlight_atom || property == backlight_deprecated_atom) {
+ INT32 val;
+
+ if (value->type != XA_INTEGER || value->format != 32 ||
+ value->size != 1)
+ {
+ return FALSE;
+ }
+
+ val = *(INT32 *)value->data;
+ if (val < 0 || val > sna_output->backlight_max)
+ return FALSE;
+
+ if (sna_output->dpms_mode == DPMSModeOn)
+ sna_output_backlight_set(output, val);
+ sna_output->backlight_active_level = val;
+ return TRUE;
+ }
+
+ for (i = 0; i < sna_output->num_props; i++) {
+ struct sna_property *p = &sna_output->props[i];
+
+ if (p->atoms[0] != property)
+ continue;
+
+ if (p->mode_prop->flags & DRM_MODE_PROP_RANGE) {
+ uint32_t val;
+
+ if (value->type != XA_INTEGER || value->format != 32 ||
+ value->size != 1)
+ return FALSE;
+ val = *(uint32_t *)value->data;
+
+ drmModeConnectorSetProperty(sna->kgem.fd, sna_output->output_id,
+ p->mode_prop->prop_id, (uint64_t)val);
+ return TRUE;
+ } else if (p->mode_prop->flags & DRM_MODE_PROP_ENUM) {
+ Atom atom;
+ const char *name;
+ int j;
+
+ if (value->type != XA_ATOM || value->format != 32 || value->size != 1)
+ return FALSE;
+ memcpy(&atom, value->data, 4);
+ name = NameForAtom(atom);
+
+ /* search for matching name string, then set its value down */
+ for (j = 0; j < p->mode_prop->count_enums; j++) {
+ if (!strcmp(p->mode_prop->enums[j].name, name)) {
+ drmModeConnectorSetProperty(sna->kgem.fd, sna_output->output_id,
+ p->mode_prop->prop_id, p->mode_prop->enums[j].value);
+ return TRUE;
+ }
+ }
+ return FALSE;
+ }
+ }
+
+ /* We didn't recognise this property, just report success in order
+ * to allow the set to continue, otherwise we break setting of
+ * common properties like EDID.
+ */
+ return TRUE;
+}
+
+static Bool
+sna_output_get_property(xf86OutputPtr output, Atom property)
+{
+ struct sna_output *sna_output = output->driver_private;
+ int err;
+
+ if (property == backlight_atom || property == backlight_deprecated_atom) {
+ INT32 val;
+
+ if (! sna_output->backlight_iface)
+ return FALSE;
+
+ val = sna_output_backlight_get(output);
+ if (val < 0)
+ return FALSE;
+
+ err = RRChangeOutputProperty(output->randr_output, property,
+ XA_INTEGER, 32, PropModeReplace, 1, &val,
+ FALSE, TRUE);
+ if (err != 0) {
+ xf86DrvMsg(output->scrn->scrnIndex, X_ERROR,
+ "RRChangeOutputProperty error, %d\n", err);
+ return FALSE;
+ }
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static const xf86OutputFuncsRec sna_output_funcs = {
+ .create_resources = sna_output_create_resources,
+#ifdef RANDR_12_INTERFACE
+ .set_property = sna_output_set_property,
+ .get_property = sna_output_get_property,
+#endif
+ .dpms = sna_output_dpms,
+ .detect = sna_output_detect,
+ .mode_valid = sna_output_mode_valid,
+
+ .get_modes = sna_output_get_modes,
+ .destroy = sna_output_destroy
+};
+
+static const int subpixel_conv_table[7] = {
+ 0,
+ SubPixelUnknown,
+ SubPixelHorizontalRGB,
+ SubPixelHorizontalBGR,
+ SubPixelVerticalRGB,
+ SubPixelVerticalBGR,
+ SubPixelNone
+};
+
+static const char *output_names[] = {
+ "None",
+ "VGA",
+ "DVI",
+ "DVI",
+ "DVI",
+ "Composite",
+ "TV",
+ "LVDS",
+ "CTV",
+ "DIN",
+ "DP",
+ "HDMI",
+ "HDMI",
+ "TV",
+ "eDP",
+};
+
+static void
+sna_output_init(ScrnInfoPtr scrn, struct sna_mode *mode, int num)
+{
+ struct sna *sna = to_sna(scrn);
+ xf86OutputPtr output;
+ drmModeConnectorPtr koutput;
+ drmModeEncoderPtr kencoder;
+ struct sna_output *sna_output;
+ const char *output_name;
+ char name[32];
+
+ koutput = drmModeGetConnector(sna->kgem.fd,
+ mode->mode_res->connectors[num]);
+ if (!koutput)
+ return;
+
+ kencoder = drmModeGetEncoder(sna->kgem.fd, koutput->encoders[0]);
+ if (!kencoder) {
+ drmModeFreeConnector(koutput);
+ return;
+ }
+
+ if (koutput->connector_type < ARRAY_SIZE(output_names))
+ output_name = output_names[koutput->connector_type];
+ else
+ output_name = "UNKNOWN";
+ snprintf(name, 32, "%s%d", output_name, koutput->connector_type_id);
+
+ output = xf86OutputCreate (scrn, &sna_output_funcs, name);
+ if (!output) {
+ drmModeFreeEncoder(kencoder);
+ drmModeFreeConnector(koutput);
+ return;
+ }
+
+ sna_output = calloc(sizeof(struct sna_output), 1);
+ if (!sna_output) {
+ xf86OutputDestroy(output);
+ drmModeFreeConnector(koutput);
+ drmModeFreeEncoder(kencoder);
+ return;
+ }
+
+ sna_output->output_id = mode->mode_res->connectors[num];
+ sna_output->mode_output = koutput;
+ sna_output->mode_encoder = kencoder;
+ sna_output->mode = mode;
+
+ output->mm_width = koutput->mmWidth;
+ output->mm_height = koutput->mmHeight;
+
+ output->subpixel_order = subpixel_conv_table[koutput->subpixel];
+ output->driver_private = sna_output;
+
+ if (is_panel(koutput->connector_type))
+ sna_output_backlight_init(output);
+
+ output->possible_crtcs = kencoder->possible_crtcs;
+ output->possible_clones = kencoder->possible_clones;
+ output->interlaceAllowed = TRUE;
+
+ sna_output->output = output;
+ list_add(&sna_output->link, &mode->outputs);
+}
+
+struct sna_visit_set_pixmap_window {
+ PixmapPtr old, new;
+};
+
+static int
+sna_visit_set_window_pixmap(WindowPtr window, pointer data)
+{
+ struct sna_visit_set_pixmap_window *visit = data;
+ ScreenPtr screen = window->drawable.pScreen;
+
+ if (screen->GetWindowPixmap(window) == visit->old) {
+ screen->SetWindowPixmap(window, visit->new);
+ return WT_WALKCHILDREN;
+ }
+
+ return WT_DONTWALKCHILDREN;
+}
+
+static void
+sn_redirect_screen_pixmap(ScrnInfoPtr scrn, PixmapPtr old, PixmapPtr new)
+{
+ ScreenPtr screen = scrn->pScreen;
+ struct sna_visit_set_pixmap_window visit;
+
+ visit.old = old;
+ visit.new = new;
+ TraverseTree(screen->root, sna_visit_set_window_pixmap, &visit);
+
+ screen->SetScreenPixmap(new);
+}
+
+static Bool
+sna_xf86crtc_resize(ScrnInfoPtr scrn, int width, int height)
+{
+ struct sna *sna = to_sna(scrn);
+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn);
+ struct sna_mode *mode = &sna->mode;
+ PixmapPtr old_front;
+ uint32_t old_fb_id;
+ struct kgem_bo *bo;
+ int i;
+
+ DBG(("%s (%d, %d) -> (%d, %d)\n",
+ __FUNCTION__,
+ scrn->virtualX, scrn->virtualY,
+ width, height));
+
+ if (scrn->virtualX == width && scrn->virtualY == height)
+ return TRUE;
+
+ kgem_submit(&sna->kgem);
+
+ old_fb_id = mode->fb_id;
+ old_front = sna->front;
+
+ sna->front = scrn->pScreen->CreatePixmap(scrn->pScreen,
+ width, height,
+ scrn->depth,
+ SNA_CREATE_FB);
+ if (!sna->front)
+ goto fail;
+
+ bo = sna_pixmap_pin(sna->front);
+ if (!bo)
+ goto fail;
+
+ if (drmModeAddFB(sna->kgem.fd, width, height,
+ scrn->depth, scrn->bitsPerPixel,
+ bo->pitch, bo->handle,
+ &mode->fb_id)) {
+ ErrorF("%s: failed to add fb: %dx%d depth=%d, bpp=%d, pitch=%d\n",
+ __FUNCTION__,
+ width, height,
+ scrn->depth, scrn->bitsPerPixel, bo->pitch);
+ goto fail;
+ }
+
+ for (i = 0; i < xf86_config->num_crtc; i++) {
+ xf86CrtcPtr crtc = xf86_config->crtc[i];
+
+ if (!crtc->enabled)
+ continue;
+
+ if (!sna_crtc_apply(crtc))
+ goto fail;
+ }
+
+ scrn->virtualX = width;
+ scrn->virtualY = height;
+ scrn->displayWidth = bo->pitch / sna->mode.cpp;
+
+ sn_redirect_screen_pixmap(scrn, old_front, sna->front);
+
+ if (old_fb_id)
+ drmModeRmFB(sna->kgem.fd, old_fb_id);
+ scrn->pScreen->DestroyPixmap(old_front);
+
+ return TRUE;
+
+fail:
+ if (old_fb_id != mode->fb_id)
+ drmModeRmFB(sna->kgem.fd, mode->fb_id);
+ mode->fb_id = old_fb_id;
+
+ if (sna->front)
+ scrn->pScreen->DestroyPixmap(sna->front);
+ sna->front = old_front;
+ return FALSE;
+}
+
+static Bool do_page_flip(struct sna *sna,
+ int ref_crtc_hw_id)
+{
+ xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn);
+ int i;
+
+ /*
+ * Queue flips on all enabled CRTCs
+ * Note that if/when we get per-CRTC buffers, we'll have to update this.
+ * Right now it assumes a single shared fb across all CRTCs, with the
+ * kernel fixing up the offset of each CRTC as necessary.
+ *
+ * Also, flips queued on disabled or incorrectly configured displays
+ * may never complete; this is a configuration error.
+ */
+ for (i = 0; i < config->num_crtc; i++) {
+ struct sna_crtc *crtc = config->crtc[i]->driver_private;
+ uintptr_t data;
+
+ if (!config->crtc[i]->enabled)
+ continue;
+
+ /* Only the reference crtc will finally deliver its page flip
+ * completion event. All other crtc's events will be discarded.
+ */
+
+ data = (uintptr_t)sna;
+ data |= sna_crtc_to_pipe(crtc->crtc) == ref_crtc_hw_id;
+
+ if (drmModePageFlip(sna->kgem.fd,
+ crtc_id(crtc),
+ sna->mode.fb_id,
+ DRM_MODE_PAGE_FLIP_EVENT,
+ (void*)data)) {
+ xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING,
+ "flip queue failed: %s\n", strerror(errno));
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+Bool
+sna_do_pageflip(struct sna *sna,
+ PixmapPtr pixmap,
+ DRI2FrameEventPtr flip_info, int ref_crtc_hw_id)
+{
+ ScrnInfoPtr scrn = sna->scrn;
+ struct sna_mode *mode = &sna->mode;
+ struct kgem_bo *bo = sna_pixmap_pin(pixmap);
+ int old_fb_id;
+
+ if (!bo)
+ return FALSE;
+
+ /*
+ * Create a new handle for the back buffer
+ */
+ old_fb_id = mode->fb_id;
+ if (drmModeAddFB(sna->kgem.fd, scrn->virtualX, scrn->virtualY,
+ scrn->depth, scrn->bitsPerPixel,
+ bo->pitch, bo->handle,
+ &mode->fb_id)) {
+ ErrorF("%s: failed to add fb: %dx%d depth=%d, bpp=%d, pitch=%d\n",
+ __FUNCTION__,
+ scrn->virtualX, scrn->virtualY,
+ scrn->depth, scrn->bitsPerPixel, bo->pitch);
+ return FALSE;
+ }
+
+ kgem_submit(&sna->kgem);
+
+ /*
+ * Queue flips on all enabled CRTCs
+ * Note that if/when we get per-CRTC buffers, we'll have to update this.
+ * Right now it assumes a single shared fb across all CRTCs, with the
+ * kernel fixing up the offset of each CRTC as necessary.
+ *
+ * Also, flips queued on disabled or incorrectly configured displays
+ * may never complete; this is a configuration error.
+ */
+ mode->fe_frame = 0;
+ mode->fe_tv_sec = 0;
+ mode->fe_tv_usec = 0;
+
+ mode->flip_info = flip_info;
+ mode->flip_count++;
+
+ if (do_page_flip(sna, ref_crtc_hw_id)) {
+ PixmapPtr old_front = sna->front;
+
+ sna->front = pixmap;
+ pixmap->refcnt++;
+ sn_redirect_screen_pixmap(scrn, old_front, sna->front);
+ scrn->displayWidth = bo->pitch / sna->mode.cpp;
+
+ drmModeRmFB(sna->kgem.fd, old_fb_id);
+ scrn->pScreen->DestroyPixmap(old_front);
+ return TRUE;
+ } else {
+ drmModeRmFB(sna->kgem.fd, mode->fb_id);
+ mode->fb_id = old_fb_id;
+ return FALSE;
+ }
+}
+
+static const xf86CrtcConfigFuncsRec sna_xf86crtc_config_funcs = {
+ sna_xf86crtc_resize
+};
+
+static void
+sna_vblank_handler(int fd, unsigned int frame, unsigned int tv_sec,
+ unsigned int tv_usec, void *event_data)
+{
+ sna_dri2_frame_event(frame, tv_sec, tv_usec, event_data);
+}
+
+static void
+sna_page_flip_handler(int fd, unsigned int frame, unsigned int tv_sec,
+ unsigned int tv_usec, void *event_data)
+{
+ struct sna *sna = (struct sna *)((uintptr_t)event_data & ~1);
+ struct sna_mode *mode = &sna->mode;
+
+ /* Is this the event whose info shall be delivered to higher level? */
+ if ((uintptr_t)event_data & 1) {
+ /* Yes: Cache msc, ust for later delivery. */
+ mode->fe_frame = frame;
+ mode->fe_tv_sec = tv_sec;
+ mode->fe_tv_usec = tv_usec;
+ }
+
+ /* Last crtc completed flip? */
+ if (--mode->flip_count > 0)
+ return;
+
+ if (mode->flip_info == NULL)
+ return;
+
+ /* Deliver cached msc, ust from reference crtc to flip event handler */
+ sna_dri2_flip_event(mode->fe_frame, mode->fe_tv_sec,
+ mode->fe_tv_usec, mode->flip_info);
+}
+
+static void
+drm_wakeup_handler(pointer data, int err, pointer p)
+{
+ struct sna *sna;
+ fd_set *read_mask;
+
+ if (data == NULL || err < 0)
+ return;
+
+ sna = data;
+ read_mask = p;
+ if (FD_ISSET(sna->kgem.fd, read_mask))
+ drmHandleEvent(sna->kgem.fd, &sna->mode.event_context);
+}
+
+Bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna)
+{
+ struct sna_mode *mode = &sna->mode;
+ unsigned int i;
+
+ list_init(&mode->crtcs);
+ list_init(&mode->outputs);
+
+ xf86CrtcConfigInit(scrn, &sna_xf86crtc_config_funcs);
+
+ mode->mode_res = drmModeGetResources(sna->kgem.fd);
+ if (!mode->mode_res) {
+ xf86DrvMsg(scrn->scrnIndex, X_ERROR,
+ "failed to get resources: %s\n", strerror(errno));
+ return FALSE;
+ }
+
+ xf86CrtcSetSizeRange(scrn,
+ 320, 200,
+ mode->mode_res->max_width,
+ mode->mode_res->max_height);
+ for (i = 0; i < mode->mode_res->count_crtcs; i++)
+ sna_crtc_init(scrn, mode, i);
+
+ for (i = 0; i < mode->mode_res->count_connectors; i++)
+ sna_output_init(scrn, mode, i);
+
+ xf86InitialConfiguration(scrn, TRUE);
+
+ mode->event_context.version = DRM_EVENT_CONTEXT_VERSION;
+ mode->event_context.vblank_handler = sna_vblank_handler;
+ mode->event_context.page_flip_handler = sna_page_flip_handler;
+
+ return TRUE;
+}
+
+void
+sna_mode_init(struct sna *sna)
+{
+ struct sna_mode *mode = &sna->mode;
+
+ /* We need to re-register the mode->fd for the synchronisation
+ * feedback on every server generation, so perform the
+ * registration within ScreenInit and not PreInit.
+ */
+ mode->flip_count = 0;
+ AddGeneralSocket(sna->kgem.fd);
+ RegisterBlockAndWakeupHandlers((BlockHandlerProcPtr)NoopDDA,
+ drm_wakeup_handler, sna);
+}
+
+void
+sna_mode_remove_fb(struct sna *sna)
+{
+ struct sna_mode *mode = &sna->mode;
+
+ if (mode->fb_id) {
+ drmModeRmFB(sna->kgem.fd, mode->fb_id);
+ mode->fb_id = 0;
+ }
+}
+
+void
+sna_mode_fini(struct sna *sna)
+{
+ struct sna_mode *mode = &sna->mode;
+
+#if 0
+ while (!list_is_empty(&mode->crtcs)) {
+ xf86CrtcDestroy(list_first_entry(&mode->crtcs,
+ struct sna_crtc,
+ link)->crtc);
+ }
+
+ while (!list_is_empty(&mode->outputs)) {
+ xf86OutputDestroy(list_first_entry(&mode->outputs,
+ struct sna_output,
+ link)->output);
+ }
+#endif
+
+ if (mode->fb_id) {
+ drmModeRmFB(sna->kgem.fd, mode->fb_id);
+ mode->fb_id = 0;
+ }
+
+ /* mode->shadow_fb_id should have been destroyed already */
+}
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
new file mode 100644
index 00000000..ea84fb21
--- /dev/null
+++ b/src/sna/sna_dri.c
@@ -0,0 +1,1446 @@
+/**************************************************************************
+
+Copyright 2001 VA Linux Systems Inc., Fremont, California.
+Copyright © 2002 by David Dawes
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors: Jeff Hartmann <jhartmann@valinux.com>
+ * David Dawes <dawes@xfree86.org>
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <errno.h>
+
+#include "xf86.h"
+#include "xf86_OSproc.h"
+#include "xf86Priv.h"
+
+#include "xf86PciInfo.h"
+#include "xf86Pci.h"
+
+#include "windowstr.h"
+#include "gcstruct.h"
+
+#include "sna.h"
+#include "sna_reg.h"
+
+#include "i915_drm.h"
+
+#include "dri2.h"
+
+#if DEBUG_DRI
+#undef DBG
+#define DBG(x) ErrorF x
+#endif
+
+struct sna_dri2_private {
+ int refcnt;
+ PixmapPtr pixmap;
+ struct kgem_bo *bo;
+ unsigned int attachment;
+};
+
+static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna,
+ PixmapPtr pixmap)
+{
+ struct sna_pixmap *priv;
+
+ priv = sna_pixmap_force_to_gpu(pixmap);
+ if (priv == NULL)
+ return NULL;
+
+ if (priv->flush)
+ return priv->gpu_bo;
+
+ if (priv->cpu_damage)
+ list_add(&priv->list, &sna->dirty_pixmaps);
+
+ priv->flush = 1;
+ priv->gpu_bo->flush = 1;
+ if (priv->gpu_bo->exec)
+ sna->kgem.flush = 1;
+
+ priv->pinned = 1;
+ return priv->gpu_bo;
+}
+
+#if DRI2INFOREC_VERSION < 2
+static DRI2BufferPtr
+sna_dri2_create_buffers(DrawablePtr drawable, unsigned int *attachments,
+ int count)
+{
+ ScreenPtr screen = drawable->pScreen;
+ ScrnInfoPtr scrn = xf86Screens[screen->myNum];
+ struct sna *sna = to_sna(scrn);
+ DRI2BufferPtr buffers;
+ struct sna_dri2_private *privates;
+ int depth = -1;
+ int i;
+
+ buffers = calloc(count, sizeof *buffers);
+ if (buffers == NULL)
+ return NULL;
+ privates = calloc(count, sizeof *privates);
+ if (privates == NULL) {
+ free(buffers);
+ return NULL;
+ }
+
+ for (i = 0; i < count; i++) {
+ PixmapPtr pixmap = NULL;
+ if (attachments[i] == DRI2BufferFrontLeft) {
+ pixmap = get_drawable_pixmap(drawable);
+ pixmap->refcnt++;
+ bo = sna_pixmap_set_dri(sna, pixmap);
+ } else if (attachments[i] == DRI2BufferBackLeft) {
+ pixmap = screen->CreatePixmap(screen,
+ drawable->width, drawable->height, drawable->depth,
+ 0);
+ if (!pixmap)
+ goto unwind;
+
+ bo = sna_pixmap_set_dri(sna, pixmap);
+ } else if (attachments[i] == DRI2BufferStencil && depth != -1) {
+ buffers[i] = buffers[depth];
+ buffers[i].attachment = attachments[i];
+ privates[depth].refcnt++;
+ continue;
+ } else {
+ unsigned int tiling = I915_TILING_X;
+ if (SUPPORTS_YTILING(intel)) {
+ switch (attachment) {
+ case DRI2BufferDepth:
+ case DRI2BufferDepthStencil:
+ tiling = I915_TILING_Y;
+ break;
+ }
+ }
+
+ bo = kgem_create_2d(&intel->kgem,
+ drawable->width,
+ drawable->height,
+ 32, tiling);
+ if (!bo)
+ goto unwind;
+ }
+
+ if (attachments[i] == DRI2BufferDepth)
+ depth = i;
+
+ buffers[i].attachment = attachments[i];
+ buffers[i].pitch = pitch;
+ buffers[i].cpp = bpp / 8;
+ buffers[i].driverPrivate = &privates[i];
+ buffers[i].flags = 0; /* not tiled */
+ buffers[i].name = kgem_bo_flink(&intel->kgem, bo);
+ privates[i].refcnt = 1;
+ privates[i].pixmap = pixmap;
+ privates[i].bo = bo;
+ privates[i].attachment = attachments[i];
+
+ if (buffers[i].name == 0)
+ goto unwind;
+ }
+
+ return buffers;
+
+unwind:
+ do {
+ if (--privates[i].refcnt == 0) {
+ if (privates[i].pixmap)
+ screen->DestroyPixmap(privates[i].pixmap);
+ else
+ gem_close(privates[i].handle);
+ }
+ } while (i--);
+ free(privates);
+ free(buffers);
+ return NULL;
+}
+
+static void
+sna_dri2_destroy_buffers(DrawablePtr drawable, DRI2BufferPtr buffers, int count)
+{
+ ScreenPtr screen = drawable->pScreen;
+ sna_dri2_private *private;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ private = buffers[i].driverPrivate;
+ if (private->pixmap)
+ screen->DestroyPixmap(private->pixmap);
+ else
+ kgem_delete(&intel->kgem, private->bo);
+ }
+
+ if (buffers) {
+ free(buffers[0].driverPrivate);
+ free(buffers);
+ }
+}
+
+#else
+
+static DRI2Buffer2Ptr
+sna_dri2_create_buffer(DrawablePtr drawable, unsigned int attachment,
+ unsigned int format)
+{
+ ScreenPtr screen = drawable->pScreen;
+ ScrnInfoPtr scrn = xf86Screens[screen->myNum];
+ struct sna *sna = to_sna(scrn);
+ DRI2Buffer2Ptr buffer;
+ struct sna_dri2_private *private;
+ PixmapPtr pixmap;
+ struct kgem_bo *bo;
+ int bpp, usage;
+
+ DBG(("%s(attachment=%d, format=%d)\n",
+ __FUNCTION__, attachment, format));
+
+ buffer = calloc(1, sizeof *buffer + sizeof *private);
+ if (buffer == NULL)
+ return NULL;
+ private = (struct sna_dri2_private *)(buffer + 1);
+
+ pixmap = NULL;
+ usage = CREATE_PIXMAP_USAGE_SCRATCH;
+ switch (attachment) {
+ case DRI2BufferFrontLeft:
+ pixmap = get_drawable_pixmap(drawable);
+ pixmap->refcnt++;
+ bo = sna_pixmap_set_dri(sna, pixmap);
+ bpp = pixmap->drawable.bitsPerPixel;
+ break;
+
+ case DRI2BufferFakeFrontLeft:
+ case DRI2BufferFakeFrontRight:
+ usage = 0;
+ case DRI2BufferFrontRight:
+ case DRI2BufferBackLeft:
+ case DRI2BufferBackRight:
+ pixmap = screen->CreatePixmap(screen,
+ drawable->width,
+ drawable->height,
+ drawable->depth,
+ usage);
+ if (!pixmap)
+ goto err;
+
+ bo = sna_pixmap_set_dri(sna, pixmap);
+ bpp = pixmap->drawable.bitsPerPixel;
+ break;
+
+ default:
+ bpp = format ? format : drawable->bitsPerPixel,
+ bo = kgem_create_2d(&sna->kgem,
+ drawable->width, drawable->height, bpp,
+ //sna->kgem.gen >= 40 ? I915_TILING_Y : I915_TILING_X,
+ I915_TILING_Y,
+ CREATE_EXACT);
+ break;
+ }
+ if (bo == NULL)
+ goto err;
+
+ buffer->attachment = attachment;
+ buffer->pitch = bo->pitch;
+ buffer->cpp = bpp / 8;
+ buffer->driverPrivate = private;
+ buffer->format = format;
+ buffer->flags = 0;
+ buffer->name = kgem_bo_flink(&sna->kgem, bo);
+ private->refcnt = 1;
+ private->pixmap = pixmap;
+ private->bo = bo;
+ private->attachment = attachment;
+
+ if (buffer->name == 0) {
+ /* failed to name buffer */
+ if (pixmap)
+ screen->DestroyPixmap(pixmap);
+ else
+ kgem_bo_destroy(&sna->kgem, bo);
+ goto err;
+ }
+
+ return buffer;
+
+err:
+ free(buffer);
+ return NULL;
+}
+
+static void sna_dri2_destroy_buffer(DrawablePtr drawable, DRI2Buffer2Ptr buffer)
+{
+ if (buffer && buffer->driverPrivate) {
+ struct sna_dri2_private *private = buffer->driverPrivate;
+ if (--private->refcnt == 0) {
+ if (private->pixmap) {
+ ScreenPtr screen = private->pixmap->drawable.pScreen;
+ screen->DestroyPixmap(private->pixmap);
+ } else {
+ struct sna *sna = to_sna_from_drawable(drawable);
+ kgem_bo_destroy(&sna->kgem, private->bo);
+ }
+
+ free(buffer);
+ }
+ } else
+ free(buffer);
+}
+
+#endif
+
+static void sna_dri2_reference_buffer(DRI2Buffer2Ptr buffer)
+{
+ if (buffer) {
+ struct sna_dri2_private *private = buffer->driverPrivate;
+ private->refcnt++;
+ }
+}
+
+static void damage(DrawablePtr drawable, RegionPtr region)
+{
+ PixmapPtr pixmap;
+ struct sna_pixmap *priv;
+ int16_t dx, dy;
+
+ pixmap = get_drawable_pixmap(drawable);
+ get_drawable_deltas(drawable, pixmap, &dx, &dy);
+
+ priv = sna_pixmap(pixmap);
+ if (priv->gpu_only)
+ return;
+
+ if (region) {
+ BoxPtr box;
+
+ RegionTranslate(region,
+ drawable->x + dx,
+ drawable->y + dy);
+ box = RegionExtents(region);
+ if (RegionNumRects(region) == 1 &&
+ box->x1 <= 0 && box->y1 <= 0 &&
+ box->x2 >= pixmap->drawable.width &&
+ box->y2 >= pixmap->drawable.height) {
+ sna_damage_all(&priv->gpu_damage,
+ pixmap->drawable.width,
+ pixmap->drawable.height);
+ sna_damage_destroy(&priv->cpu_damage);
+ } else {
+ sna_damage_add(&priv->gpu_damage, region);
+ sna_damage_subtract(&priv->cpu_damage, region);
+ }
+
+ RegionTranslate(region,
+ -(drawable->x + dx),
+ -(drawable->y + dy));
+ } else {
+ BoxRec box;
+
+ box.x1 = drawable->x + dx;
+ box.x2 = box.x1 + drawable->width;
+
+ box.y1 = drawable->y + dy;
+ box.y2 = box.y1 + drawable->height;
+ if (box.x1 == 0 && box.y1 == 0 &&
+ box.x2 == pixmap->drawable.width &&
+ box.y2 == pixmap->drawable.height) {
+ sna_damage_all(&priv->gpu_damage,
+ pixmap->drawable.width,
+ pixmap->drawable.height);
+ sna_damage_destroy(&priv->cpu_damage);
+ } else {
+ sna_damage_add_box(&priv->gpu_damage, &box);
+ sna_damage_subtract_box(&priv->gpu_damage, &box);
+ }
+ }
+}
+
+static void
+sna_dri2_copy_region(DrawablePtr drawable, RegionPtr region,
+ DRI2BufferPtr destBuffer, DRI2BufferPtr sourceBuffer)
+{
+ struct sna *sna = to_sna_from_drawable(drawable);
+ struct sna_dri2_private *srcPrivate = sourceBuffer->driverPrivate;
+ struct sna_dri2_private *dstPrivate = destBuffer->driverPrivate;
+ ScreenPtr screen = drawable->pScreen;
+ DrawablePtr src = (srcPrivate->attachment == DRI2BufferFrontLeft)
+ ? drawable : &srcPrivate->pixmap->drawable;
+ DrawablePtr dst = (dstPrivate->attachment == DRI2BufferFrontLeft)
+ ? drawable : &dstPrivate->pixmap->drawable;
+ GCPtr gc;
+ bool flush = false;
+
+ DBG(("%s(region=(%d, %d), (%d, %d)))\n", __FUNCTION__,
+ region ? REGION_EXTENTS(NULL, region)->x1 : 0,
+ region ? REGION_EXTENTS(NULL, region)->y1 : 0,
+ region ? REGION_EXTENTS(NULL, region)->x2 : dst->width,
+ region ? REGION_EXTENTS(NULL, region)->y2 : dst->height));
+
+ gc = GetScratchGC(dst->depth, screen);
+ if (!gc)
+ return;
+
+ if (region) {
+ RegionPtr clip;
+
+ clip = REGION_CREATE(screen, NULL, 0);
+ pixman_region_intersect_rect(clip, region,
+ 0, 0, dst->width, dst->height);
+ (*gc->funcs->ChangeClip)(gc, CT_REGION, clip, 0);
+ region = clip;
+ }
+ ValidateGC(dst, gc);
+
+ /* Invalidate src to reflect unknown modifications made by the client */
+ damage(src, region);
+
+ /* Wait for the scanline to be outside the region to be copied */
+ if (sna->flags & SNA_SWAP_WAIT)
+ flush = sna_wait_for_scanline(sna, get_drawable_pixmap(dst),
+ NULL, region);
+
+ /* It's important that this copy gets submitted before the
+ * direct rendering client submits rendering for the next
+ * frame, but we don't actually need to submit right now. The
+ * client will wait for the DRI2CopyRegion reply or the swap
+ * buffer event before rendering, and we'll hit the flush
+ * callback chain before those messages are sent. We submit
+ * our batch buffers from the flush callback chain so we know
+ * that will happen before the client tries to render
+ * again.
+ */
+ gc->ops->CopyArea(src, dst, gc,
+ 0, 0,
+ drawable->width, drawable->height,
+ 0, 0);
+ FreeScratchGC(gc);
+
+ DBG(("%s: flushing? %d\n", __FUNCTION__, flush));
+ if (flush) /* STAT! */
+ kgem_submit(&sna->kgem);
+}
+
+#if DRI2INFOREC_VERSION >= 4
+
+
+static int
+sna_dri2_get_pipe(DrawablePtr pDraw)
+{
+ ScreenPtr pScreen = pDraw->pScreen;
+ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+ BoxRec box, crtcbox;
+ xf86CrtcPtr crtc;
+ int pipe = -1;
+
+ box.x1 = pDraw->x;
+ box.y1 = pDraw->y;
+ box.x2 = box.x1 + pDraw->width;
+ box.y2 = box.y1 + pDraw->height;
+
+ crtc = sna_covering_crtc(pScrn, &box, NULL, &crtcbox);
+
+ /* Make sure the CRTC is valid and this is the real front buffer */
+ if (crtc != NULL && !crtc->rotatedData)
+ pipe = sna_crtc_to_pipe(crtc);
+
+ DBG(("%s(box=((%d, %d), (%d, %d)), crtcbox=((%d, %d), (%d, %d)), pipe=%d)\n",
+ __FUNCTION__,
+ box.x1, box.y1, box.x2, box.y2,
+ crtcbox.x1, crtcbox.y1, crtcbox.x2, crtcbox.y2,
+ pipe));
+
+ return pipe;
+}
+
+static RESTYPE frame_event_client_type, frame_event_drawable_type;
+
+static int
+sna_dri2_frame_event_client_gone(void *data, XID id)
+{
+ DRI2FrameEventPtr frame_event = data;
+
+ frame_event->client = NULL;
+ frame_event->client_id = None;
+ return Success;
+}
+
+static int
+sna_dri2_frame_event_drawable_gone(void *data, XID id)
+{
+ DRI2FrameEventPtr frame_event = data;
+
+ frame_event->drawable_id = None;
+ return Success;
+}
+
+static Bool
+sna_dri2_register_frame_event_resource_types(void)
+{
+ frame_event_client_type =
+ CreateNewResourceType(sna_dri2_frame_event_client_gone,
+ "Frame Event Client");
+ if (!frame_event_client_type)
+ return FALSE;
+
+ frame_event_drawable_type =
+ CreateNewResourceType(sna_dri2_frame_event_drawable_gone,
+ "Frame Event Drawable");
+ if (!frame_event_drawable_type)
+ return FALSE;
+
+ return TRUE;
+}
+
+/*
+ * Hook this frame event into the server resource
+ * database so we can clean it up if the drawable or
+ * client exits while the swap is pending
+ */
+static Bool
+sna_dri2_add_frame_event(DRI2FrameEventPtr frame_event)
+{
+ frame_event->client_id = FakeClientID(frame_event->client->index);
+
+ if (!AddResource(frame_event->client_id,
+ frame_event_client_type,
+ frame_event))
+ return FALSE;
+
+ if (!AddResource(frame_event->drawable_id,
+ frame_event_drawable_type,
+ frame_event)) {
+ FreeResourceByType(frame_event->client_id,
+ frame_event_client_type,
+ TRUE);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static void
+sna_dri2_del_frame_event(DRI2FrameEventPtr frame_event)
+{
+ if (frame_event->client_id)
+ FreeResourceByType(frame_event->client_id,
+ frame_event_client_type,
+ TRUE);
+
+ if (frame_event->drawable_id)
+ FreeResourceByType(frame_event->drawable_id,
+ frame_event_drawable_type,
+ TRUE);
+}
+
+static void
+sna_dri2_exchange_buffers(DrawablePtr draw,
+ DRI2BufferPtr front, DRI2BufferPtr back)
+{
+ struct sna_dri2_private *front_priv, *back_priv;
+ struct sna_pixmap *front_sna, *back_sna;
+ struct kgem_bo *bo;
+ int tmp;
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ front_priv = front->driverPrivate;
+ back_priv = back->driverPrivate;
+
+ front_sna = sna_pixmap(front_priv->pixmap);
+ back_sna = sna_pixmap(back_priv->pixmap);
+
+ /* Force a copy/readback for the next CPU access */
+ if (!front_sna->gpu_only) {
+ sna_damage_all(&front_sna->gpu_damage,
+ front_priv->pixmap->drawable.width,
+ front_priv->pixmap->drawable.height);
+ sna_damage_destroy(&front_sna->cpu_damage);
+ }
+ if (front_sna->mapped) {
+ munmap(front_priv->pixmap->devPrivate.ptr,
+ front_sna->gpu_bo->size);
+ front_sna->mapped = false;
+ }
+ if (!back_sna->gpu_only) {
+ sna_damage_all(&back_sna->gpu_damage,
+ back_priv->pixmap->drawable.width,
+ back_priv->pixmap->drawable.height);
+ sna_damage_destroy(&back_sna->cpu_damage);
+ }
+ if (back_sna->mapped) {
+ munmap(back_priv->pixmap->devPrivate.ptr,
+ back_sna->gpu_bo->size);
+ back_sna->mapped = false;
+ }
+
+ /* Swap BO names so DRI works */
+ tmp = front->name;
+ front->name = back->name;
+ back->name = tmp;
+
+ /* and swap bo so future flips work */
+ bo = front_priv->bo;
+ front_priv->bo = back_priv->bo;
+ back_priv->bo = bo;
+
+ bo = front_sna->gpu_bo;
+ front_sna->gpu_bo = back_sna->gpu_bo;
+ back_sna->gpu_bo = bo;
+}
+
+/*
+ * Our internal swap routine takes care of actually exchanging, blitting, or
+ * flipping buffers as necessary.
+ */
+static Bool
+sna_dri2_schedule_flip(struct sna *sna,
+ ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
+ DRI2BufferPtr back, DRI2SwapEventPtr func, void *data,
+ unsigned int target_msc)
+{
+ struct sna_dri2_private *back_priv;
+ DRI2FrameEventPtr flip_info;
+
+ /* Main crtc for this drawable shall finally deliver pageflip event. */
+ int ref_crtc_hw_id = sna_dri2_get_pipe(draw);
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ flip_info = calloc(1, sizeof(DRI2FrameEventRec));
+ if (!flip_info)
+ return FALSE;
+
+ flip_info->drawable_id = draw->id;
+ flip_info->client = client;
+ flip_info->type = DRI2_SWAP;
+ flip_info->event_complete = func;
+ flip_info->event_data = data;
+ flip_info->frame = target_msc;
+
+ if (!sna_dri2_add_frame_event(flip_info)) {
+ free(flip_info);
+ return FALSE;
+ }
+
+ /* Page flip the full screen buffer */
+ back_priv = back->driverPrivate;
+ if (sna_do_pageflip(sna,
+ back_priv->pixmap,
+ flip_info, ref_crtc_hw_id))
+ return TRUE;
+
+ sna_dri2_del_frame_event(flip_info);
+ free(flip_info);
+ return FALSE;
+}
+
+static Bool
+can_exchange(DRI2BufferPtr front, DRI2BufferPtr back)
+{
+ struct sna_dri2_private *front_priv = front->driverPrivate;
+ struct sna_dri2_private *back_priv = back->driverPrivate;
+ PixmapPtr front_pixmap = front_priv->pixmap;
+ PixmapPtr back_pixmap = back_priv->pixmap;
+ struct sna_pixmap *front_sna = sna_pixmap(front_pixmap);
+ struct sna_pixmap *back_sna = sna_pixmap(back_pixmap);
+
+ if (front_pixmap->drawable.width != back_pixmap->drawable.width) {
+ DBG(("%s -- no, size mismatch: front width=%d, back=%d\n",
+ __FUNCTION__,
+ front_pixmap->drawable.width,
+ back_pixmap->drawable.width));
+ return FALSE;
+ }
+
+ if (front_pixmap->drawable.height != back_pixmap->drawable.height) {
+ DBG(("%s -- no, size mismatch: front height=%d, back=%d\n",
+ __FUNCTION__,
+ front_pixmap->drawable.height,
+ back_pixmap->drawable.height));
+ return FALSE;
+ }
+
+ if (front_pixmap->drawable.bitsPerPixel != back_pixmap->drawable.bitsPerPixel) {
+ DBG(("%s -- no, depth mismatch: front bpp=%d, back=%d\n",
+ __FUNCTION__,
+ front_pixmap->drawable.bitsPerPixel,
+ back_pixmap->drawable.bitsPerPixel));
+ return FALSE;
+ }
+
+ /* prevent an implicit tiling mode change */
+ if (front_sna->gpu_bo->tiling != back_sna->gpu_bo->tiling) {
+ DBG(("%s -- no, tiling mismatch: front %d, back=%d\n",
+ __FUNCTION__,
+ front_sna->gpu_bo->tiling,
+ back_sna->gpu_bo->tiling));
+ return FALSE;
+ }
+
+ if (front_sna->gpu_only != back_sna->gpu_only) {
+ DBG(("%s -- no, mismatch in gpu_only: front %d, back=%d\n",
+ __FUNCTION__, front_sna->gpu_only, back_sna->gpu_only));
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+void sna_dri2_frame_event(unsigned int frame, unsigned int tv_sec,
+ unsigned int tv_usec, DRI2FrameEventPtr swap_info)
+{
+ DrawablePtr drawable;
+ ScreenPtr screen;
+ ScrnInfoPtr scrn;
+ struct sna *sna;
+ int status;
+
+ DBG(("%s(id=%d, type=%d)\n", __FUNCTION__,
+ (int)swap_info->drawable_id, swap_info->type));
+
+ status = BadDrawable;
+ if (swap_info->drawable_id)
+ status = dixLookupDrawable(&drawable,
+ swap_info->drawable_id,
+ serverClient,
+ M_ANY, DixWriteAccess);
+ if (status != Success)
+ goto done;
+
+ screen = drawable->pScreen;
+ scrn = xf86Screens[screen->myNum];
+ sna = to_sna(scrn);
+
+ switch (swap_info->type) {
+ case DRI2_FLIP:
+ /* If we can still flip... */
+ if (DRI2CanFlip(drawable) &&
+ !sna->shadow &&
+ can_exchange(swap_info->front, swap_info->back) &&
+ sna_dri2_schedule_flip(sna,
+ swap_info->client,
+ drawable,
+ swap_info->front,
+ swap_info->back,
+ swap_info->event_complete,
+ swap_info->event_data,
+ swap_info->frame)) {
+ sna_dri2_exchange_buffers(drawable,
+ swap_info->front,
+ swap_info->back);
+ break;
+ }
+ /* else fall through to exchange/blit */
+ case DRI2_SWAP: {
+ int swap_type;
+
+ if (DRI2CanExchange(drawable) &&
+ can_exchange(swap_info->front, swap_info->back)) {
+ sna_dri2_exchange_buffers(drawable,
+ swap_info->front,
+ swap_info->back);
+ swap_type = DRI2_EXCHANGE_COMPLETE;
+ } else {
+ sna_dri2_copy_region(drawable, NULL,
+ swap_info->front,
+ swap_info->back);
+ swap_type = DRI2_BLIT_COMPLETE;
+ }
+ DRI2SwapComplete(swap_info->client,
+ drawable, frame,
+ tv_sec, tv_usec,
+ swap_type,
+ swap_info->client ? swap_info->event_complete : NULL,
+ swap_info->event_data);
+ break;
+ }
+ case DRI2_WAITMSC:
+ if (swap_info->client)
+ DRI2WaitMSCComplete(swap_info->client, drawable,
+ frame, tv_sec, tv_usec);
+ break;
+ default:
+ xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+ "%s: unknown vblank event received\n", __func__);
+ /* Unknown type */
+ break;
+ }
+
+done:
+ sna_dri2_del_frame_event(swap_info);
+ sna_dri2_destroy_buffer(drawable, swap_info->front);
+ sna_dri2_destroy_buffer(drawable, swap_info->back);
+ free(swap_info);
+}
+
+void sna_dri2_flip_event(unsigned int frame, unsigned int tv_sec,
+ unsigned int tv_usec, DRI2FrameEventPtr flip)
+{
+ DrawablePtr drawable;
+ ScreenPtr screen;
+ ScrnInfoPtr scrn;
+ int status;
+
+ DBG(("%s(frame=%d, tv=%d.%06d, type=%d)\n",
+ __FUNCTION__, frame, tv_sec, tv_usec, flip->type));
+
+ if (!flip->drawable_id)
+ status = BadDrawable;
+ else
+ status = dixLookupDrawable(&drawable,
+ flip->drawable_id,
+ serverClient,
+ M_ANY, DixWriteAccess);
+ if (status != Success) {
+ sna_dri2_del_frame_event(flip);
+ free(flip);
+ return;
+ }
+
+ screen = drawable->pScreen;
+ scrn = xf86Screens[screen->myNum];
+
+ /* We assume our flips arrive in order, so we don't check the frame */
+ switch (flip->type) {
+ case DRI2_SWAP:
+ /* Check for too small vblank count of pageflip completion, taking wraparound
+ * into account. This usually means some defective kms pageflip completion,
+ * causing wrong (msc, ust) return values and possible visual corruption.
+ */
+ if ((frame < flip->frame) && (flip->frame - frame < 5)) {
+ static int limit = 5;
+
+ /* XXX we are currently hitting this path with older
+ * kernels, so make it quieter.
+ */
+ if (limit) {
+ xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+ "%s: Pageflip completion has impossible msc %d < target_msc %d\n",
+ __func__, frame, flip->frame);
+ limit--;
+ }
+
+ /* All-0 values signal timestamping failure. */
+ frame = tv_sec = tv_usec = 0;
+ }
+
+ DBG(("%s: swap complete\n", __FUNCTION__));
+ DRI2SwapComplete(flip->client, drawable, frame, tv_sec, tv_usec,
+ DRI2_FLIP_COMPLETE, flip->client ? flip->event_complete : NULL,
+ flip->event_data);
+ break;
+ case DRI2_ASYNC_SWAP:
+ DBG(("%s: asunc swap flip completed\n", __FUNCTION__));
+ to_sna(scrn)->mode.flip_pending[flip->pipe]--;
+ break;
+ default:
+ xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+ "%s: unknown vblank event received\n", __func__);
+ /* Unknown type */
+ break;
+ }
+
+ sna_dri2_del_frame_event(flip);
+ free(flip);
+}
+
+/*
+ * ScheduleSwap is responsible for requesting a DRM vblank event for the
+ * appropriate frame.
+ *
+ * In the case of a blit (e.g. for a windowed swap) or buffer exchange,
+ * the vblank requested can simply be the last queued swap frame + the swap
+ * interval for the drawable.
+ *
+ * In the case of a page flip, we request an event for the last queued swap
+ * frame + swap interval - 1, since we'll need to queue the flip for the frame
+ * immediately following the received event.
+ *
+ * The client will be blocked if it tries to perform further GL commands
+ * after queueing a swap, though in the Intel case after queueing a flip, the
+ * client is free to queue more commands; they'll block in the kernel if
+ * they access buffers busy with the flip.
+ *
+ * When the swap is complete, the driver should call into the server so it
+ * can send any swap complete events that have been requested.
+ */
+static int
+sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
+ DRI2BufferPtr back, CARD64 *target_msc, CARD64 divisor,
+ CARD64 remainder, DRI2SwapEventPtr func, void *data)
+{
+ ScreenPtr screen = draw->pScreen;
+ ScrnInfoPtr scrn = xf86Screens[screen->myNum];
+ struct sna *sna = to_sna(scrn);
+ drmVBlank vbl;
+ int ret, pipe = sna_dri2_get_pipe(draw), flip = 0;
+ DRI2FrameEventPtr swap_info = NULL;
+ enum DRI2FrameEventType swap_type = DRI2_SWAP;
+ CARD64 current_msc;
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ /* Drawable not displayed... just complete the swap */
+ if (pipe == -1)
+ goto blit_fallback;
+
+ /* Truncate to match kernel interfaces; means occasional overflow
+ * misses, but that's generally not a big deal */
+ *target_msc &= 0xffffffff;
+ divisor &= 0xffffffff;
+ remainder &= 0xffffffff;
+
+ swap_info = calloc(1, sizeof(DRI2FrameEventRec));
+ if (!swap_info)
+ goto blit_fallback;
+
+ swap_info->drawable_id = draw->id;
+ swap_info->client = client;
+ swap_info->event_complete = func;
+ swap_info->event_data = data;
+ swap_info->front = front;
+ swap_info->back = back;
+
+ if (!sna_dri2_add_frame_event(swap_info)) {
+ free(swap_info);
+ swap_info = NULL;
+ goto blit_fallback;
+ }
+
+ sna_dri2_reference_buffer(front);
+ sna_dri2_reference_buffer(back);
+
+ /* Get current count */
+ vbl.request.type = DRM_VBLANK_RELATIVE;
+ if (pipe > 0)
+ vbl.request.type |= DRM_VBLANK_SECONDARY;
+ vbl.request.sequence = 0;
+ ret = drmWaitVBlank(sna->kgem.fd, &vbl);
+ if (ret) {
+ xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+ "first get vblank counter failed: %s\n",
+ strerror(errno));
+ goto blit_fallback;
+ }
+
+ current_msc = vbl.reply.sequence;
+
+ /* Flips need to be submitted one frame before */
+ if (!sna->shadow && DRI2CanFlip(draw) && can_exchange(front, back)) {
+ DBG(("%s: can flip\n", __FUNCTION__));
+ swap_type = DRI2_FLIP;
+ flip = 1;
+ }
+
+ swap_info->type = swap_type;
+
+ /* Correct target_msc by 'flip' if swap_type == DRI2_FLIP.
+ * Do it early, so handling of different timing constraints
+ * for divisor, remainder and msc vs. target_msc works.
+ */
+ if (*target_msc > 0)
+ *target_msc -= flip;
+
+ /*
+ * If divisor is zero, or current_msc is smaller than target_msc
+ * we just need to make sure target_msc passes before initiating
+ * the swap.
+ */
+ if (divisor == 0 || current_msc < *target_msc) {
+ vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT;
+ if (pipe > 0)
+ vbl.request.type |= DRM_VBLANK_SECONDARY;
+
+ /* If non-pageflipping, but blitting/exchanging, we need to use
+ * DRM_VBLANK_NEXTONMISS to avoid unreliable timestamping later
+ * on.
+ */
+ if (flip == 0)
+ vbl.request.type |= DRM_VBLANK_NEXTONMISS;
+ if (pipe > 0)
+ vbl.request.type |= DRM_VBLANK_SECONDARY;
+
+ /* If target_msc already reached or passed, set it to
+ * current_msc to ensure we return a reasonable value back
+ * to the caller. This makes swap_interval logic more robust.
+ */
+ if (current_msc >= *target_msc)
+ *target_msc = current_msc;
+
+ vbl.request.sequence = *target_msc;
+ vbl.request.signal = (unsigned long)swap_info;
+ ret = drmWaitVBlank(sna->kgem.fd, &vbl);
+ if (ret) {
+ xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+ "divisor 0 get vblank counter failed: %s\n",
+ strerror(errno));
+ goto blit_fallback;
+ }
+
+ *target_msc = vbl.reply.sequence + flip;
+ swap_info->frame = *target_msc;
+ return TRUE;
+ }
+
+ /*
+ * If we get here, target_msc has already passed or we don't have one,
+ * and we need to queue an event that will satisfy the divisor/remainder
+ * equation.
+ */
+ vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT;
+ if (flip == 0)
+ vbl.request.type |= DRM_VBLANK_NEXTONMISS;
+ if (pipe > 0)
+ vbl.request.type |= DRM_VBLANK_SECONDARY;
+
+ vbl.request.sequence = current_msc - (current_msc % divisor) +
+ remainder;
+
+ /*
+ * If the calculated deadline vbl.request.sequence is smaller than
+ * or equal to current_msc, it means we've passed the last point
+ * when effective onset frame seq could satisfy
+ * seq % divisor == remainder, so we need to wait for the next time
+ * this will happen.
+
+ * This comparison takes the 1 frame swap delay in pageflipping mode
+ * into account, as well as a potential DRM_VBLANK_NEXTONMISS delay
+ * if we are blitting/exchanging instead of flipping.
+ */
+ if (vbl.request.sequence <= current_msc)
+ vbl.request.sequence += divisor;
+
+ /* Account for 1 frame extra pageflip delay if flip > 0 */
+ vbl.request.sequence -= flip;
+
+ vbl.request.signal = (unsigned long)swap_info;
+ ret = drmWaitVBlank(sna->kgem.fd, &vbl);
+ if (ret) {
+ xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+ "final get vblank counter failed: %s\n",
+ strerror(errno));
+ goto blit_fallback;
+ }
+
+ /* Adjust returned value for 1 fame pageflip offset of flip > 0 */
+ *target_msc = vbl.reply.sequence + flip;
+ swap_info->frame = *target_msc;
+ return TRUE;
+
+blit_fallback:
+ DBG(("%s -- blit\n", __FUNCTION__));
+ sna_dri2_copy_region(draw, NULL, front, back);
+
+ DRI2SwapComplete(client, draw, 0, 0, 0, DRI2_BLIT_COMPLETE, func, data);
+ if (swap_info) {
+ sna_dri2_del_frame_event(swap_info);
+ sna_dri2_destroy_buffer(draw, swap_info->front);
+ sna_dri2_destroy_buffer(draw, swap_info->back);
+ free(swap_info);
+ }
+ *target_msc = 0; /* offscreen, so zero out target vblank count */
+ return TRUE;
+}
+
+#if DRI2INFOREC_VERSION >= 6
+static void
+sna_dri2_async_swap(ClientPtr client, DrawablePtr draw,
+ DRI2BufferPtr front, DRI2BufferPtr back,
+ DRI2SwapEventPtr func, void *data)
+{
+ ScreenPtr screen = draw->pScreen;
+ ScrnInfoPtr scrn = xf86Screens[screen->myNum];
+ struct sna *sna = to_sna(scrn);
+ int pipe = sna_dri2_get_pipe(draw);
+ int type = DRI2_EXCHANGE_COMPLETE;
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ /* Drawable not displayed... just complete the swap */
+ if (pipe == -1)
+ goto exchange;
+
+ if (sna->shadow ||
+ !DRI2CanFlip(draw) ||
+ !can_exchange(front, back)) {
+ sna_dri2_copy_region(draw, NULL, front, back);
+ DRI2SwapComplete(client, draw, 0, 0, 0,
+ DRI2_BLIT_COMPLETE, func, data);
+ return;
+ }
+
+ if (!sna->mode.flip_pending[pipe]) {
+ DRI2FrameEventPtr info;
+ struct sna_dri2_private *backPrivate = back->driverPrivate;
+ DrawablePtr src = &backPrivate->pixmap->drawable;
+ PixmapPtr copy;
+ GCPtr gc;
+
+ copy = screen->CreatePixmap(screen,
+ src->width, src->height, src->depth,
+ 0);
+ if (!copy)
+ goto exchange;
+
+ if (!sna_pixmap_force_to_gpu(copy)) {
+ screen->DestroyPixmap(copy);
+ goto exchange;
+ }
+
+ /* copy back to new buffer, and schedule flip */
+ gc = GetScratchGC(src->depth, screen);
+ if (!gc) {
+ screen->DestroyPixmap(copy);
+ goto exchange;
+ }
+ ValidateGC(src, gc);
+
+ gc->ops->CopyArea(src, &copy->drawable, gc,
+ 0, 0,
+ draw->width, draw->height,
+ 0, 0);
+ FreeScratchGC(gc);
+
+ info = calloc(1, sizeof(DRI2FrameEventRec));
+ if (!info) {
+ screen->DestroyPixmap(copy);
+ goto exchange;
+ }
+
+ info->drawable_id = draw->id;
+ info->client = client;
+ info->type = DRI2_ASYNC_SWAP;
+ info->pipe = pipe;
+
+ sna->mode.flip_pending[pipe]++;
+ sna_do_pageflip(sna, copy, info,
+ sna_dri2_get_pipe(draw));
+ screen->DestroyPixmap(copy);
+
+ type = DRI2_FLIP_COMPLETE;
+ }
+
+exchange:
+ sna_dri2_exchange_buffers(draw, front, back);
+ DRI2SwapComplete(client, draw, 0, 0, 0, type, func, data);
+}
+#endif
+
+/*
+ * Get current frame count and frame count timestamp, based on drawable's
+ * crtc.
+ */
+static int
+sna_dri2_get_msc(DrawablePtr draw, CARD64 *ust, CARD64 *msc)
+{
+ ScreenPtr screen = draw->pScreen;
+ ScrnInfoPtr scrn = xf86Screens[screen->myNum];
+ struct sna *sna = to_sna(scrn);
+ drmVBlank vbl;
+ int ret, pipe = sna_dri2_get_pipe(draw);
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ /* Drawable not displayed, make up a value */
+ if (pipe == -1) {
+ *ust = 0;
+ *msc = 0;
+ return TRUE;
+ }
+
+ vbl.request.type = DRM_VBLANK_RELATIVE;
+ if (pipe > 0)
+ vbl.request.type |= DRM_VBLANK_SECONDARY;
+ vbl.request.sequence = 0;
+
+ ret = drmWaitVBlank(sna->kgem.fd, &vbl);
+ if (ret) {
+ static int limit = 5;
+ if (limit) {
+ xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+ "%s:%d get vblank counter failed: %s\n",
+ __FUNCTION__, __LINE__,
+ strerror(errno));
+ limit--;
+ }
+ return FALSE;
+ }
+
+ *ust = ((CARD64)vbl.reply.tval_sec * 1000000) + vbl.reply.tval_usec;
+ *msc = vbl.reply.sequence;
+
+ return TRUE;
+}
+
+/*
+ * Request a DRM event when the requested conditions will be satisfied.
+ *
+ * We need to handle the event and ask the server to wake up the client when
+ * we receive it.
+ */
+static int
+sna_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc,
+ CARD64 divisor, CARD64 remainder)
+{
+ ScreenPtr screen = draw->pScreen;
+ ScrnInfoPtr scrn = xf86Screens[screen->myNum];
+ struct sna *sna = to_sna(scrn);
+ DRI2FrameEventPtr wait_info;
+ drmVBlank vbl;
+ int ret, pipe = sna_dri2_get_pipe(draw);
+ CARD64 current_msc;
+
+ DBG(("%s(target_msc=%llu, divisor=%llu, rem=%llu)\n",
+ __FUNCTION__,
+ (long long)target_msc,
+ (long long)divisor,
+ (long long)remainder));
+
+ /* Truncate to match kernel interfaces; means occasional overflow
+ * misses, but that's generally not a big deal */
+ target_msc &= 0xffffffff;
+ divisor &= 0xffffffff;
+ remainder &= 0xffffffff;
+
+ /* Drawable not visible, return immediately */
+ if (pipe == -1)
+ goto out_complete;
+
+ wait_info = calloc(1, sizeof(DRI2FrameEventRec));
+ if (!wait_info)
+ goto out_complete;
+
+ wait_info->drawable_id = draw->id;
+ wait_info->client = client;
+ wait_info->type = DRI2_WAITMSC;
+
+ /* Get current count */
+ vbl.request.type = DRM_VBLANK_RELATIVE;
+ if (pipe > 0)
+ vbl.request.type |= DRM_VBLANK_SECONDARY;
+ vbl.request.sequence = 0;
+ ret = drmWaitVBlank(sna->kgem.fd, &vbl);
+ if (ret) {
+ static int limit = 5;
+ if (limit) {
+ xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+ "%s:%d get vblank counter failed: %s\n",
+ __FUNCTION__, __LINE__,
+ strerror(errno));
+ limit--;
+ }
+ goto out_complete;
+ }
+
+ current_msc = vbl.reply.sequence;
+
+ /*
+ * If divisor is zero, or current_msc is smaller than target_msc,
+ * we just need to make sure target_msc passes before waking up the
+ * client.
+ */
+ if (divisor == 0 || current_msc < target_msc) {
+ /* If target_msc already reached or passed, set it to
+ * current_msc to ensure we return a reasonable value back
+ * to the caller. This keeps the client from continually
+ * sending us MSC targets from the past by forcibly updating
+ * their count on this call.
+ */
+ if (current_msc >= target_msc)
+ target_msc = current_msc;
+ vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT;
+ if (pipe > 0)
+ vbl.request.type |= DRM_VBLANK_SECONDARY;
+ vbl.request.sequence = target_msc;
+ vbl.request.signal = (unsigned long)wait_info;
+ ret = drmWaitVBlank(sna->kgem.fd, &vbl);
+ if (ret) {
+ static int limit = 5;
+ if (limit) {
+ xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+ "%s:%d get vblank counter failed: %s\n",
+ __FUNCTION__, __LINE__,
+ strerror(errno));
+ limit--;
+ }
+ goto out_complete;
+ }
+
+ wait_info->frame = vbl.reply.sequence;
+ DRI2BlockClient(client, draw);
+ return TRUE;
+ }
+
+ /*
+ * If we get here, target_msc has already passed or we don't have one,
+ * so we queue an event that will satisfy the divisor/remainder equation.
+ */
+ vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT;
+ if (pipe > 0)
+ vbl.request.type |= DRM_VBLANK_SECONDARY;
+
+ vbl.request.sequence = current_msc - (current_msc % divisor) +
+ remainder;
+
+ /*
+ * If calculated remainder is larger than requested remainder,
+ * it means we've passed the last point where
+ * seq % divisor == remainder, so we need to wait for the next time
+ * that will happen.
+ */
+ if ((current_msc % divisor) >= remainder)
+ vbl.request.sequence += divisor;
+
+ vbl.request.signal = (unsigned long)wait_info;
+ ret = drmWaitVBlank(sna->kgem.fd, &vbl);
+ if (ret) {
+ static int limit = 5;
+ if (limit) {
+ xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+ "%s:%d get vblank counter failed: %s\n",
+ __FUNCTION__, __LINE__,
+ strerror(errno));
+ limit--;
+ }
+ goto out_complete;
+ }
+
+ wait_info->frame = vbl.reply.sequence;
+ DRI2BlockClient(client, draw);
+
+ return TRUE;
+
+out_complete:
+ DRI2WaitMSCComplete(client, draw, target_msc, 0, 0);
+ return TRUE;
+}
+#endif
+
+static int dri2_server_generation;
+
+Bool sna_dri2_open(struct sna *sna, ScreenPtr screen)
+{
+ DRI2InfoRec info;
+ int dri2_major = 1;
+ int dri2_minor = 0;
+#if DRI2INFOREC_VERSION >= 4
+ const char *driverNames[1];
+#endif
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ if (sna->kgem.wedged) {
+ xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING,
+ "cannot enable DRI2 whilst forcing software fallbacks\n");
+ return FALSE;
+ }
+
+ if (xf86LoaderCheckSymbol("DRI2Version"))
+ DRI2Version(&dri2_major, &dri2_minor);
+
+ if (dri2_minor < 1) {
+ xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING,
+ "DRI2 requires DRI2 module version 1.1.0 or later\n");
+ return FALSE;
+ }
+
+ if (serverGeneration != dri2_server_generation) {
+ dri2_server_generation = serverGeneration;
+ if (!sna_dri2_register_frame_event_resource_types()) {
+ xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING,
+ "Cannot register DRI2 frame event resources\n");
+ return FALSE;
+ }
+ }
+ sna->deviceName = drmGetDeviceNameFromFd(sna->kgem.fd);
+ memset(&info, '\0', sizeof(info));
+ info.fd = sna->kgem.fd;
+ info.driverName = sna->kgem.gen < 40 ? "i915" : "i965";
+ info.deviceName = sna->deviceName;
+
+ DBG(("%s: loading dri driver '%s' [gen=%d] for device '%s'\n",
+ __FUNCTION__, info.driverName, sna->kgem.gen, info.deviceName));
+
+#if DRI2INFOREC_VERSION == 1
+ info.version = 1;
+ info.CreateBuffers = sna_dri2_create_buffers;
+ info.DestroyBuffers = sna_dri2_destroy_buffers;
+#elif DRI2INFOREC_VERSION == 2
+ /* The ABI between 2 and 3 was broken so we could get rid of
+ * the multi-buffer alloc functions. Make sure we indicate the
+ * right version so DRI2 can reject us if it's version 3 or above. */
+ info.version = 2;
+ info.CreateBuffer = sna_dri2_create_buffer;
+ info.DestroyBuffer = sna_dri2_destroy_buffer;
+#else
+ info.version = 3;
+ info.CreateBuffer = sna_dri2_create_buffer;
+ info.DestroyBuffer = sna_dri2_destroy_buffer;
+#endif
+
+ info.CopyRegion = sna_dri2_copy_region;
+#if DRI2INFOREC_VERSION >= 4
+ {
+ info.version = 4;
+ info.ScheduleSwap = sna_dri2_schedule_swap;
+ info.GetMSC = sna_dri2_get_msc;
+ info.ScheduleWaitMSC = sna_dri2_schedule_wait_msc;
+ info.numDrivers = 1;
+ info.driverNames = driverNames;
+ driverNames[0] = info.driverName;
+#if DRI2INFOREC_VERSION >= 6
+ info.version = 6;
+ info.AsyncSwap = sna_dri2_async_swap;
+#endif
+ }
+#endif
+
+ return DRI2ScreenInit(screen, &info);
+}
+
+void sna_dri2_close(struct sna *sna, ScreenPtr screen)
+{
+ DBG(("%s()\n", __FUNCTION__));
+ DRI2CloseScreen(screen);
+ drmFree(sna->deviceName);
+}
diff --git a/src/sna/sna_driver.c b/src/sna/sna_driver.c
new file mode 100644
index 00000000..b0df9aa5
--- /dev/null
+++ b/src/sna/sna_driver.c
@@ -0,0 +1,925 @@
+/**************************************************************************
+
+Copyright 2001 VA Linux Systems Inc., Fremont, California.
+Copyright © 2002 by David Dawes
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors: Jeff Hartmann <jhartmann@valinux.com>
+ * Abraham van der Merwe <abraham@2d3d.co.za>
+ * David Dawes <dawes@xfree86.org>
+ * Alan Hourihane <alanh@tungstengraphics.com>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+
+#include "xf86.h"
+#include "xf86_OSproc.h"
+#include "xf86Priv.h"
+#include "xf86cmap.h"
+#include "compiler.h"
+#include "mibstore.h"
+#include "vgaHW.h"
+#include "mipointer.h"
+#include "micmap.h"
+#include "shadowfb.h"
+#include <X11/extensions/randr.h>
+#include "fb.h"
+#include "miscstruct.h"
+#include "dixstruct.h"
+#include "xf86xv.h"
+#include <X11/extensions/Xv.h>
+#include "sna.h"
+#include "sna_module.h"
+#include "sna_video.h"
+
+#include "intel_driver.h"
+
+#include <sys/ioctl.h>
+#include "i915_drm.h"
+
+static OptionInfoRec sna_options[] = {
+ {OPTION_TILING_FB, "LinearFramebuffer", OPTV_BOOLEAN, {0}, FALSE},
+ {OPTION_TILING_2D, "Tiling", OPTV_BOOLEAN, {0}, TRUE},
+ {OPTION_PREFER_OVERLAY, "XvPreferOverlay", OPTV_BOOLEAN, {0}, FALSE},
+ {OPTION_COLOR_KEY, "ColorKey", OPTV_INTEGER, {0}, FALSE},
+ {OPTION_VIDEO_KEY, "VideoKey", OPTV_INTEGER, {0}, FALSE},
+ {OPTION_SWAPBUFFERS_WAIT, "SwapbuffersWait", OPTV_BOOLEAN, {0}, TRUE},
+ {OPTION_HOTPLUG, "HotPlug", OPTV_BOOLEAN, {0}, TRUE},
+ {OPTION_THROTTLE, "Throttle", OPTV_BOOLEAN, {0}, TRUE},
+ {OPTION_RELAXED_FENCING, "UseRelaxedFencing", OPTV_BOOLEAN, {0}, TRUE},
+ {OPTION_VMAP, "UseVmap", OPTV_BOOLEAN, {0}, TRUE},
+ {-1, NULL, OPTV_NONE, {0}, FALSE}
+};
+
+static Bool sna_enter_vt(int scrnIndex, int flags);
+
+/* temporary */
+extern void xf86SetCursor(ScreenPtr screen, CursorPtr pCurs, int x, int y);
+
+const OptionInfoRec *sna_available_options(int chipid, int busid)
+{
+ return sna_options;
+}
+
+static void
+sna_load_palette(ScrnInfoPtr scrn, int numColors, int *indices,
+ LOCO * colors, VisualPtr pVisual)
+{
+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn);
+ int i, j, index;
+ int p;
+ uint16_t lut_r[256], lut_g[256], lut_b[256];
+
+ for (p = 0; p < xf86_config->num_crtc; p++) {
+ xf86CrtcPtr crtc = xf86_config->crtc[p];
+
+ switch (scrn->depth) {
+ case 15:
+ for (i = 0; i < numColors; i++) {
+ index = indices[i];
+ for (j = 0; j < 8; j++) {
+ lut_r[index * 8 + j] =
+ colors[index].red << 8;
+ lut_g[index * 8 + j] =
+ colors[index].green << 8;
+ lut_b[index * 8 + j] =
+ colors[index].blue << 8;
+ }
+ }
+ break;
+ case 16:
+ for (i = 0; i < numColors; i++) {
+ index = indices[i];
+
+ if (index <= 31) {
+ for (j = 0; j < 8; j++) {
+ lut_r[index * 8 + j] =
+ colors[index].red << 8;
+ lut_b[index * 8 + j] =
+ colors[index].blue << 8;
+ }
+ }
+
+ for (j = 0; j < 4; j++) {
+ lut_g[index * 4 + j] =
+ colors[index].green << 8;
+ }
+ }
+ break;
+ default:
+ for (i = 0; i < numColors; i++) {
+ index = indices[i];
+ lut_r[index] = colors[index].red << 8;
+ lut_g[index] = colors[index].green << 8;
+ lut_b[index] = colors[index].blue << 8;
+ }
+ break;
+ }
+
+ /* Make the change through RandR */
+#ifdef RANDR_12_INTERFACE
+ RRCrtcGammaSet(crtc->randr_crtc, lut_r, lut_g, lut_b);
+#else
+ crtc->funcs->gamma_set(crtc, lut_r, lut_g, lut_b, 256);
+#endif
+ }
+}
+
+/**
+ * Adjust the screen pixmap for the current location of the front buffer.
+ * This is done at EnterVT when buffers are bound as long as the resources
+ * have already been created, but the first EnterVT happens before
+ * CreateScreenResources.
+ */
+static Bool sna_create_screen_resources(ScreenPtr screen)
+{
+ ScrnInfoPtr scrn = xf86Screens[screen->myNum];
+ struct sna *sna = to_sna(scrn);
+
+ free(screen->devPrivate);
+ screen->devPrivate = NULL;
+
+ sna->front = screen->CreatePixmap(screen,
+ screen->width,
+ screen->height,
+ screen->rootDepth,
+ SNA_CREATE_FB);
+ if (!sna->front)
+ return FALSE;
+
+ if (!sna_pixmap_force_to_gpu(sna->front))
+ goto cleanup_front;
+
+ screen->SetScreenPixmap(sna->front);
+
+ if (!sna_accel_create(sna))
+ goto cleanup_front;
+
+ if (!sna_enter_vt(screen->myNum, 0))
+ goto cleanup_front;
+
+ return TRUE;
+
+cleanup_front:
+ screen->DestroyPixmap(sna->front);
+ sna->front = NULL;
+ return FALSE;
+}
+
+static void PreInitCleanup(ScrnInfoPtr scrn)
+{
+ if (!scrn || !scrn->driverPrivate)
+ return;
+
+ free(scrn->driverPrivate);
+ scrn->driverPrivate = NULL;
+}
+
+static void sna_check_chipset_option(ScrnInfoPtr scrn)
+{
+ struct sna *sna = to_sna(scrn);
+ MessageType from = X_PROBED;
+
+ intel_detect_chipset(scrn, sna->PciInfo, &sna->chipset);
+
+ /* Set the Chipset and ChipRev, allowing config file entries to override. */
+ if (sna->pEnt->device->chipset && *sna->pEnt->device->chipset) {
+ scrn->chipset = sna->pEnt->device->chipset;
+ from = X_CONFIG;
+ } else if (sna->pEnt->device->chipID >= 0) {
+ scrn->chipset = (char *)xf86TokenToString(intel_chipsets,
+ sna->pEnt->device->chipID);
+ from = X_CONFIG;
+ xf86DrvMsg(scrn->scrnIndex, X_CONFIG,
+ "ChipID override: 0x%04X\n",
+ sna->pEnt->device->chipID);
+ DEVICE_ID(sna->PciInfo) = sna->pEnt->device->chipID;
+ } else {
+ from = X_PROBED;
+ scrn->chipset = (char *)xf86TokenToString(intel_chipsets,
+ DEVICE_ID(sna->PciInfo));
+ }
+
+ if (sna->pEnt->device->chipRev >= 0) {
+ xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "ChipRev override: %d\n",
+ sna->pEnt->device->chipRev);
+ }
+
+ xf86DrvMsg(scrn->scrnIndex, from, "Chipset: \"%s\"\n",
+ (scrn->chipset != NULL) ? scrn->chipset : "Unknown i8xx");
+}
+
+static Bool sna_get_early_options(ScrnInfoPtr scrn)
+{
+ struct sna *sna = to_sna(scrn);
+
+ /* Process the options */
+ xf86CollectOptions(scrn, NULL);
+ if (!(sna->Options = malloc(sizeof(sna_options))))
+ return FALSE;
+
+ memcpy(sna->Options, sna_options, sizeof(sna_options));
+ xf86ProcessOptions(scrn->scrnIndex, scrn->options, sna->Options);
+
+ return TRUE;
+}
+
+static int sna_open_drm_master(ScrnInfoPtr scrn)
+{
+ struct sna *sna = to_sna(scrn);
+ struct pci_device *dev = sna->PciInfo;
+ drmSetVersion sv;
+ struct drm_i915_getparam gp;
+ int err, val;
+ char busid[20];
+ int fd;
+
+ snprintf(busid, sizeof(busid), "pci:%04x:%02x:%02x.%d",
+ dev->domain, dev->bus, dev->dev, dev->func);
+
+ fd = drmOpen("i915", busid);
+ if (fd == -1) {
+ xf86DrvMsg(scrn->scrnIndex, X_ERROR,
+ "[drm] Failed to open DRM device for %s: %s\n",
+ busid, strerror(errno));
+ return -1;
+ }
+
+ /* Check that what we opened was a master or a master-capable FD,
+ * by setting the version of the interface we'll use to talk to it.
+ * (see DRIOpenDRMMaster() in DRI1)
+ */
+ sv.drm_di_major = 1;
+ sv.drm_di_minor = 1;
+ sv.drm_dd_major = -1;
+ sv.drm_dd_minor = -1;
+ err = drmSetInterfaceVersion(fd, &sv);
+ if (err != 0) {
+ xf86DrvMsg(scrn->scrnIndex, X_ERROR,
+ "[drm] failed to set drm interface version.\n");
+ drmClose(fd);
+ return -1;
+ }
+
+ val = FALSE;
+ gp.param = I915_PARAM_HAS_BLT;
+ gp.value = &val;
+ if (drmCommandWriteRead(fd, DRM_I915_GETPARAM,
+ &gp, sizeof(gp))) {
+ xf86DrvMsg(scrn->scrnIndex, X_ERROR,
+ "Failed to detect BLT. Kernel 2.6.37 required.\n");
+ drmClose(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+static void sna_close_drm_master(struct sna *sna)
+{
+ if (sna && sna->kgem.fd > 0) {
+ drmClose(sna->kgem.fd);
+ sna->kgem.fd = -1;
+ }
+}
+
+static void sna_selftest(void)
+{
+ sna_damage_selftest();
+}
+
+
+/**
+ * This is called before ScreenInit to do any require probing of screen
+ * configuration.
+ *
+ * This code generally covers probing, module loading, option handling
+ * card mapping, and RandR setup.
+ *
+ * Since xf86InitialConfiguration ends up requiring that we set video modes
+ * in order to detect configuration, we end up having to do a lot of driver
+ * setup (talking to the DRM, mapping the device, etc.) in this function.
+ * As a result, we want to set up that server initialization once rather
+ * that doing it per generation.
+ */
+static Bool sna_pre_init(ScrnInfoPtr scrn, int flags)
+{
+ struct sna *sna;
+ rgb defaultWeight = { 0, 0, 0 };
+ EntityInfoPtr pEnt;
+ int flags24;
+ Gamma zeros = { 0.0, 0.0, 0.0 };
+ int fd;
+
+ sna_selftest();
+
+ if (scrn->numEntities != 1)
+ return FALSE;
+
+ pEnt = xf86GetEntityInfo(scrn->entityList[0]);
+
+ if (flags & PROBE_DETECT)
+ return TRUE;
+
+ sna = to_sna(scrn);
+ if (sna == NULL) {
+ sna = xnfcalloc(sizeof(struct sna), 1);
+ if (sna == NULL)
+ return FALSE;
+
+ scrn->driverPrivate = sna;
+ }
+ sna->scrn = scrn;
+ sna->pEnt = pEnt;
+
+ scrn->displayWidth = 640; /* default it */
+
+ if (sna->pEnt->location.type != BUS_PCI)
+ return FALSE;
+
+ sna->PciInfo = xf86GetPciInfoForEntity(sna->pEnt->index);
+
+ fd = sna_open_drm_master(scrn);
+ if (fd == -1) {
+ xf86DrvMsg(scrn->scrnIndex, X_ERROR,
+ "Failed to become DRM master.\n");
+ return FALSE;
+ }
+
+ scrn->monitor = scrn->confScreen->monitor;
+ scrn->progClock = TRUE;
+ scrn->rgbBits = 8;
+
+ flags24 = Support32bppFb | PreferConvert24to32 | SupportConvert24to32;
+
+ if (!xf86SetDepthBpp(scrn, 0, 0, 0, flags24))
+ return FALSE;
+
+ switch (scrn->depth) {
+ case 8:
+ case 15:
+ case 16:
+ case 24:
+ break;
+ default:
+ xf86DrvMsg(scrn->scrnIndex, X_ERROR,
+ "Given depth (%d) is not supported by Intel driver\n",
+ scrn->depth);
+ return FALSE;
+ }
+ xf86PrintDepthBpp(scrn);
+
+ if (!xf86SetWeight(scrn, defaultWeight, defaultWeight))
+ return FALSE;
+ if (!xf86SetDefaultVisual(scrn, -1))
+ return FALSE;
+
+ sna->mode.cpp = scrn->bitsPerPixel / 8;
+
+ if (!sna_get_early_options(scrn))
+ return FALSE;
+
+ sna_check_chipset_option(scrn);
+ kgem_init(&sna->kgem, fd, sna->chipset.info->gen);
+ if (!xf86ReturnOptValBool(sna->Options,
+ OPTION_RELAXED_FENCING,
+ sna->kgem.has_relaxed_fencing)) {
+ xf86DrvMsg(scrn->scrnIndex,
+ sna->kgem.has_relaxed_fencing ? X_CONFIG : X_PROBED,
+ "Disabling use of relaxed fencing\n");
+ sna->kgem.has_relaxed_fencing = 0;
+ }
+ if (!xf86ReturnOptValBool(sna->Options,
+ OPTION_VMAP,
+ sna->kgem.has_vmap)) {
+ xf86DrvMsg(scrn->scrnIndex,
+ sna->kgem.has_vmap ? X_CONFIG : X_PROBED,
+ "Disabling use of vmap\n");
+ sna->kgem.has_vmap = 0;
+ }
+
+ /* Enable tiling by default */
+ sna->tiling = SNA_TILING_ALL;
+
+ /* Allow user override if they set a value */
+ if (!xf86ReturnOptValBool(sna->Options, OPTION_TILING_2D, TRUE))
+ sna->tiling &= ~SNA_TILING_2D;
+ if (xf86ReturnOptValBool(sna->Options, OPTION_TILING_FB, FALSE))
+ sna->tiling &= ~SNA_TILING_FB;
+
+ sna->flags = 0;
+ if (!xf86ReturnOptValBool(sna->Options, OPTION_THROTTLE, TRUE))
+ sna->flags |= SNA_NO_THROTTLE;
+ if (xf86ReturnOptValBool(sna->Options, OPTION_SWAPBUFFERS_WAIT, TRUE))
+ sna->flags |= SNA_SWAP_WAIT;
+
+ xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Framebuffer %s\n",
+ sna->tiling & SNA_TILING_FB ? "tiled" : "linear");
+ xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Pixmaps %s\n",
+ sna->tiling & SNA_TILING_2D ? "tiled" : "linear");
+ xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "3D buffers %s\n",
+ sna->tiling & SNA_TILING_3D ? "tiled" : "linear");
+ xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "SwapBuffers wait %sabled\n",
+ sna->flags & SNA_SWAP_WAIT ? "en" : "dis");
+ xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Throttling %sabled\n",
+ sna->flags & SNA_NO_THROTTLE ? "dis" : "en");
+
+ if (!sna_mode_pre_init(scrn, sna)) {
+ PreInitCleanup(scrn);
+ return FALSE;
+ }
+
+ if (!xf86SetGamma(scrn, zeros)) {
+ PreInitCleanup(scrn);
+ return FALSE;
+ }
+
+ if (scrn->modes == NULL) {
+ xf86DrvMsg(scrn->scrnIndex, X_ERROR, "No modes.\n");
+ PreInitCleanup(scrn);
+ return FALSE;
+ }
+ scrn->currentMode = scrn->modes;
+
+ /* Set display resolution */
+ xf86SetDpi(scrn, 0, 0);
+
+ /* Load the required sub modules */
+ if (!xf86LoadSubModule(scrn, "fb")) {
+ PreInitCleanup(scrn);
+ return FALSE;
+ }
+
+ /* Load the dri2 module if requested. */
+ xf86LoadSubModule(scrn, "dri2");
+
+ return sna_accel_pre_init(sna);
+}
+
+/**
+ * Intialiazes the hardware for the 3D pipeline use in the 2D driver.
+ *
+ * Some state caching is performed to avoid redundant state emits. This
+ * function is also responsible for marking the state as clobbered for DRI
+ * clients.
+ */
+static void
+sna_block_handler(int i, pointer data, pointer timeout, pointer read_mask)
+{
+ ScreenPtr screen = screenInfo.screens[i];
+ ScrnInfoPtr scrn = xf86Screens[i];
+ struct sna *sna = to_sna(scrn);
+
+ screen->BlockHandler = sna->BlockHandler;
+
+ (*screen->BlockHandler) (i, data, timeout, read_mask);
+
+ sna->BlockHandler = screen->BlockHandler;
+ screen->BlockHandler = sna_block_handler;
+
+ sna_accel_block_handler(sna);
+}
+
+static void
+sna_wakeup_handler(int i, pointer data, unsigned long result, pointer read_mask)
+{
+ ScreenPtr screen = screenInfo.screens[i];
+ ScrnInfoPtr scrn = xf86Screens[i];
+ struct sna *sna = to_sna(scrn);
+
+ screen->WakeupHandler = sna->WakeupHandler;
+
+ (*screen->WakeupHandler) (i, data, result, read_mask);
+
+ sna->WakeupHandler = screen->WakeupHandler;
+ screen->WakeupHandler = sna_wakeup_handler;
+
+ sna_accel_wakeup_handler(sna);
+}
+
+#if HAVE_UDEV
+static void
+sna_handle_uevents(int fd, void *closure)
+{
+ ScrnInfoPtr scrn = closure;
+ struct sna *sna = to_sna(scrn);
+ struct udev_device *dev;
+ const char *hotplug;
+ struct stat s;
+ dev_t udev_devnum;
+
+ dev = udev_monitor_receive_device(sna->uevent_monitor);
+ if (!dev)
+ return;
+
+ udev_devnum = udev_device_get_devnum(dev);
+ fstat(sna->kgem.fd, &s);
+ /*
+ * Check to make sure this event is directed at our
+ * device (by comparing dev_t values), then make
+ * sure it's a hotplug event (HOTPLUG=1)
+ */
+
+ hotplug = udev_device_get_property_value(dev, "HOTPLUG");
+
+ if (memcmp(&s.st_rdev, &udev_devnum, sizeof (dev_t)) == 0 &&
+ hotplug && atoi(hotplug) == 1)
+ RRGetInfo(screenInfo.screens[scrn->scrnIndex], TRUE);
+
+ udev_device_unref(dev);
+}
+
+static void
+sna_uevent_init(ScrnInfoPtr scrn)
+{
+ struct sna *sna = to_sna(scrn);
+ struct udev *u;
+ struct udev_monitor *mon;
+ Bool hotplug;
+ MessageType from = X_CONFIG;
+
+ if (!xf86GetOptValBool(sna->Options, OPTION_HOTPLUG, &hotplug)) {
+ from = X_DEFAULT;
+ hotplug = TRUE;
+ }
+
+ xf86DrvMsg(scrn->scrnIndex, from, "hotplug detection: \"%s\"\n",
+ hotplug ? "enabled" : "disabled");
+ if (!hotplug)
+ return;
+
+ u = udev_new();
+ if (!u)
+ return;
+
+ mon = udev_monitor_new_from_netlink(u, "udev");
+
+ if (!mon) {
+ udev_unref(u);
+ return;
+ }
+
+ if (udev_monitor_filter_add_match_subsystem_devtype(mon,
+ "drm",
+ "drm_minor") < 0 ||
+ udev_monitor_enable_receiving(mon) < 0)
+ {
+ udev_monitor_unref(mon);
+ udev_unref(u);
+ return;
+ }
+
+ sna->uevent_handler =
+ xf86AddGeneralHandler(udev_monitor_get_fd(mon),
+ sna_handle_uevents,
+ scrn);
+ if (!sna->uevent_handler) {
+ udev_monitor_unref(mon);
+ udev_unref(u);
+ return;
+ }
+
+ sna->uevent_monitor = mon;
+}
+
+static void
+sna_uevent_fini(ScrnInfoPtr scrn)
+{
+ struct sna *sna = to_sna(scrn);
+
+ if (sna->uevent_handler) {
+ struct udev *u = udev_monitor_get_udev(sna->uevent_monitor);
+
+ xf86RemoveGeneralHandler(sna->uevent_handler);
+
+ udev_monitor_unref(sna->uevent_monitor);
+ udev_unref(u);
+ sna->uevent_handler = NULL;
+ sna->uevent_monitor = NULL;
+ }
+}
+#endif /* HAVE_UDEV */
+
+static void sna_leave_vt(int scrnIndex, int flags)
+{
+ ScrnInfoPtr scrn = xf86Screens[scrnIndex];
+ struct sna *sna = to_sna(scrn);
+ int ret;
+
+ xf86RotateFreeShadow(scrn);
+
+ xf86_hide_cursors(scrn);
+
+ ret = drmDropMaster(sna->kgem.fd);
+ if (ret)
+ xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+ "drmDropMaster failed: %s\n", strerror(errno));
+}
+
+
+static Bool sna_close_screen(int scrnIndex, ScreenPtr screen)
+{
+ ScrnInfoPtr scrn = xf86Screens[scrnIndex];
+ struct sna *sna = to_sna(scrn);
+
+#if HAVE_UDEV
+ sna_uevent_fini(scrn);
+#endif
+
+ if (scrn->vtSema == TRUE)
+ sna_leave_vt(scrnIndex, 0);
+
+ sna_accel_close(sna);
+
+ xf86_cursors_fini(screen);
+
+ screen->CloseScreen = sna->CloseScreen;
+ (*screen->CloseScreen) (scrnIndex, screen);
+
+ if (sna->directRenderingOpen) {
+ sna_dri2_close(sna, screen);
+ sna->directRenderingOpen = FALSE;
+ }
+
+ xf86GARTCloseScreen(scrnIndex);
+
+ scrn->vtSema = FALSE;
+ return TRUE;
+}
+
+static Bool
+sna_screen_init(int scrnIndex, ScreenPtr screen, int argc, char **argv)
+{
+ ScrnInfoPtr scrn = xf86Screens[screen->myNum];
+ struct sna *sna = to_sna(scrn);
+ VisualPtr visual;
+ struct pci_device *const device = sna->PciInfo;
+
+ scrn->videoRam = device->regions[2].size / 1024;
+
+#ifdef DRI2
+ sna->directRenderingOpen = sna_dri2_open(sna, screen);
+ if (sna->directRenderingOpen)
+ xf86DrvMsg(scrn->scrnIndex, X_INFO,
+ "direct rendering: DRI2 Enabled\n");
+#endif
+
+ miClearVisualTypes();
+ if (!miSetVisualTypes(scrn->depth,
+ miGetDefaultVisualMask(scrn->depth),
+ scrn->rgbBits, scrn->defaultVisual))
+ return FALSE;
+ if (!miSetPixmapDepths())
+ return FALSE;
+
+ if (!fbScreenInit(screen, NULL,
+ scrn->virtualX, scrn->virtualY,
+ scrn->xDpi, scrn->yDpi,
+ scrn->displayWidth, scrn->bitsPerPixel))
+ return FALSE;
+
+ if (scrn->bitsPerPixel > 8) {
+ /* Fixup RGB ordering */
+ visual = screen->visuals + screen->numVisuals;
+ while (--visual >= screen->visuals) {
+ if ((visual->class | DynamicClass) == DirectColor) {
+ visual->offsetRed = scrn->offset.red;
+ visual->offsetGreen = scrn->offset.green;
+ visual->offsetBlue = scrn->offset.blue;
+ visual->redMask = scrn->mask.red;
+ visual->greenMask = scrn->mask.green;
+ visual->blueMask = scrn->mask.blue;
+ }
+ }
+ }
+
+ fbPictureInit(screen, NULL, 0);
+
+ xf86SetBlackWhitePixels(screen);
+
+ if (!sna_accel_init(screen, sna)) {
+ xf86DrvMsg(scrn->scrnIndex, X_ERROR,
+ "Hardware acceleration initialization failed\n");
+ return FALSE;
+ }
+
+ miInitializeBackingStore(screen);
+ xf86SetBackingStore(screen);
+ xf86SetSilkenMouse(screen);
+ miDCInitialize(screen, xf86GetPointerScreenFuncs());
+
+ xf86DrvMsg(scrn->scrnIndex, X_INFO, "Initializing HW Cursor\n");
+ if (!xf86_cursors_init(screen, SNA_CURSOR_X, SNA_CURSOR_Y,
+ HARDWARE_CURSOR_TRUECOLOR_AT_8BPP |
+ HARDWARE_CURSOR_BIT_ORDER_MSBFIRST |
+ HARDWARE_CURSOR_INVERT_MASK |
+ HARDWARE_CURSOR_SWAP_SOURCE_AND_MASK |
+ HARDWARE_CURSOR_AND_SOURCE_WITH_MASK |
+ HARDWARE_CURSOR_SOURCE_MASK_INTERLEAVE_64 |
+ HARDWARE_CURSOR_UPDATE_UNHIDDEN |
+ HARDWARE_CURSOR_ARGB)) {
+ xf86DrvMsg(scrn->scrnIndex, X_ERROR,
+ "Hardware cursor initialization failed\n");
+ }
+
+ /* Must force it before EnterVT, so we are in control of VT and
+ * later memory should be bound when allocating, e.g rotate_mem */
+ scrn->vtSema = TRUE;
+
+ sna->BlockHandler = screen->BlockHandler;
+ screen->BlockHandler = sna_block_handler;
+
+ sna->WakeupHandler = screen->WakeupHandler;
+ screen->WakeupHandler = sna_wakeup_handler;
+
+ screen->SaveScreen = xf86SaveScreen;
+ sna->CloseScreen = screen->CloseScreen;
+ screen->CloseScreen = sna_close_screen;
+ screen->CreateScreenResources = sna_create_screen_resources;
+
+ if (!xf86CrtcScreenInit(screen))
+ return FALSE;
+
+ if (!miCreateDefColormap(screen))
+ return FALSE;
+
+ if (!xf86HandleColormaps(screen, 256, 8, sna_load_palette, NULL,
+ CMAP_RELOAD_ON_MODE_SWITCH |
+ CMAP_PALETTED_TRUECOLOR)) {
+ return FALSE;
+ }
+
+ xf86DPMSInit(screen, xf86DPMSSet, 0);
+
+ sna_video_init(sna, screen);
+
+ if (serverGeneration == 1)
+ xf86ShowUnusedOptions(scrn->scrnIndex, scrn->options);
+
+ sna_mode_init(sna);
+
+ sna->suspended = FALSE;
+
+#if HAVE_UDEV
+ sna_uevent_init(scrn);
+#endif
+
+ return TRUE;
+}
+
+static void sna_adjust_frame(int scrnIndex, int x, int y, int flags)
+{
+}
+
+static void sna_free_screen(int scrnIndex, int flags)
+{
+ ScrnInfoPtr scrn = xf86Screens[scrnIndex];
+ struct sna *sna = to_sna(scrn);
+
+ if (sna) {
+ sna_mode_fini(sna);
+ sna_close_drm_master(sna);
+
+ free(sna);
+ scrn->driverPrivate = NULL;
+ }
+
+ if (xf86LoaderCheckSymbol("vgaHWFreeHWRec"))
+ vgaHWFreeHWRec(xf86Screens[scrnIndex]);
+}
+
+/*
+ * This gets called when gaining control of the VT, and from ScreenInit().
+ */
+static Bool sna_enter_vt(int scrnIndex, int flags)
+{
+ ScrnInfoPtr scrn = xf86Screens[scrnIndex];
+ struct sna *sna = to_sna(scrn);
+
+ if (drmSetMaster(sna->kgem.fd)) {
+ xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+ "drmSetMaster failed: %s\n",
+ strerror(errno));
+ }
+
+ return xf86SetDesiredModes(scrn);
+}
+
+static Bool sna_switch_mode(int scrnIndex, DisplayModePtr mode, int flags)
+{
+ return xf86SetSingleMode(xf86Screens[scrnIndex], mode, RR_Rotate_0);
+}
+
+static ModeStatus
+sna_valid_mode(int scrnIndex, DisplayModePtr mode, Bool verbose, int flags)
+{
+ return MODE_OK;
+}
+
+#ifndef SUSPEND_SLEEP
+#define SUSPEND_SLEEP 0
+#endif
+#ifndef RESUME_SLEEP
+#define RESUME_SLEEP 0
+#endif
+
+/*
+ * This function is only required if we need to do anything differently from
+ * DoApmEvent() in common/xf86PM.c, including if we want to see events other
+ * than suspend/resume.
+ */
+static Bool sna_pm_event(int scrnIndex, pmEvent event, Bool undo)
+{
+ ScrnInfoPtr scrn = xf86Screens[scrnIndex];
+ struct sna *sna = to_sna(scrn);
+
+ switch (event) {
+ case XF86_APM_SYS_SUSPEND:
+ case XF86_APM_CRITICAL_SUSPEND: /*do we want to delay a critical suspend? */
+ case XF86_APM_USER_SUSPEND:
+ case XF86_APM_SYS_STANDBY:
+ case XF86_APM_USER_STANDBY:
+ if (!undo && !sna->suspended) {
+ scrn->LeaveVT(scrnIndex, 0);
+ sna->suspended = TRUE;
+ sleep(SUSPEND_SLEEP);
+ } else if (undo && sna->suspended) {
+ sleep(RESUME_SLEEP);
+ scrn->EnterVT(scrnIndex, 0);
+ sna->suspended = FALSE;
+ }
+ break;
+ case XF86_APM_STANDBY_RESUME:
+ case XF86_APM_NORMAL_RESUME:
+ case XF86_APM_CRITICAL_RESUME:
+ if (sna->suspended) {
+ sleep(RESUME_SLEEP);
+ scrn->EnterVT(scrnIndex, 0);
+ sna->suspended = FALSE;
+ /*
+ * Turn the screen saver off when resuming. This seems to be
+ * needed to stop xscreensaver kicking in (when used).
+ *
+ * XXX DoApmEvent() should probably call this just like
+ * xf86VTSwitch() does. Maybe do it here only in 4.2
+ * compatibility mode.
+ */
+ SaveScreens(SCREEN_SAVER_FORCER, ScreenSaverReset);
+ }
+ break;
+ /* This is currently used for ACPI */
+ case XF86_APM_CAPABILITY_CHANGED:
+ SaveScreens(SCREEN_SAVER_FORCER, ScreenSaverReset);
+ break;
+
+ default:
+ ErrorF("sna_pm_event: received APM event %d\n", event);
+ }
+ return TRUE;
+}
+
+void sna_init_scrn(ScrnInfoPtr scrn)
+{
+ scrn->PreInit = sna_pre_init;
+ scrn->ScreenInit = sna_screen_init;
+ scrn->SwitchMode = sna_switch_mode;
+ scrn->AdjustFrame = sna_adjust_frame;
+ scrn->EnterVT = sna_enter_vt;
+ scrn->LeaveVT = sna_leave_vt;
+ scrn->FreeScreen = sna_free_screen;
+ scrn->ValidMode = sna_valid_mode;
+ scrn->PMEvent = sna_pm_event;
+}
diff --git a/src/sna/sna_glyphs.c b/src/sna/sna_glyphs.c
new file mode 100644
index 00000000..bb4b9cde
--- /dev/null
+++ b/src/sna/sna_glyphs.c
@@ -0,0 +1,1145 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ * Partly based on code Copyright © 2008 Red Hat, Inc.
+ * Partly based on code Copyright © 2000 SuSE, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Intel not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. Intel makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL INTEL
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Red Hat not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. Red Hat makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * Red Hat DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL Red Hat
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of SuSE not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. SuSE makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Author: Chris Wilson <chris@chris-wilson.co.uk>
+ * Based on code by: Keith Packard <keithp@keithp.com> and Owen Taylor <otaylor@fishsoup.net>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_render.h"
+#include "sna_render_inline.h"
+
+#include <mipict.h>
+#include <fbpict.h>
+#include <fb.h>
+
+#if DEBUG_GLYPHS
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+#define CACHE_PICTURE_SIZE 1024
+#define GLYPH_MIN_SIZE 8
+#define GLYPH_MAX_SIZE 64
+#define GLYPH_CACHE_SIZE (CACHE_PICTURE_SIZE * CACHE_PICTURE_SIZE / (GLYPH_MIN_SIZE * GLYPH_MIN_SIZE))
+
+#if DEBUG_GLYPHS
+static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function)
+{
+ if (box->x1 < 0 || box->y1 < 0 ||
+ box->x2 > pixmap->drawable.width ||
+ box->y2 > pixmap->drawable.height)
+ {
+ ErrorF("%s: damage box is beyond the pixmap: box=(%d, %d), (%d, %d), pixmap=(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1, box->x2, box->y2,
+ pixmap->drawable.width,
+ pixmap->drawable.height);
+ assert(0);
+ }
+}
+#define assert_pixmap_contains_box(p, b) _assert_pixmap_contains_box(p, b, __FUNCTION__)
+#else
+#define assert_pixmap_contains_box(p, b)
+#endif
+
+struct sna_glyph {
+ PicturePtr atlas;
+ struct sna_coordinate coordinate;
+ uint16_t size, pos;
+};
+
+static DevPrivateKeyRec sna_glyph_key;
+
+static inline struct sna_glyph *glyph_get_private(GlyphPtr glyph)
+{
+ return dixGetPrivateAddr(&glyph->devPrivates, &sna_glyph_key);
+}
+
+#define NeedsComponent(f) (PICT_FORMAT_A(f) != 0 && PICT_FORMAT_RGB(f) != 0)
+
+static void unrealize_glyph_caches(struct sna *sna)
+{
+ struct sna_render *render = &sna->render;
+ int i;
+
+ DBG(("%s\n", __FUNCTION__));
+
+ for (i = 0; i < ARRAY_SIZE(render->glyph); i++) {
+ struct sna_glyph_cache *cache = &render->glyph[i];
+
+ if (cache->picture)
+ FreePicture(cache->picture, 0);
+
+ free(cache->glyphs);
+ }
+ memset(render->glyph, 0, sizeof(render->glyph));
+}
+
+/* All caches for a single format share a single pixmap for glyph storage,
+ * allowing mixing glyphs of different sizes without paying a penalty
+ * for switching between source pixmaps. (Note that for a size of font
+ * right at the border between two sizes, we might be switching for almost
+ * every glyph.)
+ *
+ * This function allocates the storage pixmap, and then fills in the
+ * rest of the allocated structures for all caches with the given format.
+ */
+static Bool realize_glyph_caches(struct sna *sna)
+{
+ ScreenPtr screen = sna->scrn->pScreen;
+ unsigned int formats[] = {
+ PIXMAN_a8,
+ PIXMAN_a8r8g8b8,
+ };
+ int i;
+
+ DBG(("%s\n", __FUNCTION__));
+
+ for (i = 0; i < ARRAY_SIZE(formats); i++) {
+ struct sna_glyph_cache *cache = &sna->render.glyph[i];
+ PixmapPtr pixmap;
+ PicturePtr picture;
+ CARD32 component_alpha;
+ int depth = PIXMAN_FORMAT_DEPTH(formats[i]);
+ int error;
+ PictFormatPtr pPictFormat = PictureMatchFormat(screen, depth, formats[i]);
+ if (!pPictFormat)
+ goto bail;
+
+ /* Now allocate the pixmap and picture */
+ pixmap = screen->CreatePixmap(screen,
+ CACHE_PICTURE_SIZE,
+ CACHE_PICTURE_SIZE,
+ depth,
+ CREATE_PIXMAP_USAGE_SCRATCH);
+ if (!pixmap)
+ goto bail;
+
+ component_alpha = NeedsComponent(pPictFormat->format);
+ picture = CreatePicture(0, &pixmap->drawable, pPictFormat,
+ CPComponentAlpha, &component_alpha,
+ serverClient, &error);
+
+ screen->DestroyPixmap(pixmap);
+
+ if (!picture)
+ goto bail;
+
+ ValidatePicture(picture);
+
+ cache->count = cache->evict = 0;
+ cache->picture = picture;
+ cache->glyphs = calloc(sizeof(struct sna_glyph *),
+ GLYPH_CACHE_SIZE);
+ if (!cache->glyphs)
+ goto bail;
+
+ cache->evict = rand() % GLYPH_CACHE_SIZE;
+ }
+
+ return TRUE;
+
+bail:
+ unrealize_glyph_caches(sna);
+ return FALSE;
+}
+
+static void
+glyph_cache_upload(ScreenPtr screen,
+ struct sna_glyph_cache *cache,
+ GlyphPtr glyph,
+ int16_t x, int16_t y)
+{
+ DBG(("%s: upload glyph %p to cache (%d, %d)x(%d, %d)\n",
+ __FUNCTION__, glyph, x, y, glyph->info.width, glyph->info.height));
+ sna_composite(PictOpSrc,
+ GlyphPicture(glyph)[screen->myNum], 0, cache->picture,
+ 0, 0,
+ 0, 0,
+ x, y,
+ glyph->info.width,
+ glyph->info.height);
+}
+
+static void
+glyph_extents(int nlist,
+ GlyphListPtr list,
+ GlyphPtr *glyphs,
+ BoxPtr extents)
+{
+ int16_t x1, x2, y1, y2;
+ int16_t x, y;
+
+ x1 = y1 = MAXSHORT;
+ x2 = y2 = MINSHORT;
+ x = y = 0;
+ while (nlist--) {
+ int n = list->len;
+ x += list->xOff;
+ y += list->yOff;
+ list++;
+ while (n--) {
+ GlyphPtr glyph = *glyphs++;
+
+ if (glyph->info.width && glyph->info.height) {
+ int v;
+
+ v = x - glyph->info.x;
+ if (v < x1)
+ x1 = v;
+ v += glyph->info.width;
+ if (v > x2)
+ x2 = v;
+
+ v = y - glyph->info.y;
+ if (v < y1)
+ y1 = v;
+ v += glyph->info.height;
+ if (v > y2)
+ y2 = v;
+ }
+
+ x += glyph->info.xOff;
+ y += glyph->info.yOff;
+ }
+ }
+
+ extents->x1 = x1;
+ extents->x2 = x2;
+ extents->y1 = y1;
+ extents->y2 = y2;
+}
+
+static inline unsigned int
+glyph_size_to_count(int size)
+{
+ size /= GLYPH_MIN_SIZE;
+ return size * size;
+}
+
+static inline unsigned int
+glyph_count_to_mask(int count)
+{
+ return ~(count - 1);
+}
+
+static inline unsigned int
+glyph_size_to_mask(int size)
+{
+ return glyph_count_to_mask(glyph_size_to_count(size));
+}
+
+static int
+glyph_cache(ScreenPtr screen,
+ struct sna_render *render,
+ GlyphPtr glyph)
+{
+ PicturePtr glyph_picture = GlyphPicture(glyph)[screen->myNum];
+ struct sna_glyph_cache *cache = &render->glyph[PICT_FORMAT_RGB(glyph_picture->format) != 0];
+ struct sna_glyph *priv;
+ int size, mask, pos, s;
+
+ if (glyph->info.width > GLYPH_MAX_SIZE ||
+ glyph->info.height > GLYPH_MAX_SIZE)
+ return FALSE;
+
+ for (size = GLYPH_MIN_SIZE; size <= GLYPH_MAX_SIZE; size *= 2)
+ if (glyph->info.width <= size && glyph->info.height <= size)
+ break;
+
+ s = glyph_size_to_count(size);
+ mask = glyph_count_to_mask(s);
+ pos = (cache->count + s - 1) & mask;
+ if (pos < GLYPH_CACHE_SIZE) {
+ cache->count = pos + s;
+ } else {
+ priv = NULL;
+ for (s = size; s <= GLYPH_MAX_SIZE; s *= 2) {
+ int i = cache->evict & glyph_size_to_mask(s);
+ priv = cache->glyphs[i];
+ if (priv == NULL)
+ continue;
+
+ if (priv->size >= s) {
+ cache->glyphs[i] = NULL;
+ priv->atlas = NULL;
+ pos = i;
+ } else
+ priv = NULL;
+ break;
+ }
+ if (priv == NULL) {
+ int count = glyph_size_to_count(size);
+ pos = cache->evict & glyph_count_to_mask(count);
+ for (s = 0; s < count; s++) {
+ priv = cache->glyphs[pos + s];
+ if (priv != NULL) {
+ priv->atlas =NULL;
+ cache->glyphs[pos + s] = NULL;
+ }
+ }
+ }
+
+ /* And pick a new eviction position */
+ cache->evict = rand() % GLYPH_CACHE_SIZE;
+ }
+ assert(cache->glyphs[pos] == NULL);
+
+ priv = glyph_get_private(glyph);
+ cache->glyphs[pos] = priv;
+ priv->atlas = cache->picture;
+ priv->size = size;
+ priv->pos = pos << 1 | (PICT_FORMAT_RGB(glyph_picture->format) != 0);
+ s = pos / ((GLYPH_MAX_SIZE / GLYPH_MIN_SIZE) * (GLYPH_MAX_SIZE / GLYPH_MIN_SIZE));
+ priv->coordinate.x = s % (CACHE_PICTURE_SIZE / GLYPH_MAX_SIZE) * GLYPH_MAX_SIZE;
+ priv->coordinate.y = (s / (CACHE_PICTURE_SIZE / GLYPH_MAX_SIZE)) * GLYPH_MAX_SIZE;
+ for (s = GLYPH_MIN_SIZE; s < GLYPH_MAX_SIZE; s *= 2) {
+ if (pos & 1)
+ priv->coordinate.x += s;
+ if (pos & 2)
+ priv->coordinate.y += s;
+ pos >>= 2;
+ }
+
+ glyph_cache_upload(screen, cache, glyph,
+ priv->coordinate.x, priv->coordinate.y);
+
+ return TRUE;
+}
+
+static void apply_damage(struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ BoxRec box;
+
+ if (op->damage == NULL)
+ return;
+
+ box.x1 = r->dst.x + op->dst.x;
+ box.y1 = r->dst.y + op->dst.y;
+ box.x2 = box.x1 + r->width;
+ box.y2 = box.y1 + r->height;
+
+ assert_pixmap_contains_box(op->dst.pixmap, &box);
+ sna_damage_add_box(op->damage, &box);
+}
+
+#define GET_PRIVATE(g) ((struct sna_glyph *)((char *)(g)->devPrivates + priv_offset))
+static Bool
+glyphs_to_dst(struct sna *sna,
+ CARD8 op,
+ PicturePtr src,
+ PicturePtr dst,
+ INT16 src_x, INT16 src_y,
+ int nlist, GlyphListPtr list, GlyphPtr *glyphs)
+{
+ struct sna_composite_op tmp;
+ ScreenPtr screen = dst->pDrawable->pScreen;
+ const int priv_offset = sna_glyph_key.offset;
+ int index = screen->myNum;
+ PicturePtr glyph_atlas;
+ BoxPtr rects;
+ int nrect;
+ int16_t x, y;
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ DBG(("%s(op=%d, src=(%d, %d), nlist=%d, dst=(%d, %d)+(%d, %d))\n",
+ __FUNCTION__, op, src_x, src_y, nlist,
+ list->xOff, list->yOff, dst->pDrawable->x, dst->pDrawable->y));
+
+ rects = REGION_RECTS(dst->pCompositeClip);
+ nrect = REGION_NUM_RECTS(dst->pCompositeClip);
+
+ x = dst->pDrawable->x;
+ y = dst->pDrawable->y;
+ src_x -= list->xOff + x;
+ src_y -= list->yOff + y;
+
+ glyph_atlas = NULL;
+ while (nlist--) {
+ int n = list->len;
+ x += list->xOff;
+ y += list->yOff;
+ while (n--) {
+ GlyphPtr glyph = *glyphs++;
+ struct sna_glyph priv;
+ int i;
+
+ if (glyph->info.width == 0 || glyph->info.height == 0)
+ goto next_glyph;
+
+ priv = *GET_PRIVATE(glyph);
+ if (priv.atlas == NULL) {
+ if (glyph_atlas) {
+ tmp.done(sna, &tmp);
+ glyph_atlas = NULL;
+ }
+ if (!glyph_cache(screen, &sna->render, glyph)) {
+ /* no cache for this glyph */
+ priv.atlas = GlyphPicture(glyph)[index];
+ priv.coordinate.x = priv.coordinate.y = 0;
+ } else
+ priv = *GET_PRIVATE(glyph);
+ }
+
+ if (priv.atlas != glyph_atlas) {
+ if (glyph_atlas)
+ tmp.done(sna, &tmp);
+
+ if (!sna->render.composite(sna,
+ op, src, priv.atlas, dst,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0,
+ &tmp))
+ return FALSE;
+
+ glyph_atlas = priv.atlas;
+ }
+
+ for (i = 0; i < nrect; i++) {
+ struct sna_composite_rectangles r;
+ int16_t dx, dy;
+ int16_t x2, y2;
+
+ r.dst.x = x - glyph->info.x;
+ r.dst.y = y - glyph->info.y;
+ x2 = r.dst.x + glyph->info.width;
+ y2 = r.dst.y + glyph->info.height;
+ dx = dy = 0;
+
+ DBG(("%s: glyph=(%d, %d), (%d, %d), clip=(%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ r.dst.x, r.dst.y, x2, y2,
+ rects[i].x1, rects[i].y1,
+ rects[i].x2, rects[i].y2));
+ if (rects[i].y1 >= y2)
+ break;
+
+ if (r.dst.x < rects[i].x1)
+ dx = rects[i].x1 - r.dst.x, r.dst.x = rects[i].x1;
+ if (x2 > rects[i].x2)
+ x2 = rects[i].x2;
+ if (r.dst.y < rects[i].y1)
+ dy = rects[i].y1 - r.dst.y, r.dst.y = rects[i].y1;
+ if (y2 > rects[i].y2)
+ y2 = rects[i].y2;
+
+ if (r.dst.x < x2 && r.dst.y < y2) {
+ DBG(("%s: blt=(%d, %d), (%d, %d)\n",
+ __FUNCTION__, r.dst.x, r.dst.y, x2, y2));
+
+ r.src.x = r.dst.x + src_x;
+ r.src.y = r.dst.y + src_y;
+ r.mask.x = dx + priv.coordinate.x;
+ r.mask.y = dy + priv.coordinate.y;
+ r.width = x2 - r.dst.x;
+ r.height = y2 - r.dst.y;
+ tmp.blt(sna, &tmp, &r);
+ apply_damage(&tmp, &r);
+ }
+ }
+
+next_glyph:
+ x += glyph->info.xOff;
+ y += glyph->info.yOff;
+ }
+ list++;
+ }
+ if (glyph_atlas)
+ tmp.done(sna, &tmp);
+
+ return TRUE;
+}
+
+static Bool
+glyphs_to_dst_slow(struct sna *sna,
+ CARD8 op,
+ PicturePtr src,
+ PicturePtr dst,
+ INT16 src_x, INT16 src_y,
+ int nlist, GlyphListPtr list, GlyphPtr *glyphs)
+{
+ struct sna_composite_op tmp;
+ ScreenPtr screen = dst->pDrawable->pScreen;
+ const int priv_offset = sna_glyph_key.offset;
+ int index = screen->myNum;
+ int x, y, n;
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ DBG(("%s(op=%d, src=(%d, %d), nlist=%d, dst=(%d, %d)+(%d, %d))\n",
+ __FUNCTION__, op, src_x, src_y, nlist,
+ list->xOff, list->yOff, dst->pDrawable->x, dst->pDrawable->y));
+
+ x = dst->pDrawable->x;
+ y = dst->pDrawable->y;
+ src_x -= list->xOff + x;
+ src_y -= list->yOff + y;
+
+ while (nlist--) {
+ x += list->xOff;
+ y += list->yOff;
+ n = list->len;
+ while (n--) {
+ GlyphPtr glyph = *glyphs++;
+ struct sna_glyph priv;
+ BoxPtr rects;
+ int nrect;
+
+ if (glyph->info.width == 0 || glyph->info.height == 0)
+ goto next_glyph;
+
+ priv = *GET_PRIVATE(glyph);
+ if (priv.atlas == NULL) {
+ if (!glyph_cache(screen, &sna->render, glyph)) {
+ /* no cache for this glyph */
+ priv.atlas = GlyphPicture(glyph)[index];
+ priv.coordinate.x = priv.coordinate.y = 0;
+ } else
+ priv = *GET_PRIVATE(glyph);
+ }
+
+ if (!sna->render.composite(sna,
+ op, src, priv.atlas, dst,
+ src_x + x - glyph->info.x,
+ src_y + y - glyph->info.y,
+ priv.coordinate.x, priv.coordinate.y,
+ x - glyph->info.x,
+ y - glyph->info.y,
+ glyph->info.width,
+ glyph->info.height,
+ &tmp))
+ return FALSE;
+
+ rects = REGION_RECTS(dst->pCompositeClip);
+ nrect = REGION_NUM_RECTS(dst->pCompositeClip);
+ do {
+ struct sna_composite_rectangles r;
+ int16_t dx, dy;
+ int16_t x2, y2;
+
+ r.dst.x = x - glyph->info.x;
+ r.dst.y = y - glyph->info.y;
+ x2 = r.dst.x + glyph->info.width;
+ y2 = r.dst.y + glyph->info.height;
+ dx = dy = 0;
+
+ DBG(("%s: glyph=(%d, %d), (%d, %d), clip=(%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ r.dst.x, r.dst.y, x2, y2,
+ rects->x1, rects->y1,
+ rects->x2, rects->y2));
+ if (rects->y1 >= y2)
+ break;
+
+ if (r.dst.x < rects->x1)
+ dx = rects->x1 - r.dst.x, r.dst.x = rects->x1;
+ if (x2 > rects->x2)
+ x2 = rects->x2;
+ if (r.dst.y < rects->y1)
+ dy = rects->y1 - r.dst.y, r.dst.y = rects->y1;
+ if (y2 > rects->y2)
+ y2 = rects->y2;
+
+ if (r.dst.x < x2 && r.dst.y < y2) {
+ DBG(("%s: blt=(%d, %d), (%d, %d)\n",
+ __FUNCTION__, r.dst.x, r.dst.y, x2, y2));
+
+ r.src.x = r.dst.x + src_x;
+ r.src.y = r.dst.y + src_y;
+ r.mask.x = dx + priv.coordinate.x;
+ r.mask.y = dy + priv.coordinate.y;
+ r.width = x2 - r.dst.x;
+ r.height = y2 - r.dst.y;
+ tmp.blt(sna, &tmp, &r);
+ apply_damage(&tmp, &r);
+ }
+ rects++;
+ } while (--nrect);
+ tmp.done(sna, &tmp);
+
+next_glyph:
+ x += glyph->info.xOff;
+ y += glyph->info.yOff;
+ }
+ list++;
+ }
+
+ return TRUE;
+}
+
+static Bool
+clear_pixmap(struct sna *sna, PixmapPtr pixmap, PictFormat format)
+{
+ BoxRec box;
+ xRenderColor color = { 0 };
+
+ box.x1 = box.y1 = 0;
+ box.x2 = pixmap->drawable.width;
+ box.y2 = pixmap->drawable.height;
+
+ return sna->render.fill_boxes(sna, PictOpClear, format, &color,
+ pixmap, sna_pixmap_get_bo(pixmap),
+ &box, 1);
+}
+
+static Bool
+glyphs_via_mask(struct sna *sna,
+ CARD8 op,
+ PicturePtr src,
+ PicturePtr dst,
+ PictFormatPtr format,
+ INT16 src_x, INT16 src_y,
+ int nlist, GlyphListPtr list, GlyphPtr *glyphs)
+{
+ ScreenPtr screen = dst->pDrawable->pScreen;
+ struct sna_composite_op tmp;
+ const int priv_offset = sna_glyph_key.offset;
+ int index = screen->myNum;
+ CARD32 component_alpha;
+ PixmapPtr pixmap;
+ PicturePtr glyph_atlas, mask;
+ int16_t x, y, width, height;
+ int n, error;
+ BoxRec box;
+
+ DBG(("%s(op=%d, src=(%d, %d), nlist=%d, dst=(%d, %d)+(%d, %d))\n",
+ __FUNCTION__, op, src_x, src_y, nlist,
+ list->xOff, list->yOff, dst->pDrawable->x, dst->pDrawable->y));
+
+ glyph_extents(nlist, list, glyphs, &box);
+ if (box.x2 <= box.x1 || box.y2 <= box.y1)
+ return TRUE;
+
+ DBG(("%s: bounds=((%d, %d), (%d, %d))\n", __FUNCTION__,
+ box.x1, box.y1, box.x2, box.y2));
+
+ if (!sna_compute_composite_extents(&box,
+ src, NULL, dst,
+ src_x, src_y,
+ 0, 0,
+ box.x1, box.y1,
+ box.x2 - box.x1,
+ box.y2 - box.y1))
+ return TRUE;
+
+ DBG(("%s: extents=((%d, %d), (%d, %d))\n", __FUNCTION__,
+ box.x1, box.y1, box.x2, box.y2));
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ width = box.x2 - box.x1;
+ height = box.y2 - box.y1;
+ box.x1 -= dst->pDrawable->x;
+ box.y1 -= dst->pDrawable->y;
+ x = -box.x1;
+ y = -box.y1;
+ src_x += box.x1 - list->xOff;
+ src_y += box.y1 - list->yOff;
+
+ if (format->depth == 1) {
+ PictFormatPtr a8Format =
+ PictureMatchFormat(screen, 8, PICT_a8);
+ if (!a8Format)
+ return FALSE;
+
+ format = a8Format;
+ }
+
+ pixmap = screen->CreatePixmap(screen,
+ width, height, format->depth,
+ CREATE_PIXMAP_USAGE_SCRATCH);
+ if (!pixmap)
+ return FALSE;
+
+ component_alpha = NeedsComponent(format->format);
+ mask = CreatePicture(0, &pixmap->drawable,
+ format, CPComponentAlpha,
+ &component_alpha, serverClient, &error);
+ screen->DestroyPixmap(pixmap);
+ if (!mask)
+ return FALSE;
+
+ ValidatePicture(mask);
+
+ if (!clear_pixmap(sna, pixmap, mask->format)) {
+ FreePicture(mask, 0);
+ return FALSE;
+ }
+
+ glyph_atlas = NULL;
+ do {
+ x += list->xOff;
+ y += list->yOff;
+ n = list->len;
+ while (n--) {
+ GlyphPtr glyph = *glyphs++;
+ struct sna_glyph *priv;
+ PicturePtr this_atlas;
+ struct sna_composite_rectangles r;
+
+ if (glyph->info.width == 0 || glyph->info.height == 0)
+ goto next_glyph;
+
+ priv = GET_PRIVATE(glyph);
+ if (priv->atlas != NULL) {
+ this_atlas = priv->atlas;
+ r.src = priv->coordinate;
+ } else {
+ if (glyph_atlas) {
+ tmp.done(sna, &tmp);
+ glyph_atlas = NULL;
+ }
+ if (glyph_cache(screen, &sna->render, glyph)) {
+ this_atlas = priv->atlas;
+ r.src = priv->coordinate;
+ } else {
+ /* no cache for this glyph */
+ this_atlas = GlyphPicture(glyph)[index];
+ r.src.x = r.src.y = 0;
+ }
+ }
+
+ if (this_atlas != glyph_atlas) {
+ if (glyph_atlas)
+ tmp.done(sna, &tmp);
+
+ if (!sna->render.composite(sna, PictOpAdd,
+ this_atlas, NULL, mask,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0,
+ &tmp)) {
+ FreePicture(mask, 0);
+ return FALSE;
+ }
+
+ glyph_atlas = this_atlas;
+ }
+
+ DBG(("%s: blt glyph origin (%d, %d), offset (%d, %d), src (%d, %d), size (%d, %d)\n",
+ __FUNCTION__,
+ x, y,
+ glyph->info.x, glyph->info.y,
+ r.src.x, r.src.y,
+ glyph->info.width, glyph->info.height));
+
+ r.dst.x = x - glyph->info.x;
+ r.dst.y = y - glyph->info.y;
+ r.width = glyph->info.width;
+ r.height = glyph->info.height;
+ tmp.blt(sna, &tmp, &r);
+
+next_glyph:
+ x += glyph->info.xOff;
+ y += glyph->info.yOff;
+ }
+ list++;
+ } while (--nlist);
+ if (glyph_atlas)
+ tmp.done(sna, &tmp);
+
+ sna_composite(op,
+ src, mask, dst,
+ src_x, src_y,
+ 0, 0,
+ box.x1, box.y1,
+ width, height);
+
+ FreePicture(mask, 0);
+ return TRUE;
+}
+
+Bool sna_glyphs_init(ScreenPtr screen)
+{
+ if (!dixRegisterPrivateKey(&sna_glyph_key,
+ PRIVATE_GLYPH,
+ sizeof(struct sna_glyph)))
+ return FALSE;
+
+ return TRUE;
+}
+
+Bool sna_glyphs_create(struct sna *sna)
+{
+ return realize_glyph_caches(sna);
+}
+
+static PictFormatPtr
+glyphs_format(int nlist, GlyphListPtr list, GlyphPtr * glyphs)
+{
+ PictFormatPtr format = list[0].format;
+ int16_t x1, x2, y1, y2;
+ int16_t x, y;
+ BoxRec extents;
+ Bool first = TRUE;
+
+ x = 0;
+ y = 0;
+ extents.x1 = 0;
+ extents.y1 = 0;
+ extents.x2 = 0;
+ extents.y2 = 0;
+ while (nlist--) {
+ int n = list->len;
+
+ if (format->format != list->format->format)
+ return NULL;
+
+ x += list->xOff;
+ y += list->yOff;
+ list++;
+ while (n--) {
+ GlyphPtr glyph = *glyphs++;
+
+ if (glyph->info.width == 0 || glyph->info.height == 0) {
+ x += glyph->info.xOff;
+ y += glyph->info.yOff;
+ continue;
+ }
+
+ x1 = x - glyph->info.x;
+ if (x1 < MINSHORT)
+ x1 = MINSHORT;
+ y1 = y - glyph->info.y;
+ if (y1 < MINSHORT)
+ y1 = MINSHORT;
+ x2 = x1 + glyph->info.width;
+ if (x2 > MAXSHORT)
+ x2 = MAXSHORT;
+ y2 = y1 + glyph->info.height;
+ if (y2 > MAXSHORT)
+ y2 = MAXSHORT;
+
+ if (first) {
+ extents.x1 = x1;
+ extents.y1 = y1;
+ extents.x2 = x2;
+ extents.y2 = y2;
+ first = FALSE;
+ } else {
+ /* Potential overlap */
+ if (x1 < extents.x2 && x2 > extents.x1 &&
+ y1 < extents.y2 && y2 > extents.y1)
+ return NULL;
+
+ if (x1 < extents.x1)
+ extents.x1 = x1;
+ if (x2 > extents.x2)
+ extents.x2 = x2;
+ if (y1 < extents.y1)
+ extents.y1 = y1;
+ if (y2 > extents.y2)
+ extents.y2 = y2;
+ }
+ x += glyph->info.xOff;
+ y += glyph->info.yOff;
+ }
+ }
+
+ return format;
+}
+
+static void
+glyphs_fallback(CARD8 op,
+ PicturePtr src,
+ PicturePtr dst,
+ PictFormatPtr mask_format,
+ int src_x,
+ int src_y,
+ int nlist,
+ GlyphListPtr list,
+ GlyphPtr *glyphs)
+{
+ int screen = dst->pDrawable->pScreen->myNum;
+ pixman_image_t *dst_image, *mask_image, *src_image;
+ int dx, dy, x, y;
+ BoxRec box;
+ RegionRec region;
+
+ glyph_extents(nlist, list, glyphs, &box);
+ if (box.x2 <= box.x1 || box.y2 <= box.y1)
+ return;
+
+ DBG(("%s: (%d, %d), (%d, %d)\n",
+ __FUNCTION__, box.x1, box.y1, box.x2, box.y2));
+
+ RegionInit(&region, &box, 1);
+ RegionTranslate(&region, dst->pDrawable->x, dst->pDrawable->y);
+ if (dst->pCompositeClip)
+ RegionIntersect(&region, &region, dst->pCompositeClip);
+ DBG(("%s: clipped extents (%d, %d), (%d, %d)\n",
+ __FUNCTION__,
+ RegionExtents(&region)->x1, RegionExtents(&region)->y1,
+ RegionExtents(&region)->x2, RegionExtents(&region)->y2));
+ if (!RegionNotEmpty(&region))
+ return;
+
+ sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
+ true);
+ if (src->pDrawable)
+ sna_drawable_move_to_cpu(src->pDrawable, false);
+
+ dst_image = image_from_pict(dst, TRUE, &x, &y);
+ DBG(("%s: dst offset (%d, %d)\n", __FUNCTION__, x, y));
+ box.x1 += x;
+ box.x2 += x;
+ box.y1 += y;
+ box.y2 += y;
+
+ src_image = image_from_pict(src, FALSE, &dx, &dy);
+ DBG(("%s: src offset (%d, %d)\n", __FUNCTION__, dx, dy));
+ src_x += dx - list->xOff - x;
+ src_y += dy - list->yOff - y;
+
+ if (mask_format) {
+ mask_image =
+ pixman_image_create_bits(mask_format->depth << 24 | mask_format->format,
+ box.x2 - box.x1, box.y2 - box.y1,
+ NULL, 0);
+ if (NeedsComponent(mask_format->format))
+ pixman_image_set_component_alpha(mask_image, TRUE);
+
+ x -= box.x1;
+ y -= box.y1;
+ } else
+ mask_image = dst_image;
+
+ do {
+ int n = list->len;
+ x += list->xOff;
+ y += list->yOff;
+ while (n--) {
+ GlyphPtr g = *glyphs++;
+ PicturePtr picture;
+ pixman_image_t *glyph_image;
+ int dx, dy;
+
+ if (g->info.width == 0 || g->info.height == 0)
+ goto next_glyph;
+
+ picture = GlyphPicture(g)[screen];
+ if (picture == NULL)
+ goto next_glyph;
+
+ glyph_image = image_from_pict(picture, FALSE, &dx, &dy);
+ if (!glyph_image)
+ goto next_glyph;
+
+ if (mask_format) {
+ DBG(("%s: glyph+(%d,%d) to mask (%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ dx,dy,
+ x - g->info.x,
+ y - g->info.y,
+ g->info.width,
+ g->info.height));
+
+ pixman_image_composite(PictOpAdd,
+ glyph_image,
+ NULL,
+ mask_image,
+ dx, dy,
+ 0, 0,
+ x - g->info.x,
+ y - g->info.y,
+ g->info.width,
+ g->info.height);
+ } else {
+ DBG(("%s: glyph+(%d, %d) to dst (%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ dx, dy,
+ x - g->info.x,
+ y - g->info.y,
+ g->info.width,
+ g->info.height));
+
+ pixman_image_composite(op,
+ src_image,
+ glyph_image,
+ dst_image,
+ src_x + (x - g->info.x),
+ src_y + (y - g->info.y),
+ dx, dy,
+ x - g->info.x,
+ y - g->info.y,
+ g->info.width,
+ g->info.height);
+ }
+ free_pixman_pict(picture,glyph_image);
+
+next_glyph:
+ x += g->info.xOff;
+ y += g->info.yOff;
+ }
+ list++;
+ } while (--nlist);
+
+ if (mask_format) {
+ DBG(("%s: glyph mask composite src=(%d,%d) dst=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ src_x + box.x1,
+ src_y + box.y1,
+ box.x1, box.y1,
+ box.x2-box.x1, box.y2-box.y1));
+ pixman_image_composite(op, src_image, mask_image, dst_image,
+ src_x + box.x1,
+ src_y + box.y1,
+ 0, 0,
+ box.x1, box.y1,
+ box.x2 - box.x1,
+ box.y2 - box.y1);
+ pixman_image_unref(mask_image);
+ }
+
+ free_pixman_pict(src, src_image);
+ free_pixman_pict(dst, dst_image);
+ RegionUninit(&region);
+}
+
+void
+sna_glyphs(CARD8 op,
+ PicturePtr src,
+ PicturePtr dst,
+ PictFormatPtr mask,
+ INT16 src_x, INT16 src_y,
+ int nlist, GlyphListPtr list, GlyphPtr *glyphs)
+{
+ struct sna *sna = to_sna_from_drawable(dst->pDrawable);
+ PictFormatPtr _mask;
+
+ DBG(("%s(op=%d, nlist=%d, src=(%d, %d))\n",
+ __FUNCTION__, op, nlist, src_x, src_y));
+
+ if (REGION_NUM_RECTS(dst->pCompositeClip) == 0)
+ return;
+
+ if (sna->kgem.wedged || !sna->have_render) {
+ DBG(("%s: no render (wedged=%d)\n",
+ __FUNCTION__, sna->kgem.wedged));
+ goto fallback;
+ }
+
+ if (too_small(sna, dst->pDrawable) && !picture_is_gpu(src)) {
+ DBG(("%s: fallback -- too small\n", __FUNCTION__));
+ goto fallback;
+ }
+
+ if (dst->alphaMap || src->alphaMap) {
+ DBG(("%s: fallback -- alpha maps\n", __FUNCTION__));
+ goto fallback;
+ }
+
+ /* XXX discard the mask for non-overlapping glyphs? */
+
+ if (!mask) {
+ if (glyphs_to_dst(sna, op,
+ src, dst,
+ src_x, src_y,
+ nlist, list, glyphs))
+ return;
+ }
+
+ _mask = mask;
+ if (!_mask)
+ _mask = glyphs_format(nlist, list, glyphs);
+ if (_mask) {
+ if (glyphs_via_mask(sna, op,
+ src, dst, _mask,
+ src_x, src_y,
+ nlist, list, glyphs))
+ return;
+ } else {
+ if (glyphs_to_dst_slow(sna, op,
+ src, dst,
+ src_x, src_y,
+ nlist, list, glyphs))
+ return;
+ }
+
+fallback:
+ glyphs_fallback(op, src, dst, mask, src_x, src_y, nlist, list, glyphs);
+}
+
+void
+sna_glyph_unrealize(ScreenPtr screen, GlyphPtr glyph)
+{
+ struct sna_glyph_cache *cache;
+ struct sna_glyph *priv;
+ struct sna *sna;
+
+ priv = glyph_get_private(glyph);
+ if (priv->atlas == NULL)
+ return;
+
+ sna = to_sna_from_screen(screen);
+ cache = &sna->render.glyph[priv->pos & 1];
+ assert(cache->glyphs[priv->pos >> 1] == priv);
+ cache->glyphs[priv->pos >> 1] = NULL;
+ priv->atlas = NULL;
+}
+
+void sna_glyphs_close(struct sna *sna)
+{
+ unrealize_glyph_caches(sna);
+}
diff --git a/src/sna/sna_gradient.c b/src/sna/sna_gradient.c
new file mode 100644
index 00000000..5cfc81aa
--- /dev/null
+++ b/src/sna/sna_gradient.c
@@ -0,0 +1,335 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_render.h"
+
+#if DEBUG_GRADIENT
+#undef DBG
+#define DBG(x) ErrorF x
+#endif
+
+#define xFixedToDouble(f) pixman_fixed_to_double(f)
+
+static int
+sna_gradient_sample_width(PictGradient *gradient)
+{
+ unsigned int n;
+ int width;
+
+ width = 2;
+ for (n = 1; n < gradient->nstops; n++) {
+ xFixed dx = gradient->stops[n].x - gradient->stops[n-1].x;
+ uint16_t delta, max;
+ int ramp;
+
+ if (dx == 0)
+ return 0;
+
+ max = gradient->stops[n].color.red -
+ gradient->stops[n-1].color.red;
+
+ delta = gradient->stops[n].color.green -
+ gradient->stops[n-1].color.green;
+ if (delta > max)
+ max = delta;
+
+ delta = gradient->stops[n].color.blue -
+ gradient->stops[n-1].color.blue;
+ if (delta > max)
+ max = delta;
+
+ delta = gradient->stops[n].color.alpha -
+ gradient->stops[n-1].color.alpha;
+ if (delta > max)
+ max = delta;
+
+ ramp = 128 * max / xFixedToDouble(dx);
+ if (ramp > width)
+ width = ramp;
+ }
+
+ width *= gradient->nstops-1;
+ width = (width + 7) & -8;
+ return min(width, 1024);
+}
+
+static Bool
+_gradient_color_stops_equal(PictGradient *pattern,
+ struct sna_gradient_cache *cache)
+{
+ if (cache->nstops != pattern->nstops)
+ return FALSE;
+
+ return memcmp(cache->stops,
+ pattern->stops,
+ sizeof(PictGradientStop)*cache->nstops) == 0;
+}
+
+struct kgem_bo *
+sna_render_get_gradient(struct sna *sna,
+ PictGradient *pattern)
+{
+ struct sna_render *render = &sna->render;
+ struct sna_gradient_cache *cache;
+ pixman_image_t *gradient, *image;
+ pixman_point_fixed_t p1, p2;
+ unsigned int i, width;
+ struct kgem_bo *bo;
+
+ DBG(("%s: %dx[%f:%x...%f:%x...%f:%x]\n", __FUNCTION__,
+ pattern->nstops,
+ pattern->stops[0].x / 65536.,
+ pattern->stops[0].color.alpha >> 8 << 24 |
+ pattern->stops[0].color.red >> 8 << 16 |
+ pattern->stops[0].color.green >> 8 << 8 |
+ pattern->stops[0].color.blue >> 8 << 0,
+ pattern->stops[pattern->nstops/2].x / 65536.,
+ pattern->stops[pattern->nstops/2].color.alpha >> 8 << 24 |
+ pattern->stops[pattern->nstops/2].color.red >> 8 << 16 |
+ pattern->stops[pattern->nstops/2].color.green >> 8 << 8 |
+ pattern->stops[pattern->nstops/2].color.blue >> 8 << 0,
+ pattern->stops[pattern->nstops-1].x / 65536.,
+ pattern->stops[pattern->nstops-1].color.alpha >> 8 << 24 |
+ pattern->stops[pattern->nstops-1].color.red >> 8 << 16 |
+ pattern->stops[pattern->nstops-1].color.green >> 8 << 8 |
+ pattern->stops[pattern->nstops-1].color.blue >> 8 << 0));
+
+ for (i = 0; i < render->gradient_cache.size; i++) {
+ cache = &render->gradient_cache.cache[i];
+ if (_gradient_color_stops_equal(pattern, cache)) {
+ DBG(("%s: old --> %d\n", __FUNCTION__, i));
+ return kgem_bo_reference(cache->bo);
+ }
+ }
+
+ width = sna_gradient_sample_width(pattern);
+ DBG(("%s: sample width = %d\n", __FUNCTION__, width));
+ if (width == 0)
+ return NULL;
+
+ p1.x = 0;
+ p1.y = 0;
+ p2.x = width << 16;
+ p2.y = 0;
+
+ gradient = pixman_image_create_linear_gradient(&p1, &p2,
+ (pixman_gradient_stop_t *)pattern->stops,
+ pattern->nstops);
+ if (gradient == NULL)
+ return NULL;
+
+ pixman_image_set_filter(gradient, PIXMAN_FILTER_BILINEAR, NULL, 0);
+ pixman_image_set_repeat(gradient, PIXMAN_REPEAT_PAD);
+
+ image = pixman_image_create_bits(PIXMAN_a8r8g8b8, width, 1, NULL, 0);
+ if (image == NULL) {
+ pixman_image_unref(gradient);
+ return NULL;
+ }
+
+ pixman_image_composite(PIXMAN_OP_SRC,
+ gradient, NULL, image,
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ width, 1);
+ pixman_image_unref(gradient);
+
+ DBG(("%s: [0]=%x, [%d]=%x [%d]=%x\n", __FUNCTION__,
+ pixman_image_get_data(image)[0],
+ width/2, pixman_image_get_data(image)[width/2],
+ width-1, pixman_image_get_data(image)[width-1]));
+
+ bo = kgem_create_linear(&sna->kgem, width*4);
+ if (!bo) {
+ pixman_image_unref(image);
+ return NULL;
+ }
+
+ bo->pitch = 4*width;
+ kgem_bo_write(&sna->kgem, bo, pixman_image_get_data(image), 4*width);
+
+ pixman_image_unref(image);
+
+ if (render->gradient_cache.size < GRADIENT_CACHE_SIZE)
+ i = render->gradient_cache.size++;
+ else
+ i = rand () % GRADIENT_CACHE_SIZE;
+
+ cache = &render->gradient_cache.cache[i];
+ if (cache->nstops < pattern->nstops) {
+ PictGradientStop *newstops;
+
+ newstops = malloc(sizeof(PictGradientStop) * pattern->nstops);
+ if (newstops == NULL)
+ return bo;
+
+ free(cache->stops);
+ cache->stops = newstops;
+ }
+
+ memcpy(cache->stops, pattern->stops,
+ sizeof(PictGradientStop) * pattern->nstops);
+ cache->nstops = pattern->nstops;
+
+ if (cache->bo)
+ kgem_bo_destroy(&sna->kgem, cache->bo);
+ cache->bo = kgem_bo_reference(bo);
+
+ return bo;
+}
+
+void
+sna_render_flush_solid(struct sna *sna)
+{
+ struct sna_solid_cache *cache = &sna->render.solid_cache;
+
+ DBG(("sna_render_flush_solid(size=%d)\n", cache->size));
+
+ kgem_bo_write(&sna->kgem, cache->cache_bo,
+ cache->color, cache->size*sizeof(uint32_t));
+ cache->dirty = 0;
+}
+
+static void
+sna_render_finish_solid(struct sna *sna, bool force)
+{
+ struct sna_solid_cache *cache = &sna->render.solid_cache;
+ int i;
+
+ DBG(("sna_render_finish_solid(force=%d, busy=%d, dirty=%d)\n",
+ force, cache->cache_bo->gpu, cache->dirty));
+
+ if (!force && !cache->cache_bo->gpu)
+ return;
+
+ if (cache->dirty)
+ sna_render_flush_solid(sna);
+
+ for (i = 0; i < cache->size; i++)
+ kgem_bo_destroy(&sna->kgem, cache->bo[i]);
+ kgem_bo_destroy(&sna->kgem, cache->cache_bo);
+
+ DBG(("sna_render_finish_solid reset\n"));
+
+ cache->cache_bo = kgem_create_linear(&sna->kgem, sizeof(cache->color));
+ cache->bo[0] = kgem_create_proxy(cache->cache_bo, 0, sizeof(uint32_t));
+ cache->bo[0]->pitch = 4;
+ cache->size = 1;
+}
+
+struct kgem_bo *
+sna_render_get_solid(struct sna *sna, uint32_t color)
+{
+ struct sna_solid_cache *cache = &sna->render.solid_cache;
+ unsigned int i;
+
+ if (color == 0) {
+ DBG(("%s(clear)\n", __FUNCTION__));
+ return kgem_bo_reference(cache->bo[0]);
+ }
+
+ if (cache->color[cache->last] == color) {
+ DBG(("sna_render_get_solid(%d) = %x (last)\n",
+ cache->last, color));
+ return kgem_bo_reference(cache->bo[cache->last]);
+ }
+
+ for (i = 1; i < cache->size; i++) {
+ if (cache->color[i] == color) {
+ DBG(("sna_render_get_solid(%d) = %x (old)\n",
+ i, color));
+ goto done;
+ }
+ }
+
+ sna_render_finish_solid(sna, i == ARRAY_SIZE(cache->color));
+
+ i = cache->size++;
+ cache->color[i] = color;
+ cache->bo[i] = kgem_create_proxy(cache->cache_bo,
+ i*sizeof(uint32_t), sizeof(uint32_t));
+ cache->bo[i]->pitch = 4;
+ cache->dirty = 1;
+ DBG(("sna_render_get_solid(%d) = %x (new)\n", i, color));
+
+done:
+ cache->last = i;
+ return kgem_bo_reference(cache->bo[i]);
+}
+
+static Bool sna_solid_cache_init(struct sna *sna)
+{
+ struct sna_solid_cache *cache = &sna->render.solid_cache;
+
+ cache->cache_bo =
+ kgem_create_linear(&sna->kgem, sizeof(cache->color));
+ if (!cache->cache_bo)
+ return FALSE;
+
+ cache->bo[0] = kgem_create_proxy(cache->cache_bo, 0, sizeof(uint32_t));
+ cache->bo[0]->pitch = 4;
+ cache->size = 1;
+ cache->last = 0;
+ return TRUE;
+}
+
+Bool sna_gradients_create(struct sna *sna)
+{
+ return sna_solid_cache_init(sna);
+}
+
+void sna_gradients_close(struct sna *sna)
+{
+ int i;
+
+ if (sna->render.solid_cache.cache_bo)
+ kgem_bo_destroy(&sna->kgem, sna->render.solid_cache.cache_bo);
+ for (i = 0; i < sna->render.solid_cache.size; i++)
+ kgem_bo_destroy(&sna->kgem, sna->render.solid_cache.bo[i]);
+ sna->render.solid_cache.cache_bo = 0;
+ sna->render.solid_cache.size = 0;
+ sna->render.solid_cache.dirty = 0;
+
+ for (i = 0; i < sna->render.gradient_cache.size; i++) {
+ struct sna_gradient_cache *cache =
+ &sna->render.gradient_cache.cache[i];
+
+ if (cache->bo)
+ kgem_bo_destroy(&sna->kgem, cache->bo);
+
+ free(cache->stops);
+ cache->stops = NULL;
+ cache->nstops = 0;
+ }
+ sna->render.gradient_cache.size = 0;
+}
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
new file mode 100644
index 00000000..41f36713
--- /dev/null
+++ b/src/sna/sna_io.c
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_render.h"
+#include "sna_reg.h"
+
+#include <sys/mman.h>
+
+#if DEBUG_IO
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+#define PITCH(x, y) ALIGN((x)*(y), 4)
+
+static void read_boxes_inplace(struct kgem *kgem,
+ struct kgem_bo *bo, int16_t src_dx, int16_t src_dy,
+ PixmapPtr pixmap, int16_t dst_dx, int16_t dst_dy,
+ const BoxRec *box, int n)
+{
+ int bpp = pixmap->drawable.bitsPerPixel;
+ void *src, *dst = pixmap->devPrivate.ptr;
+ int src_pitch = bo->pitch;
+ int dst_pitch = pixmap->devKind;
+
+ DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));
+
+ kgem_bo_submit(kgem, bo);
+
+ src = kgem_bo_map(kgem, bo, PROT_READ);
+ if (src == NULL)
+ return;
+
+ do {
+ memcpy_blt(src, dst, bpp,
+ src_pitch, dst_pitch,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ box->x2 - box->x1, box->y2 - box->y1);
+ box++;
+ } while (--n);
+
+ munmap(src, bo->size);
+}
+
+void sna_read_boxes(struct sna *sna,
+ struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+ PixmapPtr dst, int16_t dst_dx, int16_t dst_dy,
+ const BoxRec *box, int nbox)
+{
+ struct kgem *kgem = &sna->kgem;
+ struct kgem_bo *dst_bo;
+ int tmp_nbox;
+ const BoxRec *tmp_box;
+ char *src;
+ void *ptr;
+ int src_pitch, cpp, offset;
+ int n, cmd, br13;
+
+ DBG(("%s x %d, src=(handle=%d, offset=(%d,%d)), dst=(size=(%d, %d), offset=(%d,%d))\n",
+ __FUNCTION__, nbox, src_bo->handle, src_dx, src_dy,
+ dst->drawable.width, dst->drawable.height, dst_dx, dst_dy));
+
+ if (DEBUG_NO_IO || kgem->wedged ||
+ !kgem_bo_is_busy(kgem, src_bo) ||
+ src_bo->tiling == I915_TILING_Y) {
+ read_boxes_inplace(kgem,
+ src_bo, src_dx, src_dy,
+ dst, dst_dx, dst_dy,
+ box, nbox);
+ return;
+ }
+
+ /* count the total number of bytes to be read and allocate a bo */
+ cpp = dst->drawable.bitsPerPixel / 8;
+ offset = 0;
+ for (n = 0; n < nbox; n++) {
+ int height = box[n].y2 - box[n].y1;
+ int width = box[n].x2 - box[n].x1;
+ offset += PITCH(width, cpp) * height;
+ }
+
+ DBG((" read buffer size=%d\n", offset));
+
+ dst_bo = kgem_create_buffer(kgem, offset, KGEM_BUFFER_LAST, &ptr);
+ if (!dst_bo) {
+ read_boxes_inplace(kgem,
+ src_bo, src_dx, src_dy,
+ dst, dst_dx, dst_dy,
+ box, nbox);
+ return;
+ }
+
+ cmd = XY_SRC_COPY_BLT_CMD;
+ if (cpp == 4)
+ cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ src_pitch = src_bo->pitch;
+ if (kgem->gen >= 40 && src_bo->tiling) {
+ cmd |= BLT_SRC_TILED;
+ src_pitch >>= 2;
+ }
+
+ br13 = 0xcc << 16;
+ switch (cpp) {
+ default:
+ case 4: br13 |= 1 << 25; /* RGB8888 */
+ case 2: br13 |= 1 << 24; /* RGB565 */
+ case 1: break;
+ }
+
+ kgem_set_mode(kgem, KGEM_BLT);
+ if (kgem->nexec + 2 > KGEM_EXEC_SIZE(kgem) ||
+ kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem) ||
+ !kgem_check_batch(kgem, 8) ||
+ !kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL))
+ _kgem_submit(kgem);
+
+ tmp_nbox = nbox;
+ tmp_box = box;
+ offset = 0;
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = tmp_nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
+ assert(nbox_this_time);
+ tmp_nbox -= nbox_this_time;
+
+ for (n = 0; n < nbox_this_time; n++) {
+ int height = tmp_box[n].y2 - tmp_box[n].y1;
+ int width = tmp_box[n].x2 - tmp_box[n].x1;
+ int pitch = PITCH(width, cpp);
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG((" blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n",
+ offset,
+ tmp_box[n].x1 + src_dx,
+ tmp_box[n].y1 + src_dy,
+ width, height, pitch));
+
+ assert(tmp_box[n].x1 + src_dx >= 0);
+ assert((tmp_box[n].x2 + src_dx) * dst->drawable.bitsPerPixel/8 <= src_bo->pitch);
+ assert(tmp_box[n].y1 + src_dy >= 0);
+ assert((tmp_box[n].y2 + src_dy) * src_bo->pitch <= src_bo->size);
+
+ b[0] = cmd;
+ b[1] = br13 | pitch;
+ b[2] = 0;
+ b[3] = height << 16 | width;
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ offset);
+ b[5] = (tmp_box[n].y1 + src_dy) << 16 | (tmp_box[n].x1 + src_dx);
+ b[6] = src_pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 8;
+
+ offset += pitch * height;
+ }
+ tmp_box += nbox_this_time;
+
+ _kgem_submit(kgem);
+ } while (tmp_nbox);
+ assert(offset == dst_bo->size);
+
+ kgem_buffer_sync(kgem, dst_bo);
+
+ src = ptr;
+ do {
+ int height = box->y2 - box->y1;
+ int width = box->x2 - box->x1;
+ int pitch = PITCH(width, cpp);
+
+ DBG((" copy offset %lx [%08x...%08x]: (%d, %d) x (%d, %d), src pitch=%d, dst pitch=%d, bpp=%d\n",
+ (long)((char *)src - (char *)ptr),
+ *(uint32_t*)src, *(uint32_t*)(src+pitch*height - 4),
+ box->x1 + dst_dx,
+ box->y1 + dst_dy,
+ width, height,
+ pitch, dst->devKind, cpp*8));
+
+ assert(box->x1 + dst_dx >= 0);
+ assert(box->x2 + dst_dx <= dst->drawable.width);
+ assert(box->y1 + dst_dy >= 0);
+ assert(box->y2 + dst_dy <= dst->drawable.height);
+
+ memcpy_blt(src, dst->devPrivate.ptr, cpp*8,
+ pitch, dst->devKind,
+ 0, 0,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ width, height);
+ box++;
+
+ src += pitch * height;
+ } while (--nbox);
+ assert(src - (char *)ptr == dst_bo->size);
+ kgem_bo_destroy(kgem, dst_bo);
+}
+
+static void write_boxes_inplace(struct kgem *kgem,
+ const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
+ struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
+ const BoxRec *box, int n)
+{
+ int dst_pitch = bo->pitch;
+ int src_pitch = stride;
+ void *dst;
+
+ DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));
+
+ kgem_bo_submit(kgem, bo);
+
+ dst = kgem_bo_map(kgem, bo, PROT_READ | PROT_WRITE);
+ if (dst == NULL)
+ return;
+
+ do {
+ DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ box->x2 - box->x1, box->y2 - box->y1,
+ bpp, src_pitch, dst_pitch));
+
+ memcpy_blt(src, dst, bpp,
+ src_pitch, dst_pitch,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ box->x2 - box->x1, box->y2 - box->y1);
+ box++;
+ } while (--n);
+
+ munmap(dst, bo->size);
+}
+
+void sna_write_boxes(struct sna *sna,
+ struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
+ const BoxRec *box, int nbox)
+{
+ struct kgem *kgem = &sna->kgem;
+ struct kgem_bo *src_bo;
+ void *ptr;
+ int offset;
+ int n, cmd, br13;
+
+ DBG(("%s x %d\n", __FUNCTION__, nbox));
+
+ if (DEBUG_NO_IO || kgem->wedged ||
+ !kgem_bo_is_busy(kgem, dst_bo) ||
+ dst_bo->tiling == I915_TILING_Y) {
+ write_boxes_inplace(kgem,
+ src, stride, bpp, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ box, nbox);
+ return;
+ }
+
+ cmd = XY_SRC_COPY_BLT_CMD;
+ if (bpp == 32)
+ cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ br13 = dst_bo->pitch;
+ if (kgem->gen >= 40 && dst_bo->tiling) {
+ cmd |= BLT_DST_TILED;
+ br13 >>= 2;
+ }
+ br13 |= 0xcc << 16;
+ switch (bpp) {
+ default:
+ case 32: br13 |= 1 << 25; /* RGB8888 */
+ case 16: br13 |= 1 << 24; /* RGB565 */
+ case 8: break;
+ }
+
+ kgem_set_mode(kgem, KGEM_BLT);
+ if (kgem->nexec + 2 > KGEM_EXEC_SIZE(kgem) ||
+ kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem) ||
+ !kgem_check_batch(kgem, 8) ||
+ !kgem_check_bo_fenced(kgem, dst_bo, NULL))
+ _kgem_submit(kgem);
+
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ /* Count the total number of bytes to be read and allocate a
+ * single buffer large enough. Or if it is very small, combine
+ * with other allocations. */
+ offset = 0;
+ for (n = 0; n < nbox_this_time; n++) {
+ int height = box[n].y2 - box[n].y1;
+ int width = box[n].x2 - box[n].x1;
+ offset += PITCH(width, bpp >> 3) * height;
+ }
+
+ src_bo = kgem_create_buffer(kgem, offset,
+ KGEM_BUFFER_WRITE | KGEM_BUFFER_LAST,
+ &ptr);
+ if (!src_bo)
+ break;
+
+ offset = 0;
+ do {
+ int height = box->y2 - box->y1;
+ int width = box->x2 - box->x1;
+ int pitch = PITCH(width, bpp >> 3);
+ uint32_t *b;
+
+ DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
+ __FUNCTION__,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ width, height,
+ offset, pitch));
+
+ assert(box->x1 + src_dx >= 0);
+ assert((box->x2 + src_dx)*bpp <= 8*stride);
+ assert(box->y1 + src_dy >= 0);
+
+ assert(box->x1 + dst_dx >= 0);
+ assert(box->y1 + dst_dy >= 0);
+
+ memcpy_blt(src, (char *)ptr + offset, bpp,
+ stride, pitch,
+ box->x1 + src_dx, box->y1 + src_dy,
+ 0, 0,
+ width, height);
+
+ b = kgem->batch + kgem->nbatch;
+ b[0] = cmd;
+ b[1] = br13;
+ b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
+ b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = 0;
+ b[6] = pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ offset);
+ kgem->nbatch += 8;
+
+ box++;
+ offset += pitch * height;
+ } while (--nbox_this_time);
+ assert(offset == src_bo->size);
+
+ if (nbox)
+ _kgem_submit(kgem);
+
+ kgem_bo_destroy(kgem, src_bo);
+ } while (nbox);
+
+ _kgem_set_mode(kgem, KGEM_BLT);
+}
+
+struct kgem_bo *sna_replace(struct sna *sna,
+ struct kgem_bo *bo,
+ int width, int height, int bpp,
+ const void *src, int stride)
+{
+ struct kgem *kgem = &sna->kgem;
+ void *dst;
+
+ DBG(("%s(%d, %d)\n", __FUNCTION__, width, height));
+
+ assert(bo->reusable);
+ if (kgem_bo_is_busy(kgem, bo)) {
+ struct kgem_bo *new_bo;
+ int tiling = bo->tiling;
+
+ /* As we use fences for GPU BLTs, we often have
+ * lots of contention upon the limited number of fences.
+ */
+ if (sna->kgem.gen < 40)
+ tiling = I915_TILING_NONE;
+
+ new_bo = kgem_create_2d(kgem,
+ width, height, bpp, tiling,
+ CREATE_INACTIVE);
+ if (new_bo) {
+ kgem_bo_destroy(kgem, bo);
+ bo = new_bo;
+ }
+ }
+
+ if (bo->tiling == I915_TILING_NONE && bo->pitch == stride) {
+ kgem_bo_write(kgem, bo, src, (height-1)*stride + width*bpp/8);
+ return bo;
+ }
+
+ dst = kgem_bo_map(kgem, bo, PROT_READ | PROT_WRITE);
+ if (dst) {
+ memcpy_blt(src, dst, bpp,
+ stride, bo->pitch,
+ 0, 0,
+ 0, 0,
+ width, height);
+ munmap(dst, bo->size);
+ }
+
+ return bo;
+}
diff --git a/src/sna/sna_module.h b/src/sna/sna_module.h
new file mode 100644
index 00000000..9b14acc3
--- /dev/null
+++ b/src/sna/sna_module.h
@@ -0,0 +1,3 @@
+const OptionInfoRec *sna_available_options(int chipid, int busid);
+void sna_init_scrn(ScrnInfoPtr scrn);
+
diff --git a/src/sna/sna_reg.h b/src/sna/sna_reg.h
new file mode 100644
index 00000000..f6e53979
--- /dev/null
+++ b/src/sna/sna_reg.h
@@ -0,0 +1,108 @@
+#ifndef SNA_REG_H
+#define SNA_REG_H
+
+/* Flush */
+#define MI_FLUSH (0x04<<23)
+#define MI_FLUSH_DW (0x26<<23)
+
+#define MI_WRITE_DIRTY_STATE (1<<4)
+#define MI_END_SCENE (1<<3)
+#define MI_GLOBAL_SNAPSHOT_COUNT_RESET (1<<3)
+#define MI_INHIBIT_RENDER_CACHE_FLUSH (1<<2)
+#define MI_STATE_INSTRUCTION_CACHE_FLUSH (1<<1)
+#define MI_INVALIDATE_MAP_CACHE (1<<0)
+/* broadwater flush bits */
+#define BRW_MI_GLOBAL_SNAPSHOT_RESET (1 << 3)
+
+#define MI_BATCH_BUFFER_END (0xA << 23)
+
+/* Noop */
+#define MI_NOOP 0x00
+#define MI_NOOP_WRITE_ID (1<<22)
+#define MI_NOOP_ID_MASK (1<<22 - 1)
+
+/* Wait for Events */
+#define MI_WAIT_FOR_EVENT (0x03<<23)
+#define MI_WAIT_FOR_PIPEB_SVBLANK (1<<18)
+#define MI_WAIT_FOR_PIPEA_SVBLANK (1<<17)
+#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16)
+#define MI_WAIT_FOR_PIPEB_VBLANK (1<<7)
+#define MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW (1<<5)
+#define MI_WAIT_FOR_PIPEA_VBLANK (1<<3)
+#define MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW (1<<1)
+
+/* Set the scan line for MI_WAIT_FOR_PIPE?_SCAN_LINE_WINDOW */
+#define MI_LOAD_SCAN_LINES_INCL (0x12<<23)
+#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEA (0)
+#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEB (0x1<<20)
+
+/* BLT commands */
+#define COLOR_BLT_CMD ((2<<29)|(0x40<<22)|(0x3))
+#define COLOR_BLT_WRITE_ALPHA (1<<21)
+#define COLOR_BLT_WRITE_RGB (1<<20)
+
+#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|(0x4))
+#define XY_COLOR_BLT_WRITE_ALPHA (1<<21)
+#define XY_COLOR_BLT_WRITE_RGB (1<<20)
+#define XY_COLOR_BLT_TILED (1<<11)
+
+#define XY_SETUP_CLIP_BLT_CMD ((2<<29)|(3<<22)|1)
+
+#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6)
+#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21)
+#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20)
+#define XY_SRC_COPY_BLT_SRC_TILED (1<<15)
+#define XY_SRC_COPY_BLT_DST_TILED (1<<11)
+
+#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|0x4)
+#define SRC_COPY_BLT_WRITE_ALPHA (1<<21)
+#define SRC_COPY_BLT_WRITE_RGB (1<<20)
+
+#define XY_PAT_BLT_IMMEDIATE ((2<<29)|(0x72<<22))
+
+#define XY_MONO_PAT_BLT_CMD ((0x2<<29)|(0x52<<22)|0x7)
+#define XY_MONO_PAT_VERT_SEED ((1<<10)|(1<<9)|(1<<8))
+#define XY_MONO_PAT_HORT_SEED ((1<<14)|(1<<13)|(1<<12))
+#define XY_MONO_PAT_BLT_WRITE_ALPHA (1<<21)
+#define XY_MONO_PAT_BLT_WRITE_RGB (1<<20)
+
+#define XY_MONO_SRC_BLT_CMD ((0x2<<29)|(0x54<<22)|(0x6))
+#define XY_MONO_SRC_BLT_WRITE_ALPHA (1<<21)
+#define XY_MONO_SRC_BLT_WRITE_RGB (1<<20)
+
+/* BLT commands */
+#define BLT_WRITE_ALPHA (1<<21)
+#define BLT_WRITE_RGB (1<<20)
+#define BLT_SRC_TILED (1<<15)
+#define BLT_DST_TILED (1<<11)
+
+#define COLOR_BLT_CMD ((2<<29)|(0x40<<22)|(0x3))
+#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|(0x4))
+#define XY_SETUP_CLIP_BLT_CMD ((2<<29)|(3<<22)|1)
+#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6)
+#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|0x4)
+#define XY_PAT_BLT_IMMEDIATE ((2<<29)|(0x72<<22))
+#define XY_MONO_PAT_BLT_CMD ((0x2<<29)|(0x52<<22)|0x7)
+#define XY_MONO_SRC_BLT_CMD ((0x2<<29)|(0x54<<22)|(0x6))
+
+/* FLUSH commands */
+#define BRW_3D(Pipeline,Opcode,Subopcode) \
+ ((3 << 29) | \
+ ((Pipeline) << 27) | \
+ ((Opcode) << 24) | \
+ ((Subopcode) << 16))
+#define PIPE_CONTROL BRW_3D(3, 2, 0)
+#define PIPE_CONTROL_NOWRITE (0 << 14)
+#define PIPE_CONTROL_WRITE_QWORD (1 << 14)
+#define PIPE_CONTROL_WRITE_DEPTH (2 << 14)
+#define PIPE_CONTROL_WRITE_TIME (3 << 14)
+#define PIPE_CONTROL_DEPTH_STALL (1 << 13)
+#define PIPE_CONTROL_WC_FLUSH (1 << 12)
+#define PIPE_CONTROL_IS_FLUSH (1 << 11)
+#define PIPE_CONTROL_TC_FLUSH (1 << 10)
+#define PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
+#define PIPE_CONTROL_GLOBAL_GTT (1 << 2)
+#define PIPE_CONTROL_LOCAL_PGTT (0 << 2)
+#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
+
+#endif
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
new file mode 100644
index 00000000..b6a44d26
--- /dev/null
+++ b/src/sna/sna_render.c
@@ -0,0 +1,888 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "sna.h"
+#include "sna_render.h"
+
+#include <fb.h>
+
+#if DEBUG_RENDER
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+#define NO_REDIRECT 0
+#define NO_CONVERT 0
+#define NO_FIXUP 0
+#define NO_EXTRACT 0
+
+void sna_kgem_reset(struct kgem *kgem)
+{
+ struct sna *sna = container_of(kgem, struct sna, kgem);
+
+ sna->render.reset(sna);
+}
+
+void sna_kgem_flush(struct kgem *kgem)
+{
+ struct sna *sna = container_of(kgem, struct sna, kgem);
+
+ sna->render.flush(sna);
+
+ if (sna->render.solid_cache.dirty)
+ sna_render_flush_solid(sna);
+}
+
+void sna_kgem_context_switch(struct kgem *kgem, int new_mode)
+{
+ struct sna *sna = container_of(kgem, struct sna, kgem);
+
+ sna->render.context_switch(sna, new_mode);
+}
+
+CARD32
+sna_format_for_depth(int depth)
+{
+ switch (depth) {
+ case 1: return PICT_a1;
+ case 4: return PICT_a4;
+ case 8: return PICT_a8;
+ case 15: return PICT_x1r5g5b5;
+ case 16: return PICT_r5g6b5;
+ default:
+ case 24: return PICT_x8r8g8b8;
+ case 30: return PICT_x2r10g10b10;
+ case 32: return PICT_a8r8g8b8;
+ }
+}
+
+static Bool
+no_render_composite(struct sna *sna,
+ uint8_t op,
+ PicturePtr src,
+ PicturePtr mask,
+ PicturePtr dst,
+ int16_t src_x, int16_t src_y,
+ int16_t mask_x, int16_t mask_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t width, int16_t height,
+ struct sna_composite_op *tmp)
+{
+ DBG(("%s ()\n", __FUNCTION__));
+
+ if (mask == NULL &&
+ sna_blt_composite(sna,
+ op, src, dst,
+ src_x, src_y,
+ dst_x, dst_y,
+ width, height,
+ tmp))
+ return TRUE;
+
+ return FALSE;
+}
+
+static Bool
+no_render_copy_boxes(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+ PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const BoxRec *box, int n)
+{
+ DBG(("%s (n=%d)\n", __FUNCTION__, n));
+
+ return sna_blt_copy_boxes(sna, alu,
+ src_bo, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ dst->drawable.bitsPerPixel,
+ box, n);
+}
+
+static Bool
+no_render_copy(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ struct sna_copy_op *tmp)
+{
+ DBG(("%s ()\n", __FUNCTION__));
+
+ if (src->drawable.bitsPerPixel != dst->drawable.bitsPerPixel &&
+ sna_blt_copy(sna, alu,
+ src_bo, dst_bo, dst->drawable.bitsPerPixel,
+ tmp))
+ return TRUE;
+
+ return FALSE;
+}
+
+static Bool
+no_render_fill_boxes(struct sna *sna,
+ CARD8 op,
+ PictFormat format,
+ const xRenderColor *color,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ const BoxRec *box, int n)
+{
+ uint8_t alu = GXcopy;
+ uint32_t pixel;
+
+ DBG(("%s (op=%d, color=(%04x,%04x,%04x, %04x))\n",
+ __FUNCTION__, op,
+ color->red, color->green, color->blue, color->alpha));
+
+ if (color == 0)
+ op = PictOpClear;
+
+ if (op == PictOpClear) {
+ alu = GXclear;
+ op = PictOpSrc;
+ }
+
+ if (op == PictOpOver) {
+ if ((color->alpha >= 0xff00))
+ op = PictOpSrc;
+ }
+
+ if (op != PictOpSrc)
+ return FALSE;
+
+ if (!sna_get_pixel_from_rgba(&pixel,
+ color->red,
+ color->green,
+ color->blue,
+ color->alpha,
+ format))
+ return FALSE;
+
+ return sna_blt_fill_boxes(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ pixel, box, n);
+}
+
+static Bool
+no_render_fill(struct sna *sna, uint8_t alu,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ uint32_t color,
+ struct sna_fill_op *tmp)
+{
+ DBG(("%s (alu=%d, color=%08x)\n", __FUNCTION__, alu, color));
+ return sna_blt_fill(sna, alu,
+ dst_bo, dst->drawable.bitsPerPixel,
+ color,
+ tmp);
+}
+
+static void no_render_reset(struct sna *sna)
+{
+}
+
+static void no_render_flush(struct sna *sna)
+{
+}
+
+static void
+no_render_context_switch(struct sna *sna,
+ int new_mode)
+{
+}
+
+static void
+no_render_fini(struct sna *sna)
+{
+}
+
+void no_render_init(struct sna *sna)
+{
+ struct sna_render *render = &sna->render;
+
+ render->composite = no_render_composite;
+
+ render->copy_boxes = no_render_copy_boxes;
+ render->copy = no_render_copy;
+
+ render->fill_boxes = no_render_fill_boxes;
+ render->fill = no_render_fill;
+
+ render->reset = no_render_reset;
+ render->flush = no_render_flush;
+ render->context_switch = no_render_context_switch;
+ render->fini = no_render_fini;
+
+ if (sna->kgem.gen >= 60)
+ sna->kgem.ring = KGEM_BLT;
+}
+
+static Bool
+move_to_gpu(PixmapPtr pixmap, const BoxPtr box)
+{
+ struct sna_pixmap *priv;
+ int count, w, h;
+
+ if (pixmap->usage_hint)
+ return FALSE;
+
+ w = box->x2 - box->x1;
+ h = box->y2 - box->y1;
+ if (w == pixmap->drawable.width || h == pixmap->drawable.height)
+ return TRUE;
+
+ count = SOURCE_BIAS;
+ priv = sna_pixmap(pixmap);
+ if (priv)
+ count = ++priv->source_count;
+
+ DBG(("%s: migrate box (%d, %d), (%d, %d)? source count=%d, fraction=%d/%d [%d]\n",
+ __FUNCTION__,
+ box->x1, box->y1, box->x2, box->y2,
+ count, w*h,
+ pixmap->drawable.width * pixmap->drawable.height,
+ pixmap->drawable.width * pixmap->drawable.height / (w*h)));
+
+ return count*w*h >= pixmap->drawable.width * pixmap->drawable.height;
+}
+
+static Bool
+texture_is_cpu(PixmapPtr pixmap, const BoxPtr box)
+{
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+
+ if (priv == NULL)
+ return TRUE;
+
+ if (priv->gpu_only)
+ return FALSE;
+
+ if (priv->gpu_bo == NULL)
+ return TRUE;
+
+ if (!priv->cpu_damage)
+ return FALSE;
+
+ if (sna_damage_contains_box(priv->gpu_damage, box) != PIXMAN_REGION_OUT)
+ return FALSE;
+
+ return sna_damage_contains_box(priv->cpu_damage, box) != PIXMAN_REGION_OUT;
+}
+
+static struct kgem_bo *upload(struct sna *sna,
+ struct sna_composite_channel *channel,
+ PixmapPtr pixmap,
+ int16_t x, int16_t y, int16_t w, int16_t h,
+ BoxPtr box)
+{
+ struct kgem_bo *bo;
+
+ DBG(("%s: origin=(%d, %d), box=(%d, %d), (%d, %d), pixmap=%dx%d\n",
+ __FUNCTION__, x, y, box->x1, box->y1, box->x2, box->y2, pixmap->drawable.width, pixmap->drawable.height));
+
+ bo = kgem_upload_source_image(&sna->kgem,
+ pixmap->devPrivate.ptr,
+ box->x1, box->y1, w, h,
+ pixmap->devKind,
+ pixmap->drawable.bitsPerPixel);
+ if (bo) {
+ channel->offset[0] -= box->x1;
+ channel->offset[1] -= box->y1;
+ channel->scale[0] = 1./w;
+ channel->scale[1] = 1./h;
+ channel->width = w;
+ channel->height = h;
+ }
+
+ return bo;
+}
+
+int
+sna_render_pixmap_bo(struct sna *sna,
+ struct sna_composite_channel *channel,
+ PixmapPtr pixmap,
+ int16_t x, int16_t y,
+ int16_t w, int16_t h,
+ int16_t dst_x, int16_t dst_y)
+{
+ struct kgem_bo *bo = NULL;
+ struct sna_pixmap *priv;
+ BoxRec box;
+
+ DBG(("%s (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w,h));
+
+ /* XXX handle transformed repeat */
+ if (w == 0 || h == 0 || channel->transform) {
+ box.x1 = box.y1 = 0;
+ box.x2 = pixmap->drawable.width;
+ box.y2 = pixmap->drawable.height;
+ } else {
+ box.x1 = x;
+ box.y1 = y;
+ box.x2 = x + w;
+ box.y2 = y + h;
+
+ if (channel->repeat != RepeatNone) {
+ if (box.x1 < 0 ||
+ box.y1 < 0 ||
+ box.x2 > pixmap->drawable.width ||
+ box.y2 > pixmap->drawable.height) {
+ box.x1 = box.y1 = 0;
+ box.x2 = pixmap->drawable.width;
+ box.y2 = pixmap->drawable.height;
+ }
+ } else {
+ if (box.x1 < 0)
+ box.x1 = 0;
+ if (box.y1 < 0)
+ box.y1 = 0;
+ if (box.x2 > pixmap->drawable.width)
+ box.x2 = pixmap->drawable.width;
+ if (box.y2 > pixmap->drawable.height)
+ box.y2 = pixmap->drawable.height;
+ }
+ }
+
+ w = box.x2 - box.x1;
+ h = box.y2 - box.y1;
+ DBG(("%s box=(%d, %d), (%d, %d): (%d, %d)/(%d, %d)\n", __FUNCTION__,
+ box.x1, box.y1, box.x2, box.y2, w, h,
+ pixmap->drawable.width, pixmap->drawable.height));
+ if (w <= 0 || h <= 0) {
+ DBG(("%s: sample extents outside of texture -> clear\n",
+ __FUNCTION__));
+ return 0;
+ }
+
+ channel->height = pixmap->drawable.height;
+ channel->width = pixmap->drawable.width;
+ channel->scale[0] = 1. / pixmap->drawable.width;
+ channel->scale[1] = 1. / pixmap->drawable.height;
+ channel->offset[0] = x - dst_x;
+ channel->offset[1] = y - dst_y;
+
+ DBG(("%s: offset=(%d, %d), size=(%d, %d)\n",
+ __FUNCTION__,
+ channel->offset[0], channel->offset[1],
+ pixmap->drawable.width, pixmap->drawable.height));
+
+ if (texture_is_cpu(pixmap, &box) && !move_to_gpu(pixmap, &box)) {
+ /* If we are using transient data, it is better to copy
+ * to an amalgamated upload buffer so that we don't
+ * stall on releasing the cpu bo immediately upon
+ * completion of the operation.
+ */
+ if (pixmap->usage_hint != CREATE_PIXMAP_USAGE_SCRATCH_HEADER &&
+ w * pixmap->drawable.bitsPerPixel * h > 8*4096) {
+ priv = sna_pixmap_attach(pixmap);
+ bo = pixmap_vmap(&sna->kgem, pixmap);
+ if (bo)
+ bo = kgem_bo_reference(bo);
+ }
+
+ if (bo == NULL) {
+ DBG(("%s: uploading CPU box\n", __FUNCTION__));
+ bo = upload(sna, channel, pixmap, x,y, w,h, &box);
+ }
+ }
+
+ if (bo == NULL) {
+ priv = sna_pixmap_force_to_gpu(pixmap);
+ if (priv)
+ bo = kgem_bo_reference(priv->gpu_bo);
+ else
+ bo = upload(sna, channel, pixmap, x,y, w,h, &box);
+ }
+
+ channel->bo = bo;
+ return bo != NULL;
+}
+
+int
+sna_render_picture_extract(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int16_t x, int16_t y,
+ int16_t w, int16_t h,
+ int16_t dst_x, int16_t dst_y)
+{
+ struct kgem_bo *bo = NULL;
+ PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable);
+ int16_t ox, oy;
+ BoxRec box;
+
+#if NO_EXTRACT
+ return -1;
+#endif
+
+ DBG(("%s (%d, %d)x(%d, %d) [dst=(%d, %d)]\n",
+ __FUNCTION__, x, y, w, h, dst_x, dst_y));
+
+ if (w == 0 || h == 0) {
+ DBG(("%s: fallback -- unknown bounds\n", __FUNCTION__));
+ return -1;
+ }
+
+ ox = box.x1 = x;
+ oy = box.y1 = y;
+ box.x2 = x + w;
+ box.y2 = y + h;
+ if (channel->transform) {
+ pixman_vector_t v;
+
+ pixman_transform_bounds(channel->transform, &box);
+
+ v.vector[0] = ox << 16;
+ v.vector[1] = oy << 16;
+ v.vector[2] = 1 << 16;
+ pixman_transform_point(channel->transform, &v);
+ ox = v.vector[0] / v.vector[2];
+ oy = v.vector[1] / v.vector[2];
+ }
+
+ if (channel->repeat != RepeatNone) {
+ if (box.x1 < 0 ||
+ box.y1 < 0 ||
+ box.x2 > pixmap->drawable.width ||
+ box.y2 > pixmap->drawable.height) {
+ /* XXX tiled repeats? */
+ box.x1 = box.y1 = 0;
+ box.x2 = pixmap->drawable.width;
+ box.y2 = pixmap->drawable.height;
+
+ if (!channel->is_affine) {
+ DBG(("%s: fallback -- repeating project transform too large for texture\n",
+ __FUNCTION__));
+ return -1;
+ }
+ }
+ } else {
+ if (box.x1 < 0)
+ box.x1 = 0;
+ if (box.y1 < 0)
+ box.y1 = 0;
+ if (box.x2 > pixmap->drawable.width)
+ box.x2 = pixmap->drawable.width;
+ if (box.y2 > pixmap->drawable.height)
+ box.y2 = pixmap->drawable.height;
+ }
+
+ w = box.x2 - box.x1;
+ h = box.y2 - box.y1;
+ DBG(("%s box=(%d, %d), (%d, %d): (%d, %d)/(%d, %d)\n", __FUNCTION__,
+ box.x1, box.y1, box.x2, box.y2, w, h,
+ pixmap->drawable.width, pixmap->drawable.height));
+ if (w <= 0 || h <= 0) {
+ DBG(("%s: sample extents outside of texture -> clear\n",
+ __FUNCTION__));
+ return 0;
+ }
+
+ if (w > sna->render.max_3d_size || h > sna->render.max_3d_size) {
+ DBG(("%s: fallback -- sample too large for texture (%d, %d)x(%d, %d)\n",
+ __FUNCTION__, box.x1, box.y1, w, h));
+ return -1;
+ }
+
+ if (texture_is_cpu(pixmap, &box) && !move_to_gpu(pixmap, &box)) {
+ bo = kgem_upload_source_image(&sna->kgem,
+ pixmap->devPrivate.ptr,
+ box.x1, box.y1, w, h,
+ pixmap->devKind,
+ pixmap->drawable.bitsPerPixel);
+ if (!bo) {
+ DBG(("%s: failed to upload source image, using clear\n",
+ __FUNCTION__));
+ return 0;
+ }
+ } else {
+ if (!sna_pixmap_move_to_gpu(pixmap)) {
+ DBG(("%s: falback -- pixmap is not on the GPU\n",
+ __FUNCTION__));
+ return -1;
+ }
+
+ bo = kgem_create_2d(&sna->kgem, w, h,
+ pixmap->drawable.bitsPerPixel,
+ kgem_choose_tiling(&sna->kgem,
+ I915_TILING_X, w, h,
+ pixmap->drawable.bitsPerPixel),
+ 0);
+ if (!bo) {
+ DBG(("%s: failed to create bo, using clear\n",
+ __FUNCTION__));
+ return 0;
+ }
+
+ if (!sna_blt_copy_boxes(sna, GXcopy,
+ sna_pixmap_get_bo(pixmap), 0, 0,
+ bo, -box.x1, -box.y1,
+ pixmap->drawable.bitsPerPixel,
+ &box, 1)) {
+ DBG(("%s: fallback -- unable to copy boxes\n",
+ __FUNCTION__));
+ return -1;
+ }
+ }
+
+ if (ox == x && oy == y) {
+ x = y = 0;
+ } else if (channel->transform) {
+ pixman_vector_t v;
+ pixman_transform_t m;
+
+ v.vector[0] = (ox - box.x1) << 16;
+ v.vector[1] = (oy - box.y1) << 16;
+ v.vector[2] = 1 << 16;
+ pixman_transform_invert(&m, channel->transform);
+ pixman_transform_point(&m, &v);
+ x = v.vector[0] / v.vector[2];
+ y = v.vector[1] / v.vector[2];
+ } else {
+ x = ox - box.x1;
+ y = oy - box.y1;
+ }
+
+ channel->offset[0] = x - dst_x;
+ channel->offset[1] = y - dst_y;
+ channel->scale[0] = 1./w;
+ channel->scale[1] = 1./h;
+ channel->width = w;
+ channel->height = h;
+ channel->bo = bo;
+ return 1;
+}
+
+int
+sna_render_picture_fixup(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int16_t x, int16_t y,
+ int16_t w, int16_t h,
+ int16_t dst_x, int16_t dst_y)
+{
+ pixman_image_t *dst, *src;
+ uint32_t pitch;
+ int dx, dy;
+ void *ptr;
+
+#if NO_FIXUP
+ return -1;
+#endif
+
+ DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
+
+ if (w == 0 || h == 0) {
+ DBG(("%s: fallback - unknown bounds\n", __FUNCTION__));
+ return -1;
+ }
+ if (w > sna->render.max_3d_size || h > sna->render.max_3d_size) {
+ DBG(("%s: fallback - too large (%dx%d)\n", __FUNCTION__, w, h));
+ return -1;
+ }
+
+ if (PICT_FORMAT_RGB(picture->format) == 0) {
+ pitch = ALIGN(w, 4);
+ channel->pict_format = PIXMAN_a8;
+ } else {
+ pitch = sizeof(uint32_t)*w;
+ channel->pict_format = PIXMAN_a8r8g8b8;
+ }
+ if (channel->pict_format != picture->format) {
+ DBG(("%s: converting to %08x (pitch=%d) from %08x\n",
+ __FUNCTION__, channel->pict_format, pitch, picture->format));
+ }
+
+ channel->bo = kgem_create_buffer(&sna->kgem,
+ pitch*h, KGEM_BUFFER_WRITE,
+ &ptr);
+ if (!channel->bo) {
+ DBG(("%s: failed to create upload buffer, using clear\n",
+ __FUNCTION__));
+ return 0;
+ }
+
+ /* XXX Convolution filter? */
+ memset(ptr, 0, pitch*h);
+ channel->bo->pitch = pitch;
+
+ /* Composite in the original format to preserve idiosyncracies */
+ if (picture->format == channel->pict_format)
+ dst = pixman_image_create_bits(picture->format, w, h, ptr, pitch);
+ else
+ dst = pixman_image_create_bits(picture->format, w, h, NULL, 0);
+ if (!dst) {
+ kgem_bo_destroy(&sna->kgem, channel->bo);
+ return 0;
+ }
+
+ if (picture->pDrawable)
+ sna_drawable_move_to_cpu(picture->pDrawable, false);
+
+ src = image_from_pict(picture, FALSE, &dx, &dy);
+ if (src == NULL) {
+ pixman_image_unref(dst);
+ kgem_bo_destroy(&sna->kgem, channel->bo);
+ return 0;
+ }
+
+ DBG(("%s: compositing tmp=(%d+%d, %d+%d)x(%d, %d)\n",
+ __FUNCTION__, x, dx, y, dy, w, h));
+ pixman_image_composite(PictOpSrc, src, NULL, dst,
+ x + dx, y + dy,
+ 0, 0,
+ 0, 0,
+ w, h);
+ free_pixman_pict(picture, src);
+
+ /* Then convert to card format */
+ if (picture->format != channel->pict_format) {
+ DBG(("%s: performing post-conversion %08x->%08x (%d, %d)\n",
+ __FUNCTION__,
+ picture->format, channel->pict_format,
+ w, h));
+
+ src = dst;
+ dst = pixman_image_create_bits(channel->pict_format,
+ w, h, ptr, pitch);
+
+ pixman_image_composite(PictOpSrc, src, NULL, dst,
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ w, h);
+ pixman_image_unref(src);
+ }
+ pixman_image_unref(dst);
+
+ channel->width = w;
+ channel->height = h;
+
+ channel->filter = PictFilterNearest;
+ channel->repeat = RepeatNone;
+ channel->is_affine = TRUE;
+
+ channel->scale[0] = 1./w;
+ channel->scale[1] = 1./h;
+ channel->offset[0] = -dst_x;
+ channel->offset[1] = -dst_y;
+ channel->transform = NULL;
+
+ return 1;
+}
+
+int
+sna_render_picture_convert(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ PixmapPtr pixmap,
+ int16_t x, int16_t y,
+ int16_t w, int16_t h,
+ int16_t dst_x, int16_t dst_y)
+{
+ uint32_t pitch;
+ pixman_image_t *src, *dst;
+ BoxRec box;
+ void *ptr;
+
+#if NO_CONVERT
+ return -1;
+#endif
+
+ box.x1 = x;
+ box.y1 = y;
+ box.x2 = x + w;
+ box.y2 = y + h;
+
+ if (channel->transform) {
+ DBG(("%s: has transform, uploading whole surface\n",
+ __FUNCTION__));
+ box.x1 = box.y1 = 0;
+ box.x2 = pixmap->drawable.width;
+ box.y2 = pixmap->drawable.height;
+ }
+
+ if (box.x1 < 0)
+ box.x1 = 0;
+ if (box.y1 < 0)
+ box.y1 = 0;
+ if (box.x2 > pixmap->drawable.width)
+ box.x2 = pixmap->drawable.width;
+ if (box.y2 > pixmap->drawable.height)
+ box.y2 = pixmap->drawable.height;
+
+ w = box.x2 - box.x1;
+ h = box.y2 - box.y1;
+
+ DBG(("%s: convert (%d, %d)x(%d, %d), source size %dx%d\n",
+ __FUNCTION__, box.x1, box.y1, w, h,
+ pixmap->drawable.width,
+ pixmap->drawable.height));
+
+ sna_pixmap_move_to_cpu(pixmap, false);
+
+ src = pixman_image_create_bits(picture->format,
+ pixmap->drawable.width,
+ pixmap->drawable.height,
+ pixmap->devPrivate.ptr,
+ pixmap->devKind);
+ if (!src)
+ return 0;
+
+ if (PICT_FORMAT_RGB(picture->format) == 0) {
+ pitch = ALIGN(w, 4);
+ channel->pict_format = PIXMAN_a8;
+ DBG(("%s: converting to a8 (pitch=%d) from %08x\n",
+ __FUNCTION__, pitch, picture->format));
+ } else {
+ pitch = sizeof(uint32_t)*w;
+ channel->pict_format = PIXMAN_a8r8g8b8;
+ DBG(("%s: converting to a8r8g8b8 (pitch=%d) from %08x\n",
+ __FUNCTION__, pitch, picture->format));
+ }
+
+ channel->bo = kgem_create_buffer(&sna->kgem,
+ pitch*h, KGEM_BUFFER_WRITE,
+ &ptr);
+ if (!channel->bo) {
+ pixman_image_unref(src);
+ return 0;
+ }
+
+ channel->bo->pitch = pitch;
+ dst = pixman_image_create_bits(channel->pict_format, w, h, ptr, pitch);
+ if (!dst) {
+ kgem_bo_destroy(&sna->kgem, channel->bo);
+ pixman_image_unref(src);
+ return 0;
+ }
+
+ pixman_image_composite(PictOpSrc, src, NULL, dst,
+ box.x1, box.y1,
+ 0, 0,
+ 0, 0,
+ w, h);
+ pixman_image_unref(dst);
+ pixman_image_unref(src);
+
+ channel->width = w;
+ channel->height = h;
+
+ channel->scale[0] = 1. / w;
+ channel->scale[1] = 1. / h;
+ channel->offset[0] = x - dst_x - box.x1;
+ channel->offset[1] = y - dst_y - box.y1;
+
+ DBG(("%s: offset=(%d, %d), size=(%d, %d) ptr[0]=%08x\n",
+ __FUNCTION__,
+ channel->offset[0], channel->offset[1],
+ channel->width, channel->height,
+ *(uint32_t*)ptr));
+ return 1;
+}
+
+Bool
+sna_render_composite_redirect(struct sna *sna,
+ struct sna_composite_op *op,
+ int x, int y, int width, int height)
+{
+ struct sna_composite_redirect *t = &op->redirect;
+ int bpp = op->dst.pixmap->drawable.bitsPerPixel;
+ struct sna_pixmap *priv;
+ struct kgem_bo *bo;
+
+#if NO_REDIRECT
+ return FALSE;
+#endif
+
+ DBG(("%s: target too large (%dx%d), copying to temporary %dx%d\n",
+ __FUNCTION__, op->dst.width, op->dst.height, width,height));
+
+ if (!width || !height)
+ return FALSE;
+
+ priv = sna_pixmap(op->dst.pixmap);
+ if (priv->gpu_bo == NULL) {
+ DBG(("%s: fallback -- no GPU bo attached\n", __FUNCTION__));
+ return FALSE;
+ }
+
+ if (!sna_pixmap_move_to_gpu(op->dst.pixmap))
+ return FALSE;
+
+ /* We can process the operation in a single pass,
+ * but the target is too large for the 3D pipeline.
+ * Copy into a smaller surface and replace afterwards.
+ */
+ bo = kgem_create_2d(&sna->kgem,
+ width, height, bpp,
+ kgem_choose_tiling(&sna->kgem, I915_TILING_X,
+ width, height, bpp),
+ 0);
+ if (!bo)
+ return FALSE;
+
+ t->box.x1 = x + op->dst.x;
+ t->box.y1 = y + op->dst.y;
+ t->box.x2 = t->box.x1 + width;
+ t->box.y2 = t->box.y1 + height;
+
+ DBG(("%s: original box (%d, %d), (%d, %d)\n",
+ __FUNCTION__, t->box.x1, t->box.y1, t->box.x2, t->box.y2));
+
+ if (!sna_blt_copy_boxes(sna, GXcopy,
+ op->dst.bo, 0, 0,
+ bo, -t->box.x1, -t->box.y1,
+ bpp, &t->box, 1)) {
+ kgem_bo_destroy(&sna->kgem, bo);
+ return FALSE;
+ }
+
+ t->real_bo = priv->gpu_bo;
+ op->dst.bo = bo;
+ op->dst.x = -x;
+ op->dst.y = -y;
+ op->dst.width = width;
+ op->dst.height = height;
+ op->damage = &priv->gpu_damage;
+ return TRUE;
+}
+
+void
+sna_render_composite_redirect_done(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ const struct sna_composite_redirect *t = &op->redirect;
+
+ if (t->real_bo) {
+ DBG(("%s: copying temporary to dst\n", __FUNCTION__));
+
+ sna_blt_copy_boxes(sna, GXcopy,
+ op->dst.bo, -t->box.x1, -t->box.y1,
+ t->real_bo, 0, 0,
+ op->dst.pixmap->drawable.bitsPerPixel,
+ &t->box, 1);
+
+ kgem_bo_destroy(&sna->kgem, op->dst.bo);
+ }
+}
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
new file mode 100644
index 00000000..328eaf78
--- /dev/null
+++ b/src/sna/sna_render.h
@@ -0,0 +1,511 @@
+#ifndef SNA_RENDER_H
+#define SNA_RENDER_H
+
+#define GRADIENT_CACHE_SIZE 16
+
+#define fastcall __attribute__((regparm(3)))
+
+struct sna;
+struct sna_glyph;
+struct sna_video;
+struct sna_video_frame;
+
+struct sna_composite_rectangles {
+ struct sna_coordinate {
+ int16_t x, y;
+ } src, mask, dst;
+ int16_t width, height;
+};
+
+struct sna_composite_op {
+ fastcall void (*blt)(struct sna *sna, const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r);
+ void (*boxes)(struct sna *sna, const struct sna_composite_op *op,
+ const BoxRec *box, int nbox);
+ void (*done)(struct sna *sna, const struct sna_composite_op *op);
+
+ struct sna_damage **damage;
+
+ uint32_t op;
+
+ struct {
+ PixmapPtr pixmap;
+ CARD32 format;
+ struct kgem_bo *bo;
+ int16_t x, y;
+ uint16_t width, height;
+ } dst;
+
+ struct sna_composite_channel {
+ struct kgem_bo *bo;
+ PictTransform *transform;
+ uint16_t width;
+ uint16_t height;
+ uint32_t pict_format;
+ uint32_t card_format;
+ uint32_t filter;
+ uint32_t repeat;
+ uint32_t is_affine : 1;
+ uint32_t is_solid : 1;
+ uint32_t is_opaque : 1;
+ uint32_t alpha_fixup : 1;
+ uint32_t rb_reversed : 1;
+ int16_t offset[2];
+ float scale[2];
+
+ struct gen3_shader_channel {
+ int type;
+ uint32_t mode;
+ uint32_t constants;
+ } gen3;
+ } src, mask;
+ uint32_t is_affine : 1;
+ uint32_t has_component_alpha : 1;
+ uint32_t need_magic_ca_pass : 1;
+ uint32_t rb_reversed : 1;
+
+ int floats_per_vertex;
+ fastcall void (*prim_emit)(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r);
+
+ struct sna_composite_redirect {
+ struct kgem_bo *real_bo;
+ BoxRec box;
+ } redirect;
+
+ union {
+ struct sna_blt_state {
+ PixmapPtr src_pixmap;
+ int16_t sx, sy;
+
+ uint32_t inplace :1;
+ uint32_t overwrites:1;
+
+ int hdr;
+ uint32_t cmd;
+ uint32_t br13;
+ uint32_t pitch[2];
+ uint32_t pixel;
+ struct kgem_bo *bo[3];
+ } blt;
+
+ struct {
+ int nothing;
+ } gen2;
+
+ struct {
+ float constants[8];
+ uint32_t num_constants;
+ } gen3;
+
+ struct {
+ int wm_kernel;
+ int ve_id;
+ } gen4;
+
+ struct {
+ int wm_kernel;
+ int ve_id;
+ } gen5;
+
+ struct {
+ int wm_kernel;
+ int nr_surfaces;
+ int nr_inputs;
+ int ve_id;
+ } gen6;
+
+ void *priv;
+ } u;
+};
+
+struct sna_composite_spans_op {
+ struct sna_composite_op base;
+
+ void (*boxes)(struct sna *sna, const struct sna_composite_spans_op *op,
+ const BoxRec *box, int nbox,
+ float opacity);
+ void (*done)(struct sna *sna, const struct sna_composite_spans_op *op);
+
+ void (*prim_emit)(struct sna *sna,
+ const struct sna_composite_spans_op *op,
+ const BoxRec *box,
+ float opacity);
+};
+
+struct sna_fill_op {
+ struct sna_composite_op base;
+
+ void (*blt)(struct sna *sna, const struct sna_fill_op *op,
+ int16_t x, int16_t y, int16_t w, int16_t h);
+ void (*done)(struct sna *sna, const struct sna_fill_op *op);
+};
+
+struct sna_copy_op {
+ struct sna_composite_op base;
+
+ void (*blt)(struct sna *sna, const struct sna_copy_op *op,
+ int16_t sx, int16_t sy,
+ int16_t w, int16_t h,
+ int16_t dx, int16_t dy);
+ void (*done)(struct sna *sna, const struct sna_copy_op *op);
+};
+
+struct sna_render {
+ int max_3d_size;
+
+ Bool (*composite)(struct sna *sna, uint8_t op,
+ PicturePtr dst, PicturePtr src, PicturePtr mask,
+ int16_t src_x, int16_t src_y,
+ int16_t msk_x, int16_t msk_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t w, int16_t h,
+ struct sna_composite_op *tmp);
+
+ Bool (*composite_spans)(struct sna *sna, uint8_t op,
+ PicturePtr dst, PicturePtr src,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t w, int16_t h,
+ struct sna_composite_spans_op *tmp);
+
+ Bool (*video)(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ RegionPtr dstRegion,
+ short src_w, short src_h,
+ short drw_w, short drw_h,
+ PixmapPtr pixmap);
+
+ Bool (*fill_boxes)(struct sna *sna,
+ CARD8 op,
+ PictFormat format,
+ const xRenderColor *color,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ const BoxRec *box, int n);
+ Bool (*fill)(struct sna *sna, uint8_t alu,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ uint32_t color,
+ struct sna_fill_op *tmp);
+
+ Bool (*copy_boxes)(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+ PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const BoxRec *box, int n);
+ Bool (*copy)(struct sna *sna, uint8_t alu,
+ PixmapPtr src, struct kgem_bo *src_bo,
+ PixmapPtr dst, struct kgem_bo *dst_bo,
+ struct sna_copy_op *op);
+
+ void (*flush)(struct sna *sna);
+ void (*reset)(struct sna *sna);
+ void (*context_switch)(struct sna *sna, int new_mode);
+ void (*fini)(struct sna *sna);
+
+ struct sna_solid_cache {
+ struct kgem_bo *cache_bo;
+ uint32_t color[1024];
+ struct kgem_bo *bo[1024];
+ int last;
+ int size;
+ int dirty;
+ } solid_cache;
+
+ struct {
+ struct sna_gradient_cache {
+ struct kgem_bo *bo;
+ int nstops;
+ PictGradientStop *stops;
+ } cache[GRADIENT_CACHE_SIZE];
+ int size;
+ } gradient_cache;
+
+ struct sna_glyph_cache{
+ PicturePtr picture;
+ struct sna_glyph **glyphs;
+ uint16_t count;
+ uint16_t evict;
+ } glyph[2];
+
+ uint16_t vertex_start;
+ uint16_t vertex_index;
+ uint16_t vertex_used;
+ uint16_t vertex_reloc[8];
+
+ float vertex_data[16*1024];
+ const struct sna_composite_op *op;
+};
+
+struct gen2_render_state {
+ Bool need_invariant;
+ uint16_t vertex_offset;
+};
+
+struct gen3_render_state {
+ uint32_t current_dst;
+ Bool need_invariant;
+ uint32_t tex_count;
+ uint32_t last_drawrect_limit;
+ uint32_t last_target;
+ uint32_t last_blend;
+ uint32_t last_constants;
+ uint32_t last_sampler;
+ uint32_t last_shader;
+ uint32_t last_diffuse;
+ uint32_t last_specular;
+
+ uint16_t vertex_offset;
+ uint16_t last_vertex_offset;
+ uint16_t floats_per_vertex;
+ uint16_t last_floats_per_vertex;
+
+ uint32_t tex_map[4];
+ uint32_t tex_handle[2];
+ uint32_t tex_delta[2];
+};
+
+struct gen4_render_state {
+ struct kgem_bo *general_bo;
+
+ uint32_t vs;
+ uint32_t sf[2];
+ uint32_t wm;
+ uint32_t cc;
+
+ int ve_id;
+ uint32_t drawrect_offset;
+ uint32_t drawrect_limit;
+ uint32_t vb_id;
+ uint16_t vertex_offset;
+ uint16_t last_primitive;
+ int16_t floats_per_vertex;
+ uint16_t surface_table;
+ uint16_t last_pipelined_pointers;
+
+ Bool needs_invariant;
+};
+
+struct gen5_render_state {
+ struct kgem_bo *general_bo;
+
+ uint32_t vs;
+ uint32_t sf[2];
+ uint32_t wm;
+ uint32_t cc;
+
+ int ve_id;
+ uint32_t drawrect_offset;
+ uint32_t drawrect_limit;
+ uint32_t vb_id;
+ uint16_t vertex_offset;
+ uint16_t last_primitive;
+ int16_t floats_per_vertex;
+ uint16_t surface_table;
+ uint16_t last_pipelined_pointers;
+
+ Bool needs_invariant;
+};
+
+enum {
+ GEN6_WM_KERNEL_NOMASK = 0,
+ GEN6_WM_KERNEL_NOMASK_PROJECTIVE,
+
+ GEN6_WM_KERNEL_MASK,
+ GEN6_WM_KERNEL_MASK_PROJECTIVE,
+
+ GEN6_WM_KERNEL_MASKCA,
+ GEN6_WM_KERNEL_MASKCA_PROJECTIVE,
+
+ GEN6_WM_KERNEL_MASKCA_SRCALPHA,
+ GEN6_WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
+
+ GEN6_WM_KERNEL_VIDEO_PLANAR,
+ GEN6_WM_KERNEL_VIDEO_PACKED,
+ GEN6_KERNEL_COUNT
+};
+
+struct gen6_render_state {
+ struct kgem_bo *general_bo;
+
+ uint32_t vs_state;
+ uint32_t sf_state;
+ uint32_t sf_mask_state;
+ uint32_t wm_state;
+ uint32_t wm_kernel[GEN6_KERNEL_COUNT];
+
+ uint32_t cc_vp;
+ uint32_t cc_blend;
+
+ uint32_t drawrect_offset;
+ uint32_t drawrect_limit;
+ uint32_t blend;
+ uint32_t samplers;
+ uint32_t kernel;
+
+ uint16_t num_sf_outputs;
+ uint16_t vb_id;
+ uint16_t ve_id;
+ uint16_t vertex_offset;
+ uint16_t last_primitive;
+ int16_t floats_per_vertex;
+ uint16_t surface_table;
+
+ Bool needs_invariant;
+};
+
+struct sna_static_stream {
+ uint32_t size, used;
+ uint8_t *data;
+};
+
+int sna_static_stream_init(struct sna_static_stream *stream);
+uint32_t sna_static_stream_add(struct sna_static_stream *stream,
+ const void *data, uint32_t len, uint32_t align);
+void *sna_static_stream_map(struct sna_static_stream *stream,
+ uint32_t len, uint32_t align);
+uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream,
+ void *ptr);
+struct kgem_bo *sna_static_stream_fini(struct sna *sna,
+ struct sna_static_stream *stream);
+
+struct kgem_bo *
+sna_render_get_solid(struct sna *sna,
+ uint32_t color);
+
+void
+sna_render_flush_solid(struct sna *sna);
+
+struct kgem_bo *
+sna_render_get_gradient(struct sna *sna,
+ PictGradient *pattern);
+
+uint32_t sna_rgba_for_color(uint32_t color, int depth);
+Bool sna_picture_is_solid(PicturePtr picture, uint32_t *color);
+
+void no_render_init(struct sna *sna);
+
+#ifdef SNA_GEN2
+Bool gen2_render_init(struct sna *sna);
+#else
+static inline Bool gen2_render_init(struct sna *sna) { return FALSE; }
+#endif
+
+#ifdef SNA_GEN3
+Bool gen3_render_init(struct sna *sna);
+#else
+static inline Bool gen3_render_init(struct sna *sna) { return FALSE; }
+#endif
+
+#ifdef SNA_GEN4
+Bool gen4_render_init(struct sna *sna);
+#else
+static inline Bool gen4_render_init(struct sna *sna) { return FALSE; }
+#endif
+
+#ifdef SNA_GEN5
+Bool gen5_render_init(struct sna *sna);
+#else
+static inline Bool gen5_render_init(struct sna *sna) { return FALSE; }
+#endif
+
+#ifdef SNA_GEN6
+Bool gen6_render_init(struct sna *sna);
+#else
+static inline Bool gen6_render_init(struct sna *sna) { return FALSE; }
+#endif
+
+Bool sna_tiling_composite(struct sna *sna,
+ uint32_t op,
+ PicturePtr src,
+ PicturePtr mask,
+ PicturePtr dst,
+ int16_t src_x, int16_t src_y,
+ int16_t mask_x, int16_t mask_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t width, int16_t height,
+ struct sna_composite_op *tmp);
+
+Bool sna_blt_composite(struct sna *sna,
+ uint32_t op,
+ PicturePtr src,
+ PicturePtr dst,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t width, int16_t height,
+ struct sna_composite_op *tmp);
+
+bool sna_blt_fill(struct sna *sna, uint8_t alu,
+ struct kgem_bo *bo,
+ int bpp,
+ uint32_t pixel,
+ struct sna_fill_op *fill);
+
+bool sna_blt_copy(struct sna *sna, uint8_t alu,
+ struct kgem_bo *src,
+ struct kgem_bo *dst,
+ int bpp,
+ struct sna_copy_op *copy);
+
+Bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
+ struct kgem_bo *bo,
+ int bpp,
+ uint32_t pixel,
+ const BoxRec *box, int n);
+
+Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
+ struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+ struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ int bpp,
+ const BoxRec *box, int n);
+
+Bool sna_get_pixel_from_rgba(uint32_t *pixel,
+ uint16_t red,
+ uint16_t green,
+ uint16_t blue,
+ uint16_t alpha,
+ uint32_t format);
+
+int
+sna_render_pixmap_bo(struct sna *sna,
+ struct sna_composite_channel *channel,
+ PixmapPtr pixmap,
+ int16_t x, int16_t y,
+ int16_t w, int16_t h,
+ int16_t dst_x, int16_t dst_y);
+
+int
+sna_render_picture_extract(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int16_t x, int16_t y,
+ int16_t w, int16_t h,
+ int16_t dst_x, int16_t dst_y);
+
+int
+sna_render_picture_fixup(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ int16_t x, int16_t y,
+ int16_t w, int16_t h,
+ int16_t dst_x, int16_t dst_y);
+
+int
+sna_render_picture_convert(struct sna *sna,
+ PicturePtr picture,
+ struct sna_composite_channel *channel,
+ PixmapPtr pixmap,
+ int16_t x, int16_t y,
+ int16_t w, int16_t h,
+ int16_t dst_x, int16_t dst_y);
+
+Bool
+sna_render_composite_redirect(struct sna *sna,
+ struct sna_composite_op *op,
+ int x, int y, int width, int height);
+
+void
+sna_render_composite_redirect_done(struct sna *sna,
+ const struct sna_composite_op *op);
+
+#endif /* SNA_RENDER_H */
diff --git a/src/sna/sna_render_inline.h b/src/sna/sna_render_inline.h
new file mode 100644
index 00000000..33d84d4c
--- /dev/null
+++ b/src/sna/sna_render_inline.h
@@ -0,0 +1,102 @@
+#ifndef SNA_RENDER_INLINE_H
+#define SNA_RENDER_INLINE_H
+
+static inline bool need_tiling(struct sna *sna, int16_t width, int16_t height)
+{
+ /* Is the damage area too large to fit in 3D pipeline,
+ * and so do we need to split the operation up into tiles?
+ */
+ return (width > sna->render.max_3d_size ||
+ height > sna->render.max_3d_size);
+}
+
+static inline bool need_redirect(struct sna *sna, PixmapPtr dst)
+{
+ /* Is the pixmap too large to render to? */
+ return (dst->drawable.width > sna->render.max_3d_size ||
+ dst->drawable.height > sna->render.max_3d_size);
+}
+
+static inline int vertex_space(struct sna *sna)
+{
+ return ARRAY_SIZE(sna->render.vertex_data) - sna->render.vertex_used;
+}
+static inline void vertex_emit(struct sna *sna, float v)
+{
+ sna->render.vertex_data[sna->render.vertex_used++] = v;
+}
+static inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y)
+{
+ int16_t *v = (int16_t *)&sna->render.vertex_data[sna->render.vertex_used++];
+ v[0] = x;
+ v[1] = y;
+}
+
+static inline float pack_2s(int16_t x, int16_t y)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } u;
+ u.p.x = x;
+ u.p.y = y;
+ return u.f;
+}
+
+static inline int batch_space(struct sna *sna)
+{
+ return KGEM_BATCH_SIZE(&sna->kgem) - sna->kgem.nbatch;
+}
+
+static inline void batch_emit(struct sna *sna, uint32_t dword)
+{
+ sna->kgem.batch[sna->kgem.nbatch++] = dword;
+}
+
+static inline void batch_emit_float(struct sna *sna, float f)
+{
+ union {
+ uint32_t dw;
+ float f;
+ } u;
+ u.f = f;
+ batch_emit(sna, u.dw);
+}
+
+static inline Bool
+is_gpu(DrawablePtr drawable)
+{
+ struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable);
+ return priv && priv->gpu_bo;
+}
+
+static inline Bool
+is_cpu(DrawablePtr drawable)
+{
+ struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable);
+ return !priv || priv->gpu_bo == NULL;
+}
+
+static inline Bool
+is_dirty_gpu(struct sna *sna, DrawablePtr drawable)
+{
+ struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable);
+ return priv && priv->gpu_bo && priv->gpu_damage;
+}
+
+static inline Bool
+too_small(struct sna *sna, DrawablePtr drawable)
+{
+ return (drawable->width * drawable->height <= 256) &&
+ !is_dirty_gpu(sna, drawable);
+}
+
+static inline Bool
+picture_is_gpu(PicturePtr picture)
+{
+ if (!picture || !picture->pDrawable)
+ return FALSE;
+ return is_gpu(picture->pDrawable);
+}
+
+#endif /* SNA_RENDER_INLINE_H */
diff --git a/src/sna/sna_stream.c b/src/sna/sna_stream.c
new file mode 100644
index 00000000..d6d817d3
--- /dev/null
+++ b/src/sna/sna_stream.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include "sna.h"
+#include "sna_render.h"
+
+#if DEBUG_STREAM
+#undef DBG
+#define DBG(x) ErrorF x
+#endif
+
+int sna_static_stream_init(struct sna_static_stream *stream)
+{
+ stream->used = 0;
+ stream->size = 64*1024;
+
+ stream->data = malloc(stream->size);
+ return stream->data != NULL;
+}
+
+static uint32_t sna_static_stream_alloc(struct sna_static_stream *stream,
+ uint32_t len, uint32_t align)
+{
+ uint32_t offset = ALIGN(stream->used, align);
+ uint32_t size = offset + len;
+
+ if (size > stream->size) {
+ do
+ stream->size *= 2;
+ while (stream->size < size);
+
+ stream->data = realloc(stream->data, stream->size);
+ }
+
+ stream->used = size;
+ return offset;
+}
+
+uint32_t sna_static_stream_add(struct sna_static_stream *stream,
+ const void *data, uint32_t len, uint32_t align)
+{
+ uint32_t offset = sna_static_stream_alloc(stream, len, align);
+ memcpy(stream->data + offset, data, len);
+ return offset;
+}
+
+void *sna_static_stream_map(struct sna_static_stream *stream,
+ uint32_t len, uint32_t align)
+{
+ uint32_t offset = sna_static_stream_alloc(stream, len, align);
+ return memset(stream->data + offset, 0, len);
+}
+
+uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream, void *ptr)
+{
+ return (uint8_t *)ptr - stream->data;
+}
+
+struct kgem_bo *sna_static_stream_fini(struct sna *sna,
+ struct sna_static_stream *stream)
+{
+ struct kgem_bo *bo;
+
+ DBG(("uploaded %d bytes of static state\n", stream->used));
+
+ bo = kgem_create_linear(&sna->kgem, stream->used);
+ if (bo && !kgem_bo_write(&sna->kgem, bo, stream->data, stream->used)) {
+ kgem_bo_destroy(&sna->kgem, bo);
+ return NULL;
+ }
+
+ free(stream->data);
+
+ return bo;
+}
diff --git a/src/sna/sna_tiling.c b/src/sna/sna_tiling.c
new file mode 100644
index 00000000..f69c3ef2
--- /dev/null
+++ b/src/sna/sna_tiling.c
@@ -0,0 +1,264 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_render.h"
+
+#include <fbpict.h>
+
+#if DEBUG_RENDER
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+struct sna_tile_state {
+ int op;
+ PicturePtr src, mask, dst;
+ PixmapPtr dst_pixmap;
+ uint32_t dst_format;
+ int16_t src_x, src_y;
+ int16_t mask_x, mask_y;
+ int16_t dst_x, dst_y;
+ int16_t width, height;
+
+ int rect_count;
+ int rect_size;
+ struct sna_composite_rectangles rects_embedded[16], *rects;
+};
+
+static void
+sna_tiling_composite_add_rect(struct sna_tile_state *tile,
+ const struct sna_composite_rectangles *r)
+{
+ if (tile->rect_count == tile->rect_size) {
+ struct sna_composite_rectangles *a;
+ int newsize = tile->rect_size * 2;
+
+ if (tile->rects == tile->rects_embedded) {
+ a = malloc (sizeof(struct sna_composite_rectangles) * newsize);
+ if (a == NULL)
+ return;
+
+ memcpy(a,
+ tile->rects_embedded,
+ sizeof(struct sna_composite_rectangles) * tile->rect_count);
+ } else {
+ a = realloc(tile->rects,
+ sizeof(struct sna_composite_rectangles) * newsize);
+ if (a == NULL)
+ return;
+ }
+
+ tile->rects = a;
+ tile->rect_size = newsize;
+ }
+
+ tile->rects[tile->rect_count++] = *r;
+}
+
+fastcall static void
+sna_tiling_composite_blt(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ sna_tiling_composite_add_rect(op->u.priv, r);
+}
+
+static void
+sna_tiling_composite_boxes(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ while (nbox--) {
+ struct sna_composite_rectangles r;
+
+ r.dst.x = box->x1;
+ r.dst.y = box->y1;
+ r.mask = r.src = r.dst;
+
+ r.width = box->x2 - box->x1;
+ r.height = box->y2 - box->y1;
+
+ sna_tiling_composite_add_rect(op->u.priv, &r);
+ box++;
+ }
+}
+
+static void
+sna_tiling_composite_done(struct sna *sna,
+ const struct sna_composite_op *op)
+{
+ struct sna_tile_state *tile = op->u.priv;
+ struct sna_composite_op tmp;
+ int x, y, n, step = sna->render.max_3d_size;
+
+ DBG(("%s -- %dx%d, count=%d\n", __FUNCTION__,
+ tile->width, tile->height, tile->rect_count));
+
+ if (tile->rect_count == 0)
+ goto done;
+
+ for (y = 0; y < tile->height; y += step) {
+ int height = step;
+ if (y + height > tile->height)
+ height = tile->height - y;
+ for (x = 0; x < tile->width; x += step) {
+ int width = step;
+ if (x + width > tile->width)
+ width = tile->width - x;
+ memset(&tmp, 0, sizeof(tmp));
+ if (sna->render.composite(sna, tile->op,
+ tile->src, tile->mask, tile->dst,
+ tile->src_x + x, tile->src_y + y,
+ tile->mask_x + x, tile->mask_y + y,
+ tile->dst_x + x, tile->dst_y + y,
+ width, height,
+ &tmp)) {
+ for (n = 0; n < tile->rect_count; n++) {
+ const struct sna_composite_rectangles *r = &tile->rects[n];
+ int x1, x2, dx, y1, y2, dy;
+
+ x1 = r->dst.x - tile->dst_x, dx = 0;
+ if (x1 < x)
+ dx = x - x1, x1 = x;
+ y1 = r->dst.y - tile->dst_y, dy = 0;
+ if (y1 < y)
+ dy = y - y1, y1 = y;
+
+ x2 = r->dst.x + r->width - tile->dst_x;
+ if (x2 > x + width)
+ x2 = x + width;
+ y2 = r->dst.y + r->height - tile->dst_y;
+ if (y2 > y + height)
+ y2 = y + height;
+
+ if (y2 > y1 && x2 > x1) {
+ struct sna_composite_rectangles rr;
+ rr.src.x = dx + r->src.x;
+ rr.src.y = dy + r->src.y;
+
+ rr.mask.x = dx + r->mask.x;
+ rr.mask.y = dy + r->mask.y;
+
+ rr.dst.x = dx + r->dst.x;
+ rr.dst.y = dy + r->dst.y;
+
+ rr.width = x2 - x1;
+ rr.height = y2 - y1;
+
+ tmp.blt(sna, &tmp, &rr);
+ }
+ }
+ tmp.done(sna, &tmp);
+ } else {
+ DBG(("%s -- falback\n", __FUNCTION__));
+
+ sna_drawable_move_to_cpu(tile->dst->pDrawable, true);
+ if (tile->src->pDrawable)
+ sna_drawable_move_to_cpu(tile->src->pDrawable, false);
+ if (tile->mask && tile->mask->pDrawable)
+ sna_drawable_move_to_cpu(tile->mask->pDrawable, false);
+
+ fbComposite(tile->op,
+ tile->src, tile->mask, tile->dst,
+ tile->src_x + x, tile->src_y + y,
+ tile->mask_x + x, tile->mask_y + y,
+ tile->dst_x + x, tile->dst_y + y,
+ width, height);
+ }
+ }
+ }
+
+done:
+ if (tile->rects != tile->rects_embedded)
+ free(tile->rects);
+ free(tile);
+}
+
+static inline int split(int x, int y)
+{
+ int n = x / y + 1;
+ return (x + n - 1) / n;
+}
+
+Bool
+sna_tiling_composite(struct sna *sna,
+ uint32_t op,
+ PicturePtr src,
+ PicturePtr mask,
+ PicturePtr dst,
+ int16_t src_x, int16_t src_y,
+ int16_t mask_x, int16_t mask_y,
+ int16_t dst_x, int16_t dst_y,
+ int16_t width, int16_t height,
+ struct sna_composite_op *tmp)
+{
+ struct sna_tile_state *tile;
+ struct sna_pixmap *priv;
+
+ DBG(("%s size=(%d, %d), tile=%d\n",
+ __FUNCTION__, width, height, sna->render.max_3d_size));
+
+ priv = sna_pixmap(get_drawable_pixmap(dst->pDrawable));
+ if (priv == NULL || priv->gpu_bo == NULL)
+ return FALSE;
+
+ tile = malloc(sizeof(*tile));
+ if (!tile)
+ return FALSE;
+
+ tile->op = op;
+
+ tile->src = src;
+ tile->mask = mask;
+ tile->dst = dst;
+
+ tile->src_x = src_x;
+ tile->src_y = src_y;
+ tile->mask_x = mask_x;
+ tile->mask_y = mask_y;
+ tile->dst_x = dst_x;
+ tile->dst_y = dst_y;
+ tile->width = width;
+ tile->height = height;
+ tile->rects = tile->rects_embedded;
+ tile->rect_count = 0;
+ tile->rect_size = ARRAY_SIZE(tile->rects_embedded);
+
+ tmp->blt = sna_tiling_composite_blt;
+ tmp->boxes = sna_tiling_composite_boxes;
+ tmp->done = sna_tiling_composite_done;
+
+ tmp->u.priv = tile;
+ return TRUE;
+}
diff --git a/src/sna/sna_transform.c b/src/sna/sna_transform.c
new file mode 100644
index 00000000..3cd9b07a
--- /dev/null
+++ b/src/sna/sna_transform.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. All Rights Reserved.
+ * Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org>
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Jesse Barns <jbarnes@virtuousgeek.org>
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "xf86.h"
+#include "sna.h"
+
+/**
+ * Returns whether the provided transform is affine.
+ *
+ * transform may be null.
+ */
+Bool sna_transform_is_affine(const PictTransform *t)
+{
+ if (t == NULL)
+ return TRUE;
+
+ return t->matrix[2][0] == 0 && t->matrix[2][1] == 0;
+}
+
+Bool
+sna_transform_is_translation(const PictTransform *t,
+ pixman_fixed_t *tx,
+ pixman_fixed_t *ty)
+{
+ if (t == NULL) {
+ *tx = *ty = 0;
+ return TRUE;
+ }
+
+ if (t->matrix[0][0] != IntToxFixed(1) ||
+ t->matrix[0][1] != 0 ||
+ t->matrix[1][0] != 0 ||
+ t->matrix[1][1] != IntToxFixed(1) ||
+ t->matrix[2][0] != 0 ||
+ t->matrix[2][1] != 0 ||
+ t->matrix[2][2] != IntToxFixed(1))
+ return FALSE;
+
+ *tx = t->matrix[0][2];
+ *ty = t->matrix[1][2];
+ return TRUE;
+}
+
+Bool
+sna_transform_is_integer_translation(const PictTransform *t, int16_t *tx, int16_t *ty)
+{
+ if (t == NULL) {
+ *tx = *ty = 0;
+ return TRUE;
+ }
+
+ if (t->matrix[0][0] != IntToxFixed(1) ||
+ t->matrix[0][1] != 0 ||
+ t->matrix[1][0] != 0 ||
+ t->matrix[1][1] != IntToxFixed(1) ||
+ t->matrix[2][0] != 0 ||
+ t->matrix[2][1] != 0 ||
+ t->matrix[2][2] != IntToxFixed(1))
+ return FALSE;
+
+ if (pixman_fixed_fraction(t->matrix[0][2]) ||
+ pixman_fixed_fraction(t->matrix[1][2]))
+ return FALSE;
+
+ *tx = pixman_fixed_to_int(t->matrix[0][2]);
+ *ty = pixman_fixed_to_int(t->matrix[1][2]);
+ return TRUE;
+}
+
+/**
+ * Returns the floating-point coordinates transformed by the given transform.
+ */
+void
+sna_get_transformed_coordinates(int x, int y,
+ const PictTransform *transform,
+ float *x_out, float *y_out)
+{
+ if (transform == NULL) {
+ *x_out = x;
+ *y_out = y;
+ } else
+ _sna_get_transformed_coordinates(x, y, transform, x_out, y_out);
+}
+
+/**
+ * Returns the un-normalized floating-point coordinates transformed by the given transform.
+ */
+Bool
+sna_get_transformed_coordinates_3d(int x, int y,
+ const PictTransform *transform,
+ float *x_out, float *y_out, float *w_out)
+{
+ if (transform == NULL) {
+ *x_out = x;
+ *y_out = y;
+ *w_out = 1;
+ } else {
+ int64_t result[3];
+
+ if (!_sna_transform_point(transform, x, y, result))
+ return FALSE;
+
+ *x_out = result[0] / 65536.;
+ *y_out = result[1] / 65536.;
+ *w_out = result[2] / 65536.;
+ }
+
+ return TRUE;
+}
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
new file mode 100644
index 00000000..db9e085b
--- /dev/null
+++ b/src/sna/sna_trapezoids.c
@@ -0,0 +1,2375 @@
+/*
+ * Copyright (c) 2007 David Turner
+ * Copyright (c) 2008 M Joonas Pihlaja
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_render.h"
+#include "sna_render_inline.h"
+
+#include <mipict.h>
+#include <fbpict.h>
+
+#if DEBUG_TRAPEZOIDS
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+#define NO_ACCEL 0
+
+#define unlikely(x) x
+
+#define SAMPLES_X 17
+#define SAMPLES_Y 15
+
+#define FAST_SAMPLES_X_shift 8
+#define FAST_SAMPLES_Y_shift 4
+
+#define FAST_SAMPLES_X (1<<FAST_SAMPLES_X_shift)
+#define FAST_SAMPLES_Y (1<<FAST_SAMPLES_Y_shift)
+
+#if DEBUG_TRAPEZOIDS
+static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function)
+{
+ if (box->x1 < 0 || box->y1 < 0 ||
+ box->x2 > pixmap->drawable.width ||
+ box->y2 > pixmap->drawable.height)
+ {
+ ErrorF("%s: damage box is beyond the pixmap: box=(%d, %d), (%d, %d), pixmap=(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1, box->x2, box->y2,
+ pixmap->drawable.width,
+ pixmap->drawable.height);
+ assert(0);
+ }
+}
+#define assert_pixmap_contains_box(p, b) _assert_pixmap_contains_box(p, b, __FUNCTION__)
+#else
+#define assert_pixmap_contains_box(p, b)
+#endif
+
+static void apply_damage(struct sna_composite_op *op, RegionPtr region)
+{
+ DBG(("%s: damage=%p, region=%d\n",
+ __FUNCTION__, op->damage, REGION_NUM_RECTS(region)));
+
+ if (op->damage == NULL)
+ return;
+
+ RegionTranslate(region, op->dst.x, op->dst.y);
+
+ assert_pixmap_contains_box(op->dst.pixmap, RegionExtents(region));
+ sna_damage_add(op->damage, region);
+}
+
+static void apply_damage_box(struct sna_composite_op *op, const BoxRec *box)
+{
+ BoxRec r;
+
+ if (op->damage == NULL)
+ return;
+
+ r.x1 = box->x1 + op->dst.x;
+ r.x2 = box->x2 + op->dst.x;
+ r.y1 = box->y1 + op->dst.y;
+ r.y2 = box->y2 + op->dst.y;
+
+ assert_pixmap_contains_box(op->dst.pixmap, &r);
+ sna_damage_add_box(op->damage, &r);
+}
+
+typedef int grid_scaled_x_t;
+typedef int grid_scaled_y_t;
+
+#define FAST_SAMPLES_X_TO_INT_FRAC(x, i, f) \
+ _GRID_TO_INT_FRAC_shift(x, i, f, FAST_SAMPLES_X_shift)
+
+#define _GRID_TO_INT_FRAC_shift(t, i, f, b) do { \
+ (f) = (t) & ((1 << (b)) - 1); \
+ (i) = (t) >> (b); \
+} while (0)
+
+/* A grid area is a real in [0,1] scaled by 2*SAMPLES_X*SAMPLES_Y. We want
+ * to be able to represent exactly areas of subpixel trapezoids whose
+ * vertices are given in grid scaled coordinates. The scale factor
+ * comes from needing to accurately represent the area 0.5*dx*dy of a
+ * triangle with base dx and height dy in grid scaled numbers. */
+typedef int grid_area_t;
+#define FAST_SAMPLES_XY (2*FAST_SAMPLES_X*FAST_SAMPLES_Y) /* Unit area on the grid. */
+
+#define AREA_TO_ALPHA(c) ((c) / (float)FAST_SAMPLES_XY)
+
+struct quorem {
+ int32_t quo;
+ int32_t rem;
+};
+
+struct _pool_chunk {
+ size_t size;
+ size_t capacity;
+
+ struct _pool_chunk *prev_chunk;
+ /* Actual data starts here. Well aligned for pointers. */
+};
+
+/* A memory pool. This is supposed to be embedded on the stack or
+ * within some other structure. It may optionally be followed by an
+ * embedded array from which requests are fulfilled until
+ * malloc needs to be called to allocate a first real chunk. */
+struct pool {
+ struct _pool_chunk *current;
+ struct _pool_chunk *first_free;
+
+ /* The default capacity of a chunk. */
+ size_t default_capacity;
+
+ /* Header for the sentinel chunk. Directly following the pool
+ * struct should be some space for embedded elements from which
+ * the sentinel chunk allocates from. */
+ struct _pool_chunk sentinel[1];
+};
+
+/* A polygon edge. */
+struct edge {
+ /* Next in y-bucket or active list. */
+ struct edge *next;
+
+ /* Current x coordinate while the edge is on the active
+ * list. Initialised to the x coordinate of the top of the
+ * edge. The quotient is in grid_scaled_x_t units and the
+ * remainder is mod dy in grid_scaled_y_t units.*/
+ struct quorem x;
+
+ /* Advance of the current x when moving down a subsample line. */
+ struct quorem dxdy;
+
+ /* Advance of the current x when moving down a full pixel
+ * row. Only initialised when the height of the edge is large
+ * enough that there's a chance the edge could be stepped by a
+ * full row's worth of subsample rows at a time. */
+ struct quorem dxdy_full;
+
+ /* The clipped y of the top of the edge. */
+ grid_scaled_y_t ytop;
+
+ /* y2-y1 after orienting the edge downwards. */
+ grid_scaled_y_t dy;
+
+ /* Number of subsample rows remaining to scan convert of this
+ * edge. */
+ grid_scaled_y_t height_left;
+
+ /* Original sign of the edge: +1 for downwards, -1 for upwards
+ * edges. */
+ int dir;
+ int vertical;
+};
+
+/* Number of subsample rows per y-bucket. Must be SAMPLES_Y. */
+#define EDGE_Y_BUCKET_HEIGHT FAST_SAMPLES_Y
+#define EDGE_Y_BUCKET_INDEX(y, ymin) (((y) - (ymin))/EDGE_Y_BUCKET_HEIGHT)
+
+/* A collection of sorted and vertically clipped edges of the polygon.
+ * Edges are moved from the polygon to an active list while scan
+ * converting. */
+struct polygon {
+ /* The vertical clip extents. */
+ grid_scaled_y_t ymin, ymax;
+
+ /* Array of edges all starting in the same bucket. An edge is put
+ * into bucket EDGE_BUCKET_INDEX(edge->ytop, polygon->ymin) when
+ * it is added to the polygon. */
+ struct edge **y_buckets;
+ struct edge *y_buckets_embedded[64];
+
+ struct edge edges_embedded[32];
+ struct edge *edges;
+ int num_edges;
+};
+
+/* A cell records the effect on pixel coverage of polygon edges
+ * passing through a pixel. It contains two accumulators of pixel
+ * coverage.
+ *
+ * Consider the effects of a polygon edge on the coverage of a pixel
+ * it intersects and that of the following one. The coverage of the
+ * following pixel is the height of the edge multiplied by the width
+ * of the pixel, and the coverage of the pixel itself is the area of
+ * the trapezoid formed by the edge and the right side of the pixel.
+ *
+ * +-----------------------+-----------------------+
+ * | | |
+ * | | |
+ * |_______________________|_______________________|
+ * | \...................|.......................|\
+ * | \..................|.......................| |
+ * | \.................|.......................| |
+ * | \....covered.....|.......................| |
+ * | \....area.......|.......................| } covered height
+ * | \..............|.......................| |
+ * |uncovered\.............|.......................| |
+ * | area \............|.......................| |
+ * |___________\...........|.......................|/
+ * | | |
+ * | | |
+ * | | |
+ * +-----------------------+-----------------------+
+ *
+ * Since the coverage of the following pixel will always be a multiple
+ * of the width of the pixel, we can store the height of the covered
+ * area instead. The coverage of the pixel itself is the total
+ * coverage minus the area of the uncovered area to the left of the
+ * edge. As it's faster to compute the uncovered area we only store
+ * that and subtract it from the total coverage later when forming
+ * spans to blit.
+ *
+ * The heights and areas are signed, with left edges of the polygon
+ * having positive sign and right edges having negative sign. When
+ * two edges intersect they swap their left/rightness so their
+ * contribution above and below the intersection point must be
+ * computed separately. */
+struct cell {
+ struct cell *next;
+ int x;
+ grid_area_t uncovered_area;
+ grid_scaled_y_t covered_height;
+};
+
+/* A cell list represents the scan line sparsely as cells ordered by
+ * ascending x. It is geared towards scanning the cells in order
+ * using an internal cursor. */
+struct cell_list {
+ /* Points to the left-most cell in the scan line. */
+ struct cell *head;
+ /* Sentinel node */
+ struct cell tail;
+
+ struct cell **cursor;
+
+ /* Cells in the cell list are owned by the cell list and are
+ * allocated from this pool. */
+ struct {
+ struct pool base[1];
+ struct cell embedded[32];
+ } cell_pool;
+};
+
+/* The active list contains edges in the current scan line ordered by
+ * the x-coordinate of the intercept of the edge and the scan line. */
+struct active_list {
+ /* Leftmost edge on the current scan line. */
+ struct edge *head;
+
+ /* A lower bound on the height of the active edges is used to
+ * estimate how soon some active edge ends. We can't advance the
+ * scan conversion by a full pixel row if an edge ends somewhere
+ * within it. */
+ grid_scaled_y_t min_height;
+};
+
+struct tor {
+ struct polygon polygon[1];
+ struct active_list active[1];
+ struct cell_list coverages[1];
+
+ /* Clip box. */
+ grid_scaled_x_t xmin, xmax;
+ grid_scaled_y_t ymin, ymax;
+};
+
+/* Compute the floored division a/b. Assumes / and % perform symmetric
+ * division. */
+inline static struct quorem
+floored_divrem(int a, int b)
+{
+ struct quorem qr;
+ qr.quo = a/b;
+ qr.rem = a%b;
+ if ((a^b)<0 && qr.rem) {
+ qr.quo -= 1;
+ qr.rem += b;
+ }
+ return qr;
+}
+
+/* Compute the floored division (x*a)/b. Assumes / and % perform symmetric
+ * division. */
+static struct quorem
+floored_muldivrem(int x, int a, int b)
+{
+ struct quorem qr;
+ long long xa = (long long)x*a;
+ qr.quo = xa/b;
+ qr.rem = xa%b;
+ if ((xa>=0) != (b>=0) && qr.rem) {
+ qr.quo -= 1;
+ qr.rem += b;
+ }
+ return qr;
+}
+
+static void
+_pool_chunk_init(
+ struct _pool_chunk *p,
+ struct _pool_chunk *prev_chunk,
+ size_t capacity)
+{
+ p->prev_chunk = prev_chunk;
+ p->size = 0;
+ p->capacity = capacity;
+}
+
+static struct _pool_chunk *
+_pool_chunk_create(struct _pool_chunk *prev_chunk, size_t size)
+{
+ struct _pool_chunk *p;
+ size_t size_with_head = size + sizeof(struct _pool_chunk);
+
+ if (size_with_head < size)
+ return NULL;
+
+ p = malloc(size_with_head);
+ if (p)
+ _pool_chunk_init(p, prev_chunk, size);
+
+ return p;
+}
+
+static void
+pool_init(struct pool *pool,
+ size_t default_capacity,
+ size_t embedded_capacity)
+{
+ pool->current = pool->sentinel;
+ pool->first_free = NULL;
+ pool->default_capacity = default_capacity;
+ _pool_chunk_init(pool->sentinel, NULL, embedded_capacity);
+}
+
+static void
+pool_fini(struct pool *pool)
+{
+ struct _pool_chunk *p = pool->current;
+ do {
+ while (NULL != p) {
+ struct _pool_chunk *prev = p->prev_chunk;
+ if (p != pool->sentinel)
+ free(p);
+ p = prev;
+ }
+ p = pool->first_free;
+ pool->first_free = NULL;
+ } while (NULL != p);
+ pool_init(pool, 0, 0);
+}
+
+/* Satisfy an allocation by first allocating a new large enough chunk
+ * and adding it to the head of the pool's chunk list. This function
+ * is called as a fallback if pool_alloc() couldn't do a quick
+ * allocation from the current chunk in the pool. */
+static void *
+_pool_alloc_from_new_chunk(struct pool *pool, size_t size)
+{
+ struct _pool_chunk *chunk;
+ void *obj;
+ size_t capacity;
+
+ /* If the allocation is smaller than the default chunk size then
+ * try getting a chunk off the free list. Force alloc of a new
+ * chunk for large requests. */
+ capacity = size;
+ chunk = NULL;
+ if (size < pool->default_capacity) {
+ capacity = pool->default_capacity;
+ chunk = pool->first_free;
+ if (chunk) {
+ pool->first_free = chunk->prev_chunk;
+ _pool_chunk_init(chunk, pool->current, chunk->capacity);
+ }
+ }
+
+ if (NULL == chunk) {
+ chunk = _pool_chunk_create (pool->current, capacity);
+ if (unlikely (NULL == chunk))
+ return NULL;
+ }
+ pool->current = chunk;
+
+ obj = ((unsigned char*)chunk + sizeof(*chunk) + chunk->size);
+ chunk->size += size;
+ return obj;
+}
+
+inline static void *
+pool_alloc(struct pool *pool, size_t size)
+{
+ struct _pool_chunk *chunk = pool->current;
+
+ if (size <= chunk->capacity - chunk->size) {
+ void *obj = ((unsigned char*)chunk + sizeof(*chunk) + chunk->size);
+ chunk->size += size;
+ return obj;
+ } else
+ return _pool_alloc_from_new_chunk(pool, size);
+}
+
+static void
+pool_reset(struct pool *pool)
+{
+ /* Transfer all used chunks to the chunk free list. */
+ struct _pool_chunk *chunk = pool->current;
+ if (chunk != pool->sentinel) {
+ while (chunk->prev_chunk != pool->sentinel)
+ chunk = chunk->prev_chunk;
+
+ chunk->prev_chunk = pool->first_free;
+ pool->first_free = pool->current;
+ }
+
+ /* Reset the sentinel as the current chunk. */
+ pool->current = pool->sentinel;
+ pool->sentinel->size = 0;
+}
+
+/* Rewinds the cell list's cursor to the beginning. After rewinding
+ * we're good to cell_list_find() the cell any x coordinate. */
+inline static void
+cell_list_rewind(struct cell_list *cells)
+{
+ cells->cursor = &cells->head;
+}
+
+/* Rewind the cell list if its cursor has been advanced past x. */
+inline static void
+cell_list_maybe_rewind(struct cell_list *cells, int x)
+{
+ if ((*cells->cursor)->x > x)
+ cell_list_rewind(cells);
+}
+
+static void
+cell_list_init(struct cell_list *cells)
+{
+ pool_init(cells->cell_pool.base,
+ 256*sizeof(struct cell),
+ sizeof(cells->cell_pool.embedded));
+ cells->tail.next = NULL;
+ cells->tail.x = INT_MAX;
+ cells->head = &cells->tail;
+ cell_list_rewind(cells);
+}
+
+static void
+cell_list_fini(struct cell_list *cells)
+{
+ pool_fini(cells->cell_pool.base);
+}
+
+inline static void
+cell_list_reset(struct cell_list *cells)
+{
+ cell_list_rewind(cells);
+ cells->head = &cells->tail;
+ pool_reset(cells->cell_pool.base);
+}
+
+static struct cell *
+cell_list_alloc(struct cell_list *cells,
+ struct cell *tail,
+ int x)
+{
+ struct cell *cell;
+
+ cell = pool_alloc(cells->cell_pool.base, sizeof (struct cell));
+ if (unlikely(NULL == cell))
+ abort();
+
+ *cells->cursor = cell;
+ cell->next = tail;
+ cell->x = x;
+ cell->uncovered_area = 0;
+ cell->covered_height = 0;
+ return cell;
+}
+
+/* Find a cell at the given x-coordinate. Returns %NULL if a new cell
+ * needed to be allocated but couldn't be. Cells must be found with
+ * non-decreasing x-coordinate until the cell list is rewound using
+ * cell_list_rewind(). Ownership of the returned cell is retained by
+ * the cell list. */
+inline static struct cell *
+cell_list_find(struct cell_list *cells, int x)
+{
+ struct cell **cursor = cells->cursor;
+ struct cell *cell;
+
+ do {
+ cell = *cursor;
+ if (cell->x >= x)
+ break;
+
+ cursor = &cell->next;
+ } while (1);
+ cells->cursor = cursor;
+
+ if (cell->x == x)
+ return cell;
+
+ return cell_list_alloc(cells, cell, x);
+}
+
+/* Add an unbounded subpixel span covering subpixels >= x to the
+ * coverage cells. */
+static void
+cell_list_add_unbounded_subspan(struct cell_list *cells, grid_scaled_x_t x)
+{
+ struct cell *cell;
+ int ix, fx;
+
+ FAST_SAMPLES_X_TO_INT_FRAC(x, ix, fx);
+
+ DBG(("%s: x=%d (%d+%d)\n", __FUNCTION__, x, ix, fx));
+
+ cell = cell_list_find(cells, ix);
+ cell->uncovered_area += 2*fx;
+ cell->covered_height++;
+}
+
+/* Add a subpixel span covering [x1, x2) to the coverage cells. */
+inline static void
+cell_list_add_subspan(struct cell_list *cells,
+ grid_scaled_x_t x1,
+ grid_scaled_x_t x2)
+{
+ struct cell *cell;
+ int ix1, fx1;
+ int ix2, fx2;
+
+ FAST_SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1);
+ FAST_SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2);
+
+ DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__,
+ x1, ix1, fx1, x2, ix2, fx2));
+
+ cell = cell_list_find(cells, ix1);
+ if (ix1 != ix2) {
+ cell->uncovered_area += 2*fx1;
+ ++cell->covered_height;
+
+ cell = cell_list_find(cells, ix2);
+ cell->uncovered_area -= 2*fx2;
+ --cell->covered_height;
+ } else
+ cell->uncovered_area += 2*(fx1-fx2);
+}
+
+/* Adds the analytical coverage of an edge crossing the current pixel
+ * row to the coverage cells and advances the edge's x position to the
+ * following row.
+ *
+ * This function is only called when we know that during this pixel row:
+ *
+ * 1) The relative order of all edges on the active list doesn't
+ * change. In particular, no edges intersect within this row to pixel
+ * precision.
+ *
+ * 2) No new edges start in this row.
+ *
+ * 3) No existing edges end mid-row.
+ *
+ * This function depends on being called with all edges from the
+ * active list in the order they appear on the list (i.e. with
+ * non-decreasing x-coordinate.) */
+static void
+cell_list_render_edge(struct cell_list *cells, struct edge *edge, int sign)
+{
+ grid_scaled_y_t y1, y2, dy;
+ grid_scaled_x_t fx1, fx2, dx;
+ int ix1, ix2;
+ struct quorem x1 = edge->x;
+ struct quorem x2 = x1;
+
+ if (!edge->vertical) {
+ x2.quo += edge->dxdy_full.quo;
+ x2.rem += edge->dxdy_full.rem;
+ if (x2.rem >= 0) {
+ ++x2.quo;
+ x2.rem -= edge->dy;
+ }
+
+ edge->x = x2;
+ }
+
+ FAST_SAMPLES_X_TO_INT_FRAC(x1.quo, ix1, fx1);
+ FAST_SAMPLES_X_TO_INT_FRAC(x2.quo, ix2, fx2);
+
+ DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__,
+ x1.quo, ix1, fx1, x2.quo, ix2, fx2));
+
+ /* Edge is entirely within a column? */
+ if (ix1 == ix2) {
+ /* We always know that ix1 is >= the cell list cursor in this
+ * case due to the no-intersections precondition. */
+ struct cell *cell = cell_list_find(cells, ix1);
+ cell->covered_height += sign*FAST_SAMPLES_Y;
+ cell->uncovered_area += sign*(fx1 + fx2)*FAST_SAMPLES_Y;
+ return;
+ }
+
+ /* Orient the edge left-to-right. */
+ dx = x2.quo - x1.quo;
+ if (dx >= 0) {
+ y1 = 0;
+ y2 = FAST_SAMPLES_Y;
+ } else {
+ int tmp;
+ tmp = ix1; ix1 = ix2; ix2 = tmp;
+ tmp = fx1; fx1 = fx2; fx2 = tmp;
+ dx = -dx;
+ sign = -sign;
+ y1 = FAST_SAMPLES_Y;
+ y2 = 0;
+ }
+ dy = y2 - y1;
+
+ /* Add coverage for all pixels [ix1,ix2] on this row crossed
+ * by the edge. */
+ {
+ struct quorem y = floored_divrem((FAST_SAMPLES_X - fx1)*dy, dx);
+ struct cell *cell;
+
+ /* When rendering a previous edge on the active list we may
+ * advance the cell list cursor past the leftmost pixel of the
+ * current edge even though the two edges don't intersect.
+ * e.g. consider two edges going down and rightwards:
+ *
+ * --\_+---\_+-----+-----+----
+ * \_ \_ | |
+ * | \_ | \_ | |
+ * | \_| \_| |
+ * | \_ \_ |
+ * ----+-----+-\---+-\---+----
+ *
+ * The left edge touches cells past the starting cell of the
+ * right edge. Fortunately such cases are rare.
+ *
+ * The rewinding is never necessary if the current edge stays
+ * within a single column because we've checked before calling
+ * this function that the active list order won't change. */
+ cell_list_maybe_rewind(cells, ix1);
+
+ cell = cell_list_find(cells, ix1);
+ cell->uncovered_area += sign*y.quo*(FAST_SAMPLES_X + fx1);
+ cell->covered_height += sign*y.quo;
+ y.quo += y1;
+
+ cell = cell_list_find(cells, ++ix1);
+ if (ix1 < ix2) {
+ struct quorem dydx_full = floored_divrem(FAST_SAMPLES_X*dy, dx);
+ do {
+ grid_scaled_y_t y_skip = dydx_full.quo;
+ y.rem += dydx_full.rem;
+ if (y.rem >= dx) {
+ ++y_skip;
+ y.rem -= dx;
+ }
+
+ y.quo += y_skip;
+
+ y_skip *= sign;
+ cell->uncovered_area += y_skip*FAST_SAMPLES_X;
+ cell->covered_height += y_skip;
+
+ cell = cell_list_find(cells, ++ix1);
+ } while (ix1 != ix2);
+ }
+ cell->uncovered_area += sign*(y2 - y.quo)*fx2;
+ cell->covered_height += sign*(y2 - y.quo);
+ }
+}
+
+
+static void
+polygon_fini(struct polygon *polygon)
+{
+ if (polygon->y_buckets != polygon->y_buckets_embedded)
+ free(polygon->y_buckets);
+
+ if (polygon->edges != polygon->edges_embedded)
+ free(polygon->edges);
+}
+
+static int
+polygon_init(struct polygon *polygon,
+ int num_edges,
+ grid_scaled_y_t ymin,
+ grid_scaled_y_t ymax)
+{
+ unsigned h = ymax - ymin;
+ unsigned num_buckets = EDGE_Y_BUCKET_INDEX(ymax+EDGE_Y_BUCKET_HEIGHT-1,
+ ymin);
+
+ if (unlikely(h > 0x7FFFFFFFU - EDGE_Y_BUCKET_HEIGHT))
+ goto bail_no_mem; /* even if you could, you wouldn't want to. */
+
+ polygon->edges = polygon->edges_embedded;
+ polygon->y_buckets = polygon->y_buckets_embedded;
+
+ polygon->num_edges = 0;
+ if (num_edges > ARRAY_SIZE(polygon->edges_embedded)) {
+ polygon->edges = malloc(sizeof(struct edge)*num_edges);
+ if (unlikely(NULL == polygon->edges))
+ goto bail_no_mem;
+ }
+
+ if (num_buckets > ARRAY_SIZE(polygon->y_buckets_embedded)) {
+ polygon->y_buckets = malloc(num_buckets*sizeof(struct edge *));
+ if (unlikely(NULL == polygon->y_buckets))
+ goto bail_no_mem;
+ }
+ memset(polygon->y_buckets, 0, num_buckets * sizeof(struct edge *));
+
+ polygon->ymin = ymin;
+ polygon->ymax = ymax;
+ return 0;
+
+bail_no_mem:
+ polygon_fini(polygon);
+ return -1;
+}
+
+static void
+_polygon_insert_edge_into_its_y_bucket(struct polygon *polygon, struct edge *e)
+{
+ unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop, polygon->ymin);
+ struct edge **ptail = &polygon->y_buckets[ix];
+ e->next = *ptail;
+ *ptail = e;
+}
+
+inline static void
+polygon_add_edge(struct polygon *polygon,
+ grid_scaled_x_t x1,
+ grid_scaled_x_t x2,
+ grid_scaled_y_t y1,
+ grid_scaled_y_t y2,
+ grid_scaled_y_t top,
+ grid_scaled_y_t bottom,
+ int dir)
+{
+ struct edge *e = &polygon->edges[polygon->num_edges++];
+ grid_scaled_x_t dx = x2 - x1;
+ grid_scaled_y_t dy = y2 - y1;
+ grid_scaled_y_t ytop, ybot;
+ grid_scaled_y_t ymin = polygon->ymin;
+ grid_scaled_y_t ymax = polygon->ymax;
+
+ e->dy = dy;
+ e->dir = dir;
+
+ ytop = top >= ymin ? top : ymin;
+ ybot = bottom <= ymax ? bottom : ymax;
+ e->ytop = ytop;
+ e->height_left = ybot - ytop;
+
+ if (dx == 0) {
+ e->vertical = true;
+ e->x.quo = x1;
+ e->x.rem = 0;
+ e->dxdy.quo = 0;
+ e->dxdy.rem = 0;
+ e->dxdy_full.quo = 0;
+ e->dxdy_full.rem = 0;
+ } else {
+ e->vertical = false;
+ e->dxdy = floored_divrem(dx, dy);
+ if (ytop == y1) {
+ e->x.quo = x1;
+ e->x.rem = 0;
+ } else {
+ e->x = floored_muldivrem(ytop - y1, dx, dy);
+ e->x.quo += x1;
+ }
+
+ if (e->height_left >= FAST_SAMPLES_Y) {
+ e->dxdy_full = floored_muldivrem(FAST_SAMPLES_Y, dx, dy);
+ } else {
+ e->dxdy_full.quo = 0;
+ e->dxdy_full.rem = 0;
+ }
+ }
+
+ _polygon_insert_edge_into_its_y_bucket(polygon, e);
+
+ e->x.rem -= dy; /* Bias the remainder for faster edge advancement. */
+}
+
+static void
+active_list_reset(struct active_list *active)
+{
+ active->head = NULL;
+ active->min_height = 0;
+}
+
+/*
+ * Merge two sorted edge lists.
+ * Input:
+ * - head_a: The head of the first list.
+ * - head_b: The head of the second list; head_b cannot be NULL.
+ * Output:
+ * Returns the head of the merged list.
+ *
+ * Implementation notes:
+ * To make it fast (in particular, to reduce to an insertion sort whenever
+ * one of the two input lists only has a single element) we iterate through
+ * a list until its head becomes greater than the head of the other list,
+ * then we switch their roles. As soon as one of the two lists is empty, we
+ * just attach the other one to the current list and exit.
+ * Writes to memory are only needed to "switch" lists (as it also requires
+ * attaching to the output list the list which we will be iterating next) and
+ * to attach the last non-empty list.
+ */
+static struct edge *
+merge_sorted_edges(struct edge *head_a, struct edge *head_b)
+{
+ struct edge *head, **next;
+
+ head = head_a;
+ next = &head;
+
+ while (1) {
+ while (head_a != NULL && head_a->x.quo <= head_b->x.quo) {
+ next = &head_a->next;
+ head_a = head_a->next;
+ }
+
+ *next = head_b;
+ if (head_a == NULL)
+ return head;
+
+ while (head_b != NULL && head_b->x.quo <= head_a->x.quo) {
+ next = &head_b->next;
+ head_b = head_b->next;
+ }
+
+ *next = head_a;
+ if (head_b == NULL)
+ return head;
+ }
+}
+
+/*
+ * Sort (part of) a list.
+ * Input:
+ * - list: The list to be sorted; list cannot be NULL.
+ * - limit: Recursion limit.
+ * Output:
+ * - head_out: The head of the sorted list containing the first 2^(level+1) elements of the
+ * input list; if the input list has fewer elements, head_out be a sorted list
+ * containing all the elements of the input list.
+ * Returns the head of the list of unprocessed elements (NULL if the sorted list contains
+ * all the elements of the input list).
+ *
+ * Implementation notes:
+ * Special case single element list, unroll/inline the sorting of the first two elements.
+ * Some tail recursion is used since we iterate on the bottom-up solution of the problem
+ * (we start with a small sorted list and keep merging other lists of the same size to it).
+ */
+static struct edge *
+sort_edges(struct edge *list,
+ unsigned int level,
+ struct edge **head_out)
+{
+ struct edge *head_other, *remaining;
+ unsigned int i;
+
+ head_other = list->next;
+
+ /* Single element list -> return */
+ if (head_other == NULL) {
+ *head_out = list;
+ return NULL;
+ }
+
+ /* Unroll the first iteration of the following loop (halves the number of calls to merge_sorted_edges):
+ * - Initialize remaining to be the list containing the elements after the second in the input list.
+ * - Initialize *head_out to be the sorted list containing the first two element.
+ */
+ remaining = head_other->next;
+ if (list->x.quo <= head_other->x.quo) {
+ *head_out = list;
+ /* list->next = head_other; */ /* The input list is already like this. */
+ head_other->next = NULL;
+ } else {
+ *head_out = head_other;
+ head_other->next = list;
+ list->next = NULL;
+ }
+
+ for (i = 0; i < level && remaining; i++) {
+ /* Extract a sorted list of the same size as *head_out
+ * (2^(i+1) elements) from the list of remaining elements. */
+ remaining = sort_edges(remaining, i, &head_other);
+ *head_out = merge_sorted_edges(*head_out, head_other);
+ }
+
+ /* *head_out now contains (at most) 2^(level+1) elements. */
+
+ return remaining;
+}
+
+/* Test if the edges on the active list can be safely advanced by a
+ * full row without intersections or any edges ending. */
+inline static bool
+active_list_can_step_full_row(struct active_list *active)
+{
+ const struct edge *e;
+ int prev_x = INT_MIN;
+
+ /* Recomputes the minimum height of all edges on the active
+ * list if we have been dropping edges. */
+ if (active->min_height <= 0) {
+ int min_height = INT_MAX;
+
+ e = active->head;
+ while (NULL != e) {
+ if (e->height_left < min_height)
+ min_height = e->height_left;
+ e = e->next;
+ }
+
+ active->min_height = min_height;
+ }
+
+ if (active->min_height < FAST_SAMPLES_Y)
+ return false;
+
+ /* Check for intersections as no edges end during the next row. */
+ e = active->head;
+ while (NULL != e) {
+ struct quorem x = e->x;
+
+ if (!e->vertical) {
+ x.quo += e->dxdy_full.quo;
+ x.rem += e->dxdy_full.rem;
+ if (x.rem >= 0)
+ ++x.quo;
+ }
+
+ if (x.quo <= prev_x)
+ return false;
+
+ prev_x = x.quo;
+ e = e->next;
+ }
+
+ return true;
+}
+
+/* Merges edges on the given subpixel row from the polygon to the
+ * active_list. */
+inline static void
+merge_edges(struct active_list *active,
+ grid_scaled_y_t y,
+ struct edge **ptail)
+{
+ /* Split off the edges on the current subrow and merge them into
+ * the active list. */
+ int min_height = active->min_height;
+ struct edge *subrow_edges = NULL;
+
+ do {
+ struct edge *tail = *ptail;
+ if (NULL == tail)
+ break;
+
+ if (y == tail->ytop) {
+ *ptail = tail->next;
+ tail->next = subrow_edges;
+ subrow_edges = tail;
+ if (tail->height_left < min_height)
+ min_height = tail->height_left;
+ } else
+ ptail = &tail->next;
+ } while (1);
+
+ if (subrow_edges) {
+ sort_edges(subrow_edges, UINT_MAX, &subrow_edges);
+ active->head = merge_sorted_edges(active->head, subrow_edges);
+ active->min_height = min_height;
+ }
+}
+
+/* Advance the edges on the active list by one subsample row by
+ * updating their x positions. Drop edges from the list that end. */
+inline static void
+substep_edges(struct active_list *active)
+{
+ struct edge **cursor = &active->head;
+ grid_scaled_x_t prev_x = INT_MIN;
+ struct edge *unsorted = NULL;
+
+ do {
+ struct edge *edge = *cursor;
+ if (NULL == edge)
+ break;
+
+ if (0 != --edge->height_left) {
+ edge->x.quo += edge->dxdy.quo;
+ edge->x.rem += edge->dxdy.rem;
+ if (edge->x.rem >= 0) {
+ ++edge->x.quo;
+ edge->x.rem -= edge->dy;
+ }
+
+ if (edge->x.quo < prev_x) {
+ *cursor = edge->next;
+ edge->next = unsorted;
+ unsorted = edge;
+ } else {
+ prev_x = edge->x.quo;
+ cursor = &edge->next;
+ }
+ } else
+ *cursor = edge->next;
+ } while (1);
+
+ if (unsorted) {
+ sort_edges(unsorted, UINT_MAX, &unsorted);
+ active->head = merge_sorted_edges(active->head, unsorted);
+ }
+}
+
+inline static void
+apply_nonzero_fill_rule_for_subrow(struct active_list *active,
+ struct cell_list *coverages)
+{
+ struct edge *edge = active->head;
+ int winding = 0;
+ int xstart;
+ int xend;
+
+ cell_list_rewind (coverages);
+
+ while (NULL != edge) {
+ xstart = edge->x.quo;
+ winding = edge->dir;
+ while (1) {
+ edge = edge->next;
+ if (NULL == edge)
+ return cell_list_add_unbounded_subspan(coverages, xstart);
+
+ winding += edge->dir;
+ if (0 == winding) {
+ if (edge->next == NULL ||
+ edge->next->x.quo != edge->x.quo)
+ break;
+ }
+ }
+
+ xend = edge->x.quo;
+ cell_list_add_subspan(coverages, xstart, xend);
+
+ edge = edge->next;
+ }
+}
+
+static void
+apply_nonzero_fill_rule_and_step_edges(struct active_list *active,
+ struct cell_list *coverages)
+{
+ struct edge **cursor = &active->head;
+ struct edge *left_edge;
+
+ left_edge = *cursor;
+ while (NULL != left_edge) {
+ struct edge *right_edge;
+ int winding = left_edge->dir;
+
+ left_edge->height_left -= FAST_SAMPLES_Y;
+ if (left_edge->height_left)
+ cursor = &left_edge->next;
+ else
+ *cursor = left_edge->next;
+
+ do {
+ right_edge = *cursor;
+ if (NULL == right_edge)
+ return cell_list_render_edge(coverages,
+ left_edge,
+ +1);
+
+ right_edge->height_left -= FAST_SAMPLES_Y;
+ if (right_edge->height_left)
+ cursor = &right_edge->next;
+ else
+ *cursor = right_edge->next;
+
+ winding += right_edge->dir;
+ if (0 == winding) {
+ if (right_edge->next == NULL ||
+ right_edge->next->x.quo != right_edge->x.quo)
+ break;
+ }
+
+ if (!right_edge->vertical) {
+ right_edge->x.quo += right_edge->dxdy_full.quo;
+ right_edge->x.rem += right_edge->dxdy_full.rem;
+ if (right_edge->x.rem >= 0) {
+ ++right_edge->x.quo;
+ right_edge->x.rem -= right_edge->dy;
+ }
+ }
+ } while (1);
+
+ cell_list_render_edge(coverages, left_edge, +1);
+ cell_list_render_edge(coverages, right_edge, -1);
+
+ left_edge = *cursor;
+ }
+}
+
+static void
+tor_fini(struct tor *converter)
+{
+ polygon_fini(converter->polygon);
+ cell_list_fini(converter->coverages);
+}
+
+static int
+tor_init(struct tor *converter, const BoxRec *box, int num_edges)
+{
+ DBG(("%s: (%d, %d),(%d, %d) x (%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1, box->x2, box->y2,
+ FAST_SAMPLES_X, FAST_SAMPLES_Y));
+
+ converter->xmin = box->x1;
+ converter->ymin = box->y1;
+ converter->xmax = box->x2;
+ converter->ymax = box->y2;
+
+ cell_list_init(converter->coverages);
+ active_list_reset(converter->active);
+ return polygon_init(converter->polygon,
+ num_edges,
+ box->y1 * FAST_SAMPLES_Y,
+ box->y2 * FAST_SAMPLES_Y);
+}
+
+static void
+tor_add_edge(struct tor *converter,
+ int dx, int dy,
+ int top, int bottom,
+ const xLineFixed *edge,
+ int dir)
+{
+ int x1, x2;
+ int y1, y2;
+
+ y1 = dy + (edge->p1.y >> (16 - FAST_SAMPLES_Y_shift));
+ y2 = dy + (edge->p2.y >> (16 - FAST_SAMPLES_Y_shift));
+ if (y1 == y2)
+ return;
+
+ x1 = dx + (edge->p1.x >> (16 - FAST_SAMPLES_X_shift));
+ x2 = dx + (edge->p2.x >> (16 - FAST_SAMPLES_X_shift));
+
+ DBG(("%s: edge=(%d, %d), (%d, %d), top=%d, bottom=%d, dir=%d\n",
+ __FUNCTION__, x1, y1, x2, y2, top, bottom, dir));
+ polygon_add_edge(converter->polygon,
+ x1, x2,
+ y1, y2,
+ top, bottom,
+ dir);
+}
+
+static bool
+active_list_is_vertical(struct active_list *active)
+{
+ struct edge *e;
+
+ for (e = active->head; e != NULL; e = e->next)
+ if (!e->vertical)
+ return false;
+
+ return true;
+}
+
+static void
+step_edges(struct active_list *active, int count)
+{
+ struct edge **cursor = &active->head;
+ struct edge *edge;
+
+ for (edge = *cursor; edge != NULL; edge = *cursor) {
+ edge->height_left -= FAST_SAMPLES_Y * count;
+ if (edge->height_left)
+ cursor = &edge->next;
+ else
+ *cursor = edge->next;
+ }
+}
+
+static void
+tor_blt_span(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage)
+{
+ DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
+
+ op->boxes(sna, op, box, 1, AREA_TO_ALPHA(coverage));
+ apply_damage_box(&op->base, box);
+}
+
+static void
+tor_blt_span_clipped(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage)
+{
+ pixman_region16_t region;
+ float opacity;
+
+ opacity = AREA_TO_ALPHA(coverage);
+ DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, opacity));
+
+ pixman_region_init_rects(&region, box, 1);
+ RegionIntersect(&region, &region, clip);
+ if (REGION_NUM_RECTS(&region)) {
+ op->boxes(sna, op,
+ REGION_RECTS(&region),
+ REGION_NUM_RECTS(&region),
+ opacity);
+ apply_damage(&op->base, &region);
+ }
+ pixman_region_fini(&region);
+}
+
+static void
+tor_blt_span_mono(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage)
+{
+ if (coverage < FAST_SAMPLES_XY/2)
+ return;
+
+ tor_blt_span(sna, op, clip, box, FAST_SAMPLES_XY);
+}
+
+static void
+tor_blt_span_mono_clipped(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage)
+{
+ if (coverage < FAST_SAMPLES_XY/2)
+ return;
+
+ tor_blt_span_clipped(sna, op, clip, box, FAST_SAMPLES_XY);
+}
+
+static void
+tor_blt_span_mono_unbounded(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage)
+{
+ tor_blt_span(sna, op, clip, box,
+ coverage < FAST_SAMPLES_XY/2 ? 0 :FAST_SAMPLES_XY);
+}
+
+static void
+tor_blt_span_mono_unbounded_clipped(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage)
+{
+ tor_blt_span_clipped(sna, op, clip, box,
+ coverage < FAST_SAMPLES_XY/2 ? 0 :FAST_SAMPLES_XY);
+}
+
+static void
+tor_blt(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ void (*span)(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage),
+ struct cell_list *cells,
+ int y, int height,
+ int xmin, int xmax,
+ int unbounded)
+{
+ struct cell *cell = cells->head;
+ BoxRec box;
+ int cover = 0;
+
+ /* Skip cells to the left of the clip region. */
+ while (cell != NULL && cell->x < xmin) {
+ DBG(("%s: skipping cell (%d, %d, %d)\n",
+ __FUNCTION__,
+ cell->x, cell->covered_height, cell->uncovered_area));
+
+ cover += cell->covered_height;
+ cell = cell->next;
+ }
+ cover *= FAST_SAMPLES_X*2;
+
+ box.y1 = y;
+ box.y2 = y + height;
+ box.x1 = xmin;
+
+ /* Form the spans from the coverages and areas. */
+ for (; cell != NULL; cell = cell->next) {
+ int x = cell->x;
+
+ DBG(("%s: cell=(%d, %d, %d), cover=%d, max=%d\n", __FUNCTION__,
+ cell->x, cell->covered_height, cell->uncovered_area,
+ cover, xmax));
+
+ if (x >= xmax)
+ break;
+
+ box.x2 = x;
+ if (box.x2 > box.x1 && (unbounded || cover))
+ span(sna, op, clip, &box, cover);
+ box.x1 = box.x2;
+
+ cover += cell->covered_height*FAST_SAMPLES_X*2;
+
+ if (cell->uncovered_area) {
+ int area = cover - cell->uncovered_area;
+ box.x2 = x + 1;
+ if (unbounded || area)
+ span(sna, op, clip, &box, area);
+ box.x1 = box.x2;
+ }
+ }
+
+ box.x2 = xmax;
+ if (box.x2 > box.x1 && (unbounded || cover))
+ span(sna, op, clip, &box, cover);
+}
+
+static void
+tor_blt_empty(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ void (*span)(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage),
+ int y, int height,
+ int xmin, int xmax)
+{
+ BoxRec box;
+
+ box.x1 = xmin;
+ box.x2 = xmax;
+ box.y1 = y;
+ box.y2 = y + height;
+
+ span(sna, op, clip, &box, 0);
+}
+
+static void
+tor_render(struct sna *sna,
+ struct tor *converter,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ void (*span)(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage),
+ int unbounded)
+{
+ int ymin = converter->ymin;
+ int xmin = converter->xmin;
+ int xmax = converter->xmax;
+ int i, j, h = converter->ymax - ymin;
+ struct polygon *polygon = converter->polygon;
+ struct cell_list *coverages = converter->coverages;
+ struct active_list *active = converter->active;
+
+ DBG(("%s: unbounded=%d\n", __FUNCTION__, unbounded));
+
+ /* Render each pixel row. */
+ for (i = 0; i < h; i = j) {
+ int do_full_step = 0;
+
+ j = i + 1;
+
+ /* Determine if we can ignore this row or use the full pixel
+ * stepper. */
+ if (!polygon->y_buckets[i]) {
+ if (!active->head) {
+ for (; j < h && !polygon->y_buckets[j]; j++)
+ ;
+ DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
+ __FUNCTION__, i, j));
+
+ if (unbounded)
+ tor_blt_empty(sna, op, clip, span, i+ymin, j-i, xmin, xmax);
+ continue;
+ }
+
+ do_full_step = active_list_can_step_full_row(active);
+ }
+
+ DBG(("%s: do_full_step=%d, new edges=%d\n",
+ __FUNCTION__, do_full_step, polygon->y_buckets[i] != NULL));
+ if (do_full_step) {
+ /* Step by a full pixel row's worth. */
+ apply_nonzero_fill_rule_and_step_edges(active,
+ coverages);
+
+ if (active_list_is_vertical(active)) {
+ while (j < h &&
+ polygon->y_buckets[j] == NULL &&
+ active->min_height >= 2*FAST_SAMPLES_Y)
+ {
+ active->min_height -= FAST_SAMPLES_Y;
+ j++;
+ }
+ if (j != i + 1)
+ step_edges(active, j - (i + 1));
+
+ DBG(("%s: vertical edges, full step (%d, %d)\n",
+ __FUNCTION__, i, j));
+ }
+ } else {
+ grid_scaled_y_t y = (i+ymin)*FAST_SAMPLES_Y;
+ grid_scaled_y_t suby;
+
+ /* Subsample this row. */
+ for (suby = 0; suby < FAST_SAMPLES_Y; suby++) {
+ if (polygon->y_buckets[i])
+ merge_edges(active,
+ y + suby,
+ &polygon->y_buckets[i]);
+
+ apply_nonzero_fill_rule_for_subrow(active,
+ coverages);
+ substep_edges(active);
+ }
+ }
+
+ if (coverages->head != &coverages->tail) {
+ tor_blt(sna, op, clip, span, coverages,
+ i+ymin, j-i, xmin, xmax,
+ unbounded);
+ cell_list_reset(coverages);
+ } else if (unbounded)
+ tor_blt_empty(sna, op, clip, span, i+ymin, j-i, xmin, xmax);
+
+ if (!active->head)
+ active->min_height = INT_MAX;
+ else
+ active->min_height -= FAST_SAMPLES_Y;
+ }
+}
+
+static int operator_is_bounded(uint8_t op)
+{
+ switch (op) {
+ case PictOpOver:
+ case PictOpOutReverse:
+ case PictOpAdd:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static void
+trapezoids_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
+ PictFormatPtr maskFormat, INT16 xSrc, INT16 ySrc,
+ int ntrap, xTrapezoid * traps)
+{
+ ScreenPtr screen = dst->pDrawable->pScreen;
+
+ if (maskFormat) {
+ PixmapPtr scratch;
+ PicturePtr mask;
+ INT16 dst_x, dst_y;
+ BoxRec bounds;
+ int width, height, depth;
+ pixman_image_t *image;
+ pixman_format_code_t format;
+ int error;
+
+ dst_x = pixman_fixed_to_int(traps[0].left.p1.x);
+ dst_y = pixman_fixed_to_int(traps[0].left.p1.y);
+
+ miTrapezoidBounds(ntrap, traps, &bounds);
+ if (bounds.y1 >= bounds.y2 || bounds.x1 >= bounds.x2)
+ return;
+
+ DBG(("%s: bounds (%d, %d), (%d, %d)\n",
+ __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2));
+
+ if (!sna_compute_composite_extents(&bounds,
+ src, NULL, dst,
+ xSrc, ySrc,
+ 0, 0,
+ bounds.x1, bounds.y1,
+ bounds.x2 - bounds.x1,
+ bounds.y2 - bounds.y1))
+ return;
+
+ DBG(("%s: extents (%d, %d), (%d, %d)\n",
+ __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2));
+
+ width = bounds.x2 - bounds.x1;
+ height = bounds.y2 - bounds.y1;
+ bounds.x1 -= dst->pDrawable->x;
+ bounds.y1 -= dst->pDrawable->y;
+ depth = maskFormat->depth;
+ format = maskFormat->format | (BitsPerPixel(depth) << 24);
+
+ DBG(("%s: mask (%dx%d) depth=%d, format=%08x\n",
+ __FUNCTION__, width, height, depth, format));
+ scratch = sna_pixmap_create_upload(screen,
+ width, height, depth);
+ if (!scratch)
+ return;
+
+ memset(scratch->devPrivate.ptr, 0, scratch->devKind*height);
+ image = pixman_image_create_bits(format, width, height,
+ scratch->devPrivate.ptr,
+ scratch->devKind);
+ if (image) {
+ for (; ntrap; ntrap--, traps++)
+ pixman_rasterize_trapezoid(image,
+ (pixman_trapezoid_t *)traps,
+ -bounds.x1, -bounds.y1);
+
+ pixman_image_unref(image);
+ }
+
+ mask = CreatePicture(0, &scratch->drawable,
+ PictureMatchFormat(screen, depth, format),
+ 0, 0, serverClient, &error);
+ screen->DestroyPixmap(scratch);
+ if (!mask)
+ return;
+
+ CompositePicture(op, src, mask, dst,
+ xSrc + bounds.x1 - dst_x,
+ ySrc + bounds.y1 - dst_y,
+ 0, 0,
+ bounds.x1, bounds.y1,
+ width, height);
+ FreePicture(mask, 0);
+ } else {
+ if (dst->polyEdge == PolyEdgeSharp)
+ maskFormat = PictureMatchFormat(screen, 1, PICT_a1);
+ else
+ maskFormat = PictureMatchFormat(screen, 8, PICT_a8);
+
+ for (; ntrap; ntrap--, traps++)
+ trapezoids_fallback(op,
+ src, dst, maskFormat,
+ xSrc, ySrc, 1, traps);
+ }
+}
+
+static Bool
+composite_aligned_boxes(CARD8 op,
+ PicturePtr src,
+ PicturePtr dst,
+ PictFormatPtr maskFormat,
+ INT16 src_x, INT16 src_y,
+ int ntrap, xTrapezoid *traps)
+{
+ BoxRec stack_boxes[64], *boxes, extents;
+ pixman_region16_t region, clip;
+ struct sna *sna;
+ struct sna_composite_op tmp;
+ Bool ret = true;
+ int dx, dy, n, num_boxes;
+
+ DBG(("%s\n", __FUNCTION__));
+
+ boxes = stack_boxes;
+ if (ntrap > ARRAY_SIZE(stack_boxes))
+ boxes = malloc(sizeof(BoxRec)*ntrap);
+
+ dx = dst->pDrawable->x;
+ dy = dst->pDrawable->y;
+
+ extents.x1 = extents.y1 = 32767;
+ extents.x2 = extents.y2 = -32767;
+ num_boxes = 0;
+ for (n = 0; n < ntrap; n++) {
+ boxes[num_boxes].x1 = dx + pixman_fixed_to_int(traps[n].left.p1.x + pixman_fixed_1_minus_e/2);
+ boxes[num_boxes].y1 = dy + pixman_fixed_to_int(traps[n].top + pixman_fixed_1_minus_e/2);
+ boxes[num_boxes].x2 = dx + pixman_fixed_to_int(traps[n].right.p2.x + pixman_fixed_1_minus_e/2);
+ boxes[num_boxes].y2 = dy + pixman_fixed_to_int(traps[n].bottom + pixman_fixed_1_minus_e/2);
+
+ if (boxes[num_boxes].x1 >= boxes[num_boxes].x2)
+ continue;
+ if (boxes[num_boxes].y1 >= boxes[num_boxes].y2)
+ continue;
+
+ if (boxes[num_boxes].x1 < extents.x1)
+ extents.x1 = boxes[num_boxes].x1;
+ if (boxes[num_boxes].x2 > extents.x2)
+ extents.x2 = boxes[num_boxes].x2;
+
+ if (boxes[num_boxes].y1 < extents.y1)
+ extents.y1 = boxes[num_boxes].y1;
+ if (boxes[num_boxes].y2 > extents.y2)
+ extents.y2 = boxes[num_boxes].y2;
+
+ num_boxes++;
+ }
+
+ if (num_boxes == 0)
+ return true;
+
+ DBG(("%s: extents (%d, %d), (%d, %d) offset of (%d, %d)\n",
+ __FUNCTION__,
+ extents.x1, extents.y1,
+ extents.x2, extents.y2,
+ extents.x1 - boxes[0].x1,
+ extents.y1 - boxes[0].y1));
+
+ src_x += extents.x1 - boxes[0].x1;
+ src_y += extents.y1 - boxes[0].y1;
+
+ if (!sna_compute_composite_region(&clip,
+ src, NULL, dst,
+ src_x, src_y,
+ 0, 0,
+ extents.x1 - dx, extents.y1 - dy,
+ extents.x2 - extents.x1,
+ extents.y2 - extents.y1)) {
+ DBG(("%s: trapezoids do not intersect drawable clips\n",
+ __FUNCTION__)) ;
+ goto done;
+ }
+
+ memset(&tmp, 0, sizeof(tmp));
+ sna = to_sna_from_drawable(dst->pDrawable);
+ if (!sna->render.composite(sna, op, src, NULL, dst,
+ src_x, src_y,
+ 0, 0,
+ extents.x1, extents.y1,
+ extents.x2 - extents.x1,
+ extents.y2 - extents.y1,
+ &tmp)) {
+ DBG(("%s: composite render op not supported\n",
+ __FUNCTION__));
+ ret = false;
+ goto done;
+ }
+
+ if (maskFormat ||
+ (op == PictOpSrc || op == PictOpClear) ||
+ num_boxes == 1) {
+ pixman_region_init_rects(&region, boxes, num_boxes);
+ RegionIntersect(&region, &region, &clip);
+ if (REGION_NUM_RECTS(&region)) {
+ tmp.boxes(sna, &tmp,
+ REGION_RECTS(&region),
+ REGION_NUM_RECTS(&region));
+ apply_damage(&tmp, &region);
+ }
+ pixman_region_fini(&region);
+ } else {
+ for (n = 0; n < num_boxes; n++) {
+ pixman_region_init_rects(&region, &boxes[n], 1);
+ RegionIntersect(&region, &region, &clip);
+ if (REGION_NUM_RECTS(&region)) {
+ tmp.boxes(sna, &tmp,
+ REGION_RECTS(&region),
+ REGION_NUM_RECTS(&region));
+ apply_damage(&tmp, &region);
+ }
+ pixman_region_fini(&region);
+ }
+ }
+ tmp.done(sna, &tmp);
+
+done:
+ REGION_UNINIT(NULL, &clip);
+ if (boxes != stack_boxes)
+ free(boxes);
+
+ return ret;
+}
+
+static inline int coverage(int samples, pixman_fixed_t f)
+{
+ return (samples * pixman_fixed_frac(f) + pixman_fixed_1/2) / pixman_fixed_1;
+}
+
+static void
+composite_unaligned_box(struct sna *sna,
+ struct sna_composite_spans_op *tmp,
+ const BoxRec *box,
+ float opacity,
+ pixman_region16_t *clip)
+{
+ pixman_region16_t region;
+
+ pixman_region_init_rects(&region, box, 1);
+ RegionIntersect(&region, &region, clip);
+ if (REGION_NUM_RECTS(&region)) {
+ tmp->boxes(sna, tmp,
+ REGION_RECTS(&region),
+ REGION_NUM_RECTS(&region),
+ opacity);
+ apply_damage(&tmp->base, &region);
+ }
+ pixman_region_fini(&region);
+}
+
+static void
+composite_unaligned_trap_row(struct sna *sna,
+ struct sna_composite_spans_op *tmp,
+ xTrapezoid *trap, int dx,
+ int y1, int y2, int covered,
+ pixman_region16_t *clip)
+{
+ BoxRec box;
+ int opacity;
+ int x1, x2;
+
+ if (covered == 0)
+ return;
+
+ if (y2 > clip->extents.y2)
+ y2 = clip->extents.y2;
+ if (y1 < clip->extents.y1)
+ y1 = clip->extents.y1;
+ if (y1 >= y2)
+ return;
+
+ x1 = dx + pixman_fixed_to_int(trap->left.p1.x);
+ x2 = dx + pixman_fixed_to_int(trap->right.p1.x);
+ if (x2 < clip->extents.x1 || x1 > clip->extents.x2)
+ return;
+
+ box.y1 = y1;
+ box.y2 = y2;
+
+ if (x1 == x2) {
+ box.x1 = x1;
+ box.x2 = x2 + 1;
+
+ opacity = covered;
+ opacity *= coverage(SAMPLES_X, trap->right.p1.x) - coverage(SAMPLES_X, trap->left.p1.x);
+
+ if (opacity)
+ composite_unaligned_box(sna, tmp, &box,
+ opacity/255., clip);
+ } else {
+ if (pixman_fixed_frac(trap->left.p1.x)) {
+ box.x1 = x1;
+ box.x2 = x1++;
+
+ opacity = covered;
+ opacity *= SAMPLES_X - coverage(SAMPLES_X, trap->left.p1.x);
+
+ if (opacity)
+ composite_unaligned_box(sna, tmp, &box,
+ opacity/255., clip);
+ }
+
+ if (x2 > x1) {
+ box.x1 = x1;
+ box.x2 = x2;
+
+ composite_unaligned_box(sna, tmp, &box,
+ covered*SAMPLES_X/255., clip);
+ }
+
+ if (pixman_fixed_frac(trap->right.p1.x)) {
+ box.x1 = x2;
+ box.x2 = x2 + 1;
+
+ opacity = covered;
+ opacity *= coverage(SAMPLES_X, trap->right.p1.x);
+
+ if (opacity)
+ composite_unaligned_box(sna, tmp, &box,
+ opacity/255., clip);
+ }
+ }
+}
+
+static void
+composite_unaligned_trap(struct sna *sna,
+ struct sna_composite_spans_op *tmp,
+ xTrapezoid *trap,
+ int dx, int dy,
+ pixman_region16_t *clip)
+{
+ int y1, y2;
+
+ y1 = dy + pixman_fixed_to_int(trap->top);
+ y2 = dy + pixman_fixed_to_int(trap->bottom);
+
+ if (y1 == y2) {
+ composite_unaligned_trap_row(sna, tmp, trap, dx,
+ y1, y1 + 1,
+ coverage(SAMPLES_Y, trap->bottom) - coverage(SAMPLES_Y, trap->top),
+ clip);
+ } else {
+ if (pixman_fixed_frac(trap->top)) {
+ composite_unaligned_trap_row(sna, tmp, trap, dx,
+ y1, y1 + 1,
+ SAMPLES_Y - coverage(SAMPLES_Y, trap->top),
+ clip);
+ y1++;
+ }
+
+ if (y2 > y1)
+ composite_unaligned_trap_row(sna, tmp, trap, dx,
+ y1, y2,
+ SAMPLES_Y,
+ clip);
+
+ if (pixman_fixed_frac(trap->bottom))
+ composite_unaligned_trap_row(sna, tmp, trap, dx,
+ y2, y2 + 1,
+ coverage(SAMPLES_Y, trap->bottom),
+ clip);
+ }
+}
+
+inline static void
+blt_opacity(PixmapPtr scratch,
+ int x1, int x2,
+ int y, int h,
+ uint8_t opacity)
+{
+ uint8_t *ptr;
+
+ if (opacity == 0xff)
+ return;
+
+ if (x1 < 0)
+ x1 = 0;
+ if (x2 > scratch->drawable.width)
+ x2 = scratch->drawable.width;
+ if (x1 >= x2)
+ return;
+
+ x2 -= x1;
+
+ ptr = scratch->devPrivate.ptr;
+ ptr += scratch->devKind * y;
+ ptr += x1;
+ do {
+ if (x2 == 1)
+ *ptr = opacity;
+ else
+ memset(ptr, opacity, x2);
+ ptr += scratch->devKind;
+ } while (--h);
+}
+
+static void
+blt_unaligned_box_row(PixmapPtr scratch,
+ BoxPtr extents,
+ xTrapezoid *trap,
+ int y1, int y2,
+ int covered)
+{
+ int x1, x2;
+
+ if (y2 > scratch->drawable.height)
+ y2 = scratch->drawable.height;
+ if (y1 < 0)
+ y1 = 0;
+ if (y1 >= y2)
+ return;
+
+ y2 -= y1;
+
+ x1 = pixman_fixed_to_int(trap->left.p1.x);
+ x2 = pixman_fixed_to_int(trap->right.p1.x);
+
+ x1 -= extents->x1;
+ x2 -= extents->x1;
+
+ if (x1 == x2) {
+ blt_opacity(scratch,
+ x1, x1+1,
+ y1, y2,
+ covered * (coverage(SAMPLES_X, trap->right.p1.x) - coverage(SAMPLES_X, trap->left.p1.x)));
+ } else {
+ if (pixman_fixed_frac(trap->left.p1.x))
+ blt_opacity(scratch,
+ x1, x1+1,
+ y1, y2,
+ covered * (SAMPLES_X - coverage(SAMPLES_X, trap->left.p1.x)));
+
+ if (x2 > x1 + 1) {
+ blt_opacity(scratch,
+ x1 + 1, x2,
+ y1, y2,
+ covered*SAMPLES_X);
+ }
+
+ if (pixman_fixed_frac(trap->right.p1.x))
+ blt_opacity(scratch,
+ x2, x2 + 1,
+ y1, y2,
+ covered * coverage(SAMPLES_X, trap->right.p1.x));
+ }
+}
+
+static Bool
+composite_unaligned_boxes_fallback(CARD8 op,
+ PicturePtr src,
+ PicturePtr dst,
+ INT16 src_x, INT16 src_y,
+ int ntrap, xTrapezoid *traps)
+{
+ ScreenPtr screen = dst->pDrawable->pScreen;
+ INT16 dst_x = pixman_fixed_to_int(traps[0].left.p1.x);
+ INT16 dst_y = pixman_fixed_to_int(traps[0].left.p1.y);
+ int dx = dst->pDrawable->x;
+ int dy = dst->pDrawable->y;
+ int n;
+
+ for (n = 0; n < ntrap; n++) {
+ xTrapezoid *t = &traps[n];
+ PixmapPtr scratch;
+ PicturePtr mask;
+ BoxRec extents;
+ int error;
+ int y1, y2;
+
+ extents.x1 = pixman_fixed_to_int(t->left.p1.x);
+ extents.x2 = pixman_fixed_to_int(t->right.p1.x + pixman_fixed_1_minus_e);
+ extents.y1 = pixman_fixed_to_int(t->top);
+ extents.y2 = pixman_fixed_to_int(t->bottom + pixman_fixed_1_minus_e);
+
+ if (!sna_compute_composite_extents(&extents,
+ src, NULL, dst,
+ src_x, src_y,
+ 0, 0,
+ extents.x1, extents.y1,
+ extents.x2 - extents.x1,
+ extents.y2 - extents.y1))
+ continue;
+
+ scratch = sna_pixmap_create_upload(screen,
+ extents.x2 - extents.x1,
+ extents.y2 - extents.y1,
+ 8);
+ if (!scratch)
+ continue;
+
+ memset(scratch->devPrivate.ptr, 0xff,
+ scratch->devKind * (extents.y2 - extents.y1));
+
+ extents.x1 -= dx;
+ extents.x2 -= dx;
+ extents.y1 -= dy;
+ extents.y2 -= dy;
+
+ y1 = pixman_fixed_to_int(t->top) - extents.y1;
+ y2 = pixman_fixed_to_int(t->bottom) - extents.y1;
+
+ if (y1 == y2) {
+ blt_unaligned_box_row(scratch, &extents, t, y1, y1 + 1,
+ coverage(SAMPLES_Y, t->bottom) - coverage(SAMPLES_Y, t->top));
+ } else {
+ if (pixman_fixed_frac(t->top))
+ blt_unaligned_box_row(scratch, &extents, t, y1, y1 + 1,
+ SAMPLES_Y - coverage(SAMPLES_Y, t->top));
+
+ if (y2 > y1 + 1)
+ blt_unaligned_box_row(scratch, &extents, t, y1+1, y2,
+ SAMPLES_Y);
+
+ if (pixman_fixed_frac(t->bottom))
+ blt_unaligned_box_row(scratch, &extents, t, y2, y2+1,
+ coverage(SAMPLES_Y, t->bottom));
+ }
+
+ mask = CreatePicture(0, &scratch->drawable,
+ PictureMatchFormat(screen, 8, PICT_a8),
+ 0, 0, serverClient, &error);
+ screen->DestroyPixmap(scratch);
+ if (mask) {
+ CompositePicture(op, src, mask, dst,
+ src_x + extents.x1 - dst_x,
+ src_y + extents.y1 - dst_y,
+ 0, 0,
+ extents.x1, extents.y1,
+ extents.x2 - extents.x1,
+ extents.y2 - extents.y1);
+ FreePicture(mask, 0);
+ }
+ }
+
+ return TRUE;
+}
+
+static Bool
+composite_unaligned_boxes(CARD8 op,
+ PicturePtr src,
+ PicturePtr dst,
+ PictFormatPtr maskFormat,
+ INT16 src_x, INT16 src_y,
+ int ntrap, xTrapezoid *traps)
+{
+ struct sna *sna;
+ BoxRec extents;
+ struct sna_composite_spans_op tmp;
+ pixman_region16_t clip;
+ int dst_x, dst_y;
+ int dx, dy, n;
+
+ DBG(("%s\n", __FUNCTION__));
+
+ /* XXX need a span converter to handle overlapping traps */
+ if (ntrap > 1 && maskFormat)
+ return false;
+
+ sna = to_sna_from_drawable(dst->pDrawable);
+ if (!sna->render.composite_spans)
+ return composite_unaligned_boxes_fallback(op, src, dst, src_x, src_y, ntrap, traps);
+
+ dst_x = extents.x1 = pixman_fixed_to_int(traps[0].left.p1.x);
+ extents.x2 = pixman_fixed_to_int(traps[0].right.p1.x + pixman_fixed_1_minus_e);
+ dst_y = extents.y1 = pixman_fixed_to_int(traps[0].top);
+ extents.y2 = pixman_fixed_to_int(traps[0].bottom + pixman_fixed_1_minus_e);
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d)\n",
+ __FUNCTION__, src_x, src_y, dst_x, dst_y));
+
+ for (n = 1; n < ntrap; n++) {
+ int x1 = pixman_fixed_to_int(traps[n].left.p1.x);
+ int x2 = pixman_fixed_to_int(traps[n].right.p1.x + pixman_fixed_1_minus_e);
+ int y1 = pixman_fixed_to_int(traps[n].top);
+ int y2 = pixman_fixed_to_int(traps[n].bottom + pixman_fixed_1_minus_e);
+
+ if (x1 < extents.x1)
+ extents.x1 = x1;
+ if (x2 > extents.x2)
+ extents.x2 = x2;
+ if (y1 < extents.y1)
+ extents.y1 = y1;
+ if (y2 > extents.y2)
+ extents.y2 = y2;
+ }
+
+ DBG(("%s: extents (%d, %d), (%d, %d)\n", __FUNCTION__,
+ extents.x1, extents.y1, extents.x2, extents.y2));
+
+ if (!sna_compute_composite_region(&clip,
+ src, NULL, dst,
+ src_x + extents.x1 - dst_x,
+ src_y + extents.y1 - dst_y,
+ 0, 0,
+ extents.x1, extents.y1,
+ extents.x2 - extents.x1,
+ extents.y2 - extents.y1)) {
+ DBG(("%s: trapezoids do not intersect drawable clips\n",
+ __FUNCTION__)) ;
+ return true;
+ }
+
+ extents = *RegionExtents(&clip);
+ dx = dst->pDrawable->x;
+ dy = dst->pDrawable->y;
+
+ DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
+ __FUNCTION__,
+ extents.x1, extents.y1,
+ extents.x2, extents.y2,
+ dx, dy,
+ src_x + extents.x1 - dst_x - dx,
+ src_y + extents.y1 - dst_y - dy));
+
+ memset(&tmp, 0, sizeof(tmp));
+ if (!sna->render.composite_spans(sna, op, src, dst,
+ src_x + extents.x1 - dst_x - dx,
+ src_y + extents.y1 - dst_y - dy,
+ extents.x1, extents.y1,
+ extents.x2 - extents.x1,
+ extents.y2 - extents.y1,
+ &tmp)) {
+ DBG(("%s: composite spans render op not supported\n",
+ __FUNCTION__));
+ return false;
+ }
+
+ for (n = 0; n < ntrap; n++)
+ composite_unaligned_trap(sna, &tmp, &traps[n], dx, dy, &clip);
+ tmp.done(sna, &tmp);
+
+ REGION_UNINIT(NULL, &clip);
+ return true;
+}
+
+static bool
+tor_scan_converter(CARD8 op, PicturePtr src, PicturePtr dst,
+ PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+ int ntrap, xTrapezoid *traps)
+{
+ struct sna *sna;
+ struct sna_composite_spans_op tmp;
+ struct tor tor;
+ void (*span)(struct sna *sna,
+ struct sna_composite_spans_op *op,
+ pixman_region16_t *clip,
+ const BoxRec *box,
+ int coverage);
+ BoxRec extents;
+ pixman_region16_t clip;
+ int16_t dst_x, dst_y;
+ int16_t dx, dy;
+ int n;
+
+ /* XXX strict adhernce to the Reneder specification */
+ if (dst->polyMode == PolyModePrecise) {
+ DBG(("%s: fallback -- precise rasterisation requested\n",
+ __FUNCTION__));
+ return false;
+ }
+
+ sna = to_sna_from_drawable(dst->pDrawable);
+ if (!sna->render.composite_spans) {
+ DBG(("%s: fallback -- composite spans not supported\n",
+ __FUNCTION__));
+ return false;
+ }
+
+ dst_x = pixman_fixed_to_int(traps[0].left.p1.x);
+ dst_y = pixman_fixed_to_int(traps[0].left.p1.y);
+
+ miTrapezoidBounds(ntrap, traps, &extents);
+ if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
+ return true;
+
+ DBG(("%s: extents (%d, %d), (%d, %d)\n",
+ __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
+
+ if (!sna_compute_composite_region(&clip,
+ src, NULL, dst,
+ src_x + extents.x1 - dst_x,
+ src_y + extents.y1 - dst_y,
+ 0, 0,
+ extents.x1, extents.y1,
+ extents.x2 - extents.x1,
+ extents.y2 - extents.y1)) {
+ DBG(("%s: trapezoids do not intersect drawable clips\n",
+ __FUNCTION__)) ;
+ return true;
+ }
+
+ extents = *RegionExtents(&clip);
+ dx = dst->pDrawable->x;
+ dy = dst->pDrawable->y;
+
+ DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
+ __FUNCTION__,
+ extents.x1, extents.y1,
+ extents.x2, extents.y2,
+ dx, dy,
+ src_x + extents.x1 - dst_x - dx,
+ src_y + extents.y1 - dst_y - dy));
+
+ memset(&tmp, 0, sizeof(tmp));
+ if (!sna->render.composite_spans(sna, op, src, dst,
+ src_x + extents.x1 - dst_x - dx,
+ src_y + extents.y1 - dst_y - dy,
+ extents.x1, extents.y1,
+ extents.x2 - extents.x1,
+ extents.y2 - extents.y1,
+ &tmp)) {
+ DBG(("%s: fallback -- composite spans render op not supported\n",
+ __FUNCTION__));
+ return false;
+ }
+
+ dx *= FAST_SAMPLES_X;
+ dy *= FAST_SAMPLES_Y;
+ if (tor_init(&tor, &extents, 2*ntrap))
+ goto skip;
+
+ for (n = 0; n < ntrap; n++) {
+ int top, bottom;
+
+ if (!xTrapezoidValid(&traps[n]))
+ continue;
+
+ if (pixman_fixed_to_int(traps[n].top) + dst->pDrawable->y >= extents.y2 ||
+ pixman_fixed_to_int(traps[n].bottom) + dst->pDrawable->y < extents.y1)
+ continue;
+
+ top = dy + (traps[n].top >> (16 - FAST_SAMPLES_Y_shift));
+ bottom = dy + (traps[n].bottom >> (16 - FAST_SAMPLES_Y_shift));
+ if (top >= bottom)
+ continue;
+
+ tor_add_edge(&tor, dx, dy, top, bottom, &traps[n].left, 1);
+ tor_add_edge(&tor, dx, dy, top, bottom, &traps[n].right, -1);
+ }
+
+ if (maskFormat ? maskFormat->depth < 8 : dst->polyEdge == PolyEdgeSharp) {
+ /* XXX An imprecise approximation */
+ if (maskFormat && !operator_is_bounded(op)) {
+ span = tor_blt_span_mono_unbounded;
+ if (REGION_NUM_RECTS(&clip) > 1)
+ span = tor_blt_span_mono_unbounded_clipped;
+ } else {
+ span = tor_blt_span_mono;
+ if (REGION_NUM_RECTS(&clip) > 1)
+ span = tor_blt_span_mono_clipped;
+ }
+ } else {
+ span = tor_blt_span;
+ if (REGION_NUM_RECTS(&clip) > 1)
+ span = tor_blt_span_clipped;
+ }
+
+ tor_render(sna, &tor, &tmp, &clip, span,
+ maskFormat && !operator_is_bounded(op));
+
+skip:
+ tor_fini(&tor);
+ tmp.done(sna, &tmp);
+
+ REGION_UNINIT(NULL, &clip);
+ return true;
+}
+
+void
+sna_composite_trapezoids(CARD8 op,
+ PicturePtr src,
+ PicturePtr dst,
+ PictFormatPtr maskFormat,
+ INT16 xSrc, INT16 ySrc,
+ int ntrap, xTrapezoid *traps)
+{
+ struct sna *sna = to_sna_from_drawable(dst->pDrawable);
+ bool rectilinear = true;
+ bool pixel_aligned = true;
+ int n;
+
+ DBG(("%s(op=%d, src=(%d, %d), mask=%08x, ntrap=%d)\n", __FUNCTION__,
+ op, xSrc, ySrc,
+ maskFormat ? (int)maskFormat->format : 0,
+ ntrap));
+
+ if (ntrap == 0)
+ return;
+
+ if (NO_ACCEL)
+ goto fallback;
+
+ if (sna->kgem.wedged || !sna->have_render) {
+ DBG(("%s: fallback -- wedged=%d, have_render=%d\n",
+ __FUNCTION__, sna->kgem.wedged, sna->have_render));
+ goto fallback;
+ }
+
+ if (dst->alphaMap || src->alphaMap) {
+ DBG(("%s: fallback -- alpha maps=(dst=%p, src=%p)\n",
+ __FUNCTION__, dst->alphaMap, src->alphaMap));
+ goto fallback;
+ }
+
+ if (too_small(sna, dst->pDrawable) && !picture_is_gpu(src)) {
+ DBG(("%s: fallback -- dst is too small, %dx%d\n",
+ __FUNCTION__,
+ dst->pDrawable->width,
+ dst->pDrawable->height));
+ goto fallback;
+ }
+
+ /* scan through for fast rectangles */
+ for (n = 0; n < ntrap && rectilinear; n++) {
+ rectilinear &=
+ traps[n].left.p1.x == traps[n].left.p2.x &&
+ traps[n].right.p1.x == traps[n].right.p2.x;
+ pixel_aligned &=
+ ((traps[n].top | traps[n].bottom |
+ traps[n].left.p1.x | traps[n].left.p2.x |
+ traps[n].right.p1.x | traps[n].right.p2.x)
+ & pixman_fixed_1_minus_e) == 0;
+ }
+
+ if (rectilinear) {
+ pixel_aligned |= maskFormat ?
+ maskFormat->depth == 1 :
+ dst->polyEdge == PolyEdgeSharp;
+ if (pixel_aligned) {
+ if (composite_aligned_boxes(op, src, dst,
+ maskFormat,
+ xSrc, ySrc,
+ ntrap, traps))
+ return;
+ } else {
+ if (composite_unaligned_boxes(op, src, dst,
+ maskFormat,
+ xSrc, ySrc,
+ ntrap, traps))
+ return;
+ }
+ }
+
+ if (tor_scan_converter(op, src, dst, maskFormat,
+ xSrc, ySrc, ntrap, traps))
+ return;
+
+fallback:
+ DBG(("%s: fallback mask=%08x, ntrap=%d\n", __FUNCTION__,
+ maskFormat ? (unsigned)maskFormat->format : 0, ntrap));
+ trapezoids_fallback(op, src, dst, maskFormat,
+ xSrc, ySrc,
+ ntrap, traps);
+}
diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
new file mode 100644
index 00000000..b6cbda22
--- /dev/null
+++ b/src/sna/sna_video.c
@@ -0,0 +1,737 @@
+/***************************************************************************
+
+ Copyright 2000 Intel Corporation. All Rights Reserved.
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sub license, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial portions
+ of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ IN NO EVENT SHALL INTEL, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **************************************************************************/
+
+/*
+ * i830_video.c: i830/i845 Xv driver.
+ *
+ * Copyright © 2002 by Alan Hourihane and David Dawes
+ *
+ * Authors:
+ * Alan Hourihane <alanh@tungstengraphics.com>
+ * David Dawes <dawes@xfree86.org>
+ *
+ * Derived from i810 Xv driver:
+ *
+ * Authors of i810 code:
+ * Jonathan Bian <jonathan.bian@intel.com>
+ * Offscreen Images:
+ * Matt Sottek <matthew.j.sottek@intel.com>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <inttypes.h>
+#include <math.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+
+#include <sys/mman.h>
+
+#include "sna.h"
+#include "sna_reg.h"
+#include "sna_video.h"
+
+#include <xf86xv.h>
+#include <X11/extensions/Xv.h>
+
+#ifdef SNA_XVMC
+#define _SNA_XVMC_SERVER_
+#include "sna_video_hwmc.h"
+#else
+static inline Bool sna_video_xvmc_setup(struct sna *sna,
+ ScreenPtr ptr,
+ XF86VideoAdaptorPtr target)
+{
+ return FALSE;
+}
+#endif
+
+#if DEBUG_VIDEO_TEXTURED
+#undef DBG
+#define DBG(x) ErrorF x
+#endif
+
+void sna_video_free_buffers(struct sna *sna, struct sna_video *video)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(video->old_buf); i++) {
+ if (video->old_buf[i]) {
+ kgem_bo_destroy(&sna->kgem, video->old_buf[i]);
+ video->old_buf[i] = NULL;
+ }
+ }
+
+ if (video->buf) {
+ kgem_bo_destroy(&sna->kgem, video->buf);
+ video->buf = NULL;
+ }
+}
+
+void sna_video_frame_fini(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame)
+{
+ struct kgem_bo *bo;
+
+ if (!frame->bo->reusable) {
+ kgem_bo_destroy(&sna->kgem, frame->bo);
+ return;
+ }
+
+ bo = video->old_buf[1];
+ video->old_buf[1] = video->old_buf[0];
+ video->old_buf[0] = video->buf;
+ video->buf = bo;
+}
+
+Bool
+sna_video_clip_helper(ScrnInfoPtr scrn,
+ struct sna_video *video,
+ xf86CrtcPtr * crtc_ret,
+ BoxPtr dst,
+ short src_x, short src_y,
+ short drw_x, short drw_y,
+ short src_w, short src_h,
+ short drw_w, short drw_h,
+ int id,
+ int *top, int* left, int* npixels, int *nlines,
+ RegionPtr reg, INT32 width, INT32 height)
+{
+ Bool ret;
+ RegionRec crtc_region_local;
+ RegionPtr crtc_region = reg;
+ BoxRec crtc_box;
+ INT32 x1, x2, y1, y2;
+ xf86CrtcPtr crtc;
+
+ x1 = src_x;
+ x2 = src_x + src_w;
+ y1 = src_y;
+ y2 = src_y + src_h;
+
+ dst->x1 = drw_x;
+ dst->x2 = drw_x + drw_w;
+ dst->y1 = drw_y;
+ dst->y2 = drw_y + drw_h;
+
+ /*
+ * For overlay video, compute the relevant CRTC and
+ * clip video to that
+ */
+ crtc = sna_covering_crtc(scrn, dst, video->desired_crtc,
+ &crtc_box);
+
+ /* For textured video, we don't actually want to clip at all. */
+ if (crtc && !video->textured) {
+ RegionInit(&crtc_region_local, &crtc_box, 1);
+ crtc_region = &crtc_region_local;
+ RegionIntersect(crtc_region, crtc_region, reg);
+ }
+ *crtc_ret = crtc;
+
+ ret = xf86XVClipVideoHelper(dst, &x1, &x2, &y1, &y2,
+ crtc_region, width, height);
+ if (crtc_region != reg)
+ RegionUninit(&crtc_region_local);
+
+ *top = y1 >> 16;
+ *left = (x1 >> 16) & ~1;
+ *npixels = ALIGN(((x2 + 0xffff) >> 16), 2) - *left;
+ if (is_planar_fourcc(id)) {
+ *top &= ~1;
+ *nlines = ALIGN(((y2 + 0xffff) >> 16), 2) - *top;
+ } else
+ *nlines = ((y2 + 0xffff) >> 16) - *top;
+
+ return ret;
+}
+
+void
+sna_video_frame_init(struct sna *sna,
+ struct sna_video *video,
+ int id, short width, short height,
+ struct sna_video_frame *frame)
+{
+ int align;
+
+ frame->id = id;
+ frame->width = width;
+ frame->height = height;
+
+ /* Only needs to be DWORD-aligned for textured on i915, but overlay has
+ * stricter requirements.
+ */
+ if (video->textured) {
+ align = 4;
+ } else {
+ if (sna->kgem.gen >= 40)
+ /* Actually the alignment is 64 bytes, too. But the
+ * stride must be at least 512 bytes. Take the easy fix
+ * and align on 512 bytes unconditionally. */
+ align = 512;
+ else if (IS_I830(sna) || IS_845G(sna))
+ /* Harsh, errata on these chipsets limit the stride
+ * to be a multiple of 256 bytes.
+ */
+ align = 256;
+ else
+ align = 64;
+ }
+
+#if SNA_XVMC
+ /* for i915 xvmc, hw requires 1kb aligned surfaces */
+ if (id == FOURCC_XVMC && sna->kgem.gen < 40)
+ align = 1024;
+#endif
+
+
+ /* Determine the desired destination pitch (representing the chroma's pitch,
+ * in the planar case.
+ */
+ if (is_planar_fourcc(id)) {
+ if (video->rotation & (RR_Rotate_90 | RR_Rotate_270)) {
+ frame->pitch[0] = ALIGN((height / 2), align);
+ frame->pitch[1] = ALIGN(height, align);
+ frame->size = frame->pitch[0] * width * 3;
+ } else {
+ frame->pitch[0] = ALIGN((width / 2), align);
+ frame->pitch[1] = ALIGN(width, align);
+ frame->size = frame->pitch[0] * height * 3;
+ }
+ } else {
+ if (video->rotation & (RR_Rotate_90 | RR_Rotate_270)) {
+ frame->pitch[0] = ALIGN((height << 1), align);
+ frame->size = frame->pitch[0] * width;
+ } else {
+ frame->pitch[0] = ALIGN((width << 1), align);
+ frame->size = frame->pitch[0] * height;
+ }
+ frame->pitch[1] = 0;
+ }
+
+ frame->YBufOffset = 0;
+
+ if (video->rotation & (RR_Rotate_90 | RR_Rotate_270)) {
+ frame->UBufOffset =
+ frame->YBufOffset + frame->pitch[1] * width;
+ frame->VBufOffset =
+ frame->UBufOffset + frame->pitch[0] * width / 2;
+ } else {
+ frame->UBufOffset =
+ frame->YBufOffset + frame->pitch[1] * height;
+ frame->VBufOffset =
+ frame->UBufOffset + frame->pitch[0] * height / 2;
+ }
+}
+
+static struct kgem_bo *
+sna_video_buffer(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame)
+{
+ /* Free the current buffer if we're going to have to reallocate */
+ if (video->buf && video->buf->size < frame->size)
+ sna_video_free_buffers(sna, video);
+
+ if (video->buf == NULL)
+ video->buf = kgem_create_linear(&sna->kgem, frame->size);
+
+ return video->buf;
+}
+
+static void sna_memcpy_plane(unsigned char *dst, unsigned char *src,
+ int height, int width,
+ int dstPitch, int srcPitch, Rotation rotation)
+{
+ int i, j = 0;
+ unsigned char *s;
+
+ switch (rotation) {
+ case RR_Rotate_0:
+ /* optimise for the case of no clipping */
+ if (srcPitch == dstPitch && srcPitch == width)
+ memcpy(dst, src, srcPitch * height);
+ else
+ for (i = 0; i < height; i++) {
+ memcpy(dst, src, width);
+ src += srcPitch;
+ dst += dstPitch;
+ }
+ break;
+ case RR_Rotate_90:
+ for (i = 0; i < height; i++) {
+ s = src;
+ for (j = 0; j < width; j++) {
+ dst[(i) + ((width - j - 1) * dstPitch)] = *s++;
+ }
+ src += srcPitch;
+ }
+ break;
+ case RR_Rotate_180:
+ for (i = 0; i < height; i++) {
+ s = src;
+ for (j = 0; j < width; j++) {
+ dst[(width - j - 1) +
+ ((height - i - 1) * dstPitch)] = *s++;
+ }
+ src += srcPitch;
+ }
+ break;
+ case RR_Rotate_270:
+ for (i = 0; i < height; i++) {
+ s = src;
+ for (j = 0; j < width; j++) {
+ dst[(height - i - 1) + (j * dstPitch)] = *s++;
+ }
+ src += srcPitch;
+ }
+ break;
+ }
+}
+
+static void
+sna_copy_planar_data(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ unsigned char *buf,
+ unsigned char *dst,
+ int srcPitch, int srcPitch2,
+ int srcH, int top, int left)
+{
+ unsigned char *src1, *src2, *src3, *dst1, *dst2, *dst3;
+
+ /* Copy Y data */
+ src1 = buf + (top * srcPitch) + left;
+
+ dst1 = dst + frame->YBufOffset;
+
+ sna_memcpy_plane(dst1, src1,
+ frame->height, frame->width,
+ frame->pitch[1], srcPitch,
+ video->rotation);
+
+ /* Copy V data for YV12, or U data for I420 */
+ src2 = buf + /* start of YUV data */
+ (srcH * srcPitch) + /* move over Luma plane */
+ ((top >> 1) * srcPitch2) + /* move down from by top lines */
+ (left >> 1); /* move left by left pixels */
+
+ if (frame->id == FOURCC_I420)
+ dst2 = dst + frame->UBufOffset;
+ else
+ dst2 = dst + frame->VBufOffset;
+
+ sna_memcpy_plane(dst2, src2,
+ frame->height / 2, frame->width / 2,
+ frame->pitch[0], srcPitch2,
+ video->rotation);
+
+ /* Copy U data for YV12, or V data for I420 */
+ src3 = buf + /* start of YUV data */
+ (srcH * srcPitch) + /* move over Luma plane */
+ ((srcH >> 1) * srcPitch2) + /* move over Chroma plane */
+ ((top >> 1) * srcPitch2) + /* move down from by top lines */
+ (left >> 1); /* move left by left pixels */
+ if (frame->id == FOURCC_I420)
+ dst3 = dst + frame->VBufOffset;
+ else
+ dst3 = dst + frame->UBufOffset;
+
+ sna_memcpy_plane(dst3, src3,
+ frame->height / 2, frame->width / 2,
+ frame->pitch[0], srcPitch2,
+ video->rotation);
+}
+
+static void
+sna_copy_packed_data(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ unsigned char *buf,
+ unsigned char *dst,
+ int srcPitch,
+ int top, int left)
+{
+ unsigned char *src;
+ unsigned char *s;
+ int i, j;
+
+ src = buf + (top * srcPitch) + (left << 1);
+
+ dst += frame->YBufOffset;
+
+ switch (video->rotation) {
+ case RR_Rotate_0:
+ frame->width <<= 1;
+ for (i = 0; i < frame->height; i++) {
+ memcpy(dst, src, frame->width);
+ src += srcPitch;
+ dst += frame->pitch[0];
+ }
+ break;
+ case RR_Rotate_90:
+ frame->height <<= 1;
+ for (i = 0; i < frame->height; i += 2) {
+ s = src;
+ for (j = 0; j < frame->width; j++) {
+ /* Copy Y */
+ dst[(i + 0) + ((frame->width - j - 1) * frame->pitch[0])] = *s++;
+ (void)*s++;
+ }
+ src += srcPitch;
+ }
+ frame->height >>= 1;
+ src = buf + (top * srcPitch) + (left << 1);
+ for (i = 0; i < frame->height; i += 2) {
+ for (j = 0; j < frame->width; j += 2) {
+ /* Copy U */
+ dst[((i * 2) + 1) + ((frame->width - j - 1) * frame->pitch[0])] =
+ src[(j * 2) + 1 + (i * srcPitch)];
+ dst[((i * 2) + 1) + ((frame->width - j - 2) * frame->pitch[0])] =
+ src[(j * 2) + 1 + ((i + 1) * srcPitch)];
+ /* Copy V */
+ dst[((i * 2) + 3) + ((frame->width - j - 1) * frame->pitch[0])] =
+ src[(j * 2) + 3 + (i * srcPitch)];
+ dst[((i * 2) + 3) + ((frame->width - j - 2) * frame->pitch[0])] =
+ src[(j * 2) + 3 + ((i + 1) * srcPitch)];
+ }
+ }
+ break;
+ case RR_Rotate_180:
+ frame->width <<= 1;
+ for (i = 0; i < frame->height; i++) {
+ s = src;
+ for (j = 0; j < frame->width; j += 4) {
+ dst[(frame->width - j - 4) + ((frame->height - i - 1) * frame->pitch[0])] =
+ *s++;
+ dst[(frame->width - j - 3) + ((frame->height - i - 1) * frame->pitch[0])] =
+ *s++;
+ dst[(frame->width - j - 2) + ((frame->height - i - 1) * frame->pitch[0])] =
+ *s++;
+ dst[(frame->width - j - 1) + ((frame->height - i - 1) * frame->pitch[0])] =
+ *s++;
+ }
+ src += srcPitch;
+ }
+ break;
+ case RR_Rotate_270:
+ frame->height <<= 1;
+ for (i = 0; i < frame->height; i += 2) {
+ s = src;
+ for (j = 0; j < frame->width; j++) {
+ /* Copy Y */
+ dst[(frame->height - i - 2) + (j * frame->pitch[0])] = *s++;
+ (void)*s++;
+ }
+ src += srcPitch;
+ }
+ frame->height >>= 1;
+ src = buf + (top * srcPitch) + (left << 1);
+ for (i = 0; i < frame->height; i += 2) {
+ for (j = 0; j < frame->width; j += 2) {
+ /* Copy U */
+ dst[(((frame->height - i) * 2) - 3) + (j * frame->pitch[0])] =
+ src[(j * 2) + 1 + (i * srcPitch)];
+ dst[(((frame->height - i) * 2) - 3) +
+ ((j + 1) * frame->pitch[0])] =
+ src[(j * 2) + 1 + ((i + 1) * srcPitch)];
+ /* Copy V */
+ dst[(((frame->height - i) * 2) - 1) + (j * frame->pitch[0])] =
+ src[(j * 2) + 3 + (i * srcPitch)];
+ dst[(((frame->height - i) * 2) - 1) +
+ ((j + 1) * frame->pitch[0])] =
+ src[(j * 2) + 3 + ((i + 1) * srcPitch)];
+ }
+ }
+ break;
+ }
+}
+
+Bool
+sna_video_copy_data(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ int top, int left,
+ int npixels, int nlines,
+ unsigned char *buf)
+{
+ unsigned char *dst;
+
+ frame->bo = sna_video_buffer(sna, video, frame);
+ if (frame->bo == NULL)
+ return FALSE;
+
+ /* copy data */
+ dst = kgem_bo_map(&sna->kgem, frame->bo, PROT_READ | PROT_WRITE);
+ if (dst == NULL)
+ return FALSE;
+
+ if (is_planar_fourcc(frame->id)) {
+ int srcPitch = ALIGN(frame->width, 0x4);
+ int srcPitch2 = ALIGN((frame->width >> 1), 0x4);
+
+ sna_copy_planar_data(sna, video, frame,
+ buf, dst,
+ srcPitch, srcPitch2,
+ nlines, top, left);
+ } else {
+ int srcPitch = frame->width << 1;
+
+ sna_copy_packed_data(sna, video, frame,
+ buf, dst,
+ srcPitch,
+ top, left);
+ }
+
+ munmap(dst, video->buf->size);
+ return TRUE;
+}
+
+static void sna_crtc_box(xf86CrtcPtr crtc, BoxPtr crtc_box)
+{
+ if (crtc->enabled) {
+ crtc_box->x1 = crtc->x;
+ crtc_box->x2 =
+ crtc->x + xf86ModeWidth(&crtc->mode, crtc->rotation);
+ crtc_box->y1 = crtc->y;
+ crtc_box->y2 =
+ crtc->y + xf86ModeHeight(&crtc->mode, crtc->rotation);
+ } else
+ crtc_box->x1 = crtc_box->x2 = crtc_box->y1 = crtc_box->y2 = 0;
+}
+
+static void sna_box_intersect(BoxPtr dest, BoxPtr a, BoxPtr b)
+{
+ dest->x1 = a->x1 > b->x1 ? a->x1 : b->x1;
+ dest->x2 = a->x2 < b->x2 ? a->x2 : b->x2;
+ dest->y1 = a->y1 > b->y1 ? a->y1 : b->y1;
+ dest->y2 = a->y2 < b->y2 ? a->y2 : b->y2;
+ if (dest->x1 >= dest->x2 || dest->y1 >= dest->y2)
+ dest->x1 = dest->x2 = dest->y1 = dest->y2 = 0;
+}
+
+static int sna_box_area(BoxPtr box)
+{
+ return (int)(box->x2 - box->x1) * (int)(box->y2 - box->y1);
+}
+
+/*
+ * Return the crtc covering 'box'. If two crtcs cover a portion of
+ * 'box', then prefer 'desired'. If 'desired' is NULL, then prefer the crtc
+ * with greater coverage
+ */
+
+xf86CrtcPtr
+sna_covering_crtc(ScrnInfoPtr scrn,
+ BoxPtr box, xf86CrtcPtr desired, BoxPtr crtc_box_ret)
+{
+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn);
+ xf86CrtcPtr crtc, best_crtc;
+ int coverage, best_coverage;
+ int c;
+ BoxRec crtc_box, cover_box;
+
+ DBG(("%s for box=(%d, %d), (%d, %d)\n",
+ __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
+
+ best_crtc = NULL;
+ best_coverage = 0;
+ crtc_box_ret->x1 = 0;
+ crtc_box_ret->x2 = 0;
+ crtc_box_ret->y1 = 0;
+ crtc_box_ret->y2 = 0;
+ for (c = 0; c < xf86_config->num_crtc; c++) {
+ crtc = xf86_config->crtc[c];
+
+ /* If the CRTC is off, treat it as not covering */
+ if (!sna_crtc_on(crtc)) {
+ DBG(("%s: crtc %d off, skipping\n", __FUNCTION__, c));
+ continue;
+ }
+
+ sna_crtc_box(crtc, &crtc_box);
+ sna_box_intersect(&cover_box, &crtc_box, box);
+ coverage = sna_box_area(&cover_box);
+ if (coverage && crtc == desired) {
+ DBG(("%s: box is on desired crtc [%p]\n",
+ __FUNCTION__, crtc));
+ *crtc_box_ret = crtc_box;
+ return crtc;
+ }
+ if (coverage > best_coverage) {
+ *crtc_box_ret = crtc_box;
+ best_crtc = crtc;
+ best_coverage = coverage;
+ }
+ }
+ DBG(("%s: best crtc = %p\n", __FUNCTION__, best_crtc));
+ return best_crtc;
+}
+
+bool
+sna_wait_for_scanline(struct sna *sna, PixmapPtr pixmap,
+ xf86CrtcPtr crtc, RegionPtr clip)
+{
+ pixman_box16_t box, crtc_box;
+ int pipe, event;
+ Bool full_height;
+ int y1, y2;
+ uint32_t *b;
+
+ /* XXX no wait for scanline support on SNB? */
+ if (sna->kgem.gen >= 60)
+ return false;
+
+ if (!pixmap_is_scanout(pixmap))
+ return false;
+
+ if (crtc == NULL) {
+ if (clip) {
+ crtc_box = *REGION_EXTENTS(NULL, clip);
+ } else {
+ crtc_box.x1 = 0; /* XXX drawable offsets? */
+ crtc_box.y1 = 0;
+ crtc_box.x2 = pixmap->drawable.width;
+ crtc_box.y2 = pixmap->drawable.height;
+ }
+ crtc = sna_covering_crtc(sna->scrn, &crtc_box, NULL, &crtc_box);
+ }
+
+ if (crtc == NULL)
+ return false;
+
+ if (clip) {
+ box = *REGION_EXTENTS(unused, clip);
+
+ if (crtc->transform_in_use)
+ pixman_f_transform_bounds(&crtc->f_framebuffer_to_crtc, &box);
+
+ /* We could presume the clip was correctly computed... */
+ sna_crtc_box(crtc, &crtc_box);
+ sna_box_intersect(&box, &crtc_box, &box);
+
+ /*
+ * Make sure we don't wait for a scanline that will
+ * never occur
+ */
+ y1 = (crtc_box.y1 <= box.y1) ? box.y1 - crtc_box.y1 : 0;
+ y2 = (box.y2 <= crtc_box.y2) ?
+ box.y2 - crtc_box.y1 : crtc_box.y2 - crtc_box.y1;
+ if (y2 <= y1)
+ return false;
+
+ full_height = FALSE;
+ if (y1 == 0 && y2 == (crtc_box.y2 - crtc_box.y1))
+ full_height = TRUE;
+ } else {
+ sna_crtc_box(crtc, &crtc_box);
+ y1 = crtc_box.y1;
+ y2 = crtc_box.y2;
+ full_height = TRUE;
+ }
+
+ /*
+ * Pre-965 doesn't have SVBLANK, so we need a bit
+ * of extra time for the blitter to start up and
+ * do its job for a full height blit
+ */
+ if (sna_crtc_to_pipe(crtc) == 0) {
+ pipe = MI_LOAD_SCAN_LINES_DISPLAY_PIPEA;
+ event = MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW;
+ if (full_height)
+ event = MI_WAIT_FOR_PIPEA_SVBLANK;
+ } else {
+ pipe = MI_LOAD_SCAN_LINES_DISPLAY_PIPEB;
+ event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW;
+ if (full_height)
+ event = MI_WAIT_FOR_PIPEB_SVBLANK;
+ }
+
+ if (crtc->mode.Flags & V_INTERLACE) {
+ /* DSL count field lines */
+ y1 /= 2;
+ y2 /= 2;
+ }
+
+ b = kgem_get_batch(&sna->kgem, 5);
+ /* The documentation says that the LOAD_SCAN_LINES command
+ * always comes in pairs. Don't ask me why. */
+ b[0] = MI_LOAD_SCAN_LINES_INCL | pipe;
+ b[1] = (y1 << 16) | (y2-1);
+ b[2] = MI_LOAD_SCAN_LINES_INCL | pipe;
+ b[3] = (y1 << 16) | (y2-1);
+ b[4] = MI_WAIT_FOR_EVENT | event;
+ kgem_advance_batch(&sna->kgem, 5);
+ return true;
+}
+
+void sna_video_init(struct sna *sna, ScreenPtr screen)
+{
+ XF86VideoAdaptorPtr *adaptors, *newAdaptors;
+ XF86VideoAdaptorPtr textured, overlay;
+ int num_adaptors;
+ int prefer_overlay =
+ xf86ReturnOptValBool(sna->Options, OPTION_PREFER_OVERLAY, FALSE);
+
+ num_adaptors = xf86XVListGenericAdaptors(sna->scrn, &adaptors);
+ newAdaptors =
+ malloc((num_adaptors + 2) * sizeof(XF86VideoAdaptorPtr *));
+ if (newAdaptors == NULL)
+ return;
+
+ memcpy(newAdaptors, adaptors,
+ num_adaptors * sizeof(XF86VideoAdaptorPtr));
+ adaptors = newAdaptors;
+
+ /* Set up textured video if we can do it at this depth and we are on
+ * supported hardware.
+ */
+ textured = sna_video_textured_setup(sna, screen);
+ overlay = sna_video_overlay_setup(sna, screen);
+
+ if (overlay && prefer_overlay)
+ adaptors[num_adaptors++] = overlay;
+
+ if (textured)
+ adaptors[num_adaptors++] = textured;
+
+ if (overlay && !prefer_overlay)
+ adaptors[num_adaptors++] = overlay;
+
+ if (num_adaptors)
+ xf86XVScreenInit(screen, adaptors, num_adaptors);
+ else
+ xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING,
+ "Disabling Xv because no adaptors could be initialized.\n");
+ if (textured)
+ sna_video_xvmc_setup(sna, screen, textured);
+
+ free(adaptors);
+}
diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h
new file mode 100644
index 00000000..f66a6977
--- /dev/null
+++ b/src/sna/sna_video.h
@@ -0,0 +1,130 @@
+/***************************************************************************
+
+Copyright 2000 Intel Corporation. All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL INTEL, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+#ifndef SNA_VIDEO_H
+#define SNA_VIDEO_H
+
+#include <xf86.h>
+#include <xf86_OSproc.h>
+#include <fourcc.h>
+
+#if defined(XvMCExtension) && defined(ENABLE_XVMC)
+#define SNA_XVMC 1
+#endif
+
+struct sna_video {
+ int brightness;
+ int contrast;
+ int saturation;
+ xf86CrtcPtr desired_crtc;
+
+ RegionRec clip;
+
+ uint32_t gamma0;
+ uint32_t gamma1;
+ uint32_t gamma2;
+ uint32_t gamma3;
+ uint32_t gamma4;
+ uint32_t gamma5;
+
+ int color_key;
+
+ /** YUV data buffers */
+ struct kgem_bo *old_buf[2];
+ struct kgem_bo *buf;
+
+ Bool textured;
+ Rotation rotation;
+
+ int SyncToVblank; /* -1: auto, 0: off, 1: on */
+};
+
+struct sna_video_frame {
+ struct kgem_bo *bo;
+ int id;
+ int width, height;
+ int pitch[2];
+ int size;
+ uint32_t YBufOffset;
+ uint32_t UBufOffset;
+ uint32_t VBufOffset;
+};
+
+void sna_video_init(struct sna *sna, ScreenPtr screen);
+XF86VideoAdaptorPtr sna_video_overlay_setup(struct sna *sna,
+ ScreenPtr screen);
+XF86VideoAdaptorPtr sna_video_textured_setup(struct sna *sna,
+ ScreenPtr screen);
+
+#define FOURCC_XVMC (('C' << 24) + ('M' << 16) + ('V' << 8) + 'X')
+
+static inline int is_planar_fourcc(int id)
+{
+ switch (id) {
+ case FOURCC_YV12:
+ case FOURCC_I420:
+ case FOURCC_XVMC:
+ return 1;
+ case FOURCC_UYVY:
+ case FOURCC_YUY2:
+ default:
+ return 0;
+ }
+}
+
+Bool
+sna_video_clip_helper(ScrnInfoPtr scrn,
+ struct sna_video *adaptor_priv,
+ xf86CrtcPtr * crtc_ret,
+ BoxPtr dst,
+ short src_x, short src_y,
+ short drw_x, short drw_y,
+ short src_w, short src_h,
+ short drw_w, short drw_h,
+ int id,
+ int *top, int* left, int* npixels, int *nlines,
+ RegionPtr reg, INT32 width, INT32 height);
+
+void
+sna_video_frame_init(struct sna *sna,
+ struct sna_video *video,
+ int id, short width, short height,
+ struct sna_video_frame *frame);
+
+Bool
+sna_video_copy_data(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ int top, int left,
+ int npixels, int nlines,
+ unsigned char *buf);
+
+void sna_video_frame_fini(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame);
+void sna_video_free_buffers(struct sna *sna, struct sna_video *video);
+
+#endif /* SNA_VIDEO_H */
diff --git a/src/sna/sna_video_hwmc.c b/src/sna/sna_video_hwmc.c
new file mode 100644
index 00000000..3da7d3a5
--- /dev/null
+++ b/src/sna/sna_video_hwmc.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Zhenyu Wang <zhenyu.z.wang@sna.com>
+ *
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#define _SNA_XVMC_SERVER_
+#include "sna.h"
+#include "sna_video_hwmc.h"
+
+#include <X11/extensions/Xv.h>
+#include <X11/extensions/XvMC.h>
+#include <fourcc.h>
+
+static int create_subpicture(ScrnInfoPtr scrn, XvMCSubpicturePtr subpicture,
+ int *num_priv, CARD32 ** priv)
+{
+ return Success;
+}
+
+static void destroy_subpicture(ScrnInfoPtr scrn, XvMCSubpicturePtr subpicture)
+{
+}
+
+static int create_surface(ScrnInfoPtr scrn, XvMCSurfacePtr surface,
+ int *num_priv, CARD32 ** priv)
+{
+ return Success;
+}
+
+static void destroy_surface(ScrnInfoPtr scrn, XvMCSurfacePtr surface)
+{
+}
+
+static int create_context(ScrnInfoPtr scrn, XvMCContextPtr pContext,
+ int *num_priv, CARD32 **priv)
+{
+ struct sna *sna = to_sna(scrn);
+ struct sna_xvmc_hw_context *contextRec;
+
+ *priv = calloc(1, sizeof(struct sna_xvmc_hw_context));
+ contextRec = (struct sna_xvmc_hw_context *) *priv;
+ if (!contextRec) {
+ *num_priv = 0;
+ return BadAlloc;
+ }
+
+ *num_priv = sizeof(struct sna_xvmc_hw_context) >> 2;
+
+ if (sna->kgem.gen >= 40) {
+ if (sna->kgem.gen >= 45)
+ contextRec->type = XVMC_I965_MPEG2_VLD;
+ else
+ contextRec->type = XVMC_I965_MPEG2_MC;
+ contextRec->i965.is_g4x = sna->kgem.gen == 45;
+ contextRec->i965.is_965_q = IS_965_Q(sna);
+ contextRec->i965.is_igdng = IS_GEN5(sna);
+ } else {
+ contextRec->type = XVMC_I915_MPEG2_MC;
+ contextRec->i915.use_phys_addr = 0;
+ }
+
+ return Success;
+}
+
+static void destroy_context(ScrnInfoPtr scrn, XvMCContextPtr context)
+{
+}
+
+/* i915 hwmc support */
+static XF86MCSurfaceInfoRec i915_YV12_mpg2_surface = {
+ FOURCC_YV12,
+ XVMC_CHROMA_FORMAT_420,
+ 0,
+ 720,
+ 576,
+ 720,
+ 576,
+ XVMC_MPEG_2,
+ /* XVMC_OVERLAID_SURFACE | XVMC_SUBPICTURE_INDEPENDENT_SCALING, */
+ 0,
+ /* &yv12_subpicture_list */
+ NULL,
+};
+
+static XF86MCSurfaceInfoRec i915_YV12_mpg1_surface = {
+ FOURCC_YV12,
+ XVMC_CHROMA_FORMAT_420,
+ 0,
+ 720,
+ 576,
+ 720,
+ 576,
+ XVMC_MPEG_1,
+ /* XVMC_OVERLAID_SURFACE | XVMC_SUBPICTURE_INDEPENDENT_SCALING, */
+ 0,
+ NULL,
+};
+
+static XF86MCSurfaceInfoPtr surface_info_i915[2] = {
+ (XF86MCSurfaceInfoPtr) & i915_YV12_mpg2_surface,
+ (XF86MCSurfaceInfoPtr) & i915_YV12_mpg1_surface
+};
+
+/* i965 and later hwmc support */
+#ifndef XVMC_VLD
+#define XVMC_VLD 0x00020000
+#endif
+
+static XF86MCSurfaceInfoRec yv12_mpeg2_vld_surface = {
+ FOURCC_YV12,
+ XVMC_CHROMA_FORMAT_420,
+ 0,
+ 1936,
+ 1096,
+ 1920,
+ 1080,
+ XVMC_MPEG_2 | XVMC_VLD,
+ XVMC_INTRA_UNSIGNED,
+ NULL
+};
+
+static XF86MCSurfaceInfoRec yv12_mpeg2_i965_surface = {
+ FOURCC_YV12,
+ XVMC_CHROMA_FORMAT_420,
+ 0,
+ 1936,
+ 1096,
+ 1920,
+ 1080,
+ XVMC_MPEG_2 | XVMC_MOCOMP,
+ /* XVMC_OVERLAID_SURFACE | XVMC_SUBPICTURE_INDEPENDENT_SCALING, */
+ XVMC_INTRA_UNSIGNED,
+ /* &yv12_subpicture_list */
+ NULL
+};
+
+static XF86MCSurfaceInfoRec yv12_mpeg1_i965_surface = {
+ FOURCC_YV12,
+ XVMC_CHROMA_FORMAT_420,
+ 0,
+ 1920,
+ 1080,
+ 1920,
+ 1080,
+ XVMC_MPEG_1 | XVMC_MOCOMP,
+ /*XVMC_OVERLAID_SURFACE | XVMC_SUBPICTURE_INDEPENDENT_SCALING |
+ XVMC_INTRA_UNSIGNED, */
+ XVMC_INTRA_UNSIGNED,
+
+ /*&yv12_subpicture_list */
+ NULL
+};
+
+static XF86MCSurfaceInfoPtr surface_info_i965[] = {
+ &yv12_mpeg2_i965_surface,
+ &yv12_mpeg1_i965_surface
+};
+
+static XF86MCSurfaceInfoPtr surface_info_vld[] = {
+ &yv12_mpeg2_vld_surface,
+ &yv12_mpeg2_i965_surface,
+};
+
+/* check chip type and load xvmc driver */
+Bool sna_video_xvmc_setup(struct sna *sna,
+ ScreenPtr screen,
+ XF86VideoAdaptorPtr target)
+{
+ XF86MCAdaptorRec *pAdapt;
+ char *name;
+ char buf[64];
+
+ /* Needs KMS support. */
+ if (IS_I915G(sna) || IS_I915GM(sna))
+ return FALSE;
+
+ if (IS_GEN2(sna))
+ return FALSE;
+
+ pAdapt = calloc(1, sizeof(XF86MCAdaptorRec));
+ if (!pAdapt)
+ return FALSE;
+
+ pAdapt->name = target->name;
+ pAdapt->num_subpictures = 0;
+ pAdapt->subpictures = NULL;
+ pAdapt->CreateContext = create_context;
+ pAdapt->DestroyContext = destroy_context;
+ pAdapt->CreateSurface = create_surface;
+ pAdapt->DestroySurface = destroy_surface;
+ pAdapt->CreateSubpicture = create_subpicture;
+ pAdapt->DestroySubpicture = destroy_subpicture;
+
+ if (sna->kgem.gen >= 45) {
+ name = "xvmc_vld",
+ pAdapt->num_surfaces = ARRAY_SIZE(surface_info_vld);
+ pAdapt->surfaces = surface_info_vld;
+ } else if (sna->kgem.gen >= 40) {
+ name = "i965_xvmc",
+ pAdapt->num_surfaces = ARRAY_SIZE(surface_info_i965);
+ pAdapt->surfaces = surface_info_i965;
+ } else {
+ name = "i915_xvmc",
+ pAdapt->num_surfaces = ARRAY_SIZE(surface_info_i915);
+ pAdapt->surfaces = surface_info_i915;
+ }
+
+ if (xf86XvMCScreenInit(screen, 1, &pAdapt)) {
+ xf86DrvMsg(sna->scrn->scrnIndex, X_INFO,
+ "[XvMC] %s driver initialized.\n",
+ name);
+ } else {
+ xf86DrvMsg(sna->scrn->scrnIndex, X_INFO,
+ "[XvMC] Failed to initialize XvMC.\n");
+ return FALSE;
+ }
+
+ sprintf(buf, "pci:%04x:%02x:%02x.%d",
+ sna->PciInfo->domain,
+ sna->PciInfo->bus, sna->PciInfo->dev, sna->PciInfo->func);
+
+ xf86XvMCRegisterDRInfo(screen, SNA_XVMC_LIBNAME,
+ buf,
+ SNA_XVMC_MAJOR, SNA_XVMC_MINOR,
+ SNA_XVMC_PATCHLEVEL);
+ return TRUE;
+}
diff --git a/src/sna/sna_video_hwmc.h b/src/sna/sna_video_hwmc.h
new file mode 100644
index 00000000..2494d44b
--- /dev/null
+++ b/src/sna/sna_video_hwmc.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Zhenyu Wang <zhenyu.z.wang@sna.com>
+ *
+ */
+#ifndef SNA_VIDEO_HWMC_H
+#define SNA_VIDEO_HWMC_H
+
+#define SNA_XVMC_LIBNAME "IntelXvMC"
+#define SNA_XVMC_MAJOR 0
+#define SNA_XVMC_MINOR 1
+#define SNA_XVMC_PATCHLEVEL 0
+
+/*
+ * Commands that client submits through XvPutImage:
+ */
+
+#define SNA_XVMC_COMMAND_DISPLAY 0x00
+#define SNA_XVMC_COMMAND_UNDISPLAY 0x01
+
+/* hw xvmc support type */
+#define XVMC_I915_MPEG2_MC 0x01
+#define XVMC_I965_MPEG2_MC 0x02
+#define XVMC_I945_MPEG2_VLD 0x04
+#define XVMC_I965_MPEG2_VLD 0x08
+
+struct sna_xvmc_hw_context {
+ unsigned int type;
+ union {
+ struct {
+ unsigned int use_phys_addr : 1;
+ } i915;
+ struct {
+ unsigned int is_g4x:1;
+ unsigned int is_965_q:1;
+ unsigned int is_igdng:1;
+ } i965;
+ };
+};
+
+/* Intel private XvMC command to DDX driver */
+struct sna_xvmc_command {
+ uint32_t handle;
+};
+
+#ifdef _SNA_XVMC_SERVER_
+#include <xf86xvmc.h>
+Bool sna_video_xvmc_setup(struct sna *sna,
+ ScreenPtr screen,
+ XF86VideoAdaptorPtr target);
+#endif
+
+#endif
diff --git a/src/sna/sna_video_overlay.c b/src/sna/sna_video_overlay.c
new file mode 100644
index 00000000..3f7d9557
--- /dev/null
+++ b/src/sna/sna_video_overlay.c
@@ -0,0 +1,731 @@
+/***************************************************************************
+
+ Copyright 2000-2011 Intel Corporation. All Rights Reserved.
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sub license, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial portions
+ of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ IN NO EVENT SHALL INTEL, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_video.h"
+
+#include <xf86xv.h>
+#include <X11/extensions/Xv.h>
+#include <fourcc.h>
+#include <i915_drm.h>
+
+#if DEBUG_VIDEO_OVERLAY
+#undef DBG
+#define DBG(x) ErrorF x
+#endif
+
+#define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE)
+
+#define HAS_GAMMA(sna) ((sna)->kgem.gen >= 30)
+
+static Atom xvBrightness, xvContrast, xvSaturation, xvColorKey, xvPipe;
+static Atom xvGamma0, xvGamma1, xvGamma2, xvGamma3, xvGamma4, xvGamma5;
+static Atom xvSyncToVblank;
+
+/* Limits for the overlay/textured video source sizes. The documented hardware
+ * limits are 2048x2048 or better for overlay and both of our textured video
+ * implementations. Additionally, on the 830 and 845, larger sizes resulted in
+ * the card hanging, so we keep the limits lower there.
+ */
+#define IMAGE_MAX_WIDTH 2048
+#define IMAGE_MAX_HEIGHT 2048
+#define IMAGE_MAX_WIDTH_LEGACY 1024
+#define IMAGE_MAX_HEIGHT_LEGACY 1088
+
+/* client libraries expect an encoding */
+static const XF86VideoEncodingRec DummyEncoding[1] = {
+ {
+ 0,
+ "XV_IMAGE",
+ IMAGE_MAX_WIDTH, IMAGE_MAX_HEIGHT,
+ {1, 1}
+ }
+};
+
+#define NUM_FORMATS 3
+
+static XF86VideoFormatRec Formats[NUM_FORMATS] = {
+ {15, TrueColor}, {16, TrueColor}, {24, TrueColor}
+};
+
+#define NUM_ATTRIBUTES 5
+static XF86AttributeRec Attributes[NUM_ATTRIBUTES] = {
+ {XvSettable | XvGettable, 0, (1 << 24) - 1, "XV_COLORKEY"},
+ {XvSettable | XvGettable, -128, 127, "XV_BRIGHTNESS"},
+ {XvSettable | XvGettable, 0, 255, "XV_CONTRAST"},
+ {XvSettable | XvGettable, 0, 1023, "XV_SATURATION"},
+ {XvSettable | XvGettable, -1, 1, "XV_PIPE"}
+};
+
+#define GAMMA_ATTRIBUTES 6
+static XF86AttributeRec GammaAttributes[GAMMA_ATTRIBUTES] = {
+ {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA0"},
+ {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA1"},
+ {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA2"},
+ {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA3"},
+ {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA4"},
+ {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA5"}
+};
+
+#define NUM_IMAGES 4
+
+static XF86ImageRec Images[NUM_IMAGES] = {
+ XVIMAGE_YUY2,
+ XVIMAGE_YV12,
+ XVIMAGE_I420,
+ XVIMAGE_UYVY,
+};
+
+/* kernel modesetting overlay functions */
+static Bool sna_has_overlay(struct sna *sna)
+{
+ struct drm_i915_getparam gp;
+ int has_overlay = 0;
+ int ret;
+
+ gp.param = I915_PARAM_HAS_OVERLAY;
+ gp.value = &has_overlay;
+ ret = drmCommandWriteRead(sna->kgem.fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
+
+ return !! has_overlay;
+ (void)ret;
+}
+
+static Bool sna_video_overlay_update_attrs(struct sna *sna,
+ struct sna_video *video)
+{
+ struct drm_intel_overlay_attrs attrs;
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ attrs.flags = I915_OVERLAY_UPDATE_ATTRS;
+ attrs.brightness = video->brightness;
+ attrs.contrast = video->contrast;
+ attrs.saturation = video->saturation;
+ attrs.color_key = video->color_key;
+ attrs.gamma0 = video->gamma0;
+ attrs.gamma1 = video->gamma1;
+ attrs.gamma2 = video->gamma2;
+ attrs.gamma3 = video->gamma3;
+ attrs.gamma4 = video->gamma4;
+ attrs.gamma5 = video->gamma5;
+
+ return drmCommandWriteRead(sna->kgem.fd, DRM_I915_OVERLAY_ATTRS,
+ &attrs, sizeof(attrs)) == 0;
+}
+
+static void sna_video_overlay_off(struct sna *sna)
+{
+ struct drm_intel_overlay_put_image request;
+ int ret;
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ request.flags = 0;
+
+ ret = drmCommandWrite(sna->kgem.fd, DRM_I915_OVERLAY_PUT_IMAGE,
+ &request, sizeof(request));
+ (void)ret;
+}
+
+static void sna_video_overlay_stop(ScrnInfoPtr scrn,
+ pointer data,
+ Bool shutdown)
+{
+ struct sna *sna = to_sna(scrn);
+ struct sna_video *video = data;
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ REGION_EMPTY(scrn->pScreen, &video->clip);
+
+ if (!shutdown)
+ return;
+
+ sna_video_overlay_off(sna);
+ sna_video_free_buffers(sna, video);
+}
+
+static int
+sna_video_overlay_set_port_attribute(ScrnInfoPtr scrn,
+ Atom attribute, INT32 value, pointer data)
+{
+ struct sna *sna = to_sna(scrn);
+ struct sna_video *video = data;
+
+ if (attribute == xvBrightness) {
+ if ((value < -128) || (value > 127))
+ return BadValue;
+ DBG(("%s: BRIGHTNESS %d -> %d\n", __FUNCTION__,
+ video->contrast, (int)value));
+ video->brightness = value;
+ } else if (attribute == xvContrast) {
+ if ((value < 0) || (value > 255))
+ return BadValue;
+ DBG(("%s: CONTRAST %d -> %d\n", __FUNCTION__,
+ video->contrast, (int)value));
+ video->contrast = value;
+ } else if (attribute == xvSaturation) {
+ if ((value < 0) || (value > 1023))
+ return BadValue;
+ DBG(("%s: SATURATION %d -> %d\n", __FUNCTION__,
+ video->saturation, (int)value));
+ video->saturation = value;
+ } else if (attribute == xvPipe) {
+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn);
+ if ((value < -1) || (value > xf86_config->num_crtc))
+ return BadValue;
+ if (value < 0)
+ video->desired_crtc = NULL;
+ else
+ video->desired_crtc = xf86_config->crtc[value];
+ } else if (attribute == xvGamma0 && HAS_GAMMA(sna)) {
+ video->gamma0 = value;
+ } else if (attribute == xvGamma1 && HAS_GAMMA(sna)) {
+ video->gamma1 = value;
+ } else if (attribute == xvGamma2 && HAS_GAMMA(sna)) {
+ video->gamma2 = value;
+ } else if (attribute == xvGamma3 && HAS_GAMMA(sna)) {
+ video->gamma3 = value;
+ } else if (attribute == xvGamma4 && HAS_GAMMA(sna)) {
+ video->gamma4 = value;
+ } else if (attribute == xvGamma5 && HAS_GAMMA(sna)) {
+ video->gamma5 = value;
+ } else if (attribute == xvColorKey) {
+ video->color_key = value;
+ DBG(("COLORKEY\n"));
+ } else
+ return BadMatch;
+
+ if ((attribute == xvGamma0 ||
+ attribute == xvGamma1 ||
+ attribute == xvGamma2 ||
+ attribute == xvGamma3 ||
+ attribute == xvGamma4 ||
+ attribute == xvGamma5) && HAS_GAMMA(sna)) {
+ DBG(("%s: GAMMA\n", __FUNCTION__));
+ }
+
+ if (!sna_video_overlay_update_attrs(sna, data))
+ return BadValue;
+
+ if (attribute == xvColorKey)
+ REGION_EMPTY(scrn->pScreen, &video->clip);
+
+ return Success;
+}
+
+static int
+sna_video_overlay_get_port_attribute(ScrnInfoPtr scrn,
+ Atom attribute, INT32 * value, pointer data)
+{
+ struct sna *sna = to_sna(scrn);
+ struct sna_video *video = (struct sna_video *) data;
+
+ if (attribute == xvBrightness) {
+ *value = video->brightness;
+ } else if (attribute == xvContrast) {
+ *value = video->contrast;
+ } else if (attribute == xvSaturation) {
+ *value = video->saturation;
+ } else if (attribute == xvPipe) {
+ int c;
+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn);
+ for (c = 0; c < xf86_config->num_crtc; c++)
+ if (xf86_config->crtc[c] == video->desired_crtc)
+ break;
+ if (c == xf86_config->num_crtc)
+ c = -1;
+ *value = c;
+ } else if (attribute == xvGamma0 && HAS_GAMMA(sna)) {
+ *value = video->gamma0;
+ } else if (attribute == xvGamma1 && HAS_GAMMA(sna)) {
+ *value = video->gamma1;
+ } else if (attribute == xvGamma2 && HAS_GAMMA(sna)) {
+ *value = video->gamma2;
+ } else if (attribute == xvGamma3 && HAS_GAMMA(sna)) {
+ *value = video->gamma3;
+ } else if (attribute == xvGamma4 && HAS_GAMMA(sna)) {
+ *value = video->gamma4;
+ } else if (attribute == xvGamma5 && HAS_GAMMA(sna)) {
+ *value = video->gamma5;
+ } else if (attribute == xvColorKey) {
+ *value = video->color_key;
+ } else if (attribute == xvSyncToVblank) {
+ *value = video->SyncToVblank;
+ } else
+ return BadMatch;
+
+ return Success;
+}
+
+static void
+sna_video_overlay_query_best_size(ScrnInfoPtr scrn,
+ Bool motion,
+ short vid_w, short vid_h,
+ short drw_w, short drw_h,
+ unsigned int *p_w, unsigned int *p_h, pointer data)
+{
+ if (vid_w > (drw_w << 1))
+ drw_w = vid_w >> 1;
+ if (vid_h > (drw_h << 1))
+ drw_h = vid_h >> 1;
+
+ *p_w = drw_w;
+ *p_h = drw_h;
+}
+
+static void
+update_dst_box_to_crtc_coords(struct sna *sna, xf86CrtcPtr crtc, BoxPtr dstBox)
+{
+ ScrnInfoPtr scrn = sna->scrn;
+ int tmp;
+
+ /* for overlay, we should take it from crtc's screen
+ * coordinate to current crtc's display mode.
+ * yeah, a bit confusing.
+ */
+ switch (crtc->rotation & 0xf) {
+ case RR_Rotate_0:
+ dstBox->x1 -= crtc->x;
+ dstBox->x2 -= crtc->x;
+ dstBox->y1 -= crtc->y;
+ dstBox->y2 -= crtc->y;
+ break;
+ case RR_Rotate_90:
+ tmp = dstBox->x1;
+ dstBox->x1 = dstBox->y1 - crtc->x;
+ dstBox->y1 = scrn->virtualX - tmp - crtc->y;
+ tmp = dstBox->x2;
+ dstBox->x2 = dstBox->y2 - crtc->x;
+ dstBox->y2 = scrn->virtualX - tmp - crtc->y;
+ tmp = dstBox->y1;
+ dstBox->y1 = dstBox->y2;
+ dstBox->y2 = tmp;
+ break;
+ case RR_Rotate_180:
+ tmp = dstBox->x1;
+ dstBox->x1 = scrn->virtualX - dstBox->x2 - crtc->x;
+ dstBox->x2 = scrn->virtualX - tmp - crtc->x;
+ tmp = dstBox->y1;
+ dstBox->y1 = scrn->virtualY - dstBox->y2 - crtc->y;
+ dstBox->y2 = scrn->virtualY - tmp - crtc->y;
+ break;
+ case RR_Rotate_270:
+ tmp = dstBox->x1;
+ dstBox->x1 = scrn->virtualY - dstBox->y1 - crtc->x;
+ dstBox->y1 = tmp - crtc->y;
+ tmp = dstBox->x2;
+ dstBox->x2 = scrn->virtualY - dstBox->y2 - crtc->x;
+ dstBox->y2 = tmp - crtc->y;
+ tmp = dstBox->x1;
+ dstBox->x1 = dstBox->x2;
+ dstBox->x2 = tmp;
+ break;
+ }
+
+ return;
+}
+
+static Bool
+sna_video_overlay_show(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ xf86CrtcPtr crtc,
+ BoxPtr dstBox,
+ short src_w, short src_h,
+ short drw_w, short drw_h)
+{
+ struct drm_intel_overlay_put_image request;
+ bool planar = is_planar_fourcc(frame->id);
+ float scale;
+
+ DBG(("%s: src=(%dx%d), dst=(%dx%d)\n", __FUNCTION__,
+ src_w, src_h, drw_w, drw_h));
+
+ update_dst_box_to_crtc_coords(sna, crtc, dstBox);
+ if (crtc->rotation & (RR_Rotate_90 | RR_Rotate_270)) {
+ int tmp;
+
+ tmp = frame->width;
+ frame->width = frame->height;
+ frame->height = tmp;
+
+ tmp = drw_w;
+ drw_w = drw_h;
+ drw_h = tmp;
+
+ tmp = src_w;
+ src_w = src_h;
+ src_h = tmp;
+ }
+
+ memset(&request, 0, sizeof(request));
+ request.flags = I915_OVERLAY_ENABLE;
+
+ request.bo_handle = frame->bo->handle;
+ if (planar) {
+ request.stride_Y = frame->pitch[1];
+ request.stride_UV = frame->pitch[0];
+ } else {
+ request.stride_Y = frame->pitch[0];
+ request.stride_UV = 0;
+ }
+ request.offset_Y = frame->YBufOffset;
+ request.offset_U = frame->UBufOffset;
+ request.offset_V = frame->VBufOffset;
+ DBG(("%s: handle=%d, stride_Y=%d, stride_UV=%d, off_Y: %i, off_U: %i, off_V: %i\n",
+ __FUNCTION__,
+ request.bo_handle, request.stride_Y, request.stride_UV,
+ request.offset_Y, request.offset_U, request.offset_V));
+
+ request.crtc_id = sna_crtc_id(crtc);
+ request.dst_x = dstBox->x1;
+ request.dst_y = dstBox->y1;
+ request.dst_width = dstBox->x2 - dstBox->x1;
+ request.dst_height = dstBox->y2 - dstBox->y1;
+
+ DBG(("%s: crtc=%d, dst=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__, request.crtc_id,
+ request.dst_x, request.dst_y,
+ request.dst_width, request.dst_height));
+
+ request.src_width = frame->width;
+ request.src_height = frame->height;
+ /* adjust src dimensions */
+ if (request.dst_height > 1) {
+ scale = ((float)request.dst_height - 1) / ((float)drw_h - 1);
+ request.src_scan_height = src_h * scale;
+ } else
+ request.src_scan_height = 1;
+
+ if (request.dst_width > 1) {
+ scale = ((float)request.dst_width - 1) / ((float)drw_w - 1);
+ request.src_scan_width = src_w * scale;
+ } else
+ request.src_scan_width = 1;
+
+ DBG(("%s: src=(%d, %d) scan=(%d, %d)\n",
+ __FUNCTION__,
+ request.src_width, request.src_height,
+ request.src_scan_width, request.src_scan_height));
+
+ if (planar) {
+ request.flags |= I915_OVERLAY_YUV_PLANAR | I915_OVERLAY_YUV420;
+ } else {
+ request.flags |= I915_OVERLAY_YUV_PACKED | I915_OVERLAY_YUV422;
+ if (frame->id == FOURCC_UYVY)
+ request.flags |= I915_OVERLAY_Y_SWAP;
+ }
+
+ DBG(("%s: flags=%x\n", __FUNCTION__, request.flags));
+
+ return drmCommandWrite(sna->kgem.fd, DRM_I915_OVERLAY_PUT_IMAGE,
+ &request, sizeof(request)) == 0;
+}
+
+static int
+sna_video_overlay_put_image(ScrnInfoPtr scrn,
+ short src_x, short src_y,
+ short drw_x, short drw_y,
+ short src_w, short src_h,
+ short drw_w, short drw_h,
+ int id, unsigned char *buf,
+ short width, short height,
+ Bool sync, RegionPtr clip, pointer data,
+ DrawablePtr drawable)
+{
+ struct sna *sna = to_sna(scrn);
+ struct sna_video *video = data;
+ struct sna_video_frame frame;
+ BoxRec dstBox;
+ xf86CrtcPtr crtc;
+ int top, left, npixels, nlines;
+
+ DBG(("%s: src: (%d,%d)(%d,%d), dst: (%d,%d)(%d,%d), width %d, height %d\n",
+ __FUNCTION__,
+ src_x, src_y, src_w, src_h, drw_x,
+ drw_y, drw_w, drw_h, width, height));
+
+ /* If dst width and height are less than 1/8th the src size, the
+ * src/dst scale factor becomes larger than 8 and doesn't fit in
+ * the scale register. */
+ if (src_w >= (drw_w * 8))
+ drw_w = src_w / 7;
+
+ if (src_h >= (drw_h * 8))
+ drw_h = src_h / 7;
+
+ if (!sna_video_clip_helper(scrn,
+ video,
+ &crtc,
+ &dstBox,
+ src_x, src_y, drw_x, drw_y,
+ src_w, src_h, drw_w, drw_h,
+ id,
+ &top, &left, &npixels, &nlines, clip,
+ width, height))
+ return Success;
+
+ if (!crtc) {
+ /*
+ * If the video isn't visible on any CRTC, turn it off
+ */
+ sna_video_overlay_off(sna);
+ return Success;
+ }
+
+ sna_video_frame_init(sna, video, id, width, height, &frame);
+
+ /* overlay can't handle rotation natively, store it for the copy func */
+ video->rotation = crtc->rotation;
+ if (!sna_video_copy_data(sna, video, &frame,
+ top, left, npixels, nlines, buf)) {
+ DBG(("%s: failed to copy video data\n", __FUNCTION__));
+ return BadAlloc;
+ }
+
+ if (!sna_video_overlay_show
+ (sna, video, &frame, crtc, &dstBox, src_w, src_h, drw_w, drw_h)) {
+ DBG(("%s: failed to show video frame\n", __FUNCTION__));
+ return BadAlloc;
+ }
+
+ sna_video_frame_fini(sna, video, &frame);
+
+ /* update cliplist */
+ if (!REGION_EQUAL(scrn->pScreen, &video->clip, clip)) {
+ REGION_COPY(scrn->pScreen, &video->clip, clip);
+ xf86XVFillKeyHelperDrawable(drawable, video->color_key, clip);
+ }
+
+ return Success;
+}
+
+static int
+sna_video_overlay_query_video_attributes(ScrnInfoPtr scrn,
+ int id,
+ unsigned short *w, unsigned short *h,
+ int *pitches, int *offsets)
+{
+ struct sna *sna = to_sna(scrn);
+ int size, tmp;
+
+ DBG(("%s: w is %d, h is %d\n", __FUNCTION__, *w, *h));
+
+ if (IS_845G(sna) || IS_I830(sna)) {
+ if (*w > IMAGE_MAX_WIDTH_LEGACY)
+ *w = IMAGE_MAX_WIDTH_LEGACY;
+ if (*h > IMAGE_MAX_HEIGHT_LEGACY)
+ *h = IMAGE_MAX_HEIGHT_LEGACY;
+ } else {
+ if (*w > IMAGE_MAX_WIDTH)
+ *w = IMAGE_MAX_WIDTH;
+ if (*h > IMAGE_MAX_HEIGHT)
+ *h = IMAGE_MAX_HEIGHT;
+ }
+
+ *w = (*w + 1) & ~1;
+ if (offsets)
+ offsets[0] = 0;
+
+ switch (id) {
+ /* IA44 is for XvMC only */
+ case FOURCC_IA44:
+ case FOURCC_AI44:
+ if (pitches)
+ pitches[0] = *w;
+ size = *w * *h;
+ break;
+ case FOURCC_YV12:
+ case FOURCC_I420:
+ *h = (*h + 1) & ~1;
+ size = (*w + 3) & ~3;
+ if (pitches)
+ pitches[0] = size;
+ size *= *h;
+ if (offsets)
+ offsets[1] = size;
+ tmp = ((*w >> 1) + 3) & ~3;
+ if (pitches)
+ pitches[1] = pitches[2] = tmp;
+ tmp *= (*h >> 1);
+ size += tmp;
+ if (offsets)
+ offsets[2] = size;
+ size += tmp;
+#if 0
+ if (pitches)
+ ErrorF("pitch 0 is %d, pitch 1 is %d, pitch 2 is %d\n",
+ pitches[0], pitches[1], pitches[2]);
+ if (offsets)
+ ErrorF("offset 1 is %d, offset 2 is %d\n", offsets[1],
+ offsets[2]);
+ if (offsets)
+ ErrorF("size is %d\n", size);
+#endif
+ break;
+ case FOURCC_UYVY:
+ case FOURCC_YUY2:
+ default:
+ size = *w << 1;
+ if (pitches)
+ pitches[0] = size;
+ size *= *h;
+ break;
+ }
+
+ return size;
+}
+
+static int sna_video_overlay_color_key(struct sna *sna)
+{
+ ScrnInfoPtr scrn = sna->scrn;
+ int color_key;
+
+ if (xf86GetOptValInteger(sna->Options, OPTION_VIDEO_KEY,
+ &color_key)) {
+ } else if (xf86GetOptValInteger(sna->Options, OPTION_COLOR_KEY,
+ &color_key)) {
+ } else {
+ color_key =
+ (1 << scrn->offset.red) |
+ (1 << scrn->offset.green) |
+ (((scrn->mask.blue >> scrn->offset.blue) - 1) << scrn->offset.blue);
+ }
+
+ return color_key & ((1 << scrn->depth) - 1);
+}
+
+XF86VideoAdaptorPtr sna_video_overlay_setup(struct sna *sna,
+ ScreenPtr screen)
+{
+ XF86VideoAdaptorPtr adaptor;
+ struct sna_video *video;
+ XF86AttributePtr att;
+
+ if (!sna_has_overlay(sna)) {
+ xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING,
+ "Overlay video not supported on this hardware\n");
+ return NULL;
+ }
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ if (!(adaptor = calloc(1,
+ sizeof(XF86VideoAdaptorRec) +
+ sizeof(struct sna_video) +
+ sizeof(DevUnion))))
+ return NULL;
+
+ adaptor->type = XvWindowMask | XvInputMask | XvImageMask;
+ adaptor->flags = VIDEO_OVERLAID_IMAGES /*| VIDEO_CLIP_TO_VIEWPORT */ ;
+ adaptor->name = "Intel(R) Video Overlay";
+ adaptor->nEncodings = 1;
+ adaptor->pEncodings = xnfalloc(sizeof(DummyEncoding));
+ memcpy(adaptor->pEncodings, DummyEncoding, sizeof(DummyEncoding));
+ if (IS_845G(sna) || IS_I830(sna)) {
+ adaptor->pEncodings->width = IMAGE_MAX_WIDTH_LEGACY;
+ adaptor->pEncodings->height = IMAGE_MAX_HEIGHT_LEGACY;
+ }
+ adaptor->nFormats = NUM_FORMATS;
+ adaptor->pFormats = Formats;
+ adaptor->nPorts = 1;
+ adaptor->pPortPrivates = (DevUnion *)&adaptor[1];
+
+ video = (struct sna_video *)&adaptor->pPortPrivates[1];
+
+ adaptor->pPortPrivates[0].ptr = video;
+ adaptor->nAttributes = NUM_ATTRIBUTES;
+ if (HAS_GAMMA(sna))
+ adaptor->nAttributes += GAMMA_ATTRIBUTES;
+ adaptor->pAttributes =
+ xnfalloc(sizeof(XF86AttributeRec) * adaptor->nAttributes);
+ /* Now copy the attributes */
+ att = adaptor->pAttributes;
+ memcpy(att, Attributes, sizeof(XF86AttributeRec) * NUM_ATTRIBUTES);
+ att += NUM_ATTRIBUTES;
+ if (HAS_GAMMA(sna)) {
+ memcpy(att, GammaAttributes,
+ sizeof(XF86AttributeRec) * GAMMA_ATTRIBUTES);
+ att += GAMMA_ATTRIBUTES;
+ }
+ adaptor->nImages = NUM_IMAGES;
+ adaptor->pImages = Images;
+ adaptor->PutVideo = NULL;
+ adaptor->PutStill = NULL;
+ adaptor->GetVideo = NULL;
+ adaptor->GetStill = NULL;
+ adaptor->StopVideo = sna_video_overlay_stop;
+ adaptor->SetPortAttribute = sna_video_overlay_set_port_attribute;
+ adaptor->GetPortAttribute = sna_video_overlay_get_port_attribute;
+ adaptor->QueryBestSize = sna_video_overlay_query_best_size;
+ adaptor->PutImage = sna_video_overlay_put_image;
+ adaptor->QueryImageAttributes = sna_video_overlay_query_video_attributes;
+
+ video->textured = FALSE;
+ video->color_key = sna_video_overlay_color_key(sna);
+ video->brightness = -19; /* (255/219) * -16 */
+ video->contrast = 75; /* 255/219 * 64 */
+ video->saturation = 146; /* 128/112 * 128 */
+ video->desired_crtc = NULL;
+ video->gamma5 = 0xc0c0c0;
+ video->gamma4 = 0x808080;
+ video->gamma3 = 0x404040;
+ video->gamma2 = 0x202020;
+ video->gamma1 = 0x101010;
+ video->gamma0 = 0x080808;
+
+ video->rotation = RR_Rotate_0;
+
+ /* gotta uninit this someplace */
+ REGION_NULL(screen, &video->clip);
+
+ xvColorKey = MAKE_ATOM("XV_COLORKEY");
+ xvBrightness = MAKE_ATOM("XV_BRIGHTNESS");
+ xvContrast = MAKE_ATOM("XV_CONTRAST");
+ xvSaturation = MAKE_ATOM("XV_SATURATION");
+
+ /* Allow the pipe to be switched from pipe A to B when in clone mode */
+ xvPipe = MAKE_ATOM("XV_PIPE");
+
+ if (HAS_GAMMA(sna)) {
+ xvGamma0 = MAKE_ATOM("XV_GAMMA0");
+ xvGamma1 = MAKE_ATOM("XV_GAMMA1");
+ xvGamma2 = MAKE_ATOM("XV_GAMMA2");
+ xvGamma3 = MAKE_ATOM("XV_GAMMA3");
+ xvGamma4 = MAKE_ATOM("XV_GAMMA4");
+ xvGamma5 = MAKE_ATOM("XV_GAMMA5");
+ }
+
+ sna_video_overlay_update_attrs(sna, video);
+
+ return adaptor;
+}
diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c
new file mode 100644
index 00000000..66c70d4a
--- /dev/null
+++ b/src/sna/sna_video_textured.c
@@ -0,0 +1,428 @@
+/***************************************************************************
+
+ Copyright 2000-2011 Intel Corporation. All Rights Reserved.
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sub license, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial portions
+ of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ IN NO EVENT SHALL INTEL, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+#include "sna_video.h"
+
+#include <xf86xv.h>
+#include <X11/extensions/Xv.h>
+
+#ifdef SNA_XVMC
+#define _SNA_XVMC_SERVER_
+#include "sna_video_hwmc.h"
+#endif
+
+#if DEBUG_VIDEO_TEXTURED
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+#define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE)
+
+static Atom xvBrightness, xvContrast, xvSyncToVblank;
+
+/* client libraries expect an encoding */
+static const XF86VideoEncodingRec DummyEncoding[1] = {
+ {
+ 0,
+ "XV_IMAGE",
+ 8192, 8192,
+ {1, 1}
+ }
+};
+
+#define NUM_FORMATS 3
+
+static XF86VideoFormatRec Formats[NUM_FORMATS] = {
+ {15, TrueColor}, {16, TrueColor}, {24, TrueColor}
+};
+
+#define NUM_TEXTURED_ATTRIBUTES 3
+static XF86AttributeRec TexturedAttributes[NUM_TEXTURED_ATTRIBUTES] = {
+ {XvSettable | XvGettable, -128, 127, "XV_BRIGHTNESS"},
+ {XvSettable | XvGettable, 0, 255, "XV_CONTRAST"},
+ {XvSettable | XvGettable, -1, 1, "XV_SYNC_TO_VBLANK"},
+};
+
+#ifdef SNA_XVMC
+#define NUM_IMAGES 5
+#define XVMC_IMAGE 1
+#else
+#define NUM_IMAGES 4
+#define XVMC_IMAGE 0
+#endif
+
+static XF86ImageRec Images[NUM_IMAGES] = {
+ XVIMAGE_YUY2,
+ XVIMAGE_YV12,
+ XVIMAGE_I420,
+ XVIMAGE_UYVY,
+#ifdef SNA_XVMC
+ {
+ /*
+ * Below, a dummy picture type that is used in XvPutImage
+ * only to do an overlay update.
+ * Introduced for the XvMC client lib.
+ * Defined to have a zero data size.
+ */
+ FOURCC_XVMC,
+ XvYUV,
+ LSBFirst,
+ {'X', 'V', 'M', 'C',
+ 0x00, 0x00, 0x00, 0x10, 0x80, 0x00, 0x00, 0xAA, 0x00,
+ 0x38, 0x9B, 0x71},
+ 12,
+ XvPlanar,
+ 3,
+ 0, 0, 0, 0,
+ 8, 8, 8,
+ 1, 2, 2,
+ 1, 2, 2,
+ {'Y', 'V', 'U',
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ XvTopToBottom},
+#endif
+};
+
+static int xvmc_passthrough(int id)
+{
+#ifdef SNA_XVMC
+ return id == FOURCC_XVMC;
+#else
+ return 0;
+#endif
+}
+
+static void sna_video_textured_stop(ScrnInfoPtr scrn,
+ pointer data,
+ Bool shutdown)
+{
+ struct sna *sna = to_sna(scrn);
+ struct sna_video *video = data;
+
+ DBG(("%s()\n", __FUNCTION__));
+
+ REGION_EMPTY(scrn->pScreen, &video->clip);
+
+ if (!shutdown)
+ return;
+
+ sna_video_free_buffers(sna, video);
+}
+
+static int
+sna_video_textured_set_attribute(ScrnInfoPtr scrn,
+ Atom attribute,
+ INT32 value,
+ pointer data)
+{
+ struct sna_video *video = data;
+
+ if (attribute == xvBrightness) {
+ if (value < -128 || value > 127)
+ return BadValue;
+
+ video->brightness = value;
+ } else if (attribute == xvContrast) {
+ if (value < 0 || value > 255)
+ return BadValue;
+
+ video->contrast = value;
+ } else if (attribute == xvSyncToVblank) {
+ if (value < -1 || value > 1)
+ return BadValue;
+
+ video->SyncToVblank = value;
+ } else
+ return BadMatch;
+
+ return Success;
+}
+
+static int
+sna_video_textured_get_attribute(ScrnInfoPtr scrn,
+ Atom attribute,
+ INT32 *value,
+ pointer data)
+{
+ struct sna_video *video = data;
+
+ if (attribute == xvBrightness)
+ *value = video->brightness;
+ else if (attribute == xvContrast)
+ *value = video->contrast;
+ else if (attribute == xvSyncToVblank)
+ *value = video->SyncToVblank;
+ else
+ return BadMatch;
+
+ return Success;
+}
+
+static void
+sna_video_textured_best_size(ScrnInfoPtr scrn,
+ Bool motion,
+ short vid_w, short vid_h,
+ short drw_w, short drw_h,
+ unsigned int *p_w,
+ unsigned int *p_h,
+ pointer data)
+{
+ if (vid_w > (drw_w << 1))
+ drw_w = vid_w >> 1;
+ if (vid_h > (drw_h << 1))
+ drw_h = vid_h >> 1;
+
+ *p_w = drw_w;
+ *p_h = drw_h;
+}
+
+/*
+ * The source rectangle of the video is defined by (src_x, src_y, src_w, src_h).
+ * The dest rectangle of the video is defined by (drw_x, drw_y, drw_w, drw_h).
+ * id is a fourcc code for the format of the video.
+ * buf is the pointer to the source data in system memory.
+ * width and height are the w/h of the source data.
+ * If "sync" is TRUE, then we must be finished with *buf at the point of return
+ * (which we always are).
+ * clip is the clipping region in screen space.
+ * data is a pointer to our port private.
+ * drawable is some Drawable, which might not be the screen in the case of
+ * compositing. It's a new argument to the function in the 1.1 server.
+ */
+static int
+sna_video_textured_put_image(ScrnInfoPtr scrn,
+ short src_x, short src_y,
+ short drw_x, short drw_y,
+ short src_w, short src_h,
+ short drw_w, short drw_h,
+ int id, unsigned char *buf,
+ short width, short height,
+ Bool sync, RegionPtr clip, pointer data,
+ DrawablePtr drawable)
+{
+ struct sna *sna = to_sna(scrn);
+ struct sna_video *video = data;
+ struct sna_video_frame frame;
+ PixmapPtr pixmap = get_drawable_pixmap(drawable);
+ BoxRec dstBox;
+ xf86CrtcPtr crtc;
+ int top, left, npixels, nlines;
+
+ if (!sna_video_clip_helper(scrn, video, &crtc, &dstBox,
+ src_x, src_y, drw_x, drw_y,
+ src_w, src_h, drw_w, drw_h,
+ id,
+ &top, &left, &npixels, &nlines,
+ clip, width, height))
+ return Success;
+
+ sna_video_frame_init(sna, video, id, width, height, &frame);
+
+ if (xvmc_passthrough(id)) {
+ if (IS_I915G(sna) || IS_I915GM(sna)) {
+ /* XXX: i915 is not support and needs some
+ * serious care. grep for KMS in i915_hwmc.c */
+ return BadAlloc;
+ }
+
+ frame.bo = kgem_create_for_name(&sna->kgem, *(uint32_t*)buf);
+ if (frame.bo == NULL)
+ return BadAlloc;
+ } else {
+ if (!sna_video_copy_data(sna, video, &frame,
+ top, left, npixels, nlines,
+ buf))
+ return BadAlloc;
+ }
+
+ if (crtc && video->SyncToVblank != 0)
+ sna_wait_for_scanline(sna, pixmap, crtc, clip);
+
+ sna->render.video(sna, video, &frame, clip,
+ src_w, src_h,
+ drw_w, drw_h,
+ pixmap);
+
+ sna_video_frame_fini(sna, video, &frame);
+
+ DamageDamageRegion(drawable, clip);
+
+ /* Push the frame to the GPU as soon as possible so
+ * we can hit the next vsync.
+ */
+ kgem_submit(&sna->kgem);
+
+ return Success;
+}
+
+static int
+sna_video_textured_query(ScrnInfoPtr scrn,
+ int id,
+ unsigned short *w, unsigned short *h,
+ int *pitches, int *offsets)
+{
+ int size, tmp;
+
+ if (*w > 8192)
+ *w = 8192;
+ if (*h > 8192)
+ *h = 8192;
+
+ *w = (*w + 1) & ~1;
+ if (offsets)
+ offsets[0] = 0;
+
+ switch (id) {
+ /* IA44 is for XvMC only */
+ case FOURCC_IA44:
+ case FOURCC_AI44:
+ if (pitches)
+ pitches[0] = *w;
+ size = *w * *h;
+ break;
+ case FOURCC_YV12:
+ case FOURCC_I420:
+ *h = (*h + 1) & ~1;
+ size = (*w + 3) & ~3;
+ if (pitches)
+ pitches[0] = size;
+ size *= *h;
+ if (offsets)
+ offsets[1] = size;
+ tmp = ((*w >> 1) + 3) & ~3;
+ if (pitches)
+ pitches[1] = pitches[2] = tmp;
+ tmp *= (*h >> 1);
+ size += tmp;
+ if (offsets)
+ offsets[2] = size;
+ size += tmp;
+ break;
+ case FOURCC_UYVY:
+ case FOURCC_YUY2:
+ default:
+ size = *w << 1;
+ if (pitches)
+ pitches[0] = size;
+ size *= *h;
+ break;
+#ifdef SNA_XVMC
+ case FOURCC_XVMC:
+ *h = (*h + 1) & ~1;
+ size = sizeof(struct sna_xvmc_command);
+ if (pitches)
+ pitches[0] = size;
+ break;
+#endif
+ }
+
+ return size;
+}
+
+XF86VideoAdaptorPtr sna_video_textured_setup(struct sna *sna,
+ ScreenPtr screen)
+{
+ XF86VideoAdaptorPtr adaptor;
+ XF86AttributePtr attrs;
+ struct sna_video *video;
+ DevUnion *devUnions;
+ int nports = 16, i;
+
+ if (!sna->render.video) {
+ xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING,
+ "Textured video not supported on this hardware\n");
+ return NULL;
+ }
+
+ adaptor = calloc(1, sizeof(XF86VideoAdaptorRec));
+ video = calloc(nports, sizeof(struct sna_video));
+ devUnions = calloc(nports, sizeof(DevUnion));
+ attrs = calloc(NUM_TEXTURED_ATTRIBUTES, sizeof(XF86AttributeRec));
+ if (adaptor == NULL ||
+ video == NULL ||
+ devUnions == NULL ||
+ attrs == NULL) {
+ free(adaptor);
+ free(video);
+ free(devUnions);
+ free(attrs);
+ return NULL;
+ }
+
+ adaptor->type = XvWindowMask | XvInputMask | XvImageMask;
+ adaptor->flags = 0;
+ adaptor->name = "Intel(R) Textured Video";
+ adaptor->nEncodings = 1;
+ adaptor->pEncodings = xnfalloc(sizeof(DummyEncoding));
+ memcpy(adaptor->pEncodings, DummyEncoding, sizeof(DummyEncoding));
+ adaptor->nFormats = NUM_FORMATS;
+ adaptor->pFormats = Formats;
+ adaptor->nPorts = nports;
+ adaptor->pPortPrivates = devUnions;
+ adaptor->nAttributes = NUM_TEXTURED_ATTRIBUTES;
+ adaptor->pAttributes = attrs;
+ memcpy(attrs, TexturedAttributes,
+ NUM_TEXTURED_ATTRIBUTES * sizeof(XF86AttributeRec));
+ adaptor->nImages = NUM_IMAGES;
+ adaptor->pImages = Images;
+ adaptor->PutVideo = NULL;
+ adaptor->PutStill = NULL;
+ adaptor->GetVideo = NULL;
+ adaptor->GetStill = NULL;
+ adaptor->StopVideo = sna_video_textured_stop;
+ adaptor->SetPortAttribute = sna_video_textured_set_attribute;
+ adaptor->GetPortAttribute = sna_video_textured_get_attribute;
+ adaptor->QueryBestSize = sna_video_textured_best_size;
+ adaptor->PutImage = sna_video_textured_put_image;
+ adaptor->QueryImageAttributes = sna_video_textured_query;
+
+ for (i = 0; i < nports; i++) {
+ struct sna_video *v = &video[i];
+
+ v->textured = TRUE;
+ v->rotation = RR_Rotate_0;
+ v->SyncToVblank = 1;
+
+ /* gotta uninit this someplace, XXX: shouldn't be necessary for textured */
+ RegionNull(&v->clip);
+
+ adaptor->pPortPrivates[i].ptr = v;
+ }
+
+ xvBrightness = MAKE_ATOM("XV_BRIGHTNESS");
+ xvContrast = MAKE_ATOM("XV_CONTRAST");
+ xvSyncToVblank = MAKE_ATOM("XV_SYNC_TO_VBLANK");
+
+ return adaptor;
+}