summaryrefslogtreecommitdiff
path: root/src/sna/gen4_render.h
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2011-04-08 07:17:14 +0100
committerChris Wilson <chris@chris-wilson.co.uk>2011-06-04 09:19:46 +0100
commitbcef98af561939aa48d9236b2dfa2c5626adf4cb (patch)
tree9d05558947a97595a6fdece968b50eeae45bbfb1 /src/sna/gen4_render.h
parent340cfb7f5271fd1df4c8948e5c9336f5b69a6e6c (diff)
sna: Introduce a new acceleration model.
The premise is that switching between rings (i.e. the BLT and RENDER rings) on SandyBridge imposes a large latency overhead whilst rendering. The cause is that in order to switch rings, we need to split the batch earlier than is desired and to add serialisation between the rings. Both of which incur large overhead. By switching to using a pure 3D blit engine (ok, not so pure as the BLT engine still has uses for the core drawing model which can not be easily represented without a combinatorial explosion of shaders) we can take advantage of additional efficiencies, such as relative relocations, that have been incorporated into recent hardware advances. However, even older hardware performs better from avoiding the implicit context switches and from the batching efficiency of the 3D pipeline... But this is X, and PolyGlyphBlt still exists and remains in use. So for the operations that are not worth accelerating in hardware, we introduce a shadow buffer mechanism through out and reintroduce pixmap migration. Doing this efficiently is the cornerstone of ensuring that we do exploit the increased potential of recent hardware for running old applications and environments (i.e. so that the latest and greatest chip is actually faster than gen2!) For the curious, sna is SandyBridge's New Acceleration. If you are running older chipsets and welcome the performance increase offered by this patch, then you may choose to call it Snazzy instead. 
Speedups ======== gen3 firefox-fishtank 1203584.56 (1203842.75 0.01%) -> 85561.71 (125146.44 14.87%): 14.07x speedup gen5 grads-heat-map 3385.42 (3489.73 1.44%) -> 350.29 (350.75 0.18%): 9.66x speedup gen3 xfce4-terminal-a1 4179.02 (4180.09 0.06%) -> 503.90 (531.88 4.48%): 8.29x speedup gen4 grads-heat-map 2458.66 (2826.34 4.64%) -> 348.82 (349.20 0.29%): 7.05x speedup gen3 grads-heat-map 1443.33 (1445.32 0.09%) -> 298.55 (298.76 0.05%): 4.83x speedup gen3 swfdec-youtube 3836.14 (3894.14 0.95%) -> 889.84 (979.56 5.99%): 4.31x speedup gen6 grads-heat-map 742.11 (744.44 0.15%) -> 172.51 (172.93 0.20%): 4.30x speedup gen3 firefox-talos-svg 71740.44 (72370.13 0.59%) -> 21959.29 (21995.09 0.68%): 3.27x speedup gen5 gvim 8045.51 (8071.47 0.17%) -> 2589.38 (3246.78 10.74%): 3.11x speedup gen6 poppler 3800.78 (3817.92 0.24%) -> 1227.36 (1230.12 0.30%): 3.10x speedup gen6 gnome-terminal-vim 9106.84 (9111.56 0.03%) -> 3459.49 (3478.52 0.25%): 2.63x speedup gen5 midori-zoomed 9564.53 (9586.58 0.17%) -> 3677.73 (3837.02 2.02%): 2.60x speedup gen5 gnome-terminal-vim 38167.25 (38215.82 0.08%) -> 14901.09 (14902.28 0.01%): 2.56x speedup gen5 poppler 13575.66 (13605.04 0.16%) -> 5554.27 (5555.84 0.01%): 2.44x speedup gen5 swfdec-giant-steps 8941.61 (8988.72 0.52%) -> 3851.98 (3871.01 0.93%): 2.32x speedup gen5 xfce4-terminal-a1 18956.60 (18986.90 0.07%) -> 8362.75 (8365.70 0.01%): 2.27x speedup gen5 firefox-fishtank 88750.31 (88858.23 0.14%) -> 39164.57 (39835.54 0.80%): 2.27x speedup gen3 midori-zoomed 2392.13 (2397.82 0.14%) -> 1109.96 (1303.10 30.35%): 2.16x speedup gen6 gvim 2510.34 (2513.34 0.20%) -> 1200.76 (1204.30 0.22%): 2.09x speedup gen5 firefox-planet-gnome 40478.16 (40565.68 0.09%) -> 19606.22 (19648.79 0.16%): 2.06x speedup gen5 gnome-system-monitor 10344.47 (10385.62 0.29%) -> 5136.69 (5256.85 1.15%): 2.01x speedup gen3 poppler 2595.23 (2603.10 0.17%) -> 1297.56 (1302.42 0.61%): 2.00x speedup gen6 firefox-talos-gfx 7184.03 (7194.97 0.13%) -> 3806.31 (3811.66 0.06%): 
1.89x speedup gen5 evolution 8739.25 (8766.12 0.27%) -> 4817.54 (5050.96 1.54%): 1.81x speedup gen3 evolution 1684.06 (1696.88 0.35%) -> 1004.99 (1008.55 0.85%): 1.68x speedup gen3 gnome-terminal-vim 4285.13 (4287.68 0.04%) -> 2715.97 (3202.17 13.52%): 1.58x speedup gen5 swfdec-youtube 5843.94 (5951.07 0.91%) -> 3810.86 (3826.04 1.32%): 1.53x speedup gen4 poppler 7496.72 (7558.83 0.58%) -> 5125.08 (5247.65 1.44%): 1.46x speedup gen4 gnome-terminal-vim 21126.24 (21292.08 0.85%) -> 14590.25 (15066.33 1.80%): 1.45x speedup gen5 firefox-talos-svg 99873.69 (100300.95 0.37%) -> 70745.66 (70818.86 0.05%): 1.41x speedup gen4 firefox-planet-gnome 28205.10 (28304.45 0.27%) -> 19996.11 (20081.44 0.56%): 1.41x speedup gen5 firefox-talos-gfx 93070.85 (93194.72 0.10%) -> 67687.93 (70374.37 1.30%): 1.37x speedup gen4 evolution 6696.25 (6854.14 0.85%) -> 4958.62 (5027.73 0.85%): 1.35x speedup gen3 swfdec-giant-steps 2538.03 (2539.30 0.04%) -> 1895.71 (2050.62 62.43%): 1.34x speedup gen4 gvim 4356.18 (4422.78 0.70%) -> 3276.31 (3281.69 0.13%): 1.33x speedup gen6 evolution 1242.13 (1245.44 0.72%) -> 953.76 (954.54 0.07%): 1.30x speedup gen6 firefox-planet-gnome 4554.23 (4560.69 0.08%) -> 3758.76 (3768.97 0.28%): 1.21x speedup gen3 firefox-talos-gfx 6264.13 (6284.65 0.30%) -> 5261.56 (5370.87 1.28%): 1.19x speedup gen4 midori-zoomed 4771.13 (4809.90 0.73%) -> 4037.03 (4118.93 0.85%): 1.18x speedup gen6 swfdec-giant-steps 1557.06 (1560.13 0.12%) -> 1336.34 (1341.29 0.32%): 1.17x speedup gen4 firefox-talos-gfx 80767.28 (80986.31 0.17%) -> 69629.08 (69721.71 0.06%): 1.16x speedup gen6 midori-zoomed 1463.70 (1463.76 0.08%) -> 1331.45 (1336.56 0.22%): 1.10x speedup Slowdowns ========= gen6 xfce4-terminal-a1 2030.25 (2036.23 0.25%) -> 2144.60 (2240.31 4.29%): 1.06x slowdown gen4 swfdec-youtube 3580.00 (3597.23 3.92%) -> 3826.90 (3862.24 0.91%): 1.07x slowdown gen4 firefox-talos-svg 66112.25 (66256.51 0.11%) -> 71433.40 (71584.31 0.14%): 1.08x slowdown gen4 gnome-system-monitor 5691.60 
(5724.03 0.56%) -> 6707.56 (6747.83 0.33%): 1.18x slowdown gen3 ocitysmap 3494.05 (3502.44 0.20%) -> 4321.99 (4524.42 2.78%): 1.24x slowdown gen4 ocitysmap 3628.42 (3641.66 9.37%) -> 5177.16 (5828.74 8.38%): 1.43x slowdown gen5 ocitysmap 4027.77 (4068.11 0.80%) -> 5748.26 (6282.25 7.38%): 1.43x slowdown gen6 ocitysmap 1401.61 (1402.24 0.40%) -> 2365.74 (2379.14 4.12%): 1.69x slowdown [Note the performance regression for ocitysmap comes from that we now attempt to support rendering to and (more importantly) from large surfaces. By enabling such operations is the only way to one day be faster than purely using the CPU, in the meantime we suffer regression due to the increased migration and aperture thrashing. The other couple of regressions will be eliminated with improved span and shader support, now that the framework for such is in place.] The performance increase for Cairo completely overlooks the other critical aspects of the architecture: World of Padman: gen3 (800x600): 57.5 -> 96.2 gen4 (800x600): 47.8 -> 74.6 gen6 (1366x768): 100.4 -> 140.3 [F15] 144.3 -> 146.4 [drm-intel-next] x11perf (gen6); aa10text: 3.47 -> 14.3 Mglyphs/s [unthrottled!] copywinwin10: 1.66 -> 1.99 Mops/s copywinpix10: 2.28 -> 2.98 Mops/s And we do not have a good measure for how much improvement the reworking of the fallback paths give, except that xterm is now over 4x faster... PS: This depends upon the Xorg patchset "Remove the cacheing of the last scratch PixmapRec" for correct invalidations of scratch Pixmaps (used by the dix to implement SHM operations, used by chromium and gtk+ pixbufs. PPS: ./configure --enable-sna Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src/sna/gen4_render.h')
-rw-r--r--src/sna/gen4_render.h2643
1 files changed, 2643 insertions, 0 deletions
diff --git a/src/sna/gen4_render.h b/src/sna/gen4_render.h
new file mode 100644
index 00000000..a014e52f
--- /dev/null
+++ b/src/sna/gen4_render.h
@@ -0,0 +1,2643 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef GEN4_RENDER_H /* include guard named after this file (gen4_render.h), so it cannot collide with another header's GEN5_RENDER_H guard */
+#define GEN4_RENDER_H
+/* GEN4_3D() builds a command header by OR-ing the constant 3 at bit 29 with Pipeline at bit 27, Opcode at bit 24 and Subopcode at bit 16. */
+#define GEN4_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
+ ((Pipeline) << 27) | \
+ ((Opcode) << 24) | \
+ ((Subopcode) << 16))
+/* Command headers built by GEN4_3D() with a Pipeline argument of 0, 1 or 2, grouped by (Pipeline, Opcode). */
+#define GEN4_URB_FENCE GEN4_3D(0, 0, 0)
+#define GEN4_CS_URB_STATE GEN4_3D(0, 0, 1)
+#define GEN4_CONSTANT_BUFFER GEN4_3D(0, 0, 2)
+#define GEN4_STATE_PREFETCH GEN4_3D(0, 0, 3)
+
+#define GEN4_STATE_BASE_ADDRESS GEN4_3D(0, 1, 1)
+#define GEN4_STATE_SIP GEN4_3D(0, 1, 2)
+#define GEN4_PIPELINE_SELECT GEN4_3D(0, 1, 4)
+
+#define NEW_PIPELINE_SELECT GEN4_3D(1, 1, 4) /* same Opcode/Subopcode as GEN4_PIPELINE_SELECT but Pipeline 1; NOTE(review): presumably for a later hardware revision - confirm */
+
+#define GEN4_MEDIA_STATE_POINTERS GEN4_3D(2, 0, 0)
+#define GEN4_MEDIA_OBJECT GEN4_3D(2, 1, 0)
+/* Command headers built by GEN4_3D() with Pipeline argument 3: the GEN4_3DSTATE_* commands, GEN4_PIPE_CONTROL and GEN4_3DPRIMITIVE. */
+#define GEN4_3DSTATE_PIPELINED_POINTERS GEN4_3D(3, 0, 0)
+#define GEN4_3DSTATE_BINDING_TABLE_POINTERS GEN4_3D(3, 0, 1)
+
+#define GEN4_3DSTATE_VERTEX_BUFFERS GEN4_3D(3, 0, 8)
+#define GEN4_3DSTATE_VERTEX_ELEMENTS GEN4_3D(3, 0, 9)
+#define GEN4_3DSTATE_INDEX_BUFFER GEN4_3D(3, 0, 0xa)
+#define GEN4_3DSTATE_VF_STATISTICS GEN4_3D(3, 0, 0xb)
+
+#define GEN4_3DSTATE_DRAWING_RECTANGLE GEN4_3D(3, 1, 0)
+#define GEN4_3DSTATE_CONSTANT_COLOR GEN4_3D(3, 1, 1)
+#define GEN4_3DSTATE_SAMPLER_PALETTE_LOAD GEN4_3D(3, 1, 2)
+#define GEN4_3DSTATE_CHROMA_KEY GEN4_3D(3, 1, 4)
+#define GEN4_3DSTATE_DEPTH_BUFFER GEN4_3D(3, 1, 5)
+# define GEN4_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
+# define GEN4_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
+
+#define GEN4_3DSTATE_POLY_STIPPLE_OFFSET GEN4_3D(3, 1, 6)
+#define GEN4_3DSTATE_POLY_STIPPLE_PATTERN GEN4_3D(3, 1, 7)
+#define GEN4_3DSTATE_LINE_STIPPLE GEN4_3D(3, 1, 8)
+#define GEN4_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN4_3D(3, 1, 9)
+/* These two are BLC and CTG only, not BW or CL */
+#define GEN4_3DSTATE_AA_LINE_PARAMS GEN4_3D(3, 1, 0xa)
+#define GEN4_3DSTATE_GS_SVB_INDEX GEN4_3D(3, 1, 0xb)
+
+#define GEN4_PIPE_CONTROL GEN4_3D(3, 2, 0)
+
+#define GEN4_3DPRIMITIVE GEN4_3D(3, 3, 0)
+
+#define GEN4_3DSTATE_CLEAR_PARAMS GEN4_3D(3, 1, 0x10)
+/* DW1 (NOTE(review): presumably dword 1 of the GEN4_3DSTATE_CLEAR_PARAMS packet - confirm) */
+# define GEN4_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
+/* Pipeline selector values: 3D = 0, media = 1. NOTE(review): presumably OR'ed into a *_PIPELINE_SELECT command header - confirm against the callers. */
+#define PIPELINE_SELECT_3D 0
+#define PIPELINE_SELECT_MEDIA 1
+/* UF0_*: single-bit *_REALLOC flags in bits 8-13. UF1_* / UF2_*: shift amounts (0, 10, 20) for per-unit fence fields. NOTE(review): presumably dwords 0-2 of GEN4_URB_FENCE - confirm. */
+#define UF0_CS_REALLOC (1 << 13)
+#define UF0_VFE_REALLOC (1 << 12)
+#define UF0_SF_REALLOC (1 << 11)
+#define UF0_CLIP_REALLOC (1 << 10)
+#define UF0_GS_REALLOC (1 << 9)
+#define UF0_VS_REALLOC (1 << 8)
+#define UF1_CLIP_FENCE_SHIFT 20
+#define UF1_GS_FENCE_SHIFT 10
+#define UF1_VS_FENCE_SHIFT 0
+#define UF2_CS_FENCE_SHIFT 20
+#define UF2_VFE_FENCE_SHIFT 10
+#define UF2_SF_FENCE_SHIFT 0
+
+/* for GEN4_STATE_BASE_ADDRESS: flag in bit 0 (NOTE(review): presumably set in each address dword that is to be updated - confirm) */
+#define BASE_ADDRESS_MODIFY (1 << 0)
+
+/* for GEN4_3DSTATE_PIPELINED_POINTERS: 0/1 disable/enable values for the GS and CLIP entries */
+#define GEN4_GS_DISABLE 0
+#define GEN4_GS_ENABLE 1
+#define GEN4_CLIP_DISABLE 0
+#define GEN4_CLIP_ENABLE 1
+
+/* for GEN4_PIPE_CONTROL: bits 15:14 hold a 2-bit write mode (NOWRITE, WRITE_QWORD, WRITE_DEPTH, WRITE_TIME); bit 2 selects GLOBAL_GTT vs LOCAL_PGTT; the rest are single-bit flags */
+#define GEN4_PIPE_CONTROL_NOWRITE (0 << 14)
+#define GEN4_PIPE_CONTROL_WRITE_QWORD (1 << 14)
+#define GEN4_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
+#define GEN4_PIPE_CONTROL_WRITE_TIME (3 << 14)
+#define GEN4_PIPE_CONTROL_DEPTH_STALL (1 << 13)
+#define GEN4_PIPE_CONTROL_WC_FLUSH (1 << 12)
+#define GEN4_PIPE_CONTROL_IS_FLUSH (1 << 11)
+#define GEN4_PIPE_CONTROL_TC_FLUSH (1 << 10)
+#define GEN4_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
+#define GEN4_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
+#define GEN4_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
+#define GEN4_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
+
+/* VERTEX_BUFFER_STATE Structure: buffer index field starts at bit 27, bit 26 selects vertex (0) or instance (1) data, pitch field starts at bit 0 */
+#define VB0_BUFFER_INDEX_SHIFT 27
+#define VB0_VERTEXDATA (0 << 26)
+#define VB0_INSTANCEDATA (1 << 26)
+#define VB0_BUFFER_PITCH_SHIFT 0
+
+/* VERTEX_ELEMENT_STATE Structure: VE0_* are shifts/flags for one dword, VE1_* for the next (dword split inferred from the VE0/VE1 prefixes - confirm) */
+#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27
+#define VE0_VALID (1 << 26)
+#define VE0_FORMAT_SHIFT 16
+#define VE0_OFFSET_SHIFT 0
+#define VE1_VFCOMPONENT_0_SHIFT 28
+#define VE1_VFCOMPONENT_1_SHIFT 24
+#define VE1_VFCOMPONENT_2_SHIFT 20
+#define VE1_VFCOMPONENT_3_SHIFT 16
+#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
+
+/* 3DPRIMITIVE bits: bit 15 selects sequential (0) or random (1) vertex access; the topology field starts at bit 10 */
+#define GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
+#define GEN4_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
+/* Primitive types are in gen4_defines.h (NOTE(review): _3DPRIM_* topology codes are also defined later in this file - confirm which is intended) */
+#define GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT 10
+/* GEN4_SVG_CTL (0x7400) with its selector codes 0-5 placed at bit 8, then offsets 0x7404 and 0x7408. NOTE(review): presumably MMIO register offsets - confirm. */
+#define GEN4_SVG_CTL 0x7400
+
+#define GEN4_SVG_CTL_GS_BA (0 << 8)
+#define GEN4_SVG_CTL_SS_BA (1 << 8)
+#define GEN4_SVG_CTL_IO_BA (2 << 8)
+#define GEN4_SVG_CTL_GS_AUB (3 << 8)
+#define GEN4_SVG_CTL_IO_AUB (4 << 8)
+#define GEN4_SVG_CTL_SIP (5 << 8)
+
+#define GEN4_SVG_RDATA 0x7404
+#define GEN4_SVG_WORK_CTL 0x7408
+/* GEN4_VF_CTL (0x7500) and its bit definitions, then offsets 0x7504-0x7518. SNAPSHOT_COMPLETE uses 1u: shifting a signed 1 into bit 31 is undefined behaviour in C. */
+#define GEN4_VF_CTL 0x7500
+
+#define GEN4_VF_CTL_SNAPSHOT_COMPLETE (1u << 31)
+#define GEN4_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
+#define GEN4_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
+#define GEN4_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
+#define GEN4_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
+#define GEN4_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
+#define GEN4_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
+#define GEN4_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
+#define GEN4_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN4_VF_STRG_VAL 0x7504
+#define GEN4_VF_STR_VL_OVR 0x7508
+#define GEN4_VF_VC_OVR 0x750c
+#define GEN4_VF_STR_PSKIP 0x7510
+#define GEN4_VF_MAX_PRIM 0x7514
+#define GEN4_VF_RDATA 0x7518
+/* GEN4_VS_CTL (0x7600) and its bit definitions, then offsets 0x7604 and 0x7608. SNAPSHOT_COMPLETE uses 1u: shifting a signed 1 into bit 31 is undefined behaviour in C. */
+#define GEN4_VS_CTL 0x7600
+#define GEN4_VS_CTL_SNAPSHOT_COMPLETE (1u << 31)
+#define GEN4_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
+#define GEN4_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
+#define GEN4_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
+#define GEN4_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
+#define GEN4_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN4_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN4_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN4_VS_STRG_VAL 0x7604
+#define GEN4_VS_RDATA 0x7608
+/* GEN4_SF_CTL (0x7b00) and its bit definitions (mux codes 0-7 at bit 8), then offsets 0x7b04 and 0x7b18. SNAPSHOT_COMPLETE uses 1u: shifting a signed 1 into bit 31 is undefined behaviour in C. */
+#define GEN4_SF_CTL 0x7b00
+#define GEN4_SF_CTL_SNAPSHOT_COMPLETE (1u << 31)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
+#define GEN4_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
+#define GEN4_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
+#define GEN4_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
+#define GEN4_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN4_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN4_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN4_SF_STRG_VAL 0x7b04
+#define GEN4_SF_RDATA 0x7b18
+/* GEN4_WIZ_CTL (0x7c00) and its bit definitions, then offsets 0x7c04 and 0x7c18. SNAPSHOT_COMPLETE uses 1u: shifting a signed 1 into bit 31 is undefined behaviour in C. */
+#define GEN4_WIZ_CTL 0x7c00
+#define GEN4_WIZ_CTL_SNAPSHOT_COMPLETE (1u << 31)
+#define GEN4_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
+#define GEN4_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
+#define GEN4_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
+#define GEN4_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
+#define GEN4_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
+#define GEN4_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
+#define GEN4_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
+#define GEN4_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
+#define GEN4_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
+#define GEN4_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
+#define GEN4_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN4_WIZ_STRG_VAL 0x7c04
+#define GEN4_WIZ_RDATA 0x7c18
+/* GEN4_TS_CTL (0x7e00) and its bit definitions, then offsets 0x7e04 and 0x7e08. SNAPSHOT_COMPLETE uses 1u: shifting a signed 1 into bit 31 is undefined behaviour in C. */
+#define GEN4_TS_CTL 0x7e00
+#define GEN4_TS_CTL_SNAPSHOT_COMPLETE (1u << 31)
+#define GEN4_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
+#define GEN4_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
+#define GEN4_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
+#define GEN4_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
+#define GEN4_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
+
+#define GEN4_TS_STRG_VAL 0x7e04
+#define GEN4_TS_RDATA 0x7e08
+/* GEN4_TD_CTL (0x8000) and GEN4_TD_CTL2 (0x8004), each followed by its own bit/shift definitions, then offsets 0x8008-0x8030. NOTE(review): presumably debug register offsets - confirm. */
+#define GEN4_TD_CTL 0x8000
+#define GEN4_TD_CTL_MUX_SHIFT 8
+#define GEN4_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
+#define GEN4_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
+#define GEN4_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
+#define GEN4_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
+#define GEN4_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
+#define GEN4_TD_CTL2 0x8004
+#define GEN4_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
+#define GEN4_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
+#define GEN4_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
+#define GEN4_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
+#define GEN4_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
+#define GEN4_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
+#define GEN4_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
+#define GEN4_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
+#define GEN4_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
+#define GEN4_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
+#define GEN4_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
+#define GEN4_TD_VF_VS_EMSK 0x8008
+#define GEN4_TD_GS_EMSK 0x800c
+#define GEN4_TD_CLIP_EMSK 0x8010
+#define GEN4_TD_SF_EMSK 0x8014
+#define GEN4_TD_WIZ_EMSK 0x8018
+#define GEN4_TD_0_6_EHTRG_VAL 0x801c
+#define GEN4_TD_0_7_EHTRG_VAL 0x8020
+#define GEN4_TD_0_6_EHTRG_MSK 0x8024
+#define GEN4_TD_0_7_EHTRG_MSK 0x8028
+#define GEN4_TD_RDATA 0x802c
+#define GEN4_TD_TS_EMSK 0x8030
+/* GEN4_EU_CTL (0x8800) with its two field shift amounts, then offsets 0x8810-0x8840 (ATT, ATT_DATA and ATT_CLR come in _0/_1 pairs 4 bytes apart). */
+#define GEN4_EU_CTL 0x8800
+#define GEN4_EU_CTL_SELECT_SHIFT 16
+#define GEN4_EU_CTL_DATA_MUX_SHIFT 8
+#define GEN4_EU_ATT_0 0x8810
+#define GEN4_EU_ATT_1 0x8814
+#define GEN4_EU_ATT_DATA_0 0x8820
+#define GEN4_EU_ATT_DATA_1 0x8824
+#define GEN4_EU_ATT_CLR_0 0x8830
+#define GEN4_EU_ATT_CLR_1 0x8834
+#define GEN4_EU_RDATA 0x8840
+
+/* 3D state: raw Opcode (_3DOP_*) and Subopcode (_3DSTATE_*, _3DCONTROL, _3DPRIMITIVE) numbers; they equal the 2nd and 3rd
+ * arguments given to GEN4_3D(3, ...) by the matching GEN4_* command definitions in this file. */
+#define _3DOP_3DSTATE_PIPELINED 0x0
+#define _3DOP_3DSTATE_NONPIPELINED 0x1
+#define _3DOP_3DCONTROL 0x2
+#define _3DOP_3DPRIMITIVE 0x3
+
+#define _3DSTATE_PIPELINED_POINTERS 0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
+#define _3DSTATE_VERTEX_BUFFERS 0x08
+#define _3DSTATE_VERTEX_ELEMENTS 0x09
+#define _3DSTATE_INDEX_BUFFER 0x0A
+#define _3DSTATE_VF_STATISTICS 0x0B
+#define _3DSTATE_DRAWING_RECTANGLE 0x00
+#define _3DSTATE_CONSTANT_COLOR 0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
+#define _3DSTATE_CHROMA_KEY 0x04
+#define _3DSTATE_DEPTH_BUFFER 0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
+#define _3DSTATE_LINE_STIPPLE 0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
+#define _3DCONTROL 0x00
+#define _3DPRIMITIVE 0x00
+/* Primitive topology codes 0x01-0x15, then the two vertex-buffer access modes (the same 0/1 values that GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL/RANDOM place at bit 15). */
+#define _3DPRIM_POINTLIST 0x01
+#define _3DPRIM_LINELIST 0x02
+#define _3DPRIM_LINESTRIP 0x03
+#define _3DPRIM_TRILIST 0x04
+#define _3DPRIM_TRISTRIP 0x05
+#define _3DPRIM_TRIFAN 0x06
+#define _3DPRIM_QUADLIST 0x07
+#define _3DPRIM_QUADSTRIP 0x08
+#define _3DPRIM_LINELIST_ADJ 0x09
+#define _3DPRIM_LINESTRIP_ADJ 0x0A
+#define _3DPRIM_TRILIST_ADJ 0x0B
+#define _3DPRIM_TRISTRIP_ADJ 0x0C
+#define _3DPRIM_TRISTRIP_REVERSE 0x0D
+#define _3DPRIM_POLYGON 0x0E
+#define _3DPRIM_RECTLIST 0x0F
+#define _3DPRIM_LINELOOP 0x10
+#define _3DPRIM_POINTLIST_BF 0x11
+#define _3DPRIM_LINESTRIP_CONT 0x12
+#define _3DPRIM_LINESTRIP_BF 0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
+
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
+/* GEN4_ANISORATIO_n is encoded as n / 2 - 1 (ratio 2 -> 0, ..., ratio 16 -> 7). */
+#define GEN4_ANISORATIO_2 0
+#define GEN4_ANISORATIO_4 1
+#define GEN4_ANISORATIO_6 2
+#define GEN4_ANISORATIO_8 3
+#define GEN4_ANISORATIO_10 4
+#define GEN4_ANISORATIO_12 5
+#define GEN4_ANISORATIO_14 6
+#define GEN4_ANISORATIO_16 7
+/* GEN4_BLENDFACTOR_*: every INV_* code is its plain counterpart + 0x10 (ZERO pairs with ONE); SRC_ALPHA_SATURATE has no INV_ form. GEN4_BLENDFUNCTION_* are numbered 0-4. */
+#define GEN4_BLENDFACTOR_ONE 0x1
+#define GEN4_BLENDFACTOR_SRC_COLOR 0x2
+#define GEN4_BLENDFACTOR_SRC_ALPHA 0x3
+#define GEN4_BLENDFACTOR_DST_ALPHA 0x4
+#define GEN4_BLENDFACTOR_DST_COLOR 0x5
+#define GEN4_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
+#define GEN4_BLENDFACTOR_CONST_COLOR 0x7
+#define GEN4_BLENDFACTOR_CONST_ALPHA 0x8
+#define GEN4_BLENDFACTOR_SRC1_COLOR 0x9
+#define GEN4_BLENDFACTOR_SRC1_ALPHA 0x0A
+#define GEN4_BLENDFACTOR_ZERO 0x11
+#define GEN4_BLENDFACTOR_INV_SRC_COLOR 0x12
+#define GEN4_BLENDFACTOR_INV_SRC_ALPHA 0x13
+#define GEN4_BLENDFACTOR_INV_DST_ALPHA 0x14
+#define GEN4_BLENDFACTOR_INV_DST_COLOR 0x15
+#define GEN4_BLENDFACTOR_INV_CONST_COLOR 0x17
+#define GEN4_BLENDFACTOR_INV_CONST_ALPHA 0x18
+#define GEN4_BLENDFACTOR_INV_SRC1_COLOR 0x19
+#define GEN4_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
+
+#define GEN4_BLENDFUNCTION_ADD 0
+#define GEN4_BLENDFUNCTION_SUBTRACT 1
+#define GEN4_BLENDFUNCTION_REVERSE_SUBTRACT 2
+#define GEN4_BLENDFUNCTION_MIN 3
+#define GEN4_BLENDFUNCTION_MAX 4
+/* Two-valued (0/1) selectors for alpha-test format, chroma-key mode, clip API and clip space, plus the five GEN4_CLIPMODE_* codes (0-4). */
+#define GEN4_ALPHATEST_FORMAT_UNORM8 0
+#define GEN4_ALPHATEST_FORMAT_FLOAT32 1
+
+#define GEN4_CHROMAKEY_KILL_ON_ANY_MATCH 0
+#define GEN4_CHROMAKEY_REPLACE_BLACK 1
+
+#define GEN4_CLIP_API_OGL 0
+#define GEN4_CLIP_API_DX 1
+
+#define GEN4_CLIPMODE_NORMAL 0
+#define GEN4_CLIPMODE_CLIP_ALL 1
+#define GEN4_CLIPMODE_CLIP_NON_REJECTED 2
+#define GEN4_CLIPMODE_REJECT_ALL 3
+#define GEN4_CLIPMODE_ACCEPT_ALL 4
+
+#define GEN4_CLIP_NDCSPACE 0
+#define GEN4_CLIP_SCREENSPACE 1
+/* Small enumerations: compare function (0-7), coverage, cull mode, default-colour format, depth format (values 3 and 4 are not defined here), floating-point mode, front winding and index size. */
+#define GEN4_COMPAREFUNCTION_ALWAYS 0
+#define GEN4_COMPAREFUNCTION_NEVER 1
+#define GEN4_COMPAREFUNCTION_LESS 2
+#define GEN4_COMPAREFUNCTION_EQUAL 3
+#define GEN4_COMPAREFUNCTION_LEQUAL 4
+#define GEN4_COMPAREFUNCTION_GREATER 5
+#define GEN4_COMPAREFUNCTION_NOTEQUAL 6
+#define GEN4_COMPAREFUNCTION_GEQUAL 7
+
+#define GEN4_COVERAGE_PIXELS_HALF 0
+#define GEN4_COVERAGE_PIXELS_1 1
+#define GEN4_COVERAGE_PIXELS_2 2
+#define GEN4_COVERAGE_PIXELS_4 3
+
+#define GEN4_CULLMODE_BOTH 0
+#define GEN4_CULLMODE_NONE 1
+#define GEN4_CULLMODE_FRONT 2
+#define GEN4_CULLMODE_BACK 3
+
+#define GEN4_DEFAULTCOLOR_R8G8B8A8_UNORM 0
+#define GEN4_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
+
+#define GEN4_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
+#define GEN4_DEPTHFORMAT_D32_FLOAT 1
+#define GEN4_DEPTHFORMAT_D24_UNORM_S8_UINT 2
+#define GEN4_DEPTHFORMAT_D16_UNORM 5
+
+#define GEN4_FLOATING_POINT_IEEE_754 0
+#define GEN4_FLOATING_POINT_NON_IEEE_754 1
+
+#define GEN4_FRONTWINDING_CW 0
+#define GEN4_FRONTWINDING_CCW 1
+
+#define GEN4_INDEX_BYTE 0
+#define GEN4_INDEX_WORD 1
+#define GEN4_INDEX_DWORD 2
+/* GEN4_LOGICOPFUNCTION_*: each 4-bit code is the truth table of the named operation evaluated with source = 0b1100 and destination = 0b1010 (e.g. AND = 0b1000, XOR = 0b0110, COPY = 0b1100). */
+#define GEN4_LOGICOPFUNCTION_CLEAR 0
+#define GEN4_LOGICOPFUNCTION_NOR 1
+#define GEN4_LOGICOPFUNCTION_AND_INVERTED 2
+#define GEN4_LOGICOPFUNCTION_COPY_INVERTED 3
+#define GEN4_LOGICOPFUNCTION_AND_REVERSE 4
+#define GEN4_LOGICOPFUNCTION_INVERT 5
+#define GEN4_LOGICOPFUNCTION_XOR 6
+#define GEN4_LOGICOPFUNCTION_NAND 7
+#define GEN4_LOGICOPFUNCTION_AND 8
+#define GEN4_LOGICOPFUNCTION_EQUIV 9
+#define GEN4_LOGICOPFUNCTION_NOOP 10
+#define GEN4_LOGICOPFUNCTION_OR_INVERTED 11
+#define GEN4_LOGICOPFUNCTION_COPY 12
+#define GEN4_LOGICOPFUNCTION_OR_REVERSE 13
+#define GEN4_LOGICOPFUNCTION_OR 14
+#define GEN4_LOGICOPFUNCTION_SET 15
+/* Filter and facing codes. GEN4_MIPFILTER_* skips the value 2; GEN4_PREFILTER_* uses the same 0-7 ordering as GEN4_COMPAREFUNCTION_*. */
+#define GEN4_MAPFILTER_NEAREST 0x0
+#define GEN4_MAPFILTER_LINEAR 0x1
+#define GEN4_MAPFILTER_ANISOTROPIC 0x2
+
+#define GEN4_MIPFILTER_NONE 0
+#define GEN4_MIPFILTER_NEAREST 1
+#define GEN4_MIPFILTER_LINEAR 3
+
+#define GEN4_POLYGON_FRONT_FACING 0
+#define GEN4_POLYGON_BACK_FACING 1
+
+#define GEN4_PREFILTER_ALWAYS 0x0
+#define GEN4_PREFILTER_NEVER 0x1
+#define GEN4_PREFILTER_LESS 0x2
+#define GEN4_PREFILTER_EQUAL 0x3
+#define GEN4_PREFILTER_LEQUAL 0x4
+#define GEN4_PREFILTER_GREATER 0x5
+#define GEN4_PREFILTER_NOTEQUAL 0x6
+#define GEN4_PREFILTER_GEQUAL 0x7
+/* Provoking vertex (0-2), raster rule, render-target clamp range, stencil operations (0-7) and surface mipmap layout codes. */
+#define GEN4_PROVOKING_VERTEX_0 0
+#define GEN4_PROVOKING_VERTEX_1 1
+#define GEN4_PROVOKING_VERTEX_2 2
+
+#define GEN4_RASTRULE_UPPER_LEFT 0
+#define GEN4_RASTRULE_UPPER_RIGHT 1
+
+#define GEN4_RENDERTARGET_CLAMPRANGE_UNORM 0
+#define GEN4_RENDERTARGET_CLAMPRANGE_SNORM 1
+#define GEN4_RENDERTARGET_CLAMPRANGE_FORMAT 2
+
+#define GEN4_STENCILOP_KEEP 0
+#define GEN4_STENCILOP_ZERO 1
+#define GEN4_STENCILOP_REPLACE 2
+#define GEN4_STENCILOP_INCRSAT 3
+#define GEN4_STENCILOP_DECRSAT 4
+#define GEN4_STENCILOP_INCR 5
+#define GEN4_STENCILOP_DECR 6
+#define GEN4_STENCILOP_INVERT 7
+
+#define GEN4_SURFACE_MIPMAPLAYOUT_BELOW 0
+#define GEN4_SURFACE_MIPMAPLAYOUT_RIGHT 1
+/* GEN4_SURFACEFORMAT_*: sparse codes (not every value is defined). Judging by the format names, the ranges starting at 0x000/0x040/0x080/0x0C0/0x100/0x140 appear to group 128/96/64/32/16/8-bit elements, with mixed formats from 0x180 - TODO confirm. */
+#define GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
+#define GEN4_SURFACEFORMAT_R32G32B32A32_SINT 0x001
+#define GEN4_SURFACEFORMAT_R32G32B32A32_UINT 0x002
+#define GEN4_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
+#define GEN4_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
+#define GEN4_SURFACEFORMAT_R64G64_FLOAT 0x005
+#define GEN4_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
+#define GEN4_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
+#define GEN4_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
+#define GEN4_SURFACEFORMAT_R32G32B32_FLOAT 0x040
+#define GEN4_SURFACEFORMAT_R32G32B32_SINT 0x041
+#define GEN4_SURFACEFORMAT_R32G32B32_UINT 0x042
+#define GEN4_SURFACEFORMAT_R32G32B32_UNORM 0x043
+#define GEN4_SURFACEFORMAT_R32G32B32_SNORM 0x044
+#define GEN4_SURFACEFORMAT_R32G32B32_SSCALED 0x045
+#define GEN4_SURFACEFORMAT_R32G32B32_USCALED 0x046
+#define GEN4_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
+#define GEN4_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
+#define GEN4_SURFACEFORMAT_R16G16B16A16_SINT 0x082
+#define GEN4_SURFACEFORMAT_R16G16B16A16_UINT 0x083
+#define GEN4_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
+#define GEN4_SURFACEFORMAT_R32G32_FLOAT 0x085
+#define GEN4_SURFACEFORMAT_R32G32_SINT 0x086
+#define GEN4_SURFACEFORMAT_R32G32_UINT 0x087
+#define GEN4_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
+#define GEN4_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
+#define GEN4_SURFACEFORMAT_L32A32_FLOAT 0x08A
+#define GEN4_SURFACEFORMAT_R32G32_UNORM 0x08B
+#define GEN4_SURFACEFORMAT_R32G32_SNORM 0x08C
+#define GEN4_SURFACEFORMAT_R64_FLOAT 0x08D
+#define GEN4_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
+#define GEN4_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
+#define GEN4_SURFACEFORMAT_A32X32_FLOAT 0x090
+#define GEN4_SURFACEFORMAT_L32X32_FLOAT 0x091
+#define GEN4_SURFACEFORMAT_I32X32_FLOAT 0x092
+#define GEN4_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
+#define GEN4_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
+#define GEN4_SURFACEFORMAT_R32G32_SSCALED 0x095
+#define GEN4_SURFACEFORMAT_R32G32_USCALED 0x096
+#define GEN4_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
+#define GEN4_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
+#define GEN4_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
+#define GEN4_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
+#define GEN4_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
+#define GEN4_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
+#define GEN4_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
+#define GEN4_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
+#define GEN4_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
+#define GEN4_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
+#define GEN4_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
+#define GEN4_SURFACEFORMAT_R16G16_UNORM 0x0CC
+#define GEN4_SURFACEFORMAT_R16G16_SNORM 0x0CD
+#define GEN4_SURFACEFORMAT_R16G16_SINT 0x0CE
+#define GEN4_SURFACEFORMAT_R16G16_UINT 0x0CF
+#define GEN4_SURFACEFORMAT_R16G16_FLOAT 0x0D0
+#define GEN4_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
+#define GEN4_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
+#define GEN4_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
+#define GEN4_SURFACEFORMAT_R32_SINT 0x0D6
+#define GEN4_SURFACEFORMAT_R32_UINT 0x0D7
+#define GEN4_SURFACEFORMAT_R32_FLOAT 0x0D8
+#define GEN4_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
+#define GEN4_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
+#define GEN4_SURFACEFORMAT_L16A16_UNORM 0x0DF
+#define GEN4_SURFACEFORMAT_I24X8_UNORM 0x0E0
+#define GEN4_SURFACEFORMAT_L24X8_UNORM 0x0E1
+#define GEN4_SURFACEFORMAT_A24X8_UNORM 0x0E2
+#define GEN4_SURFACEFORMAT_I32_FLOAT 0x0E3
+#define GEN4_SURFACEFORMAT_L32_FLOAT 0x0E4
+#define GEN4_SURFACEFORMAT_A32_FLOAT 0x0E5
+#define GEN4_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
+#define GEN4_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
+#define GEN4_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
+#define GEN4_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
+#define GEN4_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
+#define GEN4_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
+#define GEN4_SURFACEFORMAT_L16A16_FLOAT 0x0F0
+#define GEN4_SURFACEFORMAT_R32_UNORM 0x0F1
+#define GEN4_SURFACEFORMAT_R32_SNORM 0x0F2
+#define GEN4_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
+#define GEN4_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
+#define GEN4_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
+#define GEN4_SURFACEFORMAT_R16G16_SSCALED 0x0F6
+#define GEN4_SURFACEFORMAT_R16G16_USCALED 0x0F7
+#define GEN4_SURFACEFORMAT_R32_SSCALED 0x0F8
+#define GEN4_SURFACEFORMAT_R32_USCALED 0x0F9
+#define GEN4_SURFACEFORMAT_B5G6R5_UNORM 0x100
+#define GEN4_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
+#define GEN4_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
+#define GEN4_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
+#define GEN4_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
+#define GEN4_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
+#define GEN4_SURFACEFORMAT_R8G8_UNORM 0x106
+#define GEN4_SURFACEFORMAT_R8G8_SNORM 0x107
+#define GEN4_SURFACEFORMAT_R8G8_SINT 0x108
+#define GEN4_SURFACEFORMAT_R8G8_UINT 0x109
+#define GEN4_SURFACEFORMAT_R16_UNORM 0x10A
+#define GEN4_SURFACEFORMAT_R16_SNORM 0x10B
+#define GEN4_SURFACEFORMAT_R16_SINT 0x10C
+#define GEN4_SURFACEFORMAT_R16_UINT 0x10D
+#define GEN4_SURFACEFORMAT_R16_FLOAT 0x10E
+#define GEN4_SURFACEFORMAT_I16_UNORM 0x111
+#define GEN4_SURFACEFORMAT_L16_UNORM 0x112
+#define GEN4_SURFACEFORMAT_A16_UNORM 0x113
+#define GEN4_SURFACEFORMAT_L8A8_UNORM 0x114
+#define GEN4_SURFACEFORMAT_I16_FLOAT 0x115
+#define GEN4_SURFACEFORMAT_L16_FLOAT 0x116
+#define GEN4_SURFACEFORMAT_A16_FLOAT 0x117
+#define GEN4_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
+#define GEN4_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
+#define GEN4_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
+#define GEN4_SURFACEFORMAT_R8G8_SSCALED 0x11C
+#define GEN4_SURFACEFORMAT_R8G8_USCALED 0x11D
+#define GEN4_SURFACEFORMAT_R16_SSCALED 0x11E
+#define GEN4_SURFACEFORMAT_R16_USCALED 0x11F
+#define GEN4_SURFACEFORMAT_R8_UNORM 0x140
+#define GEN4_SURFACEFORMAT_R8_SNORM 0x141
+#define GEN4_SURFACEFORMAT_R8_SINT 0x142
+#define GEN4_SURFACEFORMAT_R8_UINT 0x143
+#define GEN4_SURFACEFORMAT_A8_UNORM 0x144
+#define GEN4_SURFACEFORMAT_I8_UNORM 0x145
+#define GEN4_SURFACEFORMAT_L8_UNORM 0x146
+#define GEN4_SURFACEFORMAT_P4A4_UNORM 0x147
+#define GEN4_SURFACEFORMAT_A4P4_UNORM 0x148
+#define GEN4_SURFACEFORMAT_R8_SSCALED 0x149
+#define GEN4_SURFACEFORMAT_R8_USCALED 0x14A
+#define GEN4_SURFACEFORMAT_R1_UINT 0x181
+#define GEN4_SURFACEFORMAT_YCRCB_NORMAL 0x182
+#define GEN4_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
+#define GEN4_SURFACEFORMAT_BC1_UNORM 0x186
+#define GEN4_SURFACEFORMAT_BC2_UNORM 0x187
+#define GEN4_SURFACEFORMAT_BC3_UNORM 0x188
+#define GEN4_SURFACEFORMAT_BC4_UNORM 0x189
+#define GEN4_SURFACEFORMAT_BC5_UNORM 0x18A
+#define GEN4_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
+#define GEN4_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
+#define GEN4_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
+#define GEN4_SURFACEFORMAT_MONO8 0x18E
+#define GEN4_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
+#define GEN4_SURFACEFORMAT_YCRCB_SWAPY 0x190
+#define GEN4_SURFACEFORMAT_DXT1_RGB 0x191
+#define GEN4_SURFACEFORMAT_FXT1 0x192
+#define GEN4_SURFACEFORMAT_R8G8B8_UNORM 0x193
+#define GEN4_SURFACEFORMAT_R8G8B8_SNORM 0x194
+#define GEN4_SURFACEFORMAT_R8G8B8_SSCALED 0x195
+#define GEN4_SURFACEFORMAT_R8G8B8_USCALED 0x196
+#define GEN4_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
+#define GEN4_SURFACEFORMAT_R64G64B64_FLOAT 0x198
+#define GEN4_SURFACEFORMAT_BC4_SNORM 0x199
+#define GEN4_SURFACEFORMAT_BC5_SNORM 0x19A
+#define GEN4_SURFACEFORMAT_R16G16B16_UNORM 0x19C
+#define GEN4_SURFACEFORMAT_R16G16B16_SNORM 0x19D
+#define GEN4_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
+#define GEN4_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+/* Enumerated values for surface, sampler, thread and vertex-fetch state
+ * fields.  Each group below shares a common name prefix; the numbers are
+ * the raw encodings stored in the corresponding bit-fields.
+ */
+#define GEN4_SURFACERETURNFORMAT_FLOAT32 0
+#define GEN4_SURFACERETURNFORMAT_S1 1
+
+#define GEN4_SURFACE_1D 0
+#define GEN4_SURFACE_2D 1
+#define GEN4_SURFACE_3D 2
+#define GEN4_SURFACE_CUBE 3
+#define GEN4_SURFACE_BUFFER 4
+#define GEN4_SURFACE_NULL 7
+
+#define GEN4_BORDER_COLOR_MODE_DEFAULT 0
+#define GEN4_BORDER_COLOR_MODE_LEGACY 1
+
+#define GEN4_TEXCOORDMODE_WRAP 0
+#define GEN4_TEXCOORDMODE_MIRROR 1
+#define GEN4_TEXCOORDMODE_CLAMP 2
+#define GEN4_TEXCOORDMODE_CUBE 3
+#define GEN4_TEXCOORDMODE_CLAMP_BORDER 4
+#define GEN4_TEXCOORDMODE_MIRROR_ONCE 5
+
+#define GEN4_THREAD_PRIORITY_NORMAL 0
+#define GEN4_THREAD_PRIORITY_HIGH 1
+
+#define GEN4_TILEWALK_XMAJOR 0
+#define GEN4_TILEWALK_YMAJOR 1
+
+#define GEN4_VERTEX_SUBPIXEL_PRECISION_8BITS 0
+#define GEN4_VERTEX_SUBPIXEL_PRECISION_4BITS 1
+
+#define GEN4_VERTEXBUFFER_ACCESS_VERTEXDATA 0
+#define GEN4_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
+
+#define GEN4_VFCOMPONENT_NOSTORE 0
+#define GEN4_VFCOMPONENT_STORE_SRC 1
+#define GEN4_VFCOMPONENT_STORE_0 2
+#define GEN4_VFCOMPONENT_STORE_1_FLT 3
+#define GEN4_VFCOMPONENT_STORE_1_INT 4
+#define GEN4_VFCOMPONENT_STORE_VID 5
+#define GEN4_VFCOMPONENT_STORE_IID 6
+#define GEN4_VFCOMPONENT_STORE_PID 7
+
+
+
+/* Execution Unit (EU) defines
+ *
+ * Field encodings for EU instructions (presumably consumed when filling
+ * in struct gen4_instruction, declared later in this header).
+ * GEN4_CONDITIONAL_EQ and GEN4_CONDITIONAL_NEQ are aliases of
+ * GEN4_CONDITIONAL_Z and GEN4_CONDITIONAL_NZ.  GEN4_EXECUTE_n and
+ * GEN4_HORIZONTAL_STRIDE_n encode power-of-two sizes rather than the
+ * literal count.
+ */
+
+#define GEN4_ALIGN_1 0
+#define GEN4_ALIGN_16 1
+
+#define GEN4_ADDRESS_DIRECT 0
+#define GEN4_ADDRESS_REGISTER_INDIRECT_REGISTER 1
+
+#define GEN4_CHANNEL_X 0
+#define GEN4_CHANNEL_Y 1
+#define GEN4_CHANNEL_Z 2
+#define GEN4_CHANNEL_W 3
+
+#define GEN4_COMPRESSION_NONE 0
+#define GEN4_COMPRESSION_2NDHALF 1
+#define GEN4_COMPRESSION_COMPRESSED 2
+
+#define GEN4_CONDITIONAL_NONE 0
+#define GEN4_CONDITIONAL_Z 1
+#define GEN4_CONDITIONAL_NZ 2
+#define GEN4_CONDITIONAL_EQ 1 /* Z */
+#define GEN4_CONDITIONAL_NEQ 2 /* NZ */
+#define GEN4_CONDITIONAL_G 3
+#define GEN4_CONDITIONAL_GE 4
+#define GEN4_CONDITIONAL_L 5
+#define GEN4_CONDITIONAL_LE 6
+#define GEN4_CONDITIONAL_C 7
+#define GEN4_CONDITIONAL_O 8
+
+#define GEN4_DEBUG_NONE 0
+#define GEN4_DEBUG_BREAKPOINT 1
+
+#define GEN4_DEPENDENCY_NORMAL 0
+#define GEN4_DEPENDENCY_NOTCLEARED 1
+#define GEN4_DEPENDENCY_NOTCHECKED 2
+#define GEN4_DEPENDENCY_DISABLE 3
+
+#define GEN4_EXECUTE_1 0
+#define GEN4_EXECUTE_2 1
+#define GEN4_EXECUTE_4 2
+#define GEN4_EXECUTE_8 3
+#define GEN4_EXECUTE_16 4
+#define GEN4_EXECUTE_32 5
+
+#define GEN4_HORIZONTAL_STRIDE_0 0
+#define GEN4_HORIZONTAL_STRIDE_1 1
+#define GEN4_HORIZONTAL_STRIDE_2 2
+#define GEN4_HORIZONTAL_STRIDE_4 3
+
+#define GEN4_INSTRUCTION_NORMAL 0
+#define GEN4_INSTRUCTION_SATURATE 1
+
+#define GEN4_MASK_ENABLE 0
+#define GEN4_MASK_DISABLE 1
+/* EU instruction opcodes.  The numbering is sparse; values that do not
+ * appear here are simply not named by this header.
+ */
+#define GEN4_OPCODE_MOV 1
+#define GEN4_OPCODE_SEL 2
+#define GEN4_OPCODE_NOT 4
+#define GEN4_OPCODE_AND 5
+#define GEN4_OPCODE_OR 6
+#define GEN4_OPCODE_XOR 7
+#define GEN4_OPCODE_SHR 8
+#define GEN4_OPCODE_SHL 9
+#define GEN4_OPCODE_RSR 10
+#define GEN4_OPCODE_RSL 11
+#define GEN4_OPCODE_ASR 12
+#define GEN4_OPCODE_CMP 16
+#define GEN4_OPCODE_JMPI 32
+#define GEN4_OPCODE_IF 34
+#define GEN4_OPCODE_IFF 35
+#define GEN4_OPCODE_ELSE 36
+#define GEN4_OPCODE_ENDIF 37
+#define GEN4_OPCODE_DO 38
+#define GEN4_OPCODE_WHILE 39
+#define GEN4_OPCODE_BREAK 40
+#define GEN4_OPCODE_CONTINUE 41
+#define GEN4_OPCODE_HALT 42
+#define GEN4_OPCODE_MSAVE 44
+#define GEN4_OPCODE_MRESTORE 45
+#define GEN4_OPCODE_PUSH 46
+#define GEN4_OPCODE_POP 47
+#define GEN4_OPCODE_WAIT 48
+#define GEN4_OPCODE_SEND 49
+#define GEN4_OPCODE_ADD 64
+#define GEN4_OPCODE_MUL 65
+#define GEN4_OPCODE_AVG 66
+#define GEN4_OPCODE_FRC 67
+#define GEN4_OPCODE_RNDU 68
+#define GEN4_OPCODE_RNDD 69
+#define GEN4_OPCODE_RNDE 70
+#define GEN4_OPCODE_RNDZ 71
+#define GEN4_OPCODE_MAC 72
+#define GEN4_OPCODE_MACH 73
+#define GEN4_OPCODE_LZD 74
+#define GEN4_OPCODE_SAD2 80
+#define GEN4_OPCODE_SADA2 81
+#define GEN4_OPCODE_DP4 84
+#define GEN4_OPCODE_DPH 85
+#define GEN4_OPCODE_DP3 86
+#define GEN4_OPCODE_DP2 87
+#define GEN4_OPCODE_DPA2 88
+#define GEN4_OPCODE_LINE 89
+#define GEN4_OPCODE_NOP 126
+/* Predicate control encodings.  The ALIGN1 and ALIGN16 lists reuse the
+ * values 2..7, so a value is only meaningful together with the access
+ * mode -- presumably the GEN4_ALIGN_1 / GEN4_ALIGN_16 setting of the same
+ * instruction (TODO confirm).
+ */
+#define GEN4_PREDICATE_NONE 0
+#define GEN4_PREDICATE_NORMAL 1
+#define GEN4_PREDICATE_ALIGN1_ANYV 2
+#define GEN4_PREDICATE_ALIGN1_ALLV 3
+#define GEN4_PREDICATE_ALIGN1_ANY2H 4
+#define GEN4_PREDICATE_ALIGN1_ALL2H 5
+#define GEN4_PREDICATE_ALIGN1_ANY4H 6
+#define GEN4_PREDICATE_ALIGN1_ALL4H 7
+#define GEN4_PREDICATE_ALIGN1_ANY8H 8
+#define GEN4_PREDICATE_ALIGN1_ALL8H 9
+#define GEN4_PREDICATE_ALIGN1_ANY16H 10
+#define GEN4_PREDICATE_ALIGN1_ALL16H 11
+#define GEN4_PREDICATE_ALIGN16_REPLICATE_X 2
+#define GEN4_PREDICATE_ALIGN16_REPLICATE_Y 3
+#define GEN4_PREDICATE_ALIGN16_REPLICATE_Z 4
+#define GEN4_PREDICATE_ALIGN16_REPLICATE_W 5
+#define GEN4_PREDICATE_ALIGN16_ANY4H 6
+#define GEN4_PREDICATE_ALIGN16_ALL4H 7
+/* Register files, operand types, architecture register numbers and mask
+ * selectors.  Note that GEN4_REGISTER_TYPE_B and _VF share the value 5 and
+ * GEN4_REGISTER_TYPE_HF and _V share the value 6; according to the inline
+ * notes the VF and V meanings apply to immediate operands.
+ */
+#define GEN4_ARCHITECTURE_REGISTER_FILE 0
+#define GEN4_GENERAL_REGISTER_FILE 1
+#define GEN4_MESSAGE_REGISTER_FILE 2
+#define GEN4_IMMEDIATE_VALUE 3
+
+#define GEN4_REGISTER_TYPE_UD 0
+#define GEN4_REGISTER_TYPE_D 1
+#define GEN4_REGISTER_TYPE_UW 2
+#define GEN4_REGISTER_TYPE_W 3
+#define GEN4_REGISTER_TYPE_UB 4
+#define GEN4_REGISTER_TYPE_B 5
+#define GEN4_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
+#define GEN4_REGISTER_TYPE_HF 6
+#define GEN4_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
+#define GEN4_REGISTER_TYPE_F 7
+
+#define GEN4_ARF_NULL 0x00
+#define GEN4_ARF_ADDRESS 0x10
+#define GEN4_ARF_ACCUMULATOR 0x20
+#define GEN4_ARF_FLAG 0x30
+#define GEN4_ARF_MASK 0x40
+#define GEN4_ARF_MASK_STACK 0x50
+#define GEN4_ARF_MASK_STACK_DEPTH 0x60
+#define GEN4_ARF_STATE 0x70
+#define GEN4_ARF_CONTROL 0x80
+#define GEN4_ARF_NOTIFICATION_COUNT 0x90
+#define GEN4_ARF_IP 0xA0
+
+#define GEN4_AMASK 0
+#define GEN4_IMASK 1
+#define GEN4_LMASK 2
+#define GEN4_CMASK 3
+
+
+/* Thread control plus region stride/width encodings.  Apart from stride 0
+ * and the ONE_DIMENSIONAL marker, GEN4_VERTICAL_STRIDE_n is log2(n) + 1;
+ * GEN4_WIDTH_n is log2(n); GEN4_STATELESS_BUFFER_BOUNDARY_x is
+ * log2(x / 1K).
+ */
+#define GEN4_THREAD_NORMAL 0
+#define GEN4_THREAD_ATOMIC 1
+#define GEN4_THREAD_SWITCH 2
+
+#define GEN4_VERTICAL_STRIDE_0 0
+#define GEN4_VERTICAL_STRIDE_1 1
+#define GEN4_VERTICAL_STRIDE_2 2
+#define GEN4_VERTICAL_STRIDE_4 3
+#define GEN4_VERTICAL_STRIDE_8 4
+#define GEN4_VERTICAL_STRIDE_16 5
+#define GEN4_VERTICAL_STRIDE_32 6
+#define GEN4_VERTICAL_STRIDE_64 7
+#define GEN4_VERTICAL_STRIDE_128 8
+#define GEN4_VERTICAL_STRIDE_256 9
+#define GEN4_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
+
+#define GEN4_WIDTH_1 0
+#define GEN4_WIDTH_2 1
+#define GEN4_WIDTH_4 2
+#define GEN4_WIDTH_8 3
+#define GEN4_WIDTH_16 4
+
+#define GEN4_STATELESS_BUFFER_BOUNDARY_1K 0
+#define GEN4_STATELESS_BUFFER_BOUNDARY_2K 1
+#define GEN4_STATELESS_BUFFER_BOUNDARY_4K 2
+#define GEN4_STATELESS_BUFFER_BOUNDARY_8K 3
+#define GEN4_STATELESS_BUFFER_BOUNDARY_16K 4
+#define GEN4_STATELESS_BUFFER_BOUNDARY_32K 5
+#define GEN4_STATELESS_BUFFER_BOUNDARY_64K 6
+#define GEN4_STATELESS_BUFFER_BOUNDARY_128K 7
+#define GEN4_STATELESS_BUFFER_BOUNDARY_256K 8
+#define GEN4_STATELESS_BUFFER_BOUNDARY_512K 9
+#define GEN4_STATELESS_BUFFER_BOUNDARY_1M 10
+#define GEN4_STATELESS_BUFFER_BOUNDARY_2M 11
+/* Polygon facing, message targets and sampler message encodings.  Several
+ * GEN4_SAMPLER_MESSAGE_* names map to the same number; presumably the
+ * SIMD mode of the message disambiguates them (TODO confirm against the
+ * sampler documentation).
+ */
+#define GEN4_POLYGON_FACING_FRONT 0
+#define GEN4_POLYGON_FACING_BACK 1
+
+#define GEN4_MESSAGE_TARGET_NULL 0
+#define GEN4_MESSAGE_TARGET_MATH 1
+#define GEN4_MESSAGE_TARGET_SAMPLER 2
+#define GEN4_MESSAGE_TARGET_GATEWAY 3
+#define GEN4_MESSAGE_TARGET_DATAPORT_READ 4
+#define GEN4_MESSAGE_TARGET_DATAPORT_WRITE 5
+#define GEN4_MESSAGE_TARGET_URB 6
+#define GEN4_MESSAGE_TARGET_THREAD_SPAWNER 7
+
+#define GEN4_SAMPLER_RETURN_FORMAT_FLOAT32 0
+#define GEN4_SAMPLER_RETURN_FORMAT_UINT32 2
+#define GEN4_SAMPLER_RETURN_FORMAT_SINT32 3
+
+#define GEN4_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
+#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
+#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
+#define GEN4_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
+#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
+#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
+#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
+#define GEN4_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
+#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
+#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
+#define GEN4_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
+#define GEN4_SAMPLER_MESSAGE_SIMD8_RESINFO 2
+#define GEN4_SAMPLER_MESSAGE_SIMD16_RESINFO 2
+#define GEN4_SAMPLER_MESSAGE_SIMD4X2_LD 3
+#define GEN4_SAMPLER_MESSAGE_SIMD8_LD 3
+#define GEN4_SAMPLER_MESSAGE_SIMD16_LD 3
+/* Data port block sizes, message types, cache targets and render target
+ * write modes.  The READ_MESSAGE and WRITE_MESSAGE lists both start at 0,
+ * so a message type only makes sense together with the matching
+ * GEN4_MESSAGE_TARGET_DATAPORT_READ / _WRITE target (NOTE(review):
+ * inferred from the names -- confirm).
+ */
+#define GEN4_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
+#define GEN4_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
+#define GEN4_DATAPORT_OWORD_BLOCK_2_OWORDS 2
+#define GEN4_DATAPORT_OWORD_BLOCK_4_OWORDS 3
+#define GEN4_DATAPORT_OWORD_BLOCK_8_OWORDS 4
+
+#define GEN4_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
+#define GEN4_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
+
+#define GEN4_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
+#define GEN4_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
+
+#define GEN4_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
+#define GEN4_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
+#define GEN4_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
+#define GEN4_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
+
+#define GEN4_DATAPORT_READ_TARGET_DATA_CACHE 0
+#define GEN4_DATAPORT_READ_TARGET_RENDER_CACHE 1
+#define GEN4_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
+
+#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
+#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
+#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
+#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
+#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
+
+#define GEN4_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
+#define GEN4_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
+#define GEN4_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
+#define GEN4_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
+#define GEN4_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
+#define GEN4_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
+#define GEN4_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
+/* Math function selectors and modifiers, URB opcode/swizzle values and
+ * scratch space sizes.  GEN4_SCRATCH_SPACE_SIZE_x is log2(x / 1K).  The
+ * "was N" notes on SIN/COS/SINCOS appear to record values these macros had
+ * previously.
+ */
+#define GEN4_MATH_FUNCTION_INV 1
+#define GEN4_MATH_FUNCTION_LOG 2
+#define GEN4_MATH_FUNCTION_EXP 3
+#define GEN4_MATH_FUNCTION_SQRT 4
+#define GEN4_MATH_FUNCTION_RSQ 5
+#define GEN4_MATH_FUNCTION_SIN 6 /* was 7 */
+#define GEN4_MATH_FUNCTION_COS 7 /* was 8 */
+#define GEN4_MATH_FUNCTION_SINCOS 8 /* was 6 */
+#define GEN4_MATH_FUNCTION_TAN 9
+#define GEN4_MATH_FUNCTION_POW 10
+#define GEN4_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
+#define GEN4_MATH_FUNCTION_INT_DIV_QUOTIENT 12
+#define GEN4_MATH_FUNCTION_INT_DIV_REMAINDER 13
+
+#define GEN4_MATH_INTEGER_UNSIGNED 0
+#define GEN4_MATH_INTEGER_SIGNED 1
+
+#define GEN4_MATH_PRECISION_FULL 0
+#define GEN4_MATH_PRECISION_PARTIAL 1
+
+#define GEN4_MATH_SATURATE_NONE 0
+#define GEN4_MATH_SATURATE_SATURATE 1
+
+#define GEN4_MATH_DATA_VECTOR 0
+#define GEN4_MATH_DATA_SCALAR 1
+
+#define GEN4_URB_OPCODE_WRITE 0
+
+#define GEN4_URB_SWIZZLE_NONE 0
+#define GEN4_URB_SWIZZLE_INTERLEAVE 1
+#define GEN4_URB_SWIZZLE_TRANSPOSE 2
+
+#define GEN4_SCRATCH_SPACE_SIZE_1K 0
+#define GEN4_SCRATCH_SPACE_SIZE_2K 1
+#define GEN4_SCRATCH_SPACE_SIZE_4K 2
+#define GEN4_SCRATCH_SPACE_SIZE_8K 3
+#define GEN4_SCRATCH_SPACE_SIZE_16K 4
+#define GEN4_SCRATCH_SPACE_SIZE_32K 5
+#define GEN4_SCRATCH_SPACE_SIZE_64K 6
+#define GEN4_SCRATCH_SPACE_SIZE_128K 7
+#define GEN4_SCRATCH_SPACE_SIZE_256K 8
+#define GEN4_SCRATCH_SPACE_SIZE_512K 9
+#define GEN4_SCRATCH_SPACE_SIZE_1M 10
+#define GEN4_SCRATCH_SPACE_SIZE_2M 11
+
+
+
+
+/* Command opcodes (the upper 16 bits of a command's first dword).
+ *
+ * Every command needs its own encoding.  CMD_GLOBAL_DEPTH_OFFSET_CLAMP
+ * used to be defined as 0x7908, the same value as
+ * CMD_LINE_STIPPLE_PATTERN, so emitting it would have been decoded as a
+ * line stipple command.  It is the next command in the 0x79xx group
+ * (sub-opcode 09h).
+ */
+#define CMD_URB_FENCE 0x6000
+#define CMD_CONST_BUFFER_STATE 0x6001
+#define CMD_CONST_BUFFER 0x6002
+
+#define CMD_STATE_BASE_ADDRESS 0x6101
+#define CMD_STATE_INSN_POINTER 0x6102
+#define CMD_PIPELINE_SELECT 0x6104
+
+#define CMD_PIPELINED_STATE_POINTERS 0x7800
+#define CMD_BINDING_TABLE_PTRS 0x7801
+#define CMD_VERTEX_BUFFER 0x7808
+#define CMD_VERTEX_ELEMENT 0x7809
+#define CMD_INDEX_BUFFER 0x780a
+#define CMD_VF_STATISTICS 0x780b
+
+#define CMD_DRAW_RECT 0x7900
+#define CMD_BLEND_CONSTANT_COLOR 0x7901
+#define CMD_CHROMA_KEY 0x7904
+#define CMD_DEPTH_BUFFER 0x7905
+#define CMD_POLY_STIPPLE_OFFSET 0x7906
+#define CMD_POLY_STIPPLE_PATTERN 0x7907
+#define CMD_LINE_STIPPLE_PATTERN 0x7908
+#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
+
+#define CMD_PIPE_CONTROL 0x7a00
+
+#define CMD_3D_PRIM 0x7b00
+
+#define CMD_MI_FLUSH 0x0200
+
+
+/* Various values from the R0 vertex header:
+ *
+ * PRIM_END and PRIM_START are distinct bits (0x1 and 0x2).
+ */
+#define R02_PRIM_END 0x1
+#define R02_PRIM_START 0x2
+
+/* media pipeline: VFE mode selectors and debug counter modes */
+
+#define GEN4_VFE_MODE_GENERIC 0x0
+#define GEN4_VFE_MODE_VLD_MPEG2 0x1
+#define GEN4_VFE_MODE_IS 0x2
+#define GEN4_VFE_MODE_AVC_MC 0x4
+#define GEN4_VFE_MODE_AVC_IT 0x7
+#define GEN4_VFE_MODE_VC1_IT 0xB
+
+#define GEN4_VFE_DEBUG_COUNTER_FREE 0
+#define GEN4_VFE_DEBUG_COUNTER_FROZEN 1
+#define GEN4_VFE_DEBUG_COUNTER_ONCE 2
+#define GEN4_VFE_DEBUG_COUNTER_ALWAYS 3
+
+/* VLD_STATE: going by the names, picture structure, quantiser scale
+ * type, scan order and picture coding type for MPEG decode */
+#define GEN4_MPEG_TOP_FIELD 1
+#define GEN4_MPEG_BOTTOM_FIELD 2
+#define GEN4_MPEG_FRAME 3
+#define GEN4_MPEG_QSCALE_LINEAR 0
+#define GEN4_MPEG_QSCALE_NONLINEAR 1
+#define GEN4_MPEG_ZIGZAG_SCAN 0
+#define GEN4_MPEG_ALTER_VERTICAL_SCAN 1
+#define GEN4_MPEG_I_PICTURE 1
+#define GEN4_MPEG_P_PICTURE 2
+#define GEN4_MPEG_B_PICTURE 3
+
+/* Command packets:
+ *
+ * struct header is the generic first dword of a packet: a 16-bit length
+ * and a 16-bit opcode (presumably one of the CMD_* values above).  Which
+ * half ends up in the low bits depends on the compiler's bit-field
+ * allocation order.
+ */
+struct header
+{
+ unsigned int length:16;
+ unsigned int opcode:16;
+};
+
+/* Overlay of struct header with a raw unsigned int, so the header can be
+ * accessed field by field (.bits) or as a single value (.dword).
+ */
+union header_union
+{
+ struct header bits;
+ unsigned int dword;
+};
+/* Four-dword control packet: header flags, a destination address dword
+ * and two further dwords.  The layout mirrors struct gen4_pipe_control
+ * except that this header has a 3-bit pad where that one has a 2-bit pad
+ * plus an instruction state cache flush bit.
+ */
+struct gen4_3d_control
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int notify_enable:1;
+ unsigned int pad:3;
+ unsigned int wc_flush_enable:1;
+ unsigned int depth_stall_enable:1;
+ unsigned int operation:2;
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int pad:2;
+ unsigned int dest_addr_type:1;
+ unsigned int dest_addr:29;
+ } dest;
+
+ unsigned int dword2;
+ unsigned int dword3;
+};
+
+/* Draw packet: a header carrying the primitive topology and an indexed
+ * flag, followed by five dwords of vertex/instance counts and start
+ * locations.
+ */
+struct gen4_3d_primitive
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int pad:2;
+ unsigned int topology:5;
+ unsigned int indexed:1;
+ unsigned int opcode:16;
+ } header;
+
+ unsigned int verts_per_instance;
+ unsigned int start_vert_location;
+ unsigned int instance_count;
+ unsigned int start_instance_location;
+ unsigned int base_vert_location;
+};
+
+/* These seem to be passed around as function args, so it works out
+ * better to keep them as #defines:
+ *
+ * Each flag is a distinct bit, so they can be OR'ed together; the four of
+ * them fit the 4-bit flags field of struct gen4_mi_flush.
+ */
+#define GEN4_FLUSH_READ_CACHE 0x1
+#define GEN4_FLUSH_STATE_CACHE 0x2
+#define GEN4_INHIBIT_FLUSH_RENDER_CACHE 0x4
+#define GEN4_FLUSH_SNAPSHOT_COUNTERS 0x8
+/* Single-dword flush packet: 4 flag bits, padding and a 16-bit opcode. */
+struct gen4_mi_flush
+{
+ unsigned int flags:4;
+ unsigned int pad:12;
+ unsigned int opcode:16;
+};
+/* Single-dword packet whose only payload is the statistics_enable bit. */
+struct gen4_vf_statistics
+{
+ unsigned int statistics_enable:1;
+ unsigned int pad:15;
+ unsigned int opcode:16;
+};
+
+
+/* Packet holding one binding table pointer dword per stage: vs, gs, clp,
+ * sf and wm.
+ */
+struct gen4_binding_table_pointers
+{
+ struct header header;
+ unsigned int vs;
+ unsigned int gs;
+ unsigned int clp;
+ unsigned int sf;
+ unsigned int wm;
+};
+
+/* Packet carrying the four float components of the blend constant colour.
+ * NOTE(review): the component order (presumably RGBA) is not visible here.
+ */
+struct gen4_blend_constant_color
+{
+ struct header header;
+ float blend_constant_color[4];
+};
+
+/* Depth buffer packet.  Every dword except the base address is a union of
+ * a bit-field view (.bits) and a raw view (.dword) of the same storage.
+ */
+struct gen4_depthbuffer
+{
+ union header_union header;
+
+ union {
+ struct {
+ unsigned int pitch:18;
+ unsigned int format:3;
+ unsigned int pad:4;
+ unsigned int depth_offset_disable:1;
+ unsigned int tile_walk:1;
+ unsigned int tiled_surface:1;
+ unsigned int pad2:1;
+ unsigned int surface_type:3;
+ } bits;
+ unsigned int dword;
+ } dword1;
+
+ unsigned int dword2_base_addr;
+
+ union {
+ struct {
+ unsigned int pad:1;
+ unsigned int mipmap_layout:1;
+ unsigned int lod:4;
+ unsigned int width:13;
+ unsigned int height:13;
+ } bits;
+ unsigned int dword;
+ } dword3;
+
+ union {
+ struct {
+ unsigned int pad:12;
+ unsigned int min_array_element:9;
+ unsigned int depth:11;
+ } bits;
+ unsigned int dword;
+ } dword4;
+};
+/* Drawing rectangle packet: header followed by three pairs of 16-bit
+ * coordinates (min corner, max corner, origin).
+ */
+struct gen4_drawrect
+{
+ struct header header;
+ unsigned int xmin:16;
+ unsigned int ymin:16;
+ unsigned int xmax:16;
+ unsigned int ymax:16;
+ unsigned int xorg:16;
+ unsigned int yorg:16;
+};
+
+
+
+/* Packet with a single float payload: the global depth offset clamp. */
+struct gen4_global_depth_offset_clamp
+{
+ struct header header;
+ float depth_offset_clamp;
+};
+/* Index buffer packet.  The header is available both as bit-fields and as
+ * a raw dword; it is followed by the buffer start and end values.
+ * NOTE(review): presumably start/end are addresses -- confirm units.
+ */
+struct gen4_indexbuffer
+{
+ union {
+ struct
+ {
+ unsigned int length:8;
+ unsigned int index_format:2;
+ unsigned int cut_index_enable:1;
+ unsigned int pad:5;
+ unsigned int opcode:16;
+ } bits;
+ unsigned int dword;
+
+ } header;
+
+ unsigned int buffer_start;
+ unsigned int buffer_end;
+};
+
+/* Line stipple packet: a 16-bit pattern, then a 9-bit repeat count and a
+ * 16-bit inverse repeat count.
+ */
+struct gen4_line_stipple
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int pattern:16;
+ unsigned int pad:16;
+ } bits0;
+
+ struct
+ {
+ unsigned int repeat_count:9;
+ unsigned int pad:7;
+ unsigned int inverse_repeat_count:16;
+ } bits1;
+};
+
+/* Packet of per-unit state pointers (vs, gs, clp, sf, wm, cc).  Every
+ * offset is a 27-bit field next to 5 non-address bits, which suggests
+ * 32-byte aligned state (TODO confirm); gs and clp use one of those 5 bits
+ * as an enable flag.
+ */
+struct gen4_pipelined_state_pointers
+{
+ struct header header;
+
+ struct {
+ unsigned int pad:5;
+ unsigned int offset:27;
+ } vs;
+
+ struct
+ {
+ unsigned int enable:1;
+ unsigned int pad:4;
+ unsigned int offset:27;
+ } gs;
+
+ struct
+ {
+ unsigned int enable:1;
+ unsigned int pad:4;
+ unsigned int offset:27;
+ } clp;
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int offset:27;
+ } sf;
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int offset:27;
+ } wm;
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int offset:27; /* KW: check me! */
+ } cc;
+};
+
+/* Polygon stipple offset packet: 5-bit y and x offsets in one dword. */
+struct gen4_polygon_stipple_offset
+{
+ struct header header;
+
+ struct {
+ unsigned int y_offset:5;
+ unsigned int pad:3;
+ unsigned int x_offset:5;
+ unsigned int pad0:19;
+ } bits0;
+};
+
+
+/* Polygon stipple pattern packet: header plus 32 dwords of stipple data
+ * (presumably one dword per row of a 32x32 pattern -- TODO confirm).
+ */
+struct gen4_polygon_stipple
+{
+ struct header header;
+ unsigned int stipple[32];
+};
+
+
+/* Single-dword packet selecting the active pipeline with one bit.
+ * NOTE(review): the meaning of 0/1 (presumably 3D vs. media) is not
+ * defined in this part of the file.
+ */
+struct gen4_pipeline_select
+{
+ struct
+ {
+ unsigned int pipeline_select:1;
+ unsigned int pad:15;
+ unsigned int opcode:16;
+ } header;
+};
+
+/* Pipe control packet: flush/stall flags and the post-sync operation in
+ * the header, a destination address dword, and two data dwords.
+ */
+struct gen4_pipe_control
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int notify_enable:1;
+ unsigned int pad:2;
+ unsigned int instruction_state_cache_flush_enable:1;
+ unsigned int write_cache_flush_enable:1;
+ unsigned int depth_stall_enable:1;
+ unsigned int post_sync_operation:2;
+
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int pad:2;
+ unsigned int dest_addr_type:1;
+ unsigned int dest_addr:29;
+ } bits1;
+
+ unsigned int data0;
+ unsigned int data1;
+};
+
+/* URB fence packet.  The header has one *_realloc flag per unit and the
+ * two following dwords hold six 10-bit fence values (vs, gs, clp, then
+ * sf, vf, cs).  Note the header flag is named vfe_realloc while the
+ * matching fence field is named vf_fence.
+ */
+struct gen4_urb_fence
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int vs_realloc:1;
+ unsigned int gs_realloc:1;
+ unsigned int clp_realloc:1;
+ unsigned int sf_realloc:1;
+ unsigned int vfe_realloc:1;
+ unsigned int cs_realloc:1;
+ unsigned int pad:2;
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int vs_fence:10;
+ unsigned int gs_fence:10;
+ unsigned int clp_fence:10;
+ unsigned int pad:2;
+ } bits0;
+
+ struct
+ {
+ unsigned int sf_fence:10;
+ unsigned int vf_fence:10;
+ unsigned int cs_fence:10;
+ unsigned int pad:2;
+ } bits1;
+};
+/* Constant buffer URB configuration: number of URB entries (3 bits) and
+ * URB entry size (5 bits).
+ */
+struct gen4_constant_buffer_state /* previously gen4_command_streamer */
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int nr_urb_entries:3;
+ unsigned int pad:1;
+ unsigned int urb_entry_size:5;
+ unsigned int pad0:23;
+ } bits0;
+};
+/* Constant buffer packet: header with a valid bit, then a dword packing a
+ * 6-bit buffer length with a 26-bit buffer address (so the address has 6
+ * implicit bits -- presumably 64-byte alignment, TODO confirm).
+ */
+struct gen4_constant_buffer
+{
+ struct
+ {
+ unsigned int length:8;
+ unsigned int valid:1;
+ unsigned int pad:7;
+ unsigned int opcode:16;
+ } header;
+
+ struct
+ {
+ unsigned int buffer_length:6;
+ unsigned int buffer_address:26;
+ } bits0;
+};
+/* State base address packet: three base addresses (general, surface,
+ * indirect object) and two upper bounds (general, indirect object), each
+ * dword carrying its own modify_enable bit.
+ */
+struct gen4_state_base_address
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:4;
+ unsigned int general_state_address:27;
+ } bits0;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:4;
+ unsigned int surface_state_address:27;
+ } bits1;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:4;
+ unsigned int indirect_object_state_address:27;
+ } bits2;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:11;
+ unsigned int general_state_upper_bound:20;
+ } bits3;
+
+ struct
+ {
+ unsigned int modify_enable:1;
+ unsigned int pad:11;
+ unsigned int indirect_object_state_upper_bound:20;
+ } bits4;
+};
+/* State prefetch packet: a 3-bit prefetch count and a 26-bit pointer. */
+struct gen4_state_prefetch
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int prefetch_count:3;
+ unsigned int pad:3;
+ unsigned int prefetch_pointer:26;
+ } bits0;
+};
+/* Packet setting the system instruction pointer: a 28-bit field next to 4
+ * padding bits.
+ */
+struct gen4_system_instruction_pointer
+{
+ struct header header;
+
+ struct
+ {
+ unsigned int pad:4;
+ unsigned int system_instruction_pointer:28;
+ } bits0;
+};
+
+
+
+
+/* State structs for the various fixed function units:
+ */
+
+/* First dword shared by the thread-based unit state structs: GRF register
+ * count and kernel start pointer.
+ */
+struct thread0
+{
+ unsigned int pad0:1;
+ unsigned int grf_reg_count:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer:26;
+};
+/* Second shared dword: exception enables, floating point mode, thread
+ * priority, binding table entry count and the single program flow bit.
+ * thread_priority presumably takes a GEN4_THREAD_PRIORITY_* value.
+ */
+struct thread1
+{
+ unsigned int ext_halt_exception_enable:1;
+ unsigned int sw_exception_enable:1;
+ unsigned int mask_stack_exception_enable:1;
+ unsigned int timeout_exception_enable:1;
+ unsigned int illegal_op_exception_enable:1;
+ unsigned int pad0:3;
+ unsigned int depth_coef_urb_read_offset:6; /* WM only */
+ unsigned int pad1:2;
+ unsigned int floating_point_mode:1;
+ unsigned int thread_priority:1;
+ unsigned int binding_table_entry_count:8;
+ unsigned int pad3:5;
+ unsigned int single_program_flow:1;
+};
+/* Third shared dword: per-thread scratch space size and scratch base
+ * pointer.  NOTE(review): the size field presumably takes a
+ * GEN4_SCRATCH_SPACE_SIZE_* value -- confirm.
+ */
+struct thread2
+{
+ unsigned int per_thread_scratch_space:4;
+ unsigned int pad0:6;
+ unsigned int scratch_space_base_pointer:22;
+};
+
+/* Fourth shared dword: dispatch GRF start register and the URB / constant
+ * URB entry read offsets and lengths (6 bits each).
+ */
+struct thread3
+{
+ unsigned int dispatch_grf_start_reg:4;
+ unsigned int urb_entry_read_offset:6;
+ unsigned int pad0:1;
+ unsigned int urb_entry_read_length:6;
+ unsigned int pad1:1;
+ unsigned int const_urb_entry_read_offset:6;
+ unsigned int pad2:1;
+ unsigned int const_urb_entry_read_length:6;
+ unsigned int pad3:1;
+};
+
+
+/* Clip unit state: the four shared thread dwords, the clip-specific
+ * thread4, clip5 and clip6 dwords, then four floats giving the viewport
+ * extents.
+ */
+struct gen4_clip_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:9;
+ unsigned int gs_output_stats:1; /* not always */
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:6; /* may be less */
+ unsigned int pad3:1;
+ } thread4;
+
+ struct
+ {
+ unsigned int pad0:13;
+ unsigned int clip_mode:3;
+ unsigned int userclip_enable_flags:8;
+ unsigned int userclip_must_clip:1;
+ unsigned int pad1:1;
+ unsigned int guard_band_enable:1;
+ unsigned int viewport_z_clip_enable:1;
+ unsigned int viewport_xy_clip_enable:1;
+ unsigned int vertex_position_space:1;
+ unsigned int api_mode:1;
+ unsigned int pad2:1;
+ } clip5;
+
+ struct
+ {
+ unsigned int pad0:5;
+ unsigned int clipper_viewport_state_ptr:27;
+ } clip6;
+
+
+ float viewport_xmin;
+ float viewport_xmax;
+ float viewport_ymin;
+ float viewport_ymax;
+};
+
+
+/* CC unit state: stencil controls (cc0-cc2), depth test and logic op
+ * enable (cc2), alpha test and blend enables (cc3), viewport state offset
+ * (cc4), logic op function and ia_* blend factors (cc5), colour blend
+ * factors, clamps and dither offsets (cc6) and the alpha reference value
+ * (cc7), which can be viewed as a float or as four bytes.
+ */
+struct gen4_cc_unit_state
+{
+ struct
+ {
+ unsigned int pad0:3;
+ unsigned int bf_stencil_pass_depth_pass_op:3;
+ unsigned int bf_stencil_pass_depth_fail_op:3;
+ unsigned int bf_stencil_fail_op:3;
+ unsigned int bf_stencil_func:3;
+ unsigned int bf_stencil_enable:1;
+ unsigned int pad1:2;
+ unsigned int stencil_write_enable:1;
+ unsigned int stencil_pass_depth_pass_op:3;
+ unsigned int stencil_pass_depth_fail_op:3;
+ unsigned int stencil_fail_op:3;
+ unsigned int stencil_func:3;
+ unsigned int stencil_enable:1;
+ } cc0;
+
+
+ struct
+ {
+ unsigned int bf_stencil_ref:8;
+ unsigned int stencil_write_mask:8;
+ unsigned int stencil_test_mask:8;
+ unsigned int stencil_ref:8;
+ } cc1;
+
+
+ struct
+ {
+ unsigned int logicop_enable:1;
+ unsigned int pad0:10;
+ unsigned int depth_write_enable:1;
+ unsigned int depth_test_function:3;
+ unsigned int depth_test:1;
+ unsigned int bf_stencil_write_mask:8;
+ unsigned int bf_stencil_test_mask:8;
+ } cc2;
+
+
+ struct
+ {
+ unsigned int pad0:8;
+ unsigned int alpha_test_func:3;
+ unsigned int alpha_test:1;
+ unsigned int blend_enable:1;
+ unsigned int ia_blend_enable:1;
+ unsigned int pad1:1;
+ unsigned int alpha_test_format:1;
+ unsigned int pad2:16;
+ } cc3;
+
+ struct
+ {
+ unsigned int pad0:5;
+ unsigned int cc_viewport_state_offset:27;
+ } cc4;
+
+ struct
+ {
+ unsigned int pad0:2;
+ unsigned int ia_dest_blend_factor:5;
+ unsigned int ia_src_blend_factor:5;
+ unsigned int ia_blend_function:3;
+ unsigned int statistics_enable:1;
+ unsigned int logicop_func:4;
+ unsigned int pad1:11;
+ unsigned int dither_enable:1;
+ } cc5;
+
+ struct
+ {
+ unsigned int clamp_post_alpha_blend:1;
+ unsigned int clamp_pre_alpha_blend:1;
+ unsigned int clamp_range:2;
+ unsigned int pad0:11;
+ unsigned int y_dither_offset:2;
+ unsigned int x_dither_offset:2;
+ unsigned int dest_blend_factor:5;
+ unsigned int src_blend_factor:5;
+ unsigned int blend_function:3;
+ } cc6;
+
+ struct {
+ union {
+ float f;
+ unsigned char ub[4];
+ } alpha_ref;
+ } cc7;
+};
+
+
+/* SF unit state.  Unlike the clip, gs, vs and wm state structs it does not
+ * embed struct thread1 but its own sf1 dword, which exposes fewer
+ * exception enables at different bit positions.
+ */
+struct gen4_sf_unit_state
+{
+ struct thread0 thread0;
+ struct {
+ unsigned int pad0:7;
+ unsigned int sw_exception_enable:1;
+ unsigned int pad1:3;
+ unsigned int mask_stack_exception_enable:1;
+ unsigned int pad2:1;
+ unsigned int illegal_op_exception_enable:1;
+ unsigned int pad3:2;
+ unsigned int floating_point_mode:1;
+ unsigned int thread_priority:1;
+ unsigned int binding_table_entry_count:8;
+ unsigned int pad4:5;
+ unsigned int single_program_flow:1;
+ } sf1;
+
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:10;
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:6;
+ unsigned int pad3:1;
+ } thread4;
+
+ struct
+ {
+ unsigned int front_winding:1;
+ unsigned int viewport_transform:1;
+ unsigned int pad0:3;
+ unsigned int sf_viewport_state_offset:27;
+ } sf5;
+
+ struct
+ {
+ unsigned int pad0:9;
+ unsigned int dest_org_vbias:4;
+ unsigned int dest_org_hbias:4;
+ unsigned int scissor:1;
+ unsigned int disable_2x2_trifilter:1;
+ unsigned int disable_zero_pix_trifilter:1;
+ unsigned int point_rast_rule:2;
+ unsigned int line_endcap_aa_region_width:2;
+ unsigned int line_width:4;
+ unsigned int fast_scissor_disable:1;
+ unsigned int cull_mode:2;
+ unsigned int aa_enable:1;
+ } sf6;
+
+ struct
+ {
+ unsigned int point_size:11;
+ unsigned int use_point_size_state:1;
+ unsigned int subpixel_precision:1;
+ unsigned int sprite_point:1;
+ unsigned int pad0:11;
+ unsigned int trifan_pv:2;
+ unsigned int linestrip_pv:2;
+ unsigned int tristrip_pv:2;
+ unsigned int line_last_pixel_enable:1;
+ } sf7;
+
+};
+
+/* GS unit state.  Note that max_threads is a single bit here, while the
+ * clip, sf and vs variants of thread4 use wider fields.
+ */
+struct gen4_gs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:10;
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:1;
+ unsigned int pad3:6;
+ } thread4;
+
+ struct
+ {
+ unsigned int sampler_count:3;
+ unsigned int pad0:2;
+ unsigned int sampler_state_pointer:27;
+ } gs5;
+
+
+ struct
+ {
+ unsigned int max_vp_index:4;
+ unsigned int pad0:26;
+ unsigned int reorder_enable:1;
+ unsigned int pad1:1;
+ } gs6;
+};
+
+/* VS unit state: shared thread dwords, thread4 (URB entries, max
+ * threads), sampler count/pointer (vs5) and the enable / vertex cache
+ * disable bits (vs6).
+ */
+struct gen4_vs_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct
+ {
+ unsigned int pad0:10;
+ unsigned int stats_enable:1;
+ unsigned int nr_urb_entries:7;
+ unsigned int pad1:1;
+ unsigned int urb_entry_allocation_size:5;
+ unsigned int pad2:1;
+ unsigned int max_threads:4;
+ unsigned int pad3:3;
+ } thread4;
+
+ struct
+ {
+ unsigned int sampler_count:3;
+ unsigned int pad0:2;
+ unsigned int sampler_state_pointer:27;
+ } vs5;
+
+ struct
+ {
+ unsigned int vs_enable:1;
+ unsigned int vert_cache_disable:1;
+ unsigned int pad0:30;
+ } vs6;
+};
+
+/* WM unit state.  wm8, wm9 and wm10 repeat the struct thread0 layout (GRF
+ * register count + kernel start pointer) for three further kernels.
+ */
+struct gen4_wm_unit_state
+{
+ struct thread0 thread0;
+ struct thread1 thread1;
+ struct thread2 thread2;
+ struct thread3 thread3;
+
+ struct {
+ unsigned int stats_enable:1;
+ unsigned int pad0:1;
+ unsigned int sampler_count:3;
+ unsigned int sampler_state_pointer:27;
+ } wm4;
+
+ struct
+ {
+ unsigned int enable_8_pix:1;
+ unsigned int enable_16_pix:1;
+ unsigned int enable_32_pix:1;
+ unsigned int pad0:7;
+ unsigned int legacy_global_depth_bias:1;
+ unsigned int line_stipple:1;
+ unsigned int depth_offset:1;
+ unsigned int polygon_stipple:1;
+ unsigned int line_aa_region_width:2;
+ unsigned int line_endcap_aa_region_width:2;
+ unsigned int early_depth_test:1;
+ unsigned int thread_dispatch_enable:1;
+ unsigned int program_uses_depth:1;
+ unsigned int program_computes_depth:1;
+ unsigned int program_uses_killpixel:1;
+ unsigned int legacy_line_rast: 1;
+ unsigned int transposed_urb_read:1;
+ unsigned int max_threads:7;
+ } wm5;
+
+ float global_depth_offset_constant;
+ float global_depth_offset_scale;
+
+ struct {
+ unsigned int pad0:1;
+ unsigned int grf_reg_count_1:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer_1:26;
+ } wm8;
+
+ struct {
+ unsigned int pad0:1;
+ unsigned int grf_reg_count_2:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer_2:26;
+ } wm9;
+
+ struct {
+ unsigned int pad0:1;
+ unsigned int grf_reg_count_3:3;
+ unsigned int pad1:2;
+ unsigned int kernel_start_pointer_3:26;
+ } wm10;
+};
+/* Pads struct gen4_wm_unit_state out to 64 bytes.  The pad array size is
+ * only valid while the state struct itself is smaller than 64 bytes.
+ */
+struct gen4_wm_unit_state_padded {
+ struct gen4_wm_unit_state state;
+ char pad[64 - sizeof(struct gen4_wm_unit_state)];
+};
+
+/* The hardware supports two different modes for border color. The
+ * default (OpenGL) mode uses floating-point color channels, while the
+ * legacy mode uses 4 bytes.
+ *
+ * More significantly, the legacy mode respects the components of the
+ * border color for channels not present in the source, (whereas the
+ * default mode will ignore the border color's alpha channel and use
+ * alpha==1 for an RGB source, for example).
+ *
+ * The legacy mode matches the semantics specified by the Render
+ * extension.
+ *
+ * The mode is presumably chosen through the sampler state's
+ * border_color_mode bit using GEN4_BORDER_COLOR_MODE_* (TODO confirm).
+ * This struct is the default-mode layout: four floats.
+ */
+struct gen4_sampler_default_border_color {
+ float color[4];
+};
+/* Legacy-mode border colour layout: four bytes, one per channel. */
+struct gen4_sampler_legacy_border_color {
+ uint8_t color[4];
+};
+/* Sampler state: filtering and LOD controls (ss0), wrap modes and LOD
+ * clamps (ss1), border colour pointer (ss2) and anisotropy / chroma key /
+ * monochrome filter settings (ss3).  The *_wrap_mode fields presumably
+ * take GEN4_TEXCOORDMODE_* values.
+ */
+struct gen4_sampler_state
+{
+
+ struct
+ {
+ unsigned int shadow_function:3;
+ unsigned int lod_bias:11;
+ unsigned int min_filter:3;
+ unsigned int mag_filter:3;
+ unsigned int mip_filter:2;
+ unsigned int base_level:5;
+ unsigned int pad:1;
+ unsigned int lod_preclamp:1;
+ unsigned int border_color_mode:1;
+ unsigned int pad0:1;
+ unsigned int disable:1;
+ } ss0;
+
+ struct
+ {
+ unsigned int r_wrap_mode:3;
+ unsigned int t_wrap_mode:3;
+ unsigned int s_wrap_mode:3;
+ unsigned int pad:3;
+ unsigned int max_lod:10;
+ unsigned int min_lod:10;
+ } ss1;
+
+
+ struct
+ {
+ unsigned int pad:5;
+ unsigned int border_color_pointer:27;
+ } ss2;
+
+ struct
+ {
+ unsigned int pad:19;
+ unsigned int max_aniso:3;
+ unsigned int chroma_key_mode:1;
+ unsigned int chroma_key_index:2;
+ unsigned int chroma_key_enable:1;
+ unsigned int monochrome_filter_width:3;
+ unsigned int monochrome_filter_height:3;
+ } ss3;
+};
+
+/* Clipper viewport: x and y extents as floats. */
+struct gen4_clipper_viewport
+{
+ float xmin;
+ float xmax;
+ float ymin;
+ float ymax;
+};
+/* CC viewport: minimum and maximum depth as floats. */
+struct gen4_cc_viewport
+{
+ float min_depth;
+ float max_depth;
+};
+/* SF viewport: six float coefficients named after matrix elements
+ * (m00, m11, m22, m30, m31, m32), followed by a scissor rectangle of four
+ * shorts.
+ */
+struct gen4_sf_viewport
+{
+ struct {
+ float m00;
+ float m11;
+ float m22;
+ float m30;
+ float m31;
+ float m32;
+ } viewport;
+
+ struct {
+ short xmin;
+ short ymin;
+ short xmax;
+ short ymax;
+ } scissor;
+};
+
+/* Documented in the subsystem/shared-functions/sampler chapter...
+ *
+ * Six dwords: format, type and write-disable flags (ss0), base address
+ * (ss1), mip count and size (ss2), tiling, pitch and depth (ss3), minimum
+ * array element and LOD (ss4) and x/y offsets (ss5).  surface_format is 9
+ * bits wide, matching the range of the GEN4_SURFACEFORMAT_* values.
+ */
+struct gen4_surface_state
+{
+ struct {
+ unsigned int cube_pos_z:1;
+ unsigned int cube_neg_z:1;
+ unsigned int cube_pos_y:1;
+ unsigned int cube_neg_y:1;
+ unsigned int cube_pos_x:1;
+ unsigned int cube_neg_x:1;
+ unsigned int pad:3;
+ unsigned int render_cache_read_mode:1;
+ unsigned int mipmap_layout_mode:1;
+ unsigned int vert_line_stride_ofs:1;
+ unsigned int vert_line_stride:1;
+ unsigned int color_blend:1;
+ unsigned int writedisable_blue:1;
+ unsigned int writedisable_green:1;
+ unsigned int writedisable_red:1;
+ unsigned int writedisable_alpha:1;
+ unsigned int surface_format:9;
+ unsigned int data_return_format:1;
+ unsigned int pad0:1;
+ unsigned int surface_type:3;
+ } ss0;
+
+ struct {
+ unsigned int base_addr;
+ } ss1;
+
+ struct {
+ unsigned int render_target_rotation:2;
+ unsigned int mip_count:4;
+ unsigned int width:13;
+ unsigned int height:13;
+ } ss2;
+
+ struct {
+ unsigned int tile_walk:1;
+ unsigned int tiled_surface:1;
+ unsigned int pad:1;
+ unsigned int pitch:18;
+ unsigned int depth:11;
+ } ss3;
+
+ struct {
+ unsigned int pad:19;
+ unsigned int min_array_elt:9;
+ unsigned int min_lod:4;
+ } ss4;
+
+ struct {
+ unsigned int pad:20;
+ unsigned int y_offset:4;
+ unsigned int pad2:1;
+ unsigned int x_offset:7;
+ } ss5;
+};
+
+
+
+struct gen4_vertex_buffer_state /* one entry: vb0 bit-fields, then whole unsigned ints */
+{
+ struct {
+ unsigned int pitch:11;
+ unsigned int pad:15;
+ unsigned int access_type:1;
+ unsigned int vb_index:5;
+ } vb0; /* 11 + 15 + 1 + 5 = 32 bits */
+
+ unsigned int start_addr;
+ unsigned int max_index;
+#if 1 /* always true: the member below is unconditionally part of the struct */
+ unsigned int instance_data_step_rate; /* not included for sequential/random vertices? */
+#endif
+};
+
+/* Number of entries in gen4_vb_array_state.vb[]. */
+#define GEN4_VBP_MAX 17
+
+/* A header followed by a fixed array of GEN4_VBP_MAX vertex buffer entries. */
+struct gen4_vb_array_state
+{
+ struct header header;
+ struct gen4_vertex_buffer_state vb[GEN4_VBP_MAX];
+};
+
+
+struct gen4_vertex_element_state /* two groups of bit-fields, ve0 then ve1 */
+{
+ struct
+ {
+ unsigned int src_offset:11;
+ unsigned int pad:5;
+ unsigned int src_format:9;
+ unsigned int pad0:1;
+ unsigned int valid:1;
+ unsigned int vertex_buffer_index:5;
+ } ve0; /* 11 + 5 + 9 + 1 + 1 + 5 = 32 bits */
+
+ struct
+ {
+ unsigned int dst_offset:8;
+ unsigned int pad:8;
+ unsigned int vfcomponent3:4; /* components are declared from 3 down to 0 */
+ unsigned int vfcomponent2:4;
+ unsigned int vfcomponent1:4;
+ unsigned int vfcomponent0:4;
+ } ve1; /* 8 + 8 + 4 * 4 = 32 bits */
+};
+
+/* Number of entries in gen4_vertex_element_packet.ve[]. */
+#define GEN4_VEP_MAX 18
+
+/* A header followed by a fixed array of GEN4_VEP_MAX vertex element entries. */
+struct gen4_vertex_element_packet
+{
+ struct header header;
+ struct gen4_vertex_element_state ve[GEN4_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
+};
+
+
+struct gen4_urb_immediate { /* one run of bit-fields, 32 bits in total; also the `urb` member of gen4_instruction.bits3 */
+ unsigned int opcode:4;
+ unsigned int offset:6;
+ unsigned int swizzle_control:2;
+ unsigned int pad:1;
+ unsigned int allocate:1;
+ unsigned int used:1;
+ unsigned int complete:1; /* the fields up to here occupy 16 bits */
+ unsigned int response_length:4; /* from here on, same names and widths as bits3.generic */
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+};
+
+/* Instruction format for the execution units:
+ *
+ * Four 32-bit groups: `header`, then the unions `bits1`, `bits2` and
+ * `bits3`. Every alternative inside a union is a run of bit-fields adding
+ * up to 32 bits, so the alternatives overlay one another exactly.
+ *
+ * bits1 holds the destination plus the register file/type selectors,
+ * bits2 holds source 0, and bits3 holds either source 1, the if/else
+ * jump fields, one of the message layouts, or the raw value `ud`.
+ *
+ * Fields that may be negative are declared `signed int`: for a bit-field
+ * it is implementation-defined whether plain `int` is signed, so the
+ * signedness is spelled out rather than left to the compiler.
+ *
+ * NOTE(review): da1/ia1/da16/ia16 presumably stand for direct/indirect
+ * addressing in align1/align16 access mode -- confirm against the ISA
+ * documentation.
+ */
+
+struct gen4_instruction
+{
+ struct
+ {
+ unsigned int opcode:7;
+ unsigned int pad:1;
+ unsigned int access_mode:1;
+ unsigned int mask_control:1;
+ unsigned int dependency_control:2;
+ unsigned int compression_control:2;
+ unsigned int thread_control:2;
+ unsigned int predicate_control:4;
+ unsigned int predicate_inverse:1;
+ unsigned int execution_size:3;
+ /* The misspelt member name (sic) is kept: users of this header refer to it. */
+ unsigned int destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */
+ unsigned int pad0:2;
+ unsigned int debug_control:1;
+ unsigned int saturate:1;
+ } header;
+
+ /* Destination operand; four alternative layouts of the same 32 bits. */
+ union {
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int src1_reg_file:2;
+ unsigned int src1_reg_type:3;
+ unsigned int pad:1;
+ unsigned int dest_subreg_nr:5;
+ unsigned int dest_reg_nr:8;
+ unsigned int dest_horiz_stride:2;
+ unsigned int dest_address_mode:1;
+ } da1;
+
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int pad:6;
+ signed int dest_indirect_offset:10; /* offset against the deref'd address reg */
+ unsigned int dest_subreg_nr:3; /* subnr for the address reg a0.x */
+ unsigned int dest_horiz_stride:2;
+ unsigned int dest_address_mode:1;
+ } ia1;
+
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int src1_reg_file:2;
+ unsigned int src1_reg_type:3;
+ unsigned int pad0:1;
+ unsigned int dest_writemask:4;
+ unsigned int dest_subreg_nr:1;
+ unsigned int dest_reg_nr:8;
+ unsigned int pad1:2;
+ unsigned int dest_address_mode:1;
+ } da16;
+
+ struct
+ {
+ unsigned int dest_reg_file:2;
+ unsigned int dest_reg_type:3;
+ unsigned int src0_reg_file:2;
+ unsigned int src0_reg_type:3;
+ unsigned int pad0:6;
+ unsigned int dest_writemask:4;
+ signed int dest_indirect_offset:6;
+ unsigned int dest_subreg_nr:3;
+ unsigned int pad1:2;
+ unsigned int dest_address_mode:1;
+ } ia16;
+ } bits1;
+
+
+ /* Source operand 0; four alternative layouts of the same 32 bits. */
+ union {
+ struct
+ {
+ unsigned int src0_subreg_nr:5;
+ unsigned int src0_reg_nr:8;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_horiz_stride:2;
+ unsigned int src0_width:3;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad:6;
+ } da1;
+
+ struct
+ {
+ signed int src0_indirect_offset:10;
+ unsigned int src0_subreg_nr:3;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_horiz_stride:2;
+ unsigned int src0_width:3;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad:6;
+ } ia1;
+
+ struct
+ {
+ unsigned int src0_swz_x:2;
+ unsigned int src0_swz_y:2;
+ unsigned int src0_subreg_nr:1;
+ unsigned int src0_reg_nr:8;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_swz_z:2;
+ unsigned int src0_swz_w:2;
+ unsigned int pad0:1;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad1:6;
+ } da16;
+
+ struct
+ {
+ unsigned int src0_swz_x:2;
+ unsigned int src0_swz_y:2;
+ signed int src0_indirect_offset:6;
+ unsigned int src0_subreg_nr:3;
+ unsigned int src0_abs:1;
+ unsigned int src0_negate:1;
+ unsigned int src0_address_mode:1;
+ unsigned int src0_swz_z:2;
+ unsigned int src0_swz_w:2;
+ unsigned int pad0:1;
+ unsigned int src0_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad1:6;
+ } ia16;
+
+ } bits2;
+
+ /* Source operand 1, if/else jump fields, a message layout, or the raw dword. */
+ union
+ {
+ struct
+ {
+ unsigned int src1_subreg_nr:5;
+ unsigned int src1_reg_nr:8;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad:1;
+ unsigned int src1_horiz_stride:2;
+ unsigned int src1_width:3;
+ unsigned int src1_vert_stride:4;
+ unsigned int pad0:7;
+ } da1;
+
+ struct
+ {
+ unsigned int src1_swz_x:2;
+ unsigned int src1_swz_y:2;
+ unsigned int src1_subreg_nr:1;
+ unsigned int src1_reg_nr:8;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad0:1;
+ unsigned int src1_swz_z:2;
+ unsigned int src1_swz_w:2;
+ unsigned int pad1:1;
+ unsigned int src1_vert_stride:4;
+ unsigned int pad2:7;
+ } da16;
+
+ struct
+ {
+ signed int src1_indirect_offset:10;
+ unsigned int src1_subreg_nr:3;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad0:1;
+ unsigned int src1_horiz_stride:2;
+ unsigned int src1_width:3;
+ unsigned int src1_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad1:6;
+ } ia1;
+
+ struct
+ {
+ unsigned int src1_swz_x:2;
+ unsigned int src1_swz_y:2;
+ signed int src1_indirect_offset:6;
+ unsigned int src1_subreg_nr:3;
+ unsigned int src1_abs:1;
+ unsigned int src1_negate:1;
+ unsigned int pad0:1;
+ unsigned int src1_swz_z:2;
+ unsigned int src1_swz_w:2;
+ unsigned int pad1:1;
+ unsigned int src1_vert_stride:4;
+ unsigned int flag_reg_nr:1;
+ unsigned int pad2:6;
+ } ia16;
+
+
+ struct
+ {
+ signed int jump_count:16; /* note: signed */
+ unsigned int pop_count:4;
+ unsigned int pad0:12;
+ } if_else;
+
+ /*
+ * The layouts below all end with the same 16 bits:
+ * response_length, msg_length, msg_target, pad1, end_of_thread.
+ */
+ struct {
+ unsigned int function:4;
+ unsigned int int_type:1;
+ unsigned int precision:1;
+ unsigned int saturate:1;
+ unsigned int data_type:1;
+ unsigned int pad0:8;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } math;
+
+ struct {
+ unsigned int binding_table_index:8;
+ unsigned int sampler:4;
+ unsigned int return_format:2;
+ unsigned int msg_type:2;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } sampler;
+
+ struct gen4_urb_immediate urb;
+
+ struct {
+ unsigned int binding_table_index:8;
+ unsigned int msg_control:4;
+ unsigned int msg_type:2;
+ unsigned int target_cache:2;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } dp_read;
+
+ struct {
+ unsigned int binding_table_index:8;
+ unsigned int msg_control:3;
+ unsigned int pixel_scoreboard_clear:1;
+ unsigned int msg_type:3;
+ unsigned int send_commit_msg:1;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } dp_write;
+
+ struct {
+ unsigned int pad:16;
+ unsigned int response_length:4;
+ unsigned int msg_length:4;
+ unsigned int msg_target:4;
+ unsigned int pad1:3;
+ unsigned int end_of_thread:1;
+ } generic;
+
+ unsigned int ud; /* the whole group as one unsigned value */
+ } bits3;
+};
+
+/* media pipeline */
+
+struct gen4_vfe_state { /* three groups of bit-fields: vfe0, vfe1, vfe2 */
+ struct {
+ unsigned int per_thread_scratch_space:4;
+ unsigned int pad3:3;
+ unsigned int extend_vfe_state_present:1;
+ unsigned int pad2:2;
+ unsigned int scratch_base:22;
+ } vfe0; /* 4 + 3 + 1 + 2 + 22 = 32 bits */
+
+ struct {
+ unsigned int debug_counter_control:2;
+ unsigned int children_present:1;
+ unsigned int vfe_mode:4;
+ unsigned int pad2:2;
+ unsigned int num_urb_entries:7;
+ unsigned int urb_entry_alloc_size:9;
+ unsigned int max_threads:7;
+ } vfe1; /* 2 + 1 + 4 + 2 + 7 + 9 + 7 = 32 bits */
+
+ struct {
+ unsigned int pad4:4;
+ unsigned int interface_descriptor_base:28;
+ } vfe2; /* 4 + 28 = 32 bits */
+};
+
+struct gen4_vld_state { /* vld0, vld1, then two tables of eight 4-bit indices */
+ struct {
+ unsigned int pad6:6;
+ unsigned int scan_order:1;
+ unsigned int intra_vlc_format:1;
+ unsigned int quantizer_scale_type:1;
+ unsigned int concealment_motion_vector:1;
+ unsigned int frame_predict_frame_dct:1;
+ unsigned int top_field_first:1;
+ unsigned int picture_structure:2;
+ unsigned int intra_dc_precision:2;
+ unsigned int f_code_0_0:4; /* four 4-bit f_code fields close the group */
+ unsigned int f_code_0_1:4;
+ unsigned int f_code_1_0:4;
+ unsigned int f_code_1_1:4;
+ } vld0; /* 6 + 6 * 1 + 2 + 2 + 4 * 4 = 32 bits */
+
+ struct {
+ unsigned int pad2:9;
+ unsigned int picture_coding_type:2;
+ unsigned int pad:21;
+ } vld1; /* 9 + 2 + 21 = 32 bits */
+
+ struct {
+ unsigned int index_0:4;
+ unsigned int index_1:4;
+ unsigned int index_2:4;
+ unsigned int index_3:4;
+ unsigned int index_4:4;
+ unsigned int index_5:4;
+ unsigned int index_6:4;
+ unsigned int index_7:4;
+ } desc_remap_table0; /* indices 0..7, 8 * 4 = 32 bits */
+
+ struct {
+ unsigned int index_8:4;
+ unsigned int index_9:4;
+ unsigned int index_10:4;
+ unsigned int index_11:4;
+ unsigned int index_12:4;
+ unsigned int index_13:4;
+ unsigned int index_14:4;
+ unsigned int index_15:4;
+ } desc_remap_table1; /* indices 8..15, 8 * 4 = 32 bits */
+};
+
+struct gen4_interface_descriptor { /* four groups of bit-fields: desc0..desc3 */
+ struct {
+ unsigned int grf_reg_blocks:4;
+ unsigned int pad:2;
+ unsigned int kernel_start_pointer:26;
+ } desc0; /* 4 + 2 + 26 = 32 bits */
+
+ struct {
+ unsigned int pad:7;
+ unsigned int software_exception:1;
+ unsigned int pad2:3;
+ unsigned int maskstack_exception:1;
+ unsigned int pad3:1;
+ unsigned int illegal_opcode_exception:1;
+ unsigned int pad4:2;
+ unsigned int floating_point_mode:1;
+ unsigned int thread_priority:1;
+ unsigned int single_program_flow:1;
+ unsigned int pad5:1;
+ unsigned int const_urb_entry_read_offset:6;
+ unsigned int const_urb_entry_read_len:6;
+ } desc1; /* 7+1+3+1+1+1+2+1+1+1+1+6+6 = 32 bits */
+
+ struct {
+ unsigned int pad:2;
+ unsigned int sampler_count:3;
+ unsigned int sampler_state_pointer:27;
+ } desc2; /* 2 + 3 + 27 = 32 bits */
+
+ struct {
+ unsigned int binding_table_entry_count:5;
+ unsigned int binding_table_pointer:27;
+ } desc3; /* 5 + 27 = 32 bits */
+};
+
+struct gen6_blend_state /* two groups of bit-fields: blend0 and blend1 */
+{
+ struct {
+ unsigned int dest_blend_factor:5;
+ unsigned int source_blend_factor:5;
+ unsigned int pad3:1;
+ unsigned int blend_func:3;
+ unsigned int pad2:1;
+ unsigned int ia_dest_blend_factor:5; /* NOTE(review): "ia_" presumably means independent alpha -- confirm */
+ unsigned int ia_source_blend_factor:5;
+ unsigned int pad1:1;
+ unsigned int ia_blend_func:3;
+ unsigned int pad0:1;
+ unsigned int ia_blend_enable:1;
+ unsigned int blend_enable:1;
+ } blend0; /* 5+5+1+3+1+5+5+1+3+1+1+1 = 32 bits */
+
+ struct {
+ unsigned int post_blend_clamp_enable:1;
+ unsigned int pre_blend_clamp_enable:1;
+ unsigned int clamp_range:2;
+ unsigned int pad0:4;
+ unsigned int x_dither_offset:2;
+ unsigned int y_dither_offset:2;
+ unsigned int dither_enable:1;
+ unsigned int alpha_test_func:3;
+ unsigned int alpha_test_enable:1;
+ unsigned int pad1:1;
+ unsigned int logic_op_func:4;
+ unsigned int logic_op_enable:1;
+ unsigned int pad2:1;
+ unsigned int write_disable_b:1; /* one write-disable bit per channel: b, g, r, a */
+ unsigned int write_disable_g:1;
+ unsigned int write_disable_r:1;
+ unsigned int write_disable_a:1;
+ unsigned int pad3:1;
+ unsigned int alpha_to_coverage_dither:1;
+ unsigned int alpha_to_one:1;
+ unsigned int alpha_to_coverage:1;
+ } blend1; /* widths add up to 32 bits */
+};
+
+struct gen6_color_calc_state /* cc0 bit-fields, the cc1 union, then four float constants */
+{
+ struct {
+ unsigned int alpha_test_format:1;
+ unsigned int pad0:14;
+ unsigned int round_disable:1;
+ unsigned int bf_stencil_ref:8;
+ unsigned int stencil_ref:8;
+ } cc0; /* 1 + 14 + 1 + 8 + 8 = 32 bits */
+
+ union { /* the same storage viewed as a float or as an 8-bit unsigned value */
+ float alpha_ref_f;
+ struct {
+ unsigned int ui:8;
+ unsigned int pad0:24;
+ } alpha_ref_fi;
+ } cc1; /* NOTE(review): presumably cc0.alpha_test_format selects the view -- confirm */
+
+ float constant_r; /* constants are stored in r, g, b, a order */
+ float constant_g;
+ float constant_b;
+ float constant_a;
+};
+
+struct gen6_depth_stencil_state /* three groups of bit-fields: ds0, ds1, ds2 */
+{
+ struct {
+ unsigned int pad0:3;
+ unsigned int bf_stencil_pass_depth_pass_op:3; /* NOTE(review): "bf_" presumably means back-face -- confirm */
+ unsigned int bf_stencil_pass_depth_fail_op:3;
+ unsigned int bf_stencil_fail_op:3;
+ unsigned int bf_stencil_func:3;
+ unsigned int bf_stencil_enable:1;
+ unsigned int pad1:2;
+ unsigned int stencil_write_enable:1;
+ unsigned int stencil_pass_depth_pass_op:3; /* same op/func/enable sequence as the bf_ fields above */
+ unsigned int stencil_pass_depth_fail_op:3;
+ unsigned int stencil_fail_op:3;
+ unsigned int stencil_func:3;
+ unsigned int stencil_enable:1;
+ } ds0; /* 3 + 13 + 2 + 1 + 13 = 32 bits */
+
+ struct {
+ unsigned int bf_stencil_write_mask:8;
+ unsigned int bf_stencil_test_mask:8;
+ unsigned int stencil_write_mask:8;
+ unsigned int stencil_test_mask:8;
+ } ds1; /* four 8-bit masks = 32 bits */
+
+ struct {
+ unsigned int pad0:26;
+ unsigned int depth_write_enable:1;
+ unsigned int depth_test_func:3;
+ unsigned int pad1:1;
+ unsigned int depth_test_enable:1;
+ } ds2; /* 26 + 1 + 3 + 1 + 1 = 32 bits */
+};
+
+/* Sampler filter selector. FILTER_COUNT follows the last filter, so it equals the number of filters. */
+typedef enum {
+ SAMPLER_FILTER_NEAREST = 0,
+ SAMPLER_FILTER_BILINEAR = 1,
+ FILTER_COUNT
+} sampler_filter_t;
+
+/* Sampler extend selector. EXTEND_COUNT follows the last mode, so it equals the number of modes. */
+typedef enum {
+ SAMPLER_EXTEND_NONE = 0,
+ SAMPLER_EXTEND_REPEAT = 1,
+ SAMPLER_EXTEND_PAD = 2,
+ SAMPLER_EXTEND_REFLECT = 3,
+ EXTEND_COUNT
+} sampler_extend_t;
+
+/*
+ * Kernel selector. For the first four pairs the _PROJECTIVE variant
+ * directly follows its base kernel (variant == base + 1). KERNEL_COUNT
+ * follows the last kernel, so it equals the number of kernels.
+ */
+typedef enum {
+ WM_KERNEL = 0,
+ WM_KERNEL_PROJECTIVE = 1,
+
+ WM_KERNEL_MASK = 2,
+ WM_KERNEL_MASK_PROJECTIVE = 3,
+
+ WM_KERNEL_MASKCA = 4,
+ WM_KERNEL_MASKCA_PROJECTIVE = 5,
+
+ WM_KERNEL_MASKCA_SRCALPHA = 6,
+ WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE = 7,
+
+ WM_KERNEL_VIDEO_PLANAR = 8,
+ WM_KERNEL_VIDEO_PACKED = 9,
+ KERNEL_COUNT
+} wm_kernel_t;
+
+#endif