diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2013-07-26 09:35:06 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2013-07-28 14:49:07 +0100 |
commit | 45d4e8dcf9aee37015b1ee026997ed4dabdf112e (patch) | |
tree | c140efa6a28d80266849d08d5ea368419e95c977 /src/uxa | |
parent | ab28526ea43728fb675448515e1519a970fb5f56 (diff) |
uxa: Clear up the common intel directory
Move all the UXA backend specifc files into their own subdirectory.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src/uxa')
30 files changed, 23989 insertions, 0 deletions
diff --git a/src/uxa/Makefile.am b/src/uxa/Makefile.am new file mode 100644 index 00000000..5f89cfc2 --- /dev/null +++ b/src/uxa/Makefile.am @@ -0,0 +1,79 @@ +# Copyright 2005 Adam Jackson. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# on the rights to use, copy, modify, merge, publish, distribute, sub +# license, and/or sell copies of the Software, and to permit persons to whom +# the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +# ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +AM_CFLAGS = @CWARNFLAGS@ @XORG_CFLAGS@ @DRM_CFLAGS@ @PCIACCESS_CFLAGS@ +AM_CFLAGS += @UDEV_CFLAGS@ @DRM_CFLAGS@ @DRMINTEL_CFLAGS@ +AM_CFLAGS += -I$(top_srcdir)/uxa -I$(top_srcdir)/src -I$(top_srcdir)/src/render_program + +noinst_LTLIBRARIES = libuxa.la +libuxa_la_LIBADD = @UDEV_LIBS@ @DRMINTEL_LIBS@ @DRM_LIBS@ $(top_builddir)/uxa/libuxa.la +libuxa_la_SOURCES = \ + brw_defines.h \ + brw_structs.h \ + common.h \ + intel.h \ + intel_batchbuffer.c \ + intel_batchbuffer.h \ + intel_display.c \ + intel_driver.c \ + intel_glamor.h \ + intel_memory.c \ + intel_uxa.c \ + intel_video.c \ + intel_video.h \ + i830_3d.c \ + i830_render.c \ + i830_reg.h \ + i915_3d.h \ + i915_reg.h \ + i915_3d.c \ + i915_render.c \ + i915_video.c \ + i965_reg.h \ + i965_3d.c \ + i965_video.c \ + i965_render.c \ + uxa_module.h \ + $(NULL) + +if GLAMOR +AM_CFLAGS += @LIBGLAMOR_CFLAGS@ +libuxa_la_LIBADD += @LIBGLAMOR_LIBS@ +libuxa_la_SOURCES += \ + intel_glamor.c \ + $(NULL) +endif + +if DRI2 +libuxa_la_SOURCES += \ + intel_dri.c \ + $(NULL) +libuxa_la_LIBADD += \ + $(DRI_LIBS) \ + @CLOCK_GETTIME_LIBS@ \ + $(NULL) +endif + +if XVMC +AM_CFLAGS += -I$(top_srcdir)/xvmc +libuxa_la_SOURCES += \ + intel_hwmc.c \ + $(NULL) +endif diff --git a/src/uxa/brw_defines.h b/src/uxa/brw_defines.h new file mode 100644 index 00000000..e580a8f4 --- /dev/null +++ b/src/uxa/brw_defines.h @@ -0,0 +1,881 @@ + /************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_DEFINES_H +#define BRW_DEFINES_H + +/* + */ +#if 0 +#define MI_NOOP 0x00 +#define MI_USER_INTERRUPT 0x02 +#define MI_WAIT_FOR_EVENT 0x03 +#define MI_FLUSH 0x04 +#define MI_REPORT_HEAD 0x07 +#define MI_ARB_ON_OFF 0x08 +#define MI_BATCH_BUFFER_END 0x0A +#define MI_OVERLAY_FLIP 0x11 +#define MI_LOAD_SCAN_LINES_INCL 0x12 +#define MI_LOAD_SCAN_LINES_EXCL 0x13 +#define MI_DISPLAY_BUFFER_INFO 0x14 +#define MI_SET_CONTEXT 0x18 +#define MI_STORE_DATA_IMM 0x20 +#define MI_STORE_DATA_INDEX 0x21 +#define MI_LOAD_REGISTER_IMM 0x22 +#define MI_STORE_REGISTER_MEM 0x24 +#define MI_BATCH_BUFFER_START 0x31 + +#define MI_SYNCHRONOUS_FLIP 0x0 +#define MI_ASYNCHRONOUS_FLIP 0x1 + +#define MI_BUFFER_SECURE 0x0 +#define MI_BUFFER_NONSECURE 0x1 + +#define MI_ARBITRATE_AT_CHAIN_POINTS 0x0 +#define MI_ARBITRATE_BETWEEN_INSTS 0x1 +#define MI_NO_ARBITRATION 0x3 + +#define MI_CONDITION_CODE_WAIT_DISABLED 0x0 +#define MI_CONDITION_CODE_WAIT_0 0x1 +#define MI_CONDITION_CODE_WAIT_1 0x2 +#define MI_CONDITION_CODE_WAIT_2 0x3 +#define MI_CONDITION_CODE_WAIT_3 0x4 +#define MI_CONDITION_CODE_WAIT_4 0x5 + +#define MI_DISPLAY_PIPE_A 0x0 +#define MI_DISPLAY_PIPE_B 0x1 + +#define MI_DISPLAY_PLANE_A 0x0 +#define MI_DISPLAY_PLANE_B 0x1 +#define MI_DISPLAY_PLANE_C 0x2 + +#define MI_STANDARD_FLIP 0x0 +#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD 0x1 +#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE 0x2 +#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER 0x3 + +#define MI_PHYSICAL_ADDRESS 0x0 +#define MI_VIRTUAL_ADDRESS 0x1 + +#define MI_BUFFER_MEMORY_MAIN 0x0 +#define MI_BUFFER_MEMORY_GTT 0x2 +#define MI_BUFFER_MEMORY_PER_PROCESS_GTT 0x3 + +#define MI_FLIP_CONTINUE 0x0 +#define MI_FLIP_ON 0x1 +#define MI_FLIP_OFF 0x2 + +#define MI_UNTRUSTED_REGISTER_SPACE 0x0 +#define MI_TRUSTED_REGISTER_SPACE 0x1 +#endif + +/* 3D state: + */ +#define _3DOP_3DSTATE_PIPELINED 0x0 +#define _3DOP_3DSTATE_NONPIPELINED 0x1 +#define _3DOP_3DCONTROL 0x2 +#define _3DOP_3DPRIMITIVE 0x3 + +#define _3DSTATE_PIPELINED_POINTERS 0x00 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 +#define _3DSTATE_VERTEX_BUFFERS 0x08 +#define _3DSTATE_VERTEX_ELEMENTS 0x09 +#define _3DSTATE_INDEX_BUFFER 0x0A +#define _3DSTATE_VF_STATISTICS 0x0B +#define _3DSTATE_DRAWING_RECTANGLE 0x00 +#define _3DSTATE_CONSTANT_COLOR 0x01 +#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 +#define _3DSTATE_CHROMA_KEY 0x04 +#define _3DSTATE_DEPTH_BUFFER 0x05 +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 +#define _3DSTATE_LINE_STIPPLE 0x08 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 +#define _3DCONTROL 0x00 +#define _3DPRIMITIVE 0x00 + +#define PIPE_CONTROL_NOWRITE 0x00 +#define PIPE_CONTROL_WRITEIMMEDIATE 0x01 +#define PIPE_CONTROL_WRITEDEPTH 0x02 +#define PIPE_CONTROL_WRITETIMESTAMP 0x03 + +#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 +#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 + +#define _3DPRIM_POINTLIST 0x01 +#define _3DPRIM_LINELIST 0x02 +#define _3DPRIM_LINESTRIP 0x03 +#define _3DPRIM_TRILIST 0x04 +#define _3DPRIM_TRISTRIP 0x05 +#define _3DPRIM_TRIFAN 0x06 +#define _3DPRIM_QUADLIST 0x07 +#define _3DPRIM_QUADSTRIP 0x08 +#define _3DPRIM_LINELIST_ADJ 0x09 +#define _3DPRIM_LINESTRIP_ADJ 0x0A +#define _3DPRIM_TRILIST_ADJ 0x0B +#define _3DPRIM_TRISTRIP_ADJ 0x0C +#define _3DPRIM_TRISTRIP_REVERSE 0x0D +#define _3DPRIM_POLYGON 0x0E +#define _3DPRIM_RECTLIST 0x0F +#define _3DPRIM_LINELOOP 0x10 +#define _3DPRIM_POINTLIST_BF 0x11 +#define _3DPRIM_LINESTRIP_CONT 0x12 +#define _3DPRIM_LINESTRIP_BF 0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 + +#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 +#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 + +#define BRW_ANISORATIO_2 0 +#define BRW_ANISORATIO_4 1 +#define BRW_ANISORATIO_6 2 +#define BRW_ANISORATIO_8 3 +#define BRW_ANISORATIO_10 4 +#define BRW_ANISORATIO_12 5 +#define BRW_ANISORATIO_14 6 +#define BRW_ANISORATIO_16 7 + +#define BRW_BLENDFACTOR_ONE 0x1 +#define BRW_BLENDFACTOR_SRC_COLOR 0x2 +#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 +#define BRW_BLENDFACTOR_DST_ALPHA 0x4 +#define BRW_BLENDFACTOR_DST_COLOR 0x5 +#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define BRW_BLENDFACTOR_CONST_COLOR 0x7 +#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 +#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 +#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A +#define BRW_BLENDFACTOR_ZERO 0x11 +#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 +#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 +#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define BRW_BLENDFUNCTION_ADD 0 +#define BRW_BLENDFUNCTION_SUBTRACT 1 +#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BRW_BLENDFUNCTION_MIN 3 +#define BRW_BLENDFUNCTION_MAX 4 + +#define BRW_ALPHATEST_FORMAT_UNORM8 0 +#define BRW_ALPHATEST_FORMAT_FLOAT32 1 + +#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define BRW_CHROMAKEY_REPLACE_BLACK 1 + +#define BRW_CLIP_API_OGL 0 +#define BRW_CLIP_API_DX 1 + +#define BRW_CLIPMODE_NORMAL 0 +#define BRW_CLIPMODE_CLIP_ALL 1 +#define BRW_CLIPMODE_CLIP_NON_REJECTED 2 +#define BRW_CLIPMODE_REJECT_ALL 3 +#define BRW_CLIPMODE_ACCEPT_ALL 4 + +#define BRW_CLIP_NDCSPACE 0 +#define BRW_CLIP_SCREENSPACE 1 + +#define BRW_COMPAREFUNCTION_ALWAYS 0 +#define BRW_COMPAREFUNCTION_NEVER 1 +#define BRW_COMPAREFUNCTION_LESS 2 +#define BRW_COMPAREFUNCTION_EQUAL 3 +#define BRW_COMPAREFUNCTION_LEQUAL 4 +#define BRW_COMPAREFUNCTION_GREATER 5 +#define BRW_COMPAREFUNCTION_NOTEQUAL 6 +#define BRW_COMPAREFUNCTION_GEQUAL 7 + +#define BRW_COVERAGE_PIXELS_HALF 0 +#define BRW_COVERAGE_PIXELS_1 1 +#define BRW_COVERAGE_PIXELS_2 2 +#define BRW_COVERAGE_PIXELS_4 3 + +#define BRW_CULLMODE_BOTH 0 +#define BRW_CULLMODE_NONE 1 +#define BRW_CULLMODE_FRONT 2 +#define BRW_CULLMODE_BACK 3 + +#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 +#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 + +#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define BRW_DEPTHFORMAT_D32_FLOAT 1 +#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define BRW_DEPTHFORMAT_D16_UNORM 5 + +#define BRW_FLOATING_POINT_IEEE_754 0 +#define BRW_FLOATING_POINT_NON_IEEE_754 1 + +#define BRW_FRONTWINDING_CW 0 +#define BRW_FRONTWINDING_CCW 1 + +#define BRW_INDEX_BYTE 0 +#define BRW_INDEX_WORD 1 +#define BRW_INDEX_DWORD 2 + +#define BRW_LOGICOPFUNCTION_CLEAR 0 +#define BRW_LOGICOPFUNCTION_NOR 1 +#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 +#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 +#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 +#define BRW_LOGICOPFUNCTION_INVERT 5 +#define BRW_LOGICOPFUNCTION_XOR 6 +#define BRW_LOGICOPFUNCTION_NAND 7 +#define BRW_LOGICOPFUNCTION_AND 8 +#define BRW_LOGICOPFUNCTION_EQUIV 9 +#define BRW_LOGICOPFUNCTION_NOOP 10 +#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 +#define BRW_LOGICOPFUNCTION_COPY 12 +#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 +#define BRW_LOGICOPFUNCTION_OR 14 +#define BRW_LOGICOPFUNCTION_SET 15 + +#define BRW_MAPFILTER_NEAREST 0x0 +#define BRW_MAPFILTER_LINEAR 0x1 +#define BRW_MAPFILTER_ANISOTROPIC 0x2 + +#define BRW_MIPFILTER_NONE 0 +#define BRW_MIPFILTER_NEAREST 1 +#define BRW_MIPFILTER_LINEAR 3 + +#define BRW_POLYGON_FRONT_FACING 0 +#define BRW_POLYGON_BACK_FACING 1 + +#define BRW_PREFILTER_ALWAYS 0x0 +#define BRW_PREFILTER_NEVER 0x1 +#define BRW_PREFILTER_LESS 0x2 +#define BRW_PREFILTER_EQUAL 0x3 +#define BRW_PREFILTER_LEQUAL 0x4 +#define BRW_PREFILTER_GREATER 0x5 +#define BRW_PREFILTER_NOTEQUAL 0x6 +#define BRW_PREFILTER_GEQUAL 0x7 + +#define BRW_PROVOKING_VERTEX_0 0 +#define BRW_PROVOKING_VERTEX_1 1 +#define BRW_PROVOKING_VERTEX_2 2 + +#define BRW_RASTRULE_UPPER_LEFT 0 +#define BRW_RASTRULE_UPPER_RIGHT 1 + +#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 +#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 +#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 + +#define BRW_STENCILOP_KEEP 0 +#define BRW_STENCILOP_ZERO 1 +#define BRW_STENCILOP_REPLACE 2 +#define BRW_STENCILOP_INCRSAT 3 +#define BRW_STENCILOP_DECRSAT 4 +#define BRW_STENCILOP_INCR 5 +#define BRW_STENCILOP_DECR 6 +#define BRW_STENCILOP_INVERT 7 + +#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 + +#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001 +#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005 +#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041 +#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042 +#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082 +#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085 +#define BRW_SURFACEFORMAT_R32G32_SINT 0x086 +#define BRW_SURFACEFORMAT_R32G32_UINT 0x087 +#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A +#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B +#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C +#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D +#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090 +#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091 +#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092 +#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095 +#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC +#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD +#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE +#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF +#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define BRW_SURFACEFORMAT_R32_SINT 0x0D6 +#define BRW_SURFACEFORMAT_R32_UINT 0x0D7 +#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8 +#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF +#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3 +#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4 +#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1 +#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2 +#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8 +#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106 +#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107 +#define BRW_SURFACEFORMAT_R8G8_SINT 0x108 +#define BRW_SURFACEFORMAT_R8G8_UINT 0x109 +#define BRW_SURFACEFORMAT_R16_UNORM 0x10A +#define BRW_SURFACEFORMAT_R16_SNORM 0x10B +#define BRW_SURFACEFORMAT_R16_SINT 0x10C +#define BRW_SURFACEFORMAT_R16_UINT 0x10D +#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E +#define BRW_SURFACEFORMAT_I16_UNORM 0x111 +#define BRW_SURFACEFORMAT_L16_UNORM 0x112 +#define BRW_SURFACEFORMAT_A16_UNORM 0x113 +#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 +#define BRW_SURFACEFORMAT_I16_FLOAT 0x115 +#define BRW_SURFACEFORMAT_L16_FLOAT 0x116 +#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C +#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D +#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E +#define BRW_SURFACEFORMAT_R16_USCALED 0x11F +#define BRW_SURFACEFORMAT_R8_UNORM 0x140 +#define BRW_SURFACEFORMAT_R8_SNORM 0x141 +#define BRW_SURFACEFORMAT_R8_SINT 0x142 +#define BRW_SURFACEFORMAT_R8_UINT 0x143 +#define BRW_SURFACEFORMAT_A8_UNORM 0x144 +#define BRW_SURFACEFORMAT_I8_UNORM 0x145 +#define BRW_SURFACEFORMAT_L8_UNORM 0x146 +#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147 +#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 +#define BRW_SURFACEFORMAT_R8_SSCALED 0x149 +#define BRW_SURFACEFORMAT_R8_USCALED 0x14A +#define BRW_SURFACEFORMAT_R1_UINT 0x181 +#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define BRW_SURFACEFORMAT_BC1_UNORM 0x186 +#define BRW_SURFACEFORMAT_BC2_UNORM 0x187 +#define BRW_SURFACEFORMAT_BC3_UNORM 0x188 +#define BRW_SURFACEFORMAT_BC4_UNORM 0x189 +#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A +#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define BRW_SURFACEFORMAT_MONO8 0x18E +#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define BRW_SURFACEFORMAT_DXT1_RGB 0x191 +#define BRW_SURFACEFORMAT_FXT1 0x192 +#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193 +#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define BRW_SURFACEFORMAT_BC4_SNORM 0x199 +#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A +#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F + +#define BRW_SURFACERETURNFORMAT_FLOAT32 0 +#define BRW_SURFACERETURNFORMAT_S1 1 + +#define BRW_SURFACE_1D 0 +#define BRW_SURFACE_2D 1 +#define BRW_SURFACE_3D 2 +#define BRW_SURFACE_CUBE 3 +#define BRW_SURFACE_BUFFER 4 +#define BRW_SURFACE_NULL 7 + +#define BRW_BORDER_COLOR_MODE_DEFAULT 0 +#define BRW_BORDER_COLOR_MODE_LEGACY 1 + +#define HSW_SCS_ZERO 0 +#define HSW_SCS_ONE 1 +#define HSW_SCS_RED 4 +#define HSW_SCS_GREEN 5 +#define HSW_SCS_BLUE 6 +#define HSW_SCS_ALPHA 7 + +#define BRW_TEXCOORDMODE_WRAP 0 +#define BRW_TEXCOORDMODE_MIRROR 1 +#define BRW_TEXCOORDMODE_CLAMP 2 +#define BRW_TEXCOORDMODE_CUBE 3 +#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 +#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 + +#define BRW_THREAD_PRIORITY_NORMAL 0 +#define BRW_THREAD_PRIORITY_HIGH 1 + +#define BRW_TILEWALK_XMAJOR 0 +#define BRW_TILEWALK_YMAJOR 1 + +#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA 0 +#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 + +#define BRW_VFCOMPONENT_NOSTORE 0 +#define BRW_VFCOMPONENT_STORE_SRC 1 +#define BRW_VFCOMPONENT_STORE_0 2 +#define BRW_VFCOMPONENT_STORE_1_FLT 3 +#define BRW_VFCOMPONENT_STORE_1_INT 4 +#define BRW_VFCOMPONENT_STORE_VID 5 +#define BRW_VFCOMPONENT_STORE_IID 6 +#define BRW_VFCOMPONENT_STORE_PID 7 + + + +/* Execution Unit (EU) defines + */ + +#define BRW_ALIGN_1 0 +#define BRW_ALIGN_16 1 + +#define BRW_ADDRESS_DIRECT 0 +#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define BRW_CHANNEL_X 0 +#define BRW_CHANNEL_Y 1 +#define BRW_CHANNEL_Z 2 +#define BRW_CHANNEL_W 3 + +#define BRW_COMPRESSION_NONE 0 +#define BRW_COMPRESSION_2NDHALF 1 +#define BRW_COMPRESSION_COMPRESSED 2 + +#define BRW_CONDITIONAL_NONE 0 +#define BRW_CONDITIONAL_Z 1 +#define BRW_CONDITIONAL_NZ 2 +#define BRW_CONDITIONAL_EQ 1 /* Z */ +#define BRW_CONDITIONAL_NEQ 2 /* NZ */ +#define BRW_CONDITIONAL_G 3 +#define BRW_CONDITIONAL_GE 4 +#define BRW_CONDITIONAL_L 5 +#define BRW_CONDITIONAL_LE 6 +#define BRW_CONDITIONAL_C 7 +#define BRW_CONDITIONAL_O 8 + +#define BRW_DEBUG_NONE 0 +#define BRW_DEBUG_BREAKPOINT 1 + +#define BRW_DEPENDENCY_NORMAL 0 +#define BRW_DEPENDENCY_NOTCLEARED 1 +#define BRW_DEPENDENCY_NOTCHECKED 2 +#define BRW_DEPENDENCY_DISABLE 3 + +#define BRW_EXECUTE_1 0 +#define BRW_EXECUTE_2 1 +#define BRW_EXECUTE_4 2 +#define BRW_EXECUTE_8 3 +#define BRW_EXECUTE_16 4 +#define BRW_EXECUTE_32 5 + +#define BRW_HORIZONTAL_STRIDE_0 0 +#define BRW_HORIZONTAL_STRIDE_1 1 +#define BRW_HORIZONTAL_STRIDE_2 2 +#define BRW_HORIZONTAL_STRIDE_4 3 + +#define BRW_INSTRUCTION_NORMAL 0 +#define BRW_INSTRUCTION_SATURATE 1 + +#define BRW_MASK_ENABLE 0 +#define BRW_MASK_DISABLE 1 + +#define BRW_OPCODE_MOV 1 +#define BRW_OPCODE_SEL 2 +#define BRW_OPCODE_NOT 4 +#define BRW_OPCODE_AND 5 +#define BRW_OPCODE_OR 6 +#define BRW_OPCODE_XOR 7 +#define BRW_OPCODE_SHR 8 +#define BRW_OPCODE_SHL 9 +#define BRW_OPCODE_RSR 10 +#define BRW_OPCODE_RSL 11 +#define BRW_OPCODE_ASR 12 +#define BRW_OPCODE_CMP 16 +#define BRW_OPCODE_JMPI 32 +#define BRW_OPCODE_IF 34 +#define BRW_OPCODE_IFF 35 +#define BRW_OPCODE_ELSE 36 +#define BRW_OPCODE_ENDIF 37 +#define BRW_OPCODE_DO 38 +#define BRW_OPCODE_WHILE 39 +#define BRW_OPCODE_BREAK 40 +#define BRW_OPCODE_CONTINUE 41 +#define BRW_OPCODE_HALT 42 +#define BRW_OPCODE_MSAVE 44 +#define BRW_OPCODE_MRESTORE 45 +#define BRW_OPCODE_PUSH 46 +#define BRW_OPCODE_POP 47 +#define BRW_OPCODE_WAIT 48 +#define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_ADD 64 +#define BRW_OPCODE_MUL 65 +#define BRW_OPCODE_AVG 66 +#define BRW_OPCODE_FRC 67 +#define BRW_OPCODE_RNDU 68 +#define BRW_OPCODE_RNDD 69 +#define BRW_OPCODE_RNDE 70 +#define BRW_OPCODE_RNDZ 71 +#define BRW_OPCODE_MAC 72 +#define BRW_OPCODE_MACH 73 +#define BRW_OPCODE_LZD 74 +#define BRW_OPCODE_SAD2 80 +#define BRW_OPCODE_SADA2 81 +#define BRW_OPCODE_DP4 84 +#define BRW_OPCODE_DPH 85 +#define BRW_OPCODE_DP3 86 +#define BRW_OPCODE_DP2 87 +#define BRW_OPCODE_DPA2 88 +#define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_NOP 126 + +#define BRW_PREDICATE_NONE 0 +#define BRW_PREDICATE_NORMAL 1 +#define BRW_PREDICATE_ALIGN1_ANYV 2 +#define BRW_PREDICATE_ALIGN1_ALLV 3 +#define BRW_PREDICATE_ALIGN1_ANY2H 4 +#define BRW_PREDICATE_ALIGN1_ALL2H 5 +#define BRW_PREDICATE_ALIGN1_ANY4H 6 +#define BRW_PREDICATE_ALIGN1_ALL4H 7 +#define BRW_PREDICATE_ALIGN1_ANY8H 8 +#define BRW_PREDICATE_ALIGN1_ALL8H 9 +#define BRW_PREDICATE_ALIGN1_ANY16H 10 +#define BRW_PREDICATE_ALIGN1_ALL16H 11 +#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 +#define BRW_PREDICATE_ALIGN16_ANY4H 6 +#define BRW_PREDICATE_ALIGN16_ALL4H 7 + +#define BRW_ARCHITECTURE_REGISTER_FILE 0 +#define BRW_GENERAL_REGISTER_FILE 1 +#define BRW_MESSAGE_REGISTER_FILE 2 +#define BRW_IMMEDIATE_VALUE 3 + +#define BRW_REGISTER_TYPE_UD 0 +#define BRW_REGISTER_TYPE_D 1 +#define BRW_REGISTER_TYPE_UW 2 +#define BRW_REGISTER_TYPE_W 3 +#define BRW_REGISTER_TYPE_UB 4 +#define BRW_REGISTER_TYPE_B 5 +#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */ +#define BRW_REGISTER_TYPE_HF 6 +#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define BRW_REGISTER_TYPE_F 7 + +#define BRW_ARF_NULL 0x00 +#define BRW_ARF_ADDRESS 0x10 +#define BRW_ARF_ACCUMULATOR 0x20 +#define BRW_ARF_FLAG 0x30 +#define BRW_ARF_MASK 0x40 +#define BRW_ARF_MASK_STACK 0x50 +#define BRW_ARF_MASK_STACK_DEPTH 0x60 +#define BRW_ARF_STATE 0x70 +#define BRW_ARF_CONTROL 0x80 +#define BRW_ARF_NOTIFICATION_COUNT 0x90 +#define BRW_ARF_IP 0xA0 + +#define BRW_AMASK 0 +#define BRW_IMASK 1 +#define BRW_LMASK 2 +#define BRW_CMASK 3 + + + +#define BRW_THREAD_NORMAL 0 +#define BRW_THREAD_ATOMIC 1 +#define BRW_THREAD_SWITCH 2 + +#define BRW_VERTICAL_STRIDE_0 0 +#define BRW_VERTICAL_STRIDE_1 1 +#define BRW_VERTICAL_STRIDE_2 2 +#define BRW_VERTICAL_STRIDE_4 3 +#define BRW_VERTICAL_STRIDE_8 4 +#define BRW_VERTICAL_STRIDE_16 5 +#define BRW_VERTICAL_STRIDE_32 6 +#define BRW_VERTICAL_STRIDE_64 7 +#define BRW_VERTICAL_STRIDE_128 8 +#define BRW_VERTICAL_STRIDE_256 9 +#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define BRW_WIDTH_1 0 +#define BRW_WIDTH_2 1 +#define BRW_WIDTH_4 2 +#define BRW_WIDTH_8 3 +#define BRW_WIDTH_16 4 + +#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 +#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 +#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 +#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 +#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 +#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 +#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 +#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 +#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 +#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 +#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 +#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define BRW_POLYGON_FACING_FRONT 0 +#define BRW_POLYGON_FACING_BACK 1 + +#define BRW_MESSAGE_TARGET_NULL 0 +#define BRW_MESSAGE_TARGET_MATH 1 +#define BRW_MESSAGE_TARGET_SAMPLER 2 +#define BRW_MESSAGE_TARGET_GATEWAY 3 +#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define BRW_MESSAGE_TARGET_URB 6 +#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 +#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 + +#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +#define BRW_MATH_FUNCTION_INV 1 +#define BRW_MATH_FUNCTION_LOG 2 +#define BRW_MATH_FUNCTION_EXP 3 +#define BRW_MATH_FUNCTION_SQRT 4 +#define BRW_MATH_FUNCTION_RSQ 5 +#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ +#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ +#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_POW 10 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define BRW_MATH_INTEGER_UNSIGNED 0 +#define BRW_MATH_INTEGER_SIGNED 1 + +#define BRW_MATH_PRECISION_FULL 0 +#define BRW_MATH_PRECISION_PARTIAL 1 + +#define BRW_MATH_SATURATE_NONE 0 +#define BRW_MATH_SATURATE_SATURATE 1 + +#define BRW_MATH_DATA_VECTOR 0 +#define BRW_MATH_DATA_SCALAR 1 + +#define BRW_URB_OPCODE_WRITE 0 + +#define BRW_URB_SWIZZLE_NONE 0 +#define BRW_URB_SWIZZLE_INTERLEAVE 1 +#define BRW_URB_SWIZZLE_TRANSPOSE 2 + +#define BRW_SCRATCH_SPACE_SIZE_1K 0 +#define BRW_SCRATCH_SPACE_SIZE_2K 1 +#define BRW_SCRATCH_SPACE_SIZE_4K 2 +#define BRW_SCRATCH_SPACE_SIZE_8K 3 +#define BRW_SCRATCH_SPACE_SIZE_16K 4 +#define BRW_SCRATCH_SPACE_SIZE_32K 5 +#define BRW_SCRATCH_SPACE_SIZE_64K 6 +#define BRW_SCRATCH_SPACE_SIZE_128K 7 +#define BRW_SCRATCH_SPACE_SIZE_256K 8 +#define BRW_SCRATCH_SPACE_SIZE_512K 9 +#define BRW_SCRATCH_SPACE_SIZE_1M 10 +#define BRW_SCRATCH_SPACE_SIZE_2M 11 + + + + +#define CMD_URB_FENCE 0x6000 +#define CMD_CONST_BUFFER_STATE 0x6001 +#define CMD_CONST_BUFFER 0x6002 + +#define CMD_STATE_BASE_ADDRESS 0x6101 +#define CMD_STATE_INSN_POINTER 0x6102 +#define CMD_PIPELINE_SELECT 0x6104 + +#define CMD_PIPELINED_STATE_POINTERS 0x7800 +#define CMD_BINDING_TABLE_PTRS 0x7801 +#define CMD_VERTEX_BUFFER 0x7808 +#define CMD_VERTEX_ELEMENT 0x7809 +#define CMD_INDEX_BUFFER 0x780a +#define CMD_VF_STATISTICS 0x780b + +#define CMD_DRAW_RECT 0x7900 +#define CMD_BLEND_CONSTANT_COLOR 0x7901 +#define CMD_CHROMA_KEY 0x7904 +#define CMD_DEPTH_BUFFER 0x7905 +#define CMD_POLY_STIPPLE_OFFSET 0x7906 +#define CMD_POLY_STIPPLE_PATTERN 0x7907 +#define CMD_LINE_STIPPLE_PATTERN 0x7908 +#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7908 + +#define CMD_PIPE_CONTROL 0x7a00 + +#define CMD_3D_PRIM 0x7b00 + +#define CMD_MI_FLUSH 0x0200 + + +/* Various values from the R0 vertex header: + */ +#define R02_PRIM_END 0x1 +#define R02_PRIM_START 0x2 + +/* media pipeline */ + +#define BRW_VFE_MODE_GENERIC 0x0 +#define BRW_VFE_MODE_VLD_MPEG2 0x1 +#define BRW_VFE_MODE_IS 0x2 +#define BRW_VFE_MODE_AVC_MC 0x4 +#define BRW_VFE_MODE_AVC_IT 0x7 +#define BRW_VFE_MODE_VC1_IT 0xB + +#define BRW_VFE_DEBUG_COUNTER_FREE 0 +#define BRW_VFE_DEBUG_COUNTER_FROZEN 1 +#define BRW_VFE_DEBUG_COUNTER_ONCE 2 +#define BRW_VFE_DEBUG_COUNTER_ALWAYS 3 + +/* VLD_STATE */ +#define BRW_MPEG_TOP_FIELD 1 +#define BRW_MPEG_BOTTOM_FIELD 2 +#define BRW_MPEG_FRAME 3 +#define BRW_MPEG_QSCALE_LINEAR 0 +#define BRW_MPEG_QSCALE_NONLINEAR 1 +#define BRW_MPEG_ZIGZAG_SCAN 0 +#define BRW_MPEG_ALTER_VERTICAL_SCAN 1 +#define BRW_MPEG_I_PICTURE 1 +#define BRW_MPEG_P_PICTURE 2 +#define BRW_MPEG_B_PICTURE 3 + +#endif diff --git a/src/uxa/brw_structs.h b/src/uxa/brw_structs.h new file mode 100644 index 00000000..20c2f857 --- /dev/null +++ b/src/uxa/brw_structs.h @@ -0,0 +1,1723 @@ + /************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_STRUCTS_H +#define BRW_STRUCTS_H + +/* Command packets: + */ +struct header +{ + unsigned int length:16; + unsigned int opcode:16; +}; + + +union header_union +{ + struct header bits; + unsigned int dword; +}; + +struct brw_3d_control +{ + struct + { + unsigned int length:8; + unsigned int notify_enable:1; + unsigned int pad:3; + unsigned int wc_flush_enable:1; + unsigned int depth_stall_enable:1; + unsigned int operation:2; + unsigned int opcode:16; + } header; + + struct + { + unsigned int pad:2; + unsigned int dest_addr_type:1; + unsigned int dest_addr:29; + } dest; + + unsigned int dword2; + unsigned int dword3; +}; + + +struct brw_3d_primitive +{ + struct + { + unsigned int length:8; + unsigned int pad:2; + unsigned int topology:5; + unsigned int indexed:1; + unsigned int opcode:16; + } header; + + unsigned int verts_per_instance; + unsigned int start_vert_location; + unsigned int instance_count; + unsigned int start_instance_location; + unsigned int base_vert_location; +}; + +/* These seem to be passed around as function args, so it works out + * better to keep them as #defines: + */ +#define BRW_FLUSH_READ_CACHE 0x1 +#define BRW_FLUSH_STATE_CACHE 0x2 +#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 +#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8 + +struct brw_mi_flush +{ + unsigned int flags:4; + unsigned int pad:12; + unsigned int opcode:16; +}; + +struct brw_vf_statistics +{ + unsigned int statistics_enable:1; + unsigned int pad:15; + unsigned int opcode:16; +}; + + + +struct brw_binding_table_pointers +{ + struct header header; + unsigned int vs; + unsigned int gs; + unsigned int clp; + unsigned int sf; + unsigned int wm; +}; + + +struct brw_blend_constant_color +{ + struct header header; + float blend_constant_color[4]; +}; + + +struct brw_depthbuffer +{ + union header_union header; + + union { + struct { + unsigned int pitch:18; + unsigned int format:3; + unsigned int pad:4; + unsigned int depth_offset_disable:1; + unsigned int tile_walk:1; + unsigned int tiled_surface:1; + unsigned int pad2:1; + unsigned int surface_type:3; + } bits; + unsigned int dword; + } dword1; + + unsigned int dword2_base_addr; + + union { + struct { + unsigned int pad:1; + unsigned int mipmap_layout:1; + unsigned int lod:4; + unsigned int width:13; + unsigned int height:13; + } bits; + unsigned int dword; + } dword3; + + union { + struct { + unsigned int pad:12; + unsigned int min_array_element:9; + unsigned int depth:11; + } bits; + unsigned int dword; + } dword4; +}; + +struct brw_drawrect +{ + struct header header; + unsigned int xmin:16; + unsigned int ymin:16; + unsigned int xmax:16; + unsigned int ymax:16; + unsigned int xorg:16; + unsigned int yorg:16; +}; + + + + +struct brw_global_depth_offset_clamp +{ + struct header header; + float depth_offset_clamp; +}; + +struct brw_indexbuffer +{ + union { + struct + { + unsigned int length:8; + unsigned int index_format:2; + unsigned int cut_index_enable:1; + unsigned int pad:5; + unsigned int opcode:16; + } bits; + unsigned int dword; + + } header; + + unsigned int buffer_start; + unsigned int buffer_end; +}; + + +struct brw_line_stipple +{ + struct header header; + + struct + { + unsigned int pattern:16; + unsigned int pad:16; + } bits0; + + struct + { + unsigned int repeat_count:9; + unsigned int pad:7; + unsigned int inverse_repeat_count:16; + } bits1; +}; + + +struct brw_pipelined_state_pointers +{ + struct header header; + + struct { + unsigned int pad:5; + unsigned int offset:27; + } vs; + + struct + { + unsigned int enable:1; + unsigned int pad:4; + unsigned int offset:27; + } gs; + + struct + { + unsigned int enable:1; + unsigned int pad:4; + unsigned int offset:27; + } clp; + + struct + { + unsigned int pad:5; + unsigned int offset:27; + } sf; + + struct + { + unsigned int pad:5; + unsigned int offset:27; + } wm; + + struct + { + unsigned int pad:5; + unsigned int offset:27; /* KW: check me! */ + } cc; +}; + + +struct brw_polygon_stipple_offset +{ + struct header header; + + struct { + unsigned int y_offset:5; + unsigned int pad:3; + unsigned int x_offset:5; + unsigned int pad0:19; + } bits0; +}; + + + +struct brw_polygon_stipple +{ + struct header header; + unsigned int stipple[32]; +}; + + + +struct brw_pipeline_select +{ + struct + { + unsigned int pipeline_select:1; + unsigned int pad:15; + unsigned int opcode:16; + } header; +}; + + +struct brw_pipe_control +{ + struct + { + unsigned int length:8; + unsigned int notify_enable:1; + unsigned int pad:2; + unsigned int instruction_state_cache_flush_enable:1; + unsigned int write_cache_flush_enable:1; + unsigned int depth_stall_enable:1; + unsigned int post_sync_operation:2; + + unsigned int opcode:16; + } header; + + struct + { + unsigned int pad:2; + unsigned int dest_addr_type:1; + unsigned int dest_addr:29; + } bits1; + + unsigned int data0; + unsigned int data1; +}; + + +struct brw_urb_fence +{ + struct + { + unsigned int length:8; + unsigned int vs_realloc:1; + unsigned int gs_realloc:1; + unsigned int clp_realloc:1; + unsigned int sf_realloc:1; + unsigned int vfe_realloc:1; + unsigned int cs_realloc:1; + unsigned int pad:2; + unsigned int opcode:16; + } header; + + struct + { + unsigned int vs_fence:10; + unsigned int gs_fence:10; + unsigned int clp_fence:10; + unsigned int pad:2; + } bits0; + + struct + { + unsigned int sf_fence:10; + unsigned int vf_fence:10; + unsigned int cs_fence:10; + unsigned int pad:2; + } bits1; +}; + +struct brw_constant_buffer_state /* previously brw_command_streamer */ +{ + struct header header; + + struct + { + unsigned int nr_urb_entries:3; + unsigned int pad:1; + unsigned int urb_entry_size:5; + unsigned int pad0:23; + } bits0; +}; + +struct brw_constant_buffer +{ + struct + { + unsigned int length:8; + unsigned int valid:1; + unsigned int pad:7; + unsigned int opcode:16; + } header; + + struct + { + unsigned int buffer_length:6; + unsigned int buffer_address:26; + } bits0; +}; + +struct brw_state_base_address +{ + struct header header; + + struct + { + unsigned int modify_enable:1; + unsigned int pad:4; + unsigned int general_state_address:27; + } bits0; + + struct + { + unsigned int modify_enable:1; + unsigned int pad:4; + unsigned int surface_state_address:27; + } bits1; + + struct + { + unsigned int modify_enable:1; + unsigned int pad:4; + unsigned int indirect_object_state_address:27; + } bits2; + + struct + { + unsigned int modify_enable:1; + unsigned int pad:11; + unsigned int general_state_upper_bound:20; + } bits3; + + struct + { + unsigned int modify_enable:1; + unsigned int pad:11; + unsigned int indirect_object_state_upper_bound:20; + } bits4; +}; + +struct brw_state_prefetch +{ + struct header header; + + struct + { + unsigned int prefetch_count:3; + unsigned int pad:3; + unsigned int prefetch_pointer:26; + } bits0; +}; + +struct brw_system_instruction_pointer +{ + struct header header; + + struct + { + unsigned int pad:4; + unsigned int system_instruction_pointer:28; + } bits0; +}; + + + + +/* State structs for the various fixed function units: + */ + + +struct thread0 +{ + unsigned int pad0:1; + unsigned int grf_reg_count:3; + unsigned int pad1:2; + unsigned int kernel_start_pointer:26; +}; + +struct thread1 +{ + unsigned int ext_halt_exception_enable:1; + unsigned int sw_exception_enable:1; + unsigned int mask_stack_exception_enable:1; + unsigned int timeout_exception_enable:1; + unsigned int illegal_op_exception_enable:1; + unsigned int pad0:3; + unsigned int depth_coef_urb_read_offset:6; /* WM only */ + unsigned int pad1:2; + unsigned int floating_point_mode:1; + unsigned int thread_priority:1; + unsigned int binding_table_entry_count:8; + unsigned int pad3:5; + unsigned int single_program_flow:1; +}; + +struct thread2 +{ + unsigned int per_thread_scratch_space:4; + unsigned int pad0:6; + unsigned int scratch_space_base_pointer:22; +}; + + +struct thread3 +{ + unsigned int dispatch_grf_start_reg:4; + unsigned int urb_entry_read_offset:6; + unsigned int pad0:1; + unsigned int urb_entry_read_length:6; + unsigned int pad1:1; + unsigned int const_urb_entry_read_offset:6; + unsigned int pad2:1; + unsigned int const_urb_entry_read_length:6; + unsigned int pad3:1; +}; + + + +struct brw_clip_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned int pad0:9; + unsigned int gs_output_stats:1; /* not always */ + unsigned int stats_enable:1; + unsigned int nr_urb_entries:7; + unsigned int pad1:1; + unsigned int urb_entry_allocation_size:5; + unsigned int pad2:1; + unsigned int max_threads:6; /* may be less */ + unsigned int pad3:1; + } thread4; + + struct + { + unsigned int pad0:13; + unsigned int clip_mode:3; + unsigned int userclip_enable_flags:8; + unsigned int userclip_must_clip:1; + unsigned int pad1:1; + unsigned int guard_band_enable:1; + unsigned int viewport_z_clip_enable:1; + unsigned int viewport_xy_clip_enable:1; + unsigned int vertex_position_space:1; + unsigned int api_mode:1; + unsigned int pad2:1; + } clip5; + + struct + { + unsigned int pad0:5; + unsigned int clipper_viewport_state_ptr:27; + } clip6; + + + float viewport_xmin; + float viewport_xmax; + float viewport_ymin; + float viewport_ymax; +}; + + + +struct brw_cc_unit_state +{ + struct + { + unsigned int pad0:3; + unsigned int bf_stencil_pass_depth_pass_op:3; + unsigned int bf_stencil_pass_depth_fail_op:3; + unsigned int bf_stencil_fail_op:3; + unsigned int bf_stencil_func:3; + unsigned int bf_stencil_enable:1; + unsigned int pad1:2; + unsigned int stencil_write_enable:1; + unsigned int stencil_pass_depth_pass_op:3; + unsigned int stencil_pass_depth_fail_op:3; + unsigned int stencil_fail_op:3; + unsigned int stencil_func:3; + unsigned int stencil_enable:1; + } cc0; + + + struct + { + unsigned int bf_stencil_ref:8; + unsigned int stencil_write_mask:8; + unsigned int stencil_test_mask:8; + unsigned int stencil_ref:8; + } cc1; + + + struct + { + unsigned int logicop_enable:1; + unsigned int pad0:10; + unsigned int depth_write_enable:1; + unsigned int depth_test_function:3; + unsigned int depth_test:1; + unsigned int bf_stencil_write_mask:8; + unsigned int bf_stencil_test_mask:8; + } cc2; + + + struct + { + unsigned int pad0:8; + unsigned int alpha_test_func:3; + unsigned int alpha_test:1; + unsigned int blend_enable:1; + unsigned int ia_blend_enable:1; + unsigned int pad1:1; + unsigned int alpha_test_format:1; + unsigned int pad2:16; + } cc3; + + struct + { + unsigned int pad0:5; + unsigned int cc_viewport_state_offset:27; + } cc4; + + struct + { + unsigned int pad0:2; + unsigned int ia_dest_blend_factor:5; + unsigned int ia_src_blend_factor:5; + unsigned int ia_blend_function:3; + unsigned int statistics_enable:1; + unsigned int logicop_func:4; + unsigned int pad1:11; + unsigned int dither_enable:1; + } cc5; + + struct + { + unsigned int clamp_post_alpha_blend:1; + unsigned int clamp_pre_alpha_blend:1; + unsigned int clamp_range:2; + unsigned int pad0:11; + unsigned int y_dither_offset:2; + unsigned int x_dither_offset:2; + unsigned int dest_blend_factor:5; + unsigned int src_blend_factor:5; + unsigned int blend_function:3; + } cc6; + + struct { + union { + float f; + unsigned char ub[4]; + } alpha_ref; + } cc7; +}; + + + +struct brw_sf_unit_state +{ + struct thread0 thread0; + struct { + unsigned int pad0:7; + unsigned int sw_exception_enable:1; + unsigned int pad1:3; + unsigned int mask_stack_exception_enable:1; + unsigned int pad2:1; + unsigned int illegal_op_exception_enable:1; + unsigned int pad3:2; + unsigned int floating_point_mode:1; + unsigned int thread_priority:1; + unsigned int binding_table_entry_count:8; + unsigned int pad4:5; + unsigned int single_program_flow:1; + } sf1; + + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned int pad0:10; + unsigned int stats_enable:1; + unsigned int nr_urb_entries:7; + unsigned int pad1:1; + unsigned int urb_entry_allocation_size:5; + unsigned int pad2:1; + unsigned int max_threads:6; + unsigned int pad3:1; + } thread4; + + struct + { + unsigned int front_winding:1; + unsigned int viewport_transform:1; + unsigned int pad0:3; + unsigned int sf_viewport_state_offset:27; + } sf5; + + struct + { + unsigned int pad0:9; + unsigned int dest_org_vbias:4; + unsigned int dest_org_hbias:4; + unsigned int scissor:1; + unsigned int disable_2x2_trifilter:1; + unsigned int disable_zero_pix_trifilter:1; + unsigned int point_rast_rule:2; + unsigned int line_endcap_aa_region_width:2; + unsigned int line_width:4; + unsigned int fast_scissor_disable:1; + unsigned int cull_mode:2; + unsigned int aa_enable:1; + } sf6; + + struct + { + unsigned int point_size:11; + unsigned int use_point_size_state:1; + unsigned int subpixel_precision:1; + unsigned int sprite_point:1; + unsigned int pad0:11; + unsigned int trifan_pv:2; + unsigned int linestrip_pv:2; + unsigned int tristrip_pv:2; + unsigned int line_last_pixel_enable:1; + } sf7; + +}; + + +struct brw_gs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned int pad0:10; + unsigned int stats_enable:1; + unsigned int nr_urb_entries:7; + unsigned int pad1:1; + unsigned int urb_entry_allocation_size:5; + unsigned int pad2:1; + unsigned int max_threads:1; + unsigned int pad3:6; + } thread4; + + struct + { + unsigned int sampler_count:3; + unsigned int pad0:2; + unsigned int sampler_state_pointer:27; + } gs5; + + + struct + { + unsigned int max_vp_index:4; + unsigned int pad0:26; + unsigned int reorder_enable:1; + unsigned int pad1:1; + } gs6; +}; + + +struct brw_vs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned int pad0:10; + unsigned int stats_enable:1; + unsigned int nr_urb_entries:7; + unsigned int pad1:1; + unsigned int urb_entry_allocation_size:5; + unsigned int pad2:1; + unsigned int max_threads:4; + unsigned int pad3:3; + } thread4; + + struct + { + unsigned int sampler_count:3; + unsigned int pad0:2; + unsigned int sampler_state_pointer:27; + } vs5; + + struct + { + unsigned int vs_enable:1; + unsigned int vert_cache_disable:1; + unsigned int pad0:30; + } vs6; +}; + + +struct brw_wm_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned int stats_enable:1; + unsigned int pad0:1; + unsigned int sampler_count:3; + unsigned int sampler_state_pointer:27; + } wm4; + + struct + { + unsigned int enable_8_pix:1; + unsigned int enable_16_pix:1; + unsigned int enable_32_pix:1; + unsigned int pad0:7; + unsigned int legacy_global_depth_bias:1; + unsigned int line_stipple:1; + unsigned int depth_offset:1; + unsigned int polygon_stipple:1; + unsigned int line_aa_region_width:2; + unsigned int line_endcap_aa_region_width:2; + unsigned int early_depth_test:1; + unsigned int thread_dispatch_enable:1; + unsigned int program_uses_depth:1; + unsigned int program_computes_depth:1; + unsigned int program_uses_killpixel:1; + unsigned int legacy_line_rast: 1; + unsigned int transposed_urb_read:1; + unsigned int max_threads:7; + } wm5; + + float global_depth_offset_constant; + float global_depth_offset_scale; + + struct { + unsigned int pad0:1; + unsigned int grf_reg_count_1:3; + unsigned int pad1:2; + unsigned int kernel_start_pointer_1:26; + } wm8; + + struct { + unsigned int pad0:1; + unsigned int grf_reg_count_2:3; + unsigned int pad1:2; + unsigned int kernel_start_pointer_2:26; + } wm9; + + struct { + unsigned int pad0:1; + unsigned int grf_reg_count_3:3; + unsigned int pad1:2; + unsigned int kernel_start_pointer_3:26; + } wm10; +}; + +struct brw_wm_unit_state_padded { + struct brw_wm_unit_state state; + char pad[64 - sizeof(struct brw_wm_unit_state)]; +}; + +/* The hardware supports two different modes for border color. The + * default (OpenGL) mode uses floating-point color channels, while the + * legacy mode uses 4 bytes. + * + * More significantly, the legacy mode respects the components of the + * border color for channels not present in the source, (whereas the + * default mode will ignore the border color's alpha channel and use + * alpha==1 for an RGB source, for example). + * + * The legacy mode matches the semantics specified by the Render + * extension. + */ +struct brw_sampler_default_border_color { + float color[4]; +}; + +struct brw_sampler_legacy_border_color { + uint8_t color[4]; +}; + +struct brw_sampler_state +{ + + struct + { + unsigned int shadow_function:3; + unsigned int lod_bias:11; + unsigned int min_filter:3; + unsigned int mag_filter:3; + unsigned int mip_filter:2; + unsigned int base_level:5; + unsigned int pad:1; + unsigned int lod_preclamp:1; + unsigned int border_color_mode:1; + unsigned int pad0:1; + unsigned int disable:1; + } ss0; + + struct + { + unsigned int r_wrap_mode:3; + unsigned int t_wrap_mode:3; + unsigned int s_wrap_mode:3; + unsigned int pad:3; + unsigned int max_lod:10; + unsigned int min_lod:10; + } ss1; + + + struct + { + unsigned int pad:5; + unsigned int border_color_pointer:27; + } ss2; + + struct + { + unsigned int pad:19; + unsigned int max_aniso:3; + unsigned int chroma_key_mode:1; + unsigned int chroma_key_index:2; + unsigned int chroma_key_enable:1; + unsigned int monochrome_filter_width:3; + unsigned int monochrome_filter_height:3; + } ss3; +}; + + +struct brw_clipper_viewport +{ + float xmin; + float xmax; + float ymin; + float ymax; +}; + +struct brw_cc_viewport +{ + float min_depth; + float max_depth; +}; + +struct brw_sf_viewport +{ + struct { + float m00; + float m11; + float m22; + float m30; + float m31; + float m32; + } viewport; + + struct { + short xmin; + short ymin; + short xmax; + short ymax; + } scissor; +}; + +/* Documented in the subsystem/shared-functions/sampler chapter... + */ +struct brw_surface_state +{ + struct { + unsigned int cube_pos_z:1; + unsigned int cube_neg_z:1; + unsigned int cube_pos_y:1; + unsigned int cube_neg_y:1; + unsigned int cube_pos_x:1; + unsigned int cube_neg_x:1; + unsigned int pad:3; + unsigned int render_cache_read_mode:1; + unsigned int mipmap_layout_mode:1; + unsigned int vert_line_stride_ofs:1; + unsigned int vert_line_stride:1; + unsigned int color_blend:1; + unsigned int writedisable_blue:1; + unsigned int writedisable_green:1; + unsigned int writedisable_red:1; + unsigned int writedisable_alpha:1; + unsigned int surface_format:9; + unsigned int data_return_format:1; + unsigned int pad0:1; + unsigned int surface_type:3; + } ss0; + + struct { + unsigned int base_addr; + } ss1; + + struct { + unsigned int render_target_rotation:2; + unsigned int mip_count:4; + unsigned int width:13; + unsigned int height:13; + } ss2; + + struct { + unsigned int tile_walk:1; + unsigned int tiled_surface:1; + unsigned int pad:1; + unsigned int pitch:18; + unsigned int depth:11; + } ss3; + + struct { + unsigned int pad:19; + unsigned int min_array_elt:9; + unsigned int min_lod:4; + } ss4; + + struct { + unsigned int pad:20; + unsigned int y_offset:4; + unsigned int pad2:1; + unsigned int x_offset:7; + } ss5; +}; + + + +struct brw_vertex_buffer_state +{ + struct { + unsigned int pitch:11; + unsigned int pad:15; + unsigned int access_type:1; + unsigned int vb_index:5; + } vb0; + + unsigned int start_addr; + unsigned int max_index; +#if 1 + unsigned int instance_data_step_rate; /* not included for sequential/random vertices? */ +#endif +}; + +#define BRW_VBP_MAX 17 + +struct brw_vb_array_state { + struct header header; + struct brw_vertex_buffer_state vb[BRW_VBP_MAX]; +}; + + +struct brw_vertex_element_state +{ + struct + { + unsigned int src_offset:11; + unsigned int pad:5; + unsigned int src_format:9; + unsigned int pad0:1; + unsigned int valid:1; + unsigned int vertex_buffer_index:5; + } ve0; + + struct + { + unsigned int dst_offset:8; + unsigned int pad:8; + unsigned int vfcomponent3:4; + unsigned int vfcomponent2:4; + unsigned int vfcomponent1:4; + unsigned int vfcomponent0:4; + } ve1; +}; + +#define BRW_VEP_MAX 18 + +struct brw_vertex_element_packet { + struct header header; + struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */ +}; + + +struct brw_urb_immediate { + unsigned int opcode:4; + unsigned int offset:6; + unsigned int swizzle_control:2; + unsigned int pad:1; + unsigned int allocate:1; + unsigned int used:1; + unsigned int complete:1; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; +}; + +/* Instruction format for the execution units: + */ + +struct brw_instruction +{ + struct + { + unsigned int opcode:7; + unsigned int pad:1; + unsigned int access_mode:1; + unsigned int mask_control:1; + unsigned int dependency_control:2; + unsigned int compression_control:2; + unsigned int thread_control:2; + unsigned int predicate_control:4; + unsigned int predicate_inverse:1; + unsigned int execution_size:3; + unsigned int destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ + unsigned int pad0:2; + unsigned int debug_control:1; + unsigned int saturate:1; + } header; + + union { + struct + { + unsigned int dest_reg_file:2; + unsigned int dest_reg_type:3; + unsigned int src0_reg_file:2; + unsigned int src0_reg_type:3; + unsigned int src1_reg_file:2; + unsigned int src1_reg_type:3; + unsigned int pad:1; + unsigned int dest_subreg_nr:5; + unsigned int dest_reg_nr:8; + unsigned int dest_horiz_stride:2; + unsigned int dest_address_mode:1; + } da1; + + struct + { + unsigned int dest_reg_file:2; + unsigned int dest_reg_type:3; + unsigned int src0_reg_file:2; + unsigned int src0_reg_type:3; + unsigned int pad:6; + int dest_indirect_offset:10; /* offset against the deref'd address reg */ + unsigned int dest_subreg_nr:3; /* subnr for the address reg a0.x */ + unsigned int dest_horiz_stride:2; + unsigned int dest_address_mode:1; + } ia1; + + struct + { + unsigned int dest_reg_file:2; + unsigned int dest_reg_type:3; + unsigned int src0_reg_file:2; + unsigned int src0_reg_type:3; + unsigned int src1_reg_file:2; + unsigned int src1_reg_type:3; + unsigned int pad0:1; + unsigned int dest_writemask:4; + unsigned int dest_subreg_nr:1; + unsigned int dest_reg_nr:8; + unsigned int pad1:2; + unsigned int dest_address_mode:1; + } da16; + + struct + { + unsigned int dest_reg_file:2; + unsigned int dest_reg_type:3; + unsigned int src0_reg_file:2; + unsigned int src0_reg_type:3; + unsigned int pad0:6; + unsigned int dest_writemask:4; + int dest_indirect_offset:6; + unsigned int dest_subreg_nr:3; + unsigned int pad1:2; + unsigned int dest_address_mode:1; + } ia16; + } bits1; + + + union { + struct + { + unsigned int src0_subreg_nr:5; + unsigned int src0_reg_nr:8; + unsigned int src0_abs:1; + unsigned int src0_negate:1; + unsigned int src0_address_mode:1; + unsigned int src0_horiz_stride:2; + unsigned int src0_width:3; + unsigned int src0_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad:6; + } da1; + + struct + { + int src0_indirect_offset:10; + unsigned int src0_subreg_nr:3; + unsigned int src0_abs:1; + unsigned int src0_negate:1; + unsigned int src0_address_mode:1; + unsigned int src0_horiz_stride:2; + unsigned int src0_width:3; + unsigned int src0_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad:6; + } ia1; + + struct + { + unsigned int src0_swz_x:2; + unsigned int src0_swz_y:2; + unsigned int src0_subreg_nr:1; + unsigned int src0_reg_nr:8; + unsigned int src0_abs:1; + unsigned int src0_negate:1; + unsigned int src0_address_mode:1; + unsigned int src0_swz_z:2; + unsigned int src0_swz_w:2; + unsigned int pad0:1; + unsigned int src0_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad1:6; + } da16; + + struct + { + unsigned int src0_swz_x:2; + unsigned int src0_swz_y:2; + int src0_indirect_offset:6; + unsigned int src0_subreg_nr:3; + unsigned int src0_abs:1; + unsigned int src0_negate:1; + unsigned int src0_address_mode:1; + unsigned int src0_swz_z:2; + unsigned int src0_swz_w:2; + unsigned int pad0:1; + unsigned int src0_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad1:6; + } ia16; + + } bits2; + + union + { + struct + { + unsigned int src1_subreg_nr:5; + unsigned int src1_reg_nr:8; + unsigned int src1_abs:1; + unsigned int src1_negate:1; + unsigned int pad:1; + unsigned int src1_horiz_stride:2; + unsigned int src1_width:3; + unsigned int src1_vert_stride:4; + unsigned int pad0:7; + } da1; + + struct + { + unsigned int src1_swz_x:2; + unsigned int src1_swz_y:2; + unsigned int src1_subreg_nr:1; + unsigned int src1_reg_nr:8; + unsigned int src1_abs:1; + unsigned int src1_negate:1; + unsigned int pad0:1; + unsigned int src1_swz_z:2; + unsigned int src1_swz_w:2; + unsigned int pad1:1; + unsigned int src1_vert_stride:4; + unsigned int pad2:7; + } da16; + + struct + { + int src1_indirect_offset:10; + unsigned int src1_subreg_nr:3; + unsigned int src1_abs:1; + unsigned int src1_negate:1; + unsigned int pad0:1; + unsigned int src1_horiz_stride:2; + unsigned int src1_width:3; + unsigned int src1_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad1:6; + } ia1; + + struct + { + unsigned int src1_swz_x:2; + unsigned int src1_swz_y:2; + int src1_indirect_offset:6; + unsigned int src1_subreg_nr:3; + unsigned int src1_abs:1; + unsigned int src1_negate:1; + unsigned int pad0:1; + unsigned int src1_swz_z:2; + unsigned int src1_swz_w:2; + unsigned int pad1:1; + unsigned int src1_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad2:6; + } ia16; + + + struct + { + int jump_count:16; /* note: signed */ + unsigned int pop_count:4; + unsigned int pad0:12; + } if_else; + + struct { + unsigned int function:4; + unsigned int int_type:1; + unsigned int precision:1; + unsigned int saturate:1; + unsigned int data_type:1; + unsigned int pad0:8; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } math; + + struct { + unsigned int binding_table_index:8; + unsigned int sampler:4; + unsigned int return_format:2; + unsigned int msg_type:2; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } sampler; + + struct brw_urb_immediate urb; + + struct { + unsigned int binding_table_index:8; + unsigned int msg_control:4; + unsigned int msg_type:2; + unsigned int target_cache:2; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } dp_read; + + struct { + unsigned int binding_table_index:8; + unsigned int msg_control:3; + unsigned int pixel_scoreboard_clear:1; + unsigned int msg_type:3; + unsigned int send_commit_msg:1; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } dp_write; + + struct { + unsigned int pad:16; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } generic; + + unsigned int ud; + } bits3; +}; + +/* media pipeline */ + +struct brw_vfe_state { + struct { + unsigned int per_thread_scratch_space:4; + unsigned int pad3:3; + unsigned int extend_vfe_state_present:1; + unsigned int pad2:2; + unsigned int scratch_base:22; + } vfe0; + + struct { + unsigned int debug_counter_control:2; + unsigned int children_present:1; + unsigned int vfe_mode:4; + unsigned int pad2:2; + unsigned int num_urb_entries:7; + unsigned int urb_entry_alloc_size:9; + unsigned int max_threads:7; + } vfe1; + + struct { + unsigned int pad4:4; + unsigned int interface_descriptor_base:28; + } vfe2; +}; + +struct brw_vld_state { + struct { + unsigned int pad6:6; + unsigned int scan_order:1; + unsigned int intra_vlc_format:1; + unsigned int quantizer_scale_type:1; + unsigned int concealment_motion_vector:1; + unsigned int frame_predict_frame_dct:1; + unsigned int top_field_first:1; + unsigned int picture_structure:2; + unsigned int intra_dc_precision:2; + unsigned int f_code_0_0:4; + unsigned int f_code_0_1:4; + unsigned int f_code_1_0:4; + unsigned int f_code_1_1:4; + } vld0; + + struct { + unsigned int pad2:9; + unsigned int picture_coding_type:2; + unsigned int pad:21; + } vld1; + + struct { + unsigned int index_0:4; + unsigned int index_1:4; + unsigned int index_2:4; + unsigned int index_3:4; + unsigned int index_4:4; + unsigned int index_5:4; + unsigned int index_6:4; + unsigned int index_7:4; + } desc_remap_table0; + + struct { + unsigned int index_8:4; + unsigned int index_9:4; + unsigned int index_10:4; + unsigned int index_11:4; + unsigned int index_12:4; + unsigned int index_13:4; + unsigned int index_14:4; + unsigned int index_15:4; + } desc_remap_table1; +}; + +struct brw_interface_descriptor { + struct { + unsigned int grf_reg_blocks:4; + unsigned int pad:2; + unsigned int kernel_start_pointer:26; + } desc0; + + struct { + unsigned int pad:7; + unsigned int software_exception:1; + unsigned int pad2:3; + unsigned int maskstack_exception:1; + unsigned int pad3:1; + unsigned int illegal_opcode_exception:1; + unsigned int pad4:2; + unsigned int floating_point_mode:1; + unsigned int thread_priority:1; + unsigned int single_program_flow:1; + unsigned int pad5:1; + unsigned int const_urb_entry_read_offset:6; + unsigned int const_urb_entry_read_len:6; + } desc1; + + struct { + unsigned int pad:2; + unsigned int sampler_count:3; + unsigned int sampler_state_pointer:27; + } desc2; + + struct { + unsigned int binding_table_entry_count:5; + unsigned int binding_table_pointer:27; + } desc3; +}; + +struct gen6_blend_state +{ + struct { + unsigned int dest_blend_factor:5; + unsigned int source_blend_factor:5; + unsigned int pad3:1; + unsigned int blend_func:3; + unsigned int pad2:1; + unsigned int ia_dest_blend_factor:5; + unsigned int ia_source_blend_factor:5; + unsigned int pad1:1; + unsigned int ia_blend_func:3; + unsigned int pad0:1; + unsigned int ia_blend_enable:1; + unsigned int blend_enable:1; + } blend0; + + struct { + unsigned int post_blend_clamp_enable:1; + unsigned int pre_blend_clamp_enable:1; + unsigned int clamp_range:2; + unsigned int pad0:4; + unsigned int x_dither_offset:2; + unsigned int y_dither_offset:2; + unsigned int dither_enable:1; + unsigned int alpha_test_func:3; + unsigned int alpha_test_enable:1; + unsigned int pad1:1; + unsigned int logic_op_func:4; + unsigned int logic_op_enable:1; + unsigned int pad2:1; + unsigned int write_disable_b:1; + unsigned int write_disable_g:1; + unsigned int write_disable_r:1; + unsigned int write_disable_a:1; + unsigned int pad3:1; + unsigned int alpha_to_coverage_dither:1; + unsigned int alpha_to_one:1; + unsigned int alpha_to_coverage:1; + } blend1; +}; + +struct gen6_color_calc_state +{ + struct { + unsigned int alpha_test_format:1; + unsigned int pad0:14; + unsigned int round_disable:1; + unsigned int bf_stencil_ref:8; + unsigned int stencil_ref:8; + } cc0; + + union { + float alpha_ref_f; + struct { + unsigned int ui:8; + unsigned int pad0:24; + } alpha_ref_fi; + } cc1; + + float constant_r; + float constant_g; + float constant_b; + float constant_a; +}; + +struct gen6_depth_stencil_state +{ + struct { + unsigned int pad0:3; + unsigned int bf_stencil_pass_depth_pass_op:3; + unsigned int bf_stencil_pass_depth_fail_op:3; + unsigned int bf_stencil_fail_op:3; + unsigned int bf_stencil_func:3; + unsigned int bf_stencil_enable:1; + unsigned int pad1:2; + unsigned int stencil_write_enable:1; + unsigned int stencil_pass_depth_pass_op:3; + unsigned int stencil_pass_depth_fail_op:3; + unsigned int stencil_fail_op:3; + unsigned int stencil_func:3; + unsigned int stencil_enable:1; + } ds0; + + struct { + unsigned int bf_stencil_write_mask:8; + unsigned int bf_stencil_test_mask:8; + unsigned int stencil_write_mask:8; + unsigned int stencil_test_mask:8; + } ds1; + + struct { + unsigned int pad0:26; + unsigned int depth_write_enable:1; + unsigned int depth_test_func:3; + unsigned int pad1:1; + unsigned int depth_test_enable:1; + } ds2; +}; + +struct gen7_surface_state +{ + struct { + unsigned int cube_pos_z:1; + unsigned int cube_neg_z:1; + unsigned int cube_pos_y:1; + unsigned int cube_neg_y:1; + unsigned int cube_pos_x:1; + unsigned int cube_neg_x:1; + unsigned int pad2:2; + unsigned int render_cache_read_write:1; + unsigned int pad1:1; + unsigned int surface_array_spacing:1; + unsigned int vert_line_stride_ofs:1; + unsigned int vert_line_stride:1; + unsigned int tile_walk:1; + unsigned int tiled_surface:1; + unsigned int horizontal_alignment:1; + unsigned int vertical_alignment:2; + unsigned int surface_format:9; /**< BRW_SURFACEFORMAT_x */ + unsigned int pad0:1; + unsigned int is_array:1; + unsigned int surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */ + } ss0; + + struct { + unsigned int base_addr; + } ss1; + + struct { + unsigned int width:14; + unsigned int pad1:2; + unsigned int height:14; + unsigned int pad0:2; + } ss2; + + struct { + unsigned int pitch:18; + unsigned int pad:3; + unsigned int depth:11; + } ss3; + + struct { + unsigned int multisample_position_palette_index:3; + unsigned int num_multisamples:3; + unsigned int multisampled_surface_storage_format:1; + unsigned int render_target_view_extent:11; + unsigned int min_array_elt:11; + unsigned int rotation:2; + unsigned int pad0:1; + } ss4; + + struct { + unsigned int mip_count:4; + unsigned int min_lod:4; + unsigned int pad1:12; + unsigned int y_offset:4; + unsigned int pad0:1; + unsigned int x_offset:7; + } ss5; + + struct { + unsigned int pad; /* Multisample Control Surface stuff */ + } ss6; + + struct { + unsigned int resource_min_lod:12; + unsigned int pad0:4; + unsigned int shader_chanel_select_a:3; + unsigned int shader_chanel_select_b:3; + unsigned int shader_chanel_select_g:3; + unsigned int shader_chanel_select_r:3; + unsigned int alpha_clear_color:1; + unsigned int blue_clear_color:1; + unsigned int green_clear_color:1; + unsigned int red_clear_color:1; + } ss7; +}; + +struct gen7_sampler_state +{ + struct + { + unsigned int aniso_algorithm:1; + unsigned int lod_bias:13; + unsigned int min_filter:3; + unsigned int mag_filter:3; + unsigned int mip_filter:2; + unsigned int base_level:5; + unsigned int pad1:1; + unsigned int lod_preclamp:1; + unsigned int default_color_mode:1; + unsigned int pad0:1; + unsigned int disable:1; + } ss0; + + struct + { + unsigned int cube_control_mode:1; + unsigned int shadow_function:3; + unsigned int pad:4; + unsigned int max_lod:12; + unsigned int min_lod:12; + } ss1; + + struct + { + unsigned int pad:5; + unsigned int default_color_pointer:27; + } ss2; + + struct + { + unsigned int r_wrap_mode:3; + unsigned int t_wrap_mode:3; + unsigned int s_wrap_mode:3; + unsigned int pad:1; + unsigned int non_normalized_coord:1; + unsigned int trilinear_quality:2; + unsigned int address_round:6; + unsigned int max_aniso:3; + unsigned int chroma_key_mode:1; + unsigned int chroma_key_index:2; + unsigned int chroma_key_enable:1; + unsigned int pad0:6; + } ss3; +}; + +#endif diff --git a/src/uxa/common.h b/src/uxa/common.h new file mode 100644 index 00000000..4f55846b --- /dev/null +++ b/src/uxa/common.h @@ -0,0 +1,71 @@ + +/************************************************************************** + +Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. +Copyright © 2002 David Dawes + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * David Dawes <dawes@xfree86.org> + * + */ + +#ifndef _INTEL_COMMON_H_ +#define _INTEL_COMMON_H_ + +#include <xf86.h> + +/* Provide substitutes for gcc's __FUNCTION__ on other compilers */ +#if !defined(__GNUC__) && !defined(__FUNCTION__) +# if defined(__STDC__) && (__STDC_VERSION__>=199901L) /* C99 */ +# define __FUNCTION__ __func__ +# else +# define __FUNCTION__ "" +# endif +#endif + +#define PFX __FILE__,__LINE__,__FUNCTION__ +#define FUNCTION_NAME __FUNCTION__ + +#define KB(x) ((x) * 1024) +#define MB(x) ((x) * KB(1024)) + +/** + * Hints to CreatePixmap to tell the driver how the pixmap is going to be + * used. + * + * Compare to CREATE_PIXMAP_USAGE_* in the server. + */ +enum { + INTEL_CREATE_PIXMAP_TILING_X = 0x10000000, + INTEL_CREATE_PIXMAP_TILING_Y = 0x20000000, + INTEL_CREATE_PIXMAP_TILING_NONE = 0x40000000, + INTEL_CREATE_PIXMAP_DRI2 = 0x80000000, +}; + +#endif /* _INTEL_COMMON_H_ */ diff --git a/src/uxa/i830_3d.c b/src/uxa/i830_3d.c new file mode 100644 index 00000000..10432011 --- /dev/null +++ b/src/uxa/i830_3d.c @@ -0,0 +1,225 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "xf86.h" +#include "intel.h" + +#include "i830_reg.h" + +void I830EmitInvarientState(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + assert(intel->in_batch_atomic); + + OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(0)); + OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(1)); + OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(2)); + OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(3)); + + OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_Z_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_FOG_MODE_CMD); + OUT_BATCH(FOGFUNC_ENABLE | + FOG_LINEAR_CONST | FOGSRC_INDEX_Z | ENABLE_FOG_DENSITY); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + MAP_UNIT(0) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(0) | + ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(0)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + MAP_UNIT(1) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(1) | + ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(1)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + MAP_UNIT(2) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(2) | + ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(2)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + MAP_UNIT(3) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(3) | + ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(3)); + + OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); + OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(0)); + OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); + OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(1)); + OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); + OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(2)); + OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); + OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(3)); + + OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + ENABLE_TRI_STRIP_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | TRI_STRIP_PROVOKE_VRTX(2)); + + OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); + + OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_VERTEX_TRANSFORM); + OUT_BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE); + + OUT_BATCH(_3DSTATE_W_STATE_CMD); + OUT_BATCH(MAGIC_W_STATE_DWORD1); + OUT_BATCH(0x3f800000 /* 1.0 in IEEE float */ ); + + OUT_BATCH(_3DSTATE_COLOR_FACTOR_CMD); + OUT_BATCH(0x80808080); /* .5 required in alpha for GL_DOT3_RGBA_EXT */ + + OUT_BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD); + OUT_BATCH(TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) | + TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) | + TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) | + TEXBIND_SET0(TEXCOORDSRC_VTXSET_0)); + + /* copy from mesa */ + OUT_BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD | + DISABLE_INDPT_ALPHA_BLEND | + ENABLE_ALPHA_BLENDFUNC | ABLENDFUNC_ADD); + + OUT_BATCH(_3DSTATE_FOG_COLOR_CMD | + FOG_COLOR_RED(0) | FOG_COLOR_GREEN(0) | FOG_COLOR_BLUE(0)); + + OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_MODES_1_CMD | + ENABLE_COLR_BLND_FUNC | + BLENDFUNC_ADD | + ENABLE_SRC_BLND_FACTOR | + SRC_BLND_FACT(BLENDFACTOR_ONE) | + ENABLE_DST_BLND_FACTOR | DST_BLND_FACT(BLENDFACTOR_ZERO)); + OUT_BATCH(_3DSTATE_MODES_2_CMD | ENABLE_GLOBAL_DEPTH_BIAS | GLOBAL_DEPTH_BIAS(0) | ENABLE_ALPHA_TEST_FUNC | ALPHA_TEST_FUNC(0) | /* always */ + ALPHA_REF_VALUE(0)); + OUT_BATCH(_3DSTATE_MODES_3_CMD | + ENABLE_DEPTH_TEST_FUNC | + DEPTH_TEST_FUNC(0x2) | /* COMPAREFUNC_LESS */ + ENABLE_ALPHA_SHADE_MODE | + ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_FOG_SHADE_MODE | + FOG_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_SPEC_SHADE_MODE | + SPEC_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_COLOR_SHADE_MODE | + COLOR_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_CULL_MODE | CULLMODE_NONE); + + OUT_BATCH(_3DSTATE_MODES_4_CMD | + ENABLE_LOGIC_OP_FUNC | + LOGIC_OP_FUNC(LOGICOP_COPY) | + ENABLE_STENCIL_TEST_MASK | + STENCIL_TEST_MASK(0xff) | + ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff)); + + OUT_BATCH(_3DSTATE_STENCIL_TEST_CMD | + ENABLE_STENCIL_PARMS | + STENCIL_FAIL_OP(0) | /* STENCILOP_KEEP */ + STENCIL_PASS_DEPTH_FAIL_OP(0) | /* STENCILOP_KEEP */ + STENCIL_PASS_DEPTH_PASS_OP(0) | /* STENCILOP_KEEP */ + ENABLE_STENCIL_TEST_FUNC | + STENCIL_TEST_FUNC(0) | /* COMPAREFUNC_ALWAYS */ + ENABLE_STENCIL_REF_VALUE | + STENCIL_REF_VALUE(0)); + + OUT_BATCH(_3DSTATE_MODES_5_CMD | + FLUSH_TEXTURE_CACHE | + ENABLE_SPRITE_POINT_TEX | SPRITE_POINT_TEX_OFF | + ENABLE_FIXED_LINE_WIDTH | FIXED_LINE_WIDTH(0x2) | /* 1.0 */ + ENABLE_FIXED_POINT_WIDTH | FIXED_POINT_WIDTH(1)); + + OUT_BATCH(_3DSTATE_ENABLES_1_CMD | + DISABLE_LOGIC_OP | + DISABLE_STENCIL_TEST | + DISABLE_DEPTH_BIAS | + DISABLE_SPEC_ADD | + DISABLE_FOG | + DISABLE_ALPHA_TEST | ENABLE_COLOR_BLEND | DISABLE_DEPTH_TEST); + OUT_BATCH(_3DSTATE_ENABLES_2_CMD | + DISABLE_STENCIL_WRITE | + ENABLE_TEX_CACHE | + DISABLE_DITHER | + ENABLE_COLOR_MASK | ENABLE_COLOR_WRITE | DISABLE_DEPTH_WRITE); + + OUT_BATCH(_3DSTATE_STIPPLE); + + /* Set default blend state */ + OUT_BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) | + TEXPIPE_COLOR | + ENABLE_TEXOUTPUT_WRT_SEL | + TEXOP_OUTPUT_CURRENT | + DISABLE_TEX_CNTRL_STAGE | + TEXOP_SCALE_1X | + TEXOP_MODIFY_PARMS | TEXOP_LAST_STAGE | TEXBLENDOP_ARG1); + OUT_BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) | + TEXPIPE_ALPHA | + ENABLE_TEXOUTPUT_WRT_SEL | + TEXOP_OUTPUT_CURRENT | + TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1); + OUT_BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) | + TEXPIPE_COLOR | + TEXBLEND_ARG1 | + TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_DIFFUSE); + OUT_BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) | + TEXPIPE_ALPHA | + TEXBLEND_ARG1 | + TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_DIFFUSE); + + OUT_BATCH(_3DSTATE_AA_CMD | + AA_LINE_ECAAR_WIDTH_ENABLE | + AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | + AA_LINE_REGION_WIDTH_1_0 | AA_LINE_DISABLE); +} diff --git a/src/uxa/i830_reg.h b/src/uxa/i830_reg.h new file mode 100644 index 00000000..93d03cf3 --- /dev/null +++ b/src/uxa/i830_reg.h @@ -0,0 +1,805 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _I830_REG_H_ +#define _I830_REG_H_ + +#define I830_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value) + +/* Flush */ +#define MI_FLUSH (0x04<<23) +#define MI_FLUSH_DW (0x26<<23) + +#define MI_WRITE_DIRTY_STATE (1<<4) +#define MI_END_SCENE (1<<3) +#define MI_GLOBAL_SNAPSHOT_COUNT_RESET (1<<3) +#define MI_INHIBIT_RENDER_CACHE_FLUSH (1<<2) +#define MI_STATE_INSTRUCTION_CACHE_FLUSH (1<<1) +#define MI_INVALIDATE_MAP_CACHE (1<<0) +/* broadwater flush bits */ +#define BRW_MI_GLOBAL_SNAPSHOT_RESET (1 << 3) + +#define MI_BATCH_BUFFER_END (0xA << 23) + +/* Noop */ +#define MI_NOOP 0x00 +#define MI_NOOP_WRITE_ID (1<<22) +#define MI_NOOP_ID_MASK (1<<22 - 1) + +/* Wait for Events */ +#define MI_WAIT_FOR_EVENT (0x03<<23) +#define MI_WAIT_FOR_PIPEB_SVBLANK (1<<18) +#define MI_WAIT_FOR_PIPEA_SVBLANK (1<<17) +#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16) +#define MI_WAIT_FOR_PIPEB_VBLANK (1<<7) +#define MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW (1<<5) +#define MI_WAIT_FOR_PIPEA_VBLANK (1<<3) +#define MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW (1<<1) + +/* Set the scan line for MI_WAIT_FOR_PIPE?_SCAN_LINE_WINDOW */ +#define MI_LOAD_SCAN_LINES_INCL (0x12<<23) +#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEA (0) +#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEB (0x1<<20) + +/* BLT commands */ +#define COLOR_BLT_CMD ((2<<29)|(0x40<<22)|(0x3)) +#define COLOR_BLT_WRITE_ALPHA (1<<21) +#define COLOR_BLT_WRITE_RGB (1<<20) + +#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|(0x4)) +#define XY_COLOR_BLT_WRITE_ALPHA (1<<21) +#define XY_COLOR_BLT_WRITE_RGB (1<<20) +#define XY_COLOR_BLT_TILED (1<<11) + +#define XY_SETUP_CLIP_BLT_CMD ((2<<29)|(3<<22)|1) + +#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) +#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) +#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) +#define XY_SRC_COPY_BLT_SRC_TILED (1<<15) +#define XY_SRC_COPY_BLT_DST_TILED (1<<11) + +#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|0x4) +#define SRC_COPY_BLT_WRITE_ALPHA (1<<21) +#define SRC_COPY_BLT_WRITE_RGB (1<<20) + +#define XY_PAT_BLT_IMMEDIATE ((2<<29)|(0x72<<22)) + +#define XY_MONO_PAT_BLT_CMD ((0x2<<29)|(0x52<<22)|0x7) +#define XY_MONO_PAT_VERT_SEED ((1<<10)|(1<<9)|(1<<8)) +#define XY_MONO_PAT_HORT_SEED ((1<<14)|(1<<13)|(1<<12)) +#define XY_MONO_PAT_BLT_WRITE_ALPHA (1<<21) +#define XY_MONO_PAT_BLT_WRITE_RGB (1<<20) + +#define XY_MONO_SRC_BLT_CMD ((0x2<<29)|(0x54<<22)|(0x6)) +#define XY_MONO_SRC_BLT_WRITE_ALPHA (1<<21) +#define XY_MONO_SRC_BLT_WRITE_RGB (1<<20) + +#define CMD_3D (0x3<<29) + +#define PRIM3D_INLINE (CMD_3D | (0x1f<<24)) +#define PRIM3D_TRILIST (0x0<<18) +#define PRIM3D_TRISTRIP (0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) +#define PRIM3D_TRIFAN (0x3<<18) +#define PRIM3D_POLY (0x4<<18) +#define PRIM3D_LINELIST (0x5<<18) +#define PRIM3D_LINESTRIP (0x6<<18) +#define PRIM3D_RECTLIST (0x7<<18) +#define PRIM3D_POINTLIST (0x8<<18) +#define PRIM3D_DIB (0x9<<18) +#define PRIM3D_CLEAR_RECT (0xa<<18) +#define PRIM3D_ZONE_INIT (0xd<<18) +#define PRIM3D_MASK (0x1f<<18) + +#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24)) +#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16) +#define AA_LINE_ECAAR_WIDTH_0_5 0 +#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14) +#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14) +#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14) +#define AA_LINE_REGION_WIDTH_ENABLE (1<<8) +#define AA_LINE_REGION_WIDTH_0_5 0 +#define AA_LINE_REGION_WIDTH_1_0 (1<<6) +#define AA_LINE_REGION_WIDTH_2_0 (2<<6) +#define AA_LINE_REGION_WIDTH_4_0 (3<<6) +#define AA_LINE_ENABLE ((1<<1) | 1) +#define AA_LINE_DISABLE (1<<1) + +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +#define BUF_3D_PITCH(x) (((x)/4)<<2) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + +#define _3DSTATE_COLOR_FACTOR_CMD (CMD_3D | (0x1d<<24) | (0x1<<16)) + +#define _3DSTATE_COLOR_FACTOR_N_CMD(stage) (CMD_3D | (0x1d<<24) | \ + ((0x90+(stage))<<16)) + +#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16)) + +#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16)) + +#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16)) + +#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16)) + +#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) +/* Dword 1 */ +#define DSTORG_HORT_BIAS(x) ((x)<<20) +#define DSTORG_VERT_BIAS(x) ((x)<<16) +#define COLOR_4_2_2_CHNL_WRT_ALL 0 +#define COLOR_4_2_2_CHNL_WRT_Y (1<<12) +#define COLOR_4_2_2_CHNL_WRT_CR (2<<12) +#define COLOR_4_2_2_CHNL_WRT_CB (3<<12) +#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12) +#define COLR_BUF_8BIT 0 +#define COLR_BUF_RGB555 (1<<8) +#define COLR_BUF_RGB565 (2<<8) +#define COLR_BUF_ARGB8888 (3<<8) +#define COLR_BUF_ARGB4444 (8<<8) +#define COLR_BUF_ARGB1555 (9<<8) +#define DEPTH_IS_Z 0 +#define DEPTH_IS_W (1<<6) +#define DEPTH_FRMT_16_FIXED 0 +#define DEPTH_FRMT_16_FLOAT (1<<2) +#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2) +#define DEPTH_FRMT_24_FLOAT_8_OTHER (3<<2) +#define VERT_LINE_STRIDE_1 (1<<1) +#define VERT_LINE_STRIDE_0 0 +#define VERT_LINE_STRIDE_OFS_1 1 +#define VERT_LINE_STRIDE_OFS_0 0 + +#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3) +/* Dword 1 */ +#define DRAW_RECT_DIS_DEPTH_OFS (1<<30) +#define DRAW_DITHER_OFS_X(x) ((x)<<26) +#define DRAW_DITHER_OFS_Y(x) ((x)<<24) +/* Dword 2 */ +#define DRAW_YMIN(x) ((x)<<16) +#define DRAW_XMIN(x) (x) +/* Dword 3 */ +#define DRAW_YMAX(x) ((x)<<16) +#define DRAW_XMAX(x) (x) +/* Dword 4 */ +#define DRAW_YORG(x) ((x)<<16) +#define DRAW_XORG(x) (x) + +#define _3DSTATE_ENABLES_1_CMD (CMD_3D|(0x3<<24)) +#define ENABLE_LOGIC_OP_MASK ((1<<23)|(1<<22)) +#define ENABLE_LOGIC_OP ((1<<23)|(1<<22)) +#define DISABLE_LOGIC_OP (1<<23) +#define ENABLE_STENCIL_TEST ((1<<21)|(1<<20)) +#define DISABLE_STENCIL_TEST (1<<21) +#define ENABLE_DEPTH_BIAS ((1<<11)|(1<<10)) +#define DISABLE_DEPTH_BIAS (1<<11) +#define ENABLE_SPEC_ADD_MASK ((1<<9)|(1<<8)) +#define ENABLE_SPEC_ADD ((1<<9)|(1<<8)) +#define DISABLE_SPEC_ADD (1<<9) +#define ENABLE_DIS_FOG_MASK ((1<<7)|(1<<6)) +#define ENABLE_FOG ((1<<7)|(1<<6)) +#define DISABLE_FOG (1<<7) +#define ENABLE_DIS_ALPHA_TEST_MASK ((1<<5)|(1<<4)) +#define ENABLE_ALPHA_TEST ((1<<5)|(1<<4)) +#define DISABLE_ALPHA_TEST (1<<5) +#define ENABLE_DIS_CBLEND_MASK ((1<<3)|(1<<2)) +#define ENABLE_COLOR_BLEND ((1<<3)|(1<<2)) +#define DISABLE_COLOR_BLEND (1<<3) +#define ENABLE_DIS_DEPTH_TEST_MASK ((1<<1)|1) +#define ENABLE_DEPTH_TEST ((1<<1)|1) +#define DISABLE_DEPTH_TEST (1<<1) + +/* _3DSTATE_ENABLES_2, p138 */ +#define _3DSTATE_ENABLES_2_CMD (CMD_3D|(0x4<<24)) +#define ENABLE_STENCIL_WRITE ((1<<21)|(1<<20)) +#define DISABLE_STENCIL_WRITE (1<<21) +#define ENABLE_TEX_CACHE ((1<<17)|(1<<16)) +#define DISABLE_TEX_CACHE (1<<17) +#define ENABLE_DITHER ((1<<9)|(1<<8)) +#define DISABLE_DITHER (1<<9) +#define ENABLE_COLOR_MASK (1<<10) +#define WRITEMASK_ALPHA (1<<7) +#define WRITEMASK_ALPHA_SHIFT 7 +#define WRITEMASK_RED (1<<6) +#define WRITEMASK_RED_SHIFT 6 +#define WRITEMASK_GREEN (1<<5) +#define WRITEMASK_GREEN_SHIFT 5 +#define WRITEMASK_BLUE (1<<4) +#define WRITEMASK_BLUE_SHIFT 4 +#define WRITEMASK_MASK ((1<<4)|(1<<5)|(1<<6)|(1<<7)) +#define ENABLE_COLOR_WRITE ((1<<3)|(1<<2)) +#define DISABLE_COLOR_WRITE (1<<3) +#define ENABLE_DIS_DEPTH_WRITE_MASK 0x3 +#define ENABLE_DEPTH_WRITE ((1<<1)|1) +#define DISABLE_DEPTH_WRITE (1<<1) + +/* _3DSTATE_FOG_COLOR, p139 */ +#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24)) +#define FOG_COLOR_RED(x) ((x)<<16) +#define FOG_COLOR_GREEN(x) ((x)<<8) +#define FOG_COLOR_BLUE(x) (x) + +/* _3DSTATE_FOG_MODE, p140 */ +#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2) +/* Dword 1 */ +#define FOGFUNC_ENABLE (1<<31) +#define FOGFUNC_VERTEX 0 +#define FOGFUNC_PIXEL_EXP (1<<28) +#define FOGFUNC_PIXEL_EXP2 (2<<28) +#define FOGFUNC_PIXEL_LINEAR (3<<28) +#define FOGSRC_INDEX_Z (1<<27) +#define FOGSRC_INDEX_W ((1<<27)|(1<<25)) +#define FOG_LINEAR_CONST (1<<24) +#define FOG_CONST_1(x) ((x)<<4) +#define ENABLE_FOG_DENSITY (1<<23) +/* Dword 2 */ +#define FOG_CONST_2(x) (x) +/* Dword 3 */ +#define FOG_DENSITY(x) (x) + +/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p142 */ +#define _3DSTATE_INDPT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24)) +#define ENABLE_INDPT_ALPHA_BLEND ((1<<23)|(1<<22)) +#define DISABLE_INDPT_ALPHA_BLEND (1<<23) +#define ALPHA_BLENDFUNC_MASK 0x3f0000 +#define ENABLE_ALPHA_BLENDFUNC (1<<21) +#define ABLENDFUNC_ADD 0 +#define ABLENDFUNC_SUB (1<<16) +#define ABLENDFUNC_RVSE_SUB (2<<16) +#define ABLENDFUNC_MIN (3<<16) +#define ABLENDFUNC_MAX (4<<16) +#define SRC_DST_ABLEND_MASK 0xfff +#define ENABLE_SRC_ABLEND_FACTOR (1<<11) +#define SRC_ABLEND_FACT(x) ((x)<<6) +#define ENABLE_DST_ABLEND_FACTOR (1<<5) +#define DST_ABLEND_FACT(x) (x) + +#define BLENDFACTOR_ZERO 0x01 +#define BLENDFACTOR_ONE 0x02 +#define BLENDFACTOR_SRC_COLR 0x03 +#define BLENDFACTOR_INV_SRC_COLR 0x04 +#define BLENDFACTOR_SRC_ALPHA 0x05 +#define BLENDFACTOR_INV_SRC_ALPHA 0x06 +#define BLENDFACTOR_DST_ALPHA 0x07 +#define BLENDFACTOR_INV_DST_ALPHA 0x08 +#define BLENDFACTOR_DST_COLR 0x09 +#define BLENDFACTOR_INV_DST_COLR 0x0a +#define BLENDFACTOR_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACTOR_CONST_COLOR 0x0c +#define BLENDFACTOR_INV_CONST_COLOR 0x0d +#define BLENDFACTOR_CONST_ALPHA 0x0e +#define BLENDFACTOR_INV_CONST_ALPHA 0x0f +#define BLENDFACTOR_MASK 0x0f + +/* _3DSTATE_MAP_BLEND_ARG, p152 */ +#define _3DSTATE_MAP_BLEND_ARG_CMD(stage) (CMD_3D|(0x0e<<24)|((stage)<<20)) + +#define TEXPIPE_COLOR 0 +#define TEXPIPE_ALPHA (1<<18) +#define TEXPIPE_KILL (2<<18) +#define TEXBLEND_ARG0 0 +#define TEXBLEND_ARG1 (1<<15) +#define TEXBLEND_ARG2 (2<<15) +#define TEXBLEND_ARG3 (3<<15) +#define TEXBLENDARG_MODIFY_PARMS (1<<6) +#define TEXBLENDARG_REPLICATE_ALPHA (1<<5) +#define TEXBLENDARG_INV_ARG (1<<4) +#define TEXBLENDARG_ONE 0 +#define TEXBLENDARG_FACTOR 0x01 +#define TEXBLENDARG_ACCUM 0x02 +#define TEXBLENDARG_DIFFUSE 0x03 +#define TEXBLENDARG_SPEC 0x04 +#define TEXBLENDARG_CURRENT 0x05 +#define TEXBLENDARG_TEXEL0 0x06 +#define TEXBLENDARG_TEXEL1 0x07 +#define TEXBLENDARG_TEXEL2 0x08 +#define TEXBLENDARG_TEXEL3 0x09 +#define TEXBLENDARG_FACTOR_N 0x0e + +/* _3DSTATE_MAP_BLEND_OP, p155 */ +#define _3DSTATE_MAP_BLEND_OP_CMD(stage) (CMD_3D|(0x0d<<24)|((stage)<<20)) +#if 0 +# define TEXPIPE_COLOR 0 +# define TEXPIPE_ALPHA (1<<18) +# define TEXPIPE_KILL (2<<18) +#endif +#define ENABLE_TEXOUTPUT_WRT_SEL (1<<17) +#define TEXOP_OUTPUT_CURRENT 0 +#define TEXOP_OUTPUT_ACCUM (1<<15) +#define ENABLE_TEX_CNTRL_STAGE ((1<<12)|(1<<11)) +#define DISABLE_TEX_CNTRL_STAGE (1<<12) +#define TEXOP_SCALE_SHIFT 9 +#define TEXOP_SCALE_1X (0 << TEXOP_SCALE_SHIFT) +#define TEXOP_SCALE_2X (1 << TEXOP_SCALE_SHIFT) +#define TEXOP_SCALE_4X (2 << TEXOP_SCALE_SHIFT) +#define TEXOP_MODIFY_PARMS (1<<8) +#define TEXOP_LAST_STAGE (1<<7) +#define TEXBLENDOP_KILLPIXEL 0x02 +#define TEXBLENDOP_ARG1 0x01 +#define TEXBLENDOP_ARG2 0x02 +#define TEXBLENDOP_MODULATE 0x03 +#define TEXBLENDOP_ADD 0x06 +#define TEXBLENDOP_ADDSIGNED 0x07 +#define TEXBLENDOP_BLEND 0x08 +#define TEXBLENDOP_BLEND_AND_ADD 0x09 +#define TEXBLENDOP_SUBTRACT 0x0a +#define TEXBLENDOP_DOT3 0x0b +#define TEXBLENDOP_DOT4 0x0c +#define TEXBLENDOP_MODULATE_AND_ADD 0x0d +#define TEXBLENDOP_MODULATE_2X_AND_ADD 0x0e +#define TEXBLENDOP_MODULATE_4X_AND_ADD 0x0f + +/* _3DSTATE_MAP_BUMP_TABLE, p160 TODO */ +/* _3DSTATE_MAP_COLOR_CHROMA_KEY, p161 TODO */ + +#define _3DSTATE_MAP_COORD_TRANSFORM ((3<<29)|(0x1d<<24)|(0x8c<<16)) +#define DISABLE_TEX_TRANSFORM (1<<28) +#define TEXTURE_SET(x) (x<<29) + +#define _3DSTATE_VERTEX_TRANSFORM ((3<<29)|(0x1d<<24)|(0x8b<<16)) +#define DISABLE_VIEWPORT_TRANSFORM (1<<31) +#define DISABLE_PERSPECTIVE_DIVIDE (1<<29) + +/* _3DSTATE_MAP_COORD_SET_BINDINGS, p162 */ +#define _3DSTATE_MAP_COORD_SETBIND_CMD (CMD_3D|(0x1d<<24)|(0x02<<16)) +#define TEXBIND_MASK3 ((1<<15)|(1<<14)|(1<<13)|(1<<12)) +#define TEXBIND_MASK2 ((1<<11)|(1<<10)|(1<<9)|(1<<8)) +#define TEXBIND_MASK1 ((1<<7)|(1<<6)|(1<<5)|(1<<4)) +#define TEXBIND_MASK0 ((1<<3)|(1<<2)|(1<<1)|1) + +#define TEXBIND_SET3(x) ((x)<<12) +#define TEXBIND_SET2(x) ((x)<<8) +#define TEXBIND_SET1(x) ((x)<<4) +#define TEXBIND_SET0(x) (x) + +#define TEXCOORDSRC_KEEP 0 +#define TEXCOORDSRC_DEFAULT 0x01 +#define TEXCOORDSRC_VTXSET_0 0x08 +#define TEXCOORDSRC_VTXSET_1 0x09 +#define TEXCOORDSRC_VTXSET_2 0x0a +#define TEXCOORDSRC_VTXSET_3 0x0b +#define TEXCOORDSRC_VTXSET_4 0x0c +#define TEXCOORDSRC_VTXSET_5 0x0d +#define TEXCOORDSRC_VTXSET_6 0x0e +#define TEXCOORDSRC_VTXSET_7 0x0f + +#define MAP_UNIT(unit) ((unit)<<16) +#define MAP_UNIT_MASK (0x7<<16) + +/* _3DSTATE_MAP_COORD_SETS, p164 */ +#define _3DSTATE_MAP_COORD_SET_CMD (CMD_3D|(0x1c<<24)|(0x01<<19)) +#define TEXCOORD_SET(n) ((n)<<16) +#define ENABLE_TEXCOORD_PARAMS (1<<15) +#define TEXCOORDS_ARE_NORMAL (1<<14) +#define TEXCOORDS_ARE_IN_TEXELUNITS 0 +#define TEXCOORDTYPE_CARTESIAN 0 +#define TEXCOORDTYPE_HOMOGENEOUS (1<<11) +#define TEXCOORDTYPE_VECTOR (2<<11) +#define TEXCOORDTYPE_MASK (0x7<<11) +#define ENABLE_ADDR_V_CNTL (1<<7) +#define ENABLE_ADDR_U_CNTL (1<<3) +#define TEXCOORD_ADDR_V_MODE(x) ((x)<<4) +#define TEXCOORD_ADDR_U_MODE(x) (x) +#define TEXCOORDMODE_WRAP 0 +#define TEXCOORDMODE_MIRROR 1 +#define TEXCOORDMODE_CLAMP 2 +#define TEXCOORDMODE_WRAP_SHORTEST 3 +#define TEXCOORDMODE_CLAMP_BORDER 4 +#define TEXCOORD_ADDR_V_MASK 0x70 +#define TEXCOORD_ADDR_U_MASK 0x7 + +/* _3DSTATE_MAP_CUBE, p168 TODO */ +#define _3DSTATE_MAP_CUBE (CMD_3D|(0x1c<<24)|(0x0a<<19)) +#define CUBE_NEGX_ENABLE (1<<5) +#define CUBE_POSX_ENABLE (1<<4) +#define CUBE_NEGY_ENABLE (1<<3) +#define CUBE_POSY_ENABLE (1<<2) +#define CUBE_NEGZ_ENABLE (1<<1) +#define CUBE_POSZ_ENABLE (1<<0) + +#define _3DSTATE_MAP_INFO_CMD (CMD_3D|(0x1d<<24)|(0x0<<16)|3) +#define TEXMAP_INDEX(x) ((x)<<28) +#define MAP_SURFACE_8BIT (1<<24) +#define MAP_SURFACE_16BIT (2<<24) +#define MAP_SURFACE_32BIT (3<<24) +#define MAP_FORMAT_2D (0) +#define MAP_FORMAT_3D_CUBE (1<<11) + +/* _3DSTATE_MODES_1, p190 */ +#define _3DSTATE_MODES_1_CMD (CMD_3D|(0x08<<24)) +#define BLENDFUNC_MASK 0x3f0000 +#define ENABLE_COLR_BLND_FUNC (1<<21) +#define BLENDFUNC_ADD 0 +#define BLENDFUNC_SUB (1<<16) +#define BLENDFUNC_RVRSE_SUB (2<<16) +#define BLENDFUNC_MIN (3<<16) +#define BLENDFUNC_MAX (4<<16) +#define SRC_DST_BLND_MASK 0xfff +#define ENABLE_SRC_BLND_FACTOR (1<<11) +#define ENABLE_DST_BLND_FACTOR (1<<5) +#define SRC_BLND_FACT(x) ((x)<<6) +#define DST_BLND_FACT(x) (x) + +/* _3DSTATE_MODES_2, p192 */ +#define _3DSTATE_MODES_2_CMD (CMD_3D|(0x0f<<24)) +#define ENABLE_GLOBAL_DEPTH_BIAS (1<<22) +#define GLOBAL_DEPTH_BIAS(x) ((x)<<14) +#define ENABLE_ALPHA_TEST_FUNC (1<<13) +#define ENABLE_ALPHA_REF_VALUE (1<<8) +#define ALPHA_TEST_FUNC(x) ((x)<<9) +#define ALPHA_REF_VALUE(x) (x) + +#define ALPHA_TEST_REF_MASK 0x3fff + +/* _3DSTATE_MODES_3, p193 */ +#define _3DSTATE_MODES_3_CMD (CMD_3D|(0x02<<24)) +#define DEPTH_TEST_FUNC_MASK 0x1f0000 +#define ENABLE_DEPTH_TEST_FUNC (1<<20) +/* Uses COMPAREFUNC */ +#define DEPTH_TEST_FUNC(x) ((x)<<16) +#define ENABLE_ALPHA_SHADE_MODE (1<<11) +#define ENABLE_FOG_SHADE_MODE (1<<9) +#define ENABLE_SPEC_SHADE_MODE (1<<7) +#define ENABLE_COLOR_SHADE_MODE (1<<5) +#define ALPHA_SHADE_MODE(x) ((x)<<10) +#define FOG_SHADE_MODE(x) ((x)<<8) +#define SPEC_SHADE_MODE(x) ((x)<<6) +#define COLOR_SHADE_MODE(x) ((x)<<4) +#define CULLMODE_MASK 0xf +#define ENABLE_CULL_MODE (1<<3) +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_CW 2 +#define CULLMODE_CCW 3 + +#define SHADE_MODE_LINEAR 0 +#define SHADE_MODE_FLAT 0x1 + +/* _3DSTATE_MODES_4, p195 */ +#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x16<<24)) +#define ENABLE_LOGIC_OP_FUNC (1<<23) +#define LOGIC_OP_FUNC(x) ((x)<<18) +#define LOGICOP_MASK ((1<<18)|(1<<19)|(1<<20)|(1<<21)) +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 0x1 +#define LOGICOP_AND_INV 0x2 +#define LOGICOP_COPY_INV 0x3 +#define LOGICOP_AND_RVRSE 0x4 +#define LOGICOP_INV 0x5 +#define LOGICOP_XOR 0x6 +#define LOGICOP_NAND 0x7 +#define LOGICOP_AND 0x8 +#define LOGICOP_EQUIV 0x9 +#define LOGICOP_NOOP 0xa +#define LOGICOP_OR_INV 0xb +#define LOGICOP_COPY 0xc +#define LOGICOP_OR_RVRSE 0xd +#define LOGICOP_OR 0xe +#define LOGICOP_SET 0xf +#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00)) +#define ENABLE_STENCIL_TEST_MASK (1<<17) +#define STENCIL_TEST_MASK(x) ((x)<<8) +#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff)) +#define ENABLE_STENCIL_WRITE_MASK (1<<16) +#define STENCIL_WRITE_MASK(x) ((x)&0xff) + +/* _3DSTATE_MODES_5, p196 */ +#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24)) +#define ENABLE_SPRITE_POINT_TEX (1<<23) +#define SPRITE_POINT_TEX_ON (1<<22) +#define SPRITE_POINT_TEX_OFF 0 +#define FLUSH_RENDER_CACHE (1<<18) +#define FLUSH_TEXTURE_CACHE (1<<16) +#define FIXED_LINE_WIDTH_MASK 0xfc00 +#define ENABLE_FIXED_LINE_WIDTH (1<<15) +#define FIXED_LINE_WIDTH(x) ((x)<<10) +#define FIXED_POINT_WIDTH_MASK 0x3ff +#define ENABLE_FIXED_POINT_WIDTH (1<<9) +#define FIXED_POINT_WIDTH(x) (x) + +/* _3DSTATE_RASTERIZATION_RULES, p198 */ +#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24)) +#define ENABLE_POINT_RASTER_RULE (1<<15) +#define OGL_POINT_RASTER_RULE (1<<13) +#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) +#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) +#define ENABLE_TRI_STRIP_PROVOKE_VRTX (1<<2) +#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) +#define TRI_STRIP_PROVOKE_VRTX(x) (x) + +/* _3DSTATE_SCISSOR_ENABLE, p200 */ +#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19)) +#define ENABLE_SCISSOR_RECT ((1<<1) | 1) +#define DISABLE_SCISSOR_RECT (1<<1) + +/* _3DSTATE_SCISSOR_RECTANGLE_0, p201 */ +#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1) +/* Dword 1 */ +#define SCISSOR_RECT_0_YMIN(x) ((x)<<16) +#define SCISSOR_RECT_0_XMIN(x) (x) +/* Dword 2 */ +#define SCISSOR_RECT_0_YMAX(x) ((x)<<16) +#define SCISSOR_RECT_0_XMAX(x) (x) + +/* _3DSTATE_STENCIL_TEST, p202 */ +#define _3DSTATE_STENCIL_TEST_CMD (CMD_3D|(0x09<<24)) +#define ENABLE_STENCIL_PARMS (1<<23) +#define STENCIL_OPS_MASK (0xffc000) +#define STENCIL_FAIL_OP(x) ((x)<<20) +#define STENCIL_PASS_DEPTH_FAIL_OP(x) ((x)<<17) +#define STENCIL_PASS_DEPTH_PASS_OP(x) ((x)<<14) + +#define ENABLE_STENCIL_TEST_FUNC_MASK ((1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)) +#define ENABLE_STENCIL_TEST_FUNC (1<<13) +/* Uses COMPAREFUNC */ +#define STENCIL_TEST_FUNC(x) ((x)<<9) +#define STENCIL_REF_VALUE_MASK ((1<<8)|0xff) +#define ENABLE_STENCIL_REF_VALUE (1<<8) +#define STENCIL_REF_VALUE(x) (x) + +/* _3DSTATE_VERTEX_FORMAT, p204 */ +#define _3DSTATE_VFT0_CMD (CMD_3D|(0x05<<24)) +#define VFT0_POINT_WIDTH (1<<12) +#define VFT0_TEX_COUNT_MASK (7<<8) +#define VFT0_TEX_COUNT_SHIFT 8 +#define VFT0_TEX_COUNT(x) ((x)<<8) +#define VFT0_SPEC (1<<7) +#define VFT0_DIFFUSE (1<<6) +#define VFT0_DEPTH_OFFSET (1<<5) +#define VFT0_XYZ (1<<1) +#define VFT0_XYZW (2<<1) +#define VFT0_XY (3<<1) +#define VFT0_XYW (4<<1) +#define VFT0_XYZW_MASK (7<<1) + +/* _3DSTATE_VERTEX_FORMAT_2, p206 */ +#define _3DSTATE_VERTEX_FORMAT_2_CMD (CMD_3D|(0x0a<<24)) +#define VFT1_TEX7_FMT(x) ((x)<<14) +#define VFT1_TEX6_FMT(x) ((x)<<12) +#define VFT1_TEX5_FMT(x) ((x)<<10) +#define VFT1_TEX4_FMT(x) ((x)<<8) +#define VFT1_TEX3_FMT(x) ((x)<<6) +#define VFT1_TEX2_FMT(x) ((x)<<4) +#define VFT1_TEX1_FMT(x) ((x)<<2) +#define VFT1_TEX0_FMT(x) (x) +#define VFT1_TEX0_MASK 3 +#define VFT1_TEX1_SHIFT 2 +#define TEXCOORDFMT_2D 0 +#define TEXCOORDFMT_3D 1 +#define TEXCOORDFMT_4D 2 +#define TEXCOORDFMT_1D 3 + +/*New stuff picked up along the way */ + +#define MLC_LOD_BIAS_MASK ((1<<7)-1) + +/* _3DSTATE_VERTEX_TRANSFORM, p207 */ +#define _3DSTATE_VERTEX_TRANS_CMD (CMD_3D|(0x1d<<24)|(0x8b<<16)|0) +#define _3DSTATE_VERTEX_TRANS_MTX_CMD (CMD_3D|(0x1d<<24)|(0x8b<<16)|6) +/* Dword 1 */ +#define ENABLE_VIEWPORT_TRANSFORM ((1<<31)|(1<<30)) +#define DISABLE_VIEWPORT_TRANSFORM (1<<31) +#define ENABLE_PERSP_DIVIDE ((1<<29)|(1<<28)) +#define DISABLE_PERSP_DIVIDE (1<<29) +#define VRTX_TRANS_LOAD_MATRICES 0x7421 +#define VRTX_TRANS_NO_LOAD_MATRICES 0x0000 +/* Dword 2 -> 7 are matrix elements */ + +/* _3DSTATE_W_STATE, p209 */ +#define _3DSTATE_W_STATE_CMD (CMD_3D|(0x1d<<24)|(0x8d<<16)|1) +/* Dword 1 */ +#define MAGIC_W_STATE_DWORD1 0x00000008 +/* Dword 2 */ +#define WFAR_VALUE(x) (x) + +/* Stipple command, carried over from the i810, apparently: + */ +#define _3DSTATE_STIPPLE (CMD_3D|(0x1d<<24)|(0x83<<16)) +#define ST1_ENABLE (1<<16) +#define ST1_MASK (0xffff) + +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D|(0x1d<<24)|(0x04<<16)) +#define I1_LOAD_S(n) (1<<((n)+4)) +#define S3_POINT_WIDTH_SHIFT 23 +#define S3_LINE_WIDTH_SHIFT 19 +#define S3_ALPHA_SHADE_MODE_SHIFT 18 +#define S3_FOG_SHADE_MODE_SHIFT 17 +#define S3_SPEC_SHADE_MODE_SHIFT 16 +#define S3_COLOR_SHADE_MODE_SHIFT 15 +#define S3_CULL_MODE_SHIFT 13 +#define S3_CULLMODE_BOTH (0) +#define S3_CULLMODE_NONE (1<<13) +#define S3_CULLMODE_CW (2<<13) +#define S3_CULLMODE_CCW (3<<13) +#define S3_POINT_WIDTH_PRESENT (1<<12) +#define S3_SPEC_FOG_PRESENT (1<<11) +#define S3_DIFFUSE_PRESENT (1<<10) +#define S3_DEPTH_OFFSET_PRESENT (1<<9) +#define S3_POSITION_SHIFT 6 +#define S3_VERTEXHAS_XYZ (1<<6) +#define S3_VERTEXHAS_XYZW (2<<6) +#define S3_VERTEXHAS_XY (3<<6) +#define S3_VERTEXHAS_XYW (4<<6) +#define S3_ENABLE_SPEC_ADD (1<<5) +#define S3_ENABLE_FOG (1<<4) +#define S3_ENABLE_LOCAL_DEPTH_BIAS (1<<3) +#define S3_ENABLE_SPRITE_POINT (1<<1) +#define S3_ENABLE_ANTIALIASING 1 +#define S8_ENABLE_ALPHA_TEST (1<<31) +#define S8_ALPHA_TEST_FUNC_SHIFT 28 +#define S8_ALPHA_REFVALUE_SHIFT 20 +#define S8_ENABLE_DEPTH_TEST (1<<19) +#define S8_DEPTH_TEST_FUNC_SHIFT 16 +#define S8_ENABLE_COLOR_BLEND (1<<15) +#define S8_COLOR_BLEND_FUNC_SHIFT 12 +#define S8_BLENDFUNC_ADD (0) +#define S8_BLENDFUNC_SUB (1<<12) +#define S8_BLENDFUNC_RVRSE_SUB (2<<12) +#define S8_BLENDFUNC_MIN (3<<12) +#define S8_BLENDFUNC_MAX (4<<12) +#define S8_SRC_BLEND_FACTOR_SHIFT 8 +#define S8_DST_BLEND_FACTOR_SHIFT 4 +#define S8_ENABLE_DEPTH_BUFFER_WRITE (1<<3) +#define S8_ENABLE_COLOR_BUFFER_WRITE (1<<2) + +#define _3DSTATE_LOAD_STATE_IMMEDIATE_2 (CMD_3D|(0x1d<<24)|(0x03<<16)) +#define LOAD_TEXTURE_MAP(x) (1<<((x)+11)) +#define LOAD_TEXTURE_BLEND_STAGE(x) (1<<((x)+7)) +#define LOAD_GLOBAL_COLOR_FACTOR (1<<6) + +#define TM0S0_ADDRESS_MASK 0xfffffffc +#define TM0S0_USE_FENCE (1<<1) + +#define TM0S1_HEIGHT_SHIFT 21 +#define TM0S1_WIDTH_SHIFT 10 +#define TM0S1_PALETTE_SELECT (1<<9) +#define TM0S1_MAPSURF_FORMAT_MASK (0x7 << 6) +#define TM0S1_MAPSURF_FORMAT_SHIFT 6 +#define MAPSURF_8BIT_INDEXED (0<<6) +#define MAPSURF_8BIT (1<<6) +#define MAPSURF_16BIT (2<<6) +#define MAPSURF_32BIT (3<<6) +#define MAPSURF_411 (4<<6) +#define MAPSURF_422 (5<<6) +#define MAPSURF_COMPRESSED (6<<6) +#define MAPSURF_4BIT_INDEXED (7<<6) +#define TM0S1_MT_FORMAT_MASK (0x7 << 3) +#define TM0S1_MT_FORMAT_SHIFT 3 +#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */ +#define MT_8BIT_IDX_RGB565 (0<<3) /* SURFACE_8BIT_INDEXED */ +#define MT_8BIT_IDX_ARGB1555 (1<<3) +#define MT_8BIT_IDX_ARGB4444 (2<<3) +#define MT_8BIT_IDX_AY88 (3<<3) +#define MT_8BIT_IDX_ABGR8888 (4<<3) +#define MT_8BIT_IDX_BUMP_88DVDU (5<<3) +#define MT_8BIT_IDX_BUMP_655LDVDU (6<<3) +#define MT_8BIT_IDX_ARGB8888 (7<<3) +#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */ +#define MT_8BIT_L8 (1<<3) +#define MT_8BIT_A8 (4<<3) +#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */ +#define MT_16BIT_ARGB1555 (1<<3) +#define MT_16BIT_ARGB4444 (2<<3) +#define MT_16BIT_AY88 (3<<3) +#define MT_16BIT_DIB_ARGB1555_8888 (4<<3) +#define MT_16BIT_BUMP_88DVDU (5<<3) +#define MT_16BIT_BUMP_655LDVDU (6<<3) +#define MT_16BIT_DIB_RGB565_8888 (7<<3) +#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */ +#define MT_32BIT_ABGR8888 (1<<3) +#define MT_32BIT_XRGB8888 (2<<3) +#define MT_32BIT_XBGR8888 (3<<3) +#define MT_32BIT_BUMP_XLDVDU_8888 (6<<3) +#define MT_32BIT_DIB_8888 (7<<3) +#define MT_411_YUV411 (0<<3) /* SURFACE_411 */ +#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ +#define MT_422_YCRCB_NORMAL (1<<3) +#define MT_422_YCRCB_SWAPUV (2<<3) +#define MT_422_YCRCB_SWAPUVY (3<<3) +#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */ +#define MT_COMPRESS_DXT2_3 (1<<3) +#define MT_COMPRESS_DXT4_5 (2<<3) +#define MT_COMPRESS_FXT1 (3<<3) +#define TM0S1_COLORSPACE_CONVERSION (1 << 2) +#define TM0S1_TILED_SURFACE (1 << 1) +#define TM0S1_TILE_WALK (1 << 0) + +#define TM0S2_PITCH_SHIFT 21 +#define TM0S2_CUBE_FACE_ENA_SHIFT 15 +#define TM0S2_CUBE_FACE_ENA_MASK (1<<15) +#define TM0S2_MAP_FORMAT (1<<14) +#define TM0S2_MAP_2D (0<<14) +#define TM0S2_MAP_3D_CUBE (1<<14) +#define TM0S2_VERTICAL_LINE_STRIDE (1<<13) +#define TM0S2_VERITCAL_LINE_STRIDE_OFF (1<<12) +#define TM0S2_OUTPUT_CHAN_SHIFT 10 +#define TM0S2_OUTPUT_CHAN_MASK (3<<10) + +#define TM0S3_MIP_FILTER_MASK (0x3<<30) +#define TM0S3_MIP_FILTER_SHIFT 30 +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 +#define TM0S3_MAG_FILTER_MASK (0x3<<28) +#define TM0S3_MAG_FILTER_SHIFT 28 +#define TM0S3_MIN_FILTER_MASK (0x3<<26) +#define TM0S3_MIN_FILTER_SHIFT 26 +#define FILTER_NEAREST 0 +#define FILTER_LINEAR 1 +#define FILTER_ANISOTROPIC 2 + +#define TM0S3_LOD_BIAS_SHIFT 17 +#define TM0S3_LOD_BIAS_MASK (0x1ff<<17) +#define TM0S3_MAX_MIP_SHIFT 9 +#define TM0S3_MAX_MIP_MASK (0xff<<9) +#define TM0S3_MIN_MIP_SHIFT 3 +#define TM0S3_MIN_MIP_MASK (0x3f<<3) +#define TM0S3_KILL_PIXEL (1<<2) +#define TM0S3_KEYED_FILTER (1<<1) +#define TM0S3_CHROMA_KEY (1<<0) + +/* _3DSTATE_MAP_TEXEL_STREAM, p188 */ +#define _3DSTATE_MAP_TEX_STREAM_CMD (CMD_3D|(0x1c<<24)|(0x05<<19)) +#define DISABLE_TEX_STREAM_BUMP (1<<12) +#define ENABLE_TEX_STREAM_BUMP ((1<<12)|(1<<11)) +#define TEX_MODIFY_UNIT_0 0 +#define TEX_MODIFY_UNIT_1 (1<<8) +#define ENABLE_TEX_STREAM_COORD_SET (1<<7) +#define TEX_STREAM_COORD_SET(x) ((x)<<4) +#define ENABLE_TEX_STREAM_MAP_IDX (1<<3) +#define TEX_STREAM_MAP_IDX(x) (x) + +#define FLUSH_MAP_CACHE (1<<0) + +#define _3DSTATE_MAP_FILTER_CMD (CMD_3D|(0x1c<<24)|(0x02<<19)) +#define FILTER_TEXMAP_INDEX(x) ((x) << 16) +#define MAG_MODE_FILTER_ENABLE (1 << 5) +#define MIN_MODE_FILTER_ENABLE (1 << 2) +#define MAG_MAPFILTER_NEAREST (0 << 3) +#define MAG_MAPFILTER_LINEAR (1 << 3) +#define MAG_MAPFILTER_ANISOTROPIC (2 << 3) +#define MIN_MAPFILTER_NEAREST (0) +#define MIN_MAPFILTER_LINEAR (1) +#define MIN_MAPFILTER_ANISOTROPIC (2) +#define ENABLE_KEYS (1<<15) +#define DISABLE_COLOR_KEY 0 +#define DISABLE_CHROMA_KEY 0 +#define DISABLE_KILL_PIXEL 0 +#define ENABLE_MIP_MODE_FILTER (1 << 9) +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 + +#endif diff --git a/src/uxa/i830_render.c b/src/uxa/i830_render.c new file mode 100644 index 00000000..e169cc11 --- /dev/null +++ b/src/uxa/i830_render.c @@ -0,0 +1,876 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Eric Anholt <eric@anholt.net> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "xf86.h" +#include "intel.h" +#include "i830_reg.h" + +struct blendinfo { + Bool dst_alpha; + Bool src_alpha; + uint32_t src_blend; + uint32_t dst_blend; +}; + +struct formatinfo { + int fmt; + uint32_t card_fmt; +}; + +#define TB0C_LAST_STAGE (1 << 31) +#define TB0C_RESULT_SCALE_1X (0 << 29) +#define TB0C_RESULT_SCALE_2X (1 << 29) +#define TB0C_RESULT_SCALE_4X (2 << 29) +#define TB0C_OP_MODULE (3 << 25) +#define TB0C_OUTPUT_WRITE_CURRENT (0 << 24) +#define TB0C_OUTPUT_WRITE_ACCUM (1 << 24) +#define TB0C_ARG3_REPLICATE_ALPHA (1<<23) +#define TB0C_ARG3_INVERT (1<<22) +#define TB0C_ARG3_SEL_XXX +#define TB0C_ARG2_REPLICATE_ALPHA (1<<17) +#define TB0C_ARG2_INVERT (1<<16) +#define TB0C_ARG2_SEL_ONE (0 << 12) +#define TB0C_ARG2_SEL_FACTOR (1 << 12) +#define TB0C_ARG2_SEL_TEXEL0 (6 << 12) +#define TB0C_ARG2_SEL_TEXEL1 (7 << 12) +#define TB0C_ARG2_SEL_TEXEL2 (8 << 12) +#define TB0C_ARG2_SEL_TEXEL3 (9 << 12) +#define TB0C_ARG1_REPLICATE_ALPHA (1<<11) +#define TB0C_ARG1_INVERT (1<<10) +#define TB0C_ARG1_SEL_ONE (0 << 6) +#define TB0C_ARG1_SEL_TEXEL0 (6 << 6) +#define TB0C_ARG1_SEL_TEXEL1 (7 << 6) +#define TB0C_ARG1_SEL_TEXEL2 (8 << 6) +#define TB0C_ARG1_SEL_TEXEL3 (9 << 6) +#define TB0C_ARG0_REPLICATE_ALPHA (1<<5) +#define TB0C_ARG0_SEL_XXX + +#define TB0A_CTR_STAGE_ENABLE (1<<31) +#define TB0A_RESULT_SCALE_1X (0 << 29) +#define TB0A_RESULT_SCALE_2X (1 << 29) +#define TB0A_RESULT_SCALE_4X (2 << 29) +#define TB0A_OP_MODULE (3 << 25) +#define TB0A_OUTPUT_WRITE_CURRENT (0<<24) +#define TB0A_OUTPUT_WRITE_ACCUM (1<<24) +#define TB0A_CTR_STAGE_SEL_BITS_XXX +#define TB0A_ARG3_SEL_XXX +#define TB0A_ARG3_INVERT (1<<17) +#define TB0A_ARG2_INVERT (1<<16) +#define TB0A_ARG2_SEL_ONE (0 << 12) +#define TB0A_ARG2_SEL_TEXEL0 (6 << 12) +#define TB0A_ARG2_SEL_TEXEL1 (7 << 12) +#define TB0A_ARG2_SEL_TEXEL2 (8 << 12) +#define TB0A_ARG2_SEL_TEXEL3 (9 << 12) +#define TB0A_ARG1_INVERT (1<<10) +#define TB0A_ARG1_SEL_ONE (0 << 6) +#define TB0A_ARG1_SEL_TEXEL0 (6 << 6) +#define TB0A_ARG1_SEL_TEXEL1 (7 << 6) +#define TB0A_ARG1_SEL_TEXEL2 (8 << 6) +#define TB0A_ARG1_SEL_TEXEL3 (9 << 6) + +static struct blendinfo i830_blend_op[] = { + /* Clear */ + {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO}, + /* Src */ + {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO}, + /* Dst */ + {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE}, + /* Over */ + {0, 1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA}, + /* OverReverse */ + {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE}, + /* In */ + {1, 0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO}, + /* InReverse */ + {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA}, + /* Out */ + {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO}, + /* OutReverse */ + {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA}, + /* Atop */ + {1, 1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, + /* AtopReverse */ + {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA}, + /* Xor */ + {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, + /* Add */ + {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ONE}, +}; + +static struct formatinfo i830_tex_formats[] = { + {PICT_a8, MAPSURF_8BIT | MT_8BIT_A8}, + {PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888}, + {PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888}, + {PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565}, + {PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555}, + {PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444}, +}; + +static struct formatinfo i855_tex_formats[] = { + {PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888}, + {PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888}, +}; + +static Bool i830_get_dest_format(PicturePtr dest_picture, uint32_t * dst_format) +{ + ScrnInfoPtr scrn; + + switch (dest_picture->format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + *dst_format = COLR_BUF_ARGB8888; + break; + case PICT_r5g6b5: + *dst_format = COLR_BUF_RGB565; + break; + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + *dst_format = COLR_BUF_ARGB1555; + break; + case PICT_a8: + *dst_format = COLR_BUF_8BIT; + break; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + *dst_format = COLR_BUF_ARGB4444; + break; + default: + scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen); + intel_debug_fallback(scrn, "Unsupported dest format 0x%x\n", + (int)dest_picture->format); + return FALSE; + } + *dst_format |= DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8); + return TRUE; +} + +static Bool i830_get_blend_cntl(ScrnInfoPtr scrn, int op, PicturePtr mask, + uint32_t dst_format, uint32_t * blendctl) +{ + uint32_t sblend, dblend; + + sblend = i830_blend_op[op].src_blend; + dblend = i830_blend_op[op].dst_blend; + + /* If there's no dst alpha channel, adjust the blend op so that we'll treat + * it as always 1. + */ + if (PICT_FORMAT_A(dst_format) == 0 && i830_blend_op[op].dst_alpha) { + if (sblend == BLENDFACTOR_DST_ALPHA) + sblend = BLENDFACTOR_ONE; + else if (sblend == BLENDFACTOR_INV_DST_ALPHA) + sblend = BLENDFACTOR_ZERO; + } + + /* For blending purposes, COLR_BUF_8BIT values show up in the green + * channel. So we can't use the alpha channel. + */ + if (dst_format == PICT_a8 && ((sblend == BLENDFACTOR_DST_ALPHA || + sblend == BLENDFACTOR_INV_DST_ALPHA))) { + intel_debug_fallback(scrn, "Can't do dst alpha blending with " + "PICT_a8 dest.\n"); + return FALSE; + } + + /* If the source alpha is being used, then we should only be in a case + * where the source blend factor is 0, and the source blend value is the + * mask channels multiplied by the source picture's alpha. + */ + if (mask && mask->componentAlpha && PICT_FORMAT_RGB(mask->format) + && i830_blend_op[op].src_alpha) { + if (dblend == BLENDFACTOR_SRC_ALPHA) { + dblend = BLENDFACTOR_SRC_COLR; + } else if (dblend == BLENDFACTOR_INV_SRC_ALPHA) { + dblend = BLENDFACTOR_INV_SRC_COLR; + } + } + + *blendctl = (sblend << S8_SRC_BLEND_FACTOR_SHIFT) | + (dblend << S8_DST_BLEND_FACTOR_SHIFT); + + return TRUE; +} + +static uint32_t i8xx_get_card_format(intel_screen_private *intel, + PicturePtr picture) +{ + int i; + + for (i = 0; i < sizeof(i830_tex_formats) / sizeof(i830_tex_formats[0]); + i++) { + if (i830_tex_formats[i].fmt == picture->format) + return i830_tex_formats[i].card_fmt; + } + + if (!(IS_I830(intel) || IS_845G(intel))) { + for (i = 0; i < sizeof(i855_tex_formats) / sizeof(i855_tex_formats[0]); + i++) { + if (i855_tex_formats[i].fmt == picture->format) + return i855_tex_formats[i].card_fmt; + } + } + + return 0; +} + +static void i830_texture_setup(PicturePtr picture, PixmapPtr pixmap, int unit) +{ + + ScrnInfoPtr scrn = xf86ScreenToScrn(picture->pDrawable->pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + uint32_t format, tiling_bits, pitch, filter; + uint32_t wrap_mode; + uint32_t texcoordtype; + + pitch = intel_pixmap_pitch(pixmap); + intel->scale_units[unit][0] = pixmap->drawable.width; + intel->scale_units[unit][1] = pixmap->drawable.height; + intel->transform[unit] = picture->transform; + + if (intel_transform_is_affine(intel->transform[unit])) + texcoordtype = TEXCOORDTYPE_CARTESIAN; + else + texcoordtype = TEXCOORDTYPE_HOMOGENEOUS; + + switch (picture->repeatType) { + case RepeatNone: + wrap_mode = TEXCOORDMODE_CLAMP_BORDER; + break; + case RepeatNormal: + wrap_mode = TEXCOORDMODE_WRAP; + break; + case RepeatPad: + wrap_mode = TEXCOORDMODE_CLAMP; + break; + case RepeatReflect: + wrap_mode = TEXCOORDMODE_MIRROR; + break; + default: + FatalError("Unknown repeat type %d\n", picture->repeatType); + } + + switch (picture->filter) { + case PictFilterNearest: + filter = ((FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT) | + (FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT)); + break; + case PictFilterBilinear: + filter = ((FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT) | + (FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT)); + break; + default: + FatalError("Bad filter 0x%x\n", picture->filter); + } + filter |= (MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT); + + if (intel_pixmap_tiled(pixmap)) { + tiling_bits = TM0S1_TILED_SURFACE; + if (intel_get_pixmap_private(pixmap)->tiling + == I915_TILING_Y) + tiling_bits |= TM0S1_TILE_WALK; + } else + tiling_bits = 0; + + format = i8xx_get_card_format(intel, picture); + + assert(intel->in_batch_atomic); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | + LOAD_TEXTURE_MAP(unit) | 4); + OUT_RELOC_PIXMAP(pixmap, I915_GEM_DOMAIN_SAMPLER, 0, 0); + OUT_BATCH(((pixmap->drawable.height - + 1) << TM0S1_HEIGHT_SHIFT) | ((pixmap->drawable.width - + 1) << + TM0S1_WIDTH_SHIFT) | + format | tiling_bits); + OUT_BATCH((pitch / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D); + OUT_BATCH(filter); + OUT_BATCH(0); /* default color */ + OUT_BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(unit) | + ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | + texcoordtype | ENABLE_ADDR_V_CNTL | + TEXCOORD_ADDR_V_MODE(wrap_mode) | + ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode)); + /* map texel stream */ + OUT_BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD); + if (unit == 0) + OUT_BATCH(TEXBIND_SET0(TEXCOORDSRC_VTXSET_0) | + TEXBIND_SET1(TEXCOORDSRC_KEEP) | + TEXBIND_SET2(TEXCOORDSRC_KEEP) | + TEXBIND_SET3(TEXCOORDSRC_KEEP)); + else + OUT_BATCH(TEXBIND_SET0(TEXCOORDSRC_VTXSET_0) | + TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) | + TEXBIND_SET2(TEXCOORDSRC_KEEP) | + TEXBIND_SET3(TEXCOORDSRC_KEEP)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | (unit << 16) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(unit) | + ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(unit)); +} + +Bool +i830_check_composite(int op, + PicturePtr source_picture, + PicturePtr mask_picture, + PicturePtr dest_picture, + int width, int height) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen); + uint32_t tmp1; + + /* Check for unsupported compositing operations. */ + if (op >= sizeof(i830_blend_op) / sizeof(i830_blend_op[0])) { + intel_debug_fallback(scrn, "Unsupported Composite op 0x%x\n", + op); + return FALSE; + } + + if (mask_picture != NULL && mask_picture->componentAlpha && + PICT_FORMAT_RGB(mask_picture->format)) { + /* Check if it's component alpha that relies on a source alpha and on + * the source value. We can only get one of those into the single + * source value that we get to blend with. + */ + if (i830_blend_op[op].src_alpha && + (i830_blend_op[op].src_blend != BLENDFACTOR_ZERO)) { + intel_debug_fallback(scrn, "Component alpha not " + "supported with source alpha and " + "source value blending.\n"); + return FALSE; + } + } + + if (!i830_get_dest_format(dest_picture, &tmp1)) { + intel_debug_fallback(scrn, "Get Color buffer format\n"); + return FALSE; + } + + if (width > 2048 || height > 2048) { + intel_debug_fallback(scrn, "Operation is too large (%d, %d)\n", width, height); + return FALSE; + } + + return TRUE; +} + +Bool +i830_check_composite_target(PixmapPtr pixmap) +{ + if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048) + return FALSE; + + if(!intel_check_pitch_3d(pixmap)) + return FALSE; + + return TRUE; +} + +Bool +i830_check_composite_texture(ScreenPtr screen, PicturePtr picture) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + + if (picture->repeatType > RepeatReflect) { + intel_debug_fallback(scrn, "Unsupported picture repeat %d\n", + picture->repeatType); + return FALSE; + } + + if (picture->filter != PictFilterNearest && + picture->filter != PictFilterBilinear) { + intel_debug_fallback(scrn, "Unsupported filter 0x%x\n", + picture->filter); + return FALSE; + } + + if (picture->pDrawable) { + int w, h; + + w = picture->pDrawable->width; + h = picture->pDrawable->height; + if ((w > 2048) || (h > 2048)) { + intel_debug_fallback(scrn, + "Picture w/h too large (%dx%d)\n", + w, h); + return FALSE; + } + + /* XXX we can use the xrgb32 types if there the picture covers the clip */ + if (!i8xx_get_card_format(intel, picture)) { + intel_debug_fallback(scrn, "Unsupported picture format " + "0x%x\n", + (int)picture->format); + return FALSE; + } + + return TRUE; + } + + return FALSE; +} + +Bool +i830_prepare_composite(int op, PicturePtr source_picture, + PicturePtr mask_picture, PicturePtr dest_picture, + PixmapPtr source, PixmapPtr mask, PixmapPtr dest) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + drm_intel_bo *bo_table[] = { + NULL, /* batch_bo */ + intel_get_pixmap_bo(source), + mask ? intel_get_pixmap_bo(mask) : NULL, + intel_get_pixmap_bo(dest), + }; + + intel->render_source_picture = source_picture; + intel->render_source = source; + intel->render_mask_picture = mask_picture; + intel->render_mask = mask; + intel->render_dest_picture = dest_picture; + intel->render_dest = dest; + + if (!intel_check_pitch_3d(source)) + return FALSE; + if (mask) { + if (mask_picture->componentAlpha && + PICT_FORMAT_RGB(mask_picture->format)) { + /* Check if it's component alpha that relies on a source alpha and on + * the source value. We can only get one of those into the single + * source value that we get to blend with. + */ + if (i830_blend_op[op].src_alpha && + (i830_blend_op[op].src_blend != BLENDFACTOR_ZERO)) { + intel_debug_fallback(scrn, "Component alpha not " + "supported with source alpha and " + "source value blending.\n"); + return FALSE; + } + } + if (!intel_check_pitch_3d(mask)) + return FALSE; + } + if (!intel_check_pitch_3d(dest)) + return FALSE; + + if (!i830_get_dest_format(dest_picture, &intel->render_dest_format)) + return FALSE; + + if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table))) + return FALSE; + + if (mask) { + intel->transform[1] = NULL; + intel->scale_units[1][0] = -1; + intel->scale_units[1][1] = -1; + } + + { + uint32_t cblend, ablend, blendctl; + + /* If component alpha is active in the mask and the blend operation + * uses the source alpha, then we know we don't need the source + * value (otherwise we would have hit a fallback earlier), so we + * provide the source alpha (src.A * mask.X) as output color. + * Conversely, if CA is set and we don't need the source alpha, then + * we produce the source value (src.X * mask.X) and the source alpha + * is unused.. Otherwise, we provide the non-CA source value + * (src.X * mask.A). + * + * The PICT_FORMAT_RGB(pict) == 0 fixups are not needed on 855+'s a8 + * pictures, but we need to implement it for 830/845 and there's no + * harm done in leaving it in. + */ + cblend = + TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_MODULE | + TB0C_OUTPUT_WRITE_CURRENT; + ablend = + TB0A_RESULT_SCALE_1X | TB0A_OP_MODULE | + TB0A_OUTPUT_WRITE_CURRENT; + + /* Get the source picture's channels into TBx_ARG1 */ + if ((mask_picture != NULL && + mask_picture->componentAlpha && + PICT_FORMAT_RGB(mask_picture->format) && + i830_blend_op[op].src_alpha) + || dest_picture->format == PICT_a8) { + /* Producing source alpha value, so the first set of channels + * is src.A instead of src.X. We also do this if the destination + * is a8, in which case src.G is what's written, and the other + * channels are ignored. + */ + ablend |= TB0A_ARG1_SEL_TEXEL0; + cblend |= TB0C_ARG1_SEL_TEXEL0 | TB0C_ARG1_REPLICATE_ALPHA; + } else { + if (PICT_FORMAT_RGB(source_picture->format) != 0) + cblend |= TB0C_ARG1_SEL_TEXEL0; + else + cblend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT; /* 0.0 */ + ablend |= TB0A_ARG1_SEL_TEXEL0; + } + + if (mask) { + cblend |= TB0C_ARG2_SEL_TEXEL1; + if (dest_picture->format == PICT_a8 || + ! mask_picture->componentAlpha || + ! PICT_FORMAT_RGB(mask_picture->format)) + cblend |= TB0C_ARG2_REPLICATE_ALPHA; + ablend |= TB0A_ARG2_SEL_TEXEL1; + } else { + cblend |= TB0C_ARG2_SEL_ONE; + ablend |= TB0A_ARG2_SEL_ONE; + } + + if (!i830_get_blend_cntl + (scrn, op, mask_picture, dest_picture->format, &blendctl)) { + return FALSE; + } + + intel->cblend = cblend; + intel->ablend = ablend; + intel->s8_blendctl = blendctl; + } + + if (intel_pixmap_is_dirty(source) || intel_pixmap_is_dirty(mask)) + intel_batch_emit_flush(scrn); + + intel->needs_render_state_emit = TRUE; + + return TRUE; +} + +static void i830_emit_composite_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + uint32_t vf2, tiling_bits; + uint32_t texcoordfmt = 0; + + intel->needs_render_state_emit = FALSE; + + IntelEmitInvarientState(scrn); + intel->last_3d = LAST_3D_RENDER; + + assert(intel->in_batch_atomic); + + if (intel_pixmap_tiled(intel->render_dest)) { + tiling_bits = BUF_3D_TILED_SURFACE; + if (intel_get_pixmap_private(intel->render_dest)->tiling + == I915_TILING_Y) + tiling_bits |= BUF_3D_TILE_WALK_Y; + } else + tiling_bits = 0; + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits | + BUF_3D_PITCH(intel_pixmap_pitch(intel->render_dest))); + OUT_RELOC_PIXMAP(intel->render_dest, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(intel->render_dest_format); + + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(0); + OUT_BATCH(0); /* ymin, xmin */ + OUT_BATCH(DRAW_YMAX(intel->render_dest->drawable.height - 1) | + DRAW_XMAX(intel->render_dest->drawable.width - 1)); + OUT_BATCH(0); /* yorig, xorig */ + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); + if (intel->render_mask) + vf2 = 2 << 12; /* 2 texture coord sets */ + else + vf2 = 1 << 12; + OUT_BATCH(vf2); /* number of coordinate sets */ + OUT_BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); + OUT_BATCH(S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD | intel-> + s8_blendctl | S8_ENABLE_COLOR_BUFFER_WRITE); + + OUT_BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD | DISABLE_INDPT_ALPHA_BLEND); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | + LOAD_TEXTURE_BLEND_STAGE(0) | 1); + OUT_BATCH(intel->cblend); + OUT_BATCH(intel->ablend); + + OUT_BATCH(_3DSTATE_ENABLES_1_CMD | DISABLE_LOGIC_OP | + DISABLE_STENCIL_TEST | DISABLE_DEPTH_BIAS | + DISABLE_SPEC_ADD | DISABLE_FOG | DISABLE_ALPHA_TEST | + ENABLE_COLOR_BLEND | DISABLE_DEPTH_TEST); + /* We have to explicitly say we don't want write disabled */ + OUT_BATCH(_3DSTATE_ENABLES_2_CMD | ENABLE_COLOR_MASK | + DISABLE_STENCIL_WRITE | ENABLE_TEX_CACHE | + DISABLE_DITHER | ENABLE_COLOR_WRITE | DISABLE_DEPTH_WRITE); + + if (intel_transform_is_affine(intel->render_source_picture->transform)) + texcoordfmt |= (TEXCOORDFMT_2D << 0); + else + texcoordfmt |= (TEXCOORDFMT_3D << 0); + if (intel->render_mask) { + if (intel_transform_is_affine + (intel->render_mask_picture->transform)) + texcoordfmt |= (TEXCOORDFMT_2D << 2); + else + texcoordfmt |= (TEXCOORDFMT_3D << 2); + } + OUT_BATCH(_3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt); + + i830_texture_setup(intel->render_source_picture, intel->render_source, 0); + if (intel->render_mask) { + i830_texture_setup(intel->render_mask_picture, + intel->render_mask, 1); + } +} + +/* Emit the vertices for a single composite rectangle. + * + * This function is no longer shared between i830 and i915 generation code. + */ +static void +i830_emit_composite_primitive(PixmapPtr dest, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, int w, int h) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(dest->drawable.pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + Bool is_affine_src, is_affine_mask = TRUE; + int per_vertex; + float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; + + per_vertex = 2; /* dest x/y */ + + { + float x = srcX, y = srcY; + + is_affine_src = intel_transform_is_affine(intel->transform[0]); + if (is_affine_src) { + if (!intel_get_transformed_coordinates(x, y, + intel-> + transform[0], + &src_x[0], + &src_y[0])) + return; + + if (!intel_get_transformed_coordinates(x, y + h, + intel-> + transform[0], + &src_x[1], + &src_y[1])) + return; + + if (!intel_get_transformed_coordinates(x + w, y + h, + intel-> + transform[0], + &src_x[2], + &src_y[2])) + return; + + per_vertex += 2; /* src x/y */ + } else { + if (!intel_get_transformed_coordinates_3d(x, y, + intel-> + transform[0], + &src_x[0], + &src_y[0], + &src_w[0])) + return; + + if (!intel_get_transformed_coordinates_3d(x, y + h, + intel-> + transform[0], + &src_x[1], + &src_y[1], + &src_w[1])) + return; + + if (!intel_get_transformed_coordinates_3d(x + w, y + h, + intel-> + transform[0], + &src_x[2], + &src_y[2], + &src_w[2])) + return; + + per_vertex += 3; /* src x/y/w */ + } + } + + if (intel->render_mask) { + float x = maskX, y = maskY; + + is_affine_mask = intel_transform_is_affine(intel->transform[1]); + if (is_affine_mask) { + if (!intel_get_transformed_coordinates(x, y, + intel-> + transform[1], + &mask_x[0], + &mask_y[0])) + return; + + if (!intel_get_transformed_coordinates(x, y + h, + intel-> + transform[1], + &mask_x[1], + &mask_y[1])) + return; + + if (!intel_get_transformed_coordinates(x + w, y + h, + intel-> + transform[1], + &mask_x[2], + &mask_y[2])) + return; + + per_vertex += 2; /* mask x/y */ + } else { + if (!intel_get_transformed_coordinates_3d(x, y, + intel-> + transform[1], + &mask_x[0], + &mask_y[0], + &mask_w[0])) + return; + + if (!intel_get_transformed_coordinates_3d(x, y + h, + intel-> + transform[1], + &mask_x[1], + &mask_y[1], + &mask_w[1])) + return; + + if (!intel_get_transformed_coordinates_3d(x + w, y + h, + intel-> + transform[1], + &mask_x[2], + &mask_y[2], + &mask_w[2])) + return; + + per_vertex += 3; /* mask x/y/w */ + } + } + + if (intel->vertex_count == 0) { + intel->vertex_index = intel->batch_used; + OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST); + } + OUT_BATCH_F(dstX + w); + OUT_BATCH_F(dstY + h); + OUT_BATCH_F(src_x[2] / intel->scale_units[0][0]); + OUT_BATCH_F(src_y[2] / intel->scale_units[0][1]); + if (!is_affine_src) { + OUT_BATCH_F(src_w[2]); + } + if (intel->render_mask) { + OUT_BATCH_F(mask_x[2] / intel->scale_units[1][0]); + OUT_BATCH_F(mask_y[2] / intel->scale_units[1][1]); + if (!is_affine_mask) { + OUT_BATCH_F(mask_w[2]); + } + } + + OUT_BATCH_F(dstX); + OUT_BATCH_F(dstY + h); + OUT_BATCH_F(src_x[1] / intel->scale_units[0][0]); + OUT_BATCH_F(src_y[1] / intel->scale_units[0][1]); + if (!is_affine_src) { + OUT_BATCH_F(src_w[1]); + } + if (intel->render_mask) { + OUT_BATCH_F(mask_x[1] / intel->scale_units[1][0]); + OUT_BATCH_F(mask_y[1] / intel->scale_units[1][1]); + if (!is_affine_mask) { + OUT_BATCH_F(mask_w[1]); + } + } + + OUT_BATCH_F(dstX); + OUT_BATCH_F(dstY); + OUT_BATCH_F(src_x[0] / intel->scale_units[0][0]); + OUT_BATCH_F(src_y[0] / intel->scale_units[0][1]); + if (!is_affine_src) { + OUT_BATCH_F(src_w[0]); + } + if (intel->render_mask) { + OUT_BATCH_F(mask_x[0] / intel->scale_units[1][0]); + OUT_BATCH_F(mask_y[0] / intel->scale_units[1][1]); + if (!is_affine_mask) { + OUT_BATCH_F(mask_w[0]); + } + } + + intel->vertex_count += 3 * per_vertex; + +} + +void i830_vertex_flush(intel_screen_private *intel) +{ + if (intel->vertex_count) { + intel->batch_ptr[intel->vertex_index] |= intel->vertex_count - 1; + intel->vertex_count = 0; + } +} + +/** + * Do a single rectangle composite operation. + */ +void +i830_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, + int dstX, int dstY, int w, int h) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(dest->drawable.pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + + intel_batch_start_atomic(scrn, 58 + /* invarient */ + 22 + /* setup */ + 20 + /* 2 * setup_texture */ + 1 + 30 /* verts */ ); + + if (intel->needs_render_state_emit) + i830_emit_composite_state(scrn); + + i830_emit_composite_primitive(dest, srcX, srcY, maskX, maskY, dstX, + dstY, w, h); + + intel_batch_end_atomic(scrn); +} + +void i830_batch_commit_notify(intel_screen_private *intel) +{ + intel->needs_render_state_emit = TRUE; +} diff --git a/src/uxa/i915_3d.c b/src/uxa/i915_3d.c new file mode 100644 index 00000000..77db5685 --- /dev/null +++ b/src/uxa/i915_3d.c @@ -0,0 +1,110 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "xf86.h" +#include "intel.h" + +#include "i915_reg.h" + +void I915EmitInvarientState(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + assert(intel->in_batch_atomic); + + OUT_BATCH(_3DSTATE_AA_CMD | + AA_LINE_ECAAR_WIDTH_ENABLE | + AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); + + /* Disable independent alpha blend */ + OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | + IAB_MODIFY_ENABLE | + IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) | + IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE << + IAB_SRC_FACTOR_SHIFT) | + IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO << + IAB_DST_FACTOR_SHIFT)); + + OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_Z_CMD); + OUT_BATCH(0); + + /* Don't support texture crossbar yet */ + OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7)); + + OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D); + + OUT_BATCH(_3DSTATE_MODES_4_CMD | + ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) | + ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) | + ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff)); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2); + OUT_BATCH(0x00000000); /* Disable texture coordinate wrap-shortest */ + OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | + S4_LINE_WIDTH_ONE | + S4_CULLMODE_NONE | + S4_VFMT_XY); + OUT_BATCH(0x00000000); /* Stencil. */ + + OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); + OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); + + OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */ + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_STIPPLE); + OUT_BATCH(0x00000000); + + OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0); +} diff --git a/src/uxa/i915_3d.h b/src/uxa/i915_3d.h new file mode 100644 index 00000000..04531f33 --- /dev/null +++ b/src/uxa/i915_3d.h @@ -0,0 +1,619 @@ +/* -*- c-basic-offset: 4 -*- */ +/* + * Copyright © 2006,2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +/* Each instruction is 3 dwords long, though most don't require all + * this space. Maximum of 123 instructions. Smaller maxes per insn + * type. + */ +#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16)) + +#define REG_TYPE_R 0 /* temporary regs, no need to + * dcl, must be written before + * read -- Preserved between + * phases. + */ +#define REG_TYPE_T 1 /* Interpolated values, must be + * dcl'ed before use. + * + * 0..7: texture coord, + * 8: diffuse spec, + * 9: specular color, + * 10: fog parameter in w. + */ +#define REG_TYPE_CONST 2 /* Restriction: only one const + * can be referenced per + * instruction, though it may be + * selected for multiple inputs. + * Constants not initialized + * default to zero. + */ +#define REG_TYPE_S 3 /* sampler */ +#define REG_TYPE_OC 4 /* output color (rgba) */ +#define REG_TYPE_OD 5 /* output depth (w), xyz are + * temporaries. If not written, + * interpolated depth is used? + */ +#define REG_TYPE_U 6 /* unpreserved temporaries */ +#define REG_TYPE_MASK 0x7 +#define REG_TYPE_SHIFT 4 +#define REG_NR_MASK 0xf + +/* REG_TYPE_T: +*/ +#define T_TEX0 0 +#define T_TEX1 1 +#define T_TEX2 2 +#define T_TEX3 3 +#define T_TEX4 4 +#define T_TEX5 5 +#define T_TEX6 6 +#define T_TEX7 7 +#define T_DIFFUSE 8 +#define T_SPECULAR 9 +#define T_FOG_W 10 /* interpolated fog is in W coord */ + +/* Arithmetic instructions */ + +/* .replicate_swizzle == selection and replication of a particular + * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww + */ +#define A0_NOP (0x0<<24) /* no operation */ +#define A0_ADD (0x1<<24) /* dst = src0 + src1 */ +#define A0_MOV (0x2<<24) /* dst = src0 */ +#define A0_MUL (0x3<<24) /* dst = src0 * src1 */ +#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */ +#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */ +#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */ +#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */ +#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */ +#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */ +#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */ +#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */ +#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */ +#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */ +#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR (0x10<<24) /* dst = floor(src0) */ +#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ +#define A0_TRC (0x12<<24) /* dst = int(src0) */ +#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */ +#define A0_DEST_SATURATE (1<<22) +#define A0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +#define A0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define A0_DEST_CHANNEL_X (1<<10) +#define A0_DEST_CHANNEL_Y (2<<10) +#define A0_DEST_CHANNEL_Z (4<<10) +#define A0_DEST_CHANNEL_W (8<<10) +#define A0_DEST_CHANNEL_ALL (0xf<<10) +#define A0_DEST_CHANNEL_SHIFT 10 +#define A0_SRC0_TYPE_SHIFT 7 +#define A0_SRC0_NR_SHIFT 2 + +#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y) +#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z) + +#define SRC_X 0 +#define SRC_Y 1 +#define SRC_Z 2 +#define SRC_W 3 +#define SRC_ZERO 4 +#define SRC_ONE 5 + +#define A1_SRC0_CHANNEL_X_NEGATE (1<<31) +#define A1_SRC0_CHANNEL_X_SHIFT 28 +#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27) +#define A1_SRC0_CHANNEL_Y_SHIFT 24 +#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23) +#define A1_SRC0_CHANNEL_Z_SHIFT 20 +#define A1_SRC0_CHANNEL_W_NEGATE (1<<19) +#define A1_SRC0_CHANNEL_W_SHIFT 16 +#define A1_SRC1_TYPE_SHIFT 13 +#define A1_SRC1_NR_SHIFT 8 +#define A1_SRC1_CHANNEL_X_NEGATE (1<<7) +#define A1_SRC1_CHANNEL_X_SHIFT 4 +#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3) +#define A1_SRC1_CHANNEL_Y_SHIFT 0 + +#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31) +#define A2_SRC1_CHANNEL_Z_SHIFT 28 +#define A2_SRC1_CHANNEL_W_NEGATE (1<<27) +#define A2_SRC1_CHANNEL_W_SHIFT 24 +#define A2_SRC2_TYPE_SHIFT 21 +#define A2_SRC2_NR_SHIFT 16 +#define A2_SRC2_CHANNEL_X_NEGATE (1<<15) +#define A2_SRC2_CHANNEL_X_SHIFT 12 +#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11) +#define A2_SRC2_CHANNEL_Y_SHIFT 8 +#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7) +#define A2_SRC2_CHANNEL_Z_SHIFT 4 +#define A2_SRC2_CHANNEL_W_NEGATE (1<<3) +#define A2_SRC2_CHANNEL_W_SHIFT 0 + +/* Texture instructions */ +#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared + * sampler and address, and output + * filtered texel data to destination + * register */ +#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a + * perspective divide of the texture + * coordinate .xyz values by .w before + * sampling. */ +#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the + * computed LOD by w. Only S4.6 two's + * comp is used. This implies that a + * float to fixed conversion is + * done. */ +#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling + * operation. Simply kills the pixel + * if any channel of the address + * register is < 0.0. */ +#define T0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +/* Note: U (unpreserved) regs do not retain their values between + * phases (cannot be used for feedback) + * + * Note: oC and OD registers can only be used as the destination of a + * texture instruction once per phase (this is an implementation + * restriction). + */ +#define T0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ +#define T0_SAMPLER_NR_MASK (0xf<<0) + +#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ +/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ +#define T1_ADDRESS_REG_NR_SHIFT 17 +#define T2_MBZ 0 + +/* Declaration instructions */ +#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) + * register or an s (sampler) + * register. */ +#define D0_SAMPLE_TYPE_SHIFT 22 +#define D0_SAMPLE_TYPE_2D (0x0<<22) +#define D0_SAMPLE_TYPE_CUBE (0x1<<22) +#define D0_SAMPLE_TYPE_VOLUME (0x2<<22) +#define D0_SAMPLE_TYPE_MASK (0x3<<22) + +#define D0_TYPE_SHIFT 19 +/* Allow: T, S */ +#define D0_NR_SHIFT 14 +/* Allow T: 0..10, S: 0..15 */ +#define D0_CHANNEL_X (1<<10) +#define D0_CHANNEL_Y (2<<10) +#define D0_CHANNEL_Z (4<<10) +#define D0_CHANNEL_W (8<<10) +#define D0_CHANNEL_ALL (0xf<<10) +#define D0_CHANNEL_NONE (0<<10) + +#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y) +#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z) + +/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse + * or specular declarations. + * + * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) + * + * Must be zero for S (sampler) dcls + */ +#define D1_MBZ 0 +#define D2_MBZ 0 + + +/* MASK_* are the unshifted bitmasks of the destination mask in arithmetic + * operations + */ +#define MASK_X 0x1 +#define MASK_Y 0x2 +#define MASK_Z 0x4 +#define MASK_W 0x8 +#define MASK_XYZ (MASK_X | MASK_Y | MASK_Z) +#define MASK_XYZW (MASK_XYZ | MASK_W) +#define MASK_SATURATE 0x10 + +/* Temporary, undeclared regs. Preserved between phases */ +#define FS_R0 ((REG_TYPE_R << REG_TYPE_SHIFT) | 0) +#define FS_R1 ((REG_TYPE_R << REG_TYPE_SHIFT) | 1) +#define FS_R2 ((REG_TYPE_R << REG_TYPE_SHIFT) | 2) +#define FS_R3 ((REG_TYPE_R << REG_TYPE_SHIFT) | 3) + +/* Texture coordinate regs. Must be declared. */ +#define FS_T0 ((REG_TYPE_T << REG_TYPE_SHIFT) | 0) +#define FS_T1 ((REG_TYPE_T << REG_TYPE_SHIFT) | 1) +#define FS_T2 ((REG_TYPE_T << REG_TYPE_SHIFT) | 2) +#define FS_T3 ((REG_TYPE_T << REG_TYPE_SHIFT) | 3) +#define FS_T4 ((REG_TYPE_T << REG_TYPE_SHIFT) | 4) +#define FS_T5 ((REG_TYPE_T << REG_TYPE_SHIFT) | 5) +#define FS_T6 ((REG_TYPE_T << REG_TYPE_SHIFT) | 6) +#define FS_T7 ((REG_TYPE_T << REG_TYPE_SHIFT) | 7) +#define FS_T8 ((REG_TYPE_T << REG_TYPE_SHIFT) | 8) +#define FS_T9 ((REG_TYPE_T << REG_TYPE_SHIFT) | 9) +#define FS_T10 ((REG_TYPE_T << REG_TYPE_SHIFT) | 10) + +/* Constant values */ +#define FS_C0 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 0) +#define FS_C1 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 1) +#define FS_C2 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 2) +#define FS_C3 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 3) +#define FS_C4 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 4) +#define FS_C5 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 5) +#define FS_C6 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 6) +#define FS_C7 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 7) + +/* Sampler regs */ +#define FS_S0 ((REG_TYPE_S << REG_TYPE_SHIFT) | 0) +#define FS_S1 ((REG_TYPE_S << REG_TYPE_SHIFT) | 1) +#define FS_S2 ((REG_TYPE_S << REG_TYPE_SHIFT) | 2) +#define FS_S3 ((REG_TYPE_S << REG_TYPE_SHIFT) | 3) + +/* Output color */ +#define FS_OC ((REG_TYPE_OC << REG_TYPE_SHIFT) | 0) + +/* Output depth */ +#define FS_OD ((REG_TYPE_OD << REG_TYPE_SHIFT) | 0) + +/* Unpreserved temporary regs */ +#define FS_U0 ((REG_TYPE_U << REG_TYPE_SHIFT) | 0) +#define FS_U1 ((REG_TYPE_U << REG_TYPE_SHIFT) | 1) +#define FS_U2 ((REG_TYPE_U << REG_TYPE_SHIFT) | 2) +#define FS_U3 ((REG_TYPE_U << REG_TYPE_SHIFT) | 3) + +#define X_CHANNEL_SHIFT (REG_TYPE_SHIFT + 3) +#define Y_CHANNEL_SHIFT (X_CHANNEL_SHIFT + 4) +#define Z_CHANNEL_SHIFT (Y_CHANNEL_SHIFT + 4) +#define W_CHANNEL_SHIFT (Z_CHANNEL_SHIFT + 4) + +#define REG_CHANNEL_MASK 0xf + +#define REG_NR(reg) ((reg) & REG_NR_MASK) +#define REG_TYPE(reg) (((reg) >> REG_TYPE_SHIFT) & REG_TYPE_MASK) +#define REG_X(reg) (((reg) >> X_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_Y(reg) (((reg) >> Y_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_Z(reg) (((reg) >> Z_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_W(reg) (((reg) >> W_CHANNEL_SHIFT) & REG_CHANNEL_MASK) + +enum i915_fs_channel { + X_CHANNEL_VAL = 0, + Y_CHANNEL_VAL, + Z_CHANNEL_VAL, + W_CHANNEL_VAL, + ZERO_CHANNEL_VAL, + ONE_CHANNEL_VAL, + + NEG_X_CHANNEL_VAL = X_CHANNEL_VAL | 0x8, + NEG_Y_CHANNEL_VAL = Y_CHANNEL_VAL | 0x8, + NEG_Z_CHANNEL_VAL = Z_CHANNEL_VAL | 0x8, + NEG_W_CHANNEL_VAL = W_CHANNEL_VAL | 0x8, + NEG_ONE_CHANNEL_VAL = ONE_CHANNEL_VAL | 0x8 +}; + +#define i915_fs_operand(reg, x, y, z, w) \ + (reg) | \ +(x##_CHANNEL_VAL << X_CHANNEL_SHIFT) | \ +(y##_CHANNEL_VAL << Y_CHANNEL_SHIFT) | \ +(z##_CHANNEL_VAL << Z_CHANNEL_SHIFT) | \ +(w##_CHANNEL_VAL << W_CHANNEL_SHIFT) + +/** + * Construct an operand description for using a register with no swizzling + */ +#define i915_fs_operand_reg(reg) \ + i915_fs_operand(reg, X, Y, Z, W) + +#define i915_fs_operand_reg_negate(reg) \ + i915_fs_operand(reg, NEG_X, NEG_Y, NEG_Z, NEG_W) + +/** + * Returns an operand containing (0.0, 0.0, 0.0, 0.0). + */ +#define i915_fs_operand_zero() i915_fs_operand(FS_R0, ZERO, ZERO, ZERO, ZERO) + +/** + * Returns an unused operand + */ +#define i915_fs_operand_none() i915_fs_operand_zero() + +/** + * Returns an operand containing (1.0, 1.0, 1.0, 1.0). + */ +#define i915_fs_operand_one() i915_fs_operand(FS_R0, ONE, ONE, ONE, ONE) + +#define i915_get_hardware_channel_val(val, shift, negate) \ + (((val & 0x7) << shift) | ((val & 0x8) ? negate : 0)) + +/** + * Outputs a fragment shader command to declare a sampler or texture register. + */ +#define i915_fs_dcl(reg) \ + do { \ + OUT_BATCH(D0_DCL | \ + (REG_TYPE(reg) << D0_TYPE_SHIFT) | \ + (REG_NR(reg) << D0_NR_SHIFT) | \ + ((REG_TYPE(reg) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); \ + OUT_BATCH(0); \ + OUT_BATCH(0); \ + } while (0) + +#define i915_fs_texld(dest_reg, sampler_reg, address_reg) \ + do { \ + OUT_BATCH(T0_TEXLD | \ + (REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \ + (REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \ + OUT_BATCH((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \ + (REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \ + OUT_BATCH(0); \ + } while (0) + +#define i915_fs_texldp(dest_reg, sampler_reg, address_reg) \ + do { \ + OUT_BATCH(T0_TEXLDP | \ + (REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \ + (REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \ + OUT_BATCH((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \ + (REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \ + OUT_BATCH(0); \ + } while (0) + +#define i915_fs_arith_masked(op, dest_reg, dest_mask, operand0, operand1, operand2) \ + _i915_fs_arith_masked(A0_##op, dest_reg, dest_mask, operand0, operand1, operand2) + +#define i915_fs_arith(op, dest_reg, operand0, operand1, operand2) \ + _i915_fs_arith(A0_##op, dest_reg, operand0, operand1, operand2) + +#define _i915_fs_arith_masked(cmd, dest_reg, dest_mask, operand0, operand1, operand2) \ + do { \ + /* Set up destination register and write mask */ \ + OUT_BATCH(cmd | \ + (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \ + (((dest_mask) & ~MASK_SATURATE) << A0_DEST_CHANNEL_SHIFT) | \ + (((dest_mask) & MASK_SATURATE) ? A0_DEST_SATURATE : 0) | \ + /* Set up operand 0 */ \ + (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \ + (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \ + OUT_BATCH(i915_get_hardware_channel_val(REG_X(operand0), \ + A1_SRC0_CHANNEL_X_SHIFT, \ + A1_SRC0_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand0), \ + A1_SRC0_CHANNEL_Y_SHIFT, \ + A1_SRC0_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand0), \ + A1_SRC0_CHANNEL_Z_SHIFT, \ + A1_SRC0_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand0), \ + A1_SRC0_CHANNEL_W_SHIFT, \ + A1_SRC0_CHANNEL_W_NEGATE) | \ + /* Set up operand 1 */ \ + (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \ + (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand1), \ + A1_SRC1_CHANNEL_X_SHIFT, \ + A1_SRC1_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand1), \ + A1_SRC1_CHANNEL_Y_SHIFT, \ + A1_SRC1_CHANNEL_Y_NEGATE)); \ + OUT_BATCH(i915_get_hardware_channel_val(REG_Z(operand1), \ + A2_SRC1_CHANNEL_Z_SHIFT, \ + A2_SRC1_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand1), \ + A2_SRC1_CHANNEL_W_SHIFT, \ + A2_SRC1_CHANNEL_W_NEGATE) | \ + /* Set up operand 2 */ \ + (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \ + (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand2), \ + A2_SRC2_CHANNEL_X_SHIFT, \ + A2_SRC2_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand2), \ + A2_SRC2_CHANNEL_Y_SHIFT, \ + A2_SRC2_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand2), \ + A2_SRC2_CHANNEL_Z_SHIFT, \ + A2_SRC2_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand2), \ + A2_SRC2_CHANNEL_W_SHIFT, \ + A2_SRC2_CHANNEL_W_NEGATE)); \ + } while (0) + +#define _i915_fs_arith(cmd, dest_reg, operand0, operand1, operand2) do {\ + /* Set up destination register and write mask */ \ + OUT_BATCH(cmd | \ + (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \ + (A0_DEST_CHANNEL_ALL) | \ + /* Set up operand 0 */ \ + (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \ + (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \ + OUT_BATCH(i915_get_hardware_channel_val(REG_X(operand0), \ + A1_SRC0_CHANNEL_X_SHIFT, \ + A1_SRC0_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand0), \ + A1_SRC0_CHANNEL_Y_SHIFT, \ + A1_SRC0_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand0), \ + A1_SRC0_CHANNEL_Z_SHIFT, \ + A1_SRC0_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand0), \ + A1_SRC0_CHANNEL_W_SHIFT, \ + A1_SRC0_CHANNEL_W_NEGATE) | \ + /* Set up operand 1 */ \ + (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \ + (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand1), \ + A1_SRC1_CHANNEL_X_SHIFT, \ + A1_SRC1_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand1), \ + A1_SRC1_CHANNEL_Y_SHIFT, \ + A1_SRC1_CHANNEL_Y_NEGATE)); \ + OUT_BATCH(i915_get_hardware_channel_val(REG_Z(operand1), \ + A2_SRC1_CHANNEL_Z_SHIFT, \ + A2_SRC1_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand1), \ + A2_SRC1_CHANNEL_W_SHIFT, \ + A2_SRC1_CHANNEL_W_NEGATE) | \ + /* Set up operand 2 */ \ + (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \ + (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand2), \ + A2_SRC2_CHANNEL_X_SHIFT, \ + A2_SRC2_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand2), \ + A2_SRC2_CHANNEL_Y_SHIFT, \ + A2_SRC2_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand2), \ + A2_SRC2_CHANNEL_Z_SHIFT, \ + A2_SRC2_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand2), \ + A2_SRC2_CHANNEL_W_SHIFT, \ + A2_SRC2_CHANNEL_W_NEGATE)); \ +} while (0) + +#define i915_fs_mov(dest_reg, operand0) \ + i915_fs_arith(MOV, dest_reg, \ + operand0, \ + i915_fs_operand_none(), \ + i915_fs_operand_none()) + +#define i915_fs_mov_masked(dest_reg, dest_mask, operand0) \ + i915_fs_arith_masked (MOV, dest_reg, dest_mask, \ + operand0, \ + i915_fs_operand_none(), \ + i915_fs_operand_none()) + + +#define i915_fs_frc(dest_reg, operand0) \ + i915_fs_arith (FRC, dest_reg, \ + operand0, \ + i915_fs_operand_none(), \ + i915_fs_operand_none()) + +/** Add operand0 and operand1 and put the result in dest_reg */ +#define i915_fs_add(dest_reg, operand0, operand1) \ + i915_fs_arith (ADD, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +/** Multiply operand0 and operand1 and put the result in dest_reg */ +#define i915_fs_mul(dest_reg, operand0, operand1) \ + i915_fs_arith (MUL, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +/** Computes 1/sqrt(operand0.replicate_swizzle) puts the result in dest_reg */ +#define i915_fs_rsq(dest_reg, dest_mask, operand0) \ + do { \ + if (dest_mask) { \ + i915_fs_arith_masked (RSQ, dest_reg, dest_mask, \ + operand0, \ + i915_fs_operand_none (), \ + i915_fs_operand_none ()); \ + } else { \ + i915_fs_arith (RSQ, dest_reg, \ + operand0, \ + i915_fs_operand_none (), \ + i915_fs_operand_none ()); \ + } \ + } while (0) + +/** Puts the minimum of operand0 and operand1 in dest_reg */ +#define i915_fs_min(dest_reg, operand0, operand1) \ + i915_fs_arith (MIN, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +/** Puts the maximum of operand0 and operand1 in dest_reg */ +#define i915_fs_max(dest_reg, operand0, operand1) \ + i915_fs_arith (MAX, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +#define i915_fs_cmp(dest_reg, operand0, operand1, operand2) \ + i915_fs_arith (CMP, dest_reg, operand0, operand1, operand2) + +/** Perform operand0 * operand1 + operand2 and put the result in dest_reg */ +#define i915_fs_mad(dest_reg, dest_mask, op0, op1, op2) \ + do { \ + if (dest_mask) { \ + i915_fs_arith_masked (MAD, dest_reg, dest_mask, op0, op1, op2); \ + } else { \ + i915_fs_arith (MAD, dest_reg, op0, op1, op2); \ + } \ + } while (0) + +#define i915_fs_dp2add(dest_reg, dest_mask, op0, op1, op2) \ + do { \ + if (dest_mask) { \ + i915_fs_arith_masked (DP2ADD, dest_reg, dest_mask, op0, op1, op2); \ + } else { \ + i915_fs_arith (DP2ADD, dest_reg, op0, op1, op2); \ + } \ + } while (0) + +/** + * Perform a 3-component dot-product of operand0 and operand1 and put the + * resulting scalar in the channels of dest_reg specified by the dest_mask. + */ +#define i915_fs_dp3(dest_reg, dest_mask, op0, op1) \ + do { \ + if (dest_mask) { \ + i915_fs_arith_masked (DP3, dest_reg, dest_mask, \ + op0, op1,\ + i915_fs_operand_none()); \ + } else { \ + i915_fs_arith (DP3, dest_reg, op0, op1,\ + i915_fs_operand_none()); \ + } \ + } while (0) + +/** + * Sets up local state for accumulating a fragment shader buffer. + * + * \param x maximum number of shader commands that may be used between + * a FS_START and FS_END + */ +#define FS_LOCALS() \ + uint32_t _shader_offset + +#define FS_BEGIN() \ + do { \ + _shader_offset = intel->batch_used++; \ + } while (0) + +#define FS_END() \ + do { \ + intel->batch_ptr[_shader_offset] = \ + _3DSTATE_PIXEL_SHADER_PROGRAM | \ + (intel->batch_used - _shader_offset - 2); \ + } while (0); diff --git a/src/uxa/i915_reg.h b/src/uxa/i915_reg.h new file mode 100644 index 00000000..746a4131 --- /dev/null +++ b/src/uxa/i915_reg.h @@ -0,0 +1,844 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _I915_REG_H_ +#define _I915_REG_H_ + +#define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value) + +#define CMD_3D (0x3<<29) + +#define PRIM3D (CMD_3D | (0x1f<<24)) +#define PRIM3D_INDIRECT_SEQUENTIAL ((1<<23) | (0<<17)) +#define PRIM3D_TRILIST (PRIM3D | (0x0<<18)) +#define PRIM3D_TRISTRIP (PRIM3D | (0x1<<18)) +#define PRIM3D_TRISTRIP_RVRSE (PRIM3D | (0x2<<18)) +#define PRIM3D_TRIFAN (PRIM3D | (0x3<<18)) +#define PRIM3D_POLY (PRIM3D | (0x4<<18)) +#define PRIM3D_LINELIST (PRIM3D | (0x5<<18)) +#define PRIM3D_LINESTRIP (PRIM3D | (0x6<<18)) +#define PRIM3D_RECTLIST (PRIM3D | (0x7<<18)) +#define PRIM3D_POINTLIST (PRIM3D | (0x8<<18)) +#define PRIM3D_DIB (PRIM3D | (0x9<<18)) +#define PRIM3D_CLEAR_RECT (PRIM3D | (0xa<<18)) +#define PRIM3D_ZONE_INIT (PRIM3D | (0xd<<18)) +#define PRIM3D_MASK (0x1f<<18) + +/* p137 */ +#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24)) +#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16) +#define AA_LINE_ECAAR_WIDTH_0_5 0 +#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14) +#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14) +#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14) +#define AA_LINE_REGION_WIDTH_ENABLE (1<<8) +#define AA_LINE_REGION_WIDTH_0_5 0 +#define AA_LINE_REGION_WIDTH_1_0 (1<<6) +#define AA_LINE_REGION_WIDTH_2_0 (2<<6) +#define AA_LINE_REGION_WIDTH_4_0 (3<<6) + +/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/ +#define _3DSTATE_BACKFACE_STENCIL_OPS (CMD_3D | (0x8<<24)) +#define BFO_ENABLE_STENCIL_REF (1<<23) +#define BFO_STENCIL_REF_SHIFT 15 +#define BFO_STENCIL_REF_MASK (0xff<<15) +#define BFO_ENABLE_STENCIL_FUNCS (1<<14) +#define BFO_STENCIL_TEST_SHIFT 11 +#define BFO_STENCIL_TEST_MASK (0x7<<11) +#define BFO_STENCIL_FAIL_SHIFT 8 +#define BFO_STENCIL_FAIL_MASK (0x7<<8) +#define BFO_STENCIL_PASS_Z_FAIL_SHIFT 5 +#define BFO_STENCIL_PASS_Z_FAIL_MASK (0x7<<5) +#define BFO_STENCIL_PASS_Z_PASS_SHIFT 2 +#define BFO_STENCIL_PASS_Z_PASS_MASK (0x7<<2) +#define BFO_ENABLE_STENCIL_TWO_SIDE (1<<1) +#define BFO_STENCIL_TWO_SIDE (1<<0) + +/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */ +#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24)) +#define BFM_ENABLE_STENCIL_TEST_MASK (1<<17) +#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16) +#define BFM_STENCIL_TEST_MASK_SHIFT 8 +#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8) +#define BFM_STENCIL_WRITE_MASK_SHIFT 0 +#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0) + +/* 3DSTATE_BIN_CONTROL p141 */ + +/* p143 */ +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +#define BUF_3D_PITCH(x) (((x)/4)<<2) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + +/* 3DSTATE_CHROMA_KEY */ + +/* 3DSTATE_CLEAR_PARAMETERS, p150 */ +#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5) +/* Dword 1 */ +#define CLEARPARAM_CLEAR_RECT (1 << 16) +#define CLEARPARAM_ZONE_INIT (0 << 16) +#define CLEARPARAM_WRITE_COLOR (1 << 2) +#define CLEARPARAM_WRITE_DEPTH (1 << 1) +#define CLEARPARAM_WRITE_STENCIL (1 << 0) + +/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */ +#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16)) + +/* 3DSTATE_COORD_SET_BINDINGS, p154 */ +#define _3DSTATE_COORD_SET_BINDINGS (CMD_3D | (0x16<<24)) +#define CSB_TCB(iunit, eunit) ((eunit)<<(iunit*3)) + +/* p156 */ +#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16)) + +/* p157 */ +#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16)) + +/* p158 */ +#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16)) + +/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */ +#define _3DSTATE_DEPTH_OFFSET_SCALE (CMD_3D | (0x1d<<24) | (0x97<<16)) +/* scale in dword 1 */ + +/* The depth subrectangle is not supported, but must be disabled. */ +/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */ +#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | (1 << 1) | (0 << 0)) + +/* p161 */ +#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) +/* Dword 1 */ +#define TEX_DEFAULT_COLOR_OGL (0<<30) +#define TEX_DEFAULT_COLOR_D3D (1<<30) +#define ZR_EARLY_DEPTH (1<<29) +#define LOD_PRECLAMP_OGL (1<<28) +#define LOD_PRECLAMP_D3D (0<<28) +#define DITHER_FULL_ALWAYS (0<<26) +#define DITHER_FULL_ON_FB_BLEND (1<<26) +#define DITHER_CLAMPED_ALWAYS (2<<26) +#define LINEAR_GAMMA_BLEND_32BPP (1<<25) +#define DEBUG_DISABLE_ENH_DITHER (1<<24) +#define DSTORG_HORT_BIAS(x) ((x)<<20) +#define DSTORG_VERT_BIAS(x) ((x)<<16) +#define COLOR_4_2_2_CHNL_WRT_ALL 0 +#define COLOR_4_2_2_CHNL_WRT_Y (1<<12) +#define COLOR_4_2_2_CHNL_WRT_CR (2<<12) +#define COLOR_4_2_2_CHNL_WRT_CB (3<<12) +#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12) +#define COLR_BUF_8BIT 0 +#define COLR_BUF_RGB555 (1<<8) +#define COLR_BUF_RGB565 (2<<8) +#define COLR_BUF_ARGB8888 (3<<8) +#define COLR_BUF_ARGB4444 (8<<8) +#define COLR_BUF_ARGB1555 (9<<8) +#define COLR_BUF_ARGB2AAA (0xa<<8) +#define DEPTH_FRMT_16_FIXED 0 +#define DEPTH_FRMT_16_FLOAT (1<<2) +#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2) +#define VERT_LINE_STRIDE_1 (1<<1) +#define VERT_LINE_STRIDE_0 (0<<1) +#define VERT_LINE_STRIDE_OFS_1 1 +#define VERT_LINE_STRIDE_OFS_0 0 + +/* p166 */ +#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3) +/* Dword 1 */ +#define DRAW_RECT_DIS_DEPTH_OFS (1<<30) +#define DRAW_DITHER_OFS_X(x) ((x)<<26) +#define DRAW_DITHER_OFS_Y(x) ((x)<<24) +/* Dword 2 */ +#define DRAW_YMIN(x) ((x)<<16) +#define DRAW_XMIN(x) (x) +/* Dword 3 */ +#define DRAW_YMAX(x) ((x)<<16) +#define DRAW_XMAX(x) (x) +/* Dword 4 */ +#define DRAW_YORG(x) ((x)<<16) +#define DRAW_XORG(x) (x) + +/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */ + +/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */ + +/* _3DSTATE_FOG_COLOR, p173 */ +#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24)) +#define FOG_COLOR_RED(x) ((x)<<16) +#define FOG_COLOR_GREEN(x) ((x)<<8) +#define FOG_COLOR_BLUE(x) (x) + +/* _3DSTATE_FOG_MODE, p174 */ +#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2) +/* Dword 1 */ +#define FMC1_FOGFUNC_MODIFY_ENABLE (1<<31) +#define FMC1_FOGFUNC_VERTEX (0<<28) +#define FMC1_FOGFUNC_PIXEL_EXP (1<<28) +#define FMC1_FOGFUNC_PIXEL_EXP2 (2<<28) +#define FMC1_FOGFUNC_PIXEL_LINEAR (3<<28) +#define FMC1_FOGFUNC_MASK (3<<28) +#define FMC1_FOGINDEX_MODIFY_ENABLE (1<<27) +#define FMC1_FOGINDEX_Z (0<<25) +#define FMC1_FOGINDEX_W (1<<25) +#define FMC1_C1_C2_MODIFY_ENABLE (1<<24) +#define FMC1_DENSITY_MODIFY_ENABLE (1<<23) +#define FMC1_C1_ONE (1<<13) +#define FMC1_C1_MASK (0xffff<<4) +/* Dword 2 */ +#define FMC2_C2_ONE (1<<16) +/* Dword 3 */ +#define FMC3_D_ONE (1<<16) + +/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */ +#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24)) +#define IAB_MODIFY_ENABLE (1<<23) +#define IAB_ENABLE (1<<22) +#define IAB_MODIFY_FUNC (1<<21) +#define IAB_FUNC_SHIFT 16 +#define IAB_MODIFY_SRC_FACTOR (1<<11) +#define IAB_SRC_FACTOR_SHIFT 6 +#define IAB_SRC_FACTOR_MASK (BLENDFACT_MASK<<6) +#define IAB_MODIFY_DST_FACTOR (1<<5) +#define IAB_DST_FACTOR_SHIFT 0 +#define IAB_DST_FACTOR_MASK (BLENDFACT_MASK<<0) + +#define BLENDFACT_ZERO 0x01 +#define BLENDFACT_ONE 0x02 +#define BLENDFACT_SRC_COLR 0x03 +#define BLENDFACT_INV_SRC_COLR 0x04 +#define BLENDFACT_SRC_ALPHA 0x05 +#define BLENDFACT_INV_SRC_ALPHA 0x06 +#define BLENDFACT_DST_ALPHA 0x07 +#define BLENDFACT_INV_DST_ALPHA 0x08 +#define BLENDFACT_DST_COLR 0x09 +#define BLENDFACT_INV_DST_COLR 0x0a +#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACT_CONST_COLOR 0x0c +#define BLENDFACT_INV_CONST_COLOR 0x0d +#define BLENDFACT_CONST_ALPHA 0x0e +#define BLENDFACT_INV_CONST_ALPHA 0x0f +#define BLENDFACT_MASK 0x0f + +#define BLENDFUNC_ADD 0x0 +#define BLENDFUNC_SUBTRACT 0x1 +#define BLENDFUNC_REVERSE_SUBTRACT 0x2 +#define BLENDFUNC_MIN 0x3 +#define BLENDFUNC_MAX 0x4 +#define BLENDFUNC_MASK 0x7 + +/* 3DSTATE_LOAD_INDIRECT, p180 */ + +#define _3DSTATE_LOAD_INDIRECT (CMD_3D|(0x1d<<24)|(0x7<<16)) +#define LI0_STATE_STATIC_INDIRECT (0x01<<8) +#define LI0_STATE_DYNAMIC_INDIRECT (0x02<<8) +#define LI0_STATE_SAMPLER (0x04<<8) +#define LI0_STATE_MAP (0x08<<8) +#define LI0_STATE_PROGRAM (0x10<<8) +#define LI0_STATE_CONSTANTS (0x20<<8) + +#define SIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SIS0_FORCE_LOAD (1<<1) +#define SIS0_BUFFER_VALID (1<<0) +#define SIS1_BUFFER_LENGTH(x) ((x)&0xff) + +#define DIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define DIS0_BUFFER_RESET (1<<1) +#define DIS0_BUFFER_VALID (1<<0) + +#define SSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SSB0_FORCE_LOAD (1<<1) +#define SSB0_BUFFER_VALID (1<<0) +#define SSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define MSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define MSB0_FORCE_LOAD (1<<1) +#define MSB0_BUFFER_VALID (1<<0) +#define MSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSP0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSP0_FORCE_LOAD (1<<1) +#define PSP0_BUFFER_VALID (1<<0) +#define PSP1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSC0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSC0_FORCE_LOAD (1<<1) +#define PSC0_BUFFER_VALID (1<<0) +#define PSC1_BUFFER_LENGTH(x) ((x)&0xff) + +/* _3DSTATE_RASTERIZATION_RULES */ +#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24)) +#define ENABLE_POINT_RASTER_RULE (1<<15) +#define OGL_POINT_RASTER_RULE (1<<13) +#define ENABLE_TEXKILL_3D_4D (1<<10) +#define TEXKILL_3D (0<<9) +#define TEXKILL_4D (1<<9) +#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) +#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) +#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) + +/* _3DSTATE_SCISSOR_ENABLE, p256 */ +#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19)) +#define ENABLE_SCISSOR_RECT ((1<<1) | 1) +#define DISABLE_SCISSOR_RECT (1<<1) + +/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */ +#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1) +/* Dword 1 */ +#define SCISSOR_RECT_0_YMIN(x) ((x)<<16) +#define SCISSOR_RECT_0_XMIN(x) (x) +/* Dword 2 */ +#define SCISSOR_RECT_0_YMAX(x) ((x)<<16) +#define SCISSOR_RECT_0_XMAX(x) (x) + +/* p189 */ +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16)) +#define I1_LOAD_S(n) (1<<(4+n)) + +#define S0_VB_OFFSET_MASK 0xffffffc +#define S0_AUTO_CACHE_INV_DISABLE (1<<0) + +#define S1_VERTEX_WIDTH_SHIFT 24 +#define S1_VERTEX_WIDTH_MASK (0x3f<<24) +#define S1_VERTEX_PITCH_SHIFT 16 +#define S1_VERTEX_PITCH_MASK (0x3f<<16) + +#define TEXCOORDFMT_2D 0x0 +#define TEXCOORDFMT_3D 0x1 +#define TEXCOORDFMT_4D 0x2 +#define TEXCOORDFMT_1D 0x3 +#define TEXCOORDFMT_2D_16 0x4 +#define TEXCOORDFMT_4D_16 0x5 +#define TEXCOORDFMT_NOT_PRESENT 0xf +#define S2_TEXCOORD_FMT0_MASK 0xf +#define S2_TEXCOORD_FMT1_SHIFT 4 +#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) +#define S2_TEXCOORD_NONE (~0) + +#define TEXCOORD_WRAP_SHORTEST_TCX 8 +#define TEXCOORD_WRAP_SHORTEST_TCY 4 +#define TEXCOORD_WRAP_SHORTEST_TCZ 2 +#define TEXCOORD_PERSPECTIVE_DISABLE 1 + +#define S3_WRAP_SHORTEST_TCX(unit) (TEXCOORD_WRAP_SHORTEST_TCX << ((unit) * 4)) +#define S3_WRAP_SHORTEST_TCY(unit) (TEXCOORD_WRAP_SHORTEST_TCY << ((unit) * 4)) +#define S3_WRAP_SHORTEST_TCZ(unit) (TEXCOORD_WRAP_SHORTEST_TCZ << ((unit) * 4)) +#define S3_PERSPECTIVE_DISABLE(unit) (TEXCOORD_PERSPECTIVE_DISABLE << ((unit) * 4)) + +/* S3 not interesting */ + +#define S4_POINT_WIDTH_SHIFT 23 +#define S4_POINT_WIDTH_MASK (0x1ff<<23) +#define S4_LINE_WIDTH_SHIFT 19 +#define S4_LINE_WIDTH_ONE (0x2<<19) +#define S4_LINE_WIDTH_MASK (0xf<<19) +#define S4_FLATSHADE_ALPHA (1<<18) +#define S4_FLATSHADE_FOG (1<<17) +#define S4_FLATSHADE_SPECULAR (1<<16) +#define S4_FLATSHADE_COLOR (1<<15) +#define S4_CULLMODE_BOTH (0<<13) +#define S4_CULLMODE_NONE (1<<13) +#define S4_CULLMODE_CW (2<<13) +#define S4_CULLMODE_CCW (3<<13) +#define S4_CULLMODE_MASK (3<<13) +#define S4_VFMT_POINT_WIDTH (1<<12) +#define S4_VFMT_SPEC_FOG (1<<11) +#define S4_VFMT_COLOR (1<<10) +#define S4_VFMT_DEPTH_OFFSET (1<<9) +#define S4_VFMT_XYZ (1<<6) +#define S4_VFMT_XYZW (2<<6) +#define S4_VFMT_XY (3<<6) +#define S4_VFMT_XYW (4<<6) +#define S4_VFMT_XYZW_MASK (7<<6) +#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) +#define S4_FORCE_DEFAULT_SPECULAR (1<<4) +#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) +#define S4_VFMT_FOG_PARAM (1<<2) +#define S4_SPRITE_POINT_ENABLE (1<<1) +#define S4_LINE_ANTIALIAS_ENABLE (1<<0) + +#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ + S4_VFMT_SPEC_FOG | \ + S4_VFMT_COLOR | \ + S4_VFMT_DEPTH_OFFSET | \ + S4_VFMT_XYZW_MASK | \ + S4_VFMT_FOG_PARAM) + +#define S5_WRITEDISABLE_ALPHA (1<<31) +#define S5_WRITEDISABLE_RED (1<<30) +#define S5_WRITEDISABLE_GREEN (1<<29) +#define S5_WRITEDISABLE_BLUE (1<<28) +#define S5_WRITEDISABLE_MASK (0xf<<28) +#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) +#define S5_LAST_PIXEL_ENABLE (1<<26) +#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) +#define S5_FOG_ENABLE (1<<24) +#define S5_STENCIL_REF_SHIFT 16 +#define S5_STENCIL_REF_MASK (0xff<<16) +#define S5_STENCIL_TEST_FUNC_SHIFT 13 +#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) +#define S5_STENCIL_FAIL_SHIFT 10 +#define S5_STENCIL_FAIL_MASK (0x7<<10) +#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 +#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) +#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 +#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) +#define S5_STENCIL_WRITE_ENABLE (1<<3) +#define S5_STENCIL_TEST_ENABLE (1<<2) +#define S5_COLOR_DITHER_ENABLE (1<<1) +#define S5_LOGICOP_ENABLE (1<<0) + +#define S6_ALPHA_TEST_ENABLE (1<<31) +#define S6_ALPHA_TEST_FUNC_SHIFT 28 +#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) +#define S6_ALPHA_REF_SHIFT 20 +#define S6_ALPHA_REF_MASK (0xff<<20) +#define S6_DEPTH_TEST_ENABLE (1<<19) +#define S6_DEPTH_TEST_FUNC_SHIFT 16 +#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) +#define S6_CBUF_BLEND_ENABLE (1<<15) +#define S6_CBUF_BLEND_FUNC_SHIFT 12 +#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) +#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 +#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) +#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 +#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) +#define S6_DEPTH_WRITE_ENABLE (1<<3) +#define S6_COLOR_WRITE_ENABLE (1<<2) +#define S6_TRISTRIP_PV_SHIFT 0 +#define S6_TRISTRIP_PV_MASK (0x3<<0) + +#define S7_DEPTH_OFFSET_CONST_MASK ~0 + +/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */ +/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */ + +/* _3DSTATE_MODES_4, p218 */ +#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x0d<<24)) +#define ENABLE_LOGIC_OP_FUNC (1<<23) +#define LOGIC_OP_FUNC(x) ((x)<<18) +#define LOGICOP_MASK (0xf<<18) +#define LOGICOP_COPY 0xc +#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00)) +#define ENABLE_STENCIL_TEST_MASK (1<<17) +#define STENCIL_TEST_MASK(x) ((x)<<8) +#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff)) +#define ENABLE_STENCIL_WRITE_MASK (1<<16) +#define STENCIL_WRITE_MASK(x) ((x)&0xff) + +/* _3DSTATE_MODES_5, p220 */ +#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24)) +#define PIPELINE_FLUSH_RENDER_CACHE (1<<18) +#define PIPELINE_FLUSH_TEXTURE_CACHE (1<<16) + +/* p221 */ +#define _3DSTATE_PIXEL_SHADER_CONSTANTS (CMD_3D|(0x1d<<24)|(0x6<<16)) +#define PS1_REG(n) (1<<(n)) +#define PS2_CONST_X(n) (n) +#define PS3_CONST_Y(n) (n) +#define PS4_CONST_Z(n) (n) +#define PS5_CONST_W(n) (n) + +/* p222 */ + +#define I915_MAX_TEX_INDIRECT 4 +#define I915_MAX_TEX_INSN 32 +#define I915_MAX_ALU_INSN 64 +#define I915_MAX_DECL_INSN 27 +#define I915_MAX_TEMPORARY 16 + +/* Each instruction is 3 dwords long, though most don't require all + * this space. Maximum of 123 instructions. Smaller maxes per insn + * type. + */ +#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16)) + +#define REG_TYPE_R 0 /* temporary regs, no need to + * dcl, must be written before + * read -- Preserved between + * phases. + */ +#define REG_TYPE_T 1 /* Interpolated values, must be + * dcl'ed before use. + * + * 0..7: texture coord, + * 8: diffuse spec, + * 9: specular color, + * 10: fog parameter in w. + */ +#define REG_TYPE_CONST 2 /* Restriction: only one const + * can be referenced per + * instruction, though it may be + * selected for multiple inputs. + * Constants not initialized + * default to zero. + */ +#define REG_TYPE_S 3 /* sampler */ +#define REG_TYPE_OC 4 /* output color (rgba) */ +#define REG_TYPE_OD 5 /* output depth (w), xyz are + * temporaries. If not written, + * interpolated depth is used? + */ +#define REG_TYPE_U 6 /* unpreserved temporaries */ +#define REG_TYPE_MASK 0x7 +#define REG_NR_MASK 0xf + +/* REG_TYPE_T: + */ +#define T_TEX0 0 +#define T_TEX1 1 +#define T_TEX2 2 +#define T_TEX3 3 +#define T_TEX4 4 +#define T_TEX5 5 +#define T_TEX6 6 +#define T_TEX7 7 +#define T_DIFFUSE 8 +#define T_SPECULAR 9 +#define T_FOG_W 10 /* interpolated fog is in W coord */ + +/* Arithmetic instructions */ + +/* .replicate_swizzle == selection and replication of a particular + * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww + */ +#define A0_NOP (0x0<<24) /* no operation */ +#define A0_ADD (0x1<<24) /* dst = src0 + src1 */ +#define A0_MOV (0x2<<24) /* dst = src0 */ +#define A0_MUL (0x3<<24) /* dst = src0 * src1 */ +#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */ +#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */ +#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */ +#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */ +#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */ +#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */ +#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */ +#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */ +#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */ +#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */ +#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR (0x10<<24) /* dst = floor(src0) */ +#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ +#define A0_TRC (0x12<<24) /* dst = int(src0) */ +#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */ +#define A0_DEST_SATURATE (1<<22) +#define A0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +#define A0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define A0_DEST_CHANNEL_X (1<<10) +#define A0_DEST_CHANNEL_Y (2<<10) +#define A0_DEST_CHANNEL_Z (4<<10) +#define A0_DEST_CHANNEL_W (8<<10) +#define A0_DEST_CHANNEL_ALL (0xf<<10) +#define A0_DEST_CHANNEL_SHIFT 10 +#define A0_SRC0_TYPE_SHIFT 7 +#define A0_SRC0_NR_SHIFT 2 + +#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y) +#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z) + +#define SRC_X 0 +#define SRC_Y 1 +#define SRC_Z 2 +#define SRC_W 3 +#define SRC_ZERO 4 +#define SRC_ONE 5 + +#define A1_SRC0_CHANNEL_X_NEGATE (1<<31) +#define A1_SRC0_CHANNEL_X_SHIFT 28 +#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27) +#define A1_SRC0_CHANNEL_Y_SHIFT 24 +#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23) +#define A1_SRC0_CHANNEL_Z_SHIFT 20 +#define A1_SRC0_CHANNEL_W_NEGATE (1<<19) +#define A1_SRC0_CHANNEL_W_SHIFT 16 +#define A1_SRC1_TYPE_SHIFT 13 +#define A1_SRC1_NR_SHIFT 8 +#define A1_SRC1_CHANNEL_X_NEGATE (1<<7) +#define A1_SRC1_CHANNEL_X_SHIFT 4 +#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3) +#define A1_SRC1_CHANNEL_Y_SHIFT 0 + +#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31) +#define A2_SRC1_CHANNEL_Z_SHIFT 28 +#define A2_SRC1_CHANNEL_W_NEGATE (1<<27) +#define A2_SRC1_CHANNEL_W_SHIFT 24 +#define A2_SRC2_TYPE_SHIFT 21 +#define A2_SRC2_NR_SHIFT 16 +#define A2_SRC2_CHANNEL_X_NEGATE (1<<15) +#define A2_SRC2_CHANNEL_X_SHIFT 12 +#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11) +#define A2_SRC2_CHANNEL_Y_SHIFT 8 +#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7) +#define A2_SRC2_CHANNEL_Z_SHIFT 4 +#define A2_SRC2_CHANNEL_W_NEGATE (1<<3) +#define A2_SRC2_CHANNEL_W_SHIFT 0 + +/* Texture instructions */ +#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared + * sampler and address, and output + * filtered texel data to destination + * register */ +#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a + * perspective divide of the texture + * coordinate .xyz values by .w before + * sampling. */ +#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the + * computed LOD by w. Only S4.6 two's + * comp is used. This implies that a + * float to fixed conversion is + * done. */ +#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling + * operation. Simply kills the pixel + * if any channel of the address + * register is < 0.0. */ +#define T0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +/* Note: U (unpreserved) regs do not retain their values between + * phases (cannot be used for feedback) + * + * Note: oC and OD registers can only be used as the destination of a + * texture instruction once per phase (this is an implementation + * restriction). + */ +#define T0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ +#define T0_SAMPLER_NR_MASK (0xf<<0) + +#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ +/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ +#define T1_ADDRESS_REG_NR_SHIFT 17 +#define T2_MBZ 0 + +/* Declaration instructions */ +#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) + * register or an s (sampler) + * register. */ +#define D0_SAMPLE_TYPE_SHIFT 22 +#define D0_SAMPLE_TYPE_2D (0x0<<22) +#define D0_SAMPLE_TYPE_CUBE (0x1<<22) +#define D0_SAMPLE_TYPE_VOLUME (0x2<<22) +#define D0_SAMPLE_TYPE_MASK (0x3<<22) + +#define D0_TYPE_SHIFT 19 +/* Allow: T, S */ +#define D0_NR_SHIFT 14 +/* Allow T: 0..10, S: 0..15 */ +#define D0_CHANNEL_X (1<<10) +#define D0_CHANNEL_Y (2<<10) +#define D0_CHANNEL_Z (4<<10) +#define D0_CHANNEL_W (8<<10) +#define D0_CHANNEL_ALL (0xf<<10) +#define D0_CHANNEL_NONE (0<<10) + +#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y) +#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z) + +/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse + * or specular declarations. + * + * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) + * + * Must be zero for S (sampler) dcls + */ +#define D1_MBZ 0 +#define D2_MBZ 0 + +/* p207. + * The DWORD count is 3 times the number of bits set in MS1_MAPMASK_MASK + */ +#define _3DSTATE_MAP_STATE (CMD_3D|(0x1d<<24)|(0x0<<16)) + +#define MS1_MAPMASK_SHIFT 0 +#define MS1_MAPMASK_MASK (0x8fff<<0) + +#define MS2_UNTRUSTED_SURFACE (1<<31) +#define MS2_ADDRESS_MASK 0xfffffffc +#define MS2_VERTICAL_LINE_STRIDE (1<<1) +#define MS2_VERTICAL_OFFSET (1<<1) + +#define MS3_HEIGHT_SHIFT 21 +#define MS3_WIDTH_SHIFT 10 +#define MS3_PALETTE_SELECT (1<<9) +#define MS3_MAPSURF_FORMAT_SHIFT 7 +#define MS3_MAPSURF_FORMAT_MASK (0x7<<7) +#define MAPSURF_8BIT (1<<7) +#define MAPSURF_16BIT (2<<7) +#define MAPSURF_32BIT (3<<7) +#define MAPSURF_422 (5<<7) +#define MAPSURF_COMPRESSED (6<<7) +#define MAPSURF_4BIT_INDEXED (7<<7) +#define MS3_MT_FORMAT_MASK (0x7 << 3) +#define MS3_MT_FORMAT_SHIFT 3 +#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */ +#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */ +#define MT_8BIT_L8 (1<<3) +#define MT_8BIT_A8 (4<<3) +#define MT_8BIT_MONO8 (5<<3) +#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */ +#define MT_16BIT_ARGB1555 (1<<3) +#define MT_16BIT_ARGB4444 (2<<3) +#define MT_16BIT_AY88 (3<<3) +#define MT_16BIT_88DVDU (5<<3) +#define MT_16BIT_BUMP_655LDVDU (6<<3) +#define MT_16BIT_I16 (7<<3) +#define MT_16BIT_L16 (8<<3) +#define MT_16BIT_A16 (9<<3) +#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */ +#define MT_32BIT_ABGR8888 (1<<3) +#define MT_32BIT_XRGB8888 (2<<3) +#define MT_32BIT_XBGR8888 (3<<3) +#define MT_32BIT_QWVU8888 (4<<3) +#define MT_32BIT_AXVU8888 (5<<3) +#define MT_32BIT_LXVU8888 (6<<3) +#define MT_32BIT_XLVU8888 (7<<3) +#define MT_32BIT_ARGB2101010 (8<<3) +#define MT_32BIT_ABGR2101010 (9<<3) +#define MT_32BIT_AWVU2101010 (0xA<<3) +#define MT_32BIT_GR1616 (0xB<<3) +#define MT_32BIT_VU1616 (0xC<<3) +#define MT_32BIT_xI824 (0xD<<3) +#define MT_32BIT_xA824 (0xE<<3) +#define MT_32BIT_xL824 (0xF<<3) +#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ +#define MT_422_YCRCB_NORMAL (1<<3) +#define MT_422_YCRCB_SWAPUV (2<<3) +#define MT_422_YCRCB_SWAPUVY (3<<3) +#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */ +#define MT_COMPRESS_DXT2_3 (1<<3) +#define MT_COMPRESS_DXT4_5 (2<<3) +#define MT_COMPRESS_FXT1 (3<<3) +#define MT_COMPRESS_DXT1_RGB (4<<3) +#define MS3_USE_FENCE_REGS (1<<2) +#define MS3_TILED_SURFACE (1<<1) +#define MS3_TILE_WALK (1<<0) + +/* The pitch is the pitch measured in DWORDS, minus 1 */ +#define MS4_PITCH_SHIFT 21 +#define MS4_CUBE_FACE_ENA_NEGX (1<<20) +#define MS4_CUBE_FACE_ENA_POSX (1<<19) +#define MS4_CUBE_FACE_ENA_NEGY (1<<18) +#define MS4_CUBE_FACE_ENA_POSY (1<<17) +#define MS4_CUBE_FACE_ENA_NEGZ (1<<16) +#define MS4_CUBE_FACE_ENA_POSZ (1<<15) +#define MS4_CUBE_FACE_ENA_MASK (0x3f<<15) +#define MS4_MAX_LOD_SHIFT 9 +#define MS4_MAX_LOD_MASK (0x3f<<9) +#define MS4_MIP_LAYOUT_LEGACY (0<<8) +#define MS4_MIP_LAYOUT_BELOW_LPT (0<<8) +#define MS4_MIP_LAYOUT_RIGHT_LPT (1<<8) +#define MS4_VOLUME_DEPTH_SHIFT 0 +#define MS4_VOLUME_DEPTH_MASK (0xff<<0) + +/* p244. + * The DWORD count is 3 times the number of bits set in SS1_MAPMASK_MASK. + */ +#define _3DSTATE_SAMPLER_STATE (CMD_3D|(0x1d<<24)|(0x1<<16)) + +#define SS1_MAPMASK_SHIFT 0 +#define SS1_MAPMASK_MASK (0x8fff<<0) + +#define SS2_REVERSE_GAMMA_ENABLE (1<<31) +#define SS2_PACKED_TO_PLANAR_ENABLE (1<<30) +#define SS2_COLORSPACE_CONVERSION (1<<29) +#define SS2_CHROMAKEY_SHIFT 27 +#define SS2_BASE_MIP_LEVEL_SHIFT 22 +#define SS2_BASE_MIP_LEVEL_MASK (0x1f<<22) +#define SS2_MIP_FILTER_SHIFT 20 +#define SS2_MIP_FILTER_MASK (0x3<<20) +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 +#define SS2_MAG_FILTER_SHIFT 17 +#define SS2_MAG_FILTER_MASK (0x7<<17) +#define FILTER_NEAREST 0 +#define FILTER_LINEAR 1 +#define FILTER_ANISOTROPIC 2 +#define FILTER_4X4_1 3 +#define FILTER_4X4_2 4 +#define FILTER_4X4_FLAT 5 +#define FILTER_6X5_MONO 6 /* XXX - check */ +#define SS2_MIN_FILTER_SHIFT 14 +#define SS2_MIN_FILTER_MASK (0x7<<14) +#define SS2_LOD_BIAS_SHIFT 5 +#define SS2_LOD_BIAS_ONE (0x10<<5) +#define SS2_LOD_BIAS_MASK (0x1ff<<5) +/* Shadow requires: + * MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format + * FILTER_4X4_x MIN and MAG filters + */ +#define SS2_SHADOW_ENABLE (1<<4) +#define SS2_MAX_ANISO_MASK (1<<3) +#define SS2_MAX_ANISO_2 (0<<3) +#define SS2_MAX_ANISO_4 (1<<3) +#define SS2_SHADOW_FUNC_SHIFT 0 +#define SS2_SHADOW_FUNC_MASK (0x7<<0) +/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */ + +#define SS3_MIN_LOD_SHIFT 24 +#define SS3_MIN_LOD_ONE (0x10<<24) +#define SS3_MIN_LOD_MASK (0xff<<24) +#define SS3_KILL_PIXEL_ENABLE (1<<17) +#define SS3_TCX_ADDR_MODE_SHIFT 12 +#define SS3_TCX_ADDR_MODE_MASK (0x7<<12) +#define TEXCOORDMODE_WRAP 0 +#define TEXCOORDMODE_MIRROR 1 +#define TEXCOORDMODE_CLAMP_EDGE 2 +#define TEXCOORDMODE_CUBE 3 +#define TEXCOORDMODE_CLAMP_BORDER 4 +#define TEXCOORDMODE_MIRROR_ONCE 5 +#define SS3_TCY_ADDR_MODE_SHIFT 9 +#define SS3_TCY_ADDR_MODE_MASK (0x7<<9) +#define SS3_TCZ_ADDR_MODE_SHIFT 6 +#define SS3_TCZ_ADDR_MODE_MASK (0x7<<6) +#define SS3_NORMALIZED_COORDS (1<<5) +#define SS3_TEXTUREMAP_INDEX_SHIFT 1 +#define SS3_TEXTUREMAP_INDEX_MASK (0xf<<1) +#define SS3_DEINTERLACER_ENABLE (1<<0) + +#define SS4_BORDER_COLOR_MASK (~0) + +/* 3DSTATE_SPAN_STIPPLE, p258 + */ +#define _3DSTATE_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) +#define ST1_ENABLE (1<<16) +#define ST1_MASK (0xffff) + +#define FLUSH_MAP_CACHE (1<<0) +#define FLUSH_RENDER_CACHE (1<<1) + +#endif diff --git a/src/uxa/i915_render.c b/src/uxa/i915_render.c new file mode 100644 index 00000000..6d3400e7 --- /dev/null +++ b/src/uxa/i915_render.c @@ -0,0 +1,1010 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Eric Anholt <eric@anholt.net> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "xf86.h" +#include "intel.h" +#include "i915_reg.h" +#include "i915_3d.h" + +struct formatinfo { + int fmt; + uint32_t card_fmt; +}; + +struct blendinfo { + Bool dst_alpha; + Bool src_alpha; + uint32_t src_blend; + uint32_t dst_blend; +}; + +static struct blendinfo i915_blend_op[] = { + /* Clear */ + {0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO}, + /* Src */ + {0, 0, BLENDFACT_ONE, BLENDFACT_ZERO}, + /* Dst */ + {0, 0, BLENDFACT_ZERO, BLENDFACT_ONE}, + /* Over */ + {0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA}, + /* OverReverse */ + {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE}, + /* In */ + {1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO}, + /* InReverse */ + {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA}, + /* Out */ + {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO}, + /* OutReverse */ + {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA}, + /* Atop */ + {1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA}, + /* AtopReverse */ + {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA}, + /* Xor */ + {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA}, + /* Add */ + {0, 0, BLENDFACT_ONE, BLENDFACT_ONE}, +}; + +static struct formatinfo i915_tex_formats[] = { + {PICT_a8, MAPSURF_8BIT | MT_8BIT_A8}, + {PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888}, + {PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888}, + {PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888}, + {PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888}, +#if XORG_VERSION_CURRENT >= 10699900 + {PICT_a2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010}, + {PICT_a2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010}, +#endif + {PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565}, + {PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555}, + {PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444}, +}; + +static uint32_t i915_get_blend_cntl(int op, PicturePtr mask, + uint32_t dst_format) +{ + uint32_t sblend, dblend; + + sblend = i915_blend_op[op].src_blend; + dblend = i915_blend_op[op].dst_blend; + + /* If there's no dst alpha channel, adjust the blend op so that we'll + * treat it as always 1. + */ + if (PICT_FORMAT_A(dst_format) == 0 && i915_blend_op[op].dst_alpha) { + if (sblend == BLENDFACT_DST_ALPHA) + sblend = BLENDFACT_ONE; + else if (sblend == BLENDFACT_INV_DST_ALPHA) + sblend = BLENDFACT_ZERO; + } + + /* i915 engine reads 8bit color buffer into green channel in cases + like color buffer blending .etc, and also writes back green channel. + So with dst_alpha blend we should use color factor. See spec on + "8-bit rendering" */ + if ((dst_format == PICT_a8) && i915_blend_op[op].dst_alpha) { + if (sblend == BLENDFACT_DST_ALPHA) + sblend = BLENDFACT_DST_COLR; + else if (sblend == BLENDFACT_INV_DST_ALPHA) + sblend = BLENDFACT_INV_DST_COLR; + } + + /* If the source alpha is being used, then we should only be in a case + * where the source blend factor is 0, and the source blend value is the + * mask channels multiplied by the source picture's alpha. + */ + if (mask && mask->componentAlpha && PICT_FORMAT_RGB(mask->format) && + i915_blend_op[op].src_alpha) { + if (dblend == BLENDFACT_SRC_ALPHA) { + dblend = BLENDFACT_SRC_COLR; + } else if (dblend == BLENDFACT_INV_SRC_ALPHA) { + dblend = BLENDFACT_INV_SRC_COLR; + } + } + + return S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE | + (BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) | + (sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT) | + (dblend << S6_CBUF_DST_BLEND_FACT_SHIFT); +} + +#define DSTORG_HORT_BIAS(x) ((x)<<20) +#define DSTORG_VERT_BIAS(x) ((x)<<16) + +static Bool i915_get_dest_format(PicturePtr dest_picture, uint32_t * dst_format) +{ + ScrnInfoPtr scrn; + + switch (dest_picture->format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + *dst_format = COLR_BUF_ARGB8888; + break; + case PICT_r5g6b5: + *dst_format = COLR_BUF_RGB565; + break; + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + *dst_format = COLR_BUF_ARGB1555; + break; +#if XORG_VERSION_CURRENT >= 10699900 + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + *dst_format = COLR_BUF_ARGB2AAA; + break; +#endif + case PICT_a8: + *dst_format = COLR_BUF_8BIT; + break; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + *dst_format = COLR_BUF_ARGB4444; + break; + default: + scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen); + intel_debug_fallback(scrn, + "Unsupported dest format 0x%x\n", + (int)dest_picture->format); + return FALSE; + } + *dst_format |= DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8); + return TRUE; +} + +Bool +i915_check_composite(int op, + PicturePtr source_picture, + PicturePtr mask_picture, + PicturePtr dest_picture, + int width, int height) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen); + uint32_t tmp1; + + /* Check for unsupported compositing operations. */ + if (op >= sizeof(i915_blend_op) / sizeof(i915_blend_op[0])) { + intel_debug_fallback(scrn, "Unsupported Composite op 0x%x\n", + op); + return FALSE; + } + if (mask_picture != NULL && mask_picture->componentAlpha && + PICT_FORMAT_RGB(mask_picture->format)) { + /* Check if it's component alpha that relies on a source alpha + * and on the source value. We can only get one of those + * into the single source value that we get to blend with. + */ + if (i915_blend_op[op].src_alpha && + (i915_blend_op[op].src_blend != BLENDFACT_ZERO)) { + if (op != PictOpOver) { + intel_debug_fallback(scrn, + "Component alpha not supported " + "with source alpha and source " + "value blending.\n"); + return FALSE; + } + } + } + + if (!i915_get_dest_format(dest_picture, &tmp1)) { + intel_debug_fallback(scrn, "Get Color buffer format\n"); + return FALSE; + } + + if (width > 2048 || height > 2048) + return FALSE; + + return TRUE; +} + +Bool +i915_check_composite_target(PixmapPtr pixmap) +{ + if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048) + return FALSE; + + if(!intel_check_pitch_3d(pixmap)) + return FALSE; + + return TRUE; +} + +Bool +i915_check_composite_texture(ScreenPtr screen, PicturePtr picture) +{ + if (picture->repeatType > RepeatReflect) { + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_debug_fallback(scrn, "Unsupported picture repeat %d\n", + picture->repeatType); + return FALSE; + } + + if (picture->filter != PictFilterNearest && + picture->filter != PictFilterBilinear) { + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_debug_fallback(scrn, "Unsupported filter 0x%x\n", + picture->filter); + return FALSE; + } + + if (picture->pSourcePict) + return FALSE; + + if (picture->pDrawable) { + int w, h, i; + + w = picture->pDrawable->width; + h = picture->pDrawable->height; + if ((w > 2048) || (h > 2048)) { + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_debug_fallback(scrn, + "Picture w/h too large (%dx%d)\n", + w, h); + return FALSE; + } + + for (i = 0; + i < sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0]); + i++) { + if (i915_tex_formats[i].fmt == picture->format) + break; + } + if (i == sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0])) + { + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_debug_fallback(scrn, "Unsupported picture format " + "0x%x\n", + (int)picture->format); + return FALSE; + } + + return TRUE; + } + + return FALSE; +} + +static Bool i915_texture_setup(PicturePtr picture, PixmapPtr pixmap, int unit) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(picture->pDrawable->pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + uint32_t format, pitch, filter; + uint32_t wrap_mode, tiling_bits; + int i; + + pitch = intel_pixmap_pitch(pixmap); + intel->scale_units[unit][0] = 1. / pixmap->drawable.width; + intel->scale_units[unit][1] = 1. / pixmap->drawable.height; + + for (i = 0; i < sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0]); + i++) { + if (i915_tex_formats[i].fmt == picture->format) + break; + } + if (i == sizeof(i915_tex_formats) / sizeof(i915_tex_formats[0])) { + intel_debug_fallback(scrn, "unknown texture format\n"); + return FALSE; + } + format = i915_tex_formats[i].card_fmt; + + switch (picture->repeatType) { + case RepeatNone: + wrap_mode = TEXCOORDMODE_CLAMP_BORDER; + break; + case RepeatNormal: + wrap_mode = TEXCOORDMODE_WRAP; + break; + case RepeatPad: + wrap_mode = TEXCOORDMODE_CLAMP_EDGE; + break; + case RepeatReflect: + wrap_mode = TEXCOORDMODE_MIRROR; + break; + default: + FatalError("Unknown repeat type %d\n", picture->repeatType); + } + + switch (picture->filter) { + case PictFilterNearest: + filter = (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT) | + (FILTER_NEAREST << SS2_MIN_FILTER_SHIFT); + break; + case PictFilterBilinear: + filter = (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | + (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT); + break; + default: + intel_debug_fallback(scrn, "Bad filter 0x%x\n", + picture->filter); + return FALSE; + } + + /* offset filled in at emit time */ + if (intel_pixmap_tiled(pixmap)) { + tiling_bits = MS3_TILED_SURFACE; + if (intel_get_pixmap_private(pixmap)->tiling + == I915_TILING_Y) + tiling_bits |= MS3_TILE_WALK; + } else + tiling_bits = 0; + + intel->texture[unit] = pixmap; + intel->mapstate[unit * 3 + 0] = 0; + intel->mapstate[unit * 3 + 1] = format | + tiling_bits | + ((pixmap->drawable.height - 1) << MS3_HEIGHT_SHIFT) | + ((pixmap->drawable.width - 1) << MS3_WIDTH_SHIFT); + intel->mapstate[unit * 3 + 2] = ((pitch / 4) - 1) << MS4_PITCH_SHIFT; + + intel->samplerstate[unit * 3 + 0] = (MIPFILTER_NONE << + SS2_MIP_FILTER_SHIFT); + intel->samplerstate[unit * 3 + 0] |= filter; + intel->samplerstate[unit * 3 + 1] = SS3_NORMALIZED_COORDS; + intel->samplerstate[unit * 3 + 1] |= + wrap_mode << SS3_TCX_ADDR_MODE_SHIFT; + intel->samplerstate[unit * 3 + 1] |= + wrap_mode << SS3_TCY_ADDR_MODE_SHIFT; + intel->samplerstate[unit * 3 + 1] |= unit << SS3_TEXTUREMAP_INDEX_SHIFT; + intel->samplerstate[unit * 3 + 2] = 0x00000000; /* border color */ + + intel->transform[unit] = picture->transform; + + return TRUE; +} + +static void +i915_emit_composite_primitive_identity_source(intel_screen_private *intel, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) +{ + OUT_VERTEX(dstX + w); + OUT_VERTEX(dstY + h); + OUT_VERTEX((srcX + w) * intel->scale_units[0][0]); + OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY + h); + OUT_VERTEX(srcX * intel->scale_units[0][0]); + OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); + OUT_VERTEX(srcX * intel->scale_units[0][0]); + OUT_VERTEX(srcY * intel->scale_units[0][1]); +} + +static void +i915_emit_composite_primitive_affine_source(intel_screen_private *intel, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) +{ + float src_x[3], src_y[3]; + + if (!intel_get_transformed_coordinates(srcX, srcY, + intel->transform[0], + &src_x[0], + &src_y[0])) + return; + + if (!intel_get_transformed_coordinates(srcX, srcY + h, + intel->transform[0], + &src_x[1], + &src_y[1])) + return; + + if (!intel_get_transformed_coordinates(srcX + w, srcY + h, + intel->transform[0], + &src_x[2], + &src_y[2])) + return; + + OUT_VERTEX(dstX + w); + OUT_VERTEX(dstY + h); + OUT_VERTEX(src_x[2] * intel->scale_units[0][0]); + OUT_VERTEX(src_y[2] * intel->scale_units[0][1]); + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY + h); + OUT_VERTEX(src_x[1] * intel->scale_units[0][0]); + OUT_VERTEX(src_y[1] * intel->scale_units[0][1]); + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); + OUT_VERTEX(src_x[0] * intel->scale_units[0][0]); + OUT_VERTEX(src_y[0] * intel->scale_units[0][1]); +} + +static void +i915_emit_composite_primitive_identity_source_mask(intel_screen_private *intel, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) +{ + OUT_VERTEX(dstX + w); + OUT_VERTEX(dstY + h); + OUT_VERTEX((srcX + w) * intel->scale_units[0][0]); + OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); + OUT_VERTEX((maskX + w) * intel->scale_units[1][0]); + OUT_VERTEX((maskY + h) * intel->scale_units[1][1]); + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY + h); + OUT_VERTEX(srcX * intel->scale_units[0][0]); + OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); + OUT_VERTEX(maskX * intel->scale_units[1][0]); + OUT_VERTEX((maskY + h) * intel->scale_units[1][1]); + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); + OUT_VERTEX(srcX * intel->scale_units[0][0]); + OUT_VERTEX(srcY * intel->scale_units[0][1]); + OUT_VERTEX(maskX * intel->scale_units[1][0]); + OUT_VERTEX(maskY * intel->scale_units[1][1]); +} + +static void +i915_emit_composite_primitive(intel_screen_private *intel, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) +{ + Bool is_affine_src = TRUE, is_affine_mask = TRUE; + int tex_unit = 0; + int src_unit = -1, mask_unit = -1; + float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; + + src_unit = tex_unit++; + + is_affine_src = intel_transform_is_affine(intel->transform[src_unit]); + if (is_affine_src) { + if (!intel_get_transformed_coordinates(srcX, srcY, + intel-> + transform[src_unit], + &src_x[0], + &src_y[0])) + return; + + if (!intel_get_transformed_coordinates(srcX, srcY + h, + intel-> + transform[src_unit], + &src_x[1], + &src_y[1])) + return; + + if (!intel_get_transformed_coordinates(srcX + w, srcY + h, + intel-> + transform[src_unit], + &src_x[2], + &src_y[2])) + return; + } else { + if (!intel_get_transformed_coordinates_3d(srcX, srcY, + intel-> + transform[src_unit], + &src_x[0], + &src_y[0], + &src_w[0])) + return; + + if (!intel_get_transformed_coordinates_3d(srcX, srcY + h, + intel-> + transform[src_unit], + &src_x[1], + &src_y[1], + &src_w[1])) + return; + + if (!intel_get_transformed_coordinates_3d(srcX + w, srcY + h, + intel-> + transform[src_unit], + &src_x[2], + &src_y[2], + &src_w[2])) + return; + } + + if (intel->render_mask) { + mask_unit = tex_unit++; + + is_affine_mask = intel_transform_is_affine(intel->transform[mask_unit]); + if (is_affine_mask) { + if (!intel_get_transformed_coordinates(maskX, maskY, + intel-> + transform[mask_unit], + &mask_x[0], + &mask_y[0])) + return; + + if (!intel_get_transformed_coordinates(maskX, maskY + h, + intel-> + transform[mask_unit], + &mask_x[1], + &mask_y[1])) + return; + + if (!intel_get_transformed_coordinates(maskX + w, maskY + h, + intel-> + transform[mask_unit], + &mask_x[2], + &mask_y[2])) + return; + } else { + if (!intel_get_transformed_coordinates_3d(maskX, maskY, + intel-> + transform[mask_unit], + &mask_x[0], + &mask_y[0], + &mask_w[0])) + return; + + if (!intel_get_transformed_coordinates_3d(maskX, maskY + h, + intel-> + transform[mask_unit], + &mask_x[1], + &mask_y[1], + &mask_w[1])) + return; + + if (!intel_get_transformed_coordinates_3d(maskX + w, maskY + h, + intel-> + transform[mask_unit], + &mask_x[2], + &mask_y[2], + &mask_w[2])) + return; + } + } + + OUT_VERTEX(dstX + w); + OUT_VERTEX(dstY + h); + OUT_VERTEX(src_x[2] * intel->scale_units[src_unit][0]); + OUT_VERTEX(src_y[2] * intel->scale_units[src_unit][1]); + if (!is_affine_src) { + OUT_VERTEX(0.0); + OUT_VERTEX(src_w[2]); + } + if (intel->render_mask) { + OUT_VERTEX(mask_x[2] * intel->scale_units[mask_unit][0]); + OUT_VERTEX(mask_y[2] * intel->scale_units[mask_unit][1]); + if (!is_affine_mask) { + OUT_VERTEX(0.0); + OUT_VERTEX(mask_w[2]); + } + } + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY + h); + OUT_VERTEX(src_x[1] * intel->scale_units[src_unit][0]); + OUT_VERTEX(src_y[1] * intel->scale_units[src_unit][1]); + if (!is_affine_src) { + OUT_VERTEX(0.0); + OUT_VERTEX(src_w[1]); + } + if (intel->render_mask) { + OUT_VERTEX(mask_x[1] * intel->scale_units[mask_unit][0]); + OUT_VERTEX(mask_y[1] * intel->scale_units[mask_unit][1]); + if (!is_affine_mask) { + OUT_VERTEX(0.0); + OUT_VERTEX(mask_w[1]); + } + } + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); + OUT_VERTEX(src_x[0] * intel->scale_units[src_unit][0]); + OUT_VERTEX(src_y[0] * intel->scale_units[src_unit][1]); + if (!is_affine_src) { + OUT_VERTEX(0.0); + OUT_VERTEX(src_w[0]); + } + if (intel->render_mask) { + OUT_VERTEX(mask_x[0] * intel->scale_units[mask_unit][0]); + OUT_VERTEX(mask_y[0] * intel->scale_units[mask_unit][1]); + if (!is_affine_mask) { + OUT_VERTEX(0.0); + OUT_VERTEX(mask_w[0]); + } + } +} + +Bool +i915_prepare_composite(int op, PicturePtr source_picture, + PicturePtr mask_picture, PicturePtr dest_picture, + PixmapPtr source, PixmapPtr mask, PixmapPtr dest) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + drm_intel_bo *bo_table[] = { + NULL, /* batch_bo */ + intel_get_pixmap_bo(dest), + intel_get_pixmap_bo(source), + mask ? intel_get_pixmap_bo(mask) : NULL, + }; + int tex_unit = 0; + int floats_per_vertex; + + intel->render_source_picture = source_picture; + intel->render_source = source; + intel->render_mask_picture = mask_picture; + intel->render_mask = mask; + intel->render_dest_picture = dest_picture; + intel->render_dest = dest; + + if (!intel_check_pitch_3d(source)) + return FALSE; + + if (mask && !intel_check_pitch_3d(mask)) + return FALSE; + + if (!intel_check_pitch_3d(dest)) + return FALSE; + + if (!i915_get_dest_format(dest_picture, + &intel->i915_render_state.dst_format)) + return FALSE; + + if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table))) + return FALSE; + + if (mask_picture != NULL && mask_picture->componentAlpha && + PICT_FORMAT_RGB(mask_picture->format)) { + /* Check if it's component alpha that relies on a source alpha + * and on the source value. We can only get one of those + * into the single source value that we get to blend with. + */ + if (i915_blend_op[op].src_alpha && + (i915_blend_op[op].src_blend != BLENDFACT_ZERO)) + return FALSE; + } + + intel->transform[0] = NULL; + intel->scale_units[0][0] = -1; + intel->scale_units[0][1] = -1; + intel->transform[1] = NULL; + intel->scale_units[1][0] = -1; + intel->scale_units[1][1] = -1; + + floats_per_vertex = 2; /* dest x/y */ + if (!i915_texture_setup(source_picture, source, tex_unit++)) { + intel_debug_fallback(scrn, "fail to setup src texture\n"); + return FALSE; + } + + if (intel_transform_is_affine(source_picture->transform)) + floats_per_vertex += 2; /* src x/y */ + else + floats_per_vertex += 4; /* src x/y/z/w */ + + if (mask_picture != NULL) { + assert(mask != NULL); + if (!i915_texture_setup(mask_picture, mask, tex_unit++)) { + intel_debug_fallback(scrn, + "fail to setup mask texture\n"); + return FALSE; + } + + if (intel_transform_is_affine(mask_picture->transform)) + floats_per_vertex += 2; /* mask x/y */ + else + floats_per_vertex += 4; /* mask x/y/z/w */ + } + + intel->i915_render_state.op = op; + + if (intel_pixmap_is_dirty(source) || intel_pixmap_is_dirty(mask)) + intel_batch_emit_flush(scrn); + + intel->needs_render_state_emit = TRUE; + + intel->prim_emit = i915_emit_composite_primitive; + if (!mask) { + if (intel->transform[0] == NULL) + intel->prim_emit = i915_emit_composite_primitive_identity_source; + else if (intel_transform_is_affine(intel->transform[0])) + intel->prim_emit = i915_emit_composite_primitive_affine_source; + } else { + if (intel->transform[0] == NULL) { + if (intel->transform[1] == NULL) + intel->prim_emit = i915_emit_composite_primitive_identity_source_mask; + } + } + + if (floats_per_vertex != intel->floats_per_vertex) { + intel->floats_per_vertex = floats_per_vertex; + intel->needs_render_vertex_emit = TRUE; + } + + return TRUE; +} + +static void +i915_composite_emit_shader(intel_screen_private *intel, CARD8 op) +{ + PicturePtr mask_picture = intel->render_mask_picture; + PixmapPtr mask = intel->render_mask; + int src_reg, mask_reg; + Bool dest_is_alpha = PIXMAN_FORMAT_RGB(intel->render_dest_picture->format) == 0; + FS_LOCALS(); + + FS_BEGIN(); + + /* Declare the registers necessary for our program. */ + i915_fs_dcl(FS_T0); + i915_fs_dcl(FS_S0); + if (!mask) { + /* No mask, so load directly to output color */ + if (dest_is_alpha) + src_reg = FS_R0; + else + src_reg = FS_OC; + + if (intel_transform_is_affine(intel->transform[0])) + i915_fs_texld(src_reg, FS_S0, FS_T0); + else + i915_fs_texldp(src_reg, FS_S0, FS_T0); + + if (src_reg != FS_OC) + i915_fs_mov(FS_OC, i915_fs_operand(src_reg, W, W, W, W)); + } else { + i915_fs_dcl(FS_T1); + i915_fs_dcl(FS_S1); + + /* Load the source_picture texel */ + if (intel_transform_is_affine(intel->transform[0])) + i915_fs_texld(FS_R0, FS_S0, FS_T0); + else + i915_fs_texldp(FS_R0, FS_S0, FS_T0); + + src_reg = FS_R0; + + /* Load the mask_picture texel */ + if (intel_transform_is_affine(intel->transform[1])) + i915_fs_texld(FS_R1, FS_S1, FS_T1); + else + i915_fs_texldp(FS_R1, FS_S1, FS_T1); + + mask_reg = FS_R1; + + if (dest_is_alpha) { + i915_fs_mul(FS_OC, + i915_fs_operand(src_reg, W, W, W, W), + i915_fs_operand(mask_reg, W, W, W, W)); + } else { + /* If component alpha is active in the mask and the blend + * operation uses the source alpha, then we know we don't + * need the source value (otherwise we would have hit a + * fallback earlier), so we provide the source alpha (src.A * + * mask.X) as output color. + * Conversely, if CA is set and we don't need the source alpha, + * then we produce the source value (src.X * mask.X) and the + * source alpha is unused. Otherwise, we provide the non-CA + * source value (src.X * mask.A). + */ + if (mask_picture->componentAlpha && + PICT_FORMAT_RGB(mask_picture->format)) { + if (i915_blend_op[op].src_alpha) { + i915_fs_mul(FS_OC, + i915_fs_operand(src_reg, W, W, W, W), + i915_fs_operand_reg(mask_reg)); + } else { + i915_fs_mul(FS_OC, + i915_fs_operand_reg(src_reg), + i915_fs_operand_reg(mask_reg)); + } + } else { + i915_fs_mul(FS_OC, + i915_fs_operand_reg(src_reg), + i915_fs_operand(mask_reg, W, W, W, W)); + } + } + } + + FS_END(); +} + +static void i915_emit_composite_setup(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + int op = intel->i915_render_state.op; + PicturePtr mask_picture = intel->render_mask_picture; + PicturePtr dest_picture = intel->render_dest_picture; + PixmapPtr mask = intel->render_mask; + PixmapPtr dest = intel->render_dest; + int tex_count, t; + + intel->needs_render_state_emit = FALSE; + + IntelEmitInvarientState(scrn); + intel->last_3d = LAST_3D_RENDER; + + tex_count = 1 + (mask != NULL); + + assert(intel->in_batch_atomic); + + if (tex_count != 0) { + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count)); + OUT_BATCH((1 << tex_count) - 1); + for (t = 0; t < tex_count; t++) { + OUT_RELOC_PIXMAP(intel->texture[t], I915_GEM_DOMAIN_SAMPLER, 0, 0); + OUT_BATCH(intel->mapstate[3*t + 1]); + OUT_BATCH(intel->mapstate[3*t + 2]); + } + + OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count)); + OUT_BATCH((1 << tex_count) - 1); + for (t = 0; t < tex_count; t++) { + OUT_BATCH(intel->samplerstate[3*t + 0]); + OUT_BATCH(intel->samplerstate[3*t + 1]); + OUT_BATCH(intel->samplerstate[3*t + 2]); + } + } + + /* BUF_INFO is an implicit flush, so avoid if the target has not changed. + * XXX However for reasons unfathomed, correct rendering in KDE requires + * at least a MI_FLUSH | INHIBIT_RENDER_CACHE_FLUSH here. + */ + if (1) { + uint32_t tiling_bits; + + if (intel_pixmap_tiled(dest)) { + tiling_bits = BUF_3D_TILED_SURFACE; + if (intel_get_pixmap_private(dest)->tiling + == I915_TILING_Y) + tiling_bits |= BUF_3D_TILE_WALK_Y; + } else + tiling_bits = 0; + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits | + BUF_3D_PITCH(intel_pixmap_pitch(dest))); + OUT_RELOC_PIXMAP(dest, I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(intel->i915_render_state.dst_format); + + /* draw rect is unconditional */ + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(0x00000000); + OUT_BATCH(0x00000000); /* ymin, xmin */ + OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) | + DRAW_XMAX(dest->drawable.width - 1)); + /* yorig, xorig (relate to color buffer?) */ + OUT_BATCH(0x00000000); + } + + { + uint32_t ss2; + + ss2 = ~0; + ss2 &= ~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT); + ss2 |= S2_TEXCOORD_FMT(0, + intel_transform_is_affine(intel->transform[0]) ? + TEXCOORDFMT_2D : TEXCOORDFMT_4D); + if (mask) { + ss2 &= ~S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT); + ss2 |= S2_TEXCOORD_FMT(1, + intel_transform_is_affine(intel->transform[1]) ? + TEXCOORDFMT_2D : TEXCOORDFMT_4D); + } + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1); + OUT_BATCH(ss2); + OUT_BATCH(i915_get_blend_cntl(op, mask_picture, dest_picture->format)); + } + + i915_composite_emit_shader(intel, op); +} + +void +i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, + int dstX, int dstY, int w, int h) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(dest->drawable.pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + + /* 28 + 16 + 10 + 20 + 32 + 16 */ + intel_batch_start_atomic(scrn, 150); + + if (intel->needs_render_state_emit) + i915_emit_composite_setup(scrn); + + if (intel->needs_render_vertex_emit || + intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) { + i915_vertex_flush(intel); + + if (intel_vertex_space(intel) < 256) { + intel_next_vertex(intel); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(0) | I1_LOAD_S(1) | 1); + OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); + OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) | + (intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT)); + intel->vertex_index = 0; + } else if (intel->floats_per_vertex != intel->last_floats_per_vertex){ + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(1) | 0); + OUT_BATCH((intel->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) | + (intel->floats_per_vertex << S1_VERTEX_PITCH_SHIFT)); + + intel->vertex_index = + (intel->vertex_used + intel->floats_per_vertex - 1) / intel->floats_per_vertex; + intel->vertex_used = intel->vertex_index * intel->floats_per_vertex; + } + + intel->last_floats_per_vertex = intel->floats_per_vertex; + intel->needs_render_vertex_emit = FALSE; + } + + if (intel->prim_offset == 0) { + intel->prim_offset = intel->batch_used; + OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL); + OUT_BATCH(intel->vertex_index); + } + intel->vertex_count += 3; + + intel->prim_emit(intel, + srcX, srcY, + maskX, maskY, + dstX, dstY, + w, h); + + intel_batch_end_atomic(scrn); +} + +void +i915_vertex_flush(intel_screen_private *intel) +{ + if (intel->prim_offset == 0) + return; + + intel->batch_ptr[intel->prim_offset] |= intel->vertex_count; + intel->prim_offset = 0; + + intel->vertex_index += intel->vertex_count; + intel->vertex_count = 0; +} + +void +i915_batch_commit_notify(intel_screen_private *intel) +{ + intel->needs_render_state_emit = TRUE; + intel->last_floats_per_vertex = 0; +} diff --git a/src/uxa/i915_video.c b/src/uxa/i915_video.c new file mode 100644 index 00000000..ae2e6bb5 --- /dev/null +++ b/src/uxa/i915_video.c @@ -0,0 +1,486 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "xf86.h" +#include "xf86_OSproc.h" +#include "xf86xv.h" +#include "fourcc.h" +#include "gcstruct.h" + +#include "intel.h" +#include "intel_video.h" +#include "i915_reg.h" +#include "i915_3d.h" + +void +I915DisplayVideoTextured(ScrnInfoPtr scrn, + intel_adaptor_private *adaptor_priv, int id, + RegionPtr dstRegion, + short width, short height, int video_pitch, + int video_pitch2, + short src_w, short src_h, short drw_w, short drw_h, + PixmapPtr pixmap) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + uint32_t format, ms3, s5, tiling; + BoxPtr pbox = REGION_RECTS(dstRegion); + int nbox_total = REGION_NUM_RECTS(dstRegion); + int nbox_this_time; + int dxo, dyo, pix_xoff, pix_yoff; + PixmapPtr target; + +#if 0 + ErrorF("I915DisplayVideo: %dx%d (pitch %d)\n", width, height, + video_pitch); +#endif + + dxo = dstRegion->extents.x1; + dyo = dstRegion->extents.y1; + + if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048 || + !intel_check_pitch_3d(pixmap)) { + ScreenPtr screen = pixmap->drawable.pScreen; + + target = screen->CreatePixmap(screen, + dstRegion->extents.x2 - dxo, + dstRegion->extents.y2 - dyo, + pixmap->drawable.depth, + CREATE_PIXMAP_USAGE_SCRATCH); + if (target == NULL) + return; + + pix_xoff = -dxo; + pix_yoff = -dyo; + } else { + target = pixmap; + + /* Set up the offset for translating from the given region + * (in screen coordinates) to the backing pixmap. + */ +#ifdef COMPOSITE + pix_xoff = -target->screen_x + target->drawable.x; + pix_yoff = -target->screen_y + target->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + } + +#define BYTES_FOR_BOXES(n) ((200 + (n) * 20) * 4) +#define BOXES_IN_BYTES(s) ((((s)/4) - 200) / 20) +#define BATCH_BYTES(p) ((p)->batch_bo->size - 16) + + while (nbox_total) { + nbox_this_time = nbox_total; + if (BYTES_FOR_BOXES(nbox_this_time) > BATCH_BYTES(intel)) + nbox_this_time = BOXES_IN_BYTES(BATCH_BYTES(intel)); + nbox_total -= nbox_this_time; + + intel_batch_start_atomic(scrn, 200 + 20 * nbox_this_time); + + IntelEmitInvarientState(scrn); + intel->last_3d = LAST_3D_VIDEO; + + /* draw rect -- just clipping */ + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(DRAW_DITHER_OFS_X(pixmap->drawable.x & 3) | + DRAW_DITHER_OFS_Y(pixmap->drawable.y & 3)); + OUT_BATCH(0x00000000); /* ymin, xmin */ + /* ymax, xmax */ + OUT_BATCH((target->drawable.width - 1) | + (target->drawable.height - 1) << 16); + OUT_BATCH(0x00000000); /* yorigin, xorigin */ + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | + I1_LOAD_S(5) | I1_LOAD_S(6) | 2); + OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) | + S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT)); + s5 = 0x0; + if (intel->cpp == 2) + s5 |= S5_COLOR_DITHER_ENABLE; + OUT_BATCH(s5); /* S5 - enable bits */ + OUT_BATCH((2 << S6_DEPTH_TEST_FUNC_SHIFT) | + (2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) | + (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) | + S6_COLOR_WRITE_ENABLE | (2 << S6_TRISTRIP_PV_SHIFT)); + + OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD); + OUT_BATCH(0x00000000); + + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + if (intel->cpp == 2) + format = COLR_BUF_RGB565; + else + format = + COLR_BUF_ARGB8888 | DEPTH_FRMT_24_FIXED_8_OTHER; + + OUT_BATCH(LOD_PRECLAMP_OGL | + DSTORG_HORT_BIAS(0x8) | + DSTORG_VERT_BIAS(0x8) | format); + + /* front buffer, pitch, offset */ + if (intel_pixmap_tiled(target)) { + tiling = BUF_3D_TILED_SURFACE; + if (intel_get_pixmap_private(target)->tiling == I915_TILING_Y) + tiling |= BUF_3D_TILE_WALK_Y; + } else + tiling = 0; + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling | + BUF_3D_PITCH(intel_pixmap_pitch(target))); + OUT_RELOC_PIXMAP(target, I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + + if (!is_planar_fourcc(id)) { + FS_LOCALS(); + + OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4); + OUT_BATCH(0x0000001); /* constant 0 */ + /* constant 0: brightness/contrast */ + OUT_BATCH_F(adaptor_priv->brightness / 128.0); + OUT_BATCH_F(adaptor_priv->contrast / 255.0); + OUT_BATCH_F(0.0); + OUT_BATCH_F(0.0); + + OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3); + OUT_BATCH(0x00000001); + OUT_BATCH(SS2_COLORSPACE_CONVERSION | + (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | + (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); + OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << + SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_CLAMP_EDGE << + SS3_TCY_ADDR_MODE_SHIFT) | + (0 << SS3_TEXTUREMAP_INDEX_SHIFT) | + SS3_NORMALIZED_COORDS); + OUT_BATCH(0x00000000); + + OUT_BATCH(_3DSTATE_MAP_STATE | 3); + OUT_BATCH(0x00000001); /* texture map #1 */ + if (adaptor_priv->buf) + OUT_RELOC(adaptor_priv->buf, + I915_GEM_DOMAIN_SAMPLER, 0, + adaptor_priv->YBufOffset); + else + OUT_BATCH(adaptor_priv->YBufOffset); + + ms3 = MAPSURF_422; + switch (id) { + case FOURCC_YUY2: + ms3 |= MT_422_YCRCB_NORMAL; + break; + case FOURCC_UYVY: + ms3 |= MT_422_YCRCB_SWAPY; + break; + } + ms3 |= (height - 1) << MS3_HEIGHT_SHIFT; + ms3 |= (width - 1) << MS3_WIDTH_SHIFT; + OUT_BATCH(ms3); + OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT); + + FS_BEGIN(); + i915_fs_dcl(FS_S0); + i915_fs_dcl(FS_T0); + i915_fs_texld(FS_OC, FS_S0, FS_T0); + if (adaptor_priv->brightness != 0) { + i915_fs_add(FS_OC, + i915_fs_operand_reg(FS_OC), + i915_fs_operand(FS_C0, X, X, X, + ZERO)); + } + FS_END(); + } else { + FS_LOCALS(); + + /* For the planar formats, we set up three samplers -- + * one for each plane, in a Y8 format. Because I + * couldn't get the special PLANAR_TO_PACKED + * shader setup to work, I did the manual pixel shader: + * + * y' = y - .0625 + * u' = u - .5 + * v' = v - .5; + * + * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' + * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' + * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' + * + * register assignment: + * r0 = (y',u',v',0) + * r1 = (y,y,y,y) + * r2 = (u,u,u,u) + * r3 = (v,v,v,v) + * OC = (r,g,b,1) + */ + OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2)); + OUT_BATCH(0x000001f); /* constants 0-4 */ + /* constant 0: normalization offsets */ + OUT_BATCH_F(-0.0625); + OUT_BATCH_F(-0.5); + OUT_BATCH_F(-0.5); + OUT_BATCH_F(0.0); + /* constant 1: r coefficients */ + OUT_BATCH_F(1.1643); + OUT_BATCH_F(0.0); + OUT_BATCH_F(1.5958); + OUT_BATCH_F(0.0); + /* constant 2: g coefficients */ + OUT_BATCH_F(1.1643); + OUT_BATCH_F(-0.39173); + OUT_BATCH_F(-0.81290); + OUT_BATCH_F(0.0); + /* constant 3: b coefficients */ + OUT_BATCH_F(1.1643); + OUT_BATCH_F(2.017); + OUT_BATCH_F(0.0); + OUT_BATCH_F(0.0); + /* constant 4: brightness/contrast */ + OUT_BATCH_F(adaptor_priv->brightness / 128.0); + OUT_BATCH_F(adaptor_priv->contrast / 255.0); + OUT_BATCH_F(0.0); + OUT_BATCH_F(0.0); + + OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9); + OUT_BATCH(0x00000007); + /* sampler 0 */ + OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | + (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); + OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << + SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_CLAMP_EDGE << + SS3_TCY_ADDR_MODE_SHIFT) | + (0 << SS3_TEXTUREMAP_INDEX_SHIFT) | + SS3_NORMALIZED_COORDS); + OUT_BATCH(0x00000000); + /* sampler 1 */ + OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | + (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); + OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << + SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_CLAMP_EDGE << + SS3_TCY_ADDR_MODE_SHIFT) | + (1 << SS3_TEXTUREMAP_INDEX_SHIFT) | + SS3_NORMALIZED_COORDS); + OUT_BATCH(0x00000000); + /* sampler 2 */ + OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | + (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); + OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << + SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_CLAMP_EDGE << + SS3_TCY_ADDR_MODE_SHIFT) | + (2 << SS3_TEXTUREMAP_INDEX_SHIFT) | + SS3_NORMALIZED_COORDS); + OUT_BATCH(0x00000000); + + OUT_BATCH(_3DSTATE_MAP_STATE | 9); + OUT_BATCH(0x00000007); + + if (adaptor_priv->buf) + OUT_RELOC(adaptor_priv->buf, + I915_GEM_DOMAIN_SAMPLER, 0, + adaptor_priv->YBufOffset); + else + OUT_BATCH(adaptor_priv->YBufOffset); + + ms3 = MAPSURF_8BIT | MT_8BIT_I8; + ms3 |= (height - 1) << MS3_HEIGHT_SHIFT; + ms3 |= (width - 1) << MS3_WIDTH_SHIFT; + OUT_BATCH(ms3); + /* check to see if Y has special pitch than normal + * double u/v pitch, e.g i915 XvMC hw requires at + * least 1K alignment, so Y pitch might + * be same as U/V's.*/ + if (video_pitch2) + OUT_BATCH(((video_pitch2 / 4) - + 1) << MS4_PITCH_SHIFT); + else + OUT_BATCH(((video_pitch * 2 / 4) - + 1) << MS4_PITCH_SHIFT); + + if (adaptor_priv->buf) + OUT_RELOC(adaptor_priv->buf, + I915_GEM_DOMAIN_SAMPLER, 0, + adaptor_priv->UBufOffset); + else + OUT_BATCH(adaptor_priv->UBufOffset); + + ms3 = MAPSURF_8BIT | MT_8BIT_I8; + ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT; + ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT; + OUT_BATCH(ms3); + OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT); + + if (adaptor_priv->buf) + OUT_RELOC(adaptor_priv->buf, + I915_GEM_DOMAIN_SAMPLER, 0, + adaptor_priv->VBufOffset); + else + OUT_BATCH(adaptor_priv->VBufOffset); + + ms3 = MAPSURF_8BIT | MT_8BIT_I8; + ms3 |= (height / 2 - 1) << MS3_HEIGHT_SHIFT; + ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT; + OUT_BATCH(ms3); + OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT); + + FS_BEGIN(); + /* Declare samplers */ + i915_fs_dcl(FS_S0); /* Y */ + i915_fs_dcl(FS_S1); /* U */ + i915_fs_dcl(FS_S2); /* V */ + i915_fs_dcl(FS_T0); /* normalized coords */ + + /* Load samplers to temporaries. */ + i915_fs_texld(FS_R1, FS_S0, FS_T0); + i915_fs_texld(FS_R2, FS_S1, FS_T0); + i915_fs_texld(FS_R3, FS_S2, FS_T0); + + /* Move the sampled YUV data in R[123] to the first + * 3 channels of R0. + */ + i915_fs_mov_masked(FS_R0, MASK_X, + i915_fs_operand_reg(FS_R1)); + i915_fs_mov_masked(FS_R0, MASK_Y, + i915_fs_operand_reg(FS_R2)); + i915_fs_mov_masked(FS_R0, MASK_Z, + i915_fs_operand_reg(FS_R3)); + + /* Normalize the YUV data */ + i915_fs_add(FS_R0, i915_fs_operand_reg(FS_R0), + i915_fs_operand_reg(FS_C0)); + /* dot-product the YUV data in R0 by the vectors of + * coefficients for calculating R, G, and B, storing + * the results in the R, G, or B channels of the output + * color. The OC results are implicitly clamped + * at the end of the program. + */ + i915_fs_dp3(FS_OC, MASK_X, + i915_fs_operand_reg(FS_R0), + i915_fs_operand_reg(FS_C1)); + i915_fs_dp3(FS_OC, MASK_Y, + i915_fs_operand_reg(FS_R0), + i915_fs_operand_reg(FS_C2)); + i915_fs_dp3(FS_OC, MASK_Z, + i915_fs_operand_reg(FS_R0), + i915_fs_operand_reg(FS_C3)); + /* Set alpha of the output to 1.0, by wiring W to 1 + * and not actually using the source. + */ + i915_fs_mov_masked(FS_OC, MASK_W, + i915_fs_operand_one()); + + if (adaptor_priv->brightness != 0) { + i915_fs_add(FS_OC, + i915_fs_operand_reg(FS_OC), + i915_fs_operand(FS_C4, X, X, X, + ZERO)); + } + FS_END(); + } + + OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1)); + while (nbox_this_time--) { + int box_x1 = pbox->x1; + int box_y1 = pbox->y1; + int box_x2 = pbox->x2; + int box_y2 = pbox->y2; + float src_scale_x, src_scale_y; + + pbox++; + + src_scale_x = ((float)src_w / width) / drw_w; + src_scale_y = ((float)src_h / height) / drw_h; + + /* vertex data - rect list consists of bottom right, + * bottom left, and top left vertices. + */ + + /* bottom right */ + OUT_BATCH_F(box_x2 + pix_xoff); + OUT_BATCH_F(box_y2 + pix_yoff); + OUT_BATCH_F((box_x2 - dxo) * src_scale_x); + OUT_BATCH_F((box_y2 - dyo) * src_scale_y); + + /* bottom left */ + OUT_BATCH_F(box_x1 + pix_xoff); + OUT_BATCH_F(box_y2 + pix_yoff); + OUT_BATCH_F((box_x1 - dxo) * src_scale_x); + OUT_BATCH_F((box_y2 - dyo) * src_scale_y); + + /* top left */ + OUT_BATCH_F(box_x1 + pix_xoff); + OUT_BATCH_F(box_y1 + pix_yoff); + OUT_BATCH_F((box_x1 - dxo) * src_scale_x); + OUT_BATCH_F((box_y1 - dyo) * src_scale_y); + } + + intel_batch_end_atomic(scrn); + } + + if (target != pixmap) { + GCPtr gc; + + gc = GetScratchGC(pixmap->drawable.depth, + pixmap->drawable.pScreen); + if (gc) { + gc->subWindowMode = ClipByChildren; + + if (REGION_NUM_RECTS(dstRegion) > 1) { + RegionPtr tmp; + + tmp = REGION_CREATE(pixmap->drawable.pScreen, NULL, 0); + if (tmp) { + REGION_COPY(pixmap->drawable.pScreen, tmp, dstRegion); + gc->funcs->ChangeClip(gc, CT_REGION, tmp, 0); + } + } + + ValidateGC(&pixmap->drawable, gc); + gc->ops->CopyArea(&target->drawable, &pixmap->drawable, gc, + 0, 0, + target->drawable.width, + target->drawable.height, + -pix_xoff, -pix_yoff); + FreeScratchGC(gc); + } + + target->drawable.pScreen->DestroyPixmap(target); + } + + intel_debug_flush(scrn); +} diff --git a/src/uxa/i965_3d.c b/src/uxa/i965_3d.c new file mode 100644 index 00000000..fe2d9aa6 --- /dev/null +++ b/src/uxa/i965_3d.c @@ -0,0 +1,443 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <string.h> + +#include "intel.h" +#include "i965_reg.h" +#include "brw_defines.h" +#include "brw_structs.h" + +void +gen6_upload_invariant_states(intel_screen_private *intel) +{ + Bool ivb = INTEL_INFO(intel)->gen >= 070; + + OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH | + BRW_PIPE_CONTROL_WC_FLUSH | + BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH | + BRW_PIPE_CONTROL_NOWRITE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + + OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); + + OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | ((ivb ? 4 : 3) - 2)); + OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | + GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ + OUT_BATCH(0); + if (ivb) + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); + OUT_BATCH(1); + + /* Set system instruction pointer */ + OUT_BATCH(BRW_STATE_SIP | 0); + OUT_BATCH(0); +} + +void +gen6_upload_viewport_state_pointers(intel_screen_private *intel, + drm_intel_bo *cc_vp_bo) +{ + OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS | + GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC | + (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_RELOC(cc_vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); +} + +void +gen7_upload_viewport_state_pointers(intel_screen_private *intel, + drm_intel_bo *cc_vp_bo) +{ + OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); + OUT_RELOC(cc_vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + + OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2)); + OUT_BATCH(0); +} + +void +gen6_upload_urb(intel_screen_private *intel) +{ + OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2)); + OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) | + (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */ + OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) | + (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */ +} + +/* + * URB layout on GEN7 + * ---------------------------------------- + * | PS Push Constants (8KB) | VS entries | + * ---------------------------------------- + */ +void +gen7_upload_urb(intel_screen_private *intel) +{ + unsigned int num_urb_entries = 32; + + if (IS_HSW(intel)) + num_urb_entries = 64; + + OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); + OUT_BATCH(8); /* in 1KBs */ + + OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2)); + OUT_BATCH( + (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) | + (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT | + (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + + OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2)); + OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (1 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + + OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2)); + OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + + OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2)); + OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); +} + +void +gen6_upload_cc_state_pointers(intel_screen_private *intel, + drm_intel_bo *blend_bo, + drm_intel_bo *cc_bo, + drm_intel_bo *depth_stencil_bo, + uint32_t blend_offset) +{ + OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2)); + if (blend_bo) + OUT_RELOC(blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + blend_offset | 1); + else + OUT_BATCH(0); + + if (depth_stencil_bo) + OUT_RELOC(depth_stencil_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + else + OUT_BATCH(0); + + if (cc_bo) + OUT_RELOC(cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + else + OUT_BATCH(0); +} + +void +gen7_upload_cc_state_pointers(intel_screen_private *intel, + drm_intel_bo *blend_bo, + drm_intel_bo *cc_bo, + drm_intel_bo *depth_stencil_bo, + uint32_t blend_offset) +{ + OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); + if (blend_bo) + OUT_RELOC(blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + blend_offset | 1); + else + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2)); + if (cc_bo) + OUT_RELOC(cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + else + OUT_BATCH(0); + + OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2)); + if (depth_stencil_bo) + OUT_RELOC(depth_stencil_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + else + OUT_BATCH(0); +} + +void +gen6_upload_sampler_state_pointers(intel_screen_private *intel, + drm_intel_bo *sampler_bo) +{ + OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS | + GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS | + (4 - 2)); + OUT_BATCH(0); /* VS */ + OUT_BATCH(0); /* GS */ + OUT_RELOC(sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); +} + +void +gen7_upload_sampler_state_pointers(intel_screen_private *intel, + drm_intel_bo *sampler_bo) +{ + OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2)); + OUT_RELOC(sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); +} + +void +gen7_upload_bypass_states(intel_screen_private *intel) +{ + /* bypass GS */ + OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2)); + OUT_BATCH(0); /* without GS kernel */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* pass-through */ + + OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2)); + OUT_BATCH(0); + + /* disable HS */ + OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2)); + OUT_BATCH(0); + + /* Disable TE */ + OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + /* Disable DS */ + OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2)); + OUT_BATCH(0); + + /* Disable STREAMOUT */ + OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); +} + +void +gen6_upload_vs_state(intel_screen_private *intel) +{ + Bool ivb = INTEL_INFO(intel)->gen >= 070; + /* disable VS constant buffer */ + OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | ((ivb ? 7 : 5) - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + if (ivb) { + OUT_BATCH(0); + OUT_BATCH(0); + } + + OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2)); + OUT_BATCH(0); /* without VS kernel */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* pass-through */ +} + +void +gen6_upload_gs_state(intel_screen_private *intel) +{ + /* disable GS constant buffer */ + OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2)); + OUT_BATCH(0); /* without GS kernel */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* pass-through */ +} + +void +gen6_upload_clip_state(intel_screen_private *intel) +{ + OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); /* pass-through */ + OUT_BATCH(0); +} + +void +gen6_upload_sf_state(intel_screen_private *intel, + int num_sf_outputs, + int read_offset) +{ + OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2)); + OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) | + (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) | + (read_offset << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH(0); + OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE); + OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW9 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW14 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW19 */ +} + +void +gen7_upload_sf_state(intel_screen_private *intel, + int num_sf_outputs, + int read_offset) +{ + OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2)); + OUT_BATCH((num_sf_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT) | + (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) | + (read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW4 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW9 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_SF | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE); + OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); +} + +void +gen6_upload_binding_table(intel_screen_private *intel, + uint32_t ps_binding_table_offset) +{ + /* Binding table pointers */ + OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | + GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS | + (4 - 2)); + OUT_BATCH(0); /* VS */ + OUT_BATCH(0); /* GS */ + /* Only the PS uses the binding table */ + OUT_BATCH(ps_binding_table_offset); +} + +void +gen7_upload_binding_table(intel_screen_private *intel, + uint32_t ps_binding_table_offset) +{ + OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); + OUT_BATCH(ps_binding_table_offset); +} + +void +gen6_upload_depth_buffer_state(intel_screen_private *intel) +{ + OUT_BATCH(BRW_3DSTATE_DEPTH_BUFFER | (7 - 2)); + OUT_BATCH((BRW_SURFACE_NULL << BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT) | + (BRW_DEPTHFORMAT_D32_FLOAT << BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(BRW_3DSTATE_CLEAR_PARAMS | (2 - 2)); + OUT_BATCH(0); +} + +void +gen7_upload_depth_buffer_state(intel_screen_private *intel) +{ + OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2)); + OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | (BRW_SURFACE_NULL << 29)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); +} diff --git a/src/uxa/i965_reg.h b/src/uxa/i965_reg.h new file mode 100644 index 00000000..4bb5e4d2 --- /dev/null +++ b/src/uxa/i965_reg.h @@ -0,0 +1,476 @@ +/* + * New regs for broadwater -- we need to split this file up sensibly somehow. + */ +#define BRW_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \ + ((Pipeline) << 27) | \ + ((Opcode) << 24) | \ + ((Subopcode) << 16)) + +#define BRW_URB_FENCE BRW_3D(0, 0, 0) +#define BRW_CS_URB_STATE BRW_3D(0, 0, 1) +#define BRW_CONSTANT_BUFFER BRW_3D(0, 0, 2) +#define BRW_STATE_PREFETCH BRW_3D(0, 0, 3) + +#define BRW_STATE_BASE_ADDRESS BRW_3D(0, 1, 1) +#define BRW_STATE_SIP BRW_3D(0, 1, 2) +#define BRW_PIPELINE_SELECT BRW_3D(0, 1, 4) + +#define NEW_PIPELINE_SELECT BRW_3D(1, 1, 4) + +#define BRW_MEDIA_STATE_POINTERS BRW_3D(2, 0, 0) +#define BRW_MEDIA_OBJECT BRW_3D(2, 1, 0) + +#define BRW_3DSTATE_PIPELINED_POINTERS BRW_3D(3, 0, 0) +#define BRW_3DSTATE_BINDING_TABLE_POINTERS BRW_3D(3, 0, 1) +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */ +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */ +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */ + +#define BRW_3DSTATE_VERTEX_BUFFERS BRW_3D(3, 0, 8) +#define BRW_3DSTATE_VERTEX_ELEMENTS BRW_3D(3, 0, 9) +#define BRW_3DSTATE_INDEX_BUFFER BRW_3D(3, 0, 0xa) +#define BRW_3DSTATE_VF_STATISTICS BRW_3D(3, 0, 0xb) + +#define BRW_3DSTATE_DRAWING_RECTANGLE BRW_3D(3, 1, 0) +#define BRW_3DSTATE_CONSTANT_COLOR BRW_3D(3, 1, 1) +#define BRW_3DSTATE_SAMPLER_PALETTE_LOAD BRW_3D(3, 1, 2) +#define BRW_3DSTATE_CHROMA_KEY BRW_3D(3, 1, 4) +#define BRW_3DSTATE_DEPTH_BUFFER BRW_3D(3, 1, 5) +# define BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29 +# define BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18 + +#define BRW_3DSTATE_POLY_STIPPLE_OFFSET BRW_3D(3, 1, 6) +#define BRW_3DSTATE_POLY_STIPPLE_PATTERN BRW_3D(3, 1, 7) +#define BRW_3DSTATE_LINE_STIPPLE BRW_3D(3, 1, 8) +#define BRW_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP BRW_3D(3, 1, 9) +/* These two are BLC and CTG only, not BW or CL */ +#define BRW_3DSTATE_AA_LINE_PARAMS BRW_3D(3, 1, 0xa) +#define BRW_3DSTATE_GS_SVB_INDEX BRW_3D(3, 1, 0xb) + +#define BRW_PIPE_CONTROL BRW_3D(3, 2, 0) + +#define BRW_3DPRIMITIVE BRW_3D(3, 3, 0) + +#define BRW_3DSTATE_CLEAR_PARAMS BRW_3D(3, 1, 0x10) +/* DW1 */ +# define BRW_3DSTATE_DEPTH_CLEAR_VALID (1 << 15) + +/* for GEN6+ */ +#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS BRW_3D(3, 0, 0x02) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8) + +#define GEN6_3DSTATE_URB BRW_3D(3, 0, 0x05) +/* DW1 */ +# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16 +# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0 +/* DW2 */ +# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8 +# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0 + +#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS BRW_3D(3, 0, 0x0d) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10) + +#define GEN6_3DSTATE_CC_STATE_POINTERS BRW_3D(3, 0, 0x0e) + +#define GEN6_3DSTATE_VS BRW_3D(3, 0, 0x10) + +#define GEN6_3DSTATE_GS BRW_3D(3, 0, 0x11) +/* DW4 */ +# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0 + +#define GEN6_3DSTATE_CLIP BRW_3D(3, 0, 0x12) + +#define GEN6_3DSTATE_SF BRW_3D(3, 0, 0x13) +/* DW1 */ +# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +/* DW3 */ +# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29) +# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29) +# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29) +# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29) +/* DW4 */ +# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27 +# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25 + + +#define GEN6_3DSTATE_WM BRW_3D(3, 0, 0x14) +/* DW2 */ +# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27 +# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16 +/* DW5 */ +# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25 +# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) + + +#define GEN6_3DSTATE_CONSTANT_VS BRW_3D(3, 0, 0x15) +#define GEN6_3DSTATE_CONSTANT_GS BRW_3D(3, 0, 0x16) +#define GEN6_3DSTATE_CONSTANT_PS BRW_3D(3, 0, 0x17) + +#define GEN6_3DSTATE_SAMPLE_MASK BRW_3D(3, 0, 0x18) + +#define GEN6_3DSTATE_MULTISAMPLE BRW_3D(3, 1, 0x0d) +/* DW1 */ +# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4) +# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1) + +/* on GEN7+ */ +/* _3DSTATE_VERTEX_BUFFERS on GEN7*/ +/* DW1 */ +#define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14) + +/* _3DPRIMITIVE on GEN7 */ +/* DW1 */ +# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) +# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) + +/* 3DSTATE_WM on GEN7 */ +/* DW1 */ +# define GEN7_WM_STATISTICS_ENABLE (1 << 31) +# define GEN7_WM_DEPTH_CLEAR (1 << 30) +# define GEN7_WM_DISPATCH_ENABLE (1 << 29) +# define GEN6_WM_DEPTH_RESOLVE (1 << 28) +# define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) +# define GEN7_WM_KILL_ENABLE (1 << 25) +# define GEN7_WM_PSCDEPTH_OFF (0 << 23) +# define GEN7_WM_PSCDEPTH_ON (1 << 23) +# define GEN7_WM_PSCDEPTH_ON_GE (2 << 23) +# define GEN7_WM_PSCDEPTH_ON_LE (3 << 23) +# define GEN7_WM_USES_SOURCE_DEPTH (1 << 20) +# define GEN7_WM_USES_SOURCE_W (1 << 19) +# define GEN7_WM_POSITION_ZW_PIXEL (0 << 17) +# define GEN7_WM_POSITION_ZW_CENTROID (2 << 17) +# define GEN7_WM_POSITION_ZW_SAMPLE (3 << 17) +# define GEN7_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 16) +# define GEN7_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 15) +# define GEN7_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 14) +# define GEN7_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 13) +# define GEN7_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 12) +# define GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 11) +# define GEN7_WM_USES_INPUT_COVERAGE_MASK (1 << 10) +# define GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8) +# define GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8) +# define GEN7_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8) +# define GEN7_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8) +# define GEN7_WM_LINE_AA_WIDTH_0_5 (0 << 6) +# define GEN7_WM_LINE_AA_WIDTH_1_0 (1 << 6) +# define GEN7_WM_LINE_AA_WIDTH_2_0 (2 << 6) +# define GEN7_WM_LINE_AA_WIDTH_4_0 (3 << 6) +# define GEN7_WM_POLYGON_STIPPLE_ENABLE (1 << 4) +# define GEN7_WM_LINE_STIPPLE_ENABLE (1 << 3) +# define GEN7_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2) +# define GEN7_WM_MSRAST_OFF_PIXEL (0 << 0) +# define GEN7_WM_MSRAST_OFF_PATTERN (1 << 0) +# define GEN7_WM_MSRAST_ON_PIXEL (2 << 0) +# define GEN7_WM_MSRAST_ON_PATTERN (3 << 0) +/* DW2 */ +# define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31) + +#define GEN7_3DSTATE_CLEAR_PARAMS BRW_3D(3, 0, 0x04) +#define GEN7_3DSTATE_DEPTH_BUFFER BRW_3D(3, 0, 0x05) + +#define GEN7_3DSTATE_CONSTANT_HS BRW_3D(3, 0, 0x19) +#define GEN7_3DSTATE_CONSTANT_DS BRW_3D(3, 0, 0x1a) + +#define GEN7_3DSTATE_HS BRW_3D(3, 0, 0x1b) +#define GEN7_3DSTATE_TE BRW_3D(3, 0, 0x1c) +#define GEN7_3DSTATE_DS BRW_3D(3, 0, 0x1d) +#define GEN7_3DSTATE_STREAMOUT BRW_3D(3, 0, 0x1e) +#define GEN7_3DSTATE_SBE BRW_3D(3, 0, 0x1f) + +/* DW1 */ +# define GEN7_SBE_SWIZZLE_CONTROL_MODE (1 << 28) +# define GEN7_SBE_NUM_OUTPUTS_SHIFT 22 +# define GEN7_SBE_SWIZZLE_ENABLE (1 << 21) +# define GEN7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20) +# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4 + +#define GEN7_3DSTATE_PS BRW_3D(3, 0, 0x20) +/* DW1: kernel pointer */ +/* DW2 */ +# define GEN7_PS_SPF_MODE (1 << 31) +# define GEN7_PS_VECTOR_MASK_ENABLE (1 << 30) +# define GEN7_PS_SAMPLER_COUNT_SHIFT 27 +# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16) +/* DW3: scratch space */ +/* DW4 */ +# define GEN7_PS_MAX_THREADS_SHIFT_IVB 24 +# define GEN7_PS_MAX_THREADS_SHIFT_HSW 23 +# define GEN7_PS_SAMPLE_MASK_SHIFT_HSW 12 +# define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11) +# define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10) +# define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9) +# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7) +# define GEN7_PS_POSOFFSET_NONE (0 << 3) +# define GEN7_PS_POSOFFSET_CENTROID (2 << 3) +# define GEN7_PS_POSOFFSET_SAMPLE (3 << 3) +# define GEN7_PS_32_DISPATCH_ENABLE (1 << 2) +# define GEN7_PS_16_DISPATCH_ENABLE (1 << 1) +# define GEN7_PS_8_DISPATCH_ENABLE (1 << 0) +/* DW5 */ +# define GEN7_PS_DISPATCH_START_GRF_SHIFT_0 16 +# define GEN7_PS_DISPATCH_START_GRF_SHIFT_1 8 +# define GEN7_PS_DISPATCH_START_GRF_SHIFT_2 0 +/* DW6: kernel 1 pointer */ +/* DW7: kernel 2 pointer */ + +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL BRW_3D(3, 0, 0x21) +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC BRW_3D(3, 0, 0x23) + +#define GEN7_3DSTATE_BLEND_STATE_POINTERS BRW_3D(3, 0, 0x24) +#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS BRW_3D(3, 0, 0x25) + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS BRW_3D(3, 0, 0x26) +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS BRW_3D(3, 0, 0x27) +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS BRW_3D(3, 0, 0x28) +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS BRW_3D(3, 0, 0x29) +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS BRW_3D(3, 0, 0x2a) + +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS BRW_3D(3, 0, 0x2b) +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS BRW_3D(3, 0, 0x2e) +#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS BRW_3D(3, 0, 0x2f) + +#define GEN7_3DSTATE_URB_VS BRW_3D(3, 0, 0x30) +#define GEN7_3DSTATE_URB_HS BRW_3D(3, 0, 0x31) +#define GEN7_3DSTATE_URB_DS BRW_3D(3, 0, 0x32) +#define GEN7_3DSTATE_URB_GS BRW_3D(3, 0, 0x33) +/* DW1 */ +# define GEN7_URB_ENTRY_NUMBER_SHIFT 0 +# define GEN7_URB_ENTRY_SIZE_SHIFT 16 +# define GEN7_URB_STARTING_ADDRESS_SHIFT 25 + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS BRW_3D(3, 1, 0x12) +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS BRW_3D(3, 1, 0x16) +/* DW1 */ +# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 + + +#define PIPELINE_SELECT_3D 0 +#define PIPELINE_SELECT_MEDIA 1 + +#define UF0_CS_REALLOC (1 << 13) +#define UF0_VFE_REALLOC (1 << 12) +#define UF0_SF_REALLOC (1 << 11) +#define UF0_CLIP_REALLOC (1 << 10) +#define UF0_GS_REALLOC (1 << 9) +#define UF0_VS_REALLOC (1 << 8) +#define UF1_CLIP_FENCE_SHIFT 20 +#define UF1_GS_FENCE_SHIFT 10 +#define UF1_VS_FENCE_SHIFT 0 +#define UF2_CS_FENCE_SHIFT 20 +#define UF2_VFE_FENCE_SHIFT 10 +#define UF2_SF_FENCE_SHIFT 0 + +/* for BRW_STATE_BASE_ADDRESS */ +#define BASE_ADDRESS_MODIFY (1 << 0) + +/* for BRW_3DSTATE_PIPELINED_POINTERS */ +#define BRW_GS_DISABLE 0 +#define BRW_GS_ENABLE 1 +#define BRW_CLIP_DISABLE 0 +#define BRW_CLIP_ENABLE 1 + +/* for BRW_PIPE_CONTROL */ +#define BRW_PIPE_CONTROL_CS_STALL (1 << 20) +#define BRW_PIPE_CONTROL_NOWRITE (0 << 14) +#define BRW_PIPE_CONTROL_WRITE_QWORD (1 << 14) +#define BRW_PIPE_CONTROL_WRITE_DEPTH (2 << 14) +#define BRW_PIPE_CONTROL_WRITE_TIME (3 << 14) +#define BRW_PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define BRW_PIPE_CONTROL_WC_FLUSH (1 << 12) +#define BRW_PIPE_CONTROL_IS_FLUSH (1 << 11) +#define BRW_PIPE_CONTROL_TC_FLUSH (1 << 10) +#define BRW_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) +#define BRW_PIPE_CONTROL_GLOBAL_GTT (1 << 2) +#define BRW_PIPE_CONTROL_LOCAL_PGTT (0 << 2) +#define BRW_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) +#define BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) + +/* VERTEX_BUFFER_STATE Structure */ +#define VB0_BUFFER_INDEX_SHIFT 27 +#define GEN6_VB0_BUFFER_INDEX_SHIFT 26 +#define VB0_VERTEXDATA (0 << 26) +#define VB0_INSTANCEDATA (1 << 26) +#define GEN6_VB0_VERTEXDATA (0 << 20) +#define GEN6_VB0_INSTANCEDATA (1 << 20) +#define VB0_BUFFER_PITCH_SHIFT 0 + +/* VERTEX_ELEMENT_STATE Structure */ +#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27 +#define GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */ +#define VE0_VALID (1 << 26) +#define GEN6_VE0_VALID (1 << 25) /* for GEN6 */ +#define VE0_FORMAT_SHIFT 16 +#define VE0_OFFSET_SHIFT 0 +#define VE1_VFCOMPONENT_0_SHIFT 28 +#define VE1_VFCOMPONENT_1_SHIFT 24 +#define VE1_VFCOMPONENT_2_SHIFT 20 +#define VE1_VFCOMPONENT_3_SHIFT 16 +#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0 + +/* 3DPRIMITIVE bits */ +#define BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15) +#define BRW_3DPRIMITIVE_VERTEX_RANDOM (1 << 15) +/* Primitive types are in brw_defines.h */ +#define BRW_3DPRIMITIVE_TOPOLOGY_SHIFT 10 + +#define BRW_SVG_CTL 0x7400 + +#define BRW_SVG_CTL_GS_BA (0 << 8) +#define BRW_SVG_CTL_SS_BA (1 << 8) +#define BRW_SVG_CTL_IO_BA (2 << 8) +#define BRW_SVG_CTL_GS_AUB (3 << 8) +#define BRW_SVG_CTL_IO_AUB (4 << 8) +#define BRW_SVG_CTL_SIP (5 << 8) + +#define BRW_SVG_RDATA 0x7404 +#define BRW_SVG_WORK_CTL 0x7408 + +#define BRW_VF_CTL 0x7500 + +#define BRW_VF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define BRW_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8) +#define BRW_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8) +#define BRW_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4) +#define BRW_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4) +#define BRW_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3) +#define BRW_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2) +#define BRW_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1) +#define BRW_VF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define BRW_VF_STRG_VAL 0x7504 +#define BRW_VF_STR_VL_OVR 0x7508 +#define BRW_VF_VC_OVR 0x750c +#define BRW_VF_STR_PSKIP 0x7510 +#define BRW_VF_MAX_PRIM 0x7514 +#define BRW_VF_RDATA 0x7518 + +#define BRW_VS_CTL 0x7600 +#define BRW_VS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define BRW_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8) +#define BRW_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8) +#define BRW_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8) +#define BRW_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8) +#define BRW_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define BRW_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define BRW_VS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define BRW_VS_STRG_VAL 0x7604 +#define BRW_VS_RDATA 0x7608 + +#define BRW_SF_CTL 0x7b00 +#define BRW_SF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8) +#define BRW_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8) +#define BRW_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4) +#define BRW_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3) +#define BRW_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define BRW_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define BRW_SF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define BRW_SF_STRG_VAL 0x7b04 +#define BRW_SF_RDATA 0x7b18 + +#define BRW_WIZ_CTL 0x7c00 +#define BRW_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define BRW_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16 +#define BRW_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8) +#define BRW_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8) +#define BRW_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8) +#define BRW_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6) +#define BRW_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5) +#define BRW_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4) +#define BRW_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3) +#define BRW_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define BRW_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define BRW_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define BRW_WIZ_STRG_VAL 0x7c04 +#define BRW_WIZ_RDATA 0x7c18 + +#define BRW_TS_CTL 0x7e00 +#define BRW_TS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define BRW_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8) +#define BRW_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8) +#define BRW_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2) +#define BRW_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1) +#define BRW_TS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define BRW_TS_STRG_VAL 0x7e04 +#define BRW_TS_RDATA 0x7e08 + +#define BRW_TD_CTL 0x8000 +#define BRW_TD_CTL_MUX_SHIFT 8 +#define BRW_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7) +#define BRW_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6) +#define BRW_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5) +#define BRW_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4) +#define BRW_TD_CTL_BREAKPOINT_ENABLE (1 << 2) +#define BRW_TD_CTL2 0x8004 +#define BRW_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28) +#define BRW_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26) +#define BRW_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25) +#define BRW_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16 +#define BRW_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8) +#define BRW_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7) +#define BRW_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6) +#define BRW_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5) +#define BRW_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4) +#define BRW_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3) +#define BRW_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0) +#define BRW_TD_VF_VS_EMSK 0x8008 +#define BRW_TD_GS_EMSK 0x800c +#define BRW_TD_CLIP_EMSK 0x8010 +#define BRW_TD_SF_EMSK 0x8014 +#define BRW_TD_WIZ_EMSK 0x8018 +#define BRW_TD_0_6_EHTRG_VAL 0x801c +#define BRW_TD_0_7_EHTRG_VAL 0x8020 +#define BRW_TD_0_6_EHTRG_MSK 0x8024 +#define BRW_TD_0_7_EHTRG_MSK 0x8028 +#define BRW_TD_RDATA 0x802c +#define BRW_TD_TS_EMSK 0x8030 + +#define BRW_EU_CTL 0x8800 +#define BRW_EU_CTL_SELECT_SHIFT 16 +#define BRW_EU_CTL_DATA_MUX_SHIFT 8 +#define BRW_EU_ATT_0 0x8810 +#define BRW_EU_ATT_1 0x8814 +#define BRW_EU_ATT_DATA_0 0x8820 +#define BRW_EU_ATT_DATA_1 0x8824 +#define BRW_EU_ATT_CLR_0 0x8830 +#define BRW_EU_ATT_CLR_1 0x8834 +#define BRW_EU_RDATA 0x8840 + +/* End regs for broadwater */ + diff --git a/src/uxa/i965_render.c b/src/uxa/i965_render.c new file mode 100644 index 00000000..39698b0d --- /dev/null +++ b/src/uxa/i965_render.c @@ -0,0 +1,2962 @@ +/* + * Copyright © 2006,2008 Intel Corporation + * Copyright © 2007 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Eric Anholt <eric@anholt.net> + * Carl Worth <cworth@redhat.com> + * Keith Packard <keithp@keithp.com> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include "xf86.h" +#include "intel.h" +#include "i830_reg.h" +#include "i965_reg.h" + +/* bring in brw structs */ +#include "brw_defines.h" +#include "brw_structs.h" + +// refer vol2, 3d rasterization 3.8.1 + +/* defined in brw_defines.h */ +static const struct blendinfo { + Bool dst_alpha; + Bool src_alpha; + uint32_t src_blend; + uint32_t dst_blend; +} i965_blend_op[] = { + /* Clear */ + {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ZERO}, + /* Src */ + {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO}, + /* Dst */ + {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ONE}, + /* Over */ + {0, 1, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_INV_SRC_ALPHA}, + /* OverReverse */ + {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE}, + /* In */ + {1, 0, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, + /* InReverse */ + {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_SRC_ALPHA}, + /* Out */ + {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO}, + /* OutReverse */ + {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_INV_SRC_ALPHA}, + /* Atop */ + {1, 1, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, + /* AtopReverse */ + {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA}, + /* Xor */ + {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA}, + /* Add */ + {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ONE}, +}; + +/** + * Highest-valued BLENDFACTOR used in i965_blend_op. + * + * This leaves out BRW_BLENDFACTOR_INV_DST_COLOR, + * BRW_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, + * BRW_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} + */ +#define BRW_BLENDFACTOR_COUNT (BRW_BLENDFACTOR_INV_DST_ALPHA + 1) + +/* FIXME: surface format defined in brw_defines.h, shared Sampling engine + * 1.7.2 + */ +static const struct formatinfo { + int fmt; + uint32_t card_fmt; +} i965_tex_formats[] = { + {PICT_a8, BRW_SURFACEFORMAT_A8_UNORM}, + {PICT_a8r8g8b8, BRW_SURFACEFORMAT_B8G8R8A8_UNORM}, + {PICT_x8r8g8b8, BRW_SURFACEFORMAT_B8G8R8X8_UNORM}, + {PICT_a8b8g8r8, BRW_SURFACEFORMAT_R8G8B8A8_UNORM}, + {PICT_x8b8g8r8, BRW_SURFACEFORMAT_R8G8B8X8_UNORM}, + {PICT_r8g8b8, BRW_SURFACEFORMAT_R8G8B8_UNORM}, + {PICT_r5g6b5, BRW_SURFACEFORMAT_B5G6R5_UNORM}, + {PICT_a1r5g5b5, BRW_SURFACEFORMAT_B5G5R5A1_UNORM}, +#if XORG_VERSION_CURRENT >= 10699900 + {PICT_a2r10g10b10, BRW_SURFACEFORMAT_B10G10R10A2_UNORM}, + {PICT_x2r10g10b10, BRW_SURFACEFORMAT_B10G10R10X2_UNORM}, + {PICT_a2b10g10r10, BRW_SURFACEFORMAT_R10G10B10A2_UNORM}, + {PICT_x2r10g10b10, BRW_SURFACEFORMAT_B10G10R10X2_UNORM}, +#endif + {PICT_a4r4g4b4, BRW_SURFACEFORMAT_B4G4R4A4_UNORM}, +}; + +static void i965_get_blend_cntl(int op, PicturePtr mask, uint32_t dst_format, + uint32_t * sblend, uint32_t * dblend) +{ + + *sblend = i965_blend_op[op].src_blend; + *dblend = i965_blend_op[op].dst_blend; + + /* If there's no dst alpha channel, adjust the blend op so that we'll treat + * it as always 1. + */ + if (PICT_FORMAT_A(dst_format) == 0 && i965_blend_op[op].dst_alpha) { + if (*sblend == BRW_BLENDFACTOR_DST_ALPHA) + *sblend = BRW_BLENDFACTOR_ONE; + else if (*sblend == BRW_BLENDFACTOR_INV_DST_ALPHA) + *sblend = BRW_BLENDFACTOR_ZERO; + } + + /* If the source alpha is being used, then we should only be in a case where + * the source blend factor is 0, and the source blend value is the mask + * channels multiplied by the source picture's alpha. + */ + if (mask && mask->componentAlpha && PICT_FORMAT_RGB(mask->format) + && i965_blend_op[op].src_alpha) { + if (*dblend == BRW_BLENDFACTOR_SRC_ALPHA) { + *dblend = BRW_BLENDFACTOR_SRC_COLOR; + } else if (*dblend == BRW_BLENDFACTOR_INV_SRC_ALPHA) { + *dblend = BRW_BLENDFACTOR_INV_SRC_COLOR; + } + } + +} + +static uint32_t i965_get_dest_format(PicturePtr dest_picture) +{ + switch (dest_picture->format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; +#if XORG_VERSION_CURRENT >= 10699900 + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + return BRW_SURFACEFORMAT_B10G10R10A2_UNORM; +#endif + case PICT_r5g6b5: + return BRW_SURFACEFORMAT_B5G6R5_UNORM; + case PICT_x1r5g5b5: + case PICT_a1r5g5b5: + return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + case PICT_a8: + return BRW_SURFACEFORMAT_A8_UNORM; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + default: + return -1; + } +} + +Bool +i965_check_composite(int op, + PicturePtr source_picture, + PicturePtr mask_picture, + PicturePtr dest_picture, + int width, int height) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen); + + /* Check for unsupported compositing operations. */ + if (op >= sizeof(i965_blend_op) / sizeof(i965_blend_op[0])) { + intel_debug_fallback(scrn, + "Unsupported Composite op 0x%x\n", op); + return FALSE; + } + + if (mask_picture && mask_picture->componentAlpha && + PICT_FORMAT_RGB(mask_picture->format)) { + /* Check if it's component alpha that relies on a source alpha and on + * the source value. We can only get one of those into the single + * source value that we get to blend with. + */ + if (i965_blend_op[op].src_alpha && + (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) { + intel_debug_fallback(scrn, + "Component alpha not supported " + "with source alpha and source " + "value blending.\n"); + return FALSE; + } + } + + if (i965_get_dest_format(dest_picture) == -1) { + intel_debug_fallback(scrn, "Usupported Color buffer format 0x%x\n", + (int)dest_picture->format); + return FALSE; + } + + return TRUE; +} + +Bool +i965_check_composite_texture(ScreenPtr screen, PicturePtr picture) +{ + if (picture->repeatType > RepeatReflect) { + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_debug_fallback(scrn, + "extended repeat (%d) not supported\n", + picture->repeatType); + return FALSE; + } + + if (picture->filter != PictFilterNearest && + picture->filter != PictFilterBilinear) { + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_debug_fallback(scrn, "Unsupported filter 0x%x\n", + picture->filter); + return FALSE; + } + + if (picture->pDrawable) { + int w, h, i; + + w = picture->pDrawable->width; + h = picture->pDrawable->height; + if ((w > 8192) || (h > 8192)) { + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_debug_fallback(scrn, + "Picture w/h too large (%dx%d)\n", + w, h); + return FALSE; + } + + for (i = 0; + i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]); + i++) { + if (i965_tex_formats[i].fmt == picture->format) + break; + } + if (i == sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0])) + { + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_debug_fallback(scrn, + "Unsupported picture format " + "0x%x\n", + (int)picture->format); + return FALSE; + } + + return TRUE; + } + + return FALSE; +} + + +#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) + +/* Set up a default static partitioning of the URB, which is supposed to + * allow anything we would want to do, at potentially lower performance. + */ +#define URB_CS_ENTRY_SIZE 0 +#define URB_CS_ENTRIES 0 + +#define URB_VS_ENTRY_SIZE 1 // each 512-bit row +#define URB_VS_ENTRIES 8 // we needs at least 8 entries + +#define URB_GS_ENTRY_SIZE 0 +#define URB_GS_ENTRIES 0 + +#define URB_CLIP_ENTRY_SIZE 0 +#define URB_CLIP_ENTRIES 0 + +#define URB_SF_ENTRY_SIZE 2 +#define URB_SF_ENTRIES 1 + +/* + * this program computes dA/dx and dA/dy for the texture coordinates along + * with the base texture coordinate. It was extracted from the Mesa driver + */ + +#define SF_KERNEL_NUM_GRF 16 +#define SF_MAX_THREADS 2 + +static const uint32_t sf_kernel_static[][4] = { +#include "exa_sf.g4b" +}; + +static const uint32_t sf_kernel_mask_static[][4] = { +#include "exa_sf_mask.g4b" +}; + +/* ps kernels */ +#define PS_KERNEL_NUM_GRF 32 +#define PS_MAX_THREADS 48 + +static const uint32_t ps_kernel_nomask_affine_static[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_nomask_projective_static[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_projective.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_maskca_affine_static[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_mask_affine.g4b" +#include "exa_wm_mask_sample_argb.g4b" +#include "exa_wm_ca.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_maskca_projective_static[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_projective.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_mask_projective.g4b" +#include "exa_wm_mask_sample_argb.g4b" +#include "exa_wm_ca.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_affine_static[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_a.g4b" +#include "exa_wm_mask_affine.g4b" +#include "exa_wm_mask_sample_argb.g4b" +#include "exa_wm_ca_srcalpha.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_projective_static[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_projective.g4b" +#include "exa_wm_src_sample_a.g4b" +#include "exa_wm_mask_projective.g4b" +#include "exa_wm_mask_sample_argb.g4b" +#include "exa_wm_ca_srcalpha.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_masknoca_affine_static[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_mask_affine.g4b" +#include "exa_wm_mask_sample_a.g4b" +#include "exa_wm_noca.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_masknoca_projective_static[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_projective.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_mask_projective.g4b" +#include "exa_wm_mask_sample_a.g4b" +#include "exa_wm_noca.g4b" +#include "exa_wm_write.g4b" +}; + +/* new programs for Ironlake */ +static const uint32_t sf_kernel_static_gen5[][4] = { +#include "exa_sf.g4b.gen5" +}; + +static const uint32_t sf_kernel_mask_static_gen5[][4] = { +#include "exa_sf_mask.g4b.gen5" +}; + +static const uint32_t ps_kernel_nomask_affine_static_gen5[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_affine.g4b.gen5" +#include "exa_wm_src_sample_argb.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_nomask_projective_static_gen5[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_projective.g4b.gen5" +#include "exa_wm_src_sample_argb.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_maskca_affine_static_gen5[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_affine.g4b.gen5" +#include "exa_wm_src_sample_argb.g4b.gen5" +#include "exa_wm_mask_affine.g4b.gen5" +#include "exa_wm_mask_sample_argb.g4b.gen5" +#include "exa_wm_ca.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_maskca_projective_static_gen5[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_projective.g4b.gen5" +#include "exa_wm_src_sample_argb.g4b.gen5" +#include "exa_wm_mask_projective.g4b.gen5" +#include "exa_wm_mask_sample_argb.g4b.gen5" +#include "exa_wm_ca.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen5[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_affine.g4b.gen5" +#include "exa_wm_src_sample_a.g4b.gen5" +#include "exa_wm_mask_affine.g4b.gen5" +#include "exa_wm_mask_sample_argb.g4b.gen5" +#include "exa_wm_ca_srcalpha.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen5[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_projective.g4b.gen5" +#include "exa_wm_src_sample_a.g4b.gen5" +#include "exa_wm_mask_projective.g4b.gen5" +#include "exa_wm_mask_sample_argb.g4b.gen5" +#include "exa_wm_ca_srcalpha.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_masknoca_affine_static_gen5[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_affine.g4b.gen5" +#include "exa_wm_src_sample_argb.g4b.gen5" +#include "exa_wm_mask_affine.g4b.gen5" +#include "exa_wm_mask_sample_a.g4b.gen5" +#include "exa_wm_noca.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_masknoca_projective_static_gen5[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_projective.g4b.gen5" +#include "exa_wm_src_sample_argb.g4b.gen5" +#include "exa_wm_mask_projective.g4b.gen5" +#include "exa_wm_mask_sample_a.g4b.gen5" +#include "exa_wm_noca.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +/* programs for GEN6 */ +static const uint32_t ps_kernel_nomask_affine_static_gen6[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_nomask_projective_static_gen6[][4] = { +#include "exa_wm_src_projective.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_maskca_affine_static_gen6[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_mask_affine.g6b" +#include "exa_wm_mask_sample_argb.g6b" +#include "exa_wm_ca.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_maskca_projective_static_gen6[][4] = { +#include "exa_wm_src_projective.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_mask_projective.g6b" +#include "exa_wm_mask_sample_argb.g6b" +#include "exa_wm_ca.g4b.gen5" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen6[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_a.g6b" +#include "exa_wm_mask_affine.g6b" +#include "exa_wm_mask_sample_argb.g6b" +#include "exa_wm_ca_srcalpha.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen6[][4] = { +#include "exa_wm_src_projective.g6b" +#include "exa_wm_src_sample_a.g6b" +#include "exa_wm_mask_projective.g6b" +#include "exa_wm_mask_sample_argb.g6b" +#include "exa_wm_ca_srcalpha.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_masknoca_affine_static_gen6[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_mask_affine.g6b" +#include "exa_wm_mask_sample_a.g6b" +#include "exa_wm_noca.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_masknoca_projective_static_gen6[][4] = { +#include "exa_wm_src_projective.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_mask_projective.g6b" +#include "exa_wm_mask_sample_a.g6b" +#include "exa_wm_noca.g6b" +#include "exa_wm_write.g6b" +}; + +/* programs for GEN7 */ +static const uint32_t ps_kernel_nomask_affine_static_gen7[][4] = { +#include "exa_wm_src_affine.g7b" +#include "exa_wm_src_sample_argb.g7b" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_nomask_projective_static_gen7[][4] = { +#include "exa_wm_src_projective.g7b" +#include "exa_wm_src_sample_argb.g7b" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_maskca_affine_static_gen7[][4] = { +#include "exa_wm_src_affine.g7b" +#include "exa_wm_src_sample_argb.g7b" +#include "exa_wm_mask_affine.g7b" +#include "exa_wm_mask_sample_argb.g7b" +#include "exa_wm_ca.g6b" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_maskca_projective_static_gen7[][4] = { +#include "exa_wm_src_projective.g7b" +#include "exa_wm_src_sample_argb.g7b" +#include "exa_wm_mask_projective.g7b" +#include "exa_wm_mask_sample_argb.g7b" +#include "exa_wm_ca.g4b.gen5" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen7[][4] = { +#include "exa_wm_src_affine.g7b" +#include "exa_wm_src_sample_a.g7b" +#include "exa_wm_mask_affine.g7b" +#include "exa_wm_mask_sample_argb.g7b" +#include "exa_wm_ca_srcalpha.g6b" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen7[][4] = { +#include "exa_wm_src_projective.g7b" +#include "exa_wm_src_sample_a.g7b" +#include "exa_wm_mask_projective.g7b" +#include "exa_wm_mask_sample_argb.g7b" +#include "exa_wm_ca_srcalpha.g6b" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_masknoca_affine_static_gen7[][4] = { +#include "exa_wm_src_affine.g7b" +#include "exa_wm_src_sample_argb.g7b" +#include "exa_wm_mask_affine.g7b" +#include "exa_wm_mask_sample_a.g7b" +#include "exa_wm_noca.g6b" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_masknoca_projective_static_gen7[][4] = { +#include "exa_wm_src_projective.g7b" +#include "exa_wm_src_sample_argb.g7b" +#include "exa_wm_mask_projective.g7b" +#include "exa_wm_mask_sample_a.g7b" +#include "exa_wm_noca.g6b" +#include "exa_wm_write.g7b" +}; + + +typedef enum { + SS_INVALID_FILTER = -1, + SS_FILTER_NEAREST, + SS_FILTER_BILINEAR, + FILTER_COUNT, +} sampler_state_filter_t; + +typedef enum { + SS_INVALID_EXTEND = -1, + SS_EXTEND_NONE, + SS_EXTEND_REPEAT, + SS_EXTEND_PAD, + SS_EXTEND_REFLECT, + EXTEND_COUNT, +} sampler_state_extend_t; + +typedef enum { + WM_KERNEL_NOMASK_AFFINE, + WM_KERNEL_NOMASK_PROJECTIVE, + WM_KERNEL_MASKCA_AFFINE, + WM_KERNEL_MASKCA_PROJECTIVE, + WM_KERNEL_MASKCA_SRCALPHA_AFFINE, + WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, + WM_KERNEL_MASKNOCA_AFFINE, + WM_KERNEL_MASKNOCA_PROJECTIVE, + KERNEL_COUNT +} wm_kernel_t; + +#define KERNEL(kernel_enum, kernel, masked) \ + [kernel_enum] = {&kernel, sizeof(kernel), masked} +struct wm_kernel_info { + const void *data; + unsigned int size; + Bool has_mask; +}; + +static const struct wm_kernel_info wm_kernels_gen4[] = { + KERNEL(WM_KERNEL_NOMASK_AFFINE, + ps_kernel_nomask_affine_static, FALSE), + KERNEL(WM_KERNEL_NOMASK_PROJECTIVE, + ps_kernel_nomask_projective_static, FALSE), + KERNEL(WM_KERNEL_MASKCA_AFFINE, + ps_kernel_maskca_affine_static, TRUE), + KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, + ps_kernel_maskca_projective_static, TRUE), + KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE, + ps_kernel_maskca_srcalpha_affine_static, TRUE), + KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, + ps_kernel_maskca_srcalpha_projective_static, TRUE), + KERNEL(WM_KERNEL_MASKNOCA_AFFINE, + ps_kernel_masknoca_affine_static, TRUE), + KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE, + ps_kernel_masknoca_projective_static, TRUE), +}; + +static const struct wm_kernel_info wm_kernels_gen5[] = { + KERNEL(WM_KERNEL_NOMASK_AFFINE, + ps_kernel_nomask_affine_static_gen5, FALSE), + KERNEL(WM_KERNEL_NOMASK_PROJECTIVE, + ps_kernel_nomask_projective_static_gen5, FALSE), + KERNEL(WM_KERNEL_MASKCA_AFFINE, + ps_kernel_maskca_affine_static_gen5, TRUE), + KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, + ps_kernel_maskca_projective_static_gen5, TRUE), + KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE, + ps_kernel_maskca_srcalpha_affine_static_gen5, TRUE), + KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, + ps_kernel_maskca_srcalpha_projective_static_gen5, TRUE), + KERNEL(WM_KERNEL_MASKNOCA_AFFINE, + ps_kernel_masknoca_affine_static_gen5, TRUE), + KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE, + ps_kernel_masknoca_projective_static_gen5, TRUE), +}; + +static const struct wm_kernel_info wm_kernels_gen6[] = { + KERNEL(WM_KERNEL_NOMASK_AFFINE, + ps_kernel_nomask_affine_static_gen6, FALSE), + KERNEL(WM_KERNEL_NOMASK_PROJECTIVE, + ps_kernel_nomask_projective_static_gen6, FALSE), + KERNEL(WM_KERNEL_MASKCA_AFFINE, + ps_kernel_maskca_affine_static_gen6, TRUE), + KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, + ps_kernel_maskca_projective_static_gen6, TRUE), + KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE, + ps_kernel_maskca_srcalpha_affine_static_gen6, TRUE), + KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, + ps_kernel_maskca_srcalpha_projective_static_gen6, TRUE), + KERNEL(WM_KERNEL_MASKNOCA_AFFINE, + ps_kernel_masknoca_affine_static_gen6, TRUE), + KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE, + ps_kernel_masknoca_projective_static_gen6, TRUE), +}; + +static const struct wm_kernel_info wm_kernels_gen7[] = { + KERNEL(WM_KERNEL_NOMASK_AFFINE, + ps_kernel_nomask_affine_static_gen7, FALSE), + KERNEL(WM_KERNEL_NOMASK_PROJECTIVE, + ps_kernel_nomask_projective_static_gen7, FALSE), + KERNEL(WM_KERNEL_MASKCA_AFFINE, + ps_kernel_maskca_affine_static_gen7, TRUE), + KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, + ps_kernel_maskca_projective_static_gen7, TRUE), + KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE, + ps_kernel_maskca_srcalpha_affine_static_gen7, TRUE), + KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, + ps_kernel_maskca_srcalpha_projective_static_gen7, TRUE), + KERNEL(WM_KERNEL_MASKNOCA_AFFINE, + ps_kernel_masknoca_affine_static_gen7, TRUE), + KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE, + ps_kernel_masknoca_projective_static_gen7, TRUE), +}; + +#undef KERNEL + +typedef struct _brw_cc_unit_state_padded { + struct brw_cc_unit_state state; + char pad[64 - sizeof(struct brw_cc_unit_state)]; +} brw_cc_unit_state_padded; + +#ifndef MAX +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#endif +#define SURFACE_STATE_PADDED_SIZE ALIGN(MAX(sizeof(struct brw_surface_state), sizeof(struct gen7_surface_state)), 32) + +struct gen4_cc_unit_state { + /* Index by [src_blend][dst_blend] */ + brw_cc_unit_state_padded cc_state[BRW_BLENDFACTOR_COUNT][BRW_BLENDFACTOR_COUNT]; +}; + +typedef struct gen4_composite_op { + int op; + sampler_state_filter_t src_filter; + sampler_state_filter_t mask_filter; + sampler_state_extend_t src_extend; + sampler_state_extend_t mask_extend; + Bool is_affine; + wm_kernel_t wm_kernel; + int vertex_id; +} gen4_composite_op; + +/** Private data for gen4 render accel implementation. */ +struct gen4_render_state { + drm_intel_bo *vs_state_bo; + drm_intel_bo *sf_state_bo; + drm_intel_bo *sf_mask_state_bo; + drm_intel_bo *cc_state_bo; + drm_intel_bo *wm_state_bo[KERNEL_COUNT] + [FILTER_COUNT] [EXTEND_COUNT] + [FILTER_COUNT] [EXTEND_COUNT]; + drm_intel_bo *wm_kernel_bo[KERNEL_COUNT]; + + drm_intel_bo *cc_vp_bo; + drm_intel_bo *gen6_blend_bo; + drm_intel_bo *gen6_depth_stencil_bo; + drm_intel_bo *ps_sampler_state_bo[FILTER_COUNT] + [EXTEND_COUNT] + [FILTER_COUNT] + [EXTEND_COUNT]; + gen4_composite_op composite_op; +}; + +static void gen6_emit_composite_state(struct intel_screen_private *intel); +static void gen6_render_state_init(ScrnInfoPtr scrn); + +/** + * Sets up the SF state pointing at an SF kernel. + * + * The SF kernel does coord interp: for each attribute, + * calculate dA/dx and dA/dy. Hand these interpolation coefficients + * back to SF which then hands pixels off to WM. + */ +static drm_intel_bo *gen4_create_sf_state(intel_screen_private *intel, + drm_intel_bo * kernel_bo) +{ + struct brw_sf_unit_state *sf_state; + drm_intel_bo *sf_state_bo; + int ret; + + sf_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 SF state", + sizeof(*sf_state), 4096); + assert(sf_state_bo); + + ret = drm_intel_bo_map(sf_state_bo, TRUE); + assert(ret == 0); + + sf_state = memset(sf_state_bo->virtual, 0, sizeof(*sf_state)); + sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF); + sf_state->thread0.kernel_start_pointer = + intel_emit_reloc(sf_state_bo, + offsetof(struct brw_sf_unit_state, thread0), + kernel_bo, sf_state->thread0.grf_reg_count << 1, + I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; + sf_state->sf1.single_program_flow = 1; + sf_state->sf1.binding_table_entry_count = 0; + sf_state->sf1.thread_priority = 0; + sf_state->sf1.floating_point_mode = 0; /* Mesa does this */ + sf_state->sf1.illegal_op_exception_enable = 1; + sf_state->sf1.mask_stack_exception_enable = 1; + sf_state->sf1.sw_exception_enable = 1; + sf_state->thread2.per_thread_scratch_space = 0; + /* scratch space is not used in our kernel */ + sf_state->thread2.scratch_space_base_pointer = 0; + sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */ + sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ + sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ + /* don't smash vertex header, read start from dw8 */ + sf_state->thread3.urb_entry_read_offset = 1; + sf_state->thread3.dispatch_grf_start_reg = 3; + sf_state->thread4.max_threads = SF_MAX_THREADS - 1; + sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; + sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES; + sf_state->sf5.viewport_transform = FALSE; /* skip viewport */ + sf_state->sf6.cull_mode = BRW_CULLMODE_NONE; + sf_state->sf6.scissor = 0; + sf_state->sf7.trifan_pv = 2; + sf_state->sf6.dest_org_vbias = 0x8; + sf_state->sf6.dest_org_hbias = 0x8; + + drm_intel_bo_unmap(sf_state_bo); + + return sf_state_bo; + (void)ret; +} + +static drm_intel_bo *sampler_border_color_create(intel_screen_private *intel) +{ + struct brw_sampler_legacy_border_color sampler_border_color; + + /* Set up the sampler border color (always transparent black) */ + memset(&sampler_border_color, 0, sizeof(sampler_border_color)); + sampler_border_color.color[0] = 0; /* R */ + sampler_border_color.color[1] = 0; /* G */ + sampler_border_color.color[2] = 0; /* B */ + sampler_border_color.color[3] = 0; /* A */ + + return intel_bo_alloc_for_data(intel, + &sampler_border_color, + sizeof(sampler_border_color), + "gen4 render sampler border color"); +} + +static void +gen4_sampler_state_init(drm_intel_bo * sampler_state_bo, + struct brw_sampler_state *sampler_state, + sampler_state_filter_t filter, + sampler_state_extend_t extend, + drm_intel_bo * border_color_bo) +{ + uint32_t sampler_state_offset; + + sampler_state_offset = (char *)sampler_state - + (char *)sampler_state_bo->virtual; + + /* PS kernel use this sampler */ + memset(sampler_state, 0, sizeof(*sampler_state)); + + sampler_state->ss0.lod_preclamp = 1; /* GL mode */ + + /* We use the legacy mode to get the semantics specified by + * the Render extension. */ + sampler_state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY; + + switch (filter) { + default: + case SS_FILTER_NEAREST: + sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST; + sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + break; + case SS_FILTER_BILINEAR: + sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + } + + switch (extend) { + default: + case SS_EXTEND_NONE: + sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; + break; + case SS_EXTEND_REPEAT: + sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; + sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; + sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; + break; + case SS_EXTEND_PAD: + sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + break; + case SS_EXTEND_REFLECT: + sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR; + sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR; + sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR; + break; + } + + sampler_state->ss2.border_color_pointer = + intel_emit_reloc(sampler_state_bo, sampler_state_offset + + offsetof(struct brw_sampler_state, ss2), + border_color_bo, 0, + I915_GEM_DOMAIN_SAMPLER, 0) >> 5; + + sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ +} + +static void +gen7_sampler_state_init(drm_intel_bo * sampler_state_bo, + struct gen7_sampler_state *sampler_state, + sampler_state_filter_t filter, + sampler_state_extend_t extend, + drm_intel_bo * border_color_bo) +{ + uint32_t sampler_state_offset; + + sampler_state_offset = (char *)sampler_state - + (char *)sampler_state_bo->virtual; + + /* PS kernel use this sampler */ + memset(sampler_state, 0, sizeof(*sampler_state)); + + sampler_state->ss0.lod_preclamp = 1; /* GL mode */ + + /* We use the legacy mode to get the semantics specified by + * the Render extension. */ + sampler_state->ss0.default_color_mode = BRW_BORDER_COLOR_MODE_LEGACY; + + switch (filter) { + default: + case SS_FILTER_NEAREST: + sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST; + sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + break; + case SS_FILTER_BILINEAR: + sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + } + + switch (extend) { + default: + case SS_EXTEND_NONE: + sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; + break; + case SS_EXTEND_REPEAT: + sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; + sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; + sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; + break; + case SS_EXTEND_PAD: + sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + break; + case SS_EXTEND_REFLECT: + sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR; + sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR; + sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR; + break; + } + + sampler_state->ss2.default_color_pointer = + intel_emit_reloc(sampler_state_bo, sampler_state_offset + + offsetof(struct gen7_sampler_state, ss2), + border_color_bo, 0, + I915_GEM_DOMAIN_SAMPLER, 0) >> 5; + + sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ +} + + + +static drm_intel_bo *gen4_create_sampler_state(intel_screen_private *intel, + sampler_state_filter_t src_filter, + sampler_state_extend_t src_extend, + sampler_state_filter_t mask_filter, + sampler_state_extend_t mask_extend, + drm_intel_bo * border_color_bo) +{ + drm_intel_bo *sampler_state_bo; + struct brw_sampler_state *sampler_state; + int ret; + + sampler_state_bo = + drm_intel_bo_alloc(intel->bufmgr, "gen4 sampler state", + sizeof(struct brw_sampler_state) * 2, 4096); + assert(sampler_state_bo); + + ret = drm_intel_bo_map(sampler_state_bo, TRUE); + assert(ret == 0); + + sampler_state = sampler_state_bo->virtual; + + gen4_sampler_state_init(sampler_state_bo, + &sampler_state[0], + src_filter, src_extend, border_color_bo); + gen4_sampler_state_init(sampler_state_bo, + &sampler_state[1], + mask_filter, mask_extend, border_color_bo); + + drm_intel_bo_unmap(sampler_state_bo); + + return sampler_state_bo; + (void)ret; +} + +static drm_intel_bo * +gen7_create_sampler_state(intel_screen_private *intel, + sampler_state_filter_t src_filter, + sampler_state_extend_t src_extend, + sampler_state_filter_t mask_filter, + sampler_state_extend_t mask_extend, + drm_intel_bo * border_color_bo) +{ + drm_intel_bo *sampler_state_bo; + struct gen7_sampler_state *sampler_state; + int ret; + + sampler_state_bo = + drm_intel_bo_alloc(intel->bufmgr, "gen7 sampler state", + sizeof(struct gen7_sampler_state) * 2, 4096); + assert(sampler_state_bo); + + ret = drm_intel_bo_map(sampler_state_bo, TRUE); + assert(ret == 0); + + sampler_state = sampler_state_bo->virtual; + + gen7_sampler_state_init(sampler_state_bo, + &sampler_state[0], + src_filter, src_extend, border_color_bo); + gen7_sampler_state_init(sampler_state_bo, + &sampler_state[1], + mask_filter, mask_extend, border_color_bo); + + drm_intel_bo_unmap(sampler_state_bo); + + return sampler_state_bo; + (void)ret; +} + +static inline drm_intel_bo * +i965_create_sampler_state(intel_screen_private *intel, + sampler_state_filter_t src_filter, + sampler_state_extend_t src_extend, + sampler_state_filter_t mask_filter, + sampler_state_extend_t mask_extend, + drm_intel_bo * border_color_bo) +{ + if (INTEL_INFO(intel)->gen < 070) + return gen4_create_sampler_state(intel, src_filter, src_extend, + mask_filter, mask_extend, + border_color_bo); + return gen7_create_sampler_state(intel, src_filter, src_extend, + mask_filter, mask_extend, + border_color_bo); +} + + +static void +cc_state_init(drm_intel_bo * cc_state_bo, + uint32_t cc_state_offset, + int src_blend, int dst_blend, drm_intel_bo * cc_vp_bo) +{ + struct brw_cc_unit_state *cc_state; + + cc_state = (struct brw_cc_unit_state *)((char *)cc_state_bo->virtual + + cc_state_offset); + + memset(cc_state, 0, sizeof(*cc_state)); + cc_state->cc0.stencil_enable = 0; /* disable stencil */ + cc_state->cc2.depth_test = 0; /* disable depth test */ + cc_state->cc2.logicop_enable = 0; /* disable logic op */ + cc_state->cc3.ia_blend_enable = 0; /* blend alpha same as colors */ + cc_state->cc3.blend_enable = 1; /* enable color blend */ + cc_state->cc3.alpha_test = 0; /* disable alpha test */ + + cc_state->cc4.cc_viewport_state_offset = + intel_emit_reloc(cc_state_bo, cc_state_offset + + offsetof(struct brw_cc_unit_state, cc4), + cc_vp_bo, 0, I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; + + cc_state->cc5.dither_enable = 0; /* disable dither */ + cc_state->cc5.logicop_func = 0xc; /* COPY */ + cc_state->cc5.statistics_enable = 1; + cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD; + + /* Fill in alpha blend factors same as color, for the future. */ + cc_state->cc5.ia_src_blend_factor = src_blend; + cc_state->cc5.ia_dest_blend_factor = dst_blend; + + cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD; + cc_state->cc6.clamp_post_alpha_blend = 1; + cc_state->cc6.clamp_pre_alpha_blend = 1; + cc_state->cc6.clamp_range = 0; /* clamp range [0,1] */ + + cc_state->cc6.src_blend_factor = src_blend; + cc_state->cc6.dest_blend_factor = dst_blend; +} + +static drm_intel_bo *gen4_create_wm_state(intel_screen_private *intel, + Bool has_mask, + drm_intel_bo * kernel_bo, + drm_intel_bo * sampler_bo) +{ + struct brw_wm_unit_state *state; + drm_intel_bo *wm_state_bo; + int ret; + + wm_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 WM state", + sizeof(*state), 4096); + assert(wm_state_bo); + + ret = drm_intel_bo_map(wm_state_bo, TRUE); + assert(ret == 0); + + state = memset(wm_state_bo->virtual, 0, sizeof(*state)); + state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); + state->thread0.kernel_start_pointer = + intel_emit_reloc(wm_state_bo, + offsetof(struct brw_wm_unit_state, thread0), + kernel_bo, state->thread0.grf_reg_count << 1, + I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; + + state->thread1.single_program_flow = 0; + + /* scratch space is not used in our kernel */ + state->thread2.scratch_space_base_pointer = 0; + state->thread2.per_thread_scratch_space = 0; + + state->thread3.const_urb_entry_read_length = 0; + state->thread3.const_urb_entry_read_offset = 0; + + state->thread3.urb_entry_read_offset = 0; + /* wm kernel use urb from 3, see wm_program in compiler module */ + state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */ + + if (IS_GEN5(intel)) + state->wm4.sampler_count = 0; /* hardware requirement */ + else + state->wm4.sampler_count = 1; /* 1-4 samplers used */ + + state->wm4.sampler_state_pointer = + intel_emit_reloc(wm_state_bo, + offsetof(struct brw_wm_unit_state, wm4), + sampler_bo, + state->wm4.sampler_count << 2, + I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; + state->wm5.max_threads = PS_MAX_THREADS - 1; + state->wm5.transposed_urb_read = 0; + state->wm5.thread_dispatch_enable = 1; + /* just use 16-pixel dispatch (4 subspans), don't need to change kernel + * start point + */ + state->wm5.enable_16_pix = 1; + state->wm5.enable_8_pix = 0; + state->wm5.early_depth_test = 1; + + /* Each pair of attributes (src/mask coords) is two URB entries */ + if (has_mask) { + state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */ + state->thread3.urb_entry_read_length = 4; + } else { + state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ + state->thread3.urb_entry_read_length = 2; + } + + /* binding table entry count is only used for prefetching, and it has to + * be set 0 for Ironlake + */ + if (IS_GEN5(intel)) + state->thread1.binding_table_entry_count = 0; + + drm_intel_bo_unmap(wm_state_bo); + + return wm_state_bo; + (void)ret; +} + +static drm_intel_bo *gen4_create_cc_viewport(intel_screen_private *intel) +{ + drm_intel_bo *bo; + struct brw_cc_viewport vp; + int ret; + + vp.min_depth = -1.e35; + vp.max_depth = 1.e35; + + bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 render unit state", + sizeof(vp), 4096); + assert(bo); + + ret = drm_intel_bo_subdata(bo, 0, sizeof(vp), &vp); + assert(ret == 0); + + return bo; + (void)ret; +} + +static drm_intel_bo *gen4_create_vs_unit_state(intel_screen_private *intel) +{ + struct brw_vs_unit_state vs_state; + memset(&vs_state, 0, sizeof(vs_state)); + + /* Set up the vertex shader to be disabled (passthrough) */ + if (IS_GEN5(intel)) + vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; /* hardware requirement */ + else + vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES; + vs_state.thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; + vs_state.vs6.vs_enable = 0; + vs_state.vs6.vert_cache_disable = 1; + + return intel_bo_alloc_for_data(intel, &vs_state, sizeof(vs_state), + "gen4 render VS state"); +} + +/** + * Set up all combinations of cc state: each blendfactor for source and + * dest. + */ +static drm_intel_bo *gen4_create_cc_unit_state(intel_screen_private *intel) +{ + drm_intel_bo *cc_state_bo, *cc_vp_bo; + int i, j, ret; + + cc_vp_bo = gen4_create_cc_viewport(intel); + + cc_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 CC state", + sizeof(struct gen4_cc_unit_state), + 4096); + assert(cc_state_bo); + + ret = drm_intel_bo_map(cc_state_bo, TRUE); + assert(ret == 0); + + for (i = 0; i < BRW_BLENDFACTOR_COUNT; i++) { + for (j = 0; j < BRW_BLENDFACTOR_COUNT; j++) { + cc_state_init(cc_state_bo, + offsetof(struct gen4_cc_unit_state, + cc_state[i][j].state), + i, j, cc_vp_bo); + } + } + drm_intel_bo_unmap(cc_state_bo); + + drm_intel_bo_unreference(cc_vp_bo); + + return cc_state_bo; + (void)ret; +} + +static uint32_t i965_get_card_format(PicturePtr picture) +{ + int i; + + for (i = 0; i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]); + i++) { + if (i965_tex_formats[i].fmt == picture->format) + break; + } + assert(i != sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0])); + + return i965_tex_formats[i].card_fmt; +} + +static sampler_state_filter_t sampler_state_filter_from_picture(int filter) +{ + switch (filter) { + case PictFilterNearest: + return SS_FILTER_NEAREST; + case PictFilterBilinear: + return SS_FILTER_BILINEAR; + default: + return SS_INVALID_FILTER; + } +} + +static sampler_state_extend_t sampler_state_extend_from_picture(int repeat_type) +{ + switch (repeat_type) { + case RepeatNone: + return SS_EXTEND_NONE; + case RepeatNormal: + return SS_EXTEND_REPEAT; + case RepeatPad: + return SS_EXTEND_PAD; + case RepeatReflect: + return SS_EXTEND_REFLECT; + default: + return SS_INVALID_EXTEND; + } +} + +/** + * Sets up the common fields for a surface state buffer for the given + * picture in the given surface state buffer. + */ +static int +gen4_set_picture_surface_state(intel_screen_private *intel, + PicturePtr picture, PixmapPtr pixmap, + Bool is_dst) +{ + struct intel_pixmap *priv = intel_get_pixmap_private(pixmap); + struct brw_surface_state *ss; + uint32_t write_domain, read_domains; + int offset; + + if (is_dst) { + write_domain = I915_GEM_DOMAIN_RENDER; + read_domains = I915_GEM_DOMAIN_RENDER; + } else { + write_domain = 0; + read_domains = I915_GEM_DOMAIN_SAMPLER; + } + intel_batch_mark_pixmap_domains(intel, priv, + read_domains, write_domain); + ss = (struct brw_surface_state *) + (intel->surface_data + intel->surface_used); + + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = BRW_SURFACE_2D; + if (is_dst) + ss->ss0.surface_format = i965_get_dest_format(picture); + else + ss->ss0.surface_format = i965_get_card_format(picture); + + ss->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; + ss->ss0.color_blend = 1; + ss->ss1.base_addr = priv->bo->offset; + + ss->ss2.height = pixmap->drawable.height - 1; + ss->ss2.width = pixmap->drawable.width - 1; + ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1; + ss->ss3.tile_walk = 0; /* Tiled X */ + ss->ss3.tiled_surface = intel_pixmap_tiled(pixmap) ? 1 : 0; + + dri_bo_emit_reloc(intel->surface_bo, + read_domains, write_domain, + 0, + intel->surface_used + + offsetof(struct brw_surface_state, ss1), + priv->bo); + + offset = intel->surface_used; + intel->surface_used += SURFACE_STATE_PADDED_SIZE; + + return offset; +} + +static int +gen7_set_picture_surface_state(intel_screen_private *intel, + PicturePtr picture, PixmapPtr pixmap, + Bool is_dst) +{ + struct intel_pixmap *priv = intel_get_pixmap_private(pixmap); + struct gen7_surface_state *ss; + uint32_t write_domain, read_domains; + int offset; + + if (is_dst) { + write_domain = I915_GEM_DOMAIN_RENDER; + read_domains = I915_GEM_DOMAIN_RENDER; + } else { + write_domain = 0; + read_domains = I915_GEM_DOMAIN_SAMPLER; + } + intel_batch_mark_pixmap_domains(intel, priv, + read_domains, write_domain); + ss = (struct gen7_surface_state *) + (intel->surface_data + intel->surface_used); + + memset(ss, 0, sizeof(*ss)); + ss->ss0.surface_type = BRW_SURFACE_2D; + if (is_dst) + ss->ss0.surface_format = i965_get_dest_format(picture); + else + ss->ss0.surface_format = i965_get_card_format(picture); + + ss->ss0.tile_walk = 0; /* Tiled X */ + ss->ss0.tiled_surface = intel_pixmap_tiled(pixmap) ? 1 : 0; + ss->ss1.base_addr = priv->bo->offset; + + ss->ss2.height = pixmap->drawable.height - 1; + ss->ss2.width = pixmap->drawable.width - 1; + ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1; + + if (IS_HSW(intel)) { + ss->ss7.shader_chanel_select_r = HSW_SCS_RED; + ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN; + ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE; + ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA; + } + + dri_bo_emit_reloc(intel->surface_bo, + read_domains, write_domain, + 0, + intel->surface_used + + offsetof(struct gen7_surface_state, ss1), + priv->bo); + + offset = intel->surface_used; + intel->surface_used += SURFACE_STATE_PADDED_SIZE; + + return offset; +} + +static inline int +i965_set_picture_surface_state(intel_screen_private *intel, + PicturePtr picture, PixmapPtr pixmap, + Bool is_dst) +{ + if (INTEL_INFO(intel)->gen < 070) + return gen4_set_picture_surface_state(intel, picture, pixmap, is_dst); + return gen7_set_picture_surface_state(intel, picture, pixmap, is_dst); +} + +static void gen4_composite_vertex_elements(struct intel_screen_private *intel) +{ + struct gen4_render_state *render_state = intel->gen4_render_state; + gen4_composite_op *composite_op = &render_state->composite_op; + Bool has_mask = intel->render_mask != NULL; + Bool is_affine = composite_op->is_affine; + /* + * number of extra parameters per vertex + */ + int nelem = has_mask ? 2 : 1; + /* + * size of extra parameters: + * 3 for homogenous (xyzw) + * 2 for cartesian (xy) + */ + int selem = is_affine ? 2 : 3; + uint32_t w_component; + uint32_t src_format; + int id; + + id = has_mask << 1 | is_affine; + + if (composite_op->vertex_id == id) + return; + + composite_op->vertex_id = id; + + if (is_affine) { + src_format = BRW_SURFACEFORMAT_R32G32_FLOAT; + w_component = BRW_VFCOMPONENT_STORE_1_FLT; + } else { + src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; + w_component = BRW_VFCOMPONENT_STORE_SRC; + } + + if (IS_GEN5(intel)) { + /* + * The reason to add this extra vertex element in the header is that + * Ironlake has different vertex header definition and origin method to + * set destination element offset doesn't exist anymore, which means + * hardware requires a predefined vertex element layout. + * + * haihao proposed this approach to fill the first vertex element, so + * origin layout for Gen4 doesn't need to change, and origin shader + * programs behavior is also kept. + * + * I think this is not bad. - zhenyu + */ + + OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | + ((2 * (2 + nelem)) - 1)); + OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | + (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); + + OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); + } else { + /* Set up our vertex elements, sourced from the single vertex buffer. + * that will be set up later. + */ + OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | + ((2 * (1 + nelem)) - 1)); + } + + /* x,y */ + OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | + (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); + + if (IS_GEN5(intel)) + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + else + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | + (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); + /* u0, v0, w0 */ + OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | + (src_format << VE0_FORMAT_SHIFT) | + ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */ + + if (IS_GEN5(intel)) + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (w_component << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + else + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (w_component << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | + ((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ + /* u1, v1, w1 */ + if (has_mask) { + OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | + (src_format << VE0_FORMAT_SHIFT) | + (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */ + + if (IS_GEN5(intel)) + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (w_component << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + else + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (w_component << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | + ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */ + } +} + +static void i965_emit_composite_state(struct intel_screen_private *intel) +{ + struct gen4_render_state *render_state = intel->gen4_render_state; + gen4_composite_op *composite_op = &render_state->composite_op; + int op = composite_op->op; + PicturePtr mask_picture = intel->render_mask_picture; + PicturePtr dest_picture = intel->render_dest_picture; + PixmapPtr mask = intel->render_mask; + PixmapPtr dest = intel->render_dest; + sampler_state_filter_t src_filter = composite_op->src_filter; + sampler_state_filter_t mask_filter = composite_op->mask_filter; + sampler_state_extend_t src_extend = composite_op->src_extend; + sampler_state_extend_t mask_extend = composite_op->mask_extend; + uint32_t src_blend, dst_blend; + + intel->needs_render_state_emit = FALSE; + + /* Begin the long sequence of commands needed to set up the 3D + * rendering pipe + */ + + if (intel->needs_3d_invariant) { + if (IS_GEN5(intel)) { + /* Ironlake errata workaround: Before disabling the clipper, + * you have to MI_FLUSH to get the pipeline idle. + */ + OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); + } + + /* Match Mesa driver setup */ + if (INTEL_INFO(intel)->gen >= 045) + OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); + else + OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D); + + /* Set system instruction pointer */ + OUT_BATCH(BRW_STATE_SIP | 0); + OUT_BATCH(0); + + intel->needs_3d_invariant = FALSE; + } + + if (intel->surface_reloc == 0) { + /* Zero out the two base address registers so all offsets are + * absolute. + */ + if (IS_GEN5(intel)) { + OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6); + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ + intel->surface_reloc = intel->batch_used; + intel_batch_emit_dword(intel, + intel->surface_bo->offset | BASE_ADDRESS_MODIFY); + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Instruction base address */ + /* general state max addr, disabled */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); + /* media object state max addr, disabled */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); + /* Instruction max addr, disabled */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); + } else { + OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4); + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ + intel->surface_reloc = intel->batch_used; + intel_batch_emit_dword(intel, + intel->surface_bo->offset | BASE_ADDRESS_MODIFY); + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ + /* general state max addr, disabled */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); + /* media object state max addr, disabled */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); + } + } + + i965_get_blend_cntl(op, mask_picture, dest_picture->format, + &src_blend, &dst_blend); + + /* Binding table pointers */ + OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4); + OUT_BATCH(0); /* vs */ + OUT_BATCH(0); /* gs */ + OUT_BATCH(0); /* clip */ + OUT_BATCH(0); /* sf */ + /* Only the PS uses the binding table */ + OUT_BATCH(intel->surface_table); + + /* The drawing rectangle clipping is always on. Set it to values that + * shouldn't do any clipping. + */ + OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); + OUT_BATCH(0x00000000); /* ymin, xmin */ + OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) | + DRAW_XMAX(dest->drawable.width - 1)); /* ymax, xmax */ + OUT_BATCH(0x00000000); /* yorigin, xorigin */ + + /* skip the depth buffer */ + /* skip the polygon stipple */ + /* skip the polygon stipple offset */ + /* skip the line stipple */ + + /* Set the pointers to the 3d pipeline state */ + OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5); + OUT_RELOC(render_state->vs_state_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */ + OUT_BATCH(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */ + if (mask) { + OUT_RELOC(render_state->sf_mask_state_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + } else { + OUT_RELOC(render_state->sf_state_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + } + + OUT_RELOC(render_state->wm_state_bo[composite_op->wm_kernel] + [src_filter][src_extend] + [mask_filter][mask_extend], + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + + OUT_RELOC(render_state->cc_state_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + offsetof(struct gen4_cc_unit_state, + cc_state[src_blend][dst_blend])); + + { + int urb_vs_start, urb_vs_size; + int urb_gs_start, urb_gs_size; + int urb_clip_start, urb_clip_size; + int urb_sf_start, urb_sf_size; + int urb_cs_start, urb_cs_size; + + urb_vs_start = 0; + urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; + urb_gs_start = urb_vs_start + urb_vs_size; + urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; + urb_clip_start = urb_gs_start + urb_gs_size; + urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; + urb_sf_start = urb_clip_start + urb_clip_size; + urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; + urb_cs_start = urb_sf_start + urb_sf_size; + urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; + + /* Erratum (Vol 1a, p32): + * URB_FENCE must not cross a cache-line (64 bytes). + */ + if ((intel->batch_used & 15) > (16 - 3)) { + int cnt = 16 - (intel->batch_used & 15); + while (cnt--) + OUT_BATCH(MI_NOOP); + } + + OUT_BATCH(BRW_URB_FENCE | + UF0_CS_REALLOC | + UF0_SF_REALLOC | + UF0_CLIP_REALLOC | + UF0_GS_REALLOC | + UF0_VS_REALLOC | + 1); + OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | + ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | + ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); + OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | + ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); + + /* Constant buffer state */ + OUT_BATCH(BRW_CS_URB_STATE | 0); + OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) | + (URB_CS_ENTRIES << 0)); + } + + gen4_composite_vertex_elements(intel); +} + +/** + * Returns whether the current set of composite state plus vertex buffer is + * expected to fit in the aperture. + */ +static Bool i965_composite_check_aperture(intel_screen_private *intel) +{ + struct gen4_render_state *render_state = intel->gen4_render_state; + gen4_composite_op *composite_op = &render_state->composite_op; + drm_intel_bo *bo_table[] = { + intel->batch_bo, + intel->vertex_bo, + intel->surface_bo, + render_state->vs_state_bo, + render_state->sf_state_bo, + render_state->sf_mask_state_bo, + render_state->wm_state_bo[composite_op->wm_kernel] + [composite_op->src_filter] + [composite_op->src_extend] + [composite_op->mask_filter] + [composite_op->mask_extend], + render_state->cc_state_bo, + }; + drm_intel_bo *gen6_bo_table[] = { + intel->batch_bo, + intel->vertex_bo, + intel->surface_bo, + render_state->wm_kernel_bo[composite_op->wm_kernel], + render_state->ps_sampler_state_bo[composite_op->src_filter] + [composite_op->src_extend] + [composite_op->mask_filter] + [composite_op->mask_extend], + render_state->cc_vp_bo, + render_state->cc_state_bo, + render_state->gen6_blend_bo, + render_state->gen6_depth_stencil_bo, + }; + + if (INTEL_INFO(intel)->gen >= 060) + return drm_intel_bufmgr_check_aperture_space(gen6_bo_table, + ARRAY_SIZE(gen6_bo_table)) == 0; + else + return drm_intel_bufmgr_check_aperture_space(bo_table, + ARRAY_SIZE(bo_table)) == 0; +} + +static void i965_surface_flush(struct intel_screen_private *intel) +{ + int ret; + + ret = drm_intel_bo_subdata(intel->surface_bo, + 0, intel->surface_used, + intel->surface_data); + assert(ret == 0); + intel->surface_used = 0; + + assert (intel->surface_reloc != 0); + drm_intel_bo_emit_reloc(intel->batch_bo, + intel->surface_reloc * 4, + intel->surface_bo, BASE_ADDRESS_MODIFY, + I915_GEM_DOMAIN_INSTRUCTION, 0); + intel->surface_reloc = 0; + + drm_intel_bo_unreference(intel->surface_bo); + intel->surface_bo = + drm_intel_bo_alloc(intel->bufmgr, "surface data", + sizeof(intel->surface_data), 4096); + assert(intel->surface_bo); + + return; + (void)ret; +} + +static void +i965_emit_composite_primitive_identity_source(intel_screen_private *intel, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) +{ + OUT_VERTEX(dstX + w); + OUT_VERTEX(dstY + h); + OUT_VERTEX((srcX + w) * intel->scale_units[0][0]); + OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY + h); + OUT_VERTEX(srcX * intel->scale_units[0][0]); + OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); + OUT_VERTEX(srcX * intel->scale_units[0][0]); + OUT_VERTEX(srcY * intel->scale_units[0][1]); +} + +static void +i965_emit_composite_primitive_affine_source(intel_screen_private *intel, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) +{ + float src_x[3], src_y[3]; + + if (!intel_get_transformed_coordinates(srcX, srcY, + intel->transform[0], + &src_x[0], + &src_y[0])) + return; + + if (!intel_get_transformed_coordinates(srcX, srcY + h, + intel->transform[0], + &src_x[1], + &src_y[1])) + return; + + if (!intel_get_transformed_coordinates(srcX + w, srcY + h, + intel->transform[0], + &src_x[2], + &src_y[2])) + return; + + OUT_VERTEX(dstX + w); + OUT_VERTEX(dstY + h); + OUT_VERTEX(src_x[2] * intel->scale_units[0][0]); + OUT_VERTEX(src_y[2] * intel->scale_units[0][1]); + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY + h); + OUT_VERTEX(src_x[1] * intel->scale_units[0][0]); + OUT_VERTEX(src_y[1] * intel->scale_units[0][1]); + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); + OUT_VERTEX(src_x[0] * intel->scale_units[0][0]); + OUT_VERTEX(src_y[0] * intel->scale_units[0][1]); +} + +static void +i965_emit_composite_primitive_identity_source_mask(intel_screen_private *intel, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) +{ + OUT_VERTEX(dstX + w); + OUT_VERTEX(dstY + h); + OUT_VERTEX((srcX + w) * intel->scale_units[0][0]); + OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); + OUT_VERTEX((maskX + w) * intel->scale_units[1][0]); + OUT_VERTEX((maskY + h) * intel->scale_units[1][1]); + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY + h); + OUT_VERTEX(srcX * intel->scale_units[0][0]); + OUT_VERTEX((srcY + h) * intel->scale_units[0][1]); + OUT_VERTEX(maskX * intel->scale_units[1][0]); + OUT_VERTEX((maskY + h) * intel->scale_units[1][1]); + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); + OUT_VERTEX(srcX * intel->scale_units[0][0]); + OUT_VERTEX(srcY * intel->scale_units[0][1]); + OUT_VERTEX(maskX * intel->scale_units[1][0]); + OUT_VERTEX(maskY * intel->scale_units[1][1]); +} + +static void +i965_emit_composite_primitive(intel_screen_private *intel, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) +{ + float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; + Bool is_affine = intel->gen4_render_state->composite_op.is_affine; + + if (is_affine) { + if (!intel_get_transformed_coordinates(srcX, srcY, + intel->transform[0], + &src_x[0], + &src_y[0])) + return; + + if (!intel_get_transformed_coordinates(srcX, srcY + h, + intel->transform[0], + &src_x[1], + &src_y[1])) + return; + + if (!intel_get_transformed_coordinates(srcX + w, srcY + h, + intel->transform[0], + &src_x[2], + &src_y[2])) + return; + } else { + if (!intel_get_transformed_coordinates_3d(srcX, srcY, + intel->transform[0], + &src_x[0], + &src_y[0], + &src_w[0])) + return; + + if (!intel_get_transformed_coordinates_3d(srcX, srcY + h, + intel->transform[0], + &src_x[1], + &src_y[1], + &src_w[1])) + return; + + if (!intel_get_transformed_coordinates_3d(srcX + w, srcY + h, + intel->transform[0], + &src_x[2], + &src_y[2], + &src_w[2])) + return; + } + + if (intel->render_mask) { + if (is_affine) { + if (!intel_get_transformed_coordinates(maskX, maskY, + intel->transform[1], + &mask_x[0], + &mask_y[0])) + return; + + if (!intel_get_transformed_coordinates(maskX, maskY + h, + intel->transform[1], + &mask_x[1], + &mask_y[1])) + return; + + if (!intel_get_transformed_coordinates(maskX + w, maskY + h, + intel->transform[1], + &mask_x[2], + &mask_y[2])) + return; + } else { + if (!intel_get_transformed_coordinates_3d(maskX, maskY, + intel->transform[1], + &mask_x[0], + &mask_y[0], + &mask_w[0])) + return; + + if (!intel_get_transformed_coordinates_3d(maskX, maskY + h, + intel->transform[1], + &mask_x[1], + &mask_y[1], + &mask_w[1])) + return; + + if (!intel_get_transformed_coordinates_3d(maskX + w, maskY + h, + intel->transform[1], + &mask_x[2], + &mask_y[2], + &mask_w[2])) + return; + } + } + + OUT_VERTEX(dstX + w); + OUT_VERTEX(dstY + h); + OUT_VERTEX(src_x[2] * intel->scale_units[0][0]); + OUT_VERTEX(src_y[2] * intel->scale_units[0][1]); + if (!is_affine) + OUT_VERTEX(src_w[2]); + if (intel->render_mask) { + OUT_VERTEX(mask_x[2] * intel->scale_units[1][0]); + OUT_VERTEX(mask_y[2] * intel->scale_units[1][1]); + if (!is_affine) + OUT_VERTEX(mask_w[2]); + } + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY + h); + OUT_VERTEX(src_x[1] * intel->scale_units[0][0]); + OUT_VERTEX(src_y[1] * intel->scale_units[0][1]); + if (!is_affine) + OUT_VERTEX(src_w[1]); + if (intel->render_mask) { + OUT_VERTEX(mask_x[1] * intel->scale_units[1][0]); + OUT_VERTEX(mask_y[1] * intel->scale_units[1][1]); + if (!is_affine) + OUT_VERTEX(mask_w[1]); + } + + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); + OUT_VERTEX(src_x[0] * intel->scale_units[0][0]); + OUT_VERTEX(src_y[0] * intel->scale_units[0][1]); + if (!is_affine) + OUT_VERTEX(src_w[0]); + if (intel->render_mask) { + OUT_VERTEX(mask_x[0] * intel->scale_units[1][0]); + OUT_VERTEX(mask_y[0] * intel->scale_units[1][1]); + if (!is_affine) + OUT_VERTEX(mask_w[0]); + } +} + +Bool +i965_prepare_composite(int op, PicturePtr source_picture, + PicturePtr mask_picture, PicturePtr dest_picture, + PixmapPtr source, PixmapPtr mask, PixmapPtr dest) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(dest_picture->pDrawable->pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + struct gen4_render_state *render_state = intel->gen4_render_state; + gen4_composite_op *composite_op = &render_state->composite_op; + + composite_op->src_filter = + sampler_state_filter_from_picture(source_picture->filter); + if (composite_op->src_filter == SS_INVALID_FILTER) { + intel_debug_fallback(scrn, "Bad src filter 0x%x\n", + source_picture->filter); + return FALSE; + } + composite_op->src_extend = + sampler_state_extend_from_picture(source_picture->repeatType); + if (composite_op->src_extend == SS_INVALID_EXTEND) { + intel_debug_fallback(scrn, "Bad src repeat 0x%x\n", + source_picture->repeatType); + return FALSE; + } + + if (mask_picture) { + if (mask_picture->componentAlpha && + PICT_FORMAT_RGB(mask_picture->format)) { + /* Check if it's component alpha that relies on a source alpha and on + * the source value. We can only get one of those into the single + * source value that we get to blend with. + */ + if (i965_blend_op[op].src_alpha && + (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) { + intel_debug_fallback(scrn, + "Component alpha not supported " + "with source alpha and source " + "value blending.\n"); + return FALSE; + } + } + + composite_op->mask_filter = + sampler_state_filter_from_picture(mask_picture->filter); + if (composite_op->mask_filter == SS_INVALID_FILTER) { + intel_debug_fallback(scrn, "Bad mask filter 0x%x\n", + mask_picture->filter); + return FALSE; + } + composite_op->mask_extend = + sampler_state_extend_from_picture(mask_picture->repeatType); + if (composite_op->mask_extend == SS_INVALID_EXTEND) { + intel_debug_fallback(scrn, "Bad mask repeat 0x%x\n", + mask_picture->repeatType); + return FALSE; + } + } else { + composite_op->mask_filter = SS_FILTER_NEAREST; + composite_op->mask_extend = SS_EXTEND_NONE; + } + + /* Flush any pending writes prior to relocating the textures. */ + if (intel_pixmap_is_dirty(source) || intel_pixmap_is_dirty(mask)) + intel_batch_emit_flush(scrn); + + composite_op->op = op; + intel->render_source_picture = source_picture; + intel->render_mask_picture = mask_picture; + intel->render_dest_picture = dest_picture; + intel->render_source = source; + intel->render_mask = mask; + intel->render_dest = dest; + + intel->scale_units[0][0] = 1. / source->drawable.width; + intel->scale_units[0][1] = 1. / source->drawable.height; + + intel->transform[0] = source_picture->transform; + composite_op->is_affine = intel_transform_is_affine(intel->transform[0]); + + if (mask_picture == NULL) { + intel->transform[1] = NULL; + intel->scale_units[1][0] = -1; + intel->scale_units[1][1] = -1; + } else { + assert(mask != NULL); + intel->transform[1] = mask_picture->transform; + intel->scale_units[1][0] = 1. / mask->drawable.width; + intel->scale_units[1][1] = 1. / mask->drawable.height; + composite_op->is_affine &= + intel_transform_is_affine(intel->transform[1]); + } + + if (mask) { + if (mask_picture->componentAlpha && + PICT_FORMAT_RGB(mask_picture->format)) { + if (i965_blend_op[op].src_alpha) { + if (composite_op->is_affine) + composite_op->wm_kernel = + WM_KERNEL_MASKCA_SRCALPHA_AFFINE; + else + composite_op->wm_kernel = + WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE; + } else { + if (composite_op->is_affine) + composite_op->wm_kernel = + WM_KERNEL_MASKCA_AFFINE; + else + composite_op->wm_kernel = + WM_KERNEL_MASKCA_PROJECTIVE; + } + } else { + if (composite_op->is_affine) + composite_op->wm_kernel = + WM_KERNEL_MASKNOCA_AFFINE; + else + composite_op->wm_kernel = + WM_KERNEL_MASKNOCA_PROJECTIVE; + } + } else { + if (composite_op->is_affine) + composite_op->wm_kernel = WM_KERNEL_NOMASK_AFFINE; + else + composite_op->wm_kernel = WM_KERNEL_NOMASK_PROJECTIVE; + } + + intel->prim_emit = i965_emit_composite_primitive; + if (!mask) { + if (intel->transform[0] == NULL) + intel->prim_emit = i965_emit_composite_primitive_identity_source; + else if (composite_op->is_affine) + intel->prim_emit = i965_emit_composite_primitive_affine_source; + } else { + if (intel->transform[0] == NULL && intel->transform[1] == NULL) + intel->prim_emit = i965_emit_composite_primitive_identity_source_mask; + } + + intel->floats_per_vertex = + 2 + (mask ? 2 : 1) * (composite_op->is_affine ? 2: 3); + + if (!i965_composite_check_aperture(intel)) { + intel_batch_submit(scrn); + if (!i965_composite_check_aperture(intel)) { + intel_debug_fallback(scrn, + "Couldn't fit render operation " + "in aperture\n"); + return FALSE; + } + } + + if (sizeof(intel->surface_data) - intel->surface_used < + 4 * SURFACE_STATE_PADDED_SIZE) + i965_surface_flush(intel); + + intel->needs_render_state_emit = TRUE; + + return TRUE; +} + +static void i965_select_vertex_buffer(struct intel_screen_private *intel) +{ + int id = intel->gen4_render_state->composite_op.vertex_id; + int modifyenable = 0; + + if (intel->vertex_id & (1 << id)) + return; + + if (INTEL_INFO(intel)->gen >= 070) + modifyenable = GEN7_VB0_ADDRESS_MODIFYENABLE; + + /* Set up the pointer to our (single) vertex buffer */ + OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3); + + /* XXX could use multiple vbo to reduce relocations if + * frequently switching between vertex sizes, like rgb10text. + */ + if (INTEL_INFO(intel)->gen >= 060) { + OUT_BATCH((id << GEN6_VB0_BUFFER_INDEX_SHIFT) | + GEN6_VB0_VERTEXDATA | + modifyenable | + (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); + } else { + OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | + VB0_VERTEXDATA | + (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); + } + OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); + if (INTEL_INFO(intel)->gen >= 050) + OUT_RELOC(intel->vertex_bo, + I915_GEM_DOMAIN_VERTEX, 0, + sizeof(intel->vertex_ptr) - 1); + else + OUT_BATCH(0); + OUT_BATCH(0); // ignore for VERTEXDATA, but still there + + intel->vertex_id |= 1 << id; +} + +static void i965_bind_surfaces(struct intel_screen_private *intel) +{ + uint32_t *binding_table; + + assert(intel->surface_used + 4 * SURFACE_STATE_PADDED_SIZE <= sizeof(intel->surface_data)); + + binding_table = (uint32_t*) (intel->surface_data + intel->surface_used); + intel->surface_table = intel->surface_used; + intel->surface_used += SURFACE_STATE_PADDED_SIZE; + + binding_table[0] = + i965_set_picture_surface_state(intel, + intel->render_dest_picture, + intel->render_dest, + TRUE); + binding_table[1] = + i965_set_picture_surface_state(intel, + intel->render_source_picture, + intel->render_source, + FALSE); + if (intel->render_mask) { + binding_table[2] = + i965_set_picture_surface_state(intel, + intel->render_mask_picture, + intel->render_mask, + FALSE); + } +} + +void +i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY, + int dstX, int dstY, int w, int h) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(dest->drawable.pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + + intel_batch_start_atomic(scrn, 200); + if (intel->needs_render_state_emit) { + i965_bind_surfaces(intel); + + if (INTEL_INFO(intel)->gen >= 060) + gen6_emit_composite_state(intel); + else + i965_emit_composite_state(intel); + } + + if (intel->floats_per_vertex != intel->last_floats_per_vertex) { + intel->vertex_index = (intel->vertex_used + intel->floats_per_vertex - 1) / intel->floats_per_vertex; + intel->vertex_used = intel->vertex_index * intel->floats_per_vertex; + intel->last_floats_per_vertex = intel->floats_per_vertex; + } + if (intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) { + i965_vertex_flush(intel); + intel_next_vertex(intel); + intel->vertex_index = 0; + } + i965_select_vertex_buffer(intel); + + if (intel->vertex_offset == 0) { + if (INTEL_INFO(intel)->gen >= 070) { + OUT_BATCH(BRW_3DPRIMITIVE | (7 - 2)); + OUT_BATCH(BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | + _3DPRIM_RECTLIST); + } else { + OUT_BATCH(BRW_3DPRIMITIVE | + BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | + (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | + (0 << 9) | + 4); + } + intel->vertex_offset = intel->batch_used; + OUT_BATCH(0); /* vertex count, to be filled in later */ + OUT_BATCH(intel->vertex_index); + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + intel->vertex_count = intel->vertex_index; + } + + intel->prim_emit(intel, + srcX, srcY, + maskX, maskY, + dstX, dstY, + w, h); + intel->vertex_index += 3; + + if (INTEL_INFO(intel)->gen < 050) { + /* XXX OMG! */ + i965_vertex_flush(intel); + OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); + } + + intel_batch_end_atomic(scrn); +} + +void i965_batch_commit_notify(intel_screen_private *intel) +{ + intel->needs_render_state_emit = TRUE; + intel->needs_3d_invariant = TRUE; + intel->last_floats_per_vertex = 0; + intel->vertex_index = 0; + + intel->gen4_render_state->composite_op.vertex_id = -1; + + intel->gen6_render_state.num_sf_outputs = 0; + intel->gen6_render_state.samplers = NULL; + intel->gen6_render_state.blend = -1; + intel->gen6_render_state.kernel = NULL; + intel->gen6_render_state.drawrect = -1; + + assert(intel->surface_reloc == 0); +} + +/** + * Called at EnterVT so we can set up our offsets into the state buffer. + */ +void gen4_render_state_init(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct gen4_render_state *render; + const struct wm_kernel_info *wm_kernels; + sampler_state_filter_t src_filter; + sampler_state_extend_t src_extend; + sampler_state_filter_t mask_filter; + sampler_state_extend_t mask_extend; + drm_intel_bo *sf_kernel_bo, *sf_kernel_mask_bo; + drm_intel_bo *border_color_bo; + int m; + + intel->needs_3d_invariant = TRUE; + + intel->surface_bo = + drm_intel_bo_alloc(intel->bufmgr, "surface data", + sizeof(intel->surface_data), 4096); + assert(intel->surface_bo); + + intel->surface_used = 0; + + if (intel->gen4_render_state == NULL) { + intel->gen4_render_state = calloc(1, sizeof(*render)); + assert(intel->gen4_render_state != NULL); + } + + if (INTEL_INFO(intel)->gen >= 060) + return gen6_render_state_init(scrn); + + render = intel->gen4_render_state; + render->composite_op.vertex_id = -1; + + render->vs_state_bo = gen4_create_vs_unit_state(intel); + + /* Set up the two SF states (one for blending with a mask, one without) */ + if (IS_GEN5(intel)) { + sf_kernel_bo = intel_bo_alloc_for_data(intel, + sf_kernel_static_gen5, + sizeof + (sf_kernel_static_gen5), + "sf kernel gen5"); + sf_kernel_mask_bo = + intel_bo_alloc_for_data(intel, sf_kernel_mask_static_gen5, + sizeof(sf_kernel_mask_static_gen5), + "sf mask kernel"); + } else { + sf_kernel_bo = intel_bo_alloc_for_data(intel, + sf_kernel_static, + sizeof(sf_kernel_static), + "sf kernel"); + sf_kernel_mask_bo = intel_bo_alloc_for_data(intel, + sf_kernel_mask_static, + sizeof + (sf_kernel_mask_static), + "sf mask kernel"); + } + render->sf_state_bo = gen4_create_sf_state(intel, sf_kernel_bo); + render->sf_mask_state_bo = gen4_create_sf_state(intel, sf_kernel_mask_bo); + drm_intel_bo_unreference(sf_kernel_bo); + drm_intel_bo_unreference(sf_kernel_mask_bo); + + wm_kernels = IS_GEN5(intel) ? wm_kernels_gen5 : wm_kernels_gen4; + for (m = 0; m < KERNEL_COUNT; m++) { + render->wm_kernel_bo[m] = + intel_bo_alloc_for_data(intel, + wm_kernels[m].data, + wm_kernels[m].size, + "WM kernel"); + } + + /* Set up the WM states: each filter/extend type for source and mask, per + * kernel. + */ + border_color_bo = sampler_border_color_create(intel); + for (src_filter = 0; src_filter < FILTER_COUNT; src_filter++) { + for (src_extend = 0; src_extend < EXTEND_COUNT; src_extend++) { + for (mask_filter = 0; mask_filter < FILTER_COUNT; mask_filter++) { + for (mask_extend = 0; mask_extend < EXTEND_COUNT; mask_extend++) { + drm_intel_bo *sampler_state_bo; + + sampler_state_bo = + i965_create_sampler_state(intel, + src_filter, src_extend, + mask_filter, mask_extend, + border_color_bo); + + for (m = 0; m < KERNEL_COUNT; m++) { + render->wm_state_bo[m][src_filter][src_extend][mask_filter][mask_extend] = + gen4_create_wm_state + (intel, + wm_kernels[m]. has_mask, + render->wm_kernel_bo[m], + sampler_state_bo); + } + drm_intel_bo_unreference(sampler_state_bo); + } + } + } + } + drm_intel_bo_unreference(border_color_bo); + + render->cc_state_bo = gen4_create_cc_unit_state(intel); +} + +/** + * Called at LeaveVT. + */ +void gen4_render_state_cleanup(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct gen4_render_state *render_state = intel->gen4_render_state; + int i, j, k, l, m; + + drm_intel_bo_unreference(intel->surface_bo); + drm_intel_bo_unreference(render_state->vs_state_bo); + drm_intel_bo_unreference(render_state->sf_state_bo); + drm_intel_bo_unreference(render_state->sf_mask_state_bo); + + for (i = 0; i < KERNEL_COUNT; i++) + drm_intel_bo_unreference(render_state->wm_kernel_bo[i]); + + for (i = 0; i < FILTER_COUNT; i++) + for (j = 0; j < EXTEND_COUNT; j++) + for (k = 0; k < FILTER_COUNT; k++) + for (l = 0; l < EXTEND_COUNT; l++) + for (m = 0; m < KERNEL_COUNT; m++) + drm_intel_bo_unreference + (render_state-> + wm_state_bo[m][i][j][k] + [l]); + + for (i = 0; i < FILTER_COUNT; i++) + for (j = 0; j < EXTEND_COUNT; j++) + for (k = 0; k < FILTER_COUNT; k++) + for (l = 0; l < EXTEND_COUNT; l++) + drm_intel_bo_unreference(render_state->ps_sampler_state_bo[i][j][k][l]); + + drm_intel_bo_unreference(render_state->cc_state_bo); + + drm_intel_bo_unreference(render_state->cc_vp_bo); + drm_intel_bo_unreference(render_state->gen6_blend_bo); + drm_intel_bo_unreference(render_state->gen6_depth_stencil_bo); + + free(intel->gen4_render_state); + intel->gen4_render_state = NULL; +} + +/* + * for GEN6+ + */ +#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64) + +static drm_intel_bo * +gen6_composite_create_cc_state(intel_screen_private *intel) +{ + struct gen6_color_calc_state *state; + drm_intel_bo *cc_bo; + int ret; + + cc_bo = drm_intel_bo_alloc(intel->bufmgr, + "gen6 CC state", + sizeof(*state), + 4096); + assert(cc_bo); + + ret = drm_intel_bo_map(cc_bo, TRUE); + assert(ret == 0); + + state = memset(cc_bo->virtual, 0, sizeof(*state)); + state->constant_r = 1.0; + state->constant_g = 0.0; + state->constant_b = 1.0; + state->constant_a = 1.0; + drm_intel_bo_unmap(cc_bo); + + return cc_bo; + (void)ret; +} + +static drm_intel_bo * +gen6_composite_create_blend_state(intel_screen_private *intel) +{ + drm_intel_bo *blend_bo; + int src, dst, ret; + + blend_bo = drm_intel_bo_alloc(intel->bufmgr, + "gen6 BLEND state", + BRW_BLENDFACTOR_COUNT * BRW_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE, + 4096); + assert(blend_bo); + + ret = drm_intel_bo_map(blend_bo, TRUE); + assert(ret == 0); + + memset(blend_bo->virtual, 0, blend_bo->size); + for (src = 0; src < BRW_BLENDFACTOR_COUNT; src++) { + for (dst = 0; dst < BRW_BLENDFACTOR_COUNT; dst++) { + uint32_t blend_state_offset = (src * BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE; + struct gen6_blend_state *blend; + + blend = (struct gen6_blend_state *)((char *)blend_bo->virtual + blend_state_offset); + blend->blend0.dest_blend_factor = dst; + blend->blend0.source_blend_factor = src; + blend->blend0.blend_func = BRW_BLENDFUNCTION_ADD; + blend->blend0.blend_enable = 1; + + blend->blend1.post_blend_clamp_enable = 1; + blend->blend1.pre_blend_clamp_enable = 1; + } + } + + drm_intel_bo_unmap(blend_bo); + return blend_bo; + (void)ret; +} + +static drm_intel_bo * +gen6_composite_create_depth_stencil_state(intel_screen_private *intel) +{ + drm_intel_bo *depth_stencil_bo; + int ret; + + depth_stencil_bo = + drm_intel_bo_alloc(intel->bufmgr, + "gen6 DEPTH_STENCIL state", + sizeof(struct gen6_depth_stencil_state), + 4096); + assert(depth_stencil_bo); + + ret = drm_intel_bo_map(depth_stencil_bo, TRUE); + assert(ret == 0); + + memset(depth_stencil_bo->virtual, 0, + sizeof(struct gen6_depth_stencil_state)); + drm_intel_bo_unmap(depth_stencil_bo); + + return depth_stencil_bo; + (void)ret; +} + +static void +gen6_composite_state_base_address(intel_screen_private *intel) +{ + OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2)); + OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */ + intel->surface_reloc = intel->batch_used; + intel_batch_emit_dword(intel, + intel->surface_bo->offset | BASE_ADDRESS_MODIFY); + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state upper bound */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ +} + +static void +gen6_composite_cc_state_pointers(intel_screen_private *intel, + uint32_t blend_offset) +{ + struct gen4_render_state *render_state = intel->gen4_render_state; + drm_intel_bo *cc_bo = NULL; + drm_intel_bo *depth_stencil_bo = NULL; + + if (intel->gen6_render_state.blend == blend_offset) + return; + + if (intel->gen6_render_state.blend == -1) { + cc_bo = render_state->cc_state_bo; + depth_stencil_bo = render_state->gen6_depth_stencil_bo; + } + if (INTEL_INFO(intel)->gen >= 070) { + gen7_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset); + } else { + gen6_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset); + } + + intel->gen6_render_state.blend = blend_offset; +} + +static void +gen6_composite_sampler_state_pointers(intel_screen_private *intel, + drm_intel_bo *bo) +{ + if (intel->gen6_render_state.samplers == bo) + return; + + intel->gen6_render_state.samplers = bo; + + if (INTEL_INFO(intel)->gen >= 070) + gen7_upload_sampler_state_pointers(intel, bo); + else + gen6_upload_sampler_state_pointers(intel, bo); +} + +static void +gen6_composite_wm_constants(intel_screen_private *intel) +{ + Bool ivb = INTEL_INFO(intel)->gen >= 070; + /* disable WM constant buffer */ + OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | ((ivb ? 7 : 5) - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + if (ivb) { + OUT_BATCH(0); + OUT_BATCH(0); + } +} + +static void +gen6_composite_sf_state(intel_screen_private *intel, + Bool has_mask) +{ + int num_sf_outputs = has_mask ? 2 : 1; + + if (intel->gen6_render_state.num_sf_outputs == num_sf_outputs) + return; + + intel->gen6_render_state.num_sf_outputs = num_sf_outputs; + + if (INTEL_INFO(intel)->gen >= 070) + gen7_upload_sf_state(intel, num_sf_outputs, 1); + else + gen6_upload_sf_state(intel, num_sf_outputs, 1); +} + +static void +gen6_composite_wm_state(intel_screen_private *intel, + Bool has_mask, + drm_intel_bo *bo) +{ + int num_surfaces = has_mask ? 3 : 2; + int num_sf_outputs = has_mask ? 2 : 1; + + if (intel->gen6_render_state.kernel == bo) + return; + + intel->gen6_render_state.kernel = bo; + + OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); + OUT_RELOC(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | + (num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(0); + OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ + OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | + GEN6_3DSTATE_WM_DISPATCH_ENABLE | + GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); + OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | + GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static void +gen7_composite_wm_state(intel_screen_private *intel, + Bool has_mask, + drm_intel_bo *bo) +{ + int num_surfaces = has_mask ? 3 : 2; + unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB; + unsigned int num_samples = 0; + + if (IS_HSW(intel)) { + max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW; + num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW; + } + + if (intel->gen6_render_state.kernel == bo) + return; + + intel->gen6_render_state.kernel = bo; + + OUT_BATCH(GEN6_3DSTATE_WM | (3 - 2)); + OUT_BATCH(GEN7_WM_DISPATCH_ENABLE | + GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); + OUT_BATCH(0); + + OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2)); + OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | + (num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH(((48 - 1) << max_threads_shift) | num_samples | + GEN7_PS_ATTRIBUTE_ENABLE | + GEN7_PS_16_DISPATCH_ENABLE); + OUT_BATCH((6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0)); + OUT_BATCH(0); /* kernel 1 pointer */ + OUT_BATCH(0); /* kernel 2 pointer */ +} + + +static void +gen6_composite_drawing_rectangle(intel_screen_private *intel, + PixmapPtr dest) +{ + uint32_t dw = + DRAW_YMAX(dest->drawable.height - 1) | + DRAW_XMAX(dest->drawable.width - 1); + + /* XXX cacomposite depends upon the implicit non-pipelined flush */ + if (0 && intel->gen6_render_state.drawrect == dw) + return; + intel->gen6_render_state.drawrect = dw; + + OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); + OUT_BATCH(0x00000000); /* ymin, xmin */ + OUT_BATCH(dw); /* ymax, xmax */ + OUT_BATCH(0x00000000); /* yorigin, xorigin */ +} + +static void +gen6_composite_vertex_element_state(intel_screen_private *intel, + Bool has_mask, + Bool is_affine) +{ + /* + * vertex data in vertex buffer + * position: (x, y) + * texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0) + * texture coordinate 1 if (has_mask is TRUE): same as above + */ + gen4_composite_op *composite_op = &intel->gen4_render_state->composite_op; + int nelem = has_mask ? 2 : 1; + int selem = is_affine ? 2 : 3; + uint32_t w_component; + uint32_t src_format; + int id; + + id = has_mask << 1 | is_affine; + + if (composite_op->vertex_id == id) + return; + + composite_op->vertex_id = id; + + if (is_affine) { + src_format = BRW_SURFACEFORMAT_R32G32_FLOAT; + w_component = BRW_VFCOMPONENT_STORE_1_FLT; + } else { + src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT; + w_component = BRW_VFCOMPONENT_STORE_SRC; + } + + /* The VUE layout + * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) + * dword 4-7: position (x, y, 1.0, 1.0), + * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) + * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) + * + * dword 4-15 are fetched from vertex buffer + */ + OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | + ((2 * (2 + nelem)) + 1 - 2)); + + OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); + OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); + + /* x,y */ + OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */ + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + + /* u0, v0, w0 */ + OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID | + (src_format << VE0_FORMAT_SHIFT) | + ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */ + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (w_component << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + + /* u1, v1, w1 */ + if (has_mask) { + OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN6_VE0_VALID | + (src_format << VE0_FORMAT_SHIFT) | + (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */ + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (w_component << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + } +} + +static void +gen6_emit_composite_state(struct intel_screen_private *intel) +{ + struct gen4_render_state *render = intel->gen4_render_state; + gen4_composite_op *composite_op = &render->composite_op; + sampler_state_filter_t src_filter = composite_op->src_filter; + sampler_state_filter_t mask_filter = composite_op->mask_filter; + sampler_state_extend_t src_extend = composite_op->src_extend; + sampler_state_extend_t mask_extend = composite_op->mask_extend; + Bool is_affine = composite_op->is_affine; + Bool has_mask = intel->render_mask != NULL; + Bool ivb = INTEL_INFO(intel)->gen >= 070; + uint32_t src, dst; + drm_intel_bo *ps_sampler_state_bo = render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend]; + + intel->needs_render_state_emit = FALSE; + if (intel->needs_3d_invariant) { + gen6_upload_invariant_states(intel); + + if (ivb) { + gen7_upload_viewport_state_pointers(intel, render->cc_vp_bo); + gen7_upload_urb(intel); + gen7_upload_bypass_states(intel); + gen7_upload_depth_buffer_state(intel); + } else { + gen6_upload_invariant_states(intel); + gen6_upload_viewport_state_pointers(intel, render->cc_vp_bo); + gen6_upload_urb(intel); + + gen6_upload_gs_state(intel); + gen6_upload_depth_buffer_state(intel); + } + gen6_composite_wm_constants(intel); + gen6_upload_vs_state(intel); + gen6_upload_clip_state(intel); + + intel->needs_3d_invariant = FALSE; + } + + i965_get_blend_cntl(composite_op->op, + intel->render_mask_picture, + intel->render_dest_picture->format, + &src, &dst); + + if (intel->surface_reloc == 0) + gen6_composite_state_base_address(intel); + + gen6_composite_cc_state_pointers(intel, + (src * BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE); + gen6_composite_sampler_state_pointers(intel, ps_sampler_state_bo); + gen6_composite_sf_state(intel, has_mask); + if (ivb) { + gen7_composite_wm_state(intel, has_mask, + render->wm_kernel_bo[composite_op->wm_kernel]); + gen7_upload_binding_table(intel, intel->surface_table); + } else { + gen6_composite_wm_state(intel, has_mask, + render->wm_kernel_bo[composite_op->wm_kernel]); + gen6_upload_binding_table(intel, intel->surface_table); + } + gen6_composite_drawing_rectangle(intel, intel->render_dest); + gen6_composite_vertex_element_state(intel, has_mask, is_affine); +} + +static void +gen6_render_state_init(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct gen4_render_state *render; + sampler_state_filter_t src_filter; + sampler_state_filter_t mask_filter; + sampler_state_extend_t src_extend; + sampler_state_extend_t mask_extend; + int m; + drm_intel_bo *border_color_bo; + const struct wm_kernel_info *wm_kernels; + + render= intel->gen4_render_state; + render->composite_op.vertex_id = -1; + + intel->gen6_render_state.num_sf_outputs = 0; + intel->gen6_render_state.samplers = NULL; + intel->gen6_render_state.blend = -1; + intel->gen6_render_state.kernel = NULL; + intel->gen6_render_state.drawrect = -1; + + wm_kernels = IS_GEN7(intel) ? wm_kernels_gen7 : wm_kernels_gen6; + for (m = 0; m < KERNEL_COUNT; m++) { + render->wm_kernel_bo[m] = + intel_bo_alloc_for_data(intel, + wm_kernels[m].data, + wm_kernels[m].size, + "WM kernel gen6/7"); + } + + border_color_bo = sampler_border_color_create(intel); + + for (src_filter = 0; src_filter < FILTER_COUNT; src_filter++) { + for (src_extend = 0; src_extend < EXTEND_COUNT; src_extend++) { + for (mask_filter = 0; mask_filter < FILTER_COUNT; mask_filter++) { + for (mask_extend = 0; mask_extend < EXTEND_COUNT; mask_extend++) { + render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend] = + i965_create_sampler_state(intel, + src_filter, src_extend, + mask_filter, mask_extend, + border_color_bo); + } + } + } + } + + drm_intel_bo_unreference(border_color_bo); + render->cc_vp_bo = gen4_create_cc_viewport(intel); + render->cc_state_bo = gen6_composite_create_cc_state(intel); + render->gen6_blend_bo = gen6_composite_create_blend_state(intel); + render->gen6_depth_stencil_bo = gen6_composite_create_depth_stencil_state(intel); +} + +void i965_vertex_flush(struct intel_screen_private *intel) +{ + if (intel->vertex_offset) { + intel->batch_ptr[intel->vertex_offset] = + intel->vertex_index - intel->vertex_count; + intel->vertex_offset = 0; + } +} + +void i965_batch_flush(struct intel_screen_private *intel) +{ + if (intel->surface_used) + i965_surface_flush(intel); +} diff --git a/src/uxa/i965_video.c b/src/uxa/i965_video.c new file mode 100644 index 00000000..5706b201 --- /dev/null +++ b/src/uxa/i965_video.c @@ -0,0 +1,1939 @@ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * Keith Packard <keithp@keithp.com> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "xf86.h" +#include "xf86_OSproc.h" +#include "xf86xv.h" +#include "fourcc.h" + +#include "intel.h" +#include "intel_xvmc.h" +#include "intel_video.h" +#include "i830_reg.h" +#include "i965_reg.h" +#include "brw_defines.h" +#include "brw_structs.h" +#include <string.h> + + +/* Make assert() work. */ +#undef NDEBUG +#include <assert.h> + +static const uint32_t sip_kernel_static[][4] = { +/* wait (1) a0<1>UW a145<0,1,0>UW { align1 + } */ + {0x00000030, 0x20000108, 0x00001220, 0x00000000}, +/* nop (4) g0<1>UD { align1 + } */ + {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, +/* nop (4) g0<1>UD { align1 + } */ + {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, +/* nop (4) g0<1>UD { align1 + } */ + {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, +/* nop (4) g0<1>UD { align1 + } */ + {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, +/* nop (4) g0<1>UD { align1 + } */ + {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, +/* nop (4) g0<1>UD { align1 + } */ + {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, +/* nop (4) g0<1>UD { align1 + } */ + {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, +/* nop (4) g0<1>UD { align1 + } */ + {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, +/* nop (4) g0<1>UD { align1 + } */ + {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}, +}; + +/* + * this program computes dA/dx and dA/dy for the texture coordinates along + * with the base texture coordinate. It was extracted from the Mesa driver. + * It uses about 10 GRF registers. + */ + +#define SF_KERNEL_NUM_GRF 16 +#define SF_MAX_THREADS 1 + +static const uint32_t sf_kernel_static[][4] = { +#include "exa_sf.g4b" +}; + +/* + * Ok, this kernel picks up the required data flow values in g0 and g1 + * and passes those along in m0 and m1. In m2-m9, it sticks constant + * values (bright pink). + */ + +/* Our PS kernel uses less than 32 GRF registers (about 20) */ +#define PS_KERNEL_NUM_GRF 32 +#define PS_MAX_THREADS 32 + +#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) + +static const uint32_t ps_kernel_packed_static[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_yuv_rgb.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_planar_static[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_planar.g4b" +#include "exa_wm_yuv_rgb.g4b" +#include "exa_wm_write.g4b" +}; + +/* new program for Ironlake */ +static const uint32_t sf_kernel_static_gen5[][4] = { +#include "exa_sf.g4b.gen5" +}; + +static const uint32_t ps_kernel_packed_static_gen5[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_affine.g4b.gen5" +#include "exa_wm_src_sample_argb.g4b.gen5" +#include "exa_wm_yuv_rgb.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_planar_static_gen5[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_affine.g4b.gen5" +#include "exa_wm_src_sample_planar.g4b.gen5" +#include "exa_wm_yuv_rgb.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +/* programs for Sandybridge */ +static const uint32_t ps_kernel_packed_static_gen6[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_yuv_rgb.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_planar_static_gen6[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_planar.g6b" +#include "exa_wm_yuv_rgb.g6b" +#include "exa_wm_write.g6b" +}; + +/* programs for Ivybridge */ +static const uint32_t ps_kernel_packed_static_gen7[][4] = { +#include "exa_wm_src_affine.g7b" +#include "exa_wm_src_sample_argb.g7b" +#include "exa_wm_yuv_rgb.g7b" +#include "exa_wm_write.g7b" +}; + +static const uint32_t ps_kernel_planar_static_gen7[][4] = { +#include "exa_wm_src_affine.g7b" +#include "exa_wm_src_sample_planar.g7b" +#include "exa_wm_yuv_rgb.g7b" +#include "exa_wm_write.g7b" +}; + +#ifndef MAX2 +#define MAX2(a,b) ((a) > (b) ? (a) : (b)) +#endif + +#define SURFACE_STATE_PADDED_SIZE_I965 ALIGN(sizeof(struct brw_surface_state), 32) +#define SURFACE_STATE_PADDED_SIZE_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32) +#define SURFACE_STATE_PADDED_SIZE MAX2(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7) +#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) + +static uint32_t float_to_uint(float f) +{ + union { + uint32_t i; + float f; + } x; + x.f = f; + return x.i; +} + +#if 0 +static struct { + uint32_t svg_ctl; + char *name; +} svg_ctl_bits[] = { + { + BRW_SVG_CTL_GS_BA, "General State Base Address"}, { + BRW_SVG_CTL_SS_BA, "Surface State Base Address"}, { + BRW_SVG_CTL_IO_BA, "Indirect Object Base Address"}, { + BRW_SVG_CTL_GS_AUB, "Generate State Access Upper Bound"}, { + BRW_SVG_CTL_IO_AUB, "Indirect Object Access Upper Bound"}, { + BRW_SVG_CTL_SIP, "System Instruction Pointer"}, { +0, 0},}; + +static void brw_debug(ScrnInfoPtr scrn, char *when) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + int i; + uint32_t v; + + ErrorF("brw_debug: %s\n", when); + for (i = 0; svg_ctl_bits[i].name; i++) { + OUTREG(BRW_SVG_CTL, svg_ctl_bits[i].svg_ctl); + v = INREG(BRW_SVG_RDATA); + ErrorF("\t%34.34s: 0x%08x\n", svg_ctl_bits[i].name, v); + } +} +#endif + +#define WATCH_SF 0 +#define WATCH_WIZ 0 +#define WATCH_STATS 0 + +static void i965_pre_draw_debug(ScrnInfoPtr scrn) +{ +#if 0 + intel_screen_private *intel = intel_get_screen_private(scrn); +#endif + +#if 0 + ErrorF("before EU_ATT 0x%08x%08x EU_ATT_DATA 0x%08x%08x\n", + INREG(BRW_EU_ATT_1), INREG(BRW_EU_ATT_0), + INREG(BRW_EU_ATT_DATA_1), INREG(BRW_EU_ATT_DATA_0)); + + OUTREG(BRW_VF_CTL, + BRW_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID | + BRW_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX | + BRW_VF_CTL_SNAPSHOT_ENABLE); + OUTREG(BRW_VF_STRG_VAL, 0); +#endif + +#if 0 + OUTREG(BRW_VS_CTL, + BRW_VS_CTL_SNAPSHOT_ALL_THREADS | + BRW_VS_CTL_SNAPSHOT_MUX_VALID_COUNT | + BRW_VS_CTL_THREAD_SNAPSHOT_ENABLE); + + OUTREG(BRW_VS_STRG_VAL, 0); +#endif + +#if WATCH_SF + OUTREG(BRW_SF_CTL, + BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT | + BRW_SF_CTL_SNAPSHOT_ALL_THREADS | + BRW_SF_CTL_THREAD_SNAPSHOT_ENABLE); + OUTREG(BRW_SF_STRG_VAL, 0); +#endif + +#if WATCH_WIZ + OUTREG(BRW_WIZ_CTL, + BRW_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE | + BRW_WIZ_CTL_SNAPSHOT_ALL_THREADS | BRW_WIZ_CTL_SNAPSHOT_ENABLE); + OUTREG(BRW_WIZ_STRG_VAL, (box_x1) | (box_y1 << 16)); +#endif + +#if 0 + OUTREG(BRW_TS_CTL, + BRW_TS_CTL_SNAPSHOT_MESSAGE_ERROR | + BRW_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS | + BRW_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS | + BRW_TS_CTL_SNAPSHOT_ENABLE); +#endif +} + +static void i965_post_draw_debug(ScrnInfoPtr scrn) +{ +#if 0 + intel_screen_private *intel = intel_get_screen_private(scrn); +#endif + +#if 0 + for (j = 0; j < 100000; j++) { + ctl = INREG(BRW_VF_CTL); + if (ctl & BRW_VF_CTL_SNAPSHOT_COMPLETE) + break; + } + + rdata = INREG(BRW_VF_RDATA); + OUTREG(BRW_VF_CTL, 0); + ErrorF("VF_CTL: 0x%08x VF_RDATA: 0x%08x\n", ctl, rdata); +#endif + +#if 0 + for (j = 0; j < 1000000; j++) { + ctl = INREG(BRW_VS_CTL); + if (ctl & BRW_VS_CTL_SNAPSHOT_COMPLETE) + break; + } + + rdata = INREG(BRW_VS_RDATA); + for (k = 0; k <= 3; k++) { + OUTREG(BRW_VS_CTL, BRW_VS_CTL_SNAPSHOT_COMPLETE | (k << 8)); + rdata = INREG(BRW_VS_RDATA); + ErrorF("VS_CTL: 0x%08x VS_RDATA(%d): 0x%08x\n", ctl, k, rdata); + } + + OUTREG(BRW_VS_CTL, 0); +#endif + +#if WATCH_SF + for (j = 0; j < 1000000; j++) { + ctl = INREG(BRW_SF_CTL); + if (ctl & BRW_SF_CTL_SNAPSHOT_COMPLETE) + break; + } + + for (k = 0; k <= 7; k++) { + OUTREG(BRW_SF_CTL, BRW_SF_CTL_SNAPSHOT_COMPLETE | (k << 8)); + rdata = INREG(BRW_SF_RDATA); + ErrorF("SF_CTL: 0x%08x SF_RDATA(%d): 0x%08x\n", ctl, k, rdata); + } + + OUTREG(BRW_SF_CTL, 0); +#endif + +#if WATCH_WIZ + for (j = 0; j < 100000; j++) { + ctl = INREG(BRW_WIZ_CTL); + if (ctl & BRW_WIZ_CTL_SNAPSHOT_COMPLETE) + break; + } + + rdata = INREG(BRW_WIZ_RDATA); + OUTREG(BRW_WIZ_CTL, 0); + ErrorF("WIZ_CTL: 0x%08x WIZ_RDATA: 0x%08x\n", ctl, rdata); +#endif + +#if 0 + for (j = 0; j < 100000; j++) { + ctl = INREG(BRW_TS_CTL); + if (ctl & BRW_TS_CTL_SNAPSHOT_COMPLETE) + break; + } + + rdata = INREG(BRW_TS_RDATA); + OUTREG(BRW_TS_CTL, 0); + ErrorF("TS_CTL: 0x%08x TS_RDATA: 0x%08x\n", ctl, rdata); + + ErrorF("after EU_ATT 0x%08x%08x EU_ATT_DATA 0x%08x%08x\n", + INREG(BRW_EU_ATT_1), INREG(BRW_EU_ATT_0), + INREG(BRW_EU_ATT_DATA_1), INREG(BRW_EU_ATT_DATA_0)); +#endif + +#if 0 + for (j = 0; j < 256; j++) { + OUTREG(BRW_TD_CTL, j << BRW_TD_CTL_MUX_SHIFT); + rdata = INREG(BRW_TD_RDATA); + ErrorF("TD_RDATA(%d): 0x%08x\n", j, rdata); + } +#endif +} + +/* For 3D, the VS must have 8, 12, 16, 24, or 32 VUEs allocated to it. + * A VUE consists of a 256-bit vertex header followed by the vertex data, + * which in our case is 4 floats (128 bits), thus a single 512-bit URB + * entry. + */ +#define URB_VS_ENTRIES 8 +#define URB_VS_ENTRY_SIZE 1 + +#define URB_GS_ENTRIES 0 +#define URB_GS_ENTRY_SIZE 0 + +#define URB_CLIP_ENTRIES 0 +#define URB_CLIP_ENTRY_SIZE 0 + +/* The SF kernel we use outputs only 4 256-bit registers, leading to an + * entry size of 2 512-bit URBs. We don't need to have many entries to + * output as we're generally working on large rectangles and don't care + * about having WM threads running on different rectangles simultaneously. + */ +#define URB_SF_ENTRIES 1 +#define URB_SF_ENTRY_SIZE 2 + +#define URB_CS_ENTRIES 0 +#define URB_CS_ENTRY_SIZE 0 + +static void i965_create_dst_surface_state(ScrnInfoPtr scrn, + PixmapPtr pixmap, + drm_intel_bo *surf_bo, + uint32_t offset) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct brw_surface_state dest_surf_state; + drm_intel_bo *pixmap_bo = intel_get_pixmap_bo(pixmap); + assert(pixmap_bo != NULL); + + memset(&dest_surf_state, 0, sizeof(dest_surf_state)); + + dest_surf_state.ss0.surface_type = BRW_SURFACE_2D; + dest_surf_state.ss0.data_return_format = + BRW_SURFACERETURNFORMAT_FLOAT32; + if (intel->cpp == 2) { + dest_surf_state.ss0.surface_format = + BRW_SURFACEFORMAT_B5G6R5_UNORM; + } else { + dest_surf_state.ss0.surface_format = + BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + } + dest_surf_state.ss0.writedisable_alpha = 0; + dest_surf_state.ss0.writedisable_red = 0; + dest_surf_state.ss0.writedisable_green = 0; + dest_surf_state.ss0.writedisable_blue = 0; + dest_surf_state.ss0.color_blend = 1; + dest_surf_state.ss0.vert_line_stride = 0; + dest_surf_state.ss0.vert_line_stride_ofs = 0; + dest_surf_state.ss0.mipmap_layout_mode = 0; + dest_surf_state.ss0.render_cache_read_mode = 0; + + dest_surf_state.ss1.base_addr = + intel_emit_reloc(surf_bo, offset + offsetof(struct brw_surface_state, ss1), + pixmap_bo, 0, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); + + dest_surf_state.ss2.height = pixmap->drawable.height - 1; + dest_surf_state.ss2.width = pixmap->drawable.width - 1; + dest_surf_state.ss2.mip_count = 0; + dest_surf_state.ss2.render_target_rotation = 0; + dest_surf_state.ss3.pitch = intel_pixmap_pitch(pixmap) - 1; + dest_surf_state.ss3.tiled_surface = intel_pixmap_tiled(pixmap); + dest_surf_state.ss3.tile_walk = 0; /* TileX */ + + dri_bo_subdata(surf_bo, + offset, sizeof(dest_surf_state), + &dest_surf_state); +} + +static void i965_create_src_surface_state(ScrnInfoPtr scrn, + drm_intel_bo * src_bo, + uint32_t src_offset, + int src_width, + int src_height, + int src_pitch, + uint32_t src_surf_format, + drm_intel_bo *surface_bo, + uint32_t offset) +{ + struct brw_surface_state src_surf_state; + + memset(&src_surf_state, 0, sizeof(src_surf_state)); + + /* Set up the source surface state buffer */ + src_surf_state.ss0.surface_type = BRW_SURFACE_2D; + src_surf_state.ss0.surface_format = src_surf_format; + src_surf_state.ss0.writedisable_alpha = 0; + src_surf_state.ss0.writedisable_red = 0; + src_surf_state.ss0.writedisable_green = 0; + src_surf_state.ss0.writedisable_blue = 0; + src_surf_state.ss0.color_blend = 1; + src_surf_state.ss0.vert_line_stride = 0; + src_surf_state.ss0.vert_line_stride_ofs = 0; + src_surf_state.ss0.mipmap_layout_mode = 0; + src_surf_state.ss0.render_cache_read_mode = 0; + + src_surf_state.ss2.width = src_width - 1; + src_surf_state.ss2.height = src_height - 1; + src_surf_state.ss2.mip_count = 0; + src_surf_state.ss2.render_target_rotation = 0; + src_surf_state.ss3.pitch = src_pitch - 1; + + if (src_bo) { + src_surf_state.ss1.base_addr = + intel_emit_reloc(surface_bo, + offset + offsetof(struct brw_surface_state, ss1), + src_bo, src_offset, + I915_GEM_DOMAIN_SAMPLER, 0); + } else { + src_surf_state.ss1.base_addr = src_offset; + } + + dri_bo_subdata(surface_bo, + offset, sizeof(src_surf_state), + &src_surf_state); +} + +static void gen7_create_dst_surface_state(ScrnInfoPtr scrn, + PixmapPtr pixmap, + drm_intel_bo *surf_bo, + uint32_t offset) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct gen7_surface_state dest_surf_state; + drm_intel_bo *pixmap_bo = intel_get_pixmap_bo(pixmap); + assert(pixmap_bo != NULL); + + memset(&dest_surf_state, 0, sizeof(dest_surf_state)); + + dest_surf_state.ss0.surface_type = BRW_SURFACE_2D; + dest_surf_state.ss0.tiled_surface = intel_pixmap_tiled(pixmap); + dest_surf_state.ss0.tile_walk = 0; /* TileX */ + + if (intel->cpp == 2) { + dest_surf_state.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + } else { + dest_surf_state.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + } + + dest_surf_state.ss1.base_addr = + intel_emit_reloc(surf_bo, + offset + offsetof(struct gen7_surface_state, ss1), + pixmap_bo, 0, + I915_GEM_DOMAIN_SAMPLER, 0); + + dest_surf_state.ss2.height = pixmap->drawable.height - 1; + dest_surf_state.ss2.width = pixmap->drawable.width - 1; + + dest_surf_state.ss3.pitch = intel_pixmap_pitch(pixmap) - 1; + + if (IS_HSW(intel)) { + dest_surf_state.ss7.shader_chanel_select_r = HSW_SCS_RED; + dest_surf_state.ss7.shader_chanel_select_g = HSW_SCS_GREEN; + dest_surf_state.ss7.shader_chanel_select_b = HSW_SCS_BLUE; + dest_surf_state.ss7.shader_chanel_select_a = HSW_SCS_ALPHA; + } + + dri_bo_subdata(surf_bo, + offset, sizeof(dest_surf_state), + &dest_surf_state); +} + +static void gen7_create_src_surface_state(ScrnInfoPtr scrn, + drm_intel_bo * src_bo, + uint32_t src_offset, + int src_width, + int src_height, + int src_pitch, + uint32_t src_surf_format, + drm_intel_bo *surface_bo, + uint32_t offset) +{ + intel_screen_private * const intel = intel_get_screen_private(scrn); + struct gen7_surface_state src_surf_state; + + memset(&src_surf_state, 0, sizeof(src_surf_state)); + + src_surf_state.ss0.surface_type = BRW_SURFACE_2D; + src_surf_state.ss0.surface_format = src_surf_format; + + if (src_bo) { + src_surf_state.ss1.base_addr = + intel_emit_reloc(surface_bo, + offset + offsetof(struct gen7_surface_state, ss1), + src_bo, src_offset, + I915_GEM_DOMAIN_SAMPLER, 0); + } else { + src_surf_state.ss1.base_addr = src_offset; + } + + src_surf_state.ss2.width = src_width - 1; + src_surf_state.ss2.height = src_height - 1; + + src_surf_state.ss3.pitch = src_pitch - 1; + + if (IS_HSW(intel)) { + src_surf_state.ss7.shader_chanel_select_r = HSW_SCS_RED; + src_surf_state.ss7.shader_chanel_select_g = HSW_SCS_GREEN; + src_surf_state.ss7.shader_chanel_select_b = HSW_SCS_BLUE; + src_surf_state.ss7.shader_chanel_select_a = HSW_SCS_ALPHA; + } + + dri_bo_subdata(surface_bo, + offset, sizeof(src_surf_state), + &src_surf_state); +} + +static void i965_create_binding_table(ScrnInfoPtr scrn, + drm_intel_bo *bind_bo, + int n_surf) +{ + uint32_t binding_table[n_surf]; + int i; + + /* Set up a binding table for our surfaces. Only the PS will use it */ + for (i = 0; i < n_surf; i++) + binding_table[i] = i * SURFACE_STATE_PADDED_SIZE; + + dri_bo_subdata(bind_bo, + n_surf * SURFACE_STATE_PADDED_SIZE, + sizeof(binding_table), binding_table); +} + +static drm_intel_bo *i965_create_sampler_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct brw_sampler_state sampler_state; + + memset(&sampler_state, 0, sizeof(sampler_state)); + sampler_state.ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler_state.ss0.mag_filter = BRW_MAPFILTER_LINEAR; + sampler_state.ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler_state.ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler_state.ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + + return intel_bo_alloc_for_data(intel, + &sampler_state, sizeof(sampler_state), + "textured video sampler state"); +} + +static drm_intel_bo *gen7_create_sampler_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct gen7_sampler_state sampler_state; + + memset(&sampler_state, 0, sizeof(sampler_state)); + sampler_state.ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler_state.ss0.mag_filter = BRW_MAPFILTER_LINEAR; + sampler_state.ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler_state.ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler_state.ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + + return intel_bo_alloc_for_data(intel, + &sampler_state, sizeof(sampler_state), + "textured video sampler state"); +} + +static drm_intel_bo *i965_create_vs_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct brw_vs_unit_state vs_state; + + /* Set up the vertex shader to be disabled (passthrough) */ + memset(&vs_state, 0, sizeof(vs_state)); + if (IS_GEN5(intel)) + vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; + else + vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES; + vs_state.thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; + vs_state.vs6.vs_enable = 0; + vs_state.vs6.vert_cache_disable = 1; + + return intel_bo_alloc_for_data(intel, + &vs_state, sizeof(vs_state), + "textured video vs state"); +} + +static drm_intel_bo *i965_create_program(ScrnInfoPtr scrn, + const uint32_t * program, + unsigned int program_size) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + return intel_bo_alloc_for_data(intel, + program, program_size, + "textured video program"); +} + +static drm_intel_bo *i965_create_sf_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + drm_intel_bo *sf_bo, *kernel_bo; + struct brw_sf_unit_state sf_state; + + if (IS_GEN5(intel)) + kernel_bo = i965_create_program(scrn, + &sf_kernel_static_gen5[0][0], + sizeof(sf_kernel_static_gen5)); + else + kernel_bo = i965_create_program(scrn, + &sf_kernel_static[0][0], + sizeof(sf_kernel_static)); + if (!kernel_bo) + return NULL; + + sf_bo = drm_intel_bo_alloc(intel->bufmgr, + "textured video sf state", 4096, + sizeof(sf_state)); + if (sf_bo == NULL) { + drm_intel_bo_unreference(kernel_bo); + return NULL; + } + + /* Set up the SF kernel to do coord interp: for each attribute, + * calculate dA/dx and dA/dy. Hand these interpolation coefficients + * back to SF which then hands pixels off to WM. + */ + memset(&sf_state, 0, sizeof(sf_state)); + sf_state.thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF); + sf_state.thread0.kernel_start_pointer = + intel_emit_reloc(sf_bo, offsetof(struct brw_sf_unit_state, thread0), + kernel_bo, sf_state.thread0.grf_reg_count << 1, + I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; + sf_state.sf1.single_program_flow = 1; /* XXX */ + sf_state.sf1.binding_table_entry_count = 0; + sf_state.sf1.thread_priority = 0; + sf_state.sf1.floating_point_mode = 0; /* Mesa does this */ + sf_state.sf1.illegal_op_exception_enable = 1; + sf_state.sf1.mask_stack_exception_enable = 1; + sf_state.sf1.sw_exception_enable = 1; + sf_state.thread2.per_thread_scratch_space = 0; + /* scratch space is not used in our kernel */ + sf_state.thread2.scratch_space_base_pointer = 0; + sf_state.thread3.const_urb_entry_read_length = 0; /* no const URBs */ + sf_state.thread3.const_urb_entry_read_offset = 0; /* no const URBs */ + sf_state.thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ + sf_state.thread3.urb_entry_read_offset = 0; + sf_state.thread3.dispatch_grf_start_reg = 3; + sf_state.thread4.max_threads = SF_MAX_THREADS - 1; + sf_state.thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; + sf_state.thread4.nr_urb_entries = URB_SF_ENTRIES; + sf_state.thread4.stats_enable = 1; + sf_state.sf5.viewport_transform = FALSE; /* skip viewport */ + sf_state.sf6.cull_mode = BRW_CULLMODE_NONE; + sf_state.sf6.scissor = 0; + sf_state.sf7.trifan_pv = 2; + sf_state.sf6.dest_org_vbias = 0x8; + sf_state.sf6.dest_org_hbias = 0x8; + + dri_bo_subdata(sf_bo, 0, sizeof(sf_state), &sf_state); + return sf_bo; +} + +static drm_intel_bo *i965_create_wm_state(ScrnInfoPtr scrn, + drm_intel_bo * sampler_bo, + Bool is_packed) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + drm_intel_bo *wm_bo, *kernel_bo; + struct brw_wm_unit_state wm_state; + + if (is_packed) { + if (IS_GEN5(intel)) + kernel_bo = + i965_create_program(scrn, + &ps_kernel_packed_static_gen5[0] + [0], + sizeof + (ps_kernel_packed_static_gen5)); + else + kernel_bo = + i965_create_program(scrn, + &ps_kernel_packed_static[0][0], + sizeof + (ps_kernel_packed_static)); + } else { + if (IS_GEN5(intel)) + kernel_bo = + i965_create_program(scrn, + &ps_kernel_planar_static_gen5[0] + [0], + sizeof + (ps_kernel_planar_static_gen5)); + else + kernel_bo = + i965_create_program(scrn, + &ps_kernel_planar_static[0][0], + sizeof + (ps_kernel_planar_static)); + } + if (!kernel_bo) + return NULL; + + wm_bo = drm_intel_bo_alloc(intel->bufmgr, + "textured video wm state", + sizeof(wm_state), 0); + if (wm_bo == NULL) { + drm_intel_bo_unreference(kernel_bo); + return NULL; + } + + memset(&wm_state, 0, sizeof(wm_state)); + wm_state.thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); + wm_state.thread0.kernel_start_pointer = + intel_emit_reloc(wm_bo, offsetof(struct brw_wm_unit_state, thread0), + kernel_bo, wm_state.thread0.grf_reg_count << 1, + I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; + wm_state.thread1.single_program_flow = 1; /* XXX */ + if (is_packed) + wm_state.thread1.binding_table_entry_count = 2; + else + wm_state.thread1.binding_table_entry_count = 7; + + /* binding table entry count is only used for prefetching, and it has to + * be set 0 for Ironlake + */ + if (IS_GEN5(intel)) + wm_state.thread1.binding_table_entry_count = 0; + + /* Though we never use the scratch space in our WM kernel, it has to be + * set, and the minimum allocation is 1024 bytes. + */ + wm_state.thread2.scratch_space_base_pointer = 0; + wm_state.thread2.per_thread_scratch_space = 0; /* 1024 bytes */ + wm_state.thread3.dispatch_grf_start_reg = 3; /* XXX */ + wm_state.thread3.const_urb_entry_read_length = 0; + wm_state.thread3.const_urb_entry_read_offset = 0; + wm_state.thread3.urb_entry_read_length = 1; /* XXX */ + wm_state.thread3.urb_entry_read_offset = 0; /* XXX */ + wm_state.wm4.stats_enable = 1; + wm_state.wm4.sampler_state_pointer = + intel_emit_reloc(wm_bo, offsetof(struct brw_wm_unit_state, wm4), + sampler_bo, 0, + I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; + if (IS_GEN5(intel)) + wm_state.wm4.sampler_count = 0; + else + wm_state.wm4.sampler_count = 1; /* 1-4 samplers used */ + wm_state.wm5.max_threads = PS_MAX_THREADS - 1; + wm_state.wm5.thread_dispatch_enable = 1; + wm_state.wm5.enable_16_pix = 1; + wm_state.wm5.enable_8_pix = 0; + wm_state.wm5.early_depth_test = 1; + + dri_bo_subdata(wm_bo, 0, sizeof(wm_state), &wm_state); + drm_intel_bo_unreference(kernel_bo); + return wm_bo; +} + +static drm_intel_bo *i965_create_cc_vp_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct brw_cc_viewport cc_viewport; + + memset(&cc_viewport, 0, sizeof(cc_viewport)); + cc_viewport.min_depth = -1.e35; + cc_viewport.max_depth = 1.e35; + + return intel_bo_alloc_for_data(intel, + &cc_viewport, sizeof(cc_viewport), + "textured video cc viewport"); +} + +static drm_intel_bo *i965_create_cc_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + drm_intel_bo *cc_bo, *cc_vp_bo; + struct brw_cc_unit_state cc_state; + + cc_vp_bo = i965_create_cc_vp_state(scrn); + if (!cc_vp_bo) + return NULL; + + cc_bo = drm_intel_bo_alloc(intel->bufmgr, + "textured video cc state", + sizeof(cc_state), 0); + if (cc_bo == NULL){ + drm_intel_bo_unreference(cc_vp_bo); + return NULL; + } + + /* Color calculator state */ + memset(&cc_state, 0, sizeof(cc_state)); + cc_state.cc0.stencil_enable = 0; /* disable stencil */ + cc_state.cc2.depth_test = 0; /* disable depth test */ + cc_state.cc2.logicop_enable = 1; /* enable logic op */ + cc_state.cc3.ia_blend_enable = 1; /* blend alpha just like colors */ + cc_state.cc3.blend_enable = 0; /* disable color blend */ + cc_state.cc3.alpha_test = 0; /* disable alpha test */ + cc_state.cc4.cc_viewport_state_offset = + intel_emit_reloc(cc_bo, offsetof(struct brw_cc_unit_state, cc4), + cc_vp_bo, 0, I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; + cc_state.cc5.dither_enable = 0; /* disable dither */ + cc_state.cc5.logicop_func = 0xc; /* WHITE */ + cc_state.cc5.statistics_enable = 1; + cc_state.cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD; + cc_state.cc5.ia_src_blend_factor = BRW_BLENDFACTOR_ONE; + cc_state.cc5.ia_dest_blend_factor = BRW_BLENDFACTOR_ONE; + + dri_bo_subdata(cc_bo, 0, sizeof(cc_state), &cc_state); + drm_intel_bo_unreference(cc_vp_bo); + + return cc_bo; +} + +static void +i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * surface_state_binding_table_bo, int n_src_surf, PixmapPtr pixmap) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + int urb_vs_start, urb_vs_size; + int urb_gs_start, urb_gs_size; + int urb_clip_start, urb_clip_size; + int urb_sf_start, urb_sf_size; + int urb_cs_start, urb_cs_size; + int pipe_ctl; + + IntelEmitInvarientState(scrn); + intel->last_3d = LAST_3D_VIDEO; + intel->needs_3d_invariant = TRUE; + + urb_vs_start = 0; + urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; + urb_gs_start = urb_vs_start + urb_vs_size; + urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; + urb_clip_start = urb_gs_start + urb_gs_size; + urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; + urb_sf_start = urb_clip_start + urb_clip_size; + urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; + urb_cs_start = urb_sf_start + urb_sf_size; + urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; + + OUT_BATCH(MI_FLUSH | + MI_STATE_INSTRUCTION_CACHE_FLUSH | + BRW_MI_GLOBAL_SNAPSHOT_RESET); + OUT_BATCH(MI_NOOP); + + /* brw_debug (scrn, "before base address modify"); */ + /* Match Mesa driver setup */ + if (INTEL_INFO(intel)->gen >= 045) + OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); + else + OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D); + + /* Mesa does this. Who knows... */ + OUT_BATCH(BRW_CS_URB_STATE | 0); + OUT_BATCH((0 << 4) | /* URB Entry Allocation Size */ + (0 << 0)); /* Number of URB Entries */ + + /* Zero out the two base address registers so all offsets are + * absolute + */ + if (IS_GEN5(intel)) { + OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6); + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ + OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Instruction base address */ + /* general state max addr, disabled */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); + /* media object state max addr, disabled */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); + /* Instruction max addr, disabled */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); + } else { + OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4); + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ + OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ + /* general state max addr, disabled */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); + /* media object state max addr, disabled */ + OUT_BATCH(0 | BASE_ADDRESS_MODIFY); + } + + /* Set system instruction pointer */ + OUT_BATCH(BRW_STATE_SIP | 0); + /* system instruction pointer */ + OUT_RELOC(intel->video.gen4_sip_kernel_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + + /* brw_debug (scrn, "after base address modify"); */ + + if (IS_GEN5(intel)) + pipe_ctl = BRW_PIPE_CONTROL_NOWRITE; + else + pipe_ctl = BRW_PIPE_CONTROL_NOWRITE | BRW_PIPE_CONTROL_IS_FLUSH; + + /* Pipe control */ + OUT_BATCH(BRW_PIPE_CONTROL | pipe_ctl | 2); + OUT_BATCH(0); /* Destination address */ + OUT_BATCH(0); /* Immediate data low DW */ + OUT_BATCH(0); /* Immediate data high DW */ + + /* Binding table pointers */ + OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4); + OUT_BATCH(0); /* vs */ + OUT_BATCH(0); /* gs */ + OUT_BATCH(0); /* clip */ + OUT_BATCH(0); /* sf */ + /* Only the PS uses the binding table */ + OUT_BATCH((n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE); + + /* Blend constant color (magenta is fun) */ + OUT_BATCH(BRW_3DSTATE_CONSTANT_COLOR | 3); + OUT_BATCH(float_to_uint(1.0)); + OUT_BATCH(float_to_uint(0.0)); + OUT_BATCH(float_to_uint(1.0)); + OUT_BATCH(float_to_uint(1.0)); + + /* The drawing rectangle clipping is always on. Set it to values that + * shouldn't do any clipping. + */ + OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); /* XXX 3 for BLC or CTG */ + OUT_BATCH(0x00000000); /* ymin, xmin */ + OUT_BATCH((pixmap->drawable.width - 1) | (pixmap->drawable.height - 1) << 16); /* ymax, xmax */ + OUT_BATCH(0x00000000); /* yorigin, xorigin */ + + /* skip the depth buffer */ + /* skip the polygon stipple */ + /* skip the polygon stipple offset */ + /* skip the line stipple */ + + /* Set the pointers to the 3d pipeline state */ + OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5); + OUT_RELOC(intel->video.gen4_vs_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + /* disable GS, resulting in passthrough */ + OUT_BATCH(BRW_GS_DISABLE); + /* disable CLIP, resulting in passthrough */ + OUT_BATCH(BRW_CLIP_DISABLE); + OUT_RELOC(intel->video.gen4_sf_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + if (n_src_surf == 1) + OUT_RELOC(intel->video.gen4_wm_packed_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + else + OUT_RELOC(intel->video.gen4_wm_planar_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(intel->video.gen4_cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + + /* URB fence */ + OUT_BATCH(BRW_URB_FENCE | + UF0_CS_REALLOC | + UF0_SF_REALLOC | + UF0_CLIP_REALLOC | UF0_GS_REALLOC | UF0_VS_REALLOC | 1); + OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | + ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | + ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); + OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | + ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); + + /* Constant buffer state */ + OUT_BATCH(BRW_CS_URB_STATE | 0); + OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) | (URB_CS_ENTRIES << 0)); + + /* Set up our vertex elements, sourced from the single vertex buffer. */ + + if (IS_GEN5(intel)) { + OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 3); + /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ + OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + VE0_VALID | + (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) + | (BRW_VFCOMPONENT_STORE_SRC << + VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << + VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << + VE1_VFCOMPONENT_3_SHIFT)); + /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ + OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + VE0_VALID | + (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (8 << VE0_OFFSET_SHIFT)); + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) + | (BRW_VFCOMPONENT_STORE_SRC << + VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << + VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << + VE1_VFCOMPONENT_3_SHIFT)); + } else { + OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 3); + /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ + OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + VE0_VALID | + (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) + | (BRW_VFCOMPONENT_STORE_SRC << + VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << + VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << + VE1_VFCOMPONENT_3_SHIFT) | (0 << + VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); + /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ + OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | + VE0_VALID | + (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (8 << VE0_OFFSET_SHIFT)); + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) + | (BRW_VFCOMPONENT_STORE_SRC << + VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << + VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << + VE1_VFCOMPONENT_3_SHIFT) | (4 << + VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); + } +} + +void +I965DisplayVideoTextured(ScrnInfoPtr scrn, + intel_adaptor_private *adaptor_priv, int id, + RegionPtr dstRegion, + short width, short height, + int video_pitch, int video_pitch2, + short src_w, short src_h, + short drw_w, short drw_h, PixmapPtr pixmap) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + BoxPtr pbox; + int nbox, dxo, dyo, pix_xoff, pix_yoff; + float src_scale_x, src_scale_y; + int src_surf; + int n_src_surf; + uint32_t src_surf_format; + uint32_t src_surf_base[6]; + int src_width[6]; + int src_height[6]; + int src_pitch[6]; + drm_intel_bo *surface_state_binding_table_bo; + +#if 0 + ErrorF("BroadwaterDisplayVideoTextured: %dx%d (pitch %d)\n", width, + height, video_pitch); +#endif + +#if 0 + /* enable debug */ + OUTREG(INST_PM, (1 << (16 + 4)) | (1 << 4)); + ErrorF("INST_PM 0x%08x\n", INREG(INST_PM)); +#endif + + src_surf_base[0] = adaptor_priv->YBufOffset; + src_surf_base[1] = adaptor_priv->YBufOffset; + src_surf_base[2] = adaptor_priv->VBufOffset; + src_surf_base[3] = adaptor_priv->VBufOffset; + src_surf_base[4] = adaptor_priv->UBufOffset; + src_surf_base[5] = adaptor_priv->UBufOffset; +#if 0 + ErrorF("base 0 0x%x base 1 0x%x base 2 0x%x\n", + src_surf_base[0], src_surf_base[1], src_surf_base[2]); +#endif + + if (is_planar_fourcc(id)) { + src_surf_format = BRW_SURFACEFORMAT_R8_UNORM; + src_width[1] = src_width[0] = width; + src_height[1] = src_height[0] = height; + src_pitch[1] = src_pitch[0] = video_pitch2; + src_width[4] = src_width[5] = src_width[2] = src_width[3] = + width / 2; + src_height[4] = src_height[5] = src_height[2] = src_height[3] = + height / 2; + src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = + video_pitch; + n_src_surf = 6; + } else { + if (id == FOURCC_UYVY) + src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY; + else + src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL; + + src_width[0] = width; + src_height[0] = height; + src_pitch[0] = video_pitch; + n_src_surf = 1; + } + +#if 0 + ErrorF("dst surf: 0x%08x\n", state_base_offset + dest_surf_offset); + ErrorF("src surf: 0x%08x\n", state_base_offset + src_surf_offset); +#endif + + /* We'll be poking the state buffers that could be in use by the 3d + * hardware here, but we should have synced the 3D engine already in + * I830PutImage. + */ + + surface_state_binding_table_bo = + drm_intel_bo_alloc(intel->bufmgr, + "surface state & binding table", + (n_src_surf + 1) * (SURFACE_STATE_PADDED_SIZE + sizeof(uint32_t)), + 4096); + + if (!surface_state_binding_table_bo) + return; + + i965_create_dst_surface_state(scrn, pixmap, surface_state_binding_table_bo, 0); + + for (src_surf = 0; src_surf < n_src_surf; src_surf++) { + i965_create_src_surface_state(scrn, + adaptor_priv->buf, + src_surf_base[src_surf], + src_width[src_surf], + src_height[src_surf], + src_pitch[src_surf], + src_surf_format, + surface_state_binding_table_bo, + (src_surf + 1) * SURFACE_STATE_PADDED_SIZE); + } + + i965_create_binding_table(scrn, surface_state_binding_table_bo, n_src_surf + 1); + + if (intel->video.gen4_sampler_bo == NULL) + intel->video.gen4_sampler_bo = i965_create_sampler_state(scrn); + if (intel->video.gen4_sip_kernel_bo == NULL) { + intel->video.gen4_sip_kernel_bo = + i965_create_program(scrn, &sip_kernel_static[0][0], + sizeof(sip_kernel_static)); + if (!intel->video.gen4_sip_kernel_bo) { + drm_intel_bo_unreference(surface_state_binding_table_bo); + return; + } + } + + if (intel->video.gen4_vs_bo == NULL) { + intel->video.gen4_vs_bo = i965_create_vs_state(scrn); + if (!intel->video.gen4_vs_bo) { + drm_intel_bo_unreference(surface_state_binding_table_bo); + return; + } + } + if (intel->video.gen4_sf_bo == NULL) { + intel->video.gen4_sf_bo = i965_create_sf_state(scrn); + if (!intel->video.gen4_sf_bo) { + drm_intel_bo_unreference(surface_state_binding_table_bo); + return; + } + } + if (intel->video.gen4_wm_packed_bo == NULL) { + intel->video.gen4_wm_packed_bo = + i965_create_wm_state(scrn, intel->video.gen4_sampler_bo, + TRUE); + if (!intel->video.gen4_wm_packed_bo) { + drm_intel_bo_unreference(surface_state_binding_table_bo); + return; + } + } + + if (intel->video.gen4_wm_planar_bo == NULL) { + intel->video.gen4_wm_planar_bo = + i965_create_wm_state(scrn, intel->video.gen4_sampler_bo, + FALSE); + if (!intel->video.gen4_wm_planar_bo) { + drm_intel_bo_unreference(surface_state_binding_table_bo); + return; + } + } + + if (intel->video.gen4_cc_bo == NULL) { + intel->video.gen4_cc_bo = i965_create_cc_state(scrn); + if (!intel->video.gen4_cc_bo) { + drm_intel_bo_unreference(surface_state_binding_table_bo); + return; + } + } + + /* Set up the offset for translating from the given region (in screen + * coordinates) to the backing pixmap. + */ +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + + dxo = dstRegion->extents.x1; + dyo = dstRegion->extents.y1; + + /* Use normalized texture coordinates */ + src_scale_x = ((float)src_w / width) / (float)drw_w; + src_scale_y = ((float)src_h / height) / (float)drw_h; + + pbox = REGION_RECTS(dstRegion); + nbox = REGION_NUM_RECTS(dstRegion); + while (nbox--) { + int box_x1 = pbox->x1; + int box_y1 = pbox->y1; + int box_x2 = pbox->x2; + int box_y2 = pbox->y2; + int i; + float vb[12]; + drm_intel_bo *bo_table[] = { + NULL, /* vb_bo */ + intel->batch_bo, + surface_state_binding_table_bo, + intel->video.gen4_sampler_bo, + intel->video.gen4_sip_kernel_bo, + intel->video.gen4_vs_bo, + intel->video.gen4_sf_bo, + intel->video.gen4_wm_packed_bo, + intel->video.gen4_wm_planar_bo, + intel->video.gen4_cc_bo, + }; + + pbox++; + + i = 0; + vb[i++] = (box_x2 - dxo) * src_scale_x; + vb[i++] = (box_y2 - dyo) * src_scale_y; + vb[i++] = (float)box_x2 + pix_xoff; + vb[i++] = (float)box_y2 + pix_yoff; + + vb[i++] = (box_x1 - dxo) * src_scale_x; + vb[i++] = (box_y2 - dyo) * src_scale_y; + vb[i++] = (float)box_x1 + pix_xoff; + vb[i++] = (float)box_y2 + pix_yoff; + + vb[i++] = (box_x1 - dxo) * src_scale_x; + vb[i++] = (box_y1 - dyo) * src_scale_y; + vb[i++] = (float)box_x1 + pix_xoff; + vb[i++] = (float)box_y1 + pix_yoff; + + bo_table[0] = intel_bo_alloc_for_data(intel, + vb, sizeof(vb), + "textured video vbo"); + + if (IS_GEN4(intel)) + i965_pre_draw_debug(scrn); + + /* If this command won't fit in the current batch, flush. + * Assume that it does after being flushed. + */ + if (drm_intel_bufmgr_check_aperture_space(bo_table, + ARRAY_SIZE(bo_table)) + < 0) { + intel_batch_submit(scrn); + } + + intel_batch_start_atomic(scrn, 150); + + i965_emit_video_setup(scrn, surface_state_binding_table_bo, n_src_surf, pixmap); + + /* Set up the pointer to our vertex buffer */ + OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3); + /* four 32-bit floats per vertex */ + OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) | + VB0_VERTEXDATA | ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); + OUT_RELOC(bo_table[0], I915_GEM_DOMAIN_VERTEX, 0, 0); + if (IS_GEN5(intel)) + OUT_RELOC(bo_table[0], I915_GEM_DOMAIN_VERTEX, 0, + i * 4); + else + OUT_BATCH(3); /* four corners to our rectangle */ + OUT_BATCH(0); /* reserved */ + + OUT_BATCH(BRW_3DPRIMITIVE | BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | (0 << 9) | /* CTG - indirect vertex count */ + 4); + OUT_BATCH(3); /* vertex count per instance */ + OUT_BATCH(0); /* start vertex offset */ + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + OUT_BATCH(MI_NOOP); + + intel_batch_end_atomic(scrn); + + drm_intel_bo_unreference(bo_table[0]); + + if (IS_GEN4(intel)) + i965_post_draw_debug(scrn); + + } + + /* release reference once we're finished */ + drm_intel_bo_unreference(surface_state_binding_table_bo); + + intel_debug_flush(scrn); +} + +void i965_free_video(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + drm_intel_bo_unreference(intel->video.gen4_vs_bo); + intel->video.gen4_vs_bo = NULL; + drm_intel_bo_unreference(intel->video.gen4_sf_bo); + intel->video.gen4_sf_bo = NULL; + drm_intel_bo_unreference(intel->video.gen4_cc_bo); + intel->video.gen4_cc_bo = NULL; + drm_intel_bo_unreference(intel->video.gen4_wm_packed_bo); + intel->video.gen4_wm_packed_bo = NULL; + drm_intel_bo_unreference(intel->video.gen4_wm_planar_bo); + intel->video.gen4_wm_planar_bo = NULL; + drm_intel_bo_unreference(intel->video.gen4_cc_vp_bo); + intel->video.gen4_cc_vp_bo = NULL; + drm_intel_bo_unreference(intel->video.gen4_sampler_bo); + intel->video.gen4_sampler_bo = NULL; + drm_intel_bo_unreference(intel->video.gen4_sip_kernel_bo); + intel->video.gen4_sip_kernel_bo = NULL; + drm_intel_bo_unreference(intel->video.wm_prog_packed_bo); + intel->video.wm_prog_packed_bo = NULL; + drm_intel_bo_unreference(intel->video.wm_prog_planar_bo); + intel->video.wm_prog_planar_bo = NULL; + drm_intel_bo_unreference(intel->video.gen6_blend_bo); + intel->video.gen6_blend_bo = NULL; + drm_intel_bo_unreference(intel->video.gen6_depth_stencil_bo); + intel->video.gen6_depth_stencil_bo = NULL; +} + +/* for GEN6+ */ +static drm_intel_bo * +gen6_create_cc_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct gen6_color_calc_state cc_state; + + memset(&cc_state, 0, sizeof(cc_state)); + cc_state.constant_r = 1.0; + cc_state.constant_g = 0.0; + cc_state.constant_b = 1.0; + cc_state.constant_a = 1.0; + + return intel_bo_alloc_for_data(intel, + &cc_state, sizeof(cc_state), + "textured video cc state"); +} + +static drm_intel_bo * +gen6_create_blend_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct gen6_blend_state blend_state; + + memset(&blend_state, 0, sizeof(blend_state)); + blend_state.blend1.logic_op_enable = 1; + blend_state.blend1.logic_op_func = 0xc; + blend_state.blend1.pre_blend_clamp_enable = 1; + + return intel_bo_alloc_for_data(intel, + &blend_state, sizeof(blend_state), + "textured video blend state"); +} + +static drm_intel_bo * +gen6_create_depth_stencil_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct gen6_depth_stencil_state depth_stencil_state; + + memset(&depth_stencil_state, 0, sizeof(depth_stencil_state)); + return intel_bo_alloc_for_data(intel, + &depth_stencil_state, + sizeof(depth_stencil_state), + "textured video blend state"); +} + +static Bool +gen6_create_vidoe_objects(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + drm_intel_bo *(*create_sampler_state)(ScrnInfoPtr); + const uint32_t *packed_ps_kernel, *planar_ps_kernel; + unsigned int packed_ps_size, planar_ps_size; + + if (INTEL_INFO(intel)->gen >= 070) { + create_sampler_state = gen7_create_sampler_state; + packed_ps_kernel = &ps_kernel_packed_static_gen7[0][0]; + packed_ps_size = sizeof(ps_kernel_packed_static_gen7); + planar_ps_kernel = &ps_kernel_planar_static_gen7[0][0]; + planar_ps_size = sizeof(ps_kernel_planar_static_gen7); + } else { + create_sampler_state = i965_create_sampler_state; + packed_ps_kernel = &ps_kernel_packed_static_gen6[0][0]; + packed_ps_size = sizeof(ps_kernel_packed_static_gen6); + planar_ps_kernel = &ps_kernel_planar_static_gen6[0][0]; + planar_ps_size = sizeof(ps_kernel_planar_static_gen6); + } + + if (intel->video.gen4_sampler_bo == NULL) + intel->video.gen4_sampler_bo = create_sampler_state(scrn); + + if (intel->video.wm_prog_packed_bo == NULL) + intel->video.wm_prog_packed_bo = + i965_create_program(scrn, + packed_ps_kernel, + packed_ps_size); + + if (intel->video.wm_prog_planar_bo == NULL) + intel->video.wm_prog_planar_bo = + i965_create_program(scrn, + planar_ps_kernel, + planar_ps_size); + + if (intel->video.gen4_cc_vp_bo == NULL) + intel->video.gen4_cc_vp_bo = i965_create_cc_vp_state(scrn); + + if (intel->video.gen4_cc_bo == NULL) + intel->video.gen4_cc_bo = gen6_create_cc_state(scrn); + + if (intel->video.gen6_blend_bo == NULL) + intel->video.gen6_blend_bo = gen6_create_blend_state(scrn); + + if (intel->video.gen6_depth_stencil_bo == NULL) + intel->video.gen6_depth_stencil_bo = gen6_create_depth_stencil_state(scrn); + + + return (intel->video.gen4_sampler_bo != NULL && + intel->video.wm_prog_packed_bo != NULL && + intel->video.wm_prog_planar_bo != NULL && + intel->video.gen4_cc_vp_bo != NULL && + intel->video.gen4_cc_bo != NULL && + intel->video.gen6_blend_bo != NULL && + intel->video.gen6_depth_stencil_bo != NULL); +} + +static void +gen6_upload_state_base_address(ScrnInfoPtr scrn, drm_intel_bo *surface_state_binding_table_bo) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2)); + OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */ + OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state upper bound */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ + OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ +} + +static void +gen6_upload_drawing_rectangle(ScrnInfoPtr scrn, PixmapPtr pixmap) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); + OUT_BATCH(0x00000000); /* ymin, xmin */ + OUT_BATCH((pixmap->drawable.width - 1) | (pixmap->drawable.height - 1) << 16); /* ymax, xmax */ + OUT_BATCH(0x00000000); /* yorigin, xorigin */ +} + +static void +gen6_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + /* disable WM constant buffer */ + OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); + if (is_packed) { + OUT_RELOC(intel->video.wm_prog_packed_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | + (2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + } else { + OUT_RELOC(intel->video.wm_prog_planar_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | + (7 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + } + OUT_BATCH(0); + OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ + OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | + GEN6_3DSTATE_WM_DISPATCH_ENABLE | + GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); + OUT_BATCH((1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | + GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static void +gen6_upload_vertex_element_state(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + /* Set up our vertex elements, sourced from the single vertex buffer. */ + OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | (5 - 2)); + /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ + OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN6_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ + OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | + GEN6_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | + (8 << VE0_OFFSET_SHIFT)); + OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); +} + +static void +gen6_upload_vertex_buffer(ScrnInfoPtr scrn, drm_intel_bo *vertex_bo, uint32_t end_address_offset) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + /* Set up the pointer to our vertex buffer */ + OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | (5 - 2)); + /* four 32-bit floats per vertex */ + OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) | + GEN6_VB0_VERTEXDATA | + ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); + OUT_RELOC(vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); + OUT_RELOC(vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, end_address_offset); + OUT_BATCH(0); /* reserved */ +} + +static void +gen6_upload_primitive(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + OUT_BATCH(BRW_3DPRIMITIVE | + BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | + (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | + (0 << 9) | /* Internal Vertex Count */ + (6 - 2)); + OUT_BATCH(3); /* vertex count per instance */ + OUT_BATCH(0); /* start vertex offset */ + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ +} + +static void +gen6_emit_video_setup(ScrnInfoPtr scrn, + drm_intel_bo *surface_state_binding_table_bo, int n_src_surf, + PixmapPtr pixmap, + drm_intel_bo *vertex_bo, uint32_t end_address_offset) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + assert(n_src_surf == 1 || n_src_surf == 6); + IntelEmitInvarientState(scrn); + intel->last_3d = LAST_3D_VIDEO; + intel->needs_3d_invariant = TRUE; + + gen6_upload_invariant_states(intel); + gen6_upload_state_base_address(scrn, surface_state_binding_table_bo); + gen6_upload_viewport_state_pointers(intel, intel->video.gen4_cc_vp_bo); + gen6_upload_urb(intel); + gen6_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo, 0); + gen6_upload_sampler_state_pointers(intel, intel->video.gen4_sampler_bo); + gen6_upload_vs_state(intel); + gen6_upload_gs_state(intel); + gen6_upload_clip_state(intel); + gen6_upload_sf_state(intel, 1, 0); + gen6_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE); + gen6_upload_binding_table(intel, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE); + gen6_upload_depth_buffer_state(intel); + gen6_upload_drawing_rectangle(scrn, pixmap); + gen6_upload_vertex_element_state(scrn); + gen6_upload_vertex_buffer(scrn, vertex_bo, end_address_offset); + gen6_upload_primitive(scrn); +} + +static void +gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB; + unsigned int num_samples = 0; + + if (IS_HSW(intel)) { + max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW; + num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW; + } + + /* disable WM constant buffer */ + OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_WM | (3 - 2)); + OUT_BATCH(GEN7_WM_DISPATCH_ENABLE | + GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); + OUT_BATCH(0); + + OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2)); + + if (is_packed) { + OUT_RELOC(intel->video.wm_prog_packed_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | + (2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + } else { + OUT_RELOC(intel->video.wm_prog_planar_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | + (7 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + } + + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH( + ((48 - 1) << max_threads_shift) | num_samples | + GEN7_PS_ATTRIBUTE_ENABLE | + GEN7_PS_16_DISPATCH_ENABLE); + OUT_BATCH( + (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0)); + OUT_BATCH(0); /* kernel 1 pointer */ + OUT_BATCH(0); /* kernel 2 pointer */ +} + +static void +gen7_upload_vertex_buffer(ScrnInfoPtr scrn, drm_intel_bo *vertex_bo, uint32_t end_address_offset) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + /* Set up the pointer to our vertex buffer */ + OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | (5 - 2)); + /* four 32-bit floats per vertex */ + OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) | + GEN6_VB0_VERTEXDATA | + GEN7_VB0_ADDRESS_MODIFYENABLE | + ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); + OUT_RELOC(vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); + OUT_RELOC(vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, end_address_offset); + OUT_BATCH(0); /* reserved */ +} + +static void +gen7_upload_primitive(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + OUT_BATCH(BRW_3DPRIMITIVE | (7 - 2)); + OUT_BATCH(_3DPRIM_RECTLIST | + GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); + OUT_BATCH(3); /* vertex count per instance */ + OUT_BATCH(0); /* start vertex offset */ + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); +} + +static void +gen7_emit_video_setup(ScrnInfoPtr scrn, + drm_intel_bo *surface_state_binding_table_bo, int n_src_surf, + PixmapPtr pixmap, + drm_intel_bo *vertex_bo, uint32_t end_address_offset) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + assert(n_src_surf == 1 || n_src_surf == 6); + IntelEmitInvarientState(scrn); + intel->last_3d = LAST_3D_VIDEO; + intel->needs_3d_invariant = TRUE; + + gen6_upload_invariant_states(intel); + gen6_upload_state_base_address(scrn, surface_state_binding_table_bo); + gen7_upload_viewport_state_pointers(intel, intel->video.gen4_cc_vp_bo); + gen7_upload_urb(intel); + gen7_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo, 0); + gen7_upload_sampler_state_pointers(intel, intel->video.gen4_sampler_bo); + gen7_upload_bypass_states(intel); + gen6_upload_vs_state(intel); + gen6_upload_clip_state(intel); + gen7_upload_sf_state(intel, 1, 0); + gen7_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE); + gen7_upload_binding_table(intel, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE); + gen7_upload_depth_buffer_state(intel); + gen6_upload_drawing_rectangle(scrn, pixmap); + gen6_upload_vertex_element_state(scrn); + gen7_upload_vertex_buffer(scrn, vertex_bo, end_address_offset); + gen7_upload_primitive(scrn); +} + +void Gen6DisplayVideoTextured(ScrnInfoPtr scrn, + intel_adaptor_private *adaptor_priv, int id, + RegionPtr dstRegion, + short width, short height, + int video_pitch, int video_pitch2, + short src_w, short src_h, + short drw_w, short drw_h, PixmapPtr pixmap) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + BoxPtr pbox; + int nbox, dxo, dyo, pix_xoff, pix_yoff; + float src_scale_x, src_scale_y; + int src_surf; + int n_src_surf; + uint32_t src_surf_format; + uint32_t src_surf_base[6]; + int src_width[6]; + int src_height[6]; + int src_pitch[6]; + drm_intel_bo *surface_state_binding_table_bo; + void (*create_dst_surface_state)(ScrnInfoPtr, + PixmapPtr, + drm_intel_bo *, + uint32_t); + void (*create_src_surface_state)(ScrnInfoPtr, + drm_intel_bo *, + uint32_t, int, + int, int, uint32_t, + drm_intel_bo *, uint32_t); + void (*emit_video_setup)(ScrnInfoPtr, + drm_intel_bo *, int, + PixmapPtr, + drm_intel_bo *, uint32_t); + + if (INTEL_INFO(intel)->gen >= 070) { + create_dst_surface_state = gen7_create_dst_surface_state; + create_src_surface_state = gen7_create_src_surface_state; + emit_video_setup = gen7_emit_video_setup; + } else { + create_dst_surface_state = i965_create_dst_surface_state; + create_src_surface_state = i965_create_src_surface_state; + emit_video_setup = gen6_emit_video_setup; + } + + src_surf_base[0] = adaptor_priv->YBufOffset; + src_surf_base[1] = adaptor_priv->YBufOffset; + src_surf_base[2] = adaptor_priv->VBufOffset; + src_surf_base[3] = adaptor_priv->VBufOffset; + src_surf_base[4] = adaptor_priv->UBufOffset; + src_surf_base[5] = adaptor_priv->UBufOffset; + + if (is_planar_fourcc(id)) { + src_surf_format = BRW_SURFACEFORMAT_R8_UNORM; + src_width[1] = src_width[0] = width; + src_height[1] = src_height[0] = height; + src_pitch[1] = src_pitch[0] = video_pitch2; + src_width[4] = src_width[5] = src_width[2] = src_width[3] = + width / 2; + src_height[4] = src_height[5] = src_height[2] = src_height[3] = + height / 2; + src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = + video_pitch; + n_src_surf = 6; + } else { + if (id == FOURCC_UYVY) + src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY; + else + src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL; + + src_width[0] = width; + src_height[0] = height; + src_pitch[0] = video_pitch; + n_src_surf = 1; + } + + surface_state_binding_table_bo = + drm_intel_bo_alloc(intel->bufmgr, + "surface state & binding table", + (n_src_surf + 1) * (SURFACE_STATE_PADDED_SIZE + sizeof(uint32_t)), + 4096); + + if (!surface_state_binding_table_bo) + return; + + create_dst_surface_state(scrn, pixmap, surface_state_binding_table_bo, 0); + + for (src_surf = 0; src_surf < n_src_surf; src_surf++) { + create_src_surface_state(scrn, + adaptor_priv->buf, + src_surf_base[src_surf], + src_width[src_surf], + src_height[src_surf], + src_pitch[src_surf], + src_surf_format, + surface_state_binding_table_bo, + (src_surf + 1) * SURFACE_STATE_PADDED_SIZE); + } + + i965_create_binding_table(scrn, surface_state_binding_table_bo, n_src_surf + 1); + + if (!gen6_create_vidoe_objects(scrn)) { + drm_intel_bo_unreference(surface_state_binding_table_bo); + return; + } + + /* Set up the offset for translating from the given region (in screen + * coordinates) to the backing pixmap. + */ +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + + dxo = dstRegion->extents.x1; + dyo = dstRegion->extents.y1; + + /* Use normalized texture coordinates */ + src_scale_x = ((float)src_w / width) / (float)drw_w; + src_scale_y = ((float)src_h / height) / (float)drw_h; + + pbox = REGION_RECTS(dstRegion); + nbox = REGION_NUM_RECTS(dstRegion); + while (nbox--) { + int box_x1 = pbox->x1; + int box_y1 = pbox->y1; + int box_x2 = pbox->x2; + int box_y2 = pbox->y2; + int i; + float vb[12]; + drm_intel_bo *bo_table[] = { + NULL, /* vb_bo */ + intel->batch_bo, + surface_state_binding_table_bo, + intel->video.gen4_sampler_bo, + intel->video.wm_prog_packed_bo, + intel->video.wm_prog_planar_bo, + intel->video.gen4_cc_vp_bo, + intel->video.gen4_cc_bo, + intel->video.gen6_blend_bo, + intel->video.gen6_depth_stencil_bo, + }; + + pbox++; + + i = 0; + vb[i++] = (box_x2 - dxo) * src_scale_x; + vb[i++] = (box_y2 - dyo) * src_scale_y; + vb[i++] = (float)box_x2 + pix_xoff; + vb[i++] = (float)box_y2 + pix_yoff; + + vb[i++] = (box_x1 - dxo) * src_scale_x; + vb[i++] = (box_y2 - dyo) * src_scale_y; + vb[i++] = (float)box_x1 + pix_xoff; + vb[i++] = (float)box_y2 + pix_yoff; + + vb[i++] = (box_x1 - dxo) * src_scale_x; + vb[i++] = (box_y1 - dyo) * src_scale_y; + vb[i++] = (float)box_x1 + pix_xoff; + vb[i++] = (float)box_y1 + pix_yoff; + + bo_table[0] = intel_bo_alloc_for_data(intel, + vb, sizeof(vb), + "video vbo"); + + /* If this command won't fit in the current batch, flush. + * Assume that it does after being flushed. + */ + if (drm_intel_bufmgr_check_aperture_space(bo_table, ARRAY_SIZE(bo_table)) < 0) + intel_batch_submit(scrn); + + intel_batch_start_atomic(scrn, 200); + emit_video_setup(scrn, surface_state_binding_table_bo, n_src_surf, pixmap, bo_table[0], i * 4); + intel_batch_end_atomic(scrn); + + drm_intel_bo_unreference(bo_table[0]); + } + + /* release reference once we're finished */ + drm_intel_bo_unreference(surface_state_binding_table_bo); + intel_debug_flush(scrn); +} diff --git a/src/uxa/intel.h b/src/uxa/intel.h new file mode 100644 index 00000000..d4c9aff2 --- /dev/null +++ b/src/uxa/intel.h @@ -0,0 +1,663 @@ +/************************************************************************** + +Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. +Copyright © 2002 David Dawes + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * David Dawes <dawes@xfree86.org> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#if 0 +#define I830DEBUG +#endif + +#include <stdint.h> + +#ifndef REMAP_RESERVED +#define REMAP_RESERVED 0 +#endif + +#ifndef _I830_H_ +#define _I830_H_ + +#include "xf86_OSproc.h" +#include "compiler.h" +#include "xf86Pci.h" +#include "xf86Cursor.h" +#include "xf86xv.h" +#include "xf86Crtc.h" +#include "xf86RandR12.h" + +#include "xorg-server.h" +#include <pciaccess.h> + +#define _XF86DRI_SERVER_ +#include "dri2.h" +#include "intel_bufmgr.h" +#include "i915_drm.h" + +#include "intel_driver.h" +#include "intel_options.h" +#include "intel_list.h" +#include "compat-api.h" + +#if HAVE_UDEV +#include <libudev.h> +#endif + +/* remain compatible to xorg-server 1.6 */ +#ifndef MONITOR_EDID_COMPLETE_RAWDATA +#define MONITOR_EDID_COMPLETE_RAWDATA EDID_COMPLETE_RAWDATA +#endif + +#if XF86_CRTC_VERSION >= 5 +#define INTEL_PIXMAP_SHARING 1 +#endif + +struct intel_pixmap { + dri_bo *bo; + + struct list batch; + + uint16_t stride; + uint8_t tiling; + int8_t busy :2; + uint8_t dirty :1; + uint8_t offscreen :1; + uint8_t pinned :3; +#define PIN_SCANOUT 0x1 +#define PIN_DRI 0x2 +#define PIN_GLAMOR 0x4 +}; + +#if HAS_DEVPRIVATEKEYREC +extern DevPrivateKeyRec uxa_pixmap_index; +#else +extern int uxa_pixmap_index; +#endif + +static inline struct intel_pixmap *intel_get_pixmap_private(PixmapPtr pixmap) +{ +#if HAS_DEVPRIVATEKEYREC + return dixGetPrivate(&pixmap->devPrivates, &uxa_pixmap_index); +#else + return dixLookupPrivate(&pixmap->devPrivates, &uxa_pixmap_index); +#endif +} + +static inline Bool intel_pixmap_is_busy(struct intel_pixmap *priv) +{ + if (priv->busy == -1) + priv->busy = drm_intel_bo_busy(priv->bo); + return priv->busy; +} + +static inline void intel_set_pixmap_private(PixmapPtr pixmap, struct intel_pixmap *intel) +{ + dixSetPrivate(&pixmap->devPrivates, &uxa_pixmap_index, intel); +} + +static inline Bool intel_pixmap_is_dirty(PixmapPtr pixmap) +{ + return pixmap && intel_get_pixmap_private(pixmap)->dirty; +} + +static inline Bool intel_pixmap_tiled(PixmapPtr pixmap) +{ + return intel_get_pixmap_private(pixmap)->tiling != I915_TILING_NONE; +} + +dri_bo *intel_get_pixmap_bo(PixmapPtr pixmap); +void intel_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo); + +#include "common.h" + +#define PITCH_NONE 0 + +/** enumeration of 3d consumers so some can maintain invariant state. */ +enum last_3d { + LAST_3D_OTHER, + LAST_3D_VIDEO, + LAST_3D_RENDER, + LAST_3D_ROTATION +}; + +enum dri_type { + DRI_DISABLED, + DRI_NONE, + DRI_DRI2 +}; + +typedef struct intel_screen_private { + ScrnInfoPtr scrn; + int cpp; + +#define RENDER_BATCH I915_EXEC_RENDER +#define BLT_BATCH I915_EXEC_BLT + unsigned int current_batch; + + void *modes; + drm_intel_bo *front_buffer, *back_buffer; + PixmapPtr back_pixmap; + unsigned int back_name; + long front_pitch, front_tiling; + + dri_bufmgr *bufmgr; + + uint32_t batch_ptr[4096]; + /** Byte offset in batch_ptr for the next dword to be emitted. */ + unsigned int batch_used; + /** Position in batch_ptr at the start of the current BEGIN_BATCH */ + unsigned int batch_emit_start; + /** Number of bytes to be emitted in the current BEGIN_BATCH. */ + uint32_t batch_emitting; + dri_bo *batch_bo, *last_batch_bo[2]; + /** Whether we're in a section of code that can't tolerate flushing */ + Bool in_batch_atomic; + /** Ending batch_used that was verified by intel_start_batch_atomic() */ + int batch_atomic_limit; + struct list batch_pixmaps; + drm_intel_bo *wa_scratch_bo; + OsTimerPtr cache_expire; + + /* For Xvideo */ + Bool use_overlay; +#ifdef INTEL_XVMC + /* For XvMC */ + Bool XvMCEnabled; +#endif + + CreateScreenResourcesProcPtr CreateScreenResources; + + Bool shadow_present; + + unsigned int tiling; +#define INTEL_TILING_FB 0x1 +#define INTEL_TILING_2D 0x2 +#define INTEL_TILING_3D 0x4 +#define INTEL_TILING_ALL (~0) + + Bool swapbuffers_wait; + Bool has_relaxed_fencing; + + int Chipset; + EntityInfoPtr pEnt; + struct pci_device *PciInfo; + const struct intel_device_info *info; + + unsigned int BR[20]; + + CloseScreenProcPtr CloseScreen; + + void (*context_switch) (struct intel_screen_private *intel, + int new_mode); + void (*vertex_flush) (struct intel_screen_private *intel); + void (*batch_flush) (struct intel_screen_private *intel); + void (*batch_commit_notify) (struct intel_screen_private *intel); + + struct _UxaDriver *uxa_driver; + int uxa_flags; + Bool need_sync; + int accel_pixmap_offset_alignment; + int accel_max_x; + int accel_max_y; + int max_bo_size; + int max_gtt_map_size; + int max_tiling_size; + + Bool XvDisabled; /* Xv disabled in PreInit. */ + Bool XvEnabled; /* Xv enabled for this generation. */ + Bool XvPreferOverlay; + + int colorKey; + XF86VideoAdaptorPtr adaptor; + ScreenBlockHandlerProcPtr BlockHandler; + Bool overlayOn; + + struct { + drm_intel_bo *gen4_vs_bo; + drm_intel_bo *gen4_sf_bo; + drm_intel_bo *gen4_wm_packed_bo; + drm_intel_bo *gen4_wm_planar_bo; + drm_intel_bo *gen4_cc_bo; + drm_intel_bo *gen4_cc_vp_bo; + drm_intel_bo *gen4_sampler_bo; + drm_intel_bo *gen4_sip_kernel_bo; + drm_intel_bo *wm_prog_packed_bo; + drm_intel_bo *wm_prog_planar_bo; + drm_intel_bo *gen6_blend_bo; + drm_intel_bo *gen6_depth_stencil_bo; + } video; + + /* Render accel state */ + float scale_units[2][2]; + /** Transform pointers for src/mask, or NULL if identity */ + PictTransform *transform[2]; + + PixmapPtr render_source, render_mask, render_dest; + PicturePtr render_source_picture, render_mask_picture, render_dest_picture; + Bool needs_3d_invariant; + Bool needs_render_state_emit; + Bool needs_render_vertex_emit; + + /* i830 render accel state */ + uint32_t render_dest_format; + uint32_t cblend, ablend, s8_blendctl; + + /* i915 render accel state */ + PixmapPtr texture[2]; + uint32_t mapstate[6]; + uint32_t samplerstate[6]; + + struct { + int op; + uint32_t dst_format; + } i915_render_state; + + struct { + int num_sf_outputs; + int drawrect; + uint32_t blend; + dri_bo *samplers; + dri_bo *kernel; + } gen6_render_state; + + uint32_t prim_offset; + void (*prim_emit)(struct intel_screen_private *intel, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h); + int floats_per_vertex; + int last_floats_per_vertex; + uint16_t vertex_offset; + uint16_t vertex_count; + uint16_t vertex_index; + uint16_t vertex_used; + uint32_t vertex_id; + float vertex_ptr[4*1024]; + dri_bo *vertex_bo; + + uint8_t surface_data[16*1024]; + uint16_t surface_used; + uint16_t surface_table; + uint32_t surface_reloc; + dri_bo *surface_bo; + + /* 965 render acceleration state */ + struct gen4_render_state *gen4_render_state; + + enum dri_type directRenderingType; /* DRI enabled this generation. */ + + Bool directRenderingOpen; + int drmSubFD; + char *deviceName; + + Bool use_pageflipping; + Bool use_triple_buffer; + Bool force_fallback; + Bool has_kernel_flush; + Bool needs_flush; + + struct _DRI2FrameEvent *pending_flip[2]; + + /* Broken-out options. */ + OptionInfoPtr Options; + + /* Driver phase/state information */ + Bool suspended; + + enum last_3d last_3d; + + /** + * User option to print acceleration fallback info to the server log. + */ + Bool fallback_debug; + unsigned debug_flush; +#if HAVE_UDEV + struct udev_monitor *uevent_monitor; + InputHandlerProc uevent_handler; +#endif + Bool has_prime_vmap_flush; +} intel_screen_private; + +#ifndef I915_PARAM_HAS_PRIME_VMAP_FLUSH +#define I915_PARAM_HAS_PRIME_VMAP_FLUSH 21 +#endif + +enum { + DEBUG_FLUSH_BATCHES = 0x1, + DEBUG_FLUSH_CACHES = 0x2, + DEBUG_FLUSH_WAIT = 0x4, +}; + +extern Bool intel_mode_pre_init(ScrnInfoPtr pScrn, int fd, int cpp); +extern void intel_mode_init(struct intel_screen_private *intel); +extern void intel_mode_disable_unused_functions(ScrnInfoPtr scrn); +extern void intel_mode_remove_fb(intel_screen_private *intel); +extern void intel_mode_close(intel_screen_private *intel); +extern void intel_mode_fini(intel_screen_private *intel); + +extern int intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, xf86CrtcPtr crtc); +extern int intel_crtc_id(xf86CrtcPtr crtc); +extern int intel_output_dpms_status(xf86OutputPtr output); +extern void intel_copy_fb(ScrnInfoPtr scrn); + +enum DRI2FrameEventType { + DRI2_SWAP, + DRI2_SWAP_CHAIN, + DRI2_FLIP, + DRI2_WAITMSC, +}; + +#if XORG_VERSION_CURRENT <= XORG_VERSION_NUMERIC(1,7,99,3,0) +typedef void (*DRI2SwapEventPtr)(ClientPtr client, void *data, int type, + CARD64 ust, CARD64 msc, CARD64 sbc); +#endif + +typedef struct _DRI2FrameEvent { + struct intel_screen_private *intel; + + XID drawable_id; + ClientPtr client; + enum DRI2FrameEventType type; + int frame; + int pipe; + + struct list drawable_resource, client_resource; + + /* for swaps & flips only */ + DRI2SwapEventPtr event_complete; + void *event_data; + DRI2BufferPtr front; + DRI2BufferPtr back; + + struct _DRI2FrameEvent *chain; +} DRI2FrameEventRec, *DRI2FrameEventPtr; + +extern Bool intel_do_pageflip(intel_screen_private *intel, + dri_bo *new_front, + DRI2FrameEventPtr flip_info, int ref_crtc_hw_id); + +static inline intel_screen_private * +intel_get_screen_private(ScrnInfoPtr scrn) +{ + return (intel_screen_private *)(scrn->driverPrivate); +} + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#endif + +#ifndef ALIGN +#define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1)) +#endif + +#ifndef MIN +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#endif + +static inline unsigned long intel_pixmap_pitch(PixmapPtr pixmap) +{ + return (unsigned long)pixmap->devKind; +} + +/* Batchbuffer support macros and functions */ +#include "intel_batchbuffer.h" + +/* I830 specific functions */ +extern void IntelEmitInvarientState(ScrnInfoPtr scrn); +extern void I830EmitInvarientState(ScrnInfoPtr scrn); +extern void I915EmitInvarientState(ScrnInfoPtr scrn); + +extern void I830EmitFlush(ScrnInfoPtr scrn); + +extern void I830InitVideo(ScreenPtr pScreen); +extern xf86CrtcPtr intel_covering_crtc(ScrnInfoPtr scrn, BoxPtr box, + xf86CrtcPtr desired, BoxPtr crtc_box_ret); + +Bool I830DRI2ScreenInit(ScreenPtr pScreen); +void I830DRI2CloseScreen(ScreenPtr pScreen); +void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, + unsigned int tv_usec, DRI2FrameEventPtr flip_info); +void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec, + unsigned int tv_usec, DRI2FrameEventPtr flip_info); + +extern Bool intel_crtc_on(xf86CrtcPtr crtc); +int intel_crtc_to_pipe(xf86CrtcPtr crtc); + +/* intel_memory.c */ +unsigned long intel_get_fence_size(intel_screen_private *intel, unsigned long size); +unsigned long intel_get_fence_pitch(intel_screen_private *intel, unsigned long pitch, + uint32_t tiling_mode); + +drm_intel_bo *intel_allocate_framebuffer(ScrnInfoPtr scrn, + int w, int h, int cpp, + unsigned long *pitch, + uint32_t *tiling); + +/* i830_render.c */ +Bool i830_check_composite(int op, + PicturePtr sourcec, PicturePtr mask, PicturePtr dest, + int width, int height); +Bool i830_check_composite_target(PixmapPtr pixmap); +Bool i830_check_composite_texture(ScreenPtr screen, PicturePtr picture); +Bool i830_prepare_composite(int op, PicturePtr sourcec, PicturePtr mask, + PicturePtr dest, PixmapPtr sourcecPixmap, + PixmapPtr maskPixmap, PixmapPtr destPixmap); +void i830_composite(PixmapPtr dest, int srcX, int srcY, + int maskX, int maskY, int dstX, int dstY, int w, int h); +void i830_vertex_flush(intel_screen_private *intel); + +/* i915_render.c */ +Bool i915_check_composite(int op, + PicturePtr sourcec, PicturePtr mask, PicturePtr dest, + int width, int height); +Bool i915_check_composite_target(PixmapPtr pixmap); +Bool i915_check_composite_texture(ScreenPtr screen, PicturePtr picture); +Bool i915_prepare_composite(int op, PicturePtr sourcec, PicturePtr mask, + PicturePtr dest, PixmapPtr sourcecPixmap, + PixmapPtr maskPixmap, PixmapPtr destPixmap); +void i915_composite(PixmapPtr dest, int srcX, int srcY, + int maskX, int maskY, int dstX, int dstY, int w, int h); +void i915_vertex_flush(intel_screen_private *intel); +void i915_batch_commit_notify(intel_screen_private *intel); +void i830_batch_commit_notify(intel_screen_private *intel); +/* i965_render.c */ +unsigned int gen4_render_state_size(ScrnInfoPtr scrn); +void gen4_render_state_init(ScrnInfoPtr scrn); +void gen4_render_state_cleanup(ScrnInfoPtr scrn); +Bool i965_check_composite(int op, + PicturePtr sourcec, PicturePtr mask, PicturePtr dest, + int width, int height); +Bool i965_check_composite_texture(ScreenPtr screen, PicturePtr picture); +Bool i965_prepare_composite(int op, PicturePtr sourcec, PicturePtr mask, + PicturePtr dest, PixmapPtr sourcecPixmap, + PixmapPtr maskPixmap, PixmapPtr destPixmap); +void i965_composite(PixmapPtr dest, int srcX, int srcY, + int maskX, int maskY, int dstX, int dstY, int w, int h); + +void i965_vertex_flush(intel_screen_private *intel); +void i965_batch_flush(intel_screen_private *intel); +void i965_batch_commit_notify(intel_screen_private *intel); + +/* i965_3d.c */ +void gen6_upload_invariant_states(intel_screen_private *intel); +void gen6_upload_viewport_state_pointers(intel_screen_private *intel, + drm_intel_bo *cc_vp_bo); +void gen7_upload_viewport_state_pointers(intel_screen_private *intel, + drm_intel_bo *cc_vp_bo); +void gen6_upload_urb(intel_screen_private *intel); +void gen7_upload_urb(intel_screen_private *intel); +void gen6_upload_cc_state_pointers(intel_screen_private *intel, + drm_intel_bo *blend_bo, drm_intel_bo *cc_bo, + drm_intel_bo *depth_stencil_bo, + uint32_t blend_offset); +void gen7_upload_cc_state_pointers(intel_screen_private *intel, + drm_intel_bo *blend_bo, drm_intel_bo *cc_bo, + drm_intel_bo *depth_stencil_bo, + uint32_t blend_offset); +void gen6_upload_sampler_state_pointers(intel_screen_private *intel, + drm_intel_bo *sampler_bo); +void gen7_upload_sampler_state_pointers(intel_screen_private *intel, + drm_intel_bo *sampler_bo); +void gen7_upload_bypass_states(intel_screen_private *intel); +void gen6_upload_gs_state(intel_screen_private *intel); +void gen6_upload_vs_state(intel_screen_private *intel); +void gen6_upload_clip_state(intel_screen_private *intel); +void gen6_upload_sf_state(intel_screen_private *intel, int num_sf_outputs, int read_offset); +void gen7_upload_sf_state(intel_screen_private *intel, int num_sf_outputs, int read_offset); +void gen6_upload_binding_table(intel_screen_private *intel, uint32_t ps_binding_table_offset); +void gen7_upload_binding_table(intel_screen_private *intel, uint32_t ps_binding_table_offset); +void gen6_upload_depth_buffer_state(intel_screen_private *intel); +void gen7_upload_depth_buffer_state(intel_screen_private *intel); + +Bool intel_transform_is_affine(PictTransformPtr t); +Bool +intel_get_transformed_coordinates(int x, int y, PictTransformPtr transform, + float *x_out, float *y_out); + +Bool +intel_get_transformed_coordinates_3d(int x, int y, PictTransformPtr transform, + float *x_out, float *y_out, float *z_out); + +static inline void +intel_debug_fallback(ScrnInfoPtr scrn, const char *format, ...) _X_ATTRIBUTE_PRINTF(2, 3); + +static inline void +intel_debug_fallback(ScrnInfoPtr scrn, const char *format, ...) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + va_list ap; + + va_start(ap, format); + if (intel->fallback_debug) { + xf86DrvMsg(scrn->scrnIndex, X_INFO, "fallback: "); + LogVMessageVerb(X_INFO, 1, format, ap); + } + va_end(ap); +} + +static inline Bool +intel_check_pitch_2d(PixmapPtr pixmap) +{ + uint32_t pitch = intel_pixmap_pitch(pixmap); + if (pitch > KB(32)) { + ScrnInfoPtr scrn = xf86ScreenToScrn(pixmap->drawable.pScreen); + intel_debug_fallback(scrn, "pitch exceeds 2d limit 32K\n"); + return FALSE; + } + return TRUE; +} + +/* For pre-965 chip only, as they have 8KB limit for 3D */ +static inline Bool +intel_check_pitch_3d(PixmapPtr pixmap) +{ + uint32_t pitch = intel_pixmap_pitch(pixmap); + if (pitch > KB(8)) { + ScrnInfoPtr scrn = xf86ScreenToScrn(pixmap->drawable.pScreen); + intel_debug_fallback(scrn, "pitch exceeds 3d limit 8K\n"); + return FALSE; + } + return TRUE; +} + +/** + * Little wrapper around drm_intel_bo_reloc to return the initial value you + * should stuff into the relocation entry. + * + * If only we'd done this before settling on the library API. + */ +static inline uint32_t +intel_emit_reloc(drm_intel_bo * bo, uint32_t offset, + drm_intel_bo * target_bo, uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain) +{ + drm_intel_bo_emit_reloc(bo, offset, target_bo, target_offset, + read_domains, write_domain); + + return target_bo->offset + target_offset; +} + +static inline drm_intel_bo *intel_bo_alloc_for_data(intel_screen_private *intel, + const void *data, + unsigned int size, + const char *name) +{ + drm_intel_bo *bo; + int ret; + + bo = drm_intel_bo_alloc(intel->bufmgr, name, size, 4096); + assert(bo); + + ret = drm_intel_bo_subdata(bo, 0, size, data); + assert(ret == 0); + + return bo; + (void)ret; +} + +void intel_debug_flush(ScrnInfoPtr scrn); + +static inline PixmapPtr get_drawable_pixmap(DrawablePtr drawable) +{ + ScreenPtr screen = drawable->pScreen; + + if (drawable->type == DRAWABLE_PIXMAP) + return (PixmapPtr) drawable; + else + return screen->GetWindowPixmap((WindowPtr) drawable); +} + +static inline Bool pixmap_is_scanout(PixmapPtr pixmap) +{ + ScreenPtr screen = pixmap->drawable.pScreen; + + return pixmap == screen->GetScreenPixmap(screen); +} + +Bool intel_uxa_init(ScreenPtr pScreen); +Bool intel_uxa_create_screen_resources(ScreenPtr pScreen); +void intel_uxa_block_handler(intel_screen_private *intel); +Bool intel_get_aperture_space(ScrnInfoPtr scrn, drm_intel_bo ** bo_table, + int num_bos); + +static inline Bool intel_pixmap_is_offscreen(PixmapPtr pixmap) +{ + struct intel_pixmap *priv = intel_get_pixmap_private(pixmap); + return priv && priv->offscreen; +} + +#endif /* _I830_H_ */ diff --git a/src/uxa/intel_batchbuffer.c b/src/uxa/intel_batchbuffer.c new file mode 100644 index 00000000..a44a1563 --- /dev/null +++ b/src/uxa/intel_batchbuffer.c @@ -0,0 +1,314 @@ +/* -*- c-basic-offset: 4 -*- */ +/* + * Copyright © 2006 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <stdlib.h> +#include <errno.h> + +#include "xf86.h" +#include "intel.h" +#include "i830_reg.h" +#include "i915_drm.h" +#include "i965_reg.h" + +#include "uxa.h" + +#define DUMP_BATCHBUFFERS NULL // "/tmp/i915-batchbuffers.dump" + +static void intel_end_vertex(intel_screen_private *intel) +{ + if (intel->vertex_bo) { + if (intel->vertex_used) { + dri_bo_subdata(intel->vertex_bo, 0, intel->vertex_used*4, intel->vertex_ptr); + intel->vertex_used = 0; + } + + dri_bo_unreference(intel->vertex_bo); + intel->vertex_bo = NULL; + } + + intel->vertex_id = 0; +} + +void intel_next_vertex(intel_screen_private *intel) +{ + intel_end_vertex(intel); + + intel->vertex_bo = + dri_bo_alloc(intel->bufmgr, "vertex", sizeof (intel->vertex_ptr), 4096); +} + +static dri_bo *bo_alloc(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + int size = 4 * 4096; + /* The 865 has issues with larger-than-page-sized batch buffers. */ + if (IS_I865G(intel)) + size = 4096; + return dri_bo_alloc(intel->bufmgr, "batch", size, 4096); +} + +static void intel_next_batch(ScrnInfoPtr scrn, int mode) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + dri_bo *tmp; + + drm_intel_gem_bo_clear_relocs(intel->batch_bo, 0); + + tmp = intel->last_batch_bo[mode]; + intel->last_batch_bo[mode] = intel->batch_bo; + intel->batch_bo = tmp; + + intel->batch_used = 0; + + /* We don't know when another client has executed, so we have + * to reinitialize our 3D state per batch. + */ + intel->last_3d = LAST_3D_OTHER; +} + +void intel_batch_init(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + intel->batch_emit_start = 0; + intel->batch_emitting = 0; + intel->vertex_id = 0; + + intel->last_batch_bo[0] = bo_alloc(scrn); + intel->last_batch_bo[1] = bo_alloc(scrn); + + intel->batch_bo = bo_alloc(scrn); + intel->batch_used = 0; + intel->last_3d = LAST_3D_OTHER; +} + +void intel_batch_teardown(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + int i; + + for (i = 0; i < ARRAY_SIZE(intel->last_batch_bo); i++) { + if (intel->last_batch_bo[i] != NULL) { + dri_bo_unreference(intel->last_batch_bo[i]); + intel->last_batch_bo[i] = NULL; + } + } + + if (intel->batch_bo != NULL) { + dri_bo_unreference(intel->batch_bo); + intel->batch_bo = NULL; + } + + if (intel->vertex_bo) { + dri_bo_unreference(intel->vertex_bo); + intel->vertex_bo = NULL; + } + + while (!list_is_empty(&intel->batch_pixmaps)) + list_del(intel->batch_pixmaps.next); +} + +static void intel_batch_do_flush(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct intel_pixmap *priv; + + list_for_each_entry(priv, &intel->batch_pixmaps, batch) + priv->dirty = 0; +} + +static void intel_emit_post_sync_nonzero_flush(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + /* keep this entire sequence of 3 PIPE_CONTROL cmds in one batch to + * avoid upsetting the gpu. */ + BEGIN_BATCH(3*4); + OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(BRW_PIPE_CONTROL_CS_STALL | + BRW_PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + + OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(BRW_PIPE_CONTROL_WRITE_QWORD); + OUT_RELOC(intel->wa_scratch_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); + OUT_BATCH(0); /* write data */ + + /* now finally the _real flush */ + OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(BRW_PIPE_CONTROL_WC_FLUSH | + BRW_PIPE_CONTROL_TC_FLUSH | + BRW_PIPE_CONTROL_NOWRITE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); +} + +void intel_batch_emit_flush(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + int flags; + + assert (!intel->in_batch_atomic); + + /* Big hammer, look to the pipelined flushes in future. */ + if ((INTEL_INFO(intel)->gen >= 060)) { + if (intel->current_batch == BLT_BATCH) { + BEGIN_BATCH_BLT(4); + OUT_BATCH(MI_FLUSH_DW | 2); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { + if ((INTEL_INFO(intel)->gen == 060)) { + /* HW-Workaround for Sandybdrige */ + intel_emit_post_sync_nonzero_flush(scrn); + } else { + BEGIN_BATCH(4); + OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(BRW_PIPE_CONTROL_WC_FLUSH | + BRW_PIPE_CONTROL_TC_FLUSH | + BRW_PIPE_CONTROL_NOWRITE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + } + } + } else { + flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE; + if (INTEL_INFO(intel)->gen >= 040) + flags = 0; + + BEGIN_BATCH(1); + OUT_BATCH(MI_FLUSH | flags); + ADVANCE_BATCH(); + } + intel_batch_do_flush(scrn); +} + +void intel_batch_submit(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + int ret; + + assert (!intel->in_batch_atomic); + + if (intel->vertex_flush) + intel->vertex_flush(intel); + intel_end_vertex(intel); + + if (intel->batch_flush) + intel->batch_flush(intel); + + if (intel->batch_used == 0) + return; + + /* Mark the end of the batchbuffer. */ + OUT_BATCH(MI_BATCH_BUFFER_END); + /* Emit a padding dword if we aren't going to be quad-word aligned. */ + if (intel->batch_used & 1) + OUT_BATCH(MI_NOOP); + + if (DUMP_BATCHBUFFERS) { + FILE *file = fopen(DUMP_BATCHBUFFERS, "a"); + if (file) { + fwrite (intel->batch_ptr, intel->batch_used*4, 1, file); + fclose(file); + } + } + + ret = dri_bo_subdata(intel->batch_bo, 0, intel->batch_used*4, intel->batch_ptr); + if (ret == 0) { + ret = drm_intel_bo_mrb_exec(intel->batch_bo, + intel->batch_used*4, + NULL, 0, 0xffffffff, + (HAS_BLT(intel) ? + intel->current_batch: + I915_EXEC_DEFAULT)); + } + + if (ret != 0) { + static int once; + if (!once) { + if (ret == -EIO) { + /* The GPU has hung and unlikely to recover by this point. */ + xf86DrvMsg(scrn->scrnIndex, X_ERROR, "Detected a hung GPU, disabling acceleration.\n"); + xf86DrvMsg(scrn->scrnIndex, X_ERROR, "When reporting this, please include i915_error_state from debugfs and the full dmesg.\n"); + } else { + /* The driver is broken. */ + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Failed to submit batch buffer, expect rendering corruption: %s.\n ", + strerror(-ret)); + } + uxa_set_force_fallback(xf86ScrnToScreen(scrn), TRUE); + intel->force_fallback = TRUE; + once = 1; + } + } + + while (!list_is_empty(&intel->batch_pixmaps)) { + struct intel_pixmap *entry; + + entry = list_first_entry(&intel->batch_pixmaps, + struct intel_pixmap, + batch); + + entry->busy = -1; + entry->dirty = 0; + list_del(&entry->batch); + } + + if (intel->debug_flush & DEBUG_FLUSH_WAIT) + drm_intel_bo_wait_rendering(intel->batch_bo); + + intel_next_batch(scrn, intel->current_batch == I915_EXEC_BLT); + + if (intel->batch_commit_notify) + intel->batch_commit_notify(intel); + + intel->current_batch = 0; +} + +void intel_debug_flush(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + if (intel->debug_flush & DEBUG_FLUSH_CACHES) + intel_batch_emit_flush(scrn); + + if (intel->debug_flush & DEBUG_FLUSH_BATCHES) + intel_batch_submit(scrn); +} diff --git a/src/uxa/intel_batchbuffer.h b/src/uxa/intel_batchbuffer.h new file mode 100644 index 00000000..b2bb390c --- /dev/null +++ b/src/uxa/intel_batchbuffer.h @@ -0,0 +1,226 @@ +/************************************************************************** + +Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. +Copyright © 2002 David Dawes + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +#ifndef _INTEL_BATCHBUFFER_H +#define _INTEL_BATCHBUFFER_H + +#define BATCH_RESERVED 16 + + +void intel_batch_init(ScrnInfoPtr scrn); +void intel_batch_teardown(ScrnInfoPtr scrn); +void intel_batch_emit_flush(ScrnInfoPtr scrn); +void intel_batch_submit(ScrnInfoPtr scrn); + +static inline int intel_batch_space(intel_screen_private *intel) +{ + return (intel->batch_bo->size - BATCH_RESERVED) - (4*intel->batch_used); +} + +static inline int intel_vertex_space(intel_screen_private *intel) +{ + return intel->vertex_bo ? intel->vertex_bo->size - (4*intel->vertex_used) : 0; +} + +static inline void +intel_batch_require_space(ScrnInfoPtr scrn, intel_screen_private *intel, int sz) +{ + assert(sz < intel->batch_bo->size - 8); + if (intel_batch_space(intel) < sz) + intel_batch_submit(scrn); +} + +static inline void intel_batch_start_atomic(ScrnInfoPtr scrn, int sz) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + assert(!intel->in_batch_atomic); + + if (intel->current_batch != RENDER_BATCH) { + if (intel->current_batch && intel->context_switch) + intel->context_switch(intel, RENDER_BATCH); + } + + intel_batch_require_space(scrn, intel, sz * 4); + intel->current_batch = RENDER_BATCH; + + intel->in_batch_atomic = TRUE; + intel->batch_atomic_limit = intel->batch_used + sz; +} + +static inline void intel_batch_end_atomic(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + assert(intel->in_batch_atomic); + assert(intel->batch_used <= intel->batch_atomic_limit); + intel->in_batch_atomic = FALSE; +} + +static inline void intel_batch_emit_dword(intel_screen_private *intel, uint32_t dword) +{ + intel->batch_ptr[intel->batch_used++] = dword; +} + +static inline void intel_batch_align(intel_screen_private *intel, uint32_t align) +{ + uint32_t delta; + + align /= 4; + assert(align); + + if ((delta = intel->batch_used & (align - 1))) { + delta = align - delta; + memset (intel->batch_ptr + intel->batch_used, 0, 4*delta); + intel->batch_used += delta; + } +} + +static inline void +intel_batch_emit_reloc(intel_screen_private *intel, + dri_bo * bo, + uint32_t read_domains, + uint32_t write_domains, uint32_t delta, int needs_fence) +{ + if (needs_fence) + drm_intel_bo_emit_reloc_fence(intel->batch_bo, + intel->batch_used * 4, + bo, delta, + read_domains, write_domains); + else + drm_intel_bo_emit_reloc(intel->batch_bo, intel->batch_used * 4, + bo, delta, + read_domains, write_domains); + + intel_batch_emit_dword(intel, bo->offset + delta); +} + +static inline void +intel_batch_mark_pixmap_domains(intel_screen_private *intel, + struct intel_pixmap *priv, + uint32_t read_domains, uint32_t write_domain) +{ + assert (read_domains); + assert (write_domain == 0 || write_domain == read_domains); + + if (list_is_empty(&priv->batch)) + list_add(&priv->batch, &intel->batch_pixmaps); + + priv->dirty |= write_domain != 0; + priv->busy = 1; + + intel->needs_flush |= write_domain != 0; +} + +static inline void +intel_batch_emit_reloc_pixmap(intel_screen_private *intel, PixmapPtr pixmap, + uint32_t read_domains, uint32_t write_domain, + uint32_t delta, int needs_fence) +{ + struct intel_pixmap *priv = intel_get_pixmap_private(pixmap); + + intel_batch_mark_pixmap_domains(intel, priv, read_domains, write_domain); + + intel_batch_emit_reloc(intel, priv->bo, + read_domains, write_domain, + delta, needs_fence); +} + +#define ALIGN_BATCH(align) intel_batch_align(intel, align); +#define OUT_BATCH(dword) intel_batch_emit_dword(intel, dword) + +#define OUT_RELOC(bo, read_domains, write_domains, delta) \ + intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 0) + +#define OUT_RELOC_FENCED(bo, read_domains, write_domains, delta) \ + intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 1) + +#define OUT_RELOC_PIXMAP(pixmap, reads, write, delta) \ + intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 0) + +#define OUT_RELOC_PIXMAP_FENCED(pixmap, reads, write, delta) \ + intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 1) + +union intfloat { + float f; + unsigned int ui; +}; + +#define OUT_BATCH_F(x) do { \ + union intfloat tmp; \ + tmp.f = (float)(x); \ + OUT_BATCH(tmp.ui); \ +} while(0) + +#define __BEGIN_BATCH(n,batch_idx) \ +do { \ + if (intel->batch_emitting != 0) \ + FatalError("%s: BEGIN_BATCH called without closing " \ + "ADVANCE_BATCH\n", __FUNCTION__); \ + assert(!intel->in_batch_atomic); \ + if (intel->current_batch != batch_idx) { \ + if (intel->current_batch && intel->context_switch) \ + intel->context_switch(intel, batch_idx); \ + } \ + intel_batch_require_space(scrn, intel, (n) * 4); \ + intel->current_batch = batch_idx; \ + intel->batch_emitting = (n); \ + intel->batch_emit_start = intel->batch_used; \ +} while (0) + +#define BEGIN_BATCH(n) __BEGIN_BATCH(n,RENDER_BATCH) +#define BEGIN_BATCH_BLT(n) __BEGIN_BATCH(n,BLT_BATCH) + +#define ADVANCE_BATCH() do { \ + if (intel->batch_emitting == 0) \ + FatalError("%s: ADVANCE_BATCH called with no matching " \ + "BEGIN_BATCH\n", __FUNCTION__); \ + if (intel->batch_used > \ + intel->batch_emit_start + intel->batch_emitting) \ + FatalError("%s: ADVANCE_BATCH: exceeded allocation %d/%d\n ", \ + __FUNCTION__, \ + intel->batch_used - intel->batch_emit_start, \ + intel->batch_emitting); \ + if (intel->batch_used < intel->batch_emit_start + \ + intel->batch_emitting) \ + FatalError("%s: ADVANCE_BATCH: under-used allocation %d/%d\n ", \ + __FUNCTION__, \ + intel->batch_used - intel->batch_emit_start, \ + intel->batch_emitting); \ + intel->batch_emitting = 0; \ +} while (0) + +void intel_next_vertex(intel_screen_private *intel); +static inline void intel_vertex_emit(intel_screen_private *intel, float v) +{ + intel->vertex_ptr[intel->vertex_used++] = v; +} +#define OUT_VERTEX(v) intel_vertex_emit(intel, v) + +#endif /* _INTEL_BATCHBUFFER_H */ diff --git a/src/uxa/intel_display.c b/src/uxa/intel_display.c new file mode 100644 index 00000000..0acb86d4 --- /dev/null +++ b/src/uxa/intel_display.c @@ -0,0 +1,2124 @@ +/* + * Copyright © 2007 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Dave Airlie <airlied@redhat.com> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/poll.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <poll.h> +#include <sys/ioctl.h> + +#include "xorgVersion.h" + +#include "intel.h" +#include "intel_bufmgr.h" +#include "intel_options.h" +#include "xf86drm.h" +#include "xf86drmMode.h" +#include "X11/Xatom.h" +#include "X11/extensions/dpmsconst.h" +#include "xf86DDC.h" +#include "fb.h" +#include "uxa.h" + +#include "intel_glamor.h" + +#define KNOWN_MODE_FLAGS ((1<<14)-1) + +struct intel_mode { + int fd; + uint32_t fb_id; + drmModeResPtr mode_res; + int cpp; + + drmEventContext event_context; + DRI2FrameEventPtr flip_info; + int old_fb_id; + int flip_count; + unsigned int fe_frame; + unsigned int fe_tv_sec; + unsigned int fe_tv_usec; + + struct list outputs; + struct list crtcs; +}; + +struct intel_pageflip { + struct intel_mode *mode; + Bool dispatch_me; +}; + +struct intel_crtc { + struct intel_mode *mode; + drmModeModeInfo kmode; + drmModeCrtcPtr mode_crtc; + int pipe; + dri_bo *cursor; + dri_bo *rotate_bo; + uint32_t rotate_pitch; + uint32_t rotate_fb_id; + xf86CrtcPtr crtc; + struct list link; + PixmapPtr scanout_pixmap; + uint32_t scanout_fb_id; +}; + +struct intel_property { + drmModePropertyPtr mode_prop; + uint64_t value; + int num_atoms; /* if range prop, num_atoms == 1; if enum prop, num_atoms == num_enums + 1 */ + Atom *atoms; +}; + +struct intel_output { + struct intel_mode *mode; + int output_id; + drmModeConnectorPtr mode_output; + drmModeEncoderPtr mode_encoder; + int num_props; + struct intel_property *props; + void *private_data; + + Bool has_panel_limits; + int panel_hdisplay; + int panel_vdisplay; + + int dpms_mode; + const char *backlight_iface; + int backlight_active_level; + int backlight_max; + xf86OutputPtr output; + struct list link; +}; + +static void +intel_output_dpms(xf86OutputPtr output, int mode); + +static void +intel_output_dpms_backlight(xf86OutputPtr output, int oldmode, int mode); + +static inline int +crtc_id(struct intel_crtc *crtc) +{ + return crtc->mode_crtc->crtc_id; +} + +#ifdef __OpenBSD__ + +#include <dev/wscons/wsconsio.h> +#include "xf86Priv.h" + +static void +intel_output_backlight_set(xf86OutputPtr output, int level) +{ + struct intel_output *intel_output = output->driver_private; + struct wsdisplay_param param; + + if (level > intel_output->backlight_max) + level = intel_output->backlight_max; + if (! intel_output->backlight_iface || level < 0) + return; + + param.param = WSDISPLAYIO_PARAM_BRIGHTNESS; + param.curval = level; + if (ioctl(xf86Info.consoleFd, WSDISPLAYIO_SETPARAM, ¶m) == -1) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "Failed to set backlight level: %s\n", + strerror(errno)); + } +} + +static int +intel_output_backlight_get(xf86OutputPtr output) +{ + struct wsdisplay_param param; + + param.param = WSDISPLAYIO_PARAM_BRIGHTNESS; + if (ioctl(xf86Info.consoleFd, WSDISPLAYIO_GETPARAM, ¶m) == -1) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "Failed to get backlight level: %s\n", + strerror(errno)); + return -1; + } + + return param.curval; +} + +static void +intel_output_backlight_init(xf86OutputPtr output) +{ + struct intel_output *intel_output = output->driver_private; + struct wsdisplay_param param; + + param.param = WSDISPLAYIO_PARAM_BRIGHTNESS; + if (ioctl(xf86Info.consoleFd, WSDISPLAYIO_GETPARAM, ¶m) == -1) { + intel_output->backlight_iface = NULL; + return; + } + + intel_output->backlight_iface = "wscons"; + intel_output->backlight_max = param.max; + intel_output->backlight_active_level = param.curval; +} + +#else + +#define BACKLIGHT_CLASS "/sys/class/backlight" + +/* + * List of available kernel interfaces in priority order + */ +static const char *backlight_interfaces[] = { + "gmux_backlight", + "asus-laptop", + "asus-nb-wmi", + "eeepc", + "thinkpad_screen", + "mbp_backlight", + "fujitsu-laptop", + "sony", + "samsung", + "acpi_video1", /* finally fallback to the generic acpi drivers */ + "acpi_video0", + "intel_backlight", + NULL, +}; +/* + * Must be long enough for BACKLIGHT_CLASS + '/' + longest in above table + + * '/' + "max_backlight" + */ +#define BACKLIGHT_PATH_LEN 80 +/* Enough for 10 digits of backlight + '\n' + '\0' */ +#define BACKLIGHT_VALUE_LEN 12 + +static void +intel_output_backlight_set(xf86OutputPtr output, int level) +{ + struct intel_output *intel_output = output->driver_private; + char path[BACKLIGHT_PATH_LEN], val[BACKLIGHT_VALUE_LEN]; + int fd, len, ret; + + if (level > intel_output->backlight_max) + level = intel_output->backlight_max; + if (! intel_output->backlight_iface || level < 0) + return; + + len = snprintf(val, BACKLIGHT_VALUE_LEN, "%d\n", level); + sprintf(path, "%s/%s/brightness", + BACKLIGHT_CLASS, intel_output->backlight_iface); + fd = open(path, O_RDWR); + if (fd == -1) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, "failed to open %s for backlight " + "control: %s\n", path, strerror(errno)); + return; + } + + ret = write(fd, val, len); + if (ret == -1) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, "write to %s for backlight " + "control failed: %s\n", path, strerror(errno)); + } + + close(fd); +} + +static int +intel_output_backlight_get(xf86OutputPtr output) +{ + struct intel_output *intel_output = output->driver_private; + char path[BACKLIGHT_PATH_LEN], val[BACKLIGHT_VALUE_LEN]; + int fd, level; + + sprintf(path, "%s/%s/actual_brightness", + BACKLIGHT_CLASS, intel_output->backlight_iface); + fd = open(path, O_RDONLY); + if (fd == -1) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, "failed to open %s " + "for backlight control: %s\n", path, strerror(errno)); + return -1; + } + + memset(val, 0, sizeof(val)); + if (read(fd, val, BACKLIGHT_VALUE_LEN) == -1) { + close(fd); + return -1; + } + + close(fd); + + level = atoi(val); + if (level > intel_output->backlight_max) + level = intel_output->backlight_max; + if (level < 0) + level = -1; + return level; +} + +static int +intel_output_backlight_get_max(xf86OutputPtr output) +{ + struct intel_output *intel_output = output->driver_private; + char path[BACKLIGHT_PATH_LEN], val[BACKLIGHT_VALUE_LEN]; + int fd, max = 0; + + sprintf(path, "%s/%s/max_brightness", + BACKLIGHT_CLASS, intel_output->backlight_iface); + fd = open(path, O_RDONLY); + if (fd == -1) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, "failed to open %s " + "for backlight control: %s\n", path, strerror(errno)); + return -1; + } + + memset(val, 0, sizeof(val)); + if (read(fd, val, BACKLIGHT_VALUE_LEN) == -1) { + close(fd); + return -1; + } + + close(fd); + + max = atoi(val); + if (max <= 0) + max = -1; + return max; +} + +static void +intel_output_backlight_init(xf86OutputPtr output) +{ + struct intel_output *intel_output = output->driver_private; + intel_screen_private *intel = intel_get_screen_private(output->scrn); + char path[BACKLIGHT_PATH_LEN]; + struct stat buf; + char *str; + int i; + + str = xf86GetOptValString(intel->Options, OPTION_BACKLIGHT); + if (str != NULL) { + sprintf(path, "%s/%s", BACKLIGHT_CLASS, str); + if (!stat(path, &buf)) { + intel_output->backlight_iface = str; + intel_output->backlight_max = intel_output_backlight_get_max(output); + if (intel_output->backlight_max > 0) { + intel_output->backlight_active_level = intel_output_backlight_get(output); + xf86DrvMsg(output->scrn->scrnIndex, X_CONFIG, + "found backlight control interface %s\n", path); + return; + } + } + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "unrecognised backlight control interface %s\n", str); + } + + for (i = 0; backlight_interfaces[i] != NULL; i++) { + sprintf(path, "%s/%s", BACKLIGHT_CLASS, backlight_interfaces[i]); + if (!stat(path, &buf)) { + intel_output->backlight_iface = backlight_interfaces[i]; + intel_output->backlight_max = intel_output_backlight_get_max(output); + if (intel_output->backlight_max > 0) { + intel_output->backlight_active_level = intel_output_backlight_get(output); + xf86DrvMsg(output->scrn->scrnIndex, X_PROBED, + "found backlight control interface %s\n", path); + return; + } + } + } + intel_output->backlight_iface = NULL; +} + +#endif + +static void +mode_from_kmode(ScrnInfoPtr scrn, + drmModeModeInfoPtr kmode, + DisplayModePtr mode) +{ + memset(mode, 0, sizeof(DisplayModeRec)); + mode->status = MODE_OK; + + mode->Clock = kmode->clock; + + mode->HDisplay = kmode->hdisplay; + mode->HSyncStart = kmode->hsync_start; + mode->HSyncEnd = kmode->hsync_end; + mode->HTotal = kmode->htotal; + mode->HSkew = kmode->hskew; + + mode->VDisplay = kmode->vdisplay; + mode->VSyncStart = kmode->vsync_start; + mode->VSyncEnd = kmode->vsync_end; + mode->VTotal = kmode->vtotal; + mode->VScan = kmode->vscan; + + mode->Flags = kmode->flags; + mode->name = strdup(kmode->name); + + if (kmode->type & DRM_MODE_TYPE_DRIVER) + mode->type = M_T_DRIVER; + if (kmode->type & DRM_MODE_TYPE_PREFERRED) + mode->type |= M_T_PREFERRED; + + if (mode->status == MODE_OK && kmode->flags & ~KNOWN_MODE_FLAGS) + mode->status = MODE_BAD; /* unknown flags => unhandled */ + + xf86SetModeCrtc (mode, scrn->adjustFlags); +} + +static void +mode_to_kmode(ScrnInfoPtr scrn, + drmModeModeInfoPtr kmode, + DisplayModePtr mode) +{ + memset(kmode, 0, sizeof(*kmode)); + + kmode->clock = mode->Clock; + kmode->hdisplay = mode->HDisplay; + kmode->hsync_start = mode->HSyncStart; + kmode->hsync_end = mode->HSyncEnd; + kmode->htotal = mode->HTotal; + kmode->hskew = mode->HSkew; + + kmode->vdisplay = mode->VDisplay; + kmode->vsync_start = mode->VSyncStart; + kmode->vsync_end = mode->VSyncEnd; + kmode->vtotal = mode->VTotal; + kmode->vscan = mode->VScan; + + kmode->flags = mode->Flags; + if (mode->name) + strncpy(kmode->name, mode->name, DRM_DISPLAY_MODE_LEN); + kmode->name[DRM_DISPLAY_MODE_LEN-1] = 0; +} + +static void +intel_crtc_dpms(xf86CrtcPtr crtc, int mode) +{ +} + +void +intel_mode_disable_unused_functions(ScrnInfoPtr scrn) +{ + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); + struct intel_mode *mode = intel_get_screen_private(scrn)->modes; + int i; + + /* Force off for consistency between kernel and ddx */ + for (i = 0; i < xf86_config->num_crtc; i++) { + xf86CrtcPtr crtc = xf86_config->crtc[i]; + if (!crtc->enabled) + drmModeSetCrtc(mode->fd, crtc_id(crtc->driver_private), + 0, 0, 0, NULL, 0, NULL); + } +} + +static Bool +intel_crtc_apply(xf86CrtcPtr crtc) +{ + ScrnInfoPtr scrn = crtc->scrn; + struct intel_crtc *intel_crtc = crtc->driver_private; + struct intel_mode *mode = intel_crtc->mode; + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(crtc->scrn); + uint32_t *output_ids; + int output_count = 0; + int fb_id, x, y; + int i, ret = FALSE; + + output_ids = calloc(sizeof(uint32_t), xf86_config->num_output); + if (!output_ids) + return FALSE; + + for (i = 0; i < xf86_config->num_output; i++) { + xf86OutputPtr output = xf86_config->output[i]; + struct intel_output *intel_output; + + if (output->crtc != crtc) + continue; + + intel_output = output->driver_private; + output_ids[output_count] = + intel_output->mode_output->connector_id; + output_count++; + } + + if (!intel_crtc->scanout_fb_id) { +#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,5,99,0,0) + if (!xf86CrtcRotate(crtc, mode, rotation)) + goto done; +#else + if (!xf86CrtcRotate(crtc)) + goto done; +#endif + } + +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,0,0,0) + crtc->funcs->gamma_set(crtc, crtc->gamma_red, crtc->gamma_green, + crtc->gamma_blue, crtc->gamma_size); +#endif + + x = crtc->x; + y = crtc->y; + fb_id = mode->fb_id; + if (intel_crtc->rotate_fb_id) { + fb_id = intel_crtc->rotate_fb_id; + x = 0; + y = 0; + } else if (intel_crtc->scanout_fb_id && intel_crtc->scanout_pixmap->drawable.width >= crtc->mode.HDisplay && intel_crtc->scanout_pixmap->drawable.height >= crtc->mode.VDisplay) { + fb_id = intel_crtc->scanout_fb_id; + x = 0; + y = 0; + } + ret = drmModeSetCrtc(mode->fd, crtc_id(intel_crtc), + fb_id, x, y, output_ids, output_count, + &intel_crtc->kmode); + if (ret) { + xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, + "failed to set mode: %s\n", strerror(-ret)); + ret = FALSE; + } else { + ret = TRUE; + + /* Force DPMS to On for all outputs, which the kernel will have done + * with the mode set. Also, restore the backlight level + */ + for (i = 0; i < xf86_config->num_output; i++) { + xf86OutputPtr output = xf86_config->output[i]; + struct intel_output *intel_output; + + if (output->crtc != crtc) + continue; + + intel_output = output->driver_private; + intel_output_dpms_backlight(output, intel_output->dpms_mode, DPMSModeOn); + intel_output->dpms_mode = DPMSModeOn; + } + } + + if (scrn->pScreen) + xf86_reload_cursors(scrn->pScreen); + +done: + free(output_ids); + return ret; +} + +static Bool +intel_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, + Rotation rotation, int x, int y) +{ + ScrnInfoPtr scrn = crtc->scrn; + intel_screen_private *intel = intel_get_screen_private(scrn); + struct intel_crtc *intel_crtc = crtc->driver_private; + struct intel_mode *intel_mode = intel_crtc->mode; + int saved_x, saved_y; + Rotation saved_rotation; + DisplayModeRec saved_mode; + int ret = TRUE; + unsigned int pitch = scrn->displayWidth * intel->cpp; + + if (intel_mode->fb_id == 0) { + ret = drmModeAddFB(intel_mode->fd, + scrn->virtualX, scrn->virtualY, + scrn->depth, scrn->bitsPerPixel, + pitch, intel->front_buffer->handle, + &intel_mode->fb_id); + if (ret < 0) { + ErrorF("failed to add fb\n"); + return FALSE; + } + + drm_intel_bo_disable_reuse(intel->front_buffer); + } + + saved_mode = crtc->mode; + saved_x = crtc->x; + saved_y = crtc->y; + saved_rotation = crtc->rotation; + + crtc->mode = *mode; + crtc->x = x; + crtc->y = y; + crtc->rotation = rotation; + + intel_glamor_flush(intel); + intel_batch_submit(crtc->scrn); + + mode_to_kmode(crtc->scrn, &intel_crtc->kmode, mode); + ret = intel_crtc_apply(crtc); + if (!ret) { + crtc->x = saved_x; + crtc->y = saved_y; + crtc->rotation = saved_rotation; + crtc->mode = saved_mode; + } + return ret; +} + +static void +intel_crtc_set_cursor_colors(xf86CrtcPtr crtc, int bg, int fg) +{ + +} + +static void +intel_crtc_set_cursor_position (xf86CrtcPtr crtc, int x, int y) +{ + struct intel_crtc *intel_crtc = crtc->driver_private; + struct intel_mode *mode = intel_crtc->mode; + + drmModeMoveCursor(mode->fd, crtc_id(intel_crtc), x, y); +} + +static void +intel_crtc_load_cursor_argb(xf86CrtcPtr crtc, CARD32 *image) +{ + struct intel_crtc *intel_crtc = crtc->driver_private; + int ret; + + ret = dri_bo_subdata(intel_crtc->cursor, 0, 64*64*4, image); + if (ret) + xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, + "failed to set cursor: %s\n", strerror(-ret)); +} + +static void +intel_crtc_hide_cursor(xf86CrtcPtr crtc) +{ + struct intel_crtc *intel_crtc = crtc->driver_private; + struct intel_mode *mode = intel_crtc->mode; + + drmModeSetCursor(mode->fd, crtc_id(intel_crtc), 0, 64, 64); +} + +static void +intel_crtc_show_cursor(xf86CrtcPtr crtc) +{ + struct intel_crtc *intel_crtc = crtc->driver_private; + struct intel_mode *mode = intel_crtc->mode; + + drmModeSetCursor(mode->fd, crtc_id(intel_crtc), + intel_crtc->cursor->handle, 64, 64); +} + +static void * +intel_crtc_shadow_allocate(xf86CrtcPtr crtc, int width, int height) +{ + ScrnInfoPtr scrn = crtc->scrn; + struct intel_crtc *intel_crtc = crtc->driver_private; + struct intel_mode *mode = intel_crtc->mode; + unsigned long rotate_pitch; + uint32_t tiling; + int ret; + + intel_crtc->rotate_bo = intel_allocate_framebuffer(scrn, + width, height, + mode->cpp, + &rotate_pitch, + &tiling); + + if (!intel_crtc->rotate_bo) { + xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, + "Couldn't allocate shadow memory for rotated CRTC\n"); + return NULL; + } + + ret = drmModeAddFB(mode->fd, width, height, crtc->scrn->depth, + crtc->scrn->bitsPerPixel, rotate_pitch, + intel_crtc->rotate_bo->handle, + &intel_crtc->rotate_fb_id); + if (ret) { + ErrorF("failed to add rotate fb\n"); + drm_intel_bo_unreference(intel_crtc->rotate_bo); + return NULL; + } + + drm_intel_bo_disable_reuse(intel_crtc->rotate_bo); + + intel_crtc->rotate_pitch = rotate_pitch; + return intel_crtc->rotate_bo; +} + +static PixmapPtr +intel_crtc_shadow_create(xf86CrtcPtr crtc, void *data, int width, int height) +{ + ScrnInfoPtr scrn = crtc->scrn; + intel_screen_private *intel = intel_get_screen_private(scrn); + struct intel_crtc *intel_crtc = crtc->driver_private; + PixmapPtr rotate_pixmap; + + if (!data) { + data = intel_crtc_shadow_allocate (crtc, width, height); + if (!data) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Couldn't allocate shadow pixmap for rotated CRTC\n"); + return NULL; + } + } + if (intel_crtc->rotate_bo == NULL) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Couldn't allocate shadow pixmap for rotated CRTC\n"); + return NULL; + } + + rotate_pixmap = GetScratchPixmapHeader(scrn->pScreen, + width, height, + scrn->depth, + scrn->bitsPerPixel, + intel_crtc->rotate_pitch, + NULL); + + if (rotate_pixmap == NULL) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Couldn't allocate shadow pixmap for rotated CRTC\n"); + return NULL; + } + + intel_set_pixmap_bo(rotate_pixmap, intel_crtc->rotate_bo); + + intel->shadow_present = TRUE; + + return rotate_pixmap; +} + +static void +intel_crtc_shadow_destroy(xf86CrtcPtr crtc, PixmapPtr rotate_pixmap, void *data) +{ + ScrnInfoPtr scrn = crtc->scrn; + intel_screen_private *intel = intel_get_screen_private(scrn); + struct intel_crtc *intel_crtc = crtc->driver_private; + struct intel_mode *mode = intel_crtc->mode; + + if (rotate_pixmap) { + intel_set_pixmap_bo(rotate_pixmap, NULL); + FreeScratchPixmapHeader(rotate_pixmap); + } + + if (data) { + /* Be sure to sync acceleration before the memory gets + * unbound. */ + drmModeRmFB(mode->fd, intel_crtc->rotate_fb_id); + intel_crtc->rotate_fb_id = 0; + + dri_bo_unreference(intel_crtc->rotate_bo); + intel_crtc->rotate_bo = NULL; + } + + intel->shadow_present = FALSE; +} + +static void +intel_crtc_gamma_set(xf86CrtcPtr crtc, + CARD16 *red, CARD16 *green, CARD16 *blue, int size) +{ + struct intel_crtc *intel_crtc = crtc->driver_private; + struct intel_mode *mode = intel_crtc->mode; + + drmModeCrtcSetGamma(mode->fd, crtc_id(intel_crtc), + size, red, green, blue); +} + +static void +intel_crtc_destroy(xf86CrtcPtr crtc) +{ + struct intel_crtc *intel_crtc = crtc->driver_private; + + if (intel_crtc->cursor) { + drmModeSetCursor(intel_crtc->mode->fd, crtc_id(intel_crtc), 0, 64, 64); + drm_intel_bo_unreference(intel_crtc->cursor); + intel_crtc->cursor = NULL; + } + + list_del(&intel_crtc->link); + free(intel_crtc); + + crtc->driver_private = NULL; +} + +#ifdef INTEL_PIXMAP_SHARING +static Bool +intel_set_scanout_pixmap(xf86CrtcPtr crtc, PixmapPtr ppix) +{ + struct intel_crtc *intel_crtc = crtc->driver_private; + ScrnInfoPtr scrn = crtc->scrn; + intel_screen_private *intel = intel_get_screen_private(scrn); + dri_bo *bo; + + if (ppix == intel_crtc->scanout_pixmap) + return TRUE; + + if (!ppix) { + intel_crtc->scanout_pixmap = NULL; + if (intel_crtc->scanout_fb_id) { + drmModeRmFB(intel->drmSubFD, intel_crtc->scanout_fb_id); + intel_crtc->scanout_fb_id = 0; + } + return TRUE; + } + + bo = intel_get_pixmap_bo(ppix); + if (intel->front_buffer) { + ErrorF("have front buffer\n"); + } + + drm_intel_bo_disable_reuse(bo); + + intel_crtc->scanout_pixmap = ppix; + return drmModeAddFB(intel->drmSubFD, ppix->drawable.width, + ppix->drawable.height, ppix->drawable.depth, + ppix->drawable.bitsPerPixel, ppix->devKind, + bo->handle, &intel_crtc->scanout_fb_id) == 0; +} +#endif + +static const xf86CrtcFuncsRec intel_crtc_funcs = { + .dpms = intel_crtc_dpms, + .set_mode_major = intel_crtc_set_mode_major, + .set_cursor_colors = intel_crtc_set_cursor_colors, + .set_cursor_position = intel_crtc_set_cursor_position, + .show_cursor = intel_crtc_show_cursor, + .hide_cursor = intel_crtc_hide_cursor, + .load_cursor_argb = intel_crtc_load_cursor_argb, + .shadow_create = intel_crtc_shadow_create, + .shadow_allocate = intel_crtc_shadow_allocate, + .shadow_destroy = intel_crtc_shadow_destroy, + .gamma_set = intel_crtc_gamma_set, + .destroy = intel_crtc_destroy, +#ifdef INTEL_PIXMAP_SHARING + .set_scanout_pixmap = intel_set_scanout_pixmap, +#endif +}; + +static void +intel_crtc_init(ScrnInfoPtr scrn, struct intel_mode *mode, int num) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + xf86CrtcPtr crtc; + struct intel_crtc *intel_crtc; + + intel_crtc = calloc(sizeof(struct intel_crtc), 1); + if (intel_crtc == NULL) + return; + + crtc = xf86CrtcCreate(scrn, &intel_crtc_funcs); + if (crtc == NULL) { + free(intel_crtc); + return; + } + + intel_crtc->mode_crtc = drmModeGetCrtc(mode->fd, + mode->mode_res->crtcs[num]); + if (intel_crtc->mode_crtc == NULL) { + free(intel_crtc); + return; + } + + intel_crtc->mode = mode; + crtc->driver_private = intel_crtc; + + intel_crtc->pipe = drm_intel_get_pipe_from_crtc_id(intel->bufmgr, + crtc_id(intel_crtc)); + + intel_crtc->cursor = drm_intel_bo_alloc(intel->bufmgr, "ARGB cursor", + 4*64*64, 4096); + + intel_crtc->crtc = crtc; + list_add(&intel_crtc->link, &mode->crtcs); +} + +static Bool +is_panel(int type) +{ + return (type == DRM_MODE_CONNECTOR_LVDS || + type == DRM_MODE_CONNECTOR_eDP); +} + +static xf86OutputStatus +intel_output_detect(xf86OutputPtr output) +{ + /* go to the hw and retrieve a new output struct */ + struct intel_output *intel_output = output->driver_private; + struct intel_mode *mode = intel_output->mode; + xf86OutputStatus status; + + drmModeFreeConnector(intel_output->mode_output); + intel_output->mode_output = + drmModeGetConnector(mode->fd, intel_output->output_id); + if (intel_output->mode_output == NULL) { + /* and hope we are safe everywhere else */ + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "drmModeGetConnector failed, reporting output disconnected\n"); + return XF86OutputStatusDisconnected; + } + + switch (intel_output->mode_output->connection) { + case DRM_MODE_CONNECTED: + status = XF86OutputStatusConnected; + break; + case DRM_MODE_DISCONNECTED: + status = XF86OutputStatusDisconnected; + break; + default: + case DRM_MODE_UNKNOWNCONNECTION: + status = XF86OutputStatusUnknown; + break; + } + return status; +} + +static Bool +intel_output_mode_valid(xf86OutputPtr output, DisplayModePtr pModes) +{ + struct intel_output *intel_output = output->driver_private; + + /* + * If the connector type is a panel, we will use the panel limit to + * verfiy whether the mode is valid. + */ + if (intel_output->has_panel_limits) { + if (pModes->HDisplay > intel_output->panel_hdisplay || + pModes->VDisplay > intel_output->panel_vdisplay) + return MODE_PANEL; + } + + return MODE_OK; +} + +static void +intel_output_attach_edid(xf86OutputPtr output) +{ + struct intel_output *intel_output = output->driver_private; + drmModeConnectorPtr koutput = intel_output->mode_output; + struct intel_mode *mode = intel_output->mode; + drmModePropertyBlobPtr edid_blob = NULL; + xf86MonPtr mon = NULL; + int i; + + /* look for an EDID property */ + for (i = 0; i < koutput->count_props; i++) { + drmModePropertyPtr props; + + props = drmModeGetProperty(mode->fd, koutput->props[i]); + if (!props) + continue; + + if (!(props->flags & DRM_MODE_PROP_BLOB)) { + drmModeFreeProperty(props); + continue; + } + + if (!strcmp(props->name, "EDID")) { + drmModeFreePropertyBlob(edid_blob); + edid_blob = + drmModeGetPropertyBlob(mode->fd, + koutput->prop_values[i]); + } + drmModeFreeProperty(props); + } + + if (edid_blob) { + mon = xf86InterpretEDID(output->scrn->scrnIndex, + edid_blob->data); + + if (mon && edid_blob->length > 128) + mon->flags |= MONITOR_EDID_COMPLETE_RAWDATA; + } + + xf86OutputSetEDID(output, mon); + + if (edid_blob) + drmModeFreePropertyBlob(edid_blob); +} + +static DisplayModePtr +intel_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes) +{ + xf86MonPtr mon = output->MonInfo; + + if (!mon || !GTF_SUPPORTED(mon->features.msc)) { + DisplayModePtr i, m, p = NULL; + int max_x = 0, max_y = 0; + float max_vrefresh = 0.0; + + for (m = modes; m; m = m->next) { + if (m->type & M_T_PREFERRED) + p = m; + max_x = max(max_x, m->HDisplay); + max_y = max(max_y, m->VDisplay); + max_vrefresh = max(max_vrefresh, xf86ModeVRefresh(m)); + } + + max_vrefresh = max(max_vrefresh, 60.0); + max_vrefresh *= (1 + SYNC_TOLERANCE); + +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0) + m = xf86GetDefaultModes(); +#else + m = xf86GetDefaultModes(0,0); +#endif + + xf86ValidateModesSize(output->scrn, m, max_x, max_y, 0); + + for (i = m; i; i = i->next) { + if (xf86ModeVRefresh(i) > max_vrefresh) + i->status = MODE_VSYNC; + if (p && i->HDisplay >= p->HDisplay && + i->VDisplay >= p->VDisplay && + xf86ModeVRefresh(i) >= xf86ModeVRefresh(p)) + i->status = MODE_VSYNC; + } + + xf86PruneInvalidModes(output->scrn, &m, FALSE); + + modes = xf86ModesAdd(modes, m); + } + + return modes; +} + +static DisplayModePtr +intel_output_get_modes(xf86OutputPtr output) +{ + struct intel_output *intel_output = output->driver_private; + drmModeConnectorPtr koutput = intel_output->mode_output; + DisplayModePtr Modes = NULL; + int i; + + intel_output_attach_edid(output); + + /* modes should already be available */ + for (i = 0; i < koutput->count_modes; i++) { + DisplayModePtr Mode; + + Mode = calloc(1, sizeof(DisplayModeRec)); + if (Mode) { + mode_from_kmode(output->scrn, &koutput->modes[i], Mode); + Modes = xf86ModesAdd(Modes, Mode); + } + } + + /* + * If the connector type is a panel, we will traverse the kernel mode to + * get the panel limit. And then add all the standard modes to fake + * the fullscreen experience. + * If it is incorrect, please fix me. + */ + intel_output->has_panel_limits = FALSE; + if (is_panel(koutput->connector_type)) { + for (i = 0; i < koutput->count_modes; i++) { + drmModeModeInfo *mode_ptr; + + mode_ptr = &koutput->modes[i]; + if (mode_ptr->hdisplay > intel_output->panel_hdisplay) + intel_output->panel_hdisplay = mode_ptr->hdisplay; + if (mode_ptr->vdisplay > intel_output->panel_vdisplay) + intel_output->panel_vdisplay = mode_ptr->vdisplay; + } + + intel_output->has_panel_limits = + intel_output->panel_hdisplay && + intel_output->panel_vdisplay; + + Modes = intel_output_panel_edid(output, Modes); + } + + return Modes; +} + +static void +intel_output_destroy(xf86OutputPtr output) +{ + struct intel_output *intel_output = output->driver_private; + int i; + + for (i = 0; i < intel_output->num_props; i++) { + drmModeFreeProperty(intel_output->props[i].mode_prop); + free(intel_output->props[i].atoms); + } + free(intel_output->props); + + drmModeFreeConnector(intel_output->mode_output); + intel_output->mode_output = NULL; + + list_del(&intel_output->link); + free(intel_output); + + output->driver_private = NULL; +} + +static void +intel_output_dpms_backlight(xf86OutputPtr output, int oldmode, int mode) +{ + struct intel_output *intel_output = output->driver_private; + + if (!intel_output->backlight_iface) + return; + + if (mode == DPMSModeOn) { + /* If we're going from off->on we may need to turn on the backlight. */ + if (oldmode != DPMSModeOn) + intel_output_backlight_set(output, + intel_output->backlight_active_level); + } else { + /* Only save the current backlight value if we're going from on to off. */ + if (oldmode == DPMSModeOn) + intel_output->backlight_active_level = intel_output_backlight_get(output); + intel_output_backlight_set(output, 0); + } +} + +static void +intel_output_dpms(xf86OutputPtr output, int dpms) +{ + struct intel_output *intel_output = output->driver_private; + drmModeConnectorPtr koutput = intel_output->mode_output; + struct intel_mode *mode = intel_output->mode; + int i; + + for (i = 0; i < koutput->count_props; i++) { + drmModePropertyPtr props; + + props = drmModeGetProperty(mode->fd, koutput->props[i]); + if (!props) + continue; + + if (!strcmp(props->name, "DPMS")) { + /* Make sure to reverse the order between on and off. */ + if (dpms == DPMSModeOff) + intel_output_dpms_backlight(output, + intel_output->dpms_mode, + dpms); + drmModeConnectorSetProperty(mode->fd, + intel_output->output_id, + props->prop_id, + dpms); + if (dpms != DPMSModeOff) + intel_output_dpms_backlight(output, + intel_output->dpms_mode, + dpms); + intel_output->dpms_mode = dpms; + drmModeFreeProperty(props); + return; + } + + drmModeFreeProperty(props); + } +} + +int +intel_output_dpms_status(xf86OutputPtr output) +{ + struct intel_output *intel_output = output->driver_private; + return intel_output->dpms_mode; +} + +static Bool +intel_property_ignore(drmModePropertyPtr prop) +{ + if (!prop) + return TRUE; + + /* ignore blob prop */ + if (prop->flags & DRM_MODE_PROP_BLOB) + return TRUE; + + /* ignore standard property */ + if (!strcmp(prop->name, "EDID") || + !strcmp(prop->name, "DPMS")) + return TRUE; + + return FALSE; +} + +static void +intel_output_create_ranged_atom(xf86OutputPtr output, Atom *atom, + const char *name, INT32 min, INT32 max, + uint64_t value, Bool immutable) +{ + int err; + INT32 atom_range[2]; + + atom_range[0] = min; + atom_range[1] = max; + + *atom = MakeAtom(name, strlen(name), TRUE); + + err = RRConfigureOutputProperty(output->randr_output, *atom, FALSE, + TRUE, immutable, 2, atom_range); + if (err != 0) + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "RRConfigureOutputProperty error, %d\n", err); + + err = RRChangeOutputProperty(output->randr_output, *atom, XA_INTEGER, + 32, PropModeReplace, 1, &value, FALSE, + TRUE); + if (err != 0) + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "RRChangeOutputProperty error, %d\n", err); +} + +#define BACKLIGHT_NAME "Backlight" +#define BACKLIGHT_DEPRECATED_NAME "BACKLIGHT" +static Atom backlight_atom, backlight_deprecated_atom; + +static void +intel_output_create_resources(xf86OutputPtr output) +{ + struct intel_output *intel_output = output->driver_private; + drmModeConnectorPtr mode_output = intel_output->mode_output; + struct intel_mode *mode = intel_output->mode; + int i, j, err; + + intel_output->props = calloc(mode_output->count_props, + sizeof(struct intel_property)); + if (!intel_output->props) + return; + + intel_output->num_props = 0; + for (i = j = 0; i < mode_output->count_props; i++) { + drmModePropertyPtr drmmode_prop; + + drmmode_prop = drmModeGetProperty(mode->fd, + mode_output->props[i]); + if (intel_property_ignore(drmmode_prop)) { + drmModeFreeProperty(drmmode_prop); + continue; + } + + intel_output->props[j].mode_prop = drmmode_prop; + intel_output->props[j].value = mode_output->prop_values[i]; + j++; + } + intel_output->num_props = j; + + for (i = 0; i < intel_output->num_props; i++) { + struct intel_property *p = &intel_output->props[i]; + drmModePropertyPtr drmmode_prop = p->mode_prop; + + if (drmmode_prop->flags & DRM_MODE_PROP_RANGE) { + p->num_atoms = 1; + p->atoms = calloc(p->num_atoms, sizeof(Atom)); + if (!p->atoms) + continue; + + intel_output_create_ranged_atom(output, &p->atoms[0], + drmmode_prop->name, + drmmode_prop->values[0], + drmmode_prop->values[1], + p->value, + drmmode_prop->flags & DRM_MODE_PROP_IMMUTABLE ? TRUE : FALSE); + + } else if (drmmode_prop->flags & DRM_MODE_PROP_ENUM) { + p->num_atoms = drmmode_prop->count_enums + 1; + p->atoms = calloc(p->num_atoms, sizeof(Atom)); + if (!p->atoms) + continue; + + p->atoms[0] = MakeAtom(drmmode_prop->name, strlen(drmmode_prop->name), TRUE); + for (j = 1; j <= drmmode_prop->count_enums; j++) { + struct drm_mode_property_enum *e = &drmmode_prop->enums[j-1]; + p->atoms[j] = MakeAtom(e->name, strlen(e->name), TRUE); + } + + err = RRConfigureOutputProperty(output->randr_output, p->atoms[0], + FALSE, FALSE, + drmmode_prop->flags & DRM_MODE_PROP_IMMUTABLE ? TRUE : FALSE, + p->num_atoms - 1, (INT32 *)&p->atoms[1]); + if (err != 0) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "RRConfigureOutputProperty error, %d\n", err); + } + + for (j = 0; j < drmmode_prop->count_enums; j++) + if (drmmode_prop->enums[j].value == p->value) + break; + /* there's always a matching value */ + err = RRChangeOutputProperty(output->randr_output, p->atoms[0], + XA_ATOM, 32, PropModeReplace, 1, &p->atoms[j+1], FALSE, TRUE); + if (err != 0) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "RRChangeOutputProperty error, %d\n", err); + } + } + } + + if (intel_output->backlight_iface) { + /* Set up the backlight property, which takes effect + * immediately and accepts values only within the + * backlight_range. + */ + intel_output_create_ranged_atom(output, &backlight_atom, + BACKLIGHT_NAME, 0, + intel_output->backlight_max, + intel_output->backlight_active_level, + FALSE); + intel_output_create_ranged_atom(output, + &backlight_deprecated_atom, + BACKLIGHT_DEPRECATED_NAME, 0, + intel_output->backlight_max, + intel_output->backlight_active_level, + FALSE); + } +} + +static Bool +intel_output_set_property(xf86OutputPtr output, Atom property, + RRPropertyValuePtr value) +{ + struct intel_output *intel_output = output->driver_private; + struct intel_mode *mode = intel_output->mode; + int i; + + if (property == backlight_atom || property == backlight_deprecated_atom) { + INT32 val; + + if (value->type != XA_INTEGER || value->format != 32 || + value->size != 1) + { + return FALSE; + } + + val = *(INT32 *)value->data; + if (val < 0 || val > intel_output->backlight_max) + return FALSE; + + if (intel_output->dpms_mode == DPMSModeOn) + intel_output_backlight_set(output, val); + intel_output->backlight_active_level = val; + return TRUE; + } + + for (i = 0; i < intel_output->num_props; i++) { + struct intel_property *p = &intel_output->props[i]; + + if (p->atoms[0] != property) + continue; + + if (p->mode_prop->flags & DRM_MODE_PROP_RANGE) { + uint32_t val; + + if (value->type != XA_INTEGER || value->format != 32 || + value->size != 1) + return FALSE; + val = *(uint32_t *)value->data; + + drmModeConnectorSetProperty(mode->fd, intel_output->output_id, + p->mode_prop->prop_id, (uint64_t)val); + return TRUE; + } else if (p->mode_prop->flags & DRM_MODE_PROP_ENUM) { + Atom atom; + const char *name; + int j; + + if (value->type != XA_ATOM || value->format != 32 || value->size != 1) + return FALSE; + memcpy(&atom, value->data, 4); + name = NameForAtom(atom); + if (name == NULL) + return FALSE; + + /* search for matching name string, then set its value down */ + for (j = 0; j < p->mode_prop->count_enums; j++) { + if (!strcmp(p->mode_prop->enums[j].name, name)) { + drmModeConnectorSetProperty(mode->fd, intel_output->output_id, + p->mode_prop->prop_id, p->mode_prop->enums[j].value); + return TRUE; + } + } + return FALSE; + } + } + + /* We didn't recognise this property, just report success in order + * to allow the set to continue, otherwise we break setting of + * common properties like EDID. + */ + return TRUE; +} + +static Bool +intel_output_get_property(xf86OutputPtr output, Atom property) +{ + struct intel_output *intel_output = output->driver_private; + int err; + + if (property == backlight_atom || property == backlight_deprecated_atom) { + INT32 val; + + if (! intel_output->backlight_iface) + return FALSE; + + val = intel_output_backlight_get(output); + if (val < 0) + return FALSE; + + err = RRChangeOutputProperty(output->randr_output, property, + XA_INTEGER, 32, PropModeReplace, 1, &val, + FALSE, TRUE); + if (err != 0) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "RRChangeOutputProperty error, %d\n", err); + return FALSE; + } + + return TRUE; + } + + return FALSE; +} + +static const xf86OutputFuncsRec intel_output_funcs = { + .create_resources = intel_output_create_resources, +#ifdef RANDR_12_INTERFACE + .set_property = intel_output_set_property, + .get_property = intel_output_get_property, +#endif + .dpms = intel_output_dpms, +#if 0 + + .save = drmmode_crt_save, + .restore = drmmode_crt_restore, + .mode_fixup = drmmode_crt_mode_fixup, + .prepare = intel_output_prepare, + .mode_set = drmmode_crt_mode_set, + .commit = intel_output_commit, +#endif + .detect = intel_output_detect, + .mode_valid = intel_output_mode_valid, + + .get_modes = intel_output_get_modes, + .destroy = intel_output_destroy +}; + +static const int subpixel_conv_table[7] = { + 0, + SubPixelUnknown, + SubPixelHorizontalRGB, + SubPixelHorizontalBGR, + SubPixelVerticalRGB, + SubPixelVerticalBGR, + SubPixelNone +}; + +static const char *output_names[] = { + "None", + "VGA", + "DVI", + "DVI", + "DVI", + "Composite", + "TV", + "LVDS", + "CTV", + "DIN", + "DP", + "HDMI", + "HDMI", + "TV", + "eDP", +}; + +static void +intel_output_init(ScrnInfoPtr scrn, struct intel_mode *mode, int num) +{ + xf86OutputPtr output; + drmModeConnectorPtr koutput; + drmModeEncoderPtr kencoder; + struct intel_output *intel_output; + const char *output_name; + char name[32]; + + koutput = drmModeGetConnector(mode->fd, + mode->mode_res->connectors[num]); + if (!koutput) + return; + + kencoder = drmModeGetEncoder(mode->fd, koutput->encoders[0]); + if (!kencoder) { + drmModeFreeConnector(koutput); + return; + } + + if (koutput->connector_type < ARRAY_SIZE(output_names)) + output_name = output_names[koutput->connector_type]; + else + output_name = "UNKNOWN"; + snprintf(name, 32, "%s%d", output_name, koutput->connector_type_id); + + output = xf86OutputCreate (scrn, &intel_output_funcs, name); + if (!output) { + drmModeFreeEncoder(kencoder); + drmModeFreeConnector(koutput); + return; + } + + intel_output = calloc(sizeof(struct intel_output), 1); + if (!intel_output) { + xf86OutputDestroy(output); + drmModeFreeConnector(koutput); + drmModeFreeEncoder(kencoder); + return; + } + + intel_output->output_id = mode->mode_res->connectors[num]; + intel_output->mode_output = koutput; + intel_output->mode_encoder = kencoder; + intel_output->mode = mode; + + output->mm_width = koutput->mmWidth; + output->mm_height = koutput->mmHeight; + + output->subpixel_order = subpixel_conv_table[koutput->subpixel]; + output->driver_private = intel_output; + + if (is_panel(koutput->connector_type)) + intel_output_backlight_init(output); + + output->possible_crtcs = kencoder->possible_crtcs; + output->interlaceAllowed = TRUE; + + intel_output->output = output; + list_add(&intel_output->link, &mode->outputs); +} + +static Bool +intel_xf86crtc_resize(ScrnInfoPtr scrn, int width, int height) +{ + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); + struct intel_crtc *intel_crtc = xf86_config->crtc[0]->driver_private; + struct intel_mode *mode = intel_crtc->mode; + intel_screen_private *intel = intel_get_screen_private(scrn); + drm_intel_bo *old_front = NULL; + Bool ret; + uint32_t old_fb_id; + int i, old_width, old_height, old_pitch; + unsigned long pitch; + uint32_t tiling; + ScreenPtr screen; + + if (scrn->virtualX == width && scrn->virtualY == height) + return TRUE; + + intel_glamor_flush(intel); + intel_batch_submit(scrn); + + old_width = scrn->virtualX; + old_height = scrn->virtualY; + old_pitch = scrn->displayWidth; + old_fb_id = mode->fb_id; + old_front = intel->front_buffer; + + if (intel->back_pixmap) { + screen = intel->back_pixmap->drawable.pScreen; + screen->DestroyPixmap(intel->back_pixmap); + intel->back_pixmap = NULL; + } + + if (intel->back_buffer) { + drm_intel_bo_unreference(intel->back_buffer); + intel->back_buffer = NULL; + } + + intel->front_buffer = intel_allocate_framebuffer(scrn, + width, height, + intel->cpp, + &pitch, + &tiling); + if (!intel->front_buffer) + goto fail; + + ret = drmModeAddFB(mode->fd, width, height, scrn->depth, + scrn->bitsPerPixel, pitch, + intel->front_buffer->handle, + &mode->fb_id); + if (ret) + goto fail; + + drm_intel_bo_disable_reuse(intel->front_buffer); + intel->front_pitch = pitch; + intel->front_tiling = tiling; + + scrn->virtualX = width; + scrn->virtualY = height; + + for (i = 0; i < xf86_config->num_crtc; i++) { + xf86CrtcPtr crtc = xf86_config->crtc[i]; + + if (!crtc->enabled) + continue; + + if (!intel_crtc_apply(crtc)) + goto fail; + } + + intel_uxa_create_screen_resources(scrn->pScreen); + + if (old_fb_id) + drmModeRmFB(mode->fd, old_fb_id); + if (old_front) + drm_intel_bo_unreference(old_front); + + return TRUE; + +fail: + if (intel->front_buffer) + drm_intel_bo_unreference(intel->front_buffer); + intel->front_buffer = old_front; + scrn->virtualX = old_width; + scrn->virtualY = old_height; + scrn->displayWidth = old_pitch; + if (old_fb_id != mode->fb_id) + drmModeRmFB(mode->fd, mode->fb_id); + mode->fb_id = old_fb_id; + + return FALSE; +} + +Bool +intel_do_pageflip(intel_screen_private *intel, + dri_bo *new_front, + DRI2FrameEventPtr flip_info, int ref_crtc_hw_id) +{ + ScrnInfoPtr scrn = intel->scrn; + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); + struct intel_crtc *crtc = config->crtc[0]->driver_private; + struct intel_mode *mode = crtc->mode; + unsigned int pitch = scrn->displayWidth * intel->cpp; + struct intel_pageflip *flip; + uint32_t new_fb_id; + int i; + + /* + * Create a new handle for the back buffer + */ + if (drmModeAddFB(mode->fd, scrn->virtualX, scrn->virtualY, + scrn->depth, scrn->bitsPerPixel, pitch, + new_front->handle, &new_fb_id)) + goto error_out; + + drm_intel_bo_disable_reuse(new_front); + intel_glamor_flush(intel); + intel_batch_submit(scrn); + + /* + * Queue flips on all enabled CRTCs + * Note that if/when we get per-CRTC buffers, we'll have to update this. + * Right now it assumes a single shared fb across all CRTCs, with the + * kernel fixing up the offset of each CRTC as necessary. + * + * Also, flips queued on disabled or incorrectly configured displays + * may never complete; this is a configuration error. + */ + mode->fe_frame = 0; + mode->fe_tv_sec = 0; + mode->fe_tv_usec = 0; + + for (i = 0; i < config->num_crtc; i++) { + if (!intel_crtc_on(config->crtc[i])) + continue; + + mode->flip_info = flip_info; + mode->flip_count++; + + crtc = config->crtc[i]->driver_private; + + flip = calloc(1, sizeof(struct intel_pageflip)); + if (flip == NULL) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "flip queue: carrier alloc failed.\n"); + goto error_undo; + } + + /* Only the reference crtc will finally deliver its page flip + * completion event. All other crtc's events will be discarded. + */ + flip->dispatch_me = (intel_crtc_to_pipe(crtc->crtc) == ref_crtc_hw_id); + flip->mode = mode; + + if (drmModePageFlip(mode->fd, + crtc_id(crtc), + new_fb_id, + DRM_MODE_PAGE_FLIP_EVENT, flip)) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "flip queue failed: %s\n", strerror(errno)); + free(flip); + goto error_undo; + } + } + + mode->old_fb_id = mode->fb_id; + mode->fb_id = new_fb_id; + return TRUE; + +error_undo: + drmModeRmFB(mode->fd, new_fb_id); + for (i = 0; i < config->num_crtc; i++) { + if (config->crtc[i]->enabled) + intel_crtc_apply(config->crtc[i]); + } + +error_out: + xf86DrvMsg(scrn->scrnIndex, X_WARNING, "Page flip failed: %s\n", + strerror(errno)); + return FALSE; +} + +static const xf86CrtcConfigFuncsRec intel_xf86crtc_config_funcs = { + intel_xf86crtc_resize +}; + +static void +intel_vblank_handler(int fd, unsigned int frame, unsigned int tv_sec, + unsigned int tv_usec, void *event) +{ + I830DRI2FrameEventHandler(frame, tv_sec, tv_usec, event); +} + +static void +intel_page_flip_handler(int fd, unsigned int frame, unsigned int tv_sec, + unsigned int tv_usec, void *event_data) +{ + struct intel_pageflip *flip = event_data; + struct intel_mode *mode = flip->mode; + + /* Is this the event whose info shall be delivered to higher level? */ + if (flip->dispatch_me) { + /* Yes: Cache msc, ust for later delivery. */ + mode->fe_frame = frame; + mode->fe_tv_sec = tv_sec; + mode->fe_tv_usec = tv_usec; + } + free(flip); + + /* Last crtc completed flip? */ + mode->flip_count--; + if (mode->flip_count > 0) + return; + + /* Release framebuffer */ + drmModeRmFB(mode->fd, mode->old_fb_id); + + if (mode->flip_info == NULL) + return; + + /* Deliver cached msc, ust from reference crtc to flip event handler */ + I830DRI2FlipEventHandler(mode->fe_frame, mode->fe_tv_sec, + mode->fe_tv_usec, mode->flip_info); +} + +static void +drm_wakeup_handler(pointer data, int err, pointer p) +{ + struct intel_mode *mode; + fd_set *read_mask; + + if (data == NULL || err < 0) + return; + + mode = data; + read_mask = p; + if (FD_ISSET(mode->fd, read_mask)) + drmHandleEvent(mode->fd, &mode->event_context); +} + +static drmModeEncoderPtr +intel_get_kencoder(struct intel_mode *mode, int num) +{ + struct intel_output *iterator; + int id = mode->mode_res->encoders[num]; + + list_for_each_entry(iterator, &mode->outputs, link) + if (iterator->mode_encoder->encoder_id == id) + return iterator->mode_encoder; + + return NULL; +} + +/* + * Libdrm's possible_clones is a mask of encoders, Xorg's possible_clones is a + * mask of outputs. This function sets Xorg's possible_clones based on the + * values read from libdrm. + */ +static void +intel_compute_possible_clones(ScrnInfoPtr scrn, struct intel_mode *mode) +{ + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(scrn); + struct intel_output *intel_output, *clone; + drmModeEncoderPtr cloned_encoder; + uint32_t mask; + int i, j, k; + CARD32 possible_clones; + + for (i = 0; i < config->num_output; i++) { + possible_clones = 0; + intel_output = config->output[i]->driver_private; + + mask = intel_output->mode_encoder->possible_clones; + for (j = 0; mask != 0; j++, mask >>= 1) { + + if ((mask & 1) == 0) + continue; + + cloned_encoder = intel_get_kencoder(mode, j); + if (!cloned_encoder) + continue; + + for (k = 0; k < config->num_output; k++) { + clone = config->output[k]->driver_private; + if (clone->mode_encoder->encoder_id == + cloned_encoder->encoder_id) + possible_clones |= (1 << k); + } + } + + config->output[i]->possible_clones = possible_clones; + } +} + +Bool intel_mode_pre_init(ScrnInfoPtr scrn, int fd, int cpp) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct drm_i915_getparam gp; + struct intel_mode *mode; + unsigned int i; + int has_flipping; + + mode = calloc(1, sizeof *mode); + if (!mode) + return FALSE; + + mode->fd = fd; + + list_init(&mode->crtcs); + list_init(&mode->outputs); + + xf86CrtcConfigInit(scrn, &intel_xf86crtc_config_funcs); + + mode->cpp = cpp; + mode->mode_res = drmModeGetResources(mode->fd); + if (!mode->mode_res) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "failed to get resources: %s\n", strerror(errno)); + free(mode); + return FALSE; + } + + xf86CrtcSetSizeRange(scrn, 320, 200, mode->mode_res->max_width, + mode->mode_res->max_height); + for (i = 0; i < mode->mode_res->count_crtcs; i++) + intel_crtc_init(scrn, mode, i); + + for (i = 0; i < mode->mode_res->count_connectors; i++) + intel_output_init(scrn, mode, i); + + intel_compute_possible_clones(scrn, mode); + +#ifdef INTEL_PIXMAP_SHARING + xf86ProviderSetup(scrn, NULL, "Intel"); +#endif + + xf86InitialConfiguration(scrn, TRUE); + + mode->event_context.version = DRM_EVENT_CONTEXT_VERSION; + mode->event_context.vblank_handler = intel_vblank_handler; + mode->event_context.page_flip_handler = intel_page_flip_handler; + + has_flipping = 0; + gp.param = I915_PARAM_HAS_PAGEFLIPPING; + gp.value = &has_flipping; + (void)drmCommandWriteRead(intel->drmSubFD, DRM_I915_GETPARAM, &gp, + sizeof(gp)); + if (has_flipping && intel->swapbuffers_wait) { + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "Kernel page flipping support detected, enabling\n"); + intel->use_pageflipping = TRUE; + } + + intel->modes = mode; + return TRUE; +} + +void +intel_mode_init(struct intel_screen_private *intel) +{ + struct intel_mode *mode = intel->modes; + + /* We need to re-register the mode->fd for the synchronisation + * feedback on every server generation, so perform the + * registration within ScreenInit and not PreInit. + */ + mode->flip_count = 0; + AddGeneralSocket(mode->fd); + RegisterBlockAndWakeupHandlers((BlockHandlerProcPtr)NoopDDA, + drm_wakeup_handler, mode); +} + +void +intel_mode_remove_fb(intel_screen_private *intel) +{ + struct intel_mode *mode = intel->modes; + + if (mode->fb_id) { + drmModeRmFB(mode->fd, mode->fb_id); + mode->fb_id = 0; + } +} + +static Bool has_pending_events(int fd) +{ + struct pollfd pfd; + pfd.fd = fd; + pfd.events = POLLIN; + return poll(&pfd, 1, 0) == 1; +} + +void +intel_mode_close(intel_screen_private *intel) +{ + struct intel_mode *mode = intel->modes; + + if (mode == NULL) + return; + + while (has_pending_events(mode->fd)) + drmHandleEvent(mode->fd, &mode->event_context); + + RemoveBlockAndWakeupHandlers((BlockHandlerProcPtr)NoopDDA, + drm_wakeup_handler, mode); + RemoveGeneralSocket(mode->fd); +} + +void +intel_mode_fini(intel_screen_private *intel) +{ + struct intel_mode *mode = intel->modes; + + if (mode == NULL) + return; + + while(!list_is_empty(&mode->crtcs)) { + xf86CrtcDestroy(list_first_entry(&mode->crtcs, + struct intel_crtc, + link)->crtc); + } + + while(!list_is_empty(&mode->outputs)) { + xf86OutputDestroy(list_first_entry(&mode->outputs, + struct intel_output, + link)->output); + } + + if (mode->fb_id) + drmModeRmFB(mode->fd, mode->fb_id); + + /* mode->rotate_fb_id should have been destroyed already */ + + free(mode); + intel->modes = NULL; +} + +/* for the mode overlay */ +int +intel_crtc_id(xf86CrtcPtr crtc) +{ + return crtc_id(crtc->driver_private); +} + +int intel_crtc_to_pipe(xf86CrtcPtr crtc) +{ + struct intel_crtc *intel_crtc = crtc->driver_private; + return intel_crtc->pipe; +} + +Bool intel_crtc_on(xf86CrtcPtr crtc) +{ + struct intel_crtc *intel_crtc = crtc->driver_private; + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(crtc->scrn); + drmModeCrtcPtr drm_crtc; + Bool ret; + int i; + + if (!crtc->enabled) + return FALSE; + + /* Kernel manages CRTC status based on output config */ + ret = FALSE; + for (i = 0; i < xf86_config->num_output; i++) { + xf86OutputPtr output = xf86_config->output[i]; + if (output->crtc == crtc && + intel_output_dpms_status(output) == DPMSModeOn) { + ret = TRUE; + break; + } + } + if (!ret) + return FALSE; + + /* And finally check with the kernel that the fb is bound */ + drm_crtc = drmModeGetCrtc(intel_crtc->mode->fd, crtc_id(intel_crtc)); + if (drm_crtc == NULL) + return FALSE; + + ret = (drm_crtc->mode_valid && + intel_crtc->mode->fb_id == drm_crtc->buffer_id); + free(drm_crtc); + + return ret; +} + +static PixmapPtr +intel_create_pixmap_for_bo(ScreenPtr pScreen, dri_bo *bo, + int width, int height, + int depth, int bpp, + int pitch) +{ + PixmapPtr pixmap; + + pixmap = pScreen->CreatePixmap(pScreen, 0, 0, depth, 0); + if (pixmap == NullPixmap) + return pixmap; + + if (!pScreen->ModifyPixmapHeader(pixmap, + width, height, + depth, bpp, + pitch, NULL)) { + pScreen->DestroyPixmap(pixmap); + return NullPixmap; + } + + intel_set_pixmap_bo(pixmap, bo); + return pixmap; +} + +static PixmapPtr +intel_create_pixmap_for_fbcon(ScrnInfoPtr scrn, int fbcon_id) +{ + ScreenPtr pScreen = xf86ScrnToScreen(scrn); + intel_screen_private *intel = intel_get_screen_private(scrn); + struct intel_mode *mode = intel->modes; + int fd = mode->fd; + drmModeFBPtr fbcon; + struct drm_gem_flink flink; + drm_intel_bo *bo; + PixmapPtr pixmap = NullPixmap; + + fbcon = drmModeGetFB(fd, fbcon_id); + if (fbcon == NULL) + return NULL; + + if (fbcon->depth != scrn->depth || + fbcon->width != scrn->virtualX || + fbcon->height != scrn->virtualY) + goto out_free_fb; + + flink.handle = fbcon->handle; + if (ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink) < 0) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Couldn't flink fbcon handle\n"); + goto out_free_fb; + } + + bo = drm_intel_bo_gem_create_from_name(intel->bufmgr, + "fbcon", flink.name); + if (bo == NULL) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Couldn't allocate bo for fbcon handle\n"); + goto out_free_fb; + } + + pixmap = intel_create_pixmap_for_bo(pScreen, bo, + fbcon->width, fbcon->height, + fbcon->depth, fbcon->bpp, + fbcon->pitch); + if (pixmap == NullPixmap) + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Couldn't allocate pixmap fbcon contents\n"); + drm_intel_bo_unreference(bo); +out_free_fb: + drmModeFreeFB(fbcon); + + return pixmap; +} + +void intel_copy_fb(ScrnInfoPtr scrn) +{ + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); + ScreenPtr pScreen = xf86ScrnToScreen(scrn); + intel_screen_private *intel = intel_get_screen_private(scrn); + PixmapPtr src, dst; + unsigned int pitch = scrn->displayWidth * intel->cpp; + struct intel_crtc *intel_crtc; + int i, fbcon_id; + + if (intel->force_fallback) + return; + + fbcon_id = 0; + for (i = 0; i < xf86_config->num_crtc; i++) { + intel_crtc = xf86_config->crtc[i]->driver_private; + if (intel_crtc->mode_crtc->buffer_id) + fbcon_id = intel_crtc->mode_crtc->buffer_id; + } + if (!fbcon_id) + return; + + src = intel_create_pixmap_for_fbcon(scrn, fbcon_id); + if (src == NULL) + return; + + /* We dont have a screen Pixmap yet */ + dst = intel_create_pixmap_for_bo(pScreen, intel->front_buffer, + scrn->virtualX, scrn->virtualY, + scrn->depth, scrn->bitsPerPixel, + pitch); + if (dst == NullPixmap) + goto cleanup_src; + + if (!intel->uxa_driver->prepare_copy(src, dst, + -1, -1, + GXcopy, FB_ALLONES)) + goto cleanup_dst; + + intel->uxa_driver->copy(dst, + 0, 0, + 0, 0, + scrn->virtualX, scrn->virtualY); + intel->uxa_driver->done_copy(dst); +#if ABI_VIDEODRV_VERSION >= SET_ABI_VERSION(10, 0) + pScreen->canDoBGNoneRoot = TRUE; +#endif + +cleanup_dst: + (*pScreen->DestroyPixmap)(dst); +cleanup_src: + (*pScreen->DestroyPixmap)(src); +} diff --git a/src/uxa/intel_dri.c b/src/uxa/intel_dri.c new file mode 100644 index 00000000..03700343 --- /dev/null +++ b/src/uxa/intel_dri.c @@ -0,0 +1,1639 @@ +/************************************************************************** + +Copyright 2001 VA Linux Systems Inc., Fremont, California. +Copyright © 2002 by David Dawes + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: Jeff Hartmann <jhartmann@valinux.com> + * David Dawes <dawes@xfree86.org> + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/time.h> +#include <time.h> +#include <errno.h> + +#include "xf86.h" +#include "xf86_OSproc.h" + +#include "xf86Pci.h" +#include "xf86drm.h" + +#include "windowstr.h" +#include "shadow.h" +#include "fb.h" + +#include "intel.h" +#include "i830_reg.h" + +#include "i915_drm.h" + +#include "dri2.h" + +#include "intel_glamor.h" +#include "uxa.h" + +typedef struct { + int refcnt; + PixmapPtr pixmap; +} I830DRI2BufferPrivateRec, *I830DRI2BufferPrivatePtr; + +#if HAS_DEVPRIVATEKEYREC +static DevPrivateKeyRec i830_client_key; +#else +static int i830_client_key; +#endif + +static uint32_t pixmap_flink(PixmapPtr pixmap) +{ + struct intel_pixmap *priv = intel_get_pixmap_private(pixmap); + uint32_t name; + + if (priv == NULL || priv->bo == NULL) + return 0; + + if (dri_bo_flink(priv->bo, &name) != 0) + return 0; + + priv->pinned |= PIN_DRI; + return name; +} + +static PixmapPtr get_front_buffer(DrawablePtr drawable) +{ + PixmapPtr pixmap; + + pixmap = get_drawable_pixmap(drawable); + if (!intel_get_pixmap_bo(pixmap)) + return NULL; + + pixmap->refcnt++; + return pixmap; +} + +static PixmapPtr fixup_glamor(DrawablePtr drawable, PixmapPtr pixmap) +{ + ScreenPtr screen = drawable->pScreen; + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + PixmapPtr old = get_drawable_pixmap(drawable); + struct intel_pixmap *priv = intel_get_pixmap_private(pixmap); + GCPtr gc; + + /* With a glamor pixmap, 2D pixmaps are created in texture + * and without a static BO attached to it. To support DRI, + * we need to create a new textured-drm pixmap and + * need to copy the original content to this new textured-drm + * pixmap, and then convert the old pixmap to a coherent + * textured-drm pixmap which has a valid BO attached to it + * and also has a valid texture, thus both glamor and DRI2 + * can access it. + * + */ + + /* Copy the current contents of the pixmap to the bo. */ + gc = GetScratchGC(drawable->depth, screen); + if (gc) { + ValidateGC(&pixmap->drawable, gc); + gc->ops->CopyArea(drawable, &pixmap->drawable, + gc, + 0, 0, + drawable->width, + drawable->height, + 0, 0); + FreeScratchGC(gc); + } + + intel_set_pixmap_private(pixmap, NULL); + + /* Exchange the underlying texture/image. */ + intel_glamor_exchange_buffers(intel, old, pixmap); + /* And redirect the pixmap to the new bo (for 3D). */ + intel_set_pixmap_private(old, priv); + old->refcnt++; + + screen->ModifyPixmapHeader(old, + drawable->width, + drawable->height, + 0, 0, + priv->stride, + NULL); + screen->DestroyPixmap(pixmap); + intel_get_screen_private(xf86ScreenToScrn(screen))->needs_flush = TRUE; + return old; +} + +#if DRI2INFOREC_VERSION < 2 +static DRI2BufferPtr +I830DRI2CreateBuffers(DrawablePtr drawable, unsigned int *attachments, + int count) +{ + ScreenPtr screen = drawable->pScreen; + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + DRI2BufferPtr buffers; + I830DRI2BufferPrivatePtr privates; + PixmapPtr pixmap, pDepthPixmap; + Bool is_glamor_pixmap = FALSE; + int i; + + buffers = calloc(count, sizeof *buffers); + if (buffers == NULL) + return NULL; + privates = calloc(count, sizeof *privates); + if (privates == NULL) { + free(buffers); + return NULL; + } + + pDepthPixmap = NULL; + for (i = 0; i < count; i++) { + pixmap = NULL; + if (attachments[i] == DRI2BufferFrontLeft) { + pixmap = get_front_buffer(drawable); + + if (pixmap == NULL) { + drawable = &(get_drawable_pixmap(drawable)->drawable); + is_glamor_pixmap = TRUE; + } + } else if (attachments[i] == DRI2BufferStencil && pDepthPixmap) { + pixmap = pDepthPixmap; + pixmap->refcnt++; + } + + if (pixmap == NULL) { + unsigned int hint = INTEL_CREATE_PIXMAP_DRI2; + + if (intel->tiling & INTEL_TILING_3D) { + switch (attachments[i]) { + case DRI2BufferDepth: + if (SUPPORTS_YTILING(intel)) + hint |= INTEL_CREATE_PIXMAP_TILING_Y; + else + hint |= INTEL_CREATE_PIXMAP_TILING_X; + break; + case DRI2BufferFakeFrontLeft: + case DRI2BufferFakeFrontRight: + case DRI2BufferBackLeft: + case DRI2BufferBackRight: + hint |= INTEL_CREATE_PIXMAP_TILING_X; + break; + } + } + + pixmap = screen->CreatePixmap(screen, + drawable->width, + drawable->height, + drawable->depth, + hint); + if (pixmap == NULL || + intel_get_pixmap_bo(pixmap) == NULL) + { + if (pixmap) + screen->DestroyPixmap(pixmap); + goto unwind; + } + + if (is_glamor_pixmap) + pixmap = fixup_glamor(drawable, pixmap); + } + + if (attachments[i] == DRI2BufferDepth) + pDepthPixmap = pixmap; + + buffers[i].attachment = attachments[i]; + buffers[i].pitch = pixmap->devKind; + buffers[i].cpp = pixmap->drawable.bitsPerPixel / 8; + buffers[i].driverPrivate = &privates[i]; + buffers[i].flags = 0; /* not tiled */ + privates[i].refcnt = 1; + privates[i].pixmap = pixmap; + + if ((buffers[i].name = pixmap_flink(pixmap)) == 0) { + /* failed to name buffer */ + screen->DestroyPixmap(pixmap); + goto unwind; + } + } + + return buffers; + +unwind: + while (i--) + screen->DestroyPixmap(privates[i].pixmap); + free(privates); + free(buffers); + return NULL; +} + +static void +I830DRI2DestroyBuffers(DrawablePtr drawable, DRI2BufferPtr buffers, int count) +{ + ScreenPtr screen = drawable->pScreen; + I830DRI2BufferPrivatePtr private; + int i; + + for (i = 0; i < count; i++) { + private = buffers[i].driverPrivate; + screen->DestroyPixmap(private->pixmap); + } + + if (buffers) { + free(buffers[0].driverPrivate); + free(buffers); + } +} + +#else + +static DRI2Buffer2Ptr +I830DRI2CreateBuffer(DrawablePtr drawable, unsigned int attachment, + unsigned int format) +{ + ScreenPtr screen = drawable->pScreen; + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + DRI2Buffer2Ptr buffer; + I830DRI2BufferPrivatePtr privates; + PixmapPtr pixmap; + Bool is_glamor_pixmap = FALSE; + + buffer = calloc(1, sizeof *buffer); + if (buffer == NULL) + return NULL; + privates = calloc(1, sizeof *privates); + if (privates == NULL) { + free(buffer); + return NULL; + } + + pixmap = NULL; + if (attachment == DRI2BufferFrontLeft) { + pixmap = get_front_buffer(drawable); + + if (pixmap == NULL) { + drawable = &(get_drawable_pixmap(drawable)->drawable); + is_glamor_pixmap = TRUE; + } + } + + if (pixmap == NULL) { + unsigned int hint = INTEL_CREATE_PIXMAP_DRI2; + int pixmap_width = drawable->width; + int pixmap_height = drawable->height; + int pixmap_cpp = (format != 0) ? format : drawable->depth; + + if (intel->tiling & INTEL_TILING_3D) { + switch (attachment) { + case DRI2BufferDepth: + case DRI2BufferDepthStencil: + case DRI2BufferHiz: + if (SUPPORTS_YTILING(intel)) { + hint |= INTEL_CREATE_PIXMAP_TILING_Y; + break; + } + case DRI2BufferAccum: + case DRI2BufferBackLeft: + case DRI2BufferBackRight: + case DRI2BufferFakeFrontLeft: + case DRI2BufferFakeFrontRight: + case DRI2BufferFrontLeft: + case DRI2BufferFrontRight: + hint |= INTEL_CREATE_PIXMAP_TILING_X; + break; + case DRI2BufferStencil: + /* + * The stencil buffer is W tiled. However, we + * request from the kernel a non-tiled buffer + * because the GTT is incapable of W fencing. + */ + hint |= INTEL_CREATE_PIXMAP_TILING_NONE; + break; + default: + free(privates); + free(buffer); + return NULL; + } + } + + /* + * The stencil buffer has quirky pitch requirements. From Vol + * 2a, 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface + * Pitch": + * The pitch must be set to 2x the value computed based on + * width, as the stencil buffer is stored with two rows + * interleaved. + * To accomplish this, we resort to the nasty hack of doubling + * the drm region's cpp and halving its height. + * + * If we neglect to double the pitch, then render corruption + * occurs. + */ + if (attachment == DRI2BufferStencil) { + pixmap_width = ALIGN(pixmap_width, 64); + pixmap_height = ALIGN((pixmap_height + 1) / 2, 64); + pixmap_cpp *= 2; + } + + pixmap = screen->CreatePixmap(screen, + pixmap_width, + pixmap_height, + pixmap_cpp, + hint); + if (pixmap == NULL || intel_get_pixmap_bo(pixmap) == NULL) { + if (pixmap) + screen->DestroyPixmap(pixmap); + free(privates); + free(buffer); + return NULL; + } + if (is_glamor_pixmap) + pixmap = fixup_glamor(drawable, pixmap); + } + + buffer->attachment = attachment; + buffer->pitch = pixmap->devKind; + buffer->cpp = pixmap->drawable.bitsPerPixel / 8; + buffer->driverPrivate = privates; + buffer->format = format; + buffer->flags = 0; /* not tiled */ + privates->refcnt = 1; + privates->pixmap = pixmap; + + if ((buffer->name = pixmap_flink(pixmap)) == 0) { + /* failed to name buffer */ + screen->DestroyPixmap(pixmap); + free(privates); + free(buffer); + return NULL; + } + + return buffer; +} + +static void I830DRI2DestroyBuffer(DrawablePtr drawable, DRI2Buffer2Ptr buffer) +{ + if (buffer && buffer->driverPrivate) { + I830DRI2BufferPrivatePtr private = buffer->driverPrivate; + if (--private->refcnt == 0) { + ScreenPtr screen = private->pixmap->drawable.pScreen; + screen->DestroyPixmap(private->pixmap); + + free(private); + free(buffer); + } + } else + free(buffer); +} + +#endif + +static void +I830DRI2CopyRegion(DrawablePtr drawable, RegionPtr pRegion, + DRI2BufferPtr destBuffer, DRI2BufferPtr sourceBuffer) +{ + I830DRI2BufferPrivatePtr srcPrivate = sourceBuffer->driverPrivate; + I830DRI2BufferPrivatePtr dstPrivate = destBuffer->driverPrivate; + ScreenPtr screen = drawable->pScreen; + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + DrawablePtr src = (sourceBuffer->attachment == DRI2BufferFrontLeft) + ? drawable : &srcPrivate->pixmap->drawable; + DrawablePtr dst = (destBuffer->attachment == DRI2BufferFrontLeft) + ? drawable : &dstPrivate->pixmap->drawable; + RegionPtr pCopyClip; + GCPtr gc; + + gc = GetScratchGC(dst->depth, screen); + if (!gc) + return; + + pCopyClip = REGION_CREATE(screen, NULL, 0); + REGION_COPY(screen, pCopyClip, pRegion); + (*gc->funcs->ChangeClip) (gc, CT_REGION, pCopyClip, 0); + ValidateGC(dst, gc); + + /* Wait for the scanline to be outside the region to be copied */ + if (scrn->vtSema && + pixmap_is_scanout(get_drawable_pixmap(dst)) && + intel->swapbuffers_wait && INTEL_INFO(intel)->gen < 060) { + BoxPtr box; + BoxRec crtcbox; + int y1, y2; + int event, load_scan_lines_pipe; + xf86CrtcPtr crtc; + Bool full_height = FALSE; + + box = REGION_EXTENTS(unused, gc->pCompositeClip); + crtc = intel_covering_crtc(scrn, box, NULL, &crtcbox); + + /* + * Make sure the CRTC is valid and this is the real front + * buffer + */ + if (crtc != NULL && !crtc->rotatedData) { + int pipe = intel_crtc_to_pipe(crtc); + + /* + * Make sure we don't wait for a scanline that will + * never occur + */ + y1 = (crtcbox.y1 <= box->y1) ? box->y1 - crtcbox.y1 : 0; + y2 = (box->y2 <= crtcbox.y2) ? + box->y2 - crtcbox.y1 : crtcbox.y2 - crtcbox.y1; + + if (y1 == 0 && y2 == (crtcbox.y2 - crtcbox.y1)) + full_height = TRUE; + + /* + * Pre-965 doesn't have SVBLANK, so we need a bit + * of extra time for the blitter to start up and + * do its job for a full height blit + */ + if (full_height && INTEL_INFO(intel)->gen < 040) + y2 -= 2; + + if (pipe == 0) { + event = MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW; + load_scan_lines_pipe = + MI_LOAD_SCAN_LINES_DISPLAY_PIPEA; + if (full_height && INTEL_INFO(intel)->gen >= 040) + event = MI_WAIT_FOR_PIPEA_SVBLANK; + } else { + event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW; + load_scan_lines_pipe = + MI_LOAD_SCAN_LINES_DISPLAY_PIPEB; + if (full_height && INTEL_INFO(intel)->gen >= 040) + event = MI_WAIT_FOR_PIPEB_SVBLANK; + } + + if (crtc->mode.Flags & V_INTERLACE) { + /* DSL count field lines */ + y1 /= 2; + y2 /= 2; + } + + BEGIN_BATCH(5); + /* + * The documentation says that the LOAD_SCAN_LINES + * command always comes in pairs. Don't ask me why. + */ + OUT_BATCH(MI_LOAD_SCAN_LINES_INCL | + load_scan_lines_pipe); + OUT_BATCH((y1 << 16) | (y2-1)); + OUT_BATCH(MI_LOAD_SCAN_LINES_INCL | + load_scan_lines_pipe); + OUT_BATCH((y1 << 16) | (y2-1)); + OUT_BATCH(MI_WAIT_FOR_EVENT | event); + ADVANCE_BATCH(); + } + } + + /* It's important that this copy gets submitted before the + * direct rendering client submits rendering for the next + * frame, but we don't actually need to submit right now. The + * client will wait for the DRI2CopyRegion reply or the swap + * buffer event before rendering, and we'll hit the flush + * callback chain before those messages are sent. We submit + * our batch buffers from the flush callback chain so we know + * that will happen before the client tries to render + * again. */ + + gc->ops->CopyArea(src, dst, gc, + 0, 0, + drawable->width, drawable->height, + 0, 0); + + FreeScratchGC(gc); + + /* And make sure the WAIT_FOR_EVENT is queued before any + * modesetting/dpms operations on the pipe. + */ + intel_batch_submit(scrn); +} + +static void +I830DRI2FallbackBlitSwap(DrawablePtr drawable, + DRI2BufferPtr dst, + DRI2BufferPtr src) +{ + BoxRec box; + RegionRec region; + + box.x1 = 0; + box.y1 = 0; + box.x2 = drawable->width; + box.y2 = drawable->height; + REGION_INIT(pScreen, ®ion, &box, 0); + + I830DRI2CopyRegion(drawable, ®ion, dst, src); +} + +#if DRI2INFOREC_VERSION >= 4 + +static void I830DRI2ReferenceBuffer(DRI2Buffer2Ptr buffer) +{ + if (buffer) { + I830DRI2BufferPrivatePtr private = buffer->driverPrivate; + private->refcnt++; + } +} + +static int +I830DRI2DrawablePipe(DrawablePtr pDraw) +{ + ScreenPtr pScreen = pDraw->pScreen; + ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen); + BoxRec box, crtcbox; + xf86CrtcPtr crtc; + int pipe = -1; + + box.x1 = pDraw->x; + box.y1 = pDraw->y; + box.x2 = box.x1 + pDraw->width; + box.y2 = box.y1 + pDraw->height; + + crtc = intel_covering_crtc(pScrn, &box, NULL, &crtcbox); + + /* Make sure the CRTC is valid and this is the real front buffer */ + if (crtc != NULL && !crtc->rotatedData) + pipe = intel_crtc_to_pipe(crtc); + + return pipe; +} + +static RESTYPE frame_event_client_type, frame_event_drawable_type; + +struct i830_dri2_resource { + XID id; + RESTYPE type; + struct list list; +}; + +static struct i830_dri2_resource * +get_resource(XID id, RESTYPE type) +{ + struct i830_dri2_resource *resource; + void *ptr; + + ptr = NULL; + dixLookupResourceByType(&ptr, id, type, NULL, DixWriteAccess); + if (ptr) + return ptr; + + resource = malloc(sizeof(*resource)); + if (resource == NULL) + return NULL; + + if (!AddResource(id, type, resource)) { + free(resource); + return NULL; + } + + resource->id = id; + resource->type = type; + list_init(&resource->list); + return resource; +} + +static int +i830_dri2_frame_event_client_gone(void *data, XID id) +{ + struct i830_dri2_resource *resource = data; + + while (!list_is_empty(&resource->list)) { + DRI2FrameEventPtr info = + list_first_entry(&resource->list, + DRI2FrameEventRec, + client_resource); + + list_del(&info->client_resource); + info->client = NULL; + } + free(resource); + + return Success; +} + +static int +i830_dri2_frame_event_drawable_gone(void *data, XID id) +{ + struct i830_dri2_resource *resource = data; + + while (!list_is_empty(&resource->list)) { + DRI2FrameEventPtr info = + list_first_entry(&resource->list, + DRI2FrameEventRec, + drawable_resource); + + list_del(&info->drawable_resource); + info->drawable_id = None; + } + free(resource); + + return Success; +} + +static Bool +i830_dri2_register_frame_event_resource_types(void) +{ + frame_event_client_type = CreateNewResourceType(i830_dri2_frame_event_client_gone, "Frame Event Client"); + if (!frame_event_client_type) + return FALSE; + + frame_event_drawable_type = CreateNewResourceType(i830_dri2_frame_event_drawable_gone, "Frame Event Drawable"); + if (!frame_event_drawable_type) + return FALSE; + + return TRUE; +} + +static XID +get_client_id(ClientPtr client) +{ +#if HAS_DIXREGISTERPRIVATEKEY + XID *ptr = dixGetPrivateAddr(&client->devPrivates, &i830_client_key); +#else + XID *ptr = dixLookupPrivate(&client->devPrivates, &i830_client_key); +#endif + if (*ptr == 0) + *ptr = FakeClientID(client->index); + return *ptr; +} + +/* + * Hook this frame event into the server resource + * database so we can clean it up if the drawable or + * client exits while the swap is pending + */ +static Bool +i830_dri2_add_frame_event(DRI2FrameEventPtr info) +{ + struct i830_dri2_resource *resource; + + resource = get_resource(get_client_id(info->client), + frame_event_client_type); + if (resource == NULL) + return FALSE; + + list_add(&info->client_resource, &resource->list); + + resource = get_resource(info->drawable_id, frame_event_drawable_type); + if (resource == NULL) { + list_del(&info->client_resource); + return FALSE; + } + + list_add(&info->drawable_resource, &resource->list); + + return TRUE; +} + +static void +i830_dri2_del_frame_event(DrawablePtr drawable, DRI2FrameEventPtr info) +{ + list_del(&info->client_resource); + list_del(&info->drawable_resource); + + if (info->front) + I830DRI2DestroyBuffer(drawable, info->front); + if (info->back) + I830DRI2DestroyBuffer(drawable, info->back); + + free(info); +} + +static struct intel_pixmap * +intel_exchange_pixmap_buffers(struct intel_screen_private *intel, PixmapPtr front, PixmapPtr back) +{ + struct intel_pixmap *new_front, *new_back; + RegionRec region; + + /* Post damage on the front buffer so that listeners, such + * as DisplayLink know take a copy and shove it over the USB. + * also for sw cursors. + */ + region.extents.x1 = region.extents.y1 = 0; + region.extents.x2 = front->drawable.width; + region.extents.y2 = front->drawable.height; + region.data = NULL; + DamageRegionAppend(&front->drawable, ®ion); + + new_front = intel_get_pixmap_private(back); + new_back = intel_get_pixmap_private(front); + intel_set_pixmap_private(front, new_front); + intel_set_pixmap_private(back, new_back); + new_front->busy = 1; + new_back->busy = -1; + + intel_glamor_exchange_buffers(intel, front, back); + + DamageRegionProcessPending(&front->drawable); + + return new_front; +} + +static void +I830DRI2ExchangeBuffers(struct intel_screen_private *intel, DRI2BufferPtr front, DRI2BufferPtr back) +{ + I830DRI2BufferPrivatePtr front_priv, back_priv; + int tmp; + struct intel_pixmap *new_front; + + front_priv = front->driverPrivate; + back_priv = back->driverPrivate; + + /* Swap BO names so DRI works */ + tmp = front->name; + front->name = back->name; + back->name = tmp; + + /* Swap pixmap bos */ + new_front = intel_exchange_pixmap_buffers(intel, + front_priv->pixmap, + back_priv->pixmap); + dri_bo_unreference (intel->front_buffer); + intel->front_buffer = new_front->bo; + dri_bo_reference (intel->front_buffer); +} + +static PixmapPtr +intel_glamor_create_back_pixmap(ScreenPtr screen, + PixmapPtr front_pixmap, + drm_intel_bo *back_bo) +{ + PixmapPtr back_pixmap; + + back_pixmap = screen->CreatePixmap(screen, + 0, + 0, + front_pixmap->drawable.depth, + 0); + if (back_pixmap == NULL) + return NULL; + + screen->ModifyPixmapHeader(back_pixmap, + front_pixmap->drawable.width, + front_pixmap->drawable.height, + 0, 0, + front_pixmap->devKind, + 0); + intel_set_pixmap_bo(back_pixmap, back_bo); + if (!intel_glamor_create_textured_pixmap(back_pixmap)) { + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "Failed to create textured back pixmap.\n"); + screen->DestroyPixmap(back_pixmap); + return NULL; + } + return back_pixmap; +} + +static drm_intel_bo *get_pixmap_bo(I830DRI2BufferPrivatePtr priv) +{ + drm_intel_bo *bo = intel_get_pixmap_bo(priv->pixmap); + assert(bo != NULL); /* guaranteed by construction of the DRI2 buffer */ + return bo; +} + +/* + * Our internal swap routine takes care of actually exchanging, blitting, or + * flipping buffers as necessary. + */ +static Bool +I830DRI2ScheduleFlip(struct intel_screen_private *intel, + DrawablePtr draw, + DRI2FrameEventPtr info) +{ + I830DRI2BufferPrivatePtr priv = info->back->driverPrivate; + drm_intel_bo *new_back, *old_back; + int tmp_name; + + if (!intel->use_triple_buffer) { + info->type = DRI2_SWAP; + if (!intel_do_pageflip(intel, + get_pixmap_bo(priv), + info, info->pipe)) + return FALSE; + + I830DRI2ExchangeBuffers(intel, info->front, info->back); + return TRUE; + } + + if (intel->pending_flip[info->pipe]) { + assert(intel->pending_flip[info->pipe]->chain == NULL); + intel->pending_flip[info->pipe]->chain = info; + return TRUE; + } + + if (intel->back_buffer == NULL) { + I830DRI2BufferPrivatePtr priv; + PixmapPtr front_pixmap, back_pixmap; + ScreenPtr screen; + + new_back = drm_intel_bo_alloc(intel->bufmgr, "front buffer", + intel->front_buffer->size, 0); + if (new_back == NULL) + return FALSE; + + if (intel->front_tiling != I915_TILING_NONE) { + uint32_t tiling = intel->front_tiling; + drm_intel_bo_set_tiling(new_back, &tiling, intel->front_pitch); + if (tiling != intel->front_tiling) { + drm_intel_bo_unreference(new_back); + return FALSE; + } + } + + drm_intel_bo_disable_reuse(new_back); + dri_bo_flink(new_back, &intel->back_name); + + if ((intel->uxa_flags & UXA_USE_GLAMOR)) { + screen = draw->pScreen; + priv = info->front->driverPrivate; + front_pixmap = priv->pixmap; + + back_pixmap = intel_glamor_create_back_pixmap(screen, + front_pixmap, + new_back); + if (back_pixmap == NULL) { + drm_intel_bo_unreference(new_back); + return FALSE; + } + intel->back_pixmap = back_pixmap; + } + } else { + new_back = intel->back_buffer; + intel->back_buffer = NULL; + } + + old_back = get_pixmap_bo(priv); + if (!intel_do_pageflip(intel, old_back, info, info->pipe)) { + intel->back_buffer = new_back; + return FALSE; + } + info->type = DRI2_SWAP_CHAIN; + intel->pending_flip[info->pipe] = info; + + priv = info->front->driverPrivate; + + /* Exchange the current front-buffer with the fresh bo */ + + intel->back_buffer = intel->front_buffer; + drm_intel_bo_reference(intel->back_buffer); + if (!(intel->uxa_flags & UXA_USE_GLAMOR)) { + intel_set_pixmap_bo(priv->pixmap, new_back); + drm_intel_bo_unreference(new_back); + } + else + intel_exchange_pixmap_buffers(intel, priv->pixmap, + intel->back_pixmap); + + tmp_name = info->front->name; + info->front->name = intel->back_name; + intel->back_name = tmp_name; + + /* Then flip DRI2 pointers and update the screen pixmap */ + I830DRI2ExchangeBuffers(intel, info->front, info->back); + DRI2SwapComplete(info->client, draw, 0, 0, 0, + DRI2_EXCHANGE_COMPLETE, + info->event_complete, + info->event_data); + return TRUE; +} + +static Bool +can_exchange(DrawablePtr drawable, DRI2BufferPtr front, DRI2BufferPtr back) +{ + ScrnInfoPtr pScrn = xf86ScreenToScrn(drawable->pScreen); + struct intel_screen_private *intel = intel_get_screen_private(pScrn); + I830DRI2BufferPrivatePtr front_priv = front->driverPrivate; + I830DRI2BufferPrivatePtr back_priv = back->driverPrivate; + PixmapPtr front_pixmap = front_priv->pixmap; + PixmapPtr back_pixmap = back_priv->pixmap; + struct intel_pixmap *front_intel = intel_get_pixmap_private(front_pixmap); + struct intel_pixmap *back_intel = intel_get_pixmap_private(back_pixmap); + + if (!pScrn->vtSema) + return FALSE; + + if (I830DRI2DrawablePipe(drawable) < 0) + return FALSE; + + if (!DRI2CanFlip(drawable)) + return FALSE; + + if (intel->shadow_present) + return FALSE; + + if (!intel->use_pageflipping) + return FALSE; + + if (front_pixmap->drawable.width != back_pixmap->drawable.width) + return FALSE; + + if (front_pixmap->drawable.height != back_pixmap->drawable.height) + return FALSE; + + /* XXX should we be checking depth instead of bpp? */ +#if 0 + if (front_pixmap->drawable.depth != back_pixmap->drawable.depth) + return FALSE; +#else + if (front_pixmap->drawable.bitsPerPixel != back_pixmap->drawable.bitsPerPixel) + return FALSE; +#endif + + /* prevent an implicit tiling mode change */ + if (front_intel->tiling != back_intel->tiling) + return FALSE; + + return TRUE; +} + +void I830DRI2FrameEventHandler(unsigned int frame, unsigned int tv_sec, + unsigned int tv_usec, DRI2FrameEventPtr swap_info) +{ + intel_screen_private *intel = swap_info->intel; + DrawablePtr drawable; + int status; + + if (!swap_info->drawable_id) + status = BadDrawable; + else + status = dixLookupDrawable(&drawable, swap_info->drawable_id, serverClient, + M_ANY, DixWriteAccess); + if (status != Success) { + i830_dri2_del_frame_event(NULL, swap_info); + return; + } + + + switch (swap_info->type) { + case DRI2_FLIP: + /* If we can still flip... */ + if (can_exchange(drawable, swap_info->front, swap_info->back) && + I830DRI2ScheduleFlip(intel, drawable, swap_info)) + return; + + /* else fall through to exchange/blit */ + case DRI2_SWAP: { + I830DRI2FallbackBlitSwap(drawable, + swap_info->front, swap_info->back); + DRI2SwapComplete(swap_info->client, drawable, frame, tv_sec, tv_usec, + DRI2_BLIT_COMPLETE, + swap_info->client ? swap_info->event_complete : NULL, + swap_info->event_data); + break; + } + case DRI2_WAITMSC: + if (swap_info->client) + DRI2WaitMSCComplete(swap_info->client, drawable, + frame, tv_sec, tv_usec); + break; + default: + xf86DrvMsg(intel->scrn->scrnIndex, X_WARNING, + "%s: unknown vblank event received\n", __func__); + /* Unknown type */ + break; + } + + i830_dri2_del_frame_event(drawable, swap_info); +} + +void I830DRI2FlipEventHandler(unsigned int frame, unsigned int tv_sec, + unsigned int tv_usec, DRI2FrameEventPtr flip_info) +{ + struct intel_screen_private *intel = flip_info->intel; + DrawablePtr drawable; + DRI2FrameEventPtr chain; + + drawable = NULL; + if (flip_info->drawable_id) + dixLookupDrawable(&drawable, flip_info->drawable_id, serverClient, + M_ANY, DixWriteAccess); + + + /* We assume our flips arrive in order, so we don't check the frame */ + switch (flip_info->type) { + case DRI2_SWAP: + if (!drawable) + break; + + /* Check for too small vblank count of pageflip completion, taking wraparound + * into account. This usually means some defective kms pageflip completion, + * causing wrong (msc, ust) return values and possible visual corruption. + */ + if ((frame < flip_info->frame) && (flip_info->frame - frame < 5)) { + static int limit = 5; + + /* XXX we are currently hitting this path with older + * kernels, so make it quieter. + */ + if (limit) { + xf86DrvMsg(intel->scrn->scrnIndex, X_WARNING, + "%s: Pageflip completion has impossible msc %d < target_msc %d\n", + __func__, frame, flip_info->frame); + limit--; + } + + /* All-0 values signal timestamping failure. */ + frame = tv_sec = tv_usec = 0; + } + + DRI2SwapComplete(flip_info->client, drawable, frame, tv_sec, tv_usec, + DRI2_FLIP_COMPLETE, flip_info->client ? flip_info->event_complete : NULL, + flip_info->event_data); + break; + + case DRI2_SWAP_CHAIN: + assert(intel->pending_flip[flip_info->pipe] == flip_info); + intel->pending_flip[flip_info->pipe] = NULL; + + chain = flip_info->chain; + if (chain) { + DrawablePtr chain_drawable = NULL; + if (chain->drawable_id) + dixLookupDrawable(&chain_drawable, + chain->drawable_id, + serverClient, + M_ANY, DixWriteAccess); + if (chain_drawable == NULL) { + i830_dri2_del_frame_event(chain_drawable, chain); + } else if (!can_exchange(chain_drawable, chain->front, chain->back) || + !I830DRI2ScheduleFlip(intel, chain_drawable, chain)) { + I830DRI2FallbackBlitSwap(chain_drawable, + chain->front, + chain->back); + + DRI2SwapComplete(chain->client, chain_drawable, frame, tv_sec, tv_usec, + DRI2_BLIT_COMPLETE, + chain->client ? chain->event_complete : NULL, + chain->event_data); + i830_dri2_del_frame_event(chain_drawable, chain); + } + } + break; + + default: + xf86DrvMsg(intel->scrn->scrnIndex, X_WARNING, + "%s: unknown vblank event received\n", __func__); + /* Unknown type */ + break; + } + + i830_dri2_del_frame_event(drawable, flip_info); +} + +static uint32_t pipe_select(int pipe) +{ + if (pipe > 1) + return pipe << DRM_VBLANK_HIGH_CRTC_SHIFT; + else if (pipe > 0) + return DRM_VBLANK_SECONDARY; + else + return 0; +} + +/* + * ScheduleSwap is responsible for requesting a DRM vblank event for the + * appropriate frame. + * + * In the case of a blit (e.g. for a windowed swap) or buffer exchange, + * the vblank requested can simply be the last queued swap frame + the swap + * interval for the drawable. + * + * In the case of a page flip, we request an event for the last queued swap + * frame + swap interval - 1, since we'll need to queue the flip for the frame + * immediately following the received event. + * + * The client will be blocked if it tries to perform further GL commands + * after queueing a swap, though in the Intel case after queueing a flip, the + * client is free to queue more commands; they'll block in the kernel if + * they access buffers busy with the flip. + * + * When the swap is complete, the driver should call into the server so it + * can send any swap complete events that have been requested. + */ +static int +I830DRI2ScheduleSwap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, + DRI2BufferPtr back, CARD64 *target_msc, CARD64 divisor, + CARD64 remainder, DRI2SwapEventPtr func, void *data) +{ + ScreenPtr screen = draw->pScreen; + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + drmVBlank vbl; + int ret, pipe = I830DRI2DrawablePipe(draw), flip = 0; + DRI2FrameEventPtr swap_info = NULL; + enum DRI2FrameEventType swap_type = DRI2_SWAP; + CARD64 current_msc; + + /* Drawable not displayed... just complete the swap */ + if (pipe == -1) + goto blit_fallback; + + /* Truncate to match kernel interfaces; means occasional overflow + * misses, but that's generally not a big deal */ + *target_msc &= 0xffffffff; + divisor &= 0xffffffff; + remainder &= 0xffffffff; + + swap_info = calloc(1, sizeof(DRI2FrameEventRec)); + if (!swap_info) + goto blit_fallback; + + swap_info->intel = intel; + swap_info->drawable_id = draw->id; + swap_info->client = client; + swap_info->event_complete = func; + swap_info->event_data = data; + swap_info->front = front; + swap_info->back = back; + swap_info->pipe = pipe; + + if (!i830_dri2_add_frame_event(swap_info)) { + free(swap_info); + swap_info = NULL; + goto blit_fallback; + } + + I830DRI2ReferenceBuffer(front); + I830DRI2ReferenceBuffer(back); + + /* Get current count */ + vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe); + vbl.request.sequence = 0; + ret = drmWaitVBlank(intel->drmSubFD, &vbl); + if (ret) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "first get vblank counter failed: %s\n", + strerror(errno)); + goto blit_fallback; + } + + current_msc = vbl.reply.sequence; + + /* Flips need to be submitted one frame before */ + if (can_exchange(draw, front, back)) { + swap_type = DRI2_FLIP; + flip = 1; + } + + swap_info->type = swap_type; + + /* Correct target_msc by 'flip' if swap_type == DRI2_FLIP. + * Do it early, so handling of different timing constraints + * for divisor, remainder and msc vs. target_msc works. + */ + if (*target_msc > 0) + *target_msc -= flip; + + /* + * If divisor is zero, or current_msc is smaller than target_msc + * we just need to make sure target_msc passes before initiating + * the swap. + */ + if (divisor == 0 || current_msc < *target_msc) { + /* + * If we can, schedule the flip directly from here rather + * than waiting for an event from the kernel for the current + * (or a past) MSC. + */ + if (flip && divisor == 0 && current_msc >= *target_msc && + I830DRI2ScheduleFlip(intel, draw, swap_info)) + return TRUE; + + vbl.request.type = + DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT | pipe_select(pipe); + + /* If non-pageflipping, but blitting/exchanging, we need to use + * DRM_VBLANK_NEXTONMISS to avoid unreliable timestamping later + * on. + */ + if (flip == 0) + vbl.request.type |= DRM_VBLANK_NEXTONMISS; + + /* If target_msc already reached or passed, set it to + * current_msc to ensure we return a reasonable value back + * to the caller. This makes swap_interval logic more robust. + */ + if (current_msc >= *target_msc) + *target_msc = current_msc; + + vbl.request.sequence = *target_msc; + vbl.request.signal = (unsigned long)swap_info; + ret = drmWaitVBlank(intel->drmSubFD, &vbl); + if (ret) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "divisor 0 get vblank counter failed: %s\n", + strerror(errno)); + goto blit_fallback; + } + + *target_msc = vbl.reply.sequence + flip; + swap_info->frame = *target_msc; + + return TRUE; + } + + /* + * If we get here, target_msc has already passed or we don't have one, + * and we need to queue an event that will satisfy the divisor/remainder + * equation. + */ + vbl.request.type = + DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT | pipe_select(pipe); + if (flip == 0) + vbl.request.type |= DRM_VBLANK_NEXTONMISS; + + vbl.request.sequence = current_msc - (current_msc % divisor) + + remainder; + + /* + * If the calculated deadline vbl.request.sequence is smaller than + * or equal to current_msc, it means we've passed the last point + * when effective onset frame seq could satisfy + * seq % divisor == remainder, so we need to wait for the next time + * this will happen. + + * This comparison takes the 1 frame swap delay in pageflipping mode + * into account, as well as a potential DRM_VBLANK_NEXTONMISS delay + * if we are blitting/exchanging instead of flipping. + */ + if (vbl.request.sequence <= current_msc) + vbl.request.sequence += divisor; + + /* Account for 1 frame extra pageflip delay if flip > 0 */ + vbl.request.sequence -= flip; + + vbl.request.signal = (unsigned long)swap_info; + ret = drmWaitVBlank(intel->drmSubFD, &vbl); + if (ret) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "final get vblank counter failed: %s\n", + strerror(errno)); + goto blit_fallback; + } + + /* Adjust returned value for 1 fame pageflip offset of flip > 0 */ + *target_msc = vbl.reply.sequence + flip; + swap_info->frame = *target_msc; + + return TRUE; + +blit_fallback: + I830DRI2FallbackBlitSwap(draw, front, back); + DRI2SwapComplete(client, draw, 0, 0, 0, DRI2_BLIT_COMPLETE, func, data); + if (swap_info) + i830_dri2_del_frame_event(draw, swap_info); + *target_msc = 0; /* offscreen, so zero out target vblank count */ + return TRUE; +} + +static uint64_t gettime_us(void) +{ + struct timespec tv; + + if (clock_gettime(CLOCK_MONOTONIC, &tv)) + return 0; + + return (uint64_t)tv.tv_sec * 1000000 + tv.tv_nsec / 1000; +} + +/* + * Get current frame count and frame count timestamp, based on drawable's + * crtc. + */ +static int +I830DRI2GetMSC(DrawablePtr draw, CARD64 *ust, CARD64 *msc) +{ + ScreenPtr screen = draw->pScreen; + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + drmVBlank vbl; + int ret, pipe = I830DRI2DrawablePipe(draw); + + /* Drawable not displayed, make up a *monotonic* value */ + if (pipe == -1) { + *ust = gettime_us(); + *msc = 0; + return TRUE; + } + + vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe); + vbl.request.sequence = 0; + + ret = drmWaitVBlank(intel->drmSubFD, &vbl); + if (ret) { + static int limit = 5; + if (limit) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s:%d get vblank counter failed: %s\n", + __FUNCTION__, __LINE__, + strerror(errno)); + limit--; + } + return FALSE; + } + + *ust = ((CARD64)vbl.reply.tval_sec * 1000000) + vbl.reply.tval_usec; + *msc = vbl.reply.sequence; + + return TRUE; +} + +/* + * Request a DRM event when the requested conditions will be satisfied. + * + * We need to handle the event and ask the server to wake up the client when + * we receive it. + */ +static int +I830DRI2ScheduleWaitMSC(ClientPtr client, DrawablePtr draw, CARD64 target_msc, + CARD64 divisor, CARD64 remainder) +{ + ScreenPtr screen = draw->pScreen; + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + DRI2FrameEventPtr wait_info; + drmVBlank vbl; + int ret, pipe = I830DRI2DrawablePipe(draw); + CARD64 current_msc; + + /* Truncate to match kernel interfaces; means occasional overflow + * misses, but that's generally not a big deal */ + target_msc &= 0xffffffff; + divisor &= 0xffffffff; + remainder &= 0xffffffff; + + /* Drawable not visible, return immediately */ + if (pipe == -1) + goto out_complete; + + wait_info = calloc(1, sizeof(DRI2FrameEventRec)); + if (!wait_info) + goto out_complete; + + wait_info->intel = intel; + wait_info->drawable_id = draw->id; + wait_info->client = client; + wait_info->type = DRI2_WAITMSC; + + if (!i830_dri2_add_frame_event(wait_info)) { + free(wait_info); + wait_info = NULL; + goto out_complete; + } + + /* Get current count */ + vbl.request.type = DRM_VBLANK_RELATIVE | pipe_select(pipe); + vbl.request.sequence = 0; + ret = drmWaitVBlank(intel->drmSubFD, &vbl); + if (ret) { + static int limit = 5; + if (limit) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s:%d get vblank counter failed: %s\n", + __FUNCTION__, __LINE__, + strerror(errno)); + limit--; + } + goto out_free; + } + + current_msc = vbl.reply.sequence; + + /* + * If divisor is zero, or current_msc is smaller than target_msc, + * we just need to make sure target_msc passes before waking up the + * client. + */ + if (divisor == 0 || current_msc < target_msc) { + /* If target_msc already reached or passed, set it to + * current_msc to ensure we return a reasonable value back + * to the caller. This keeps the client from continually + * sending us MSC targets from the past by forcibly updating + * their count on this call. + */ + if (current_msc >= target_msc) + target_msc = current_msc; + vbl.request.type = + DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT | pipe_select(pipe); + vbl.request.sequence = target_msc; + vbl.request.signal = (unsigned long)wait_info; + ret = drmWaitVBlank(intel->drmSubFD, &vbl); + if (ret) { + static int limit = 5; + if (limit) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s:%d get vblank counter failed: %s\n", + __FUNCTION__, __LINE__, + strerror(errno)); + limit--; + } + goto out_free; + } + + wait_info->frame = vbl.reply.sequence; + DRI2BlockClient(client, draw); + return TRUE; + } + + /* + * If we get here, target_msc has already passed or we don't have one, + * so we queue an event that will satisfy the divisor/remainder equation. + */ + vbl.request.type = + DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT | pipe_select(pipe); + + vbl.request.sequence = current_msc - (current_msc % divisor) + + remainder; + + /* + * If calculated remainder is larger than requested remainder, + * it means we've passed the last point where + * seq % divisor == remainder, so we need to wait for the next time + * that will happen. + */ + if ((current_msc % divisor) >= remainder) + vbl.request.sequence += divisor; + + vbl.request.signal = (unsigned long)wait_info; + ret = drmWaitVBlank(intel->drmSubFD, &vbl); + if (ret) { + static int limit = 5; + if (limit) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s:%d get vblank counter failed: %s\n", + __FUNCTION__, __LINE__, + strerror(errno)); + limit--; + } + goto out_free; + } + + wait_info->frame = vbl.reply.sequence; + DRI2BlockClient(client, draw); + + return TRUE; + +out_free: + i830_dri2_del_frame_event(draw, wait_info); +out_complete: + DRI2WaitMSCComplete(client, draw, target_msc, 0, 0); + return TRUE; +} + +static int dri2_server_generation; +#endif + +static int has_i830_dri(void) +{ + return access(DRI_DRIVER_PATH "/i830_dri.so", R_OK) == 0; +} + +static const char *dri_driver_name(intel_screen_private *intel) +{ + const char *s = xf86GetOptValString(intel->Options, OPTION_DRI); + Bool dummy; + + if (s == NULL || xf86getBoolValue(&dummy, s)) { + if (INTEL_INFO(intel)->gen < 030) + return has_i830_dri() ? "i830" : "i915"; + else if (INTEL_INFO(intel)->gen < 040) + return "i915"; + else + return "i965"; + } + + return s; +} + +Bool I830DRI2ScreenInit(ScreenPtr screen) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + DRI2InfoRec info; + int dri2_major = 1; + int dri2_minor = 0; +#if DRI2INFOREC_VERSION >= 4 + const char *driverNames[1]; +#endif + + if (intel->force_fallback) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "cannot enable DRI2 whilst forcing software fallbacks\n"); + return FALSE; + } + + if (xf86LoaderCheckSymbol("DRI2Version")) + DRI2Version(&dri2_major, &dri2_minor); + + if (dri2_minor < 1) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "DRI2 requires DRI2 module version 1.1.0 or later\n"); + return FALSE; + } + +#if HAS_DIXREGISTERPRIVATEKEY + if (!dixRegisterPrivateKey(&i830_client_key, PRIVATE_CLIENT, sizeof(XID))) + return FALSE; +#else + if (!dixRequestPrivate(&i830_client_key, sizeof(XID))) + return FALSE; +#endif + + +#if DRI2INFOREC_VERSION >= 4 + if (serverGeneration != dri2_server_generation) { + dri2_server_generation = serverGeneration; + if (!i830_dri2_register_frame_event_resource_types()) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "Cannot register DRI2 frame event resources\n"); + return FALSE; + } + } +#endif + + intel->deviceName = drmGetDeviceNameFromFd(intel->drmSubFD); + memset(&info, '\0', sizeof(info)); + info.fd = intel->drmSubFD; + info.driverName = dri_driver_name(intel); + info.deviceName = intel->deviceName; + +#if DRI2INFOREC_VERSION == 1 + info.version = 1; + info.CreateBuffers = I830DRI2CreateBuffers; + info.DestroyBuffers = I830DRI2DestroyBuffers; +#elif DRI2INFOREC_VERSION == 2 + /* The ABI between 2 and 3 was broken so we could get rid of + * the multi-buffer alloc functions. Make sure we indicate the + * right version so DRI2 can reject us if it's version 3 or above. */ + info.version = 2; + info.CreateBuffer = I830DRI2CreateBuffer; + info.DestroyBuffer = I830DRI2DestroyBuffer; +#else + info.version = 3; + info.CreateBuffer = I830DRI2CreateBuffer; + info.DestroyBuffer = I830DRI2DestroyBuffer; +#endif + + info.CopyRegion = I830DRI2CopyRegion; +#if DRI2INFOREC_VERSION >= 4 + info.version = 4; + info.ScheduleSwap = I830DRI2ScheduleSwap; + info.GetMSC = I830DRI2GetMSC; + info.ScheduleWaitMSC = I830DRI2ScheduleWaitMSC; + info.numDrivers = 1; + info.driverNames = driverNames; + driverNames[0] = info.driverName; +#endif + + return DRI2ScreenInit(screen, &info); +} + +void I830DRI2CloseScreen(ScreenPtr screen) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + + DRI2CloseScreen(screen); + intel->directRenderingType = DRI_NONE; + drmFree(intel->deviceName); +} diff --git a/src/uxa/intel_driver.c b/src/uxa/intel_driver.c new file mode 100644 index 00000000..726f0b8b --- /dev/null +++ b/src/uxa/intel_driver.c @@ -0,0 +1,1310 @@ +/************************************************************************** + +Copyright 2001 VA Linux Systems Inc., Fremont, California. +Copyright © 2002 by David Dawes + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: Jeff Hartmann <jhartmann@valinux.com> + * Abraham van der Merwe <abraham@2d3d.co.za> + * David Dawes <dawes@xfree86.org> + * Alan Hourihane <alanh@tungstengraphics.com> + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> + +#include "xf86.h" +#include "xf86_OSproc.h" +#include "xf86cmap.h" +#include "xf86drm.h" +#include "compiler.h" +#include "mipointer.h" +#include "micmap.h" +#include "shadowfb.h" +#include <X11/extensions/randr.h> +#include <X11/extensions/dpmsconst.h> +#include "fb.h" +#include "miscstruct.h" +#include "dixstruct.h" +#include "xf86xv.h" +#include <X11/extensions/Xv.h> +#include "shadow.h" +#include "intel.h" +#include "intel_video.h" + +#ifdef INTEL_XVMC +#define _INTEL_XVMC_SERVER_ +#include "intel_xvmc.h" +#endif + +#include "legacy/legacy.h" +#include "uxa.h" + +#include <sys/ioctl.h> +#include "i915_drm.h" +#include <xf86drmMode.h> + +#include "intel_glamor.h" +#include "intel_options.h" + +static void i830AdjustFrame(ADJUST_FRAME_ARGS_DECL); +static Bool I830CloseScreen(CLOSE_SCREEN_ARGS_DECL); +static Bool I830EnterVT(VT_FUNC_ARGS_DECL); + +/* temporary */ +extern void xf86SetCursor(ScreenPtr screen, CursorPtr pCurs, int x, int y); + +/* Export I830 options to i830 driver where necessary */ +static void +I830LoadPalette(ScrnInfoPtr scrn, int numColors, int *indices, + LOCO * colors, VisualPtr pVisual) +{ + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); + int i, j, index; + int p; + uint16_t lut_r[256], lut_g[256], lut_b[256]; + + for (p = 0; p < xf86_config->num_crtc; p++) { + xf86CrtcPtr crtc = xf86_config->crtc[p]; + + switch (scrn->depth) { + case 15: + for (i = 0; i < numColors; i++) { + index = indices[i]; + for (j = 0; j < 8; j++) { + lut_r[index * 8 + j] = + colors[index].red << 8; + lut_g[index * 8 + j] = + colors[index].green << 8; + lut_b[index * 8 + j] = + colors[index].blue << 8; + } + } + break; + case 16: + for (i = 0; i < numColors; i++) { + index = indices[i]; + + if (index <= 31) { + for (j = 0; j < 8; j++) { + lut_r[index * 8 + j] = + colors[index].red << 8; + lut_b[index * 8 + j] = + colors[index].blue << 8; + } + } + + for (j = 0; j < 4; j++) { + lut_g[index * 4 + j] = + colors[index].green << 8; + } + } + break; + default: + for (i = 0; i < numColors; i++) { + index = indices[i]; + lut_r[index] = colors[index].red << 8; + lut_g[index] = colors[index].green << 8; + lut_b[index] = colors[index].blue << 8; + } + break; + } + + /* Make the change through RandR */ +#ifdef RANDR_12_INTERFACE + RRCrtcGammaSet(crtc->randr_crtc, lut_r, lut_g, lut_b); +#else + crtc->funcs->gamma_set(crtc, lut_r, lut_g, lut_b, 256); +#endif + } +} + +/** + * Adjust the screen pixmap for the current location of the front buffer. + * This is done at EnterVT when buffers are bound as long as the resources + * have already been created, but the first EnterVT happens before + * CreateScreenResources. + */ +static Bool i830CreateScreenResources(ScreenPtr screen) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + + screen->CreateScreenResources = intel->CreateScreenResources; + if (!(*screen->CreateScreenResources) (screen)) + return FALSE; + + if (!intel_uxa_create_screen_resources(screen)) + return FALSE; + + intel_copy_fb(scrn); + return TRUE; +} + +static void PreInitCleanup(ScrnInfoPtr scrn) +{ + if (!scrn || !scrn->driverPrivate) + return; + + free(scrn->driverPrivate); + scrn->driverPrivate = NULL; +} + +static void intel_check_chipset_option(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + intel_detect_chipset(scrn, intel->pEnt, intel->PciInfo); +} + +static Bool I830GetEarlyOptions(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + /* Process the options */ + intel->Options = intel_options_get(scrn); + if (!intel->Options) + return FALSE; + + intel->fallback_debug = xf86ReturnOptValBool(intel->Options, + OPTION_FALLBACKDEBUG, + FALSE); + + intel->debug_flush = 0; + + if (xf86ReturnOptValBool(intel->Options, + OPTION_DEBUG_FLUSH_BATCHES, + FALSE)) + intel->debug_flush |= DEBUG_FLUSH_BATCHES; + + if (xf86ReturnOptValBool(intel->Options, + OPTION_DEBUG_FLUSH_CACHES, + FALSE)) + intel->debug_flush |= DEBUG_FLUSH_CACHES; + + if (xf86ReturnOptValBool(intel->Options, + OPTION_DEBUG_WAIT, + FALSE)) + intel->debug_flush |= DEBUG_FLUSH_WAIT; + + return TRUE; +} + +static Bool intel_option_cast_string_to_bool(intel_screen_private *intel, + int id, Bool val) +{ + xf86getBoolValue(&val, xf86GetOptValString(intel->Options, id)); + return val; +} + +static void intel_check_dri_option(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + intel->directRenderingType = DRI_NONE; + if (!intel_option_cast_string_to_bool(intel, OPTION_DRI, TRUE)) + intel->directRenderingType = DRI_DISABLED; + + if (scrn->depth != 16 && scrn->depth != 24 && scrn->depth != 30) { + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, + "DRI is disabled because it " + "runs only at depths 16, 24, and 30.\n"); + intel->directRenderingType = DRI_DISABLED; + } +} + +static Bool intel_open_drm_master(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct pci_device *dev = intel->PciInfo; + drmSetVersion sv; + struct drm_i915_getparam gp; + int err, has_gem; + char busid[20]; + + snprintf(busid, sizeof(busid), "pci:%04x:%02x:%02x.%d", + dev->domain, dev->bus, dev->dev, dev->func); + + intel->drmSubFD = drmOpen(NULL, busid); + if (intel->drmSubFD == -1) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "[drm] Failed to open DRM device for %s: %s\n", + busid, strerror(errno)); + return FALSE; + } + + /* Check that what we opened was a master or a master-capable FD, + * by setting the version of the interface we'll use to talk to it. + * (see DRIOpenDRMMaster() in DRI1) + */ + sv.drm_di_major = 1; + sv.drm_di_minor = 1; + sv.drm_dd_major = -1; + sv.drm_dd_minor = -1; + err = drmSetInterfaceVersion(intel->drmSubFD, &sv); + if (err != 0) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "[drm] failed to set drm interface version.\n"); + drmClose(intel->drmSubFD); + intel->drmSubFD = -1; + return FALSE; + } + + has_gem = FALSE; + gp.param = I915_PARAM_HAS_GEM; + gp.value = &has_gem; + (void)drmCommandWriteRead(intel->drmSubFD, DRM_I915_GETPARAM, + &gp, sizeof(gp)); + if (!has_gem) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "[drm] Failed to detect GEM. Kernel 2.6.28 required.\n"); + drmClose(intel->drmSubFD); + intel->drmSubFD = -1; + return FALSE; + } + + return TRUE; +} + +static void intel_close_drm_master(intel_screen_private *intel) +{ + if (intel && intel->drmSubFD > 0) { + drmClose(intel->drmSubFD); + intel->drmSubFD = -1; + } +} + +static int intel_init_bufmgr(intel_screen_private *intel) +{ + int batch_size; + + batch_size = 4096 * 4; + if (IS_I865G(intel)) + /* The 865 has issues with larger-than-page-sized batch buffers. */ + batch_size = 4096; + + intel->bufmgr = drm_intel_bufmgr_gem_init(intel->drmSubFD, batch_size); + if (!intel->bufmgr) + return FALSE; + + if (xf86ReturnOptValBool(intel->Options, OPTION_BUFFER_CACHE, TRUE)) + drm_intel_bufmgr_gem_enable_reuse(intel->bufmgr); + drm_intel_bufmgr_gem_set_vma_cache_size(intel->bufmgr, 512); + drm_intel_bufmgr_gem_enable_fenced_relocs(intel->bufmgr); + + list_init(&intel->batch_pixmaps); + + if ((INTEL_INFO(intel)->gen == 060)) { + intel->wa_scratch_bo = + drm_intel_bo_alloc(intel->bufmgr, "wa scratch", + 4096, 4096); + } + + return TRUE; +} + +static void intel_bufmgr_fini(intel_screen_private *intel) +{ + if (intel->bufmgr == NULL) + return; + + drm_intel_bo_unreference(intel->wa_scratch_bo); + drm_intel_bufmgr_destroy(intel->bufmgr); +} + +static void I830XvInit(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + MessageType from = X_PROBED; + + intel->XvPreferOverlay = + xf86ReturnOptValBool(intel->Options, OPTION_PREFER_OVERLAY, FALSE); + + if (xf86GetOptValInteger(intel->Options, OPTION_VIDEO_KEY, + &(intel->colorKey))) { + from = X_CONFIG; + } else if (xf86GetOptValInteger(intel->Options, OPTION_COLOR_KEY, + &(intel->colorKey))) { + from = X_CONFIG; + } else { + intel->colorKey = + (1 << scrn->offset.red) | (1 << scrn->offset.green) | + (((scrn->mask.blue >> scrn->offset.blue) - 1) << + scrn->offset.blue); + from = X_DEFAULT; + } + xf86DrvMsg(scrn->scrnIndex, from, "video overlay key set to 0x%x\n", + intel->colorKey); +} + +static Bool drm_has_boolean_param(struct intel_screen_private *intel, + int param) +{ + drm_i915_getparam_t gp; + int value; + + gp.value = &value; + gp.param = param; + if (drmIoctl(intel->drmSubFD, DRM_IOCTL_I915_GETPARAM, &gp)) + return FALSE; + + return value; +} + +static Bool has_kernel_flush(struct intel_screen_private *intel) +{ + /* The BLT ring was introduced at the same time as the + * automatic flush for the busy-ioctl. + */ + return drm_has_boolean_param(intel, I915_PARAM_HAS_BLT); +} + +static Bool has_relaxed_fencing(struct intel_screen_private *intel) +{ + return drm_has_boolean_param(intel, I915_PARAM_HAS_RELAXED_FENCING); +} + +static Bool has_prime_vmap_flush(struct intel_screen_private *intel) +{ + return drm_has_boolean_param(intel, I915_PARAM_HAS_PRIME_VMAP_FLUSH); +} + +static Bool can_accelerate_blt(struct intel_screen_private *intel) +{ + if (INTEL_INFO(intel)->gen == -1) + return FALSE; + + if (xf86ReturnOptValBool(intel->Options, OPTION_ACCEL_DISABLE, FALSE) || + !intel_option_cast_string_to_bool(intel, OPTION_ACCEL_METHOD, TRUE)) { + xf86DrvMsg(intel->scrn->scrnIndex, X_CONFIG, + "Disabling hardware acceleration.\n"); + return FALSE; + } + + if (INTEL_INFO(intel)->gen == 060) { + struct pci_device *const device = intel->PciInfo; + + /* Sandybridge rev07 locks up easily, even with the + * BLT ring workaround in place. + * Thus use shadowfb by default. + */ + if (device->revision < 8) { + xf86DrvMsg(intel->scrn->scrnIndex, X_WARNING, + "Disabling hardware acceleration on this pre-production hardware.\n"); + + return FALSE; + } + } + + if (INTEL_INFO(intel)->gen >= 060) { + drm_i915_getparam_t gp; + int value; + + /* On Sandybridge we need the BLT in order to do anything since + * it so frequently used in the acceleration code paths. + */ + gp.value = &value; + gp.param = I915_PARAM_HAS_BLT; + if (drmIoctl(intel->drmSubFD, DRM_IOCTL_I915_GETPARAM, &gp)) + return FALSE; + } + + return TRUE; +} + +static void intel_setup_capabilities(ScrnInfoPtr scrn) +{ +#ifdef INTEL_PIXMAP_SHARING + intel_screen_private *intel = intel_get_screen_private(scrn); + uint64_t value; + int ret; + + scrn->capabilities = 0; + + ret = drmGetCap(intel->drmSubFD, DRM_CAP_PRIME, &value); + if (ret == 0) { + if (value & DRM_PRIME_CAP_EXPORT) + scrn->capabilities |= RR_Capability_SourceOutput | RR_Capability_SinkOffload; + if (value & DRM_PRIME_CAP_IMPORT) + scrn->capabilities |= RR_Capability_SinkOutput; + } +#endif +} + +/** + * This is called before ScreenInit to do any require probing of screen + * configuration. + * + * This code generally covers probing, module loading, option handling + * card mapping, and RandR setup. + * + * Since xf86InitialConfiguration ends up requiring that we set video modes + * in order to detect configuration, we end up having to do a lot of driver + * setup (talking to the DRM, mapping the device, etc.) in this function. + * As a result, we want to set up that server initialization once rather + * that doing it per generation. + */ +static Bool I830PreInit(ScrnInfoPtr scrn, int flags) +{ + intel_screen_private *intel; + rgb defaultWeight = { 0, 0, 0 }; + EntityInfoPtr pEnt; + int flags24; + Gamma zeros = { 0.0, 0.0, 0.0 }; + + if (scrn->numEntities != 1) + return FALSE; + + pEnt = xf86GetEntityInfo(scrn->entityList[0]); + if (pEnt == NULL) + return FALSE; + + if (pEnt->location.type != BUS_PCI +#ifdef XSERVER_PLATFORM_BUS + && pEnt->location.type != BUS_PLATFORM +#endif + ) + return FALSE; + + if (flags & PROBE_DETECT) + return TRUE; + + if (((uintptr_t)scrn->driverPrivate) & 1) { + intel = xnfcalloc(sizeof(*intel), 1); + if (intel == NULL) + return FALSE; + + intel->info = (void *)((uintptr_t)scrn->driverPrivate & ~1); + scrn->driverPrivate = intel; + } + intel = intel_get_screen_private(scrn); + intel->scrn = scrn; + intel->pEnt = pEnt; + + scrn->displayWidth = 640; /* default it */ + + intel->PciInfo = xf86GetPciInfoForEntity(intel->pEnt->index); + + if (!intel_open_drm_master(scrn)) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Failed to become DRM master.\n"); + return FALSE; + } + + scrn->monitor = scrn->confScreen->monitor; + scrn->progClock = TRUE; + scrn->rgbBits = 8; + + flags24 = Support32bppFb | PreferConvert24to32 | SupportConvert24to32; + + if (!xf86SetDepthBpp(scrn, 0, 0, 0, flags24)) + return FALSE; + + switch (scrn->depth) { + case 15: + case 16: + case 24: + case 30: + break; + case 8: + default: + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Given depth (%d) is not supported by intel driver\n", + scrn->depth); + return FALSE; + } + xf86PrintDepthBpp(scrn); + + if (!xf86SetWeight(scrn, defaultWeight, defaultWeight)) + return FALSE; + if (!xf86SetDefaultVisual(scrn, -1)) + return FALSE; + + intel->cpp = scrn->bitsPerPixel / 8; + + if (!I830GetEarlyOptions(scrn)) + return FALSE; + + intel_setup_capabilities(scrn); + intel_check_chipset_option(scrn); + intel_check_dri_option(scrn); + + if (!intel_init_bufmgr(intel)) { + PreInitCleanup(scrn); + return FALSE; + } + + intel->force_fallback = + drmCommandNone(intel->drmSubFD, DRM_I915_GEM_THROTTLE) != 0; + + /* Enable tiling by default */ + intel->tiling = INTEL_TILING_ALL; + + /* Allow user override if they set a value */ + if (!xf86ReturnOptValBool(intel->Options, OPTION_TILING_2D, TRUE)) + intel->tiling &= ~INTEL_TILING_2D; + if (xf86ReturnOptValBool(intel->Options, OPTION_TILING_FB, FALSE)) + intel->tiling &= ~INTEL_TILING_FB; + if (!can_accelerate_blt(intel)) { + intel->force_fallback = TRUE; + intel->tiling &= ~INTEL_TILING_FB; + } + + intel->has_kernel_flush = has_kernel_flush(intel); + + intel->has_prime_vmap_flush = has_prime_vmap_flush(intel); + + intel->has_relaxed_fencing = INTEL_INFO(intel)->gen >= 033; + /* And override the user if there is no kernel support */ + if (intel->has_relaxed_fencing) + intel->has_relaxed_fencing = has_relaxed_fencing(intel); + + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, + "Relaxed fencing %s\n", + intel->has_relaxed_fencing ? "enabled" : "disabled"); + + /* SwapBuffers delays to avoid tearing */ + intel->swapbuffers_wait = xf86ReturnOptValBool(intel->Options, + OPTION_SWAPBUFFERS_WAIT, + TRUE); + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Wait on SwapBuffers? %s\n", + intel->swapbuffers_wait ? "enabled" : "disabled"); + + intel->use_triple_buffer = + xf86ReturnOptValBool(intel->Options, + OPTION_TRIPLE_BUFFER, + TRUE); + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Triple buffering? %s\n", + intel->use_triple_buffer ? "enabled" : "disabled"); + + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Framebuffer %s\n", + intel->tiling & INTEL_TILING_FB ? "tiled" : "linear"); + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Pixmaps %s\n", + intel->tiling & INTEL_TILING_2D ? "tiled" : "linear"); + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "3D buffers %s\n", + intel->tiling & INTEL_TILING_3D ? "tiled" : "linear"); + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "SwapBuffers wait %sabled\n", + intel->swapbuffers_wait ? "en" : "dis"); + + I830XvInit(scrn); + + if (!intel_mode_pre_init(scrn, intel->drmSubFD, intel->cpp)) { + PreInitCleanup(scrn); + return FALSE; + } + + if (!xf86SetGamma(scrn, zeros)) { + PreInitCleanup(scrn); + return FALSE; + } + + if (scrn->modes == NULL) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, "No modes.\n"); + PreInitCleanup(scrn); + return FALSE; + } + scrn->currentMode = scrn->modes; + + /* Set display resolution */ + xf86SetDpi(scrn, 0, 0); + + /* Load the required sub modules */ + if (!xf86LoadSubModule(scrn, "fb")) { + PreInitCleanup(scrn); + return FALSE; + } + + if (!intel_glamor_pre_init(scrn)) { + PreInitCleanup(scrn); + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Failed to pre init glamor display.\n"); + return FALSE; + } + + /* Load the dri2 module if requested. */ + if (intel->directRenderingType != DRI_DISABLED) + xf86LoadSubModule(scrn, "dri2"); + + return TRUE; +} + +/** + * Intialiazes the hardware for the 3D pipeline use in the 2D driver. + * + * Some state caching is performed to avoid redundant state emits. This + * function is also responsible for marking the state as clobbered for DRI + * clients. + */ +void IntelEmitInvarientState(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + /* If we've emitted our state since the last clobber by another client, + * skip it. + */ + if (intel->last_3d != LAST_3D_OTHER) + return; + + if (IS_GEN2(intel)) + I830EmitInvarientState(scrn); + else if IS_GEN3(intel) + I915EmitInvarientState(scrn); +} + +#ifdef INTEL_PIXMAP_SHARING +static void +redisplay_dirty(ScreenPtr screen, PixmapDirtyUpdatePtr dirty) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + RegionRec pixregion; + int was_blocked; + + PixmapRegionInit(&pixregion, dirty->slave_dst->master_pixmap); + RegionTranslate(&pixregion, dirty->x, dirty->y); + RegionIntersect(&pixregion, &pixregion, DamageRegion(dirty->damage)); + RegionTranslate(&pixregion, -dirty->x, -dirty->y); + was_blocked = RegionNil(&pixregion); + DamageRegionAppend(&dirty->slave_dst->drawable, &pixregion); + RegionUninit(&pixregion); + if (was_blocked) + return; + + PixmapRegionInit(&pixregion, dirty->slave_dst->master_pixmap); + PixmapSyncDirtyHelper(dirty, &pixregion); + RegionUninit(&pixregion); + + intel_batch_submit(scrn); + if (!intel->has_prime_vmap_flush) { + drm_intel_bo *bo = intel_get_pixmap_bo(dirty->slave_dst->master_pixmap); + was_blocked = xf86BlockSIGIO(); + drm_intel_bo_map(bo, FALSE); + drm_intel_bo_unmap(bo); + xf86UnblockSIGIO(was_blocked); + } + + DamageRegionProcessPending(&dirty->slave_dst->drawable); + return; +} + +static void +intel_dirty_update(ScreenPtr screen) +{ + RegionPtr region; + PixmapDirtyUpdatePtr ent; + + if (xorg_list_is_empty(&screen->pixmap_dirty_list)) + return; + + xorg_list_for_each_entry(ent, &screen->pixmap_dirty_list, ent) { + region = DamageRegion(ent->damage); + if (RegionNotEmpty(region)) { + redisplay_dirty(screen, ent); + DamageEmpty(ent->damage); + } + } +} +#endif + +static void +I830BlockHandler(BLOCKHANDLER_ARGS_DECL) +{ + SCREEN_PTR(arg); + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + + screen->BlockHandler = intel->BlockHandler; + + (*screen->BlockHandler) (BLOCKHANDLER_ARGS); + + intel->BlockHandler = screen->BlockHandler; + screen->BlockHandler = I830BlockHandler; + + intel_uxa_block_handler(intel); + intel_video_block_handler(intel); +#ifdef INTEL_PIXMAP_SHARING + intel_dirty_update(screen); +#endif +} + +static Bool +intel_init_initial_framebuffer(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + int width = scrn->virtualX; + int height = scrn->virtualY; + unsigned long pitch; + uint32_t tiling; + + intel->front_buffer = intel_allocate_framebuffer(scrn, + width, height, + intel->cpp, + &pitch, + &tiling); + + if (!intel->front_buffer) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Couldn't allocate initial framebuffer.\n"); + return FALSE; + } + + intel->front_pitch = pitch; + intel->front_tiling = tiling; + scrn->displayWidth = pitch / intel->cpp; + + return TRUE; +} + +static void +intel_flush_callback(CallbackListPtr *list, + pointer user_data, pointer call_data) +{ + ScrnInfoPtr scrn = user_data; + if (scrn->vtSema) { + intel_batch_submit(scrn); + intel_glamor_flush(intel_get_screen_private(scrn)); + } +} + +#if HAVE_UDEV +static void +I830HandleUEvents(int fd, void *closure) +{ + ScrnInfoPtr scrn = closure; + intel_screen_private *intel = intel_get_screen_private(scrn); + struct udev_device *dev; + const char *hotplug; + struct stat s; + dev_t udev_devnum; + + dev = udev_monitor_receive_device(intel->uevent_monitor); + if (!dev) + return; + + udev_devnum = udev_device_get_devnum(dev); + if (fstat(intel->drmSubFD, &s)) { + udev_device_unref(dev); + return; + } + /* + * Check to make sure this event is directed at our + * device (by comparing dev_t values), then make + * sure it's a hotplug event (HOTPLUG=1) + */ + + hotplug = udev_device_get_property_value(dev, "HOTPLUG"); + + if (memcmp(&s.st_rdev, &udev_devnum, sizeof (dev_t)) == 0 && + hotplug && atoi(hotplug) == 1) + RRGetInfo(xf86ScrnToScreen(scrn), TRUE); + + udev_device_unref(dev); +} + +static void +I830UeventInit(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct udev *u; + struct udev_monitor *mon; + Bool hotplug; + MessageType from = X_CONFIG; + + if (!xf86GetOptValBool(intel->Options, OPTION_HOTPLUG, &hotplug)) { + from = X_DEFAULT; + hotplug = TRUE; + } + + xf86DrvMsg(scrn->scrnIndex, from, "hotplug detection: \"%s\"\n", + hotplug ? "enabled" : "disabled"); + if (!hotplug) + return; + + u = udev_new(); + if (!u) + return; + + mon = udev_monitor_new_from_netlink(u, "udev"); + + if (!mon) { + udev_unref(u); + return; + } + + if (udev_monitor_filter_add_match_subsystem_devtype(mon, + "drm", + "drm_minor") < 0 || + udev_monitor_enable_receiving(mon) < 0) + { + udev_monitor_unref(mon); + udev_unref(u); + return; + } + + intel->uevent_handler = + xf86AddGeneralHandler(udev_monitor_get_fd(mon), + I830HandleUEvents, + scrn); + if (!intel->uevent_handler) { + udev_monitor_unref(mon); + udev_unref(u); + return; + } + + intel->uevent_monitor = mon; +} + +static void +I830UeventFini(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + if (intel->uevent_handler) { + struct udev *u = udev_monitor_get_udev(intel->uevent_monitor); + + xf86RemoveGeneralHandler(intel->uevent_handler); + + udev_monitor_unref(intel->uevent_monitor); + udev_unref(u); + intel->uevent_handler = NULL; + intel->uevent_monitor = NULL; + } +} +#endif /* HAVE_UDEV */ + +static Bool +I830ScreenInit(SCREEN_INIT_ARGS_DECL) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + VisualPtr visual; +#ifdef INTEL_XVMC + MessageType from; +#endif + struct pci_device *const device = intel->PciInfo; + int fb_bar = IS_GEN2(intel) ? 0 : 2; + + scrn->videoRam = device->regions[fb_bar].size / 1024; + + intel->last_3d = LAST_3D_OTHER; + intel->overlayOn = FALSE; + + /* + * Set this so that the overlay allocation is factored in when + * appropriate. + */ + intel->XvEnabled = TRUE; + +#ifdef DRI2 + if (intel->directRenderingType == DRI_NONE + && I830DRI2ScreenInit(screen)) + intel->directRenderingType = DRI_DRI2; +#endif + + if (!intel_init_initial_framebuffer(scrn)) + return FALSE; + + intel_batch_init(scrn); + + if (INTEL_INFO(intel)->gen >= 040) + gen4_render_state_init(scrn); + + miClearVisualTypes(); + if (!miSetVisualTypes(scrn->depth, + miGetDefaultVisualMask(scrn->depth), + scrn->rgbBits, scrn->defaultVisual)) + return FALSE; + if (!miSetPixmapDepths()) + return FALSE; + + if (!fbScreenInit(screen, NULL, + scrn->virtualX, scrn->virtualY, + scrn->xDpi, scrn->yDpi, + scrn->displayWidth, scrn->bitsPerPixel)) + return FALSE; + + if (scrn->bitsPerPixel > 8) { + /* Fixup RGB ordering */ + visual = screen->visuals + screen->numVisuals; + while (--visual >= screen->visuals) { + if ((visual->class | DynamicClass) == DirectColor) { + visual->offsetRed = scrn->offset.red; + visual->offsetGreen = scrn->offset.green; + visual->offsetBlue = scrn->offset.blue; + visual->redMask = scrn->mask.red; + visual->greenMask = scrn->mask.green; + visual->blueMask = scrn->mask.blue; + } + } + } + + fbPictureInit(screen, NULL, 0); + + xf86SetBlackWhitePixels(screen); + + if (!intel_uxa_init(screen)) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Hardware acceleration initialization failed\n"); + return FALSE; + } + + xf86SetBackingStore(screen); + xf86SetSilkenMouse(screen); + miDCInitialize(screen, xf86GetPointerScreenFuncs()); + + xf86DrvMsg(scrn->scrnIndex, X_INFO, "Initializing HW Cursor\n"); + if (!xf86_cursors_init(screen, 64, 64, + (HARDWARE_CURSOR_TRUECOLOR_AT_8BPP | + HARDWARE_CURSOR_BIT_ORDER_MSBFIRST | + HARDWARE_CURSOR_INVERT_MASK | + HARDWARE_CURSOR_SWAP_SOURCE_AND_MASK | + HARDWARE_CURSOR_AND_SOURCE_WITH_MASK | + HARDWARE_CURSOR_SOURCE_MASK_INTERLEAVE_64 | + HARDWARE_CURSOR_UPDATE_UNHIDDEN | + HARDWARE_CURSOR_ARGB))) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Hardware cursor initialization failed\n"); + } + + intel->BlockHandler = screen->BlockHandler; + screen->BlockHandler = I830BlockHandler; + +#ifdef INTEL_PIXMAP_SHARING + screen->StartPixmapTracking = PixmapStartDirtyTracking; + screen->StopPixmapTracking = PixmapStopDirtyTracking; +#endif + + if (!AddCallback(&FlushCallback, intel_flush_callback, scrn)) + return FALSE; + + screen->SaveScreen = xf86SaveScreen; + intel->CloseScreen = screen->CloseScreen; + screen->CloseScreen = I830CloseScreen; + intel->CreateScreenResources = screen->CreateScreenResources; + screen->CreateScreenResources = i830CreateScreenResources; + + intel_glamor_init(screen); + if (!xf86CrtcScreenInit(screen)) + return FALSE; + + if (!miCreateDefColormap(screen)) + return FALSE; + + if (!xf86HandleColormaps(screen, 256, 8, I830LoadPalette, NULL, + CMAP_RELOAD_ON_MODE_SWITCH | + CMAP_PALETTED_TRUECOLOR)) { + return FALSE; + } + + xf86DPMSInit(screen, xf86DPMSSet, 0); + +#ifdef INTEL_XVMC + if (INTEL_INFO(intel)->gen >= 040) + intel->XvMCEnabled = TRUE; + from = ((intel->directRenderingType == DRI_DRI2) && + xf86GetOptValBool(intel->Options, OPTION_XVMC, + &intel->XvMCEnabled) ? X_CONFIG : X_DEFAULT); + xf86DrvMsg(scrn->scrnIndex, from, "Intel XvMC decoder %sabled\n", + intel->XvMCEnabled ? "en" : "dis"); +#endif + /* Init video */ + if (intel->XvEnabled) + I830InitVideo(screen); + +#if defined(DRI2) + switch (intel->directRenderingType) { + case DRI_DRI2: + intel->directRenderingOpen = TRUE; + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "direct rendering: DRI2 Enabled\n"); + break; + case DRI_DISABLED: + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "direct rendering: Disabled\n"); + break; + case DRI_NONE: + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "direct rendering: Failed\n"); + break; + } +#else + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "direct rendering: Not available\n"); +#endif + + if (serverGeneration == 1) + xf86ShowUnusedOptions(scrn->scrnIndex, scrn->options); + + intel_mode_init(intel); + + intel->suspended = FALSE; + +#if HAVE_UDEV + I830UeventInit(scrn); +#endif + + /* Must force it before EnterVT, so we are in control of VT and + * later memory should be bound when allocating, e.g rotate_mem */ + scrn->vtSema = TRUE; + + return I830EnterVT(VT_FUNC_ARGS(0)); +} + +static void i830AdjustFrame(ADJUST_FRAME_ARGS_DECL) +{ +} + +static void I830FreeScreen(FREE_SCREEN_ARGS_DECL) +{ + SCRN_INFO_PTR(arg); + intel_screen_private *intel = intel_get_screen_private(scrn); + + if (intel && !((uintptr_t)intel & 1)) { + intel_mode_fini(intel); + intel_close_drm_master(intel); + intel_bufmgr_fini(intel); + + free(intel); + scrn->driverPrivate = NULL; + } +} + +static void I830LeaveVT(VT_FUNC_ARGS_DECL) +{ + SCRN_INFO_PTR(arg); + intel_screen_private *intel = intel_get_screen_private(scrn); + int ret; + + xf86RotateFreeShadow(scrn); + + xf86_hide_cursors(scrn); + + ret = drmDropMaster(intel->drmSubFD); + if (ret) + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "drmDropMaster failed: %s\n", strerror(errno)); +} + +/* + * This gets called when gaining control of the VT, and from ScreenInit(). + */ +static Bool I830EnterVT(VT_FUNC_ARGS_DECL) +{ + SCRN_INFO_PTR(arg); + intel_screen_private *intel = intel_get_screen_private(scrn); + int ret; + + ret = drmSetMaster(intel->drmSubFD); + if (ret) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "drmSetMaster failed: %s\n", + strerror(errno)); + } + + if (!xf86SetDesiredModes(scrn)) + return FALSE; + + intel_mode_disable_unused_functions(scrn); + return TRUE; +} + +static Bool I830SwitchMode(SWITCH_MODE_ARGS_DECL) +{ + SCRN_INFO_PTR(arg); + + return xf86SetSingleMode(scrn, mode, RR_Rotate_0); +} + +static Bool I830CloseScreen(CLOSE_SCREEN_ARGS_DECL) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + +#if HAVE_UDEV + I830UeventFini(scrn); +#endif + + intel_mode_close(intel); + + DeleteCallback(&FlushCallback, intel_flush_callback, scrn); + + intel_glamor_close_screen(screen); + + TimerFree(intel->cache_expire); + intel->cache_expire = NULL; + + if (intel->uxa_driver) { + uxa_driver_fini(screen); + free(intel->uxa_driver); + intel->uxa_driver = NULL; + } + + if (intel->back_pixmap) { + screen->DestroyPixmap(intel->back_pixmap); + intel->back_pixmap = NULL; + } + + if (intel->back_buffer) { + drm_intel_bo_unreference(intel->back_buffer); + intel->back_buffer = NULL; + } + + if (intel->front_buffer) { + intel_mode_remove_fb(intel); + drm_intel_bo_unreference(intel->front_buffer); + intel->front_buffer = NULL; + } + + if (scrn->vtSema == TRUE) { + I830LeaveVT(VT_FUNC_ARGS(0)); + } + + intel_batch_teardown(scrn); + + if (INTEL_INFO(intel)->gen >= 040) + gen4_render_state_cleanup(scrn); + + xf86_cursors_fini(screen); + + i965_free_video(scrn); + + screen->CloseScreen = intel->CloseScreen; + (*screen->CloseScreen) (CLOSE_SCREEN_ARGS); + + if (intel->directRenderingOpen + && intel->directRenderingType == DRI_DRI2) { + intel->directRenderingOpen = FALSE; + I830DRI2CloseScreen(screen); + } + + xf86GARTCloseScreen(scrn->scrnIndex); + + scrn->vtSema = FALSE; + return TRUE; +} + +static ModeStatus +I830ValidMode(SCRN_ARG_TYPE arg, DisplayModePtr mode, Bool verbose, int flags) +{ + SCRN_INFO_PTR(arg); + if (mode->Flags & V_INTERLACE) { + if (verbose) { + xf86DrvMsg(scrn->scrnIndex, X_PROBED, + "Removing interlaced mode \"%s\"\n", + mode->name); + } + return MODE_BAD; + } + return MODE_OK; +} + +#ifndef SUSPEND_SLEEP +#define SUSPEND_SLEEP 0 +#endif +#ifndef RESUME_SLEEP +#define RESUME_SLEEP 0 +#endif + +/* + * This function is only required if we need to do anything differently from + * DoApmEvent() in common/xf86PM.c, including if we want to see events other + * than suspend/resume. + */ +static Bool I830PMEvent(SCRN_ARG_TYPE arg, pmEvent event, Bool undo) +{ + SCRN_INFO_PTR(arg); + intel_screen_private *intel = intel_get_screen_private(scrn); + + switch (event) { + case XF86_APM_SYS_SUSPEND: + case XF86_APM_CRITICAL_SUSPEND: /*do we want to delay a critical suspend? */ + case XF86_APM_USER_SUSPEND: + case XF86_APM_SYS_STANDBY: + case XF86_APM_USER_STANDBY: + if (!undo && !intel->suspended) { + scrn->LeaveVT(VT_FUNC_ARGS(0)); + intel->suspended = TRUE; + sleep(SUSPEND_SLEEP); + } else if (undo && intel->suspended) { + sleep(RESUME_SLEEP); + scrn->EnterVT(VT_FUNC_ARGS(0)); + intel->suspended = FALSE; + } + break; + case XF86_APM_STANDBY_RESUME: + case XF86_APM_NORMAL_RESUME: + case XF86_APM_CRITICAL_RESUME: + if (intel->suspended) { + sleep(RESUME_SLEEP); + scrn->EnterVT(VT_FUNC_ARGS(0)); + intel->suspended = FALSE; + /* + * Turn the screen saver off when resuming. This seems to be + * needed to stop xscreensaver kicking in (when used). + * + * XXX DoApmEvent() should probably call this just like + * xf86VTSwitch() does. Maybe do it here only in 4.2 + * compatibility mode. + */ + SaveScreens(SCREEN_SAVER_FORCER, ScreenSaverReset); + } + break; + /* This is currently used for ACPI */ + case XF86_APM_CAPABILITY_CHANGED: + ErrorF("I830PMEvent: Capability change\n"); + + SaveScreens(SCREEN_SAVER_FORCER, ScreenSaverReset); + + break; + default: + ErrorF("I830PMEvent: received APM event %d\n", event); + } + return TRUE; +} + +Bool intel_init_scrn(ScrnInfoPtr scrn) +{ + __intel_uxa_release_device(scrn); + + scrn->PreInit = I830PreInit; + scrn->ScreenInit = I830ScreenInit; + scrn->SwitchMode = I830SwitchMode; + scrn->AdjustFrame = i830AdjustFrame; + scrn->EnterVT = I830EnterVT; + scrn->LeaveVT = I830LeaveVT; + scrn->FreeScreen = I830FreeScreen; + scrn->ValidMode = I830ValidMode; + scrn->PMEvent = I830PMEvent; + return TRUE; +} diff --git a/src/uxa/intel_glamor.c b/src/uxa/intel_glamor.c new file mode 100644 index 00000000..0c4e3a77 --- /dev/null +++ b/src/uxa/intel_glamor.c @@ -0,0 +1,254 @@ +/* + * Copyright © 2011 Intel Corporation. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including + * the next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Zhigang Gong <zhigang.gong@linux.intel.com> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <xf86.h> +#define GLAMOR_FOR_XORG 1 +#include <glamor.h> + +#include "intel.h" +#include "i915_drm.h" +#include "intel_glamor.h" +#include "uxa.h" +#include "intel_options.h" + +void +intel_glamor_exchange_buffers(struct intel_screen_private *intel, + PixmapPtr src, + PixmapPtr dst) +{ + if (!(intel->uxa_flags & UXA_USE_GLAMOR)) + return; + glamor_egl_exchange_buffers(src, dst); +} + +Bool +intel_glamor_create_screen_resources(ScreenPtr screen) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + + if (!(intel->uxa_flags & UXA_USE_GLAMOR)) + return TRUE; + + if (!glamor_glyphs_init(screen)) + return FALSE; + + if (!glamor_egl_create_textured_screen_ext(screen, + intel->front_buffer->handle, + intel->front_pitch, + &intel->back_pixmap)) + return FALSE; + + return TRUE; +} + +static Bool +intel_glamor_enabled(intel_screen_private *intel) +{ + const char *s; + + s = xf86GetOptValString(intel->Options, OPTION_ACCEL_METHOD); + if (s == NULL) + return FALSE; + + return strcasecmp(s, "glamor") == 0; +} + +Bool +intel_glamor_pre_init(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + pointer glamor_module; + CARD32 version; + + if (!intel_glamor_enabled(intel)) + return TRUE; + + if (!xf86LoaderCheckSymbol("glamor_egl_init")) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "glamor requires Load \"glamoregl\" in " + "Section \"Module\", disabling.\n"); + return TRUE; + } + + /* Load glamor module */ + if ((glamor_module = xf86LoadSubModule(scrn, GLAMOR_EGL_MODULE_NAME))) { + version = xf86GetModuleVersion(glamor_module); + if (version < MODULE_VERSION_NUMERIC(0,3,1)) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Incompatible glamor version, required >= 0.3.0.\n"); + } else { + if (glamor_egl_init(scrn, intel->drmSubFD)) { + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "glamor detected, initialising egl layer.\n"); + intel->uxa_flags = UXA_GLAMOR_EGL_INITIALIZED; + } else + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "glamor detected, failed to initialize egl.\n"); + } + } else + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "glamor not available\n"); + + return TRUE; +} + +PixmapPtr +intel_glamor_create_pixmap(ScreenPtr screen, int w, int h, + int depth, unsigned int usage) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + + if (intel->uxa_flags & UXA_USE_GLAMOR) + return glamor_create_pixmap(screen, w, h, depth, usage); + else + return NULL; +} + +Bool +intel_glamor_create_textured_pixmap(PixmapPtr pixmap) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(pixmap->drawable.pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + struct intel_pixmap *priv; + + if ((intel->uxa_flags & UXA_USE_GLAMOR) == 0) + return TRUE; + + priv = intel_get_pixmap_private(pixmap); + if (glamor_egl_create_textured_pixmap(pixmap, priv->bo->handle, + priv->stride)) { + drm_intel_bo_disable_reuse(priv->bo); + priv->pinned |= PIN_GLAMOR; + return TRUE; + } else + return FALSE; +} + +void +intel_glamor_destroy_pixmap(PixmapPtr pixmap) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(pixmap->drawable.pScreen); + intel_screen_private * intel; + + intel = intel_get_screen_private(scrn); + if (intel->uxa_flags & UXA_USE_GLAMOR) + glamor_egl_destroy_textured_pixmap(pixmap); +} + +static void +intel_glamor_need_flush(DrawablePtr pDrawable) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(pDrawable->pScreen); + intel_screen_private * intel; + + intel = intel_get_screen_private(scrn); + intel->needs_flush = TRUE; +} + +static void +intel_glamor_finish_access(PixmapPtr pixmap, uxa_access_t access) +{ + switch(access) { + case UXA_ACCESS_RO: + case UXA_ACCESS_RW: + case UXA_GLAMOR_ACCESS_RO: + break; + case UXA_GLAMOR_ACCESS_RW: + intel_glamor_need_flush(&pixmap->drawable); + break; + default: + ErrorF("Invalid access mode %d\n", access); + } + + return; +} + +Bool +intel_glamor_init(ScreenPtr screen) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + + if ((intel->uxa_flags & UXA_GLAMOR_EGL_INITIALIZED) == 0) + goto fail; + + if (!glamor_init(screen, GLAMOR_INVERTED_Y_AXIS | GLAMOR_USE_EGL_SCREEN)) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Failed to initialize glamor.\n"); + goto fail; + } + + if (!glamor_egl_init_textured_pixmap(screen)) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Failed to initialize textured pixmap of screen for glamor.\n"); + goto fail; + } + + intel->uxa_driver->flags |= UXA_USE_GLAMOR; + intel->uxa_flags |= intel->uxa_driver->flags; + + intel->uxa_driver->finish_access = intel_glamor_finish_access; + + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "Use GLAMOR acceleration.\n"); + return TRUE; + + fail: + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "Use standard UXA acceleration.\n"); + return FALSE; +} + +void +intel_glamor_flush(intel_screen_private * intel) +{ + ScreenPtr screen; + + screen = xf86ScrnToScreen(intel->scrn); + if (intel->uxa_flags & UXA_USE_GLAMOR) + glamor_block_handler(screen); +} + +Bool +intel_glamor_close_screen(ScreenPtr screen) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + + if (intel->uxa_flags & UXA_USE_GLAMOR) + intel->uxa_flags &= ~UXA_USE_GLAMOR; + + return TRUE; +} diff --git a/src/uxa/intel_glamor.h b/src/uxa/intel_glamor.h new file mode 100644 index 00000000..46692bc8 --- /dev/null +++ b/src/uxa/intel_glamor.h @@ -0,0 +1,67 @@ +/* + * Copyright © 2011 Intel Corporation. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including + * the next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Zhigang Gong <zhigang.gong@linux.intel.com> + * + */ + +#ifndef INTEL_GLAMOR_H +#define INTEL_GLAMOR_H + +#ifdef USE_GLAMOR + +Bool intel_glamor_pre_init(ScrnInfoPtr scrn); +Bool intel_glamor_init(ScreenPtr screen); +Bool intel_glamor_create_screen_resources(ScreenPtr screen); +Bool intel_glamor_close_screen(ScreenPtr screen); +void intel_glamor_free_screen(int scrnIndex, int flags); + +void intel_glamor_flush(intel_screen_private * intel); + +Bool intel_glamor_create_textured_pixmap(PixmapPtr pixmap); +void intel_glamor_destroy_pixmap(PixmapPtr pixmap); +PixmapPtr intel_glamor_create_pixmap(ScreenPtr screen, int w, int h, + int depth, unsigned int usage); +void intel_glamor_exchange_buffers(struct intel_screen_private *intel, PixmapPtr src, PixmapPtr dst); +#else + +static inline Bool intel_glamor_pre_init(ScrnInfoPtr scrn) { return TRUE; } +static inline Bool intel_glamor_init(ScreenPtr screen) { return TRUE; } +static inline Bool intel_glamor_create_screen_resources(ScreenPtr screen) { return TRUE; } +static inline Bool intel_glamor_close_screen(ScreenPtr screen) { return TRUE; } +static inline void intel_glamor_free_screen(int scrnIndex, int flags) { } + +static inline void intel_glamor_flush(intel_screen_private * intel) { } + +static inline Bool intel_glamor_create_textured_pixmap(PixmapPtr pixmap) { return TRUE; } +static inline void intel_glamor_destroy_pixmap(PixmapPtr pixmap) { } + +static inline PixmapPtr intel_glamor_create_pixmap(ScreenPtr screen, int w, int h, + int depth, unsigned int usage) { return NULL; } + +static inline void intel_glamor_exchange_buffers(struct intel_screen_private *intel, PixmapPtr src, PixmapPtr dst) {} +#endif + +#endif /* INTEL_GLAMOR_H */ diff --git a/src/uxa/intel_hwmc.c b/src/uxa/intel_hwmc.c new file mode 100644 index 00000000..f991aa65 --- /dev/null +++ b/src/uxa/intel_hwmc.c @@ -0,0 +1,260 @@ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Zhenyu Wang <zhenyu.z.wang@intel.com> + * + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#define _INTEL_XVMC_SERVER_ +#include "intel.h" +#include "intel_xvmc.h" + +#include <X11/extensions/Xv.h> +#include <X11/extensions/XvMC.h> +#include <fourcc.h> + +static int create_subpicture(ScrnInfoPtr scrn, XvMCSubpicturePtr subpicture, + int *num_priv, CARD32 ** priv) +{ + return Success; +} + +static void destroy_subpicture(ScrnInfoPtr scrn, XvMCSubpicturePtr subpicture) +{ +} + +static int create_surface(ScrnInfoPtr scrn, XvMCSurfacePtr surface, + int *num_priv, CARD32 ** priv) +{ + return Success; +} + +static void destroy_surface(ScrnInfoPtr scrn, XvMCSurfacePtr surface) +{ +} + +static int create_context(ScrnInfoPtr scrn, XvMCContextPtr pContext, + int *num_priv, CARD32 **priv) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + struct intel_xvmc_hw_context *contextRec; + + *priv = calloc(1, sizeof(struct intel_xvmc_hw_context)); + contextRec = (struct intel_xvmc_hw_context *) *priv; + if (!contextRec) { + *num_priv = 0; + return BadAlloc; + } + + *num_priv = sizeof(struct intel_xvmc_hw_context) >> 2; + + if (IS_GEN3(intel)) { + contextRec->type = XVMC_I915_MPEG2_MC; + contextRec->i915.use_phys_addr = 0; + } else { + if (INTEL_INFO(intel)->gen >= 045) + contextRec->type = XVMC_I965_MPEG2_VLD; + else + contextRec->type = XVMC_I965_MPEG2_MC; + contextRec->i965.is_g4x = INTEL_INFO(intel)->gen == 045; + contextRec->i965.is_965_q = IS_965_Q(intel); + contextRec->i965.is_igdng = IS_GEN5(intel); + } + + return Success; +} + +static void destroy_context(ScrnInfoPtr scrn, XvMCContextPtr context) +{ +} + +/* i915 hwmc support */ +static XF86MCSurfaceInfoRec i915_YV12_mpg2_surface = { + FOURCC_YV12, + XVMC_CHROMA_FORMAT_420, + 0, + 720, + 576, + 720, + 576, + XVMC_MPEG_2, + /* XVMC_OVERLAID_SURFACE | XVMC_SUBPICTURE_INDEPENDENT_SCALING, */ + 0, + /* &yv12_subpicture_list */ + NULL, +}; + +static XF86MCSurfaceInfoRec i915_YV12_mpg1_surface = { + FOURCC_YV12, + XVMC_CHROMA_FORMAT_420, + 0, + 720, + 576, + 720, + 576, + XVMC_MPEG_1, + /* XVMC_OVERLAID_SURFACE | XVMC_SUBPICTURE_INDEPENDENT_SCALING, */ + 0, + NULL, +}; + +static XF86MCSurfaceInfoPtr surface_info_i915[2] = { + (XF86MCSurfaceInfoPtr) & i915_YV12_mpg2_surface, + (XF86MCSurfaceInfoPtr) & i915_YV12_mpg1_surface +}; + +/* i965 and later hwmc support */ +#ifndef XVMC_VLD +#define XVMC_VLD 0x00020000 +#endif + +static XF86MCSurfaceInfoRec yv12_mpeg2_vld_surface = { + FOURCC_YV12, + XVMC_CHROMA_FORMAT_420, + 0, + 1936, + 1096, + 1920, + 1080, + XVMC_MPEG_2 | XVMC_VLD, + XVMC_INTRA_UNSIGNED, + NULL +}; + +static XF86MCSurfaceInfoRec yv12_mpeg2_i965_surface = { + FOURCC_YV12, + XVMC_CHROMA_FORMAT_420, + 0, + 1936, + 1096, + 1920, + 1080, + XVMC_MPEG_2 | XVMC_MOCOMP, + /* XVMC_OVERLAID_SURFACE | XVMC_SUBPICTURE_INDEPENDENT_SCALING, */ + XVMC_INTRA_UNSIGNED, + /* &yv12_subpicture_list */ + NULL +}; + +static XF86MCSurfaceInfoRec yv12_mpeg1_i965_surface = { + FOURCC_YV12, + XVMC_CHROMA_FORMAT_420, + 0, + 1920, + 1080, + 1920, + 1080, + XVMC_MPEG_1 | XVMC_MOCOMP, + /*XVMC_OVERLAID_SURFACE | XVMC_SUBPICTURE_INDEPENDENT_SCALING | + XVMC_INTRA_UNSIGNED, */ + XVMC_INTRA_UNSIGNED, + + /*&yv12_subpicture_list */ + NULL +}; + +static XF86MCSurfaceInfoPtr surface_info_i965[] = { + &yv12_mpeg2_i965_surface, + &yv12_mpeg1_i965_surface +}; + +static XF86MCSurfaceInfoPtr surface_info_vld[] = { + &yv12_mpeg2_vld_surface, + &yv12_mpeg2_i965_surface, +}; + +/* check chip type and load xvmc driver */ +Bool intel_xvmc_adaptor_init(ScreenPtr pScreen) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + static XF86MCAdaptorRec *pAdapt; + char *name; + char buf[64]; + + if (!intel->XvMCEnabled) + return FALSE; + + /* Needs KMS support. */ + if (IS_I915G(intel) || IS_I915GM(intel)) + return FALSE; + + if (IS_GEN2(intel)) { + ErrorF("Your chipset doesn't support XvMC.\n"); + return FALSE; + } + + pAdapt = calloc(1, sizeof(XF86MCAdaptorRec)); + if (!pAdapt) { + ErrorF("Allocation error.\n"); + return FALSE; + } + + pAdapt->name = "Intel(R) Textured Video"; + pAdapt->num_subpictures = 0; + pAdapt->subpictures = NULL; + pAdapt->CreateContext = create_context; + pAdapt->DestroyContext = destroy_context; + pAdapt->CreateSurface = create_surface; + pAdapt->DestroySurface = destroy_surface; + pAdapt->CreateSubpicture = create_subpicture; + pAdapt->DestroySubpicture = destroy_subpicture; + + if (IS_GEN3(intel)) { + name = "i915_xvmc", + pAdapt->num_surfaces = ARRAY_SIZE(surface_info_i915); + pAdapt->surfaces = surface_info_i915; + } else if (INTEL_INFO(intel)->gen >= 045) { + name = "xvmc_vld", + pAdapt->num_surfaces = ARRAY_SIZE(surface_info_vld); + pAdapt->surfaces = surface_info_vld; + } else { + name = "i965_xvmc", + pAdapt->num_surfaces = ARRAY_SIZE(surface_info_i965); + pAdapt->surfaces = surface_info_i965; + } + + if (xf86XvMCScreenInit(pScreen, 1, &pAdapt)) { + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "[XvMC] %s driver initialized.\n", + name); + } else { + intel->XvMCEnabled = FALSE; + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "[XvMC] Failed to initialize XvMC.\n"); + return FALSE; + } + + sprintf(buf, "pci:%04x:%02x:%02x.%d", + intel->PciInfo->domain, + intel->PciInfo->bus, intel->PciInfo->dev, intel->PciInfo->func); + + xf86XvMCRegisterDRInfo(pScreen, INTEL_XVMC_LIBNAME, + buf, + INTEL_XVMC_MAJOR, INTEL_XVMC_MINOR, + INTEL_XVMC_PATCHLEVEL); + return TRUE; +} diff --git a/src/uxa/intel_memory.c b/src/uxa/intel_memory.c new file mode 100644 index 00000000..e51fa33a --- /dev/null +++ b/src/uxa/intel_memory.c @@ -0,0 +1,286 @@ +/************************************************************************** + +Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. +Copyright © 2002 by David Dawes. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR +ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * David Dawes <dawes@xfree86.org> + * + * Updated for Dual Head capabilities: + * Alan Hourihane <alanh@tungstengraphics.com> + */ + +/** + * @file intel_memory.c + * + * This is the video memory allocator. Our memory allocation is different from + * other graphics chips, where you have a fixed amount of graphics memory + * available that you want to put to the best use. Instead, we have almost no + * memory pre-allocated, and we have to choose an appropriate amount of sytem + * memory to use. + * + * The allocations we might do: + * + * - Ring buffer + * - HW cursor block (either one block or four) + * - Overlay registers + * - Front buffer (screen 1) + * - Front buffer (screen 2, only in zaphod mode) + * - Back/depth buffer (3D only) + * - Compatibility texture pool (optional, more is always better) + * - New texture pool (optional, more is always better. aperture allocation + * only) + * + * The user may request a specific amount of memory to be used + * (intel->pEnt->videoRam != 0), in which case allocations have to fit within + * that much aperture. If not, the individual allocations will be + * automatically sized, and will be fit within the maximum aperture size. + * Only the actual memory used (not alignment padding) will get actual AGP + * memory allocated. + * + * Given that the allocations listed are generally a page or more than a page, + * our allocator will only return page-aligned offsets, simplifying the memory + * binding process. For smaller allocations, the acceleration architecture's + * linear allocator is preferred. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <inttypes.h> +#include <string.h> +#include <sys/types.h> +#include <sys/ioctl.h> + +#include "xf86.h" +#include "xf86_OSproc.h" + +#include "intel.h" +#include "i915_drm.h" + +/** + * Returns the fence size for a tiled area of the given size. + */ +unsigned long intel_get_fence_size(intel_screen_private *intel, unsigned long size) +{ + unsigned long i; + unsigned long start; + + if (INTEL_INFO(intel)->gen >= 040 || intel->has_relaxed_fencing) { + /* The 965 can have fences at any page boundary. */ + return ALIGN(size, 4096); + } else { + /* Align the size to a power of two greater than the smallest fence + * size. + */ + if (IS_GEN3(intel)) + start = MB(1); + else + start = KB(512); + + for (i = start; i < size; i <<= 1) ; + + return i; + } +} + +/** + * On some chips, pitch width has to be a power of two tile width, so + * calculate that here. + */ +unsigned long +intel_get_fence_pitch(intel_screen_private *intel, unsigned long pitch, + uint32_t tiling_mode) +{ + unsigned long i; + unsigned long tile_width = (tiling_mode == I915_TILING_Y) ? 128 : 512; + + if (tiling_mode == I915_TILING_NONE) + return pitch; + + /* 965+ is flexible */ + if (INTEL_INFO(intel)->gen >= 040) + return ALIGN(pitch, tile_width); + + /* Pre-965 needs power of two tile width */ + for (i = tile_width; i < pitch; i <<= 1) ; + + return i; +} + +static Bool +intel_check_display_stride(ScrnInfoPtr scrn, int stride, Bool tiling) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + int limit = KB(32); + + /* 8xx spec has always 8K limit, but tests show larger limit in + non-tiling mode, which makes large monitor work. */ + if (tiling) { + if (IS_GEN2(intel)) + limit = KB(8); + else if (IS_GEN3(intel)) + limit = KB(8); + else if (IS_GEN4(intel)) + limit = KB(16); + else + limit = KB(32); + } + + if (stride <= limit) + return TRUE; + else + return FALSE; +} + +/* + * Pad to accelerator requirement + */ +static inline int intel_pad_drawable_width(int width) +{ + return ALIGN(width, 64); +} + + +static size_t +agp_aperture_size(struct pci_device *dev, int gen) +{ + return dev->regions[gen < 030 ? 0 : 2].size; +} + +static void intel_set_gem_max_sizes(ScrnInfoPtr scrn) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + size_t agp_size = agp_aperture_size(intel->PciInfo, + INTEL_INFO(intel)->gen); + + /* The chances of being able to mmap an object larger than + * agp_size/2 are slim. Moreover, we may be forced to fallback + * using a gtt mapping as both the source and a mask, as well + * as a destination and all need to fit into the aperture. + */ + intel->max_gtt_map_size = agp_size / 4; + + /* Let objects be tiled up to the size where only 4 would fit in + * the aperture, presuming best case alignment. Also if we + * cannot mmap it using the GTT we will be stuck. */ + intel->max_tiling_size = intel->max_gtt_map_size; + + /* Large BOs will tend to hit SW fallbacks frequently, and also will + * tend to fail to successfully map when doing SW fallbacks because we + * overcommit address space for BO access, or worse cause aperture + * thrashing. + */ + intel->max_bo_size = intel->max_gtt_map_size; +} + +/** + * Allocates a framebuffer for a screen. + * + * Used once for each X screen, so once with RandR 1.2 and twice with classic + * dualhead. + */ +drm_intel_bo *intel_allocate_framebuffer(ScrnInfoPtr scrn, + int width, int height, int cpp, + unsigned long *out_pitch, + uint32_t *out_tiling) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + drm_intel_bo *front_buffer; + uint32_t tiling_mode; + unsigned long pitch; + + if (intel->tiling & INTEL_TILING_FB) + tiling_mode = I915_TILING_X; + else + tiling_mode = I915_TILING_NONE; + + width = intel_pad_drawable_width(width); + if (!intel_check_display_stride(scrn, width * intel->cpp, + tiling_mode != I915_TILING_NONE)) + tiling_mode = I915_TILING_NONE; + if (!intel_check_display_stride(scrn, width * intel->cpp, + tiling_mode != I915_TILING_NONE)) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Expected front buffer stride %d kB " + "will exceed display limit\n", + width * intel->cpp / 1024); + return NULL; + } + +retry: + front_buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "front buffer", + width, height, intel->cpp, + &tiling_mode, &pitch, 0); + if (front_buffer == NULL) { + if (tiling_mode != I915_TILING_NONE) { + tiling_mode = I915_TILING_NONE; + goto retry; + } + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Failed to allocate framebuffer.\n"); + return NULL; + } + + if (!intel_check_display_stride(scrn, pitch, + tiling_mode != I915_TILING_NONE)) { + drm_intel_bo_unreference(front_buffer); + if (tiling_mode != I915_TILING_NONE) { + tiling_mode = I915_TILING_NONE; + goto retry; + } + + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Front buffer stride %ld kB " + "exceeds display limit\n", pitch / 1024); + return NULL; + } + + /* If we could have used tiling but failed, warn */ + if (intel->tiling & INTEL_TILING_FB && + tiling_mode != I915_TILING_X && + intel_check_display_stride(scrn, pitch, I915_TILING_X)) + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "Failed to set tiling on frontbuffer.\n"); + + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "Allocated new frame buffer %dx%d stride %ld, %s\n", + width, height, pitch, + tiling_mode == I915_TILING_NONE ? "untiled" : "tiled"); + + drm_intel_bo_disable_reuse(front_buffer); + + intel_set_gem_max_sizes(scrn); + *out_pitch = pitch; + *out_tiling = tiling_mode; + + return front_buffer; +} diff --git a/src/uxa/intel_uxa.c b/src/uxa/intel_uxa.c new file mode 100644 index 00000000..2f141735 --- /dev/null +++ b/src/uxa/intel_uxa.c @@ -0,0 +1,1420 @@ +/************************************************************************** + +Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. +All Rights Reserved. +Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org> + Based on code from i830_xaa.c. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <xf86.h> +#include <xf86drm.h> +#include <xaarop.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> + +#include "intel.h" +#include "intel_glamor.h" +#include "uxa.h" + +#include "i830_reg.h" +#include "i915_drm.h" +#include "brw_defines.h" + +static const int I830CopyROP[16] = { + ROP_0, /* GXclear */ + ROP_DSa, /* GXand */ + ROP_SDna, /* GXandReverse */ + ROP_S, /* GXcopy */ + ROP_DSna, /* GXandInverted */ + ROP_D, /* GXnoop */ + ROP_DSx, /* GXxor */ + ROP_DSo, /* GXor */ + ROP_DSon, /* GXnor */ + ROP_DSxn, /* GXequiv */ + ROP_Dn, /* GXinvert */ + ROP_SDno, /* GXorReverse */ + ROP_Sn, /* GXcopyInverted */ + ROP_DSno, /* GXorInverted */ + ROP_DSan, /* GXnand */ + ROP_1 /* GXset */ +}; + +static const int I830PatternROP[16] = { + ROP_0, + ROP_DPa, + ROP_PDna, + ROP_P, + ROP_DPna, + ROP_D, + ROP_DPx, + ROP_DPo, + ROP_DPon, + ROP_PDxn, + ROP_Dn, + ROP_PDno, + ROP_Pn, + ROP_DPno, + ROP_DPan, + ROP_1 +}; + +#if HAS_DEVPRIVATEKEYREC +DevPrivateKeyRec uxa_pixmap_index; +#else +int uxa_pixmap_index; +#endif + +static void +gen6_context_switch(intel_screen_private *intel, + int new_mode) +{ + intel_batch_submit(intel->scrn); +} + +static void +gen5_context_switch(intel_screen_private *intel, + int new_mode) +{ + /* Ironlake has a limitation that a 3D or Media command can't + * be the first command after a BLT, unless it's + * non-pipelined. Instead of trying to track it and emit a + * command at the right time, we just emit a dummy + * non-pipelined 3D instruction after each blit. + */ + + if (new_mode == I915_EXEC_BLT) { + OUT_BATCH(MI_FLUSH | + MI_STATE_INSTRUCTION_CACHE_FLUSH | + MI_INHIBIT_RENDER_CACHE_FLUSH); + } else { + OUT_BATCH(CMD_POLY_STIPPLE_OFFSET << 16); + OUT_BATCH(0); + } +} + +static void +gen4_context_switch(intel_screen_private *intel, + int new_mode) +{ + if (new_mode == I915_EXEC_BLT) { + OUT_BATCH(MI_FLUSH | + MI_STATE_INSTRUCTION_CACHE_FLUSH | + MI_INHIBIT_RENDER_CACHE_FLUSH); + } +} + +Bool +intel_get_aperture_space(ScrnInfoPtr scrn, drm_intel_bo ** bo_table, + int num_bos) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + if (intel->batch_bo == NULL) { + intel_debug_fallback(scrn, "VT inactive\n"); + return FALSE; + } + + bo_table[0] = intel->batch_bo; + if (drm_intel_bufmgr_check_aperture_space(bo_table, num_bos) != 0) { + intel_batch_submit(scrn); + bo_table[0] = intel->batch_bo; + if (drm_intel_bufmgr_check_aperture_space(bo_table, num_bos) != + 0) { + intel_debug_fallback(scrn, "Couldn't get aperture " + "space for BOs\n"); + return FALSE; + } + } + return TRUE; +} + +static unsigned int +intel_uxa_pixmap_compute_size(PixmapPtr pixmap, + int w, int h, + uint32_t *tiling, + int *stride, + unsigned usage) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(pixmap->drawable.pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + int pitch, size; + + if (*tiling != I915_TILING_NONE) { + /* First check whether tiling is necessary. */ + pitch = (w * pixmap->drawable.bitsPerPixel + 7) / 8; + pitch = ALIGN(pitch, 64); + size = pitch * ALIGN (h, 2); + if (INTEL_INFO(intel)->gen < 040) { + /* Gen 2/3 has a maximum stride for tiling of + * 8192 bytes. + */ + if (pitch > KB(8)) + *tiling = I915_TILING_NONE; + + /* Narrower than half a tile? */ + if (pitch < 256) + *tiling = I915_TILING_NONE; + + /* Older hardware requires fences to be pot size + * aligned with a minimum of 1 MiB, so causes + * massive overallocation for small textures. + */ + if (size < 1024*1024/2 && !intel->has_relaxed_fencing) + *tiling = I915_TILING_NONE; + } else if (!(usage & INTEL_CREATE_PIXMAP_DRI2) && size <= 4096) { + /* Disable tiling beneath a page size, we will not see + * any benefit from reducing TLB misses and instead + * just incur extra cost when we require a fence. + */ + *tiling = I915_TILING_NONE; + } + } + + pitch = (w * pixmap->drawable.bitsPerPixel + 7) / 8; + if (!(usage & INTEL_CREATE_PIXMAP_DRI2) && pitch <= 256) + *tiling = I915_TILING_NONE; + + if (*tiling != I915_TILING_NONE) { + int aligned_h, tile_height; + + if (IS_GEN2(intel)) + tile_height = 16; + else if (*tiling == I915_TILING_X) + tile_height = 8; + else + tile_height = 32; + aligned_h = ALIGN(h, 2*tile_height); + + *stride = intel_get_fence_pitch(intel, + ALIGN(pitch, 512), + *tiling); + + /* Round the object up to the size of the fence it will live in + * if necessary. We could potentially make the kernel allocate + * a larger aperture space and just bind the subset of pages in, + * but this is easier and also keeps us out of trouble (as much) + * with drm_intel_bufmgr_check_aperture(). + */ + size = intel_get_fence_size(intel, *stride * aligned_h); + + if (size > intel->max_tiling_size) + *tiling = I915_TILING_NONE; + } + + if (*tiling == I915_TILING_NONE) { + /* We only require a 64 byte alignment for scanouts, but + * a 256 byte alignment for sharing with PRIME. + */ + *stride = ALIGN(pitch, 256); + /* Round the height up so that the GPU's access to a 2x2 aligned + * subspan doesn't address an invalid page offset beyond the + * end of the GTT. + */ + size = *stride * ALIGN(h, 2); + } + + return size; +} + +static Bool +intel_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(drawable->pScreen); + + if (!UXA_PM_IS_SOLID(drawable, planemask)) { + intel_debug_fallback(scrn, "planemask is not solid\n"); + return FALSE; + } + + switch (drawable->bitsPerPixel) { + case 8: + case 16: + case 32: + break; + default: + return FALSE; + } + + return TRUE; +} + +/** + * Sets up hardware state for a series of solid fills. + */ +static Bool +intel_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(pixmap->drawable.pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + drm_intel_bo *bo_table[] = { + NULL, /* batch_bo */ + intel_get_pixmap_bo(pixmap), + }; + + if (!intel_check_pitch_2d(pixmap)) + return FALSE; + + if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table))) + return FALSE; + + intel->BR[13] = (I830PatternROP[alu] & 0xff) << 16; + switch (pixmap->drawable.bitsPerPixel) { + case 8: + break; + case 16: + /* RGB565 */ + intel->BR[13] |= (1 << 24); + break; + case 32: + /* RGB8888 */ + intel->BR[13] |= ((1 << 24) | (1 << 25)); + break; + } + intel->BR[16] = fg; + + return TRUE; +} + +static void intel_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(pixmap->drawable.pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + unsigned long pitch; + uint32_t cmd; + + if (x1 < 0) + x1 = 0; + if (y1 < 0) + y1 = 0; + if (x2 > pixmap->drawable.width) + x2 = pixmap->drawable.width; + if (y2 > pixmap->drawable.height) + y2 = pixmap->drawable.height; + + if (x2 <= x1 || y2 <= y1) + return; + + pitch = intel_pixmap_pitch(pixmap); + + { + BEGIN_BATCH_BLT(6); + + cmd = XY_COLOR_BLT_CMD; + + if (pixmap->drawable.bitsPerPixel == 32) + cmd |= + XY_COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB; + + if (INTEL_INFO(intel)->gen >= 040 && intel_pixmap_tiled(pixmap)) { + assert((pitch % 512) == 0); + pitch >>= 2; + cmd |= XY_COLOR_BLT_TILED; + } + + OUT_BATCH(cmd); + + OUT_BATCH(intel->BR[13] | pitch); + OUT_BATCH((y1 << 16) | (x1 & 0xffff)); + OUT_BATCH((y2 << 16) | (x2 & 0xffff)); + OUT_RELOC_PIXMAP_FENCED(pixmap, I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(intel->BR[16]); + ADVANCE_BATCH(); + } +} + +/** + * TODO: + * - support planemask using FULL_BLT_CMD? + */ +static Bool +intel_uxa_check_copy(PixmapPtr source, PixmapPtr dest, + int alu, Pixel planemask) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(dest->drawable.pScreen); + + if (!UXA_PM_IS_SOLID(&source->drawable, planemask)) { + intel_debug_fallback(scrn, "planemask is not solid"); + return FALSE; + } + + if (source->drawable.bitsPerPixel != dest->drawable.bitsPerPixel) { + intel_debug_fallback(scrn, "mixed bpp copies unsupported\n"); + return FALSE; + } + switch (source->drawable.bitsPerPixel) { + case 8: + case 16: + case 32: + break; + default: + return FALSE; + } + + if (!intel_check_pitch_2d(source)) + return FALSE; + if (!intel_check_pitch_2d(dest)) + return FALSE; + + return TRUE; +} + +static Bool +intel_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir, + int ydir, int alu, Pixel planemask) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(dest->drawable.pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + drm_intel_bo *bo_table[] = { + NULL, /* batch_bo */ + intel_get_pixmap_bo(source), + intel_get_pixmap_bo(dest), + }; + + if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table))) + return FALSE; + + intel->render_source = source; + + intel->BR[13] = I830CopyROP[alu] << 16; + switch (source->drawable.bitsPerPixel) { + case 8: + break; + case 16: + intel->BR[13] |= (1 << 24); + break; + case 32: + intel->BR[13] |= ((1 << 25) | (1 << 24)); + break; + } + + return TRUE; +} + +static void +intel_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1, + int dst_y1, int w, int h) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(dest->drawable.pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + uint32_t cmd; + int dst_x2, dst_y2, src_x2, src_y2; + unsigned int dst_pitch, src_pitch; + + dst_x2 = dst_x1 + w; + dst_y2 = dst_y1 + h; + + /* XXX Fixup extents as a lamentable workaround for missing + * source clipping in the upper layers. + */ + if (dst_x1 < 0) + src_x1 -= dst_x1, dst_x1 = 0; + if (dst_y1 < 0) + src_y1 -= dst_y1, dst_y1 = 0; + if (dst_x2 > dest->drawable.width) + dst_x2 = dest->drawable.width; + if (dst_y2 > dest->drawable.height) + dst_y2 = dest->drawable.height; + + src_x2 = src_x1 + (dst_x2 - dst_x1); + src_y2 = src_y1 + (dst_y2 - dst_y1); + + if (src_x1 < 0) + dst_x1 -= src_x1, src_x1 = 0; + if (src_y1 < 0) + dst_y1 -= src_y1, src_y1 = 0; + if (src_x2 > intel->render_source->drawable.width) + dst_x2 -= src_x2 - intel->render_source->drawable.width; + if (src_y2 > intel->render_source->drawable.height) + dst_y2 -= src_y2 - intel->render_source->drawable.height; + + if (dst_x2 <= dst_x1 || dst_y2 <= dst_y1) + return; + + dst_pitch = intel_pixmap_pitch(dest); + src_pitch = intel_pixmap_pitch(intel->render_source); + + { + BEGIN_BATCH_BLT(8); + + cmd = XY_SRC_COPY_BLT_CMD; + + if (dest->drawable.bitsPerPixel == 32) + cmd |= + XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB; + + if (INTEL_INFO(intel)->gen >= 040) { + if (intel_pixmap_tiled(dest)) { + assert((dst_pitch % 512) == 0); + dst_pitch >>= 2; + cmd |= XY_SRC_COPY_BLT_DST_TILED; + } + + if (intel_pixmap_tiled(intel->render_source)) { + assert((src_pitch % 512) == 0); + src_pitch >>= 2; + cmd |= XY_SRC_COPY_BLT_SRC_TILED; + } + } + + OUT_BATCH(cmd); + + OUT_BATCH(intel->BR[13] | dst_pitch); + OUT_BATCH((dst_y1 << 16) | (dst_x1 & 0xffff)); + OUT_BATCH((dst_y2 << 16) | (dst_x2 & 0xffff)); + OUT_RELOC_PIXMAP_FENCED(dest, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + 0); + OUT_BATCH((src_y1 << 16) | (src_x1 & 0xffff)); + OUT_BATCH(src_pitch); + OUT_RELOC_PIXMAP_FENCED(intel->render_source, + I915_GEM_DOMAIN_RENDER, 0, + 0); + + ADVANCE_BATCH(); + } +} + +static void intel_uxa_done(PixmapPtr pixmap) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(pixmap->drawable.pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + + if (IS_GEN6(intel) || IS_GEN7(intel)) { + /* workaround a random BLT hang */ + BEGIN_BATCH_BLT(3); + OUT_BATCH(XY_SETUP_CLIP_BLT_CMD); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + intel_debug_flush(scrn); +} + +/** + * Do any cleanup from the Composite operation. + * + * This is shared between i830 through i965. + */ +static void i830_done_composite(PixmapPtr dest) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(dest->drawable.pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + + if (intel->vertex_flush) + intel->vertex_flush(intel); + + intel_debug_flush(scrn); +} + +#define xFixedToFloat(val) \ + ((float)xFixedToInt(val) + ((float)xFixedFrac(val) / 65536.0)) + +static Bool +_intel_transform_point(PictTransformPtr transform, + float x, float y, float result[3]) +{ + int j; + + for (j = 0; j < 3; j++) { + result[j] = (xFixedToFloat(transform->matrix[j][0]) * x + + xFixedToFloat(transform->matrix[j][1]) * y + + xFixedToFloat(transform->matrix[j][2])); + } + if (!result[2]) + return FALSE; + return TRUE; +} + +/** + * Returns the floating-point coordinates transformed by the given transform. + * + * transform may be null. + */ +Bool +intel_get_transformed_coordinates(int x, int y, PictTransformPtr transform, + float *x_out, float *y_out) +{ + if (transform == NULL) { + *x_out = x; + *y_out = y; + } else { + float result[3]; + + if (!_intel_transform_point(transform, + x, y, + result)) + return FALSE; + *x_out = result[0] / result[2]; + *y_out = result[1] / result[2]; + } + return TRUE; +} + +/** + * Returns the un-normalized floating-point coordinates transformed by the given transform. + * + * transform may be null. + */ +Bool +intel_get_transformed_coordinates_3d(int x, int y, PictTransformPtr transform, + float *x_out, float *y_out, float *w_out) +{ + if (transform == NULL) { + *x_out = x; + *y_out = y; + *w_out = 1; + } else { + float result[3]; + + if (!_intel_transform_point(transform, + x, y, + result)) + return FALSE; + *x_out = result[0]; + *y_out = result[1]; + *w_out = result[2]; + } + return TRUE; +} + +/** + * Returns whether the provided transform is affine. + * + * transform may be null. + */ +Bool intel_transform_is_affine(PictTransformPtr t) +{ + if (t == NULL) + return TRUE; + return t->matrix[2][0] == 0 && t->matrix[2][1] == 0; +} + +dri_bo *intel_get_pixmap_bo(PixmapPtr pixmap) +{ + struct intel_pixmap *intel; + + intel = intel_get_pixmap_private(pixmap); + if (intel == NULL) + return NULL; + + return intel->bo; +} + +void intel_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo) +{ + struct intel_pixmap *priv; + + priv = intel_get_pixmap_private(pixmap); + if (priv == NULL && bo == NULL) + return; + + if (priv != NULL) { + if (priv->bo == bo) + return; + + dri_bo_unreference(priv->bo); + list_del(&priv->batch); + + free(priv); + priv = NULL; + } + + if (bo != NULL) { + uint32_t tiling; + uint32_t swizzle_mode; + int ret; + + priv = calloc(1, sizeof (struct intel_pixmap)); + if (priv == NULL) + goto BAIL; + + list_init(&priv->batch); + + dri_bo_reference(bo); + priv->bo = bo; + priv->stride = intel_pixmap_pitch(pixmap); + + ret = drm_intel_bo_get_tiling(bo, &tiling, &swizzle_mode); + if (ret != 0) { + FatalError("Couldn't get tiling on bo %p: %s\n", + bo, strerror(-ret)); + } + + priv->tiling = tiling; + priv->busy = -1; + priv->offscreen = 1; + } + + BAIL: + intel_set_pixmap_private(pixmap, priv); +} + +static Bool intel_uxa_pixmap_is_offscreen(PixmapPtr pixmap) +{ + return intel_pixmap_is_offscreen(pixmap); +} + +static Bool intel_uxa_prepare_access(PixmapPtr pixmap, uxa_access_t access) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(pixmap->drawable.pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + struct intel_pixmap *priv = intel_get_pixmap_private(pixmap); + dri_bo *bo = priv->bo; + int ret; + + /* Transitioning to glamor acceleration, we need to flush all pending + * usage by UXA. */ + if (access == UXA_GLAMOR_ACCESS_RW || access == UXA_GLAMOR_ACCESS_RO) { + if (!list_is_empty(&priv->batch)) + intel_batch_submit(scrn); + return TRUE; + } + + /* When falling back to swrast, flush all pending operations */ + intel_glamor_flush(intel); + if (access == UXA_ACCESS_RW || priv->dirty) + intel_batch_submit(scrn); + + assert(bo->size <= intel->max_gtt_map_size); + ret = drm_intel_gem_bo_map_gtt(bo); + if (ret) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s: bo map (use gtt? %d, access %d) failed: %s\n", + __FUNCTION__, + priv->tiling || bo->size <= intel->max_gtt_map_size, + access, + strerror(-ret)); + return FALSE; + } + + pixmap->devPrivate.ptr = bo->virtual; + priv->busy = 0; + + return TRUE; +} + +static void intel_uxa_finish_access(PixmapPtr pixmap, uxa_access_t access) +{ + struct intel_pixmap *priv; + + if (access == UXA_GLAMOR_ACCESS_RW || access == UXA_GLAMOR_ACCESS_RO) + return; + + priv = intel_get_pixmap_private(pixmap); + if (priv == NULL) + return; + + drm_intel_gem_bo_unmap_gtt(priv->bo); + pixmap->devPrivate.ptr = NULL; +} + +static Bool intel_uxa_pixmap_put_image(PixmapPtr pixmap, + char *src, int src_pitch, + int x, int y, int w, int h) +{ + struct intel_pixmap *priv = intel_get_pixmap_private(pixmap); + int stride = intel_pixmap_pitch(pixmap); + int cpp = pixmap->drawable.bitsPerPixel/8; + int ret = FALSE; + + if (priv == NULL || priv->bo == NULL) + return FALSE; + + if (priv->tiling == I915_TILING_NONE && + (h == 1 || (src_pitch == stride && w == pixmap->drawable.width))) { + return drm_intel_bo_subdata(priv->bo, y*stride + x*cpp, stride*(h-1) + w*cpp, src) == 0; + } else if (drm_intel_gem_bo_map_gtt(priv->bo) == 0) { + char *dst = priv->bo->virtual; + int row_length = w * cpp; + int num_rows = h; + if (row_length == src_pitch && src_pitch == stride) + num_rows = 1, row_length *= h; + dst += y * stride + x * cpp; + do { + memcpy (dst, src, row_length); + src += src_pitch; + dst += stride; + } while (--num_rows); + drm_intel_gem_bo_unmap_gtt(priv->bo); + ret = TRUE; + } + + return ret; +} + +static Bool intel_uxa_put_image(PixmapPtr pixmap, + int x, int y, + int w, int h, + char *src, int src_pitch) +{ + struct intel_pixmap *priv; + + priv = intel_get_pixmap_private(pixmap); + if (!intel_pixmap_is_busy(priv)) { + /* bo is not busy so can be replaced without a stall, upload in-place. */ + return intel_uxa_pixmap_put_image(pixmap, src, src_pitch, x, y, w, h); + } else { + ScreenPtr screen = pixmap->drawable.pScreen; + + if (!priv->pinned && + x == 0 && y == 0 && + w == pixmap->drawable.width && + h == pixmap->drawable.height) + { + intel_screen_private *intel = intel_get_screen_private(xf86ScreenToScrn(screen)); + uint32_t tiling = priv->tiling; + int size, stride; + dri_bo *bo; + + /* Replace busy bo. */ + size = intel_uxa_pixmap_compute_size (pixmap, w, h, + &tiling, &stride, 0); + if (size > intel->max_gtt_map_size) + return FALSE; + + bo = drm_intel_bo_alloc(intel->bufmgr, "pixmap", size, 0); + if (bo == NULL) + return FALSE; + + if (tiling != I915_TILING_NONE) + drm_intel_bo_set_tiling(bo, &tiling, stride); + priv->stride = stride; + priv->tiling = tiling; + + screen->ModifyPixmapHeader(pixmap, + w, h, + 0, 0, + stride, NULL); + intel_set_pixmap_bo(pixmap, bo); + dri_bo_unreference(bo); + + return intel_uxa_pixmap_put_image(pixmap, src, src_pitch, 0, 0, w, h); + } + else + { + PixmapPtr scratch; + Bool ret; + + /* Upload to a linear buffer and queue a blit. */ + scratch = (*screen->CreatePixmap)(screen, w, h, + pixmap->drawable.depth, + UXA_CREATE_PIXMAP_FOR_MAP); + if (!scratch) + return FALSE; + + if (!intel_uxa_pixmap_is_offscreen(scratch)) { + screen->DestroyPixmap(scratch); + return FALSE; + } + + ret = intel_uxa_pixmap_put_image(scratch, src, src_pitch, 0, 0, w, h); + if (ret) { + GCPtr gc = GetScratchGC(pixmap->drawable.depth, screen); + if (gc) { + ValidateGC(&pixmap->drawable, gc); + + (*gc->ops->CopyArea)(&scratch->drawable, + &pixmap->drawable, + gc, 0, 0, w, h, x, y); + + FreeScratchGC(gc); + } else + ret = FALSE; + } + + (*screen->DestroyPixmap)(scratch); + return ret; + } + } +} + +static Bool intel_uxa_pixmap_get_image(PixmapPtr pixmap, + int x, int y, int w, int h, + char *dst, int dst_pitch) +{ + struct intel_pixmap *priv = intel_get_pixmap_private(pixmap); + int stride = intel_pixmap_pitch(pixmap); + int cpp = pixmap->drawable.bitsPerPixel/8; + + /* assert(priv->tiling == I915_TILING_NONE); */ + if (h == 1 || (dst_pitch == stride && w == pixmap->drawable.width)) { + return drm_intel_bo_get_subdata(priv->bo, y*stride + x*cpp, (h-1)*stride + w*cpp, dst) == 0; + } else { + char *src; + + if (drm_intel_gem_bo_map_gtt(priv->bo)) + return FALSE; + + src = (char *) priv->bo->virtual + y * stride + x * cpp; + w *= cpp; + do { + memcpy(dst, src, w); + src += stride; + dst += dst_pitch; + } while (--h); + + drm_intel_gem_bo_unmap_gtt(priv->bo); + + return TRUE; + } +} + +static Bool intel_uxa_get_image(PixmapPtr pixmap, + int x, int y, + int w, int h, + char *dst, int dst_pitch) +{ + struct intel_pixmap *priv; + PixmapPtr scratch = NULL; + Bool ret; + + /* The presumption is that we wish to keep the target hot, so + * copy to a new bo and move that to the CPU in preference to + * causing ping-pong of the original. + * + * Also the gpu is much faster at detiling. + */ + + priv = intel_get_pixmap_private(pixmap); + if (intel_pixmap_is_busy(priv) || priv->tiling != I915_TILING_NONE) { + ScreenPtr screen = pixmap->drawable.pScreen; + GCPtr gc; + + /* Copy to a linear buffer and pull. */ + scratch = screen->CreatePixmap(screen, w, h, + pixmap->drawable.depth, + INTEL_CREATE_PIXMAP_TILING_NONE); + if (!scratch) + return FALSE; + + if (!intel_uxa_pixmap_is_offscreen(scratch)) { + screen->DestroyPixmap(scratch); + return FALSE; + } + + gc = GetScratchGC(pixmap->drawable.depth, screen); + if (!gc) { + screen->DestroyPixmap(scratch); + return FALSE; + } + + ValidateGC(&pixmap->drawable, gc); + + gc->ops->CopyArea(&pixmap->drawable, + &scratch->drawable, + gc, x, y, w, h, 0, 0); + + FreeScratchGC(gc); + + intel_batch_submit(xf86ScreenToScrn(screen)); + + x = y = 0; + pixmap = scratch; + } + + ret = intel_uxa_pixmap_get_image(pixmap, x, y, w, h, dst, dst_pitch); + + if (scratch) + scratch->drawable.pScreen->DestroyPixmap(scratch); + + return ret; +} + +static CARD32 intel_cache_expire(OsTimerPtr timer, CARD32 now, pointer data) +{ + intel_screen_private *intel = data; + + /* We just want to create and destroy a bo as this causes libdrm + * to reap its caches. However, since we can't remove that buffer + * from the cache due to its own activity, we want to use something + * that we know we will reuse later. The most frequently reused buffer + * we have is the batchbuffer, and the best way to trigger its + * reallocation is to submit a flush. + */ + intel_batch_emit_flush(intel->scrn); + intel_batch_submit(intel->scrn); + + return 0; +} + +static void intel_flush_rendering(intel_screen_private *intel) +{ + if (intel->needs_flush == 0) + return; + + if (intel->has_kernel_flush) { + intel_batch_submit(intel->scrn); + drm_intel_bo_busy(intel->front_buffer); + } else { + intel_batch_emit_flush(intel->scrn); + intel_batch_submit(intel->scrn); + } + + intel->cache_expire = TimerSet(intel->cache_expire, 0, 3000, + intel_cache_expire, intel); + + intel->needs_flush = 0; +} + +static void intel_throttle(intel_screen_private *intel) +{ + drmCommandNone(intel->drmSubFD, DRM_I915_GEM_THROTTLE); +} + +void intel_uxa_block_handler(intel_screen_private *intel) +{ + /* Emit a flush of the rendering cache, or on the 965 + * and beyond rendering results may not hit the + * framebuffer until significantly later. + */ + intel_glamor_flush(intel); + intel_flush_rendering(intel); + intel_throttle(intel); +} + +static PixmapPtr +intel_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth, + unsigned usage) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + struct intel_pixmap *priv; + PixmapPtr pixmap, new_pixmap = NULL; + + if (!(usage & INTEL_CREATE_PIXMAP_DRI2)) { + pixmap = intel_glamor_create_pixmap(screen, w, h, depth, usage); + if (pixmap) + return pixmap; + } + + if (w > 32767 || h > 32767) + return NullPixmap; + + if (depth == 1 || intel->force_fallback) + return fbCreatePixmap(screen, w, h, depth, usage); + + if (usage == CREATE_PIXMAP_USAGE_GLYPH_PICTURE && w <= 32 && h <= 32) + return fbCreatePixmap(screen, w, h, depth, usage); + + pixmap = fbCreatePixmap(screen, 0, 0, depth, usage); + if (pixmap == NullPixmap) + return pixmap; + + if (w && h) { + unsigned int size, tiling; + int stride; + + /* Always attempt to tile, compute_size() will remove the + * tiling for pixmaps that are either too large or too small + * to be effectively tiled. + */ + tiling = I915_TILING_X; + if (usage & INTEL_CREATE_PIXMAP_TILING_Y) + tiling = I915_TILING_Y; + if (usage == UXA_CREATE_PIXMAP_FOR_MAP || usage & INTEL_CREATE_PIXMAP_TILING_NONE) + tiling = I915_TILING_NONE; + +#ifdef CREATE_PIXMAP_USAGE_SHARED + if (usage == CREATE_PIXMAP_USAGE_SHARED) + tiling = I915_TILING_NONE; +#endif + /* if tiling is off force to none */ + if (!intel->tiling) + tiling = I915_TILING_NONE; + + if (tiling != I915_TILING_NONE && !(usage & INTEL_CREATE_PIXMAP_DRI2)) { + if (h <= 4) + tiling = I915_TILING_NONE; + if (h <= 16 && tiling == I915_TILING_Y) + tiling = I915_TILING_X; + } + size = intel_uxa_pixmap_compute_size(pixmap, w, h, &tiling, &stride, usage); + + /* Fail very large allocations. Large BOs will tend to hit SW fallbacks + * frequently, and also will tend to fail to successfully map when doing + * SW fallbacks because we overcommit address space for BO access. + */ + if (size > intel->max_bo_size || stride >= KB(32)) + goto fallback_pixmap; + + priv = calloc(1, sizeof (struct intel_pixmap)); + if (priv == NULL) + goto fallback_pixmap; + + if (usage == UXA_CREATE_PIXMAP_FOR_MAP) { + priv->busy = 0; + priv->bo = drm_intel_bo_alloc(intel->bufmgr, + "pixmap", size, 0); + } else { + priv->busy = -1; + priv->bo = drm_intel_bo_alloc_for_render(intel->bufmgr, + "pixmap", + size, 0); + } + if (!priv->bo) + goto fallback_priv; + + if (tiling != I915_TILING_NONE) + drm_intel_bo_set_tiling(priv->bo, &tiling, stride); + priv->stride = stride; + priv->tiling = tiling; + priv->offscreen = 1; + + list_init(&priv->batch); + intel_set_pixmap_private(pixmap, priv); + + screen->ModifyPixmapHeader(pixmap, w, h, 0, 0, stride, NULL); + + if (!intel_glamor_create_textured_pixmap(pixmap)) + goto fallback_glamor; + } + + return pixmap; + +fallback_glamor: + if (usage & INTEL_CREATE_PIXMAP_DRI2) { + /* XXX need further work to handle the DRI2 failure case. + * Glamor don't know how to handle a BO only pixmap. Put + * a warning indicator here. + */ + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "Failed to create textured DRI2 pixmap."); + return pixmap; + } + /* Create textured pixmap failed means glamor failed to + * create a texture from current BO for some reasons. We turn + * to create a new glamor pixmap and clean up current one. + * One thing need to be noted, this new pixmap doesn't + * has a priv and bo attached to it. It's glamor's responsbility + * to take care of it. Glamor will mark this new pixmap as a + * texture only pixmap and will never fallback to DDX layer + * afterwards. + */ + new_pixmap = intel_glamor_create_pixmap(screen, w, h, + depth, usage); + dri_bo_unreference(priv->bo); +fallback_priv: + free(priv); +fallback_pixmap: + fbDestroyPixmap(pixmap); + if (new_pixmap) + return new_pixmap; + else + return fbCreatePixmap(screen, w, h, depth, usage); +} + +static Bool intel_uxa_destroy_pixmap(PixmapPtr pixmap) +{ + if (pixmap->refcnt == 1) { + intel_glamor_destroy_pixmap(pixmap); + intel_set_pixmap_bo(pixmap, NULL); + } + fbDestroyPixmap(pixmap); + return TRUE; +} + +Bool intel_uxa_create_screen_resources(ScreenPtr screen) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + PixmapPtr pixmap; + intel_screen_private *intel = intel_get_screen_private(scrn); + dri_bo *bo = intel->front_buffer; + + if (!uxa_resources_init(screen)) + return FALSE; + + if (drm_intel_gem_bo_map_gtt(bo)) + return FALSE; + + pixmap = screen->GetScreenPixmap(screen); + intel_set_pixmap_bo(pixmap, bo); + intel_get_pixmap_private(pixmap)->pinned |= PIN_SCANOUT; + screen->ModifyPixmapHeader(pixmap, + scrn->virtualX, + scrn->virtualY, + -1, -1, + intel->front_pitch, + NULL); + scrn->displayWidth = intel->front_pitch / intel->cpp; + + if (!intel_glamor_create_screen_resources(screen)) + return FALSE; + + return TRUE; +} + +#ifdef CREATE_PIXMAP_USAGE_SHARED +static Bool +intel_uxa_share_pixmap_backing(PixmapPtr ppix, ScreenPtr slave, void **fd_handle) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(ppix->drawable.pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + struct intel_pixmap *priv = intel_get_pixmap_private(ppix); + unsigned int size, tiling, swizzle; + dri_bo *bo = intel_get_pixmap_bo(ppix), *newbo; + int stride; + int handle; + + if (drm_intel_bo_references(intel->batch_bo, bo)) + intel_batch_submit(intel->scrn); + + drm_intel_bo_get_tiling(bo, &tiling, &swizzle); + + if (tiling == I915_TILING_X) { + if (priv->pinned & ~PIN_DRI) + return FALSE; + + tiling = I915_TILING_NONE; + + size = intel_uxa_pixmap_compute_size(ppix, ppix->drawable.width, ppix->drawable.height, &tiling, &stride, INTEL_CREATE_PIXMAP_DRI2); + + newbo = drm_intel_bo_alloc_for_render(intel->bufmgr, + "pixmap", + size, 0); + + if (tiling != I915_TILING_NONE) + drm_intel_bo_set_tiling(newbo, &tiling, stride); + priv->stride = stride; + priv->tiling = tiling; + intel_set_pixmap_bo(ppix, newbo); + + ppix->drawable.pScreen->ModifyPixmapHeader(ppix, ppix->drawable.width, + ppix->drawable.height, 0, 0, + stride, NULL); + bo = newbo; + } + drm_intel_bo_get_tiling(bo, &tiling, &swizzle); + drm_intel_bo_gem_export_to_prime(bo, &handle); + priv->pinned |= PIN_DRI; + + *fd_handle = (void *)(long)handle; + return TRUE; +} + +static Bool +intel_uxa_set_shared_pixmap_backing(PixmapPtr ppix, void *fd_handle) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(ppix->drawable.pScreen); + intel_screen_private *intel = intel_get_screen_private(scrn); + dri_bo *bo; + int ihandle = (int)(long)fd_handle; + + /* force untiled for now */ + bo = drm_intel_bo_gem_create_from_prime(intel->bufmgr, ihandle, 0); + if (!bo) + return FALSE; + + intel_set_pixmap_bo(ppix, bo); + close(ihandle); + return TRUE; +} +#endif + +static void +intel_limits_init(intel_screen_private *intel) +{ + /* Limits are described in the BLT engine chapter under Graphics Data Size + * Limitations, and the descriptions of SURFACE_STATE, 3DSTATE_BUFFER_INFO, + * 3DSTATE_DRAWING_RECTANGLE, 3DSTATE_MAP_INFO, and 3DSTATE_MAP_INFO. + * + * i845 through i965 limits 2D rendering to 65536 lines and pitch of 32768. + * + * i965 limits 3D surface to (2*element size)-aligned offset if un-tiled. + * i965 limits 3D surface to 4kB-aligned offset if tiled. + * i965 limits 3D surfaces to w,h of ?,8192. + * i965 limits 3D surface to pitch of 1B - 128kB. + * i965 limits 3D surface pitch alignment to 1 or 2 times the element size. + * i965 limits 3D surface pitch alignment to 512B if tiled. + * i965 limits 3D destination drawing rect to w,h of 8192,8192. + * + * i915 limits 3D textures to 4B-aligned offset if un-tiled. + * i915 limits 3D textures to ~4kB-aligned offset if tiled. + * i915 limits 3D textures to width,height of 2048,2048. + * i915 limits 3D textures to pitch of 16B - 8kB, in dwords. + * i915 limits 3D destination to ~4kB-aligned offset if tiled. + * i915 limits 3D destination to pitch of 16B - 8kB, in dwords, if un-tiled. + * i915 limits 3D destination to pitch 64B-aligned if used with depth. + * i915 limits 3D destination to pitch of 512B - 8kB, in tiles, if tiled. + * i915 limits 3D destination to POT aligned pitch if tiled. + * i915 limits 3D destination drawing rect to w,h of 2048,2048. + * + * i845 limits 3D textures to 4B-aligned offset if un-tiled. + * i845 limits 3D textures to ~4kB-aligned offset if tiled. + * i845 limits 3D textures to width,height of 2048,2048. + * i845 limits 3D textures to pitch of 4B - 8kB, in dwords. + * i845 limits 3D destination to 4B-aligned offset if un-tiled. + * i845 limits 3D destination to ~4kB-aligned offset if tiled. + * i845 limits 3D destination to pitch of 8B - 8kB, in dwords. + * i845 limits 3D destination drawing rect to w,h of 2048,2048. + * + * For the tiled issues, the only tiled buffer we draw to should be + * the front, which will have an appropriate pitch/offset already set up, + * so UXA doesn't need to worry. + */ + if (INTEL_INFO(intel)->gen >= 040) { + intel->accel_pixmap_offset_alignment = 4 * 2; + intel->accel_max_x = 8192; + intel->accel_max_y = 8192; + } else { + intel->accel_pixmap_offset_alignment = 4; + intel->accel_max_x = 2048; + intel->accel_max_y = 2048; + } +} + +static Bool intel_option_accel_blt(intel_screen_private *intel) +{ + const char *s; + + s = xf86GetOptValString(intel->Options, OPTION_ACCEL_METHOD); + if (s == NULL) + return FALSE; + + return strcasecmp(s, "blt") == 0; +} + +Bool intel_uxa_init(ScreenPtr screen) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + +#if HAS_DIXREGISTERPRIVATEKEY + if (!dixRegisterPrivateKey(&uxa_pixmap_index, PRIVATE_PIXMAP, 0)) +#else + if (!dixRequestPrivate(&uxa_pixmap_index, 0)) +#endif + return FALSE; + + intel_limits_init(intel); + + intel->uxa_driver = uxa_driver_alloc(); + if (intel->uxa_driver == NULL) + return FALSE; + + memset(intel->uxa_driver, 0, sizeof(*intel->uxa_driver)); + + intel->uxa_driver->uxa_major = 1; + intel->uxa_driver->uxa_minor = 0; + + intel->prim_offset = 0; + intel->vertex_count = 0; + intel->vertex_offset = 0; + intel->vertex_used = 0; + intel->floats_per_vertex = 0; + intel->last_floats_per_vertex = 0; + intel->vertex_bo = NULL; + intel->surface_used = 0; + intel->surface_reloc = 0; + + /* Solid fill */ + intel->uxa_driver->check_solid = intel_uxa_check_solid; + intel->uxa_driver->prepare_solid = intel_uxa_prepare_solid; + intel->uxa_driver->solid = intel_uxa_solid; + intel->uxa_driver->done_solid = intel_uxa_done; + + /* Copy */ + intel->uxa_driver->check_copy = intel_uxa_check_copy; + intel->uxa_driver->prepare_copy = intel_uxa_prepare_copy; + intel->uxa_driver->copy = intel_uxa_copy; + intel->uxa_driver->done_copy = intel_uxa_done; + + /* Composite */ + if (intel_option_accel_blt(intel)) { + } else if (IS_GEN2(intel)) { + intel->uxa_driver->check_composite = i830_check_composite; + intel->uxa_driver->check_composite_target = i830_check_composite_target; + intel->uxa_driver->check_composite_texture = i830_check_composite_texture; + intel->uxa_driver->prepare_composite = i830_prepare_composite; + intel->uxa_driver->composite = i830_composite; + intel->uxa_driver->done_composite = i830_done_composite; + + intel->vertex_flush = i830_vertex_flush; + intel->batch_commit_notify = i830_batch_commit_notify; + } else if (IS_GEN3(intel)) { + intel->uxa_driver->check_composite = i915_check_composite; + intel->uxa_driver->check_composite_target = i915_check_composite_target; + intel->uxa_driver->check_composite_texture = i915_check_composite_texture; + intel->uxa_driver->prepare_composite = i915_prepare_composite; + intel->uxa_driver->composite = i915_composite; + intel->uxa_driver->done_composite = i830_done_composite; + + intel->vertex_flush = i915_vertex_flush; + intel->batch_commit_notify = i915_batch_commit_notify; + } else { + intel->uxa_driver->check_composite = i965_check_composite; + intel->uxa_driver->check_composite_texture = i965_check_composite_texture; + intel->uxa_driver->prepare_composite = i965_prepare_composite; + intel->uxa_driver->composite = i965_composite; + intel->uxa_driver->done_composite = i830_done_composite; + + intel->vertex_flush = i965_vertex_flush; + intel->batch_flush = i965_batch_flush; + intel->batch_commit_notify = i965_batch_commit_notify; + + if (IS_GEN4(intel)) { + intel->context_switch = gen4_context_switch; + } else if (IS_GEN5(intel)) { + intel->context_switch = gen5_context_switch; + } else { + intel->context_switch = gen6_context_switch; + } + } + + /* PutImage */ + intel->uxa_driver->put_image = intel_uxa_put_image; + intel->uxa_driver->get_image = intel_uxa_get_image; + + intel->uxa_driver->prepare_access = intel_uxa_prepare_access; + intel->uxa_driver->finish_access = intel_uxa_finish_access; + intel->uxa_driver->pixmap_is_offscreen = intel_uxa_pixmap_is_offscreen; + + screen->CreatePixmap = intel_uxa_create_pixmap; + screen->DestroyPixmap = intel_uxa_destroy_pixmap; + +#ifdef CREATE_PIXMAP_USAGE_SHARED + screen->SharePixmapBacking = intel_uxa_share_pixmap_backing; + screen->SetSharedPixmapBacking = intel_uxa_set_shared_pixmap_backing; +#endif + + if (!uxa_driver_init(screen, intel->uxa_driver)) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "UXA initialization failed\n"); + free(intel->uxa_driver); + return FALSE; + } + + uxa_set_fallback_debug(screen, intel->fallback_debug); + uxa_set_force_fallback(screen, intel->force_fallback); + + return TRUE; +} diff --git a/src/uxa/intel_video.c b/src/uxa/intel_video.c new file mode 100644 index 00000000..c74b793b --- /dev/null +++ b/src/uxa/intel_video.c @@ -0,0 +1,1776 @@ +/*************************************************************************** + + Copyright 2000 Intel Corporation. All Rights Reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sub license, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial portions + of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + IN NO EVENT SHALL INTEL, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **************************************************************************/ + +/* + * i830_video.c: i830/i845 Xv driver. + * + * Copyright © 2002 by Alan Hourihane and David Dawes + * + * Authors: + * Alan Hourihane <alanh@tungstengraphics.com> + * David Dawes <dawes@xfree86.org> + * + * Derived from i810 Xv driver: + * + * Authors of i810 code: + * Jonathan Bian <jonathan.bian@intel.com> + * Offscreen Images: + * Matt Sottek <matthew.j.sottek@intel.com> + */ + +/* + * XXX Could support more formats. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <inttypes.h> +#include <math.h> +#include <string.h> +#include <assert.h> +#include <errno.h> + +#include "xf86.h" +#include "xf86_OSproc.h" +#include "compiler.h" +#include "xf86Pci.h" +#include "xf86fbman.h" +#include "xf86drm.h" +#include "regionstr.h" +#include "randrstr.h" +#include "windowstr.h" +#include "damage.h" +#include "intel.h" +#include "intel_video.h" +#include "i830_reg.h" +#include "xf86xv.h" +#include <X11/extensions/Xv.h> +#include "dixstruct.h" +#include "fourcc.h" + +#ifdef INTEL_XVMC +#define _INTEL_XVMC_SERVER_ +#include "intel_xvmc.h" +#endif + +#define OFF_DELAY 250 /* milliseconds */ + +#define OFF_TIMER 0x01 +#define CLIENT_VIDEO_ON 0x02 + +static XF86VideoAdaptorPtr I830SetupImageVideoOverlay(ScreenPtr); +static XF86VideoAdaptorPtr I830SetupImageVideoTextured(ScreenPtr); +static void I830StopVideo(ScrnInfoPtr, pointer, Bool); +static int I830SetPortAttributeOverlay(ScrnInfoPtr, Atom, INT32, pointer); +static int I830SetPortAttributeTextured(ScrnInfoPtr, Atom, INT32, pointer); +static int I830GetPortAttribute(ScrnInfoPtr, Atom, INT32 *, pointer); +static void I830QueryBestSize(ScrnInfoPtr, Bool, + short, short, short, short, unsigned int *, + unsigned int *, pointer); +static int I830PutImageTextured(ScrnInfoPtr, short, short, short, short, short, short, + short, short, int, unsigned char *, short, short, + Bool, RegionPtr, pointer, DrawablePtr); +static int I830PutImageOverlay(ScrnInfoPtr, short, short, short, short, short, short, + short, short, int, unsigned char *, short, short, + Bool, RegionPtr, pointer, DrawablePtr); +static int I830QueryImageAttributes(ScrnInfoPtr, int, unsigned short *, + unsigned short *, int *, int *); + +#define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE) + +static Atom xvBrightness, xvContrast, xvSaturation, xvColorKey, xvPipe; +static Atom xvGamma0, xvGamma1, xvGamma2, xvGamma3, xvGamma4, xvGamma5; +static Atom xvSyncToVblank; + +/* Limits for the overlay/textured video source sizes. The documented hardware + * limits are 2048x2048 or better for overlay and both of our textured video + * implementations. Additionally, on the 830 and 845, larger sizes resulted in + * the card hanging, so we keep the limits lower there. + */ +#define IMAGE_MAX_WIDTH 2048 +#define IMAGE_MAX_HEIGHT 2048 +#define IMAGE_MAX_WIDTH_LEGACY 1024 +#define IMAGE_MAX_HEIGHT_LEGACY 1088 + +/* overlay debugging printf function */ +#if 0 +#define OVERLAY_DEBUG ErrorF +#else +#define OVERLAY_DEBUG if (0) ErrorF +#endif + +/* client libraries expect an encoding */ +static const XF86VideoEncodingRec DummyEncoding[1] = { + { + 0, + "XV_IMAGE", + IMAGE_MAX_WIDTH, IMAGE_MAX_HEIGHT, + {1, 1} + } +}; + +#define NUM_FORMATS 3 + +static XF86VideoFormatRec Formats[NUM_FORMATS] = { + {15, TrueColor}, {16, TrueColor}, {24, TrueColor} +}; + +#define NUM_ATTRIBUTES 5 +static XF86AttributeRec Attributes[NUM_ATTRIBUTES] = { + {XvSettable | XvGettable, 0, (1 << 24) - 1, "XV_COLORKEY"}, + {XvSettable | XvGettable, -128, 127, "XV_BRIGHTNESS"}, + {XvSettable | XvGettable, 0, 255, "XV_CONTRAST"}, + {XvSettable | XvGettable, 0, 1023, "XV_SATURATION"}, + {XvSettable | XvGettable, -1, 1, "XV_PIPE"} +}; + +#define GAMMA_ATTRIBUTES 6 +static XF86AttributeRec GammaAttributes[GAMMA_ATTRIBUTES] = { + {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA0"}, + {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA1"}, + {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA2"}, + {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA3"}, + {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA4"}, + {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA5"} +}; + +#ifdef INTEL_XVMC +#define NUM_IMAGES 5 +#define XVMC_IMAGE 1 +#else +#define NUM_IMAGES 4 +#define XVMC_IMAGE 0 +#endif + +static XF86ImageRec Images[NUM_IMAGES] = { + XVIMAGE_YUY2, + XVIMAGE_YV12, + XVIMAGE_I420, + XVIMAGE_UYVY, +#ifdef INTEL_XVMC + { + /* + * Below, a dummy picture type that is used in XvPutImage only to do + * an overlay update. Introduced for the XvMC client lib. + * Defined to have a zero data size. + */ + FOURCC_XVMC, + XvYUV, + LSBFirst, + {'X', 'V', 'M', 'C', + 0x00, 0x00, 0x00, 0x10, 0x80, 0x00, 0x00, 0xAA, 0x00, + 0x38, 0x9B, 0x71}, + 12, + XvPlanar, + 3, + 0, 0, 0, 0, + 8, 8, 8, + 1, 2, 2, + 1, 2, 2, + {'Y', 'V', 'U', + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + XvTopToBottom}, +#endif +}; + +/* kernel modesetting overlay functions */ +static Bool intel_has_overlay(intel_screen_private *intel) +{ + struct drm_i915_getparam gp; + int has_overlay = 0; + int ret; + + gp.param = I915_PARAM_HAS_OVERLAY; + gp.value = &has_overlay; + ret = drmCommandWriteRead(intel->drmSubFD, DRM_I915_GETPARAM, &gp, sizeof(gp)); + + return ret == 0 && !! has_overlay; +} + +static Bool intel_overlay_update_attrs(intel_screen_private *intel) +{ + intel_adaptor_private *adaptor_priv = intel_get_adaptor_private(intel); + struct drm_intel_overlay_attrs attrs; + + attrs.flags = I915_OVERLAY_UPDATE_ATTRS; + attrs.brightness = adaptor_priv->brightness; + attrs.contrast = adaptor_priv->contrast; + attrs.saturation = adaptor_priv->saturation; + attrs.color_key = adaptor_priv->colorKey; + attrs.gamma0 = adaptor_priv->gamma0; + attrs.gamma1 = adaptor_priv->gamma1; + attrs.gamma2 = adaptor_priv->gamma2; + attrs.gamma3 = adaptor_priv->gamma3; + attrs.gamma4 = adaptor_priv->gamma4; + attrs.gamma5 = adaptor_priv->gamma5; + + return drmCommandWriteRead(intel->drmSubFD, DRM_I915_OVERLAY_ATTRS, + &attrs, sizeof(attrs)) == 0; +} + +static void intel_overlay_off(intel_screen_private *intel) +{ + struct drm_intel_overlay_put_image request; + int ret; + + request.flags = 0; + + ret = drmCommandWrite(intel->drmSubFD, DRM_I915_OVERLAY_PUT_IMAGE, + &request, sizeof(request)); + (void) ret; +} + +static Bool +intel_overlay_put_image(intel_screen_private *intel, + xf86CrtcPtr crtc, + int id, short width, short height, + int dstPitch, int dstPitch2, + BoxPtr dstBox, short src_w, short src_h, short drw_w, + short drw_h) +{ + intel_adaptor_private *adaptor_priv = intel_get_adaptor_private(intel); + struct drm_intel_overlay_put_image request; + int ret; + int planar = is_planar_fourcc(id); + float scale; + dri_bo *tmp; + + request.flags = I915_OVERLAY_ENABLE; + + request.bo_handle = adaptor_priv->buf->handle; + if (planar) { + request.stride_Y = dstPitch2; + request.stride_UV = dstPitch; + } else { + request.stride_Y = dstPitch; + request.stride_UV = 0; + } + request.offset_Y = adaptor_priv->YBufOffset; + request.offset_U = adaptor_priv->UBufOffset; + request.offset_V = adaptor_priv->VBufOffset; + OVERLAY_DEBUG("off_Y: %i, off_U: %i, off_V: %i\n", request.offset_Y, + request.offset_U, request.offset_V); + + request.crtc_id = intel_crtc_id(crtc); + request.dst_x = dstBox->x1; + request.dst_y = dstBox->y1; + request.dst_width = dstBox->x2 - dstBox->x1; + request.dst_height = dstBox->y2 - dstBox->y1; + + request.src_width = width; + request.src_height = height; + /* adjust src dimensions */ + if (request.dst_height > 1) { + scale = ((float)request.dst_height - 1) / ((float)drw_h - 1); + request.src_scan_height = src_h * scale; + } else + request.src_scan_height = 1; + + if (request.dst_width > 1) { + scale = ((float)request.dst_width - 1) / ((float)drw_w - 1); + request.src_scan_width = src_w * scale; + } else + request.src_scan_width = 1; + + if (planar) { + request.flags |= I915_OVERLAY_YUV_PLANAR | I915_OVERLAY_YUV420; + } else { + request.flags |= I915_OVERLAY_YUV_PACKED | I915_OVERLAY_YUV422; + if (id == FOURCC_UYVY) + request.flags |= I915_OVERLAY_Y_SWAP; + } + + ret = drmCommandWrite(intel->drmSubFD, DRM_I915_OVERLAY_PUT_IMAGE, + &request, sizeof(request)); + if (ret) + return FALSE; + + if (!adaptor_priv->reusable) { + drm_intel_bo_unreference(adaptor_priv->buf); + adaptor_priv->buf = NULL; + adaptor_priv->reusable = TRUE; + } + + tmp = adaptor_priv->old_buf[1]; + adaptor_priv->old_buf[1] = adaptor_priv->old_buf[0]; + adaptor_priv->old_buf[0] = adaptor_priv->buf; + adaptor_priv->buf = tmp; + + return TRUE; +} + +void I830InitVideo(ScreenPtr screen) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + XF86VideoAdaptorPtr *adaptors = NULL, *newAdaptors = NULL; + XF86VideoAdaptorPtr overlayAdaptor = NULL, texturedAdaptor = NULL; + int num_adaptors = xf86XVListGenericAdaptors(scrn, &adaptors); + /* Give our adaptor list enough space for the overlay and/or texture video + * adaptors. + */ + newAdaptors = realloc(adaptors, + (num_adaptors + 2) * sizeof(XF86VideoAdaptorPtr)); + if (newAdaptors == NULL) { + free(adaptors); + return; + } + adaptors = newAdaptors; + + /* Add the adaptors supported by our hardware. First, set up the atoms + * that will be used by both output adaptors. + */ + xvBrightness = MAKE_ATOM("XV_BRIGHTNESS"); + xvContrast = MAKE_ATOM("XV_CONTRAST"); + + /* Set up textured video if we can do it at this depth and we are on + * supported hardware. + */ + if (scrn->bitsPerPixel >= 16 && + INTEL_INFO(intel)->gen >= 030) { + texturedAdaptor = I830SetupImageVideoTextured(screen); + if (texturedAdaptor != NULL) { + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "Set up textured video\n"); + } else { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Failed to set up textured video\n"); + } + } + + /* Set up overlay video if it is available */ + intel->use_overlay = intel_has_overlay(intel); + if (intel->use_overlay) { + overlayAdaptor = I830SetupImageVideoOverlay(screen); + if (overlayAdaptor != NULL) { + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "Set up overlay video\n"); + } else { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Failed to set up overlay video\n"); + } + } + + if (overlayAdaptor && intel->XvPreferOverlay) + adaptors[num_adaptors++] = overlayAdaptor; + + if (texturedAdaptor) + adaptors[num_adaptors++] = texturedAdaptor; + + if (overlayAdaptor && !intel->XvPreferOverlay) + adaptors[num_adaptors++] = overlayAdaptor; + + if (num_adaptors) { + xf86XVScreenInit(screen, adaptors, num_adaptors); + } else { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "Disabling Xv because no adaptors could be initialized.\n"); + intel->XvEnabled = FALSE; + } + +#ifdef INTEL_XVMC + if (texturedAdaptor) + intel_xvmc_adaptor_init(screen); +#endif + free(adaptors); +} + +static XF86VideoAdaptorPtr I830SetupImageVideoOverlay(ScreenPtr screen) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + XF86VideoAdaptorPtr adapt; + intel_adaptor_private *adaptor_priv; + XF86AttributePtr att; + + OVERLAY_DEBUG("I830SetupImageVideoOverlay\n"); + + if (!(adapt = calloc(1, + sizeof(XF86VideoAdaptorRec) + + sizeof(intel_adaptor_private) + + sizeof(DevUnion)))) + return NULL; + + adapt->type = XvWindowMask | XvInputMask | XvImageMask; + adapt->flags = VIDEO_OVERLAID_IMAGES /*| VIDEO_CLIP_TO_VIEWPORT */ ; + adapt->name = "Intel(R) Video Overlay"; + adapt->nEncodings = 1; + adapt->pEncodings = xnfalloc(sizeof(DummyEncoding)); + memcpy(adapt->pEncodings, DummyEncoding, sizeof(DummyEncoding)); + if (IS_845G(intel) || IS_I830(intel)) { + adapt->pEncodings->width = IMAGE_MAX_WIDTH_LEGACY; + adapt->pEncodings->height = IMAGE_MAX_HEIGHT_LEGACY; + } + adapt->nFormats = NUM_FORMATS; + adapt->pFormats = Formats; + adapt->nPorts = 1; + adapt->pPortPrivates = (DevUnion *) (&adapt[1]); + + adaptor_priv = (intel_adaptor_private *)&adapt->pPortPrivates[1]; + + adapt->pPortPrivates[0].ptr = (pointer) (adaptor_priv); + adapt->nAttributes = NUM_ATTRIBUTES; + if (INTEL_INFO(intel)->gen >= 030) + adapt->nAttributes += GAMMA_ATTRIBUTES; /* has gamma */ + adapt->pAttributes = + xnfalloc(sizeof(XF86AttributeRec) * adapt->nAttributes); + /* Now copy the attributes */ + att = adapt->pAttributes; + memcpy((char *)att, (char *)Attributes, + sizeof(XF86AttributeRec) * NUM_ATTRIBUTES); + att += NUM_ATTRIBUTES; + if (INTEL_INFO(intel)->gen >= 030) { + memcpy((char *)att, (char *)GammaAttributes, + sizeof(XF86AttributeRec) * GAMMA_ATTRIBUTES); + } + adapt->nImages = NUM_IMAGES - XVMC_IMAGE; + + adapt->pImages = Images; + adapt->PutVideo = NULL; + adapt->PutStill = NULL; + adapt->GetVideo = NULL; + adapt->GetStill = NULL; + adapt->StopVideo = I830StopVideo; + adapt->SetPortAttribute = I830SetPortAttributeOverlay; + adapt->GetPortAttribute = I830GetPortAttribute; + adapt->QueryBestSize = I830QueryBestSize; + adapt->PutImage = I830PutImageOverlay; + adapt->QueryImageAttributes = I830QueryImageAttributes; + + adaptor_priv->textured = FALSE; + adaptor_priv->colorKey = intel->colorKey & ((1 << scrn->depth) - 1); + adaptor_priv->videoStatus = 0; + adaptor_priv->brightness = -19; /* (255/219) * -16 */ + adaptor_priv->contrast = 75; /* 255/219 * 64 */ + adaptor_priv->saturation = 146; /* 128/112 * 128 */ + adaptor_priv->desired_crtc = NULL; + adaptor_priv->buf = NULL; + adaptor_priv->old_buf[0] = NULL; + adaptor_priv->old_buf[1] = NULL; + adaptor_priv->gamma5 = 0xc0c0c0; + adaptor_priv->gamma4 = 0x808080; + adaptor_priv->gamma3 = 0x404040; + adaptor_priv->gamma2 = 0x202020; + adaptor_priv->gamma1 = 0x101010; + adaptor_priv->gamma0 = 0x080808; + + adaptor_priv->rotation = RR_Rotate_0; + + /* gotta uninit this someplace */ + REGION_NULL(screen, &adaptor_priv->clip); + + intel->adaptor = adapt; + + xvColorKey = MAKE_ATOM("XV_COLORKEY"); + xvBrightness = MAKE_ATOM("XV_BRIGHTNESS"); + xvContrast = MAKE_ATOM("XV_CONTRAST"); + xvSaturation = MAKE_ATOM("XV_SATURATION"); + + /* Allow the pipe to be switched from pipe A to B when in clone mode */ + xvPipe = MAKE_ATOM("XV_PIPE"); + + if (INTEL_INFO(intel)->gen >= 030) { + xvGamma0 = MAKE_ATOM("XV_GAMMA0"); + xvGamma1 = MAKE_ATOM("XV_GAMMA1"); + xvGamma2 = MAKE_ATOM("XV_GAMMA2"); + xvGamma3 = MAKE_ATOM("XV_GAMMA3"); + xvGamma4 = MAKE_ATOM("XV_GAMMA4"); + xvGamma5 = MAKE_ATOM("XV_GAMMA5"); + } + + intel_overlay_update_attrs(intel); + + return adapt; +} + +static XF86VideoAdaptorPtr I830SetupImageVideoTextured(ScreenPtr screen) +{ + ScrnInfoPtr scrn = xf86ScreenToScrn(screen); + intel_screen_private *intel = intel_get_screen_private(scrn); + XF86VideoAdaptorPtr adapt; + intel_adaptor_private *adaptor_privs; + DevUnion *devUnions; + int nports = 16, i; + + OVERLAY_DEBUG("I830SetupImageVideoOverlay\n"); + + adapt = calloc(1, sizeof(XF86VideoAdaptorRec)); + adaptor_privs = calloc(nports, sizeof(intel_adaptor_private)); + devUnions = calloc(nports, sizeof(DevUnion)); + if (adapt == NULL || adaptor_privs == NULL || devUnions == NULL) { + free(adapt); + free(adaptor_privs); + free(devUnions); + return NULL; + } + + adapt->type = XvWindowMask | XvInputMask | XvImageMask; + adapt->flags = 0; + adapt->name = "Intel(R) Textured Video"; + adapt->nEncodings = 1; + adapt->pEncodings = xnfalloc(sizeof(DummyEncoding)); + memcpy(adapt->pEncodings, DummyEncoding, sizeof(DummyEncoding)); + adapt->nFormats = NUM_FORMATS; + adapt->pFormats = Formats; + adapt->nPorts = nports; + adapt->pPortPrivates = devUnions; + adapt->nAttributes = 0; + adapt->pAttributes = NULL; + if (IS_I915G(intel) || IS_I915GM(intel)) + adapt->nImages = NUM_IMAGES - XVMC_IMAGE; + else + adapt->nImages = NUM_IMAGES; + + adapt->pImages = Images; + adapt->PutVideo = NULL; + adapt->PutStill = NULL; + adapt->GetVideo = NULL; + adapt->GetStill = NULL; + adapt->StopVideo = I830StopVideo; + adapt->SetPortAttribute = I830SetPortAttributeTextured; + adapt->GetPortAttribute = I830GetPortAttribute; + adapt->QueryBestSize = I830QueryBestSize; + adapt->PutImage = I830PutImageTextured; + adapt->QueryImageAttributes = I830QueryImageAttributes; + + for (i = 0; i < nports; i++) { + intel_adaptor_private *adaptor_priv = &adaptor_privs[i]; + + adaptor_priv->textured = TRUE; + adaptor_priv->videoStatus = 0; + adaptor_priv->buf = NULL; + adaptor_priv->old_buf[0] = NULL; + adaptor_priv->old_buf[1] = NULL; + + adaptor_priv->rotation = RR_Rotate_0; + adaptor_priv->SyncToVblank = 1; + + /* gotta uninit this someplace, XXX: shouldn't be necessary for textured */ + REGION_NULL(screen, &adaptor_priv->clip); + + adapt->pPortPrivates[i].ptr = (pointer) (adaptor_priv); + } + + xvSyncToVblank = MAKE_ATOM("XV_SYNC_TO_VBLANK"); + + return adapt; +} + +static void intel_free_video_buffers(intel_adaptor_private *adaptor_priv) +{ + int i; + + for (i = 0; i < 2; i++) { + if (adaptor_priv->old_buf[i]) { + drm_intel_bo_disable_reuse(adaptor_priv->old_buf[i]); + drm_intel_bo_unreference(adaptor_priv->old_buf[i]); + adaptor_priv->old_buf[i] = NULL; + } + } + + if (adaptor_priv->buf) { + drm_intel_bo_unreference(adaptor_priv->buf); + adaptor_priv->buf = NULL; + } +} + +static void I830StopVideo(ScrnInfoPtr scrn, pointer data, Bool shutdown) +{ + intel_adaptor_private *adaptor_priv = (intel_adaptor_private *) data; + + if (adaptor_priv->textured) + return; + + OVERLAY_DEBUG("I830StopVideo\n"); + + REGION_EMPTY(scrn->pScreen, &adaptor_priv->clip); + + if (shutdown) { + if (adaptor_priv->videoStatus & CLIENT_VIDEO_ON) + intel_overlay_off(intel_get_screen_private(scrn)); + + intel_free_video_buffers(adaptor_priv); + adaptor_priv->videoStatus = 0; + } else { + if (adaptor_priv->videoStatus & CLIENT_VIDEO_ON) { + adaptor_priv->videoStatus |= OFF_TIMER; + adaptor_priv->offTime = currentTime.milliseconds + OFF_DELAY; + } + } + +} + +static int +I830SetPortAttributeTextured(ScrnInfoPtr scrn, + Atom attribute, INT32 value, pointer data) +{ + intel_adaptor_private *adaptor_priv = (intel_adaptor_private *) data; + + if (attribute == xvBrightness) { + if ((value < -128) || (value > 127)) + return BadValue; + adaptor_priv->brightness = value; + return Success; + } else if (attribute == xvContrast) { + if ((value < 0) || (value > 255)) + return BadValue; + adaptor_priv->contrast = value; + return Success; + } else if (attribute == xvSyncToVblank) { + if ((value < -1) || (value > 1)) + return BadValue; + adaptor_priv->SyncToVblank = value; + return Success; + } else { + return BadMatch; + } +} + +static int +I830SetPortAttributeOverlay(ScrnInfoPtr scrn, + Atom attribute, INT32 value, pointer data) +{ + intel_adaptor_private *adaptor_priv = (intel_adaptor_private *) data; + intel_screen_private *intel = intel_get_screen_private(scrn); + + if (attribute == xvBrightness) { + if ((value < -128) || (value > 127)) + return BadValue; + adaptor_priv->brightness = value; + OVERLAY_DEBUG("BRIGHTNESS\n"); + } else if (attribute == xvContrast) { + if ((value < 0) || (value > 255)) + return BadValue; + adaptor_priv->contrast = value; + OVERLAY_DEBUG("CONTRAST\n"); + } else if (attribute == xvSaturation) { + if ((value < 0) || (value > 1023)) + return BadValue; + adaptor_priv->saturation = value; + } else if (attribute == xvPipe) { + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); + if ((value < -1) || (value >= xf86_config->num_crtc)) + return BadValue; + if (value < 0) + adaptor_priv->desired_crtc = NULL; + else + adaptor_priv->desired_crtc = xf86_config->crtc[value]; + } else if (attribute == xvGamma0 && (INTEL_INFO(intel)->gen >= 030)) { + adaptor_priv->gamma0 = value; + } else if (attribute == xvGamma1 && (INTEL_INFO(intel)->gen >= 030)) { + adaptor_priv->gamma1 = value; + } else if (attribute == xvGamma2 && (INTEL_INFO(intel)->gen >= 030)) { + adaptor_priv->gamma2 = value; + } else if (attribute == xvGamma3 && (INTEL_INFO(intel)->gen >= 030)) { + adaptor_priv->gamma3 = value; + } else if (attribute == xvGamma4 && (INTEL_INFO(intel)->gen >= 030)) { + adaptor_priv->gamma4 = value; + } else if (attribute == xvGamma5 && (INTEL_INFO(intel)->gen >= 030)) { + adaptor_priv->gamma5 = value; + } else if (attribute == xvColorKey) { + adaptor_priv->colorKey = value; + OVERLAY_DEBUG("COLORKEY\n"); + } else + return BadMatch; + + if ((attribute == xvGamma0 || + attribute == xvGamma1 || + attribute == xvGamma2 || + attribute == xvGamma3 || + attribute == xvGamma4 || + attribute == xvGamma5) && (INTEL_INFO(intel)->gen >= 030)) { + OVERLAY_DEBUG("GAMMA\n"); + } + + if (!intel_overlay_update_attrs(intel)) + return BadValue; + + if (attribute == xvColorKey) + REGION_EMPTY(scrn->pScreen, &adaptor_priv->clip); + + return Success; +} + +static int +I830GetPortAttribute(ScrnInfoPtr scrn, + Atom attribute, INT32 * value, pointer data) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + intel_adaptor_private *adaptor_priv = (intel_adaptor_private *) data; + + if (attribute == xvBrightness) { + *value = adaptor_priv->brightness; + } else if (attribute == xvContrast) { + *value = adaptor_priv->contrast; + } else if (attribute == xvSaturation) { + *value = adaptor_priv->saturation; + } else if (attribute == xvPipe) { + int c; + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); + for (c = 0; c < xf86_config->num_crtc; c++) + if (xf86_config->crtc[c] == adaptor_priv->desired_crtc) + break; + if (c == xf86_config->num_crtc) + c = -1; + *value = c; + } else if (attribute == xvGamma0 && (INTEL_INFO(intel)->gen >= 030)) { + *value = adaptor_priv->gamma0; + } else if (attribute == xvGamma1 && (INTEL_INFO(intel)->gen >= 030)) { + *value = adaptor_priv->gamma1; + } else if (attribute == xvGamma2 && (INTEL_INFO(intel)->gen >= 030)) { + *value = adaptor_priv->gamma2; + } else if (attribute == xvGamma3 && (INTEL_INFO(intel)->gen >= 030)) { + *value = adaptor_priv->gamma3; + } else if (attribute == xvGamma4 && (INTEL_INFO(intel)->gen >= 030)) { + *value = adaptor_priv->gamma4; + } else if (attribute == xvGamma5 && (INTEL_INFO(intel)->gen >= 030)) { + *value = adaptor_priv->gamma5; + } else if (attribute == xvColorKey) { + *value = adaptor_priv->colorKey; + } else if (attribute == xvSyncToVblank) { + *value = adaptor_priv->SyncToVblank; + } else + return BadMatch; + + return Success; +} + +static void +I830QueryBestSize(ScrnInfoPtr scrn, + Bool motion, + short vid_w, short vid_h, + short drw_w, short drw_h, + unsigned int *p_w, unsigned int *p_h, pointer data) +{ + if (vid_w > (drw_w << 1)) + drw_w = vid_w >> 1; + if (vid_h > (drw_h << 1)) + drw_h = vid_h >> 1; + + *p_w = drw_w; + *p_h = drw_h; +} + +static Bool +I830CopyPackedData(intel_adaptor_private *adaptor_priv, + unsigned char *buf, + int srcPitch, int dstPitch, int top, int left, int h, int w) +{ + unsigned char *src, *dst, *dst_base; + int i, j; + unsigned char *s; + +#if 0 + ErrorF("I830CopyPackedData: (%d,%d) (%d,%d)\n" + "srcPitch: %d, dstPitch: %d\n", top, left, h, w, + srcPitch, dstPitch); +#endif + + src = buf + (top * srcPitch) + (left << 1); + + if (drm_intel_gem_bo_map_gtt(adaptor_priv->buf)) + return FALSE; + + dst_base = adaptor_priv->buf->virtual; + + dst = dst_base + adaptor_priv->YBufOffset; + + switch (adaptor_priv->rotation) { + case RR_Rotate_0: + w <<= 1; + for (i = 0; i < h; i++) { + memcpy(dst, src, w); + src += srcPitch; + dst += dstPitch; + } + break; + case RR_Rotate_90: + h <<= 1; + for (i = 0; i < h; i += 2) { + s = src; + for (j = 0; j < w; j++) { + /* Copy Y */ + dst[(i + 0) + ((w - j - 1) * dstPitch)] = *s++; + (void)*s++; + } + src += srcPitch; + } + h >>= 1; + src = buf + (top * srcPitch) + (left << 1); + for (i = 0; i < h; i += 2) { + for (j = 0; j < w; j += 2) { + /* Copy U */ + dst[((i * 2) + 1) + ((w - j - 1) * dstPitch)] = + src[(j * 2) + 1 + (i * srcPitch)]; + dst[((i * 2) + 1) + ((w - j - 2) * dstPitch)] = + src[(j * 2) + 1 + ((i + 1) * srcPitch)]; + /* Copy V */ + dst[((i * 2) + 3) + ((w - j - 1) * dstPitch)] = + src[(j * 2) + 3 + (i * srcPitch)]; + dst[((i * 2) + 3) + ((w - j - 2) * dstPitch)] = + src[(j * 2) + 3 + ((i + 1) * srcPitch)]; + } + } + break; + case RR_Rotate_180: + w <<= 1; + for (i = 0; i < h; i++) { + s = src; + for (j = 0; j < w; j += 4) { + dst[(w - j - 4) + ((h - i - 1) * dstPitch)] = + *s++; + dst[(w - j - 3) + ((h - i - 1) * dstPitch)] = + *s++; + dst[(w - j - 2) + ((h - i - 1) * dstPitch)] = + *s++; + dst[(w - j - 1) + ((h - i - 1) * dstPitch)] = + *s++; + } + src += srcPitch; + } + break; + case RR_Rotate_270: + h <<= 1; + for (i = 0; i < h; i += 2) { + s = src; + for (j = 0; j < w; j++) { + /* Copy Y */ + dst[(h - i - 2) + (j * dstPitch)] = *s++; + (void)*s++; + } + src += srcPitch; + } + h >>= 1; + src = buf + (top * srcPitch) + (left << 1); + for (i = 0; i < h; i += 2) { + for (j = 0; j < w; j += 2) { + /* Copy U */ + dst[(((h - i) * 2) - 3) + (j * dstPitch)] = + src[(j * 2) + 1 + (i * srcPitch)]; + dst[(((h - i) * 2) - 3) + + ((j + 1) * dstPitch)] = + src[(j * 2) + 1 + ((i + 1) * srcPitch)]; + /* Copy V */ + dst[(((h - i) * 2) - 1) + (j * dstPitch)] = + src[(j * 2) + 3 + (i * srcPitch)]; + dst[(((h - i) * 2) - 1) + + ((j + 1) * dstPitch)] = + src[(j * 2) + 3 + ((i + 1) * srcPitch)]; + } + } + break; + } + + drm_intel_gem_bo_unmap_gtt(adaptor_priv->buf); + return TRUE; +} + +static void intel_memcpy_plane(unsigned char *dst, unsigned char *src, + int height, int width, + int dstPitch, int srcPitch, Rotation rotation) +{ + int i, j = 0; + unsigned char *s; + + switch (rotation) { + case RR_Rotate_0: + /* optimise for the case of no clipping */ + if (srcPitch == dstPitch && srcPitch == width) + memcpy(dst, src, srcPitch * height); + else + for (i = 0; i < height; i++) { + memcpy(dst, src, width); + src += srcPitch; + dst += dstPitch; + } + break; + case RR_Rotate_90: + for (i = 0; i < height; i++) { + s = src; + for (j = 0; j < width; j++) { + dst[(i) + ((width - j - 1) * dstPitch)] = *s++; + } + src += srcPitch; + } + break; + case RR_Rotate_180: + for (i = 0; i < height; i++) { + s = src; + for (j = 0; j < width; j++) { + dst[(width - j - 1) + + ((height - i - 1) * dstPitch)] = *s++; + } + src += srcPitch; + } + break; + case RR_Rotate_270: + for (i = 0; i < height; i++) { + s = src; + for (j = 0; j < width; j++) { + dst[(height - i - 1) + (j * dstPitch)] = *s++; + } + src += srcPitch; + } + break; + } +} + +static Bool +I830CopyPlanarData(intel_adaptor_private *adaptor_priv, + unsigned char *buf, int srcPitch, int srcPitch2, + int dstPitch, int dstPitch2, + int srcH, int top, int left, + int h, int w, int id) +{ + unsigned char *src1, *src2, *src3, *dst_base, *dst1, *dst2, *dst3; + +#if 0 + ErrorF("I830CopyPlanarData: srcPitch %d, srcPitch %d, dstPitch %d\n" + "nlines %d, npixels %d, top %d, left %d\n", + srcPitch, srcPitch2, dstPitch, h, w, top, left); +#endif + + /* Copy Y data */ + src1 = buf + (top * srcPitch) + left; +#if 0 + ErrorF("src1 is %p, offset is %ld\n", src1, + (unsigned long)src1 - (unsigned long)buf); +#endif + + if (drm_intel_gem_bo_map_gtt(adaptor_priv->buf)) + return FALSE; + + dst_base = adaptor_priv->buf->virtual; + + dst1 = dst_base + adaptor_priv->YBufOffset; + + intel_memcpy_plane(dst1, src1, h, w, dstPitch2, srcPitch, + adaptor_priv->rotation); + + /* Copy V data for YV12, or U data for I420 */ + src2 = buf + /* start of YUV data */ + (srcH * srcPitch) + /* move over Luma plane */ + ((top >> 1) * srcPitch2) + /* move down from by top lines */ + (left >> 1); /* move left by left pixels */ + +#if 0 + ErrorF("src2 is %p, offset is %ld\n", src2, + (unsigned long)src2 - (unsigned long)buf); +#endif + if (id == FOURCC_I420) + dst2 = dst_base + adaptor_priv->UBufOffset; + else + dst2 = dst_base + adaptor_priv->VBufOffset; + + intel_memcpy_plane(dst2, src2, h / 2, w / 2, + dstPitch, srcPitch2, adaptor_priv->rotation); + + /* Copy U data for YV12, or V data for I420 */ + src3 = buf + /* start of YUV data */ + (srcH * srcPitch) + /* move over Luma plane */ + ((srcH >> 1) * srcPitch2) + /* move over Chroma plane */ + ((top >> 1) * srcPitch2) + /* move down from by top lines */ + (left >> 1); /* move left by left pixels */ +#if 0 + ErrorF("src3 is %p, offset is %ld\n", src3, + (unsigned long)src3 - (unsigned long)buf); +#endif + if (id == FOURCC_I420) + dst3 = dst_base + adaptor_priv->VBufOffset; + else + dst3 = dst_base + adaptor_priv->UBufOffset; + + intel_memcpy_plane(dst3, src3, h / 2, w / 2, + dstPitch, srcPitch2, adaptor_priv->rotation); + + drm_intel_gem_bo_unmap_gtt(adaptor_priv->buf); + return TRUE; +} + +static void intel_box_intersect(BoxPtr dest, BoxPtr a, BoxPtr b) +{ + dest->x1 = a->x1 > b->x1 ? a->x1 : b->x1; + dest->x2 = a->x2 < b->x2 ? a->x2 : b->x2; + dest->y1 = a->y1 > b->y1 ? a->y1 : b->y1; + dest->y2 = a->y2 < b->y2 ? a->y2 : b->y2; + if (dest->x1 >= dest->x2 || dest->y1 >= dest->y2) + dest->x1 = dest->x2 = dest->y1 = dest->y2 = 0; +} + +static void intel_crtc_box(xf86CrtcPtr crtc, BoxPtr crtc_box) +{ + if (crtc->enabled) { + crtc_box->x1 = crtc->x; + crtc_box->x2 = + crtc->x + xf86ModeWidth(&crtc->mode, crtc->rotation); + crtc_box->y1 = crtc->y; + crtc_box->y2 = + crtc->y + xf86ModeHeight(&crtc->mode, crtc->rotation); + } else + crtc_box->x1 = crtc_box->x2 = crtc_box->y1 = crtc_box->y2 = 0; +} + +static int intel_box_area(BoxPtr box) +{ + return (int)(box->x2 - box->x1) * (int)(box->y2 - box->y1); +} + +/* + * Return the crtc covering 'box'. If two crtcs cover a portion of + * 'box', then prefer 'desired'. If 'desired' is NULL, then prefer the crtc + * with greater coverage + */ + +xf86CrtcPtr +intel_covering_crtc(ScrnInfoPtr scrn, + BoxPtr box, xf86CrtcPtr desired, BoxPtr crtc_box_ret) +{ + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); + xf86CrtcPtr crtc, best_crtc; + int coverage, best_coverage; + int c; + BoxRec crtc_box, cover_box; + + best_crtc = NULL; + best_coverage = 0; + crtc_box_ret->x1 = 0; + crtc_box_ret->x2 = 0; + crtc_box_ret->y1 = 0; + crtc_box_ret->y2 = 0; + for (c = 0; c < xf86_config->num_crtc; c++) { + crtc = xf86_config->crtc[c]; + + /* If the CRTC is off, treat it as not covering */ + if (!intel_crtc_on(crtc)) + continue; + + intel_crtc_box(crtc, &crtc_box); + intel_box_intersect(&cover_box, &crtc_box, box); + coverage = intel_box_area(&cover_box); + if (coverage && crtc == desired) { + *crtc_box_ret = crtc_box; + return crtc; + } + if (coverage > best_coverage) { + *crtc_box_ret = crtc_box; + best_crtc = crtc; + best_coverage = coverage; + } + } + return best_crtc; +} + +static void +intel_update_dst_box_to_crtc_coords(ScrnInfoPtr scrn, xf86CrtcPtr crtc, + BoxPtr dstBox) +{ + int tmp; + + /* for overlay, we should take it from crtc's screen + * coordinate to current crtc's display mode. + * yeah, a bit confusing. + */ + switch (crtc->rotation & 0xf) { + case RR_Rotate_0: + dstBox->x1 -= crtc->x; + dstBox->x2 -= crtc->x; + dstBox->y1 -= crtc->y; + dstBox->y2 -= crtc->y; + break; + case RR_Rotate_90: + tmp = dstBox->x1; + dstBox->x1 = dstBox->y1 - crtc->x; + dstBox->y1 = scrn->virtualX - tmp - crtc->y; + tmp = dstBox->x2; + dstBox->x2 = dstBox->y2 - crtc->x; + dstBox->y2 = scrn->virtualX - tmp - crtc->y; + tmp = dstBox->y1; + dstBox->y1 = dstBox->y2; + dstBox->y2 = tmp; + break; + case RR_Rotate_180: + tmp = dstBox->x1; + dstBox->x1 = scrn->virtualX - dstBox->x2 - crtc->x; + dstBox->x2 = scrn->virtualX - tmp - crtc->x; + tmp = dstBox->y1; + dstBox->y1 = scrn->virtualY - dstBox->y2 - crtc->y; + dstBox->y2 = scrn->virtualY - tmp - crtc->y; + break; + case RR_Rotate_270: + tmp = dstBox->x1; + dstBox->x1 = scrn->virtualY - dstBox->y1 - crtc->x; + dstBox->y1 = tmp - crtc->y; + tmp = dstBox->x2; + dstBox->x2 = scrn->virtualY - dstBox->y2 - crtc->x; + dstBox->y2 = tmp - crtc->y; + tmp = dstBox->x1; + dstBox->x1 = dstBox->x2; + dstBox->x2 = tmp; + break; + } + + return; +} + +int is_planar_fourcc(int id) +{ + switch (id) { + case FOURCC_YV12: + case FOURCC_I420: +#ifdef INTEL_XVMC + case FOURCC_XVMC: +#endif + return 1; + case FOURCC_UYVY: + case FOURCC_YUY2: + return 0; + default: + ErrorF("Unknown format 0x%x\n", id); + return 0; + } +} + +static int xvmc_passthrough(int id) +{ +#ifdef INTEL_XVMC + return id == FOURCC_XVMC; +#else + return 0; +#endif +} + +static Bool +intel_display_overlay(ScrnInfoPtr scrn, xf86CrtcPtr crtc, + int id, short width, short height, + int dstPitch, int dstPitch2, + BoxPtr dstBox, short src_w, short src_h, short drw_w, + short drw_h) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + int tmp; + + OVERLAY_DEBUG("I830DisplayVideo: %dx%d (pitch %d)\n", width, height, + dstPitch); + + /* + * If the video isn't visible on any CRTC, turn it off + */ + if (!crtc) { + intel_overlay_off(intel); + return TRUE; + } + + intel_update_dst_box_to_crtc_coords(scrn, crtc, dstBox); + + if (crtc->rotation & (RR_Rotate_90 | RR_Rotate_270)) { + tmp = width; + width = height; + height = tmp; + tmp = drw_w; + drw_w = drw_h; + drw_h = tmp; + tmp = src_w; + src_w = src_h; + src_h = tmp; + } + + return intel_overlay_put_image(intel, crtc, id, + width, height, + dstPitch, dstPitch2, dstBox, + src_w, src_h, drw_w, drw_h); +} + +static Bool +intel_clip_video_helper(ScrnInfoPtr scrn, + intel_adaptor_private *adaptor_priv, + xf86CrtcPtr * crtc_ret, + BoxPtr dst, + short src_x, short src_y, + short drw_x, short drw_y, + short src_w, short src_h, + short drw_w, short drw_h, + int id, + int *top, int* left, int* npixels, int *nlines, + RegionPtr reg, INT32 width, INT32 height) +{ + Bool ret; + RegionRec crtc_region_local; + RegionPtr crtc_region = reg; + BoxRec crtc_box; + INT32 x1, x2, y1, y2; + xf86CrtcPtr crtc; + + x1 = src_x; + x2 = src_x + src_w; + y1 = src_y; + y2 = src_y + src_h; + + dst->x1 = drw_x; + dst->x2 = drw_x + drw_w; + dst->y1 = drw_y; + dst->y2 = drw_y + drw_h; + + /* + * For overlay video, compute the relevant CRTC and + * clip video to that + */ + crtc = intel_covering_crtc(scrn, dst, adaptor_priv->desired_crtc, + &crtc_box); + + /* For textured video, we don't actually want to clip at all. */ + if (crtc && !adaptor_priv->textured) { + REGION_INIT(screen, &crtc_region_local, &crtc_box, 1); + crtc_region = &crtc_region_local; + REGION_INTERSECT(screen, crtc_region, crtc_region, + reg); + } + *crtc_ret = crtc; + + ret = xf86XVClipVideoHelper(dst, &x1, &x2, &y1, &y2, + crtc_region, width, height); + if (crtc_region != reg) + REGION_UNINIT(screen, &crtc_region_local); + + *top = y1 >> 16; + *left = (x1 >> 16) & ~1; + *npixels = ALIGN(((x2 + 0xffff) >> 16), 2) - *left; + if (is_planar_fourcc(id)) { + *top &= ~1; + *nlines = ALIGN(((y2 + 0xffff) >> 16), 2) - *top; + } else + *nlines = ((y2 + 0xffff) >> 16) - *top; + + return ret; +} + +static void +intel_wait_for_scanline(ScrnInfoPtr scrn, PixmapPtr pixmap, + xf86CrtcPtr crtc, RegionPtr clipBoxes) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + pixman_box16_t box, crtc_box; + int pipe, event; + Bool full_height; + int y1, y2; + + pipe = -1; + if (scrn->vtSema && pixmap_is_scanout(pixmap)) + pipe = intel_crtc_to_pipe(crtc); + if (pipe < 0) + return; + + box = *REGION_EXTENTS(unused, clipBoxes); + + if (crtc->transform_in_use) + pixman_f_transform_bounds(&crtc->f_framebuffer_to_crtc, &box); + + /* We could presume the clip was correctly computed... */ + intel_crtc_box(crtc, &crtc_box); + intel_box_intersect(&box, &crtc_box, &box); + + /* + * Make sure we don't wait for a scanline that will + * never occur + */ + y1 = (crtc_box.y1 <= box.y1) ? box.y1 - crtc_box.y1 : 0; + y2 = (box.y2 <= crtc_box.y2) ? + box.y2 - crtc_box.y1 : crtc_box.y2 - crtc_box.y1; + if (y2 <= y1) + return; + + full_height = FALSE; + if (y1 == 0 && y2 == (crtc_box.y2 - crtc_box.y1)) + full_height = TRUE; + + /* + * Pre-965 doesn't have SVBLANK, so we need a bit + * of extra time for the blitter to start up and + * do its job for a full height blit + */ + if (full_height && INTEL_INFO(intel)->gen < 040) + y2 -= 2; + + if (pipe == 0) { + pipe = MI_LOAD_SCAN_LINES_DISPLAY_PIPEA; + event = MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW; + if (full_height && INTEL_INFO(intel)->gen >= 040) + event = MI_WAIT_FOR_PIPEA_SVBLANK; + } else { + pipe = MI_LOAD_SCAN_LINES_DISPLAY_PIPEB; + event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW; + if (full_height && INTEL_INFO(intel)->gen >= 040) + event = MI_WAIT_FOR_PIPEB_SVBLANK; + } + + if (crtc->mode.Flags & V_INTERLACE) { + /* DSL count field lines */ + y1 /= 2; + y2 /= 2; + } + + BEGIN_BATCH(5); + /* The documentation says that the LOAD_SCAN_LINES command + * always comes in pairs. Don't ask me why. */ + OUT_BATCH(MI_LOAD_SCAN_LINES_INCL | pipe); + OUT_BATCH((y1 << 16) | (y2-1)); + OUT_BATCH(MI_LOAD_SCAN_LINES_INCL | pipe); + OUT_BATCH((y1 << 16) | (y2-1)); + OUT_BATCH(MI_WAIT_FOR_EVENT | event); + ADVANCE_BATCH(); +} + +static Bool +intel_setup_video_buffer(ScrnInfoPtr scrn, intel_adaptor_private *adaptor_priv, + int alloc_size, int id, unsigned char *buf) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + + /* Free the current buffer if we're going to have to reallocate */ + if (adaptor_priv->buf && adaptor_priv->buf->size < alloc_size) + intel_free_video_buffers(adaptor_priv); + + if (adaptor_priv->buf == NULL) { + adaptor_priv->buf = drm_intel_bo_alloc(intel->bufmgr, "xv buffer", + alloc_size, 4096); + if (adaptor_priv->buf == NULL) + return FALSE; + + adaptor_priv->reusable = TRUE; + } + + return TRUE; +} + +static void +intel_setup_dst_params(ScrnInfoPtr scrn, intel_adaptor_private *adaptor_priv, short width, + short height, int *dstPitch, int *dstPitch2, int *size, + int id) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + int pitchAlign; + + /* Only needs to be DWORD-aligned for textured on i915, but overlay has + * stricter requirements. + */ + if (adaptor_priv->textured) { + pitchAlign = 4; + } else { + if (INTEL_INFO(intel)->gen >= 040) + /* Actually the alignment is 64 bytes, too. But the + * stride must be at least 512 bytes. Take the easy fix + * and align on 512 bytes unconditionally. */ + pitchAlign = 512; + else if (IS_I830(intel) || IS_845G(intel)) + /* Harsh, errata on these chipsets limit the stride to be + * a multiple of 256 bytes. + */ + pitchAlign = 256; + else + pitchAlign = 64; + } + +#if INTEL_XVMC + /* for i915 xvmc, hw requires 1kb aligned surfaces */ + if ((id == FOURCC_XVMC) && IS_GEN3(intel)) + pitchAlign = 1024; +#endif + + /* Determine the desired destination pitch (representing the chroma's pitch, + * in the planar case. + */ + if (is_planar_fourcc(id)) { + if (adaptor_priv->rotation & (RR_Rotate_90 | RR_Rotate_270)) { + *dstPitch = ALIGN((height / 2), pitchAlign); + *dstPitch2 = ALIGN(height, pitchAlign); + *size = *dstPitch * width * 3; + } else { + *dstPitch = ALIGN((width / 2), pitchAlign); + *dstPitch2 = ALIGN(width, pitchAlign); + *size = *dstPitch * height * 3; + } + } else { + if (adaptor_priv->rotation & (RR_Rotate_90 | RR_Rotate_270)) { + *dstPitch = ALIGN((height << 1), pitchAlign); + *size = *dstPitch * width; + } else { + *dstPitch = ALIGN((width << 1), pitchAlign); + *size = *dstPitch * height; + } + *dstPitch2 = 0; + } +#if 0 + ErrorF("srcPitch: %d, dstPitch: %d, size: %d\n", srcPitch, *dstPitch, + size); +#endif + + adaptor_priv->YBufOffset = 0; + + if (adaptor_priv->rotation & (RR_Rotate_90 | RR_Rotate_270)) { + adaptor_priv->UBufOffset = + adaptor_priv->YBufOffset + (*dstPitch2 * width); + adaptor_priv->VBufOffset = + adaptor_priv->UBufOffset + (*dstPitch * width / 2); + } else { + adaptor_priv->UBufOffset = + adaptor_priv->YBufOffset + (*dstPitch2 * height); + adaptor_priv->VBufOffset = + adaptor_priv->UBufOffset + (*dstPitch * height / 2); + } +} + +static Bool +intel_copy_video_data(ScrnInfoPtr scrn, intel_adaptor_private *adaptor_priv, + short width, short height, int *dstPitch, int *dstPitch2, + int top, int left, int npixels, int nlines, + int id, unsigned char *buf) +{ + int srcPitch = 0, srcPitch2 = 0; + int size; + + if (is_planar_fourcc(id)) { + srcPitch = ALIGN(width, 0x4); + srcPitch2 = ALIGN((width >> 1), 0x4); + } else { + srcPitch = width << 1; + } + + intel_setup_dst_params(scrn, adaptor_priv, width, height, dstPitch, + dstPitch2, &size, id); + + if (!intel_setup_video_buffer(scrn, adaptor_priv, size, id, buf)) + return FALSE; + + /* copy data */ + if (is_planar_fourcc(id)) { + return I830CopyPlanarData(adaptor_priv, buf, srcPitch, srcPitch2, + *dstPitch, *dstPitch2, + height, top, left, nlines, + npixels, id); + } else { + return I830CopyPackedData(adaptor_priv, buf, srcPitch, *dstPitch, top, left, + nlines, npixels); + } +} + +/* + * The source rectangle of the video is defined by (src_x, src_y, src_w, src_h). + * The dest rectangle of the video is defined by (drw_x, drw_y, drw_w, drw_h). + * id is a fourcc code for the format of the video. + * buf is the pointer to the source data in system memory. + * width and height are the w/h of the source data. + * If "sync" is TRUE, then we must be finished with *buf at the point of return + * (which we always are). + * clipBoxes is the clipping region in screen space. + * data is a pointer to our port private. + * drawable is some Drawable, which might not be the screen in the case of + * compositing. It's a new argument to the function in the 1.1 server. + */ +static int +I830PutImageTextured(ScrnInfoPtr scrn, + short src_x, short src_y, + short drw_x, short drw_y, + short src_w, short src_h, + short drw_w, short drw_h, + int id, unsigned char *buf, + short width, short height, + Bool sync, RegionPtr clipBoxes, pointer data, + DrawablePtr drawable) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + intel_adaptor_private *adaptor_priv = (intel_adaptor_private *) data; + PixmapPtr pixmap = get_drawable_pixmap(drawable); + int dstPitch, dstPitch2; + BoxRec dstBox; + xf86CrtcPtr crtc; + int top, left, npixels, nlines; + + if (!intel_pixmap_is_offscreen(pixmap)) + return BadAlloc; + +#if 0 + ErrorF("I830PutImage: src: (%d,%d)(%d,%d), dst: (%d,%d)(%d,%d)\n" + "width %d, height %d\n", src_x, src_y, src_w, src_h, drw_x, + drw_y, drw_w, drw_h, width, height); +#endif + + if (!intel_clip_video_helper(scrn, + adaptor_priv, + &crtc, + &dstBox, + src_x, src_y, drw_x, drw_y, + src_w, src_h, drw_w, drw_h, + id, + &top, &left, &npixels, &nlines, clipBoxes, + width, height)) + return Success; + + if (xvmc_passthrough(id)) { + uint32_t *gem_handle = (uint32_t *)buf; + int size; + + intel_setup_dst_params(scrn, adaptor_priv, width, height, + &dstPitch, &dstPitch2, &size, id); + + if (IS_I915G(intel) || IS_I915GM(intel)) { + /* XXX: i915 is not support and needs some + * serious care. grep for KMS in i915_hwmc.c */ + return BadAlloc; + } + + if (adaptor_priv->buf) + drm_intel_bo_unreference(adaptor_priv->buf); + + adaptor_priv->buf = + drm_intel_bo_gem_create_from_name(intel->bufmgr, + "xvmc surface", + *gem_handle); + if (adaptor_priv->buf == NULL) + return BadAlloc; + + adaptor_priv->reusable = FALSE; + } else { + if (!intel_copy_video_data(scrn, adaptor_priv, width, height, + &dstPitch, &dstPitch2, + top, left, npixels, nlines, id, buf)) + return BadAlloc; + } + + if (crtc && adaptor_priv->SyncToVblank != 0 && INTEL_INFO(intel)->gen < 060) { + intel_wait_for_scanline(scrn, pixmap, crtc, clipBoxes); + } + + if (INTEL_INFO(intel)->gen >= 060) { + Gen6DisplayVideoTextured(scrn, adaptor_priv, id, clipBoxes, + width, height, dstPitch, dstPitch2, + src_w, src_h, + drw_w, drw_h, pixmap); + } else if (INTEL_INFO(intel)->gen >= 040) { + I965DisplayVideoTextured(scrn, adaptor_priv, id, clipBoxes, + width, height, dstPitch, dstPitch2, + src_w, src_h, + drw_w, drw_h, pixmap); + } else { + I915DisplayVideoTextured(scrn, adaptor_priv, id, clipBoxes, + width, height, dstPitch, dstPitch2, + src_w, src_h, drw_w, drw_h, + pixmap); + } + + intel_get_screen_private(scrn)->needs_flush = TRUE; + DamageDamageRegion(drawable, clipBoxes); + + /* And make sure the WAIT_FOR_EVENT is queued before any + * modesetting/dpms operations on the pipe. + */ + intel_batch_submit(scrn); + + return Success; +} + +static int +I830PutImageOverlay(ScrnInfoPtr scrn, + short src_x, short src_y, + short drw_x, short drw_y, + short src_w, short src_h, + short drw_w, short drw_h, + int id, unsigned char *buf, + short width, short height, + Bool sync, RegionPtr clipBoxes, pointer data, + DrawablePtr drawable) +{ + intel_adaptor_private *adaptor_priv = (intel_adaptor_private *) data; + int dstPitch, dstPitch2; + BoxRec dstBox; + xf86CrtcPtr crtc; + int top, left, npixels, nlines; + +#if 0 + ErrorF("I830PutImage: src: (%d,%d)(%d,%d), dst: (%d,%d)(%d,%d)\n" + "width %d, height %d\n", src_x, src_y, src_w, src_h, drw_x, + drw_y, drw_w, drw_h, width, height); +#endif + + /* If dst width and height are less than 1/8th the src size, the + * src/dst scale factor becomes larger than 8 and doesn't fit in + * the scale register. */ + if (src_w >= (drw_w * 8)) + drw_w = src_w / 7; + + if (src_h >= (drw_h * 8)) + drw_h = src_h / 7; + + if (!intel_clip_video_helper(scrn, + adaptor_priv, + &crtc, + &dstBox, + src_x, src_y, drw_x, drw_y, + src_w, src_h, drw_w, drw_h, + id, + &top, &left, &npixels, &nlines, clipBoxes, + width, height)) + return Success; + + /* overlay can't handle rotation natively, store it for the copy func */ + if (crtc) + adaptor_priv->rotation = crtc->rotation; + else { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "Fail to clip video to any crtc!\n"); + return Success; + } + + if (!intel_copy_video_data(scrn, adaptor_priv, width, height, + &dstPitch, &dstPitch2, + top, left, npixels, nlines, id, buf)) + return BadAlloc; + + if (!intel_display_overlay + (scrn, crtc, id, width, height, dstPitch, dstPitch2, + &dstBox, src_w, src_h, drw_w, drw_h)) + return BadAlloc; + + /* update cliplist */ + if (!REGION_EQUAL(scrn->pScreen, &adaptor_priv->clip, clipBoxes)) { + REGION_COPY(scrn->pScreen, &adaptor_priv->clip, clipBoxes); + xf86XVFillKeyHelperDrawable(drawable, + adaptor_priv->colorKey, + clipBoxes); + } + + adaptor_priv->videoStatus = CLIENT_VIDEO_ON; + + return Success; +} + +static int +I830QueryImageAttributes(ScrnInfoPtr scrn, + int id, + unsigned short *w, unsigned short *h, + int *pitches, int *offsets) +{ + intel_screen_private *intel = intel_get_screen_private(scrn); + int size, tmp; + +#if 0 + ErrorF("I830QueryImageAttributes: w is %d, h is %d\n", *w, *h); +#endif + + if (IS_845G(intel) || IS_I830(intel)) { + if (*w > IMAGE_MAX_WIDTH_LEGACY) + *w = IMAGE_MAX_WIDTH_LEGACY; + if (*h > IMAGE_MAX_HEIGHT_LEGACY) + *h = IMAGE_MAX_HEIGHT_LEGACY; + } else { + if (*w > IMAGE_MAX_WIDTH) + *w = IMAGE_MAX_WIDTH; + if (*h > IMAGE_MAX_HEIGHT) + *h = IMAGE_MAX_HEIGHT; + } + + *w = (*w + 1) & ~1; + if (offsets) + offsets[0] = 0; + + switch (id) { + /* IA44 is for XvMC only */ + case FOURCC_IA44: + case FOURCC_AI44: + if (pitches) + pitches[0] = *w; + size = *w * *h; + break; + case FOURCC_YV12: + case FOURCC_I420: + *h = (*h + 1) & ~1; + size = (*w + 3) & ~3; + if (pitches) + pitches[0] = size; + size *= *h; + if (offsets) + offsets[1] = size; + tmp = ((*w >> 1) + 3) & ~3; + if (pitches) + pitches[1] = pitches[2] = tmp; + tmp *= (*h >> 1); + size += tmp; + if (offsets) + offsets[2] = size; + size += tmp; +#if 0 + if (pitches) + ErrorF("pitch 0 is %d, pitch 1 is %d, pitch 2 is %d\n", + pitches[0], pitches[1], pitches[2]); + if (offsets) + ErrorF("offset 1 is %d, offset 2 is %d\n", offsets[1], + offsets[2]); + if (offsets) + ErrorF("size is %d\n", size); +#endif + break; +#ifdef INTEL_XVMC + case FOURCC_XVMC: + *h = (*h + 1) & ~1; + size = sizeof(struct intel_xvmc_command); + if (pitches) + pitches[0] = size; + break; +#endif + case FOURCC_UYVY: + case FOURCC_YUY2: + default: + size = *w << 1; + if (pitches) + pitches[0] = size; + size *= *h; + break; + } + + return size; +} + +void +intel_video_block_handler(intel_screen_private *intel) +{ + intel_adaptor_private *adaptor_priv; + + /* no overlay */ + if (intel->adaptor == NULL) + return; + + adaptor_priv = intel_get_adaptor_private(intel); + if (adaptor_priv->videoStatus & OFF_TIMER) { + Time now = currentTime.milliseconds; + if (adaptor_priv->offTime < now) { + /* Turn off the overlay */ + intel_overlay_off(intel); + intel_free_video_buffers(adaptor_priv); + adaptor_priv->videoStatus = 0; + } + } +} diff --git a/src/uxa/intel_video.h b/src/uxa/intel_video.h new file mode 100644 index 00000000..f405d40b --- /dev/null +++ b/src/uxa/intel_video.h @@ -0,0 +1,95 @@ +/*************************************************************************** + +Copyright 2000 Intel Corporation. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +IN NO EVENT SHALL INTEL, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +#include "xf86.h" +#include "xf86_OSproc.h" + +typedef struct { + uint32_t YBufOffset; + uint32_t UBufOffset; + uint32_t VBufOffset; + + int brightness; + int contrast; + int saturation; + xf86CrtcPtr desired_crtc; + + RegionRec clip; + uint32_t colorKey; + + uint32_t gamma0; + uint32_t gamma1; + uint32_t gamma2; + uint32_t gamma3; + uint32_t gamma4; + uint32_t gamma5; + + /* only used by the overlay */ + uint32_t videoStatus; + Time offTime; + Time freeTime; + /** YUV data buffers */ + drm_intel_bo *buf, *old_buf[2]; + Bool reusable; + + Bool textured; + Rotation rotation; /* should remove intel->rotation later */ + + int SyncToVblank; /* -1: auto, 0: off, 1: on */ +} intel_adaptor_private; + +static inline intel_adaptor_private * +intel_get_adaptor_private(intel_screen_private *intel) +{ + return intel->adaptor->pPortPrivates[0].ptr; +} + +void I915DisplayVideoTextured(ScrnInfoPtr scrn, + intel_adaptor_private *adaptor_priv, + int id, RegionPtr dstRegion, short width, + short height, int video_pitch, int video_pitch2, + short src_w, short src_h, + short drw_w, short drw_h, PixmapPtr pixmap); + +void I965DisplayVideoTextured(ScrnInfoPtr scrn, + intel_adaptor_private *adaptor_priv, + int id, RegionPtr dstRegion, short width, + short height, int video_pitch, int video_pitch2, + short src_w, short src_h, + short drw_w, short drw_h, PixmapPtr pixmap); + +void Gen6DisplayVideoTextured(ScrnInfoPtr scrn, + intel_adaptor_private *adaptor_priv, + int id, RegionPtr dstRegion, short width, + short height, int video_pitch, int video_pitch2, + short src_w, short src_h, + short drw_w, short drw_h, PixmapPtr pixmap); + +void i965_free_video(ScrnInfoPtr scrn); + +int is_planar_fourcc(int id); + +void intel_video_block_handler(intel_screen_private *intel); diff --git a/src/uxa/uxa_module.h b/src/uxa/uxa_module.h new file mode 100644 index 00000000..7a248f08 --- /dev/null +++ b/src/uxa/uxa_module.h @@ -0,0 +1,6 @@ +#ifndef INTEL_MODULE_H +#define INTEL_MODULE_H + +extern Bool intel_init_scrn(ScrnInfoPtr scrn); + +#endif /* INTEL_MODULE_H */ |